Files
site11/services/files/backend/metadata_manager.py
2025-09-28 20:41:57 +09:00

331 lines
11 KiB
Python

"""
Metadata Manager for file information storage in MongoDB
"""
from motor.motor_asyncio import AsyncIOMotorClient
from datetime import datetime
from typing import Optional, Dict, Any, List
import logging
import uuid
from models import FileType, FileStatus
logger = logging.getLogger(__name__)
class MetadataManager:
    """Stores and queries file metadata documents in MongoDB.

    Documents live in the ``files`` collection of the configured database and
    are addressed by an application-level ``id`` field (a UUID string), not by
    MongoDB's internal ``_id``.  Deletions are soft: documents are marked with
    ``FileStatus.DELETED`` and filtered out of queries rather than removed.
    """

    def __init__(self, mongodb_url: str, database: str = "files_db"):
        """Remember connection settings; no I/O happens until connect()."""
        self.mongodb_url = mongodb_url
        self.database_name = database
        self.client = None       # AsyncIOMotorClient, set by connect()
        self.db = None           # database handle, set by connect()
        self.collection = None   # the ``files`` collection, set by connect()
        self.is_connected = False

    async def connect(self):
        """Connect to MongoDB, verify reachability, and ensure indexes.

        Raises:
            Exception: re-raised from the driver when the server is
                unreachable or index creation setup fails; ``is_connected``
                is left False in that case.
        """
        try:
            self.client = AsyncIOMotorClient(self.mongodb_url)
            self.db = self.client[self.database_name]
            self.collection = self.db.files
            # Test connection *before* creating indexes, so an unreachable
            # server fails fast with a clear ping error instead of an
            # index-creation error.
            await self.client.admin.command('ping')
            # Create indexes
            await self._create_indexes()
            self.is_connected = True
            logger.info(f"Connected to MongoDB at {self.mongodb_url}")
        except Exception as e:
            logger.error(f"Failed to connect to MongoDB: {e}")
            self.is_connected = False
            raise

    async def _create_indexes(self):
        """Create database indexes for better performance.

        Best-effort: failures are logged but not raised, since queries still
        work without indexes (just slower).
        """
        try:
            await self.collection.create_index("user_id")
            await self.collection.create_index("bucket")
            await self.collection.create_index("created_at")
            await self.collection.create_index("file_type")
            # Text index backs the $text search used by list_files().
            await self.collection.create_index([("filename", "text")])
            # Compound index for the common "user's newest files" listing.
            await self.collection.create_index([("user_id", 1), ("created_at", -1)])
            logger.info("Database indexes created")
        except Exception as e:
            logger.error(f"Failed to create indexes: {e}")

    async def create_file_metadata(self, metadata: Dict[str, Any]) -> str:
        """Insert a new metadata document and return its application-level id.

        Server-managed fields (timestamps, download counter, status, and a
        generated UUID ``id`` when absent) are filled in before insertion.
        Note: the passed dict is mutated in place.

        Raises:
            Exception: re-raised from the driver on insert failure.
        """
        try:
            # Single timestamp so created_at == updated_at on creation.
            # NOTE(review): naive local time — consider tz-aware UTC; changing
            # it would alter stored values, so left as-is here.
            now = datetime.now()
            metadata["created_at"] = now
            metadata["updated_at"] = now
            metadata["download_count"] = 0
            metadata["status"] = FileStatus.READY.value
            # Generate unique ID if not provided
            if "id" not in metadata:
                metadata["id"] = str(uuid.uuid4())
            await self.collection.insert_one(metadata)
            logger.info(f"Created metadata for file: {metadata['id']}")
            return metadata["id"]
        except Exception as e:
            logger.error(f"Failed to create file metadata: {e}")
            raise

    async def get_file_metadata(self, file_id: str) -> Optional[Dict[str, Any]]:
        """Return the metadata document for *file_id*, or None if not found."""
        try:
            metadata = await self.collection.find_one({"id": file_id})
            if metadata:
                # Strip MongoDB's internal _id (an ObjectId, not JSON-safe).
                metadata.pop("_id", None)
            return metadata
        except Exception as e:
            logger.error(f"Failed to get file metadata: {e}")
            raise

    async def update_file_metadata(self, file_id: str, updates: Dict[str, Any]) -> Dict[str, Any]:
        """Apply *updates* via $set and return the refreshed document.

        ``updated_at`` is stamped automatically.  Note: the passed dict is
        mutated in place.

        Raises:
            Exception: if no document with *file_id* exists, or re-raised
                from the driver on failure.
        """
        try:
            # Add update timestamp
            updates["updated_at"] = datetime.now()
            result = await self.collection.update_one(
                {"id": file_id},
                {"$set": updates}
            )
            # Check matched_count, not modified_count: an update that sets
            # values identical to the stored ones matches but modifies
            # nothing, and must not be reported as "not found".
            if result.matched_count == 0:
                raise Exception(f"File {file_id} not found")
            # Return updated metadata
            return await self.get_file_metadata(file_id)
        except Exception as e:
            logger.error(f"Failed to update file metadata: {e}")
            raise

    async def delete_file_metadata(self, file_id: str) -> bool:
        """Soft-delete a file's metadata; return True if a document changed.

        The document is kept but marked DELETED with a ``deleted_at`` stamp.
        """
        try:
            # Single timestamp so deleted_at == updated_at.
            now = datetime.now()
            updates = {
                "status": FileStatus.DELETED.value,
                "deleted_at": now,
                "updated_at": now
            }
            result = await self.collection.update_one(
                {"id": file_id},
                {"$set": updates}
            )
            return result.modified_count > 0
        except Exception as e:
            logger.error(f"Failed to delete file metadata: {e}")
            raise

    async def list_files(self, user_id: Optional[str] = None,
                         bucket: Optional[str] = None,
                         limit: int = 20,
                         offset: int = 0,
                         search: Optional[str] = None,
                         file_type: Optional[str] = None,
                         sort_by: str = "created_at",
                         order: str = "desc") -> Dict[str, Any]:
        """List non-deleted files with filtering and offset pagination.

        Args:
            user_id / bucket / file_type: optional equality filters.
            search: optional full-text search against the text index.
            limit / offset: page size and skip count.
            sort_by: field to sort on; ``order`` is "desc" or anything
                else for ascending.

        Returns:
            Dict with ``files`` (``_id`` stripped), ``total``, ``limit``,
            ``offset``, and ``has_more``.
        """
        try:
            # Soft-deleted documents are always excluded.
            query = {"status": {"$ne": FileStatus.DELETED.value}}
            if user_id:
                query["user_id"] = user_id
            if bucket:
                query["bucket"] = bucket
            if file_type:
                query["file_type"] = file_type
            if search:
                # Uses the ("filename", "text") index from _create_indexes().
                query["$text"] = {"$search": search}
            # Total matches (pre-pagination) for has_more computation.
            total = await self.collection.count_documents(query)
            sort_order = -1 if order == "desc" else 1
            cursor = self.collection.find(query)\
                .sort(sort_by, sort_order)\
                .skip(offset)\
                .limit(limit)
            files = []
            async for doc in cursor:
                doc.pop("_id", None)
                files.append(doc)
            return {
                "files": files,
                "total": total,
                "limit": limit,
                "offset": offset,
                "has_more": (offset + limit) < total
            }
        except Exception as e:
            logger.error(f"Failed to list files: {e}")
            raise

    async def increment_download_count(self, file_id: str):
        """Increment a file's download counter and stamp last access.

        Best-effort: errors are logged, not raised, so a stats failure
        never blocks the actual download.
        """
        try:
            await self.collection.update_one(
                {"id": file_id},
                {
                    "$inc": {"download_count": 1},
                    "$set": {"last_accessed": datetime.now()}
                }
            )
        except Exception as e:
            logger.error(f"Failed to increment download count: {e}")

    async def get_storage_stats(self) -> Dict[str, Any]:
        """Aggregate global storage statistics over non-deleted files.

        Returns:
            Dict with ``total_files``, ``total_size`` (sum of ``size``),
            ``users_count`` (distinct user_ids), and ``file_types``
            (type -> count, excluding documents with a falsy file_type).
        """
        try:
            # Single-group pipeline: totals plus the distinct-user set.
            pipeline = [
                {"$match": {"status": {"$ne": FileStatus.DELETED.value}}},
                {
                    "$group": {
                        "_id": None,
                        "total_files": {"$sum": 1},
                        "total_size": {"$sum": "$size"},
                        "users": {"$addToSet": "$user_id"}
                    }
                }
            ]
            cursor = self.collection.aggregate(pipeline)
            result = await cursor.to_list(length=1)
            if result:
                stats = result[0]
                users_count = len(stats.get("users", []))
            else:
                # Empty collection: aggregation yields no group document.
                stats = {"total_files": 0, "total_size": 0}
                users_count = 0
            # Per-file-type document counts.
            type_pipeline = [
                {"$match": {"status": {"$ne": FileStatus.DELETED.value}}},
                {
                    "$group": {
                        "_id": "$file_type",
                        "count": {"$sum": 1}
                    }
                }
            ]
            type_cursor = self.collection.aggregate(type_pipeline)
            type_results = await type_cursor.to_list(length=None)
            file_types = {
                item["_id"]: item["count"]
                for item in type_results if item["_id"]
            }
            return {
                "total_files": stats.get("total_files", 0),
                "total_size": stats.get("total_size", 0),
                "users_count": users_count,
                "file_types": file_types
            }
        except Exception as e:
            logger.error(f"Failed to get storage stats: {e}")
            raise

    async def find_duplicate_files(self, file_hash: str) -> List[Dict[str, Any]]:
        """Return all non-deleted documents whose ``hash`` equals *file_hash*."""
        try:
            cursor = self.collection.find({
                "hash": file_hash,
                "status": {"$ne": FileStatus.DELETED.value}
            })
            duplicates = []
            async for doc in cursor:
                doc.pop("_id", None)
                duplicates.append(doc)
            return duplicates
        except Exception as e:
            logger.error(f"Failed to find duplicate files: {e}")
            raise

    async def get_user_storage_usage(self, user_id: str) -> Dict[str, Any]:
        """Aggregate per-user storage usage, broken down by file type.

        Returns:
            Dict with ``user_id``, ``total_files``, ``total_size``, and
            ``breakdown`` (file_type -> {count, size}, excluding falsy
            file_type groups).
        """
        try:
            pipeline = [
                {
                    "$match": {
                        "user_id": user_id,
                        "status": {"$ne": FileStatus.DELETED.value}
                    }
                },
                {
                    "$group": {
                        "_id": "$file_type",
                        "count": {"$sum": 1},
                        "size": {"$sum": "$size"}
                    }
                }
            ]
            cursor = self.collection.aggregate(pipeline)
            results = await cursor.to_list(length=None)
            # Totals include every group; the breakdown skips falsy types.
            total_size = sum(item["size"] for item in results)
            total_files = sum(item["count"] for item in results)
            breakdown = {
                item["_id"]: {
                    "count": item["count"],
                    "size": item["size"]
                }
                for item in results if item["_id"]
            }
            return {
                "user_id": user_id,
                "total_files": total_files,
                "total_size": total_size,
                "breakdown": breakdown
            }
        except Exception as e:
            logger.error(f"Failed to get user storage usage: {e}")
            raise

    async def close(self):
        """Close the MongoDB connection and clear the connected flag."""
        if self.client:
            self.client.close()
            self.is_connected = False
            logger.info("MongoDB connection closed")