"""
Metadata Manager for file information storage in MongoDB
"""

import logging
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from motor.motor_asyncio import AsyncIOMotorClient

from models import FileType, FileStatus

logger = logging.getLogger(__name__)


class MetadataManager:
    """Async store for file metadata documents kept in a MongoDB collection.

    Documents live in the ``files`` collection of the configured database and
    are addressed by an application-level ``id`` field (not MongoDB's
    ``_id``). Deletion is soft: a document is kept and its ``status`` is set
    to ``FileStatus.DELETED.value``; listing, statistics and duplicate
    lookups skip such documents.
    """

    # Projection that keeps MongoDB's internal ``_id`` out of returned docs.
    _NO_MONGO_ID = {"_id": 0}

    def __init__(self, mongodb_url: str, database: str = "files_db"):
        """Store connection settings; no I/O happens until ``connect()``.

        Args:
            mongodb_url: MongoDB connection URI.
            database: Name of the database holding the ``files`` collection.
        """
        self.mongodb_url = mongodb_url
        self.database_name = database
        self.client: Optional[AsyncIOMotorClient] = None
        self.db = None
        self.collection = None
        self.is_connected = False

    @staticmethod
    def _utcnow() -> datetime:
        """Return the current time as a timezone-aware UTC datetime.

        PyMongo treats naive datetimes as UTC, so a naive local
        ``datetime.now()`` would be stored shifted by the host's UTC offset.
        """
        return datetime.now(timezone.utc)

    @staticmethod
    def _redact_url(url: str) -> str:
        """Return *url* with any ``user:password@`` part replaced by ``***@``.

        Used so that credentials embedded in the connection URI never reach
        the logs. URLs without a scheme or without userinfo come back
        unchanged.
        """
        scheme, sep, rest = url.partition("://")
        if not sep:
            return url
        authority, slash, tail = rest.partition("/")
        # rpartition: only the last "@" separates userinfo from the host list.
        _userinfo, at, hosts = authority.rpartition("@")
        if not at:
            return url
        return f"{scheme}://***@{hosts}{slash}{tail}"

    @staticmethod
    def _not_deleted_filter() -> Dict[str, Any]:
        """Return a fresh query fragment matching non-deleted documents."""
        return {"status": {"$ne": FileStatus.DELETED.value}}

    async def connect(self) -> None:
        """Connect to MongoDB, verify the server responds, and create indexes.

        Raises:
            Exception: Whatever the driver raised while connecting or
                pinging. The client is closed and the manager is reset to
                its disconnected state before the error is re-raised.
        """
        try:
            self.client = AsyncIOMotorClient(self.mongodb_url)
            self.db = self.client[self.database_name]
            self.collection = self.db.files

            # Ping first: if the server is unreachable we fail here once,
            # instead of logging a misleading index error and then waiting
            # for a second timeout on the ping.
            await self.client.admin.command('ping')

            await self._create_indexes()

            self.is_connected = True
            logger.info(
                "Connected to MongoDB at %s",
                self._redact_url(self.mongodb_url),
            )

        except Exception as e:
            logger.error("Failed to connect to MongoDB: %s", e)
            # Do not leak a half-initialised client.
            if self.client is not None:
                self.client.close()
            self.client = None
            self.db = None
            self.collection = None
            self.is_connected = False
            raise

    async def _create_indexes(self) -> None:
        """Create the collection indexes used by the query methods.

        Best-effort: a failure is logged and swallowed so that a problem
        with index creation does not abort ``connect()``.
        """
        try:
            await self.collection.create_index("user_id")
            await self.collection.create_index("bucket")
            await self.collection.create_index("created_at")
            await self.collection.create_index("file_type")
            # Text index backing the ``$text`` search in list_files().
            await self.collection.create_index([("filename", "text")])
            await self.collection.create_index(
                [("user_id", 1), ("created_at", -1)]
            )

            logger.info("Database indexes created")

        except Exception as e:
            logger.error("Failed to create indexes: %s", e)

    async def create_file_metadata(self, metadata: Dict[str, Any]) -> str:
        """Insert a new file metadata document and return its ``id``.

        ``metadata`` is updated in place with the generated fields:
        ``created_at``/``updated_at`` (same UTC instant), ``download_count``
        (0), ``status`` (``FileStatus.READY.value``) and, when absent, a
        UUID4 ``id``. The driver-assigned ``_id`` is *not* added to it.

        Args:
            metadata: Document fields supplied by the caller.

        Returns:
            The application-level ``id`` of the stored document.

        Raises:
            Exception: Any driver error from the insert (logged, re-raised).
        """
        try:
            now = self._utcnow()
            metadata["created_at"] = now
            metadata["updated_at"] = now
            metadata["download_count"] = 0
            metadata["status"] = FileStatus.READY.value

            # Generate unique ID if not provided
            if "id" not in metadata:
                metadata["id"] = str(uuid.uuid4())

            # insert_one() adds an ``_id`` key to the dict it is given;
            # insert a shallow copy so the caller's dict stays free of it.
            await self.collection.insert_one(dict(metadata))

            logger.info("Created metadata for file: %s", metadata["id"])
            return metadata["id"]

        except Exception as e:
            logger.error("Failed to create file metadata: %s", e)
            raise

    async def get_file_metadata(self, file_id: str) -> Optional[Dict[str, Any]]:
        """Return the metadata document for ``file_id``, or ``None``.

        The lookup does not filter on ``status``, so soft-deleted documents
        are returned too. MongoDB's ``_id`` is excluded from the result.

        Raises:
            Exception: Any driver error from the query (logged, re-raised).
        """
        try:
            return await self.collection.find_one(
                {"id": file_id}, self._NO_MONGO_ID
            )

        except Exception as e:
            logger.error("Failed to get file metadata: %s", e)
            raise

    async def update_file_metadata(self, file_id: str, updates: Dict[str, Any]) -> Dict[str, Any]:
        """Apply ``updates`` to a document via ``$set`` and return the result.

        ``updated_at`` is always refreshed. The caller's ``updates`` dict is
        not modified.

        Args:
            file_id: Application-level ``id`` of the document.
            updates: Field/value pairs to set.

        Returns:
            The document as re-read after the update (without ``_id``).

        Raises:
            LookupError: No document has this ``id``.
            Exception: Any driver error (logged, re-raised).
        """
        try:
            changes = {**updates, "updated_at": self._utcnow()}

            result = await self.collection.update_one(
                {"id": file_id},
                {"$set": changes}
            )

            # matched_count, not modified_count: a matched document whose
            # stored values already equal ``changes`` reports 0 modified
            # and must not be mistaken for "not found".
            if result.matched_count == 0:
                raise LookupError(f"File {file_id} not found")

            return await self.get_file_metadata(file_id)

        except Exception as e:
            logger.error("Failed to update file metadata: %s", e)
            raise

    async def delete_file_metadata(self, file_id: str) -> bool:
        """Soft-delete a document by marking its status as deleted.

        Sets ``status`` to ``FileStatus.DELETED.value`` and stamps
        ``deleted_at``/``updated_at`` with the same UTC instant.

        Returns:
            ``True`` if a document with this ``id`` exists, else ``False``.

        Raises:
            Exception: Any driver error (logged, re-raised).
        """
        try:
            now = self._utcnow()
            updates = {
                "status": FileStatus.DELETED.value,
                "deleted_at": now,
                "updated_at": now
            }

            result = await self.collection.update_one(
                {"id": file_id},
                {"$set": updates}
            )

            # See update_file_metadata(): matched_count is the reliable
            # "document exists" signal.
            return result.matched_count > 0

        except Exception as e:
            logger.error("Failed to delete file metadata: %s", e)
            raise

    async def list_files(self,
                         user_id: Optional[str] = None,
                         bucket: Optional[str] = None,
                         limit: int = 20,
                         offset: int = 0,
                         search: Optional[str] = None,
                         file_type: Optional[str] = None,
                         sort_by: str = "created_at",
                         order: str = "desc") -> Dict[str, Any]:
        """List non-deleted files with optional filtering and pagination.

        Args:
            user_id: Restrict to this ``user_id`` when truthy.
            bucket: Restrict to this ``bucket`` when truthy.
            limit: Page size passed to the cursor's ``limit()``.
            offset: Number of documents passed to the cursor's ``skip()``.
            search: ``$text`` search string when truthy.
            file_type: Restrict to this ``file_type`` when truthy.
            sort_by: Field name to sort on.
            order: ``"desc"`` sorts descending; any other value ascending.

        Returns:
            Dict with ``files`` (documents without ``_id``), ``total``
            (count of all documents matching the filter), ``limit``,
            ``offset`` and ``has_more``.

        Raises:
            Exception: Any driver error (logged, re-raised).
        """
        try:
            query = self._not_deleted_filter()

            if user_id:
                query["user_id"] = user_id
            if bucket:
                query["bucket"] = bucket
            if file_type:
                query["file_type"] = file_type
            if search:
                query["$text"] = {"$search": search}

            total = await self.collection.count_documents(query)

            sort_order = -1 if order == "desc" else 1

            cursor = (
                self.collection.find(query, self._NO_MONGO_ID)
                .sort(sort_by, sort_order)
                .skip(offset)
                .limit(limit)
            )
            files = [doc async for doc in cursor]

            return {
                "files": files,
                "total": total,
                "limit": limit,
                "offset": offset,
                "has_more": (offset + limit) < total
            }

        except Exception as e:
            logger.error("Failed to list files: %s", e)
            raise

    async def increment_download_count(self, file_id: str) -> None:
        """Increment ``download_count`` and stamp ``last_accessed`` (UTC).

        Best-effort: failures are logged and not propagated, so a counter
        problem never breaks the caller.
        """
        try:
            await self.collection.update_one(
                {"id": file_id},
                {
                    "$inc": {"download_count": 1},
                    "$set": {"last_accessed": self._utcnow()}
                }
            )
        except Exception as e:
            logger.error("Failed to increment download count: %s", e)

    async def get_storage_stats(self) -> Dict[str, Any]:
        """Return aggregate statistics over all non-deleted files.

        Returns:
            Dict with ``total_files``, ``total_size`` (sum of ``size``),
            ``users_count`` (distinct ``user_id`` values) and ``file_types``
            (count per ``file_type``; groups with a falsy type are omitted).

        Raises:
            Exception: Any driver error (logged, re-raised).
        """
        try:
            totals_pipeline = [
                {"$match": self._not_deleted_filter()},
                {
                    "$group": {
                        "_id": None,
                        "total_files": {"$sum": 1},
                        "total_size": {"$sum": "$size"},
                        "users": {"$addToSet": "$user_id"}
                    }
                }
            ]

            totals = await self.collection.aggregate(
                totals_pipeline
            ).to_list(length=1)

            # An empty collection yields no group document at all.
            stats = totals[0] if totals else {}
            users_count = len(stats.get("users", []))

            type_pipeline = [
                {"$match": self._not_deleted_filter()},
                {
                    "$group": {
                        "_id": "$file_type",
                        "count": {"$sum": 1}
                    }
                }
            ]

            type_results = await self.collection.aggregate(
                type_pipeline
            ).to_list(length=None)

            file_types = {
                item["_id"]: item["count"]
                for item in type_results
                if item["_id"]
            }

            return {
                "total_files": stats.get("total_files", 0),
                "total_size": stats.get("total_size", 0),
                "users_count": users_count,
                "file_types": file_types
            }

        except Exception as e:
            logger.error("Failed to get storage stats: %s", e)
            raise

    async def find_duplicate_files(self, file_hash: str) -> List[Dict[str, Any]]:
        """Return all non-deleted documents whose ``hash`` equals ``file_hash``.

        Raises:
            Exception: Any driver error (logged, re-raised).
        """
        try:
            query = {"hash": file_hash, **self._not_deleted_filter()}
            cursor = self.collection.find(query, self._NO_MONGO_ID)
            return [doc async for doc in cursor]

        except Exception as e:
            logger.error("Failed to find duplicate files: %s", e)
            raise

    async def get_user_storage_usage(self, user_id: str) -> Dict[str, Any]:
        """Return storage usage for one user, grouped by ``file_type``.

        Returns:
            Dict with ``user_id``, ``total_files``, ``total_size`` and
            ``breakdown`` (``{file_type: {"count", "size"}}``). Totals cover
            every group; ``breakdown`` omits groups with a falsy type.

        Raises:
            Exception: Any driver error (logged, re-raised).
        """
        try:
            pipeline = [
                {
                    "$match": {
                        "user_id": user_id,
                        **self._not_deleted_filter()
                    }
                },
                {
                    "$group": {
                        "_id": "$file_type",
                        "count": {"$sum": 1},
                        "size": {"$sum": "$size"}
                    }
                }
            ]

            results = await self.collection.aggregate(
                pipeline
            ).to_list(length=None)

            total_size = sum(item["size"] for item in results)
            total_files = sum(item["count"] for item in results)

            breakdown = {
                item["_id"]: {
                    "count": item["count"],
                    "size": item["size"]
                }
                for item in results
                if item["_id"]
            }

            return {
                "user_id": user_id,
                "total_files": total_files,
                "total_size": total_size,
                "breakdown": breakdown
            }

        except Exception as e:
            logger.error("Failed to get user storage usage: %s", e)
            raise

    async def close(self) -> None:
        """Close the MongoDB client, if one was created."""
        if self.client:
            self.client.close()
            self.is_connected = False
            logger.info("MongoDB connection closed")