331 lines
11 KiB
Python
331 lines
11 KiB
Python
"""
|
|
Metadata Manager for file information storage in MongoDB
|
|
"""
|
|
from motor.motor_asyncio import AsyncIOMotorClient
|
|
from datetime import datetime
|
|
from typing import Optional, Dict, Any, List
|
|
import logging
|
|
import uuid
|
|
from models import FileType, FileStatus
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class MetadataManager:
    """Async manager for file-metadata documents stored in MongoDB.

    Documents are keyed by an application-level ``id`` field (a UUID string),
    not by MongoDB's internal ``_id``, which is stripped from every payload
    returned to callers. Deletion is soft: documents are flagged with
    ``FileStatus.DELETED`` and excluded from queries rather than removed.
    """

    def __init__(self, mongodb_url: str, database: str = "files_db"):
        """Store connection settings; no I/O happens until connect().

        Args:
            mongodb_url: MongoDB connection string.
            database: Database name holding the ``files`` collection.
        """
        self.mongodb_url = mongodb_url
        self.database_name = database
        self.client: Optional[AsyncIOMotorClient] = None
        self.db = None
        self.collection = None
        self.is_connected = False

    async def connect(self):
        """Connect to MongoDB, verify reachability, and create indexes.

        Raises:
            Exception: Propagates any driver error; ``is_connected`` is left
                False in that case.
        """
        try:
            self.client = AsyncIOMotorClient(self.mongodb_url)
            self.db = self.client[self.database_name]
            self.collection = self.db.files

            # Ping BEFORE doing any real work: AsyncIOMotorClient() does not
            # connect eagerly, so this is the first actual round-trip and the
            # clearest "server unreachable" signal. (The original created
            # indexes first, which masked connectivity errors.)
            await self.client.admin.command('ping')

            await self._create_indexes()

            self.is_connected = True
            logger.info("Connected to MongoDB at %s", self.mongodb_url)

        except Exception as e:
            logger.error("Failed to connect to MongoDB: %s", e)
            self.is_connected = False
            raise

    async def _create_indexes(self):
        """Create query-supporting indexes; failures are logged, not raised.

        Index creation is best-effort — the manager still functions (slower)
        without them, so this never aborts connect().
        """
        try:
            await self.collection.create_index("user_id")
            await self.collection.create_index("bucket")
            await self.collection.create_index("created_at")
            await self.collection.create_index("file_type")
            # Text index backing the $text filename search in list_files().
            await self.collection.create_index([("filename", "text")])
            # Compound index for the common "user's files, newest first" listing.
            await self.collection.create_index([("user_id", 1), ("created_at", -1)])

            logger.info("Database indexes created")

        except Exception as e:
            logger.error("Failed to create indexes: %s", e)

    async def create_file_metadata(self, metadata: Dict[str, Any]) -> str:
        """Insert a new metadata document and return its ``id``.

        Mutates *metadata* in place, stamping timestamps, a zero download
        counter, READY status, and a generated UUID ``id`` when absent.

        Args:
            metadata: Caller-supplied document fields (filename, size, ...).

        Returns:
            The document's ``id`` string.

        Raises:
            Exception: Propagates any insert failure after logging it.
        """
        try:
            # Single snapshot so created_at == updated_at exactly on creation.
            # NOTE(review): naive local time, matching existing documents;
            # consider datetime.now(timezone.utc) project-wide — TODO confirm.
            now = datetime.now()
            metadata["created_at"] = now
            metadata["updated_at"] = now
            metadata["download_count"] = 0
            metadata["status"] = FileStatus.READY.value

            # Generate a unique ID only if the caller did not supply one.
            metadata.setdefault("id", str(uuid.uuid4()))

            await self.collection.insert_one(metadata)

            logger.info("Created metadata for file: %s", metadata["id"])
            return metadata["id"]

        except Exception as e:
            logger.error("Failed to create file metadata: %s", e)
            raise

    async def get_file_metadata(self, file_id: str) -> Optional[Dict[str, Any]]:
        """Return the metadata document for *file_id*, or None if absent.

        MongoDB's ``_id`` field is stripped before returning.
        """
        try:
            metadata = await self.collection.find_one({"id": file_id})

            if metadata:
                metadata.pop("_id", None)

            return metadata

        except Exception as e:
            logger.error("Failed to get file metadata: %s", e)
            raise

    async def update_file_metadata(self, file_id: str, updates: Dict[str, Any]) -> Dict[str, Any]:
        """Apply *updates* to a document and return the refreshed metadata.

        Args:
            file_id: Application-level document id.
            updates: Fields to ``$set``; ``updated_at`` is stamped here.

        Returns:
            The full updated document (without ``_id``).

        Raises:
            Exception: If no document with *file_id* exists, or on DB error.
        """
        try:
            updates["updated_at"] = datetime.now()

            result = await self.collection.update_one(
                {"id": file_id},
                {"$set": updates}
            )

            # BUG FIX: check matched_count, not modified_count. A no-op
            # update (all values already identical) matches the document but
            # modifies nothing, and previously raised a spurious "not found".
            if result.matched_count == 0:
                raise Exception(f"File {file_id} not found")

            return await self.get_file_metadata(file_id)

        except Exception as e:
            logger.error("Failed to update file metadata: %s", e)
            raise

    async def delete_file_metadata(self, file_id: str) -> bool:
        """Soft-delete a document by flagging it DELETED.

        Returns:
            True if a document was updated, False if none matched.
        """
        try:
            now = datetime.now()
            updates = {
                "status": FileStatus.DELETED.value,
                "deleted_at": now,
                "updated_at": now,
            }

            result = await self.collection.update_one(
                {"id": file_id},
                {"$set": updates}
            )

            return result.modified_count > 0

        except Exception as e:
            logger.error("Failed to delete file metadata: %s", e)
            raise

    async def list_files(self, user_id: Optional[str] = None,
                         bucket: Optional[str] = None,
                         limit: int = 20,
                         offset: int = 0,
                         search: Optional[str] = None,
                         file_type: Optional[str] = None,
                         sort_by: str = "created_at",
                         order: str = "desc") -> Dict[str, Any]:
        """List non-deleted files with filtering, sorting, and pagination.

        Args:
            user_id: Restrict to one owner.
            bucket: Restrict to one storage bucket.
            limit: Page size.
            offset: Number of documents to skip.
            search: Full-text search against the ``filename`` text index.
            file_type: Restrict to one file type value.
            sort_by: Field to sort on.
            order: "desc" for descending (default); anything else ascending.

        Returns:
            Dict with ``files``, ``total``, ``limit``, ``offset``, ``has_more``.
        """
        try:
            # Soft-deleted documents are always excluded.
            query: Dict[str, Any] = {"status": {"$ne": FileStatus.DELETED.value}}

            if user_id:
                query["user_id"] = user_id
            if bucket:
                query["bucket"] = bucket
            if file_type:
                query["file_type"] = file_type
            if search:
                # Uses the text index on "filename" created in _create_indexes().
                query["$text"] = {"$search": search}

            # Total is counted before pagination so has_more is accurate.
            total = await self.collection.count_documents(query)

            sort_order = -1 if order == "desc" else 1

            cursor = (
                self.collection.find(query)
                .sort(sort_by, sort_order)
                .skip(offset)
                .limit(limit)
            )

            files = []
            async for doc in cursor:
                doc.pop("_id", None)
                files.append(doc)

            return {
                "files": files,
                "total": total,
                "limit": limit,
                "offset": offset,
                "has_more": (offset + limit) < total,
            }

        except Exception as e:
            logger.error("Failed to list files: %s", e)
            raise

    async def increment_download_count(self, file_id: str):
        """Best-effort bump of a file's download counter and access time.

        Deliberately swallows errors (logged only): a failed counter update
        must never break the download path.
        """
        try:
            await self.collection.update_one(
                {"id": file_id},
                {
                    "$inc": {"download_count": 1},
                    "$set": {"last_accessed": datetime.now()},
                }
            )

        except Exception as e:
            logger.error("Failed to increment download count: %s", e)

    async def get_storage_stats(self) -> Dict[str, Any]:
        """Aggregate global statistics over all non-deleted files.

        Returns:
            Dict with ``total_files``, ``total_size``, ``users_count``, and a
            ``file_types`` mapping of type -> document count.
        """
        try:
            # Totals + distinct users in a single aggregation pass.
            pipeline = [
                {"$match": {"status": {"$ne": FileStatus.DELETED.value}}},
                {
                    "$group": {
                        "_id": None,
                        "total_files": {"$sum": 1},
                        "total_size": {"$sum": "$size"},
                        "users": {"$addToSet": "$user_id"},
                    }
                },
            ]

            cursor = self.collection.aggregate(pipeline)
            result = await cursor.to_list(length=1)

            if result:
                stats = result[0]
                users_count = len(stats.get("users", []))
            else:
                # Empty collection: aggregation yields no groups at all.
                stats = {"total_files": 0, "total_size": 0}
                users_count = 0

            # Second pass: per-file-type document counts.
            type_pipeline = [
                {"$match": {"status": {"$ne": FileStatus.DELETED.value}}},
                {
                    "$group": {
                        "_id": "$file_type",
                        "count": {"$sum": 1},
                    }
                },
            ]

            type_cursor = self.collection.aggregate(type_pipeline)
            type_results = await type_cursor.to_list(length=None)

            # Skip the null/missing-type group.
            file_types = {
                item["_id"]: item["count"]
                for item in type_results if item["_id"]
            }

            return {
                "total_files": stats.get("total_files", 0),
                "total_size": stats.get("total_size", 0),
                "users_count": users_count,
                "file_types": file_types,
            }

        except Exception as e:
            logger.error("Failed to get storage stats: %s", e)
            raise

    async def find_duplicate_files(self, file_hash: str) -> List[Dict[str, Any]]:
        """Return all non-deleted documents whose ``hash`` equals *file_hash*."""
        try:
            cursor = self.collection.find({
                "hash": file_hash,
                "status": {"$ne": FileStatus.DELETED.value},
            })

            duplicates = []
            async for doc in cursor:
                doc.pop("_id", None)
                duplicates.append(doc)

            return duplicates

        except Exception as e:
            logger.error("Failed to find duplicate files: %s", e)
            raise

    async def get_user_storage_usage(self, user_id: str) -> Dict[str, Any]:
        """Aggregate one user's storage usage, broken down by file type.

        Returns:
            Dict with ``user_id``, ``total_files``, ``total_size``, and a
            ``breakdown`` mapping of type -> {count, size}.
        """
        try:
            pipeline = [
                {
                    "$match": {
                        "user_id": user_id,
                        "status": {"$ne": FileStatus.DELETED.value},
                    }
                },
                {
                    "$group": {
                        "_id": "$file_type",
                        "count": {"$sum": 1},
                        "size": {"$sum": "$size"},
                    }
                },
            ]

            cursor = self.collection.aggregate(pipeline)
            results = await cursor.to_list(length=None)

            # Totals include the null-type group; the breakdown excludes it.
            total_size = sum(item["size"] for item in results)
            total_files = sum(item["count"] for item in results)

            breakdown = {
                item["_id"]: {
                    "count": item["count"],
                    "size": item["size"],
                }
                for item in results if item["_id"]
            }

            return {
                "user_id": user_id,
                "total_files": total_files,
                "total_size": total_size,
                "breakdown": breakdown,
            }

        except Exception as e:
            logger.error("Failed to get user storage usage: %s", e)
            raise

    async def close(self):
        """Close the MongoDB connection and mark the manager disconnected."""
        if self.client:
            self.client.close()
            self.is_connected = False
            logger.info("MongoDB connection closed")