Initial commit - cleaned repository
This commit is contained in:
331
services/files/backend/metadata_manager.py
Normal file
331
services/files/backend/metadata_manager.py
Normal file
@ -0,0 +1,331 @@
|
||||
"""
|
||||
Metadata Manager for file information storage in MongoDB
|
||||
"""
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any, List
|
||||
import logging
|
||||
import uuid
|
||||
from models import FileType, FileStatus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class MetadataManager:
    """Stores and queries file metadata documents in MongoDB.

    Documents are keyed by an application-level ``id`` field (a UUID string),
    not by MongoDB's ``_id``; ``_id`` is stripped from every returned document.
    Deletes are soft: documents are marked ``FileStatus.DELETED`` and filtered
    out of queries rather than physically removed.
    """

    def __init__(self, mongodb_url: str, database: str = "files_db"):
        # Connection details only; the client is created lazily in connect().
        self.mongodb_url = mongodb_url
        self.database_name = database
        self.client = None
        self.db = None
        self.collection = None
        self.is_connected = False

    async def connect(self):
        """Connect to MongoDB, verify connectivity, and ensure indexes exist.

        Raises:
            Exception: propagated from the driver when the server is
                unreachable or index creation setup fails.
        """
        try:
            self.client = AsyncIOMotorClient(self.mongodb_url)
            self.db = self.client[self.database_name]
            self.collection = self.db.files

            # Verify connectivity FIRST so a bad URL fails fast; previously
            # the ping ran after index creation, which made it pointless.
            await self.client.admin.command('ping')

            # Create indexes (best-effort; see _create_indexes).
            await self._create_indexes()

            self.is_connected = True
            logger.info(f"Connected to MongoDB at {self.mongodb_url}")

        except Exception as e:
            logger.error(f"Failed to connect to MongoDB: {e}")
            self.is_connected = False
            raise

    async def _create_indexes(self):
        """Create the indexes backing the query paths (best-effort).

        Failures are logged and swallowed: the service can still run without
        indexes, just with slower queries.
        """
        try:
            await self.collection.create_index("user_id")
            await self.collection.create_index("bucket")
            await self.collection.create_index("created_at")
            await self.collection.create_index("file_type")
            # Text index backing the $text search used by list_files().
            await self.collection.create_index([("filename", "text")])
            # Compound index for the common "a user's newest files" listing.
            await self.collection.create_index([("user_id", 1), ("created_at", -1)])

            logger.info("Database indexes created")

        except Exception as e:
            logger.error(f"Failed to create indexes: {e}")

    async def create_file_metadata(self, metadata: Dict[str, Any]) -> str:
        """Insert a new metadata document and return its application ``id``.

        The input dict is mutated in place: ``created_at``/``updated_at``
        timestamps, ``download_count`` (0), ``status`` (READY), and — if
        absent — a generated UUID ``id`` are added before insertion.

        Raises:
            Exception: propagated from the driver on insert failure.
        """
        try:
            # One timestamp so created_at and updated_at match exactly at
            # creation time. NOTE(review): naive local time, matching the
            # existing stored data — switching to tz-aware datetimes would
            # need a coordinated migration.
            now = datetime.now()
            metadata["created_at"] = now
            metadata["updated_at"] = now
            metadata["download_count"] = 0
            metadata["status"] = FileStatus.READY.value

            # Generate a unique application-level id if the caller did not
            # supply one.
            if "id" not in metadata:
                metadata["id"] = str(uuid.uuid4())

            await self.collection.insert_one(metadata)

            logger.info(f"Created metadata for file: {metadata['id']}")
            return metadata["id"]

        except Exception as e:
            logger.error(f"Failed to create file metadata: {e}")
            raise

    async def get_file_metadata(self, file_id: str) -> Optional[Dict[str, Any]]:
        """Return the metadata document for ``file_id``, or None if absent.

        Note: deleted (soft-deleted) documents are still returned here;
        filtering by status is the caller's responsibility.
        """
        try:
            metadata = await self.collection.find_one({"id": file_id})

            if metadata:
                # Strip MongoDB's internal _id; callers only see the
                # application-level id.
                metadata.pop("_id", None)

            return metadata

        except Exception as e:
            logger.error(f"Failed to get file metadata: {e}")
            raise

    async def update_file_metadata(self, file_id: str, updates: Dict[str, Any]) -> Dict[str, Any]:
        """Apply a partial ``$set`` update and return the refreshed document.

        The ``updates`` dict is mutated in place (``updated_at`` is added).

        Raises:
            Exception: if no document with the given id exists, or on
                driver failure.
        """
        try:
            # Stamp the modification time.
            updates["updated_at"] = datetime.now()

            result = await self.collection.update_one(
                {"id": file_id},
                {"$set": updates}
            )

            # BUGFIX: use matched_count, not modified_count. A no-op update
            # (values identical to what is stored) matches but modifies
            # nothing, and must not be reported as "not found".
            if result.matched_count == 0:
                raise Exception(f"File {file_id} not found")

            # Return the post-update state.
            return await self.get_file_metadata(file_id)

        except Exception as e:
            logger.error(f"Failed to update file metadata: {e}")
            raise

    async def delete_file_metadata(self, file_id: str) -> bool:
        """Soft-delete a file's metadata.

        Marks the document DELETED and stamps ``deleted_at``; nothing is
        physically removed.

        Returns:
            True if a document was modified, False otherwise (unknown id,
            or already marked deleted with identical values).
        """
        try:
            now = datetime.now()
            updates = {
                "status": FileStatus.DELETED.value,
                "deleted_at": now,
                "updated_at": now
            }

            result = await self.collection.update_one(
                {"id": file_id},
                {"$set": updates}
            )

            return result.modified_count > 0

        except Exception as e:
            logger.error(f"Failed to delete file metadata: {e}")
            raise

    async def list_files(self, user_id: Optional[str] = None,
                         bucket: Optional[str] = None,
                         limit: int = 20,
                         offset: int = 0,
                         search: Optional[str] = None,
                         file_type: Optional[str] = None,
                         sort_by: str = "created_at",
                         order: str = "desc") -> Dict[str, Any]:
        """List non-deleted files with optional filters and pagination.

        Args:
            user_id: restrict to one user's files.
            bucket: restrict to one storage bucket.
            limit: page size.
            offset: number of documents to skip.
            search: full-text search term (uses the ``filename`` text index).
            file_type: restrict to one file type value.
            sort_by: field to sort on.
            order: "desc" (default) or anything else for ascending.

        Returns:
            Dict with keys ``files``, ``total``, ``limit``, ``offset``,
            ``has_more``.
        """
        try:
            # Always exclude soft-deleted documents.
            query: Dict[str, Any] = {"status": {"$ne": FileStatus.DELETED.value}}

            if user_id:
                query["user_id"] = user_id

            if bucket:
                query["bucket"] = bucket

            if file_type:
                query["file_type"] = file_type

            if search:
                # Requires the text index created in _create_indexes().
                query["$text"] = {"$search": search}

            # Total matches (for pagination metadata), independent of page.
            total = await self.collection.count_documents(query)

            sort_order = -1 if order == "desc" else 1

            cursor = self.collection.find(query)\
                .sort(sort_by, sort_order)\
                .skip(offset)\
                .limit(limit)

            files = []
            async for doc in cursor:
                doc.pop("_id", None)
                files.append(doc)

            return {
                "files": files,
                "total": total,
                "limit": limit,
                "offset": offset,
                "has_more": (offset + limit) < total
            }

        except Exception as e:
            logger.error(f"Failed to list files: {e}")
            raise

    async def increment_download_count(self, file_id: str):
        """Increment a file's download counter and stamp ``last_accessed``.

        Best-effort: failures are logged and swallowed so a stats glitch
        never breaks a download.
        """
        try:
            await self.collection.update_one(
                {"id": file_id},
                {
                    "$inc": {"download_count": 1},
                    "$set": {"last_accessed": datetime.now()}
                }
            )

        except Exception as e:
            logger.error(f"Failed to increment download count: {e}")

    async def get_storage_stats(self) -> Dict[str, Any]:
        """Aggregate global storage statistics over non-deleted files.

        Returns:
            Dict with ``total_files``, ``total_size`` (sum of ``size``
            fields), ``users_count`` (distinct user_ids), and ``file_types``
            (type -> count, excluding documents with a falsy file_type).
        """
        try:
            # Single-group pipeline: totals plus the distinct-user set.
            pipeline = [
                {"$match": {"status": {"$ne": FileStatus.DELETED.value}}},
                {
                    "$group": {
                        "_id": None,
                        "total_files": {"$sum": 1},
                        "total_size": {"$sum": "$size"},
                        "users": {"$addToSet": "$user_id"}
                    }
                }
            ]

            cursor = self.collection.aggregate(pipeline)
            result = await cursor.to_list(length=1)

            if result:
                stats = result[0]
                users_count = len(stats.get("users", []))
            else:
                # Empty collection: $group produced no document at all.
                stats = {"total_files": 0, "total_size": 0}
                users_count = 0

            # Per-file-type document counts.
            type_pipeline = [
                {"$match": {"status": {"$ne": FileStatus.DELETED.value}}},
                {
                    "$group": {
                        "_id": "$file_type",
                        "count": {"$sum": 1}
                    }
                }
            ]

            type_cursor = self.collection.aggregate(type_pipeline)
            type_results = await type_cursor.to_list(length=None)

            # Skip documents with no/falsy file_type.
            file_types = {
                item["_id"]: item["count"]
                for item in type_results if item["_id"]
            }

            return {
                "total_files": stats.get("total_files", 0),
                "total_size": stats.get("total_size", 0),
                "users_count": users_count,
                "file_types": file_types
            }

        except Exception as e:
            logger.error(f"Failed to get storage stats: {e}")
            raise

    async def find_duplicate_files(self, file_hash: str) -> List[Dict[str, Any]]:
        """Return all non-deleted files whose ``hash`` equals ``file_hash``."""
        try:
            cursor = self.collection.find({
                "hash": file_hash,
                "status": {"$ne": FileStatus.DELETED.value}
            })

            duplicates = []
            async for doc in cursor:
                doc.pop("_id", None)
                duplicates.append(doc)

            return duplicates

        except Exception as e:
            logger.error(f"Failed to find duplicate files: {e}")
            raise

    async def get_user_storage_usage(self, user_id: str) -> Dict[str, Any]:
        """Aggregate one user's storage usage, broken down by file type.

        Returns:
            Dict with ``user_id``, ``total_files``, ``total_size`` and
            ``breakdown`` (file_type -> {count, size}). Totals include
            documents with a falsy file_type; the breakdown excludes them.
        """
        try:
            pipeline = [
                {
                    "$match": {
                        "user_id": user_id,
                        "status": {"$ne": FileStatus.DELETED.value}
                    }
                },
                {
                    "$group": {
                        "_id": "$file_type",
                        "count": {"$sum": 1},
                        "size": {"$sum": "$size"}
                    }
                }
            ]

            cursor = self.collection.aggregate(pipeline)
            results = await cursor.to_list(length=None)

            total_size = sum(item["size"] for item in results)
            total_files = sum(item["count"] for item in results)

            breakdown = {
                item["_id"]: {
                    "count": item["count"],
                    "size": item["size"]
                }
                for item in results if item["_id"]
            }

            return {
                "user_id": user_id,
                "total_files": total_files,
                "total_size": total_size,
                "breakdown": breakdown
            }

        except Exception as e:
            logger.error(f"Failed to get user storage usage: {e}")
            raise

    async def close(self):
        """Close the MongoDB connection, if one was opened."""
        if self.client:
            self.client.close()
            self.is_connected = False
            logger.info("MongoDB connection closed")
|
||||
Reference in New Issue
Block a user