Files
site11/services/files/backend/main.py
jungwoo choi 3c485e05c9 feat: Implement Step 12 - File System with MinIO S3 Storage
Completed File Management Service with S3-compatible object storage:

Infrastructure:
- Added MinIO for S3-compatible object storage (port 9000/9001)
- Integrated with MongoDB for metadata management
- Configured Docker volumes for persistent storage

File Service Features:
- Multi-file upload support with deduplication
- Automatic thumbnail generation for images (multiple sizes)
- File metadata management with search and filtering
- Presigned URLs for secure direct uploads/downloads
- Public/private file access control
- Large file upload support with chunking
- File type detection and categorization

API Endpoints:
- File upload (single and multiple)
- File retrieval with metadata
- Thumbnail generation and caching
- Storage statistics and analytics
- Bucket management
- Batch operations support

Technical Improvements:
- Fixed Pydantic v2.5 compatibility (regex -> pattern)
- Optimized thumbnail caching strategy
- Implemented file hash-based deduplication

Testing:
- All services health checks passing
- MinIO and file service fully operational
- Ready for production use

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-11 19:10:37 +09:00

541 lines
17 KiB
Python

"""
File Management Service - S3-compatible Object Storage with MinIO
"""
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, Query, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, FileResponse
import uvicorn
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
import asyncio
import os
import hashlib
import magic
import io
from contextlib import asynccontextmanager
import logging
from pathlib import Path
import json
# Import custom modules
from models import FileMetadata, FileUploadResponse, FileListResponse, StorageStats
from minio_client import MinIOManager
from thumbnail_generator import ThumbnailGenerator
from metadata_manager import MetadataManager
from file_processor import FileProcessor
# Configure logging once at import time; INFO is the level used service-wide.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Service singletons. They stay None until the lifespan handler assigns them
# at startup; the endpoint functions read them as module globals.
minio_manager = thumbnail_generator = metadata_manager = file_processor = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the service singletons before serving and release them afterwards.

    Startup builds, in order, the MinIO manager, the MongoDB metadata manager,
    the thumbnail generator and the file processor, storing each in its module
    global. Any startup failure is logged and re-raised. On shutdown only the
    metadata manager is closed.
    """
    global minio_manager, thumbnail_generator, metadata_manager, file_processor

    env = os.environ.get
    try:
        # Object storage comes first: both later helpers receive this client.
        use_tls = env("MINIO_SECURE", "false").lower() == "true"
        minio_manager = MinIOManager(
            endpoint=env("MINIO_ENDPOINT", "minio:9000"),
            access_key=env("MINIO_ACCESS_KEY", "minioadmin"),
            secret_key=env("MINIO_SECRET_KEY", "minioadmin"),
            secure=use_tls,
        )
        await minio_manager.initialize()
        logger.info("MinIO client initialized")

        # Metadata store (MongoDB).
        metadata_manager = MetadataManager(
            mongodb_url=env("MONGODB_URL", "mongodb://mongodb:27017"),
            database=env("FILES_DB_NAME", "files_db"),
        )
        await metadata_manager.connect()
        logger.info("Metadata manager connected to MongoDB")

        # Thumbnail generator, given the storage client and a cache directory.
        thumbnail_generator = ThumbnailGenerator(
            minio_client=minio_manager,
            cache_dir="/tmp/thumbnails",
        )
        logger.info("Thumbnail generator initialized")

        # The file processor is wired to the three components created above.
        file_processor = FileProcessor(
            minio_client=minio_manager,
            metadata_manager=metadata_manager,
            thumbnail_generator=thumbnail_generator,
        )
        logger.info("File processor initialized")
    except Exception as exc:
        logger.error(f"Failed to start File service: {exc}")
        raise

    # Hand control to the application while it serves requests.
    yield

    # Shutdown: the metadata manager is the only component closed here.
    if metadata_manager:
        await metadata_manager.close()
    logger.info("File service shutdown complete")
app = FastAPI(
    title="File Management Service",
    description="S3-compatible object storage with MinIO",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS middleware.
# Allowed origins are read from the comma-separated CORS_ALLOW_ORIGINS
# environment variable so a deployment can restrict them without a code
# change. When the variable is unset or empty the previous behaviour
# (any origin, "*") is kept.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # NOTE(review): credentials combined with a wildcard origin is a permissive
    # setup; set CORS_ALLOW_ORIGINS to explicit origins for production.
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
async def root():
    """Return a small identification payload with the current server time."""
    now = datetime.now()
    payload = {
        "service": "File Management Service",
        "status": "running",
        "timestamp": now.isoformat(),
    }
    return payload
@app.get("/health")
async def health_check():
    """Report the health of the service and of each backing component.

    Returns:
        A dict with:
        - ``status``: ``"healthy"`` only when every component check passes,
          otherwise ``"degraded"``. The response is still a normal 200 so
          existing HTTP-level probes keep working.
        - ``service``: the constant ``"files"``.
        - ``components``: per-component state strings.
        - ``timestamp``: current server time in ISO format.
    """
    minio_ok = bool(minio_manager and minio_manager.is_connected)
    mongodb_ok = bool(metadata_manager and metadata_manager.is_connected)
    thumbnails_ok = bool(thumbnail_generator)

    # The overall status must follow the components; reporting "healthy" while
    # a backend is disconnected would hide real outages.
    all_ok = minio_ok and mongodb_ok and thumbnails_ok

    return {
        "status": "healthy" if all_ok else "degraded",
        "service": "files",
        "components": {
            "minio": "connected" if minio_ok else "disconnected",
            "mongodb": "connected" if mongodb_ok else "disconnected",
            "thumbnail_generator": "ready" if thumbnails_ok else "not_initialized",
        },
        "timestamp": datetime.now().isoformat(),
    }
# File Upload Endpoints
@app.post("/api/files/upload")
async def upload_file(
    file: UploadFile = File(...),
    user_id: str = Form(...),
    bucket: str = Form("default"),
    public: bool = Form(False),
    generate_thumbnail: bool = Form(True),
    tags: Optional[str] = Form(None)
):
    """Upload a single file to object storage.

    Args:
        file: The uploaded file; it must carry a filename.
        user_id: Identifier passed to the file processor as the uploader.
        bucket: Target bucket name.
        public: Public flag forwarded to the file processor.
        generate_thumbnail: Thumbnail flag forwarded to the file processor.
        tags: Optional JSON-encoded string; when omitted, ``{}`` is used.

    Returns:
        A ``FileUploadResponse`` built from the processor result.

    Raises:
        HTTPException: 400 when no filename is present or ``tags`` is not
            valid JSON; 500 for any other processing failure.
    """
    # Client errors are validated outside the generic handler so they are
    # reported as 400 rather than being rewritten to 500.
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    try:
        parsed_tags = json.loads(tags) if tags else {}
    except json.JSONDecodeError as exc:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid JSON in 'tags': {exc}"
        ) from exc

    try:
        # Process file upload
        result = await file_processor.process_upload(
            file=file,
            user_id=user_id,
            bucket=bucket,
            public=public,
            generate_thumbnail=generate_thumbnail,
            tags=parsed_tags
        )
        return FileUploadResponse(**result)
    except HTTPException:
        # Keep any status code raised deliberately further down the stack.
        raise
    except Exception as e:
        logger.error(f"File upload error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/files/upload-multiple")
async def upload_multiple_files(
    files: List[UploadFile] = File(...),
    user_id: str = Form(...),
    bucket: str = Form("default"),
    public: bool = Form(False)
):
    """Upload several files in one request.

    Files are processed one after another, always with thumbnail generation
    requested. The first failure aborts the request with a 500.

    Returns:
        A dict with the number of uploaded files and the per-file results.
    """
    try:
        # Awaiting inside the comprehension keeps the uploads sequential.
        outcomes = [
            await file_processor.process_upload(
                file=item,
                user_id=user_id,
                bucket=bucket,
                public=public,
                generate_thumbnail=True,
            )
            for item in files
        ]
        return {"uploaded": len(outcomes), "files": outcomes}
    except Exception as exc:
        logger.error(f"Multiple file upload error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
# File Retrieval Endpoints
@app.get("/api/files/{file_id}")
async def get_file(file_id: str):
    """Stream a stored file by its ID.

    Args:
        file_id: Identifier used to look up the file's metadata record.

    Returns:
        A ``StreamingResponse`` with the object's content, the stored content
        type (``application/octet-stream`` when absent) and an ``attachment``
        Content-Disposition header.

    Raises:
        HTTPException: 404 when no metadata exists; 500 on any other failure.
    """
    # Function-scope import so this block does not depend on file-level imports.
    from urllib.parse import quote

    try:
        # Get metadata
        metadata = await metadata_manager.get_file_metadata(file_id)
        if not metadata:
            raise HTTPException(status_code=404, detail="File not found")

        # Get file from MinIO
        file_stream = await minio_manager.get_file(
            bucket=metadata["bucket"],
            object_name=metadata["object_name"]
        )

        # Header values must be latin-1 encodable and must not contain quotes
        # or control characters. Send a sanitised ASCII fallback plus the exact
        # name as an RFC 5987 ``filename*`` parameter.
        filename = str(metadata["filename"])
        ascii_name = filename.encode("ascii", "replace").decode("ascii")
        ascii_name = "".join(
            "_" if ch in '"\\' or not ch.isprintable() else ch
            for ch in ascii_name
        )
        disposition = (
            f'attachment; filename="{ascii_name}"; '
            f"filename*=UTF-8''{quote(filename, safe='')}"
        )

        return StreamingResponse(
            file_stream,
            media_type=metadata.get("content_type", "application/octet-stream"),
            headers={"Content-Disposition": disposition}
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"File retrieval error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/files/{file_id}/metadata")
async def get_file_metadata(file_id: str):
    """Return the stored metadata record for a file.

    Responds with 404 when no record exists for ``file_id`` and with 500 for
    any other failure.
    """
    try:
        record = await metadata_manager.get_file_metadata(file_id)
        if record:
            return FileMetadata(**record)
        raise HTTPException(status_code=404, detail="File not found")
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as exc:
        logger.error(f"Metadata retrieval error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@app.get("/api/files/{file_id}/thumbnail")
async def get_thumbnail(
    file_id: str,
    width: int = Query(200, ge=50, le=1000),
    height: int = Query(200, ge=50, le=1000)
):
    """Serve a thumbnail of the requested size for a file.

    Responds with 404 when the file is unknown or its metadata does not flag a
    thumbnail, and with 500 on any other failure. The body is always sent with
    the ``image/jpeg`` media type.
    """
    try:
        record = await metadata_manager.get_file_metadata(file_id)
        if not record:
            raise HTTPException(status_code=404, detail="File not found")

        # Only files flagged with a thumbnail can be served here.
        thumbnail_flag = record.get("has_thumbnail")
        if not thumbnail_flag:
            raise HTTPException(status_code=404, detail="No thumbnail available")

        # The generator returns the thumbnail content for this size.
        image_bytes = await thumbnail_generator.get_thumbnail(
            file_id=file_id,
            bucket=record["bucket"],
            object_name=record["object_name"],
            width=width,
            height=height,
        )
        body = io.BytesIO(image_bytes)
        return StreamingResponse(body, media_type="image/jpeg")
    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"Thumbnail retrieval error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@app.get("/api/files/{file_id}/download")
async def download_file(file_id: str):
    """Download a file as an attachment and record the download.

    Args:
        file_id: Identifier used to look up the file's metadata record.

    Returns:
        A ``StreamingResponse`` with the object's content, the stored content
        type (``application/octet-stream`` when absent), an ``attachment``
        Content-Disposition header and a Content-Length taken from the
        metadata ``size`` field.

    Raises:
        HTTPException: 404 when no metadata exists; 500 on any other failure.
    """
    # Function-scope import so this block does not depend on file-level imports.
    from urllib.parse import quote

    try:
        # Get metadata
        metadata = await metadata_manager.get_file_metadata(file_id)
        if not metadata:
            raise HTTPException(status_code=404, detail="File not found")

        # Update download count
        await metadata_manager.increment_download_count(file_id)

        # Get file from MinIO
        file_stream = await minio_manager.get_file(
            bucket=metadata["bucket"],
            object_name=metadata["object_name"]
        )

        # Header values must be latin-1 encodable and must not contain quotes
        # or control characters. Send a sanitised ASCII fallback plus the exact
        # name as an RFC 5987 ``filename*`` parameter.
        filename = str(metadata["filename"])
        ascii_name = filename.encode("ascii", "replace").decode("ascii")
        ascii_name = "".join(
            "_" if ch in '"\\' or not ch.isprintable() else ch
            for ch in ascii_name
        )
        disposition = (
            f'attachment; filename="{ascii_name}"; '
            f"filename*=UTF-8''{quote(filename, safe='')}"
        )

        return StreamingResponse(
            file_stream,
            media_type=metadata.get("content_type", "application/octet-stream"),
            headers={
                "Content-Disposition": disposition,
                "Content-Length": str(metadata["size"])
            }
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"File download error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# File Management Endpoints
@app.delete("/api/files/{file_id}")
async def delete_file(file_id: str, user_id: str):
    """Delete a file, its thumbnail (if flagged) and its metadata record.

    Only the user recorded in the metadata may delete the file. Responds with
    404 for an unknown file, 403 when ``user_id`` does not match the record,
    and 500 on any other failure.
    """
    try:
        record = await metadata_manager.get_file_metadata(file_id)
        if not record:
            raise HTTPException(status_code=404, detail="File not found")

        # Ownership check against the user stored with the file.
        owner = record["user_id"]
        if owner != user_id:
            raise HTTPException(status_code=403, detail="Permission denied")

        # Remove the stored object, then the thumbnail, then the metadata.
        await minio_manager.delete_file(
            bucket=record["bucket"],
            object_name=record["object_name"],
        )
        if record.get("has_thumbnail"):
            await thumbnail_generator.delete_thumbnail(file_id)
        await metadata_manager.delete_file_metadata(file_id)

        return {"status": "deleted", "file_id": file_id}
    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"File deletion error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@app.patch("/api/files/{file_id}")
async def update_file_metadata(
    file_id: str,
    user_id: str,
    updates: Dict[str, Any]
):
    """Update editable metadata fields of a file.

    Args:
        file_id: Identifier of the file whose metadata is updated.
        user_id: Caller's user ID; must match the ``user_id`` in the record.
        updates: Mapping of field names to new values.

    Returns:
        A dict with the status, the file ID and the value returned by the
        metadata manager's update call.

    Raises:
        HTTPException: 404 for an unknown file, 403 when ``user_id`` does not
            match the record, 400 when ``updates`` targets a protected field,
            500 on any other failure.
    """
    # These fields drive the ownership check and the storage lookups in this
    # service. Letting a client overwrite them would bypass the permission
    # check or repoint the record at a different stored object.
    protected_fields = {"user_id", "bucket", "object_name"}

    try:
        # Get existing metadata
        metadata = await metadata_manager.get_file_metadata(file_id)
        if not metadata:
            raise HTTPException(status_code=404, detail="File not found")

        # Check ownership
        if metadata["user_id"] != user_id:
            raise HTTPException(status_code=403, detail="Permission denied")

        blocked = sorted(protected_fields.intersection(updates))
        if blocked:
            raise HTTPException(
                status_code=400,
                detail=f"Fields cannot be updated: {', '.join(blocked)}"
            )

        # Update metadata
        updated = await metadata_manager.update_file_metadata(file_id, updates)
        return {"status": "updated", "file_id": file_id, "metadata": updated}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Metadata update error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# File Listing Endpoints
@app.get("/api/files")
async def list_files(
    user_id: Optional[str] = None,
    bucket: str = Query("default"),
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0),
    search: Optional[str] = None,
    file_type: Optional[str] = None,
    sort_by: str = Query("created_at", pattern="^(created_at|filename|size)$"),
    order: str = Query("desc", pattern="^(asc|desc)$")
):
    """List files with filtering, sorting and pagination.

    Args:
        user_id: Optional user filter.
        bucket: Bucket to list from.
        limit: Page size, between 1 and 100.
        offset: Number of records to skip; must not be negative.
        search: Optional search term forwarded to the metadata manager.
        file_type: Optional file-type filter.
        sort_by: One of ``created_at``, ``filename`` or ``size``.
        order: ``asc`` or ``desc``.

    Returns:
        A ``FileListResponse`` built from the metadata manager's result.

    Raises:
        HTTPException: 500 when the listing fails.
    """
    # limit/offset carry lower bounds so zero or negative values are rejected
    # at validation time instead of reaching the storage query, where a zero
    # limit may mean "no limit" (presumably MongoDB-backed; verify).
    try:
        files = await metadata_manager.list_files(
            user_id=user_id,
            bucket=bucket,
            limit=limit,
            offset=offset,
            search=search,
            file_type=file_type,
            sort_by=sort_by,
            order=order
        )
        return FileListResponse(**files)
    except Exception as e:
        logger.error(f"File listing error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/files/user/{user_id}")
async def get_user_files(
    user_id: str,
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0)
):
    """List the files belonging to one user, with pagination.

    Args:
        user_id: User whose files are listed.
        limit: Page size, between 1 and 100.
        offset: Number of records to skip; must not be negative.

    Returns:
        A ``FileListResponse`` built from the metadata manager's result.

    Raises:
        HTTPException: 500 when the listing fails.
    """
    # Lower bounds keep zero or negative paging values out of the storage
    # query; they are rejected at validation time instead.
    try:
        files = await metadata_manager.list_files(
            user_id=user_id,
            limit=limit,
            offset=offset
        )
        return FileListResponse(**files)
    except Exception as e:
        logger.error(f"User files listing error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# Storage Management Endpoints
@app.get("/api/storage/stats")
async def get_storage_stats():
    """Combine object-storage and metadata statistics into one response.

    Bucket information comes from the MinIO manager; file totals, user count
    and file-type breakdown come from the metadata manager. Any failure is
    reported as a 500.
    """
    try:
        storage_side = await minio_manager.get_storage_stats()
        database_side = await metadata_manager.get_storage_stats()

        summary = StorageStats(
            total_files=database_side["total_files"],
            total_size=database_side["total_size"],
            buckets=storage_side["buckets"],
            users_count=database_side["users_count"],
            file_types=database_side["file_types"],
        )
        return summary
    except Exception as exc:
        logger.error(f"Storage stats error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@app.post("/api/storage/buckets")
async def create_bucket(bucket_name: str, public: bool = False):
    """Create a storage bucket, passing along the ``public`` flag.

    Any failure from the MinIO manager is reported as a 500.
    """
    try:
        await minio_manager.create_bucket(bucket_name, public=public)
    except Exception as exc:
        logger.error(f"Bucket creation error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
    else:
        return {"status": "created", "bucket": bucket_name}
@app.get("/api/storage/buckets")
async def list_buckets():
    """Return the buckets reported by the MinIO manager.

    Any failure is reported as a 500.
    """
    try:
        found = await minio_manager.list_buckets()
    except Exception as exc:
        logger.error(f"Bucket listing error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
    else:
        return {"buckets": found}
# Presigned URL Endpoints
@app.post("/api/files/presigned-upload")
async def generate_presigned_upload_url(
    filename: str,
    content_type: str,
    bucket: str = "default",
    expires_in: int = Query(3600, ge=60, le=86400)
):
    """Create a presigned URL for uploading directly to MinIO.

    The object name is the current date (``YYYYMMDD``) followed by ``/`` and
    the given filename. ``content_type`` is accepted but not used here. Any
    failure is reported as a 500.
    """
    try:
        # Objects are grouped under a per-day prefix.
        day_prefix = datetime.now().strftime('%Y%m%d')
        object_key = f"{day_prefix}/{filename}"

        signed_url = await minio_manager.generate_presigned_upload_url(
            bucket=bucket,
            object_name=object_key,
            expires_in=expires_in,
        )
        response = {
            "upload_url": signed_url,
            "expires_in": expires_in,
            "method": "PUT",
        }
        return response
    except Exception as exc:
        logger.error(f"Presigned URL generation error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@app.get("/api/files/{file_id}/share")
async def generate_share_link(
    file_id: str,
    expires_in: int = Query(86400, ge=60, le=604800)  # 1 day default, max 7 days
):
    """Create a time-limited download link for a file.

    Responds with 404 when the file is unknown and with 500 on any other
    failure. The link is a presigned download URL for the stored object.
    """
    try:
        record = await metadata_manager.get_file_metadata(file_id)
        if not record:
            raise HTTPException(status_code=404, detail="File not found")

        # Presign a download URL for the object backing this file.
        link = await minio_manager.generate_presigned_download_url(
            bucket=record["bucket"],
            object_name=record["object_name"],
            expires_in=expires_in,
        )
        response = {
            "share_url": link,
            "expires_in": expires_in,
            "file_id": file_id,
            "filename": record["filename"],
        }
        return response
    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"Share link generation error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
# Batch Operations
@app.post("/api/files/batch-delete")
async def batch_delete_files(file_ids: List[str], user_id: str):
    """Delete several files in one request.

    Each file is handled independently: a failure for one file is recorded in
    ``errors`` and does not stop the remaining deletions.

    Args:
        file_ids: IDs of the files to delete.
        user_id: Caller's user ID; only files whose record carries this
            ``user_id`` are deleted.

    Returns:
        A dict with the deleted IDs, the per-file errors and the number of
        deleted files.

    Raises:
        HTTPException: 500 on an unexpected failure outside per-file handling.
    """
    try:
        deleted = []
        errors = []
        for file_id in file_ids:
            try:
                # Get metadata
                metadata = await metadata_manager.get_file_metadata(file_id)
                if not metadata or metadata["user_id"] != user_id:
                    errors.append({"file_id": file_id, "error": "Not found or permission denied"})
                    continue

                # Delete from MinIO
                await minio_manager.delete_file(
                    bucket=metadata["bucket"],
                    object_name=metadata["object_name"]
                )
                # Delete the thumbnail too, as the single-file delete does;
                # otherwise thumbnails of batch-deleted files are orphaned.
                if metadata.get("has_thumbnail"):
                    await thumbnail_generator.delete_thumbnail(file_id)
                # Delete metadata
                await metadata_manager.delete_file_metadata(file_id)
                deleted.append(file_id)
            except Exception as e:
                # Log per-file failures so they are visible server-side too.
                logger.error(f"Batch delete error for file {file_id}: {e}")
                errors.append({"file_id": file_id, "error": str(e)})
        return {
            "deleted": deleted,
            "errors": errors,
            "total_deleted": len(deleted)
        }
    except Exception as e:
        logger.error(f"Batch delete error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Direct-run entry point: serve the app on all interfaces, port 8000,
    # with auto-reload enabled.
    server_options = {"host": "0.0.0.0", "port": 8000, "reload": True}
    uvicorn.run("main:app", **server_options)