Initial commit - cleaned repository

This commit is contained in:
jungwoo choi
2025-09-28 20:41:57 +09:00
commit e3c28f796a
188 changed files with 28102 additions and 0 deletions

View File

@ -0,0 +1,247 @@
"""
File Processor for handling file uploads and processing
"""
import hashlib
import mimetypes
from datetime import datetime
from typing import Dict, Any, Optional
import logging
import uuid
from fastapi import UploadFile
from models import FileType, FileStatus
logger = logging.getLogger(__name__)
class FileProcessor:
    """Process file uploads into object storage.

    Coordinates three injected collaborators: a MinIO client (object
    storage), a metadata manager (database records and duplicate
    lookup) and a thumbnail generator (image previews).
    """

    def __init__(self, minio_client, metadata_manager, thumbnail_generator):
        self.minio_client = minio_client
        self.metadata_manager = metadata_manager
        self.thumbnail_generator = thumbnail_generator

    def _determine_file_type(self, content_type: str) -> FileType:
        """Map a MIME content type onto the coarse FileType enum."""
        if content_type.startswith('image/'):
            return FileType.IMAGE
        elif content_type.startswith('video/'):
            return FileType.VIDEO
        elif content_type.startswith('audio/'):
            return FileType.AUDIO
        # Office Open XML MIME types carry suffixes such as
        # '.wordprocessingml.document', so an exact-membership test on the
        # bare 'application/vnd.openxmlformats-officedocument' string could
        # never match a real docx/xlsx/pptx upload; use a prefix test for
        # that family instead.
        elif (content_type.startswith('application/vnd.openxmlformats-officedocument')
              or content_type in ('application/pdf', 'application/msword',
                                  'text/plain', 'text/html', 'text/csv')):
            return FileType.DOCUMENT
        elif content_type in ('application/zip', 'application/x-rar-compressed',
                              'application/x-tar', 'application/gzip'):
            return FileType.ARCHIVE
        else:
            return FileType.OTHER

    def _calculate_file_hash(self, file_data: bytes) -> str:
        """Return the hex-encoded SHA-256 digest of *file_data*."""
        return hashlib.sha256(file_data).hexdigest()

    def _build_object_name(self, user_id: str, file_id: str, filename: str) -> str:
        """Build the storage key '<YYYYMMDD>/<user_id>/<file_id>[.<ext>]'.

        The extension is the text after the last '.' in *filename*; when
        the name has no extension the trailing dot is omitted, so keys
        stay consistent across both upload paths.
        """
        timestamp = datetime.now().strftime('%Y%m%d')
        extension = filename.split('.')[-1] if '.' in filename else ''
        if extension:
            return f"{timestamp}/{user_id}/{file_id}.{extension}"
        return f"{timestamp}/{user_id}/{file_id}"

    async def process_upload(self, file: UploadFile, user_id: str,
                             bucket: str = "default",
                             public: bool = False,
                             generate_thumbnail: bool = True,
                             tags: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Process file upload.

        Reads the payload fully into memory, deduplicates private files
        by SHA-256, uploads to MinIO, optionally creates thumbnails for
        images, records metadata, and returns a summary dict.

        Args:
            file: Incoming FastAPI upload.
            user_id: Owner of the file; becomes part of the storage key.
            bucket: Target MinIO bucket.
            public: When True, duplicate detection is skipped and a
                30-day presigned download URL is issued.
            generate_thumbnail: Create preview images for image uploads.
            tags: Optional free-form tags stored with the metadata.

        Returns:
            Dict describing the stored file, or the pre-existing record
            (with "duplicate": True) when a private duplicate is found.

        Raises:
            Exception: Storage/metadata errors are logged and re-raised.
        """
        try:
            # Read the whole payload into memory (small/medium uploads;
            # big ones should go through process_large_file).
            file_data = await file.read()
            file_size = len(file_data)

            # Resolve a content type: client-supplied, guessed from the
            # filename, or a generic binary fallback.
            content_type = (file.content_type
                            or mimetypes.guess_type(file.filename)[0]
                            or 'application/octet-stream')

            # Generate file ID and the date-partitioned object name.
            file_id = str(uuid.uuid4())
            object_name = self._build_object_name(user_id, file_id, file.filename)

            # Deduplicate private uploads by content hash.
            file_hash = self._calculate_file_hash(file_data)
            duplicates = await self.metadata_manager.find_duplicate_files(file_hash)
            if duplicates and not public:  # Allow duplicates for public files
                # Return the existing record instead of storing a copy.
                existing = duplicates[0]
                logger.info(f"Duplicate file detected: {existing['id']}")
                return {
                    "file_id": existing["id"],
                    "filename": existing["filename"],
                    "size": existing["size"],
                    "content_type": existing["content_type"],
                    "file_type": existing["file_type"],
                    "bucket": existing["bucket"],
                    "public": existing["public"],
                    "has_thumbnail": existing.get("has_thumbnail", False),
                    "thumbnail_url": existing.get("thumbnail_url"),
                    "created_at": existing["created_at"],
                    "duplicate": True
                }

            # Upload to MinIO.
            upload_result = await self.minio_client.upload_file(
                bucket=bucket,
                object_name=object_name,
                file_data=file_data,
                content_type=content_type,
                metadata={
                    "user_id": user_id,
                    "original_name": file.filename,
                    "upload_date": datetime.now().isoformat()
                }
            )

            file_type = self._determine_file_type(content_type)

            # Generate a thumbnail (images only); public files also get
            # a long-lived presigned URL for the medium-size preview.
            has_thumbnail = False
            thumbnail_url = None
            if generate_thumbnail and file_type == FileType.IMAGE:
                thumbnail_data = await self.thumbnail_generator.generate_thumbnail(
                    file_data=file_data,
                    content_type=content_type
                )
                if thumbnail_data:
                    has_thumbnail = True
                    # Generate multiple sizes for later retrieval.
                    await self.thumbnail_generator.generate_multiple_sizes(
                        file_data=file_data,
                        content_type=content_type,
                        file_id=file_id
                    )
                    if public:
                        thumbnail_url = await self.minio_client.generate_presigned_download_url(
                            bucket="thumbnails",
                            object_name=f"thumbnails/{file_id}_medium.jpg",
                            expires_in=86400 * 30  # 30 days
                        )

            # Persist the metadata record.
            metadata = {
                "id": file_id,
                "filename": file.filename,
                "original_name": file.filename,
                "size": file_size,
                "content_type": content_type,
                "file_type": file_type.value,
                "bucket": bucket,
                "object_name": object_name,
                "user_id": user_id,
                "hash": file_hash,
                "public": public,
                "has_thumbnail": has_thumbnail,
                "thumbnail_url": thumbnail_url,
                "tags": tags or {},
                "metadata": {
                    "etag": upload_result.get("etag"),
                    "version_id": upload_result.get("version_id")
                }
            }
            await self.metadata_manager.create_file_metadata(metadata)

            # Public files get a 30-day presigned download URL.
            download_url = None
            if public:
                download_url = await self.minio_client.generate_presigned_download_url(
                    bucket=bucket,
                    object_name=object_name,
                    expires_in=86400 * 30  # 30 days
                )

            logger.info(f"File uploaded successfully: {file_id}")
            return {
                "file_id": file_id,
                "filename": file.filename,
                "size": file_size,
                "content_type": content_type,
                "file_type": file_type.value,
                "bucket": bucket,
                "public": public,
                "has_thumbnail": has_thumbnail,
                "thumbnail_url": thumbnail_url,
                "download_url": download_url,
                "created_at": datetime.now()
            }
        except Exception as e:
            logger.error(f"File processing error: {e}")
            raise

    async def process_large_file(self, file: UploadFile, user_id: str,
                                 bucket: str = "default",
                                 chunk_size: int = 1024 * 1024 * 5) -> Dict[str, Any]:
        """Process large file upload in chunks.

        Hashes incrementally while reading, so the digest needs no
        second pass over the data.

        NOTE(review): the chunks are still concatenated in memory before
        upload because the MinIO client wrapper exposes no multipart API
        here; true streaming would need that support — confirm against
        the client implementation.

        Args:
            file: Incoming FastAPI upload.
            user_id: Owner of the file; becomes part of the storage key.
            bucket: Target MinIO bucket.
            chunk_size: Bytes per read (default 5 MiB).

        Returns:
            Dict with the new file id, filename, size and a status message.

        Raises:
            Exception: Storage/metadata errors are logged and re-raised.
        """
        try:
            file_id = str(uuid.uuid4())
            # Reuse the shared key builder; this also fixes the old
            # trailing-dot key produced for extension-less filenames.
            object_name = self._build_object_name(user_id, file_id, file.filename)

            # Hash incrementally while accumulating the chunks.
            hasher = hashlib.sha256()
            total_size = 0
            chunks = []
            while True:
                chunk = await file.read(chunk_size)
                if not chunk:
                    break
                chunks.append(chunk)
                hasher.update(chunk)
                total_size += len(chunk)

            # Combine chunks and upload.
            file_data = b''.join(chunks)
            file_hash = hasher.hexdigest()

            # Upload to MinIO.
            content_type = file.content_type or 'application/octet-stream'
            await self.minio_client.upload_file(
                bucket=bucket,
                object_name=object_name,
                file_data=file_data,
                content_type=content_type
            )

            # Persist the metadata record (no dedup or thumbnails on the
            # large-file path).
            metadata = {
                "id": file_id,
                "filename": file.filename,
                "original_name": file.filename,
                "size": total_size,
                "content_type": content_type,
                "file_type": self._determine_file_type(content_type).value,
                "bucket": bucket,
                "object_name": object_name,
                "user_id": user_id,
                "hash": file_hash,
                "public": False,
                "has_thumbnail": False
            }
            await self.metadata_manager.create_file_metadata(metadata)

            return {
                "file_id": file_id,
                "filename": file.filename,
                "size": total_size,
                "message": "Large file uploaded successfully"
            }
        except Exception as e:
            logger.error(f"Large file processing error: {e}")
            raise