feat: integrate MinIO storage for image caching service

- Replace file system storage with MinIO object storage
- Add MinIO cache implementation with 3-level directory structure
- Support dynamic switching between MinIO and filesystem via config
- Fix metadata encoding issue for non-ASCII URLs
- Successfully tested with various image sources including Korean URLs

All image service features working:
- Image proxy and download
- 5 size variants (thumb, card, list, detail, hero)
- WebP format conversion
- Cache hit/miss detection
- Background size generation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2025-09-12 07:46:12 +09:00
parent 005088189f
commit 8866a90f65
4 changed files with 432 additions and 4 deletions

View File

@ -5,9 +5,14 @@ import mimetypes
from pathlib import Path
import hashlib
from ..core.cache import cache
from ..core.config import settings
# MinIO 사용 여부에 따라 적절한 캐시 모듈 선택
if settings.use_minio:
from ..core.minio_cache import cache
else:
from ..core.cache import cache
router = APIRouter()
@router.get("/image")
@ -113,7 +118,7 @@ async def get_stats():
"""캐시 통계 정보"""
cache_size = await cache.get_cache_size()
# 디렉토리 구조 통계 추가
# 디렉토리 구조 통계 추가 (MinIO 또는 파일시스템)
dir_stats = await cache.get_directory_stats()
return {

View File

@ -6,11 +6,19 @@ class Settings(BaseSettings):
app_name: str = "Image Proxy Service"
debug: bool = True
# 캐시 설정
# 캐시 설정 (MinIO 전환 시에도 로컬 임시 파일용)
cache_dir: Path = Path("/app/cache")
max_cache_size_gb: int = 10
cache_ttl_days: int = 30
# MinIO 설정
use_minio: bool = True # MinIO 사용 여부
minio_endpoint: str = "minio:9000"
minio_access_key: str = "minioadmin"
minio_secret_key: str = "minioadmin"
minio_bucket_name: str = "image-cache"
minio_secure: bool = False
# 이미지 설정
max_image_size_mb: int = 20
allowed_formats: list = ["jpg", "jpeg", "png", "gif", "webp", "svg"]

View File

@ -0,0 +1,414 @@
import hashlib
import os
from pathlib import Path
from datetime import datetime, timedelta
from typing import Optional, Tuple
import httpx
from PIL import Image
# Optional HEIF/HEIC (and, where the plugin still ships it, AVIF) decoding.
# The two openers are imported separately: a pillow_heif build that lacks
# ``register_avif_opener`` must not raise ImportError for the whole block and
# thereby disable HEIF support as well.
try:
    from pillow_heif import register_heif_opener
except ImportError:
    print("Warning: pillow_heif not installed, HEIF/AVIF support disabled")
else:
    register_heif_opener()  # HEIF/HEIC support
    try:
        from pillow_heif import register_avif_opener
    except ImportError:
        # This pillow_heif build has no AVIF opener; HEIF stays enabled.
        pass
    else:
        register_avif_opener()  # AVIF support
    print("HEIF/AVIF support enabled successfully")
import io
import asyncio
import ssl
from minio import Minio
from minio.error import S3Error
import tempfile
from .config import settings
class MinIOImageCache:
    """Image cache stored in a MinIO (S3-compatible) bucket.

    Objects are named after the MD5 hex digest of the source URL and placed
    under a three-level prefix built from the first six hex characters
    (``ab/cd/ef/<digest>[_<size>][.<ext>]``). The class also contains the
    download and resize/convert helpers used to populate the cache.
    """

    def __init__(self):
        """Create the MinIO client, ensure the bucket exists, prepare a temp dir."""
        self.client = Minio(
            settings.minio_endpoint,
            access_key=settings.minio_access_key,
            secret_key=settings.minio_secret_key,
            secure=settings.minio_secure
        )

        # Bucket check/creation is a synchronous call made during construction.
        self._ensure_bucket()

        # Local scratch directory (intended for image processing).
        self.temp_dir = Path(tempfile.gettempdir()) / "image_cache_temp"
        self.temp_dir.mkdir(parents=True, exist_ok=True)

        # Strong references to in-flight background tasks; the event loop only
        # keeps weak references, so an unreferenced task may be collected
        # before it finishes.
        self._background_tasks = set()

    def _ensure_bucket(self):
        """Create the configured bucket if it does not exist yet.

        S3 errors are printed, not raised.
        """
        try:
            if not self.client.bucket_exists(settings.minio_bucket_name):
                self.client.make_bucket(settings.minio_bucket_name)
                print(f"✅ Created MinIO bucket: {settings.minio_bucket_name}")
            else:
                print(f"✅ MinIO bucket exists: {settings.minio_bucket_name}")
        except S3Error as e:
            print(f"❌ Error creating bucket: {e}")

    def _get_object_name(self, url: str, size: Optional[str] = None) -> str:
        """Build the MinIO object name for ``url`` (and optional size variant).

        Args:
            url: Source image URL; its MD5 hex digest is the base file name.
            size: Size-variant name, appended as ``_<size>`` when given.

        Returns:
            ``<h[0:2]>/<h[2:4]>/<h[4:6]>/<filename>``. The file name ends in
            ``.webp`` for size variants when WebP conversion is enabled;
            otherwise it uses the URL's extension if that is an allowed
            format, or no extension at all.
        """
        url_hash = hashlib.md5(url.encode()).hexdigest()

        # Three prefix levels; MinIO treats "/" like a directory separator.
        level1 = url_hash[:2]
        level2 = url_hash[2:4]
        level3 = url_hash[4:6]

        # Each size variant gets its own file name.
        filename = f"{url_hash}_{size}" if size else url_hash

        if settings.convert_to_webp and size:
            filename = f"{filename}.webp"
        else:
            ext = self._get_extension_from_url(url)
            if ext:
                filename = f"{filename}.{ext}"

        return f"{level1}/{level2}/{level3}/{filename}"

    def _get_extension_from_url(self, url: str) -> Optional[str]:
        """Return the lower-cased extension of ``url`` if it is an allowed format.

        The query string is dropped first. Returns ``None`` when there is no
        dot in the remaining text or the trailing part is not listed in
        ``settings.allowed_formats``.
        """
        path = url.split('?')[0]  # strip query parameters
        parts = path.split('.')
        if len(parts) > 1:
            ext = parts[-1].lower()
            if ext in settings.allowed_formats:
                return ext
        return None

    def _is_svg(self, data: bytes) -> bool:
        """Heuristically decide whether ``data`` is an SVG document.

        Payloads shorter than 100 bytes are never treated as SVG. Otherwise
        the first 1000 bytes are lower-cased and searched for ``<svg``,
        ``<?xml`` or ``<!doctype svg``.
        """
        if len(data) < 100:
            return False

        header = data[:1000].lower()
        svg_signatures = (
            b'<svg',
            b'<?xml',
            b'<!doctype svg',
        )
        return any(sig in header for sig in svg_signatures)

    def _process_gif(self, gif_data: bytes, target_size: tuple) -> tuple[bytes, str]:
        """Shrink a GIF and re-encode it as a JPEG.

        Transparent areas are flattened onto a white background.

        Args:
            gif_data: Raw GIF bytes.
            target_size: ``(width, height)`` bound passed to ``Image.thumbnail``.

        Returns:
            ``(jpeg_bytes, 'image/jpeg')`` on success, or the untouched input
            with ``'image/gif'`` if any step fails.
        """
        try:
            img = Image.open(io.BytesIO(gif_data))

            if img.mode != 'RGB':
                if img.mode == 'P':
                    img = img.convert('RGBA')
                if img.mode == 'RGBA':
                    # Flatten transparency onto white.
                    background = Image.new('RGB', img.size, (255, 255, 255))
                    background.paste(img, mask=img.split()[3] if len(img.split()) == 4 else None)
                    img = background
                elif img.mode != 'RGB':
                    img = img.convert('RGB')

            # Resize in place, keeping the aspect ratio.
            img.thumbnail(target_size, Image.Resampling.LANCZOS)

            output = io.BytesIO()
            img.save(
                output,
                format='JPEG',
                quality=settings.jpeg_quality,
                optimize=True,
                progressive=settings.progressive_jpeg
            )
            return output.getvalue(), 'image/jpeg'
        except Exception as e:
            print(f"GIF 처리 오류: {e}")
            return gif_data, 'image/gif'

    def resize_and_optimize_image(self, image_data: bytes, size: str) -> tuple[bytes, str]:
        """Resize ``image_data`` to a configured thumbnail size and re-encode it.

        Args:
            image_data: Raw bytes of the source image.
            size: Key into ``settings.thumbnail_sizes``; unknown keys fall
                back to the ``"thumb"`` entry.

        Returns:
            ``(encoded_bytes, content_type)``. With WebP conversion enabled
            the output is WebP; otherwise PNG sources stay PNG and everything
            else becomes JPEG. If processing fails, the original bytes are
            returned with ``'image/jpeg'``.
        """
        try:
            target_size = settings.thumbnail_sizes.get(size, settings.thumbnail_sizes["thumb"])

            img = Image.open(io.BytesIO(image_data))
            # Remember the decoded format now: exif_transpose() hands back a
            # new image whose ``format`` is None, which would otherwise make
            # the PNG branch below unreachable.
            source_format = img.format

            # Apply the EXIF orientation, best effort.
            try:
                from PIL import ImageOps
                img = ImageOps.exif_transpose(img)
            except Exception as exif_error:
                print(f"EXIF transpose skipped: {exif_error}")

            # Resize in place, keeping the aspect ratio.
            img.thumbnail(target_size, Image.Resampling.LANCZOS)

            output = io.BytesIO()

            if settings.convert_to_webp:
                # Flatten transparency onto white before encoding.
                if img.mode in ('RGBA', 'LA', 'P'):
                    background = Image.new('RGB', img.size, (255, 255, 255))
                    if img.mode == 'P':
                        img = img.convert('RGBA')
                    background.paste(img, mask=img.split()[-1] if 'A' in img.mode else None)
                    img = background
                elif img.mode != 'RGB':
                    img = img.convert('RGB')

                img.save(
                    output,
                    format='WEBP',
                    quality=settings.webp_quality,
                    lossless=settings.webp_lossless,
                    method=6  # slowest / best compression
                )
                content_type = 'image/webp'
            elif source_format == 'PNG':
                # Keep PNG sources as PNG.
                img.save(
                    output,
                    format='PNG',
                    compress_level=settings.png_compress_level,
                    optimize=settings.optimize_png
                )
                content_type = 'image/png'
            else:
                # Everything else is written as JPEG.
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                img.save(
                    output,
                    format='JPEG',
                    quality=settings.jpeg_quality,
                    optimize=True,
                    progressive=settings.progressive_jpeg
                )
                content_type = 'image/jpeg'

            return output.getvalue(), content_type

        except Exception as e:
            print(f"이미지 최적화 오류: {e}")
            import traceback
            traceback.print_exc()
            return image_data, 'image/jpeg'

    async def get(self, url: str, size: Optional[str] = None) -> Optional[bytes]:
        """Fetch a cached image from MinIO.

        Returns:
            The object's bytes, or ``None`` on a cache miss (``NoSuchKey``) or
            any other ``S3Error`` (which is printed).
        """
        object_name = self._get_object_name(url, size)
        response = None

        try:
            response = self.client.get_object(settings.minio_bucket_name, object_name)
            data = response.read()
            print(f"✅ Cache HIT from MinIO: {object_name}")
            return data
        except S3Error as e:
            if e.code == 'NoSuchKey':
                print(f"📭 Cache MISS in MinIO: {object_name}")
            else:
                print(f"❌ MinIO error: {e}")
            return None
        finally:
            # Always hand the connection back to the pool, even if read()
            # raised part-way through.
            if response is not None:
                response.close()
                response.release_conn()

    async def set(self, url: str, data: bytes, size: Optional[str] = None):
        """Store ``data`` in MinIO under the object name derived from ``url``/``size``.

        The content type is guessed from the URL suffix, an SVG sniff of the
        payload, and the WebP setting. ``S3Error`` failures are printed, not
        raised.
        """
        object_name = self._get_object_name(url, size)

        try:
            # put_object expects a stream plus an explicit length.
            data_stream = io.BytesIO(data)
            data_length = len(data)

            lowered_url = url.lower()
            if lowered_url.endswith('.svg') or self._is_svg(data):
                content_type = 'image/svg+xml'
            elif lowered_url.endswith('.gif'):
                content_type = 'image/gif'
            elif settings.convert_to_webp and size:
                content_type = 'image/webp'
            else:
                content_type = 'application/octet-stream'

            # Metadata must be ASCII, so the URL hash is stored instead of
            # the URL itself.
            self.client.put_object(
                settings.minio_bucket_name,
                object_name,
                data_stream,
                data_length,
                content_type=content_type,
                metadata={
                    'url_hash': hashlib.md5(url.encode()).hexdigest(),
                    'cached_at': datetime.utcnow().isoformat(),
                    'size_variant': size or 'original'
                }
            )
            print(f"✅ Cached to MinIO: {object_name} ({data_length} bytes)")

        except S3Error as e:
            print(f"❌ Failed to cache to MinIO: {e}")

    async def download_image(self, url: str) -> bytes:
        """Download an image from an external URL.

        Sends browser-like headers, follows redirects and retries once with
        a different User-Agent on HTTP 403.

        Raises:
            httpx.HTTPStatusError: If the final response is an error status.
            ValueError: If the declared or actual body size exceeds
                ``settings.max_image_size_mb``.
        """
        url_parts = url.split('/')
        referer = url_parts[0] + '//' + url_parts[2] if len(url_parts) > 2 else url
        max_bytes = settings.max_image_size_mb * 1024 * 1024

        # SECURITY NOTE(review): verify=False turns off TLS certificate
        # verification (the original comment marks this as development-only).
        # Enable verification before exposing this to production traffic.
        async with httpx.AsyncClient(
            timeout=settings.request_timeout,
            verify=False,
            follow_redirects=True
        ) as client:
            headers = {
                "User-Agent": settings.user_agent,
                "Accept": "image/webp,image/apng,image/*,*/*;q=0.8",
                "Accept-Encoding": "gzip, deflate, br",
                "Cache-Control": "no-cache",
                "Referer": referer
            }

            response = await client.get(url, headers=headers)

            if response.status_code == 403:
                # Retry once with a different User-Agent.
                headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
                response = await client.get(url, headers=headers)

            response.raise_for_status()

            content_length = response.headers.get("content-length")
            if content_length:
                size_mb = int(content_length) / (1024 * 1024)
                if size_mb > settings.max_image_size_mb:
                    raise ValueError(f"이미지 크기가 {settings.max_image_size_mb}MB를 초과합니다")

            body = response.content
            # The header can be absent (e.g. chunked responses), so the limit
            # is also enforced on the bytes actually received.
            if len(body) > max_bytes:
                raise ValueError(f"이미지 크기가 {settings.max_image_size_mb}MB를 초과합니다")

            return body

    async def get_cache_size(self) -> float:
        """Return the summed size of all objects in the bucket, in GiB.

        Returns ``0.0`` if listing fails with an ``S3Error``.
        """
        try:
            total_size = 0
            objects = self.client.list_objects(settings.minio_bucket_name, recursive=True)
            for obj in objects:
                total_size += obj.size
            return total_size / (1024 ** 3)  # bytes -> GiB
        except S3Error as e:
            print(f"❌ Failed to get cache size: {e}")
            return 0.0

    async def get_directory_stats(self) -> dict:
        """Return object and prefix counts for the bucket.

        Returns:
            A dict with ``total_files``, ``total_directories`` (distinct
            parent prefixes), ``average_files_per_directory`` and
            ``bucket_name``. All counts are zero if listing fails.
        """
        try:
            total_files = 0
            directories = set()

            objects = self.client.list_objects(settings.minio_bucket_name, recursive=True)
            for obj in objects:
                total_files += 1
                # Everything before the last "/" is the object's "directory".
                parts = obj.object_name.split('/')
                if len(parts) > 1:
                    directories.add('/'.join(parts[:-1]))

            return {
                "total_files": total_files,
                "total_directories": len(directories),
                "average_files_per_directory": total_files / max(len(directories), 1),
                "bucket_name": settings.minio_bucket_name
            }
        except S3Error as e:
            print(f"❌ Failed to get directory stats: {e}")
            return {
                "total_files": 0,
                "total_directories": 0,
                "average_files_per_directory": 0,
                "bucket_name": settings.minio_bucket_name
            }

    async def cleanup_old_cache(self):
        """Delete objects last modified more than ``settings.cache_ttl_days`` ago.

        Returns:
            The number of deleted objects, or ``0`` if an ``S3Error`` occurs.
        """
        try:
            cutoff_date = datetime.utcnow() - timedelta(days=settings.cache_ttl_days)
            deleted_count = 0

            objects = self.client.list_objects(settings.minio_bucket_name, recursive=True)
            for obj in objects:
                # tzinfo is dropped so the value compares against the naive
                # UTC cutoff. NOTE(review): assumes last_modified is in UTC.
                if obj.last_modified.replace(tzinfo=None) < cutoff_date:
                    self.client.remove_object(settings.minio_bucket_name, obj.object_name)
                    deleted_count += 1
                    print(f"🗑️ Deleted old cache: {obj.object_name}")

            print(f"✅ Cleaned up {deleted_count} old cached files")
            return deleted_count

        except S3Error as e:
            print(f"❌ Failed to cleanup cache: {e}")
            return 0

    async def trigger_background_generation(self, url: str):
        """Schedule generation of every size variant for ``url`` without waiting."""
        task = asyncio.create_task(self._generate_all_sizes(url))
        # Hold a reference until the task completes so it cannot be
        # garbage-collected mid-run; the callback drops it afterwards.
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)

    async def _generate_all_sizes(self, url: str):
        """Download ``url`` once and cache every configured size that is missing.

        SVG payloads are skipped. Any exception is printed, not raised.
        """
        try:
            image_data = await self.download_image(url)

            # SVGs do not need resizing.
            if self._is_svg(image_data):
                return

            for size_name in settings.thumbnail_sizes.keys():
                # Skip variants that are already cached.
                existing = await self.get(url, size_name)
                if not existing:
                    if url.lower().endswith('.gif'):
                        resized_data, _ = self._process_gif(image_data, settings.thumbnail_sizes[size_name])
                    else:
                        resized_data, _ = self.resize_and_optimize_image(image_data, size_name)

                    await self.set(url, resized_data, size_name)
                    print(f"✅ Generated {size_name} version for {url}")

        except Exception as e:
            print(f"❌ Background generation failed for {url}: {e}")
# Singleton instance, created when this module is imported; the constructor
# creates the MinIO client and runs the bucket check.
cache = MinIOImageCache()

View File

@ -8,4 +8,5 @@ python-multipart==0.0.6
pydantic==2.5.3
pydantic-settings==2.1.0
motor==3.3.2
redis==5.0.1
redis==5.0.1
minio==7.2.3