From 8866a90f655754fbaafe4ad13a34a12de61a0cea Mon Sep 17 00:00:00 2001 From: jungwoo choi Date: Fri, 12 Sep 2025 07:46:12 +0900 Subject: [PATCH] feat: integrate MinIO storage for image caching service MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace file system storage with MinIO object storage - Add MinIO cache implementation with 3-level directory structure - Support dynamic switching between MinIO and filesystem via config - Fix metadata encoding issue for non-ASCII URLs - Successfully tested with various image sources including Korean URLs All image service features working: - Image proxy and download - 5 size variants (thumb, card, list, detail, hero) - WebP format conversion - Cache hit/miss detection - Background size generation ๐Ÿค– Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- services/images/backend/app/api/endpoints.py | 9 +- services/images/backend/app/core/config.py | 10 +- .../images/backend/app/core/minio_cache.py | 414 ++++++++++++++++++ services/images/backend/requirements.txt | 3 +- 4 files changed, 432 insertions(+), 4 deletions(-) create mode 100644 services/images/backend/app/core/minio_cache.py diff --git a/services/images/backend/app/api/endpoints.py b/services/images/backend/app/api/endpoints.py index 945d3cf..fb78cbe 100644 --- a/services/images/backend/app/api/endpoints.py +++ b/services/images/backend/app/api/endpoints.py @@ -5,9 +5,14 @@ import mimetypes from pathlib import Path import hashlib -from ..core.cache import cache from ..core.config import settings +# MinIO ์‚ฌ์šฉ ์—ฌ๋ถ€์— ๋”ฐ๋ผ ์ ์ ˆํ•œ ์บ์‹œ ๋ชจ๋“ˆ ์„ ํƒ +if settings.use_minio: + from ..core.minio_cache import cache +else: + from ..core.cache import cache + router = APIRouter() @router.get("/image") @@ -113,7 +118,7 @@ async def get_stats(): """์บ์‹œ ํ†ต๊ณ„ ์ •๋ณด""" cache_size = await cache.get_cache_size() - # ๋””๋ ‰ํ† ๋ฆฌ ๊ตฌ์กฐ ํ†ต๊ณ„ ์ถ”๊ฐ€ + # ๋””๋ ‰ํ† ๋ฆฌ ๊ตฌ์กฐ ํ†ต๊ณ„ ์ถ”๊ฐ€ (MinIO ๋˜๋Š” ํŒŒ์ผ์‹œ์Šคํ…œ) dir_stats = await cache.get_directory_stats() return { diff --git a/services/images/backend/app/core/config.py b/services/images/backend/app/core/config.py index 418b1eb..2aaf376 100644 --- a/services/images/backend/app/core/config.py +++ b/services/images/backend/app/core/config.py @@ -6,11 +6,19 @@ class Settings(BaseSettings): app_name: str = "Image Proxy Service" debug: bool = True - # ์บ์‹œ ์„ค์ • + # ์บ์‹œ ์„ค์ • (MinIO ์ „ํ™˜ ์‹œ์—๋„ ๋กœ์ปฌ ์ž„์‹œ ํŒŒ์ผ์šฉ) cache_dir: Path = Path("/app/cache") max_cache_size_gb: int = 10 cache_ttl_days: int = 30 + # MinIO ์„ค์ • + use_minio: bool = True # MinIO ์‚ฌ์šฉ ์—ฌ๋ถ€ + minio_endpoint: str = "minio:9000" + minio_access_key: str = "minioadmin" + minio_secret_key: str = "minioadmin" + minio_bucket_name: str = "image-cache" + minio_secure: bool = False + # ์ด๋ฏธ์ง€ ์„ค์ • max_image_size_mb: int = 20 allowed_formats: list = ["jpg", "jpeg", "png", "gif", "webp", "svg"] diff --git a/services/images/backend/app/core/minio_cache.py b/services/images/backend/app/core/minio_cache.py new file mode 100644 index 0000000..e927989 --- /dev/null +++ b/services/images/backend/app/core/minio_cache.py @@ -0,0 +1,414 @@ +import hashlib +import os +from pathlib import Path +from datetime import datetime, timedelta +from typing import Optional, Tuple +import httpx +from PIL import Image +try: + from pillow_heif import register_heif_opener, register_avif_opener + register_heif_opener() # HEIF/HEIC ์ง€์› + register_avif_opener() # AVIF ์ง€์› + print("HEIF/AVIF support enabled successfully") +except ImportError: + print("Warning: pillow_heif not installed, HEIF/AVIF support disabled") +import io +import asyncio +import ssl +from minio import Minio +from minio.error import S3Error +import tempfile + +from .config import settings + +class MinIOImageCache: + def __init__(self): + # MinIO ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™” + self.client = Minio( + settings.minio_endpoint, + access_key=settings.minio_access_key, + secret_key=settings.minio_secret_key, + secure=settings.minio_secure + ) + + # ๋ฒ„ํ‚ท ์ƒ์„ฑ (๋™๊ธฐ ํ˜ธ์ถœ) + self._ensure_bucket() + + # ๋กœ์ปฌ ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ (์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ์šฉ) + self.temp_dir = Path(tempfile.gettempdir()) / "image_cache_temp" + self.temp_dir.mkdir(parents=True, exist_ok=True) + + def _ensure_bucket(self): + """๋ฒ„ํ‚ท์ด ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธํ•˜๊ณ  ์—†์œผ๋ฉด ์ƒ์„ฑ""" + try: + if not self.client.bucket_exists(settings.minio_bucket_name): + self.client.make_bucket(settings.minio_bucket_name) + print(f"โœ… Created MinIO bucket: {settings.minio_bucket_name}") + else: + print(f"โœ… MinIO bucket exists: {settings.minio_bucket_name}") + except S3Error as e: + print(f"โŒ Error creating bucket: {e}") + + def _get_object_name(self, url: str, size: Optional[str] = None) -> str: + """URL์„ ๊ธฐ๋ฐ˜์œผ๋กœ MinIO ๊ฐ์ฒด ์ด๋ฆ„ ์ƒ์„ฑ""" + url_hash = hashlib.md5(url.encode()).hexdigest() + + # 3๋‹จ๊ณ„ ๋””๋ ‰ํ† ๋ฆฌ ๊ตฌ์กฐ ์ƒ์„ฑ (MinIO๋Š” /๋ฅผ ๋””๋ ‰ํ† ๋ฆฌ์ฒ˜๋Ÿผ ์ทจ๊ธ‰) + level1 = url_hash[:2] + level2 = url_hash[2:4] + level3 = url_hash[4:6] + + # ํฌ๊ธฐ๋ณ„๋กœ ๋‹ค๋ฅธ ํŒŒ์ผ๋ช… ์‚ฌ์šฉ + if size: + filename = f"{url_hash}_{size}" + else: + filename = url_hash + + # ํ™•์žฅ์ž ์ถ”์ถœ (WebP๋กœ ์ €์žฅ๋˜๋Š” ๊ฒฝ์šฐ .webp ์‚ฌ์šฉ) + if settings.convert_to_webp and size: + filename = f"{filename}.webp" + else: + ext = self._get_extension_from_url(url) + if ext: + filename = f"{filename}.{ext}" + + # MinIO ๊ฐ์ฒด ๊ฒฝ๋กœ ์ƒ์„ฑ + object_name = f"{level1}/{level2}/{level3}/{filename}" + return object_name + + def _get_extension_from_url(self, url: str) -> Optional[str]: + """URL์—์„œ ํŒŒ์ผ ํ™•์žฅ์ž ์ถ”์ถœ""" + path = url.split('?')[0] # ์ฟผ๋ฆฌ ํŒŒ๋ผ๋ฏธํ„ฐ ์ œ๊ฑฐ + parts = path.split('.') + if len(parts) > 1: + ext = parts[-1].lower() + if ext in settings.allowed_formats: + return ext + return None + + def _is_svg(self, data: bytes) -> bool: + """SVG ํŒŒ์ผ์ธ์ง€ ํ™•์ธ""" + if len(data) < 100: + return False + + header = data[:1000].lower() + svg_signatures = [ + b' tuple[bytes, str]: + """GIF ์ฒ˜๋ฆฌ - JPEG๋กœ ๋ณ€ํ™˜ํ•˜์—ฌ ์•ˆ์ •์ ์œผ๋กœ ์ฒ˜๋ฆฌ""" + try: + img = Image.open(io.BytesIO(gif_data)) + + if img.mode != 'RGB': + if img.mode == 'P': + img = img.convert('RGBA') + if img.mode == 'RGBA': + background = Image.new('RGB', img.size, (255, 255, 255)) + background.paste(img, mask=img.split()[3] if len(img.split()) == 4 else None) + img = background + elif img.mode != 'RGB': + img = img.convert('RGB') + + # ๋ฆฌ์‚ฌ์ด์ฆˆ + img.thumbnail(target_size, Image.Resampling.LANCZOS) + + # JPEG๋กœ ์ €์žฅ + output = io.BytesIO() + img.save( + output, + format='JPEG', + quality=settings.jpeg_quality, + optimize=True, + progressive=settings.progressive_jpeg + ) + + return output.getvalue(), 'image/jpeg' + + except Exception as e: + print(f"GIF ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}") + return gif_data, 'image/gif' + + def resize_and_optimize_image(self, image_data: bytes, size: str) -> tuple[bytes, str]: + """์ด๋ฏธ์ง€ ๋ฆฌ์‚ฌ์ด์ง• ๋ฐ ์ตœ์ ํ™”""" + try: + target_size = settings.thumbnail_sizes.get(size, settings.thumbnail_sizes["thumb"]) + + # ์ด๋ฏธ์ง€ ์—ด๊ธฐ + img = Image.open(io.BytesIO(image_data)) + + # EXIF ํšŒ์ „ ์ •๋ณด ์ฒ˜๋ฆฌ + try: + from PIL import ImageOps + img = ImageOps.exif_transpose(img) + except: + pass + + # ๋ฆฌ์‚ฌ์ด์ฆˆ (์›๋ณธ ๋น„์œจ ์œ ์ง€) + img.thumbnail(target_size, Image.Resampling.LANCZOS) + + # ์ถœ๋ ฅ ๋ฒ„ํผ + output = io.BytesIO() + + # WebP๋กœ ๋ณ€ํ™˜ ์„ค์ •์ด ํ™œ์„ฑํ™”๋˜์–ด ์žˆ์œผ๋ฉด + if settings.convert_to_webp: + # RGBA๋ฅผ RGB๋กœ ๋ณ€ํ™˜ (WebP๋Š” ํˆฌ๋ช…๋„ ์ง€์›ํ•˜์ง€๋งŒ ์ผ๋ถ€ ๋ธŒ๋ผ์šฐ์ € ํ˜ธํ™˜์„ฑ ๋ฌธ์ œ) + if img.mode in ('RGBA', 'LA', 'P'): + # ํˆฌ๋ช… ๋ฐฐ๊ฒฝ์„ ํฐ์ƒ‰์œผ๋กœ + background = Image.new('RGB', img.size, (255, 255, 255)) + if img.mode == 'P': + img = img.convert('RGBA') + background.paste(img, mask=img.split()[-1] if 'A' in img.mode else None) + img = background + elif img.mode != 'RGB': + img = img.convert('RGB') + + # WebP๋กœ ์ €์žฅ + img.save( + output, + format='WEBP', + quality=settings.webp_quality, + lossless=settings.webp_lossless, + method=6 # ์ตœ๊ณ  ์••์ถ• + ) + content_type = 'image/webp' + else: + # ์›๋ณธ ํฌ๋งท ์œ ์ง€ํ•˜๋ฉด์„œ ์ตœ์ ํ™” + if img.format == 'PNG': + img.save( + output, + format='PNG', + compress_level=settings.png_compress_level, + optimize=settings.optimize_png + ) + content_type = 'image/png' + else: + # JPEG๋กœ ๋ณ€ํ™˜ + if img.mode != 'RGB': + img = img.convert('RGB') + img.save( + output, + format='JPEG', + quality=settings.jpeg_quality, + optimize=True, + progressive=settings.progressive_jpeg + ) + content_type = 'image/jpeg' + + return output.getvalue(), content_type + + except Exception as e: + print(f"์ด๋ฏธ์ง€ ์ตœ์ ํ™” ์˜ค๋ฅ˜: {e}") + import traceback + traceback.print_exc() + return image_data, 'image/jpeg' + + async def get(self, url: str, size: Optional[str] = None) -> Optional[bytes]: + """MinIO์—์„œ ์บ์‹œ๋œ ์ด๋ฏธ์ง€ ๊ฐ€์ ธ์˜ค๊ธฐ""" + object_name = self._get_object_name(url, size) + + try: + # MinIO์—์„œ ๊ฐ์ฒด ๊ฐ€์ ธ์˜ค๊ธฐ + response = self.client.get_object(settings.minio_bucket_name, object_name) + data = response.read() + response.close() + response.release_conn() + + print(f"โœ… Cache HIT from MinIO: {object_name}") + return data + + except S3Error as e: + if e.code == 'NoSuchKey': + print(f"๐Ÿ“ญ Cache MISS in MinIO: {object_name}") + return None + else: + print(f"โŒ MinIO error: {e}") + return None + + async def set(self, url: str, data: bytes, size: Optional[str] = None): + """MinIO์— ์ด๋ฏธ์ง€ ์บ์‹œ ์ €์žฅ""" + object_name = self._get_object_name(url, size) + + try: + # ๋ฐ”์ดํŠธ ๋ฐ์ดํ„ฐ๋ฅผ ์ŠคํŠธ๋ฆผ์œผ๋กœ ๋ณ€ํ™˜ + data_stream = io.BytesIO(data) + data_length = len(data) + + # content-type ๊ฒฐ์ • + if url.lower().endswith('.svg') or self._is_svg(data): + content_type = 'image/svg+xml' + elif url.lower().endswith('.gif'): + content_type = 'image/gif' + elif settings.convert_to_webp and size: + content_type = 'image/webp' + else: + content_type = 'application/octet-stream' + + # MinIO์— ์ €์žฅ (๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋Š” ASCII๋งŒ ์ง€์›ํ•˜๋ฏ€๋กœ URL ํ•ด์‹œ ์‚ฌ์šฉ) + self.client.put_object( + settings.minio_bucket_name, + object_name, + data_stream, + data_length, + content_type=content_type, + metadata={ + 'url_hash': hashlib.md5(url.encode()).hexdigest(), + 'cached_at': datetime.utcnow().isoformat(), + 'size_variant': size or 'original' + } + ) + + print(f"โœ… Cached to MinIO: {object_name} ({data_length} bytes)") + + except S3Error as e: + print(f"โŒ Failed to cache to MinIO: {e}") + + async def download_image(self, url: str) -> bytes: + """์™ธ๋ถ€ URL์—์„œ ์ด๋ฏธ์ง€ ๋‹ค์šด๋กœ๋“œ""" + # SSL ๊ฒ€์ฆ ๋น„ํ™œ์„ฑํ™” (๊ฐœ๋ฐœ ํ™˜๊ฒฝ์šฉ) + ssl_context = ssl.create_default_context() + ssl_context.check_hostname = False + ssl_context.verify_mode = ssl.CERT_NONE + + async with httpx.AsyncClient( + timeout=settings.request_timeout, + verify=False, + follow_redirects=True + ) as client: + headers = { + "User-Agent": settings.user_agent, + "Accept": "image/webp,image/apng,image/*,*/*;q=0.8", + "Accept-Encoding": "gzip, deflate, br", + "Cache-Control": "no-cache", + "Referer": url.split('/')[0] + '//' + url.split('/')[2] if len(url.split('/')) > 2 else url + } + + response = await client.get(url, headers=headers) + + if response.status_code == 403: + headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" + response = await client.get(url, headers=headers) + + response.raise_for_status() + + content_length = response.headers.get("content-length") + if content_length: + size_mb = int(content_length) / (1024 * 1024) + if size_mb > settings.max_image_size_mb: + raise ValueError(f"์ด๋ฏธ์ง€ ํฌ๊ธฐ๊ฐ€ {settings.max_image_size_mb}MB๋ฅผ ์ดˆ๊ณผํ•ฉ๋‹ˆ๋‹ค") + + return response.content + + async def get_cache_size(self) -> float: + """MinIO ๋ฒ„ํ‚ท ํฌ๊ธฐ ์กฐํšŒ (GB)""" + try: + total_size = 0 + objects = self.client.list_objects(settings.minio_bucket_name, recursive=True) + + for obj in objects: + total_size += obj.size + + return total_size / (1024 ** 3) # GB๋กœ ๋ณ€ํ™˜ + + except S3Error as e: + print(f"โŒ Failed to get cache size: {e}") + return 0.0 + + async def get_directory_stats(self) -> dict: + """MinIO ๋””๋ ‰ํ† ๋ฆฌ ๊ตฌ์กฐ ํ†ต๊ณ„""" + try: + total_files = 0 + directories = set() + + objects = self.client.list_objects(settings.minio_bucket_name, recursive=True) + + for obj in objects: + total_files += 1 + # ๋””๋ ‰ํ† ๋ฆฌ ๊ฒฝ๋กœ ์ถ”์ถœ + parts = obj.object_name.split('/') + if len(parts) > 1: + dir_path = '/'.join(parts[:-1]) + directories.add(dir_path) + + return { + "total_files": total_files, + "total_directories": len(directories), + "average_files_per_directory": total_files / max(len(directories), 1), + "bucket_name": settings.minio_bucket_name + } + + except S3Error as e: + print(f"โŒ Failed to get directory stats: {e}") + return { + "total_files": 0, + "total_directories": 0, + "average_files_per_directory": 0, + "bucket_name": settings.minio_bucket_name + } + + async def cleanup_old_cache(self): + """์˜ค๋ž˜๋œ ์บ์‹œ ์ •๋ฆฌ""" + try: + cutoff_date = datetime.utcnow() - timedelta(days=settings.cache_ttl_days) + deleted_count = 0 + + objects = self.client.list_objects(settings.minio_bucket_name, recursive=True) + + for obj in objects: + # ๊ฐ์ฒด์˜ ๋งˆ์ง€๋ง‰ ์ˆ˜์ • ์‹œ๊ฐ„์ด cutoff_date ์ด์ „์ด๋ฉด ์‚ญ์ œ + if obj.last_modified.replace(tzinfo=None) < cutoff_date: + self.client.remove_object(settings.minio_bucket_name, obj.object_name) + deleted_count += 1 + print(f"๐Ÿ—‘๏ธ Deleted old cache: {obj.object_name}") + + print(f"โœ… Cleaned up {deleted_count} old cached files") + return deleted_count + + except S3Error as e: + print(f"โŒ Failed to cleanup cache: {e}") + return 0 + + async def trigger_background_generation(self, url: str): + """๋ฐฑ๊ทธ๋ผ์šด๋“œ์—์„œ ๋‹ค์–‘ํ•œ ํฌ๊ธฐ ์ƒ์„ฑ""" + asyncio.create_task(self._generate_all_sizes(url)) + + async def _generate_all_sizes(self, url: str): + """๋ชจ๋“  ํฌ๊ธฐ ๋ฒ„์ „ ์ƒ์„ฑ""" + try: + # ์›๋ณธ ์ด๋ฏธ์ง€ ๋‹ค์šด๋กœ๋“œ + image_data = await self.download_image(url) + + # SVG๋Š” ๋ฆฌ์‚ฌ์ด์ง• ๋ถˆํ•„์š” + if self._is_svg(image_data): + return + + # ๋ชจ๋“  ํฌ๊ธฐ ์ƒ์„ฑ + for size_name in settings.thumbnail_sizes.keys(): + # ์ด๋ฏธ ์บ์‹œ๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธ + existing = await self.get(url, size_name) + if not existing: + # ๋ฆฌ์‚ฌ์ด์ง• ๋ฐ ์ตœ์ ํ™” + if url.lower().endswith('.gif'): + resized_data, _ = self._process_gif(image_data, settings.thumbnail_sizes[size_name]) + else: + resized_data, _ = self.resize_and_optimize_image(image_data, size_name) + + # ์บ์‹œ์— ์ €์žฅ + await self.set(url, resized_data, size_name) + + print(f"โœ… Generated {size_name} version for {url}") + + except Exception as e: + print(f"โŒ Background generation failed for {url}: {e}") + +# ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค +cache = MinIOImageCache() \ No newline at end of file diff --git a/services/images/backend/requirements.txt b/services/images/backend/requirements.txt index 98c0b9b..f7e6e16 100644 --- a/services/images/backend/requirements.txt +++ b/services/images/backend/requirements.txt @@ -8,4 +8,5 @@ python-multipart==0.0.6 pydantic==2.5.3 pydantic-settings==2.1.0 motor==3.3.2 -redis==5.0.1 \ No newline at end of file +redis==5.0.1 +minio==7.2.3 \ No newline at end of file