Files
site11/services/pipeline/image-generator/image_generator.py
jungwoo choi 070032006e feat: Implement async queue-based news pipeline with microservices
Major architectural transformation from synchronous to asynchronous processing:

## Pipeline Services (8 microservices)
- pipeline-scheduler: APScheduler for 30-minute periodic job triggers
- pipeline-rss-collector: RSS feed collection with deduplication (7-day TTL)
- pipeline-google-search: Content enrichment via Google Search API
- pipeline-ai-summarizer: AI summarization using Claude API (claude-sonnet-4-20250514)
- pipeline-translator: Translation using DeepL Pro API
- pipeline-image-generator: Image generation with Replicate API (Stable Diffusion)
- pipeline-article-assembly: Final article assembly and MongoDB storage
- pipeline-monitor: Real-time monitoring dashboard (port 8100)

## Key Features
- Redis-based job queue with deduplication
- Asynchronous processing with Python asyncio
- Shared models and queue manager for inter-service communication
- Docker containerization for all services
- Container names standardized with site11_ prefix

## Removed Services
- Moved to backup: google-search, rss-feed, news-aggregator, ai-writer

## Configuration
- DeepL Pro API key: [REDACTED — a live credential was committed here; rotate this key and keep secrets out of commit messages]
- Claude Model: claude-sonnet-4-20250514
- Redis Queue TTL: 7 days for deduplication

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-13 19:22:14 +09:00

225 lines
9.4 KiB
Python

"""
Image Generation Service
Replicate API를 사용한 이미지 생성 서비스
"""
import asyncio
import logging
import os
import sys
import base64
from typing import List, Dict, Any
import httpx
from io import BytesIO
# Import from shared module
from shared.models import PipelineJob, TranslatedItem, GeneratedImageItem
from shared.queue_manager import QueueManager
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class ImageGeneratorWorker:
    """Queue worker that renders illustration images for translated news items.

    Consumes jobs from the 'image_generation' Redis queue, generates up to
    three images per job via the Replicate API (SDXL), and forwards the job
    to the 'article_assembly' queue.  Image generation is best-effort: any
    failure still advances the job so the pipeline never stalls at this stage.
    """

    def __init__(self) -> None:
        # Shared Redis-backed queue used by all pipeline services; the
        # default hostname 'redis' matches the docker-compose service name.
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        # When unset, the worker falls back to placeholder image URLs.
        self.replicate_api_key = os.getenv("REPLICATE_API_KEY")
        self.replicate_api_url = "https://api.replicate.com/v1/predictions"
        # Pinned Stable Diffusion XL model version on Replicate.
        self.model_version = (
            "stability-ai/sdxl:"
            "39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b"
        )

    async def start(self) -> None:
        """Connect to Redis and run the dequeue/process loop forever."""
        logger.info("Starting Image Generator Worker")
        await self.queue_manager.connect()
        if not self.replicate_api_key:
            logger.warning("Replicate API key not configured - using placeholder images")
        # Main processing loop: errors are logged and the loop keeps running.
        while True:
            try:
                # Blocking pop with a short timeout keeps the loop responsive.
                job = await self.queue_manager.dequeue('image_generation', timeout=5)
                if job:
                    await self.process_job(job)
            except Exception as e:
                logger.error(f"Error in worker loop: {e}")
                await asyncio.sleep(1)

    async def process_job(self, job: "PipelineJob") -> None:
        """Generate images for a job's translated items and forward the job.

        Best-effort: whether images succeed, are skipped, or fail with an
        exception, the job is always forwarded to article assembly.
        """
        try:
            logger.info(f"Processing job {job.job_id} for image generation")
            translated_items = job.data.get('translated_items', [])
            generated_items = []
            # Cap at 3 items per job to limit Replicate API cost.
            for item_data in translated_items[:3]:
                translated_item = TranslatedItem(**item_data)
                prompt = self._create_image_prompt(translated_item)
                image_url = await self._generate_image(prompt)
                generated_items.append(GeneratedImageItem(
                    translated_item=translated_item,
                    image_url=image_url,
                    image_prompt=prompt
                ))
                # Throttle only when real API calls are being made.
                if self.replicate_api_key:
                    await asyncio.sleep(2)
            if generated_items:
                logger.info(f"Generated images for {len(generated_items)} items")
                job.data['generated_items'] = [item.dict() for item in generated_items]
                # Image generation is the final processing stage before assembly.
                job.stage = 'completed'
            else:
                logger.warning(f"No images generated for job {job.job_id}")
            await self._forward_to_assembly(job)
        except Exception as e:
            logger.error(f"Error processing job {job.job_id}: {e}")
            # Even on failure, push the job onward so the pipeline continues.
            await self._forward_to_assembly(job)

    async def _forward_to_assembly(self, job: "PipelineJob") -> None:
        """Record stage completion and enqueue the job for article assembly."""
        # Guard against a double append when the except path re-forwards a
        # job that already recorded this stage on the success path.
        if 'image_generation' not in job.stages_completed:
            job.stages_completed.append('image_generation')
        await self.queue_manager.enqueue('article_assembly', job)
        await self.queue_manager.mark_completed('image_generation', job.job_id)

    def _create_image_prompt(self, translated_item: "TranslatedItem") -> str:
        """Build an English SDXL prompt from the item's (translated) title."""
        # Fall back to the original RSS title when no translation exists.
        title = translated_item.translated_title or translated_item.summarized_item['enriched_item']['rss_item']['title']
        # Only the first 100 characters of the title are used in the prompt.
        return f"News illustration for: {title[:100]}, professional, photorealistic, high quality, 4k"

    async def _generate_image(self, prompt: str) -> str:
        """Request an image from Replicate and return its URL.

        Returns a via.placeholder.com URL when no API key is configured or
        when the request/prediction fails — callers never see an exception.
        """
        try:
            if not self.replicate_api_key:
                # No key configured: skip the API entirely.
                return "https://via.placeholder.com/800x600.png?text=News+Image"
            async with httpx.AsyncClient() as client:
                # Create a prediction; Replicate answers with an id to poll.
                response = await client.post(
                    self.replicate_api_url,
                    headers={
                        "Authorization": f"Token {self.replicate_api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "version": self.model_version,
                        "input": {
                            "prompt": prompt,
                            "width": 768,
                            "height": 768,
                            "num_outputs": 1,
                            "scheduler": "K_EULER",
                            "num_inference_steps": 25,
                            "guidance_scale": 7.5,
                            "prompt_strength": 0.8,
                            "refine": "expert_ensemble_refiner",
                            "high_noise_frac": 0.8
                        }
                    },
                    timeout=60
                )
                if response.status_code in (200, 201):
                    prediction_id = response.json().get('id')
                    # Predictions are asynchronous on Replicate's side; poll
                    # until the image URL is ready.
                    return await self._poll_prediction(prediction_id)
                logger.error(f"Replicate API error: {response.status_code}")
                return "https://via.placeholder.com/800x600.png?text=Generation+Failed"
        except Exception as e:
            logger.error(f"Error generating image: {e}")
            return "https://via.placeholder.com/800x600.png?text=Error"

    async def _poll_prediction(self, prediction_id: str, max_attempts: int = 30) -> str:
        """Poll a Replicate prediction until it succeeds, fails, or times out.

        Polls every 2 seconds up to *max_attempts* times; always returns a
        URL (real output or a placeholder) and never raises to the caller.
        """
        try:
            async with httpx.AsyncClient() as client:
                for _ in range(max_attempts):
                    response = await client.get(
                        f"{self.replicate_api_url}/{prediction_id}",
                        headers={"Authorization": f"Token {self.replicate_api_key}"},
                        timeout=30
                    )
                    if response.status_code != 200:
                        logger.error(f"Error polling prediction: {response.status_code}")
                        return "https://via.placeholder.com/800x600.png?text=Poll+Error"
                    result = response.json()
                    status = result.get('status')
                    if status == 'succeeded':
                        output = result.get('output')
                        # Replicate returns a list of URLs (num_outputs=1).
                        if output and isinstance(output, list) and len(output) > 0:
                            return output[0]
                        return "https://via.placeholder.com/800x600.png?text=No+Output"
                    if status == 'failed':
                        logger.error(f"Prediction failed: {result.get('error')}")
                        return "https://via.placeholder.com/800x600.png?text=Failed"
                    # Still starting/processing: wait before the next poll.
                    await asyncio.sleep(2)
            # Exceeded the maximum number of attempts.
            return "https://via.placeholder.com/800x600.png?text=Timeout"
        except Exception as e:
            logger.error(f"Error polling prediction: {e}")
            return "https://via.placeholder.com/800x600.png?text=Poll+Exception"

    async def stop(self) -> None:
        """Disconnect from Redis and shut the worker down."""
        await self.queue_manager.disconnect()
        logger.info("Image Generator Worker stopped")
async def main():
"""메인 함수"""
worker = ImageGeneratorWorker()
try:
await worker.start()
except KeyboardInterrupt:
logger.info("Received interrupt signal")
finally:
await worker.stop()
if __name__ == "__main__":
asyncio.run(main())