Major architectural transformation from synchronous to asynchronous processing: ## Pipeline Services (8 microservices) - pipeline-scheduler: APScheduler for 30-minute periodic job triggers - pipeline-rss-collector: RSS feed collection with deduplication (7-day TTL) - pipeline-google-search: Content enrichment via Google Search API - pipeline-ai-summarizer: AI summarization using Claude API (claude-sonnet-4-20250514) - pipeline-translator: Translation using DeepL Pro API - pipeline-image-generator: Image generation with Replicate API (Stable Diffusion) - pipeline-article-assembly: Final article assembly and MongoDB storage - pipeline-monitor: Real-time monitoring dashboard (port 8100) ## Key Features - Redis-based job queue with deduplication - Asynchronous processing with Python asyncio - Shared models and queue manager for inter-service communication - Docker containerization for all services - Container names standardized with site11_ prefix ## Removed Services - Moved to backup: google-search, rss-feed, news-aggregator, ai-writer ## Configuration - DeepL Pro API: [REDACTED — key was leaked here; rotate it and supply via the DEEPL_API_KEY environment variable] - Claude Model: claude-sonnet-4-20250514 - Redis Queue TTL: 7 days for deduplication 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
154 lines
5.3 KiB
Python
154 lines
5.3 KiB
Python
"""
|
|
Translation Service
|
|
DeepL API를 사용한 번역 서비스
|
|
"""
|
|
import asyncio
|
|
import logging
|
|
import os
|
|
import sys
|
|
from typing import List, Dict, Any
|
|
import httpx
|
|
|
|
# Import from shared module
|
|
from shared.models import PipelineJob, SummarizedItem, TranslatedItem
|
|
from shared.queue_manager import QueueManager
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class TranslatorWorker:
    """Queue worker that translates summarized pipeline items via the DeepL API.

    Pulls jobs from the 'translation' Redis queue, translates each item's
    title and AI summary (Korean -> English), attaches the results to the
    job, and forwards it to the 'image_generation' stage.
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        # SECURITY FIX: the DeepL key was previously hard-coded here as the
        # getenv() default (and leaked in the commit message), which also made
        # the "not configured" guard in start() unreachable. The key must now
        # be supplied via the DEEPL_API_KEY environment variable; the leaked
        # key should be rotated.
        self.deepl_api_key = os.getenv("DEEPL_API_KEY", "")
        # DeepL Pro endpoint (the free tier uses api-free.deepl.com instead)
        self.deepl_api_url = "https://api.deepl.com/v2/translate"

    async def start(self):
        """Run the worker: connect to Redis and loop dequeuing translation jobs."""
        logger.info("Starting Translator Worker")

        # Connect to Redis
        await self.queue_manager.connect()

        # Without a key every DeepL request would fail, so bail out early.
        if not self.deepl_api_key:
            logger.error("DeepL API key not configured")
            return

        # Main processing loop: block up to 5s per dequeue, never exit on error.
        while True:
            try:
                job = await self.queue_manager.dequeue('translation', timeout=5)

                if job:
                    await self.process_job(job)

            except Exception as e:
                logger.error(f"Error in worker loop: {e}")
                await asyncio.sleep(1)  # brief backoff so a hard failure can't spin

    async def process_job(self, job: PipelineJob):
        """Translate every summarized item in *job* and advance the pipeline.

        On success the job is enqueued to 'image_generation' and marked
        completed for this stage; on any error (or when nothing was
        translated) it is marked failed.
        """
        try:
            logger.info(f"Processing job {job.job_id} for translation")

            summarized_items = job.data.get('summarized_items', [])
            translated_items = []

            for item_data in summarized_items:
                summarized_item = SummarizedItem(**item_data)

                # Translate the item's title and AI summary
                translated_title = await self._translate_text(
                    summarized_item.enriched_item['rss_item']['title'],
                    target_lang='EN'
                )

                translated_summary = await self._translate_text(
                    summarized_item.ai_summary,
                    target_lang='EN'
                )

                translated_item = TranslatedItem(
                    summarized_item=summarized_item,
                    translated_title=translated_title,
                    translated_summary=translated_summary,
                    target_language='en'
                )
                translated_items.append(translated_item)

                # Throttle to respect the DeepL API rate limit
                await asyncio.sleep(0.5)

            if translated_items:
                logger.info(f"Translated {len(translated_items)} items")

                # Attach results and hand the job to the next stage
                job.data['translated_items'] = [item.dict() for item in translated_items]
                job.stages_completed.append('translation')
                job.stage = 'image_generation'

                await self.queue_manager.enqueue('image_generation', job)
                await self.queue_manager.mark_completed('translation', job.job_id)
            else:
                logger.warning(f"No items translated for job {job.job_id}")
                await self.queue_manager.mark_failed(
                    'translation',
                    job,
                    "No items to translate"
                )

        except Exception as e:
            logger.error(f"Error processing job {job.job_id}: {e}")
            await self.queue_manager.mark_failed('translation', job, str(e))

    async def _translate_text(self, text: str, target_lang: str = 'EN') -> str:
        """Translate *text* (Korean source) with DeepL.

        Returns the translated string, or the original text on any API or
        network failure — translation is best-effort by design.
        """
        try:
            if not text:
                return ""

            async with httpx.AsyncClient() as client:
                response = await client.post(
                    self.deepl_api_url,
                    data={
                        'auth_key': self.deepl_api_key,
                        'text': text,
                        'target_lang': target_lang,
                        'source_lang': 'KO'
                    },
                    timeout=30
                )

                if response.status_code == 200:
                    result = response.json()
                    return result['translations'][0]['text']
                else:
                    logger.error(f"DeepL API error: {response.status_code}")
                    return text  # return the original text when translation fails

        except Exception as e:
            logger.error(f"Error translating text: {e}")
            return text  # return the original text when translation fails

    async def stop(self):
        """Disconnect from Redis and shut the worker down."""
        await self.queue_manager.disconnect()
        logger.info("Translator Worker stopped")
|
|
|
|
async def main():
    """Entry point: run the translator worker until it exits or is interrupted."""
    translator = TranslatorWorker()

    try:
        try:
            await translator.start()
        except KeyboardInterrupt:
            logger.info("Received interrupt signal")
    finally:
        # Always release the Redis connection, whatever ended the run.
        await translator.stop()
|
|
|
|
if __name__ == "__main__":
|
|
asyncio.run(main()) |