"""
Translation Service

Translation service backed by the DeepL API. Consumes jobs from the
'translation' queue, translates the stored Korean article into English
and saves the English version to MongoDB.
"""
import asyncio
import logging
import os
import sys
from datetime import datetime
from typing import Any, Dict, List, Optional

import httpx
from motor.motor_asyncio import AsyncIOMotorClient

# Import from shared module
from shared.models import (
    Entities,
    FinalArticle,
    NewsReference,
    PipelineJob,
    Subtopic,
)
from shared.queue_manager import QueueManager

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class TranslatorWorker:
    """Worker that turns Korean articles into English ones via DeepL.

    Jobs are dequeued from the 'translation' queue. For each job the Korean
    article is read from the ``articles_ko`` collection, its text fields are
    translated, and the result is inserted into ``articles_en`` under the
    same ``news_id``.
    """

    # Pause (seconds) inserted after each paragraph / category translation
    # to stay under the API rate limit.
    RATE_LIMIT_DELAY = 0.2

    def __init__(self):
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        # The key must be supplied through the environment. A credential must
        # never be embedded in source as a fallback; when the variable is
        # missing, start() logs an error and refuses to run.
        self.deepl_api_key = os.getenv("DEEPL_API_KEY", "")
        # Use the DeepL Pro API endpoint
        self.deepl_api_url = "https://api.deepl.com/v2/translate"
        self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        self.db_name = os.getenv("DB_NAME", "ai_writer_db")
        self.db = None
        # Long-lived clients, created on demand and released in stop().
        self._mongo_client: Optional[AsyncIOMotorClient] = None
        self._http_client: Optional[httpx.AsyncClient] = None

    async def start(self):
        """Connect to Redis and MongoDB, then process jobs forever.

        Returns early (after logging an error) when no DeepL API key is
        configured. Errors raised inside the loop are logged and followed by
        a one-second pause so a failing dependency does not cause a hot loop.
        """
        logger.info("Starting Translator Worker")

        # Connect to Redis
        await self.queue_manager.connect()

        # Connect to MongoDB
        self._mongo_client = AsyncIOMotorClient(self.mongodb_url)
        self.db = self._mongo_client[self.db_name]

        # Verify the DeepL API key
        if not self.deepl_api_key:
            logger.error("DeepL API key not configured")
            return

        # Main processing loop
        while True:
            try:
                # Fetch the next job from the queue
                job = await self.queue_manager.dequeue('translation', timeout=5)
                if job:
                    await self.process_job(job)
            except Exception as e:
                logger.error(f"Error in worker loop: {e}")
                await asyncio.sleep(1)

    async def process_job(self, job: PipelineJob):
        """Create and store the English version of the job's article.

        The job is marked failed when it carries no ``news_id``, when the
        Korean article cannot be found, or when any step raises; otherwise it
        is marked completed for the 'translation' stage.
        """
        try:
            logger.info(f"Processing job {job.job_id} for translation")

            news_id = job.data.get('news_id')
            if not news_id:
                logger.error(f"No news_id in job {job.job_id}")
                await self.queue_manager.mark_failed('translation', job, "No news_id")
                return

            # Look up the Korean article in MongoDB (articles_ko)
            korean_article = await self.db.articles_ko.find_one({"news_id": news_id})
            if not korean_article:
                logger.error(f"Article {news_id} not found in MongoDB")
                await self.queue_manager.mark_failed('translation', job, "Article not found")
                return

            english_article = await self._build_english_article(
                job, news_id, korean_article
            )

            # Store the English version in MongoDB (articles_en)
            # NOTE(review): a job retried after this insert would add a second
            # English document for the same news_id - confirm whether an
            # upsert keyed on news_id is wanted here.
            result = await self.db.articles_en.insert_one(english_article.model_dump())
            english_article_id = str(result.inserted_id)

            logger.info(f"English article saved with _id: {english_article_id}, news_id: {news_id}, language: en")

            # Flag the original Korean article as translated
            await self.db.articles_ko.update_one(
                {"news_id": news_id},
                {"$addToSet": {"pipeline_stages": "translation"}},
            )

            # Mark the job as done
            job.stages_completed.append('translation')
            await self.queue_manager.mark_completed('translation', job.job_id)

            logger.info(f"Translation completed for job {job.job_id}")

        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception(f"Error processing job {job.job_id}: {e}")
            await self.queue_manager.mark_failed('translation', job, str(e))

    async def _build_english_article(
        self, job: PipelineJob, news_id, korean_article: Dict[str, Any]
    ) -> FinalArticle:
        """Translate the Korean article's text fields into a FinalArticle."""
        translated_title = await self._translate_text(
            korean_article.get('title', ''), target_lang='EN'
        )
        translated_summary = await self._translate_text(
            korean_article.get('summary', ''), target_lang='EN'
        )
        translated_subtopics = await self._translate_subtopics(
            korean_article.get('subtopics') or []
        )
        translated_categories = await self._translate_categories(
            korean_article.get('categories') or []
        )

        # Entities are carried over untranslated (names of people and
        # organizations are intentionally left as-is). `or {}` guards against
        # a stored null value.
        entities_data = korean_article.get('entities') or {}
        translated_entities = Entities(
            people=entities_data.get('people', []),
            organizations=entities_data.get('organizations', []),
            groups=entities_data.get('groups', []),
            countries=entities_data.get('countries', []),
            events=entities_data.get('events', []),
        )

        # References are reused from the original without translation
        references = [
            NewsReference(**ref_data)
            for ref_data in (korean_article.get('references') or [])
        ]

        # The English article shares the original's news_id, so no
        # ref_news_id back-link is needed.
        return FinalArticle(
            news_id=news_id,
            title=translated_title,
            summary=translated_summary,
            subtopics=translated_subtopics,
            categories=translated_categories,
            entities=translated_entities,
            source_keyword=job.keyword,
            source_count=korean_article.get('source_count', 1),
            references=references,
            job_id=job.job_id,
            keyword_id=job.keyword_id,
            pipeline_stages=list(job.stages_completed) + ['translation'],
            processing_time=korean_article.get('processing_time', 0),
            language='en',
            ref_news_id=None,
        )

    async def _translate_subtopics(self, subtopics: List[Dict[str, Any]]) -> List[Subtopic]:
        """Translate each subtopic's title and content paragraphs."""
        translated_subtopics = []
        for subtopic in subtopics:
            translated_title = await self._translate_text(
                subtopic.get('title', ''), target_lang='EN'
            )

            translated_paragraphs = []
            for paragraph in subtopic.get('content') or []:
                translated_paragraphs.append(
                    await self._translate_text(paragraph, target_lang='EN')
                )
                await asyncio.sleep(self.RATE_LIMIT_DELAY)  # API rate limit

            translated_subtopics.append(
                Subtopic(title=translated_title, content=translated_paragraphs)
            )
        return translated_subtopics

    async def _translate_categories(self, categories: List[str]) -> List[str]:
        """Translate every category label, pausing between API calls."""
        translated_categories = []
        for category in categories:
            translated_categories.append(
                await self._translate_text(category, target_lang='EN')
            )
            await asyncio.sleep(self.RATE_LIMIT_DELAY)  # API rate limit
        return translated_categories

    def _get_http_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it on first use."""
        if self._http_client is None or self._http_client.is_closed:
            # One pooled client is reused for every request instead of
            # opening a fresh connection per translated string.
            self._http_client = httpx.AsyncClient()
        return self._http_client

    async def _translate_text(self, text: str, target_lang: str = 'EN') -> str:
        """Translate Korean text with the DeepL API.

        Args:
            text: Source text; the source language is sent as Korean ('KO').
            target_lang: DeepL target language code.

        Returns:
            The translated text, "" for empty input, or the original text
            unchanged when the request fails (best-effort behavior).
        """
        try:
            if not text:
                return ""

            response = await self._get_http_client().post(
                self.deepl_api_url,
                # Header-based authentication as documented by DeepL; sending
                # `auth_key` in the form body is the legacy method.
                headers={'Authorization': f'DeepL-Auth-Key {self.deepl_api_key}'},
                data={
                    'text': text,
                    'target_lang': target_lang,
                    'source_lang': 'KO',
                },
                timeout=30,
            )

            if response.status_code == 200:
                result = response.json()
                return result['translations'][0]['text']

            logger.error(f"DeepL API error: {response.status_code}")
            return text  # fall back to the original on failure

        except Exception as e:
            logger.error(f"Error translating text: {e}")
            return text  # fall back to the original on failure

    async def stop(self):
        """Release the HTTP and MongoDB clients and disconnect the queue."""
        try:
            if self._http_client is not None:
                await self._http_client.aclose()
                self._http_client = None
            if self._mongo_client is not None:
                self._mongo_client.close()
                self._mongo_client = None
        finally:
            # Always disconnect the queue, even if closing a client failed.
            await self.queue_manager.disconnect()
            logger.info("Translator Worker stopped")


async def main():
    """Run the translator worker until interrupted, then shut it down."""
    worker = TranslatorWorker()

    try:
        await worker.start()
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    finally:
        await worker.stop()


if __name__ == "__main__":
    asyncio.run(main())