"""
Multi-Language Translation Service.

Configuration-driven worker that translates Korean source articles into
every enabled target language through the DeepL API and stores each
translation in its own MongoDB collection.
"""
import asyncio
import logging
import os
import sys
import json
from typing import List, Dict, Any, Optional
from datetime import datetime

import httpx
import redis.asyncio as redis
from motor.motor_asyncio import AsyncIOMotorClient

# Import from shared module
from shared.models import (
    Entities,
    FinalArticle,
    NewsReference,
    PipelineJob,
    Subtopic,
)
from shared.queue_manager import QueueManager

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# HTTP statuses worth retrying: rate limiting and transient server errors.
_RETRYABLE_STATUS_CODES = frozenset({429, 500, 502, 503, 504})

# Upper bound (seconds) for the exponential backoff between retries.
_MAX_BACKOFF_SECONDS = 8


class MultiLanguageTranslator:
    """Queue worker that translates articles into all enabled languages."""

    def __init__(self):
        """Read connection settings from the environment.

        No network connection is opened here; see ``start``.
        """
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        # The API key must come from the environment. A key must never be
        # committed as a fallback default: it leaks the credential and makes
        # the "key not configured" check in ``start`` unreachable.
        self.deepl_api_key = os.getenv("DEEPL_API_KEY", "")
        self.deepl_api_url = os.getenv(
            "DEEPL_API_URL", "https://api.deepl.com/v2/translate"
        )
        self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        self.db_name = os.getenv("DB_NAME", "ai_writer_db")
        self.db = None
        self.languages_config = None
        self.config_path = "/app/config/languages.json"
        # Long-lived clients, created lazily and released in ``stop``.
        self._mongo_client: Optional[AsyncIOMotorClient] = None
        self._http_client: Optional[httpx.AsyncClient] = None

    async def load_config(self):
        """Load the language configuration.

        Reads ``self.config_path`` when the file exists; otherwise falls back
        to a built-in default that enables English only.

        Raises:
            Exception: any error raised while reading or parsing the file is
                logged and re-raised.
        """
        try:
            if os.path.exists(self.config_path):
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    self.languages_config = json.load(f)
            else:
                # Default configuration (English only)
                self.languages_config = {
                    "enabled_languages": [
                        {
                            "code": "en",
                            "name": "English",
                            "deepl_code": "EN",
                            "collection": "articles_en",
                            "enabled": True
                        }
                    ],
                    "source_language": {
                        "code": "ko",
                        "name": "Korean",
                        "collection": "articles_ko"
                    },
                    "translation_settings": {
                        "batch_size": 5,
                        "delay_between_languages": 2.0,
                        "delay_between_articles": 0.5,
                        "max_retries": 3
                    }
                }

            logger.info(f"Loaded language config: {len(self.get_enabled_languages())} languages enabled")

        except Exception as e:
            logger.error(f"Error loading config: {e}")
            raise

    def get_enabled_languages(self) -> List[Dict]:
        """Return the configured languages whose ``enabled`` flag is truthy."""
        return [
            lang
            for lang in self.languages_config["enabled_languages"]
            if lang["enabled"]
        ]

    def _translation_settings(self) -> Dict[str, Any]:
        """Return the ``translation_settings`` section, or ``{}`` if absent.

        Every setting is optional, so a config file that omits the section
        must not make job processing fail with ``KeyError``.
        """
        return (self.languages_config or {}).get("translation_settings", {})

    def _get_http_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it on first use.

        One client is reused for all DeepL calls so connections are pooled
        instead of being re-established for every translated text fragment.
        """
        if self._http_client is None or self._http_client.is_closed:
            self._http_client = httpx.AsyncClient()
        return self._http_client

    async def start(self):
        """Connect to Redis and MongoDB, then process jobs until cancelled.

        Returns early (after logging an error) when no DeepL API key is
        configured.
        """
        logger.info("Starting Multi-Language Translator Worker")

        # Load configuration
        await self.load_config()

        # Connect to Redis
        await self.queue_manager.connect()

        # Connect to MongoDB; keep the client so ``stop`` can close it
        self._mongo_client = AsyncIOMotorClient(self.mongodb_url)
        self.db = self._mongo_client[self.db_name]

        # Verify the DeepL API key
        if not self.deepl_api_key:
            logger.error("DeepL API key not configured")
            return

        # Main processing loop
        while True:
            try:
                # Fetch the next job from the queue
                job = await self.queue_manager.dequeue('translation', timeout=5)

                if job:
                    await self.process_job(job)

            except Exception as e:
                logger.error(f"Error in worker loop: {e}")
                # Brief pause so a persistent failure does not spin the loop
                await asyncio.sleep(1)

    async def process_job(self, job: PipelineJob):
        """Translate the job's article into every enabled language.

        A failure for one language is logged and does not stop the remaining
        languages. The job is marked failed when the ``news_id`` is missing,
        the source article is not found, or an unexpected error occurs;
        otherwise it is marked completed.
        """
        try:
            logger.info(f"Processing job {job.job_id} for multi-language translation")

            # Fetch the Korean source article from MongoDB
            news_id = job.data.get('news_id')
            if not news_id:
                logger.error(f"No news_id in job {job.job_id}")
                await self.queue_manager.mark_failed('translation', job, "No news_id")
                return

            # Look the article up in the source collection
            source_collection = self.languages_config["source_language"]["collection"]
            korean_article = await self.db[source_collection].find_one({"news_id": news_id})

            if not korean_article:
                logger.error(f"Article {news_id} not found in {source_collection}")
                await self.queue_manager.mark_failed('translation', job, "Article not found")
                return

            # Translate into every enabled language
            enabled_languages = self.get_enabled_languages()
            settings = self._translation_settings()
            failed_languages = []

            for lang_config in enabled_languages:
                try:
                    logger.info(f"Translating article {news_id} to {lang_config['name']}")

                    # Skip languages that already have a stored translation
                    existing = await self.db[lang_config["collection"]].find_one({"news_id": news_id})
                    if existing:
                        logger.info(f"Article {news_id} already translated to {lang_config['code']}")
                        continue

                    # Perform the translation
                    await self.translate_article(korean_article, lang_config, job)

                    # Delay between languages
                    if settings.get("delay_between_languages"):
                        await asyncio.sleep(settings["delay_between_languages"])

                except Exception as e:
                    logger.error(f"Error translating to {lang_config['code']}: {e}")
                    failed_languages.append(lang_config.get('code'))
                    continue

            # Summarize per-language failures so a partial result is visible
            # in one place rather than only as scattered error lines.
            if failed_languages:
                logger.warning(
                    f"Translation for news_id {news_id} failed for languages: {failed_languages}"
                )

            # Pipeline completion log
            logger.info(f"Translation pipeline completed for news_id: {news_id}")

            # Mark the stage as completed
            job.stages_completed.append('translation')
            await self.queue_manager.mark_completed('translation', job.job_id)

            logger.info(f"Multi-language translation completed for job {job.job_id}")

        except Exception as e:
            logger.error(f"Error processing job {job.job_id}: {e}")
            await self.queue_manager.mark_failed('translation', job, str(e))

    async def translate_article(self, korean_article: Dict, lang_config: Dict, job: PipelineJob):
        """Translate one article into one language and store the result.

        Title, summary, subtopics and categories are translated; entities,
        references and image data are copied from the source article. The
        translation is inserted into ``lang_config["collection"]`` and the
        language code is added to the source article's
        ``translated_languages`` set.

        Raises:
            Exception: any error is logged and re-raised to the caller.
        """
        try:
            target_lang = lang_config["deepl_code"]
            settings = self._translation_settings()

            # Translate the title
            translated_title = await self._translate_text(
                korean_article.get('title', ''),
                target_lang=target_lang
            )

            # Translate the summary
            translated_summary = await self._translate_text(
                korean_article.get('summary', ''),
                target_lang=target_lang
            )

            # Translate subtopics
            translated_subtopics = []
            for subtopic in korean_article.get('subtopics', []):
                translated_subtopic_title = await self._translate_text(
                    subtopic.get('title', ''),
                    target_lang=target_lang
                )

                translated_content_list = []
                for content_para in subtopic.get('content', []):
                    translated_para = await self._translate_text(
                        content_para,
                        target_lang=target_lang
                    )
                    translated_content_list.append(translated_para)

                    # API rate limiting: pause after each paragraph
                    if settings.get("delay_between_articles"):
                        await asyncio.sleep(settings["delay_between_articles"])

                translated_subtopics.append(Subtopic(
                    title=translated_subtopic_title,
                    content=translated_content_list
                ))

            # Translate categories
            translated_categories = [
                await self._translate_text(category, target_lang=target_lang)
                for category in korean_article.get('categories', [])
            ]

            # Entities and references are kept as in the source article
            entities_data = korean_article.get('entities', {})
            translated_entities = Entities(**entities_data) if entities_data else Entities()

            references = [
                NewsReference(**ref_data)
                for ref_data in korean_article.get('references', [])
            ]

            # Build the translated article
            translated_article = FinalArticle(
                news_id=korean_article.get('news_id'),  # same news_id as the source
                title=translated_title,
                summary=translated_summary,
                subtopics=translated_subtopics,
                categories=translated_categories,
                entities=translated_entities,
                source_keyword=getattr(job, 'keyword', korean_article.get('source_keyword')),
                source_count=korean_article.get('source_count', 1),
                references=references,
                job_id=job.job_id,
                keyword_id=getattr(job, 'keyword_id', None),
                pipeline_stages=korean_article.get('pipeline_stages', []) + ['translation'],
                processing_time=korean_article.get('processing_time', 0),
                language=lang_config["code"],
                ref_news_id=None,  # not needed because the same news_id is used
                rss_guid=korean_article.get('rss_guid'),  # keep the RSS GUID
                image_prompt=korean_article.get('image_prompt'),  # keep the image prompt
                images=korean_article.get('images', []),  # keep the image URL list
                translated_languages=korean_article.get('translated_languages', [])  # keep the translated-language list
            )

            # Store in MongoDB
            collection_name = lang_config["collection"]
            result = await self.db[collection_name].insert_one(translated_article.model_dump())

            logger.info(f"Article saved to {collection_name} with _id: {result.inserted_id}, language: {lang_config['code']}")

            # Record on the source article that this language is translated
            await self.db[self.languages_config["source_language"]["collection"]].update_one(
                {"news_id": korean_article.get('news_id')},
                {
                    "$addToSet": {
                        "translated_languages": lang_config["code"]
                    }
                }
            )

        except Exception as e:
            logger.error(f"Error translating article to {lang_config['code']}: {e}")
            raise

    async def _translate_text(self, text: str, target_lang: str = 'EN') -> str:
        """Translate Korean ``text`` into ``target_lang`` with the DeepL API.

        Transient failures (HTTP 429/5xx and transport errors) are retried up
        to ``translation_settings.max_retries`` times with a capped
        exponential backoff. This method never raises.

        Args:
            text: source text; an empty or ``None`` value yields ``""``.
            target_lang: DeepL target language code.

        Returns:
            The translated text, or the original ``text`` when translation
            fails.
        """
        if not text:
            return ""

        try:
            retries = max(0, int(self._translation_settings().get("max_retries", 0)))
        except (TypeError, ValueError):
            retries = 0

        for attempt in range(retries + 1):
            try:
                client = self._get_http_client()
                response = await client.post(
                    self.deepl_api_url,
                    # Header-based authentication; sending the key in the
                    # request body is the legacy scheme.
                    headers={'Authorization': f'DeepL-Auth-Key {self.deepl_api_key}'},
                    data={
                        'text': text,
                        'target_lang': target_lang,
                        'source_lang': 'KO'
                    },
                    timeout=30
                )

                if response.status_code == 200:
                    result = response.json()
                    return result['translations'][0]['text']

                logger.error(f"DeepL API error: {response.status_code}")
                if response.status_code not in _RETRYABLE_STATUS_CODES:
                    return text  # permanent failure: fall back to the original

            except httpx.TransportError as e:
                # Timeouts and connection problems are worth another attempt
                logger.error(f"Error translating text: {e}")
            except Exception as e:
                logger.error(f"Error translating text: {e}")
                return text  # fall back to the original on unexpected errors

            if attempt < retries:
                await asyncio.sleep(min(2 ** attempt, _MAX_BACKOFF_SECONDS))

        return text  # retries exhausted: fall back to the original

    async def stop(self):
        """Disconnect from the queue and release the HTTP and MongoDB clients."""
        try:
            await self.queue_manager.disconnect()
        finally:
            # Release the remaining clients even if the disconnect fails
            if self._http_client is not None:
                await self._http_client.aclose()
                self._http_client = None
            if self._mongo_client is not None:
                self._mongo_client.close()
                self._mongo_client = None
        logger.info("Multi-Language Translator Worker stopped")


async def main():
    """Run the worker until interrupted, then shut it down."""
    worker = MultiLanguageTranslator()

    try:
        await worker.start()
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    finally:
        await worker.stop()


if __name__ == "__main__":
    asyncio.run(main())