""" Language Sync Service 기존 기사를 새로운 언어로 번역하는 백그라운드 서비스 """ import asyncio import logging import os import sys import json from typing import List, Dict, Any import httpx from motor.motor_asyncio import AsyncIOMotorClient from datetime import datetime # Add parent directory to path for shared module sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # Import from shared module from shared.models import FinalArticle, Subtopic, Entities, NewsReference logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class LanguageSyncService: def __init__(self): self.deepl_api_key = os.getenv("DEEPL_API_KEY", "3abbc796-2515-44a8-972d-22dcf27ab54a") self.deepl_api_url = "https://api.deepl.com/v2/translate" self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") self.db_name = os.getenv("DB_NAME", "ai_writer_db") self.db = None self.languages_config = None self.config_path = "/app/config/languages.json" self.sync_batch_size = 10 self.sync_delay = 2.0 # 언어 간 지연 async def load_config(self): """언어 설정 파일 로드""" try: if os.path.exists(self.config_path): with open(self.config_path, 'r', encoding='utf-8') as f: self.languages_config = json.load(f) logger.info(f"Loaded language config") else: raise FileNotFoundError(f"Config file not found: {self.config_path}") except Exception as e: logger.error(f"Error loading config: {e}") raise async def start(self): """백그라운드 싱크 서비스 시작""" logger.info("Starting Language Sync Service") # 설정 로드 await self.load_config() # MongoDB 연결 client = AsyncIOMotorClient(self.mongodb_url) self.db = client[self.db_name] # 주기적으로 싱크 체크 (10분마다) while True: try: await self.sync_missing_translations() await asyncio.sleep(600) # 10분 대기 except Exception as e: logger.error(f"Error in sync loop: {e}") await asyncio.sleep(60) # 에러 시 1분 후 재시도 async def sync_missing_translations(self): """누락된 번역 싱크""" try: # 활성화된 언어 목록 enabled_languages = [ lang for lang in self.languages_config["enabled_languages"] if lang["enabled"] ] if not enabled_languages: logger.info("No enabled languages for sync") return # 원본 언어 컬렉션 source_collection = self.languages_config["source_language"]["collection"] for lang_config in enabled_languages: await self.sync_language(source_collection, lang_config) await asyncio.sleep(self.sync_delay) except Exception as e: logger.error(f"Error in sync_missing_translations: {e}") async def sync_language(self, source_collection: str, lang_config: Dict): """특정 언어로 누락된 기사 번역""" try: target_collection = lang_config["collection"] # 번역되지 않은 기사 찾기 # 원본에는 있지만 대상 컬렉션에는 없는 기사 source_articles = await self.db[source_collection].find( {}, {"news_id": 1} ).to_list(None) source_ids = {article["news_id"] for article in source_articles} translated_articles = await self.db[target_collection].find( {}, {"news_id": 1} ).to_list(None) translated_ids = {article["news_id"] for article in translated_articles} # 누락된 news_id missing_ids = source_ids - translated_ids if not missing_ids: logger.info(f"No missing translations for {lang_config['name']}") return logger.info(f"Found {len(missing_ids)} missing translations for {lang_config['name']}") # 배치로 처리 missing_list = list(missing_ids) for i in range(0, len(missing_list), self.sync_batch_size): batch = missing_list[i:i+self.sync_batch_size] for news_id in batch: try: # 원본 기사 조회 korean_article = await self.db[source_collection].find_one( {"news_id": news_id} ) if not korean_article: continue # 번역 수행 await self.translate_and_save( korean_article, lang_config ) logger.info(f"Synced article {news_id} to {lang_config['code']}") # API 속도 제한 await asyncio.sleep(0.5) except Exception as e: logger.error(f"Error translating {news_id} to {lang_config['code']}: {e}") continue # 배치 간 지연 if i + self.sync_batch_size < len(missing_list): await asyncio.sleep(self.sync_delay) except Exception as e: logger.error(f"Error syncing language {lang_config['code']}: {e}") async def translate_and_save(self, korean_article: Dict, lang_config: Dict): """기사 번역 및 저장""" try: # 제목 번역 translated_title = await self._translate_text( korean_article.get('title', ''), target_lang=lang_config["deepl_code"] ) # 요약 번역 translated_summary = await self._translate_text( korean_article.get('summary', ''), target_lang=lang_config["deepl_code"] ) # Subtopics 번역 translated_subtopics = [] for subtopic in korean_article.get('subtopics', []): translated_subtopic_title = await self._translate_text( subtopic.get('title', ''), target_lang=lang_config["deepl_code"] ) translated_content_list = [] for content_para in subtopic.get('content', []): translated_para = await self._translate_text( content_para, target_lang=lang_config["deepl_code"] ) translated_content_list.append(translated_para) translated_subtopics.append(Subtopic( title=translated_subtopic_title, content=translated_content_list )) # 카테고리 번역 translated_categories = [] for category in korean_article.get('categories', []): translated_cat = await self._translate_text( category, target_lang=lang_config["deepl_code"] ) translated_categories.append(translated_cat) # Entities와 References는 원본 유지 entities_data = korean_article.get('entities', {}) translated_entities = Entities(**entities_data) if entities_data else Entities() references = [] for ref_data in korean_article.get('references', []): references.append(NewsReference(**ref_data)) # 번역된 기사 생성 translated_article = FinalArticle( news_id=korean_article.get('news_id'), title=translated_title, summary=translated_summary, subtopics=translated_subtopics, categories=translated_categories, entities=translated_entities, source_keyword=korean_article.get('source_keyword'), source_count=korean_article.get('source_count', 1), references=references, job_id=korean_article.get('job_id'), keyword_id=korean_article.get('keyword_id'), pipeline_stages=korean_article.get('pipeline_stages', []) + ['sync_translation'], processing_time=korean_article.get('processing_time', 0), language=lang_config["code"], ref_news_id=None, rss_guid=korean_article.get('rss_guid'), # RSS GUID 유지 image_prompt=korean_article.get('image_prompt'), # 이미지 프롬프트 유지 images=korean_article.get('images', []), # 이미지 URL 리스트 유지 translated_languages=korean_article.get('translated_languages', []) # 번역 언어 목록 유지 ) # MongoDB에 저장 collection_name = lang_config["collection"] result = await self.db[collection_name].insert_one(translated_article.model_dump()) # 원본 기사에 번역 완료 표시 await self.db[self.languages_config["source_language"]["collection"]].update_one( {"news_id": korean_article.get('news_id')}, { "$addToSet": { "translated_languages": lang_config["code"] } } ) logger.info(f"Synced article to {collection_name}: {result.inserted_id}") except Exception as e: logger.error(f"Error in translate_and_save: {e}") raise async def _translate_text(self, text: str, target_lang: str = 'EN') -> str: """DeepL API를 사용한 텍스트 번역""" try: if not text: return "" async with httpx.AsyncClient() as client: response = await client.post( self.deepl_api_url, data={ 'auth_key': self.deepl_api_key, 'text': text, 'target_lang': target_lang, 'source_lang': 'KO' }, timeout=30 ) if response.status_code == 200: result = response.json() return result['translations'][0]['text'] else: logger.error(f"DeepL API error: {response.status_code}") return text except Exception as e: logger.error(f"Error translating text: {e}") return text async def manual_sync(self, language_code: str = None): """수동 싱크 실행""" logger.info(f"Manual sync requested for language: {language_code or 'all'}") await self.load_config() client = AsyncIOMotorClient(self.mongodb_url) self.db = client[self.db_name] if language_code: # 특정 언어만 싱크 lang_config = next( (lang for lang in self.languages_config["enabled_languages"] if lang["code"] == language_code and lang["enabled"]), None ) if lang_config: source_collection = self.languages_config["source_language"]["collection"] await self.sync_language(source_collection, lang_config) else: logger.error(f"Language {language_code} not found or not enabled") else: # 모든 활성 언어 싱크 await self.sync_missing_translations() async def main(): """메인 함수""" service = LanguageSyncService() # 명령줄 인수 확인 if len(sys.argv) > 1: if sys.argv[1] == "sync": # 수동 싱크 모드 language = sys.argv[2] if len(sys.argv) > 2 else None await service.manual_sync(language) else: logger.error(f"Unknown command: {sys.argv[1]}") else: # 백그라운드 서비스 모드 try: await service.start() except KeyboardInterrupt: logger.info("Received interrupt signal") if __name__ == "__main__": asyncio.run(main())