Files
site11/services/pipeline/translator/translator.py
jungwoo choi eeaa9dcb4b feat: Implement automated keyword-based news pipeline scheduler
- Add multi-threaded keyword scheduler for periodic news collection
- Create Keyword Manager API for CRUD operations and monitoring
- Implement automatic pipeline triggering (RSS → Google → AI → Translation)
- Add thread status monitoring and dynamic keyword management
- Support priority-based execution and configurable intervals
- Add comprehensive scheduler documentation guide
- Default keywords: AI, 테크놀로지, 경제, 블록체인

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-15 17:09:22 +09:00

230 lines
8.6 KiB
Python

"""
Translation Service
DeepL API를 사용한 번역 서비스
"""
import asyncio
import logging
import os
import sys
from typing import List, Dict, Any
import httpx
from motor.motor_asyncio import AsyncIOMotorClient
from datetime import datetime
# Import from shared module
from shared.models import PipelineJob, FinalArticle
from shared.queue_manager import QueueManager
# Module-level logger for this service; root logging is configured at INFO
# (basicConfig is a no-op when the root logger already has handlers).
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
class TranslatorWorker:
    """Queue worker that produces English versions of Korean articles.

    Jobs are taken from the ``translation`` queue. For each job the Korean
    article is read from the ``articles_ko`` collection, its text fields are
    translated with the DeepL API, and the result is inserted into
    ``articles_en`` under the same ``news_id``.
    """

    def __init__(self):
        """Read connection settings from the environment.

        Uses ``REDIS_URL``, ``DEEPL_API_KEY``, ``MONGODB_URL`` and ``DB_NAME``.
        No connection is opened here; see :meth:`start`.
        """
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        # The key must come from the environment. A credential committed as a
        # fallback default is readable by anyone with repository access and
        # made the "not configured" check in start() unreachable. Any key that
        # was previously committed here should be rotated.
        self.deepl_api_key = os.getenv("DEEPL_API_KEY", "")
        # DeepL Pro API endpoint
        self.deepl_api_url = "https://api.deepl.com/v2/translate"
        self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        self.db_name = os.getenv("DB_NAME", "ai_writer_db")
        # Kept so stop() can close the MongoDB client created in start()
        self.mongo_client = None
        self.db = None

    async def start(self):
        """Connect to Redis and MongoDB, then process translation jobs.

        Returns early, after logging an error, when no DeepL API key is
        configured. Otherwise loops forever: each iteration waits up to five
        seconds for a job and hands it to :meth:`process_job`. Exceptions in
        the loop are logged and followed by a one-second pause.
        """
        logger.info("Starting Translator Worker")

        # Redis connection
        await self.queue_manager.connect()

        # MongoDB connection
        self.mongo_client = AsyncIOMotorClient(self.mongodb_url)
        self.db = self.mongo_client[self.db_name]

        # Refuse to run without a DeepL API key
        if not self.deepl_api_key:
            logger.error("DeepL API key not configured")
            return

        # Main processing loop
        while True:
            try:
                # Fetch the next job from the queue
                job = await self.queue_manager.dequeue('translation', timeout=5)
                if job:
                    await self.process_job(job)
            except Exception as e:
                logger.error(f"Error in worker loop: {e}")
                # Back off briefly so a persistent failure does not spin
                await asyncio.sleep(1)

    async def process_job(self, job: "PipelineJob"):
        """Create and store the English version of one Korean article.

        Args:
            job: Pipeline job whose ``data['news_id']`` identifies the Korean
                article in ``articles_ko``.

        A missing ``news_id``, a missing article, or any exception raised
        while translating or saving is logged and reported through
        ``queue_manager.mark_failed`` instead of being propagated.
        """
        try:
            logger.info(f"Processing job {job.job_id} for translation")

            news_id = job.data.get('news_id')
            if not news_id:
                logger.error(f"No news_id in job {job.job_id}")
                await self.queue_manager.mark_failed('translation', job, "No news_id")
                return

            # Look up the Korean article (articles_ko)
            korean_article = await self.db.articles_ko.find_one({"news_id": news_id})
            if not korean_article:
                logger.error(f"Article {news_id} not found in MongoDB")
                await self.queue_manager.mark_failed('translation', job, "Article not found")
                return

            from shared.models import Entities, NewsReference, Subtopic

            # Translate title and summary to English
            translated_title = await self._translate_text(
                korean_article.get('title', ''),
                target_lang='EN'
            )
            translated_summary = await self._translate_text(
                korean_article.get('summary', ''),
                target_lang='EN'
            )

            # Translate subtopics
            translated_subtopics = []
            for subtopic in korean_article.get('subtopics', []):
                translated_subtopic_title = await self._translate_text(
                    subtopic.get('title', ''),
                    target_lang='EN'
                )
                translated_content_list = []
                for content_para in subtopic.get('content', []):
                    translated_para = await self._translate_text(
                        content_para,
                        target_lang='EN'
                    )
                    translated_content_list.append(translated_para)
                    await asyncio.sleep(0.2)  # API rate limiting
                translated_subtopics.append(Subtopic(
                    title=translated_subtopic_title,
                    content=translated_content_list
                ))

            # Translate categories
            translated_categories = []
            for category in korean_article.get('categories', []):
                translated_cat = await self._translate_text(category, target_lang='EN')
                translated_categories.append(translated_cat)
                await asyncio.sleep(0.2)  # API rate limiting

            # Entities are copied through as-is (names are not translated)
            entities_data = korean_article.get('entities', {})
            translated_entities = Entities(
                people=entities_data.get('people', []),
                organizations=entities_data.get('organizations', []),
                groups=entities_data.get('groups', []),
                countries=entities_data.get('countries', []),
                events=entities_data.get('events', [])
            )

            # References are carried over untranslated
            references = [
                NewsReference(**ref_data)
                for ref_data in korean_article.get('references', [])
            ]

            # Build the English article; it shares the original's news_id
            english_article = FinalArticle(
                news_id=news_id,
                title=translated_title,
                summary=translated_summary,
                subtopics=translated_subtopics,
                categories=translated_categories,
                entities=translated_entities,
                source_keyword=job.keyword,
                source_count=korean_article.get('source_count', 1),
                references=references,
                job_id=job.job_id,
                keyword_id=job.keyword_id,
                pipeline_stages=[*job.stages_completed, 'translation'],
                processing_time=korean_article.get('processing_time', 0),
                language='en',
                ref_news_id=None  # not needed because news_id is shared
            )

            # Save the English version (articles_en)
            result = await self.db.articles_en.insert_one(english_article.model_dump())
            english_article_id = str(result.inserted_id)
            logger.info(f"English article saved with _id: {english_article_id}, news_id: {news_id}, language: en")

            # Mark the original Korean article as translated
            await self.db.articles_ko.update_one(
                {"news_id": news_id},
                {
                    "$addToSet": {
                        "pipeline_stages": "translation"
                    }
                }
            )

            # Mark the job as completed
            job.stages_completed.append('translation')
            await self.queue_manager.mark_completed('translation', job.job_id)
            logger.info(f"Translation completed for job {job.job_id}")
        except Exception as e:
            logger.error(f"Error processing job {job.job_id}: {e}")
            await self.queue_manager.mark_failed('translation', job, str(e))

    async def _translate_text(self, text: str, target_lang: str = 'EN') -> str:
        """Translate Korean ``text`` with the DeepL API.

        Args:
            text: Source text; treated as Korean (``source_lang='KO'``).
            target_lang: DeepL target language code.

        Returns:
            The translated text, ``""`` for empty input, or the original
            ``text`` unchanged when the request fails or DeepL answers with a
            non-200 status (best-effort fallback).
        """
        if not text:
            return ""
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    self.deepl_api_url,
                    # Header authentication keeps the key out of the form body
                    headers={
                        'Authorization': f'DeepL-Auth-Key {self.deepl_api_key}'
                    },
                    data={
                        'text': text,
                        'target_lang': target_lang,
                        'source_lang': 'KO'
                    },
                    timeout=30
                )
            if response.status_code == 200:
                result = response.json()
                return result['translations'][0]['text']
            logger.error(f"DeepL API error: {response.status_code}")
            return text  # fall back to the original text on failure
        except Exception as e:
            logger.error(f"Error translating text: {e}")
            return text  # fall back to the original text on failure

    async def stop(self):
        """Disconnect from the queue and close the MongoDB client, if any."""
        try:
            await self.queue_manager.disconnect()
        finally:
            # Close the MongoDB client even if the queue disconnect fails
            if self.mongo_client is not None:
                self.mongo_client.close()
                self.mongo_client = None
        logger.info("Translator Worker stopped")
async def main():
    """Run a TranslatorWorker and always shut it down afterwards."""
    translator = TranslatorWorker()
    try:
        await translator.start()
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    finally:
        # stop() runs whether start() returned, raised, or was interrupted
        await translator.stop()
# Script entry point: run the worker's async main() when executed directly.
if __name__ == "__main__":
    asyncio.run(main())