Files
site11/services/pipeline/ai-summarizer/ai_summarizer.py
jungwoo choi 070032006e feat: Implement async queue-based news pipeline with microservices
Major architectural transformation from synchronous to asynchronous processing:

## Pipeline Services (8 microservices)
- pipeline-scheduler: APScheduler for 30-minute periodic job triggers
- pipeline-rss-collector: RSS feed collection with deduplication (7-day TTL)
- pipeline-google-search: Content enrichment via Google Search API
- pipeline-ai-summarizer: AI summarization using Claude API (claude-sonnet-4-20250514)
- pipeline-translator: Translation using DeepL Pro API
- pipeline-image-generator: Image generation with Replicate API (Stable Diffusion)
- pipeline-article-assembly: Final article assembly and MongoDB storage
- pipeline-monitor: Real-time monitoring dashboard (port 8100)

## Key Features
- Redis-based job queue with deduplication
- Asynchronous processing with Python asyncio
- Shared models and queue manager for inter-service communication
- Docker containerization for all services
- Container names standardized with site11_ prefix

## Removed Services
- Moved to backup: google-search, rss-feed, news-aggregator, ai-writer

## Configuration
- DeepL Pro API: [API key redacted; credentials should not be recorded in commit messages]
- Claude Model: claude-sonnet-4-20250514
- Redis Queue TTL: 7 days for deduplication

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-13 19:22:14 +09:00

161 lines
5.6 KiB
Python

"""
AI Summarizer Service
Claude API를 사용한 뉴스 요약 서비스
"""
import asyncio
import logging
import os
import sys
from typing import List, Dict, Any
from anthropic import AsyncAnthropic
# Import from shared module
from shared.models import PipelineJob, EnrichedItem, SummarizedItem
from shared.queue_manager import QueueManager
# Configure the root logger at INFO level as soon as this module is loaded.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by everything below.
logger = logging.getLogger(__name__)
class AISummarizerWorker:
    """Queue worker that summarizes enriched news items with the Claude API.

    Jobs are taken from the ``ai_summarization`` queue. Every enriched item
    in a job is summarized, and the job is then forwarded to the
    ``translation`` queue.
    """

    # Queue this worker consumes from and the queue it hands jobs on to.
    QUEUE_NAME = 'ai_summarization'
    NEXT_QUEUE = 'translation'

    # Parameters of the Claude request.
    MODEL = "claude-sonnet-4-20250514"
    MAX_TOKENS = 500
    TEMPERATURE = 0.3

    # Only the first few search results are included in the prompt.
    MAX_SEARCH_RESULTS = 3
    # Maximum number of characters kept when falling back to the RSS summary.
    FALLBACK_CHARS = 200

    def __init__(self):
        """Create the queue manager and read the Claude API key from the environment."""
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        self.claude_api_key = os.getenv("CLAUDE_API_KEY")
        # Created in start() once the API key is known to be present.
        self.claude_client = None

    async def start(self) -> None:
        """Connect to the queue and process jobs until cancelled.

        Returns early (after logging an error) when no Claude API key is
        configured. Otherwise this loops forever; errors raised while
        dequeuing or processing are logged and followed by a short pause.
        """
        logger.info("Starting AI Summarizer Worker")

        # Connect to Redis.
        await self.queue_manager.connect()

        # Initialize the Claude client.
        if not self.claude_api_key:
            logger.error("Claude API key not configured")
            return
        self.claude_client = AsyncAnthropic(api_key=self.claude_api_key)

        # Main processing loop.
        while True:
            try:
                # Fetch the next job from the queue.
                job = await self.queue_manager.dequeue(self.QUEUE_NAME, timeout=5)
                if job:
                    await self.process_job(job)
            except Exception as e:
                # logger.exception keeps the traceback, which a bare
                # logger.error would drop.
                logger.exception(f"Error in worker loop: {e}")
                await asyncio.sleep(1)

    async def process_job(self, job: PipelineJob) -> None:
        """Summarize every enriched item of ``job`` and forward the job.

        On success the summaries are stored under ``job.data['summarized_items']``,
        the job is enqueued on the translation queue and marked completed.
        A job without items, or any exception, marks the job as failed.

        Args:
            job: Pipeline job whose ``data['enriched_items']`` holds the items
                to summarize.
        """
        try:
            logger.info(f"Processing job {job.job_id} for AI summarization")
            enriched_items = job.data.get('enriched_items', [])
            summarized_items = []

            for item_data in enriched_items:
                enriched_item = EnrichedItem(**item_data)

                # Generate the AI summary.
                summary = await self._generate_summary(enriched_item)
                summarized_items.append(
                    SummarizedItem(
                        enriched_item=enriched_item,
                        ai_summary=summary,
                        summary_language='ko',
                    )
                )

                # API rate limiting.
                await asyncio.sleep(1)

            if summarized_items:
                logger.info(f"Summarized {len(summarized_items)} items")

                # Hand over to the next stage (translation).
                job.data['summarized_items'] = [item.dict() for item in summarized_items]
                job.stages_completed.append(self.QUEUE_NAME)
                job.stage = self.NEXT_QUEUE
                await self.queue_manager.enqueue(self.NEXT_QUEUE, job)
                await self.queue_manager.mark_completed(self.QUEUE_NAME, job.job_id)
            else:
                logger.warning(f"No items summarized for job {job.job_id}")
                await self.queue_manager.mark_failed(
                    self.QUEUE_NAME,
                    job,
                    "No items to summarize"
                )
        except Exception as e:
            logger.exception(f"Error processing job {job.job_id}: {e}")
            await self.queue_manager.mark_failed(self.QUEUE_NAME, job, str(e))

    async def _generate_summary(self, enriched_item: EnrichedItem) -> str:
        """Return a Claude-generated summary for ``enriched_item``.

        Falls back to the truncated RSS summary (or the title) when the API
        call fails or the model returns an empty text.
        """
        try:
            prompt = self._build_prompt(enriched_item)

            # Call the Claude API.
            response = await self.claude_client.messages.create(
                model=self.MODEL,
                max_tokens=self.MAX_TOKENS,
                temperature=self.TEMPERATURE,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )
            summary = response.content[0].text.strip()
        except Exception as e:
            logger.exception(f"Error generating summary: {e}")
            # Fallback: use the original summary.
            return self._fallback_summary(enriched_item)

        if not summary:
            # An empty reply is not a usable summary; treat it like a failure.
            logger.warning("Claude returned an empty summary; using fallback")
            return self._fallback_summary(enriched_item)
        return summary

    @classmethod
    def _build_prompt(cls, enriched_item: EnrichedItem) -> str:
        """Assemble the summarization prompt from the RSS item and search results."""
        rss_item = enriched_item.rss_item
        content_parts = [
            f"제목: {rss_item.title}",
            f"요약: {rss_item.summary or '없음'}"
        ]

        # Append the leading search results, if any.
        if enriched_item.search_results:
            content_parts.append("\n관련 검색 결과:")
            top_results = enriched_item.search_results[:cls.MAX_SEARCH_RESULTS]
            for idx, result in enumerate(top_results, 1):
                content_parts.append(f"{idx}. {result.title}")
                if result.snippet:
                    content_parts.append(f" {result.snippet}")

        content = "\n".join(content_parts)
        return f"""다음 뉴스 내용을 200자 이내로 핵심만 요약해주세요.
중요한 사실, 수치, 인물, 조직을 포함하고 객관적인 톤을 유지하세요.
{content}
요약:"""

    @classmethod
    def _fallback_summary(cls, enriched_item: EnrichedItem) -> str:
        """Return the truncated RSS summary, or the title when there is none."""
        rss_item = enriched_item.rss_item
        if rss_item.summary:
            return rss_item.summary[:cls.FALLBACK_CHARS]
        return rss_item.title

    async def stop(self) -> None:
        """Release the Claude client (if one was created) and disconnect the queue."""
        try:
            if self.claude_client is not None:
                # Close the client so its HTTP connections are not leaked.
                await self.claude_client.close()
                self.claude_client = None
        finally:
            # Disconnect the queue even if closing the client failed.
            await self.queue_manager.disconnect()
        logger.info("AI Summarizer Worker stopped")
async def main():
    """Run the summarizer worker until it exits or is interrupted.

    The worker is always stopped on the way out, whether ``start`` returned,
    raised, or was cancelled.
    """
    worker = AISummarizerWorker()
    try:
        await worker.start()
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    except asyncio.CancelledError:
        # Under asyncio.run(), Ctrl-C normally reaches the running coroutine
        # as a cancellation rather than as KeyboardInterrupt, so log it here
        # too. Re-raise so the cancellation is not swallowed.
        logger.info("Received interrupt signal")
        raise
    finally:
        await worker.stop()


# Script entry point: run the worker on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())