feat: Implement async queue-based news pipeline with microservices

Major architectural transformation from synchronous to asynchronous processing:

## Pipeline Services (8 microservices)
- pipeline-scheduler: APScheduler for 30-minute periodic job triggers
- pipeline-rss-collector: RSS feed collection with deduplication (7-day TTL)
- pipeline-google-search: Content enrichment via Google Search API
- pipeline-ai-summarizer: AI summarization using Claude API (claude-sonnet-4-20250514)
- pipeline-translator: Translation using DeepL Pro API
- pipeline-image-generator: Image generation with Replicate API (Stable Diffusion)
- pipeline-article-assembly: Final article assembly and MongoDB storage
- pipeline-monitor: Real-time monitoring dashboard (port 8100)

## Key Features
- Redis-based job queue with deduplication
- Asynchronous processing with Python asyncio
- Shared models and queue manager for inter-service communication
- Docker containerization for all services
- Container names standardized with site11_ prefix

## Removed Services
- Moved to backup: google-search, rss-feed, news-aggregator, ai-writer

## Configuration
- DeepL Pro API: 3abbc796-2515-44a8-972d-22dcf27ab54a
- Claude Model: claude-sonnet-4-20250514
- Redis Queue TTL: 7 days for deduplication

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2025-09-13 19:22:14 +09:00
parent 1d90af7c3c
commit 070032006e
73 changed files with 5922 additions and 4 deletions

View File

@ -0,0 +1,19 @@
FROM python:3.11-slim
WORKDIR /app
# 의존성 설치
COPY ./ai-summarizer/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# 공통 모듈 복사
COPY ./shared /app/shared
# AI Summarizer 코드 복사
COPY ./ai-summarizer /app
# 환경변수
ENV PYTHONUNBUFFERED=1
# 실행
CMD ["python", "ai_summarizer.py"]

View File

@ -0,0 +1,161 @@
"""
AI Summarizer Service
Claude API를 사용한 뉴스 요약 서비스
"""
import asyncio
import logging
import os
import sys
from typing import List, Dict, Any
from anthropic import AsyncAnthropic
# Import from shared module
from shared.models import PipelineJob, EnrichedItem, SummarizedItem
from shared.queue_manager import QueueManager
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class AISummarizerWorker:
def __init__(self):
self.queue_manager = QueueManager(
redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
)
self.claude_api_key = os.getenv("CLAUDE_API_KEY")
self.claude_client = None
async def start(self):
"""워커 시작"""
logger.info("Starting AI Summarizer Worker")
# Redis 연결
await self.queue_manager.connect()
# Claude 클라이언트 초기화
if self.claude_api_key:
self.claude_client = AsyncAnthropic(api_key=self.claude_api_key)
else:
logger.error("Claude API key not configured")
return
# 메인 처리 루프
while True:
try:
# 큐에서 작업 가져오기
job = await self.queue_manager.dequeue('ai_summarization', timeout=5)
if job:
await self.process_job(job)
except Exception as e:
logger.error(f"Error in worker loop: {e}")
await asyncio.sleep(1)
async def process_job(self, job: PipelineJob):
"""AI 요약 작업 처리"""
try:
logger.info(f"Processing job {job.job_id} for AI summarization")
enriched_items = job.data.get('enriched_items', [])
summarized_items = []
for item_data in enriched_items:
enriched_item = EnrichedItem(**item_data)
# AI 요약 생성
summary = await self._generate_summary(enriched_item)
summarized_item = SummarizedItem(
enriched_item=enriched_item,
ai_summary=summary,
summary_language='ko'
)
summarized_items.append(summarized_item)
# API 속도 제한
await asyncio.sleep(1)
if summarized_items:
logger.info(f"Summarized {len(summarized_items)} items")
# 다음 단계로 전달 (번역 단계로)
job.data['summarized_items'] = [item.dict() for item in summarized_items]
job.stages_completed.append('ai_summarization')
job.stage = 'translation'
await self.queue_manager.enqueue('translation', job)
await self.queue_manager.mark_completed('ai_summarization', job.job_id)
else:
logger.warning(f"No items summarized for job {job.job_id}")
await self.queue_manager.mark_failed(
'ai_summarization',
job,
"No items to summarize"
)
except Exception as e:
logger.error(f"Error processing job {job.job_id}: {e}")
await self.queue_manager.mark_failed('ai_summarization', job, str(e))
async def _generate_summary(self, enriched_item: EnrichedItem) -> str:
"""Claude를 사용한 요약 생성"""
try:
# 컨텐츠 준비
content_parts = [
f"제목: {enriched_item.rss_item.title}",
f"요약: {enriched_item.rss_item.summary or '없음'}"
]
# 검색 결과 추가
if enriched_item.search_results:
content_parts.append("\n관련 검색 결과:")
for idx, result in enumerate(enriched_item.search_results[:3], 1):
content_parts.append(f"{idx}. {result.title}")
if result.snippet:
content_parts.append(f" {result.snippet}")
content = "\n".join(content_parts)
# Claude API 호출
prompt = f"""다음 뉴스 내용을 200자 이내로 핵심만 요약해주세요.
중요한 사실, 수치, 인물, 조직을 포함하고 객관적인 톤을 유지하세요.
{content}
요약:"""
response = await self.claude_client.messages.create(
model="claude-sonnet-4-20250514", # 최신 Sonnet 모델
max_tokens=500,
temperature=0.3,
messages=[
{"role": "user", "content": prompt}
]
)
summary = response.content[0].text.strip()
return summary
except Exception as e:
logger.error(f"Error generating summary: {e}")
# 폴백: 원본 요약 사용
return enriched_item.rss_item.summary[:200] if enriched_item.rss_item.summary else enriched_item.rss_item.title
async def stop(self):
"""워커 중지"""
await self.queue_manager.disconnect()
logger.info("AI Summarizer Worker stopped")
async def main():
"""메인 함수"""
worker = AISummarizerWorker()
try:
await worker.start()
except KeyboardInterrupt:
logger.info("Received interrupt signal")
finally:
await worker.stop()
if __name__ == "__main__":
asyncio.run(main())

View File

@ -0,0 +1,3 @@
anthropic==0.50.0
redis[hiredis]==5.0.1
pydantic==2.5.0