Files
site11/backup-services/ai-writer/backend/app/article_generator.py
jungwoo choi 070032006e feat: Implement async queue-based news pipeline with microservices
Major architectural transformation from synchronous to asynchronous processing:

## Pipeline Services (8 microservices)
- pipeline-scheduler: APScheduler for 30-minute periodic job triggers
- pipeline-rss-collector: RSS feed collection with deduplication (7-day TTL)
- pipeline-google-search: Content enrichment via Google Search API
- pipeline-ai-summarizer: AI summarization using Claude API (claude-sonnet-4-20250514)
- pipeline-translator: Translation using DeepL Pro API
- pipeline-image-generator: Image generation with Replicate API (Stable Diffusion)
- pipeline-article-assembly: Final article assembly and MongoDB storage
- pipeline-monitor: Real-time monitoring dashboard (port 8100)

## Key Features
- Redis-based job queue with deduplication
- Asynchronous processing with Python asyncio
- Shared models and queue manager for inter-service communication
- Docker containerization for all services
- Container names standardized with site11_ prefix

## Removed Services
- Moved to backup: google-search, rss-feed, news-aggregator, ai-writer

## Configuration
- DeepL Pro API: 3abbc796-2515-44a8-972d-22dcf27ab54a
- Claude Model: claude-sonnet-4-20250514
- Redis Queue TTL: 7 days for deduplication

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-13 19:22:14 +09:00

218 lines
7.6 KiB
Python

"""
Article Generation Module
Claude API를 사용한 기사 생성 로직
"""
from typing import Dict, Any, List, Optional
from datetime import datetime
import json
import uuid
import logging
from anthropic import AsyncAnthropic
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
# Data Models
class NewsSource(BaseModel):
    """Describe one news source that an article refers to."""

    # Title and URL are mandatory for every source entry.
    title: str
    url: str
    # Publication date kept as a plain string; None when it is not known.
    published_date: Optional[str] = Field(default=None)
    # Label for the origin of the source; "Unknown" unless the caller sets it.
    source_site: str = Field(default="Unknown")
class EventInfo(BaseModel):
    """Describe one event mentioned in an article."""

    # Only the event name is required.
    name: str
    # Date and location are free-form strings and may be omitted.
    date: Optional[str] = Field(default=None)
    location: Optional[str] = Field(default=None)
class Entities(BaseModel):
    """Hold the entities extracted for an article.

    Every field defaults to a fresh empty list, so an instance can be
    built from a partial mapping.
    """

    # Plain-string entity lists.
    people: List[str] = Field(default_factory=list)
    organizations: List[str] = Field(default_factory=list)
    groups: List[str] = Field(default_factory=list)
    countries: List[str] = Field(default_factory=list)
    # Events are structured records rather than bare names.
    events: List[EventInfo] = Field(default_factory=list)
    keywords: List[str] = Field(default_factory=list)
class SubTopic(BaseModel):
    """Represent one titled sub-section of an article."""

    # Heading of the sub-section.
    title: str
    # Body of the sub-section as a list of strings (one entry per paragraph
    # according to the generation prompt used elsewhere in this module).
    content: List[str]
class GeneratedArticle(BaseModel):
    """Represent a fully generated article together with its provenance."""

    # Unique identifier; a random UUID4 string is generated when omitted.
    news_id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    # Article body.
    title: str
    summary: str
    subtopics: List[SubTopic]

    # Classification and extracted entities.
    categories: List[str]
    entities: Entities

    # Sources the article was built from; empty list when omitted.
    sources: List[NewsSource] = Field(default_factory=list)
    # Creation time from datetime.now(), i.e. local time without tzinfo.
    created_at: datetime = Field(default_factory=datetime.now)
    # Free-form details about how the article was generated.
    generation_metadata: Dict[str, Any] = Field(default_factory=dict)
# Default model id; kept identical to the value this module has always used.
_DEFAULT_CLAUDE_MODEL = "claude-3-5-sonnet-20241022"
# Only the first N rendered news items go into the prompt (token budget).
_MAX_PROMPT_ITEMS = 10
# Each scraped body is cut to this many characters before prompting.
_MAX_CONTENT_CHARS = 1000

# System prompt (Korean): instructs the model to act as a senior reporter and
# to answer with a JSON object matching the GeneratedArticle fields.
_ARTICLE_SYSTEM_PROMPT = """당신은 전문적인 한국 언론사의 수석 기자입니다.
제공된 데이터를 기반으로 깊이 있고 통찰력 있는 기사를 작성해야 합니다.
기사는 다음 요구사항을 충족해야 합니다:
1. 소주제는 최소 2개, 최대 6개로 구성해야 합니다
2. 각 소주제는 최소 1개, 최대 10개의 문단으로 구성해야 합니다
3. 전문적이고 객관적인 어조를 유지해야 합니다
4. 사실에 기반한 분석과 통찰을 제공해야 합니다
5. 한국 독자를 대상으로 작성되어야 합니다
6. 이벤트 정보는 가능한 일시와 장소를 포함해야 합니다
7. 핵심 키워드를 최대 10개까지 추출해야 합니다
반드시 다음 JSON 형식으로 응답하세요:
{
"title": "기사 제목",
"summary": "한 줄 요약 (100자 이내)",
"subtopics": [
{
"title": "소주제 제목",
"content": ["문단1", "문단2", ...] // 1-10개 문단
}
], // 2-6개 소주제
"categories": ["카테고리1", "카테고리2"],
"entities": {
"people": ["인물1", "인물2"],
"organizations": ["기관1", "기관2"],
"groups": ["단체1", "단체2"],
"countries": ["나라1", "나라2"],
"events": [
{
"name": "이벤트명",
"date": "2025년 1월 15일", // 선택사항
"location": "서울 코엑스" // 선택사항
}
],
"keywords": ["키워드1", "키워드2", ...] // 최대 10개
}
}"""


def _collect_sources(news_items: List[Dict[str, Any]]) -> List[NewsSource]:
    """Return the RSS and Google Search sources of *news_items*, in input order."""
    sources: List[NewsSource] = []
    for item in news_items:
        rss_title = item.get('rss_title', '')
        rss_link = item.get('rss_link', '')
        # An RSS entry is only cited when both its title and link are present.
        if rss_title and rss_link:
            sources.append(NewsSource(
                title=rss_title,
                url=rss_link,
                published_date=item.get('rss_published', ''),
                source_site="RSS Feed",
            ))
        # `or []` tolerates an explicit null for "google_results".
        for result in item.get("google_results") or []:
            if "title" in result and "link" in result:
                sources.append(NewsSource(
                    title=result.get('title', ''),
                    url=result.get('link', ''),
                    published_date=None,
                    source_site="Google Search",
                ))
    return sources


def _render_news_item(item: Dict[str, Any]) -> str:
    """Render one news item (title plus truncated scraped bodies) as prompt text."""
    parts = [f"제목: {item.get('rss_title', '')}\n"]
    for result in item.get("google_results") or []:
        full_content = result.get("full_content")
        if not full_content:
            continue
        if isinstance(full_content, dict):
            parts.append(f"출처: {full_content.get('url', '')}\n")
            # The scraped body may be missing or null; treat both as empty.
            body = full_content.get('content') or ''
        else:
            body = full_content
        # str() guards the slice against non-string payloads.
        parts.append(f"내용: {str(body)[:_MAX_CONTENT_CHARS]}...\n\n")
    return "".join(parts)


def _extract_json_object(text: str) -> Dict[str, Any]:
    """Parse the outermost ``{...}`` span of *text* as JSON."""
    start = text.find('{')
    end = text.rfind('}') + 1
    if start == -1 or end <= start:
        raise ValueError("No valid JSON found in response")
    return json.loads(text[start:end])


def _build_article(
    article_data: Dict[str, Any],
    sources: List[NewsSource],
    metadata: Dict[str, Any],
) -> GeneratedArticle:
    """Build a GeneratedArticle from the decoded model reply.

    Explicit JSON nulls are treated like missing keys so that the model
    defaults apply instead of failing validation.
    """
    subtopics = []
    for raw in article_data.get("subtopics") or []:
        paragraphs = raw.get("content") or []
        # A lone string is accepted as a single-paragraph subtopic.
        if isinstance(paragraphs, str):
            paragraphs = [paragraphs]
        subtopics.append(SubTopic(title=raw.get("title") or "", content=paragraphs))

    raw_entities = article_data.get("entities") or {}
    # Drop null entries so each Entities field falls back to its empty list.
    entities = Entities(
        **{key: value for key, value in raw_entities.items() if value is not None}
    )

    return GeneratedArticle(
        title=article_data.get("title") or "",
        summary=article_data.get("summary") or "",
        subtopics=subtopics,
        categories=article_data.get("categories") or [],
        entities=entities,
        sources=sources,
        generation_metadata=metadata,
    )


async def generate_article_with_claude(
    news_data: Dict[str, Any],
    style: str = "professional",
    claude_api_key: Optional[str] = None,
    model: str = _DEFAULT_CLAUDE_MODEL,
) -> GeneratedArticle:
    """Generate an article from collected news data using the Claude API.

    Args:
        news_data: Mapping read for the keys ``"keyword"`` and
            ``"news_items"``. Each news item is read for ``"rss_title"``,
            ``"rss_link"``, ``"rss_published"`` and ``"google_results"``;
            each Google result is read for ``"title"``, ``"link"`` and
            ``"full_content"``.
        style: Style label interpolated into the prompt. The prompt
            describes ``professional``, ``analytical`` and ``investigative``.
        claude_api_key: API key for the Anthropic client. When falsy, the
            ``CLAUDE_API_KEY`` environment variable is used instead.
        model: Model id sent to the API and recorded in
            ``generation_metadata``; defaults to the id previously
            hard-coded here.

    Returns:
        The generated article. ``sources`` lists every RSS/Google source
        found in ``news_data`` (not only those of the first
        ``_MAX_PROMPT_ITEMS`` items that are sent in the prompt).

    Raises:
        ValueError: The reply contains no ``{...}`` span.
        json.JSONDecodeError: The extracted span is not valid JSON.
        Exception: Any other error from the API call or model construction
            is logged and re-raised unchanged.
    """
    if not claude_api_key:
        import os
        claude_api_key = os.getenv("CLAUDE_API_KEY")

    claude_client = AsyncAnthropic(api_key=claude_api_key)

    # list(...) also tolerates an explicit null for "news_items".
    news_items = list(news_data.get("news_items") or [])
    keyword = news_data.get('keyword', '')

    sources_info = _collect_sources(news_items)
    # Limit the number of items to keep the prompt within the token budget.
    combined_content = "\n".join(
        _render_news_item(item) for item in news_items[:_MAX_PROMPT_ITEMS]
    )

    user_prompt = f"""다음 뉴스 데이터를 기반으로 종합적인 기사를 작성하세요:
키워드: {keyword}
수집된 뉴스 수: {len(news_items)}
뉴스 내용:
{combined_content}
스타일: {style}
- professional: 전통적인 뉴스 기사 스타일
- analytical: 분석적이고 심층적인 스타일
- investigative: 탐사보도 스타일
위의 데이터를 종합하여 통찰력 있는 기사를 JSON 형식으로 작성해주세요."""

    try:
        response = await claude_client.messages.create(
            model=model,
            max_tokens=4000,
            temperature=0.7,
            system=_ARTICLE_SYSTEM_PROMPT,
            messages=[
                {"role": "user", "content": user_prompt}
            ]
        )

        # A reply cut off at the token limit usually yields truncated JSON;
        # say so before the parse error obscures the cause.
        if getattr(response, "stop_reason", None) == "max_tokens":
            logger.warning("Claude response hit max_tokens; JSON may be truncated")

        article_data = _extract_json_object(response.content[0].text)

        article = _build_article(
            article_data,
            sources_info,
            {
                "style": style,
                "keyword": keyword,
                "model": model,
                "timestamp": datetime.now().isoformat(),
            },
        )
        logger.info("Successfully generated article: %s", article.title)
        return article
    except json.JSONDecodeError as e:
        logger.error("Failed to parse Claude response as JSON: %s", e)
        raise
    except Exception as e:
        logger.error("Error generating article with Claude: %s", e)
        raise