Files
site11/services/pipeline/shared/models.py
jungwoo choi 070032006e feat: Implement async queue-based news pipeline with microservices
Major architectural transformation from synchronous to asynchronous processing:

## Pipeline Services (8 microservices)
- pipeline-scheduler: APScheduler for 30-minute periodic job triggers
- pipeline-rss-collector: RSS feed collection with deduplication (7-day TTL)
- pipeline-google-search: Content enrichment via Google Search API
- pipeline-ai-summarizer: AI summarization using Claude API (claude-sonnet-4-20250514)
- pipeline-translator: Translation using DeepL Pro API
- pipeline-image-generator: Image generation with Replicate API (Stable Diffusion)
- pipeline-article-assembly: Final article assembly and MongoDB storage
- pipeline-monitor: Real-time monitoring dashboard (port 8100)

## Key Features
- Redis-based job queue with deduplication
- Asynchronous processing with Python asyncio
- Shared models and queue manager for inter-service communication
- Docker containerization for all services
- Container names standardized with site11_ prefix

## Removed Services
- Moved to backup: google-search, rss-feed, news-aggregator, ai-writer

## Configuration
- DeepL Pro API: 3abbc796-2515-44a8-972d-22dcf27ab54a
- Claude Model: claude-sonnet-4-20250514
- Redis Queue TTL: 7 days for deduplication

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-13 19:22:14 +09:00

113 lines
3.4 KiB
Python

"""
Pipeline Data Models
파이프라인 전체에서 사용되는 공통 데이터 모델
"""
from datetime import datetime
from typing import List, Dict, Any, Optional
from pydantic import BaseModel, Field
import uuid
class KeywordSubscription(BaseModel):
"""키워드 구독 모델"""
keyword_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
keyword: str
language: str = "ko"
schedule: str = "0 */30 * * *" # Cron expression (30분마다)
is_active: bool = True
is_priority: bool = False
last_processed: Optional[datetime] = None
rss_feeds: List[str] = Field(default_factory=list)
categories: List[str] = Field(default_factory=list)
created_at: datetime = Field(default_factory=datetime.now)
owner: Optional[str] = None
class PipelineJob(BaseModel):
"""파이프라인 작업 모델"""
job_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
keyword_id: str
keyword: str
stage: str # current stage
stages_completed: List[str] = Field(default_factory=list)
data: Dict[str, Any] = Field(default_factory=dict)
retry_count: int = 0
max_retries: int = 3
priority: int = 0
created_at: datetime = Field(default_factory=datetime.now)
updated_at: datetime = Field(default_factory=datetime.now)
class RSSItem(BaseModel):
"""RSS 피드 아이템"""
item_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
title: str
link: str
published: Optional[str] = None
summary: Optional[str] = None
source_feed: str
class SearchResult(BaseModel):
"""검색 결과"""
title: str
link: str
snippet: Optional[str] = None
source: str = "google"
class EnrichedItem(BaseModel):
"""강화된 뉴스 아이템"""
rss_item: RSSItem
search_results: List[SearchResult] = Field(default_factory=list)
class SummarizedItem(BaseModel):
"""요약된 아이템"""
enriched_item: EnrichedItem
ai_summary: str
summary_language: str = "ko"
class TranslatedItem(BaseModel):
"""번역된 아이템"""
summarized_item: SummarizedItem
title_en: str
summary_en: str
class ItemWithImage(BaseModel):
"""이미지가 추가된 아이템"""
translated_item: TranslatedItem
image_url: str
image_prompt: str
class FinalArticle(BaseModel):
"""최종 기사"""
article_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
job_id: str
keyword_id: str
keyword: str
title: str
content: str
summary: str
source_items: List[ItemWithImage]
images: List[str]
categories: List[str] = Field(default_factory=list)
tags: List[str] = Field(default_factory=list)
created_at: datetime = Field(default_factory=datetime.now)
pipeline_stages: List[str]
processing_time: float # seconds
class TranslatedItem(BaseModel):
"""번역된 아이템"""
summarized_item: Dict[str, Any] # SummarizedItem as dict
translated_title: str
translated_summary: str
target_language: str = 'en'
class GeneratedImageItem(BaseModel):
"""이미지 생성된 아이템"""
translated_item: Dict[str, Any] # TranslatedItem as dict
image_url: str
image_prompt: str
class QueueMessage(BaseModel):
"""큐 메시지"""
message_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
queue_name: str
job: PipelineJob
timestamp: datetime = Field(default_factory=datetime.now)
retry_count: int = 0