Major architectural transformation from synchronous to asynchronous processing:

## Pipeline Services (8 microservices)
- pipeline-scheduler: APScheduler for 30-minute periodic job triggers
- pipeline-rss-collector: RSS feed collection with deduplication (7-day TTL)
- pipeline-google-search: Content enrichment via Google Search API
- pipeline-ai-summarizer: AI summarization using Claude API (claude-sonnet-4-20250514)
- pipeline-translator: Translation using DeepL Pro API
- pipeline-image-generator: Image generation with Replicate API (Stable Diffusion)
- pipeline-article-assembly: Final article assembly and MongoDB storage
- pipeline-monitor: Real-time monitoring dashboard (port 8100)

## Key Features
- Redis-based job queue with deduplication
- Asynchronous processing with Python asyncio
- Shared models and queue manager for inter-service communication
- Docker containerization for all services
- Container names standardized with site11_ prefix

## Removed Services
- Moved to backup: google-search, rss-feed, news-aggregator, ai-writer

## Configuration
- DeepL Pro API: [REDACTED: API key removed; secrets must not be recorded in commit messages. Rotate the exposed key and load it from an environment variable.]
- Claude Model: claude-sonnet-4-20250514
- Redis Queue TTL: 7 days for deduplication

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
113 lines
3.4 KiB
Python
113 lines
3.4 KiB
Python
"""
Pipeline Data Models

Common data models shared across the entire pipeline.
"""
|
|
from datetime import datetime
|
|
from typing import List, Dict, Any, Optional
|
|
from pydantic import BaseModel, Field
|
|
import uuid
|
|
|
|
class KeywordSubscription(BaseModel):
    """Keyword subscription model.

    Describes one subscribed keyword together with its processing
    schedule, activity/priority flags and bookkeeping timestamps.
    """

    # Random UUID4 string, generated per instance when not supplied.
    keyword_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    keyword: str
    # Language code for the subscription; defaults to Korean ("ko").
    language: str = "ko"
    # Five-field cron expression (minute hour day-of-month month weekday)
    # meaning "every 30 minutes".  The former default "0 */30 * * *" put
    # the step in the *hour* field, which has range 0-23, so it never
    # fired every 30 minutes and is rejected by cron validators that check
    # the step against the field range.
    # NOTE(review): how the scheduler consumes this field is not visible
    # here -- confirm it is parsed as a standard 5-field crontab string.
    schedule: str = "*/30 * * * *"
    is_active: bool = True
    is_priority: bool = False
    # None until a value is assigned by the caller.
    last_processed: Optional[datetime] = None
    # presumably RSS feed URLs -- verify against the collector service.
    rss_feeds: List[str] = Field(default_factory=list)
    categories: List[str] = Field(default_factory=list)
    # Naive local timestamp taken when the instance is created.
    created_at: datetime = Field(default_factory=datetime.now)
    owner: Optional[str] = None
|
|
|
|
class PipelineJob(BaseModel):
    """Pipeline job model.

    Carries the identifiers, current stage, accumulated payload and
    retry bookkeeping for one job.
    """

    # --- identity ---------------------------------------------------
    job_id: str = Field(default_factory=lambda: f"{uuid.uuid4()}")
    keyword_id: str = Field(...)
    keyword: str = Field(...)

    # --- progress ---------------------------------------------------
    stage: str = Field(...)  # current stage
    stages_completed: List[str] = Field(default_factory=lambda: [])
    data: Dict[str, Any] = Field(default_factory=lambda: {})

    # --- retry / ordering -------------------------------------------
    retry_count: int = Field(default=0)
    max_retries: int = Field(default=3)
    priority: int = Field(default=0)

    # --- timestamps (naive local time at instantiation) -------------
    created_at: datetime = Field(default_factory=lambda: datetime.now())
    updated_at: datetime = Field(default_factory=lambda: datetime.now())
|
|
|
|
class RSSItem(BaseModel):
    """RSS feed item.

    ``title``, ``link`` and ``source_feed`` are required; ``published``
    and ``summary`` default to ``None``.
    """

    # Random UUID4 string, generated per instance when not supplied.
    item_id: str = Field(default_factory=lambda: f"{uuid.uuid4()}")
    title: str = Field(...)
    link: str = Field(...)
    # Stored as a plain string, not parsed into a datetime.
    published: Optional[str] = Field(default=None)
    summary: Optional[str] = Field(default=None)
    source_feed: str = Field(...)
|
|
|
|
class SearchResult(BaseModel):
    """Search result.

    ``title`` and ``link`` are required; ``snippet`` defaults to ``None``
    and ``source`` defaults to ``"google"``.
    """

    title: str = Field(...)
    link: str = Field(...)
    snippet: Optional[str] = Field(default=None)
    source: str = Field(default="google")
|
|
|
|
class EnrichedItem(BaseModel):
    """Enriched news item.

    Pairs one required ``RSSItem`` with a list of ``SearchResult``
    entries; the list starts empty when not supplied.
    """

    rss_item: RSSItem = Field(...)
    search_results: List[SearchResult] = Field(default_factory=lambda: [])
|
|
|
|
class SummarizedItem(BaseModel):
    """Summarized item.

    Wraps an ``EnrichedItem`` together with its AI-generated summary
    text and the language code of that summary (default ``"ko"``).
    """

    enriched_item: EnrichedItem = Field(...)
    ai_summary: str = Field(...)
    summary_language: str = Field(default="ko")
|
|
|
|
class EnglishTranslatedItem(BaseModel):
    """Translated item that nests a full ``SummarizedItem``.

    This class was originally declared under the name ``TranslatedItem``.
    A later class in this module declares ``TranslatedItem`` again with
    different fields, which silently rebinds the module-level name, so
    this definition was reachable only through ``ItemWithImage``.  It has
    a distinct name here so both models can be referenced unambiguously;
    the module-level ``TranslatedItem`` is unaffected.

    NOTE(review): confirm whether this nested variant is still needed or
    whether ``ItemWithImage`` should use the later dict-based model.
    """

    summarized_item: SummarizedItem
    # presumably the English title/summary, judging by the ``_en`` suffix
    # -- verify against the translator service.
    title_en: str
    summary_en: str


class ItemWithImage(BaseModel):
    """Item with an image attached.

    Holds a translated item plus the URL of its image and the prompt
    string associated with that image.
    """

    # Same class this field was bound to before the rename: annotations
    # are evaluated when the class body runs, i.e. before the later
    # ``TranslatedItem`` definition exists.
    translated_item: EnglishTranslatedItem
    image_url: str
    image_prompt: str
|
|
|
|
class FinalArticle(BaseModel):
    """Final article.

    The assembled article, with identifiers linking it to its job and
    keyword, the source items and images, and processing metadata.
    """

    # --- identity ---------------------------------------------------
    article_id: str = Field(default_factory=lambda: f"{uuid.uuid4()}")
    job_id: str = Field(...)
    keyword_id: str = Field(...)
    keyword: str = Field(...)

    # --- article text -----------------------------------------------
    title: str = Field(...)
    content: str = Field(...)
    summary: str = Field(...)

    # --- sources and media (both required, no default) --------------
    source_items: List[ItemWithImage] = Field(...)
    images: List[str] = Field(...)

    # --- classification (empty lists when not supplied) -------------
    categories: List[str] = Field(default_factory=lambda: [])
    tags: List[str] = Field(default_factory=lambda: [])

    # --- processing metadata ----------------------------------------
    # Naive local timestamp taken when the instance is created.
    created_at: datetime = Field(default_factory=lambda: datetime.now())
    pipeline_stages: List[str] = Field(...)
    processing_time: float = Field(...)  # seconds
|
|
|
|
class TranslatedItem(BaseModel):
    """Translated item.

    Stores the summarized item as a plain dictionary alongside the
    translated title and summary and the target language code
    (default ``"en"``).
    """

    summarized_item: Dict[str, Any] = Field(...)  # SummarizedItem as dict
    translated_title: str = Field(...)
    translated_summary: str = Field(...)
    target_language: str = Field(default="en")
|
|
|
|
class GeneratedImageItem(BaseModel):
    """Item with a generated image.

    Stores the translated item as a plain dictionary together with the
    image URL and the prompt string associated with the image.
    """

    translated_item: Dict[str, Any] = Field(...)  # TranslatedItem as dict
    image_url: str = Field(...)
    image_prompt: str = Field(...)
|
|
|
|
class QueueMessage(BaseModel):
    """Queue message.

    Envelope that carries one ``PipelineJob`` together with the name of
    the queue it belongs to and per-message bookkeeping.
    """

    # Random UUID4 string, generated per instance when not supplied.
    message_id: str = Field(default_factory=lambda: f"{uuid.uuid4()}")
    queue_name: str = Field(...)
    job: PipelineJob = Field(...)
    # Naive local timestamp taken when the instance is created.
    timestamp: datetime = Field(default_factory=lambda: datetime.now())
    # Message-level counter, separate from ``PipelineJob.retry_count``.
    retry_count: int = Field(default=0)