""" Pipeline Data Models 파이프라인 전체에서 사용되는 공통 데이터 모델 """ from datetime import datetime from typing import List, Dict, Any, Optional from pydantic import BaseModel, Field import uuid class KeywordSubscription(BaseModel): """키워드 구독 모델""" keyword_id: str = Field(default_factory=lambda: str(uuid.uuid4())) keyword: str language: str = "ko" schedule: str = "0 */30 * * *" # Cron expression (30분마다) is_active: bool = True is_priority: bool = False last_processed: Optional[datetime] = None rss_feeds: List[str] = Field(default_factory=list) categories: List[str] = Field(default_factory=list) created_at: datetime = Field(default_factory=datetime.now) owner: Optional[str] = None class PipelineJob(BaseModel): """파이프라인 작업 모델""" job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) keyword_id: str keyword: str stage: str # current stage stages_completed: List[str] = Field(default_factory=list) data: Dict[str, Any] = Field(default_factory=dict) retry_count: int = 0 max_retries: int = 3 priority: int = 0 created_at: datetime = Field(default_factory=datetime.now) updated_at: datetime = Field(default_factory=datetime.now) class RSSItem(BaseModel): """RSS 피드 아이템""" item_id: str = Field(default_factory=lambda: str(uuid.uuid4())) title: str link: str guid: Optional[str] = None # RSS GUID for deduplication published: Optional[str] = None summary: Optional[str] = None source_feed: str class SearchResult(BaseModel): """검색 결과""" title: str link: str snippet: Optional[str] = None source: str = "google" class EnrichedItem(BaseModel): """강화된 뉴스 아이템""" rss_item: RSSItem search_results: List[SearchResult] = Field(default_factory=list) class SummarizedItem(BaseModel): """요약된 아이템""" enriched_item: EnrichedItem ai_summary: str summary_language: str = "ko" class TranslatedItem(BaseModel): """번역된 아이템""" summarized_item: SummarizedItem title_en: str summary_en: str class ItemWithImage(BaseModel): """이미지가 추가된 아이템""" translated_item: TranslatedItem image_url: str image_prompt: str class Subtopic(BaseModel): """기사 소주제""" title: str content: List[str] # 문단별 내용 class Entities(BaseModel): """개체명""" people: List[str] = Field(default_factory=list) organizations: List[str] = Field(default_factory=list) groups: List[str] = Field(default_factory=list) countries: List[str] = Field(default_factory=list) events: List[str] = Field(default_factory=list) class NewsReference(BaseModel): """뉴스 레퍼런스""" title: str link: str source: str published: Optional[str] = None class FinalArticle(BaseModel): """최종 기사 - ai_writer_db.articles 스키마와 일치""" news_id: str = Field(default_factory=lambda: str(uuid.uuid4())) title: str created_at: str = Field(default_factory=lambda: datetime.now().isoformat()) summary: str subtopics: List[Subtopic] = Field(default_factory=list) categories: List[str] = Field(default_factory=list) entities: Entities = Field(default_factory=Entities) source_keyword: str source_count: int = 1 # 레퍼런스 뉴스 정보 references: List[NewsReference] = Field(default_factory=list) # 파이프라인 관련 추가 필드 job_id: Optional[str] = None keyword_id: Optional[str] = None pipeline_stages: List[str] = Field(default_factory=list) processing_time: Optional[float] = None # 다국어 지원 language: str = 'ko' ref_news_id: Optional[str] = None # RSS 중복 체크용 GUID rss_guid: Optional[str] = None # 이미지 관련 필드 image_prompt: Optional[str] = None images: List[str] = Field(default_factory=list) # 번역 추적 translated_languages: List[str] = Field(default_factory=list) class TranslatedItem(BaseModel): """번역된 아이템""" summarized_item: Dict[str, Any] # SummarizedItem as dict translated_title: str translated_summary: str target_language: str = 'en' class GeneratedImageItem(BaseModel): """이미지 생성된 아이템""" translated_item: Dict[str, Any] # TranslatedItem as dict image_url: str image_prompt: str class QueueMessage(BaseModel): """큐 메시지""" message_id: str = Field(default_factory=lambda: str(uuid.uuid4())) queue_name: str job: PipelineJob timestamp: datetime = Field(default_factory=datetime.now) retry_count: int = 0 class Keyword(BaseModel): """스케줄러용 키워드 모델""" keyword_id: str = Field(default_factory=lambda: str(uuid.uuid4())) keyword: str interval_minutes: int = Field(default=60) # 기본 1시간 is_active: bool = Field(default=True) last_run: Optional[datetime] = None next_run: Optional[datetime] = None created_at: datetime = Field(default_factory=datetime.now) updated_at: datetime = Field(default_factory=datetime.now) rss_feeds: List[str] = Field(default_factory=list) # 커스텀 RSS 피드 priority: int = Field(default=0) # 우선순위 (높을수록 우선) max_articles_per_run: int = Field(default=100) # 실행당 최대 기사 수