from datetime import datetime
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, ConfigDict, Field, field_serializer


def _created_at_to_json(value: Union[str, datetime]) -> str:
    """Return *value* as a string, ISO-formatting it when it is a datetime.

    Shared by the ``created_at`` serializers of ``Article`` and
    ``ArticleSummary`` so both models emit the timestamp the same way.
    Values that are already strings are returned unchanged.
    """
    if isinstance(value, datetime):
        return value.isoformat()
    return value


class Subtopic(BaseModel):
    """A titled section of an article with a list of text entries."""

    title: str
    content: List[str]


class Reference(BaseModel):
    """A reference attached to an article."""

    title: str
    link: str
    source: str
    # Kept as a plain string; no date parsing is applied by this model.
    published: Optional[str] = None


class Entities(BaseModel):
    """Entity names mentioned in an article, grouped by kind."""

    # Pydantic copies mutable defaults for each instance, so these bare
    # ``[]`` defaults are not shared between model instances.
    people: List[str] = []
    organizations: List[str] = []
    groups: List[str] = []
    countries: List[str] = []
    events: List[str] = []


class Article(BaseModel):
    """Full article record including content, sources and pipeline metadata."""

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``). ``populate_by_name`` lets callers pass either the
    # field name ``id`` or its alias ``_id``.
    model_config = ConfigDict(
        populate_by_name=True,
        json_schema_extra={
            "example": {
                "_id": "507f1f77bcf86cd799439011",
                "news_id": "uuid-string",
                "title": "Sample News Article",
                "summary": "A brief summary",
                "language": "en",
                "created_at": "2024-01-01T00:00:00Z",
                "subtopics": [
                    {
                        "title": "Main Topic",
                        "content": [
                            "Content paragraph 1",
                            "Content paragraph 2",
                        ],
                    }
                ],
                "categories": ["technology", "business"],
                "images": ["http://image-url.com/image.png"],
            }
        },
    )

    # NOTE(review): the example ``_id`` looks like a MongoDB ObjectId
    # string - presumably records originate from MongoDB; confirm.
    id: str = Field(alias="_id")
    news_id: str
    title: str
    summary: Optional[str] = None
    # Accepted as either a string or a datetime; always serialized as a
    # string (see ``serialize_created_at``).
    created_at: Union[str, datetime]
    language: str

    # Content fields
    subtopics: List[Subtopic] = []
    categories: List[str] = []
    entities: Optional[Entities] = None

    # Source information
    source_keyword: Optional[str] = None
    source_count: Optional[int] = None
    references: List[Reference] = []

    # Pipeline metadata
    job_id: Optional[str] = None
    keyword_id: Optional[str] = None
    pipeline_stages: List[str] = []
    processing_time: Optional[float] = None

    # Translation & Image
    ref_news_id: Optional[str] = None
    rss_guid: Optional[str] = None
    image_prompt: Optional[str] = None
    images: List[str] = []
    translated_languages: List[str] = []

    @field_serializer('created_at')
    def serialize_created_at(self, value: Union[str, datetime], _info):
        """Serialize ``created_at``: datetimes via ``isoformat()``, strings as-is."""
        return _created_at_to_json(value)


class ArticleList(BaseModel):
    """One page of articles together with its pagination counters."""

    total: int
    page: int
    page_size: int
    total_pages: int
    articles: List[Article]


class ArticleSummary(BaseModel):
    """Reduced view of an article carrying a subset of ``Article`` fields."""

    # Pydantic v2 configuration (replaces the deprecated nested
    # ``class Config``); allows population by ``id`` as well as ``_id``.
    model_config = ConfigDict(populate_by_name=True)

    id: str = Field(alias="_id")
    news_id: str
    title: str
    summary: Optional[str] = None
    language: str
    categories: List[str] = []
    images: List[str] = []
    # Accepted as either a string or a datetime; always serialized as a
    # string (see ``serialize_created_at``).
    created_at: Union[str, datetime]
    source_keyword: Optional[str] = None

    @field_serializer('created_at')
    def serialize_created_at(self, value: Union[str, datetime], _info):
        """Serialize ``created_at``: datetimes via ``isoformat()``, strings as-is."""
        return _created_at_to_json(value)