## 🔧 Model Synchronization

Updated Pydantic models to match the actual article structure in MongoDB.

### Changes

- **Article Model**: Complete restructure to match MongoDB documents
  - Added Subtopic, Reference, Entities nested models
  - Changed created_at to Union[str, datetime] with serializer
  - Added all pipeline metadata fields (job_id, keyword_id, etc.)
  - Added translation & image fields
  - Changed category (single) to categories (array)
- **ArticleSummary Model**: Updated for list responses
  - Synced with actual MongoDB structure
  - Added news_id, categories array, images array
- **ArticleService**: Fixed category filtering
  - Changed "category" to "categories" (array field)
  - Updated search to include subtopics and source_keyword
  - Implemented MongoDB aggregation for category list

### Verified Fields

- ✅ news_id, title, summary, created_at, language
- ✅ subtopics (array of {title, content[]})
- ✅ categories (array), entities (nested object)
- ✅ references (array), source_keyword, source_count
- ✅ pipeline_stages, job_id, keyword_id, processing_time
- ✅ images (array), image_prompt, translated_languages

### Testing

- Validated with actual English articles (20,966 total)
- Search functionality working (15,298 AI-related articles)
- Categories endpoint returning 1000+ unique categories
- All datetime fields properly serialized to ISO format

🤖 Generated with [Claude Code](https://claude.ai/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
106 lines
2.9 KiB
Python
106 lines
2.9 KiB
Python
from datetime import datetime
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, ConfigDict, Field, field_serializer
|
|
|
|
class Subtopic(BaseModel):
    """A titled section of an article, with its body held as a list of strings."""

    # Heading of the section.
    title: str

    # Body text; each list entry is one chunk of content
    # (presumably a paragraph -- confirm against stored documents).
    content: List[str]
|
|
|
|
class Reference(BaseModel):
    """A source cited by an article.

    ``title``, ``link`` and ``source`` are required; ``published`` is optional
    and falls back to ``None`` when absent.
    """

    title: str
    link: str
    source: str

    # Kept as a plain string; no date parsing is applied by this model.
    published: Optional[str] = None
|
|
|
|
class Entities(BaseModel):
    """Entity names grouped by kind.

    Every group is a list of strings and defaults to an empty list, so a
    document that omits a group still validates.
    """

    people: List[str] = []
    organizations: List[str] = []
    groups: List[str] = []
    countries: List[str] = []
    events: List[str] = []
|
|
|
|
class Article(BaseModel):
    """Full article document.

    ``id`` is read from the ``_id`` key of the input. Because
    ``populate_by_name`` is enabled, it may also be supplied under its field
    name ``id``.

    ``created_at`` accepts either a string or a ``datetime``. On serialization
    a ``datetime`` is converted with ``isoformat()`` and a string is emitted
    unchanged.
    """

    # Pydantic v2 configuration. This replaces the deprecated nested
    # ``class Config`` while keeping the same two settings.
    model_config = ConfigDict(
        populate_by_name=True,
        json_schema_extra={
            "example": {
                "_id": "507f1f77bcf86cd799439011",
                "news_id": "uuid-string",
                "title": "Sample News Article",
                "summary": "A brief summary",
                "language": "en",
                "created_at": "2024-01-01T00:00:00Z",
                "subtopics": [
                    {
                        "title": "Main Topic",
                        "content": ["Content paragraph 1", "Content paragraph 2"],
                    }
                ],
                "categories": ["technology", "business"],
                "images": ["http://image-url.com/image.png"],
            }
        },
    )

    # Identity and headline fields
    id: str = Field(alias="_id")
    news_id: str
    title: str
    summary: Optional[str] = None
    created_at: Union[str, datetime]
    language: str

    # Content fields
    subtopics: List[Subtopic] = []
    categories: List[str] = []
    entities: Optional[Entities] = None

    # Source information
    source_keyword: Optional[str] = None
    source_count: Optional[int] = None
    references: List[Reference] = []

    # Pipeline metadata
    job_id: Optional[str] = None
    keyword_id: Optional[str] = None
    pipeline_stages: List[str] = []
    processing_time: Optional[float] = None

    # Translation & Image
    ref_news_id: Optional[str] = None
    rss_guid: Optional[str] = None
    image_prompt: Optional[str] = None
    images: List[str] = []
    translated_languages: List[str] = []

    @field_serializer('created_at')
    def serialize_created_at(self, value: Union[str, datetime], _info):
        """Return ``created_at`` as a string.

        A ``datetime`` is rendered via ``isoformat()``; a value that is
        already a string is passed through untouched.
        """
        return value.isoformat() if isinstance(value, datetime) else value
|
|
|
|
class ArticleList(BaseModel):
    """One page of ``Article`` objects together with paging counters."""

    # Paging counters; all four are required integers.
    total: int
    page: int
    page_size: int
    total_pages: int

    # The articles carried by this page.
    articles: List[Article]
|
|
|
|
class ArticleSummary(BaseModel):
    """Condensed article representation.

    ``id`` is read from the ``_id`` key of the input. Because
    ``populate_by_name`` is enabled, it may also be supplied under its field
    name ``id``.

    ``created_at`` accepts either a string or a ``datetime``. On serialization
    a ``datetime`` is converted with ``isoformat()`` and a string is emitted
    unchanged.
    """

    # Pydantic v2 configuration. This replaces the deprecated nested
    # ``class Config`` while keeping the same setting.
    model_config = ConfigDict(populate_by_name=True)

    id: str = Field(alias="_id")
    news_id: str
    title: str
    summary: Optional[str] = None
    language: str
    categories: List[str] = []
    images: List[str] = []
    created_at: Union[str, datetime]
    source_keyword: Optional[str] = None

    @field_serializer('created_at')
    def serialize_created_at(self, value: Union[str, datetime], _info):
        """Return ``created_at`` as a string.

        A ``datetime`` is rendered via ``isoformat()``; a value that is
        already a string is passed through untouched.
        """
        return value.isoformat() if isinstance(value, datetime) else value
|