from datetime import datetime
from typing import Optional, Dict, Any

from pydantic import BaseModel, Field
from bson import ObjectId


class PyObjectId(ObjectId):
    """ObjectId subclass that can be used as a Pydantic field type.

    Validation accepts anything ``ObjectId.is_valid`` accepts and returns a
    plain ``ObjectId``; the JSON schema advertises the value as a string.
    """

    # NOTE(review): ``__get_validators__`` is the legacy (v1-style) hook, which
    # Pydantic v2 still honours with a deprecation warning. Consider migrating
    # to ``__get_pydantic_core_schema__`` once a ``pydantic_core`` import is
    # acceptable in this module.
    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables Pydantic should run for this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v, _info=None):
        """Validate ``v`` and convert it to an ``ObjectId``.

        Args:
            v: Candidate value to check with ``ObjectId.is_valid``.
            _info: Ignored. Pydantic v2 passes a validation-info object as a
                second positional argument to legacy validators; accepting it
                here keeps validation from failing with ``TypeError``.

        Returns:
            ``ObjectId(v)``.

        Raises:
            ValueError: If ``v`` is not a valid ObjectId representation.
        """
        if not ObjectId.is_valid(v):
            raise ValueError("Invalid ObjectId")
        return ObjectId(v)

    @classmethod
    def __get_pydantic_json_schema__(cls, _core_schema, handler=None):
        """Return the JSON schema for this type: a plain string.

        Pydantic v2 calls this hook with ``(core_schema, handler)`` and uses
        the returned dict. The handler is deliberately not invoked because the
        underlying validator is an opaque function with no derivable schema.
        """
        return {"type": "string"}


class PipelineStats(BaseModel):
    """Pipeline statistics"""

    # Counters start at zero.
    total_processed: int = Field(default=0)
    success_count: int = Field(default=0)
    error_count: int = Field(default=0)
    # Both are unset (None) by default.
    last_run: Optional[datetime] = None
    average_duration_seconds: Optional[float] = None


class Pipeline(BaseModel):
    """Pipeline data model for process management"""

    # Populated from the "_id" key via the alias (or by field name, see Config).
    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    name: str = Field(..., min_length=1, max_length=100)
    # ``type`` and ``status`` are free-form strings; the values listed in the
    # descriptions are documentation only and are not enforced here.
    type: str = Field(..., description="Type: rss_collector, translator, image_generator")
    status: str = Field(default="stopped", description="Status: running, stopped, error")
    config: Dict[str, Any] = Field(default_factory=dict)
    schedule: Optional[str] = Field(default=None, description="Cron expression for scheduling")
    stats: PipelineStats = Field(default_factory=PipelineStats)
    last_run: Optional[datetime] = None
    next_run: Optional[datetime] = None
    # NOTE(review): ``datetime.utcnow`` yields naive datetimes and is
    # deprecated since Python 3.12; left unchanged so stored values stay
    # comparable with existing naive timestamps -- confirm before migrating.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)

    # NOTE(review): class-based ``Config`` is deprecated in Pydantic v2 in
    # favour of ``model_config``; kept as-is to avoid changing the class's
    # visible attributes.
    class Config:
        # Allow construction with ``id=...`` as well as the ``_id`` alias.
        populate_by_name = True
        arbitrary_types_allowed = True
        # Serialize ObjectId values as their string form in JSON output.
        json_encoders = {ObjectId: str}
        json_schema_extra = {
            "example": {
                "name": "RSS Collector - Politics",
                "type": "rss_collector",
                "status": "running",
                "config": {
                    "interval_minutes": 30,
                    "max_articles": 100,
                    "categories": ["politics"],
                },
                "schedule": "*/30 * * * *",
                "stats": {
                    "total_processed": 1523,
                    "success_count": 1500,
                    "error_count": 23,
                    "average_duration_seconds": 45.2,
                },
            }
        }