Major architectural transformation from synchronous to asynchronous processing:

## Pipeline Services (8 microservices)
- pipeline-scheduler: APScheduler for 30-minute periodic job triggers
- pipeline-rss-collector: RSS feed collection with deduplication (7-day TTL)
- pipeline-google-search: Content enrichment via Google Search API
- pipeline-ai-summarizer: AI summarization using Claude API (claude-sonnet-4-20250514)
- pipeline-translator: Translation using DeepL Pro API
- pipeline-image-generator: Image generation with Replicate API (Stable Diffusion)
- pipeline-article-assembly: Final article assembly and MongoDB storage
- pipeline-monitor: Real-time monitoring dashboard (port 8100)

## Key Features
- Redis-based job queue with deduplication
- Asynchronous processing with Python asyncio
- Shared models and queue manager for inter-service communication
- Docker containerization for all services
- Container names standardized with site11_ prefix

## Removed Services
- Moved to backup: google-search, rss-feed, news-aggregator, ai-writer

## Configuration
- DeepL Pro API: 3abbc796-2515-44a8-972d-22dcf27ab54a
- Claude Model: claude-sonnet-4-20250514
- Redis Queue TTL: 7 days for deduplication

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
74 lines
2.1 KiB
Python
74 lines
2.1 KiB
Python
from pydantic import BaseModel, Field, HttpUrl
|
|
from typing import Optional, List, Dict, Any
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
|
|
class FeedStatus(str, Enum):
    """Closed set of status values for a feed.

    Members also subclass ``str``, so each member compares equal to its
    plain string value (``FeedStatus.ACTIVE == "active"``) and can be looked
    up from that value (``FeedStatus("active")``).
    """

    ACTIVE = "active"
    INACTIVE = "inactive"
    ERROR = "error"
|
|
|
|
class FeedCategory(str, Enum):
    """Closed set of topical categories a feed can be filed under.

    Members also subclass ``str``, so each member compares equal to its
    lowercase string value. ``OTHER`` is the value the models in this module
    use as their default category.
    """

    NEWS = "news"
    TECH = "tech"
    BUSINESS = "business"
    SCIENCE = "science"
    HEALTH = "health"
    SPORTS = "sports"
    ENTERTAINMENT = "entertainment"
    LIFESTYLE = "lifestyle"
    POLITICS = "politics"
    OTHER = "other"
|
|
|
|
class FeedSubscription(BaseModel):
    """A subscribed feed plus the bookkeeping fields tracked for it.

    Only ``title`` and ``url`` are required; every other field has a default.
    """

    # Populated from the "_id" key of the input data (field alias).
    # NOTE(review): presumably the database document id - confirm.
    id: Optional[str] = Field(None, alias="_id")
    title: str
    url: HttpUrl
    description: Optional[str] = None
    category: FeedCategory = FeedCategory.OTHER
    status: FeedStatus = FeedStatus.ACTIVE
    update_interval: int = 900  # seconds
    last_fetch: Optional[datetime] = None
    last_error: Optional[str] = None
    error_count: int = 0
    # Each timestamp factory runs separately at instantiation, so the two
    # defaults can differ by a few microseconds.
    # NOTE(review): datetime.now() without a tz returns naive local time -
    # confirm that is what the storage layer expects.
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: datetime = Field(default_factory=datetime.now)
    # A factory (rather than a literal ``{}``) makes the per-instance fresh
    # dict explicit, matching the default_factory style used above.
    metadata: Dict[str, Any] = Field(default_factory=dict)
|
|
|
|
class FeedEntry(BaseModel):
    """A single entry (item) belonging to a feed.

    ``feed_id``, ``entry_id``, ``title`` and ``link`` are required; every
    other field has a default.
    """

    # Populated from the "_id" key of the input data (field alias).
    # NOTE(review): presumably the database document id - confirm.
    id: Optional[str] = Field(None, alias="_id")
    # NOTE(review): presumably the id of the owning FeedSubscription - confirm.
    feed_id: str
    entry_id: str  # RSS entry unique ID
    title: str
    link: str
    summary: Optional[str] = None
    content: Optional[str] = None
    author: Optional[str] = None
    published: Optional[datetime] = None
    updated: Optional[datetime] = None
    # Factories (rather than literal ``[]``) make the per-instance fresh
    # lists explicit.
    categories: List[str] = Field(default_factory=list)
    thumbnail: Optional[str] = None
    enclosures: List[Dict[str, Any]] = Field(default_factory=list)
    is_read: bool = False
    is_starred: bool = False
    # NOTE(review): datetime.now() without a tz returns naive local time -
    # confirm that is what the storage layer expects.
    created_at: datetime = Field(default_factory=datetime.now)
|
|
|
|
class CreateFeedRequest(BaseModel):
    """Input payload for creating a feed; only ``url`` is required."""

    url: HttpUrl
    # NOTE(review): presumably the title is derived elsewhere when omitted -
    # confirm against the handler that consumes this request.
    title: Optional[str] = None
    category: FeedCategory = FeedCategory.OTHER
    # Defaults to 900, the same default as FeedSubscription.update_interval.
    # NOTE(review): None is accepted here, while FeedSubscription declares
    # update_interval as a plain int - the consumer must handle None.
    update_interval: Optional[int] = 900
|
|
|
|
class UpdateFeedRequest(BaseModel):
    """Input payload for updating a feed; every field defaults to ``None``.

    NOTE(review): presumably a ``None`` field means "leave unchanged" -
    confirm against the handler that applies this request.
    """

    title: Optional[str] = None
    category: Optional[FeedCategory] = None
    update_interval: Optional[int] = None
    status: Optional[FeedStatus] = None
|
|
|
|
class FeedStatistics(BaseModel):
    """Aggregate counters reported for a single feed."""

    feed_id: str
    total_entries: int
    unread_entries: int
    starred_entries: int
    # Explicit default: a bare ``Optional[...]`` annotation is implicitly
    # defaulted to None by pydantic v1 but is a *required* field in v2.
    # Spelling out ``= None`` gives the same behavior on both and matches
    # every other Optional field in this module.
    last_update: Optional[datetime] = None
    # NOTE(review): scale (fraction vs. percentage) is not established in
    # this module - confirm with the code that computes it.
    error_rate: float