feat: Implement automated keyword-based news pipeline scheduler

- Add multi-threaded keyword scheduler for periodic news collection
- Create Keyword Manager API for CRUD operations and monitoring
- Implement automatic pipeline triggering (RSS → Google → AI → Translation)
- Add thread status monitoring and dynamic keyword management
- Support priority-based execution and configurable intervals
- Add comprehensive scheduler documentation guide
- Default keywords: AI, 테크놀로지, 경제, 블록체인

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2025-09-15 17:09:22 +09:00
parent 070032006e
commit eeaa9dcb4b
39 changed files with 3472 additions and 759 deletions

View File

@ -40,6 +40,7 @@ class RSSItem(BaseModel):
item_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
title: str
link: str
guid: Optional[str] = None # RSS GUID for deduplication
published: Optional[str] = None
summary: Optional[str] = None
source_feed: str
@ -74,22 +75,54 @@ class ItemWithImage(BaseModel):
image_url: str
image_prompt: str
class FinalArticle(BaseModel):
"""최종 기사"""
article_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
job_id: str
keyword_id: str
keyword: str
class Subtopic(BaseModel):
"""기사 소주제"""
title: str
content: str
content: List[str] # 문단별 내용
class Entities(BaseModel):
"""개체명"""
people: List[str] = Field(default_factory=list)
organizations: List[str] = Field(default_factory=list)
groups: List[str] = Field(default_factory=list)
countries: List[str] = Field(default_factory=list)
events: List[str] = Field(default_factory=list)
class NewsReference(BaseModel):
"""뉴스 레퍼런스"""
title: str
link: str
source: str
published: Optional[str] = None
class FinalArticle(BaseModel):
"""최종 기사 - ai_writer_db.articles 스키마와 일치"""
news_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
title: str
created_at: str = Field(default_factory=lambda: datetime.now().isoformat())
summary: str
source_items: List[ItemWithImage]
images: List[str]
subtopics: List[Subtopic] = Field(default_factory=list)
categories: List[str] = Field(default_factory=list)
tags: List[str] = Field(default_factory=list)
created_at: datetime = Field(default_factory=datetime.now)
pipeline_stages: List[str]
processing_time: float # seconds
entities: Entities = Field(default_factory=Entities)
source_keyword: str
source_count: int = 1
# 레퍼런스 뉴스 정보
references: List[NewsReference] = Field(default_factory=list)
# 파이프라인 관련 추가 필드
job_id: Optional[str] = None
keyword_id: Optional[str] = None
pipeline_stages: List[str] = Field(default_factory=list)
processing_time: Optional[float] = None
# 다국어 지원
language: str = 'ko'
ref_news_id: Optional[str] = None
# RSS 중복 체크용 GUID
rss_guid: Optional[str] = None
# 이미지 관련 필드
image_prompt: Optional[str] = None
images: List[str] = Field(default_factory=list)
# 번역 추적
translated_languages: List[str] = Field(default_factory=list)
class TranslatedItem(BaseModel):
"""번역된 아이템"""
@ -110,4 +143,17 @@ class QueueMessage(BaseModel):
queue_name: str
job: PipelineJob
timestamp: datetime = Field(default_factory=datetime.now)
retry_count: int = 0
retry_count: int = 0
class Keyword(BaseModel):
"""스케줄러용 키워드 모델"""
keyword_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
keyword: str
interval_minutes: int = Field(default=60) # 기본 1시간
is_active: bool = Field(default=True)
last_run: Optional[datetime] = None
next_run: Optional[datetime] = None
created_at: datetime = Field(default_factory=datetime.now)
updated_at: datetime = Field(default_factory=datetime.now)
rss_feeds: List[str] = Field(default_factory=list) # 커스텀 RSS 피드
priority: int = Field(default=0) # 우선순위 (높을수록 우선)
max_articles_per_run: int = Field(default=100) # 실행당 최대 기사 수

View File

@ -16,13 +16,13 @@ class QueueManager:
"""Redis 기반 큐 매니저"""
QUEUES = {
"keyword_processing": "queue:keyword",
"rss_collection": "queue:rss",
"search_enrichment": "queue:search",
"ai_summarization": "queue:summarize",
"translation": "queue:translate",
"image_generation": "queue:image",
"article_assembly": "queue:assembly",
"keyword_processing": "queue:keyword_processing",
"rss_collection": "queue:rss_collection",
"search_enrichment": "queue:search_enrichment",
"google_search": "queue:google_search",
"ai_article_generation": "queue:ai_article_generation",
"image_generation": "queue:image_generation",
"translation": "queue:translation",
"failed": "queue:failed",
"scheduled": "queue:scheduled"
}
@ -77,12 +77,15 @@ class QueueManager:
"""큐에서 작업 가져오기"""
try:
queue_key = self.QUEUES.get(queue_name, f"queue:{queue_name}")
logger.info(f"Attempting to dequeue from {queue_key} with timeout={timeout}")
if timeout > 0:
result = await self.redis_client.blpop(queue_key, timeout=timeout)
result = await self.redis_client.blpop(queue_key, timeout)
if result:
_, data = result
logger.info(f"Dequeued item from {queue_key}")
else:
logger.debug(f"No item available in {queue_key}")
return None
else:
data = await self.redis_client.lpop(queue_key)