feat: Implement automated keyword-based news pipeline scheduler

- Add multi-threaded keyword scheduler for periodic news collection
- Create Keyword Manager API for CRUD operations and monitoring
- Implement automatic pipeline triggering (RSS → Google → AI → Translation)
- Add thread status monitoring and dynamic keyword management
- Support priority-based execution and configurable intervals
- Add comprehensive scheduler documentation guide
- Default keywords: AI, 테크놀로지, 경제, 블록체인

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2025-09-15 17:09:22 +09:00
parent 070032006e
commit eeaa9dcb4b
39 changed files with 3472 additions and 759 deletions

View File

@@ -48,46 +48,45 @@ class GoogleSearchWorker:
await asyncio.sleep(1)
async def process_job(self, job: PipelineJob):
"""검색 강화 작업 처리"""
"""검색 강화 작업 처리 - 단일 RSS 아이템"""
try:
logger.info(f"Processing job {job.job_id} for search enrichment")
rss_items = job.data.get('rss_items', [])
enriched_items = []
# 최대 5개 항목만 처리 (API 할당량 관리)
for item_data in rss_items[:5]:
rss_item = RSSItem(**item_data)
# 제목으로 Google 검색
search_results = await self._search_google(rss_item.title)
enriched_item = EnrichedItem(
rss_item=rss_item,
search_results=search_results
)
enriched_items.append(enriched_item)
# API 속도 제한
await asyncio.sleep(0.5)
if enriched_items:
logger.info(f"Enriched {len(enriched_items)} items with search results")
# 다음 단계로 전달
job.data['enriched_items'] = [item.dict() for item in enriched_items]
job.stages_completed.append('search_enrichment')
job.stage = 'ai_summarization'
await self.queue_manager.enqueue('ai_summarization', job)
await self.queue_manager.mark_completed('search_enrichment', job.job_id)
else:
logger.warning(f"No items enriched for job {job.job_id}")
await self.queue_manager.mark_failed(
'search_enrichment',
job,
"No items to enrich"
)
# 단일 RSS 아이템 처리
rss_item_data = job.data.get('rss_item')
if not rss_item_data:
# 이전 버전 호환성 - 여러 아이템 처리
rss_items = job.data.get('rss_items', [])
if rss_items:
rss_item_data = rss_items[0] # 첫 번째 아이템만 처리
else:
logger.warning(f"No RSS item in job {job.job_id}")
await self.queue_manager.mark_failed(
'search_enrichment',
job,
"No RSS item to process"
)
return
rss_item = RSSItem(**rss_item_data)
# 제목으로 Google 검색
search_results = await self._search_google(rss_item.title)
enriched_item = EnrichedItem(
rss_item=rss_item,
search_results=search_results
)
logger.info(f"Enriched item with {len(search_results)} search results")
# 다음 단계로 전달 - 단일 enriched item
job.data['enriched_item'] = enriched_item.dict()
job.stages_completed.append('search_enrichment')
job.stage = 'ai_article_generation'
await self.queue_manager.enqueue('ai_article_generation', job)
await self.queue_manager.mark_completed('search_enrichment', job.job_id)
except Exception as e:
logger.error(f"Error processing job {job.job_id}: {e}")