# Change summary:
# - Add multi-threaded keyword scheduler for periodic news collection
# - Create Keyword Manager API for CRUD operations and monitoring
# - Implement automatic pipeline triggering (RSS → Google → AI → Translation)
# - Add thread status monitoring and dynamic keyword management
# - Support priority-based execution and configurable intervals
# - Default keywords: AI, 테크놀로지, 경제, 블록체인
"""
|
|
Google Search Service
|
|
Google 검색으로 RSS 항목 강화
|
|
"""
|
|
import asyncio
import json
import logging
import os
import sys
from datetime import datetime
from typing import Any, Dict, List

import aiohttp

# Import from shared module
from shared.models import PipelineJob, RSSItem, SearchResult, EnrichedItem
from shared.queue_manager import QueueManager
|
# Module-level logging: default the root logger to INFO and grab a
# logger named after this module for all worker output.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
class GoogleSearchWorker:
    """Pipeline worker that enriches RSS items with Google Custom Search results.

    Consumes jobs from the 'search_enrichment' queue, attaches up to
    ``max_results_per_item`` search hits to each RSS item, and forwards the
    enriched job to the 'ai_article_generation' stage.
    """

    # Queue/stage names used throughout this worker, hoisted so the
    # pipeline wiring is defined in exactly one place.
    STAGE = 'search_enrichment'
    NEXT_STAGE = 'ai_article_generation'

    # Total per-request timeout (seconds) for the Google API call.
    SEARCH_TIMEOUT_SECONDS = 30

    def __init__(self):
        # Queue backed by Redis; REDIS_URL defaults to the docker-compose host.
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        # Google Custom Search credentials. Both are optional: when either is
        # missing, _search_google degrades to returning an empty result list.
        self.google_api_key = os.getenv("GOOGLE_API_KEY")
        self.search_engine_id = os.getenv("GOOGLE_SEARCH_ENGINE_ID")
        self.max_results_per_item = 3

    async def start(self):
        """Connect to Redis and run the dequeue/process loop forever."""
        logger.info("Starting Google Search Worker")

        # Connect to Redis before entering the loop.
        await self.queue_manager.connect()

        # Main processing loop.
        while True:
            try:
                # Block for up to 5 seconds waiting for the next job.
                job = await self.queue_manager.dequeue(self.STAGE, timeout=5)

                if job:
                    await self.process_job(job)

            except Exception as e:
                # Keep the worker alive on unexpected errors; back off briefly
                # so a persistent failure does not spin the loop.
                logger.error(f"Error in worker loop: {e}")
                await asyncio.sleep(1)

    async def process_job(self, job: PipelineJob):
        """Enrich a single RSS item with search results and forward the job.

        Supports both the current payload shape (``data['rss_item']``) and the
        legacy shape (``data['rss_items']`` list — first element only). Jobs
        with no RSS item, and jobs that raise during processing, are marked
        failed on the queue; this method never raises to the caller's loop.
        """
        try:
            logger.info(f"Processing job {job.job_id} for search enrichment")

            # Current payload shape: a single RSS item.
            rss_item_data = job.data.get('rss_item')
            if not rss_item_data:
                # Backward compatibility: older producers enqueue a list.
                rss_items = job.data.get('rss_items', [])
                if rss_items:
                    rss_item_data = rss_items[0]  # process only the first item
                else:
                    logger.warning(f"No RSS item in job {job.job_id}")
                    await self.queue_manager.mark_failed(
                        self.STAGE,
                        job,
                        "No RSS item to process"
                    )
                    return

            rss_item = RSSItem(**rss_item_data)

            # Google-search the item's title to gather related coverage.
            search_results = await self._search_google(rss_item.title)

            enriched_item = EnrichedItem(
                rss_item=rss_item,
                search_results=search_results
            )

            logger.info(f"Enriched item with {len(search_results)} search results")

            # Hand off the enriched payload to the next pipeline stage.
            job.data['enriched_item'] = enriched_item.dict()
            job.stages_completed.append(self.STAGE)
            job.stage = self.NEXT_STAGE

            await self.queue_manager.enqueue(self.NEXT_STAGE, job)
            await self.queue_manager.mark_completed(self.STAGE, job.job_id)

        except Exception as e:
            logger.error(f"Error processing job {job.job_id}: {e}")
            await self.queue_manager.mark_failed(self.STAGE, job, str(e))

    async def _search_google(self, query: str) -> List[SearchResult]:
        """Call the Google Custom Search API for *query*.

        Returns up to ``max_results_per_item`` results. Never raises: returns
        an empty list when credentials are missing, the API responds with a
        non-200 status, or the request fails.
        """
        results: List[SearchResult] = []

        if not self.google_api_key or not self.search_engine_id:
            logger.warning("Google API credentials not configured")
            return results

        try:
            url = "https://www.googleapis.com/customsearch/v1"
            params = {
                "key": self.google_api_key,
                "cx": self.search_engine_id,
                "q": query,
                "num": self.max_results_per_item,
                "hl": "ko",  # Korean interface language
                "gl": "kr"   # bias results toward Korea
            }

            # aiohttp expects a ClientTimeout object; passing a bare number to
            # the request is deprecated. Same 30s total timeout as before.
            timeout = aiohttp.ClientTimeout(total=self.SEARCH_TIMEOUT_SECONDS)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(url, params=params) as response:
                    if response.status == 200:
                        data = await response.json()

                        for item in data.get('items', []):
                            results.append(SearchResult(
                                title=item.get('title', ''),
                                link=item.get('link', ''),
                                snippet=item.get('snippet', ''),
                                source='google'
                            ))
                    else:
                        logger.error(f"Google API error: {response.status}")

        except Exception as e:
            logger.error(f"Error searching Google for '{query}': {e}")

        return results

    async def stop(self):
        """Disconnect from the queue and log shutdown."""
        await self.queue_manager.disconnect()
        logger.info("Google Search Worker stopped")
|
|
|
|
async def main():
    """Entry point: run the search worker until interrupted, then shut down."""
    search_worker = GoogleSearchWorker()

    try:
        await search_worker.start()
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    finally:
        # Always release the Redis connection, even on the error path.
        await search_worker.stop()


if __name__ == "__main__":
    asyncio.run(main())