"""
Google Search Service

Enriches RSS items with Google search results.
"""
import asyncio
import logging
import os
import sys
import json
from typing import List, Dict, Any
import aiohttp
from datetime import datetime

# Import from shared module
from shared.models import PipelineJob, RSSItem, SearchResult, EnrichedItem
from shared.queue_manager import QueueManager

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class GoogleSearchWorker:
    """Queue worker that enriches a single RSS item with Google search results.

    Jobs are taken from the ``search_enrichment`` queue, the item's title is
    searched through the Google Custom Search API, and the job is forwarded
    to the ``ai_article_generation`` queue with the results attached.
    """

    def __init__(self):
        """Read configuration from the environment and set up the queue manager."""
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        self.google_api_key = os.getenv("GOOGLE_API_KEY")
        self.search_engine_id = os.getenv("GOOGLE_SEARCH_ENGINE_ID")
        self.max_results_per_item = 3
        # Total time budget (seconds) for one Google API request.
        self.request_timeout_seconds = 30

    async def start(self):
        """Connect to the queue backend and process jobs until cancelled."""
        logger.info("Starting Google Search Worker")

        # Connect to Redis
        await self.queue_manager.connect()

        # Main processing loop
        while True:
            try:
                # Fetch the next job from the queue
                job = await self.queue_manager.dequeue('search_enrichment', timeout=5)

                if job:
                    await self.process_job(job)

            except Exception as e:
                # Keep the worker alive on unexpected errors; log the
                # traceback and back off briefly before retrying.
                logger.exception("Error in worker loop: %s", e)
                await asyncio.sleep(1)

    async def process_job(self, job: PipelineJob):
        """Enrich the single RSS item carried by ``job`` and pass it on.

        The item is read from ``job.data['rss_item']``. For backward
        compatibility with the older payload shape, the first entry of
        ``job.data['rss_items']`` is used when ``rss_item`` is absent.
        On success the job is enqueued on ``ai_article_generation`` and marked
        completed on ``search_enrichment``; on any error it is marked failed.

        Args:
            job: Pipeline job whose ``data`` holds the RSS item to enrich.
        """
        try:
            logger.info("Processing job %s for search enrichment", job.job_id)

            # Single RSS item payload
            rss_item_data = job.data.get('rss_item')
            if not rss_item_data:
                # Backward compatibility: older jobs carry a list of items,
                # of which only the first one is processed.
                rss_items = job.data.get('rss_items', [])
                if rss_items:
                    rss_item_data = rss_items[0]
                else:
                    logger.warning("No RSS item in job %s", job.job_id)
                    await self.queue_manager.mark_failed(
                        'search_enrichment',
                        job,
                        "No RSS item to process"
                    )
                    return

            rss_item = RSSItem(**rss_item_data)

            # Search Google using the item's title
            search_results = await self._search_google(rss_item.title)

            enriched_item = EnrichedItem(
                rss_item=rss_item,
                search_results=search_results
            )

            logger.info("Enriched item with %d search results", len(search_results))

            # Hand over to the next stage with the single enriched item
            job.data['enriched_item'] = enriched_item.dict()
            job.stages_completed.append('search_enrichment')
            job.stage = 'ai_article_generation'

            await self.queue_manager.enqueue('ai_article_generation', job)
            await self.queue_manager.mark_completed('search_enrichment', job.job_id)

        except Exception as e:
            logger.exception("Error processing job %s: %s", job.job_id, e)
            await self.queue_manager.mark_failed('search_enrichment', job, str(e))

    async def _search_google(self, query: str) -> List[SearchResult]:
        """Call the Google Custom Search API for ``query``.

        This is best-effort: missing credentials, a blank query, a non-200
        response, or any request error yields an empty list instead of
        raising, so enrichment can continue without search results.

        Args:
            query: Search text (the RSS item's title).

        Returns:
            Up to ``max_results_per_item`` results, possibly empty.
        """
        results: List[SearchResult] = []

        if not self.google_api_key or not self.search_engine_id:
            logger.warning("Google API credentials not configured")
            return results

        # Do not spend an API call on an empty query.
        if not query or not query.strip():
            logger.warning("Empty search query; skipping Google search")
            return results

        try:
            url = "https://www.googleapis.com/customsearch/v1"
            params = {
                "key": self.google_api_key,
                "cx": self.search_engine_id,
                "q": query,
                "num": self.max_results_per_item,
                "hl": "ko",
                "gl": "kr"
            }
            # aiohttp expects a ClientTimeout object; a bare number for
            # ``timeout`` is deprecated.
            timeout = aiohttp.ClientTimeout(total=self.request_timeout_seconds)

            async with aiohttp.ClientSession() as session:
                async with session.get(url, params=params, timeout=timeout) as response:
                    if response.status == 200:
                        data = await response.json()

                        for item in data.get('items', []):
                            result = SearchResult(
                                title=item.get('title', ''),
                                link=item.get('link', ''),
                                snippet=item.get('snippet', ''),
                                source='google'
                            )
                            results.append(result)
                    else:
                        logger.error("Google API error: %s", response.status)

        except Exception as e:
            logger.exception("Error searching Google for '%s': %s", query, e)

        return results

    async def stop(self):
        """Disconnect from the queue backend."""
        await self.queue_manager.disconnect()
        logger.info("Google Search Worker stopped")


async def main():
    """Run the worker until interrupted, then shut it down."""
    worker = GoogleSearchWorker()

    try:
        await worker.start()
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    except asyncio.CancelledError:
        # Under asyncio.run, Ctrl-C normally reaches this coroutine as a
        # cancellation; log it and let the cancellation propagate.
        logger.info("Received interrupt signal")
        raise
    finally:
        await worker.stop()


if __name__ == "__main__":
    asyncio.run(main())