"""
News Pipeline Scheduler

News pipeline scheduler service: periodically loads active keyword
subscriptions from MongoDB and enqueues pipeline jobs for them.
"""
import asyncio
import logging
import os
import sys
from datetime import datetime, timedelta

from apscheduler.schedulers.asyncio import AsyncIOScheduler
from motor.motor_asyncio import AsyncIOMotorClient

# Import from shared module
from shared.models import KeywordSubscription, PipelineJob
from shared.queue_manager import QueueManager

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class NewsScheduler:
    """Schedule periodic creation of news pipeline jobs.

    Keyword documents are read from the ``keywords`` collection of the
    configured MongoDB database; one ``PipelineJob`` per keyword is pushed
    onto the ``rss_collection`` queue through ``QueueManager``.

    Configuration is read from the environment:

    * ``MONGODB_URL`` (default ``mongodb://mongodb:27017``)
    * ``DB_NAME`` (default ``ai_writer_db``)
    * ``REDIS_URL`` (default ``redis://redis:6379``)
    """

    # The interval of the recurring job and the staleness window used by
    # process_keywords() share this value so they cannot drift apart.
    KEYWORD_INTERVAL_MINUTES = 30

    # Hours of the day (scheduler-local time) at which priority keywords
    # get an extra run: morning, lunch and evening.
    PRIORITY_HOURS = (7, 12, 18)

    def __init__(self):
        """Create the scheduler and queue manager; no connection is opened yet."""
        self.scheduler = AsyncIOScheduler()
        self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        self.db_name = os.getenv("DB_NAME", "ai_writer_db")
        # Both are populated by start(); the client is kept so stop() can close it.
        self.client = None
        self.db = None
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )

    async def start(self) -> None:
        """Connect to MongoDB and the queue, register the jobs and start scheduling.

        Registers three kinds of jobs (the recurring keyword processor, the
        priority processors and the daily statistics reset), starts the
        scheduler, and then runs ``process_keywords`` once immediately.
        """
        logger.info("Starting News Pipeline Scheduler")

        # MongoDB connection
        self.client = AsyncIOMotorClient(self.mongodb_url)
        self.db = self.client[self.db_name]

        # Queue (Redis) connection
        await self.queue_manager.connect()

        # Default schedule: run at a fixed interval
        self.scheduler.add_job(
            self.process_keywords,
            'interval',
            minutes=self.KEYWORD_INTERVAL_MINUTES,
            id='keyword_processor',
            name='Process Active Keywords'
        )

        # Reinforced schedule at specific times of day
        for hour in self.PRIORITY_HOURS:
            self.scheduler.add_job(
                self.process_priority_keywords,
                'cron',
                hour=hour,
                minute=0,
                id=f'priority_processor_{hour}',
                name=f'Process Priority Keywords at {hour}:00'
            )

        # Reset statistics every day at midnight
        self.scheduler.add_job(
            self.reset_daily_stats,
            'cron',
            hour=0,
            minute=0,
            id='stats_reset',
            name='Reset Daily Statistics'
        )

        self.scheduler.start()
        logger.info("Scheduler started successfully")

        # Run once right away instead of waiting for the first interval
        await self.process_keywords()

    async def process_keywords(self) -> None:
        """Create jobs for active keywords that are due for processing.

        A keyword is due when ``is_active`` is true and ``last_processed`` is
        either unset or older than ``KEYWORD_INTERVAL_MINUTES``.  The
        ``last_processed`` timestamp is only advanced for keywords whose job
        was actually enqueued, so a failed keyword is not recorded as done.

        Errors are logged and swallowed so a failing run never kills the
        scheduler job.
        """
        try:
            logger.info("Processing active keywords")

            # NOTE(review): naive local time is used both for the cutoff and
            # for the stored timestamp; confirm this matches how other
            # services interpret `last_processed`.
            now = datetime.now()
            cutoff = now - timedelta(minutes=self.KEYWORD_INTERVAL_MINUTES)

            # Load the due keywords from MongoDB
            keywords = await self.db.keywords.find({
                "is_active": True,
                "$or": [
                    {"last_processed": {"$lt": cutoff}},
                    {"last_processed": None}
                ]
            }).to_list(None)

            logger.info(f"Found {len(keywords)} keywords to process")

            created = 0
            for keyword_doc in keywords:
                # _create_job() logs its own failure; skip the timestamp
                # update so the keyword is not marked as processed.
                if not await self._create_job(keyword_doc):
                    continue
                created += 1

                # Record the processing time
                await self.db.keywords.update_one(
                    {"keyword_id": keyword_doc['keyword_id']},
                    {"$set": {"last_processed": now}}
                )

            logger.info(f"Created jobs for {created} keywords")

        except Exception as e:
            logger.exception(f"Error processing keywords: {e}")

    async def process_priority_keywords(self) -> None:
        """Create priority-1 jobs for every active keyword flagged ``is_priority``.

        Unlike ``process_keywords`` this ignores and does not update
        ``last_processed``.  Errors are logged and swallowed.
        """
        try:
            logger.info("Processing priority keywords")

            keywords = await self.db.keywords.find({
                "is_active": True,
                "is_priority": True
            }).to_list(None)

            created = 0
            for keyword_doc in keywords:
                if await self._create_job(keyword_doc, priority=1):
                    created += 1

            logger.info(f"Created priority jobs for {created} keywords")

        except Exception as e:
            logger.exception(f"Error processing priority keywords: {e}")

    async def _create_job(self, keyword_doc: dict, priority: int = 0) -> bool:
        """Build a ``PipelineJob`` for one keyword document and enqueue it.

        Args:
            keyword_doc: Keyword document as loaded from MongoDB; it is
                unpacked into ``KeywordSubscription``.
            priority: Priority stored on the job and passed to the queue.

        Returns:
            ``True`` if the job was enqueued, ``False`` if anything failed
            (the failure is logged, never raised).
        """
        try:
            # Convert to the KeywordSubscription model
            keyword = KeywordSubscription(**keyword_doc)

            # Build the PipelineJob
            job = PipelineJob(
                keyword_id=keyword.keyword_id,
                keyword=keyword.keyword,
                stage='rss_collection',
                stages_completed=[],
                priority=priority,
                data={
                    'keyword': keyword.keyword,
                    'language': keyword.language,
                    # Fall back to the defaults when the subscription has no feeds
                    'rss_feeds': keyword.rss_feeds or self._get_default_rss_feeds(),
                    'categories': keyword.categories
                }
            )

            # Push onto the first queue of the pipeline
            await self.queue_manager.enqueue(
                'rss_collection',
                job,
                priority=priority
            )

            logger.info(f"Created job {job.job_id} for keyword '{keyword.keyword}'")
            return True

        except Exception as e:
            logger.exception(f"Error creating job for keyword: {e}")
            return False

    def _get_default_rss_feeds(self) -> list:
        """Return the default RSS feed URLs used when a keyword defines none.

        The first URL contains a literal ``{keyword}`` placeholder that is not
        substituted here; presumably a downstream consumer expands it (verify
        against the RSS collector).
        """
        return [
            "https://news.google.com/rss/search?q={keyword}&hl=ko&gl=KR&ceid=KR:ko",
            "https://trends.google.com/trends/trendingsearches/daily/rss?geo=KR",
            "https://www.mk.co.kr/rss/40300001/",  # Maeil Business Newspaper
            "https://www.hankyung.com/feed/all-news",  # Korea Economic Daily
            "https://www.zdnet.co.kr/news/news_rss.xml",  # ZDNet Korea
        ]

    async def reset_daily_stats(self) -> None:
        """Reset the daily statistics (currently a stub that only logs)."""
        try:
            logger.info("Resetting daily statistics")
            # TODO: reset the statistics kept in Redis; not implemented yet.
        except Exception as e:
            logger.exception(f"Error resetting stats: {e}")

    async def stop(self) -> None:
        """Shut the scheduler down and release the queue and MongoDB connections.

        The scheduler is only shut down if it is running, so calling this
        after a failed ``start()`` does not raise from APScheduler and mask
        the original error.
        """
        # shutdown() raises SchedulerNotRunningError on a scheduler that was
        # never started (e.g. start() failed while connecting).
        if self.scheduler.running:
            self.scheduler.shutdown()
        try:
            await self.queue_manager.disconnect()
        finally:
            # Close the MongoDB client even if the queue disconnect fails.
            if self.client is not None:
                self.client.close()
                self.client = None
        logger.info("Scheduler stopped")


async def main() -> None:
    """Run the scheduler until the process is interrupted."""
    scheduler = NewsScheduler()

    try:
        await scheduler.start()
        # Keep the event loop alive; the scheduler jobs run in the background.
        while True:
            await asyncio.sleep(60)
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    except asyncio.CancelledError:
        # Under asyncio.run() Ctrl-C normally reaches this coroutine as a task
        # cancellation rather than KeyboardInterrupt; log it and let the
        # cancellation propagate so asyncio.run() can finish its own handling.
        logger.info("Received interrupt signal")
        raise
    finally:
        await scheduler.stop()


if __name__ == "__main__":
    asyncio.run(main())