""" Multi-threaded Keyword Scheduler Service 하나의 프로세스에서 여러 스레드로 키워드를 관리하는 스케줄러 """ import asyncio import logging import os import sys from datetime import datetime, timedelta from motor.motor_asyncio import AsyncIOMotorClient from typing import Dict import threading import time # Import from shared module sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from shared.models import Keyword, PipelineJob from shared.queue_manager import QueueManager logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # 전역 변수로 스케줄러 인스턴스 참조 저장 scheduler_instance = None class KeywordThread(threading.Thread): """개별 키워드를 관리하는 스레드""" def __init__(self, keyword_text: str, mongodb_url: str, db_name: str, redis_url: str): super().__init__(name=f"Thread-{keyword_text}") self.keyword_text = keyword_text self.mongodb_url = mongodb_url self.db_name = db_name self.redis_url = redis_url self.running = True self.keyword = None self.status = "initializing" self.last_execution = None self.execution_count = 0 self.error_count = 0 self.last_error = None def run(self): """스레드 실행""" # 새로운 이벤트 루프 생성 loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: loop.run_until_complete(self.run_scheduler()) finally: loop.close() async def run_scheduler(self): """비동기 스케줄러 실행""" # Redis 연결 self.queue_manager = QueueManager(redis_url=self.redis_url) await self.queue_manager.connect() # MongoDB 연결 client = AsyncIOMotorClient(self.mongodb_url) self.db = client[self.db_name] logger.info(f"[{self.keyword_text}] Thread started") # 키워드 로드 await self.load_keyword() if not self.keyword: logger.error(f"[{self.keyword_text}] Failed to load keyword") return # 메인 루프 while self.running: try: # 키워드 상태 체크 await self.reload_keyword() if not self.keyword.is_active: self.status = "inactive" logger.info(f"[{self.keyword_text}] Keyword is inactive, sleeping...") await asyncio.sleep(60) continue # 실행 시간 체크 now = datetime.now() if self.keyword.next_run and self.keyword.next_run <= now: self.status = "executing" await self.execute_keyword() # 다음 실행 시간까지 대기 sleep_seconds = self.keyword.interval_minutes * 60 self.status = "waiting" else: # 다음 체크까지 1분 대기 sleep_seconds = 60 self.status = "waiting" await asyncio.sleep(sleep_seconds) except Exception as e: self.error_count += 1 self.last_error = str(e) self.status = "error" logger.error(f"[{self.keyword_text}] Error in thread loop: {e}") await asyncio.sleep(60) await self.queue_manager.disconnect() logger.info(f"[{self.keyword_text}] Thread stopped") async def load_keyword(self): """키워드 초기 로드""" try: keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text}) if keyword_doc: self.keyword = Keyword(**keyword_doc) logger.info(f"[{self.keyword_text}] Loaded keyword") except Exception as e: logger.error(f"[{self.keyword_text}] Error loading keyword: {e}") async def reload_keyword(self): """키워드 정보 재로드""" try: keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text}) if keyword_doc: self.keyword = Keyword(**keyword_doc) except Exception as e: logger.error(f"[{self.keyword_text}] Error reloading keyword: {e}") async def execute_keyword(self): """키워드 실행""" try: logger.info(f"[{self.keyword_text}] Executing keyword") # PipelineJob 생성 job = PipelineJob( keyword_id=self.keyword.keyword_id, keyword=self.keyword.keyword, stage='rss_collection', data={ 'rss_feeds': self.keyword.rss_feeds if self.keyword.rss_feeds else [], 'max_articles': self.keyword.max_articles_per_run, 'scheduled': True, 'thread_name': self.name }, priority=self.keyword.priority ) # 큐에 작업 추가 await self.queue_manager.enqueue('rss_collection', job) logger.info(f"[{self.keyword_text}] Enqueued job {job.job_id}") # 키워드 업데이트 update_data = { "last_run": datetime.now(), "next_run": datetime.now() + timedelta(minutes=self.keyword.interval_minutes), "updated_at": datetime.now() } await self.db.keywords.update_one( {"keyword_id": self.keyword.keyword_id}, {"$set": update_data} ) self.last_execution = datetime.now() self.execution_count += 1 logger.info(f"[{self.keyword_text}] Next run at {update_data['next_run']}") except Exception as e: self.error_count += 1 self.last_error = str(e) logger.error(f"[{self.keyword_text}] Error executing keyword: {e}") def stop(self): """스레드 중지""" self.running = False self.status = "stopped" def get_status(self): """스레드 상태 반환""" return { "keyword": self.keyword_text, "thread_name": self.name, "status": self.status, "is_alive": self.is_alive(), "execution_count": self.execution_count, "last_execution": self.last_execution.isoformat() if self.last_execution else None, "error_count": self.error_count, "last_error": self.last_error, "next_run": self.keyword.next_run.isoformat() if self.keyword and self.keyword.next_run else None } class MultiThreadScheduler: """멀티스레드 키워드 스케줄러""" def __init__(self): self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") self.db_name = os.getenv("DB_NAME", "ai_writer_db") self.redis_url = os.getenv("REDIS_URL", "redis://redis:6379") self.threads: Dict[str, KeywordThread] = {} self.running = True # Singleton 인스턴스를 전역 변수로 저장 global scheduler_instance scheduler_instance = self async def start(self): """스케줄러 시작""" logger.info("Starting Multi-threaded Keyword Scheduler") # MongoDB 연결 client = AsyncIOMotorClient(self.mongodb_url) self.db = client[self.db_name] # 초기 키워드 설정 await self.initialize_keywords() # 키워드 로드 및 스레드 시작 await self.load_and_start_threads() # 메인 루프 - 새로운 키워드 체크 while self.running: try: await self.check_new_keywords() await asyncio.sleep(30) # 30초마다 새 키워드 체크 except Exception as e: logger.error(f"Error in main loop: {e}") await asyncio.sleep(30) async def initialize_keywords(self): """초기 키워드 설정 (없으면 생성)""" try: count = await self.db.keywords.count_documents({}) if count == 0: logger.info("No keywords found. Creating default keywords...") default_keywords = [ { "keyword": "AI", "interval_minutes": 60, "is_active": True, "priority": 1, "rss_feeds": [], "next_run": datetime.now() + timedelta(minutes=1) }, { "keyword": "경제", "interval_minutes": 120, "is_active": True, "priority": 0, "rss_feeds": [], "next_run": datetime.now() + timedelta(minutes=1) }, { "keyword": "테크놀로지", "interval_minutes": 60, "is_active": True, "priority": 1, "rss_feeds": [], "next_run": datetime.now() + timedelta(minutes=1) } ] for kw_data in default_keywords: keyword = Keyword(**kw_data) await self.db.keywords.insert_one(keyword.model_dump()) logger.info(f"Created keyword: {keyword.keyword}") logger.info(f"Found {count} keywords in database") except Exception as e: logger.error(f"Error initializing keywords: {e}") async def load_and_start_threads(self): """키워드 로드 및 스레드 시작""" try: # 활성 키워드 조회 cursor = self.db.keywords.find({"is_active": True}) keywords = await cursor.to_list(None) for keyword_doc in keywords: keyword = Keyword(**keyword_doc) if keyword.keyword not in self.threads: self.start_keyword_thread(keyword.keyword) logger.info(f"Started {len(self.threads)} keyword threads") except Exception as e: logger.error(f"Error loading keywords: {e}") def start_keyword_thread(self, keyword_text: str): """키워드 스레드 시작""" if keyword_text not in self.threads: thread = KeywordThread( keyword_text=keyword_text, mongodb_url=self.mongodb_url, db_name=self.db_name, redis_url=self.redis_url ) thread.start() self.threads[keyword_text] = thread logger.info(f"Started thread for keyword: {keyword_text}") async def check_new_keywords(self): """새로운 키워드 체크 및 스레드 관리""" try: # 현재 활성 키워드 조회 cursor = self.db.keywords.find({"is_active": True}) active_keywords = await cursor.to_list(None) active_keyword_texts = {kw['keyword'] for kw in active_keywords} # 새 키워드 시작 for keyword_text in active_keyword_texts: if keyword_text not in self.threads: self.start_keyword_thread(keyword_text) # 비활성화된 키워드 스레드 중지 for keyword_text in list(self.threads.keys()): if keyword_text not in active_keyword_texts: thread = self.threads[keyword_text] thread.stop() del self.threads[keyword_text] logger.info(f"Stopped thread for keyword: {keyword_text}") except Exception as e: logger.error(f"Error checking new keywords: {e}") def stop(self): """모든 스레드 중지""" self.running = False for thread in self.threads.values(): thread.stop() # 모든 스레드가 종료될 때까지 대기 for thread in self.threads.values(): thread.join(timeout=5) logger.info("Multi-threaded Keyword Scheduler stopped") def get_threads_status(self): """모든 스레드 상태 반환""" status_list = [] for thread in self.threads.values(): status_list.append(thread.get_status()) return status_list async def main(): """메인 함수""" scheduler = MultiThreadScheduler() try: await scheduler.start() except KeyboardInterrupt: logger.info("Received interrupt signal") finally: scheduler.stop() if __name__ == "__main__": asyncio.run(main())