- Add multi-threaded keyword scheduler for periodic news collection - Create Keyword Manager API for CRUD operations and monitoring - Implement automatic pipeline triggering (RSS → Google → AI → Translation) - Add thread status monitoring and dynamic keyword management - Support priority-based execution and configurable intervals - Add comprehensive scheduler documentation guide - Default keywords: AI, 테크놀로지, 경제, 블록체인 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
361 lines
13 KiB
Python
361 lines
13 KiB
Python
"""
|
|
Multi-threaded Keyword Scheduler Service
|
|
하나의 프로세스에서 여러 스레드로 키워드를 관리하는 스케줄러
|
|
"""
|
|
import asyncio
|
|
import logging
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timedelta
|
|
from motor.motor_asyncio import AsyncIOMotorClient
|
|
from typing import Dict
|
|
import threading
|
|
import time
|
|
|
|
# Import from shared module
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
from shared.models import Keyword, PipelineJob
|
|
from shared.queue_manager import QueueManager
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# 전역 변수로 스케줄러 인스턴스 참조 저장
|
|
scheduler_instance = None
|
|
|
|
class KeywordThread(threading.Thread):
|
|
"""개별 키워드를 관리하는 스레드"""
|
|
|
|
def __init__(self, keyword_text: str, mongodb_url: str, db_name: str, redis_url: str):
|
|
super().__init__(name=f"Thread-{keyword_text}")
|
|
self.keyword_text = keyword_text
|
|
self.mongodb_url = mongodb_url
|
|
self.db_name = db_name
|
|
self.redis_url = redis_url
|
|
self.running = True
|
|
self.keyword = None
|
|
self.status = "initializing"
|
|
self.last_execution = None
|
|
self.execution_count = 0
|
|
self.error_count = 0
|
|
self.last_error = None
|
|
|
|
def run(self):
|
|
"""스레드 실행"""
|
|
# 새로운 이벤트 루프 생성
|
|
loop = asyncio.new_event_loop()
|
|
asyncio.set_event_loop(loop)
|
|
|
|
try:
|
|
loop.run_until_complete(self.run_scheduler())
|
|
finally:
|
|
loop.close()
|
|
|
|
async def run_scheduler(self):
|
|
"""비동기 스케줄러 실행"""
|
|
# Redis 연결
|
|
self.queue_manager = QueueManager(redis_url=self.redis_url)
|
|
await self.queue_manager.connect()
|
|
|
|
# MongoDB 연결
|
|
client = AsyncIOMotorClient(self.mongodb_url)
|
|
self.db = client[self.db_name]
|
|
|
|
logger.info(f"[{self.keyword_text}] Thread started")
|
|
|
|
# 키워드 로드
|
|
await self.load_keyword()
|
|
|
|
if not self.keyword:
|
|
logger.error(f"[{self.keyword_text}] Failed to load keyword")
|
|
return
|
|
|
|
# 메인 루프
|
|
while self.running:
|
|
try:
|
|
# 키워드 상태 체크
|
|
await self.reload_keyword()
|
|
|
|
if not self.keyword.is_active:
|
|
self.status = "inactive"
|
|
logger.info(f"[{self.keyword_text}] Keyword is inactive, sleeping...")
|
|
await asyncio.sleep(60)
|
|
continue
|
|
|
|
# 실행 시간 체크
|
|
now = datetime.now()
|
|
if self.keyword.next_run and self.keyword.next_run <= now:
|
|
self.status = "executing"
|
|
await self.execute_keyword()
|
|
# 다음 실행 시간까지 대기
|
|
sleep_seconds = self.keyword.interval_minutes * 60
|
|
self.status = "waiting"
|
|
else:
|
|
# 다음 체크까지 1분 대기
|
|
sleep_seconds = 60
|
|
self.status = "waiting"
|
|
|
|
await asyncio.sleep(sleep_seconds)
|
|
|
|
except Exception as e:
|
|
self.error_count += 1
|
|
self.last_error = str(e)
|
|
self.status = "error"
|
|
logger.error(f"[{self.keyword_text}] Error in thread loop: {e}")
|
|
await asyncio.sleep(60)
|
|
|
|
await self.queue_manager.disconnect()
|
|
logger.info(f"[{self.keyword_text}] Thread stopped")
|
|
|
|
async def load_keyword(self):
|
|
"""키워드 초기 로드"""
|
|
try:
|
|
keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text})
|
|
if keyword_doc:
|
|
self.keyword = Keyword(**keyword_doc)
|
|
logger.info(f"[{self.keyword_text}] Loaded keyword")
|
|
except Exception as e:
|
|
logger.error(f"[{self.keyword_text}] Error loading keyword: {e}")
|
|
|
|
async def reload_keyword(self):
|
|
"""키워드 정보 재로드"""
|
|
try:
|
|
keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text})
|
|
if keyword_doc:
|
|
self.keyword = Keyword(**keyword_doc)
|
|
except Exception as e:
|
|
logger.error(f"[{self.keyword_text}] Error reloading keyword: {e}")
|
|
|
|
async def execute_keyword(self):
|
|
"""키워드 실행"""
|
|
try:
|
|
logger.info(f"[{self.keyword_text}] Executing keyword")
|
|
|
|
# PipelineJob 생성
|
|
job = PipelineJob(
|
|
keyword_id=self.keyword.keyword_id,
|
|
keyword=self.keyword.keyword,
|
|
stage='rss_collection',
|
|
data={
|
|
'rss_feeds': self.keyword.rss_feeds if self.keyword.rss_feeds else [],
|
|
'max_articles': self.keyword.max_articles_per_run,
|
|
'scheduled': True,
|
|
'thread_name': self.name
|
|
},
|
|
priority=self.keyword.priority
|
|
)
|
|
|
|
# 큐에 작업 추가
|
|
await self.queue_manager.enqueue('rss_collection', job)
|
|
logger.info(f"[{self.keyword_text}] Enqueued job {job.job_id}")
|
|
|
|
# 키워드 업데이트
|
|
update_data = {
|
|
"last_run": datetime.now(),
|
|
"next_run": datetime.now() + timedelta(minutes=self.keyword.interval_minutes),
|
|
"updated_at": datetime.now()
|
|
}
|
|
|
|
await self.db.keywords.update_one(
|
|
{"keyword_id": self.keyword.keyword_id},
|
|
{"$set": update_data}
|
|
)
|
|
|
|
self.last_execution = datetime.now()
|
|
self.execution_count += 1
|
|
logger.info(f"[{self.keyword_text}] Next run at {update_data['next_run']}")
|
|
|
|
except Exception as e:
|
|
self.error_count += 1
|
|
self.last_error = str(e)
|
|
logger.error(f"[{self.keyword_text}] Error executing keyword: {e}")
|
|
|
|
def stop(self):
|
|
"""스레드 중지"""
|
|
self.running = False
|
|
self.status = "stopped"
|
|
|
|
def get_status(self):
|
|
"""스레드 상태 반환"""
|
|
return {
|
|
"keyword": self.keyword_text,
|
|
"thread_name": self.name,
|
|
"status": self.status,
|
|
"is_alive": self.is_alive(),
|
|
"execution_count": self.execution_count,
|
|
"last_execution": self.last_execution.isoformat() if self.last_execution else None,
|
|
"error_count": self.error_count,
|
|
"last_error": self.last_error,
|
|
"next_run": self.keyword.next_run.isoformat() if self.keyword and self.keyword.next_run else None
|
|
}
|
|
|
|
|
|
class MultiThreadScheduler:
|
|
"""멀티스레드 키워드 스케줄러"""
|
|
|
|
def __init__(self):
|
|
self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
|
|
self.db_name = os.getenv("DB_NAME", "ai_writer_db")
|
|
self.redis_url = os.getenv("REDIS_URL", "redis://redis:6379")
|
|
self.threads: Dict[str, KeywordThread] = {}
|
|
self.running = True
|
|
# Singleton 인스턴스를 전역 변수로 저장
|
|
global scheduler_instance
|
|
scheduler_instance = self
|
|
|
|
async def start(self):
|
|
"""스케줄러 시작"""
|
|
logger.info("Starting Multi-threaded Keyword Scheduler")
|
|
|
|
# MongoDB 연결
|
|
client = AsyncIOMotorClient(self.mongodb_url)
|
|
self.db = client[self.db_name]
|
|
|
|
# 초기 키워드 설정
|
|
await self.initialize_keywords()
|
|
|
|
# 키워드 로드 및 스레드 시작
|
|
await self.load_and_start_threads()
|
|
|
|
# 메인 루프 - 새로운 키워드 체크
|
|
while self.running:
|
|
try:
|
|
await self.check_new_keywords()
|
|
await asyncio.sleep(30) # 30초마다 새 키워드 체크
|
|
except Exception as e:
|
|
logger.error(f"Error in main loop: {e}")
|
|
await asyncio.sleep(30)
|
|
|
|
async def initialize_keywords(self):
|
|
"""초기 키워드 설정 (없으면 생성)"""
|
|
try:
|
|
count = await self.db.keywords.count_documents({})
|
|
|
|
if count == 0:
|
|
logger.info("No keywords found. Creating default keywords...")
|
|
|
|
default_keywords = [
|
|
{
|
|
"keyword": "AI",
|
|
"interval_minutes": 60,
|
|
"is_active": True,
|
|
"priority": 1,
|
|
"rss_feeds": [],
|
|
"next_run": datetime.now() + timedelta(minutes=1)
|
|
},
|
|
{
|
|
"keyword": "경제",
|
|
"interval_minutes": 120,
|
|
"is_active": True,
|
|
"priority": 0,
|
|
"rss_feeds": [],
|
|
"next_run": datetime.now() + timedelta(minutes=1)
|
|
},
|
|
{
|
|
"keyword": "테크놀로지",
|
|
"interval_minutes": 60,
|
|
"is_active": True,
|
|
"priority": 1,
|
|
"rss_feeds": [],
|
|
"next_run": datetime.now() + timedelta(minutes=1)
|
|
}
|
|
]
|
|
|
|
for kw_data in default_keywords:
|
|
keyword = Keyword(**kw_data)
|
|
await self.db.keywords.insert_one(keyword.model_dump())
|
|
logger.info(f"Created keyword: {keyword.keyword}")
|
|
|
|
logger.info(f"Found {count} keywords in database")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error initializing keywords: {e}")
|
|
|
|
async def load_and_start_threads(self):
|
|
"""키워드 로드 및 스레드 시작"""
|
|
try:
|
|
# 활성 키워드 조회
|
|
cursor = self.db.keywords.find({"is_active": True})
|
|
keywords = await cursor.to_list(None)
|
|
|
|
for keyword_doc in keywords:
|
|
keyword = Keyword(**keyword_doc)
|
|
if keyword.keyword not in self.threads:
|
|
self.start_keyword_thread(keyword.keyword)
|
|
|
|
logger.info(f"Started {len(self.threads)} keyword threads")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error loading keywords: {e}")
|
|
|
|
def start_keyword_thread(self, keyword_text: str):
|
|
"""키워드 스레드 시작"""
|
|
if keyword_text not in self.threads:
|
|
thread = KeywordThread(
|
|
keyword_text=keyword_text,
|
|
mongodb_url=self.mongodb_url,
|
|
db_name=self.db_name,
|
|
redis_url=self.redis_url
|
|
)
|
|
thread.start()
|
|
self.threads[keyword_text] = thread
|
|
logger.info(f"Started thread for keyword: {keyword_text}")
|
|
|
|
async def check_new_keywords(self):
|
|
"""새로운 키워드 체크 및 스레드 관리"""
|
|
try:
|
|
# 현재 활성 키워드 조회
|
|
cursor = self.db.keywords.find({"is_active": True})
|
|
active_keywords = await cursor.to_list(None)
|
|
active_keyword_texts = {kw['keyword'] for kw in active_keywords}
|
|
|
|
# 새 키워드 시작
|
|
for keyword_text in active_keyword_texts:
|
|
if keyword_text not in self.threads:
|
|
self.start_keyword_thread(keyword_text)
|
|
|
|
# 비활성화된 키워드 스레드 중지
|
|
for keyword_text in list(self.threads.keys()):
|
|
if keyword_text not in active_keyword_texts:
|
|
thread = self.threads[keyword_text]
|
|
thread.stop()
|
|
del self.threads[keyword_text]
|
|
logger.info(f"Stopped thread for keyword: {keyword_text}")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error checking new keywords: {e}")
|
|
|
|
def stop(self):
|
|
"""모든 스레드 중지"""
|
|
self.running = False
|
|
for thread in self.threads.values():
|
|
thread.stop()
|
|
|
|
# 모든 스레드가 종료될 때까지 대기
|
|
for thread in self.threads.values():
|
|
thread.join(timeout=5)
|
|
|
|
logger.info("Multi-threaded Keyword Scheduler stopped")
|
|
|
|
def get_threads_status(self):
|
|
"""모든 스레드 상태 반환"""
|
|
status_list = []
|
|
for thread in self.threads.values():
|
|
status_list.append(thread.get_status())
|
|
return status_list
|
|
|
|
|
|
async def main():
|
|
"""메인 함수"""
|
|
scheduler = MultiThreadScheduler()
|
|
|
|
try:
|
|
await scheduler.start()
|
|
except KeyboardInterrupt:
|
|
logger.info("Received interrupt signal")
|
|
finally:
|
|
scheduler.stop()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
asyncio.run(main()) |