feat: Implement automated keyword-based news pipeline scheduler
- Add multi-threaded keyword scheduler for periodic news collection - Create Keyword Manager API for CRUD operations and monitoring - Implement automatic pipeline triggering (RSS → Google → AI → Translation) - Add thread status monitoring and dynamic keyword management - Support priority-based execution and configurable intervals - Add comprehensive scheduler documentation guide - Default keywords: AI, 테크놀로지, 경제, 블록체인 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
173
services/pipeline/scheduler/single_keyword_scheduler.py
Normal file
173
services/pipeline/scheduler/single_keyword_scheduler.py
Normal file
@ -0,0 +1,173 @@
|
||||
"""
|
||||
Single Keyword Scheduler Service
|
||||
단일 키워드를 전담하는 스케줄러
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
import uuid
|
||||
|
||||
# Import from shared module
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from shared.models import Keyword, PipelineJob
|
||||
from shared.queue_manager import QueueManager
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class SingleKeywordScheduler:
|
||||
def __init__(self):
|
||||
self.queue_manager = QueueManager(
|
||||
redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
|
||||
)
|
||||
self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
|
||||
self.db_name = os.getenv("DB_NAME", "ai_writer_db")
|
||||
self.keyword_text = os.getenv("KEYWORD") # 환경변수로 키워드 지정
|
||||
self.interval_minutes = int(os.getenv("INTERVAL_MINUTES", "60"))
|
||||
self.db = None
|
||||
self.keyword = None
|
||||
|
||||
async def start(self):
|
||||
"""스케줄러 시작"""
|
||||
if not self.keyword_text:
|
||||
logger.error("KEYWORD environment variable is required")
|
||||
return
|
||||
|
||||
logger.info(f"Starting Single Keyword Scheduler for '{self.keyword_text}'")
|
||||
|
||||
# Redis 연결
|
||||
await self.queue_manager.connect()
|
||||
|
||||
# MongoDB 연결
|
||||
client = AsyncIOMotorClient(self.mongodb_url)
|
||||
self.db = client[self.db_name]
|
||||
|
||||
# 키워드 초기화 또는 로드
|
||||
await self.initialize_keyword()
|
||||
|
||||
if not self.keyword:
|
||||
logger.error(f"Failed to initialize keyword '{self.keyword_text}'")
|
||||
return
|
||||
|
||||
# 메인 루프 - 이 키워드만 처리
|
||||
while True:
|
||||
try:
|
||||
await self.check_and_execute()
|
||||
# 다음 실행까지 대기
|
||||
sleep_seconds = self.keyword.interval_minutes * 60
|
||||
logger.info(f"Sleeping for {self.keyword.interval_minutes} minutes until next execution")
|
||||
await asyncio.sleep(sleep_seconds)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in scheduler loop: {e}")
|
||||
await asyncio.sleep(60) # 에러 발생시 1분 후 재시도
|
||||
|
||||
async def initialize_keyword(self):
|
||||
"""키워드 초기화 또는 로드"""
|
||||
try:
|
||||
# 기존 키워드 찾기
|
||||
keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text})
|
||||
|
||||
if keyword_doc:
|
||||
self.keyword = Keyword(**keyword_doc)
|
||||
logger.info(f"Loaded existing keyword: {self.keyword_text}")
|
||||
else:
|
||||
# 새 키워드 생성
|
||||
self.keyword = Keyword(
|
||||
keyword=self.keyword_text,
|
||||
interval_minutes=self.interval_minutes,
|
||||
is_active=True,
|
||||
priority=int(os.getenv("PRIORITY", "0")),
|
||||
rss_feeds=os.getenv("RSS_FEEDS", "").split(",") if os.getenv("RSS_FEEDS") else [],
|
||||
max_articles_per_run=int(os.getenv("MAX_ARTICLES", "100"))
|
||||
)
|
||||
|
||||
await self.db.keywords.insert_one(self.keyword.model_dump())
|
||||
logger.info(f"Created new keyword: {self.keyword_text}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error initializing keyword: {e}")
|
||||
|
||||
async def check_and_execute(self):
|
||||
"""키워드 실행 체크 및 실행"""
|
||||
try:
|
||||
# 최신 키워드 정보 다시 로드
|
||||
keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text})
|
||||
|
||||
if not keyword_doc:
|
||||
logger.error(f"Keyword '{self.keyword_text}' not found in database")
|
||||
return
|
||||
|
||||
self.keyword = Keyword(**keyword_doc)
|
||||
|
||||
# 비활성화된 경우 스킵
|
||||
if not self.keyword.is_active:
|
||||
logger.info(f"Keyword '{self.keyword_text}' is inactive, skipping")
|
||||
return
|
||||
|
||||
# 실행
|
||||
await self.execute_keyword()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking keyword: {e}")
|
||||
|
||||
async def execute_keyword(self):
|
||||
"""키워드 실행"""
|
||||
try:
|
||||
logger.info(f"Executing keyword: {self.keyword.keyword}")
|
||||
|
||||
# PipelineJob 생성
|
||||
job = PipelineJob(
|
||||
keyword_id=self.keyword.keyword_id,
|
||||
keyword=self.keyword.keyword,
|
||||
stage='rss_collection',
|
||||
data={
|
||||
'rss_feeds': self.keyword.rss_feeds if self.keyword.rss_feeds else [],
|
||||
'max_articles': self.keyword.max_articles_per_run,
|
||||
'scheduled': True,
|
||||
'scheduler_instance': f"single-{self.keyword_text}"
|
||||
},
|
||||
priority=self.keyword.priority
|
||||
)
|
||||
|
||||
# 큐에 작업 추가
|
||||
await self.queue_manager.enqueue('rss_collection', job)
|
||||
logger.info(f"Enqueued job for keyword '{self.keyword.keyword}' with job_id: {job.job_id}")
|
||||
|
||||
# 키워드 업데이트
|
||||
update_data = {
|
||||
"last_run": datetime.now(),
|
||||
"next_run": datetime.now() + timedelta(minutes=self.keyword.interval_minutes),
|
||||
"updated_at": datetime.now()
|
||||
}
|
||||
|
||||
await self.db.keywords.update_one(
|
||||
{"keyword_id": self.keyword.keyword_id},
|
||||
{"$set": update_data}
|
||||
)
|
||||
|
||||
logger.info(f"Updated keyword '{self.keyword.keyword}' - next run at {update_data['next_run']}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error executing keyword {self.keyword.keyword}: {e}")
|
||||
|
||||
async def stop(self):
|
||||
"""스케줄러 중지"""
|
||||
await self.queue_manager.disconnect()
|
||||
logger.info(f"Single Keyword Scheduler for '{self.keyword_text}' stopped")
|
||||
|
||||
async def main():
|
||||
"""메인 함수"""
|
||||
scheduler = SingleKeywordScheduler()
|
||||
|
||||
try:
|
||||
await scheduler.start()
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Received interrupt signal")
|
||||
finally:
|
||||
await scheduler.stop()
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
Reference in New Issue
Block a user