Initial commit - cleaned repository

This commit is contained in:
jungwoo choi
2025-09-28 20:41:57 +09:00
commit e3c28f796a
188 changed files with 28102 additions and 0 deletions

View File

@ -0,0 +1,16 @@
FROM python:3.11-slim

WORKDIR /app

# Install Python dependencies first so this layer stays cached until
# requirements.txt changes.
COPY ./scheduler/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the shared models/queue package used by all services.
COPY ./shared /app/shared

# Copy the scheduler service code.
COPY ./scheduler /app

# Run the single-process keyword scheduler as the container entry point.
CMD ["python", "keyword_scheduler.py"]

View File

@ -0,0 +1,336 @@
"""
Keyword Manager API
키워드를 추가/수정/삭제하는 관리 API
"""
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional
from datetime import datetime, timedelta
from motor.motor_asyncio import AsyncIOMotorClient
import uvicorn
import os
import sys
import uuid
# Import from shared module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from shared.models import Keyword
app = FastAPI(title="Keyword Manager API")
# MongoDB 연결
mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
db_name = os.getenv("DB_NAME", "ai_writer_db")
@app.on_event("startup")
async def startup_event():
"""앱 시작 시 MongoDB 연결"""
app.mongodb_client = AsyncIOMotorClient(mongodb_url)
app.db = app.mongodb_client[db_name]
@app.on_event("shutdown")
async def shutdown_event():
"""앱 종료 시 연결 해제"""
app.mongodb_client.close()
class KeywordCreate(BaseModel):
    """Request body for creating a keyword."""
    keyword: str                      # keyword text to track (must be unique)
    interval_minutes: int = 60        # how often the scheduler runs it
    priority: int = 0                 # higher value = processed first
    rss_feeds: List[str] = []         # explicit RSS feeds; empty = scheduler defaults
    max_articles_per_run: int = 100   # per-run article cap
    is_active: bool = True            # inactive keywords are skipped
class KeywordUpdate(BaseModel):
    """Request body for a partial keyword update; None fields are left unchanged."""
    interval_minutes: Optional[int] = None
    priority: Optional[int] = None
    rss_feeds: Optional[List[str]] = None
    max_articles_per_run: Optional[int] = None
    is_active: Optional[bool] = None
@app.get("/")
async def root():
"""API 상태 확인"""
return {"status": "Keyword Manager API is running"}
@app.get("/threads/status")
async def get_threads_status():
"""모든 스레드 상태 조회"""
try:
# MongoDB에서 키워드 정보와 함께 상태 반환
cursor = app.db.keywords.find()
keywords = await cursor.to_list(None)
threads_status = []
for kw in keywords:
status = {
"keyword": kw.get("keyword"),
"keyword_id": kw.get("keyword_id"),
"is_active": kw.get("is_active"),
"interval_minutes": kw.get("interval_minutes"),
"priority": kw.get("priority"),
"last_run": kw.get("last_run").isoformat() if kw.get("last_run") else None,
"next_run": kw.get("next_run").isoformat() if kw.get("next_run") else None,
"thread_status": "active" if kw.get("is_active") else "inactive"
}
# 다음 실행까지 남은 시간 계산
if kw.get("next_run"):
remaining = (kw.get("next_run") - datetime.now()).total_seconds()
if remaining > 0:
status["minutes_until_next_run"] = round(remaining / 60, 1)
else:
status["minutes_until_next_run"] = 0
status["thread_status"] = "pending_execution"
threads_status.append(status)
# 우선순위 순으로 정렬
threads_status.sort(key=lambda x: x.get("priority", 0), reverse=True)
return {
"total_threads": len(threads_status),
"active_threads": sum(1 for t in threads_status if t.get("is_active")),
"threads": threads_status
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/keywords")
async def list_keywords():
"""모든 키워드 조회"""
try:
cursor = app.db.keywords.find()
keywords = await cursor.to_list(None)
# 각 키워드 정보 정리
result = []
for kw in keywords:
result.append({
"keyword_id": kw.get("keyword_id"),
"keyword": kw.get("keyword"),
"interval_minutes": kw.get("interval_minutes"),
"priority": kw.get("priority"),
"is_active": kw.get("is_active"),
"last_run": kw.get("last_run").isoformat() if kw.get("last_run") else None,
"next_run": kw.get("next_run").isoformat() if kw.get("next_run") else None,
"rss_feeds": kw.get("rss_feeds", []),
"max_articles_per_run": kw.get("max_articles_per_run", 100)
})
return {
"total": len(result),
"keywords": result
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/keywords/{keyword_text}")
async def get_keyword(keyword_text: str):
"""특정 키워드 조회"""
try:
keyword = await app.db.keywords.find_one({"keyword": keyword_text})
if not keyword:
raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
return {
"keyword_id": keyword.get("keyword_id"),
"keyword": keyword.get("keyword"),
"interval_minutes": keyword.get("interval_minutes"),
"priority": keyword.get("priority"),
"is_active": keyword.get("is_active"),
"last_run": keyword.get("last_run").isoformat() if keyword.get("last_run") else None,
"next_run": keyword.get("next_run").isoformat() if keyword.get("next_run") else None,
"rss_feeds": keyword.get("rss_feeds", []),
"max_articles_per_run": keyword.get("max_articles_per_run", 100)
}
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/keywords")
async def create_keyword(keyword_data: KeywordCreate):
"""새 키워드 생성"""
try:
# 중복 체크
existing = await app.db.keywords.find_one({"keyword": keyword_data.keyword})
if existing:
raise HTTPException(status_code=400, detail=f"Keyword '{keyword_data.keyword}' already exists")
# 새 키워드 생성
keyword = Keyword(
keyword_id=str(uuid.uuid4()),
keyword=keyword_data.keyword,
interval_minutes=keyword_data.interval_minutes,
priority=keyword_data.priority,
rss_feeds=keyword_data.rss_feeds,
max_articles_per_run=keyword_data.max_articles_per_run,
is_active=keyword_data.is_active,
next_run=datetime.now() + timedelta(minutes=1), # 1분 후 첫 실행
created_at=datetime.now(),
updated_at=datetime.now()
)
await app.db.keywords.insert_one(keyword.model_dump())
return {
"message": f"Keyword '{keyword_data.keyword}' created successfully",
"keyword_id": keyword.keyword_id,
"note": "The scheduler will automatically detect and start processing this keyword within 30 seconds"
}
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.put("/keywords/{keyword_text}")
async def update_keyword(keyword_text: str, update_data: KeywordUpdate):
"""키워드 업데이트"""
try:
# 키워드 존재 확인
existing = await app.db.keywords.find_one({"keyword": keyword_text})
if not existing:
raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
# 업데이트 데이터 준비
update_dict = {}
if update_data.interval_minutes is not None:
update_dict["interval_minutes"] = update_data.interval_minutes
if update_data.priority is not None:
update_dict["priority"] = update_data.priority
if update_data.rss_feeds is not None:
update_dict["rss_feeds"] = update_data.rss_feeds
if update_data.max_articles_per_run is not None:
update_dict["max_articles_per_run"] = update_data.max_articles_per_run
if update_data.is_active is not None:
update_dict["is_active"] = update_data.is_active
if update_dict:
update_dict["updated_at"] = datetime.now()
# 만약 interval이 변경되면 next_run도 재계산
if "interval_minutes" in update_dict:
update_dict["next_run"] = datetime.now() + timedelta(minutes=update_dict["interval_minutes"])
result = await app.db.keywords.update_one(
{"keyword": keyword_text},
{"$set": update_dict}
)
if result.modified_count > 0:
action_note = ""
if update_data.is_active is False:
action_note = "The scheduler will stop the thread for this keyword within 30 seconds."
elif update_data.is_active is True and not existing.get("is_active"):
action_note = "The scheduler will start a new thread for this keyword within 30 seconds."
return {
"message": f"Keyword '{keyword_text}' updated successfully",
"updated_fields": list(update_dict.keys()),
"note": action_note
}
else:
return {"message": "No changes made"}
else:
return {"message": "No update data provided"}
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.delete("/keywords/{keyword_text}")
async def delete_keyword(keyword_text: str):
"""키워드 삭제"""
try:
# 키워드 존재 확인
existing = await app.db.keywords.find_one({"keyword": keyword_text})
if not existing:
raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
# 삭제
result = await app.db.keywords.delete_one({"keyword": keyword_text})
if result.deleted_count > 0:
return {
"message": f"Keyword '{keyword_text}' deleted successfully",
"note": "The scheduler will stop the thread for this keyword within 30 seconds"
}
else:
raise HTTPException(status_code=500, detail="Failed to delete keyword")
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/keywords/{keyword_text}/activate")
async def activate_keyword(keyword_text: str):
"""키워드 활성화"""
try:
result = await app.db.keywords.update_one(
{"keyword": keyword_text},
{"$set": {"is_active": True, "updated_at": datetime.now()}}
)
if result.matched_count == 0:
raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
return {
"message": f"Keyword '{keyword_text}' activated",
"note": "The scheduler will start processing this keyword within 30 seconds"
}
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/keywords/{keyword_text}/deactivate")
async def deactivate_keyword(keyword_text: str):
"""키워드 비활성화"""
try:
result = await app.db.keywords.update_one(
{"keyword": keyword_text},
{"$set": {"is_active": False, "updated_at": datetime.now()}}
)
if result.matched_count == 0:
raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
return {
"message": f"Keyword '{keyword_text}' deactivated",
"note": "The scheduler will stop processing this keyword within 30 seconds"
}
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/keywords/{keyword_text}/trigger")
async def trigger_keyword(keyword_text: str):
"""키워드 즉시 실행 트리거"""
try:
# next_run을 현재 시간으로 설정하여 즉시 실행되도록 함
result = await app.db.keywords.update_one(
{"keyword": keyword_text},
{"$set": {"next_run": datetime.now(), "updated_at": datetime.now()}}
)
if result.matched_count == 0:
raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
return {
"message": f"Keyword '{keyword_text}' triggered for immediate execution",
"note": "The scheduler will execute this keyword within the next minute"
}
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Default port 8100; override with the API_PORT environment variable.
    port = int(os.getenv("API_PORT", "8100"))
    uvicorn.run(app, host="0.0.0.0", port=port)

View File

@ -0,0 +1,245 @@
"""
Keyword Scheduler Service
데이터베이스에 등록된 키워드를 주기적으로 실행하는 스케줄러
"""
import asyncio
import logging
import os
import sys
from datetime import datetime, timedelta
from motor.motor_asyncio import AsyncIOMotorClient
from typing import List, Optional
import uuid
# Import from shared module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from shared.models import Keyword, PipelineJob
from shared.queue_manager import QueueManager
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class KeywordScheduler:
    """Single-loop scheduler: enqueue pipeline jobs for due keywords.

    Every ``check_interval`` seconds the loop loads all active keywords
    whose ``next_run`` is due (or unset), enqueues one ``rss_collection``
    job per keyword, and pushes ``next_run`` forward by the keyword's
    interval.
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        self.db_name = os.getenv("DB_NAME", "ai_writer_db")
        self.db = None
        # Seconds between due-keyword checks (default: every minute).
        self.check_interval = int(os.getenv("SCHEDULER_CHECK_INTERVAL", "60"))
        # Default interval (minutes) for keywords added via add_keyword.
        self.default_interval = int(os.getenv("DEFAULT_KEYWORD_INTERVAL", "60"))

    async def start(self):
        """Connect to Redis/MongoDB, seed default keywords, then loop forever."""
        logger.info("Starting Keyword Scheduler")
        # Redis connection
        await self.queue_manager.connect()
        # MongoDB connection
        client = AsyncIOMotorClient(self.mongodb_url)
        self.db = client[self.db_name]
        # Seed default keywords on first run
        await self.initialize_keywords()
        # Main loop
        while True:
            try:
                await self.check_and_execute_keywords()
                await asyncio.sleep(self.check_interval)
            except Exception as e:
                logger.error(f"Error in scheduler loop: {e}")
                await asyncio.sleep(10)  # brief back-off before retrying

    async def initialize_keywords(self):
        """Create a default keyword set when the collection is empty."""
        try:
            count = await self.db.keywords.count_documents({})
            if count == 0:
                logger.info("No keywords found. Creating default keywords...")
                default_keywords = [
                    {
                        "keyword": "AI",
                        "interval_minutes": 60,
                        "is_active": True,
                        "priority": 1,
                        "rss_feeds": []
                    },
                    {
                        "keyword": "경제",
                        "interval_minutes": 120,
                        "is_active": True,
                        "priority": 0,
                        "rss_feeds": []
                    },
                    {
                        "keyword": "테크놀로지",
                        "interval_minutes": 60,
                        "is_active": True,
                        "priority": 1,
                        "rss_feeds": []
                    }
                ]
                for kw_data in default_keywords:
                    keyword = Keyword(**kw_data)
                    keyword.next_run = datetime.now() + timedelta(minutes=5)  # first run in 5 minutes
                    # Fix: use pydantic v2 model_dump() instead of the
                    # deprecated v1 .dict() (requirements pin pydantic 2.x);
                    # keeps parity with the other scheduler variants.
                    await self.db.keywords.insert_one(keyword.model_dump())
                    logger.info(f"Created keyword: {keyword.keyword}")
            logger.info(f"Found {count} keywords in database")
        except Exception as e:
            logger.error(f"Error initializing keywords: {e}")

    async def check_and_execute_keywords(self):
        """Find active keywords that are due and execute each one."""
        try:
            now = datetime.now()
            # Active keywords whose next_run is due or was never set.
            query = {
                "is_active": True,
                "$or": [
                    {"next_run": {"$lte": now}},
                    {"next_run": None}
                ]
            }
            # Highest priority first.
            cursor = self.db.keywords.find(query).sort("priority", -1)
            keywords = await cursor.to_list(None)
            for keyword_data in keywords:
                keyword = Keyword(**keyword_data)
                await self.execute_keyword(keyword)
        except Exception as e:
            logger.error(f"Error checking keywords: {e}")

    async def execute_keyword(self, keyword: Keyword):
        """Enqueue one rss_collection job and advance the keyword's next_run."""
        try:
            logger.info(f"Executing keyword: {keyword.keyword}")
            job = PipelineJob(
                keyword_id=keyword.keyword_id,
                keyword=keyword.keyword,
                stage='rss_collection',
                data={
                    'rss_feeds': keyword.rss_feeds if keyword.rss_feeds else [],
                    'max_articles': keyword.max_articles_per_run,
                    'scheduled': True
                },
                priority=keyword.priority
            )
            # Hand the job to the first pipeline stage.
            await self.queue_manager.enqueue('rss_collection', job)
            logger.info(f"Enqueued job for keyword '{keyword.keyword}' with job_id: {job.job_id}")
            # Record this run and schedule the next one.
            update_data = {
                "last_run": datetime.now(),
                "next_run": datetime.now() + timedelta(minutes=keyword.interval_minutes),
                "updated_at": datetime.now()
            }
            await self.db.keywords.update_one(
                {"keyword_id": keyword.keyword_id},
                {"$set": update_data}
            )
            logger.info(f"Updated keyword '{keyword.keyword}' - next run at {update_data['next_run']}")
        except Exception as e:
            logger.error(f"Error executing keyword {keyword.keyword}: {e}")

    async def add_keyword(self, keyword_text: str, interval_minutes: int = None,
                          rss_feeds: List[str] = None, priority: int = 0):
        """Insert a new keyword unless one with the same text exists.

        Returns the created Keyword, or None on duplicate/error.
        """
        try:
            existing = await self.db.keywords.find_one({"keyword": keyword_text})
            if existing:
                logger.warning(f"Keyword '{keyword_text}' already exists")
                return None
            keyword = Keyword(
                keyword=keyword_text,
                interval_minutes=interval_minutes or self.default_interval,
                rss_feeds=rss_feeds or [],
                priority=priority,
                next_run=datetime.now() + timedelta(minutes=1)  # first run in 1 minute
            )
            # Fix: pydantic v2 model_dump() instead of deprecated .dict().
            result = await self.db.keywords.insert_one(keyword.model_dump())
            logger.info(f"Added new keyword: {keyword_text}")
            return keyword
        except Exception as e:
            logger.error(f"Error adding keyword: {e}")
            return None

    async def update_keyword(self, keyword_id: str, **kwargs):
        """Apply non-None fields from kwargs to the keyword; return success."""
        try:
            update_data = {k: v for k, v in kwargs.items() if v is not None}
            update_data["updated_at"] = datetime.now()
            result = await self.db.keywords.update_one(
                {"keyword_id": keyword_id},
                {"$set": update_data}
            )
            if result.modified_count > 0:
                logger.info(f"Updated keyword {keyword_id}")
                return True
            return False
        except Exception as e:
            logger.error(f"Error updating keyword: {e}")
            return False

    async def delete_keyword(self, keyword_id: str):
        """Delete a keyword by id; return True when a document was removed."""
        try:
            result = await self.db.keywords.delete_one({"keyword_id": keyword_id})
            if result.deleted_count > 0:
                logger.info(f"Deleted keyword {keyword_id}")
                return True
            return False
        except Exception as e:
            logger.error(f"Error deleting keyword: {e}")
            return False

    async def stop(self):
        """Disconnect from Redis."""
        await self.queue_manager.disconnect()
        logger.info("Keyword Scheduler stopped")
async def main():
    """Entry point: run the scheduler until interrupted, then clean up."""
    scheduler = KeywordScheduler()
    try:
        await scheduler.start()
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    finally:
        await scheduler.stop()


if __name__ == "__main__":
    asyncio.run(main())

View File

@ -0,0 +1,361 @@
"""
Multi-threaded Keyword Scheduler Service
하나의 프로세스에서 여러 스레드로 키워드를 관리하는 스케줄러
"""
import asyncio
import logging
import os
import sys
from datetime import datetime, timedelta
from motor.motor_asyncio import AsyncIOMotorClient
from typing import Dict
import threading
import time
# Import from shared module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from shared.models import Keyword, PipelineJob
from shared.queue_manager import QueueManager
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# 전역 변수로 스케줄러 인스턴스 참조 저장
scheduler_instance = None
class KeywordThread(threading.Thread):
    """Dedicated worker thread for a single keyword.

    Each thread owns a private asyncio event loop plus its own MongoDB and
    Redis connections, reloads its keyword document every cycle, and
    enqueues an ``rss_collection`` job whenever ``next_run`` is due.
    """

    def __init__(self, keyword_text: str, mongodb_url: str, db_name: str, redis_url: str):
        super().__init__(name=f"Thread-{keyword_text}")
        self.keyword_text = keyword_text
        self.mongodb_url = mongodb_url
        self.db_name = db_name
        self.redis_url = redis_url
        self.running = True          # cleared by stop() to end the loop
        self.keyword = None          # latest Keyword snapshot from MongoDB
        self.status = "initializing"
        self.last_execution = None
        self.execution_count = 0
        self.error_count = 0
        self.last_error = None

    def run(self):
        """Thread entry point: run the async scheduler on a private event loop."""
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(self.run_scheduler())
        finally:
            loop.close()

    async def run_scheduler(self):
        """Main async loop: poll the keyword document and execute when due."""
        # Redis connection
        self.queue_manager = QueueManager(redis_url=self.redis_url)
        await self.queue_manager.connect()
        # MongoDB connection
        client = AsyncIOMotorClient(self.mongodb_url)
        self.db = client[self.db_name]
        logger.info(f"[{self.keyword_text}] Thread started")
        await self.load_keyword()
        if not self.keyword:
            logger.error(f"[{self.keyword_text}] Failed to load keyword")
            return
        while self.running:
            try:
                # Refresh the document so external changes (deactivation,
                # interval edits, manual triggers) take effect promptly.
                await self.reload_keyword()
                if not self.keyword.is_active:
                    self.status = "inactive"
                    logger.info(f"[{self.keyword_text}] Keyword is inactive, sleeping...")
                    await asyncio.sleep(60)
                    continue
                now = datetime.now()
                if self.keyword.next_run and self.keyword.next_run <= now:
                    self.status = "executing"
                    await self.execute_keyword()
                self.status = "waiting"
                # Fix: always re-check within a minute instead of sleeping the
                # whole interval after an execution. next_run stored in MongoDB
                # drives the actual cadence, and DB-side changes (deactivate,
                # trigger, interval update) are now honored within ~60s as the
                # manager API's response notes promise.
                await asyncio.sleep(60)
            except Exception as e:
                self.error_count += 1
                self.last_error = str(e)
                self.status = "error"
                logger.error(f"[{self.keyword_text}] Error in thread loop: {e}")
                await asyncio.sleep(60)
        await self.queue_manager.disconnect()
        logger.info(f"[{self.keyword_text}] Thread stopped")

    async def load_keyword(self):
        """Initial load of the keyword document from MongoDB."""
        try:
            keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text})
            if keyword_doc:
                self.keyword = Keyword(**keyword_doc)
                logger.info(f"[{self.keyword_text}] Loaded keyword")
        except Exception as e:
            logger.error(f"[{self.keyword_text}] Error loading keyword: {e}")

    async def reload_keyword(self):
        """Refresh the in-memory keyword snapshot from MongoDB."""
        try:
            keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text})
            if keyword_doc:
                self.keyword = Keyword(**keyword_doc)
        except Exception as e:
            logger.error(f"[{self.keyword_text}] Error reloading keyword: {e}")

    async def execute_keyword(self):
        """Enqueue an rss_collection job and advance last_run/next_run."""
        try:
            logger.info(f"[{self.keyword_text}] Executing keyword")
            job = PipelineJob(
                keyword_id=self.keyword.keyword_id,
                keyword=self.keyword.keyword,
                stage='rss_collection',
                data={
                    'rss_feeds': self.keyword.rss_feeds if self.keyword.rss_feeds else [],
                    'max_articles': self.keyword.max_articles_per_run,
                    'scheduled': True,
                    'thread_name': self.name
                },
                priority=self.keyword.priority
            )
            await self.queue_manager.enqueue('rss_collection', job)
            logger.info(f"[{self.keyword_text}] Enqueued job {job.job_id}")
            # Record this run and schedule the next one in MongoDB.
            update_data = {
                "last_run": datetime.now(),
                "next_run": datetime.now() + timedelta(minutes=self.keyword.interval_minutes),
                "updated_at": datetime.now()
            }
            await self.db.keywords.update_one(
                {"keyword_id": self.keyword.keyword_id},
                {"$set": update_data}
            )
            self.last_execution = datetime.now()
            self.execution_count += 1
            logger.info(f"[{self.keyword_text}] Next run at {update_data['next_run']}")
        except Exception as e:
            self.error_count += 1
            self.last_error = str(e)
            logger.error(f"[{self.keyword_text}] Error executing keyword: {e}")

    def stop(self):
        """Ask the thread loop to exit after its current sleep."""
        self.running = False
        self.status = "stopped"

    def get_status(self):
        """Return a JSON-serializable snapshot of this thread's state."""
        return {
            "keyword": self.keyword_text,
            "thread_name": self.name,
            "status": self.status,
            "is_alive": self.is_alive(),
            "execution_count": self.execution_count,
            "last_execution": self.last_execution.isoformat() if self.last_execution else None,
            "error_count": self.error_count,
            "last_error": self.last_error,
            "next_run": self.keyword.next_run.isoformat() if self.keyword and self.keyword.next_run else None
        }
class MultiThreadScheduler:
    """Supervisor that runs one KeywordThread per active keyword.

    The main loop re-reads the keyword collection every 30 seconds,
    starting threads for new active keywords, stopping threads for
    deactivated ones, and restarting threads that have died.
    """

    def __init__(self):
        self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        self.db_name = os.getenv("DB_NAME", "ai_writer_db")
        self.redis_url = os.getenv("REDIS_URL", "redis://redis:6379")
        self.threads: Dict[str, KeywordThread] = {}
        self.running = True
        # Publish this instance as the module-level singleton so other
        # components in this process can query thread status.
        global scheduler_instance
        scheduler_instance = self

    async def start(self):
        """Connect to MongoDB, seed defaults, start threads, then supervise."""
        logger.info("Starting Multi-threaded Keyword Scheduler")
        client = AsyncIOMotorClient(self.mongodb_url)
        self.db = client[self.db_name]
        await self.initialize_keywords()
        await self.load_and_start_threads()
        while self.running:
            try:
                await self.check_new_keywords()
                await asyncio.sleep(30)  # poll for keyword changes every 30s
            except Exception as e:
                logger.error(f"Error in main loop: {e}")
                await asyncio.sleep(30)

    async def initialize_keywords(self):
        """Create a default keyword set when the collection is empty."""
        try:
            count = await self.db.keywords.count_documents({})
            if count == 0:
                logger.info("No keywords found. Creating default keywords...")
                default_keywords = [
                    {
                        "keyword": "AI",
                        "interval_minutes": 60,
                        "is_active": True,
                        "priority": 1,
                        "rss_feeds": [],
                        "next_run": datetime.now() + timedelta(minutes=1)
                    },
                    {
                        "keyword": "경제",
                        "interval_minutes": 120,
                        "is_active": True,
                        "priority": 0,
                        "rss_feeds": [],
                        "next_run": datetime.now() + timedelta(minutes=1)
                    },
                    {
                        "keyword": "테크놀로지",
                        "interval_minutes": 60,
                        "is_active": True,
                        "priority": 1,
                        "rss_feeds": [],
                        "next_run": datetime.now() + timedelta(minutes=1)
                    }
                ]
                for kw_data in default_keywords:
                    keyword = Keyword(**kw_data)
                    await self.db.keywords.insert_one(keyword.model_dump())
                    logger.info(f"Created keyword: {keyword.keyword}")
            logger.info(f"Found {count} keywords in database")
        except Exception as e:
            logger.error(f"Error initializing keywords: {e}")

    async def load_and_start_threads(self):
        """Start one thread per active keyword found in MongoDB."""
        try:
            cursor = self.db.keywords.find({"is_active": True})
            keywords = await cursor.to_list(None)
            for keyword_doc in keywords:
                keyword = Keyword(**keyword_doc)
                if keyword.keyword not in self.threads:
                    self.start_keyword_thread(keyword.keyword)
            logger.info(f"Started {len(self.threads)} keyword threads")
        except Exception as e:
            logger.error(f"Error loading keywords: {e}")

    def start_keyword_thread(self, keyword_text: str):
        """Create, start and register a KeywordThread for one keyword."""
        if keyword_text not in self.threads:
            thread = KeywordThread(
                keyword_text=keyword_text,
                mongodb_url=self.mongodb_url,
                db_name=self.db_name,
                redis_url=self.redis_url
            )
            thread.start()
            self.threads[keyword_text] = thread
            logger.info(f"Started thread for keyword: {keyword_text}")

    async def check_new_keywords(self):
        """Reconcile running threads with the active keywords in MongoDB."""
        try:
            cursor = self.db.keywords.find({"is_active": True})
            active_keywords = await cursor.to_list(None)
            active_keyword_texts = {kw['keyword'] for kw in active_keywords}
            # Start threads for new keywords; restart any that have died.
            # Fix: previously a crashed thread stayed registered forever and
            # its keyword was silently never processed again.
            for keyword_text in active_keyword_texts:
                existing = self.threads.get(keyword_text)
                if existing is not None and not existing.is_alive():
                    del self.threads[keyword_text]
                    logger.warning(f"Thread for keyword '{keyword_text}' died; restarting")
                if keyword_text not in self.threads:
                    self.start_keyword_thread(keyword_text)
            # Stop threads whose keywords were deactivated or deleted.
            for keyword_text in list(self.threads.keys()):
                if keyword_text not in active_keyword_texts:
                    thread = self.threads[keyword_text]
                    thread.stop()
                    del self.threads[keyword_text]
                    logger.info(f"Stopped thread for keyword: {keyword_text}")
        except Exception as e:
            logger.error(f"Error checking new keywords: {e}")

    def stop(self):
        """Signal every thread to stop and wait briefly for them to exit."""
        self.running = False
        for thread in self.threads.values():
            thread.stop()
        for thread in self.threads.values():
            thread.join(timeout=5)  # don't block shutdown indefinitely
        logger.info("Multi-threaded Keyword Scheduler stopped")

    def get_threads_status(self):
        """Return a status snapshot for every managed thread."""
        status_list = []
        for thread in self.threads.values():
            status_list.append(thread.get_status())
        return status_list
async def main():
    """Entry point: run the multi-threaded scheduler until interrupted."""
    scheduler = MultiThreadScheduler()
    try:
        await scheduler.start()
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    finally:
        scheduler.stop()


if __name__ == "__main__":
    asyncio.run(main())

View File

@ -0,0 +1,5 @@
motor==3.6.0
redis[hiredis]==5.0.1
pydantic==2.5.0
fastapi==0.104.1
uvicorn==0.24.0
apscheduler==3.10.4

View File

@ -0,0 +1,203 @@
"""
News Pipeline Scheduler
뉴스 파이프라인 스케줄러 서비스
"""
import asyncio
import logging
import os
import sys
from datetime import datetime, timedelta
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from motor.motor_asyncio import AsyncIOMotorClient
# Import from shared module
from shared.models import KeywordSubscription, PipelineJob
from shared.queue_manager import QueueManager
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class NewsScheduler:
    """APScheduler-based orchestrator for the news pipeline.

    Registers three jobs: a 30-minute sweep of all active keywords, extra
    runs for priority keywords at 07:00/12:00/18:00, and a (stubbed) daily
    statistics reset at midnight.
    """

    def __init__(self):
        self.scheduler = AsyncIOScheduler()
        self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        self.db_name = os.getenv("DB_NAME", "ai_writer_db")
        self.db = None
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )

    async def start(self):
        """Connect to MongoDB/Redis, register the recurring jobs and start them."""
        logger.info("Starting News Pipeline Scheduler")
        # MongoDB connection
        client = AsyncIOMotorClient(self.mongodb_url)
        self.db = client[self.db_name]
        # Redis connection
        await self.queue_manager.connect()
        # Base schedule: process all active keywords every 30 minutes.
        self.scheduler.add_job(
            self.process_keywords,
            'interval',
            minutes=30,
            id='keyword_processor',
            name='Process Active Keywords'
        )
        # Extra runs at peak hours (07:00, 12:00 and 18:00).
        for hour in [7, 12, 18]:
            self.scheduler.add_job(
                self.process_priority_keywords,
                'cron',
                hour=hour,
                minute=0,
                id=f'priority_processor_{hour}',
                name=f'Process Priority Keywords at {hour}:00'
            )
        # Reset daily statistics at midnight.
        self.scheduler.add_job(
            self.reset_daily_stats,
            'cron',
            hour=0,
            minute=0,
            id='stats_reset',
            name='Reset Daily Statistics'
        )
        self.scheduler.start()
        logger.info("Scheduler started successfully")
        # Kick off one pass immediately on startup.
        await self.process_keywords()

    async def process_keywords(self):
        """Create jobs for active keywords not processed in the last 30 minutes."""
        try:
            logger.info("Processing active keywords")
            now = datetime.now()
            thirty_minutes_ago = now - timedelta(minutes=30)
            # Active keywords never processed, or processed over 30 minutes ago.
            keywords = await self.db.keywords.find({
                "is_active": True,
                "$or": [
                    {"last_processed": {"$lt": thirty_minutes_ago}},
                    {"last_processed": None}
                ]
            }).to_list(None)
            logger.info(f"Found {len(keywords)} keywords to process")
            for keyword_doc in keywords:
                await self._create_job(keyword_doc)
                # Record when this keyword was last handed to the pipeline.
                await self.db.keywords.update_one(
                    {"keyword_id": keyword_doc['keyword_id']},
                    {"$set": {"last_processed": now}}
                )
            logger.info(f"Created jobs for {len(keywords)} keywords")
        except Exception as e:
            logger.error(f"Error processing keywords: {e}")

    async def process_priority_keywords(self):
        """Create high-priority jobs for keywords flagged as priority."""
        try:
            logger.info("Processing priority keywords")
            keywords = await self.db.keywords.find({
                "is_active": True,
                "is_priority": True
            }).to_list(None)
            for keyword_doc in keywords:
                await self._create_job(keyword_doc, priority=1)
            logger.info(f"Created priority jobs for {len(keywords)} keywords")
        except Exception as e:
            logger.error(f"Error processing priority keywords: {e}")

    async def _create_job(self, keyword_doc: dict, priority: int = 0):
        """Convert one keyword document into an rss_collection pipeline job."""
        try:
            # Validate/normalize the raw document via the shared model.
            keyword = KeywordSubscription(**keyword_doc)
            job = PipelineJob(
                keyword_id=keyword.keyword_id,
                keyword=keyword.keyword,
                stage='rss_collection',
                stages_completed=[],
                priority=priority,
                data={
                    'keyword': keyword.keyword,
                    'language': keyword.language,
                    'rss_feeds': keyword.rss_feeds or self._get_default_rss_feeds(),
                    'categories': keyword.categories
                }
            )
            # Enqueue onto the first pipeline stage.
            await self.queue_manager.enqueue(
                'rss_collection',
                job,
                priority=priority
            )
            logger.info(f"Created job {job.job_id} for keyword '{keyword.keyword}'")
        except Exception as e:
            logger.error(f"Error creating job for keyword: {e}")

    def _get_default_rss_feeds(self) -> list:
        """Fallback RSS feed list used when a keyword defines none."""
        return [
            "https://news.google.com/rss/search?q={keyword}&hl=ko&gl=KR&ceid=KR:ko",
            "https://trends.google.com/trends/trendingsearches/daily/rss?geo=KR",
            "https://www.mk.co.kr/rss/40300001/",  # Maeil Business Newspaper
            "https://www.hankyung.com/feed/all-news",  # The Korea Economic Daily
            "https://www.zdnet.co.kr/news/news_rss.xml",  # ZDNet Korea
        ]

    async def reset_daily_stats(self):
        """Reset daily Redis statistics (not yet implemented)."""
        try:
            logger.info("Resetting daily statistics")
            # TODO: clear the per-day Redis counters once they exist.
            pass
        except Exception as e:
            logger.error(f"Error resetting stats: {e}")

    async def stop(self):
        """Shut down APScheduler and disconnect from Redis."""
        self.scheduler.shutdown()
        await self.queue_manager.disconnect()
        logger.info("Scheduler stopped")
async def main():
    """Entry point: start the scheduler and keep the process alive."""
    scheduler = NewsScheduler()
    try:
        await scheduler.start()
        # APScheduler runs in the background; keep the event loop alive.
        while True:
            await asyncio.sleep(60)
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    finally:
        await scheduler.stop()


if __name__ == "__main__":
    asyncio.run(main())

View File

@ -0,0 +1,173 @@
"""
Single Keyword Scheduler Service
단일 키워드를 전담하는 스케줄러
"""
import asyncio
import logging
import os
import sys
from datetime import datetime, timedelta
from motor.motor_asyncio import AsyncIOMotorClient
import uuid
# Import from shared module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from shared.models import Keyword, PipelineJob
from shared.queue_manager import QueueManager
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class SingleKeywordScheduler:
    """Scheduler dedicated to a single keyword, configured via environment.

    KEYWORD selects the keyword; INTERVAL_MINUTES, PRIORITY, RSS_FEEDS and
    MAX_ARTICLES configure the document when it is first created.
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        self.db_name = os.getenv("DB_NAME", "ai_writer_db")
        # The keyword this instance is responsible for (required).
        self.keyword_text = os.getenv("KEYWORD")
        self.interval_minutes = int(os.getenv("INTERVAL_MINUTES", "60"))
        self.db = None
        self.keyword = None

    async def start(self):
        """Connect, ensure the keyword document exists, then loop forever."""
        if not self.keyword_text:
            logger.error("KEYWORD environment variable is required")
            return
        logger.info(f"Starting Single Keyword Scheduler for '{self.keyword_text}'")
        # Redis connection
        await self.queue_manager.connect()
        # MongoDB connection
        client = AsyncIOMotorClient(self.mongodb_url)
        self.db = client[self.db_name]
        # Load or create the keyword document.
        await self.initialize_keyword()
        if not self.keyword:
            logger.error(f"Failed to initialize keyword '{self.keyword_text}'")
            return
        # Main loop: this process handles only this keyword.
        while True:
            try:
                await self.check_and_execute()
                # Wait one full interval before the next execution.
                sleep_seconds = self.keyword.interval_minutes * 60
                logger.info(f"Sleeping for {self.keyword.interval_minutes} minutes until next execution")
                await asyncio.sleep(sleep_seconds)
            except Exception as e:
                logger.error(f"Error in scheduler loop: {e}")
                await asyncio.sleep(60)  # retry after one minute on error

    async def initialize_keyword(self):
        """Load the keyword document, creating it from env config if absent."""
        try:
            keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text})
            if keyword_doc:
                self.keyword = Keyword(**keyword_doc)
                logger.info(f"Loaded existing keyword: {self.keyword_text}")
            else:
                # Build a new document from the environment configuration.
                self.keyword = Keyword(
                    keyword=self.keyword_text,
                    interval_minutes=self.interval_minutes,
                    is_active=True,
                    priority=int(os.getenv("PRIORITY", "0")),
                    rss_feeds=os.getenv("RSS_FEEDS", "").split(",") if os.getenv("RSS_FEEDS") else [],
                    max_articles_per_run=int(os.getenv("MAX_ARTICLES", "100"))
                )
                await self.db.keywords.insert_one(self.keyword.model_dump())
                logger.info(f"Created new keyword: {self.keyword_text}")
        except Exception as e:
            logger.error(f"Error initializing keyword: {e}")

    async def check_and_execute(self):
        """Reload the keyword and execute it unless it has been deactivated.

        NOTE(review): this executes on every loop pass regardless of the
        stored ``next_run``, so the cadence is driven solely by the sleep in
        start() — confirm that ignoring next_run here is intentional.
        """
        try:
            # Reload the latest keyword state so external edits take effect.
            keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text})
            if not keyword_doc:
                logger.error(f"Keyword '{self.keyword_text}' not found in database")
                return
            self.keyword = Keyword(**keyword_doc)
            # Skip execution while deactivated.
            if not self.keyword.is_active:
                logger.info(f"Keyword '{self.keyword_text}' is inactive, skipping")
                return
            await self.execute_keyword()
        except Exception as e:
            logger.error(f"Error checking keyword: {e}")

    async def execute_keyword(self):
        """Enqueue an rss_collection job and advance last_run/next_run."""
        try:
            logger.info(f"Executing keyword: {self.keyword.keyword}")
            job = PipelineJob(
                keyword_id=self.keyword.keyword_id,
                keyword=self.keyword.keyword,
                stage='rss_collection',
                data={
                    'rss_feeds': self.keyword.rss_feeds if self.keyword.rss_feeds else [],
                    'max_articles': self.keyword.max_articles_per_run,
                    'scheduled': True,
                    'scheduler_instance': f"single-{self.keyword_text}"
                },
                priority=self.keyword.priority
            )
            # Hand the job to the first pipeline stage.
            await self.queue_manager.enqueue('rss_collection', job)
            logger.info(f"Enqueued job for keyword '{self.keyword.keyword}' with job_id: {job.job_id}")
            # Record this run and the next scheduled time in MongoDB.
            update_data = {
                "last_run": datetime.now(),
                "next_run": datetime.now() + timedelta(minutes=self.keyword.interval_minutes),
                "updated_at": datetime.now()
            }
            await self.db.keywords.update_one(
                {"keyword_id": self.keyword.keyword_id},
                {"$set": update_data}
            )
            logger.info(f"Updated keyword '{self.keyword.keyword}' - next run at {update_data['next_run']}")
        except Exception as e:
            logger.error(f"Error executing keyword {self.keyword.keyword}: {e}")

    async def stop(self):
        """Disconnect from Redis."""
        await self.queue_manager.disconnect()
        logger.info(f"Single Keyword Scheduler for '{self.keyword_text}' stopped")
async def main():
    """Entry point: run the single-keyword scheduler until interrupted."""
    scheduler = SingleKeywordScheduler()
    try:
        await scheduler.start()
    except KeyboardInterrupt:
        logger.info("Received interrupt signal")
    finally:
        await scheduler.stop()


if __name__ == "__main__":
    asyncio.run(main())