feat: Implement automated keyword-based news pipeline scheduler
- Add multi-threaded keyword scheduler for periodic news collection
- Create Keyword Manager API for CRUD operations and monitoring
- Implement automatic pipeline triggering (RSS → Google → AI → Translation)
- Add thread status monitoring and dynamic keyword management
- Support priority-based execution and configurable intervals
- Add comprehensive scheduler documentation guide
- Default keywords: AI, 테크놀로지, 경제, 블록체인

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
336
services/pipeline/scheduler/keyword_manager.py
Normal file
336
services/pipeline/scheduler/keyword_manager.py
Normal file
@ -0,0 +1,336 @@
|
||||
"""
|
||||
Keyword Manager API
|
||||
Management API for adding, updating, and deleting keywords.
|
||||
"""
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
import uvicorn
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
|
||||
# Import from shared module
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from shared.models import Keyword
|
||||
|
||||
# FastAPI application instance; every route and lifecycle hook below is registered on it.
app = FastAPI(title="Keyword Manager API")

# MongoDB connection settings, each overridable through an environment variable.
mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
db_name = os.getenv("DB_NAME", "ai_writer_db")
|
||||
|
||||
@app.on_event("startup")
async def startup_event():
    """Open the MongoDB connection when the application starts.

    The Motor client is stored on ``app.mongodb_client`` and the database
    selected by ``db_name`` on ``app.db`` so the route handlers can reach it.
    """
    client = AsyncIOMotorClient(mongodb_url)
    app.mongodb_client = client
    app.db = client[db_name]
|
||||
|
||||
@app.on_event("shutdown")
async def shutdown_event():
    """Close the MongoDB client when the application shuts down."""
    client = app.mongodb_client
    client.close()
|
||||
|
||||
class KeywordCreate(BaseModel):
    """Request body for creating a keyword."""

    # Keyword text; create_keyword rejects a value that already exists.
    keyword: str
    # Interval in minutes, stored on the keyword document as given.
    interval_minutes: int = 60
    # The /threads/status listing is sorted with higher priorities first.
    priority: int = 0
    # RSS feed entries stored with the keyword.
    rss_feeds: List[str] = []
    # Stored on the keyword document as given.
    max_articles_per_run: int = 100
    # Whether the keyword is created in the active state.
    is_active: bool = True
|
||||
|
||||
class KeywordUpdate(BaseModel):
    """Request body for updating a keyword.

    Every field is optional; update_keyword only writes the fields whose
    value is not None.
    """

    # Changing the interval also makes update_keyword recompute next_run.
    interval_minutes: Optional[int] = None
    priority: Optional[int] = None
    rss_feeds: Optional[List[str]] = None
    max_articles_per_run: Optional[int] = None
    is_active: Optional[bool] = None
|
||||
|
||||
@app.get("/")
async def root():
    """Report that the API is up."""
    payload = {"status": "Keyword Manager API is running"}
    return payload
|
||||
|
||||
@app.get("/threads/status")
async def get_threads_status():
    """Return the scheduling status of every keyword stored in MongoDB.

    Each entry carries the keyword's stored settings, its last/next run as
    ISO-8601 strings (or None), and a derived ``thread_status``:

    * ``"inactive"``          - the keyword is not active,
    * ``"pending_execution"`` - the keyword is active and ``next_run`` is due,
    * ``"active"``            - the keyword is active and not yet due.

    When ``next_run`` is set, ``minutes_until_next_run`` is included as well
    (0 once the run is due).

    Returns:
        dict with ``total_threads``, ``active_threads`` and ``threads``
        (sorted by priority, highest first).

    Raises:
        HTTPException: 500 with the error text if anything fails.
    """
    try:
        keywords = await app.db.keywords.find().to_list(None)

        # One reference time for the whole response keeps the entries comparable.
        now = datetime.now()

        threads_status = []
        for kw in keywords:
            is_active = kw.get("is_active")
            last_run = kw.get("last_run")
            next_run = kw.get("next_run")

            status = {
                "keyword": kw.get("keyword"),
                "keyword_id": kw.get("keyword_id"),
                "is_active": is_active,
                "interval_minutes": kw.get("interval_minutes"),
                "priority": kw.get("priority"),
                "last_run": last_run.isoformat() if last_run else None,
                "next_run": next_run.isoformat() if next_run else None,
                "thread_status": "active" if is_active else "inactive",
            }

            # Time remaining until the next scheduled run.
            if next_run:
                remaining = (next_run - now).total_seconds()
                if remaining > 0:
                    status["minutes_until_next_run"] = round(remaining / 60, 1)
                else:
                    status["minutes_until_next_run"] = 0
                    # Only an active keyword can be waiting to run; an inactive
                    # one with a stale next_run stays "inactive".
                    if is_active:
                        status["thread_status"] = "pending_execution"

            threads_status.append(status)

        # Highest priority first. The "priority" key is always present, so a
        # document without one holds None; treat that as 0 instead of letting
        # the None-vs-int comparison raise TypeError.
        threads_status.sort(key=lambda item: item.get("priority") or 0, reverse=True)

        return {
            "total_threads": len(threads_status),
            "active_threads": sum(1 for t in threads_status if t.get("is_active")),
            "threads": threads_status,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
@app.get("/keywords")
async def list_keywords():
    """List every keyword document together with a total count.

    Returns:
        dict with ``total`` and ``keywords``; run timestamps are ISO-8601
        strings or None.

    Raises:
        HTTPException: 500 with the error text if anything fails.
    """
    try:
        documents = await app.db.keywords.find().to_list(None)

        # Project each stored document onto the public response shape.
        result = [
            {
                "keyword_id": doc.get("keyword_id"),
                "keyword": doc.get("keyword"),
                "interval_minutes": doc.get("interval_minutes"),
                "priority": doc.get("priority"),
                "is_active": doc.get("is_active"),
                "last_run": doc.get("last_run").isoformat() if doc.get("last_run") else None,
                "next_run": doc.get("next_run").isoformat() if doc.get("next_run") else None,
                "rss_feeds": doc.get("rss_feeds", []),
                "max_articles_per_run": doc.get("max_articles_per_run", 100),
            }
            for doc in documents
        ]

        return {"total": len(result), "keywords": result}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/keywords/{keyword_text}")
async def get_keyword(keyword_text: str):
    """Fetch a single keyword by its text.

    Args:
        keyword_text: value of the ``keyword`` field to look up.

    Returns:
        dict describing the keyword; run timestamps are ISO-8601 strings or None.

    Raises:
        HTTPException: 404 if no such keyword exists, 500 on any other error.
    """
    try:
        doc = await app.db.keywords.find_one({"keyword": keyword_text})
        if not doc:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")

        last_run = doc.get("last_run")
        next_run = doc.get("next_run")

        response = {
            "keyword_id": doc.get("keyword_id"),
            "keyword": doc.get("keyword"),
            "interval_minutes": doc.get("interval_minutes"),
            "priority": doc.get("priority"),
            "is_active": doc.get("is_active"),
            "last_run": last_run.isoformat() if last_run else None,
            "next_run": next_run.isoformat() if next_run else None,
            "rss_feeds": doc.get("rss_feeds", []),
            "max_articles_per_run": doc.get("max_articles_per_run", 100),
        }
        return response
    except HTTPException:
        # Let the deliberate 404 through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/keywords")
async def create_keyword(keyword_data: KeywordCreate):
    """Create a new keyword.

    The first run is scheduled one minute after creation.

    Args:
        keyword_data: settings for the new keyword.

    Returns:
        dict with a confirmation ``message``, the generated ``keyword_id``
        and an informational ``note``.

    Raises:
        HTTPException: 400 if the keyword already exists, 500 on any other error.
    """
    try:
        # Duplicate check.
        # NOTE(review): check-then-insert is not atomic; two concurrent requests
        # could both pass it unless the collection has a unique index on
        # "keyword" - confirm.
        existing = await app.db.keywords.find_one({"keyword": keyword_data.keyword})
        if existing:
            raise HTTPException(status_code=400, detail=f"Keyword '{keyword_data.keyword}' already exists")

        # Take a single timestamp so created_at and updated_at are identical on
        # a fresh record and next_run is exactly one minute after them.
        now = datetime.now()

        keyword = Keyword(
            keyword_id=str(uuid.uuid4()),
            keyword=keyword_data.keyword,
            interval_minutes=keyword_data.interval_minutes,
            priority=keyword_data.priority,
            rss_feeds=keyword_data.rss_feeds,
            max_articles_per_run=keyword_data.max_articles_per_run,
            is_active=keyword_data.is_active,
            next_run=now + timedelta(minutes=1),  # first run one minute from now
            created_at=now,
            updated_at=now,
        )

        await app.db.keywords.insert_one(keyword.model_dump())

        return {
            "message": f"Keyword '{keyword_data.keyword}' created successfully",
            "keyword_id": keyword.keyword_id,
            "note": "The scheduler will automatically detect and start processing this keyword within 30 seconds",
        }
    except HTTPException:
        # Let the deliberate 400 through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
@app.put("/keywords/{keyword_text}")
async def update_keyword(keyword_text: str, update_data: KeywordUpdate):
    """Update the settings of an existing keyword.

    Only fields that are not None in ``update_data`` are written. Changing
    ``interval_minutes`` also reschedules ``next_run`` to one new interval
    from now.

    Args:
        keyword_text: value of the ``keyword`` field identifying the document.
        update_data: fields to change.

    Returns:
        dict with a ``message``; on a successful change also ``updated_fields``
        and a ``note`` about the scheduler when the active state flipped.

    Raises:
        HTTPException: 404 if the keyword does not exist (including when it
        vanishes between the lookup and the write), 500 on any other error.
    """
    try:
        # The stored document is needed to know the previous is_active state.
        existing = await app.db.keywords.find_one({"keyword": keyword_text})
        if not existing:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")

        # Keep only the fields the caller actually supplied.
        update_dict = update_data.model_dump(exclude_none=True)
        if not update_dict:
            return {"message": "No update data provided"}

        now = datetime.now()
        update_dict["updated_at"] = now

        # A new interval restarts the schedule from now.
        if "interval_minutes" in update_dict:
            update_dict["next_run"] = now + timedelta(minutes=update_dict["interval_minutes"])

        result = await app.db.keywords.update_one(
            {"keyword": keyword_text},
            {"$set": update_dict},
        )

        # The document may have been deleted since the lookup above; report
        # that as "not found" rather than as "no changes".
        if result.matched_count == 0:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")

        if result.modified_count == 0:
            return {"message": "No changes made"}

        # Mention a scheduler reaction only when the active state really changed.
        was_active = bool(existing.get("is_active"))
        action_note = ""
        if update_data.is_active is False and was_active:
            action_note = "The scheduler will stop the thread for this keyword within 30 seconds."
        elif update_data.is_active is True and not was_active:
            action_note = "The scheduler will start a new thread for this keyword within 30 seconds."

        return {
            "message": f"Keyword '{keyword_text}' updated successfully",
            "updated_fields": list(update_dict.keys()),
            "note": action_note,
        }
    except HTTPException:
        # Let the deliberate 404 through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
@app.delete("/keywords/{keyword_text}")
async def delete_keyword(keyword_text: str):
    """Delete a keyword.

    Args:
        keyword_text: value of the ``keyword`` field identifying the document.

    Returns:
        dict with a confirmation ``message`` and an informational ``note``.

    Raises:
        HTTPException: 404 if no such keyword exists, 500 on any other error.
    """
    try:
        # A single delete both checks for existence and removes the document,
        # so a concurrent delete yields a 404 instead of a misleading 500.
        result = await app.db.keywords.delete_one({"keyword": keyword_text})

        if result.deleted_count == 0:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")

        return {
            "message": f"Keyword '{keyword_text}' deleted successfully",
            "note": "The scheduler will stop the thread for this keyword within 30 seconds",
        }
    except HTTPException:
        # Let the deliberate 404 through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
@app.post("/keywords/{keyword_text}/activate")
async def activate_keyword(keyword_text: str):
    """Mark a keyword as active.

    Args:
        keyword_text: value of the ``keyword`` field identifying the document.

    Returns:
        dict with a confirmation ``message`` and an informational ``note``.

    Raises:
        HTTPException: 404 if no such keyword exists, 500 on any other error.
    """
    try:
        selector = {"keyword": keyword_text}
        changes = {"$set": {"is_active": True, "updated_at": datetime.now()}}
        outcome = await app.db.keywords.update_one(selector, changes)

        if outcome.matched_count == 0:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")

        response = {
            "message": f"Keyword '{keyword_text}' activated",
            "note": "The scheduler will start processing this keyword within 30 seconds",
        }
        return response
    except HTTPException:
        # Let the deliberate 404 through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/keywords/{keyword_text}/deactivate")
async def deactivate_keyword(keyword_text: str):
    """Mark a keyword as inactive.

    Args:
        keyword_text: value of the ``keyword`` field identifying the document.

    Returns:
        dict with a confirmation ``message`` and an informational ``note``.

    Raises:
        HTTPException: 404 if no such keyword exists, 500 on any other error.
    """
    try:
        selector = {"keyword": keyword_text}
        changes = {"$set": {"is_active": False, "updated_at": datetime.now()}}
        outcome = await app.db.keywords.update_one(selector, changes)

        if outcome.matched_count == 0:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")

        response = {
            "message": f"Keyword '{keyword_text}' deactivated",
            "note": "The scheduler will stop processing this keyword within 30 seconds",
        }
        return response
    except HTTPException:
        # Let the deliberate 404 through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/keywords/{keyword_text}/trigger")
async def trigger_keyword(keyword_text: str):
    """Request immediate execution of a keyword.

    Sets the keyword's ``next_run`` to the current time so that it is due
    right away.

    Args:
        keyword_text: value of the ``keyword`` field identifying the document.

    Returns:
        dict with a confirmation ``message`` and an informational ``note``.

    Raises:
        HTTPException: 404 if no such keyword exists, 500 on any other error.
    """
    try:
        selector = {"keyword": keyword_text}
        # Moving next_run to "now" makes the keyword due immediately.
        changes = {"$set": {"next_run": datetime.now(), "updated_at": datetime.now()}}
        outcome = await app.db.keywords.update_one(selector, changes)

        if outcome.matched_count == 0:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")

        response = {
            "message": f"Keyword '{keyword_text}' triggered for immediate execution",
            "note": "The scheduler will execute this keyword within the next minute",
        }
        return response
    except HTTPException:
        # Let the deliberate 404 through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
if __name__ == "__main__":
    # Serve on all interfaces; the port comes from API_PORT (default 8100).
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("API_PORT", "8100")))
|
||||
Reference in New Issue
Block a user