Files
site11/services/pipeline/scheduler/keyword_manager.py
jungwoo choi eeaa9dcb4b feat: Implement automated keyword-based news pipeline scheduler
- Add multi-threaded keyword scheduler for periodic news collection
- Create Keyword Manager API for CRUD operations and monitoring
- Implement automatic pipeline triggering (RSS → Google → AI → Translation)
- Add thread status monitoring and dynamic keyword management
- Support priority-based execution and configurable intervals
- Add comprehensive scheduler documentation guide
- Default keywords: AI, 테크놀로지, 경제, 블록체인

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-15 17:09:22 +09:00

336 lines
12 KiB
Python

"""
Keyword Manager API
키워드를 추가/수정/삭제하는 관리 API
"""
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional
from datetime import datetime, timedelta
from motor.motor_asyncio import AsyncIOMotorClient
import uvicorn
import os
import sys
import uuid
# Import from shared module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from shared.models import Keyword
# FastAPI application instance; the route and lifecycle handlers in this file register on it.
app = FastAPI(title="Keyword Manager API")
# MongoDB connection settings, overridable via the MONGODB_URL and DB_NAME environment variables.
mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
db_name = os.getenv("DB_NAME", "ai_writer_db")
@app.on_event("startup")
async def startup_event():
    """Create the MongoDB client at application startup.

    Stores the client on ``app.mongodb_client`` and the selected database
    (named by ``db_name``) on ``app.db`` for the request handlers to use.
    """
    client = AsyncIOMotorClient(mongodb_url)
    app.mongodb_client = client
    app.db = client[db_name]
@app.on_event("shutdown")
async def shutdown_event():
    """Close the MongoDB client stored on ``app.mongodb_client`` at shutdown."""
    client = app.mongodb_client
    client.close()
class KeywordCreate(BaseModel):
    """Request body for creating a keyword."""
    # Keyword text; the endpoints in this file look keywords up by this value.
    keyword: str
    # Interval in minutes; the update endpoint uses this field to recompute next_run.
    interval_minutes: int = 60
    # The thread-status listing sorts by this value in descending order.
    priority: int = 0
    # Presumably RSS feed URLs for this keyword -- TODO confirm against the scheduler.
    rss_feeds: List[str] = []
    # Presumably a per-run cap on collected articles -- TODO confirm against the scheduler.
    max_articles_per_run: int = 100
    # Whether the keyword is active; reported as "active"/"inactive" in the status listing.
    is_active: bool = True
class KeywordUpdate(BaseModel):
    """Request body for updating a keyword.

    Every field is optional; the update endpoint only writes fields whose
    value is not None, so an omitted field leaves the stored value as is.
    """
    # Changing this also makes the update endpoint recompute next_run.
    interval_minutes: Optional[int] = None
    priority: Optional[int] = None
    rss_feeds: Optional[List[str]] = None
    max_articles_per_run: Optional[int] = None
    is_active: Optional[bool] = None
@app.get("/")
async def root():
    """Report that the API is up.

    Returns:
        A dict with a single ``status`` message.
    """
    payload = {"status": "Keyword Manager API is running"}
    return payload
@app.get("/threads/status")
async def get_threads_status():
    """Return the scheduling status of every stored keyword.

    The status is derived purely from the keyword documents in MongoDB
    (``is_active`` and ``next_run``); no live thread is inspected here.

    Returns:
        A dict with ``total_threads``, ``active_threads`` (number of entries
        whose ``is_active`` is truthy) and ``threads``, a list of per-keyword
        status dicts sorted by priority, highest first. Entries with a
        ``next_run`` also carry ``minutes_until_next_run``; an active keyword
        whose ``next_run`` has passed is reported as ``pending_execution``.

    Raises:
        HTTPException: 500 with the error text if anything fails.
    """
    try:
        keywords = await app.db.keywords.find().to_list(None)
        # Take one snapshot of the clock so every entry is measured against
        # the same instant.
        # NOTE(review): assumes next_run is stored as a naive datetime that is
        # comparable with datetime.now(), as the write endpoints here do.
        now = datetime.now()
        threads_status = []
        for kw in keywords:
            is_active = kw.get("is_active")
            last_run = kw.get("last_run")
            next_run = kw.get("next_run")
            status = {
                "keyword": kw.get("keyword"),
                "keyword_id": kw.get("keyword_id"),
                "is_active": is_active,
                "interval_minutes": kw.get("interval_minutes"),
                "priority": kw.get("priority"),
                "last_run": last_run.isoformat() if last_run else None,
                "next_run": next_run.isoformat() if next_run else None,
                "thread_status": "active" if is_active else "inactive",
            }
            # Time remaining until the next scheduled run, in minutes.
            if next_run:
                remaining = (next_run - now).total_seconds()
                if remaining > 0:
                    status["minutes_until_next_run"] = round(remaining / 60, 1)
                else:
                    status["minutes_until_next_run"] = 0
                    # Only an active keyword can be waiting to execute; an
                    # inactive one with a stale next_run stays "inactive".
                    if is_active:
                        status["thread_status"] = "pending_execution"
            threads_status.append(status)
        # Highest priority first. The "priority" key is always present, so a
        # document without the field yields None; treat that as 0 instead of
        # letting the None-vs-int comparison raise TypeError.
        threads_status.sort(key=lambda entry: entry.get("priority") or 0, reverse=True)
        return {
            "total_threads": len(threads_status),
            "active_threads": sum(1 for t in threads_status if t.get("is_active")),
            "threads": threads_status,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/keywords")
async def list_keywords():
    """Return every stored keyword with its configuration.

    Returns:
        A dict with ``total`` (number of keywords) and ``keywords``, a list of
        per-keyword dicts. ``last_run`` / ``next_run`` are ISO-8601 strings or
        None; ``rss_feeds`` defaults to ``[]`` and ``max_articles_per_run``
        to 100 when the stored document lacks them.

    Raises:
        HTTPException: 500 with the error text if anything fails.
    """
    try:
        documents = await app.db.keywords.find().to_list(None)

        def _iso(value):
            # Falsy timestamps (missing or None) are reported as None.
            return value.isoformat() if value else None

        items = [
            {
                "keyword_id": doc.get("keyword_id"),
                "keyword": doc.get("keyword"),
                "interval_minutes": doc.get("interval_minutes"),
                "priority": doc.get("priority"),
                "is_active": doc.get("is_active"),
                "last_run": _iso(doc.get("last_run")),
                "next_run": _iso(doc.get("next_run")),
                "rss_feeds": doc.get("rss_feeds", []),
                "max_articles_per_run": doc.get("max_articles_per_run", 100),
            }
            for doc in documents
        ]
        return {"total": len(items), "keywords": items}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/keywords/{keyword_text}")
async def get_keyword(keyword_text: str):
    """Return the stored configuration of a single keyword.

    Args:
        keyword_text: Keyword text to look up (matched against ``keyword``).

    Returns:
        A dict describing the keyword; timestamps are ISO-8601 strings or None.

    Raises:
        HTTPException: 404 if no such keyword exists, 500 on any other error.
    """
    try:
        doc = await app.db.keywords.find_one({"keyword": keyword_text})
        if not doc:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
        # Plain fields are copied through unchanged.
        summary = {
            name: doc.get(name)
            for name in ("keyword_id", "keyword", "interval_minutes", "priority", "is_active")
        }
        # Timestamps are serialized; a missing or None value stays None.
        for stamp in ("last_run", "next_run"):
            value = doc.get(stamp)
            summary[stamp] = value.isoformat() if value else None
        summary["rss_feeds"] = doc.get("rss_feeds", [])
        summary["max_articles_per_run"] = doc.get("max_articles_per_run", 100)
        return summary
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/keywords")
async def create_keyword(keyword_data: KeywordCreate):
    """Create a new keyword document.

    The new keyword gets a random UUID as ``keyword_id`` and a ``next_run``
    one minute after creation. ``created_at`` and ``updated_at`` share the
    same timestamp.

    Args:
        keyword_data: Validated creation payload.

    Returns:
        A dict with a confirmation ``message``, the new ``keyword_id`` and an
        informational ``note``.

    Raises:
        HTTPException: 400 if a keyword with the same text already exists,
            500 on any other error.
    """
    try:
        # Duplicate check.
        # NOTE(review): this is check-then-insert; two concurrent requests can
        # both pass it. Closing that gap needs a unique index on "keyword".
        existing = await app.db.keywords.find_one({"keyword": keyword_data.keyword})
        if existing:
            raise HTTPException(status_code=400, detail=f"Keyword '{keyword_data.keyword}' already exists")
        # Read the clock once so created_at, updated_at and the next_run base
        # are mutually consistent.
        now = datetime.now()
        keyword = Keyword(
            keyword_id=str(uuid.uuid4()),
            keyword=keyword_data.keyword,
            interval_minutes=keyword_data.interval_minutes,
            priority=keyword_data.priority,
            rss_feeds=keyword_data.rss_feeds,
            max_articles_per_run=keyword_data.max_articles_per_run,
            is_active=keyword_data.is_active,
            next_run=now + timedelta(minutes=1),  # first run one minute from now
            created_at=now,
            updated_at=now,
        )
        await app.db.keywords.insert_one(keyword.model_dump())
        return {
            "message": f"Keyword '{keyword_data.keyword}' created successfully",
            "keyword_id": keyword.keyword_id,
            "note": "The scheduler will automatically detect and start processing this keyword within 30 seconds"
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.put("/keywords/{keyword_text}")
async def update_keyword(keyword_text: str, update_data: KeywordUpdate):
    """Update the supplied fields of an existing keyword.

    Only fields whose value is not None are written. Any write also refreshes
    ``updated_at``; changing ``interval_minutes`` additionally resets
    ``next_run`` to now plus the new interval.

    Args:
        keyword_text: Keyword text identifying the document to update.
        update_data: Partial update payload.

    Returns:
        A dict with a ``message``; on a successful write it also lists
        ``updated_fields`` and a ``note`` about the activation change (empty
        string when activation did not change).

    Raises:
        HTTPException: 404 if the keyword does not exist (including when it
            disappears between the lookup and the write), 500 on other errors.
    """
    try:
        # The previous state is needed later to tell "activated" from
        # "was already active".
        existing = await app.db.keywords.find_one({"keyword": keyword_text})
        if not existing:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
        # Collect the fields the client actually supplied, in model order.
        updatable = ("interval_minutes", "priority", "rss_feeds", "max_articles_per_run", "is_active")
        update_dict = {}
        for name in updatable:
            value = getattr(update_data, name)
            if value is not None:
                update_dict[name] = value
        if not update_dict:
            return {"message": "No update data provided"}
        # One clock reading keeps updated_at and the next_run base consistent.
        now = datetime.now()
        update_dict["updated_at"] = now
        # A new interval restarts the countdown from now.
        if "interval_minutes" in update_dict:
            update_dict["next_run"] = now + timedelta(minutes=update_dict["interval_minutes"])
        result = await app.db.keywords.update_one(
            {"keyword": keyword_text},
            {"$set": update_dict}
        )
        # The document can vanish between the lookup above and this write;
        # report that as "not found" rather than "no changes".
        if result.matched_count == 0:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
        if result.modified_count == 0:
            return {"message": "No changes made"}
        action_note = ""
        if update_data.is_active is False:
            action_note = "The scheduler will stop the thread for this keyword within 30 seconds."
        elif update_data.is_active is True and not existing.get("is_active"):
            action_note = "The scheduler will start a new thread for this keyword within 30 seconds."
        return {
            "message": f"Keyword '{keyword_text}' updated successfully",
            "updated_fields": list(update_dict.keys()),
            "note": action_note
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.delete("/keywords/{keyword_text}")
async def delete_keyword(keyword_text: str):
    """Delete a keyword document.

    Args:
        keyword_text: Keyword text identifying the document to delete.

    Returns:
        A dict with a confirmation ``message`` and an informational ``note``.

    Raises:
        HTTPException: 404 if no document was deleted because the keyword
            does not exist, 500 on any other error.
    """
    try:
        # A single delete both checks existence and removes the document, so
        # there is no window between a lookup and the delete in which a
        # concurrent removal would be misreported as a server error.
        result = await app.db.keywords.delete_one({"keyword": keyword_text})
        if result.deleted_count == 0:
            raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
        return {
            "message": f"Keyword '{keyword_text}' deleted successfully",
            "note": "The scheduler will stop the thread for this keyword within 30 seconds"
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/keywords/{keyword_text}/activate")
async def activate_keyword(keyword_text: str):
    """Mark a keyword as active and refresh its ``updated_at``.

    Args:
        keyword_text: Keyword text identifying the document to activate.

    Returns:
        A dict with a confirmation ``message`` and an informational ``note``.

    Raises:
        HTTPException: 404 if no document matched, 500 on any other error.
    """
    selector = {"keyword": keyword_text}
    try:
        changes = {"is_active": True, "updated_at": datetime.now()}
        outcome = await app.db.keywords.update_one(selector, {"$set": changes})
        if outcome.matched_count != 0:
            return {
                "message": f"Keyword '{keyword_text}' activated",
                "note": "The scheduler will start processing this keyword within 30 seconds"
            }
        raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/keywords/{keyword_text}/deactivate")
async def deactivate_keyword(keyword_text: str):
    """Mark a keyword as inactive and refresh its ``updated_at``.

    Args:
        keyword_text: Keyword text identifying the document to deactivate.

    Returns:
        A dict with a confirmation ``message`` and an informational ``note``.

    Raises:
        HTTPException: 404 if no document matched, 500 on any other error.
    """
    selector = {"keyword": keyword_text}
    try:
        changes = {"is_active": False, "updated_at": datetime.now()}
        outcome = await app.db.keywords.update_one(selector, {"$set": changes})
        if outcome.matched_count != 0:
            return {
                "message": f"Keyword '{keyword_text}' deactivated",
                "note": "The scheduler will stop processing this keyword within 30 seconds"
            }
        raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/keywords/{keyword_text}/trigger")
async def trigger_keyword(keyword_text: str):
    """Request immediate execution of a keyword.

    Sets the document's ``next_run`` (and ``updated_at``) to the current time
    so that it is already due.

    Args:
        keyword_text: Keyword text identifying the document to trigger.

    Returns:
        A dict with a confirmation ``message`` and an informational ``note``.

    Raises:
        HTTPException: 404 if no document matched, 500 on any other error.
    """
    selector = {"keyword": keyword_text}
    try:
        # Moving next_run to "now" makes the keyword due right away.
        changes = {"next_run": datetime.now(), "updated_at": datetime.now()}
        outcome = await app.db.keywords.update_one(selector, {"$set": changes})
        if outcome.matched_count != 0:
            return {
                "message": f"Keyword '{keyword_text}' triggered for immediate execution",
                "note": "The scheduler will execute this keyword within the next minute"
            }
        raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found")
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Serve on all interfaces; the port comes from API_PORT (default 8100).
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("API_PORT", "8100")))