feat: Implement automated keyword-based news pipeline scheduler

- Add multi-threaded keyword scheduler for periodic news collection
- Create Keyword Manager API for CRUD operations and monitoring
- Implement automatic pipeline triggering (RSS → Google → AI → Translation)
- Add thread status monitoring and dynamic keyword management
- Support priority-based execution and configurable intervals
- Add comprehensive scheduler documentation guide
- Default keywords: AI, 테크놀로지, 경제, 블록체인

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2025-09-15 17:09:22 +09:00
parent 070032006e
commit eeaa9dcb4b
39 changed files with 3472 additions and 759 deletions

View File

@ -10,9 +10,11 @@ import base64
from typing import List, Dict, Any
import httpx
from io import BytesIO
from motor.motor_asyncio import AsyncIOMotorClient
from bson import ObjectId
# Import from shared module
from shared.models import PipelineJob, TranslatedItem, GeneratedImageItem
from shared.models import PipelineJob
from shared.queue_manager import QueueManager
logging.basicConfig(level=logging.INFO)
@ -23,107 +25,136 @@ class ImageGeneratorWorker:
self.queue_manager = QueueManager(
redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
)
self.replicate_api_key = os.getenv("REPLICATE_API_KEY")
self.replicate_api_key = os.getenv("REPLICATE_API_TOKEN")
self.replicate_api_url = "https://api.replicate.com/v1/predictions"
# Stable Diffusion 모델 사용
self.model_version = "stability-ai/sdxl:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b"
self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
self.db_name = os.getenv("DB_NAME", "ai_writer_db")
self.db = None
async def start(self):
    """Connect to Redis and MongoDB, then consume image-generation jobs.

    The method never returns on its own: after the connections are set up
    it loops forever, pulling one job at a time from the
    ``image_generation`` queue and handing it to ``process_job``. Any
    exception raised inside an iteration is logged and followed by a
    one-second pause before the loop continues.
    """
    logger.info("Starting Image Generator Worker")

    # Queue (Redis) connection first.
    await self.queue_manager.connect()

    # Bind the configured database on a new Motor client.
    mongo_client = AsyncIOMotorClient(self.mongodb_url)
    self.db = mongo_client[self.db_name]

    # A missing API key is only reported here; it does not stop the worker.
    if not self.replicate_api_key:
        logger.warning("Replicate API key not configured - using placeholder images")

    # Main processing loop: one dequeue attempt (5 s timeout) per pass.
    while True:
        try:
            pending = await self.queue_manager.dequeue('image_generation', timeout=5)
            if not pending:
                continue
            await self.process_job(pending)
        except Exception as exc:
            logger.error(f"Error in worker loop: {exc}")
            await asyncio.sleep(1)
# NOTE(review): this span looks like a rendered diff in which removed and added
# lines are interleaved without +/- markers and with indentation stripped (two
# consecutive docstrings, two rate-limit `if` lines, hand-offs to both
# 'article_assembly' and 'translation'). Confirm statement order and nesting
# against the real file before relying on anything below.
async def process_job(self, job: PipelineJob):
"""Process an image generation job"""
"""Generate images and update MongoDB"""
try:
logger.info(f"Processing job {job.job_id} for image generation")
translated_items = job.data.get('translated_items', [])
generated_items = []
# Generate images for at most 3 items (to reduce API cost)
for idx, item_data in enumerate(translated_items[:3]):
translated_item = TranslatedItem(**item_data)
# Build the prompt for image generation
prompt = self._create_image_prompt(translated_item)
# Generate the image
# Fetch the article information from MongoDB
news_id = job.data.get('news_id')
mongodb_id = job.data.get('mongodb_id')
if not news_id:
logger.error(f"No news_id in job {job.job_id}")
await self.queue_manager.mark_failed('image_generation', job, "No news_id")
return
# Look up the Korean article in MongoDB (articles_ko)
article = await self.db.articles_ko.find_one({"news_id": news_id})
if not article:
logger.error(f"Article {news_id} not found in MongoDB")
await self.queue_manager.mark_failed('image_generation', job, "Article not found")
return
# Build the image-generation prompt (based on the Korean article)
prompt = self._create_image_prompt_from_article(article)
# Generate images (up to 3)
image_urls = []
for i in range(min(3, 1)): # generate only 1 for testing; min(3, 1) is 1, so this runs once
image_url = await self._generate_image(prompt)
generated_item = GeneratedImageItem(
translated_item=translated_item,
image_url=image_url,
image_prompt=prompt
)
generated_items.append(generated_item)
image_urls.append(image_url)
# API rate limiting
if self.replicate_api_key:
if self.replicate_api_key and i < 2:
await asyncio.sleep(2)
if generated_items:
logger.info(f"Generated images for {len(generated_items)} items")
# Store the completed data on the job
job.data['generated_items'] = [item.dict() for item in generated_items]
job.stages_completed.append('image_generation')
job.stage = 'completed'
# Hand off to the final article assembly stage (already changed to article-assembly)
await self.queue_manager.enqueue('article_assembly', job)
await self.queue_manager.mark_completed('image_generation', job.job_id)
else:
logger.warning(f"No images generated for job {job.job_id}")
# Proceed to the next stage even if image generation failed
job.stages_completed.append('image_generation')
await self.queue_manager.enqueue('article_assembly', job)
await self.queue_manager.mark_completed('image_generation', job.job_id)
# Update MongoDB (add images - articles_ko)
await self.db.articles_ko.update_one(
{"news_id": news_id},
{
"$set": {
"images": image_urls,
"image_prompt": prompt
},
"$addToSet": {
"pipeline_stages": "image_generation"
}
}
)
logger.info(f"Updated article {news_id} with {len(image_urls)} images")
# Hand off to the next stage (translation)
job.stages_completed.append('image_generation')
job.stage = 'translation'
await self.queue_manager.enqueue('translation', job)
await self.queue_manager.mark_completed('image_generation', job.job_id)
except Exception as e:
logger.error(f"Error processing job {job.job_id}: {e}")
# Proceed to the next stage even if image generation failed
job.stages_completed.append('image_generation')
await self.queue_manager.enqueue('article_assembly', job)
await self.queue_manager.mark_completed('image_generation', job.job_id)
def _create_image_prompt(self, translated_item: TranslatedItem) -> str:
"""Build the prompt for image generation"""
# Build the prompt from the English title and summary
title = translated_item.translated_title or translated_item.summarized_item['enriched_item']['rss_item']['title']
summary = translated_item.translated_summary or translated_item.summarized_item['ai_summary']
# NOTE(review): the next line uses `job` and `e`, neither of which is a
# parameter or local of this function; presumably it belongs to the exception
# handler of process_job and landed here through diff interleaving - confirm.
await self.queue_manager.mark_failed('image_generation', job, str(e))
def _create_image_prompt_from_article(self, article: Dict) -> str:
"""기사로부터 이미지 프롬프트 생성"""
# 키워드와 제목을 기반으로 프롬프트 생성
keyword = article.get('keyword', '')
title = article.get('title', '')
categories = article.get('categories', [])
# 카테고리 맵핑 (한글 -> 영어)
category_map = {
'기술': 'technology',
'경제': 'business',
'정치': 'politics',
'교육': 'education',
'사회': 'society',
'문화': 'culture',
'과학': 'science'
}
eng_categories = [category_map.get(cat, cat) for cat in categories]
category_str = ', '.join(eng_categories[:2]) if eng_categories else 'news'
# 뉴스 관련 이미지를 위한 프롬프트
prompt = f"News illustration for: {title[:100]}, professional, photorealistic, high quality, 4k"
prompt = f"News illustration for {keyword} {category_str}, professional, modern, clean design, high quality, 4k, no text"
return prompt
# Every failure path visible below returns a placeholder URL string instead of raising.
async def _generate_image(self, prompt: str) -> str:
"""Generate an image using the Replicate API"""
try:
if not self.replicate_api_key:
# Without an API key, return a placeholder image URL
return "https://via.placeholder.com/800x600.png?text=News+Image"
async with httpx.AsyncClient() as client:
# Request creation of a prediction
response = await client.post(
# NOTE(review): the next line looks like a diff hunk marker; the request
# arguments between the call opening and `timeout` are not visible here.
@ -149,22 +180,22 @@ class ImageGeneratorWorker:
},
timeout=60
)
if response.status_code in [200, 201]:
result = response.json()
prediction_id = result.get('id')
# Poll for the prediction result
image_url = await self._poll_prediction(prediction_id)
return image_url
else:
logger.error(f"Replicate API error: {response.status_code}")
return "https://via.placeholder.com/800x600.png?text=Generation+Failed"
except Exception as e:
logger.error(f"Error generating image: {e}")
return "https://via.placeholder.com/800x600.png?text=Error"
# The failed, poll-error, timeout and exception paths below each return a
# distinct placeholder URL string.
async def _poll_prediction(self, prediction_id: str, max_attempts: int = 30) -> str:
"""Poll for the prediction result"""
try:
# NOTE(review): the next line looks like a diff hunk marker; the loop header
# and the status request are not visible here.
@ -177,11 +208,11 @@ class ImageGeneratorWorker:
},
timeout=30
)
if response.status_code == 200:
result = response.json()
status = result.get('status')
if status == 'succeeded':
output = result.get('output')
if output and isinstance(output, list) and len(output) > 0:
# NOTE(review): another hunk marker; the success return is not visible here.
@ -191,20 +222,20 @@ class ImageGeneratorWorker:
elif status == 'failed':
logger.error(f"Prediction failed: {result.get('error')}")
return "https://via.placeholder.com/800x600.png?text=Failed"
# Still processing: wait before the next check
await asyncio.sleep(2)
else:
logger.error(f"Error polling prediction: {response.status_code}")
return "https://via.placeholder.com/800x600.png?text=Poll+Error"
# Maximum number of attempts exceeded
return "https://via.placeholder.com/800x600.png?text=Timeout"
except Exception as e:
logger.error(f"Error polling prediction: {e}")
return "https://via.placeholder.com/800x600.png?text=Poll+Exception"
async def stop(self):
"""Stop the worker"""
# Disconnect the queue manager that start() connected.
await self.queue_manager.disconnect()
@ -213,7 +244,7 @@ class ImageGeneratorWorker:
async def main():
"""메인 함수"""
worker = ImageGeneratorWorker()
try:
await worker.start()
except KeyboardInterrupt: