Initial commit - cleaned repository
This commit is contained in:
365
backup-services/news-aggregator/backend/app/main.py
Normal file
365
backup-services/news-aggregator/backend/app/main.py
Normal file
@ -0,0 +1,365 @@
|
||||
"""
|
||||
News Aggregator Service
|
||||
RSS 피드 제목을 구글 검색으로 확장하는 통합 서비스
|
||||
"""
|
||||
from fastapi import FastAPI, HTTPException, Query, BackgroundTasks
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
import httpx
|
||||
import asyncio
|
||||
from pydantic import BaseModel
|
||||
import logging
|
||||
|
||||
# Module-level logging setup.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application object.
app = FastAPI(
    title="News Aggregator Service",
    description="RSS 피드와 구글 검색을 통합한 뉴스 수집 서비스",
    version="1.0.0",
)

# CORS: wide open so any frontend origin can call this service.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Upstream service base URLs (docker-compose service names).
RSS_SERVICE_URL = "http://rss-feed-backend:8000"
GOOGLE_SEARCH_SERVICE_URL = "http://google-search-backend:8000"
|
||||
|
||||
# Response Models
class NewsItem(BaseModel):
    """A single aggregated news item: one RSS headline plus its Google results."""
    rss_title: str                            # original RSS entry title
    rss_link: Optional[str] = None            # link from the RSS entry, if any
    google_results: List[Dict[str, Any]] = []  # results from the Google search service
    search_keyword: str                       # cleaned title actually sent to Google
    # Fix: was `timestamp: datetime = None` — a None default on a non-optional
    # field, which pydantic v2 rejects at model definition time.
    timestamp: Optional[datetime] = None
|
||||
|
||||
class AggregatedNews(BaseModel):
    """Aggregated news result returned by the /api/aggregate* endpoints."""
    keyword: str                # search keyword (or "Location: ..." / "Topic: ...")
    rss_feed_url: str           # URL of the RSS feed that was fetched
    total_rss_entries: int      # entry count reported by the RSS service
    processed_entries: int      # entries actually searched on Google
    news_items: List[NewsItem]  # per-entry combined results
    processing_time: float      # wall-clock seconds for the whole aggregation
|
||||
|
||||
# Shared HTTP client for all outbound calls; closed in the shutdown handler.
client = httpx.AsyncClient(timeout=30.0)
|
||||
|
||||
@app.on_event("startup")
async def startup():
    """Log that the service is starting."""
    logger.info("News Aggregator Service starting...")
|
||||
|
||||
@app.on_event("shutdown")
async def shutdown():
    """Close the shared HTTP client and log shutdown."""
    await client.aclose()
    logger.info("News Aggregator Service stopped")
|
||||
|
||||
@app.get("/")
async def root():
    """Service metadata plus a short index of the available endpoints."""
    endpoints = {
        "aggregate": "GET /api/aggregate",
        "aggregate_by_location": "GET /api/aggregate/location",
        "aggregate_by_topic": "GET /api/aggregate/topic",
        "health": "GET /health",
    }
    return {
        "service": "News Aggregator Service",
        "version": "1.0.0",
        "description": "RSS 피드와 구글 검색 통합 서비스",
        "endpoints": endpoints,
    }
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Health check.

    Probes both upstream services independently and concurrently. Fix: in the
    original, a connection error to either service aborted the whole check and
    returned a single opaque "unhealthy" with no per-service detail; now a
    failing probe only marks that one service unhealthy.
    """
    async def _probe(base_url: str) -> bool:
        # Any failure (connection error, timeout, non-200) counts as unhealthy
        # for that service, without aborting the other probe.
        try:
            response = await client.get(f"{base_url}/health")
            return response.status_code == 200
        except Exception:
            return False

    rss_healthy, google_healthy = await asyncio.gather(
        _probe(RSS_SERVICE_URL), _probe(GOOGLE_SEARCH_SERVICE_URL)
    )

    if rss_healthy and google_healthy:
        status = "healthy"
    elif rss_healthy or google_healthy:
        status = "degraded"
    else:
        status = "unhealthy"

    return {
        "status": status,
        "services": {
            "rss_feed": "healthy" if rss_healthy else "unhealthy",
            "google_search": "healthy" if google_healthy else "unhealthy",
        },
        "timestamp": datetime.now().isoformat(),
    }
|
||||
|
||||
@app.get("/api/aggregate", response_model=AggregatedNews)
async def aggregate_news(
    q: str = Query(..., description="검색 키워드"),
    limit: int = Query(10, description="처리할 RSS 항목 수", ge=1, le=50),
    google_results_per_title: int = Query(5, description="각 제목당 구글 검색 결과 수", ge=1, le=10),
    lang: str = Query("ko", description="언어 코드"),
    country: str = Query("KR", description="국가 코드")
):
    """Search an RSS feed by keyword, then re-search each headline on Google.

    1. Fetch the Google News RSS feed for the keyword.
    2. Run a Google search for each RSS entry title (in parallel).
    3. Return the combined result.
    """
    start_time = datetime.now()

    try:
        # Step 1: fetch the RSS feed for the keyword.
        logger.info(f"Fetching RSS feed for keyword: {q}")
        rss_response = await client.get(
            f"{RSS_SERVICE_URL}/api/google-rss/search",
            params={"q": q, "lang": lang, "country": country}
        )
        rss_response.raise_for_status()
        rss_data = rss_response.json()

        if not rss_data.get("success"):
            raise HTTPException(status_code=500, detail=f"RSS 피드 가져오기 실패: {rss_data.get('error')}")

        # Step 2: search Google for each RSS entry.
        news_items = []
        entries = rss_data.get("entries", [])

        # Fall back to sample_titles when the RSS service returns no entries.
        if not entries:
            titles = rss_data.get("sample_titles", [])[:limit]
            entries = [{"title": title, "link": "", "published": ""} for title in titles]
        else:
            entries = entries[:limit]

        # Build one search coroutine per entry.
        # NOTE(review): `[-1]` keeps the segment AFTER the last " - "; for
        # typical "Headline - Publisher" Google News titles this searches the
        # publisher name, not the headline — confirm intent (perhaps [0] was meant).
        search_tasks = []
        for entry in entries:
            title = entry.get("title", "")
            clean_title = title.split(" - ")[-1] if " - " in title else title
            search_tasks.append(
                search_google(clean_title, google_results_per_title, lang, country)
            )

        # Execute searches in parallel; per-task exceptions are captured.
        logger.info(f"Searching Google for {len(search_tasks)} RSS entries")
        search_results = await asyncio.gather(*search_tasks, return_exceptions=True)

        # Combine each RSS entry with its Google results (empty list on failure).
        for entry, result in zip(entries, search_results):
            google_results = [] if isinstance(result, Exception) else result
            title = entry.get("title", "")
            news_items.append(NewsItem(
                rss_title=title,
                rss_link=entry.get("link", ""),
                google_results=google_results,
                search_keyword=title.split(" - ")[-1] if " - " in title else title,
                timestamp=datetime.now()
            ))

        processing_time = (datetime.now() - start_time).total_seconds()

        return AggregatedNews(
            keyword=q,
            rss_feed_url=rss_data.get("feed_url", ""),
            total_rss_entries=rss_data.get("entry_count", 0),
            processed_entries=len(news_items),
            news_items=news_items,
            processing_time=processing_time
        )

    except HTTPException:
        # Fix: without this clause the HTTPException raised above was caught
        # by the broad `except Exception` below and re-wrapped, mangling the
        # detail into "500: ...". Re-raise it untouched.
        raise
    except httpx.HTTPStatusError as e:
        logger.error(f"HTTP error: {e}")
        raise HTTPException(status_code=e.response.status_code, detail=str(e))
    except Exception as e:
        logger.error(f"Error in aggregate_news: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
async def search_google(query: str, num_results: int, lang: str, country: str) -> List[Dict[str, Any]]:
    """Call the Google search service, preferring the full-content endpoint.

    Falls back to the basic /api/search endpoint when the full-content call
    fails, and returns an empty list when both fail — best-effort semantics
    so a single failed search never breaks the whole aggregation.
    """
    params = {
        "q": query,
        "num": num_results,
        "lang": lang,
        "country": country
    }
    try:
        # Primary: results with full page content already included.
        response = await client.get(
            f"{GOOGLE_SEARCH_SERVICE_URL}/api/search/full",
            params=params
        )
        response.raise_for_status()
        data = response.json()
        results = data.get("results", [])
        logger.info(f"Google search for '{query}' returned {len(results)} results with full content")
        return results
    except Exception as e:
        logger.error(f"Google search error for '{query}': {e}")
        # Fallback: basic search without full content.
        try:
            response = await client.get(
                f"{GOOGLE_SEARCH_SERVICE_URL}/api/search",
                params=params
            )
            response.raise_for_status()
            data = response.json()
            return data.get("results", [])
        except Exception as fallback_error:
            # Fix: was a bare `except:` that also swallowed SystemExit /
            # KeyboardInterrupt and hid the failure entirely; narrow and log.
            logger.error(f"Fallback Google search failed for '{query}': {fallback_error}")
            return []
|
||||
|
||||
@app.get("/api/aggregate/location", response_model=AggregatedNews)
async def aggregate_news_by_location(
    location: str = Query(..., description="지역명 (예: Seoul, Tokyo)"),
    limit: int = Query(10, description="처리할 RSS 항목 수", ge=1, le=50),
    google_results_per_title: int = Query(5, description="각 제목당 구글 검색 결과 수", ge=1, le=10),
    lang: str = Query("ko", description="언어 코드"),
    country: str = Query("KR", description="국가 코드")
):
    """Fetch the location-based RSS feed and Google-search each headline."""
    start_time = datetime.now()

    try:
        # Fetch the location-based RSS feed.
        logger.info(f"Fetching RSS feed for location: {location}")
        rss_response = await client.get(
            f"{RSS_SERVICE_URL}/api/google-rss/location",
            params={"location": location, "lang": lang, "country": country}
        )
        rss_response.raise_for_status()
        rss_data = rss_response.json()

        if not rss_data.get("success"):
            raise HTTPException(status_code=500, detail=f"RSS 피드 가져오기 실패: {rss_data.get('error')}")

        # One Google search per sample title, run in parallel.
        news_items = []
        titles = rss_data.get("sample_titles", [])[:limit]

        # NOTE(review): `[-1]` keeps the segment after the last " - "; for
        # "Headline - Publisher" titles that is the publisher — confirm intent.
        search_tasks = []
        for title in titles:
            clean_title = title.split(" - ")[-1] if " - " in title else title
            search_tasks.append(
                search_google(clean_title, google_results_per_title, lang, country)
            )

        search_results = await asyncio.gather(*search_tasks, return_exceptions=True)

        # Pair each title with its results (empty list when the search failed).
        for title, result in zip(titles, search_results):
            google_results = [] if isinstance(result, Exception) else result
            news_items.append(NewsItem(
                rss_title=title,
                google_results=google_results,
                search_keyword=title.split(" - ")[-1] if " - " in title else title,
                timestamp=datetime.now()
            ))

        processing_time = (datetime.now() - start_time).total_seconds()

        return AggregatedNews(
            keyword=f"Location: {location}",
            rss_feed_url=rss_data.get("feed_url", ""),
            total_rss_entries=rss_data.get("entry_count", 0),
            processed_entries=len(news_items),
            news_items=news_items,
            processing_time=processing_time
        )

    except HTTPException:
        # Fix: keep deliberate HTTP errors (e.g. the RSS-failure 500 above)
        # intact instead of re-wrapping them in the broad handler below.
        raise
    except Exception as e:
        logger.error(f"Error in aggregate_news_by_location: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/api/aggregate/topic", response_model=AggregatedNews)
async def aggregate_news_by_topic(
    category: str = Query(..., description="카테고리 (TECHNOLOGY, BUSINESS, HEALTH 등)"),
    limit: int = Query(10, description="처리할 RSS 항목 수", ge=1, le=50),
    google_results_per_title: int = Query(5, description="각 제목당 구글 검색 결과 수", ge=1, le=10),
    lang: str = Query("ko", description="언어 코드"),
    country: str = Query("KR", description="국가 코드")
):
    """Fetch the topic-based RSS feed and Google-search each headline."""
    start_time = datetime.now()

    try:
        # Fetch the topic-based RSS feed.
        logger.info(f"Fetching RSS feed for topic: {category}")
        rss_response = await client.get(
            f"{RSS_SERVICE_URL}/api/google-rss/topic",
            params={"category": category, "lang": lang, "country": country}
        )
        rss_response.raise_for_status()
        rss_data = rss_response.json()

        if not rss_data.get("success"):
            raise HTTPException(status_code=500, detail=f"RSS 피드 가져오기 실패: {rss_data.get('error')}")

        # One Google search per sample title, run in parallel.
        news_items = []
        titles = rss_data.get("sample_titles", [])[:limit]

        # NOTE(review): `[-1]` keeps the segment after the last " - "; for
        # "Headline - Publisher" titles that is the publisher — confirm intent.
        search_tasks = []
        for title in titles:
            clean_title = title.split(" - ")[-1] if " - " in title else title
            search_tasks.append(
                search_google(clean_title, google_results_per_title, lang, country)
            )

        search_results = await asyncio.gather(*search_tasks, return_exceptions=True)

        # Pair each title with its results (empty list when the search failed).
        for title, result in zip(titles, search_results):
            google_results = [] if isinstance(result, Exception) else result
            news_items.append(NewsItem(
                rss_title=title,
                google_results=google_results,
                search_keyword=title.split(" - ")[-1] if " - " in title else title,
                timestamp=datetime.now()
            ))

        processing_time = (datetime.now() - start_time).total_seconds()

        return AggregatedNews(
            keyword=f"Topic: {category}",
            rss_feed_url=rss_data.get("feed_url", ""),
            total_rss_entries=rss_data.get("entry_count", 0),
            processed_entries=len(news_items),
            news_items=news_items,
            processing_time=processing_time
        )

    except HTTPException:
        # Fix: keep deliberate HTTP errors (e.g. the RSS-failure 500 above)
        # intact instead of re-wrapping them in the broad handler below.
        raise
    except Exception as e:
        logger.error(f"Error in aggregate_news_by_topic: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
Reference in New Issue
Block a user