""" News Aggregator Service RSS 피드 제목을 구글 검색으로 확장하는 통합 서비스 """ from fastapi import FastAPI, HTTPException, Query, BackgroundTasks from fastapi.middleware.cors import CORSMiddleware from typing import List, Optional, Dict, Any from datetime import datetime import httpx import asyncio from pydantic import BaseModel import logging # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) app = FastAPI( title="News Aggregator Service", description="RSS 피드와 구글 검색을 통합한 뉴스 수집 서비스", version="1.0.0" ) # CORS 설정 app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Configuration RSS_SERVICE_URL = "http://rss-feed-backend:8000" GOOGLE_SEARCH_SERVICE_URL = "http://google-search-backend:8000" # Response Models class NewsItem(BaseModel): """뉴스 항목""" rss_title: str rss_link: Optional[str] = None google_results: List[Dict[str, Any]] = [] search_keyword: str timestamp: datetime = None class AggregatedNews(BaseModel): """통합 뉴스 결과""" keyword: str rss_feed_url: str total_rss_entries: int processed_entries: int news_items: List[NewsItem] processing_time: float # HTTP Client client = httpx.AsyncClient(timeout=30.0) @app.on_event("startup") async def startup(): """서비스 시작""" logger.info("News Aggregator Service starting...") @app.on_event("shutdown") async def shutdown(): """서비스 종료""" await client.aclose() logger.info("News Aggregator Service stopped") @app.get("/") async def root(): return { "service": "News Aggregator Service", "version": "1.0.0", "description": "RSS 피드와 구글 검색 통합 서비스", "endpoints": { "aggregate": "GET /api/aggregate", "aggregate_by_location": "GET /api/aggregate/location", "aggregate_by_topic": "GET /api/aggregate/topic", "health": "GET /health" } } @app.get("/health") async def health_check(): """헬스 체크""" try: # Check RSS service rss_response = await client.get(f"{RSS_SERVICE_URL}/health") rss_healthy = rss_response.status_code == 200 # Check Google Search service google_response = await client.get(f"{GOOGLE_SEARCH_SERVICE_URL}/health") google_healthy = google_response.status_code == 200 return { "status": "healthy" if (rss_healthy and google_healthy) else "degraded", "services": { "rss_feed": "healthy" if rss_healthy else "unhealthy", "google_search": "healthy" if google_healthy else "unhealthy" }, "timestamp": datetime.now().isoformat() } except Exception as e: return { "status": "unhealthy", "error": str(e), "timestamp": datetime.now().isoformat() } @app.get("/api/aggregate", response_model=AggregatedNews) async def aggregate_news( q: str = Query(..., description="검색 키워드"), limit: int = Query(10, description="처리할 RSS 항목 수", ge=1, le=50), google_results_per_title: int = Query(5, description="각 제목당 구글 검색 결과 수", ge=1, le=10), lang: str = Query("ko", description="언어 코드"), country: str = Query("KR", description="국가 코드") ): """ 키워드로 RSS 피드를 검색하고, 각 제목을 구글에서 재검색 1. 키워드로 Google News RSS 피드 가져오기 2. RSS 피드의 각 제목을 구글 검색 3. 통합 결과 반환 """ start_time = datetime.now() try: # Step 1: Get RSS feed from keyword logger.info(f"Fetching RSS feed for keyword: {q}") rss_response = await client.get( f"{RSS_SERVICE_URL}/api/google-rss/search", params={"q": q, "lang": lang, "country": country} ) rss_response.raise_for_status() rss_data = rss_response.json() if not rss_data.get("success"): raise HTTPException(status_code=500, detail=f"RSS 피드 가져오기 실패: {rss_data.get('error')}") # Step 2: Process each RSS entry with Google search news_items = [] entries = rss_data.get("entries", []) # If no entries field, fallback to sample_titles if not entries: titles = rss_data.get("sample_titles", [])[:limit] entries = [{"title": title, "link": "", "published": ""} for title in titles] else: entries = entries[:limit] # Create tasks for parallel processing search_tasks = [] for entry in entries: title = entry.get("title", "") # Clean title for better search results clean_title = title.split(" - ")[-1] if " - " in title else title search_tasks.append( search_google(clean_title, google_results_per_title, lang, country) ) # Execute searches in parallel logger.info(f"Searching Google for {len(search_tasks)} RSS entries") search_results = await asyncio.gather(*search_tasks, return_exceptions=True) # Combine results for i, entry in enumerate(entries): google_results = [] if not isinstance(search_results[i], Exception): google_results = search_results[i] title = entry.get("title", "") news_items.append(NewsItem( rss_title=title, rss_link=entry.get("link", ""), google_results=google_results, search_keyword=title.split(" - ")[-1] if " - " in title else title, timestamp=datetime.now() )) # Calculate processing time processing_time = (datetime.now() - start_time).total_seconds() return AggregatedNews( keyword=q, rss_feed_url=rss_data.get("feed_url", ""), total_rss_entries=rss_data.get("entry_count", 0), processed_entries=len(news_items), news_items=news_items, processing_time=processing_time ) except httpx.HTTPStatusError as e: logger.error(f"HTTP error: {e}") raise HTTPException(status_code=e.response.status_code, detail=str(e)) except Exception as e: logger.error(f"Error in aggregate_news: {e}") raise HTTPException(status_code=500, detail=str(e)) async def search_google(query: str, num_results: int, lang: str, country: str) -> List[Dict[str, Any]]: """구글 검색 서비스 호출 - 전체 콘텐츠 포함""" try: # Full content API 직접 호출 response = await client.get( f"{GOOGLE_SEARCH_SERVICE_URL}/api/search/full", params={ "q": query, "num": num_results, "lang": lang, "country": country } ) response.raise_for_status() data = response.json() results = data.get("results", []) # full_content가 이미 포함되어 있으므로 그대로 반환 logger.info(f"Google search for '{query}' returned {len(results)} results with full content") return results except Exception as e: logger.error(f"Google search error for '{query}': {e}") # Fallback to basic search without full content try: response = await client.get( f"{GOOGLE_SEARCH_SERVICE_URL}/api/search", params={ "q": query, "num": num_results, "lang": lang, "country": country } ) response.raise_for_status() data = response.json() return data.get("results", []) except: return [] @app.get("/api/aggregate/location", response_model=AggregatedNews) async def aggregate_news_by_location( location: str = Query(..., description="지역명 (예: Seoul, Tokyo)"), limit: int = Query(10, description="처리할 RSS 항목 수", ge=1, le=50), google_results_per_title: int = Query(5, description="각 제목당 구글 검색 결과 수", ge=1, le=10), lang: str = Query("ko", description="언어 코드"), country: str = Query("KR", description="국가 코드") ): """지역 기반 RSS 피드를 가져와서 각 제목을 구글 검색""" start_time = datetime.now() try: # Get location-based RSS feed logger.info(f"Fetching RSS feed for location: {location}") rss_response = await client.get( f"{RSS_SERVICE_URL}/api/google-rss/location", params={"location": location, "lang": lang, "country": country} ) rss_response.raise_for_status() rss_data = rss_response.json() if not rss_data.get("success"): raise HTTPException(status_code=500, detail=f"RSS 피드 가져오기 실패: {rss_data.get('error')}") # Process titles news_items = [] titles = rss_data.get("sample_titles", [])[:limit] search_tasks = [] for title in titles: clean_title = title.split(" - ")[-1] if " - " in title else title search_tasks.append( search_google(clean_title, google_results_per_title, lang, country) ) search_results = await asyncio.gather(*search_tasks, return_exceptions=True) for i, title in enumerate(titles): google_results = [] if not isinstance(search_results[i], Exception): google_results = search_results[i] news_items.append(NewsItem( rss_title=title, google_results=google_results, search_keyword=title.split(" - ")[-1] if " - " in title else title, timestamp=datetime.now() )) processing_time = (datetime.now() - start_time).total_seconds() return AggregatedNews( keyword=f"Location: {location}", rss_feed_url=rss_data.get("feed_url", ""), total_rss_entries=rss_data.get("entry_count", 0), processed_entries=len(news_items), news_items=news_items, processing_time=processing_time ) except Exception as e: logger.error(f"Error in aggregate_news_by_location: {e}") raise HTTPException(status_code=500, detail=str(e)) @app.get("/api/aggregate/topic", response_model=AggregatedNews) async def aggregate_news_by_topic( category: str = Query(..., description="카테고리 (TECHNOLOGY, BUSINESS, HEALTH 등)"), limit: int = Query(10, description="처리할 RSS 항목 수", ge=1, le=50), google_results_per_title: int = Query(5, description="각 제목당 구글 검색 결과 수", ge=1, le=10), lang: str = Query("ko", description="언어 코드"), country: str = Query("KR", description="국가 코드") ): """주제별 RSS 피드를 가져와서 각 제목을 구글 검색""" start_time = datetime.now() try: # Get topic-based RSS feed logger.info(f"Fetching RSS feed for topic: {category}") rss_response = await client.get( f"{RSS_SERVICE_URL}/api/google-rss/topic", params={"category": category, "lang": lang, "country": country} ) rss_response.raise_for_status() rss_data = rss_response.json() if not rss_data.get("success"): raise HTTPException(status_code=500, detail=f"RSS 피드 가져오기 실패: {rss_data.get('error')}") # Process titles news_items = [] titles = rss_data.get("sample_titles", [])[:limit] search_tasks = [] for title in titles: clean_title = title.split(" - ")[-1] if " - " in title else title search_tasks.append( search_google(clean_title, google_results_per_title, lang, country) ) search_results = await asyncio.gather(*search_tasks, return_exceptions=True) for i, title in enumerate(titles): google_results = [] if not isinstance(search_results[i], Exception): google_results = search_results[i] news_items.append(NewsItem( rss_title=title, google_results=google_results, search_keyword=title.split(" - ")[-1] if " - " in title else title, timestamp=datetime.now() )) processing_time = (datetime.now() - start_time).total_seconds() return AggregatedNews( keyword=f"Topic: {category}", rss_feed_url=rss_data.get("feed_url", ""), total_rss_entries=rss_data.get("entry_count", 0), processed_entries=len(news_items), news_items=news_items, processing_time=processing_time ) except Exception as e: logger.error(f"Error in aggregate_news_by_topic: {e}") raise HTTPException(status_code=500, detail=str(e))