Initial commit - cleaned repository
This commit is contained in:
0
backup-services/rss-feed/backend/app/__init__.py
Normal file
0
backup-services/rss-feed/backend/app/__init__.py
Normal file
26
backup-services/rss-feed/backend/app/config.py
Normal file
26
backup-services/rss-feed/backend/app/config.py
Normal file
@ -0,0 +1,26 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
from typing import Optional
|
||||
|
||||
class Settings(BaseSettings):
    """Application configuration, overridable via environment variables / .env.

    Field names map case-insensitively to environment variables through
    pydantic's BaseSettings machinery.
    """

    # MongoDB Configuration
    mongodb_url: str = "mongodb://mongodb:27017"  # docker-compose service hostname
    db_name: str = "rss_feed_db"

    # Redis Configuration
    redis_url: str = "redis://redis:6379"
    redis_db: int = 3  # logical Redis database index

    # Feed Settings
    default_update_interval: int = 900  # 15 minutes in seconds
    max_entries_per_feed: int = 100  # cap on entries processed per fetch
    fetch_timeout: int = 30  # seconds

    # Scheduler Settings
    enable_scheduler: bool = True
    scheduler_timezone: str = "Asia/Seoul"

    class Config:
        # Load overrides from a local .env file when present.
        env_file = ".env"
        env_file_encoding = "utf-8"


# Shared singleton imported by the rest of the application.
settings = Settings()
|
||||
222
backup-services/rss-feed/backend/app/feed_parser.py
Normal file
222
backup-services/rss-feed/backend/app/feed_parser.py
Normal file
@ -0,0 +1,222 @@
|
||||
import feedparser
|
||||
import httpx
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from dateutil import parser as date_parser
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import hashlib
|
||||
from .models import FeedEntry
|
||||
|
||||
class FeedParser:
    """Fetches RSS/Atom feeds over HTTP and normalizes their entries.

    Wraps a shared httpx.AsyncClient plus feedparser, and converts raw
    feedparser entries into FeedEntry models with cleaned, derived fields.
    Call close() on shutdown to release the HTTP client.
    """

    def __init__(self):
        # One shared async client for all fetches; released via close().
        self.client = httpx.AsyncClient(
            timeout=30.0,
            follow_redirects=True,
            headers={
                "User-Agent": "Mozilla/5.0 (compatible; RSS Feed Reader/1.0)"
            }
        )

    async def parse_feed(self, url: str) -> Dict[str, Any]:
        """Fetch and parse an RSS/Atom feed.

        Returns {"success", "feed", "entries", "error"}; never raises —
        any fetch/parse failure is reported via success=False and error.
        """
        try:
            response = await self.client.get(url)
            response.raise_for_status()

            # Parse the fetched bytes.
            feed = feedparser.parse(response.content)

            # feedparser sets bozo on malformed feeds; surface its exception.
            if feed.bozo and feed.bozo_exception:
                raise Exception(f"Feed parsing error: {feed.bozo_exception}")

            return {
                "success": True,
                "feed": feed.feed,
                "entries": feed.entries,
                "error": None
            }
        except Exception as e:
            return {
                "success": False,
                "feed": None,
                "entries": [],
                "error": str(e)
            }

    def extract_entry_data(self, entry: Any, feed_id: str) -> "FeedEntry":
        """Extract and normalize one raw feedparser entry into a FeedEntry."""
        return FeedEntry(
            feed_id=feed_id,
            entry_id=self._generate_entry_id(entry),
            title=entry.get("title", "Untitled"),
            link=entry.get("link", ""),
            summary=self._extract_summary(entry),
            content=self._extract_content(entry),
            author=entry.get("author", ""),
            # published/updated fall back to each other so at least one
            # timestamp survives when the feed only provides one of them.
            published=self._parse_date(entry.get("published", entry.get("updated"))),
            updated=self._parse_date(entry.get("updated", entry.get("published"))),
            categories=self._extract_categories(entry),
            thumbnail=self._extract_thumbnail(entry),
            enclosures=self._extract_enclosures(entry)
        )

    def _generate_entry_id(self, entry: Any) -> str:
        """Return a stable unique ID for an entry.

        Prefers a non-empty feed-supplied GUID; otherwise hashes link+title.
        md5 is used as a cheap fingerprint here, not for security.
        """
        entry_uid = getattr(entry, "id", None)
        if entry_uid:
            return entry_uid

        unique_str = f"{entry.get('link', '')}{entry.get('title', '')}"
        return hashlib.md5(unique_str.encode()).hexdigest()

    def _extract_summary(self, entry: Any) -> Optional[str]:
        """Return the entry summary as plain text, truncated to 500 chars."""
        summary = entry.get("summary", entry.get("description", ""))
        if summary:
            # Strip HTML down to readable text.
            soup = BeautifulSoup(summary, "html.parser")
            text = soup.get_text(separator=" ", strip=True)
            # Limit length, keeping room for the ellipsis.
            if len(text) > 500:
                text = text[:497] + "..."
            return text
        return None

    def _extract_content(self, entry: Any) -> Optional[str]:
        """Return the entry's full content with whitespace collapsed."""
        content = ""

        # Prefer the dedicated content field when present.
        if hasattr(entry, "content"):
            for c in entry.content:
                if c.get("type") in ["text/html", "text/plain"]:
                    content = c.get("value", "")
                    break

        # Fall back to the summary detail.
        if not content and hasattr(entry, "summary_detail"):
            content = entry.summary_detail.get("value", "")

        # Collapse runs of whitespace into single spaces.
        if content:
            return re.sub(r'\s+', ' ', content).strip()

        return None

    def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
        """Parse a date value into a datetime, or None on failure.

        Accepts either a string or the time.struct_time that feedparser
        sometimes supplies in place of a string.
        """
        if not date_str:
            return None

        # feedparser can hand back a time.struct_time instead of a string.
        if hasattr(date_str, "tm_year"):
            import time
            try:
                return datetime.fromtimestamp(time.mktime(date_str))
            except (OverflowError, ValueError):
                return None

        try:
            # dateutil's ParserError subclasses ValueError.
            return date_parser.parse(date_str)
        except (ValueError, OverflowError, TypeError):
            return None

    def _extract_categories(self, entry: Any) -> List[str]:
        """Return the entry's category/tag names as a list of strings."""
        categories = []

        if hasattr(entry, "tags"):
            for tag in entry.tags:
                if hasattr(tag, "term"):
                    categories.append(tag.term)
                elif isinstance(tag, str):
                    categories.append(tag)

        return categories

    def _extract_thumbnail(self, entry: Any) -> Optional[str]:
        """Return a thumbnail image URL for the entry, if one can be found.

        Checks media:thumbnail, media:content, image enclosures, and finally
        the first <img> inside the summary/content HTML.
        """
        # Check media thumbnail
        if hasattr(entry, "media_thumbnail"):
            for thumb in entry.media_thumbnail:
                if thumb.get("url"):
                    return thumb["url"]

        # Check media content
        if hasattr(entry, "media_content"):
            for media in entry.media_content:
                if media.get("type", "").startswith("image/"):
                    return media.get("url")

        # Check enclosures
        if hasattr(entry, "enclosures"):
            for enc in entry.enclosures:
                if enc.get("type", "").startswith("image/"):
                    return enc.get("href", enc.get("url"))

        # Fall back to the first <img> in the summary and/or content HTML.
        # (The previous one-line conditional discarded the summary whenever
        # the entry had no `content` attribute, due to ternary precedence.)
        html = entry.get("summary", "")
        if hasattr(entry, "content"):
            html += entry.get("content", [{}])[0].get("value", "")
        if html:
            soup = BeautifulSoup(html, "html.parser")
            img = soup.find("img")
            if img and img.get("src"):
                return img["src"]

        return None

    def _extract_enclosures(self, entry: Any) -> List[Dict[str, Any]]:
        """Return media enclosures as dicts with url/type/length keys."""
        enclosures = []

        if hasattr(entry, "enclosures"):
            for enc in entry.enclosures:
                enclosure = {
                    "url": enc.get("href", enc.get("url", "")),
                    "type": enc.get("type", ""),
                    "length": enc.get("length", 0)
                }
                # Skip enclosures without a usable URL.
                if enclosure["url"]:
                    enclosures.append(enclosure)

        return enclosures

    async def close(self):
        """Close the shared HTTP client (call once at shutdown)."""
        await self.client.aclose()
|
||||
115
backup-services/rss-feed/backend/app/google_rss.py
Normal file
115
backup-services/rss-feed/backend/app/google_rss.py
Normal file
@ -0,0 +1,115 @@
|
||||
"""
|
||||
Google News RSS Feed Generator
|
||||
구글 뉴스 RSS 피드 URL 생성 및 구독 지원
|
||||
"""
|
||||
from typing import Optional, List
|
||||
from urllib.parse import quote_plus
|
||||
from enum import Enum
|
||||
|
||||
class GoogleNewsCategory(str, Enum):
    """Topic identifiers accepted by Google News' topic RSS endpoint."""
    WORLD = "WORLD"
    NATION = "NATION"
    BUSINESS = "BUSINESS"
    TECHNOLOGY = "TECHNOLOGY"
    ENTERTAINMENT = "ENTERTAINMENT"
    SPORTS = "SPORTS"
    SCIENCE = "SCIENCE"
    HEALTH = "HEALTH"


class GoogleNewsRSS:
    """Builds Google News RSS feed URLs: search, topic, location, trending."""

    BASE_URL = "https://news.google.com/rss"

    @staticmethod
    def _locale_suffix(lang: str, country: str) -> str:
        """Shared hl/gl/ceid query-string tail appended to every feed URL."""
        return f"hl={lang}&gl={country}&ceid={country}:{lang}"

    @staticmethod
    def search_feed(query: str, lang: str = "ko", country: str = "KR") -> str:
        """Return the RSS URL for a keyword search.

        Args:
            query: search keyword(s); URL-encoded here.
            lang: language code (ko, en, ja, zh-CN, ...).
            country: country code (KR, US, JP, CN, ...).
        """
        encoded = quote_plus(query)
        suffix = GoogleNewsRSS._locale_suffix(lang, country)
        return f"{GoogleNewsRSS.BASE_URL}/search?q={encoded}&{suffix}"

    @staticmethod
    def topic_feed(category: GoogleNewsCategory, lang: str = "ko", country: str = "KR") -> str:
        """Return the RSS URL for a category's headlines."""
        suffix = GoogleNewsRSS._locale_suffix(lang, country)
        return f"{GoogleNewsRSS.BASE_URL}/headlines/section/topic/{category.value}?{suffix}"

    @staticmethod
    def location_feed(location: str, lang: str = "ko", country: str = "KR") -> str:
        """Return the RSS URL for local news around a place name."""
        where = quote_plus(location)
        suffix = GoogleNewsRSS._locale_suffix(lang, country)
        return f"{GoogleNewsRSS.BASE_URL}/headlines/section/geo/{where}?{suffix}"

    @staticmethod
    def trending_feed(lang: str = "ko", country: str = "KR") -> str:
        """Return the RSS URL for the top-headlines (trending) feed."""
        return f"{GoogleNewsRSS.BASE_URL}?{GoogleNewsRSS._locale_suffix(lang, country)}"

    @staticmethod
    def get_common_feeds() -> List[dict]:
        """Return descriptors (title/url/description) for common feeds."""
        specs = [
            ("구글 뉴스 - 한국 헤드라인",
             GoogleNewsRSS.trending_feed("ko", "KR"),
             "한국 주요 뉴스"),
            ("구글 뉴스 - 기술",
             GoogleNewsRSS.topic_feed(GoogleNewsCategory.TECHNOLOGY, "ko", "KR"),
             "기술 관련 뉴스"),
            ("구글 뉴스 - 비즈니스",
             GoogleNewsRSS.topic_feed(GoogleNewsCategory.BUSINESS, "ko", "KR"),
             "비즈니스 뉴스"),
            ("Google News - World",
             GoogleNewsRSS.topic_feed(GoogleNewsCategory.WORLD, "en", "US"),
             "World news in English"),
        ]
        return [{"title": t, "url": u, "description": d} for t, u, d in specs]
|
||||
596
backup-services/rss-feed/backend/app/main.py
Normal file
596
backup-services/rss-feed/backend/app/main.py
Normal file
@ -0,0 +1,596 @@
|
||||
from fastapi import FastAPI, HTTPException, Query, Path, BackgroundTasks
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
from contextlib import asynccontextmanager
|
||||
import motor.motor_asyncio
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
import pytz
|
||||
import redis.asyncio as redis
|
||||
import json
|
||||
|
||||
from .config import settings
|
||||
from .models import (
|
||||
FeedSubscription, FeedEntry, CreateFeedRequest,
|
||||
UpdateFeedRequest, FeedStatistics, FeedStatus, FeedCategory
|
||||
)
|
||||
from .feed_parser import FeedParser
|
||||
from .google_rss import GoogleNewsRSS, GoogleNewsCategory
|
||||
|
||||
# Database connection
# Module-level singletons initialized in lifespan() and shared by handlers.
db_client = None  # motor AsyncIOMotorClient
db = None  # motor database handle
redis_client = None  # redis.asyncio client
scheduler = None  # APScheduler instance (stays None when scheduler disabled)
parser = None  # FeedParser instance
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: open shared resources at startup, release at shutdown.

    Populates the module globals (db_client, db, redis_client, scheduler,
    parser) that the request handlers rely on.
    """
    global db_client, db, redis_client, scheduler, parser

    # Connect to MongoDB
    db_client = motor.motor_asyncio.AsyncIOMotorClient(settings.mongodb_url)
    db = db_client[settings.db_name]

    # Connect to Redis
    redis_client = redis.from_url(settings.redis_url, db=settings.redis_db)

    # Initialize feed parser
    parser = FeedParser()

    # Initialize scheduler
    if settings.enable_scheduler:
        scheduler = AsyncIOScheduler(timezone=pytz.timezone(settings.scheduler_timezone))
        # Poll every 60s; update_all_feeds decides per-feed whether it is
        # actually due, based on each feed's update_interval.
        scheduler.add_job(
            update_all_feeds,
            trigger=IntervalTrigger(seconds=60),
            id="update_feeds",
            replace_existing=True
        )
        scheduler.start()
        print("RSS Feed scheduler started")

    print("RSS Feed Service starting...")
    yield

    # Cleanup, in reverse order of acquisition.
    if scheduler:
        scheduler.shutdown()
    if parser:
        await parser.close()
    if redis_client:
        await redis_client.close()
    db_client.close()
    print("RSS Feed Service stopping...")
|
||||
|
||||
# FastAPI application; lifespan manages DB/Redis/scheduler resources.
app = FastAPI(
    title="RSS Feed Service",
    description="RSS/Atom 피드 구독 및 관리 서비스",
    version="1.0.0",
    lifespan=lifespan
)

# CORS configuration
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm intended use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
# Helper functions
async def update_feed(feed_id: str):
    """Fetch one feed and upsert its entries.

    On success: resets the feed's error state and inserts new entries
    (or refreshes existing ones that carry a newer `updated` timestamp).
    On failure: marks the feed ERROR and increments error_count.

    NOTE(review): the feed is looked up with a *string* _id, while
    insert_one generates an ObjectId — confirm feed documents are stored
    with string ids, otherwise this lookup never matches.
    """
    feed = await db.feeds.find_one({"_id": feed_id})
    if not feed:
        return

    # Parse feed
    result = await parser.parse_feed(feed["url"])

    if result["success"]:
        # Update feed metadata (naive local datetimes throughout).
        await db.feeds.update_one(
            {"_id": feed_id},
            {
                "$set": {
                    "last_fetch": datetime.now(),
                    "status": FeedStatus.ACTIVE,
                    "error_count": 0,
                    "last_error": None,
                    "updated_at": datetime.now()
                }
            }
        )

        # Process entries, bounded by max_entries_per_feed.
        for entry_data in result["entries"][:settings.max_entries_per_feed]:
            entry = parser.extract_entry_data(entry_data, feed_id)

            # Check if entry already exists (dedup on feed_id + entry_id).
            existing = await db.entries.find_one({
                "feed_id": feed_id,
                "entry_id": entry.entry_id
            })

            if not existing:
                # Insert new entry
                await db.entries.insert_one(entry.dict())
            else:
                # Update existing entry only when the feed reports it newer.
                if entry.updated and existing.get("updated"):
                    if entry.updated > existing["updated"]:
                        await db.entries.update_one(
                            {"_id": existing["_id"]},
                            {"$set": entry.dict(exclude={"id", "created_at"})}
                        )
    else:
        # Record the failure on the feed document.
        await db.feeds.update_one(
            {"_id": feed_id},
            {
                "$set": {
                    "status": FeedStatus.ERROR,
                    "last_error": result["error"],
                    "updated_at": datetime.now()
                },
                "$inc": {"error_count": 1}
            }
        )
|
||||
|
||||
async def update_all_feeds():
    """Update all active feeds that are due.

    Invoked every 60s by the scheduler; the per-feed update_interval gate
    below means most invocations skip most feeds.
    """
    now = datetime.now()

    # Candidate feeds: active, and never fetched or fetched in the past.
    feeds = await db.feeds.find({
        "status": FeedStatus.ACTIVE,
        "$or": [
            {"last_fetch": None},
            {"last_fetch": {"$lt": now}}
        ]
    }).to_list(100)

    for feed in feeds:
        # Skip feeds whose update_interval has not elapsed yet.
        if feed.get("last_fetch"):
            time_diff = (now - feed["last_fetch"]).total_seconds()
            if time_diff < feed.get("update_interval", settings.default_update_interval):
                continue

        # NOTE(review): despite the original "in background" wording, feeds
        # are awaited one by one here, serializing the fetches.
        await update_feed(str(feed["_id"]))
|
||||
|
||||
# API Endpoints
|
||||
|
||||
@app.get("/")
async def root():
    """Service banner: name, version, timestamp, and main endpoint map."""
    endpoint_map = {
        "subscribe": "POST /api/feeds",
        "list_feeds": "GET /api/feeds",
        "get_entries": "GET /api/entries",
        "mark_read": "PUT /api/entries/{entry_id}/read",
        "mark_starred": "PUT /api/entries/{entry_id}/star",
        "statistics": "GET /api/stats"
    }
    return {
        "service": "RSS Feed Service",
        "version": "1.0.0",
        "timestamp": datetime.now().isoformat(),
        "endpoints": endpoint_map,
    }
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Liveness probe: static identity fields plus the current timestamp."""
    payload = {"status": "healthy", "service": "rss-feed"}
    payload["timestamp"] = datetime.now().isoformat()
    return payload
|
||||
|
||||
@app.post("/api/feeds", response_model=FeedSubscription)
async def subscribe_to_feed(request: CreateFeedRequest, background_tasks: BackgroundTasks):
    """Subscribe to an RSS/Atom feed.

    Rejects duplicates by URL, validates the feed by parsing it once,
    stores the subscription, then fetches entries in the background.
    """
    # Check if already subscribed
    existing = await db.feeds.find_one({"url": str(request.url)})
    if existing:
        raise HTTPException(status_code=400, detail="이미 구독 중인 피드입니다")

    # Parse feed to get metadata (and to validate the URL actually parses).
    result = await parser.parse_feed(str(request.url))
    if not result["success"]:
        raise HTTPException(status_code=400, detail=f"피드 파싱 실패: {result['error']}")

    # Create subscription; caller-provided title wins over the feed's own.
    feed = FeedSubscription(
        title=request.title or result["feed"].get("title", "Untitled Feed"),
        url=request.url,
        description=result["feed"].get("description", ""),
        category=request.category,
        update_interval=request.update_interval or settings.default_update_interval
    )

    # Save to database - convert URL to string
    feed_dict = feed.dict()
    feed_dict["url"] = str(feed_dict["url"])
    result = await db.feeds.insert_one(feed_dict)  # rebinds `result`
    feed.id = str(result.inserted_id)

    # Fetch entries in background
    background_tasks.add_task(update_feed, feed.id)

    return feed
|
||||
|
||||
@app.get("/api/feeds", response_model=List[FeedSubscription])
async def list_feeds(
    category: Optional[str] = Query(None, description="카테고리 필터"),
    status: Optional[FeedStatus] = Query(None, description="상태 필터")
):
    """List subscribed feeds, optionally filtered by category and/or status."""
    # Build the Mongo filter from whichever query params were supplied.
    candidates = {"category": category, "status": status}
    query = {key: value for key, value in candidates.items() if value}

    feeds = await db.feeds.find(query).to_list(100)
    for doc in feeds:
        doc["_id"] = str(doc["_id"])

    return feeds
|
||||
|
||||
@app.get("/api/feeds/{feed_id}", response_model=FeedSubscription)
async def get_feed(feed_id: str = Path(..., description="피드 ID")):
    """Return a single feed subscription by its ID (404 when absent)."""
    feed = await db.feeds.find_one({"_id": feed_id})
    if feed is None:
        raise HTTPException(status_code=404, detail="피드를 찾을 수 없습니다")

    feed["_id"] = str(feed["_id"])
    return feed
|
||||
|
||||
@app.put("/api/feeds/{feed_id}", response_model=FeedSubscription)
async def update_feed_subscription(
    feed_id: str = Path(..., description="피드 ID"),
    request: UpdateFeedRequest = ...
):
    """Update mutable fields of a feed subscription and return the new doc.

    NOTE(review): when the request body sets no fields, update_one is still
    issued with an empty $set, which MongoDB rejects — confirm and guard.
    """
    update_data = request.dict(exclude_unset=True)
    if update_data:
        update_data["updated_at"] = datetime.now()

    result = await db.feeds.update_one(
        {"_id": feed_id},
        {"$set": update_data}
    )

    if result.matched_count == 0:
        raise HTTPException(status_code=404, detail="피드를 찾을 수 없습니다")

    # Re-read so the response reflects the stored document.
    feed = await db.feeds.find_one({"_id": feed_id})
    feed["_id"] = str(feed["_id"])
    return feed
|
||||
|
||||
@app.delete("/api/feeds/{feed_id}")
async def unsubscribe_from_feed(feed_id: str = Path(..., description="피드 ID")):
    """Remove a feed subscription and every entry stored for it."""
    deletion = await db.feeds.delete_one({"_id": feed_id})
    if not deletion.deleted_count:
        raise HTTPException(status_code=404, detail="피드를 찾을 수 없습니다")

    # Cascade: drop the feed's entries as well.
    await db.entries.delete_many({"feed_id": feed_id})

    return {"message": "구독이 취소되었습니다"}
|
||||
|
||||
@app.post("/api/feeds/{feed_id}/refresh")
async def refresh_feed(
    feed_id: str = Path(..., description="피드 ID"),
    background_tasks: BackgroundTasks = ...
):
    """Manually trigger a refresh of one feed; the fetch runs in the background."""
    if await db.feeds.find_one({"_id": feed_id}) is None:
        raise HTTPException(status_code=404, detail="피드를 찾을 수 없습니다")

    background_tasks.add_task(update_feed, feed_id)
    return {"message": "피드 새로고침이 시작되었습니다"}
|
||||
|
||||
@app.get("/api/entries", response_model=List[FeedEntry])
async def get_entries(
    feed_id: Optional[str] = Query(None, description="피드 ID"),
    is_read: Optional[bool] = Query(None, description="읽음 상태 필터"),
    is_starred: Optional[bool] = Query(None, description="별표 상태 필터"),
    limit: int = Query(50, ge=1, le=100, description="결과 개수"),
    offset: int = Query(0, ge=0, description="오프셋")
):
    """List feed entries, newest first, with optional filters and paging."""
    query = {}
    if feed_id:
        query["feed_id"] = feed_id
    # Boolean filters distinguish "absent" from explicit False.
    if is_read is not None:
        query["is_read"] = is_read
    if is_starred is not None:
        query["is_starred"] = is_starred

    cursor = (
        db.entries.find(query)
        .sort("published", -1)
        .skip(offset)
        .limit(limit)
    )
    entries = await cursor.to_list(limit)

    for doc in entries:
        doc["_id"] = str(doc["_id"])

    return entries
|
||||
|
||||
@app.get("/api/entries/{entry_id}", response_model=FeedEntry)
async def get_entry(entry_id: str = Path(..., description="엔트리 ID")):
    """Return a single entry by its ID (404 when absent)."""
    entry = await db.entries.find_one({"_id": entry_id})
    if entry is None:
        raise HTTPException(status_code=404, detail="엔트리를 찾을 수 없습니다")

    entry["_id"] = str(entry["_id"])
    return entry
|
||||
|
||||
@app.put("/api/entries/{entry_id}/read")
async def mark_entry_as_read(
    entry_id: str = Path(..., description="엔트리 ID"),
    is_read: bool = Query(True, description="읽음 상태")
):
    """Set or clear the read flag on an entry."""
    outcome = await db.entries.update_one(
        {"_id": entry_id}, {"$set": {"is_read": is_read}}
    )
    if not outcome.matched_count:
        raise HTTPException(status_code=404, detail="엔트리를 찾을 수 없습니다")

    return {"message": f"읽음 상태가 {is_read}로 변경되었습니다"}
|
||||
|
||||
@app.put("/api/entries/{entry_id}/star")
async def mark_entry_as_starred(
    entry_id: str = Path(..., description="엔트리 ID"),
    is_starred: bool = Query(True, description="별표 상태")
):
    """Set or clear the starred flag on an entry."""
    outcome = await db.entries.update_one(
        {"_id": entry_id}, {"$set": {"is_starred": is_starred}}
    )
    if not outcome.matched_count:
        raise HTTPException(status_code=404, detail="엔트리를 찾을 수 없습니다")

    return {"message": f"별표 상태가 {is_starred}로 변경되었습니다"}
|
||||
|
||||
@app.post("/api/entries/mark-all-read")
async def mark_all_as_read(feed_id: Optional[str] = Query(None, description="피드 ID")):
    """Mark every entry as read, optionally scoped to a single feed."""
    query = {"feed_id": feed_id} if feed_id else {}
    result = await db.entries.update_many(query, {"$set": {"is_read": True}})
    return {"message": f"{result.modified_count}개 엔트리가 읽음으로 표시되었습니다"}
|
||||
|
||||
@app.get("/api/stats", response_model=List[FeedStatistics])
async def get_statistics(feed_id: Optional[str] = Query(None, description="피드 ID")):
    """Per-feed statistics: entry counts plus a rough error rate."""
    if feed_id:
        feeds = [await db.feeds.find_one({"_id": feed_id})]
        if not feeds[0]:
            raise HTTPException(status_code=404, detail="피드를 찾을 수 없습니다")
    else:
        feeds = await db.feeds.find().to_list(100)

    stats = []
    for feed in feeds:
        # NOTE: rebinds the `feed_id` query parameter inside the loop.
        feed_id = str(feed["_id"])

        # Count entries (three separate count queries per feed).
        total = await db.entries.count_documents({"feed_id": feed_id})
        unread = await db.entries.count_documents({"feed_id": feed_id, "is_read": False})
        starred = await db.entries.count_documents({"feed_id": feed_id, "is_starred": True})

        # Calculate error rate.
        # NOTE(review): the denominator is error_count plus at most one
        # success — a rough heuristic, not a true fetch ratio.
        error_rate = 0
        if feed.get("error_count", 0) > 0:
            total_fetches = feed.get("error_count", 0) + (1 if feed.get("last_fetch") else 0)
            error_rate = feed.get("error_count", 0) / total_fetches

        stats.append(FeedStatistics(
            feed_id=feed_id,
            total_entries=total,
            unread_entries=unread,
            starred_entries=starred,
            last_update=feed.get("last_fetch"),
            error_rate=error_rate
        ))

    return stats
|
||||
|
||||
@app.get("/api/export/opml")
async def export_opml():
    """Export all feed subscriptions as an OPML 2.0 document.

    Returns the OPML text plus the number of feeds. Attribute values are
    XML-escaped so titles/URLs/categories containing quotes, '&' or '<'
    cannot produce a malformed document (the previous version interpolated
    raw values into the markup).
    """
    # Local import keeps the module's top-level dependency surface unchanged.
    from xml.sax.saxutils import quoteattr

    feeds = await db.feeds.find().to_list(100)

    opml = """<?xml version="1.0" encoding="UTF-8"?>
<opml version="2.0">
    <head>
        <title>RSS Feed Subscriptions</title>
        <dateCreated>{}</dateCreated>
    </head>
    <body>""".format(datetime.now().isoformat())

    for feed in feeds:
        # quoteattr() escapes the value and wraps it in quotes.
        opml += (
            f'\n        <outline text={quoteattr(str(feed["title"]))}'
            f' xmlUrl={quoteattr(str(feed["url"]))} type="rss"'
            f' category={quoteattr(str(feed.get("category", "")))} />'
        )

    opml += "\n    </body>\n</opml>"

    return {
        "opml": opml,
        "feed_count": len(feeds)
    }
|
||||
|
||||
# Google News RSS Endpoints
|
||||
|
||||
@app.get("/api/google-rss/search")
async def get_google_search_rss(
    q: str = Query(..., description="검색 키워드"),
    lang: str = Query("ko", description="언어 코드 (ko, en, ja, zh-CN 등)"),
    country: str = Query("KR", description="국가 코드 (KR, US, JP, CN 등)")
):
    """Build a Google News keyword-search RSS URL and probe-parse it.

    Returns the generated URL plus a preview (sample titles, first 20
    entries) so the caller can inspect the feed before subscribing.
    """
    feed_url = GoogleNewsRSS.search_feed(q, lang, country)

    # Probe the generated URL to verify it actually parses.
    result = await parser.parse_feed(feed_url)

    return {
        "keyword": q,
        "feed_url": feed_url,
        "success": result["success"],
        "feed_title": result["feed"].get("title", "Google News") if result["success"] else None,
        "entry_count": len(result["entries"]) if result["success"] else 0,
        "sample_titles": [entry.get("title", "") for entry in result["entries"][:5]] if result["success"] else [],
        "entries": [
            {
                "title": entry.get("title", ""),
                "link": entry.get("link", ""),
                "published": entry.get("published", ""),
                "summary": entry.get("summary", "")[:200] if entry.get("summary") else ""
            } for entry in result["entries"][:20]
        ] if result["success"] else [],
        "error": result.get("error")
    }
|
||||
|
||||
@app.get("/api/google-rss/topic")
async def get_google_topic_rss(
    category: GoogleNewsCategory = Query(..., description="뉴스 카테고리"),
    lang: str = Query("ko", description="언어 코드"),
    country: str = Query("KR", description="국가 코드")
):
    """Build a Google News topic RSS URL and probe-parse it."""
    feed_url = GoogleNewsRSS.topic_feed(category, lang, country)

    # Probe the generated URL so the caller immediately knows it works.
    result = await parser.parse_feed(feed_url)
    ok = result["success"]

    return {
        "category": category,
        "feed_url": feed_url,
        "success": ok,
        "feed_title": result["feed"].get("title", "Google News") if ok else None,
        "entry_count": len(result["entries"]) if ok else 0,
        "sample_titles": [item.get("title", "") for item in result["entries"][:5]] if ok else [],
        "error": result.get("error")
    }
|
||||
|
||||
@app.get("/api/google-rss/location")
async def get_google_location_rss(
    location: str = Query(..., description="지역명 (예: Seoul, 서울, New York)"),
    lang: str = Query("ko", description="언어 코드"),
    country: str = Query("KR", description="국가 코드")
):
    """Build a Google News local-news RSS URL and probe-parse it."""
    feed_url = GoogleNewsRSS.location_feed(location, lang, country)

    # Probe the generated URL so the caller immediately knows it works.
    result = await parser.parse_feed(feed_url)
    ok = result["success"]

    return {
        "location": location,
        "feed_url": feed_url,
        "success": ok,
        "feed_title": result["feed"].get("title", "Google News") if ok else None,
        "entry_count": len(result["entries"]) if ok else 0,
        "sample_titles": [item.get("title", "") for item in result["entries"][:5]] if ok else [],
        "error": result.get("error")
    }
|
||||
|
||||
@app.get("/api/google-rss/trending")
async def get_google_trending_rss(
    lang: str = Query("ko", description="언어 코드"),
    country: str = Query("KR", description="국가 코드")
):
    """Build the Google News trending RSS URL and probe-parse it."""
    feed_url = GoogleNewsRSS.trending_feed(lang, country)

    # Probe the generated URL so the caller immediately knows it works.
    result = await parser.parse_feed(feed_url)
    ok = result["success"]

    return {
        "feed_url": feed_url,
        "success": ok,
        "feed_title": result["feed"].get("title", "Google News") if ok else None,
        "entry_count": len(result["entries"]) if ok else 0,
        "sample_titles": [item.get("title", "") for item in result["entries"][:5]] if ok else [],
        "error": result.get("error")
    }
|
||||
|
||||
@app.post("/api/google-rss/subscribe")
async def subscribe_google_rss(
    q: Optional[str] = Query(None, description="검색 키워드"),
    category: Optional[GoogleNewsCategory] = Query(None, description="카테고리"),
    location: Optional[str] = Query(None, description="지역명"),
    trending: bool = Query(False, description="트렌딩 뉴스"),
    lang: str = Query("ko", description="언어 코드"),
    country: str = Query("KR", description="국가 코드"),
    background_tasks: BackgroundTasks = ...
):
    """Subscribe to a Google News RSS feed.

    Exactly one selector is used, in priority order: keyword (q), then
    category, then location, then trending. 400 if none is given.
    """
    # Build the feed URL from the first selector provided.
    if q:
        feed_url = GoogleNewsRSS.search_feed(q, lang, country)
        feed_title = f"Google News - {q}"
    elif category:
        feed_url = GoogleNewsRSS.topic_feed(category, lang, country)
        feed_title = f"Google News - {category.value}"
    elif location:
        feed_url = GoogleNewsRSS.location_feed(location, lang, country)
        feed_title = f"Google News - {location}"
    elif trending:
        feed_url = GoogleNewsRSS.trending_feed(lang, country)
        feed_title = f"Google News - Trending ({country})"
    else:
        raise HTTPException(status_code=400, detail="검색어, 카테고리, 지역 중 하나를 지정해주세요")

    # Reject duplicate subscriptions to the same URL.
    existing = await db.feeds.find_one({"url": feed_url})
    if existing:
        raise HTTPException(status_code=400, detail="이미 구독 중인 피드입니다")

    # Validate by parsing once before subscribing.
    result = await parser.parse_feed(feed_url)
    if not result["success"]:
        raise HTTPException(status_code=400, detail=f"피드 파싱 실패: {result['error']}")

    # Create the subscription record.
    feed = FeedSubscription(
        title=feed_title,
        url=feed_url,
        description=result["feed"].get("description", "Google News Feed"),
        category=FeedCategory.NEWS,
        update_interval=900  # 15 minutes
    )

    # Persist; store the URL as a plain string, not a pydantic HttpUrl.
    feed_dict = feed.dict()
    feed_dict["url"] = str(feed_dict["url"])
    result = await db.feeds.insert_one(feed_dict)
    feed.id = str(result.inserted_id)

    # Fetch the feed's entries in the background.
    background_tasks.add_task(update_feed, feed.id)

    return feed
|
||||
74
backup-services/rss-feed/backend/app/models.py
Normal file
74
backup-services/rss-feed/backend/app/models.py
Normal file
@ -0,0 +1,74 @@
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
class FeedStatus(str, Enum):
    """Lifecycle state of a feed subscription."""
    ACTIVE = "active"
    INACTIVE = "inactive"
    ERROR = "error"  # last fetch failed; see last_error / error_count
|
||||
|
||||
class FeedCategory(str, Enum):
    """User-facing grouping label for a subscription."""
    NEWS = "news"
    TECH = "tech"
    BUSINESS = "business"
    SCIENCE = "science"
    HEALTH = "health"
    SPORTS = "sports"
    ENTERTAINMENT = "entertainment"
    LIFESTYLE = "lifestyle"
    POLITICS = "politics"
    OTHER = "other"  # default when no category is chosen
|
||||
|
||||
class FeedSubscription(BaseModel):
    """A subscribed RSS/Atom feed plus its fetch/health bookkeeping."""
    # Mongo document id, serialized via the "_id" alias.
    id: Optional[str] = Field(None, alias="_id")
    title: str
    url: HttpUrl
    description: Optional[str] = None
    category: FeedCategory = FeedCategory.OTHER
    status: FeedStatus = FeedStatus.ACTIVE
    update_interval: int = 900  # seconds
    # When the feed was last fetched successfully (None = never).
    last_fetch: Optional[datetime] = None
    # Message of the most recent fetch/parse failure, if any.
    last_error: Optional[str] = None
    error_count: int = 0
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: datetime = Field(default_factory=datetime.now)
    # Free-form extras; pydantic deep-copies mutable defaults per instance.
    metadata: Dict[str, Any] = {}
|
||||
|
||||
class FeedEntry(BaseModel):
    """One article/item pulled from a subscribed feed."""
    # Mongo document id, serialized via the "_id" alias.
    id: Optional[str] = Field(None, alias="_id")
    feed_id: str
    entry_id: str  # RSS entry unique ID
    title: str
    link: str
    summary: Optional[str] = None
    content: Optional[str] = None
    author: Optional[str] = None
    published: Optional[datetime] = None
    updated: Optional[datetime] = None
    categories: List[str] = []
    # URL of a representative image, when one was found.
    thumbnail: Optional[str] = None
    # Media attachments as {"url", "type", "length"} dicts.
    enclosures: List[Dict[str, Any]] = []
    is_read: bool = False
    is_starred: bool = False
    created_at: datetime = Field(default_factory=datetime.now)
|
||||
|
||||
class CreateFeedRequest(BaseModel):
    """Request body for subscribing to a feed (POST /api/feeds)."""
    url: HttpUrl
    # Optional override; otherwise the feed's own title is used.
    title: Optional[str] = None
    category: FeedCategory = FeedCategory.OTHER
    update_interval: Optional[int] = 900  # seconds
|
||||
|
||||
class UpdateFeedRequest(BaseModel):
    """Partial-update body for PUT /api/feeds/{feed_id}; unset fields are ignored."""
    title: Optional[str] = None
    category: Optional[FeedCategory] = None
    update_interval: Optional[int] = None  # seconds
    status: Optional[FeedStatus] = None
|
||||
|
||||
class FeedStatistics(BaseModel):
    """Aggregated counters for one feed, returned by GET /api/stats."""
    feed_id: str
    total_entries: int
    unread_entries: int
    starred_entries: int
    # Timestamp of the last successful fetch (None = never fetched).
    last_update: Optional[datetime]
    # Heuristic error ratio computed from error_count; see get_statistics.
    error_rate: float
|
||||
Reference in New Issue
Block a user