Files
site11/services/news-api/backend/app/services/article_service.py
jungwoo choi 86ca214dd8 feat: Add source_keyword-based article queries for dynamic outlet articles
- Add get_articles_by_source_keyword method to query articles by entities
- Search across entities.people, entities.organizations, and entities.groups
- Deprecate get_articles_by_ids method in favor of dynamic queries
- Support pagination for outlet article listings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-13 16:53:09 +09:00

216 lines
6.5 KiB
Python

import logging
import re
from datetime import datetime
from typing import List, Optional

from bson import ObjectId

from app.core.config import settings
from app.core.database import get_collection
from app.models.article import Article, ArticleList, ArticleSummary
# Language codes accepted by ArticleService.validate_language.
SUPPORTED_LANGUAGES = [
    "ko",
    "en",
    "zh_cn",
    "zh_tw",
    "ja",
    "fr",
    "de",
    "es",
    "it",
]
logger = logging.getLogger(__name__)


class ArticleService:
    """Query helpers for articles stored in per-language collections.

    Every public method is a ``@staticmethod`` that obtains its collection
    from ``get_collection(language)`` and converts the returned documents
    into ``Article`` / ``ArticleSummary`` / ``ArticleList`` models.
    """

    # Fields searched (case-insensitively) by ``search_articles``.
    _SEARCH_FIELDS = (
        "title",
        "summary",
        "subtopics.title",
        "categories",
        "source_keyword",
    )

    # Fields compared for exact equality by ``get_articles_by_source_keyword``.
    _SOURCE_KEYWORD_FIELDS = (
        "source_keyword",
        "entities.people",
        "entities.organizations",
        "entities.groups",
    )

    @staticmethod
    def validate_language(language: str) -> bool:
        """Return True when *language* is listed in ``SUPPORTED_LANGUAGES``."""
        return language in SUPPORTED_LANGUAGES

    @staticmethod
    def _to_model(model_cls, doc):
        """Build *model_cls* from a raw document, converting ``_id`` to ``str``."""
        doc["_id"] = str(doc["_id"])
        return model_cls(**doc)

    @staticmethod
    async def _paginate(collection, query: dict, page: int, page_size: int) -> ArticleList:
        """Run *query* on *collection* and return one page, newest first.

        Args:
            collection: Async collection object returned by ``get_collection``.
            query: Filter document passed to ``count_documents`` and ``find``.
            page: 1-based page number; values below 1 are treated as 1.
            page_size: Items per page; values below 1 are treated as 1.

        Returns:
            ``ArticleList`` holding the total match count, the effective
            page / page_size, the number of pages and the page's articles.
        """
        # Clamp instead of failing: page < 1 would produce a negative skip and
        # page_size == 0 would divide by zero when computing total_pages.
        page = max(page, 1)
        page_size = max(page_size, 1)

        total = await collection.count_documents(query)
        cursor = (
            collection.find(query)
            .sort("created_at", -1)
            .skip((page - 1) * page_size)
            .limit(page_size)
        )
        articles = [ArticleService._to_model(Article, doc) async for doc in cursor]

        return ArticleList(
            total=total,
            page=page,
            page_size=page_size,
            total_pages=(total + page_size - 1) // page_size,  # ceiling division
            articles=articles,
        )

    @staticmethod
    async def get_articles(
        language: str,
        page: int = 1,
        page_size: int = 20,
        category: Optional[str] = None
    ) -> ArticleList:
        """Return a page of articles, optionally restricted to one category.

        Args:
            language: Language code used to select the collection.
            page: 1-based page number.
            page_size: Number of articles per page.
            category: When given, only articles whose ``categories`` field
                matches this value are returned.
        """
        collection = get_collection(language)

        query = {}
        if category:
            # The documents keep their categories in the array field
            # ``categories`` (not a scalar ``category`` field).
            query["categories"] = category

        return await ArticleService._paginate(collection, query, page, page_size)

    @staticmethod
    async def get_article_by_id(language: str, article_id: str) -> Optional[Article]:
        """Return the article with the given ID, or None.

        None is returned when *article_id* is not a valid ObjectId, when no
        document matches, or when the lookup/conversion fails (the failure is
        logged rather than raised).
        """
        collection = get_collection(language)

        # Same validity check as get_articles_by_ids; avoids using an
        # exception to detect a malformed ID.
        if not ObjectId.is_valid(article_id):
            return None

        try:
            doc = await collection.find_one({"_id": ObjectId(article_id)})
            if doc:
                return ArticleService._to_model(Article, doc)
        except Exception:
            # Best-effort lookup: report the failure with a traceback and fall
            # through to the "not found" result instead of propagating.
            logger.exception("Error fetching article %r", article_id)
        return None

    @staticmethod
    async def get_latest_articles(
        language: str,
        limit: int = 10
    ) -> List[ArticleSummary]:
        """Return up to *limit* article summaries, newest ``created_at`` first."""
        collection = get_collection(language)
        cursor = collection.find().sort("created_at", -1).limit(limit)
        return [ArticleService._to_model(ArticleSummary, doc) async for doc in cursor]

    @staticmethod
    async def search_articles(
        language: str,
        keyword: str,
        page: int = 1,
        page_size: int = 20
    ) -> ArticleList:
        """Return a page of articles containing *keyword*.

        The keyword is matched case-insensitively as literal text against the
        title, summary, subtopic titles, categories and source_keyword fields.

        Args:
            language: Language code used to select the collection.
            keyword: Text to look for; regex metacharacters are escaped.
            page: 1-based page number.
            page_size: Number of articles per page.
        """
        collection = get_collection(language)

        # Escape the caller-supplied text so inputs such as "C++" or "(" are
        # searched literally instead of being interpreted as a regex pattern.
        escaped = re.escape(keyword)
        query = {
            "$or": [
                {field: {"$regex": escaped, "$options": "i"}}
                for field in ArticleService._SEARCH_FIELDS
            ]
        }

        return await ArticleService._paginate(collection, query, page, page_size)

    @staticmethod
    async def get_categories(language: str) -> List[str]:
        """Return the distinct, non-empty category values in ascending order."""
        collection = get_collection(language)

        # ``categories`` is an array, so unwind it to one row per element
        # before grouping on the element value.
        pipeline = [
            {"$unwind": "$categories"},
            {"$group": {"_id": "$categories"}},
            {"$sort": {"_id": 1}}
        ]
        cursor = collection.aggregate(pipeline)
        return [doc["_id"] async for doc in cursor if doc["_id"]]

    @staticmethod
    async def get_articles_by_ids(language: str, article_ids: List[str]) -> List[Article]:
        """Return the articles matching any of the given IDs.

        Deprecated - use ``get_articles_by_source_keyword`` instead.

        IDs that are not valid ObjectIds are ignored. An empty list is
        returned when no usable ID is supplied or when the query fails (the
        failure is logged rather than raised).
        """
        collection = get_collection(language)
        if not article_ids:
            return []

        try:
            object_ids = [ObjectId(aid) for aid in article_ids if ObjectId.is_valid(aid)]
            if not object_ids:
                # Nothing valid to look up; skip the pointless "$in: []" query.
                return []

            cursor = collection.find({"_id": {"$in": object_ids}})
            return [ArticleService._to_model(Article, doc) async for doc in cursor]
        except Exception:
            logger.exception("Error fetching articles by IDs")
            return []

    @staticmethod
    async def get_articles_by_source_keyword(
        language: str,
        source_keyword: str,
        page: int = 1,
        page_size: int = 20
    ) -> ArticleList:
        """Return a page of articles associated with *source_keyword*.

        An article matches when the value equals its ``source_keyword`` field
        (migrated articles) or appears in ``entities.people``,
        ``entities.organizations`` or ``entities.groups`` (existing articles).

        Args:
            language: Language code used to select the collection.
            source_keyword: Exact value to match.
            page: 1-based page number.
            page_size: Number of articles per page.
        """
        collection = get_collection(language)
        query = {
            "$or": [
                {field: source_keyword}
                for field in ArticleService._SOURCE_KEYWORD_FIELDS
            ]
        }
        return await ArticleService._paginate(collection, query, page, page_size)