- Add get_articles_by_source_keyword method to query articles by entities - Search across entities.people, entities.organizations, and entities.groups - Deprecate get_articles_by_ids method in favor of dynamic queries - Support pagination for outlet article listings 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
216 lines
6.5 KiB
Python
216 lines
6.5 KiB
Python
import logging
import re
from datetime import datetime
from typing import List, Optional

from bson import ObjectId

from app.core.config import settings
from app.core.database import get_collection
from app.models.article import Article, ArticleList, ArticleSummary
|
|
# Language codes recognised by ArticleService.validate_language.
SUPPORTED_LANGUAGES = [
    "ko",
    "en",
    "zh_cn",
    "zh_tw",
    "ja",
    "fr",
    "de",
    "es",
    "it",
]
|
|
|
|
class ArticleService:
    """Namespace of static query helpers over the per-language article collections.

    Every public method obtains its collection through
    ``get_collection(language)`` and converts each document's ``_id`` to
    ``str`` before building the response model.
    """

    # Used to report errors that the best-effort lookups deliberately swallow.
    _logger = logging.getLogger(__name__)

    @staticmethod
    def validate_language(language: str) -> bool:
        """Return True when *language* is listed in ``SUPPORTED_LANGUAGES``."""
        return language in SUPPORTED_LANGUAGES

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _total_pages(total: int, page_size: int) -> int:
        """Return how many pages of *page_size* items hold *total* items.

        This is a ceiling division; *page_size* must be at least 1.
        """
        return (total + page_size - 1) // page_size

    @staticmethod
    def _search_filter(keyword: str) -> dict:
        """Build the case-insensitive substring filter used by ``search_articles``.

        The keyword is escaped so that it is matched literally: regex
        metacharacters typed by a user can neither break the query nor inject
        an expensive pattern.
        """
        fields = (
            "title",
            "summary",
            "subtopics.title",
            "categories",
            "source_keyword",
        )
        pattern = re.escape(keyword)
        return {
            "$or": [
                {field: {"$regex": pattern, "$options": "i"}}
                for field in fields
            ]
        }

    @staticmethod
    def _to_article(doc: dict) -> Article:
        """Convert a raw document into an ``Article``, stringifying its ``_id``."""
        doc["_id"] = str(doc["_id"])
        return Article(**doc)

    @staticmethod
    async def _paginate(
        collection,
        query: dict,
        page: int,
        page_size: int
    ) -> ArticleList:
        """Run *query* on *collection* and return one page, newest first.

        Args:
            collection: Collection object returned by ``get_collection``.
            query: Filter document applied to both the count and the find.
            page: 1-based page number; values below 1 are treated as 1.
            page_size: Items per page; values below 1 are treated as 1.

        Returns:
            An ``ArticleList`` carrying the (clamped) paging values, the total
            match count, the page count and the articles of the page.
        """
        # A negative skip is rejected by the driver and a zero page size
        # would divide by zero below, so clamp instead of failing.
        page = max(page, 1)
        page_size = max(page_size, 1)

        total = await collection.count_documents(query)

        cursor = (
            collection.find(query)
            .sort("created_at", -1)
            .skip((page - 1) * page_size)
            .limit(page_size)
        )
        articles = [ArticleService._to_article(doc) async for doc in cursor]

        return ArticleList(
            total=total,
            page=page,
            page_size=page_size,
            total_pages=ArticleService._total_pages(total, page_size),
            articles=articles
        )

    # ------------------------------------------------------------------
    # Public queries
    # ------------------------------------------------------------------

    @staticmethod
    async def get_articles(
        language: str,
        page: int = 1,
        page_size: int = 20,
        category: Optional[str] = None
    ) -> ArticleList:
        """List articles, newest first, optionally restricted to one category.

        Args:
            language: Language code passed to ``get_collection``.
            page: 1-based page number.
            page_size: Number of articles per page.
            category: When given, only articles whose ``categories`` field
                matches this value are returned.

        Returns:
            The requested page as an ``ArticleList``.
        """
        collection = get_collection(language)

        # The stored field is "categories" (an array), not "category".
        query = {"categories": category} if category else {}

        return await ArticleService._paginate(collection, query, page, page_size)

    @staticmethod
    async def get_article_by_id(language: str, article_id: str) -> Optional[Article]:
        """Fetch a single article by its id.

        Returns:
            The ``Article``, or ``None`` when the id is malformed, no document
            matches, or the lookup fails (the failure is logged).
        """
        collection = get_collection(language)

        # A malformed id can never match; treat it as "not found" rather
        # than relying on an exception for control flow.
        if not ObjectId.is_valid(article_id):
            return None

        try:
            doc = await collection.find_one({"_id": ObjectId(article_id)})
            if doc:
                return ArticleService._to_article(doc)
        except Exception:
            # Best-effort lookup: callers receive None instead of an error.
            ArticleService._logger.exception(
                "Error fetching article %s", article_id
            )

        return None

    @staticmethod
    async def get_latest_articles(
        language: str,
        limit: int = 10
    ) -> List[ArticleSummary]:
        """Return up to *limit* article summaries, newest first."""
        collection = get_collection(language)

        cursor = collection.find().sort("created_at", -1).limit(limit)

        summaries = []
        async for doc in cursor:
            doc["_id"] = str(doc["_id"])
            summaries.append(ArticleSummary(**doc))

        return summaries

    @staticmethod
    async def search_articles(
        language: str,
        keyword: str,
        page: int = 1,
        page_size: int = 20
    ) -> ArticleList:
        """Search articles for *keyword*, newest first.

        The keyword is matched literally and case-insensitively as a
        substring of the title, summary, subtopic titles, categories and
        source keyword.

        Args:
            language: Language code passed to ``get_collection``.
            keyword: Text to look for; regex metacharacters are escaped.
            page: 1-based page number.
            page_size: Number of articles per page.

        Returns:
            The requested page of matches as an ``ArticleList``.
        """
        collection = get_collection(language)
        query = ArticleService._search_filter(keyword)

        return await ArticleService._paginate(collection, query, page, page_size)

    @staticmethod
    async def get_categories(language: str) -> List[str]:
        """Return the distinct, non-empty category values in ascending order."""
        collection = get_collection(language)

        # "categories" is an array, so unwind it to get one row per element
        # before de-duplicating and sorting.
        pipeline = [
            {"$unwind": "$categories"},
            {"$group": {"_id": "$categories"}},
            {"$sort": {"_id": 1}}
        ]

        cursor = collection.aggregate(pipeline)
        return [doc["_id"] async for doc in cursor if doc["_id"]]

    @staticmethod
    async def get_articles_by_ids(language: str, article_ids: List[str]) -> List[Article]:
        """Fetch several articles by id.

        Deprecated: use ``get_articles_by_source_keyword`` instead.

        Ids that are not valid ObjectIds are ignored.

        Returns:
            The matching articles, or an empty list when no usable id was
            given or the lookup fails (the failure is logged).
        """
        collection = get_collection(language)

        if not article_ids:
            return []

        try:
            # Convert string ids to ObjectIds, dropping malformed ones.
            object_ids = [
                ObjectId(aid) for aid in article_ids if ObjectId.is_valid(aid)
            ]
            if not object_ids:
                # Nothing can match an empty $in; skip the round trip.
                return []

            cursor = collection.find({"_id": {"$in": object_ids}})
            return [ArticleService._to_article(doc) async for doc in cursor]
        except Exception:
            # Best-effort lookup: callers receive [] instead of an error.
            ArticleService._logger.exception("Error fetching articles by IDs")
            return []

    @staticmethod
    async def get_articles_by_source_keyword(
        language: str,
        source_keyword: str,
        page: int = 1,
        page_size: int = 20
    ) -> ArticleList:
        """List articles associated with *source_keyword*, newest first.

        The match is an exact comparison against any of:

        1. the ``source_keyword`` field (for migrated articles), or
        2. ``entities.people``, ``entities.organizations`` or
           ``entities.groups`` (for existing articles).

        Args:
            language: Language code passed to ``get_collection``.
            source_keyword: Value to match exactly.
            page: 1-based page number.
            page_size: Number of articles per page.

        Returns:
            The requested page of matches as an ``ArticleList``.
        """
        collection = get_collection(language)

        query = {
            "$or": [
                {"source_keyword": source_keyword},
                {"entities.people": source_keyword},
                {"entities.organizations": source_keyword},
                {"entities.groups": source_keyword}
            ]
        }

        return await ArticleService._paginate(collection, query, page, page_size)