# site11/services/news-api/backend/app/services/article_service.py

import logging
import re
from datetime import datetime
from typing import List, Optional

from bson import ObjectId

from app.core.config import settings
from app.core.database import get_collection
from app.models.article import Article, ArticleList, ArticleSummary

# Language codes accepted by ArticleService.validate_language.
SUPPORTED_LANGUAGES = [
    "ko",
    "en",
    "zh_cn",
    "zh_tw",
    "ja",
    "fr",
    "de",
    "es",
    "it",
]

class ArticleService:
    """Query helpers over the article collection returned by ``get_collection``.

    Every public method is a static method.  The async ones resolve their
    collection with ``get_collection(language)``, run read queries against
    it, and turn each raw document into a model instance after replacing
    its ``_id`` with the string form.
    """

    _logger = logging.getLogger(__name__)

    # Fields matched case-insensitively by ``search_articles``.
    _SEARCH_FIELDS = (
        "title",
        "summary",
        "subtopics.title",
        "categories",
        "source_keyword",
    )

    # Fields matched exactly by ``get_articles_by_source_keyword``:
    # the direct ``source_keyword`` field (for migrated articles) and the
    # ``entities.*`` fields (for existing articles).
    _SOURCE_KEYWORD_FIELDS = (
        "source_keyword",
        "entities.people",
        "entities.organizations",
        "entities.groups",
    )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _check_paging(page: int, page_size: int) -> None:
        """Reject paging arguments that cannot describe a real page.

        Raises:
            ValueError: if ``page`` or ``page_size`` is smaller than 1.
                Without this check ``page < 1`` produces a negative skip
                and ``page_size == 0`` divides by zero when the page count
                is computed.
        """
        if page < 1:
            raise ValueError(f"page must be >= 1, got {page}")
        if page_size < 1:
            raise ValueError(f"page_size must be >= 1, got {page_size}")

    @staticmethod
    def _total_pages(total: int, page_size: int) -> int:
        """Return the number of pages needed for ``total`` items (ceiling division)."""
        return (total + page_size - 1) // page_size

    @staticmethod
    def _search_query(keyword: str) -> dict:
        """Build the ``$or`` filter used by ``search_articles``.

        The keyword is escaped so that it is matched as literal text;
        regex metacharacters typed by the user (``(``, ``*``, ``.`` ...)
        must neither change the meaning of the search nor produce an
        invalid pattern.
        """
        pattern = re.escape(keyword)
        return {
            "$or": [
                {field: {"$regex": pattern, "$options": "i"}}
                for field in ArticleService._SEARCH_FIELDS
            ]
        }

    @staticmethod
    def _to_model(model_cls, doc):
        """Stringify ``doc["_id"]`` in place and build ``model_cls`` from the document."""
        doc["_id"] = str(doc["_id"])
        return model_cls(**doc)

    @staticmethod
    async def _paginate(collection, query: dict, page: int, page_size: int) -> ArticleList:
        """Run ``query`` and return one page of results, ``created_at`` descending.

        Args:
            collection: collection object to query.
            query: filter document passed to ``count_documents`` and ``find``.
            page: 1-based page number.
            page_size: maximum number of articles on the page.

        Returns:
            An ``ArticleList`` carrying the total match count, the paging
            arguments, the derived page count and the page's articles.

        Raises:
            ValueError: if ``page`` or ``page_size`` is smaller than 1.
        """
        ArticleService._check_paging(page, page_size)

        total = await collection.count_documents(query)

        cursor = (
            collection.find(query)
            .sort("created_at", -1)
            .skip((page - 1) * page_size)
            .limit(page_size)
        )
        articles = [ArticleService._to_model(Article, doc) async for doc in cursor]

        return ArticleList(
            total=total,
            page=page,
            page_size=page_size,
            total_pages=ArticleService._total_pages(total, page_size),
            articles=articles,
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @staticmethod
    def validate_language(language: str) -> bool:
        """Return True if ``language`` is one of ``SUPPORTED_LANGUAGES``."""
        return language in SUPPORTED_LANGUAGES

    @staticmethod
    async def get_articles(
        language: str,
        page: int = 1,
        page_size: int = 20,
        category: Optional[str] = None
    ) -> ArticleList:
        """Return one page of articles, optionally restricted to a category.

        Args:
            language: passed to ``get_collection`` to select the collection.
            page: 1-based page number.
            page_size: maximum number of articles on the page.
            category: when truthy, only documents whose ``categories``
                field (an array) contains this value are returned.

        Raises:
            ValueError: if ``page`` or ``page_size`` is smaller than 1.
        """
        collection = get_collection(language)

        query = {}
        if category:
            # ``categories`` is an array field; equality matches any element.
            query["categories"] = category

        return await ArticleService._paginate(collection, query, page, page_size)

    @staticmethod
    async def get_article_by_id(language: str, article_id: str) -> Optional[Article]:
        """Return the article with the given id, or None.

        None is returned when ``article_id`` is not a valid ObjectId, when
        no document matches, or when the lookup/conversion fails; in the
        last case the error is logged rather than raised (best effort).
        """
        collection = get_collection(language)

        # A malformed id can never match; skip the query instead of relying
        # on the exception path below.
        if not ObjectId.is_valid(article_id):
            return None

        try:
            doc = await collection.find_one({"_id": ObjectId(article_id)})
            if doc:
                return ArticleService._to_model(Article, doc)
        except Exception:
            # Deliberately best-effort: callers receive None on failure.
            ArticleService._logger.exception("Error fetching article %s", article_id)

        return None

    @staticmethod
    async def get_latest_articles(
        language: str,
        limit: int = 10
    ) -> List[ArticleSummary]:
        """Return up to ``limit`` article summaries, ``created_at`` descending."""
        collection = get_collection(language)

        cursor = collection.find().sort("created_at", -1).limit(limit)
        return [ArticleService._to_model(ArticleSummary, doc) async for doc in cursor]

    @staticmethod
    async def search_articles(
        language: str,
        keyword: str,
        page: int = 1,
        page_size: int = 20
    ) -> ArticleList:
        """Return one page of articles containing ``keyword``.

        The keyword is matched literally and case-insensitively against
        the title, summary, subtopic titles, categories and source keyword.

        Raises:
            ValueError: if ``page`` or ``page_size`` is smaller than 1.
        """
        collection = get_collection(language)
        query = ArticleService._search_query(keyword)
        return await ArticleService._paginate(collection, query, page, page_size)

    @staticmethod
    async def get_categories(language: str) -> List[str]:
        """Return the distinct, non-empty category values in ascending order."""
        collection = get_collection(language)

        # ``categories`` is an array, so unwind it to one document per
        # element before grouping on the element value.
        pipeline = [
            {"$unwind": "$categories"},
            {"$group": {"_id": "$categories"}},
            {"$sort": {"_id": 1}}
        ]

        cursor = collection.aggregate(pipeline)
        # Falsy group keys (e.g. empty string / null) are dropped.
        return [doc["_id"] async for doc in cursor if doc["_id"]]

    @staticmethod
    async def get_articles_by_ids(language: str, article_ids: List[str]) -> List[Article]:
        """Return the articles matching any of ``article_ids``.

        Deprecated - use ``get_articles_by_source_keyword``.

        Ids that are not valid ObjectIds are ignored.  On any failure the
        error is logged and an empty list is returned (best effort).
        """
        collection = get_collection(language)

        if not article_ids:
            return []

        try:
            # Convert string ids to ObjectIds, skipping malformed ones.
            object_ids = [ObjectId(aid) for aid in article_ids if ObjectId.is_valid(aid)]

            cursor = collection.find({"_id": {"$in": object_ids}})
            return [ArticleService._to_model(Article, doc) async for doc in cursor]
        except Exception:
            ArticleService._logger.exception("Error fetching articles by IDs")
            return []

    @staticmethod
    async def get_articles_by_source_keyword(
        language: str,
        source_keyword: str,
        page: int = 1,
        page_size: int = 20
    ) -> ArticleList:
        """Return one page of articles associated with ``source_keyword``.

        A document matches when the keyword equals its ``source_keyword``
        field or appears in ``entities.people``, ``entities.organizations``
        or ``entities.groups``.

        Raises:
            ValueError: if ``page`` or ``page_size`` is smaller than 1.
        """
        collection = get_collection(language)

        query = {
            "$or": [
                {field: source_keyword}
                for field in ArticleService._SOURCE_KEYWORD_FIELDS
            ]
        }

        return await ArticleService._paginate(collection, query, page, page_size)