feat: Refactor outlets with multilingual support and dynamic queries
- Replace static articles array with dynamic source_keyword queries
- Use MongoDB _id as unique identifier for outlets
- Add multilingual translations (9 languages: ko, en, zh_cn, zh_tw, ja, fr, de, es, it)
- Add OutletService for database operations
- Add outlet migration script with Korean source_keyword matching
- Remove JSON file-based outlet loading
- Add /{language}/outlets/{outlet_id}/articles endpoint for dynamic article retrieval
This resolves the design issues with:
1. Static articles array requiring constant updates
2. Lack of multilingual support for outlet names/descriptions
3. Broken image URLs
4. Korean entity matching for article queries
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -2,25 +2,14 @@ from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import Optional
|
||||
from app.services.article_service import ArticleService
|
||||
from app.services.comment_service import CommentService
|
||||
from app.services.outlet_service import OutletService
|
||||
from app.models.article import ArticleList, Article, ArticleSummary
|
||||
from app.models.comment import Comment, CommentCreate, CommentList
|
||||
from app.models.outlet import Outlet
|
||||
from typing import List
|
||||
import json
|
||||
import os
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Load outlets data
|
||||
OUTLETS_FILE = os.path.join(os.path.dirname(__file__), '../../outlets-extracted.json')
|
||||
outlets_data = None
|
||||
|
||||
def load_outlets():
    """Return the outlets JSON document, reading it from disk at most once.

    The parsed content of ``OUTLETS_FILE`` is cached in the module-level
    ``outlets_data`` variable; later calls return the cached object.
    """
    global outlets_data

    # Fast path: the file was already parsed by an earlier call.
    if outlets_data is not None:
        return outlets_data

    with open(OUTLETS_FILE, 'r', encoding='utf-8') as handle:
        outlets_data = json.load(handle)
    return outlets_data
|
||||
|
||||
@router.get("/{language}/articles", response_model=ArticleList)
|
||||
async def get_articles(
|
||||
language: str,
|
||||
@ -84,28 +73,48 @@ async def get_categories(language: str):
|
||||
@router.get("/outlets")
async def get_outlets(category: Optional[str] = Query(None, description="Filter by category: people, topics, companies")):
    """Get outlets list - people, topics, companies.

    With ``category`` set, returns ``{category: [outlet, ...]}`` for that
    category only; ``OutletService.get_all_outlets`` rejects an unknown
    category with HTTP 400. Without it, returns a dict with one list per
    category.
    """
    if category:
        # Get outlets for specific category
        outlets = await OutletService.get_all_outlets(category=category)
        return {category: outlets}

    # Get all outlets grouped by category
    result = {}
    for cat in ['people', 'topics', 'companies']:
        result[cat] = await OutletService.get_all_outlets(category=cat)

    return result
|
||||
|
||||
@router.get("/outlets/{outlet_id}")
async def get_outlet_by_id(outlet_id: str):
    """Get specific outlet by ID (_id).

    Delegates to ``OutletService.get_outlet_by_id``, which raises HTTP 400 for
    an invalid ID and HTTP 404 when no outlet matches.
    """
    outlet = await OutletService.get_outlet_by_id(outlet_id)
    return outlet
|
||||
@router.get("/{language}/outlets/{outlet_id}/articles")
async def get_outlet_articles(
    language: str,
    outlet_id: str,
    page: int = Query(1, ge=1, description="Page number"),
    page_size: int = Query(20, ge=1, le=100, description="Items per page")
):
    """Get articles for a specific outlet using source_keyword.

    Raises HTTP 400 when ``language`` is not accepted by
    ``ArticleService.validate_language``. Outlet lookup errors (invalid ID,
    unknown outlet) are raised by ``OutletService.get_outlet_by_id``.
    """
    if not ArticleService.validate_language(language):
        raise HTTPException(status_code=400, detail=f"Unsupported language: {language}")

    # Get outlet to retrieve source_keyword
    outlet = await OutletService.get_outlet_by_id(outlet_id)

    # Query articles by source_keyword dynamically
    articles_result = await ArticleService.get_articles_by_source_keyword(
        language,
        outlet['source_keyword'],
        page,
        page_size
    )

    return articles_result
|
||||
|
||||
# Comment endpoints
|
||||
@router.get("/comments", response_model=CommentList)
|
||||
|
||||
47
services/news-api/backend/app/models/outlet.py
Normal file
47
services/news-api/backend/app/models/outlet.py
Normal file
@ -0,0 +1,47 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional, Dict
|
||||
|
||||
class OutletTranslations(BaseModel):
    """One optional text value per supported language code.

    Every language defaults to ``None``, meaning no translation is stored.
    """

    # Korean and English
    ko: Optional[str] = None
    en: Optional[str] = None

    # Chinese variants (zh_cn / zh_tw)
    zh_cn: Optional[str] = None
    zh_tw: Optional[str] = None

    # Japanese
    ja: Optional[str] = None

    # European languages
    fr: Optional[str] = None
    de: Optional[str] = None
    es: Optional[str] = None
    it: Optional[str] = None
|
||||
|
||||
class OutletBase(BaseModel):
    """Fields shared by the outlet create and read models."""

    source_keyword: str  # Used to query articles dynamically
    category: str  # people, topics, companies

    # The model class is itself a zero-argument factory, so no lambda wrapper
    # is needed; each instance still gets its own fresh OutletTranslations.
    name_translations: OutletTranslations = Field(default_factory=OutletTranslations)
    description_translations: OutletTranslations = Field(default_factory=OutletTranslations)
    image: Optional[str] = None

    # Deprecated - kept for backward compatibility during migration
    name: Optional[str] = None
    description: Optional[str] = None
|
||||
|
||||
class OutletCreate(OutletBase):
    """Payload for creating an outlet; adds no fields beyond OutletBase."""
|
||||
|
||||
class OutletUpdate(BaseModel):
    """Partial-update payload for an outlet.

    Every field is optional and defaults to ``None`` so callers can send only
    the fields they want to change.
    """

    source_keyword: Optional[str] = None
    category: Optional[str] = None
    name_translations: Optional[OutletTranslations] = None
    description_translations: Optional[OutletTranslations] = None
    image: Optional[str] = None

    # Deprecated
    name: Optional[str] = None
    description: Optional[str] = None
    articles: Optional[List[str]] = None
|
||||
|
||||
class Outlet(OutletBase):
    """Outlet as read back from storage.

    Carries the document identifier in addition to the OutletBase fields.
    """

    # String form of the MongoDB ``_id``. Populated from a ``_id`` key (the
    # alias) or from ``id``; optional so existing callers that build an Outlet
    # without an identifier keep working.
    id: Optional[str] = Field(default=None, alias="_id")

    class Config:
        from_attributes = True
        # Accept both the ``_id`` alias and the ``id`` field name on input.
        populate_by_name = True
|
||||
|
||||
class OutletList(BaseModel):
    """A list of outlets together with a total count."""

    outlets: List[Outlet]
    total: int
|
||||
111
services/news-api/backend/app/services/outlet_service.py
Normal file
111
services/news-api/backend/app/services/outlet_service.py
Normal file
@ -0,0 +1,111 @@
|
||||
from app.core.database import get_database
|
||||
from app.models.outlet import Outlet, OutletCreate, OutletUpdate, OutletList
|
||||
from typing import Optional, List
|
||||
from fastapi import HTTPException
|
||||
from bson import ObjectId
|
||||
|
||||
class OutletService:
    """Database operations for the ``outlets`` MongoDB collection.

    Outlets are identified by their MongoDB ``_id``. Every method that takes
    an ``outlet_id`` expects the string form of that ObjectId and raises
    HTTP 400 when the string is not a valid ObjectId.
    """

    _VALID_CATEGORIES = ('people', 'topics', 'companies')

    @staticmethod
    def _to_object_id(outlet_id: str) -> ObjectId:
        """Convert ``outlet_id`` to an ObjectId or raise HTTP 400."""
        # Validate up front instead of catching every exception around the
        # query, so real database failures are not reported as a bad ID.
        if not ObjectId.is_valid(outlet_id):
            raise HTTPException(status_code=400, detail=f"Invalid outlet ID: {outlet_id}")
        return ObjectId(outlet_id)

    @classmethod
    async def get_all_outlets(cls, category: Optional[str] = None) -> List[dict]:
        """Get all outlets, optionally filtered by category.

        Returns the raw documents with ``_id`` converted to a string.
        Raises HTTP 400 when ``category`` is given but not one of
        people, topics, companies.
        """
        db = get_database()
        collection = db.outlets

        query = {}
        if category:
            if category not in cls._VALID_CATEGORIES:
                raise HTTPException(status_code=400, detail=f"Invalid category: {category}. Must be one of: people, topics, companies")
            query['category'] = category

        cursor = collection.find(query)
        outlets = await cursor.to_list(length=None)

        # Convert _id to string
        for outlet in outlets:
            outlet['_id'] = str(outlet['_id'])

        return outlets

    @classmethod
    async def get_outlet_by_id(cls, outlet_id: str) -> dict:
        """Get specific outlet by ID (_id).

        Returns the raw document with ``_id`` converted to a string.
        Raises HTTP 400 for an invalid ID and HTTP 404 when no outlet matches.
        """
        db = get_database()
        collection = db.outlets

        object_id = cls._to_object_id(outlet_id)
        outlet = await collection.find_one({"_id": object_id})

        if not outlet:
            raise HTTPException(status_code=404, detail=f"Outlet not found: {outlet_id}")

        # Convert _id to string
        outlet['_id'] = str(outlet['_id'])
        return outlet

    @classmethod
    async def create_outlet(cls, outlet_data: OutletCreate) -> Outlet:
        """Create a new outlet.

        MongoDB generates the ``_id``. Raises HTTP 400 when an outlet with the
        same ``source_keyword`` already exists in the same category.
        """
        db = get_database()
        collection = db.outlets

        # OutletCreate has no ``id`` field, so duplicates are detected on the
        # (source_keyword, category) pair instead.
        existing = await collection.find_one({
            "source_keyword": outlet_data.source_keyword,
            "category": outlet_data.category,
        })
        if existing:
            raise HTTPException(
                status_code=400,
                detail=f"Outlet with source_keyword {outlet_data.source_keyword} already exists in category {outlet_data.category}"
            )

        outlet_dict = outlet_data.model_dump()
        result = await collection.insert_one(outlet_dict)

        # Expose the generated identifier in the same string form used by the
        # read methods.
        outlet_dict['_id'] = str(result.inserted_id)

        return Outlet(**outlet_dict)

    @classmethod
    async def update_outlet(cls, outlet_id: str, outlet_data: OutletUpdate) -> Outlet:
        """Update an existing outlet.

        Only fields explicitly set on ``outlet_data`` are written.
        Raises HTTP 400 for an invalid ID and HTTP 404 when no outlet matches.
        """
        db = get_database()
        collection = db.outlets

        object_id = cls._to_object_id(outlet_id)

        # Check if outlet exists
        existing = await collection.find_one({"_id": object_id})
        if not existing:
            raise HTTPException(status_code=404, detail=f"Outlet not found: {outlet_id}")

        # Only update fields that are provided
        update_data = outlet_data.model_dump(exclude_unset=True)

        if update_data:
            await collection.update_one(
                {"_id": object_id},
                {"$set": update_data}
            )

        # Return updated outlet
        updated = await collection.find_one({"_id": object_id})
        if not updated:
            # The document was removed between the update and the re-read.
            raise HTTPException(status_code=404, detail=f"Outlet not found: {outlet_id}")

        updated['_id'] = str(updated['_id'])
        return Outlet(**updated)

    @classmethod
    async def delete_outlet(cls, outlet_id: str) -> bool:
        """Delete an outlet.

        Returns True on success. Raises HTTP 400 for an invalid ID and
        HTTP 404 when no outlet matches.
        """
        db = get_database()
        collection = db.outlets

        object_id = cls._to_object_id(outlet_id)
        result = await collection.delete_one({"_id": object_id})

        if result.deleted_count == 0:
            raise HTTPException(status_code=404, detail=f"Outlet not found: {outlet_id}")

        return True

    @classmethod
    async def get_count(cls, category: Optional[str] = None) -> int:
        """Get total count of outlets, optionally restricted to one category."""
        db = get_database()
        collection = db.outlets

        query = {}
        if category:
            query['category'] = category

        return await collection.count_documents(query)
|
||||
@ -0,0 +1,129 @@
|
||||
"""
|
||||
Script to add source_keyword field to existing articles based on outlet mappings
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
|
||||
# MongoDB connection settings
|
||||
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")
|
||||
DB_NAME = os.getenv("DB_NAME", "news_api_db")
|
||||
|
||||
# Supported languages
|
||||
LANGUAGES = ["ko", "en", "zh_cn", "zh_tw", "ja", "fr", "de", "es", "it"]
|
||||
|
||||
def _build_name_to_keyword(outlets):
    """Map each outlet's Korean name and its source_keyword to the source_keyword."""
    name_to_keyword = {}
    for outlet in outlets:
        keyword = outlet.get('source_keyword')
        if not keyword:
            # Outlets without a source_keyword cannot be matched to articles.
            continue

        # Korean name -> source_keyword
        name_ko = outlet.get('name') or (outlet.get('name_translations') or {}).get('ko')
        if name_ko:
            name_to_keyword[name_ko] = keyword

        # Also map the source_keyword to itself for direct matches
        name_to_keyword[keyword] = keyword
    return name_to_keyword


def _find_source_keyword(article, name_to_keyword):
    """Return the first matching source_keyword for an article, or None.

    Entity lists are checked in priority order: people, organizations, groups.
    """
    entities = article.get('entities') or {}
    for field in ('people', 'organizations', 'groups'):
        for entity in entities.get(field) or []:
            # Non-string entries cannot be outlet names; skipping them also
            # avoids a TypeError on unhashable values.
            if isinstance(entity, str) and entity in name_to_keyword:
                return name_to_keyword[entity]
    return None


async def _flush_updates(collection, batch):
    """Write the pending source_keyword updates and return how many were sent."""
    for item in batch:
        await collection.update_one(
            {'_id': item['_id']},
            {'$set': {'source_keyword': item['source_keyword']}}
        )
    return len(batch)


async def migrate_article_source_keywords():
    """Add source_keyword to articles based on outlet mappings.

    For every language collection (``<language>_articles``), each article
    whose entities contain an outlet's Korean name or source_keyword gets that
    outlet's ``source_keyword`` set. Progress is printed to stdout.
    """
    # Connect to MongoDB
    client = AsyncIOMotorClient(MONGODB_URL)
    try:
        db = client[DB_NAME]
        outlets_collection = db.outlets

        # Get all outlets
        outlets = await outlets_collection.find().to_list(length=None)
        print(f"Found {len(outlets)} outlets to process")

        name_to_keyword = _build_name_to_keyword(outlets)
        print(f"Created {len(name_to_keyword)} name-to-keyword mappings")

        # Process each language collection
        total_updated = 0
        batch_size = 100
        for language in LANGUAGES:
            collection_name = f"{language}_articles"
            articles_collection = db[collection_name]

            # An absent collection simply counts as empty.
            count = await articles_collection.count_documents({})
            if count == 0:
                print(f"Skipping empty collection: {collection_name}")
                continue

            print(f"\nProcessing {collection_name} ({count} articles)...")

            updated_in_lang = 0
            batch = []

            async for article in articles_collection.find({}):
                source_keyword = _find_source_keyword(article, name_to_keyword)
                if not source_keyword:
                    continue

                batch.append({
                    '_id': article['_id'],
                    'source_keyword': source_keyword
                })

                # Execute batch update
                if len(batch) >= batch_size:
                    updated_in_lang += await _flush_updates(articles_collection, batch)
                    print(f"  Updated {updated_in_lang} articles...", end='\r')
                    batch = []

            # Update remaining batch
            if batch:
                updated_in_lang += await _flush_updates(articles_collection, batch)

            print(f"  Updated {updated_in_lang} articles in {collection_name}")
            total_updated += updated_in_lang

        print("\n✓ Migration completed!")
        print(f"✓ Total articles updated across all languages: {total_updated}")
    finally:
        # Close connection even when the migration fails part-way.
        client.close()


if __name__ == "__main__":
    asyncio.run(migrate_article_source_keywords())
|
||||
67
services/news-api/backend/scripts/migrate_outlets.py
Normal file
67
services/news-api/backend/scripts/migrate_outlets.py
Normal file
@ -0,0 +1,67 @@
|
||||
"""
|
||||
Script to migrate outlets data from JSON file to MongoDB
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
from pathlib import Path
|
||||
|
||||
# MongoDB connection settings
|
||||
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")
|
||||
DB_NAME = os.getenv("DB_NAME", "news_api_db")
|
||||
|
||||
async def migrate_outlets():
    """Migrate outlets data from JSON to MongoDB.

    Reads ``outlets-extracted.json`` from the parent of this script's
    directory, replaces the whole ``outlets`` collection with its people,
    topics and companies entries, creates the indexes and prints a summary.
    """
    # Load JSON data first so a missing or empty file never opens a connection
    json_file = Path(__file__).parent.parent / "outlets-extracted.json"
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Flatten the data structure
    all_outlets = []
    for category in ['people', 'topics', 'companies']:
        if category in data:
            all_outlets.extend(data[category])

    if not all_outlets:
        print("No outlets data found in JSON file")
        return

    # Connect to MongoDB
    client = AsyncIOMotorClient(MONGODB_URL)
    try:
        db = client[DB_NAME]
        collection = db.outlets

        # Clear existing data
        print("Clearing existing outlets data...")
        result = await collection.delete_many({})
        print(f"Deleted {result.deleted_count} existing outlets")

        # Insert new data
        print(f"Inserting {len(all_outlets)} outlets...")
        result = await collection.insert_many(all_outlets)
        print(f"Inserted {len(result.inserted_ids)} outlets")

        # Create indexes
        print("Creating indexes...")
        await collection.create_index("id", unique=True)
        await collection.create_index("category")
        print("Indexes created")

        # Verify data
        count = await collection.count_documents({})
        print(f"\nVerification: Total outlets in DB: {count}")

        # Show counts by category
        for category in ['people', 'topics', 'companies']:
            category_count = await collection.count_documents({"category": category})
            print(f"  - {category}: {category_count}")
    finally:
        # Close connection even when a database step fails.
        client.close()

    print("\nMigration completed successfully!")


if __name__ == "__main__":
    asyncio.run(migrate_outlets())
|
||||
124
services/news-api/backend/scripts/migrate_outlets_v2.py
Normal file
124
services/news-api/backend/scripts/migrate_outlets_v2.py
Normal file
@ -0,0 +1,124 @@
|
||||
"""
|
||||
Script to migrate outlets data to new structure with multilingual support
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
from pathlib import Path
|
||||
|
||||
# MongoDB connection settings
|
||||
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")
|
||||
DB_NAME = os.getenv("DB_NAME", "news_api_db")
|
||||
|
||||
# source_keyword strategy:
# No separate name-to-keyword mapping table is kept. Each outlet's Korean name
# is used directly as its source_keyword for the Korean articles collection.
# This ensures matching with the entities.people/organizations/groups fields.
|
||||
|
||||
# Placeholder image for outlets
|
||||
DEFAULT_IMAGE = "https://via.placeholder.com/400x400?text=No+Image"
|
||||
|
||||
def _build_outlet_document(outlet, category):
    """Build the new-structure outlet document for one JSON entry."""
    name_ko = outlet.get('name', '')

    # Languages that have no translation yet; stored explicitly as None.
    untranslated = ('zh_cn', 'zh_tw', 'ja', 'fr', 'de', 'es', 'it')

    # MongoDB will generate _id on insert.
    return {
        # Use Korean name directly as source_keyword so it matches the
        # entity names stored on articles.
        'source_keyword': name_ko,
        'category': category,
        'name_translations': {
            'ko': name_ko,
            # Add more languages as needed
            'en': None,
            **dict.fromkeys(untranslated),
        },
        'description_translations': {
            'ko': f"{name_ko}에 대한 뉴스 및 업데이트",
            'en': f"News and updates about {name_ko}",
            **dict.fromkeys(untranslated),
        },
        'image': DEFAULT_IMAGE,
        # Keep old fields for backward compatibility
        'name': name_ko,
        'description': outlet.get('description', '')
    }


async def migrate_outlets_v2():
    """Migrate outlets data to new structure with translations.

    Reads ``outlets-extracted.json`` from the parent of this script's
    directory, rebuilds every entry in the new structure, replaces the whole
    ``outlets`` collection, creates the indexes and prints a summary.
    """
    # Load JSON data first so a missing or empty file never opens a connection
    json_file = Path(__file__).parent.parent / "outlets-extracted.json"
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Transform data structure
    all_outlets = [
        _build_outlet_document(outlet, category)
        for category in ['people', 'topics', 'companies']
        if category in data
        for outlet in data[category]
    ]

    if not all_outlets:
        print("No outlets data found in JSON file")
        return

    # Connect to MongoDB
    client = AsyncIOMotorClient(MONGODB_URL)
    try:
        db = client[DB_NAME]
        collection = db.outlets

        # Clear existing data
        print("Clearing existing outlets data...")
        result = await collection.delete_many({})
        print(f"Deleted {result.deleted_count} existing outlets")

        # Insert new data
        print(f"Inserting {len(all_outlets)} outlets...")
        result = await collection.insert_many(all_outlets)
        print(f"Inserted {len(result.inserted_ids)} outlets")

        # Create indexes; a failure here is reported but does not abort the
        # migration, matching the script's best-effort handling.
        print("Creating indexes...")
        try:
            await collection.create_index("category")
            await collection.create_index("source_keyword")
            print("Indexes created")
        except Exception as e:
            print(f"Note: {e}")

        # Verify data
        count = await collection.count_documents({})
        print(f"\nVerification: Total outlets in DB: {count}")

        # Show counts by category
        for category in ['people', 'topics', 'companies']:
            category_count = await collection.count_documents({"category": category})
            print(f"  - {category}: {category_count}")
    finally:
        # Close connection even when a database step fails.
        client.close()

    print("\nMigration completed successfully!")
    print("\nNew structure includes:")
    print("  ✓ MongoDB _id as unique identifier")
    print("  ✓ source_keyword for dynamic article queries")
    print("  ✓ name_translations for multilingual support")
    print("  ✓ description_translations for multilingual descriptions")
    print("  ✓ Placeholder images")


if __name__ == "__main__":
    asyncio.run(migrate_outlets_v2())
|
||||
Reference in New Issue
Block a user