feat: Refactor outlets with multilingual support and dynamic queries

- Replace static articles array with dynamic source_keyword queries
- Use MongoDB _id as unique identifier for outlets
- Add multilingual translations (9 languages: ko, en, zh_cn, zh_tw, ja, fr, de, es, it)
- Add OutletService for database operations
- Add outlet migration script with Korean source_keyword matching
- Remove JSON file-based outlet loading
- Add /outlets/{outlet_id}/articles endpoint for dynamic article retrieval

This resolves the design issues with:
1. Static articles array requiring constant updates
2. Lack of multilingual support for outlet names/descriptions
3. Broken image URLs
4. Korean entity matching for article queries

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2025-10-13 16:52:34 +09:00
parent deb52e51f2
commit e467e76d02
6 changed files with 515 additions and 28 deletions

View File

@ -0,0 +1,124 @@
"""
Script to migrate outlets data to new structure with multilingual support
"""
import asyncio
import json
import os
from motor.motor_asyncio import AsyncIOMotorClient
from pathlib import Path
# MongoDB connection settings; both can be overridden through the environment.
MONGODB_URL = os.environ.get("MONGODB_URL", "mongodb://localhost:27017")
DB_NAME = os.environ.get("DB_NAME", "news_api_db")

# NOTE: there is no name -> source_keyword lookup table in this script.
# Each outlet's Korean name is used directly as its source_keyword; per the
# original author's note this is intended to match the
# entities.people/organizations/groups fields of the articles_ko collection.

# Placeholder image assigned to every migrated outlet.
DEFAULT_IMAGE = "https://via.placeholder.com/400x400?text=No+Image"
# Top-level keys of the JSON export that are migrated; each becomes the
# ``category`` value of the outlets found under it.
_OUTLET_CATEGORIES = ('people', 'topics', 'companies')

# Languages for which no translation exists yet; stored as None placeholders.
_UNTRANSLATED_LANGUAGES = ('zh_cn', 'zh_tw', 'ja', 'fr', 'de', 'es', 'it')


def _build_outlet_document(outlet: dict, category: str, image: str) -> dict:
    """Return the new-structure document for one outlet entry of the JSON export.

    Args:
        outlet: Raw outlet entry; its ``name`` and ``description`` keys are read
            (both default to an empty string when absent).
        category: Category key the entry was found under.
        image: Image URL stored on the document.

    Returns:
        A dict ready for insertion. No ``_id`` is set here, so one is assigned
        when the document is inserted.
    """
    name_ko = outlet.get('name', '')
    pending = dict.fromkeys(_UNTRANSLATED_LANGUAGES)  # every value is None
    return {
        # The Korean name doubles as the keyword used for article queries.
        'source_keyword': name_ko,
        'category': category,
        'name_translations': {
            'ko': name_ko,
            'en': None,
            **pending,
        },
        'description_translations': {
            'ko': f"{name_ko}에 대한 뉴스 및 업데이트",
            'en': f"News and updates about {name_ko}",
            **pending,
        },
        'image': image,
        # Keep old fields for backward compatibility
        'name': name_ko,
        'description': outlet.get('description', ''),
    }


def _load_outlet_documents(json_file: Path, image: str) -> list:
    """Read the JSON export and return one new-structure document per outlet.

    Categories missing from the file are skipped; the result is ordered by
    category (people, topics, companies) and then by position in the file.

    Raises:
        OSError: If ``json_file`` cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    documents = []
    for category in _OUTLET_CATEGORIES:
        if category not in data:
            continue
        documents.extend(
            _build_outlet_document(outlet, category, image)
            for outlet in data[category]
        )
    return documents


async def migrate_outlets_v2():
    """Migrate outlets data to the new structure with translations.

    Reads ``outlets-extracted.json`` from the directory above this script's
    directory, converts every outlet to the new document layout, then REPLACES
    the whole ``outlets`` collection: all existing documents are deleted
    before the new ones are inserted. Afterwards indexes on ``category`` and
    ``source_keyword`` are created (best effort) and per-category counts are
    printed.

    If the JSON file yields no outlets, a message is printed and the database
    is left untouched.

    Raises:
        OSError / json.JSONDecodeError: If the JSON export cannot be read.
        Exception: Database errors from the delete/insert/count calls
            propagate; the client is closed in every case.
    """
    # Load and transform first, so a missing or empty export never opens a
    # connection or touches the existing collection.
    json_file = Path(__file__).parent.parent / "outlets-extracted.json"
    all_outlets = _load_outlet_documents(json_file, DEFAULT_IMAGE)

    if not all_outlets:
        print("No outlets data found in JSON file")
        return

    client = AsyncIOMotorClient(MONGODB_URL)
    try:
        collection = client[DB_NAME].outlets

        # Clear existing data
        print("Clearing existing outlets data...")
        result = await collection.delete_many({})
        print(f"Deleted {result.deleted_count} existing outlets")

        # Insert new data
        print(f"Inserting {len(all_outlets)} outlets...")
        result = await collection.insert_many(all_outlets)
        print(f"Inserted {len(result.inserted_ids)} outlets")

        # Create indexes. Deliberately best effort: a failure here is
        # reported but does not abort the migration.
        print("Creating indexes...")
        try:
            await collection.create_index("category")
            await collection.create_index("source_keyword")
            print("Indexes created")
        except Exception as e:
            print(f"Note: {e}")

        # Verify data
        count = await collection.count_documents({})
        print(f"\nVerification: Total outlets in DB: {count}")

        # Show counts by category
        for category in _OUTLET_CATEGORIES:
            category_count = await collection.count_documents({"category": category})
            print(f" - {category}: {category_count}")
    finally:
        # Always release the connection, including when a step above raises.
        client.close()

    print("\nMigration completed successfully!")
    print("\nNew structure includes:")
    print(" ✓ MongoDB _id as unique identifier")
    print(" ✓ source_keyword for dynamic article queries")
    print(" ✓ name_translations for multilingual support")
    print(" ✓ description_translations for multilingual descriptions")
    print(" ✓ Placeholder images")
if __name__ == "__main__":
    # Script entry point: run the migration coroutine to completion.
    asyncio.run(migrate_outlets_v2())