feat: Refactor outlets with multilingual support and dynamic queries
- Replace static articles array with dynamic source_keyword queries
- Use MongoDB _id as unique identifier for outlets
- Add multilingual translations (9 languages: ko, en, zh_cn, zh_tw, ja, fr, de, es, it)
- Add OutletService for database operations
- Add outlet migration script with Korean source_keyword matching
- Remove JSON file-based outlet loading
- Add /outlets/{outlet_id}/articles endpoint for dynamic article retrieval
This resolves the design issues with:
1. Static articles array requiring constant updates
2. Lack of multilingual support for outlet names/descriptions
3. Broken image URLs
4. Korean entity matching for article queries
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
124
services/news-api/backend/scripts/migrate_outlets_v2.py
Normal file
124
services/news-api/backend/scripts/migrate_outlets_v2.py
Normal file
@ -0,0 +1,124 @@
|
||||
"""
|
||||
Script to migrate outlets data to new structure with multilingual support
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
from pathlib import Path
|
||||
|
||||
# MongoDB connection settings; each value can be overridden through the
# environment variable of the same name.
MONGODB_URL = os.environ.get("MONGODB_URL", "mongodb://localhost:27017")
DB_NAME = os.environ.get("DB_NAME", "news_api_db")

# source_keyword convention: no separate name -> source_keyword lookup table
# is kept. Each outlet's Korean name is used directly as its source_keyword,
# intended to match the entities.people/organizations/groups fields of the
# articles_ko collection.

# Placeholder image stored on every migrated outlet document.
DEFAULT_IMAGE = "https://via.placeholder.com/400x400?text=No+Image"
|
||||
|
||||
# Categories read from the JSON file, in the order they are migrated/reported.
_OUTLET_CATEGORIES = ('people', 'topics', 'companies')

# Languages whose translations are not available yet and are stored as None.
_UNTRANSLATED_LANGUAGES = ('zh_cn', 'zh_tw', 'ja', 'fr', 'de', 'es', 'it')


def _build_outlet(outlet, category):
    """Build the new-structure outlet document for one legacy JSON entry."""
    name_ko = outlet.get('name', '')

    # Key order is kept stable on purpose: MongoDB stores fields in
    # insertion order. No _id is set here; MongoDB generates it on insert.
    return {
        # The Korean name is used directly as source_keyword.
        'source_keyword': name_ko,
        'category': category,
        'name_translations': {
            'ko': name_ko,
            # Add more languages as needed
            'en': None,
            **dict.fromkeys(_UNTRANSLATED_LANGUAGES),
        },
        'description_translations': {
            'ko': f"{name_ko}에 대한 뉴스 및 업데이트",
            # Only the Korean name is available, so it is embedded as-is.
            'en': f"News and updates about {name_ko}",
            **dict.fromkeys(_UNTRANSLATED_LANGUAGES),
        },
        'image': DEFAULT_IMAGE,
        # Keep old fields for backward compatibility
        'name': name_ko,
        'description': outlet.get('description', ''),
    }


def _collect_outlets(data):
    """Return new-structure documents for every outlet in every known category."""
    all_outlets = []
    for category in _OUTLET_CATEGORIES:
        if category not in data:
            continue
        all_outlets.extend(
            _build_outlet(outlet, category) for outlet in data[category]
        )
    return all_outlets


async def migrate_outlets_v2():
    """Migrate outlets data to new structure with translations.

    Reads ``outlets-extracted.json`` from the directory above this script's
    directory, converts every entry under the ``people``, ``topics`` and
    ``companies`` keys into the new document structure, then replaces the
    contents of the ``outlets`` collection with the converted documents and
    creates indexes on ``category`` and ``source_keyword``.

    WARNING: the existing collection is emptied before the new documents are
    inserted, and the two steps are not atomic. If the insert fails, the
    collection is left empty.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        OSError: if the JSON file cannot be opened.
        json.JSONDecodeError: if the JSON file is malformed.
        Exception: any error raised by the delete, insert or count calls
            propagates to the caller; the client is closed first.
    """
    # Load and transform the JSON before creating a client, so a missing,
    # malformed or empty file never leaves an open client behind.
    json_file = Path(__file__).parent.parent / "outlets-extracted.json"
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    all_outlets = _collect_outlets(data)
    if not all_outlets:
        print("No outlets data found in JSON file")
        return

    client = AsyncIOMotorClient(MONGODB_URL)
    try:
        collection = client[DB_NAME].outlets

        # Clear existing data
        # NOTE(review): destructive and not atomic with the insert below.
        print("Clearing existing outlets data...")
        result = await collection.delete_many({})
        print(f"Deleted {result.deleted_count} existing outlets")

        # Insert new data
        print(f"Inserting {len(all_outlets)} outlets...")
        result = await collection.insert_many(all_outlets)
        print(f"Inserted {len(result.inserted_ids)} outlets")

        # Create indexes. Deliberately best-effort: a failure here is
        # reported but does not abort the migration.
        print("Creating indexes...")
        try:
            await collection.create_index("category")
            await collection.create_index("source_keyword")
            print("Indexes created")
        except Exception as e:
            print(f"Note: {e}")

        # Verify data
        count = await collection.count_documents({})
        print(f"\nVerification: Total outlets in DB: {count}")

        # Show counts by category
        for category in _OUTLET_CATEGORIES:
            category_count = await collection.count_documents(
                {"category": category}
            )
            print(f" - {category}: {category_count}")
    finally:
        # Close the connection on success and on failure.
        client.close()

    print("\nMigration completed successfully!")
    print("\nNew structure includes:")
    print(" ✓ MongoDB _id as unique identifier")
    print(" ✓ source_keyword for dynamic article queries")
    print(" ✓ name_translations for multilingual support")
    print(" ✓ description_translations for multilingual descriptions")
    print(" ✓ Placeholder images")
|
||||
|
||||
if __name__ == "__main__":
    # Executed as a script: build the migration coroutine and run it to
    # completion on a fresh event loop.
    migration = migrate_outlets_v2()
    asyncio.run(migration)
|
||||
Reference in New Issue
Block a user