- Replace static articles array with dynamic source_keyword queries
- Use MongoDB _id as unique identifier for outlets
- Add multilingual translations (9 languages: ko, en, zh_cn, zh_tw, ja, fr, de, es, it)
- Add OutletService for database operations
- Add outlet migration script with Korean source_keyword matching
- Remove JSON file-based outlet loading
- Add /outlets/{outlet_id}/articles endpoint for dynamic article retrieval
This resolves the design issues with:
1. Static articles array requiring constant updates
2. Lack of multilingual support for outlet names/descriptions
3. Broken image URLs
4. Korean entity matching for article queries
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
68 lines
2.0 KiB
Python
68 lines
2.0 KiB
Python
"""
Script to migrate outlets data from a JSON file to MongoDB.
"""
|
|
import asyncio
|
|
import json
|
|
import os
|
|
from motor.motor_asyncio import AsyncIOMotorClient
|
|
from pathlib import Path
|
|
|
|
# MongoDB connection settings. Each value is read from the environment
# variable of the same name and falls back to the default shown here.
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")
DB_NAME = os.getenv("DB_NAME", "news_api_db")
|
|
|
|
async def migrate_outlets() -> None:
    """Migrate outlets data from JSON to MongoDB.

    Reads ``outlets-extracted.json`` from the directory above this script's
    directory, concatenates the lists stored under the ``people``, ``topics``
    and ``companies`` keys, and replaces the entire contents of the
    ``outlets`` collection with them. Afterwards it creates a unique index on
    ``id`` and a plain index on ``category``, then prints per-category counts.

    WARNING: every existing document in ``outlets`` is deleted before the
    insert, and the delete/insert pair is not wrapped in a transaction. A
    failure during the insert leaves the collection empty or partially filled.

    Raises:
        OSError: if the JSON file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        Any exception raised by the MongoDB driver is propagated unchanged;
        the client is closed first.
    """
    categories = ('people', 'topics', 'companies')

    # Load JSON data first, so a missing/invalid file or an empty data set
    # never leaves an open client behind.
    json_file = Path(__file__).parent.parent / "outlets-extracted.json"
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Flatten the data structure: one list holding the outlets of every
    # category that is present in the file.
    all_outlets = []
    for category in categories:
        if category in data:
            all_outlets.extend(data[category])

    if not all_outlets:
        print("No outlets data found in JSON file")
        return

    # Connect to MongoDB
    client = AsyncIOMotorClient(MONGODB_URL)
    try:
        collection = client[DB_NAME].outlets

        # Clear existing data
        print("Clearing existing outlets data...")
        result = await collection.delete_many({})
        print(f"Deleted {result.deleted_count} existing outlets")

        # Insert new data
        print(f"Inserting {len(all_outlets)} outlets...")
        result = await collection.insert_many(all_outlets)
        print(f"Inserted {len(result.inserted_ids)} outlets")

        # Create indexes
        # NOTE(review): the unique index on "id" presumes every outlet in the
        # JSON carries a distinct "id" value -- confirm against the data file.
        print("Creating indexes...")
        await collection.create_index("id", unique=True)
        await collection.create_index("category")
        print("Indexes created")

        # Verify data
        count = await collection.count_documents({})
        print(f"\nVerification: Total outlets in DB: {count}")

        # Show counts by category
        # NOTE(review): this filters on a "category" field of each document;
        # the flattening above does not add one, so it presumably already
        # exists in the JSON entries -- verify.
        for category in categories:
            category_count = await collection.count_documents({"category": category})
            print(f"  - {category}: {category_count}")
    finally:
        # Close connection, whether the migration succeeded or raised.
        client.close()

    print("\nMigration completed successfully!")
|
|
|
|
# Run the migration only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    asyncio.run(migrate_outlets())
|