"""
Script to migrate outlets data to new structure with multilingual support
"""
import asyncio
import json
import os
from pathlib import Path

from motor.motor_asyncio import AsyncIOMotorClient

# MongoDB connection settings
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017")
DB_NAME = os.getenv("DB_NAME", "news_api_db")

# source_keyword strategy:
# There is no separate name -> source_keyword mapping table. The Korean outlet
# name is used directly as the source_keyword for the articles_ko collection,
# so that it matches the entities.people/organizations/groups fields.

# Placeholder image for outlets
DEFAULT_IMAGE = "https://via.placeholder.com/400x400?text=No+Image"

# Top-level keys of the extracted JSON file; each one becomes the outlet's
# 'category' value.
_CATEGORIES = ('people', 'topics', 'companies')

# Language keys written into every *_translations sub-document. The order is
# preserved in the stored documents.
_LANGUAGES = ('ko', 'en', 'zh_cn', 'zh_tw', 'ja', 'fr', 'de', 'es', 'it')


def _translations(**filled):
    """Return a dict with every key of _LANGUAGES; languages not given are None."""
    translations = dict.fromkeys(_LANGUAGES)
    translations.update(filled)
    return translations


def _build_outlet(outlet, category):
    """Convert one raw outlet entry into the new document structure.

    Args:
        outlet: Raw outlet dict from the JSON file; 'name' and 'description'
            are read, both defaulting to ''.
        category: Category key under which the outlet was listed.

    Returns:
        The new outlet document, without '_id' (MongoDB generates it).
    """
    name_ko = outlet.get('name', '')
    return {
        # Use Korean name directly as source_keyword
        'source_keyword': name_ko,
        'category': category,
        'name_translations': _translations(ko=name_ko),
        'description_translations': _translations(
            ko=f"{name_ko}에 대한 뉴스 및 업데이트",
            en=f"News and updates about {name_ko}",
        ),
        'image': DEFAULT_IMAGE,
        # Keep old fields for backward compatibility
        'name': name_ko,
        'description': outlet.get('description', ''),
    }


def _load_outlets(json_file):
    """Read the extracted JSON file and return the list of new outlet documents.

    Args:
        json_file: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A list of outlet documents in category order; empty when the file
        holds no usable outlet data.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # A JSON root that is not an object cannot contain the category keys.
    if not isinstance(data, dict):
        return []

    all_outlets = []
    for category in _CATEGORIES:
        # `or ()` treats a missing category and an explicit null the same way.
        for outlet in data.get(category) or ():
            all_outlets.append(_build_outlet(outlet, category))
    return all_outlets


async def migrate_outlets_v2():
    """Migrate outlets data to new structure with translations.

    Loads "outlets-extracted.json" from the parent of this script's directory,
    replaces the entire contents of the 'outlets' collection with the
    transformed documents, creates indexes on 'category' and 'source_keyword',
    and prints a verification summary.

    Returns:
        None. If the JSON file yields no outlets, a message is printed and the
        database is not touched.
    """
    # Load and transform first: a missing or invalid file then fails before any
    # database connection has been opened.
    json_file = Path(__file__).parent.parent / "outlets-extracted.json"
    all_outlets = _load_outlets(json_file)

    if not all_outlets:
        print("No outlets data found in JSON file")
        return

    # Connect to MongoDB
    client = AsyncIOMotorClient(MONGODB_URL)
    try:
        collection = client[DB_NAME].outlets

        # Clear existing data
        # NOTE: delete + insert is not atomic; if the insert fails, the
        # collection is left empty or partially filled.
        print("Clearing existing outlets data...")
        result = await collection.delete_many({})
        print(f"Deleted {result.deleted_count} existing outlets")

        # Insert new data
        print(f"Inserting {len(all_outlets)} outlets...")
        result = await collection.insert_many(all_outlets)
        print(f"Inserted {len(result.inserted_ids)} outlets")

        # Create indexes
        print("Creating indexes...")
        try:
            await collection.create_index("category")
            await collection.create_index("source_keyword")
            print("Indexes created")
        except Exception as e:
            # Deliberately best-effort: the data is already in place, so an
            # index failure is reported but does not abort the migration.
            print(f"Note: {e}")

        # Verify data
        count = await collection.count_documents({})
        print(f"\nVerification: Total outlets in DB: {count}")

        # Show counts by category
        for category in _CATEGORIES:
            category_count = await collection.count_documents({"category": category})
            print(f" - {category}: {category_count}")
    finally:
        # Close connection, including when one of the steps above raised.
        client.close()

    print("\nMigration completed successfully!")
    print("\nNew structure includes:")
    print(" ✓ MongoDB _id as unique identifier")
    print(" ✓ source_keyword for dynamic article queries")
    print(" ✓ name_translations for multilingual support")
    print(" ✓ description_translations for multilingual descriptions")
    print(" ✓ Placeholder images")


if __name__ == "__main__":
    asyncio.run(migrate_outlets_v2())