""" Script to migrate outlets data from JSON file to MongoDB """ import asyncio import json import os from motor.motor_asyncio import AsyncIOMotorClient from pathlib import Path # MongoDB connection settings MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://localhost:27017") DB_NAME = os.getenv("DB_NAME", "news_api_db") async def migrate_outlets(): """Migrate outlets data from JSON to MongoDB""" # Connect to MongoDB client = AsyncIOMotorClient(MONGODB_URL) db = client[DB_NAME] collection = db.outlets # Load JSON data json_file = Path(__file__).parent.parent / "outlets-extracted.json" with open(json_file, 'r', encoding='utf-8') as f: data = json.load(f) # Flatten the data structure all_outlets = [] for category in ['people', 'topics', 'companies']: if category in data: all_outlets.extend(data[category]) if not all_outlets: print("No outlets data found in JSON file") return # Clear existing data print(f"Clearing existing outlets data...") result = await collection.delete_many({}) print(f"Deleted {result.deleted_count} existing outlets") # Insert new data print(f"Inserting {len(all_outlets)} outlets...") result = await collection.insert_many(all_outlets) print(f"Inserted {len(result.inserted_ids)} outlets") # Create indexes print("Creating indexes...") await collection.create_index("id", unique=True) await collection.create_index("category") print("Indexes created") # Verify data count = await collection.count_documents({}) print(f"\nVerification: Total outlets in DB: {count}") # Show counts by category for category in ['people', 'topics', 'companies']: category_count = await collection.count_documents({"category": category}) print(f" - {category}: {category_count}") # Close connection client.close() print("\nMigration completed successfully!") if __name__ == "__main__": asyncio.run(migrate_outlets())