/**
 * News-API Client
 *
 * Handles communication with news-api MongoDB and data transformation
 * to sapiens-mobile schema format.
 */

import { MongoClient, Db, ObjectId } from 'mongodb';
import fs from 'fs';
import path from 'path';

const MONGODB_URL = process.env.MONGODB_URL || 'mongodb://localhost:27017';
const DB_NAME = 'ai_writer_db';

/** Shape of outlets-extracted.json: three arrays of outlet records. */
type OutletsData = {
  people: any[];
  topics: any[];
  companies: any[];
};

// Cache for outlets data (loaded from outlets-extracted.json)
let outlets: OutletsData | null = null;

// MongoDB client
let client: MongoClient | null = null;
let db: Db | null = null;

// In-flight connection attempt, shared by concurrent callers of connectToNewsAPI
let connecting: Promise<void> | null = null;

/**
 * Open a new MongoDB connection and publish it to the module state.
 *
 * `client` and `db` are only assigned once the connection has succeeded, so a
 * failed attempt never leaves a half-initialised client behind.
 */
async function openConnection(): Promise<void> {
  let candidate: MongoClient | null = null;
  try {
    candidate = new MongoClient(MONGODB_URL);
    await candidate.connect();
    // Publish both handles together (no await in between) so callers never
    // observe a client without a database.
    db = candidate.db(DB_NAME);
    client = candidate;
    console.log(`Connected to news-api MongoDB: ${DB_NAME}`);
  } catch (error) {
    console.error('Failed to connect to news-api MongoDB:', error);
    db = null;
    if (candidate) {
      try {
        await candidate.close();
      } catch {
        // Best-effort cleanup; the original connection error is what matters.
      }
    }
    throw error;
  }
}

/**
 * Initialize MongoDB connection
 *
 * Resolves immediately when a connection is already established. Concurrent
 * callers share a single connection attempt; after a failed attempt the next
 * call retries from scratch.
 *
 * @throws the underlying driver error when the connection cannot be opened
 */
export async function connectToNewsAPI(): Promise<void> {
  if (client && db) return; // Already connected

  if (connecting) {
    await connecting;
    return;
  }

  const attempt = openConnection();
  connecting = attempt;
  try {
    await attempt;
  } finally {
    // Only the caller that started this attempt clears it.
    if (connecting === attempt) connecting = null;
  }
}

/** Runtime check that parsed JSON has the three outlet arrays. */
function isOutletsData(value: unknown): value is OutletsData {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as Record<string, unknown>;
  return (
    Array.isArray(candidate.people) &&
    Array.isArray(candidate.topics) &&
    Array.isArray(candidate.companies)
  );
}

/**
 * Load outlets from outlets-extracted.json
 *
 * The file is resolved relative to the current working directory, read
 * synchronously and cached for the lifetime of the module. The parsed data is
 * validated before it is cached, so a malformed file is never remembered.
 *
 * @returns the cached outlets object (`people`, `topics`, `companies`)
 * @throws when the file cannot be read, is not valid JSON, or lacks the
 *         expected arrays
 */
export function loadOutlets(): any {
  if (outlets) return outlets;

  try {
    const outletsPath = path.resolve(process.cwd(), 'outlets-extracted.json');
    const data = fs.readFileSync(outletsPath, 'utf-8');
    const parsed: unknown = JSON.parse(data);
    if (!isOutletsData(parsed)) {
      throw new Error('outlets-extracted.json must contain "people", "topics" and "companies" arrays');
    }
    outlets = parsed;
    console.log(`Loaded outlets: ${parsed.people.length} people, ${parsed.topics.length} topics, ${parsed.companies.length} companies`);
    return outlets;
  } catch (error) {
    console.error('Failed to load outlets from outlets-extracted.json:', error);
    throw error;
  }
}

// Multi-language translations for outlet names
// (outlet ID -> language code -> display name)
const OUTLET_TRANSLATIONS: Record<string, Record<string, string>> = {
  '도널드-트럼프': { en: 'Donald Trump', ja: 'ドナルド・トランプ', zh_cn: '唐纳德·特朗普', zh_tw: '唐納德·川普', de: 'Donald Trump', fr: 'Donald Trump', es: 'Donald Trump', it: 'Donald Trump' },
  '온유': { en: 'Onew', ja: 'オンユ', zh_cn: '温流', zh_tw: '溫流', de: 'Onew', fr: 'Onew', es: 'Onew', it: 'Onew' },
  '사토시-나카모토': { en: 'Satoshi Nakamoto', ja: 'サトシ・ナカモト', zh_cn: '中本聪', zh_tw: '中本聰', de: 'Satoshi Nakamoto', fr: 'Satoshi Nakamoto', es: 'Satoshi Nakamoto', it: 'Satoshi Nakamoto' },
  '일론-머스크': { en: 'Elon Musk', ja: 'イーロン・マスク', zh_cn: '埃隆·马斯克', zh_tw: '伊隆·馬斯克', de: 'Elon Musk', fr: 'Elon Musk', es: 'Elon Musk', it: 'Elon Musk' },
  '매기-강': { en: 'Maggie Kang', ja: 'マギー・カン', zh_cn: '玛吉·姜', zh_tw: '瑪姬·姜', de: 'Maggie Kang', fr: 'Maggie Kang', es: 'Maggie Kang', it: 'Maggie Kang' },
  '제롬-파월': { en: 'Jerome Powell', ja: 'ジェローム・パウエル', zh_cn: '杰罗姆·鲍威尔', zh_tw: '傑羅姆·鮑威爾', de: 'Jerome Powell', fr: 'Jerome Powell', es: 'Jerome Powell', it: 'Jerome Powell' },
  '블라디미르-푸틴': { en: 'Vladimir Putin', ja: 'ウラジーミル・プーチン', zh_cn: '弗拉基米尔·普京', zh_tw: '弗拉基米爾·普丁', de: 'Wladimir Putin', fr: 'Vladimir Poutine', es: 'Vladímir Putin', it: 'Vladimir Putin' },
  '조-바이든': { en: 'Joe Biden', ja: 'ジョー・バイデン', zh_cn: '乔·拜登', zh_tw: '喬·拜登', de: 'Joe Biden', fr: 'Joe Biden', es: 'Joe Biden', it: 'Joe Biden' },
  '블랙핑크': { en: 'BLACKPINK', ja: 'ブラックピンク', zh_cn: 'BLACKPINK', zh_tw: 'BLACKPINK', de: 'BLACKPINK', fr: 'BLACKPINK', es: 'BLACKPINK', it: 'BLACKPINK' },
  '구글': { en: 'Google', ja: 'グーグル', zh_cn: '谷歌', zh_tw: '谷歌', de: 'Google', fr: 'Google', es: 'Google', it: 'Google' },
  '마이크로소프트': { en: 'Microsoft', ja: 'マイクロソフト', zh_cn: '微软', zh_tw: '微軟', de: 'Microsoft', fr: 'Microsoft', es: 'Microsoft', it: 'Microsoft' },
  '넷플릭스': { en: 'Netflix', ja: 'ネットフリックス', zh_cn: '奈飞', zh_tw: 'Netflix', de: 'Netflix', fr: 'Netflix', es: 'Netflix', it: 'Netflix' },
  '메타': { en: 'Meta', ja: 'メタ', zh_cn: 'Meta', zh_tw: 'Meta', de: 'Meta', fr: 'Meta', es: 'Meta', it: 'Meta' },
  '삼성전자': { en: 'Samsung Electronics', ja: 'サムスン電子', zh_cn: '三星电子', zh_tw: '三星電子', de: 'Samsung Electronics', fr: 'Samsung Electronics', es: 'Samsung Electronics', it: 'Samsung Electronics' },
  '아마존': { en: 'Amazon', ja: 'アマゾン', zh_cn: '亚马逊', zh_tw: '亞馬遜', de: 'Amazon', fr: 'Amazon', es: 'Amazon', it: 'Amazon' },
  '샤이니': { en: 'SHINee', ja: 'シャイニー', zh_cn: 'SHINee', zh_tw: 'SHINee', de: 'SHINee', fr: 'SHINee', es: 'SHINee', it: 'SHINee' }
};

/**
 * Translate outlet name and description based on language
 */
function translateOutlet(outlet: any, language: string): any {
  // Korean is the source language of the outlet data: return it untouched.
  if (language === 'ko') {
    return outlet;
  }

  // Use the translated name when one exists for this outlet ID and language;
  // otherwise keep the original name as is.
  const displayName = OUTLET_TRANSLATIONS[outlet.id]?.[language] || outlet.name;

  // Languages without a template keep the original description.
  const describe = DESCRIPTION_TEMPLATES.get(language);

  return {
    ...outlet,
    name: displayName,
    description: describe ? describe(displayName) : outlet.description
  };
}

// Per-language description pattern, keyed by language code.
const DESCRIPTION_TEMPLATES = new Map<string, (displayName: string) => string>([
  ['en', (displayName) => `News and updates about ${displayName}`],
  ['ja', (displayName) => `${displayName}に関するニュースと最新情報`],
  ['zh_cn', (displayName) => `关于${displayName}的新闻和更新`],
  ['zh_tw', (displayName) => `關於${displayName}的新聞和更新`],
  ['de', (displayName) => `Nachrichten und Updates über ${displayName}`],
  ['fr', (displayName) => `Actualités et mises à jour sur ${displayName}`],
  ['es', (displayName) => `Noticias y actualizaciones sobre ${displayName}`],
  ['it', (displayName) => `Notizie e aggiornamenti su ${displayName}`]
]);

/** Translate one outlet and add `focusSubject` (its name, else its ID) and `avatar` (its image). */
function decorateOutlet(outlet: any, language: string): any {
  const translated = translateOutlet(outlet, language);
  return {
    ...translated,
    focusSubject: translated.name || translated.id,
    avatar: translated.image
  };
}

/**
 * Get all outlets or by category
 *
 * @param category `'people'`, `'topics'` or `'companies'`; omit for all three
 *                 (concatenated in that order). Any other value yields `[]`.
 * @param language language code used to translate names and descriptions
 * @returns translated outlets, each with `focusSubject` and `avatar` added
 * @throws when the outlets file cannot be loaded
 */
export function getOutlets(category?: string, language = 'ko'): any[] {
  const allOutlets = loadOutlets();
  const decorateAll = (group: any[]): any[] =>
    group.map((outlet) => decorateOutlet(outlet, language));

  if (!category) {
    return [
      ...decorateAll(allOutlets.people),
      ...decorateAll(allOutlets.topics),
      ...decorateAll(allOutlets.companies)
    ];
  }

  switch (category) {
    case 'people':
      return decorateAll(allOutlets.people);
    case 'topics':
      return decorateAll(allOutlets.topics);
    case 'companies':
      return decorateAll(allOutlets.companies);
    default:
      return [];
  }
}

/**
 * Get outlet by ID
 *
 * Searches people, then topics, then companies and returns the first match.
 * Only the matching outlet is translated, rather than the whole catalogue.
 *
 * @returns the translated outlet with `focusSubject` and `avatar`, or `null`
 *          when no outlet has the given ID
 */
export function getOutletById(id: string, language = 'ko'): any | null {
  const allOutlets = loadOutlets();
  for (const group of [allOutlets.people, allOutlets.topics, allOutlets.companies]) {
    const match = group.find((outlet: any) => outlet.id === id);
    if (match) return decorateOutlet(match, language);
  }
  return null;
}

/** Connect if necessary and return the database handle. */
async function getDb(): Promise<Db> {
  await connectToNewsAPI();
  if (!db) throw new Error('Database not connected');
  return db;
}

/**
 * Map `news_id` (as a string) to the `_id` of the English article carrying it.
 * Outlets reference articles by their English `_id`, so localized articles
 * need this to be matched to an outlet.
 */
async function englishIdsByNewsId(database: Db, newsIds: any[]): Promise<Map<string, string>> {
  const result = new Map<string, string>();
  if (newsIds.length === 0) return result;

  const enArticles = await database
    .collection('articles_en')
    .find({ news_id: { $in: newsIds } }, { projection: { news_id: 1, _id: 1 } })
    .toArray();

  for (const en of enArticles) {
    const key = String(en.news_id);
    if (!result.has(key)) result.set(key, en._id.toString());
  }
  return result;
}

/**
 * Get articles for an outlet
 *
 * Resolves the outlet's article IDs in the English collection, then loads the
 * matching articles (by `news_id`) from the requested language collection.
 * Results follow the order of `outlet.articles`; articles missing in either
 * collection are skipped.
 *
 * @param outletId outlet ID as used in the outlets file
 * @param limit    maximum number of outlet article IDs to consider
 * @param language language code selecting the `articles_<language>` collection
 * @returns transformed articles, or `[]` when the outlet is unknown
 * @throws when the database is unavailable or an outlet article ID is not a
 *         valid ObjectId
 */
export async function getArticlesByOutlet(outletId: string, limit = 50, language = 'en'): Promise<any[]> {
  const database = await getDb();

  const outlet = getOutletById(outletId);
  if (!outlet) return [];

  const outletArticleIds: string[] = Array.isArray(outlet.articles) ? outlet.articles.slice(0, limit) : [];
  const articleIds = outletArticleIds.map((id) => new ObjectId(id));
  if (articleIds.length === 0) return [];

  // First, get news_ids from English collection
  const enArticles = await database
    .collection('articles_en')
    .find({ _id: { $in: articleIds } }, { projection: { news_id: 1, _id: 1 } })
    .toArray();

  const newsIds = enArticles.map((en: any) => en.news_id).filter(Boolean);
  if (newsIds.length === 0) return [];

  // Then get articles from target language collection using news_ids
  const articles = await database
    .collection(`articles_${language}`)
    .find({ news_id: { $in: newsIds } })
    .toArray();

  const enById = new Map(enArticles.map((en: any) => [en._id.toString(), en] as const));
  const localizedByNewsId = new Map(articles.map((a: any) => [String(a.news_id), a] as const));

  // Walk the outlet's own ID list so the result keeps the outlet's order;
  // `$in` queries do not return documents in the order of the ID list.
  const seen = new Set<string>();
  const result: any[] = [];
  for (const objectId of articleIds) {
    const englishId = objectId.toString();
    if (seen.has(englishId)) continue;
    seen.add(englishId);

    const en = enById.get(englishId);
    if (!en || !en.news_id) continue;

    const localized = localizedByNewsId.get(String(en.news_id));
    if (localized) result.push(transformArticle(localized, englishId));
  }
  return result;
}

/**
 * Get article by news_id (preferred for cross-language support)
 *
 * @returns the transformed article from `articles_<language>`, or `null` when
 *          that collection has no article with this `news_id`
 * @throws when the database is unavailable
 */
export async function getArticleByNewsId(newsId: string, language = 'en'): Promise<any | null> {
  const database = await getDb();

  console.log(`[newsapi-client.getArticleByNewsId] newsId=${newsId}, language=${language}`);

  const collectionName = `articles_${language}`;
  const article = await database.collection(collectionName).findOne({ news_id: newsId });

  if (!article) {
    console.log(`[newsapi-client.getArticleByNewsId] Article not found in ${collectionName}`);
    return null;
  }

  // Get English article ID for outlet lookup
  const enArticle = await database.collection('articles_en').findOne(
    { news_id: newsId },
    { projection: { _id: 1 } }
  );

  console.log(`[newsapi-client.getArticleByNewsId] Found article in ${collectionName}: ${article.title}`);
  return transformArticle(article, enArticle?._id.toString());
}

/**
 * Get article by ID (for backward compatibility)
 *
 * The ID is first looked up in the requested language collection. If it is not
 * there and the language is not English, the ID is treated as an English
 * article ID and the localized article is found through its `news_id`.
 *
 * @returns the transformed article, or `null` when the ID is malformed or no
 *          matching article exists
 * @throws when the database is unavailable
 */
export async function getArticleById(id: string, language = 'en'): Promise<any | null> {
  const database = await getDb();

  console.log(`[newsapi-client.getArticleById] id=${id}, language=${language}`);

  // A malformed ID cannot match any document; report "not found" instead of
  // letting the ObjectId constructor throw.
  if (!ObjectId.isValid(id)) {
    console.log(`[newsapi-client.getArticleById] Article not found in any collection`);
    return null;
  }
  const objectId = new ObjectId(id);

  // First, try to find the article directly in the requested language collection
  const collectionName = `articles_${language}`;
  const collection = database.collection(collectionName);
  const direct = await collection.findOne({ _id: objectId });

  if (direct) {
    console.log(`[newsapi-client.getArticleById] Found article directly in ${collectionName}: ${direct.title}`);
    // Outlets list English article IDs, so a localized hit is mapped back to
    // its English counterpart (falling back to the given ID when none exists).
    let englishId = objectId.toString();
    if (language !== 'en' && direct.news_id) {
      const ids = await englishIdsByNewsId(database, [direct.news_id]);
      englishId = ids.get(String(direct.news_id)) ?? englishId;
    }
    return transformArticle(direct, englishId);
  }

  // For English the collection just queried *is* the English collection, so a
  // second lookup there cannot succeed.
  if (language === 'en') {
    console.log(`[newsapi-client.getArticleById] Article not found in any collection`);
    return null;
  }

  // If not found, the ID might be from English collection
  // Try to find it in English collection and get its news_id
  const enArticle = await database.collection('articles_en').findOne(
    { _id: objectId },
    { projection: { news_id: 1 } }
  );

  console.log(`[newsapi-client.getArticleById] Checked English collection, news_id: ${enArticle?.news_id}`);

  if (!enArticle || !enArticle.news_id) {
    console.log(`[newsapi-client.getArticleById] Article not found in any collection`);
    return null;
  }

  // For other languages, get article using news_id
  console.log(`[newsapi-client.getArticleById] Querying ${collectionName} with news_id: ${enArticle.news_id}`);
  const localized = await collection.findOne({ news_id: enArticle.news_id });

  if (!localized) {
    console.log(`[newsapi-client.getArticleById] No article found in ${collectionName} with news_id ${enArticle.news_id}`);
    return null;
  }

  console.log(`[newsapi-client.getArticleById] Found article in ${collectionName}: ${localized.title}`);
  return transformArticle(localized, objectId.toString()); // Pass the English ID
}

/** Escape regular-expression metacharacters so text is matched literally. */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Search articles
 *
 * Case-insensitive substring search over `title`, `summary` and `body` in the
 * `articles_<language>` collection. The query is matched literally: regular
 * expression metacharacters in it are escaped.
 *
 * @param query    text to search for
 * @param limit    maximum number of articles to return
 * @param language language code selecting the collection
 * @throws when the database is unavailable
 */
export async function searchArticles(query: string, limit = 20, language = 'en'): Promise<any[]> {
  const database = await getDb();

  const pattern = escapeRegExp(query);
  const articles = await database
    .collection(`articles_${language}`)
    .find({
      $or: [
        { title: { $regex: pattern, $options: 'i' } },
        { summary: { $regex: pattern, $options: 'i' } },
        { body: { $regex: pattern, $options: 'i' } }
      ]
    })
    .limit(limit)
    .toArray();

  // Call transformArticle explicitly: passing it straight to `map` would hand
  // the array index over as `englishArticleId`.
  if (language === 'en') {
    return articles.map((article: any) => transformArticle(article));
  }

  // Localized results are matched to outlets through their English article ID.
  const newsIds = articles.map((article: any) => article.news_id).filter(Boolean);
  const englishIds = await englishIdsByNewsId(database, newsIds);
  return articles.map((article: any) =>
    transformArticle(article, englishIds.get(String(article.news_id)))
  );
}

/** Find the first outlet (people, topics, companies order) whose `articles` list contains the ID. */
function findOutletForArticle(articleId: string): any | undefined {
  const allOutlets = loadOutlets();
  for (const group of [allOutlets.people, allOutlets.topics, allOutlets.companies]) {
    const match = group.find(
      (outlet: any) => Array.isArray(outlet.articles) && outlet.articles.includes(articleId)
    );
    if (match) return match;
  }
  return undefined;
}

/**
 * Transform news-api article to sapiens-mobile format
 *
 * @param article          raw article document from an `articles_*` collection
 * @param englishArticleId ID of the English version of the article, used to
 *                         find the owning outlet; defaults to the article's
 *                         own `_id`
 * @returns the article in sapiens-mobile shape; outlet fields fall back to
 *          `'unknown'` / `'Unknown'` / `'topics'` when no outlet lists it
 */
function transformArticle(article: any, englishArticleId?: string): any {
  // Find which outlet this article belongs to. The untranslated outlet data is
  // scanned directly instead of rebuilding every outlet for each article.
  const articleIdStr = englishArticleId || article._id.toString();
  const outlet = findOutletForArticle(articleIdStr);

  // Extract the first image or use default
  const images = article.images || [];
  const thumbnail = images.length > 0 ? images[0] : '/api/assets/default-article.png';

  // Format time ago
  // NOTE(review): the value is clamped to 1..59, so anything older than an
  // hour reads "59 min ago" — confirm this is intended.
  const publishedAt = article.created_at || new Date();
  const elapsedMinutes = Math.floor((Date.now() - new Date(publishedAt).getTime()) / 60000);
  // An unparsable date yields NaN, which would otherwise leak out as "NaN min ago".
  const clampedMinutes = Number.isFinite(elapsedMinutes)
    ? Math.max(1, Math.min(59, elapsedMinutes))
    : 1;

  // Ensure tags is always an array of strings
  let tags: string[] = [];
  if (Array.isArray(article.subtopics)) {
    tags = article.subtopics
      .map((t: any) => {
        if (typeof t === 'string') return t;
        if (t && typeof t === 'object' && t.title) return t.title;
        return null;
      })
      .filter((t: any) => t !== null);
  } else if (article.subtopics && typeof article.subtopics === 'string') {
    tags = [article.subtopics];
  }

  return {
    id: article._id.toString(),
    newsId: article.news_id || article._id.toString(), // Add news_id for cross-language navigation
    title: article.title || 'Untitled',
    summary: article.summary || '',
    body: article.body || article.summary || '',
    thumbnail,
    publishedAt: publishedAt,
    timeAgo: `${clampedMinutes} min ago`,
    outletId: outlet?.id || 'unknown',
    outletName: outlet?.name || 'Unknown',
    tags,
    subtopics: article.subtopics || [],
    viewCount: 0,
    category: outlet?.category || 'topics'
  };
}

/**
 * Close MongoDB connection
 *
 * Does nothing when no connection is open. The module state is cleared before
 * the client is closed, so a failing `close()` cannot leave a dead client
 * registered as "connected".
 */
export async function closeNewsAPIConnection(): Promise<void> {
  if (!client) return;

  const closing = client;
  client = null;
  db = null;

  await closing.close();
  console.log('Closed news-api MongoDB connection');
}