// Simple article scraping script for testing.
//
// For every outlet listed in `outletsAndUrls`, each URL is fetched, a title
// and summary are pulled out of the HTML, and the result is POSTed to the
// local articles API.

// NOTE(review): `fs` is not referenced by the code below; kept because it is
// a file-level import.
const fs = require('fs');

// Endpoint that receives the scraped articles.
const ARTICLES_API_URL = 'http://localhost:5000/api/articles';

// Upper bound on how many URLs are scraped per outlet.
const MAX_ARTICLES_PER_OUTLET = 3;

// Pause between consecutive scrapes, in milliseconds.
const REQUEST_DELAY_MS = 1000;

// Length caps applied to the extracted fields.
const MAX_TITLE_LENGTH = 200;
const MAX_SUMMARY_LENGTH = 500;

// Bodies shorter than this get a generic filler paragraph appended.
const MIN_BODY_LENGTH = 200;

// List of articles to scrape for each outlet
const outletsAndUrls = {
  "ala-shaabana": [
    "https://www.rootdata.com/news/323625",
    "https://stealthex.io/blog/bittensor-price-prediction-can-tao-coin-reach-1000/",
    "https://www.gate.com/learn/articles/understanding-bittensor-protocol/2203"
  ],
  "jacob-robert-steeves": [
    "https://eng.ambcrypto.com/will-ai-coin-tao-reach-3000-as-its-first-halving-approaches/",
    "https://news.ssbcrack.com/bittensors-tao-coin-can-it-really-hit-3000-amid-upcoming-halving-and-ai-buzz/",
    "https://usethebitcoin.com/crypto-personalities/all-you-need-to-know-about-jacob-robert-steeves-the-co-founder-of-bittensor/"
  ]
};

/**
 * Find the content of the `<meta name="description">` tag in an HTML string.
 *
 * The `name` and `content` attributes may appear in either order, and the
 * value is read up to the quote character that opened it, so an apostrophe
 * inside a double-quoted value does not cut the text short.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {string|null} The raw attribute value, or null when no such tag exists.
 */
function findMetaDescription(html) {
  const metaTags = html.match(/<meta\s[^>]*>/gi) ?? [];
  for (const tag of metaTags) {
    // The leading \s keeps attributes such as `data-name` from matching.
    if (!/\sname\s*=\s*(["'])description\1/i.test(tag)) {
      continue;
    }
    const contentMatch = tag.match(/\scontent\s*=\s*(["'])(.*?)\1/is);
    if (contentMatch) {
      return contentMatch[2];
    }
  }
  return null;
}

/**
 * Build the article record (title, summary, body, url) from a page's HTML.
 *
 * Whitespace runs are collapsed to single spaces, the title is capped at 200
 * characters and the summary at 500. When the page has no usable `<title>` or
 * meta description, a placeholder derived from the URL's hostname is used.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} url - Absolute URL the HTML was fetched from.
 * @returns {{title: string, summary: string, body: string, url: string}}
 * @throws {TypeError} If `url` is not a valid absolute URL.
 */
function extractArticleFields(html, url) {
  const { hostname } = new URL(url);

  // Extract title
  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
  const rawTitle = titleMatch?.[1].trim() || `Article from ${hostname}`;
  const title = rawTitle.replace(/\s+/g, ' ').substring(0, MAX_TITLE_LENGTH);

  // Extract meta description
  const rawSummary = findMetaDescription(html)?.trim() || `Article scraped from ${hostname}`;
  const summary = rawSummary.replace(/\s+/g, ' ').substring(0, MAX_SUMMARY_LENGTH);

  // Create basic body content
  let body = `This article was originally published at ${url}.\n\n${summary}`;
  if (body.length < MIN_BODY_LENGTH) {
    body += `\n\nThis content provides insights and analysis on current industry developments and trends.`;
  }

  return { title, summary, body, url };
}

/**
 * Fetch a page and extract its article record.
 *
 * Failures are logged rather than thrown, so one bad URL does not stop the run.
 *
 * @param {string} url - Absolute URL of the article to scrape.
 * @returns {Promise<{title: string, summary: string, body: string, url: string}|null>}
 *   The article record, or null when the request fails or returns a non-2xx status.
 */
async function scrapeArticle(url) {
  try {
    const response = await fetch(url);
    // Without this check an error page (404, 500, ...) would be stored as an article.
    if (!response.ok) {
      console.error(`Error scraping ${url}: HTTP ${response.status}`);
      return null;
    }

    const html = await response.text();
    return extractArticleFields(html, url);
  } catch (error) {
    console.error(`Error scraping ${url}:`, error.message);
    return null;
  }
}

/**
 * POST a scraped article to the articles API.
 *
 * @param {string} outletId - Key of the outlet the article belongs to.
 * @param {{title: string, summary: string, body: string}} articleData - Scraped article fields.
 * @returns {Promise<object|null>} The parsed JSON response, or null when the
 *   request fails or the API answers with a non-2xx status.
 */
async function addArticleToStorage(outletId, articleData) {
  try {
    const response = await fetch(ARTICLES_API_URL, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        outletId,
        title: articleData.title,
        summary: articleData.summary,
        body: articleData.body,
        thumbnail: '/api/assets/default-article.png',
        publishedAt: new Date().toISOString(),
        tags: [],
        viewCount: 0
      })
    });

    if (!response.ok) {
      console.error(`✗ Failed to add article: ${response.status}`);
      return null;
    }

    const result = await response.json();
    console.log(`✓ Added article: ${articleData.title}`);
    return result;
  } catch (error) {
    console.error(`✗ Error adding article:`, error.message);
    return null;
  }
}

/**
 * Scrape and store the configured articles, one URL at a time.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('Starting article scraping...\n');

  for (const [outletId, urls] of Object.entries(outletsAndUrls)) {
    console.log(`\n=== Scraping articles for ${outletId} ===`);

    // Limit the number of articles per outlet
    for (const url of urls.slice(0, MAX_ARTICLES_PER_OUTLET)) {
      console.log(`Scraping: ${url}`);

      const articleData = await scrapeArticle(url);
      if (articleData) {
        await addArticleToStorage(outletId, articleData);
      }

      // Small delay to be respectful
      await new Promise(resolve => setTimeout(resolve, REQUEST_DELAY_MS));
    }
  }

  console.log('\n✓ Scraping completed!');
}

main().catch(console.error);