// Extended article scraping for more outlets

/** Outlet id -> candidate article URLs to scrape for that outlet. */
const outletsAndUrls = {
  "joseph-jacks": [
    "https://www.prnewswire.com/news-releases/tao-synergies-welcomes-top-bittensor-tao-leader-as-advisor-for-ai-focused-crypto-treasury-strategy-302538426.html",
    "https://www.ainvest.com/news/joseph-jacks-joins-tao-synergies-advisor-ai-focused-crypto-treasury-strategy-2508/",
    "https://fintech.global/2025/08/01/comp-ai-secures-2-6m-to-transform-soc-2-compliance/",
    "https://fox4kc.com/business/press-releases/ein-presswire/842930120/cossa-launches-as-the-definitive-organization-for-the-26b-commercial-open-source-market"
  ],
  "robert-myers": [
    "https://www.proofoftalk.io/speakers/robert-myers",
    "https://coinfomania.com/singularitynet-ceo-ben-goertzel-teams-up-with-fetch-ais-humayun-sheikh-to-explore-decentralized-artificial-intelligence-at-proof-of-talk-2025/",
    "https://www.theblock.co/post/353065/bittensor-tao-token-crypto-investors"
  ],
  "alt-coin": [
    "https://cryptopotato.com/bitcoin-joins-the-altcoin-bloodbath-with-a-sudden-flash-crash-to-112k/",
    "https://www.ainvest.com/news/michael-saylor-strategic-move-september-crypto-presale-outperform-bitcoin-2509/",
    "https://coinpaper.com/11193/5-signs-the-2025-altseason-could-be-bigger-than-ever",
    "https://cryptodnes.bg/en/analyst-says-2025-altcoin-rally-could-mirror-past-surges-here-is-why/"
  ],
  "stable-coin": [
    "https://www.coindesk.com/markets/2025/09/19/u-s-stablecoin-battle-could-be-zero-sum-game-jpmorgan",
    "https://www.theblock.co/post/370543/tethers-hedge-and-expand-us-strategy-puts-circle-on-defense-in-market-shake-up-tests-oversight-versus-privacy",
    "https://www.dlnews.com/articles/markets/tether-faces-uphill-battle-launching-usat-stablecoin-in-us/"
  ],
  "bittensor": [
    "https://eng.ambcrypto.com/will-ai-coin-tao-reach-3000-as-its-first-halving-approaches/",
    "https://news.ssbcrack.com/bittensors-tao-coin-can-it-really-hit-3000-amid-upcoming-halving-and-ai-buzz/",
    "https://www.chainup.com/market-update/bittensor-the-ai-alpha/"
  ]
};

// Endpoint of the local API that stores scraped articles.
const ARTICLES_ENDPOINT = 'http://localhost:5000/api/articles';
// Only the first N URLs of each outlet are scraped; longer lists are truncated.
const MAX_ARTICLES_PER_OUTLET = 3;
// Pause between consecutive scrapes so remote hosts are not hammered.
const REQUEST_DELAY_MS = 800;
// Abort a scrape that has not completed in this time, so one hung host
// cannot stall the whole (sequential) run.
const FETCH_TIMEOUT_MS = 15000;
const MAX_TITLE_LENGTH = 200;
const MAX_SUMMARY_LENGTH = 500;
const MIN_BODY_LENGTH = 200;

// Quote-aware so that a '>' inside a quoted attribute value does not end the tag.
const META_TAG_PATTERN = /<meta\b(?:[^>"']|"[^"]*"|'[^']*')*>/gi;
// name="value" | name='value' | name=value
const ATTRIBUTE_PATTERN = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;

// The handful of named entities that routinely show up in titles/descriptions.
const NAMED_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', '\u00a0'],
]);

/**
 * Decodes numeric character references and a small set of named entities.
 * Unknown or out-of-range references are left untouched.
 *
 * @param {string} text - Raw text taken from HTML markup.
 * @returns {string} Text with recognised entities replaced.
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (entity, code) => {
    if (code[0] !== '#') {
      return NAMED_ENTITIES.get(code) ?? entity;
    }
    const isHex = code[1] === 'x' || code[1] === 'X';
    const codePoint = Number.parseInt(code.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
      return entity;
    }
    return String.fromCodePoint(codePoint);
  });
}

/**
 * Decodes entities, collapses whitespace runs to single spaces, trims, and
 * truncates to `maxLength` UTF-16 code units.
 *
 * @param {string} raw - Raw text taken from HTML markup.
 * @param {number} maxLength - Maximum length of the returned string.
 * @returns {string} Cleaned text (possibly empty).
 */
function cleanText(raw, maxLength) {
  return decodeHtmlEntities(raw).replace(/\s+/g, ' ').trim().substring(0, maxLength);
}

/**
 * Returns the cleaned text of the first `<title>` element, or null when the
 * page has none or it is blank.
 *
 * @param {string} html - Page markup.
 * @returns {string|null} Cleaned title or null.
 */
function extractTitle(html) {
  const match = html.match(/<title\b[^>]*>([^<]*)<\/title>/i);
  const title = match ? cleanText(match[1], MAX_TITLE_LENGTH) : '';
  return title || null;
}

/**
 * Returns the cleaned `content` of the first `<meta name="description">` tag
 * with non-blank content, regardless of attribute order or quote style, or
 * null when there is none.
 *
 * @param {string} html - Page markup.
 * @returns {string|null} Cleaned description or null.
 */
function extractMetaDescription(html) {
  for (const [tag] of html.matchAll(META_TAG_PATTERN)) {
    // Map (not a plain object) because attribute names come from untrusted markup.
    const attributes = new Map();
    for (const [, name, doubleQuoted, singleQuoted, bare] of tag.matchAll(ATTRIBUTE_PATTERN)) {
      const key = name.toLowerCase();
      if (!attributes.has(key)) {
        attributes.set(key, doubleQuoted ?? singleQuoted ?? bare);
      }
    }
    if (attributes.get('name')?.toLowerCase() !== 'description') {
      continue;
    }
    const summary = cleanText(attributes.get('content') ?? '', MAX_SUMMARY_LENGTH);
    if (summary) {
      return summary;
    }
  }
  return null;
}

/**
 * Fetches `url` and derives a minimal article record from the page's
 * `<title>` and meta description.
 *
 * Title and summary fall back to hostname-based placeholders when the page
 * does not provide them. The body is a stub built from the URL and summary,
 * padded with a generic sentence when shorter than MIN_BODY_LENGTH.
 *
 * Never throws: network errors, timeouts, invalid URLs, and non-2xx
 * responses are logged and reported as null, so error pages are not stored
 * as articles.
 *
 * @param {string} url - Absolute URL of the article page.
 * @returns {Promise<{title: string, summary: string, body: string, url: string}|null>}
 *   The article record, or null when the page could not be scraped.
 */
async function scrapeArticle(url) {
  try {
    const response = await fetch(url, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const html = await response.text();
    const { hostname } = new URL(url);

    const title = extractTitle(html) ?? `Article from ${hostname}`;
    const summary = extractMetaDescription(html) ?? `Article scraped from ${hostname}`;

    // Create basic body content
    let body = `This article was originally published at ${url}.\n\n${summary}`;
    if (body.length < MIN_BODY_LENGTH) {
      body += `\n\nThis content provides insights and analysis on current industry developments and trends in the cryptocurrency and blockchain space.`;
    }

    return { title, summary, body, url };
  } catch (error) {
    console.error(`Error scraping ${url}:`, error.message);
    return null;
  }
}

/**
 * POSTs a scraped article to the local articles API.
 *
 * `publishedAt` is set to the time of this call (not the article's own
 * publication date); thumbnail, tags and viewCount are fixed defaults.
 * Never throws: failures are logged and reported as null.
 *
 * @param {string} outletId - Id of the outlet the article belongs to.
 * @param {{title: string, summary: string, body: string}} articleData - Record from scrapeArticle.
 * @returns {Promise<object|null>} Parsed JSON response of the API, or null on failure.
 */
async function addArticleToStorage(outletId, articleData) {
  try {
    const response = await fetch(ARTICLES_ENDPOINT, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        outletId,
        title: articleData.title,
        summary: articleData.summary,
        body: articleData.body,
        thumbnail: '/api/assets/default-article.png',
        publishedAt: new Date().toISOString(),
        tags: [],
        viewCount: 0,
      }),
    });

    if (!response.ok) {
      console.error(`✗ Failed to add article: ${response.status}`);
      return null;
    }

    const result = await response.json();
    console.log(`✓ Added article: ${articleData.title.substring(0, 80)}...`);
    return result;
  } catch (error) {
    console.error(`✗ Error adding article:`, error.message);
    return null;
  }
}

/**
 * Scrapes up to MAX_ARTICLES_PER_OUTLET URLs per outlet, strictly one at a
 * time, storing each successfully scraped article and pausing
 * REQUEST_DELAY_MS after every URL.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('Starting extended article scraping...\n');

  for (const [outletId, urls] of Object.entries(outletsAndUrls)) {
    console.log(`\n=== Scraping articles for ${outletId} ===`);

    // Limit to the first few articles per outlet
    for (const url of urls.slice(0, MAX_ARTICLES_PER_OUTLET)) {
      console.log(`Scraping: ${url}`);

      const articleData = await scrapeArticle(url);
      if (articleData) {
        await addArticleToStorage(outletId, articleData);
      }

      // Small delay to be respectful
      await new Promise((resolve) => setTimeout(resolve, REQUEST_DELAY_MS));
    }
  }

  console.log('\n✓ Extended scraping completed!');
}

main().catch(console.error);