React Native mobile application for SAPIENS news platform. Consolidated all previous history into single commit. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
100 lines
3.3 KiB
JavaScript
100 lines
3.3 KiB
JavaScript
// Simple article scraping script for testing
|
|
const fs = require('fs');
|
|
|
|
// Articles to scrape, keyed by outlet ID. Each key is passed as `outletId`
// to the articles API and maps to the list of source URLs for that outlet.
// Frozen (object and arrays) so the scrape list cannot be altered at runtime.
const outletsAndUrls = Object.freeze({
  "ala-shaabana": Object.freeze([
    "https://www.rootdata.com/news/323625",
    "https://stealthex.io/blog/bittensor-price-prediction-can-tao-coin-reach-1000/",
    "https://www.gate.com/learn/articles/understanding-bittensor-protocol/2203",
  ]),
  "jacob-robert-steeves": Object.freeze([
    "https://eng.ambcrypto.com/will-ai-coin-tao-reach-3000-as-its-first-halving-approaches/",
    "https://news.ssbcrack.com/bittensors-tao-coin-can-it-really-hit-3000-amid-upcoming-halving-and-ai-buzz/",
    "https://usethebitcoin.com/crypto-personalities/all-you-need-to-know-about-jacob-robert-steeves-the-co-founder-of-bittensor/",
  ]),
});
|
|
|
|
/**
 * Decode the HTML character references most commonly found in page titles
 * and meta descriptions (a handful of named entities plus numeric ones).
 * Unknown or out-of-range references are left untouched.
 *
 * @param {string} text - Raw text taken from HTML markup.
 * @returns {string} The text with recognised entities decoded.
 */
function decodeHtmlEntities(text) {
  const named = new Map([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', '\u00a0'],
  ]);

  // Single pass, so "&amp;lt;" becomes "&lt;" and is not decoded twice.
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, code) => {
    if (code[0] !== '#') {
      const key = code.toLowerCase();
      return named.has(key) ? named.get(key) : entity;
    }
    const isHex = code[1] === 'x' || code[1] === 'X';
    const codePoint = Number.parseInt(code.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // Guard the range: String.fromCodePoint throws a RangeError outside it.
    return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  });
}

/**
 * Read one attribute value from a single HTML tag string.
 *
 * @param {string} tag - The full tag text, e.g. `<meta name="x" content="y">`.
 * @param {string} attribute - Attribute name (letters only; used in a regex).
 * @returns {string|null} The quoted attribute value, or null when absent.
 */
function readTagAttribute(tag, attribute) {
  // The backreference makes the closing quote match the opening one, so an
  // apostrophe inside a double-quoted value no longer truncates it.
  const pattern = new RegExp(`\\s${attribute}\\s*=\\s*(["'])(.*?)\\1`, 'is');
  const match = tag.match(pattern);
  return match ? match[2] : null;
}

/**
 * Find the content of the `<meta name="description">` tag, regardless of the
 * order in which the `name` and `content` attributes appear.
 *
 * @param {string} html - Page markup.
 * @returns {string|null} The raw description, or null when there is none.
 */
function findMetaDescription(html) {
  const metaTags = html.match(/<meta\b[^>]*>/gi) || [];
  for (const tag of metaTags) {
    const name = readTagAttribute(tag, 'name');
    if (name === null || name.toLowerCase() !== 'description') {
      continue;
    }
    const content = readTagAttribute(tag, 'content');
    if (content !== null) {
      return content;
    }
  }
  return null;
}

/**
 * Decode entities, collapse whitespace runs, trim, and cap the length.
 *
 * @param {string|null} raw - Text extracted from the markup, if any.
 * @param {number} maxLength - Maximum number of characters to keep.
 * @returns {string} The cleaned text; empty when `raw` is null or blank.
 */
function cleanExtractedText(raw, maxLength) {
  if (raw === null) {
    return '';
  }
  return decodeHtmlEntities(raw).replace(/\s+/g, ' ').trim().substring(0, maxLength);
}

/**
 * Download a page and derive a minimal article record from its `<title>` and
 * meta description.
 *
 * The body is not the real article text: it is a short placeholder built from
 * the source URL and the summary, padded with a generic sentence when it is
 * shorter than 200 characters.
 *
 * @param {string} url - Absolute URL of the article to scrape.
 * @returns {Promise<{title: string, summary: string, body: string, url: string}|null>}
 *   The article record, or null when the request fails, the server answers
 *   with a non-2xx status, or the URL is invalid (the error is logged).
 */
async function scrapeArticle(url) {
  try {
    const response = await fetch(url);
    // Without this check an error page (404, 500, ...) would be scraped and
    // stored as if it were the article.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const html = await response.text();
    const { hostname } = new URL(url);

    // Extract title; fall back to the hostname when missing or blank.
    const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
    const title =
      cleanExtractedText(titleMatch ? titleMatch[1] : null, 200) || `Article from ${hostname}`;

    // Extract meta description; fall back to the hostname when missing or blank.
    const summary =
      cleanExtractedText(findMetaDescription(html), 500) || `Article scraped from ${hostname}`;

    // Create basic body content
    let body = `This article was originally published at ${url}.\n\n${summary}`;
    if (body.length < 200) {
      body += `\n\nThis content provides insights and analysis on current industry developments and trends.`;
    }

    return { title, summary, body, url };
  } catch (error) {
    console.error(`Error scraping ${url}:`, error.message);
    return null;
  }
}
|
|
|
|
/**
 * POST a scraped article to the articles API.
 *
 * The request sends a fixed default thumbnail, an empty tag list, a view
 * count of 0, and `publishedAt` set to the time of this call (not the
 * article's original publication date). `articleData.url` is not sent as a
 * separate field.
 *
 * @param {string} outletId - Identifier of the outlet the article belongs to.
 * @param {{title: string, summary: string, body: string}} articleData -
 *   Article fields, as produced by the scraper.
 * @param {string} [endpoint='http://localhost:5000/api/articles'] - URL of
 *   the articles API; defaults to the local development server.
 * @returns {Promise<object|null>} The parsed JSON response on success, or
 *   null when the server answers with a non-2xx status or the request throws
 *   (the failure is logged).
 */
async function addArticleToStorage(
  outletId,
  articleData,
  endpoint = 'http://localhost:5000/api/articles'
) {
  try {
    const payload = {
      outletId,
      title: articleData.title,
      summary: articleData.summary,
      body: articleData.body,
      thumbnail: '/api/assets/default-article.png',
      publishedAt: new Date().toISOString(),
      tags: [],
      viewCount: 0,
    };

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });

    if (!response.ok) {
      console.error(`✗ Failed to add article: ${response.status}`);
      return null;
    }

    const result = await response.json();
    console.log(`✓ Added article: ${articleData.title}`);
    return result;
  } catch (error) {
    console.error(`✗ Error adding article:`, error.message);
    return null;
  }
}
|
|
|
|
/**
 * Script driver: walks every outlet in `outletsAndUrls`, scrapes at most the
 * first three URLs of each, and stores every article that was scraped
 * successfully. Work is strictly sequential, with a one-second pause after
 * each URL.
 *
 * @returns {Promise<void>} Resolves once every outlet has been processed.
 */
async function main() {
  const pause = (milliseconds) =>
    new Promise((done) => setTimeout(done, milliseconds));

  console.log('Starting article scraping...\n');

  for (const outletId of Object.keys(outletsAndUrls)) {
    console.log(`\n=== Scraping articles for ${outletId} ===`);

    // Limit to 3 articles per outlet
    const batch = outletsAndUrls[outletId].slice(0, 3);

    for (let index = 0; index < batch.length; index += 1) {
      const url = batch[index];
      console.log(`Scraping: ${url}`);

      const articleData = await scrapeArticle(url);
      // A failed scrape is skipped rather than aborting the run.
      if (articleData) {
        await addArticleToStorage(outletId, articleData);
      }

      // Small delay to be respectful
      await pause(1000);
    }
  }

  console.log('\n✓ Scraping completed!');
}
|
|
|
|
// Run the script. An unexpected failure is logged and reported through a
// non-zero exit status so shells and CI can detect it.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});