feat: SAPIENS Mobile App - Initial commit

React Native mobile application for SAPIENS news platform.
Consolidated all previous history into a single commit.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2025-10-23 14:30:25 +09:00
commit 919afe56f2
1516 changed files with 64072 additions and 0 deletions

349
comprehensive-scraping.cjs Normal file
View File

@ -0,0 +1,349 @@
// Comprehensive article scraping for all outlets from attached links
const fs = require('fs');
const path = require('path');
// Lookup table from a human-readable name (person, topic, or company) to the
// outlet ID used by this script's backend calls.
//
// Outlet IDs are lowercase, hyphen-separated slugs. Most are a direct slug of
// the name; note the two irregular ones, 'Altcoin' -> 'alt-coin' and
// 'Stablecoin' -> 'stable-coin', which match the keys used in outletsAndUrls.
//
// NOTE(review): none of the functions visible in this file read this map; it
// appears to be reference data only — confirm before removing or relying on it.
const nameToOutletIdMap = {
// People (individuals tracked as outlets)
'Ala Shaabana': 'ala-shaabana',
'Alex Karp': 'alex-karp',
'Arthur Hayes': 'arthur-hayes',
'Donald Trump Jr.': 'donald-trump-jr',
'Eric Trump': 'eric-trump',
'Jacob Robert Steeves': 'jacob-robert-steeves',
'Jared Kushner': 'jared-kushner',
'J.D. Vance': 'jd-vance',
'Jensen Huang': 'jensen-huang',
'Jerome Powell': 'jerome-powell',
'Joseph Jacks': 'joseph-jacks',
'Larry Ellison': 'larry-ellison',
'Lily Liu': 'lily-liu',
'Marco Rubio': 'marco-rubio',
'Robert Myers': 'robert-myers',
'Sam Altman': 'sam-altman',
'Satya Nadella': 'satya-nadella',
'Scott Bessent': 'scott-bessent',
'Simon Kim': 'simon-kim',
'Yat Siu': 'yat-siu',
// Topics (subject areas tracked as outlets)
'AI': 'ai',
'Altcoin': 'alt-coin',
'Bollywood': 'bollywood',
'CantoPop': 'cantopop',
'CBDC': 'cbdc',
'CFTC': 'cftc',
'Crypto': 'crypto',
'Custody Regulation': 'custody-regulation',
'DAT': 'dat',
'Decentralized AI': 'decentralized-ai',
'DeFi': 'defi',
'DEX': 'dex',
'Fed': 'fed',
'FOMC': 'fomc',
'J-Star': 'j-star',
'K-Star': 'k-star',
'NFT': 'nft',
'RWA': 'rwa',
'SEC': 'sec',
'Stablecoin': 'stable-coin',
'SWF': 'swf',
// Companies (organizations tracked as outlets)
'Ava Labs': 'ava-labs',
'Bittensor': 'bittensor',
'BlackRock': 'blackrock',
'Boston Dynamics': 'boston-dynamics',
'Chainlink': 'chainlink',
'Circle': 'circle',
'CME Group': 'cme-group',
'Epic Games': 'epic-games',
'Hashed': 'hashed',
'Hyperliquid': 'hyperliquid',
'Oblong': 'oblong',
'OpenSea': 'opensea',
'Palantir': 'palantir',
'PancakeSwap': 'pancakeswap',
'Polygon': 'polygon',
'Saudi Aramco': 'saudi-aramco',
'Solana Foundation': 'solana-foundation',
'TAOX': 'taox',
'TRON': 'tron',
'TSMC': 'tsmc',
'Uniswap': 'uniswap',
'World Liberty Financial': 'world-liberty-financial',
'xTAO': 'xtao',
'YUMA': 'yuma'
};
// Article URLs to scrape, keyed by outlet ID (the same slug format that
// nameToOutletIdMap uses for its values).
//
// main() walks these entries in insertion order, scrapes at most the first
// five URLs of each list, and posts every scraped article under the entry's
// key as its outletId. Only a subset of the known outlet IDs has an entry here.
//
// NOTE(review): all three URLs under 'bittensor' also appear under
// 'jacob-robert-steeves', so those pages are fetched and posted twice (once
// per outlet) — confirm the duplication is intended.
const outletsAndUrls = {
// People - Ala Shaabana
'ala-shaabana': [
'https://www.rootdata.com/news/323625',
'https://ffnews.com/newsarticle/funding/xtao-tsx-venture-listing/',
'https://stealthex.io/blog/bittensor-price-prediction-can-tao-coin-reach-1000/',
'https://www.gate.com/learn/articles/understanding-bittensor-protocol/2203',
'https://www.investing.com/news/cryptocurrency-news/b-dash-ventures-and-hashed-announce-sponsors-and-main-speakers-for-blockchain-leaders-summit-tokyo-2025-4198036'
],
// People - Alex Karp
'alex-karp': [
'https://www.thestreet.com/technology/salesforce-ceo-praises-palantir-as-it-closes-950m-uk-defense-deal',
'https://www.msn.com/en-us/money/topstocks/palantir-ceo-alexander-karp-s-new-plan-to-sell-1-2-billion-of-stock/ar-AA1zn2AU',
'https://www.aol.com/palantir-stock-investors-just-got-093000800.html',
'https://www.benzinga.com/markets/equities/25/08/47296576/palantir-ceo-alex-karp-dumps-63-million-in-stock-as-pltr-surges-111-this-year',
'https://finance.yahoo.com/news/palantir-ceo-alex-karp-just-090200616.html'
],
// People - Arthur Hayes
'arthur-hayes': [
'https://www.ccn.com/news/crypto/arthur-hayes-hype-pivotal-momen-ultra-bullish/',
'https://cointelegraph.com/news/arthur-hayes-sold-all-hype-ferrari-testarossa',
'https://cryptonews.com/news/bitmex-co-founder-arthur-hayes-dumps-entire-hype-bag-for-a-ferrari/',
'https://www.cryptopolitan.com/arthur-hayes-sells-5-1m-in-hype/',
'https://coincentral.com/hayes-dumps-5-1m-hype-position-shortly-after-making-126x-price-call/'
],
// People - Jacob Robert Steeves
'jacob-robert-steeves': [
'https://eng.ambcrypto.com/will-ai-coin-tao-reach-3000-as-its-first-halving-approaches/',
'https://news.ssbcrack.com/bittensors-tao-coin-can-it-really-hit-3000-amid-upcoming-halving-and-ai-buzz/',
'https://www.globenewswire.com/news-release/2025/04/29/3070182/0/en/Alpha-Sigma-Capital-Research-Publishes-New-Report-on-Bittensor-TAO-Decentralized-Neural-Internet-Model.html',
'https://www.chainup.com/market-update/bittensor-the-ai-alpha/',
'https://usethebitcoin.com/crypto-personalities/all-you-need-to-know-about-jacob-robert-steeves-the-co-founder-of-bittensor/'
],
// People - Joseph Jacks
'joseph-jacks': [
'https://fox4kc.com/business/press-releases/ein-presswire/842930120/cossa-launches-as-the-definitive-organization-for-the-26b-commercial-open-source-market',
'https://www.prnewswire.com/news-releases/tao-synergies-welcomes-top-bittensor-tao-leader-as-advisor-for-ai-focused-crypto-treasury-strategy-302538426.html',
'https://www.ainvest.com/news/joseph-jacks-joins-tao-synergies-advisor-ai-focused-crypto-treasury-strategy-2508/',
'https://fintech.global/2025/08/01/comp-ai-secures-2-6m-to-transform-soc-2-compliance/',
'https://www.stocktitan.net/news/TAOX/tao-synergies-welcomes-top-bittensor-tao-leader-as-advisor-for-ai-641ecubmt9fz.html'
],
// People - Robert Myers (only four URLs listed)
'robert-myers': [
'https://oss.capital/news/',
'https://www.proofoftalk.io/speakers/robert-myers',
'https://coinfomania.com/singularitynet-ceo-ben-goertzel-teams-up-with-fetch-ais-humayun-sheikh-to-explore-decentralized-artificial-intelligence-at-proof-of-talk-2025/',
'https://www.theblock.co/post/353065/bittensor-tao-token-crypto-investors'
],
// Topics - AI
'ai': [
'https://www.crescendo.ai/news/latest-ai-news-and-updates',
'https://www.reuters.com/technology/artificial-intelligence/',
'https://www.marketingprofs.com/opinions/2025/53723/ai-update-september-19-2025-ai-news-and-views-from-the-past-week',
'https://www.wndu.com/2025/09/19/artificial-intelligence-ai-update-google-microsoft-apple-meta-quantum-technology-finance/',
'https://solutionsreview.com/artificial-intelligence-news-for-the-week-of-september-19-updates-from-druid-ai-dxc-technology-g-p-more/'
],
// Topics - Alt Coin
'alt-coin': [
'https://cryptopotato.com/bitcoin-joins-the-altcoin-bloodbath-with-a-sudden-flash-crash-to-112k/',
'https://www.ainvest.com/news/michael-saylor-strategic-move-september-crypto-presale-outperform-bitcoin-2509/',
'https://coinpaper.com/11193/5-signs-the-2025-altseason-could-be-bigger-than-ever',
'https://cryptodnes.bg/en/analyst-says-2025-altcoin-rally-could-mirror-past-surges-here-is-why/',
'https://tangem.com/en/blog/post/what-is-altseason/'
],
// Topics - Stable Coin
'stable-coin': [
'https://www.coindesk.com/markets/2025/09/19/u-s-stablecoin-battle-could-be-zero-sum-game-jpmorgan',
'https://www.circle.com/usdc',
'https://www.theblock.co/post/370543/tethers-hedge-and-expand-us-strategy-puts-circle-on-defense-in-market-shake-up-tests-oversight-versus-privacy',
'https://breet.io/blog/usdt-vs-usdc',
'https://www.dlnews.com/articles/markets/tether-faces-uphill-battle-launching-usat-stablecoin-in-us/'
],
// Companies - Bittensor (these three URLs are also listed under 'jacob-robert-steeves')
'bittensor': [
'https://eng.ambcrypto.com/will-ai-coin-tao-reach-3000-as-its-first-halving-approaches/',
'https://news.ssbcrack.com/bittensors-tao-coin-can-it-really-hit-3000-amid-upcoming-halving-and-ai-buzz/',
'https://www.chainup.com/market-update/bittensor-the-ai-alpha/'
]
};
/** Per-request timeout so a single stalled server cannot hang the whole run. */
const SCRAPE_TIMEOUT_MS = 15000;

/** Thumbnail used when the page exposes no absolute image URL. */
const DEFAULT_THUMBNAIL = '/api/assets/default-article.png';

/** Named entities understood by decodeHtmlEntities; any other name is left as-is. */
const NAMED_HTML_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', '\u00a0'],
]);

/**
 * Decodes numeric (`&#39;`, `&#x27;`) and a few common named HTML entities.
 * Unknown or out-of-range entities are returned unchanged.
 *
 * @param {string} text - Raw text taken from HTML markup.
 * @returns {string} Text with recognised entities replaced by their characters.
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
    if (body.startsWith('#')) {
      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws outside the Unicode range, so guard it.
      return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    }
    return NAMED_HTML_ENTITIES.get(body.toLowerCase()) ?? entity;
  });
}

/**
 * Decodes entities, collapses runs of whitespace to one space, and trims.
 *
 * @param {string} text - Raw text taken from HTML markup.
 * @returns {string} Single-line, human-readable text.
 */
function normalizeText(text) {
  return decodeHtmlEntities(text).replace(/\s+/g, ' ').trim();
}

/**
 * Returns the raw `content` value of the first `<meta>` tag whose
 * `attrName` equals `attrValue`, accepting either attribute order.
 *
 * The value is delimited by its own opening quote, so an apostrophe inside a
 * double-quoted value (e.g. content="Palantir's CEO") no longer truncates it.
 *
 * @param {string} html - Page markup.
 * @param {string} attrName - Identifying attribute, e.g. 'property' or 'name'.
 * @param {string} attrValue - Expected value, e.g. 'og:title'. Must not contain
 *   regular-expression metacharacters (only internal constants are passed).
 * @returns {string|null} The undecoded content value, or null when absent/empty.
 */
function extractMetaContent(html, attrName, attrValue) {
  const identifyingAttr = `${attrName}=["']${attrValue}["']`;
  // Group 1 = opening quote, group 2 = one or more chars that are not that quote.
  const quotedContent = 'content=(["\'])((?:(?!\\1)[\\s\\S])+)\\1';
  const patterns = [
    new RegExp(`<meta[^>]*${identifyingAttr}[^>]*${quotedContent}[^>]*>`, 'i'),
    new RegExp(`<meta[^>]*${quotedContent}[^>]*${identifyingAttr}[^>]*>`, 'i'),
  ];
  for (const pattern of patterns) {
    const match = html.match(pattern);
    if (match) {
      return match[2];
    }
  }
  return null;
}

/**
 * Removes a trailing site-name suffix such as " | CoinDesk" or " - Reuters".
 *
 * A dash only counts as a separator when it has whitespace on both sides, so
 * hyphenated words ("Zero-Sum", "J-Star") are preserved. If stripping would
 * leave nothing, the input is returned unchanged.
 *
 * @param {string} title - Normalised page title.
 * @returns {string} Title without the site-name suffix.
 */
function cleanTitle(title) {
  const stripped = title
    .replace(/\s*\|.*$/, '')
    .replace(/\s+[-\u2013\u2014]\s+.*$/, '')
    .trim();
  return stripped.length > 0 ? stripped : title;
}

/**
 * Downloads a page and extracts a lightweight article record from its HTML
 * using regular expressions (no DOM parser).
 *
 * - title: first non-empty of <title>, <h1>, og:title; site suffix removed;
 *   capped at 150 characters plus "...".
 * - summary: first non-empty of description, og:description,
 *   twitter:description; capped at 500 characters.
 * - body: attribution line, the summary, and up to three <p> paragraphs longer
 *   than 30 characters that do not mention "cookie" or "subscribe" (any case).
 * - thumbnail: first absolute (http...) URL among og:image, twitter:image and
 *   the first <img src>; otherwise a default asset path.
 *
 * Never throws: network errors, timeouts and non-2xx responses are logged and
 * reported as null.
 *
 * @param {string} url - Absolute URL of the article page.
 * @returns {Promise<{title: string, summary: string, body: string, url: string, thumbnail: string}|null>}
 *   The extracted record, or null when the page could not be fetched.
 */
async function scrapeArticle(url) {
  try {
    console.log(` Scraping: ${url}`);
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
      },
      signal: AbortSignal.timeout(SCRAPE_TIMEOUT_MS),
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const html = await response.text();

    // Title: keep the original source priority, but skip candidates that are
    // empty once normalised instead of accepting a blank title.
    const titleCandidates = [
      html.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1],
      html.match(/<h1[^>]*>([^<]+)<\/h1>/i)?.[1],
      extractMetaContent(html, 'property', 'og:title'),
    ];
    const rawTitle = titleCandidates
      .map((candidate) => (candidate ? normalizeText(candidate) : ''))
      .find((candidate) => candidate.length > 0);
    let title = cleanTitle((rawTitle ?? 'Untitled Article').substring(0, 200));
    if (title.length > 150) {
      title = `${title.substring(0, 150)}...`;
    }

    // Summary from the usual description meta tags.
    const summaryCandidates = [
      extractMetaContent(html, 'name', 'description'),
      extractMetaContent(html, 'property', 'og:description'),
      extractMetaContent(html, 'name', 'twitter:description'),
    ];
    const rawSummary = summaryCandidates
      .map((candidate) => (candidate ? normalizeText(candidate) : ''))
      .find((candidate) => candidate.length > 0);
    const summary = rawSummary ? rawSummary.substring(0, 500) : 'Article summary not available.';

    // Body paragraphs: the pattern only accepts <p> elements that start with
    // text and contain at most simple, non-nested inline tags.
    const paragraphPattern = /<p[^>]*>([^<]+(?:<[^>]*>[^<]*<\/[^>]*>[^<]*)*)<\/p>/gi;
    const bodyParagraphs = [];
    for (const match of html.matchAll(paragraphPattern)) {
      const cleanText = normalizeText(match[1].replace(/<[^>]*>/g, ' '));
      const lowered = cleanText.toLowerCase();
      // Case-insensitive so "Subscribe to..." / "Cookie policy" banners are dropped too.
      const isBoilerplate = lowered.includes('cookie') || lowered.includes('subscribe');
      if (cleanText.length > 30 && !isBoilerplate) {
        bodyParagraphs.push(cleanText);
      }
      if (bodyParagraphs.length === 3) {
        break; // only the first three usable paragraphs are kept
      }
    }

    let body = `This article was originally published at ${url}.\n\n${summary}`;
    if (bodyParagraphs.length > 0) {
      body += `\n\n${bodyParagraphs.join('\n\n')}`;
    } else {
      body += `\n\nThis content provides insights and analysis on current industry developments and trends in the cryptocurrency, technology, and financial sectors.`;
    }

    // Thumbnail: first absolute URL; attribute values are entity-decoded because
    // "&amp;" inside URLs is common in markup.
    const imageCandidates = [
      extractMetaContent(html, 'property', 'og:image'),
      extractMetaContent(html, 'name', 'twitter:image'),
      html.match(/<img[^>]*src=["']([^"']+)["'][^>]*>/i)?.[1],
    ];
    const thumbnail = imageCandidates
      .filter(Boolean)
      .map((candidate) => decodeHtmlEntities(candidate).trim())
      .find((candidate) => candidate.startsWith('http')) ?? DEFAULT_THUMBNAIL;

    return {
      title,
      summary,
      body,
      url,
      thumbnail
    };
  } catch (error) {
    console.error(` ✗ Error scraping ${url}:`, error.message);
    return null;
  }
}
/**
 * Posts one scraped article to the backend's `/api/articles` endpoint.
 *
 * The payload carries the outlet ID, the article's title/summary/body/thumbnail,
 * the current time as `publishedAt`, an empty `tags` list and a `viewCount` of 0.
 * Never throws: HTTP failures and network errors are logged and reported as null.
 *
 * @param {string} outletId - Outlet slug the article is filed under.
 * @param {{title: string, summary: string, body: string, thumbnail: string}} articleData
 *   Article record as produced by scrapeArticle.
 * @param {string} [apiBaseUrl='http://localhost:5000'] - Origin of the backend;
 *   the request goes to `/api/articles` on that origin.
 * @returns {Promise<object|null>} Parsed JSON response on success, otherwise null.
 */
async function addArticleToStorage(outletId, articleData, apiBaseUrl = 'http://localhost:5000') {
  const LOG_TITLE_LIMIT = 80;
  try {
    const endpoint = new URL('/api/articles', apiBaseUrl);
    const response = await fetch(endpoint.href, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        outletId,
        title: articleData.title,
        summary: articleData.summary,
        body: articleData.body,
        thumbnail: articleData.thumbnail,
        publishedAt: new Date().toISOString(),
        tags: [],
        viewCount: 0
      })
    });

    if (!response.ok) {
      const errorText = await response.text();
      console.error(` ✗ Failed to add article (${response.status}): ${errorText}`);
      return null;
    }

    const result = await response.json();
    // Coerce defensively: a missing title must not turn a successful POST into
    // a reported failure just because the log line could not be built.
    const title = String(articleData.title ?? '');
    // Only show an ellipsis when the title was actually shortened.
    const shownTitle = title.length > LOG_TITLE_LIMIT
      ? `${title.substring(0, LOG_TITLE_LIMIT)}...`
      : title;
    console.log(` ✓ Added: ${shownTitle}`);
    return result;
  } catch (error) {
    console.error(` ✗ Error adding article:`, error.message);
    return null;
  }
}
/**
 * Scrapes every outlet listed in outletsAndUrls and stores the results.
 *
 * For each outlet, at most the first five URLs are processed strictly one after
 * another: scrape the page, post the article if scraping succeeded, then pause
 * before the next request. Per-outlet and overall counts are printed; the
 * overall success rate is reported as 0% when no URL was processed.
 *
 * @returns {Promise<void>} Resolves once every outlet has been processed.
 */
async function main() {
  const MAX_ARTICLES_PER_OUTLET = 5;
  const DELAY_BETWEEN_REQUESTS_MS = 1500;

  console.log('🚀 Starting comprehensive article scraping...\n');
  let totalScraped = 0;
  let totalAdded = 0;

  for (const [outletId, urls] of Object.entries(outletsAndUrls)) {
    console.log(`\n📰 === Scraping articles for ${outletId} ===`);
    console.log(` Found ${urls.length} URLs to scrape`);
    let articleCount = 0;

    for (const url of urls.slice(0, MAX_ARTICLES_PER_OUTLET)) {
      totalScraped++;
      const articleData = await scrapeArticle(url);
      const result = articleData ? await addArticleToStorage(outletId, articleData) : null;
      if (result) {
        totalAdded++;
        articleCount++;
      }
      // Respectful delay between requests.
      await new Promise((resolve) => setTimeout(resolve, DELAY_BETWEEN_REQUESTS_MS));
    }
    console.log(` ✅ Added ${articleCount} articles for ${outletId}`);
  }

  // Guard the division: with nothing processed the ratio would print as "NaN%".
  const successRate = totalScraped === 0 ? 0 : Math.round((totalAdded / totalScraped) * 100);

  console.log(`\n🎉 === Scraping Summary ===`);
  console.log(` 📊 Total URLs processed: ${totalScraped}`);
  console.log(` ✅ Articles successfully added: ${totalAdded}`);
  console.log(` 📈 Success rate: ${successRate}%`);
  console.log(`\n✨ Comprehensive scraping completed!`);
}
// Run the scraper. An unexpected rejection is printed and also reflected in
// the process exit code, so shells and CI jobs can tell the run failed.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});