React Native mobile application for the SAPIENS news platform. Consolidated all previous history into a single commit. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
315 lines
11 KiB
TypeScript
315 lines
11 KiB
TypeScript
import { readFileSync } from 'fs';
|
|
import { join } from 'path';
|
|
|
|
/**
 * One outlet (a person, topic, or company) together with the links collected
 * for it.
 */
export interface OutletLinks {
  /** Display name of the outlet. */
  name: string;
  /** Section of the source list the outlet belongs to. */
  category: 'people' | 'topics' | 'companies';
  /** Identifier string for the outlet (used as the record `id` when converting). */
  focusSubject: string;
  /** Links gathered for this outlet, in the order they were found. */
  urls: string[];
}
|
|
|
|
/**
 * Result of parsing an outlet list: the outlets grouped by category plus the
 * overall count.
 */
export interface ParsedOutlets {
  /** Outlets whose category is `people`. */
  people: OutletLinks[];
  /** Outlets whose category is `topics`. */
  topics: OutletLinks[];
  /** Outlets whose category is `companies`. */
  companies: OutletLinks[];
  /** Number of outlets stored across the three category lists. */
  total: number;
}
|
|
|
|
/**
 * Parses a markdown-style outlet list into structured {@link ParsedOutlets}
 * data and offers helpers for querying and converting the result.
 *
 * Recognised input lines:
 * - section headers containing `## People`, `## Topics` or `## Companies`
 *   (or starting with `📋 Companies`);
 * - outlet headings such as `### 1. Name (description)`;
 * - links, either numbered (`1. https://…`) or bare (`https://…`);
 * - inside the companies section, numbered entries such as `1. Ava Labs (…)`.
 *
 * An outlet is only stored once it has at least one URL.
 */
export class OutletParser {
  /** Outlet heading; captures the name and leaves out a trailing "(…)". */
  private static readonly OUTLET_HEADING = /###\s*\d+\.\s*(.+?)(?:\s*\([^)]*\))?$/;

  /** Numbered link such as "1. https://example.com"; captures the URL. */
  private static readonly NUMBERED_URL = /^\d+\.\s*(https?:\/\/.+)$/;

  /** Cheap pre-check for a numbered entry whose text starts with a Latin letter. */
  private static readonly COMPANY_ENTRY_START = /^\d+\.\s*[A-Za-z]/;

  /** Numbered company entry; captures the name and leaves out a trailing "(…)". */
  private static readonly COMPANY_ENTRY = /^\d+\.\s*(.+?)(?:\s*\([^)]*\))?$/;

  /** Any parenthesised fragment together with the whitespace in front of it. */
  private static readonly PARENTHETICAL = /\s*\([^)]*\)/g;

  /**
   * Abbreviations that are replaced by their long form. A Map is used so that
   * names such as "constructor" cannot resolve to inherited Object members.
   */
  private static readonly ABBREVIATIONS: ReadonlyMap<string, string> = new Map([
    ['CBDC', 'Central Bank Digital Currency'],
    ['CFTC', 'Commodity Futures Trading Commission'],
    ['SEC', 'Securities and Exchange Commission'],
    ['DAT', 'Digital Asset Treasury'],
    ['DeFi', 'Decentralized Finance'],
    ['DEX', 'Decentralized Exchange'],
    ['NFT', 'Non-Fungible Token'],
    ['RWA', 'Real World Assets'],
    ['SWF', 'Sovereign Wealth Fund'],
  ]);

  /** Lower-cased names treated as people by {@link OutletParser.categorizeOutlet}. */
  private static readonly KNOWN_PEOPLE: readonly string[] = [
    'ala shaabana', 'alex karp', 'arthur hayes', 'donald trump jr', 'eric trump',
    'jacob robert steeves', 'jared kushner', 'j.d. vance', 'jensen huang',
    'jerome powell', 'joseph jacks', 'robert myers', 'yat siu',
  ];

  /** Lower-cased names treated as companies by {@link OutletParser.categorizeOutlet}. */
  private static readonly KNOWN_COMPANIES: readonly string[] = [
    'xtao', 'yuma', 'taox', 'oblong', 'ava labs', 'boston dynamics',
    'blackrock', 'chainlink', 'circle', 'cme group', 'manifold labs',
  ];

  /**
   * Reads the file at `filePath` as UTF-8 and parses it.
   *
   * Progress and a per-category summary are written to the console.
   *
   * @param filePath Path of the outlet list to read.
   * @returns The outlets found in the file, grouped by category.
   * @throws Error with the message `Failed to parse outlet file: <reason>` when
   *   reading or parsing fails.
   */
  static parseOutletFile(filePath: string): ParsedOutlets {
    try {
      console.log(`Parsing outlet file: ${filePath}`);

      const parsed = OutletParser.parseOutletContent(readFileSync(filePath, 'utf-8'));

      console.log(`Successfully parsed ${parsed.total} outlets:`);
      console.log(`- People: ${parsed.people.length}`);
      console.log(`- Topics: ${parsed.topics.length}`);
      console.log(`- Companies: ${parsed.companies.length}`);

      return parsed;
    } catch (error: unknown) {
      // Anything can be thrown in JavaScript, so narrow before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      console.error('Error parsing outlet file:', reason);
      throw new Error(`Failed to parse outlet file: ${reason}`);
    }
  }

  /**
   * Parses the text of an outlet list that is already in memory.
   *
   * @param content Full text of the list; lines are trimmed and blank lines ignored.
   * @returns The outlets found in `content`, grouped by category.
   */
  static parseOutletContent(content: string): ParsedOutlets {
    const parsed: ParsedOutlets = { people: [], topics: [], companies: [], total: 0 };

    // Stores an outlet, but only when it actually collected links.
    const commit = (outlet: OutletLinks | null): void => {
      if (outlet !== null && outlet.urls.length > 0) {
        parsed[outlet.category].push(outlet);
        parsed.total++;
      }
    };

    let category: OutletLinks['category'] | null = null;
    let current: OutletLinks | null = null;

    for (const rawLine of content.split('\n')) {
      const line = rawLine.trim();
      if (line === '') continue;

      // Section headers are checked first so the generic "#" skip below
      // does not swallow them.
      const section = OutletParser.detectSection(line);
      if (section !== null) {
        category = section;
        continue;
      }

      // Any other "#"/"##" heading carries no data.
      if (line.startsWith('#') && !line.startsWith('###')) continue;

      if (line.startsWith('###') && category !== null) {
        const heading = OutletParser.firstCapture(line, OutletParser.OUTLET_HEADING);
        // The previous outlet is committed only when a new one really starts.
        // Committing on an unparseable heading left the old outlet current, so
        // it was stored (and counted) a second time later on.
        if (heading !== undefined) {
          commit(current);
          current = OutletParser.createOutlet(heading, category);
        }
        continue;
      }

      const numberedUrl = OutletParser.firstCapture(line, OutletParser.NUMBERED_URL);
      if (numberedUrl !== undefined) {
        // A numbered link is always consumed here, even without a current
        // outlet; otherwise it would be mistaken for a company entry below.
        if (current !== null) current.urls.push(numberedUrl);
        continue;
      }

      if (line.startsWith('http://') || line.startsWith('https://')) {
        if (current !== null) current.urls.push(line);
        continue;
      }

      // Company entries look like "1. Ava Labs (Avalanche 플랫폼)".
      if (category === 'companies' && OutletParser.COMPANY_ENTRY_START.test(line)) {
        const company = OutletParser.firstCapture(line, OutletParser.COMPANY_ENTRY);
        if (company !== undefined) {
          commit(current);
          current = OutletParser.createOutlet(company, 'companies');
        }
      }
    }

    // The final outlet has no following heading to trigger its commit.
    commit(current);
    return parsed;
  }

  /** Returns the category announced by a section header line, or `null` for any other line. */
  private static detectSection(line: string): OutletLinks['category'] | null {
    if (line.includes('## People')) return 'people';
    if (line.includes('## Topics')) return 'topics';
    if (line.includes('## Companies') || line.startsWith('📋 Companies')) return 'companies';
    return null;
  }

  /** Returns the first capture group of `pattern` in `line`, or `undefined` when there is no match. */
  private static firstCapture(line: string, pattern: RegExp): string | undefined {
    const match = line.match(pattern);
    return match === null ? undefined : match[1];
  }

  /** Builds an empty outlet (no URLs yet) from the raw name captured in the list. */
  private static createOutlet(rawName: string, category: OutletLinks['category']): OutletLinks {
    const name = OutletParser.cleanOutletName(rawName.trim());
    return {
      name,
      category,
      focusSubject: OutletParser.generateFocusSubject(name),
      urls: [],
    };
  }

  /**
   * Normalises an outlet name: drops every parenthesised fragment, e.g.
   * "(연방준비제도 의장)" or "(OSS Capital 창립자)", then expands known abbreviations.
   *
   * @param rawName Name as captured from the list.
   * @returns The long form for a known abbreviation, otherwise the stripped name.
   */
  private static cleanOutletName(rawName: string): string {
    const cleaned = rawName.replace(OutletParser.PARENTHETICAL, '').trim();
    const expansion = OutletParser.ABBREVIATIONS.get(cleaned);
    return expansion !== undefined ? expansion : cleaned;
  }

  /**
   * Derives the identifier used as `focusSubject` from an outlet name.
   *
   * Parenthesised fragments are removed and the rest is lower-cased; characters
   * other than `a-z`, `0-9`, whitespace and `-` are dropped; whitespace becomes
   * `-`; dash runs are collapsed and dashes at either end are trimmed. A name
   * without any ASCII letters or digits therefore yields an empty string.
   *
   * @param rawName Outlet name to convert.
   * @returns The dash-separated identifier.
   */
  private static generateFocusSubject(rawName: string): string {
    return rawName
      .replace(OutletParser.PARENTHETICAL, '')
      .trim()
      .toLowerCase()
      .replace(/[^a-z0-9\s-]/g, '') // Remove special characters
      .replace(/\s+/g, '-') // Replace whitespace runs with a dash
      .replace(/--+/g, '-') // Collapse repeated dashes
      .replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes
  }

  /**
   * Guesses a category from the name alone, using the built-in people and
   * company lists. A name matches a list entry when either string contains the
   * other (case-insensitive); people are checked before companies and anything
   * unmatched is a topic.
   *
   * Not called by any other member of this class.
   *
   * @param name Outlet name to classify.
   * @returns The guessed category.
   */
  private static categorizeOutlet(name: string): 'people' | 'topics' | 'companies' {
    const cleanName = name.toLowerCase().trim();

    // Every string "includes" the empty string, so a blank name would
    // otherwise be classified as a person.
    if (cleanName === '') return 'topics';

    const overlaps = (known: string): boolean =>
      cleanName.includes(known) || known.includes(cleanName);

    if (OutletParser.KNOWN_PEOPLE.some(overlaps)) return 'people';
    if (OutletParser.KNOWN_COMPANIES.some(overlaps)) return 'companies';
    return 'topics';
  }

  /**
   * Finds an outlet by display name (case-insensitive) or by exact `focusSubject`.
   *
   * @param parsed Parsed outlet data to search.
   * @param name Display name or focus subject to look for.
   * @returns The first match in people → topics → companies order, or `null`.
   */
  static getOutletByName(parsed: ParsedOutlets, name: string): OutletLinks | null {
    const wanted = name.toLowerCase();
    const allOutlets = [...parsed.people, ...parsed.topics, ...parsed.companies];
    const found = allOutlets.find(
      outlet => outlet.name.toLowerCase() === wanted || outlet.focusSubject === name,
    );
    return found !== undefined ? found : null;
  }

  /**
   * Collects the URLs of every outlet.
   *
   * @param parsed Parsed outlet data.
   * @returns All URLs in people → topics → companies order.
   */
  static getAllUrls(parsed: ParsedOutlets): string[] {
    const allOutlets = [...parsed.people, ...parsed.topics, ...parsed.companies];
    return allOutlets.flatMap(outlet => outlet.urls);
  }

  /**
   * Collects the URLs of the outlets in one category.
   *
   * @param parsed Parsed outlet data.
   * @param category Category whose URLs are wanted.
   * @returns The URLs of that category's outlets, in list order.
   */
  static getUrlsByCategory(parsed: ParsedOutlets, category: 'people' | 'topics' | 'companies'): string[] {
    return parsed[category].flatMap(outlet => outlet.urls);
  }

  /**
   * Converts parsed outlets into the flat record shape used by the existing
   * outlet format. Text fields are generated from per-category templates and
   * both image fields receive the category's default asset path.
   *
   * @param parsed Parsed outlet data.
   * @returns One record per outlet, in people → topics → companies order. The
   *   `urls` array is shared with the source outlet, not copied.
   */
  static convertToOutletFormat(parsed: ParsedOutlets): Array<{
    id: string;
    name: string;
    description: string;
    category: string;
    focusSubject: string;
    avatar?: string;
    profileImage?: string;
    bio: string;
    fullBio?: string[];
    urls: string[];
  }> {
    const allOutlets = [...parsed.people, ...parsed.topics, ...parsed.companies];

    return allOutlets.map(outlet => ({
      id: outlet.focusSubject,
      name: outlet.name,
      description: OutletParser.generateDescription(outlet),
      category: outlet.category,
      focusSubject: outlet.focusSubject,
      avatar: OutletParser.getDefaultAvatar(outlet.category),
      profileImage: OutletParser.getDefaultProfileImage(outlet.category),
      bio: OutletParser.generateBio(outlet),
      fullBio: OutletParser.generateFullBio(outlet),
      urls: outlet.urls,
    }));
  }

  /** Returns the one-line description template for the outlet's category. */
  private static generateDescription(outlet: OutletLinks): string {
    const descriptions: Record<OutletLinks['category'], string> = {
      people: `Latest news and analysis about ${outlet.name}`,
      topics: `Comprehensive coverage of ${outlet.name} developments and trends`,
      companies: `${outlet.name} news, updates, and market analysis`,
    };

    return descriptions[outlet.category];
  }

  /**
   * Returns the default avatar path for a category; unknown categories get the
   * topic avatar. A `switch` is used so that arbitrary strings can never hit
   * inherited Object members.
   */
  private static getDefaultAvatar(category: string): string {
    switch (category) {
      case 'people':
        return '/api/assets/default-person.jpg';
      case 'companies':
        return '/api/assets/default-company.jpg';
      default:
        return '/api/assets/default-topic.jpg';
    }
  }

  /** Returns the default profile image path, which is the same as the default avatar. */
  private static getDefaultProfileImage(category: string): string {
    return OutletParser.getDefaultAvatar(category);
  }

  /** Returns the short bio template for the outlet's category. */
  private static generateBio(outlet: OutletLinks): string {
    const bios: Record<OutletLinks['category'], string> = {
      people: `${outlet.name} is a prominent figure in technology and business, making headlines with strategic decisions and market insights.`,
      topics: `Stay informed about the latest developments in ${outlet.name} with comprehensive coverage and expert analysis.`,
      companies: `${outlet.name} continues to shape the industry with innovative solutions and strategic partnerships.`,
    };

    return bios[outlet.category];
  }

  /** Returns the three-sentence bio template for the outlet's category. */
  private static generateFullBio(outlet: OutletLinks): string[] {
    const fullBios: Record<OutletLinks['category'], string[]> = {
      people: [
        `${outlet.name} is a key figure in the technology and business landscape.`,
        `Known for strategic leadership and innovative thinking in their field.`,
        `Continues to influence industry trends and developments globally.`,
      ],
      topics: [
        `${outlet.name} represents a critical area of technological advancement.`,
        `Dynamic sector with ongoing market trends, regulatory updates, and innovations.`,
        `Comprehensive resource requiring expert analysis from leading industry professionals.`,
      ],
      companies: [
        `${outlet.name} is a significant player in the technology industry.`,
        `Known for innovative products and strategic market positioning.`,
        `Continues to drive industry growth and technological advancement.`,
      ],
    };

    return fullBios[outlet.category];
  }
}
|
|
|
|
/**
 * Parses the pasted outlet list expected under `<baseDir>/attached_assets`.
 *
 * @param baseDir Directory that contains the `attached_assets` folder. Defaults
 *   to the current working directory, so existing zero-argument calls resolve
 *   the same path as before.
 * @returns The structured outlets returned by `OutletParser.parseOutletFile`.
 * @throws Error propagated from `OutletParser.parseOutletFile` when the file
 *   cannot be read or parsed.
 */
export function parseAttachedOutletFile(baseDir: string = process.cwd()): ParsedOutlets {
  const filePath = join(
    baseDir,
    'attached_assets',
    'Pasted-Ala-Shaabana-https-www-rootdata-com-news-323625-https-ffnews-com-newsarticle-funding-xtao-tsx-v-1758557992922_1758557992922.txt',
  );
  return OutletParser.parseOutletFile(filePath);
}
|
|
|
|
export default OutletParser;