feat: SAPIENS Mobile App - Initial commit
React Native mobile application for the SAPIENS news platform. Consolidated all previous history into a single commit. 🤖 Generated with [Claude Code](https://claude.com/claude-code). Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
315
server/outletParser.ts
Normal file
315
server/outletParser.ts
Normal file
@ -0,0 +1,315 @@
|
||||
import { readFileSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
|
||||
/**
 * One outlet (a person, topic, or company) together with the source links
 * collected for it while parsing an outlet list file.
 */
export interface OutletLinks {
  /** Display name of the outlet. */
  name: string;
  /** Section of the outlet list this entry belongs to. */
  category: 'people' | 'topics' | 'companies';
  /** Identifier string associated with the outlet (used as the record id when converting to the outlet format). */
  focusSubject: string;
  /** Source URLs attached to this outlet, in the order they were collected. */
  urls: string[];
}
|
||||
|
||||
/**
 * Result of parsing an outlet list: the outlets grouped by category plus a
 * running count of how many outlets were stored in total.
 */
export interface ParsedOutlets {
  /** Outlets found under the people section. */
  people: OutletLinks[];
  /** Outlets found under the topics section. */
  topics: OutletLinks[];
  /** Outlets found under the companies section. */
  companies: OutletLinks[];
  /** Number of outlets stored across all three categories. */
  total: number;
}
|
||||
|
||||
/**
 * Parses a markdown-like outlet list into structured data and converts the
 * result into the outlet record shape used by the rest of the server.
 *
 * Recognized input lines (after trimming; blank lines are ignored):
 * - `## People`, `## Topics`, `## Companies` (or a line starting with
 *   `📋 Companies`) switch the current section.
 * - `### <n>. <Name> (optional note)` starts a new outlet in the current section.
 * - `<n>. https://...` or a bare `http(s)://...` line adds a URL to the
 *   outlet currently being built.
 * - Inside the companies section, `<n>. <Name> (optional note)` (name starting
 *   with an ASCII letter) also starts a new outlet.
 *
 * An outlet is only stored once it has collected at least one URL.
 */
export class OutletParser {
  /**
   * Acronyms that are expanded to their long form when used as an outlet name.
   * A Map is used so that names such as "constructor" or "toString" cannot
   * accidentally resolve to inherited object members.
   */
  private static readonly SPECIAL_CASE_NAMES: ReadonlyMap<string, string> = new Map([
    ['CBDC', 'Central Bank Digital Currency'],
    ['CFTC', 'Commodity Futures Trading Commission'],
    ['SEC', 'Securities and Exchange Commission'],
    ['DAT', 'Digital Asset Treasury'],
    ['DeFi', 'Decentralized Finance'],
    ['DEX', 'Decentralized Exchange'],
    ['NFT', 'Non-Fungible Token'],
    ['RWA', 'Real World Assets'],
    ['SWF', 'Sovereign Wealth Fund'],
  ]);

  /** Lower-case names treated as people by `categorizeOutlet`. */
  private static readonly KNOWN_PEOPLE: readonly string[] = [
    'ala shaabana', 'alex karp', 'arthur hayes', 'donald trump jr', 'eric trump',
    'jacob robert steeves', 'jared kushner', 'j.d. vance', 'jensen huang',
    'jerome powell', 'joseph jacks', 'robert myers', 'yat siu',
  ];

  /** Lower-case names treated as companies by `categorizeOutlet`. */
  private static readonly KNOWN_COMPANIES: readonly string[] = [
    'xtao', 'yuma', 'taox', 'oblong', 'ava labs', 'boston dynamics',
    'blackrock', 'chainlink', 'circle', 'cme group', 'manifold labs',
  ];

  /**
   * Reads the file at `filePath` and parses it into outlets grouped by category.
   *
   * @param filePath Path of the UTF-8 text file to parse.
   * @returns The parsed outlets and the total number of outlets stored.
   * @throws Error with the message prefix "Failed to parse outlet file: " when
   *   the file cannot be read or parsing fails.
   */
  static parseOutletFile(filePath: string): ParsedOutlets {
    try {
      console.log(`Parsing outlet file: ${filePath}`);

      const content = readFileSync(filePath, 'utf-8');
      // Trimming also strips the '\r' left over from CRLF line endings.
      const lines = content
        .split('\n')
        .map(line => line.trim())
        .filter(line => line.length > 0);

      const parsed: ParsedOutlets = { people: [], topics: [], companies: [], total: 0 };

      // Patterns are created once instead of on every loop iteration.
      const outletHeaderPattern = /###\s*\d+\.\s*(.+?)(?:\s*\([^)]*\))?$/;
      const numberedUrlPattern = /^\d+\.\s*(https?:\/\/.+)$/;
      const companyLinePattern = /^\d+\.\s*[A-Za-z]/;
      const companyNamePattern = /^\d+\.\s*(.+?)(?:\s*\([^)]*\))?$/;

      let currentCategory: OutletLinks['category'] | null = null;
      let currentOutlet: OutletLinks | null = null;

      for (const line of lines) {
        // Section headers must be detected before other '#' lines are skipped.
        if (line.includes('## People')) {
          currentCategory = 'people';
          continue;
        }
        if (line.includes('## Topics')) {
          currentCategory = 'topics';
          continue;
        }
        if (line.includes('## Companies') || line.startsWith('📋 Companies')) {
          currentCategory = 'companies';
          continue;
        }

        // Any other '#' or '##' header carries no outlet data.
        if (line.startsWith('#') && !line.startsWith('###')) continue;

        // Outlet header, e.g. "### 1. Ala Shaabana - Bittensor 공동창립자".
        if (line.startsWith('###') && currentCategory) {
          OutletParser.commitOutlet(parsed, currentOutlet);
          // Clear the committed outlet so it can never be mutated or stored a
          // second time if this header turns out to be unparseable.
          currentOutlet = null;

          const rawName = outletHeaderPattern.exec(line)?.[1];
          if (rawName !== undefined) {
            currentOutlet = OutletParser.createOutlet(rawName, currentCategory);
          } else {
            console.warn(`Skipping unrecognized outlet header: ${line}`);
          }
          continue;
        }

        // Numbered URL, e.g. "1. https://example.com". It is always consumed
        // here so that it can never be mistaken for a company entry below.
        const numberedUrl = numberedUrlPattern.exec(line)?.[1];
        if (numberedUrl !== undefined) {
          currentOutlet?.urls.push(numberedUrl);
          continue;
        }

        // Bare URL line (used in the companies section).
        if (line.startsWith('http://') || line.startsWith('https://')) {
          currentOutlet?.urls.push(line);
          continue;
        }

        // Company entry, e.g. "1. Ava Labs (Avalanche 플랫폼)".
        if (currentCategory === 'companies' && companyLinePattern.test(line)) {
          OutletParser.commitOutlet(parsed, currentOutlet);
          currentOutlet = null;

          const rawName = companyNamePattern.exec(line)?.[1];
          if (rawName !== undefined) {
            currentOutlet = OutletParser.createOutlet(rawName, 'companies');
          }
        }
      }

      // The last outlet has no following header to trigger its commit.
      OutletParser.commitOutlet(parsed, currentOutlet);

      console.log(`Successfully parsed ${parsed.total} outlets:`);
      console.log(`- People: ${parsed.people.length}`);
      console.log(`- Topics: ${parsed.topics.length}`);
      console.log(`- Companies: ${parsed.companies.length}`);

      return parsed;
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.error('Error parsing outlet file:', message);
      throw new Error(`Failed to parse outlet file: ${message}`);
    }
  }

  /** Stores `outlet` under its category if it collected at least one URL. */
  private static commitOutlet(parsed: ParsedOutlets, outlet: OutletLinks | null): void {
    if (outlet && outlet.urls.length > 0) {
      parsed[outlet.category].push(outlet);
      parsed.total++;
    }
  }

  /** Builds an empty outlet (no URLs yet) from the raw name captured from a header line. */
  private static createOutlet(rawName: string, category: OutletLinks['category']): OutletLinks {
    const name = OutletParser.cleanOutletName(rawName.trim());
    return {
      name,
      category,
      focusSubject: OutletParser.generateFocusSubject(name),
      urls: [],
    };
  }

  /**
   * Normalizes a raw outlet name: strips parenthetical notes such as
   * "(연방준비제도 의장)" and expands known acronyms to their long form.
   */
  private static cleanOutletName(rawName: string): string {
    const cleaned = rawName.replace(/\s*\([^)]*\)/g, '').trim();
    return OutletParser.SPECIAL_CASE_NAMES.get(cleaned) ?? cleaned;
  }

  /**
   * Derives a lower-case, dash-separated identifier from an outlet name.
   *
   * Only `a-z`, `0-9`, whitespace and dashes survive, so a name without any
   * ASCII letters or digits yields an empty string.
   */
  private static generateFocusSubject(rawName: string): string {
    const subject = rawName.replace(/\s*\([^)]*\)/g, '').trim();

    return subject
      .toLowerCase()
      .replace(/[^a-z0-9\s-]/g, '') // Remove special characters
      .replace(/\s+/g, '-') // Replace whitespace runs with a dash
      .replace(/--+/g, '-') // Collapse repeated dashes
      .replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes
  }

  /**
   * Guesses a category from an outlet name using substring matching (in both
   * directions) against the known people and company lists; anything else is
   * a topic. Not referenced by any other method of this class.
   */
  private static categorizeOutlet(name: string): 'people' | 'topics' | 'companies' {
    const cleanName = name.toLowerCase().trim();

    // An empty string is a substring of every entry and would otherwise be
    // classified as a person.
    if (cleanName.length === 0) {
      return 'topics';
    }

    const matches = (candidate: string): boolean =>
      cleanName.includes(candidate) || candidate.includes(cleanName);

    if (OutletParser.KNOWN_PEOPLE.some(matches)) {
      return 'people';
    }
    if (OutletParser.KNOWN_COMPANIES.some(matches)) {
      return 'companies';
    }

    // Everything else goes to topics
    return 'topics';
  }

  /**
   * Finds an outlet whose name equals `name` case-insensitively or whose
   * `focusSubject` equals `name` exactly.
   *
   * @returns The first match in people, topics, companies order, or `null`.
   */
  static getOutletByName(parsed: ParsedOutlets, name: string): OutletLinks | null {
    const wanted = name.toLowerCase();
    const allOutlets = [...parsed.people, ...parsed.topics, ...parsed.companies];
    const found = allOutlets.find(
      outlet => outlet.name.toLowerCase() === wanted || outlet.focusSubject === name,
    );
    return found ?? null;
  }

  /** Returns every URL of every outlet, in people, topics, companies order. */
  static getAllUrls(parsed: ParsedOutlets): string[] {
    const allOutlets = [...parsed.people, ...parsed.topics, ...parsed.companies];
    return allOutlets.flatMap(outlet => outlet.urls);
  }

  /** Returns every URL of the outlets in the given category. */
  static getUrlsByCategory(parsed: ParsedOutlets, category: 'people' | 'topics' | 'companies'): string[] {
    return parsed[category].flatMap(outlet => outlet.urls);
  }

  /**
   * Converts parsed outlets into outlet records with generated description,
   * bio and default images. The record `id` is the outlet's `focusSubject`.
   */
  static convertToOutletFormat(parsed: ParsedOutlets): Array<{
    id: string;
    name: string;
    description: string;
    category: string;
    focusSubject: string;
    avatar?: string;
    profileImage?: string;
    bio: string;
    fullBio?: string[];
    urls: string[];
  }> {
    const allOutlets = [...parsed.people, ...parsed.topics, ...parsed.companies];

    return allOutlets.map(outlet => ({
      id: outlet.focusSubject,
      name: outlet.name,
      description: OutletParser.generateDescription(outlet),
      category: outlet.category,
      focusSubject: outlet.focusSubject,
      avatar: OutletParser.getDefaultAvatar(outlet.category),
      profileImage: OutletParser.getDefaultProfileImage(outlet.category),
      bio: OutletParser.generateBio(outlet),
      fullBio: OutletParser.generateFullBio(outlet),
      urls: outlet.urls,
    }));
  }

  /** Returns the one-line description template for the outlet's category. */
  private static generateDescription(outlet: OutletLinks): string {
    const descriptions: Record<OutletLinks['category'], string> = {
      people: `Latest news and analysis about ${outlet.name}`,
      topics: `Comprehensive coverage of ${outlet.name} developments and trends`,
      companies: `${outlet.name} news, updates, and market analysis`,
    };

    return descriptions[outlet.category];
  }

  /**
   * Returns the default avatar path for a category; unknown categories fall
   * back to the topic avatar.
   */
  private static getDefaultAvatar(category: string): string {
    // A switch avoids looking up arbitrary strings on an object, where names
    // like "toString" would resolve to inherited members.
    switch (category) {
      case 'people':
        return '/api/assets/default-person.jpg';
      case 'companies':
        return '/api/assets/default-company.jpg';
      case 'topics':
      default:
        return '/api/assets/default-topic.jpg';
    }
  }

  /** Returns the default profile image path, which is the same as the default avatar. */
  private static getDefaultProfileImage(category: string): string {
    return OutletParser.getDefaultAvatar(category);
  }

  /** Returns the short bio template for the outlet's category. */
  private static generateBio(outlet: OutletLinks): string {
    const bios: Record<OutletLinks['category'], string> = {
      people: `${outlet.name} is a prominent figure in technology and business, making headlines with strategic decisions and market insights.`,
      topics: `Stay informed about the latest developments in ${outlet.name} with comprehensive coverage and expert analysis.`,
      companies: `${outlet.name} continues to shape the industry with innovative solutions and strategic partnerships.`,
    };

    return bios[outlet.category];
  }

  /** Returns the three-paragraph bio template for the outlet's category. */
  private static generateFullBio(outlet: OutletLinks): string[] {
    const fullBios: Record<OutletLinks['category'], string[]> = {
      people: [
        `${outlet.name} is a key figure in the technology and business landscape.`,
        `Known for strategic leadership and innovative thinking in their field.`,
        `Continues to influence industry trends and developments globally.`,
      ],
      topics: [
        `${outlet.name} represents a critical area of technological advancement.`,
        `Dynamic sector with ongoing market trends, regulatory updates, and innovations.`,
        `Comprehensive resource requiring expert analysis from leading industry professionals.`,
      ],
      companies: [
        `${outlet.name} is a significant player in the technology industry.`,
        `Known for innovative products and strategic market positioning.`,
        `Continues to drive industry growth and technological advancement.`,
      ],
    };

    return fullBios[outlet.category];
  }
}
|
||||
|
||||
/**
 * Parses an outlet list stored in the `attached_assets` directory under the
 * current working directory.
 *
 * @param fileName Name of the file inside `attached_assets`. Defaults to the
 *   pasted outlet list this utility was originally written for.
 * @returns The parsed outlets.
 * @throws Error when the file cannot be read or parsed (propagated from
 *   `OutletParser.parseOutletFile`).
 */
export function parseAttachedOutletFile(
  fileName: string = 'Pasted-Ala-Shaabana-https-www-rootdata-com-news-323625-https-ffnews-com-newsarticle-funding-xtao-tsx-v-1758557992922_1758557992922.txt',
): ParsedOutlets {
  // The working directory is resolved at call time, not at module load.
  const filePath = join(process.cwd(), 'attached_assets', fileName);
  return OutletParser.parseOutletFile(filePath);
}
|
||||
|
||||
// Default export of the same class that is also exported by name above,
// so both `import OutletParser from ...` and `import { OutletParser } from ...` work.
export default OutletParser;
|
||||
Reference in New Issue
Block a user