React Native mobile application for SAPIENS news platform. Consolidated all previous history into single commit. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1772 lines
64 KiB
TypeScript
1772 lines
64 KiB
TypeScript
import type { Express } from "express";
|
|
import { createServer, type Server } from "http";
|
|
import express from "express";
|
|
import path from "path";
|
|
import { storage } from "./storage";
|
|
import { insertArticleSchema, insertMediaOutletSchema, type Article, type MediaOutlet } from "@shared/schema";
|
|
import OpenAI from "openai";
|
|
import { WebScraper } from './scraper';
|
|
import { OutletParser } from './outletParser';
|
|
import sharp from 'sharp';
|
|
import fs from 'fs';
|
|
import * as newsapi from './newsapi-client';
|
|
|
|
// the newest OpenAI model is "gpt-5" which was released August 7, 2025. do not change this unless explicitly requested by the user
// Shared OpenAI client for this module; the API key is read from the
// OPENAI_API_KEY environment variable.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});
|
|
|
|
// Utility functions for thumbnail generation
|
|
/**
 * Reports whether an article still needs a generated thumbnail.
 *
 * An article needs one when its `thumbnail` is missing, blank, or still
 * points at the shared default placeholder image.
 *
 * @param article - Article to inspect; only `thumbnail` is read.
 * @returns `true` when a thumbnail should be generated.
 */
function needsThumbnail(article: Article): boolean {
  // Normalise first so a whitespace-padded placeholder path is still
  // recognised as the placeholder (previously only the blank check trimmed).
  const thumbnail = article.thumbnail?.trim() ?? '';
  return thumbnail === '' || thumbnail === '/api/assets/default-article.png';
}
|
|
|
|
/**
 * Formats a publication time as an "X min ago" label, always within 1–59.
 *
 * When `articleId` is given (and non-empty) the label is NOT derived from
 * `date`: it is a pseudo-random but stable value computed from the sum of
 * the id's character codes, so the same article always shows the same label.
 * Otherwise the real elapsed time is used, clamped to the 1–59 range.
 *
 * @param date - Publication time; only used when no `articleId` is given.
 * @param articleId - Optional id used to derive a stable label.
 * @returns A string of the form `"<minutes> min ago"`.
 */
function formatTimeAgo(date: Date | string, articleId?: string): string {
  if (articleId) {
    const hash = articleId.split('').reduce((acc, char) => acc + char.charCodeAt(0), 0);
    return `${(hash % 59) + 1} min ago`; // 1 to 59
  }

  const elapsedSeconds = Math.floor((Date.now() - new Date(date).getTime()) / 1000);
  const elapsedMinutes = Math.floor(elapsedSeconds / 60);

  // An unparseable date yields NaN, which would slip through both clamp
  // comparisons and render "NaN min ago"; treat it as the minimum instead.
  if (!Number.isFinite(elapsedMinutes)) {
    return '1 min ago';
  }

  // Clamp to the 1–59 range (future dates and anything older than an hour).
  const clamped = Math.min(59, Math.max(1, elapsedMinutes));
  return `${clamped} min ago`;
}
|
|
|
|
/**
 * Produces a publication timestamp 1–59 whole minutes before `now`.
 *
 * The offset is derived from `seed` (the sum of its character codes fed
 * through `Math.sin`), so the same seed always maps to the same offset.
 * When `seed` is omitted or empty a random seed is used instead.
 *
 * @param seed - Optional text (e.g. title + summary) that fixes the offset.
 * @param now - Reference instant; defaults to the current time. Exposed so
 *   callers and tests can obtain a fully deterministic result.
 * @returns A new `Date` between 1 and 59 minutes earlier than `now`.
 */
function generateVariedPublishedAt(seed?: string, now: Date = new Date()): Date {
  const seedValue = seed || Math.random().toString();
  const hash = seedValue.split('').reduce((acc, char) => acc + char.charCodeAt(0), 0);
  const random = Math.abs(Math.sin(hash)) * 1000;

  // Map the pseudo-random value onto 1..59 minutes.
  const offsetMinutes = Math.floor((random * 1000) % 59) + 1;

  return new Date(now.getTime() - offsetMinutes * 60 * 1000);
}
|
|
|
|
/**
 * Builds the text prompt sent to the image model for an article thumbnail.
 *
 * @param article - Supplies `title` and, when present, `summary`
 *   (falls back to the literal "News article").
 * @param outlet - Optional outlet; its `focusSubject`, when set, is appended
 *   as extra subject context.
 * @returns The complete prompt string.
 */
function generateImagePrompt(article: Article, outlet?: MediaOutlet): string {
  // Only mention the outlet when it actually has a focus subject; otherwise
  // the prompt would contain "related to undefined" / "related to null".
  const focusSubject = outlet?.focusSubject;
  const outletContext = focusSubject ? ` related to ${focusSubject}` : '';
  const description = article.summary || 'News article';
  return `Professional news thumbnail image for article: "${article.title}". ${description}${outletContext}. Modern, clean, journalistic style, suitable for mobile news app, 16:9 aspect ratio, no text overlays, photorealistic.`;
}
|
|
|
|
/**
 * Persists a generated image and a 400x225 thumbnail of it as PNG files
 * under `<cwd>/attached_assets/generated_images`.
 *
 * @param imageUrlOrBase64 - Either a `data:image/...` URL carrying base64
 *   data, or a URL that is downloaded with `fetch`.
 * @param filename - Base file name without extension; the full image is
 *   written as `<filename>.png` and the thumbnail as `thumb_<filename>.png`.
 * @returns The API path of the thumbnail
 *   (`/api/assets/generated_images/thumb_<filename>.png`).
 * @throws Re-throws any download, decode or write failure after logging it.
 */
async function saveGeneratedImage(imageUrlOrBase64: string, filename: string): Promise<string> {
  try {
    const generatedImagesDir = path.resolve(process.cwd(), 'attached_assets', 'generated_images');
    // Recursive mkdir succeeds when the directory already exists, so no
    // separate existence check is needed; the async form avoids blocking
    // the event loop while a request is being served.
    await fs.promises.mkdir(generatedImagesDir, { recursive: true });

    let buffer: Buffer;

    if (imageUrlOrBase64.startsWith('data:image/')) {
      // Data URL: everything after the first comma is the base64 payload.
      const base64Data = imageUrlOrBase64.split(',')[1];
      if (!base64Data) {
        throw new Error('Malformed image data URL: missing base64 payload');
      }
      buffer = Buffer.from(base64Data, 'base64');
    } else {
      // Remote URL: download the bytes.
      const response = await fetch(imageUrlOrBase64);
      if (!response.ok) {
        throw new Error(`Failed to fetch image: ${response.statusText}`);
      }
      buffer = Buffer.from(await response.arrayBuffer());
    }

    const fullImagePath = path.join(generatedImagesDir, `${filename}.png`);
    const thumbnailPath = path.join(generatedImagesDir, `thumb_${filename}.png`);

    // Full-size copy.
    await sharp(buffer)
      .png({ quality: 90 })
      .toFile(fullImagePath);

    // Thumbnail (400x225 for 16:9 ratio), cropped around the centre.
    await sharp(buffer)
      .resize(400, 225, { fit: 'cover', position: 'center' })
      .png({ quality: 80 })
      .toFile(thumbnailPath);

    // Callers store the API path, not the filesystem path.
    return `/api/assets/generated_images/thumb_${filename}.png`;
  } catch (error) {
    console.error('Error saving generated image:', error);
    throw error;
  }
}
|
|
|
|
export async function registerRoutes(app: Express): Promise<Server> {
|
|
// Health check: always answers with a fixed JSON body.
app.get("/api/health", (_req, res) => {
  const payload = { status: "ok" };
  res.json(payload);
});
|
|
|
|
// Media outlets routes (using news-api)
|
|
// Lists outlets from news-api. Query parameters: `category` (optional
// filter) and `language` (defaults to 'ko'). Every outlet in the response
// carries an extra `articleCount` field.
app.get("/api/outlets", async (req, res) => {
  try {
    const { category, language } = req.query;
    const lang = (language as string) || 'ko';

    // Only a non-empty string category is forwarded as a filter.
    const categoryFilter = category && typeof category === 'string' ? category : undefined;
    const outlets = newsapi.getOutlets(categoryFilter, lang);

    // Add article count to each outlet
    const outletsWithCounts = outlets.map(outlet => {
      const articleCount = outlet.articles ? outlet.articles.length : 0;
      return { ...outlet, articleCount };
    });

    res.json(outletsWithCounts);
  } catch (error) {
    console.error("Error fetching outlets:", error);
    res.status(500).json({ error: "Failed to fetch outlets" });
  }
});
|
|
|
|
// Returns one outlet by id from news-api (`language` query, default 'ko').
// Responds 404 when news-api has no outlet for that id.
app.get("/api/outlets/:id", async (req, res) => {
  try {
    const outletId = req.params.id;
    const language = (req.query.language as string) || 'ko';
    console.log(`[routes] GET /api/outlets/${outletId} - language: ${language}`);

    const outlet = newsapi.getOutletById(outletId, language);
    if (outlet) {
      res.json(outlet);
    } else {
      res.status(404).json({ error: "Outlet not found" });
    }
  } catch (error) {
    console.error("Error fetching outlet:", error);
    res.status(500).json({ error: "Failed to fetch outlet" });
  }
});
|
|
|
|
// Returns up to 50 articles for one outlet (`language` query, default 'en').
app.get("/api/outlets/:id/articles", async (req, res) => {
  try {
    const outletId = req.params.id;
    const requestedLanguage = req.query.language as string;
    const outletArticles = await newsapi.getArticlesByOutlet(outletId, 50, requestedLanguage || 'en');
    res.json(outletArticles);
  } catch (error) {
    console.error("Error fetching articles for outlet:", error);
    res.status(500).json({ error: "Failed to fetch articles" });
  }
});
|
|
|
|
// Creates an outlet from the JSON body.
// Responds 201 with the created outlet, 400 when the body fails schema
// validation, and 500 when storage fails. Validation is checked separately
// from persistence so a storage error is no longer reported to the client
// as "Invalid outlet data".
app.post("/api/outlets", async (req, res) => {
  const parsed = insertMediaOutletSchema.safeParse(req.body);
  if (!parsed.success) {
    console.error("Error creating outlet:", parsed.error);
    return res.status(400).json({ error: "Invalid outlet data" });
  }

  try {
    const outlet = await storage.createOutlet(parsed.data);
    res.status(201).json(outlet);
  } catch (error) {
    console.error("Error creating outlet:", error);
    res.status(500).json({ error: "Failed to create outlet" });
  }
});
|
|
|
|
// Partially updates an outlet. The body is validated against the insert
// schema with `id` removed and every remaining field optional.
// Responds 200 with the updated outlet, 400 for an invalid body, 404 when
// the outlet does not exist, and 500 when storage fails (previously storage
// errors were also reported as 400 "Invalid outlet data").
app.patch("/api/outlets/:id", async (req, res) => {
  const partialSchema = insertMediaOutletSchema.omit({ id: true }).partial();
  const parsed = partialSchema.safeParse(req.body);
  if (!parsed.success) {
    console.error("Error updating outlet:", parsed.error);
    return res.status(400).json({ error: "Invalid outlet data" });
  }

  try {
    const outlet = await storage.updateOutlet(req.params.id, parsed.data);
    if (!outlet) {
      return res.status(404).json({ error: "Outlet not found" });
    }
    res.json(outlet);
  } catch (error) {
    console.error("Error updating outlet:", error);
    res.status(500).json({ error: "Failed to update outlet" });
  }
});
|
|
|
|
// Deletes an outlet from storage. 204 on success, 404 when storage reports
// that nothing was deleted.
app.delete("/api/outlets/:id", async (req, res) => {
  try {
    const wasDeleted = await storage.deleteOutlet(req.params.id);
    if (wasDeleted) {
      res.status(204).send();
    } else {
      res.status(404).json({ error: "Outlet not found" });
    }
  } catch (error) {
    console.error("Error deleting outlet:", error);
    res.status(500).json({ error: "Failed to delete outlet" });
  }
});
|
|
|
|
// Articles routes (using news-api)
|
|
// Lists articles from news-api, sorted by the minutes value of `timeAgo`
// (smallest first).
//
// Query parameters:
//   outlet   - when set, returns up to 50 articles of that outlet.
//   limit    - used only without `outlet`; positive integer, default 50.
//   language - defaults to 'en'.
//
// Without `outlet`, 5 articles are taken from each of the first 10 outlets.
// TODO: Implement featured articles and pagination
app.get("/api/articles", async (req, res) => {
  try {
    const { outlet, limit, language } = req.query;
    const lang = (language as string) || 'en';
    console.log(`[routes] GET /api/articles - language: ${lang}, outlet: ${outlet}`);

    let articles;
    if (outlet && typeof outlet === 'string') {
      articles = await newsapi.getArticlesByOutlet(outlet, 50, lang);
    } else {
      // A non-numeric or non-positive limit used to reach slice() as NaN or
      // a negative number, giving an empty or tail-truncated list; fall back
      // to the default instead.
      const requested = typeof limit === 'string' ? parseInt(limit, 10) : NaN;
      const limitNum = Number.isFinite(requested) && requested > 0 ? requested : 50;

      const allOutlets = newsapi.getOutlets();
      const articleArrays = await Promise.all(
        allOutlets.slice(0, 10).map(item => newsapi.getArticlesByOutlet(item.id, 5, lang))
      );
      articles = articleArrays.flat().slice(0, limitNum);
    }

    // Leading integer of a "12 min ago" label; entries without a usable
    // label sort last instead of throwing or producing NaN comparisons.
    const minutesOf = (timeAgo: unknown): number => {
      const minutes = typeof timeAgo === 'string' ? parseInt(timeAgo, 10) : NaN;
      return Number.isNaN(minutes) ? Number.MAX_SAFE_INTEGER : minutes;
    };

    // Sort a copy so the array handed out by news-api is never mutated.
    const sorted = [...articles].sort((a, b) => minutesOf(a.timeAgo) - minutesOf(b.timeAgo));

    res.json(sorted);
  } catch (error) {
    console.error("Error fetching articles:", error);
    res.status(500).json({ error: "Failed to fetch articles" });
  }
});
|
|
|
|
// Returns a single article from news-api. With `?useNewsId=true` the path
// parameter is looked up via getArticleByNewsId, otherwise via
// getArticleById. `language` defaults to 'en'. 404 when nothing is found.
app.get("/api/articles/:id", async (req, res) => {
  try {
    const articleId = req.params.id;
    const language = (req.query.language as string) || 'en';
    const useNewsId = req.query.useNewsId === 'true';

    console.log(`[routes] GET /api/articles/${articleId} - language: ${language}, useNewsId: ${useNewsId}`);

    const article = useNewsId
      ? await newsapi.getArticleByNewsId(articleId, language)
      : await newsapi.getArticleById(articleId, language);

    if (!article) {
      return res.status(404).json({ error: "Article not found" });
    }

    // Article already has timeAgo from newsapi.transformArticle
    res.json(article);
  } catch (error) {
    console.error("Error fetching article:", error);
    res.status(500).json({ error: "Failed to fetch article" });
  }
});
|
|
|
|
// Creates an article from the JSON body.
// Responds 201 with the created article, 400 when the body fails schema
// validation, and 500 when storage fails. Validation is checked separately
// from persistence so a storage error is no longer reported to the client
// as "Invalid article data".
app.post("/api/articles", async (req, res) => {
  const parsed = insertArticleSchema.safeParse(req.body);
  if (!parsed.success) {
    console.error("Error creating article:", parsed.error);
    return res.status(400).json({ error: "Invalid article data" });
  }

  try {
    const article = await storage.createArticle(parsed.data);
    res.status(201).json(article);
  } catch (error) {
    console.error("Error creating article:", error);
    res.status(500).json({ error: "Failed to create article" });
  }
});
|
|
|
|
// Replaces an article's thumbnail path.
// Body: { thumbnail: string }. Responds 400 when `thumbnail` is missing,
// blank or not a string, 404 when the article does not exist, otherwise
// the updated article.
app.patch("/api/articles/:id", async (req, res) => {
  try {
    const { id } = req.params;
    // `req.body` may be undefined when no JSON body was sent; treat that as
    // a missing thumbnail (400) rather than letting it surface as a 500.
    const thumbnail: unknown = req.body?.thumbnail;

    if (typeof thumbnail !== 'string' || thumbnail.trim() === '') {
      return res.status(400).json({ error: "Thumbnail path required" });
    }

    const updatedArticle = await storage.updateArticleThumbnail(id, thumbnail);
    if (!updatedArticle) {
      return res.status(404).json({ error: "Article not found" });
    }

    res.json(updatedArticle);
  } catch (error) {
    console.error("Error updating article:", error);
    res.status(500).json({ error: "Failed to update article" });
  }
});
|
|
|
|
// Deletes an article from storage. 204 on success, 404 when storage reports
// that nothing was deleted.
app.delete("/api/articles/:id", async (req, res) => {
  try {
    const wasDeleted = await storage.deleteArticle(req.params.id);
    if (wasDeleted) {
      res.status(204).send();
    } else {
      res.status(404).json({ error: "Article not found" });
    }
  } catch (error) {
    console.error("Error deleting article:", error);
    res.status(500).json({ error: "Failed to delete article" });
  }
});
|
|
|
|
// Search across articles and outlets (articles come from news-api).
//
// Query parameters:
//   q        - required search text (400 when missing).
//   type     - 'all' (default), 'articles' or 'outlets'.
//   language - defaults to 'en'.
//   outletId - when set, article search is a case-insensitive substring
//              match over that outlet's title/summary/body (max 20 hits)
//              and outlet search is skipped.
//
// Responds with { articles, outlets }.
app.get("/api/search", async (req, res) => {
  try {
    const { q, type = 'all', language, outletId } = req.query;
    const lang = (language as string) || 'en';
    if (!q || typeof q !== 'string') {
      return res.status(400).json({ error: "Search query required" });
    }

    console.log(`[Search] Query: "${q}", Type: ${type}, Language: ${lang}, OutletId: ${outletId || 'none'}`);

    const needle = q.toLowerCase();
    const wantsArticles = type === 'all' || type === 'articles';
    const wantsOutlets = type === 'all' || type === 'outlets';

    let articles: any[] = [];
    let outlets: any[] = [];

    if (wantsArticles) {
      if (outletId && typeof outletId === 'string') {
        // Scoped search: filter one outlet's articles locally.
        const outletArticles = await newsapi.getArticlesByOutlet(outletId, 1000, lang);
        articles = outletArticles
          .filter(article => {
            if (article.title.toLowerCase().includes(needle)) return true;
            if (article.summary && article.summary.toLowerCase().includes(needle)) return true;
            return Boolean(article.body && article.body.toLowerCase().includes(needle));
          })
          .slice(0, 20);
        console.log(`[Search] Found ${articles.length} articles in outlet ${outletId}`);
      } else {
        articles = await newsapi.searchArticles(q, 20, lang);
        console.log(`[Search] Found ${articles.length} articles globally`);
      }
    }

    // Don't search outlets if we're already filtering by a specific outlet
    if (wantsOutlets && !outletId) {
      const allOutlets = newsapi.getOutlets();
      outlets = allOutlets.filter(candidate => {
        if (candidate.name.toLowerCase().includes(needle)) return true;
        return Boolean(candidate.description && candidate.description.toLowerCase().includes(needle));
      });
      console.log(`[Search] Found ${outlets.length} outlets out of ${allOutlets.length} total outlets`);

      if (outlets.length > 0) {
        console.log(`[Search] Matching outlets: ${outlets.map(o => o.name).join(', ')}`);
      }
    }

    console.log(`[Search] Returning ${articles.length} articles and ${outlets.length} outlets`);
    res.json({ articles, outlets });
  } catch (error) {
    console.error("Error searching:", error);
    res.status(500).json({ error: "Failed to search" });
  }
});
|
|
|
|
// AI-powered image generation for articles.
// Body: { description: string, aspectRatio?: "16:9" | "1:1" | "9:16" }.
// An unknown aspect ratio falls back to 16:9. Responds with
// { url, format } where format is 'url' or 'base64' (data URL).
app.post("/api/generate-thumbnail", async (req, res) => {
  try {
    const { description, aspectRatio = "16:9" } = req.body ?? {};

    if (typeof description !== 'string' || description.trim() === '') {
      return res.status(400).json({ error: "Description required" });
    }

    const sizeMap: Record<string, "1792x1024" | "1024x1024" | "1024x1792"> = {
      "16:9": "1792x1024",
      "1:1": "1024x1024",
      "9:16": "1024x1792"
    };

    // Own-property lookup only: a plain `sizeMap[aspectRatio]` would also
    // resolve inherited keys such as "constructor" or "toString" and pass a
    // function to the API as the size.
    const size =
      typeof aspectRatio === 'string' && Object.prototype.hasOwnProperty.call(sizeMap, aspectRatio)
        ? sizeMap[aspectRatio]
        : "1792x1024";

    const response = await openai.images.generate({
      model: "dall-e-3",
      prompt: `Professional news article thumbnail: ${description}. High quality, modern, clean composition suitable for mobile news app.`,
      n: 1,
      size,
      quality: "standard",
    });

    const imageData = response.data?.[0];
    if (!imageData) {
      throw new Error('No image data received from OpenAI');
    }

    // The API returns either a hosted URL or inline base64 data.
    if (!imageData.url && !imageData.b64_json) {
      throw new Error('No image URL or data received');
    }

    res.json({
      url: imageData.url || `data:image/png;base64,${imageData.b64_json}`,
      format: imageData.url ? 'url' : 'base64'
    });
  } catch (error) {
    console.error("Error generating thumbnail:", error);
    res.status(500).json({ error: "Failed to generate thumbnail" });
  }
});
|
|
|
|
// Generates, saves and attaches an AI thumbnail for one stored article.
// 404 when the article is unknown. When the article already has a
// non-placeholder thumbnail, that thumbnail is returned and nothing is
// generated.
app.post("/api/articles/:id/generate-thumbnail", async (req, res) => {
  try {
    const articleId = req.params.id;

    const article = await storage.getArticleById(articleId);
    if (!article) {
      return res.status(404).json({ error: "Article not found" });
    }

    // Nothing to do when a real (non-placeholder) thumbnail is already set.
    if (!needsThumbnail(article)) {
      return res.json({
        message: "Article already has a thumbnail",
        thumbnailPath: article.thumbnail
      });
    }

    // The outlet, when found, adds subject context to the prompt.
    const outlet = await storage.getOutletById(article.outletId);
    const prompt = await generateImagePrompt(article, outlet || undefined);

    const generation = await openai.images.generate({
      model: "dall-e-3",
      prompt,
      n: 1,
      size: "1792x1024",
      quality: "standard",
    });

    const firstImage = generation.data?.[0];
    if (!firstImage) {
      throw new Error('No image data received from OpenAI');
    }

    // Prefer the hosted URL; otherwise wrap inline base64 in a data URL.
    let imageSource: string | null = null;
    if (firstImage.url) {
      imageSource = firstImage.url;
    } else if (firstImage.b64_json) {
      imageSource = `data:image/png;base64,${firstImage.b64_json}`;
    }
    if (!imageSource) {
      throw new Error('No image URL or data received from OpenAI');
    }

    // File name: sanitised title prefix + timestamp + short random suffix.
    const safeTitle = article.title.replace(/[^a-zA-Z0-9]/g, '_').substring(0, 50);
    const randomSuffix = Math.random().toString(36).substring(2, 8);
    const filename = `${safeTitle}_${Date.now()}_${randomSuffix}`;

    const thumbnailPath = await saveGeneratedImage(imageSource, filename);

    const updatedArticle = await storage.updateArticleThumbnail(articleId, thumbnailPath);
    if (!updatedArticle) {
      throw new Error('Failed to update article thumbnail');
    }

    res.json({
      success: true,
      thumbnailPath,
      message: "Thumbnail generated and saved successfully"
    });
  } catch (error) {
    console.error("Error generating article thumbnail:", error);
    res.status(500).json({ error: "Failed to generate thumbnail" });
  }
});
|
|
|
|
// Generates thumbnails for stored articles that still need one.
// Body: { limit?: number } - number of articles to process, default 10,
// always clamped to 0..20. Articles are processed one at a time; per-article
// failures are collected in `errors` and do not abort the batch.
app.post("/api/articles/generate-thumbnails", async (req, res) => {
  try {
    // Clamp on both sides: a negative limit used to reach slice(0, -n),
    // which selects almost every article and bypasses the cap of 20.
    const requested = Number(req.body?.limit ?? 10);
    const batchSize = Number.isFinite(requested)
      ? Math.min(Math.max(Math.floor(requested), 0), 20)
      : 10;

    const articles = await storage.getAllArticles();
    const articlesNeedingThumbnails = articles
      .filter(needsThumbnail)
      .slice(0, batchSize);

    if (articlesNeedingThumbnails.length === 0) {
      return res.json({
        success: true,
        message: "No articles need thumbnail generation",
        processed: 0
      });
    }

    const results: Array<{ articleId: string; title: string; thumbnailPath: string; success: true }> = [];
    const errors: Array<{ articleId: string; title: string; error: string }> = [];

    // Process articles sequentially to avoid overwhelming OpenAI API
    for (const article of articlesNeedingThumbnails) {
      try {
        // The outlet, when found, adds subject context to the prompt.
        const outlet = await storage.getOutletById(article.outletId);
        const prompt = await generateImagePrompt(article, outlet || undefined);

        const response = await openai.images.generate({
          model: "dall-e-3",
          prompt,
          n: 1,
          size: "1792x1024",
          quality: "standard",
        });

        const imageData = response.data?.[0];
        if (!imageData) {
          throw new Error('No image data received from OpenAI');
        }

        const imageUrl = imageData.url || (imageData.b64_json ? `data:image/png;base64,${imageData.b64_json}` : null);
        if (!imageUrl) {
          throw new Error('No image URL or data received from OpenAI');
        }

        // File name: sanitised title prefix + timestamp + short random suffix.
        const timestamp = Date.now();
        const randomId = Math.random().toString(36).substring(2, 8);
        const filename = `${article.title.replace(/[^a-zA-Z0-9]/g, '_').substring(0, 50)}_${timestamp}_${randomId}`;

        const thumbnailPath = await saveGeneratedImage(imageUrl, filename);

        // Report a failed update as an error instead of counting it as a
        // success, matching the single-article endpoint.
        const updatedArticle = await storage.updateArticleThumbnail(article.id, thumbnailPath);
        if (!updatedArticle) {
          throw new Error('Failed to update article thumbnail');
        }

        results.push({
          articleId: article.id,
          title: article.title,
          thumbnailPath,
          success: true
        });

        // Small delay to avoid rate limiting
        await new Promise(resolve => setTimeout(resolve, 1000));
      } catch (error) {
        console.error(`Error generating thumbnail for article ${article.id}:`, error);
        errors.push({
          articleId: article.id,
          title: article.title,
          error: error instanceof Error ? error.message : 'Unknown error'
        });
      }
    }

    res.json({
      success: true,
      processed: results.length,
      failed: errors.length,
      results,
      errors,
      message: `Generated ${results.length} thumbnails, ${errors.length} failed`
    });
  } catch (error) {
    console.error("Error in batch thumbnail generation:", error);
    res.status(500).json({ error: "Failed to generate thumbnails" });
  }
});
|
|
|
|
// AI-powered article enhancement.
// Body: { title, summary?, content }. Asks the chat model for a JSON object
// { title, summary, body } and responds with those fields; any field the
// model omits (or returns as a non-string / blank value) falls back to the
// caller's original text.
app.post("/api/enhance-article", async (req, res) => {
  try {
    const { title, summary, content } = req.body ?? {};

    if (!title || !content) {
      return res.status(400).json({ error: "Title and content required" });
    }

    const userPrompt = [
      'Please enhance this article:',
      `Title: ${title}`,
      `Summary: ${summary || ""}`,
      `Content: ${content}`,
      '',
      'Provide enhanced versions in JSON format: { "title": "...", "summary": "...", "body": "..." }',
    ].join('\n');

    const response = await openai.chat.completions.create({
      model: "gpt-4o", // Using gpt-4o as it's widely available
      messages: [
        {
          role: "system",
          content: "You are a professional news editor. Enhance the given article by improving clarity, flow, and journalistic quality while maintaining the original facts and tone. Respond with JSON containing enhanced title, summary, and body."
        },
        {
          role: "user",
          content: userPrompt
        }
      ],
      response_format: { type: "json_object" },
    });

    const messageContent = response.choices?.[0]?.message?.content;
    if (!messageContent) {
      throw new Error('No content received from OpenAI');
    }

    let enhanced: unknown;
    try {
      enhanced = JSON.parse(messageContent);
    } catch (parseError) {
      console.error('Failed to parse OpenAI response:', messageContent);
      throw new Error('Invalid JSON response from AI');
    }

    // The parsed value is untrusted: it may be null, an array or carry
    // non-string fields. Only accept non-blank strings from it.
    const fields: Record<string, unknown> =
      enhanced !== null && typeof enhanced === 'object' ? (enhanced as Record<string, unknown>) : {};
    const pickString = (value: unknown, fallback: string): string =>
      typeof value === 'string' && value.trim() !== '' ? value : fallback;

    res.json({
      title: pickString(fields.title, title),
      summary: pickString(fields.summary, summary || ''),
      body: pickString(fields.body, content)
    });
  } catch (error) {
    console.error("Error enhancing article:", error);
    res.status(500).json({ error: "Failed to enhance article" });
  }
});
|
|
|
|
// Generates a summary (at most 150 characters) from article content.
// Body: { content: string, title?: string }. Responds 400 when `content` is
// missing or not a string. The AI summary is tried first; when it fails or
// is too short, a summary is assembled from the leading sentences instead,
// so a valid request always receives a summary.
app.post("/api/generate-summary", async (req, res) => {
  const content: unknown = req.body?.content;
  const title = req.body?.title;

  // Validate up front: non-string content used to throw inside both the
  // fallback and the final catch, leaving the request without a response.
  if (typeof content !== 'string' || content === '') {
    return res.status(400).json({ error: "Content is required" });
  }

  try {
    let summary = '';

    try {
      const response = await openai.chat.completions.create({
        model: "gpt-4o-mini", // Using mini model for cost-effectiveness
        messages: [
          {
            role: "system",
            content: "You are a professional news summarizer. Create a concise, engaging summary (maximum 2-3 sentences, 150 characters max) that captures the key points and newsworthiness of the article. Focus on who, what, when, where, why. Make it compelling but factual."
          },
          {
            role: "user",
            content: `Title: ${title || "No title"}\nArticle Content: ${content.substring(0, 2000)}`
          }
        ],
        max_tokens: 100,
        temperature: 0.3 // Lower temperature for more consistent, factual output
      });

      const aiSummary = response.choices?.[0]?.message?.content?.trim();
      if (aiSummary && aiSummary.length > 10) {
        summary = aiSummary;
      } else {
        throw new Error('AI summary too short or empty');
      }
    } catch (aiError) {
      console.warn("AI summary generation failed, falling back to basic method:", aiError);

      // Basic fallback: drop the attribution preamble, then join leading
      // sentences until roughly 150 characters are reached.
      const sentences = content.replace(/^This article was originally published at .+?\.\n\n/, '').split(/[.!?]+/);
      let basicSummary = '';

      for (const sentence of sentences) {
        const trimmed = sentence.trim();
        if (trimmed.length < 10) continue; // Skip very short sentences
        if (basicSummary.length + trimmed.length > 150) break;
        basicSummary += (basicSummary ? '. ' : '') + trimmed;
      }

      summary = basicSummary + (basicSummary ? '.' : 'Content not available.');
    }

    res.json({ summary: summary.substring(0, 150) }); // Ensure max 150 chars
  } catch (error) {
    console.error("Error generating summary:", error);

    // Final fallback - `content` is known to be a string here.
    res.json({ summary: content.substring(0, 100).trim() + '...' });
  }
});
|
|
|
|
// AI-powered Text-to-Speech for a stored article, read in a news-anchor
// style. Body: { voice?: string (default 'nova'), speed?: number (default
// 1.0, clamped to 0.25..4.0) }. Responds with MP3 audio, or 404 when the
// article does not exist.
app.post("/api/articles/:id/speech", async (req, res) => {
  try {
    const { id } = req.params;
    const { voice = 'nova', speed = 1.0 } = req.body ?? {};

    const article = await storage.getArticleById(id);
    if (!article) {
      return res.status(404).json({ error: "Article not found" });
    }

    // A non-numeric speed would turn into NaN inside Math.min/Math.max and
    // be forwarded to the API; fall back to normal speed instead.
    const numericSpeed = Number(speed);
    const safeSpeed = Number.isFinite(numericSpeed)
      ? Math.max(0.25, Math.min(4.0, numericSpeed))
      : 1.0;
    const voiceName = typeof voice === 'string' && voice.trim() !== '' ? voice : 'nova';

    // Intro, summary, body and sign-off, separated by blank lines.
    const speechText = [
      `Here's today's news from ${article.outletName || 'our newsroom'}. ${article.title}.`,
      article.summary || '',
      article.body || 'Content not available.',
      'This has been your news update. Thank you for listening.',
    ].join('\n\n');

    const response = await openai.audio.speech.create({
      model: "tts-1-hd", // High-quality model for professional sound
      voice: voiceName as any, // nova, alloy, echo, fable, onyx, shimmer
      input: speechText.substring(0, 4000), // Limit text length for API
      speed: safeSpeed,
      response_format: "mp3"
    });

    res.setHeader('Content-Type', 'audio/mpeg');
    res.setHeader('Content-Disposition', `inline; filename="article_${id}_speech.mp3"`);
    res.setHeader('Cache-Control', 'public, max-age=3600'); // Cache for 1 hour

    // The whole audio payload is buffered and sent in one response.
    const buffer = Buffer.from(await response.arrayBuffer());
    res.send(buffer);
  } catch (error) {
    console.error("Error generating speech:", error);
    res.status(500).json({
      error: "Failed to generate speech",
      details: error instanceof Error ? error.message : 'Unknown error'
    });
  }
});
|
|
|
|
// Statistics route: totals for stored outlets and articles, the number of
// outlets per category, and a "recent" count that is capped at 5.
app.get("/api/stats", async (_req, res) => {
  try {
    const outlets = await storage.getAllOutlets();
    const articles = await storage.getAllArticles();

    // Tally outlets per category.
    const categoriesBreakdown: Record<string, number> = {};
    for (const outlet of outlets) {
      categoriesBreakdown[outlet.category] = (categoriesBreakdown[outlet.category] || 0) + 1;
    }

    res.json({
      totalOutlets: outlets.length,
      totalArticles: articles.length,
      categoriesBreakdown,
      recentArticlesCount: Math.min(articles.length, 5)
    });
  } catch (error) {
    console.error("Error fetching stats:", error);
    res.status(500).json({ error: "Failed to fetch statistics" });
  }
});
|
|
|
|
// YouTube-style feed endpoint backed by storage.listFeed.
// Query parameters:
//   cursor - passed through to storage unchanged.
//   limit  - positive integer, default 10.
//   filter - 'all' | 'people' | 'topics' | 'companies'; anything else is
//            treated as 'all'.
app.get("/api/feed", async (req, res) => {
  try {
    const { cursor, limit, filter } = req.query;

    // A non-numeric or non-positive limit falls back to the default rather
    // than reaching storage as NaN or a negative number.
    const requestedLimit = typeof limit === 'string' ? parseInt(limit, 10) : NaN;
    const safeLimit = Number.isFinite(requestedLimit) && requestedLimit > 0 ? requestedLimit : 10;

    // Validate the filter against the known values instead of casting it.
    const allowedFilters = ['all', 'people', 'topics', 'companies'] as const;
    const activeFilter = allowedFilters.find(candidate => candidate === filter) ?? 'all';

    const params = {
      cursor: cursor as string,
      limit: safeLimit,
      filter: activeFilter
    };

    const feed = await storage.listFeed(params);
    res.json(feed);
  } catch (error) {
    console.error("Error fetching feed:", error);
    res.status(500).json({ error: "Failed to fetch feed" });
  }
});
|
|
|
|
// Increment article view count; answers { success: true } once storage
// has recorded the view.
app.post("/api/articles/:id/view", async (req, res) => {
  try {
    const articleId = req.params.id;
    await storage.incrementView(articleId);
    res.json({ success: true });
  } catch (error) {
    console.error("Error incrementing view:", error);
    res.status(500).json({ error: "Failed to increment view" });
  }
});
|
|
|
|
// Remove duplicate articles within outlets. Delegates to
// storage.removeDuplicateArticles and reports its `removedCount` and
// `details` back to the caller.
app.post("/api/articles/remove-duplicates", async (_req, res) => {
  try {
    console.log("Starting duplicate article removal...");
    const { removedCount, details } = await storage.removeDuplicateArticles();

    res.json({
      success: true,
      removedCount,
      details,
      message: `Successfully removed ${removedCount} duplicate articles across ${details.length} outlets`
    });
  } catch (error) {
    console.error("Error removing duplicate articles:", error);
    const reason = error instanceof Error ? error.message : 'Unknown error';
    res.status(500).json({
      error: "Failed to remove duplicate articles",
      details: reason
    });
  }
});
|
|
|
|
// Comment API routes
|
|
// Lists comments of an article with pagination.
// Query parameters: `limit` (positive integer, default 10) and `offset`
// (non-negative integer, default 0). Invalid values fall back to the
// defaults instead of reaching storage as NaN or negative numbers.
app.get("/api/articles/:articleId/comments", async (req, res) => {
  try {
    const { articleId } = req.params;

    // Parses a query value as an integer, using `fallback` when it is
    // missing, not a number, or below `min`.
    const toInt = (value: unknown, fallback: number, min: number): number => {
      const parsed = typeof value === 'string' ? parseInt(value, 10) : NaN;
      return Number.isNaN(parsed) || parsed < min ? fallback : parsed;
    };

    const result = await storage.getCommentsByArticle(articleId, {
      limit: toInt(req.query.limit, 10, 1),
      offset: toInt(req.query.offset, 0, 0)
    });

    res.json(result);
  } catch (error) {
    console.error("Error fetching comments:", error);
    res.status(500).json({ error: "Failed to fetch comments" });
  }
});
|
|
|
|
// Lists replies to a comment with pagination.
// Query parameters: `limit` (positive integer, default 5) and `offset`
// (non-negative integer, default 0). Invalid values fall back to the
// defaults instead of reaching storage as NaN or negative numbers.
app.get("/api/comments/:commentId/replies", async (req, res) => {
  try {
    const { commentId } = req.params;

    // Parses a query value as an integer, using `fallback` when it is
    // missing, not a number, or below `min`.
    const toInt = (value: unknown, fallback: number, min: number): number => {
      const parsed = typeof value === 'string' ? parseInt(value, 10) : NaN;
      return Number.isNaN(parsed) || parsed < min ? fallback : parsed;
    };

    const result = await storage.getCommentReplies(commentId, {
      limit: toInt(req.query.limit, 5, 1),
      offset: toInt(req.query.offset, 0, 0)
    });

    res.json(result);
  } catch (error) {
    console.error("Error fetching replies:", error);
    res.status(500).json({ error: "Failed to fetch replies" });
  }
});
|
|
|
|
// Creates a comment (or a reply, when `parentId` is given) on an article.
// Body: { content: string, nickname: string, avatar?, parentId? }.
// Responds 400 unless `content` and `nickname` are non-blank strings.
app.post("/api/articles/:articleId/comments", async (req, res) => {
  try {
    const { articleId } = req.params;
    const { content, nickname, avatar, parentId } = req.body ?? {};

    // Truthiness alone would let objects, arrays or numbers through to
    // storage; require actual non-blank strings.
    const isNonBlankString = (value: unknown): value is string =>
      typeof value === 'string' && value.trim() !== '';

    if (!isNonBlankString(content) || !isNonBlankString(nickname)) {
      return res.status(400).json({ error: "Content and nickname are required" });
    }

    const comment = await storage.createComment({
      articleId,
      content,
      nickname,
      avatar,
      parentId: parentId || null
    });

    res.json(comment);
  } catch (error) {
    console.error("Error creating comment:", error);
    res.status(500).json({ error: "Failed to create comment" });
  }
});
|
|
|
|
// Toggles a user's reaction on a comment.
// Body: { reactionType: 'like' | 'dislike', userIdentifier }. Responds 400
// when either field is missing or the reaction type is not recognised.
app.post("/api/comments/:commentId/reactions", async (req, res) => {
  try {
    const commentId = req.params.commentId;
    const { reactionType, userIdentifier } = req.body;

    if (!(reactionType && userIdentifier)) {
      return res.status(400).json({ error: "Reaction type and user identifier are required" });
    }

    const isKnownReaction = ['like', 'dislike'].includes(reactionType);
    if (!isKnownReaction) {
      return res.status(400).json({ error: "Invalid reaction type" });
    }

    const toggled = await storage.toggleCommentReaction(commentId, userIdentifier, reactionType);
    res.json(toggled);
  } catch (error) {
    console.error("Error toggling reaction:", error);
    res.status(500).json({ error: "Failed to toggle reaction" });
  }
});
|
|
|
|
// Returns the reaction a given user left on a comment, or JSON `null` when
// storage has none.
app.get("/api/comments/:commentId/reactions/:userIdentifier", async (req, res) => {
  try {
    const commentId = req.params.commentId;
    const userIdentifier = req.params.userIdentifier;

    const existingReaction = await storage.getUserCommentReaction(commentId, userIdentifier);
    res.json(existingReaction || null);
  } catch (error) {
    console.error("Error fetching user reaction:", error);
    res.status(500).json({ error: "Failed to fetch user reaction" });
  }
});
|
|
|
|
// Web scraping endpoint for adding new articles from URLs
//
// POST /api/scrape
//   body: { url, outletId }
//
// Fetches the page at `url`, pulls the <title> and the description <meta> tag
// out of the HTML with regular expressions, and stores the result as a new
// article of `outletId` using the default placeholder thumbnail.
//
// Responses:
//   200 { success, article, message }
//   400 when url/outletId is missing, or url is malformed or not http(s)
//   502 when the target site answers with a non-2xx status
//   500 on any other failure (network error, storage error, ...)
app.post("/api/scrape", async (req, res) => {
  try {
    const { url, outletId } = req.body;

    if (!url || !outletId) {
      return res.status(400).json({ error: "URL and outletId are required" });
    }

    // Reject malformed URLs and non-web schemes before any request is made, so
    // bad input is reported as a client error rather than a 500.
    // NOTE(review): this does not block requests to internal/private hosts;
    // add an allow-list if this endpoint is reachable by untrusted clients.
    let targetUrl: URL;
    try {
      targetUrl = new URL(url);
    } catch {
      return res.status(400).json({ error: "Invalid URL" });
    }
    if (targetUrl.protocol !== "http:" && targetUrl.protocol !== "https:") {
      return res.status(400).json({ error: "Only http and https URLs can be scraped" });
    }

    // Fetch the web page content
    const response = await fetch(targetUrl.href);
    if (!response.ok) {
      // Do not turn an error page (404, 500, ...) into an article.
      return res.status(502).json({
        error: "Failed to scrape article",
        details: `Target responded with HTTP ${response.status}`
      });
    }
    const html = await response.text();

    // Parse content using simple regex patterns (basic implementation)
    const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
    const metaDescMatch = html.match(/<meta[^>]*name=["\']description["\'][^>]*content=["\']([^"']+)["\'][^>]*>/i);

    // Extract the title; fall back when the tag is missing or blank, then
    // collapse whitespace and cap the length.
    let title = titleMatch?.[1]?.trim() || 'Scraped Article';
    title = title.replace(/\s+/g, ' ').substring(0, 200);

    // Extract description/summary, with the same fallback and clean-up rules.
    let summary = metaDescMatch?.[1]?.trim() || 'Article scraped from ' + targetUrl.hostname;
    summary = summary.replace(/\s+/g, ' ').substring(0, 500);

    // Create basic article body from summary; short summaries are padded with
    // a generic paragraph.
    let body = summary;
    if (body.length < 200) {
      body += `\n\nThis content discusses recent developments and market insights related to the subject matter. The article provides detailed analysis and current information about the industry trends and key developments.`;
    }

    // Placeholder thumbnail; no image is downloaded by this endpoint.
    const defaultThumbnail = '/api/assets/default-article.png';

    // Create the article
    const articleData = {
      outletId,
      title,
      summary,
      body,
      thumbnail: defaultThumbnail,
      publishedAt: generateVariedPublishedAt(title + summary),
      tags: [] as string[],
      viewCount: 0
    };

    const article = await storage.createArticle(articleData);

    res.json({
      success: true,
      article,
      message: `Successfully scraped and added article: ${title}`
    });
  } catch (error) {
    console.error("Error scraping article:", error);
    res.status(500).json({
      error: "Failed to scrape article",
      details: error instanceof Error ? error.message : 'Unknown error'
    });
  }
});
|
|
|
|
// Batch scraping endpoint for multiple URLs
//
// POST /api/scrape-batch
//   body: { urls: string[], outletId }
//
// Scrapes at most the first 10 URLs, one after another, and stores each page
// as an article of `outletId`. A failure on one URL (malformed URL, non-http
// scheme, network error, non-2xx response, storage error) is recorded in
// `errors` and does not abort the rest of the batch.
//
// Responses:
//   200 { success, results, errors, message }
//   400 when `urls` is not an array or `outletId` is missing
//   500 on an unexpected failure outside the per-URL handling
app.post("/api/scrape-batch", async (req, res) => {
  try {
    const { urls, outletId } = req.body;

    if (!urls || !Array.isArray(urls) || !outletId) {
      return res.status(400).json({ error: "URLs array and outletId are required" });
    }

    const results = [];
    const errors = [];

    // Process URLs one by one to avoid overwhelming the server
    for (const url of urls.slice(0, 10)) { // Limit to 10 URLs per batch
      try {
        // `new URL` throws on malformed input; the catch below records it.
        // NOTE(review): internal/private hosts are not blocked here.
        const targetUrl = new URL(url);
        if (targetUrl.protocol !== "http:" && targetUrl.protocol !== "https:") {
          throw new Error("Only http and https URLs can be scraped");
        }

        // Fetch and parse each URL
        const response = await fetch(targetUrl.href);
        if (!response.ok) {
          // Do not turn an error page (404, 500, ...) into an article.
          throw new Error(`Target responded with HTTP ${response.status}`);
        }
        const html = await response.text();

        const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
        const metaDescMatch = html.match(/<meta[^>]*name=["\']description["\'][^>]*content=["\']([^"']+)["\'][^>]*>/i);

        // Fall back when the tag is missing or blank, collapse whitespace and
        // cap the length.
        let title = titleMatch?.[1]?.trim() || `Article from ${targetUrl.hostname}`;
        title = title.replace(/\s+/g, ' ').substring(0, 200);

        let summary = metaDescMatch?.[1]?.trim() || `Article scraped from ${targetUrl.hostname}`;
        summary = summary.replace(/\s+/g, ' ').substring(0, 500);

        // Short summaries are padded with a generic paragraph.
        let body = summary;
        if (body.length < 200) {
          body += `\n\nThis content provides insights and analysis on current industry developments and trends.`;
        }

        const articleData = {
          outletId,
          title,
          summary,
          body,
          thumbnail: '/api/assets/default-article.png',
          publishedAt: generateVariedPublishedAt(title + summary),
          tags: [] as string[],
          viewCount: 0
        };

        const article = await storage.createArticle(articleData);
        results.push({ url, article, success: true });

        // Small delay to be respectful to target servers
        await new Promise(resolve => setTimeout(resolve, 500));
      } catch (error) {
        console.error(`Error scraping ${url}:`, error);
        errors.push({
          url,
          error: error instanceof Error ? error.message : 'Unknown error'
        });
      }
    }

    res.json({
      success: true,
      results,
      errors,
      message: `Processed ${results.length} articles successfully, ${errors.length} errors`
    });
  } catch (error) {
    console.error("Error in batch scraping:", error);
    res.status(500).json({
      error: "Failed to process batch scraping",
      details: error instanceof Error ? error.message : 'Unknown error'
    });
  }
});
|
|
|
|
// Advanced web scraping routes
//
// POST /api/scrape/advanced
//   body: { urls: string[], outletId, maxConcurrency? (default 3) }
//
// Scrapes the given URLs with WebScraper, tries to download each article's
// image and build a 300-wide thumbnail from it, and stores every scraped
// article under the existing outlet `outletId`. Per-URL failures are collected
// in `errors` instead of aborting the request.
//
// Responses:
//   200 { success, results, errors, message, outlet, totalProcessed }
//   400 when `urls` is missing/empty or `outletId` is missing
//   404 when the outlet does not exist
//   500 on an unexpected failure
app.post("/api/scrape/advanced", async (req, res) => {
  try {
    const { urls, outletId, maxConcurrency = 3 } = req.body;

    if (!urls || !Array.isArray(urls) || urls.length === 0) {
      return res.status(400).json({ error: "URLs array is required" });
    }

    if (!outletId) {
      return res.status(400).json({ error: "Outlet ID is required" });
    }

    // Client-supplied concurrency: anything that is not a positive integer
    // falls back to the default.
    const requestedConcurrency = Number(maxConcurrency);
    const concurrency =
      Number.isInteger(requestedConcurrency) && requestedConcurrency > 0 ? requestedConcurrency : 3;

    // Verify outlet exists
    const outlet = await storage.getOutletById(outletId);
    if (!outlet) {
      return res.status(404).json({ error: "Outlet not found" });
    }

    const scraper = new WebScraper();
    console.log(`Starting advanced scraping of ${urls.length} URLs for outlet ${outlet.name}`);

    const results = [];
    const errors = [];

    // Process URLs in batches with concurrency control
    const { successes: scrapedArticles, failures: scrapingFailures } =
      await scraper.scrapeMultipleArticles(urls, concurrency);

    // Add scraping failures to errors
    for (const failure of scrapingFailures) {
      errors.push({
        url: failure.url,
        error: failure.error
      });
    }

    // Directory the thumbnail step reads from and writes to.
    // Uses `path.join`: only the default `path` import is visible at the top
    // of the file, so a bare `join` would not resolve.
    // NOTE(review): assumes scraper.downloadImage saves into this directory.
    const scrapedDir = path.join(process.cwd(), 'attached_assets', 'scraped');

    for (const scrapedArticle of scrapedArticles) {
      try {
        // Download and process images if available; any image problem leaves
        // the placeholder thumbnail in place.
        let thumbnailPath = '/api/assets/default-article.png';
        if (scrapedArticle.imageUrl) {
          try {
            const imageFilename = `scraped_${Date.now()}_${Math.random().toString(36).substring(7)}.jpg`;
            const downloadedImage = await scraper.downloadImage(scrapedArticle.imageUrl, imageFilename);
            if (downloadedImage) {
              // Create thumbnail
              const thumbnailFilename = `thumb_${imageFilename}`;
              const thumbnailSuccess = await scraper.createThumbnail(
                path.join(scrapedDir, imageFilename),
                path.join(scrapedDir, thumbnailFilename),
                300
              );
              if (thumbnailSuccess) {
                thumbnailPath = `/api/assets/scraped/${thumbnailFilename}`;
              }
            }
          } catch (imageError) {
            console.warn(`Failed to process image for ${scrapedArticle.url}:`, imageError);
          }
        }

        // Create article with scraped data
        const articleData = {
          outletId,
          title: scrapedArticle.title.substring(0, 200),
          summary: scrapedArticle.summary.substring(0, 500),
          body: scrapedArticle.body,
          thumbnail: thumbnailPath,
          publishedAt: scrapedArticle.publishedAt,
          tags: scrapedArticle.tags,
          viewCount: 0,
          sourceUrl: scrapedArticle.url,
          author: scrapedArticle.author || null,
          originalImageUrl: scrapedArticle.imageUrl || null,
          scrapedAt: new Date(),
          isScraped: 1
        };

        const article = await storage.createArticle(articleData);
        results.push({
          url: scrapedArticle.url,
          article: {
            id: article.id,
            title: article.title,
            summary: article.summary
          },
          success: true
        });
      } catch (error) {
        console.error(`Error saving scraped article from ${scrapedArticle.url}:`, error);
        errors.push({
          url: scrapedArticle.url,
          error: error instanceof Error ? error.message : 'Failed to save article'
        });
      }
    }

    res.json({
      success: true,
      results,
      errors,
      message: `Successfully scraped and saved ${results.length} articles, ${errors.length} errors`,
      outlet: outlet.name,
      totalProcessed: urls.length
    });
  } catch (error) {
    console.error("Error in advanced scraping:", error);
    res.status(500).json({
      error: "Failed to process advanced scraping",
      details: error instanceof Error ? error.message : 'Unknown error'
    });
  }
});
|
|
|
|
// Batch scrape all outlets from attached file
//
// POST /api/scrape/batch-all
//   body (all optional): { maxPerOutlet? (default 5), maxConcurrency? (default 2) }
//
// Parses the attached outlet file, makes sure every outlet exists in storage
// (creating the missing ones), then scrapes up to `maxPerOutlet` URLs per
// outlet and stores the articles. Failures are collected per outlet/URL in
// `errors`; one failing outlet does not stop the others.
//
// Responses:
//   200 { success, summary, results, errors, message }
//   500 on an unexpected failure (for example the outlet file cannot be parsed)
app.post("/api/scrape/batch-all", async (req, res) => {
  try {
    // Every field is optional, so tolerate a request without a body.
    const { maxPerOutlet = 5, maxConcurrency = 2 } = req.body ?? {};

    // Client-supplied limits: anything that is not a positive integer falls
    // back to the default (a negative slice bound would otherwise select URLs
    // from the end of the list).
    const toPositiveInt = (value: unknown, fallback: number): number => {
      const parsed = Number(value);
      return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
    };
    const perOutletLimit = toPositiveInt(maxPerOutlet, 5);
    const concurrency = toPositiveInt(maxConcurrency, 2);

    console.log("Starting batch scraping of all outlets from attached file...");

    // Parse the attached outlet file
    const parsedOutlets = parseAttachedOutletFile();
    console.log(`Parsed ${parsedOutlets.total} outlets from file`);

    // Convert to outlet format
    const outletsData = OutletParser.convertToOutletFormat(parsedOutlets);

    const scraper = new WebScraper();
    const allResults = [];
    const allErrors = [];

    // Directory the thumbnail step reads from and writes to.
    // Uses `path.join`: only the default `path` import is visible at the top
    // of the file, so a bare `join` would not resolve.
    // NOTE(review): assumes scraper.downloadImage saves into this directory.
    const scrapedDir = path.join(process.cwd(), 'attached_assets', 'scraped');

    // Counts outlets that were started, including ones that later fail.
    let processedOutlets = 0;

    for (const outletData of outletsData) {
      try {
        processedOutlets++;
        console.log(`Processing outlet ${processedOutlets}/${outletsData.length}: ${outletData.name}`);

        // Look the outlet up and create it when it does not exist yet.
        let outlet;
        try {
          outlet = await storage.getOutletById(outletData.id);
          if (!outlet) {
            outlet = await storage.createOutlet({
              name: outletData.name,
              description: outletData.description,
              category: outletData.category,
              focusSubject: outletData.focusSubject,
              avatar: outletData.avatar,
              profileImage: outletData.profileImage,
              bio: outletData.bio,
              fullBio: outletData.fullBio
            });
          }
        } catch (outletError) {
          console.error(`Error creating/updating outlet ${outletData.name}:`, outletError);
          allErrors.push({
            outlet: outletData.name,
            error: `Failed to create outlet: ${outletError instanceof Error ? outletError.message : 'Unknown error'}`
          });
          continue;
        }

        // Take limited number of URLs per outlet
        const urlsToScrape = outletData.urls.slice(0, perOutletLimit);
        console.log(`Scraping ${urlsToScrape.length} URLs for ${outlet.name}`);

        // Scrape articles for this outlet
        const { successes: scrapedArticles, failures: scrapingFailures } =
          await scraper.scrapeMultipleArticles(urlsToScrape, concurrency);

        // Add scraping failures to errors
        for (const failure of scrapingFailures) {
          allErrors.push({
            outlet: outlet.name,
            url: failure.url,
            error: failure.error
          });
        }

        let savedCount = 0;

        for (const scrapedArticle of scrapedArticles) {
          try {
            // Process thumbnail; any image problem leaves the placeholder.
            let thumbnailPath = '/api/assets/default-article.png';
            if (scrapedArticle.imageUrl) {
              try {
                const imageFilename = `scraped_${outletData.id}_${Date.now()}_${Math.random().toString(36).substring(7)}.jpg`;
                const downloadedImage = await scraper.downloadImage(scrapedArticle.imageUrl, imageFilename);
                if (downloadedImage) {
                  const thumbnailFilename = `thumb_${imageFilename}`;
                  const thumbnailSuccess = await scraper.createThumbnail(
                    path.join(scrapedDir, imageFilename),
                    path.join(scrapedDir, thumbnailFilename),
                    300
                  );
                  if (thumbnailSuccess) {
                    thumbnailPath = `/api/assets/scraped/${thumbnailFilename}`;
                  }
                }
              } catch (imageError) {
                console.warn(`Failed to process image for ${scrapedArticle.url}:`, imageError);
              }
            }

            // Save article
            const articleData = {
              outletId: outlet.id,
              title: scrapedArticle.title.substring(0, 200),
              summary: scrapedArticle.summary.substring(0, 500),
              body: scrapedArticle.body,
              thumbnail: thumbnailPath,
              publishedAt: scrapedArticle.publishedAt,
              tags: scrapedArticle.tags,
              viewCount: 0,
              sourceUrl: scrapedArticle.url,
              author: scrapedArticle.author || null,
              originalImageUrl: scrapedArticle.imageUrl || null,
              scrapedAt: new Date(),
              isScraped: 1
            };

            const article = await storage.createArticle(articleData);
            savedCount++;

            allResults.push({
              outlet: outlet.name,
              url: scrapedArticle.url,
              articleTitle: article.title,
              success: true
            });
          } catch (error) {
            console.error(`Error saving article from ${scrapedArticle.url}:`, error);
            allErrors.push({
              outlet: outlet.name,
              url: scrapedArticle.url,
              error: error instanceof Error ? error.message : 'Failed to save article'
            });
          }
        }

        console.log(`Completed ${outlet.name}: ${savedCount} articles saved`);

        // Small delay between outlets to be respectful (skipped after the last one)
        if (processedOutlets < outletsData.length) {
          await new Promise(resolve => setTimeout(resolve, 2000));
        }
      } catch (error) {
        console.error(`Error processing outlet ${outletData.name}:`, error);
        allErrors.push({
          outlet: outletData.name,
          error: error instanceof Error ? error.message : 'Failed to process outlet'
        });
      }
    }

    res.json({
      success: true,
      summary: {
        totalOutlets: outletsData.length,
        processedOutlets,
        totalArticlesSaved: allResults.length,
        totalErrors: allErrors.length
      },
      results: allResults,
      errors: allErrors,
      message: `Batch scraping completed! Processed ${processedOutlets} outlets, saved ${allResults.length} articles`
    });
  } catch (error) {
    console.error("Error in batch scraping:", error);
    res.status(500).json({
      error: "Failed to process batch scraping",
      details: error instanceof Error ? error.message : 'Unknown error'
    });
  }
});
|
|
|
|
// Get scraped articles
//
// GET /api/scraped-articles?limit=N
//
// Returns up to `limit` scraped articles (default 50). A limit that is
// missing, not a number, or not positive falls back to the default instead of
// being forwarded to storage as NaN or a negative value.
app.get("/api/scraped-articles", async (req, res) => {
  try {
    const defaultLimit = 50;
    const parsedLimit = Number.parseInt(String(req.query.limit ?? defaultLimit), 10);
    const limitNum = Number.isFinite(parsedLimit) && parsedLimit > 0 ? parsedLimit : defaultLimit;

    const articles = await storage.getScrapedArticles(limitNum);
    res.json(articles);
  } catch (error) {
    console.error("Error fetching scraped articles:", error);
    res.status(500).json({ error: "Failed to fetch scraped articles" });
  }
});
|
|
|
|
// Parse outlet file and scrape all of its links
//
// POST /api/scrape/outlet-file
//   body: { filePath }  - path of the outlet file, relative to the process
//                         working directory
//
// Parses the outlet file, then for each outlet in the people/topics/companies
// categories: finds or creates the outlet, scrapes all of its URLs (two at a
// time), builds thumbnails where an image is available and stores the
// articles. Failures are collected in `errors`; one failing outlet does not
// stop the others.
//
// Responses:
//   200 { success, summary, results, errors, message }
//   400 when `filePath` is missing, not a string, or points outside the
//       working directory
//   500 on an unexpected failure (for example the file cannot be parsed)
app.post("/api/scrape/outlet-file", async (req, res) => {
  try {
    const { filePath } = req.body;

    if (typeof filePath !== "string" || filePath.trim() === "") {
      return res.status(400).json({ error: "File path is required" });
    }

    // Parse the outlet file
    console.log('Parsing outlet file for scraping...');

    // The path comes from the client: resolve it against the working directory
    // and refuse anything that escapes it (e.g. "../../etc/passwd").
    // Uses `path.join`: only the default `path` import is visible at the top
    // of the file, so a bare `join` would not resolve.
    const projectRoot = process.cwd();
    const outletFilePath = path.join(projectRoot, filePath);
    const relativeToRoot = path.relative(projectRoot, outletFilePath);
    const escapesRoot =
      relativeToRoot === ".." ||
      relativeToRoot.startsWith(".." + path.sep) ||
      path.isAbsolute(relativeToRoot);
    if (escapesRoot) {
      return res.status(400).json({ error: "File path must be inside the project directory" });
    }

    const parsedOutlets = OutletParser.parseOutletFile(outletFilePath);

    console.log(`Parsed ${parsedOutlets.total} outlets with links to scrape`);

    const scraper = new WebScraper();
    const allResults = [];
    const allErrors = [];

    // Directory the thumbnail step reads from and writes to.
    // NOTE(review): assumes scraper.downloadImage saves into this directory.
    const scrapedDir = path.join(projectRoot, 'attached_assets', 'scraped');

    // Counts outlets that were started, including ones that later fail.
    let processedOutlets = 0;

    // Process each category (people, topics, companies)
    for (const category of ['people', 'topics', 'companies'] as const) {
      const outlets = parsedOutlets[category];

      for (const outletData of outlets) {
        try {
          processedOutlets++;
          console.log(`Processing ${category} outlet: ${outletData.name} (${outletData.urls.length} URLs)`);

          // Find existing outlet or create new one.
          // NOTE(review): the lookup passes `focusSubject` where an outlet id
          // is expected - confirm that the two coincide for parsed outlets.
          let outlet = await storage.getOutletById(outletData.focusSubject);

          if (!outlet) {
            console.log(`Creating new outlet: ${outletData.name}`);
            // Create outlet if it doesn't exist
            outlet = await storage.createOutlet({
              name: outletData.name,
              description: `Specialized coverage focusing on ${outletData.name}`,
              category: outletData.category,
              focusSubject: outletData.name,
              bio: `Comprehensive coverage and analysis focusing on ${outletData.name}`,
              avatar: null,
              profileImage: null,
              fullBio: null
            });
          }

          // Scrape all URLs for this outlet with controlled concurrency
          const { successes: scrapedArticles, failures: scrapingFailures } =
            await scraper.scrapeMultipleArticles(outletData.urls, 2);

          // Add scraping failures to errors
          for (const failure of scrapingFailures) {
            allErrors.push({
              outlet: outlet.name,
              url: failure.url,
              error: failure.error
            });
          }

          for (const scrapedArticle of scrapedArticles) {
            try {
              // Download and process images if available; any image problem
              // leaves the placeholder thumbnail in place.
              let thumbnailPath = '/api/assets/default-article.png';
              if (scrapedArticle.imageUrl) {
                try {
                  const imageFilename = `scraped_${outletData.focusSubject}_${Date.now()}_${Math.random().toString(36).substring(7)}.jpg`;
                  const downloadedImage = await scraper.downloadImage(scrapedArticle.imageUrl, imageFilename);
                  if (downloadedImage) {
                    // Create thumbnail
                    const thumbnailFilename = `thumb_${imageFilename}`;
                    const thumbnailSuccess = await scraper.createThumbnail(
                      path.join(scrapedDir, imageFilename),
                      path.join(scrapedDir, thumbnailFilename),
                      300
                    );
                    if (thumbnailSuccess) {
                      thumbnailPath = `/api/assets/scraped/${thumbnailFilename}`;
                    }
                  }
                } catch (imageError) {
                  console.warn(`Failed to process image for ${scrapedArticle.url}:`, imageError);
                }
              }

              // Create article with scraped data
              const articleData = {
                outletId: outlet.id,
                title: scrapedArticle.title.substring(0, 200),
                summary: scrapedArticle.summary.substring(0, 500),
                body: scrapedArticle.body,
                thumbnail: thumbnailPath,
                publishedAt: scrapedArticle.publishedAt,
                tags: scrapedArticle.tags,
                viewCount: 0,
                sourceUrl: scrapedArticle.url,
                author: scrapedArticle.author || null,
                originalImageUrl: scrapedArticle.imageUrl || null,
                scrapedAt: new Date(),
                isScraped: 1
              };

              const article = await storage.createArticle(articleData);
              allResults.push({
                outlet: outlet.name,
                url: scrapedArticle.url,
                article: {
                  id: article.id,
                  title: article.title,
                  summary: article.summary
                },
                success: true
              });
            } catch (error) {
              console.error(`Error saving scraped article from ${scrapedArticle.url}:`, error);
              allErrors.push({
                outlet: outlet.name,
                url: scrapedArticle.url,
                error: error instanceof Error ? error.message : 'Failed to save article'
              });
            }
          }

          // Add delay between outlets to be respectful
          await new Promise(resolve => setTimeout(resolve, 2000));
        } catch (error) {
          console.error(`Error processing outlet ${outletData.name}:`, error);
          allErrors.push({
            outlet: outletData.name,
            url: 'N/A',
            error: error instanceof Error ? error.message : 'Failed to process outlet'
          });
        }
      }
    }

    res.json({
      success: true,
      summary: {
        totalOutlets: parsedOutlets.total,
        processedOutlets,
        totalArticlesSaved: allResults.length,
        totalErrors: allErrors.length,
        breakdown: {
          people: parsedOutlets.people.length,
          topics: parsedOutlets.topics.length,
          companies: parsedOutlets.companies.length
        }
      },
      results: allResults,
      errors: allErrors,
      message: `Batch scraping completed! Processed ${processedOutlets} outlets, saved ${allResults.length} articles from ${parsedOutlets.total} total outlets`
    });
  } catch (error) {
    console.error("Error in outlet file scraping:", error);
    res.status(500).json({
      error: "Failed to process outlet file scraping",
      details: error instanceof Error ? error.message : 'Unknown error'
    });
  }
});
|
|
|
|
// Outlet consolidation endpoint
//
// POST /api/outlets/consolidate
//
// Groups stored outlets that are variants of the same canonical outlet name,
// keeps one outlet per group (renaming it to the canonical name when no outlet
// carries it exactly) and merges the others into it via storage.mergeOutlets.
//
// Matching is deliberately conservative because merging is destructive:
//   - names are compared case-insensitively and only on whole-word boundaries,
//     so the topic "AI" does not match the company "Chainlink";
//   - for canonical names of the form "X (Y)", both X and Y count as aliases;
//   - an outlet whose name is exactly another canonical name is never treated
//     as a duplicate of this one ("AI" is not merged into "Decentralized AI");
//   - an outlet is assigned to at most one group, so it is never merged twice.
// NOTE(review): a one-word outlet name that appears in several canonical names
// (e.g. "Coin") is still claimed by the first canonical name that contains it.
//
// Responses:
//   200 { success, summary, consolidationResults, message }
//   500 on failure (merges completed before the failure are not rolled back)
app.post("/api/outlets/consolidate", async (req, res) => {
  try {
    console.log("Starting outlet consolidation...");

    // The exact list of media outlets provided by the user
    const correctOutlets = {
      people: [
        "Ala Shaabana", "Alex Karp", "Arthur Hayes", "Donald Trump Jr.", "Eric Trump",
        "Jacob Robert Steeves", "Jared Kushner", "J.D. Vance", "Jensen Huang", "Jerome Powell",
        "Joseph Jacks", "Larry Ellison", "Lily Liu", "Marco Rubio", "Robert Myers",
        "Sam Altman", "Satya Nadella", "Scott Bessent", "Simon Kim", "Yat Siu"
      ],
      topics: [
        "AI", "Alt Coin", "Bollywood", "CantoPop", "CBDC (Central Bank Digital Currency)",
        "CFTC (Commodity Futures Trading Commission)", "Crypto", "Custody Regulation", "DAT (Digital Asset Treasury)",
        "Decentralized AI", "DeFi", "DEX (Decentralized Exchange)", "Fed (Federal Reserve)",
        "FOMC (Federal Open Market Committee)", "J-Star", "K-Star", "NFT (Non-Fungible Token)",
        "RWA (Real World Assets)", "SEC (Securities and Exchange Commission)", "Stable Coin", "SWF (Sovereign Wealth Fund)"
      ],
      companies: [
        "Ava Labs", "Bittensor", "BlackRock", "Boston Dynamics", "Chainlink", "Circle",
        "CME Group", "Epic Games", "Hashed", "Hyperliquid", "Oblong", "OpenSea",
        "Palantir", "PancakeSwap", "Polygon", "Saudi Aramco", "Solana Foundation",
        "TAOX", "TRON", "TSMC", "Uniswap", "World Liberty Financial", "xTAO", "YUMA"
      ]
    };

    const allOutlets = await storage.getAllOutlets();
    console.log(`Found ${allOutlets.length} existing outlets`);

    const correctNames = [
      ...correctOutlets.people,
      ...correctOutlets.topics,
      ...correctOutlets.companies
    ];

    const normalizeName = (name: string): string => name.toLowerCase().trim();

    const escapeRegExp = (text: string): string => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

    // True when `phrase` occurs in `text` and is not glued to a neighbouring
    // letter or digit (Unicode-aware, so Korean descriptions count as text).
    const containsWholePhrase = (text: string, phrase: string): boolean => {
      if (phrase === "") return false;
      const pattern = new RegExp(
        `(?<![\\p{L}\\p{N}])${escapeRegExp(phrase)}(?![\\p{L}\\p{N}])`,
        "u"
      );
      return pattern.test(text);
    };

    // "CBDC (Central Bank Digital Currency)" ->
    //   ["cbdc (central bank digital currency)", "cbdc", "central bank digital currency"]
    const aliasesOf = (canonicalName: string): string[] => {
      const full = normalizeName(canonicalName);
      const parenthesised = full.match(/^(.+?)\s*\((.+)\)$/);
      return parenthesised
        ? [full, parenthesised[1].trim(), parenthesised[2].trim()]
        : [full];
    };

    const canonicalNameSet = new Set(correctNames.map(normalizeName));
    const claimedOutletIds = new Set<MediaOutlet["id"]>();

    // Mapping from canonical name to the stored outlets considered its duplicates
    const duplicateGroups: { [key: string]: MediaOutlet[] } = {};

    // For each canonical name, find the stored outlets that are variants of it
    for (const correctName of correctNames) {
      const canonical = normalizeName(correctName);
      const aliases = aliasesOf(correctName);

      const similarOutlets = allOutlets.filter(outlet => {
        if (claimedOutletIds.has(outlet.id)) return false;

        const outletName = normalizeName(outlet.name);
        if (outletName === "") return false;
        if (outletName === canonical) return true;

        // An outlet that is itself a different canonical entry stays separate.
        if (canonicalNameSet.has(outletName)) return false;

        // Exact alias, canonical name plus an appended description, or a
        // shortened form of the canonical name.
        return aliases.some(alias =>
          outletName === alias ||
          containsWholePhrase(outletName, alias) ||
          containsWholePhrase(alias, outletName)
        );
      });

      if (similarOutlets.length > 1) {
        duplicateGroups[correctName] = similarOutlets;
        for (const outlet of similarOutlets) {
          claimedOutletIds.add(outlet.id);
        }
      }
    }

    console.log(`Found ${Object.keys(duplicateGroups).length} duplicate groups`);

    const consolidationResults: Array<{
      correctName: string;
      mainOutletId: MediaOutlet["id"];
      mergedOutlets: string[];
      movedArticles: number;
    }> = [];
    let totalMergedOutlets = 0;
    let totalMovedArticles = 0;

    // Consolidate every duplicate group
    for (const [correctName, duplicates] of Object.entries(duplicateGroups)) {
      if (duplicates.length <= 1) continue;

      console.log(`Processing duplicates for: ${correctName}`);

      // Prefer the outlet whose name already equals the canonical name
      const exactMatch = duplicates.find(outlet =>
        outlet.name.toLowerCase().trim() === correctName.toLowerCase().trim()
      );

      // Without an exact match, keep the first outlet and rename it
      const mainOutlet = exactMatch ?? duplicates[0];
      let mainOutletName = mainOutlet.name;
      if (!exactMatch) {
        await storage.updateOutlet(mainOutlet.id, { name: correctName });
        console.log(`Updated outlet name from "${mainOutlet.name}" to "${correctName}"`);
        // The in-memory object still holds the old name; log the new one below.
        mainOutletName = correctName;
      }

      // Merge the remaining duplicates into the main outlet
      const duplicatesToMerge = duplicates.filter(outlet => outlet.id !== mainOutlet.id);
      let movedArticlesCount = 0;

      for (const duplicate of duplicatesToMerge) {
        console.log(`Merging "${duplicate.name}" into "${mainOutletName}"`);

        const mergeResult = await storage.mergeOutlets(duplicate.id, mainOutlet.id);
        if (mergeResult.success) {
          movedArticlesCount += mergeResult.movedArticles;
          totalMergedOutlets++;
          console.log(`Successfully merged ${duplicate.name}, moved ${mergeResult.movedArticles} articles`);
        } else {
          console.error(`Failed to merge ${duplicate.name}`);
        }
      }

      totalMovedArticles += movedArticlesCount;
      consolidationResults.push({
        correctName,
        mainOutletId: mainOutlet.id,
        mergedOutlets: duplicatesToMerge.map(d => d.name),
        movedArticles: movedArticlesCount
      });
    }

    console.log("Outlet consolidation completed");

    res.json({
      success: true,
      summary: {
        totalDuplicateGroups: Object.keys(duplicateGroups).length,
        totalMergedOutlets,
        totalMovedArticles,
        initialOutletCount: allOutlets.length,
        finalOutletCount: allOutlets.length - totalMergedOutlets
      },
      consolidationResults,
      message: `Successfully consolidated ${totalMergedOutlets} duplicate outlets, moved ${totalMovedArticles} articles`
    });
  } catch (error) {
    console.error("Error in outlet consolidation:", error);
    res.status(500).json({
      error: "Failed to consolidate outlets",
      details: error instanceof Error ? error.message : 'Unknown error'
    });
  }
});
|
|
|
|
// Bookmark routes
//
// POST /api/bookmarks/toggle
//   body: { articleId, userIdentifier }
// Flips the bookmark state of an article for one user and returns whatever
// storage.toggleBookmark reports.
app.post("/api/bookmarks/toggle", async (req, res) => {
  try {
    const articleId = req.body.articleId;
    const userIdentifier = req.body.userIdentifier;

    const hasBothIds = Boolean(articleId) && Boolean(userIdentifier);
    if (!hasBothIds) {
      res.status(400).json({ error: "articleId and userIdentifier are required" });
      return;
    }

    const toggled = await storage.toggleBookmark(articleId, userIdentifier);
    res.json(toggled);
  } catch (err) {
    console.error("Error toggling bookmark:", err);
    res.status(500).json({ error: "Failed to toggle bookmark" });
  }
});
|
|
|
|
/**
 * GET /api/bookmarks/user/:userIdentifier
 *
 * Returns `{ bookmarks }` for the given user.
 *
 * This literal route must be registered before the parameterised
 * `/api/bookmarks/:articleId/:userIdentifier` route below: Express matches
 * routes in registration order, and the parameterised pattern also matches
 * `/api/bookmarks/user/<id>` (with articleId = "user"), which would make this
 * handler unreachable.
 */
app.get("/api/bookmarks/user/:userIdentifier", async (req, res) => {
  try {
    const { userIdentifier } = req.params;
    const bookmarks = await storage.getUserBookmarks(userIdentifier);
    res.json({ bookmarks });
  } catch (error) {
    console.error("Error getting user bookmarks:", error);
    res.status(500).json({ error: "Failed to get bookmarks" });
  }
});

/**
 * GET /api/bookmarks/:articleId/:userIdentifier
 *
 * Returns `{ isBookmarked }` for one article/user pair.
 */
app.get("/api/bookmarks/:articleId/:userIdentifier", async (req, res) => {
  try {
    const { articleId, userIdentifier } = req.params;
    const isBookmarked = await storage.isBookmarked(articleId, userIdentifier);
    res.json({ isBookmarked });
  } catch (error) {
    console.error("Error checking bookmark:", error);
    res.status(500).json({ error: "Failed to check bookmark" });
  }
});
|
|
|
|
// Article stats routes
//
// GET /api/articles/:id/comment-count
// Responds with `{ count }` as reported by storage.getArticleCommentCount.
app.get("/api/articles/:id/comment-count", async (req, res) => {
  try {
    const articleId = req.params.id;
    const commentTotal = await storage.getArticleCommentCount(articleId);
    res.json({ count: commentTotal });
  } catch (err) {
    console.error("Error getting comment count:", err);
    res.status(500).json({ error: "Failed to get comment count" });
  }
});
|
|
|
|
// Simplified comment endpoints for mobile app
//
// GET /api/comments?articleId=...
// Returns the first page (limit 100, offset 0) of comments for an article;
// responds 400 when `articleId` is absent.
app.get("/api/comments", async (req, res) => {
  try {
    const articleId = req.query.articleId;

    if (!articleId) {
      res.status(400).json({ error: "articleId is required" });
      return;
    }

    const firstPage = { limit: 100, offset: 0 };
    const comments = await storage.getCommentsByArticle(articleId as string, firstPage);

    res.json(comments);
  } catch (err) {
    console.error("Error fetching comments:", err);
    res.status(500).json({ error: "Failed to fetch comments" });
  }
});
|
|
|
|
/**
 * POST /api/comments
 *
 * Creates a top-level comment (never a reply: `parentId` is always null).
 *
 * Body: { articleId, content, nickname, avatar? }
 * Responds with the created comment, 400 when `articleId` is missing or
 * `content`/`nickname` is missing, not a string, or blank, and 500 when
 * storage fails.
 */
app.post("/api/comments", async (req, res) => {
  try {
    // Tolerate a missing body so the client gets a 400 instead of a 500.
    const { articleId, content, nickname, avatar } = req.body ?? {};

    // A bare truthiness check would let objects, numbers or whitespace-only
    // strings through to storage; require real, non-blank text.
    const isNonBlankString = (value: unknown): value is string =>
      typeof value === "string" && value.trim() !== "";

    if (!articleId || !isNonBlankString(content) || !isNonBlankString(nickname)) {
      return res.status(400).json({ error: "articleId, content, and nickname are required" });
    }

    const comment = await storage.createComment({
      articleId,
      content,
      nickname,
      avatar: avatar || null,
      parentId: null
    });

    res.json(comment);
  } catch (error) {
    console.error("Error creating comment:", error);
    res.status(500).json({ error: "Failed to create comment" });
  }
});
|
|
|
|
// Prediction Market routes
//
// GET /api/prediction-markets/article/:articleId?limit=N&offset=M
//
// Returns the prediction markets linked to an article. `limit` defaults to 3
// and `offset` to 0; values that are missing, not numeric, or negative (and a
// zero limit) fall back to those defaults instead of being forwarded to
// storage.
app.get("/api/prediction-markets/article/:articleId", async (req, res) => {
  try {
    const { articleId } = req.params;

    const parsedLimit = Number.parseInt(String(req.query.limit), 10);
    const parsedOffset = Number.parseInt(String(req.query.offset), 10);
    const limit = Number.isFinite(parsedLimit) && parsedLimit > 0 ? parsedLimit : 3;
    const offset = Number.isFinite(parsedOffset) && parsedOffset >= 0 ? parsedOffset : 0;

    const markets = await storage.getPredictionMarketsByArticle(articleId, { limit, offset });
    res.json(markets);
  } catch (error) {
    console.error("Error getting prediction markets:", error);
    res.status(500).json({ error: "Failed to get prediction markets" });
  }
});
|
|
|
|
// Serve attached assets route: files under <cwd>/attached_assets are exposed
// at /api/assets.
const attachedAssetsDir = path.resolve(process.cwd(), "attached_assets");
app.use("/api/assets", express.static(attachedAssetsDir));

// Wrap the configured Express app in a Node HTTP server and hand it back to
// the caller.
return createServer(app);
|
|
} |