commit e3c28f796aa8311966f78bafc3c8fccc7c476ee3 Author: jungwoo choi Date: Sun Sep 28 20:41:57 2025 +0900 Initial commit - cleaned repository diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..f8a163f --- /dev/null +++ b/.env.example @@ -0,0 +1,37 @@ +# Environment Configuration Example +# Copy this file to .env and update with your values + +ENV=development + +# Port Configuration +CONSOLE_BACKEND_PORT=8011 +CONSOLE_FRONTEND_PORT=3000 +USERS_SERVICE_PORT=8001 +MONGODB_PORT=27017 +REDIS_PORT=6379 + +# Database Configuration +MONGODB_URL=mongodb://mongodb:27017 +MONGODB_DATABASE=site11_db +USERS_DB_NAME=users_db + +# Redis Configuration +REDIS_URL=redis://redis:6379 + +# JWT Configuration +JWT_SECRET_KEY=change-this-secret-key-in-production +JWT_ALGORITHM=HS256 +ACCESS_TOKEN_EXPIRE_MINUTES=30 + +# Service URLs (Internal) +USERS_SERVICE_URL=http://users-backend:8000 + +# Frontend Configuration +VITE_API_URL=http://localhost:8011 + +# Kafka Configuration (Future) +# KAFKA_BOOTSTRAP_SERVERS=kafka:9092 +# KAFKA_GROUP_ID=site11-group + +# Docker Configuration +COMPOSE_PROJECT_NAME=site11 \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e45c867 --- /dev/null +++ b/.gitignore @@ -0,0 +1,85 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +.venv +pip-log.txt +pip-delete-this-directory.txt +.pytest_cache/ +*.egg-info/ +dist/ +build/ + +# Node +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* +dist/ +build/ + +# Environment +.env +.env.local +.env.*.local +*.local + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Docker +*.log +docker-compose.override.yml + +# Database +data/ +*.db +*.sqlite + +# Testing +coverage/ +.coverage +htmlcov/ +.tox/ +.hypothesis/ + +# Temporary +tmp/ +temp/ +*.tmp +*.temp +*.bak + +# Secrets +*.pem +*.key +*.crt +secrets/data/ + +# Backups and logs +backups/ +*.archive +*.log + +# Images and media 
+*.png +*.jpg +*.jpeg +*.gif + +# Node modules +node_modules/ +**/node_modules/ + +# Large data files +data/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..87903be --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,275 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Development Principles +**IMPORTANT**: +1. 모든 개발은 Docker 환경에서만 진행 +2. Docker 빌드는 백그라운드로 실행하고 완료까지 대기 +3. 로컬 환경 설정 금지 (venv, npm install 등) + +## File Naming Convention +**IMPORTANT**: 모든 문서 파일은 대문자.md 형식으로 생성 +- 예: README.md, CHANGELOG.md, TODO.md, ARCHITECTURE.md + +## Context Recovery Guide +**IMPORTANT**: 새 세션 시작 시 반드시 확인할 파일들: +1. `docs/PROGRESS.md` - 현재 진행 상황과 다음 단계 +2. `docs/PLAN.md` - 전체 구현 계획 +3. `docker ps` - 실행 중인 서비스 확인 + +## Quick Status Check +```bash +# 진행 상황 빠른 확인 +cat docs/PROGRESS.md | grep "Next Action" +``` + +## System Architecture + +### Architecture Pattern: Microservices with Central Console + +**Core Concept**: Console acts as the orchestrator and dashboard, while individual microservices handle domain-specific logic. + +### Architecture Approaches + +#### 1. API Gateway Pattern (Recommended) +Console serves as an API Gateway that: +- Routes requests to appropriate microservices +- Handles authentication/authorization centrally +- Aggregates responses from multiple services +- Provides service discovery and health monitoring + +#### 2. Service Mesh Pattern +- Each service communicates directly via service mesh (Istio/Linkerd) +- Console focuses on monitoring and management +- Better for complex inter-service communication + +#### 3. 
Event-Driven Architecture +- Services communicate via message broker (RabbitMQ/Kafka) +- Console subscribes to events for real-time monitoring +- Loose coupling between services + +### Technology Stack +- **Backend**: FastAPI (Python 3.11) + Motor (MongoDB async driver) +- **Frontend**: React 18 + Vite + TypeScript + Material-UI v7 +- **Database**: MongoDB 7.0 +- **Cache**: Redis 7 +- **Reverse Proxy**: Nginx +- **Container**: Docker & Docker Compose +- **Version Control**: git +- **API Documentation**: OpenAPI/Swagger +- **Service Communication**: REST + gRPC (for internal services) + +### Service Configuration +- **Nginx**: Port 80 (reverse proxy) +- **Console Backend**: Port 8000 (API Gateway) +- **Console Frontend**: Port 3000 +- **Services**: + - Images: 8001-8002 + - OAuth: 8003-8004 + - Applications: 8005-8006 + - Users: 8007-8008 + - Data: 8009-8010 + - Statistics: 8011-8012 +- **MongoDB**: Port 27017 (internal) +- **Redis**: Port 6379 (internal) + +## Microservices Implementation Strategy + +### Console as API Gateway (Recommended Approach) + +#### Responsibilities +**Console Backend**: +- Service discovery and routing +- Authentication & authorization (JWT/OAuth2) +- Request/response transformation +- Rate limiting & throttling +- Circuit breaking for fault tolerance +- Centralized logging & monitoring +- API composition for complex operations + +**Individual Microservices**: +- Domain-specific business logic +- Own database/collection management +- Event publishing for async operations +- Health endpoints for monitoring +- OpenAPI documentation + +### Service Communication Patterns + +#### 1. Synchronous Communication (REST) +```python +# Console backend routing example +@app.get("/api/users/{user_id}") +async def get_user(user_id: str): + # Route to users service + response = await http_client.get(f"http://users-service:8007/users/{user_id}") + return response.json() +``` + +#### 2. 
Asynchronous Communication (Event-driven) +```python +# Service publishes event +await redis_client.publish("user.created", user_data) + +# Console subscribes to events +async def handle_user_created(data): + # Update dashboard metrics + await update_statistics(data) +``` + +#### 3. Service Registry Pattern +```yaml +# services-registry.yaml +services: + users: + backend: "http://users-backend:8007" + frontend: "http://users-frontend:8008" + health: "/health" + oauth: + backend: "http://oauth-backend:8003" + frontend: "http://oauth-frontend:8004" + health: "/health" +``` + +### Development Workflow + +#### Commands +```bash +# Start all services +docker-compose up -d + +# Start specific service +docker-compose up -d console users + +# View logs +docker-compose logs -f [service-name] + +# Rebuild service +docker-compose build [service-name] +docker-compose up -d [service-name] + +# Run tests +docker-compose exec [service-name] pytest + +# Database migrations +docker-compose exec [service-name] alembic upgrade head +``` + +#### Service Development Guidelines +1. Each service should be independently deployable +2. Services share nothing except API contracts +3. Use correlation IDs for distributed tracing +4. Implement health checks and readiness probes +5. 
Version APIs appropriately (e.g., /api/v1/) + +## Project Structure + +``` +site11/ +├── docker-compose.yml +├── nginx/ +│ └── nginx.conf +├── console/ +│ ├── backend/ +│ │ ├── Dockerfile +│ │ └── requirements.txt +│ └── frontend/ +│ ├── Dockerfile +│ ├── package.json +│ ├── vite.config.ts +│ ├── tsconfig.json +│ └── src/ +│ ├── App.tsx +│ ├── main.tsx +│ ├── layouts/ +│ │ └── AdminLayout.tsx +│ └── pages/ +│ ├── Login.tsx +│ └── Register.tsx +├── services/ +│ ├── images/ +│ │ ├── backend/ +│ │ └── frontend/ +│ ├── oauth/ +│ │ ├── backend/ +│ │ └── frontend/ +│ ├── applications/ +│ │ ├── backend/ +│ │ └── frontend/ +│ ├── users/ +│ │ ├── backend/ +│ │ └── frontend/ +│ ├── data/ +│ │ ├── backend/ +│ │ └── frontend/ +│ └── statistics/ +│ ├── backend/ +│ └── frontend/ +├── docs/ +│ └── PLAN.md +└── CLAUDE.md +``` + +## Service-Specific Architecture + +### Console Service +- **Purpose**: Central orchestrator and dashboard +- **Key Features**: + - Service health monitoring dashboard + - Unified authentication portal + - API Gateway for all services + - Real-time metrics aggregation + - Service configuration management + +### Microservice Template +Each service follows this structure: +``` +service-name/ +├── backend/ +│ ├── Dockerfile +│ ├── requirements.txt +│ ├── app/ +│ │ ├── main.py # FastAPI app +│ │ ├── models.py # Pydantic models +│ │ ├── routes.py # API endpoints +│ │ ├── database.py # MongoDB connection +│ │ └── services.py # Business logic +│ └── tests/ +└── frontend/ + ├── Dockerfile + ├── package.json + └── src/ + ├── api/ # Service API client + ├── components/ # React components + └── hooks/ # Custom hooks +``` + +## Inter-Service Communication + +### API Gateway Routes (Console) +```python +# Console backend routing configuration +SERVICES = { + "users": {"url": "http://users-backend:8007", "prefix": "/api/users"}, + "oauth": {"url": "http://oauth-backend:8003", "prefix": "/api/auth"}, + "images": {"url": "http://images-backend:8001", "prefix": 
"/api/images"}, + "applications": {"url": "http://apps-backend:8005", "prefix": "/api/apps"}, + "data": {"url": "http://data-backend:8009", "prefix": "/api/data"}, + "statistics": {"url": "http://stats-backend:8011", "prefix": "/api/stats"} +} +``` + +### Service Discovery +Services register themselves with Console on startup and send periodic heartbeats. + +## Database Strategy +- **Shared MongoDB instance** with separate databases per service +- **Redis** for caching and pub/sub messaging +- Each service owns its data and exposes it via APIs + +## Security Considerations +- JWT tokens issued by OAuth service +- Console validates tokens and forwards to services +- Internal service communication uses service tokens +- Rate limiting at API Gateway level diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 0000000..cff9edc --- /dev/null +++ b/PLAN.md @@ -0,0 +1,185 @@ +# 프로젝트 개발 계획 + +## 프로젝트 목표 +마이크로서비스 아키텍처 기반의 확장 가능한 웹 애플리케이션 구축 + +## 아키텍처 원칙 +1. **Console as API Gateway**: 모든 외부 요청은 Console을 통해 라우팅 +2. **Docker-Only Development**: 모든 개발과 실행은 Docker 컨테이너 내에서 +3. **Event-Driven Architecture**: Kafka를 통한 서비스 간 비동기 통신 +4. 
**Service Isolation**: 각 서비스는 독립적으로 배포 가능 + +## 완료된 단계 (✅) + +### Phase 1: 기반 구축 +- [x] Step 1: 기본 프로젝트 구조 및 Docker 설정 +- [x] Step 2: Users 마이크로서비스 구현 +- [x] Step 3: MongoDB 통합 +- [x] Step 4: Redis 캐싱 시스템 +- [x] Step 5: Frontend 스켈레톤 (React + Vite) +- [x] Step 6: JWT 인증 시스템 +- [x] Step 6.5: Images 서비스 통합 +- [x] Step 7: Kafka 이벤트 시스템 +- [x] Step 8: OAuth 2.0 인증 시스템 및 프로필 기능 + +## 진행 예정 단계 + +### Phase 2: 이벤트 기반 시스템 확장 +#### Step 9: 고급 이벤트 처리 +- [ ] 이벤트 소비자 구현 + - Console에서 user-events 토픽 구독 + - 알림 서비스 이벤트 처리 +- [ ] Dead Letter Queue 구현 +- [ ] 이벤트 재시도 메커니즘 +- [ ] 이벤트 스키마 레지스트리 + +#### Step 10: 태스크 큐 시스템 +- [ ] Kafka 기반 백그라운드 작업 처리 +- [ ] 이미지 프로세싱 작업 큐 +- [ ] 이메일 전송 큐 +- [ ] 배치 작업 스케줄러 + +### Phase 3: 고급 기능 +#### Step 11: 실시간 기능 +- [ ] WebSocket 통합 (Console) +- [ ] 실시간 알림 시스템 +- [ ] 온라인 사용자 상태 추적 +- [ ] 실시간 데이터 동기화 + +#### Step 12: 파일 시스템 +- [ ] 파일 업로드 서비스 +- [ ] S3 호환 객체 스토리지 (MinIO) +- [ ] 파일 메타데이터 관리 +- [ ] 썸네일 생성 서비스 + +#### Step 13: 검색 시스템 +- [ ] Elasticsearch 통합 +- [ ] 전문 검색 기능 +- [ ] 자동완성 기능 +- [ ] 검색 분석 및 최적화 + +### Phase 4: 프로덕션 준비 +#### Step 14: 모니터링 및 로깅 +- [ ] Prometheus 메트릭 수집 +- [ ] Grafana 대시보드 +- [ ] ELK Stack 로깅 +- [ ] 분산 추적 (Jaeger) + +#### Step 15: 보안 강화 +- [ ] Rate Limiting +- [ ] API Key 관리 +- [ ] OAuth2 통합 +- [ ] 데이터 암호화 + +#### Step 16: 테스트 및 CI/CD +- [ ] 단위 테스트 작성 +- [ ] 통합 테스트 +- [ ] E2E 테스트 +- [ ] GitHub Actions CI/CD + +#### Step 17: 성능 최적화 +- [ ] 데이터베이스 인덱싱 +- [ ] 쿼리 최적화 +- [ ] 캐싱 전략 개선 +- [ ] CDN 통합 + +## 서비스 구성 + +### 현재 서비스 +1. **Console** (API Gateway) + - Frontend: React SPA + - Backend: FastAPI, JWT 인증 + +2. **Users Service** + - User CRUD + - MongoDB 저장소 + - Kafka 이벤트 발행 + - 프로필 정보 관리 (사진, bio, location 등) + +3. **Images Service** + - 이미지 프록시 + - 캐싱 시스템 + - WebP 변환 + +4. **OAuth Service** + - OAuth 2.0 인증 서버 + - 애플리케이션 등록 및 관리 + - 토큰 발급 및 검증 + - SSO 지원 (Google, GitHub, SAML) + - 스코프 기반 권한 관리 + +### 계획된 서비스 +5. **Notification Service** + - 이메일/SMS 전송 + - 푸시 알림 + - 알림 히스토리 + +6. 
**Analytics Service** + - 사용자 행동 분석 + - 비즈니스 메트릭 + - 리포트 생성 + +7. **Payment Service** + - 결제 처리 + - 구독 관리 + - 청구서 생성 + +## 기술 스택 로드맵 + +### 현재 사용 중 +- FastAPI, React, TypeScript +- MongoDB, Redis +- Apache Kafka +- Docker, Docker Compose + +### 도입 예정 +- Elasticsearch (검색) +- MinIO (객체 스토리지) +- Prometheus/Grafana (모니터링) +- Jaeger (분산 추적) +- Nginx (리버스 프록시) + +## 개발 일정 + +### 2025 Q1 +- Phase 2 완료 (이벤트 시스템) +- Phase 3 시작 (고급 기능) + +### 2025 Q2 +- Phase 3 완료 +- Phase 4 시작 (프로덕션 준비) + +### 2025 Q3 +- Phase 4 완료 +- 프로덕션 배포 + +## 성공 지표 + +1. **기술적 지표** + - 서비스 응답 시간 < 200ms + - 시스템 가용성 > 99.9% + - 초당 처리 가능 요청 > 1000 + +2. **개발 지표** + - 테스트 커버리지 > 80% + - 빌드 시간 < 5분 + - 배포 시간 < 10분 + +3. **확장성 지표** + - 수평 확장 가능 + - 서비스 독립 배포 + - 무중단 업데이트 + +## 리스크 및 대응 방안 + +1. **복잡도 증가** + - 대응: 점진적 구현, 문서화 강화 + +2. **성능 병목** + - 대응: 프로파일링, 캐싱 전략 + +3. **데이터 일관성** + - 대응: 이벤트 소싱, SAGA 패턴 + +4. **보안 취약점** + - 대응: 정기 보안 감사, 자동화된 스캔 \ No newline at end of file diff --git a/PROGRESS.md b/PROGRESS.md new file mode 100644 index 0000000..df778d9 --- /dev/null +++ b/PROGRESS.md @@ -0,0 +1,147 @@ +# 프로젝트 진행 상황 + +## 완료된 단계 + +### Step 1: 기본 프로젝트 구조 생성 ✅ +- Docker Compose 설정 +- Console 서비스 (API Gateway) 기본 구현 +- 프로젝트 문서 (CLAUDE.md, PLAN.md) 작성 + +### Step 2: Users 마이크로서비스 구현 ✅ +- Users 서비스 CRUD API +- MongoDB 연동 (Beanie ODM) +- 서비스 간 통신 설정 + +### Step 3: MongoDB 통합 ✅ +- MongoDB 컨테이너 설정 +- Beanie ODM 설정 +- Users 모델 및 데이터베이스 연결 +- PyMongo 버전 호환성 문제 해결 + +### Step 4: Redis 통합 ✅ +- Redis 컨테이너 설정 +- 캐싱 시스템 준비 +- 향후 세션 관리 및 캐싱 구현 예정 + +### Step 5: Frontend 스켈레톤 ✅ +- React + Vite + TypeScript 설정 +- Material-UI 통합 +- Console Frontend 기본 구조 +- npm ci → npm install 문제 해결 + +### Step 6: 환경 변수 및 인증 ✅ +- .env 파일 설정 +- JWT 인증 시스템 구현 +- Console이 인증 처리 담당 +- 포트 충돌 해결 (8000 → 8011) + +### Step 6.5: Images 서비스 통합 ✅ +- site00의 image-service 마이그레이션 +- 프록시 및 캐싱 기능 유지 +- WebP 변환 기능 포함 +- Console에서 Images 서비스로 라우팅 + +### Step 7: Kafka 이벤트 시스템 ✅ +- Kafka 및 Zookeeper 컨테이너 추가 +- 공유 Kafka 라이브러리 생성 (Producer/Consumer) +- 이벤트 
타입 정의 (USER_CREATED, USER_UPDATED, USER_DELETED 등) +- Users 서비스에 이벤트 발행 기능 추가 +- aiokafka 통합 + +### Step 8: OAuth 2.0 인증 시스템 및 프로필 기능 ✅ +- OAuth 2.0 서비스 구현 + - Authorization Code, Client Credentials, Refresh Token 플로우 + - 애플리케이션 등록 및 관리 (CRUD) + - 토큰 introspection 및 revocation + - PKCE 지원 +- SSO 설정 지원 + - Google, GitHub, SAML 프로바이더 설정 + - 도메인 기반 접근 제어 +- 스코프 시스템 구현 + - 실용적인 스코프 카테고리 (기본 인증, 사용자, 앱, 조직, API) + - picture 스코프 추가 (프로필 사진 접근) +- Users 서비스 프로필 기능 확장 + - 프로필 사진 및 썸네일 필드 + - bio, location, website 필드 + - 이메일 인증 및 계정 활성화 상태 +- Docker 통합 완료 (포트 8003) + +## 현재 실행 중인 서비스 + +- **Console Frontend**: http://localhost:3000 +- **Console Backend**: http://localhost:8011 +- **Users Service**: http://localhost:8001 +- **Images Service**: http://localhost:8002 +- **OAuth Service**: http://localhost:8003 +- **MongoDB**: localhost:27017 +- **Redis**: localhost:6379 +- **Kafka**: localhost:9092 +- **Zookeeper**: localhost:2181 + +## 다음 단계 (예정) + +### Step 9: 고급 이벤트 처리 +- 이벤트 소비자 구현 +- 이벤트 기반 워크플로우 +- 에러 처리 및 재시도 로직 + +### Step 10: 태스크 큐 시스템 +- Kafka 기반 백그라운드 작업 처리 +- 이미지 프로세싱 작업 큐 +- 이메일 전송 큐 + +### Step 11: 고급 기능 +- 실시간 알림 (WebSocket) +- 파일 업로드 시스템 +- 검색 기능 (Elasticsearch) + +### Step 12: 프로덕션 준비 +- 로깅 시스템 (ELK Stack) +- 모니터링 (Prometheus/Grafana) +- CI/CD 파이프라인 +- 테스트 자동화 + +## 기술 스택 + +- **Backend**: FastAPI (Python) +- **Frontend**: React + TypeScript + Vite + Material-UI +- **Database**: MongoDB +- **Cache**: Redis +- **Message Queue**: Apache Kafka +- **Container**: Docker & Docker Compose +- **Authentication**: JWT + +## 주요 환경 변수 + +```env +COMPOSE_PROJECT_NAME=site11 +CONSOLE_BACKEND_PORT=8011 +USERS_BACKEND_PORT=8001 +OAUTH_BACKEND_PORT=8003 +JWT_SECRET_KEY=your-secret-key-change-in-production-12345 +KAFKA_BOOTSTRAP_SERVERS=kafka:9092 +``` + +## 문제 해결 기록 + +1. **PyMongo 호환성**: motor와 pymongo 버전 충돌 → pymongo==4.6.1로 고정 +2. **npm ci 실패**: package-lock.json 부재 → npm install로 변경 +3. **포트 충돌**: 8000 포트 사용 중 → Console을 8011로 변경 +4. 
**WebP 변환 문제**: 검정색 이미지 출력 → convert_to_webp 임시 비활성화 +5. **httpx 미사용 import**: OAuth 서비스 초기 구현시 불필요한 import → 제거 +6. **Kafka producer 파라미터 오류**: max_in_flight_requests_per_connection → 제거 +7. **Users 서비스 포트 누락**: docker-compose.yml에 포트 매핑 추가 + +## 컨텍스트 복구 정보 + +Claude가 재시작되면 이 정보를 참조: + +- 작업 디렉토리: `/Users/jungwoochoi/Desktop/prototype/site11` +- Git 저장소: 각 단계마다 커밋됨 +- Docker 개발 원칙: 모든 개발은 Docker 내에서 진행 +- 문서 형식: 모든 마크다운 파일은 대문자 (CLAUDE.md, PLAN.md, PROGRESS.md) +- Console 서비스가 중앙 API Gateway 역할 +- Kafka를 메인 이벤트 시스템으로 사용 +- Redis는 캐싱 전용 +- OAuth 2.0 서비스로 인증/인가 관리 +- 모든 서비스는 독립적으로 배포 가능 \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..4dcaa96 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# Site11 - Microservices Architecture + +## Overview +Microservices platform with Console as API Gateway orchestrating multiple domain services. + +## Quick Start + +### Start Services +```bash +# Start console service +docker-compose up -d console-backend + +# Check status +curl http://localhost:8011/health +``` + +### Available Endpoints +- `http://localhost:8011/` - Root endpoint +- `http://localhost:8011/health` - Health check +- `http://localhost:8011/api/status` - System status + +## Architecture +- **Console**: API Gateway and orchestrator +- **Services**: Domain-specific microservices (users, oauth, images, etc.) +- **Database**: MongoDB for persistence +- **Cache**: Redis for caching and pub/sub + +## Development +See `docs/PLAN.md` for implementation roadmap and `docs/PROGRESS.md` for current status. \ No newline at end of file diff --git a/backup-services/ai-writer/backend/Dockerfile b/backup-services/ai-writer/backend/Dockerfile new file mode 100644 index 0000000..a296111 --- /dev/null +++ b/backup-services/ai-writer/backend/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install dependencies +COPY requirements.txt . 
"""
Article Generation Module

Article generation logic built on the Claude API.
"""
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime
import json
import uuid
import logging
from anthropic import AsyncAnthropic
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)

# Claude model used for generation; the same value is recorded in the
# article's generation metadata.
_CLAUDE_MODEL = "claude-3-5-sonnet-20241022"

# Each fetched page body is cut to this many characters before it is placed
# in the prompt.
_MAX_CONTENT_CHARS = 1000

# Only the first N news items are sent to Claude (limit to prevent token
# overflow).
_MAX_PROMPT_ITEMS = 10

# System prompt: instructs Claude to answer with a single JSON document whose
# shape matches GeneratedArticle (title, summary, subtopics, categories,
# entities).
_SYSTEM_PROMPT = """당신은 전문적인 한국 언론사의 수석 기자입니다.
    제공된 데이터를 기반으로 깊이 있고 통찰력 있는 기사를 작성해야 합니다.
    기사는 다음 요구사항을 충족해야 합니다:

    1. 소주제는 최소 2개, 최대 6개로 구성해야 합니다
    2. 각 소주제는 최소 1개, 최대 10개의 문단으로 구성해야 합니다
    3. 전문적이고 객관적인 어조를 유지해야 합니다
    4. 사실에 기반한 분석과 통찰을 제공해야 합니다
    5. 한국 독자를 대상으로 작성되어야 합니다
    6. 이벤트 정보는 가능한 일시와 장소를 포함해야 합니다
    7. 핵심 키워드를 최대 10개까지 추출해야 합니다

    반드시 다음 JSON 형식으로 응답하세요:
    {
        "title": "기사 제목",
        "summary": "한 줄 요약 (100자 이내)",
        "subtopics": [
            {
                "title": "소주제 제목",
                "content": ["문단1", "문단2", ...]  // 1-10개 문단
            }
        ],  // 2-6개 소주제
        "categories": ["카테고리1", "카테고리2"],
        "entities": {
            "people": ["인물1", "인물2"],
            "organizations": ["기관1", "기관2"],
            "groups": ["단체1", "단체2"],
            "countries": ["나라1", "나라2"],
            "events": [
                {
                    "name": "이벤트명",
                    "date": "2025년 1월 15일",  // 선택사항
                    "location": "서울 코엑스"  // 선택사항
                }
            ],
            "keywords": ["키워드1", "키워드2", ...]  // 최대 10개
        }
    }"""


# Data Models
class NewsSource(BaseModel):
    """A news source that was referenced while writing an article."""
    title: str
    url: str
    published_date: Optional[str] = None
    source_site: str = "Unknown"


class EventInfo(BaseModel):
    """An event mentioned in the article; date and location are optional."""
    name: str
    date: Optional[str] = None
    location: Optional[str] = None


class Entities(BaseModel):
    """Entities extracted from the article."""
    people: List[str] = Field(default_factory=list)
    organizations: List[str] = Field(default_factory=list)
    groups: List[str] = Field(default_factory=list)
    countries: List[str] = Field(default_factory=list)
    events: List[EventInfo] = Field(default_factory=list)
    keywords: List[str] = Field(default_factory=list)


class SubTopic(BaseModel):
    """One article sub-topic: a heading plus its paragraphs."""
    title: str
    content: List[str]


class GeneratedArticle(BaseModel):
    """A generated article together with its sources and generation metadata."""
    news_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    title: str
    summary: str
    subtopics: List[SubTopic]
    categories: List[str]
    entities: Entities
    sources: List[NewsSource] = Field(default_factory=list)
    created_at: datetime = Field(default_factory=datetime.now)
    generation_metadata: Dict[str, Any] = Field(default_factory=dict)


def _collect_sources_and_content(
    news_data: Dict[str, Any]
) -> Tuple[List[NewsSource], List[str]]:
    """Walk ``news_data["news_items"]`` and build the source list and prompt text.

    Returns:
        ``(sources, texts)`` where ``sources`` holds one NewsSource per RSS
        item / Google result that has both a title and a link, and ``texts``
        holds one prompt fragment per news item.
    """
    sources: List[NewsSource] = []
    texts: List[str] = []

    for item in news_data.get("news_items", []):
        # RSS source info
        rss_title = item.get('rss_title', '')
        rss_link = item.get('rss_link', '')
        rss_published = item.get('rss_published', '')

        if rss_title and rss_link:
            sources.append(NewsSource(
                title=rss_title,
                url=rss_link,
                published_date=rss_published,
                source_site="RSS Feed"
            ))

        item_text = f"제목: {rss_title}\n"
        for result in item.get("google_results", []):
            # Google search result sources
            if "title" in result and "link" in result:
                sources.append(NewsSource(
                    title=result.get('title', ''),
                    url=result.get('link', ''),
                    published_date=None,
                    source_site="Google Search"
                ))

            content = result.get("full_content")
            if not content:
                continue
            if isinstance(content, dict):
                # ``or ''`` guards against an explicit null body, which would
                # otherwise make the slice below raise TypeError.
                body = content.get('content') or ''
                item_text += f"출처: {content.get('url', '')}\n"
                item_text += f"내용: {body[:_MAX_CONTENT_CHARS]}...\n\n"
            else:
                item_text += f"내용: {str(content)[:_MAX_CONTENT_CHARS]}...\n\n"
        texts.append(item_text)

    return sources, texts


def _extract_json_object(text: str) -> Dict[str, Any]:
    """Parse the span from the first ``{`` to the last ``}`` in *text* as JSON.

    Raises:
        ValueError: if *text* contains no ``{...}`` span.
        json.JSONDecodeError: if the span is not valid JSON.
    """
    start = text.find('{')
    end = text.rfind('}') + 1
    if start != -1 and end > start:
        return json.loads(text[start:end])
    raise ValueError("No valid JSON found in response")


def _parse_events(raw_events: Any) -> List[EventInfo]:
    """Normalise the ``events`` value of a Claude response.

    Accepts both the legacy format (plain event-name strings) and the current
    format (objects with ``name`` / ``date`` / ``location``), mirroring the
    handling in the service's ``main.py``. Entries of any other type are
    ignored.
    """
    events: List[EventInfo] = []
    for raw in raw_events or []:
        if isinstance(raw, str):
            events.append(EventInfo(name=raw))
        elif isinstance(raw, dict):
            events.append(EventInfo(
                name=raw.get("name", ""),
                date=raw.get("date"),
                location=raw.get("location")
            ))
    return events


async def generate_article_with_claude(
    news_data: Dict[str, Any],
    style: str = "professional",
    claude_api_key: Optional[str] = None
) -> GeneratedArticle:
    """Generate an article from aggregated news data using the Claude API.

    Args:
        news_data: Aggregated news; the code reads ``keyword`` and
            ``news_items`` (each with ``rss_title``, ``rss_link``,
            ``rss_published`` and ``google_results``).
        style: Article style passed into the prompt
            (professional / analytical / investigative).
        claude_api_key: API key; when falsy, the ``CLAUDE_API_KEY``
            environment variable is used instead.

    Returns:
        The generated article, including every collected source.

    Raises:
        json.JSONDecodeError: the response contained a ``{...}`` span that is
            not valid JSON.
        ValueError: the response contained no JSON object at all.
        Exception: any other failure (API error, model validation) is logged
            and re-raised unchanged.
    """
    if not claude_api_key:
        import os
        claude_api_key = os.getenv("CLAUDE_API_KEY")

    # Initialize Claude client
    claude_client = AsyncAnthropic(api_key=claude_api_key)

    sources_info, news_content = _collect_sources_and_content(news_data)
    combined_content = "\n".join(news_content[:_MAX_PROMPT_ITEMS])

    user_prompt = f"""다음 뉴스 데이터를 기반으로 종합적인 기사를 작성하세요:

키워드: {news_data.get('keyword', '')}
수집된 뉴스 수: {len(news_data.get('news_items', []))}

뉴스 내용:
{combined_content}

스타일: {style}
- professional: 전통적인 뉴스 기사 스타일
- analytical: 분석적이고 심층적인 스타일
- investigative: 탐사보도 스타일

위의 데이터를 종합하여 통찰력 있는 기사를 JSON 형식으로 작성해주세요."""

    try:
        # Call Claude API
        response = await claude_client.messages.create(
            model=_CLAUDE_MODEL,
            max_tokens=4000,
            temperature=0.7,
            system=_SYSTEM_PROMPT,
            messages=[
                {"role": "user", "content": user_prompt}
            ]
        )

        article_data = _extract_json_object(response.content[0].text)

        # ``or {}`` tolerates an explicit ``"entities": null``; events are
        # normalised first so legacy string entries do not fail validation.
        entities_data = dict(article_data.get("entities") or {})
        entities_data["events"] = _parse_events(entities_data.get("events"))

        article = GeneratedArticle(
            title=article_data.get("title", ""),
            summary=article_data.get("summary", ""),
            subtopics=[
                SubTopic(
                    title=st.get("title", ""),
                    content=st.get("content", [])
                ) for st in article_data.get("subtopics", [])
            ],
            categories=article_data.get("categories", []),
            entities=Entities(**entities_data),
            sources=sources_info,
            generation_metadata={
                "style": style,
                "keyword": news_data.get('keyword', ''),
                "model": _CLAUDE_MODEL,
                "timestamp": datetime.now().isoformat()
            }
        )

        logger.info(f"Successfully generated article: {article.title}")
        return article

    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse Claude response as JSON: {e}")
        raise
    except Exception as e:
        logger.error(f"Error generating article with Claude: {e}")
        raise
"""
AI Writer Service

Professional news article generation service built on the Claude API.
"""
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from typing import List, Dict, Any, Optional
from datetime import datetime
from pydantic import BaseModel, Field
import httpx
import asyncio
import logging
import json
import uuid
from anthropic import AsyncAnthropic
import os

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="AI Writer Service",
    description="Claude API를 사용한 전문적인 뉴스 기사 생성 서비스",
    version="1.0.0"
)

# CORS settings
# NOTE(review): wildcard origins combined with allow_credentials=True lets any
# site make credentialed requests; restrict allow_origins before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Configuration
NEWS_AGGREGATOR_URL = os.getenv("NEWS_AGGREGATOR_URL", "http://news-aggregator-backend:8000")
# SECURITY: the key must come from the environment only. A literal key must
# never be used as the fallback value -- anything committed to the repository
# is exposed to every reader of the history and has to be rotated.
CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY")
if not CLAUDE_API_KEY:
    logger.warning("CLAUDE_API_KEY is not set; article generation requests will fail")
MONGODB_URL = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
DB_NAME = os.getenv("DB_NAME", "ai_writer_db")

# Claude client
# NOTE(review): with api_key=None the SDK is expected to fall back to its own
# ANTHROPIC_API_KEY lookup and fail at request time -- confirm against the
# installed anthropic version.
claude_client = AsyncAnthropic(api_key=CLAUDE_API_KEY)

# HTTP Client (long timeout: upstream aggregation can be slow)
http_client = httpx.AsyncClient(timeout=120.0)

# Queue Manager
from app.queue_manager import RedisQueueManager
from app.queue_models import NewsJobData, JobResult, JobStatus, QueueStats
queue_manager = RedisQueueManager(
    redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
)

# MongoDB client (optional for storing generated articles); both names are
# assigned by the startup hook.
from motor.motor_asyncio import AsyncIOMotorClient
mongo_client = None
db = None


# Data Models
class NewsSource(BaseModel):
    """A news source that was referenced for an article."""
    title: str = Field(..., description="뉴스 제목")
    url: str = Field(..., description="뉴스 URL")
    published_date: Optional[str] = Field(None, description="발행일")
    source_site: Optional[str] = Field(None, description="출처 사이트")


class SubTopic(BaseModel):
    """One article sub-topic: a heading plus 1-10 paragraphs."""
    title: str = Field(..., description="소주제 제목")
    content: List[str] = Field(..., description="소주제 내용 (문단 리스트)", min_items=1, max_items=10)


class Event(BaseModel):
    """An event mentioned in the news; date and location are optional."""
    name: str = Field(..., description="이벤트명")
    date: Optional[str] = Field(None, description="일시")
    location: Optional[str] = Field(None, description="장소")


class NewsEntities(BaseModel):
    """Entities contained in the news (people, organisations, events, keywords...)."""
    people: List[str] = Field(default_factory=list, description="뉴스에 포함된 인물")
    organizations: List[str] = Field(default_factory=list, description="뉴스에 포함된 기관")
    groups: List[str] = Field(default_factory=list, description="뉴스에 포함된 단체")
    countries: List[str] = Field(default_factory=list, description="뉴스에 포함된 나라")
    events: List[Event] = Field(default_factory=list, description="뉴스에 포함된 일정/이벤트 (일시와 장소 포함)")
    keywords: List[str] = Field(default_factory=list, description="핵심 키워드 (최대 10개)", max_items=10)


class GeneratedArticle(BaseModel):
    """A generated article: 2-6 sub-topics plus categories, entities and sources."""
    news_id: str = Field(..., description="뉴스 아이디")
    title: str = Field(..., description="뉴스 제목")
    created_at: str = Field(..., description="생성년월일시분초")
    summary: str = Field(..., description="한 줄 요약")
    subtopics: List[SubTopic] = Field(..., description="소주제 리스트", min_items=2, max_items=6)
    categories: List[str] = Field(..., description="카테고리 리스트")
    entities: NewsEntities = Field(..., description="뉴스에 포함된 개체들")
    source_keyword: Optional[str] = Field(None, description="원본 검색 키워드")
    source_count: Optional[int] = Field(None, description="참조한 소스 수")
    sources: List[NewsSource] = Field(default_factory=list, description="참고한 뉴스 소스 목록")


class ArticleGenerationRequest(BaseModel):
    """Request body for generating a single combined article."""
    keyword: str = Field(..., description="검색 키워드")
    limit: int = Field(5, description="처리할 RSS 항목 수", ge=1, le=20)
    google_results_per_title: int = Field(3, description="각 제목당 구글 검색 결과 수", ge=1, le=10)
    lang: str = Field("ko", description="언어 코드")
    country: str = Field("KR", description="국가 코드")
    style: str = Field("professional", description="기사 스타일 (professional/analytical/investigative)")


class PerItemGenerationRequest(BaseModel):
    """Request body for generating one article per news item."""
    keyword: str = Field(..., description="검색 키워드")
    limit: Optional[int] = Field(None, description="처리할 RSS 항목 수 (None이면 전체)")
    google_results_per_title: int = Field(3, description="각 제목당 구글 검색 결과 수", ge=1, le=10)
    lang: str = Field("ko", description="언어 코드")
    country: str = Field("KR", description="국가 코드")
    style: str = Field("professional", description="기사 스타일 (professional/analytical/investigative)")
    skip_existing: bool = Field(True, description="이미 생성된 기사는 건너뛰기")
@app.on_event("startup")
async def startup():
    """Create the MongoDB client and connect the Redis queue at service start.

    Connection failures are logged and swallowed, so the service still boots
    when a backing service is unavailable.
    """
    global mongo_client, db
    try:
        mongo_client = AsyncIOMotorClient(MONGODB_URL)
        db = mongo_client[DB_NAME]
        logger.info("AI Writer Service starting...")
        logger.info(f"Connected to MongoDB: {MONGODB_URL}")

        # Redis queue connection
        await queue_manager.connect()
        logger.info("Connected to Redis queue")
    except Exception as exc:
        logger.error(f"Failed to connect to services: {exc}")


@app.on_event("shutdown")
async def shutdown():
    """Close the HTTP client, the MongoDB client (if any) and the Redis queue."""
    await http_client.aclose()
    if mongo_client:
        mongo_client.close()
    await queue_manager.disconnect()
    logger.info("AI Writer Service stopped")


@app.get("/")
async def root():
    """Return service metadata and the list of available endpoints."""
    endpoints = {
        "generate_article": "POST /api/generate",
        "generate_per_item": "POST /api/generate/per-item",
        "generate_from_aggregated": "POST /api/generate/from-aggregated",
        "get_article": "GET /api/articles/{article_id}",
        "list_articles": "GET /api/articles",
        "health": "GET /health"
    }
    return {
        "service": "AI Writer Service",
        "version": "1.0.0",
        "description": "Claude API를 사용한 전문적인 뉴스 기사 생성 서비스",
        "endpoints": endpoints
    }


@app.get("/health")
async def health_check():
    """Report the health of the News Aggregator and MongoDB dependencies.

    Returns "healthy" when both dependencies respond, "degraded" when at
    least one does not, and "unhealthy" (with the error text) when a check
    raises.
    """
    try:
        # News Aggregator: healthy only on HTTP 200 from its /health endpoint
        resp = await http_client.get(f"{NEWS_AGGREGATOR_URL}/health")
        aggregator_ok = resp.status_code == 200

        # MongoDB: considered healthy once a ping succeeds on the open handle
        mongo_ok = db is not None
        if mongo_ok:
            await db.command("ping")

        overall = "healthy" if (aggregator_ok and mongo_ok) else "degraded"
        services = {
            "news_aggregator": "healthy" if aggregator_ok else "unhealthy",
            "mongodb": "healthy" if mongo_ok else "unhealthy",
            "claude_api": "configured"
        }
        return {
            "status": overall,
            "services": services,
            "timestamp": datetime.now().isoformat()
        }
    except Exception as exc:
        return {
            "status": "unhealthy",
            "error": str(exc),
            "timestamp": datetime.now().isoformat()
        }
# Claude model used for article generation.
_CLAUDE_MODEL = "claude-3-5-sonnet-20241022"

# Upper bounds mirrored from the response models (SubTopic.content,
# GeneratedArticle.subtopics, NewsEntities.keywords). Model output is clamped
# to them so an over-long answer does not fail validation.
_MAX_SUBTOPICS = 6
_MAX_PARAGRAPHS = 10
_MAX_KEYWORDS = 10

# Each fetched page body is cut to this many characters before it is placed
# in the prompt; only the first _MAX_PROMPT_ITEMS items are sent at all.
_MAX_CONTENT_CHARS = 1000
_MAX_PROMPT_ITEMS = 10

# System prompt: instructs Claude to answer with a single JSON document whose
# shape matches GeneratedArticle.
_SYSTEM_PROMPT = """당신은 전문적인 한국 언론사의 수석 기자입니다.
    제공된 데이터를 기반으로 깊이 있고 통찰력 있는 기사를 작성해야 합니다.
    기사는 다음 요구사항을 충족해야 합니다:

    1. 소주제는 최소 2개, 최대 6개로 구성해야 합니다
    2. 각 소주제는 최소 1개, 최대 10개의 문단으로 구성해야 합니다
    3. 전문적이고 객관적인 어조를 유지해야 합니다
    4. 사실에 기반한 분석과 통찰을 제공해야 합니다
    5. 한국 독자를 대상으로 작성되어야 합니다
    6. 이벤트 정보는 가능한 일시와 장소를 포함해야 합니다
    7. 핵심 키워드를 최대 10개까지 추출해야 합니다

    반드시 다음 JSON 형식으로 응답하세요:
    {
        "title": "기사 제목",
        "summary": "한 줄 요약 (100자 이내)",
        "subtopics": [
            {
                "title": "소주제 제목",
                "content": ["문단1", "문단2", ...]  // 1-10개 문단
            }
        ],  // 2-6개 소주제
        "categories": ["카테고리1", "카테고리2"],
        "entities": {
            "people": ["인물1", "인물2"],
            "organizations": ["기관1", "기관2"],
            "groups": ["단체1", "단체2"],
            "countries": ["나라1", "나라2"],
            "events": [
                {
                    "name": "이벤트명",
                    "date": "2025년 1월 15일",  // 선택사항
                    "location": "서울 코엑스"  // 선택사항
                }
            ],
            "keywords": ["키워드1", "키워드2", ...]  // 최대 10개
        }
    }"""


def _collect_sources(news_data: Dict[str, Any]) -> List[NewsSource]:
    """Return one NewsSource per RSS item / Google result that has a title and link."""
    sources: List[NewsSource] = []
    for item in news_data.get("news_items", []):
        rss_title = item.get('rss_title', '')
        rss_link = item.get('rss_link', '')
        if rss_title and rss_link:
            sources.append(NewsSource(
                title=rss_title,
                url=rss_link,
                published_date=item.get('rss_published', ''),
                source_site="RSS Feed"
            ))
        for result in item.get("google_results", []):
            if "title" in result and "link" in result:
                sources.append(NewsSource(
                    title=result.get('title', ''),
                    url=result.get('link', ''),
                    published_date=None,
                    source_site="Google Search"
                ))
    return sources


def _build_news_content(news_data: Dict[str, Any]) -> List[str]:
    """Return one prompt fragment (title plus truncated bodies) per news item."""
    fragments: List[str] = []
    for item in news_data.get("news_items", []):
        item_text = f"제목: {item.get('rss_title', '')}\n"
        for result in item.get("google_results", []):
            content = result.get("full_content")
            if not content:
                continue
            if isinstance(content, dict):
                # ``or ''`` guards against an explicit null body
                body = content.get('content') or ''
                item_text += f"출처: {content.get('url', '')}\n"
                item_text += f"내용: {body[:_MAX_CONTENT_CHARS]}...\n\n"
            else:
                item_text += f"내용: {str(content)[:_MAX_CONTENT_CHARS]}...\n\n"
        fragments.append(item_text)
    return fragments


def _extract_json_object(text: str) -> Dict[str, Any]:
    """Parse the first-``{``-to-last-``}`` span of *text*, or all of it if none exists."""
    start = text.find('{')
    end = text.rfind('}') + 1
    if start != -1 and end > start:
        return json.loads(text[start:end])
    # If no JSON object span is found, try to parse the entire content
    return json.loads(text)


def _parse_events(raw_events: Any) -> List[Event]:
    """Convert Claude's ``events`` value into Event models.

    Handles both the old format (event name as a plain string) and the new
    format (object with name, date, location). Other entry types are ignored.
    """
    events: List[Event] = []
    for raw in raw_events or []:
        if isinstance(raw, str):
            events.append(Event(name=raw))
        elif isinstance(raw, dict):
            events.append(Event(
                name=raw.get("name", ""),
                date=raw.get("date"),
                location=raw.get("location")
            ))
    return events


async def generate_article_with_claude(news_data: Dict[str, Any], style: str = "professional") -> GeneratedArticle:
    """Generate an article from aggregated news data using the Claude API.

    Args:
        news_data: Aggregated news; the code reads ``keyword`` and
            ``news_items`` (each with ``rss_title``, ``rss_link``,
            ``rss_published`` and ``google_results``).
        style: Article style passed into the prompt
            (professional / analytical / investigative).

    Returns:
        The generated article with a fresh UUID, creation timestamp and the
        list of collected sources.

    Raises:
        HTTPException: status 500 for any failure (API error, unparsable
            response, model validation); the original exception is chained.
    """
    try:
        sources_info = _collect_sources(news_data)
        news_content = _build_news_content(news_data)

        # Limit to prevent token overflow
        combined_content = "\n".join(news_content[:_MAX_PROMPT_ITEMS])

        user_prompt = f"""다음 뉴스 데이터를 기반으로 종합적인 기사를 작성하세요:

키워드: {news_data.get('keyword', '')}
수집된 뉴스 수: {len(news_data.get('news_items', []))}

뉴스 내용:
{combined_content}

스타일: {style}
- professional: 전통적인 뉴스 기사 스타일
- analytical: 분석적이고 심층적인 스타일
- investigative: 탐사보도 스타일

위의 데이터를 종합하여 통찰력 있는 기사를 JSON 형식으로 작성해주세요."""

        # Call Claude API
        response = await claude_client.messages.create(
            model=_CLAUDE_MODEL,
            max_tokens=4000,
            temperature=0.7,
            system=_SYSTEM_PROMPT,
            messages=[
                {"role": "user", "content": user_prompt}
            ]
        )

        article_data = _extract_json_object(response.content[0].text)

        # ``or {}`` / ``or []`` tolerate explicit nulls in the model output.
        entities_data = article_data.get("entities") or {}
        raw_subtopics = (article_data.get("subtopics") or [])[:_MAX_SUBTOPICS]

        return GeneratedArticle(
            news_id=str(uuid.uuid4()),
            title=article_data.get("title", "제목 없음"),
            created_at=datetime.now().isoformat(),
            summary=article_data.get("summary", ""),
            subtopics=[
                SubTopic(
                    title=st.get("title", ""),
                    content=(st.get("content") or [])[:_MAX_PARAGRAPHS]
                ) for st in raw_subtopics
            ],
            categories=article_data.get("categories", []),
            entities=NewsEntities(
                people=entities_data.get("people", []),
                organizations=entities_data.get("organizations", []),
                groups=entities_data.get("groups", []),
                countries=entities_data.get("countries", []),
                events=_parse_events(entities_data.get("events")),
                keywords=(entities_data.get("keywords") or [])[:_MAX_KEYWORDS]
            ),
            source_keyword=news_data.get("keyword"),
            source_count=len(news_data.get("news_items", [])),
            sources=sources_info
        )

    except Exception as e:
        # logger.exception keeps the traceback; ``from e`` keeps the cause.
        logger.exception(f"Error generating article with Claude: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to generate article: {str(e)}") from e
article to MongoDB: {e}") + + return article + + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error from aggregator service: {e}") + raise HTTPException(status_code=e.response.status_code, detail=str(e)) + except Exception as e: + logger.error(f"Error in generate_article: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/generate/from-aggregated", response_model=GeneratedArticle) +async def generate_from_aggregated_data(news_data: Dict[str, Any], style: str = "professional"): + """ + 이미 수집된 뉴스 데이터로부터 직접 기사 생성 + (News Aggregator 결과를 직접 입력받아 처리) + """ + try: + if not news_data.get("news_items"): + raise HTTPException(status_code=400, detail="No news items in provided data") + + # Generate article using Claude + logger.info(f"Generating article from {len(news_data['news_items'])} news items") + article = await generate_article_with_claude(news_data, style) + + # Store article in MongoDB + if db is not None: + try: + article_dict = article.dict() + await db.articles.insert_one(article_dict) + logger.info(f"Article saved with ID: {article.news_id}") + except Exception as e: + logger.error(f"Failed to save article to MongoDB: {e}") + + return article + + except Exception as e: + logger.error(f"Error in generate_from_aggregated_data: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/articles/{article_id}", response_model=GeneratedArticle) +async def get_article(article_id: str): + """저장된 기사 조회""" + if db is None: + raise HTTPException(status_code=503, detail="Database not available") + + article = await db.articles.find_one({"news_id": article_id}) + if not article: + raise HTTPException(status_code=404, detail="Article not found") + + # Convert MongoDB document to GeneratedArticle + article.pop("_id", None) + return GeneratedArticle(**article) + +@app.get("/api/articles") +async def list_articles( + skip: int = 0, + limit: int = 10, + keyword: Optional[str] = None, + category: Optional[str] = None +): + 
"""저장된 기사 목록 조회""" + if db is None: + raise HTTPException(status_code=503, detail="Database not available") + + query = {} + if keyword: + query["source_keyword"] = {"$regex": keyword, "$options": "i"} + if category: + query["categories"] = category + + cursor = db.articles.find(query).skip(skip).limit(limit).sort("created_at", -1) + articles = [] + async for article in cursor: + article.pop("_id", None) + articles.append(article) + + total = await db.articles.count_documents(query) + + return { + "articles": articles, + "total": total, + "skip": skip, + "limit": limit + } + +@app.post("/api/generate/batch") +async def generate_batch_articles(keywords: List[str], style: str = "professional"): + """여러 키워드에 대한 기사 일괄 생성""" + results = [] + errors = [] + + for keyword in keywords[:5]: # Limit to 5 keywords to prevent overload + try: + request = ArticleGenerationRequest( + keyword=keyword, + style=style + ) + article = await generate_article(request) + results.append({ + "keyword": keyword, + "status": "success", + "article_id": article.news_id, + "title": article.title + }) + except Exception as e: + errors.append({ + "keyword": keyword, + "status": "error", + "error": str(e) + }) + + return { + "success": results, + "errors": errors, + "total_processed": len(results) + len(errors) + } + +@app.post("/api/generate/per-item") +async def generate_articles_per_rss_item(request: PerItemGenerationRequest): + """ + RSS 피드의 각 아이템별로 개별 기사 생성 + 각 RSS 아이템이 독립적인 기사가 됨 + 중복 생성 방지 기능 포함 + """ + try: + # Step 1: Get aggregated news from News Aggregator service + logger.info(f"Fetching aggregated news for keyword: {request.keyword}") + + # limit이 None이면 모든 항목 처리 (최대 100개로 제한) + actual_limit = request.limit if request.limit is not None else 100 + + aggregator_response = await http_client.get( + f"{NEWS_AGGREGATOR_URL}/api/aggregate", + params={ + "q": request.keyword, + "limit": actual_limit, + "google_results_per_title": request.google_results_per_title, + "lang": request.lang, + 
"country": request.country + } + ) + aggregator_response.raise_for_status() + news_data = aggregator_response.json() + + if not news_data.get("news_items"): + raise HTTPException(status_code=404, detail="No news items found for the given keyword") + + # Step 2: Check for existing articles if skip_existing is True + existing_titles = set() + skipped_count = 0 + + if request.skip_existing and db is not None: + # RSS 제목으로 중복 체크 (최근 24시간 내) + from datetime import datetime, timedelta + cutoff_time = (datetime.now() - timedelta(hours=24)).isoformat() + + existing_cursor = db.articles.find( + { + "source_keyword": request.keyword, + "created_at": {"$gte": cutoff_time} + }, + {"sources": 1} + ) + + async for doc in existing_cursor: + for source in doc.get("sources", []): + if source.get("source_site") == "RSS Feed": + existing_titles.add(source.get("title", "")) + + # Step 3: Generate individual article for each RSS item + generated_articles = [] + + for item in news_data["news_items"]: + try: + rss_title = item.get('rss_title', '') + + # Skip if already exists + if request.skip_existing and rss_title in existing_titles: + logger.info(f"Skipping already generated article: {rss_title}") + skipped_count += 1 + continue + + logger.info(f"Generating article for RSS item: {rss_title or 'Unknown'}") + + # Create individual news_data for this item + individual_news_data = { + "keyword": news_data.get("keyword"), + "news_items": [item] # Single item only + } + + # Generate article for this single item + article = await generate_article_with_claude(individual_news_data, request.style) + + # Store in MongoDB + if db is not None: + try: + article_dict = article.dict() + await db.articles.insert_one(article_dict) + logger.info(f"Article saved with ID: {article.news_id}") + except Exception as e: + logger.error(f"Failed to save article to MongoDB: {e}") + + generated_articles.append(article) + + except Exception as e: + logger.error(f"Failed to generate article for item: {e}") + # 
Continue with next item even if one fails + continue + + if not generated_articles and skipped_count == 0: + raise HTTPException(status_code=500, detail="Failed to generate any articles") + + # Return all generated articles + return { + "total_generated": len(generated_articles), + "total_items": len(news_data["news_items"]), + "skipped_duplicates": skipped_count, + "articles": generated_articles + } + + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error from aggregator service: {e}") + raise HTTPException(status_code=e.response.status_code, detail=str(e)) + except Exception as e: + logger.error(f"Error in generate_articles_per_rss_item: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +# Queue Management Endpoints + +@app.post("/api/queue/enqueue") +async def enqueue_items(request: PerItemGenerationRequest): + """ + RSS 아이템들을 큐에 추가 (비동기 처리) + Consumer 워커가 백그라운드에서 처리 + """ + try: + # Step 1: Get aggregated news from News Aggregator service + logger.info(f"Fetching aggregated news for enqueue: {request.keyword}") + + actual_limit = request.limit if request.limit is not None else 100 + + aggregator_response = await http_client.get( + f"{NEWS_AGGREGATOR_URL}/api/aggregate", + params={ + "q": request.keyword, + "limit": actual_limit, + "google_results_per_title": request.google_results_per_title, + "lang": request.lang, + "country": request.country + } + ) + aggregator_response.raise_for_status() + news_data = aggregator_response.json() + + if not news_data.get("news_items"): + raise HTTPException(status_code=404, detail="No news items found for the given keyword") + + # Step 2: Check for existing articles if skip_existing is True + existing_titles = set() + skipped_count = 0 + + if request.skip_existing and db is not None: + from datetime import datetime, timedelta + cutoff_time = (datetime.now() - timedelta(hours=24)).isoformat() + + existing_cursor = db.articles.find( + { + "source_keyword": request.keyword, + "created_at": {"$gte": 
cutoff_time} + }, + {"sources": 1} + ) + + async for doc in existing_cursor: + for source in doc.get("sources", []): + if source.get("source_site") == "RSS Feed": + existing_titles.add(source.get("title", "")) + + # Step 3: Enqueue items for processing + enqueued_jobs = [] + + for item in news_data["news_items"]: + rss_title = item.get('rss_title', '') + + # Skip if already exists + if request.skip_existing and rss_title in existing_titles: + logger.info(f"Skipping already generated article: {rss_title}") + skipped_count += 1 + continue + + # Create job data + job_data = NewsJobData( + job_id=str(uuid.uuid4()), + keyword=request.keyword, + rss_title=rss_title, + rss_link=item.get('rss_link'), + rss_published=item.get('rss_published'), + google_results=item.get('google_results', []), + style=request.style, + created_at=datetime.now() + ) + + # Enqueue job + job_id = await queue_manager.enqueue(job_data) + enqueued_jobs.append({ + "job_id": job_id, + "title": rss_title[:100] + }) + + logger.info(f"Enqueued job {job_id} for: {rss_title}") + + return { + "total_enqueued": len(enqueued_jobs), + "total_items": len(news_data["news_items"]), + "skipped_duplicates": skipped_count, + "jobs": enqueued_jobs, + "message": f"{len(enqueued_jobs)} jobs added to queue for processing" + } + + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error from aggregator service: {e}") + raise HTTPException(status_code=e.response.status_code, detail=str(e)) + except Exception as e: + logger.error(f"Error in enqueue_items: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/queue/stats", response_model=QueueStats) +async def get_queue_stats(): + """큐 상태 및 통계 조회""" + try: + stats = await queue_manager.get_stats() + return stats + except Exception as e: + logger.error(f"Error getting queue stats: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.delete("/api/queue/clear") +async def clear_queue(): + """큐 초기화 (관리자용)""" + try: + await 
queue_manager.clear_queue() + return {"message": "Queue cleared successfully"} + except Exception as e: + logger.error(f"Error clearing queue: {e}") + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/backup-services/ai-writer/backend/app/queue_manager.py b/backup-services/ai-writer/backend/app/queue_manager.py new file mode 100644 index 0000000..2e0695a --- /dev/null +++ b/backup-services/ai-writer/backend/app/queue_manager.py @@ -0,0 +1,250 @@ +""" +Redis Queue Manager for AI Writer Service +Redis를 사용한 작업 큐 관리 +""" +import redis.asyncio as redis +import json +import uuid +from typing import Optional, List, Dict, Any +from datetime import datetime, timedelta +import logging +from queue_models import NewsJobData, JobResult, JobStatus, QueueStats + +logger = logging.getLogger(__name__) + +class RedisQueueManager: + """Redis 기반 작업 큐 매니저""" + + def __init__(self, redis_url: str = "redis://redis:6379"): + self.redis_url = redis_url + self.redis_client: Optional[redis.Redis] = None + + # Redis 키 정의 + self.QUEUE_KEY = "ai_writer:queue:pending" + self.PROCESSING_KEY = "ai_writer:queue:processing" + self.COMPLETED_KEY = "ai_writer:queue:completed" + self.FAILED_KEY = "ai_writer:queue:failed" + self.STATS_KEY = "ai_writer:stats" + self.WORKERS_KEY = "ai_writer:workers" + self.LOCK_PREFIX = "ai_writer:lock:" + + async def connect(self): + """Redis 연결""" + if not self.redis_client: + self.redis_client = await redis.from_url( + self.redis_url, + encoding="utf-8", + decode_responses=True + ) + logger.info("Connected to Redis queue") + + async def disconnect(self): + """Redis 연결 해제""" + if self.redis_client: + await self.redis_client.close() + self.redis_client = None + logger.info("Disconnected from Redis queue") + + async def enqueue(self, job_data: NewsJobData) -> str: + """작업을 큐에 추가""" + try: + if not job_data.job_id: + job_data.job_id = str(uuid.uuid4()) + + # JSON으로 직렬화 + job_json = job_data.json() + + # 우선순위에 따라 큐에 추가 + if 
job_data.priority > 0: + # 높은 우선순위는 앞쪽에 + await self.redis_client.lpush(self.QUEUE_KEY, job_json) + else: + # 일반 우선순위는 뒤쪽에 + await self.redis_client.rpush(self.QUEUE_KEY, job_json) + + # 통계 업데이트 + await self.redis_client.hincrby(self.STATS_KEY, "total_jobs", 1) + await self.redis_client.hincrby(self.STATS_KEY, "pending_jobs", 1) + + logger.info(f"Job {job_data.job_id} enqueued") + return job_data.job_id + + except Exception as e: + logger.error(f"Failed to enqueue job: {e}") + raise + + async def dequeue(self, timeout: int = 0) -> Optional[NewsJobData]: + """큐에서 작업 가져오기 (블로킹 가능)""" + try: + # 대기 중인 작업을 가져와서 처리 중 목록으로 이동 + if timeout > 0: + result = await self.redis_client.blmove( + self.QUEUE_KEY, + self.PROCESSING_KEY, + timeout, + "LEFT", + "RIGHT" + ) + else: + result = await self.redis_client.lmove( + self.QUEUE_KEY, + self.PROCESSING_KEY, + "LEFT", + "RIGHT" + ) + + if result: + # 통계 업데이트 + await self.redis_client.hincrby(self.STATS_KEY, "pending_jobs", -1) + await self.redis_client.hincrby(self.STATS_KEY, "processing_jobs", 1) + + return NewsJobData.parse_raw(result) + + return None + + except Exception as e: + logger.error(f"Failed to dequeue job: {e}") + return None + + async def mark_completed(self, job_id: str, article_id: str): + """작업을 완료로 표시""" + try: + # 처리 중 목록에서 작업 찾기 + processing_jobs = await self.redis_client.lrange(self.PROCESSING_KEY, 0, -1) + + for job_json in processing_jobs: + job = NewsJobData.parse_raw(job_json) + if job.job_id == job_id: + # 처리 중 목록에서 제거 + await self.redis_client.lrem(self.PROCESSING_KEY, 1, job_json) + + # 완료 결과 생성 + result = JobResult( + job_id=job_id, + status=JobStatus.COMPLETED, + article_id=article_id, + completed_at=datetime.now() + ) + + # 완료 목록에 추가 (최대 1000개 유지) + await self.redis_client.lpush(self.COMPLETED_KEY, result.json()) + await self.redis_client.ltrim(self.COMPLETED_KEY, 0, 999) + + # 통계 업데이트 + await self.redis_client.hincrby(self.STATS_KEY, "processing_jobs", -1) + await 
self.redis_client.hincrby(self.STATS_KEY, "completed_jobs", 1) + + logger.info(f"Job {job_id} marked as completed") + break + + except Exception as e: + logger.error(f"Failed to mark job as completed: {e}") + + async def mark_failed(self, job_id: str, error_message: str): + """작업을 실패로 표시""" + try: + # 처리 중 목록에서 작업 찾기 + processing_jobs = await self.redis_client.lrange(self.PROCESSING_KEY, 0, -1) + + for job_json in processing_jobs: + job = NewsJobData.parse_raw(job_json) + if job.job_id == job_id: + # 처리 중 목록에서 제거 + await self.redis_client.lrem(self.PROCESSING_KEY, 1, job_json) + + # 재시도 확인 + if job.retry_count < job.max_retries: + job.retry_count += 1 + # 다시 큐에 추가 + await self.redis_client.rpush(self.QUEUE_KEY, job.json()) + await self.redis_client.hincrby(self.STATS_KEY, "pending_jobs", 1) + logger.info(f"Job {job_id} requeued (retry {job.retry_count}/{job.max_retries})") + else: + # 실패 결과 생성 + result = JobResult( + job_id=job_id, + status=JobStatus.FAILED, + error_message=error_message, + completed_at=datetime.now() + ) + + # 실패 목록에 추가 + await self.redis_client.lpush(self.FAILED_KEY, result.json()) + await self.redis_client.ltrim(self.FAILED_KEY, 0, 999) + + # 통계 업데이트 + await self.redis_client.hincrby(self.STATS_KEY, "failed_jobs", 1) + logger.error(f"Job {job_id} marked as failed: {error_message}") + + await self.redis_client.hincrby(self.STATS_KEY, "processing_jobs", -1) + break + + except Exception as e: + logger.error(f"Failed to mark job as failed: {e}") + + async def get_stats(self) -> QueueStats: + """큐 통계 조회""" + try: + stats_data = await self.redis_client.hgetall(self.STATS_KEY) + + # 활성 워커 수 계산 + workers = await self.redis_client.smembers(self.WORKERS_KEY) + active_workers = 0 + for worker_id in workers: + # 워커가 최근 1분 이내에 활동했는지 확인 + last_ping = await self.redis_client.get(f"{self.WORKERS_KEY}:{worker_id}") + if last_ping: + last_ping_time = datetime.fromisoformat(last_ping) + if datetime.now() - last_ping_time < timedelta(minutes=1): + active_workers += 
1 + + return QueueStats( + pending_jobs=int(stats_data.get("pending_jobs", 0)), + processing_jobs=int(stats_data.get("processing_jobs", 0)), + completed_jobs=int(stats_data.get("completed_jobs", 0)), + failed_jobs=int(stats_data.get("failed_jobs", 0)), + total_jobs=int(stats_data.get("total_jobs", 0)), + workers_active=active_workers + ) + + except Exception as e: + logger.error(f"Failed to get stats: {e}") + return QueueStats( + pending_jobs=0, + processing_jobs=0, + completed_jobs=0, + failed_jobs=0, + total_jobs=0, + workers_active=0 + ) + + async def register_worker(self, worker_id: str): + """워커 등록""" + await self.redis_client.sadd(self.WORKERS_KEY, worker_id) + await self.redis_client.set( + f"{self.WORKERS_KEY}:{worker_id}", + datetime.now().isoformat(), + ex=300 # 5분 후 자동 만료 + ) + + async def ping_worker(self, worker_id: str): + """워커 활동 업데이트""" + await self.redis_client.set( + f"{self.WORKERS_KEY}:{worker_id}", + datetime.now().isoformat(), + ex=300 + ) + + async def unregister_worker(self, worker_id: str): + """워커 등록 해제""" + await self.redis_client.srem(self.WORKERS_KEY, worker_id) + await self.redis_client.delete(f"{self.WORKERS_KEY}:{worker_id}") + + async def clear_queue(self): + """큐 초기화 (테스트용)""" + await self.redis_client.delete(self.QUEUE_KEY) + await self.redis_client.delete(self.PROCESSING_KEY) + await self.redis_client.delete(self.COMPLETED_KEY) + await self.redis_client.delete(self.FAILED_KEY) + await self.redis_client.delete(self.STATS_KEY) + logger.info("Queue cleared") \ No newline at end of file diff --git a/backup-services/ai-writer/backend/app/queue_models.py b/backup-services/ai-writer/backend/app/queue_models.py new file mode 100644 index 0000000..6cb9402 --- /dev/null +++ b/backup-services/ai-writer/backend/app/queue_models.py @@ -0,0 +1,49 @@ +""" +Queue Models for AI Writer Service +Redis 큐에서 사용할 데이터 모델 정의 +""" +from pydantic import BaseModel, Field +from typing import Optional, List, Dict, Any +from datetime import datetime +from 
enum import Enum + +class JobStatus(str, Enum): + """작업 상태""" + PENDING = "pending" + PROCESSING = "processing" + COMPLETED = "completed" + FAILED = "failed" + SKIPPED = "skipped" + +class NewsJobData(BaseModel): + """큐에 들어갈 뉴스 작업 데이터""" + job_id: str = Field(..., description="작업 고유 ID") + keyword: str = Field(..., description="원본 검색 키워드") + rss_title: str = Field(..., description="RSS 제목") + rss_link: Optional[str] = Field(None, description="RSS 링크") + rss_published: Optional[str] = Field(None, description="RSS 발행일") + google_results: List[Dict[str, Any]] = Field(default_factory=list, description="구글 검색 결과") + style: str = Field("professional", description="기사 스타일") + created_at: datetime = Field(default_factory=datetime.now, description="작업 생성 시간") + priority: int = Field(0, description="우선순위 (높을수록 우선)") + retry_count: int = Field(0, description="재시도 횟수") + max_retries: int = Field(3, description="최대 재시도 횟수") + +class JobResult(BaseModel): + """작업 결과""" + job_id: str = Field(..., description="작업 고유 ID") + status: JobStatus = Field(..., description="작업 상태") + article_id: Optional[str] = Field(None, description="생성된 기사 ID") + error_message: Optional[str] = Field(None, description="에러 메시지") + processing_time: Optional[float] = Field(None, description="처리 시간(초)") + completed_at: Optional[datetime] = Field(None, description="완료 시간") + +class QueueStats(BaseModel): + """큐 통계""" + pending_jobs: int = Field(..., description="대기 중인 작업 수") + processing_jobs: int = Field(..., description="처리 중인 작업 수") + completed_jobs: int = Field(..., description="완료된 작업 수") + failed_jobs: int = Field(..., description="실패한 작업 수") + total_jobs: int = Field(..., description="전체 작업 수") + workers_active: int = Field(..., description="활성 워커 수") + average_processing_time: Optional[float] = Field(None, description="평균 처리 시간(초)") \ No newline at end of file diff --git a/backup-services/ai-writer/backend/app/worker.py b/backup-services/ai-writer/backend/app/worker.py new file mode 100644 index 
0000000..e859904 --- /dev/null +++ b/backup-services/ai-writer/backend/app/worker.py @@ -0,0 +1,201 @@ +""" +AI Writer Consumer Worker +큐에서 작업을 가져와 기사를 생성하는 백그라운드 워커 +""" +import asyncio +import logging +import signal +import sys +import uuid +from datetime import datetime +from typing import Optional +import os + +from motor.motor_asyncio import AsyncIOMotorClient +from anthropic import AsyncAnthropic + +from queue_manager import RedisQueueManager +from queue_models import NewsJobData, JobStatus +from article_generator import generate_article_with_claude + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +class AIWriterWorker: + """AI Writer 백그라운드 워커""" + + def __init__(self, worker_id: Optional[str] = None): + self.worker_id = worker_id or str(uuid.uuid4()) + self.queue_manager = RedisQueueManager( + redis_url=os.getenv("REDIS_URL", "redis://redis:6379") + ) + + # MongoDB 설정 + self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") + self.db_name = os.getenv("DB_NAME", "ai_writer_db") + self.mongo_client = None + self.db = None + + # Claude 클라이언트 + self.claude_api_key = os.getenv("CLAUDE_API_KEY") + self.claude_client = AsyncAnthropic(api_key=self.claude_api_key) + + # 실행 상태 + self.running = False + self.tasks = [] + + async def start(self, num_workers: int = 1): + """워커 시작""" + logger.info(f"Starting AI Writer Worker {self.worker_id} with {num_workers} concurrent workers") + + try: + # Redis 연결 + await self.queue_manager.connect() + await self.queue_manager.register_worker(self.worker_id) + + # MongoDB 연결 + self.mongo_client = AsyncIOMotorClient(self.mongodb_url) + self.db = self.mongo_client[self.db_name] + logger.info("Connected to MongoDB") + + self.running = True + + # 여러 워커 태스크 생성 + for i in range(num_workers): + task = asyncio.create_task(self._process_jobs(f"{self.worker_id}-{i}")) + self.tasks.append(task) + + # 워커 핑 
태스크 + ping_task = asyncio.create_task(self._ping_worker()) + self.tasks.append(ping_task) + + # 모든 태스크 대기 + await asyncio.gather(*self.tasks) + + except Exception as e: + logger.error(f"Worker error: {e}") + finally: + await self.stop() + + async def stop(self): + """워커 정지""" + logger.info(f"Stopping AI Writer Worker {self.worker_id}") + self.running = False + + # 태스크 취소 + for task in self.tasks: + task.cancel() + + # 워커 등록 해제 + await self.queue_manager.unregister_worker(self.worker_id) + + # 연결 해제 + await self.queue_manager.disconnect() + if self.mongo_client: + self.mongo_client.close() + + logger.info(f"Worker {self.worker_id} stopped") + + async def _process_jobs(self, sub_worker_id: str): + """작업 처리 루프""" + logger.info(f"Sub-worker {sub_worker_id} started") + + while self.running: + try: + # 큐에서 작업 가져오기 (5초 타임아웃) + job = await self.queue_manager.dequeue(timeout=5) + + if job: + logger.info(f"[{sub_worker_id}] Processing job {job.job_id}: {job.rss_title[:50]}") + start_time = datetime.now() + + try: + # 기사 생성 + article = await self._generate_article(job) + + # MongoDB에 저장 + if article and self.db is not None: + article_dict = article.dict() + await self.db.articles.insert_one(article_dict) + + # 처리 시간 계산 + processing_time = (datetime.now() - start_time).total_seconds() + + # 완료 표시 + await self.queue_manager.mark_completed( + job.job_id, + article.news_id + ) + + logger.info(f"[{sub_worker_id}] Job {job.job_id} completed in {processing_time:.2f}s") + else: + raise Exception("Failed to generate article") + + except Exception as e: + logger.error(f"[{sub_worker_id}] Job {job.job_id} failed: {e}") + await self.queue_manager.mark_failed(job.job_id, str(e)) + + except asyncio.CancelledError: + break + except Exception as e: + logger.error(f"[{sub_worker_id}] Worker error: {e}") + await asyncio.sleep(1) + + logger.info(f"Sub-worker {sub_worker_id} stopped") + + async def _generate_article(self, job: NewsJobData): + """기사 생성""" + # 작업 데이터를 기존 형식으로 변환 + news_data = { + 
"keyword": job.keyword, + "news_items": [{ + "rss_title": job.rss_title, + "rss_link": job.rss_link, + "rss_published": job.rss_published, + "google_results": job.google_results + }] + } + + # 기사 생성 (기존 함수 재사용) + return await generate_article_with_claude(news_data, job.style) + + async def _ping_worker(self): + """워커 활동 신호 전송""" + while self.running: + try: + await self.queue_manager.ping_worker(self.worker_id) + await asyncio.sleep(30) # 30초마다 핑 + except asyncio.CancelledError: + break + except Exception as e: + logger.error(f"Ping error: {e}") + +def signal_handler(signum, frame): + """시그널 핸들러""" + logger.info(f"Received signal {signum}") + sys.exit(0) + +async def main(): + """메인 함수""" + # 시그널 핸들러 등록 + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + # 워커 수 설정 (환경변수 또는 기본값) + num_workers = int(os.getenv("WORKER_COUNT", "3")) + + # 워커 시작 + worker = AIWriterWorker() + try: + await worker.start(num_workers=num_workers) + except KeyboardInterrupt: + logger.info("Keyboard interrupt received") + finally: + await worker.stop() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/backup-services/ai-writer/backend/article_전기차_analytical.json b/backup-services/ai-writer/backend/article_전기차_analytical.json new file mode 100644 index 0000000..1600e04 --- /dev/null +++ b/backup-services/ai-writer/backend/article_전기차_analytical.json @@ -0,0 +1,62 @@ +{ + "news_id": "49bdf2f3-4dbc-47eb-8c49-5d9536f41d87", + "title": "유럽 전기차 시장의 새로운 전환점: 현대차·기아의 소형 전기차 전략과 글로벌 경쟁 구도", + "created_at": "2025-09-13T00:29:13.376541", + "summary": "현대차와 기아가 IAA 2025에서 소형 전기차 콘셉트 모델을 공개하며 유럽 시장 공략을 가속화, 배터리 협력과 가격 경쟁력으로 승부수", + "subtopics": [ + { + "title": "현대차·기아의 유럽 소형 전기차 시장 공략", + "content": [ + "현대자동차와 기아가 IAA 2025에서 콘셉트 쓰리와 EV2를 공개하며 유럽 소형 전기차 시장 공략에 박차를 가하고 있다. 이는 유럽의 급성장하는 소형 전기차 수요에 대응하기 위한 전략적 움직임으로 평가된다.", + "특히 두 모델은 실용성과 경제성을 모두 갖춘 제품으로, 유럽 소비자들의 니즈를 정확히 겨냥했다는 평가를 받고 있다. 
현대차그룹은 이를 통해 유럽 시장에서의 입지를 더욱 강화할 것으로 전망된다.", + "현지 전문가들은 현대차그룹의 이번 전략이 유럽 전기차 시장의 '골든타임'을 잡기 위한 시의적절한 움직임이라고 분석하고 있다." + ] + }, + { + "title": "배터리 공급망 전략의 중요성 부각", + "content": [ + "전기차 시장에서 배터리 공급망 확보가 핵심 경쟁력으로 부상하고 있다. IAA 모빌리티에서 폴스타가 SK온을 배터리 파트너로 공개적으로 언급한 것이 주목받고 있다.", + "배터리 제조사 선정에 대한 정보가 제한적인 가운데, 안정적인 배터리 공급망 구축이 전기차 제조사들의 성패를 좌우할 것으로 예상된다.", + "특히 소형 전기차의 경우 가격 경쟁력이 중요한 만큼, 효율적인 배터리 수급 전략이 시장 점유율 확대의 관건이 될 전망이다." + ] + }, + { + "title": "글로벌 전기차 시장의 경쟁 구도 변화", + "content": [ + "유럽 전기차 시장에서 소형 모델을 중심으로 한 경쟁이 본격화되면서, 제조사들의 전략적 포지셔닝이 더욱 중요해지고 있다.", + "현대차그룹은 품질과 기술력을 바탕으로 한 프리미엄 이미지와 함께, 합리적인 가격대의 소형 전기차 라인업으로 시장 공략을 가속화하고 있다.", + "이러한 변화는 글로벌 자동차 산업의 패러다임 전환을 반영하며, 향후 전기차 시장의 주도권 경쟁이 더욱 치열해질 것으로 예상된다." + ] + } + ], + "categories": [ + "자동차", + "경제", + "환경", + "기술" + ], + "entities": { + "people": [], + "organizations": [ + "현대자동차", + "기아", + "SK온", + "폴스타" + ], + "groups": [ + "유럽 자동차 제조사", + "배터리 제조업체" + ], + "countries": [ + "대한민국", + "독일", + "유럽연합" + ], + "events": [ + "IAA 2025", + "IAA 모빌리티" + ] + }, + "source_keyword": "전기차", + "source_count": 3 +} \ No newline at end of file diff --git a/backup-services/ai-writer/backend/article_전기차_investigative.json b/backup-services/ai-writer/backend/article_전기차_investigative.json new file mode 100644 index 0000000..3162f6f --- /dev/null +++ b/backup-services/ai-writer/backend/article_전기차_investigative.json @@ -0,0 +1,66 @@ +{ + "news_id": "8a51bead-4558-4351-a5b2-b5e5ba1b3d38", + "title": "현대차·기아, 유럽 전기차 시장서 소형 모델로 새 돌파구 모색", + "created_at": "2025-09-13T00:29:35.661926", + "summary": "IAA 모빌리티 2025에서 현대차·기아가 소형 전기차 콘셉트카를 공개하며 유럽 시장 공략 가속화. 배터리 공급망 확보와 가격 경쟁력이 성공 관건", + "subtopics": [ + { + "title": "유럽 소형 전기차 시장 공략 본격화", + "content": [ + "현대차와 기아가 IAA 모빌리티 2025에서 각각 콘셉트 쓰리와 EV2를 공개하며 유럽 소형 전기차 시장 공략에 시동을 걸었다. 
이는 유럽의 높은 환경 규제와 도심 이동성 수요에 대응하기 위한 전략적 움직임으로 해석된다.", + "특히 두 모델은 기존 전기차 대비 컴팩트한 사이즈와 효율적인 배터리 시스템을 갖추고 있어, 유럽 소비자들의 실용적 수요를 겨냥했다는 평가를 받고 있다.", + "업계 전문가들은 현대차그룹의 이번 행보가 테슬라와 중국 업체들이 주도하고 있는 유럽 전기차 시장에서 새로운 돌파구를 마련할 수 있을 것으로 전망하고 있다." + ] + }, + { + "title": "배터리 공급망 확보 과제", + "content": [ + "전기차 성공의 핵심 요소인 배터리 수급에서 SK온이 주요 공급 파트너로 부상했다. 폴스타가 SK온을 배터리 공급사로 공개적으로 언급한 것이 이를 방증한다.", + "그러나 업계에서는 배터리 제조사들의 정보 공개가 제한적이어서 실제 공급망 구조를 파악하기 어려운 상황이다. 이는 글로벌 배터리 수급 경쟁이 치열해지고 있음을 시사한다.", + "안정적인 배터리 공급망 확보는 향후 소형 전기차의 가격 경쟁력과 직결되는 만큼, 현대차그룹의 추가적인 파트너십 구축이 예상된다." + ] + }, + { + "title": "가격 경쟁력 확보 전략", + "content": [ + "소형 전기차 시장에서의 성공을 위해서는 합리적인 가격대 책정이 필수적이다. 현대차그룹은 규모의 경제를 통한 원가 절감을 목표로 하고 있다.", + "특히 유럽 시장에서는 테슬라와 중국 업체들의 공격적인 가격 정책에 대응해야 하는 상황이다. 현대차그룹은 프리미엄 품질을 유지하면서도 경쟁력 있는 가격대를 제시하는 것을 목표로 하고 있다.", + "전문가들은 배터리 기술 혁신과 생산 효율화를 통해 가격 경쟁력을 확보하는 것이 향후 성공의 핵심이 될 것으로 전망하고 있다." + ] + } + ], + "categories": [ + "자동차", + "경제", + "산업", + "기술" + ], + "entities": { + "people": [ + "김성수", + "조용하", + "박종면" + ], + "organizations": [ + "현대자동차", + "기아", + "SK온", + "폴스타" + ], + "groups": [ + "유럽 자동차 제조사", + "중국 전기차 업체" + ], + "countries": [ + "대한민국", + "독일", + "중국" + ], + "events": [ + "IAA 모빌리티 2025", + "전기차 배터리 공급 계약" + ] + }, + "source_keyword": "전기차", + "source_count": 3 +} \ No newline at end of file diff --git a/backup-services/ai-writer/backend/article_전기차_professional.json b/backup-services/ai-writer/backend/article_전기차_professional.json new file mode 100644 index 0000000..4b5eb9f --- /dev/null +++ b/backup-services/ai-writer/backend/article_전기차_professional.json @@ -0,0 +1,62 @@ +{ + "news_id": "2c4cb595-9542-45ee-b4b9-2135c46950e3", + "title": "현대차·기아, 유럽 전기차 시장서 소형 모델로 승부수...배터리 협력 강화 주목", + "created_at": "2025-09-13T00:28:51.371773", + "summary": "현대차·기아가 유럽 전기차 시장에서 콘셉트 쓰리와 EV2로 소형 전기차 시장 공략 나서, 배터리 협력사 선정 등 경쟁력 강화 움직임 본격화", + "subtopics": [ + { + "title": "유럽 소형 전기차 시장 공략 본격화", + "content": [ + "현대자동차그룹이 유럽 전기차 시장 공략을 위해 소형 전기차 라인업 확대에 나섰다. 
IAA 모빌리티 2025에서 공개된 현대차의 콘셉트 쓰리와 기아의 EV2는 유럽 시장 맞춤형 전략의 핵심으로 평가받고 있다.", + "특히 소형 전기차 시장은 유럽에서 급성장이 예상되는 세그먼트로, 현대차그룹은 합리적인 가격대와 실용성을 앞세워 시장 선점을 노리고 있다.", + "현대차그룹의 이번 전략은 유럽의 환경 규제 강화와 소비자들의 실용적인 전기차 수요 증가에 대응하는 동시에, 중국 전기차 업체들의 유럽 진출에 대한 선제적 대응으로 해석된다." + ] + }, + { + "title": "배터리 협력 관계 재편 움직임", + "content": [ + "전기차 경쟁력의 핵심인 배터리 수급과 관련해 업계의 이목이 집중되고 있다. IAA 모빌리티에서 폴스타가 SK온을 배터리 공급사로 지목한 것이 주목받고 있다.", + "글로벌 자동차 업체들의 배터리 조달 전략이 다변화되는 가운데, 한국 배터리 업체들과의 협력 강화 움직임이 감지되고 있다.", + "특히 현대차그룹은 안정적인 배터리 수급을 위해 다양한 배터리 제조사들과의 협력 관계를 검토 중인 것으로 알려졌다." + ] + }, + { + "title": "글로벌 전기차 시장 경쟁 심화", + "content": [ + "전기차 시장에서 브랜드 간 경쟁이 치열해지는 가운데, 현대차그룹은 차별화된 제품 라인업과 기술력으로 시장 지위 강화에 나서고 있다.", + "특히 유럽 시장에서는 테슬라, 폭스바겐 그룹, 중국 업체들과의 경쟁이 불가피한 상황이며, 현대차그룹은 품질과 기술력을 앞세워 경쟁력 확보에 주력하고 있다.", + "시장 전문가들은 현대차그룹의 소형 전기차 전략이 향후 글로벌 시장에서의 입지 강화에 중요한 전환점이 될 것으로 전망하고 있다." + ] + } + ], + "categories": [ + "자동차", + "경제", + "산업" + ], + "entities": { + "people": [ + "김성수", + "박영효" + ], + "organizations": [ + "현대자동차", + "기아", + "SK온", + "폴스타" + ], + "groups": [ + "현대차그룹", + "폭스바겐 그룹" + ], + "countries": [ + "대한민국", + "독일" + ], + "events": [ + "IAA 모빌리티 2025" + ] + }, + "source_keyword": "전기차", + "source_count": 3 +} \ No newline at end of file diff --git a/backup-services/ai-writer/backend/custom_article_analytical.json b/backup-services/ai-writer/backend/custom_article_analytical.json new file mode 100644 index 0000000..9b114dc --- /dev/null +++ b/backup-services/ai-writer/backend/custom_article_analytical.json @@ -0,0 +1,63 @@ +{ + "news_id": "ee154fb8-a913-4aa9-9fc9-fa421fd2d7c0", + "title": "2025년 기술 혁신의 분기점: AI·양자컴퓨팅이 그리는 새로운 미래", + "created_at": "2025-09-13T00:32:14.008706", + "summary": "2025년, AI와 양자컴퓨팅의 상용화가 가져올 산업 전반의 혁신적 변화와 사회적 영향을 심층 분석한 전망", + "subtopics": [ + { + "title": "생성형 AI가 재편하는 산업 생태계", + "content": [ + "2025년은 생성형 AI가 산업 전반에 본격적으로 도입되는 원년이 될 전망이다. 특히 의료 진단, 신약 개발, 교육 커리큘럼 설계 등 전문 분야에서 AI의 역할이 획기적으로 확대될 것으로 예측된다.", + "기업들의 업무 프로세스도 근본적인 변화를 맞이할 것으로 보인다. 
창의적 작업 영역에서도 AI의 활용이 일상화되며, 인간-AI 협업 모델이 새로운 표준으로 자리잡을 것으로 전망된다.", + "다만 AI 도입에 따른 노동시장 재편과 윤리적 문제에 대한 사회적 합의가 시급한 과제로 대두될 것으로 예상된다. 특히 AI 의존도 증가에 따른 데이터 보안과 알고리즘 편향성 문제는 중요한 해결 과제가 될 것이다." + ] + }, + { + "title": "양자컴퓨팅의 상용화와 산업혁신", + "content": [ + "양자컴퓨팅 기술이 실용화 단계에 진입하면서, 금융권의 리스크 분석과 암호화폐 보안 시스템에 획기적인 변화가 예상된다. 특히 복잡한 금융 모델링과 시장 예측에서 양자컴퓨터의 활용이 크게 증가할 전망이다.", + "제약 산업에서는 신약 개발 프로세스가 대폭 단축될 것으로 기대된다. 양자컴퓨터를 활용한 분자 시뮬레이션이 가능해지면서, 신약 개발 비용 절감과 효율성 증대가 실현될 것이다.", + "물류 및 공급망 관리 분야에서도 양자컴퓨팅의 영향력이 확대될 전망이다. 복잡한 경로 최적화와 재고 관리에 양자 알고리즘을 적용함으로써, 물류 비용 절감과 효율성 향상이 가능해질 것으로 예측된다." + ] + }, + { + "title": "기술 혁신에 따른 사회경제적 변화", + "content": [ + "AI와 양자컴퓨팅의 발전은 노동시장의 구조적 변화를 가속화할 것으로 전망된다. 단순 반복 업무는 자동화되는 반면, AI 시스템 관리와 양자컴퓨팅 전문가 같은 새로운 직종의 수요가 급증할 것으로 예상된다.", + "교육 시스템도 큰 변화를 맞이할 것으로 보인다. AI 기반 맞춤형 학습과 양자컴퓨팅 원리에 대한 이해가 새로운 필수 교육과정으로 자리잡을 것으로 전망된다.", + "이러한 기술 혁신은 국가 간 기술 격차를 더욱 심화시킬 가능성이 있다. 선진국과 개발도상국 간의 디지털 격차 해소가 국제사회의 주요 과제로 대두될 것으로 예측된다." + ] + } + ], + "categories": [ + "기술", + "산업", + "미래전망", + "경제" + ], + "entities": { + "people": [], + "organizations": [ + "금융권", + "제약회사", + "물류기업" + ], + "groups": [ + "AI 개발자", + "양자컴퓨팅 전문가", + "교육기관" + ], + "countries": [ + "한국", + "미국", + "중국" + ], + "events": [ + "AI 상용화", + "양자컴퓨터 실용화", + "디지털 전환" + ] + }, + "source_keyword": "2025년 기술 트렌드", + "source_count": 2 +} \ No newline at end of file diff --git a/backup-services/ai-writer/backend/custom_article_professional.json b/backup-services/ai-writer/backend/custom_article_professional.json new file mode 100644 index 0000000..ce04357 --- /dev/null +++ b/backup-services/ai-writer/backend/custom_article_professional.json @@ -0,0 +1,62 @@ +{ + "news_id": "3109c578-9b08-4cd0-a9d6-3d92b97e64d4", + "title": "2025년 기술 혁신의 물결, AI·양자컴퓨팅이 이끄는 새로운 패러다임", + "created_at": "2025-09-13T00:31:52.782760", + "summary": "2025년, 생성형 AI와 양자컴퓨팅의 상용화로 산업 전반에 혁신적 변화가 예상되며, 인간-AI 협업이 일상화될 전망", + "subtopics": [ + { + "title": "생성형 AI가 주도하는 창의적 혁신", + "content": [ + "2025년은 생성형 AI 기술이 전례 없는 수준으로 발전하여 창의적 
영역에서도 획기적인 변화가 예상된다. 기존에 인간의 고유 영역으로 여겨졌던 예술 창작, 콘텐츠 제작, 디자인 분야에서 AI가 핵심 협력자로 자리잡을 전망이다.", + "특히 의료 분야에서는 AI가 질병 진단과 치료 계획 수립에 적극적으로 활용될 것으로 예측된다. AI는 방대한 의료 데이터를 분석하여 개인 맞춤형 치료법을 제시하고, 의료진의 의사결정을 효과적으로 지원할 것으로 기대된다.", + "교육 분야에서도 AI 기반의 맞춤형 학습 시스템이 보편화될 전망이다. 학습자의 이해도와 진도에 따라 최적화된 커리큘럼을 제공하고, 실시간으로 학습 성과를 분석하여 개선점을 제시하는 등 교육의 질적 향상이 기대된다." + ] + }, + { + "title": "양자컴퓨팅의 산업 혁신 주도", + "content": [ + "2025년은 양자컴퓨팅이 실용화 단계에 진입하는 원년이 될 것으로 전망된다. 특히 금융 산업에서는 복잡한 위험 분석과 포트폴리오 최적화에 양자컴퓨팅을 활용하여 투자 전략의 정확도를 높일 것으로 예상된다.", + "제약 산업에서는 양자컴퓨터를 활용한 신약 개발이 가속화될 전망이다. 분자 구조 시뮬레이션과 신약 후보 물질 스크리닝 과정에서 양자컴퓨팅의 강점이 발휘될 것으로 기대된다.", + "물류 분야에서도 양자컴퓨팅을 통한 최적화가 실현될 전망이다. 복잡한 공급망 관리와 배송 경로 최적화에 양자컴퓨팅을 도입함으로써 물류 비용 절감과 효율성 향상이 가능해질 것으로 예측된다." + ] + }, + { + "title": "인간-기계 협업의 새로운 패러다임", + "content": [ + "2025년에는 AI와 인간의 협업이 일상화되면서 업무 방식의 근본적인 변화가 예상된다. 단순 반복적인 업무는 AI가 담당하고, 인간은 전략적 의사결정과 창의적 문제 해결에 집중하는 방식으로 업무 분담이 이루어질 것이다.", + "이러한 변화는 노동시장의 구조적 변화로 이어질 전망이다. AI와 협업할 수 있는 디지털 역량이 필수적인 직무 역량으로 부상하며, 새로운 형태의 직업이 등장할 것으로 예측된다.", + "하지만 이러한 변화 속에서도 윤리적 판단과 감성적 소통과 같은 인간 고유의 가치는 더욱 중요해질 것으로 전망된다. 기술 발전이 가져올 혜택을 최대화하면서도 인간 중심의 가치를 지켜나가는 균형이 중요한 과제로 대두될 것이다." 
+ ] + } + ], + "categories": [ + "기술", + "미래전망", + "산업동향" + ], + "entities": { + "people": [], + "organizations": [ + "AI 기업들", + "제약회사들", + "물류기업들" + ], + "groups": [ + "의료진", + "교육자", + "기술전문가" + ], + "countries": [ + "한국", + "미국", + "중국" + ], + "events": [ + "2025년 기술혁신", + "양자컴퓨팅 상용화", + "AI 혁명" + ] + }, + "source_keyword": "2025년 기술 트렌드", + "source_count": 2 +} \ No newline at end of file diff --git a/backup-services/ai-writer/backend/generated_article.json b/backup-services/ai-writer/backend/generated_article.json new file mode 100644 index 0000000..4046dbe --- /dev/null +++ b/backup-services/ai-writer/backend/generated_article.json @@ -0,0 +1,73 @@ +{ + "news_id": "ea9f3734-6a93-4ca7-8ebe-b85612e2fd0a", + "title": "정부, 내년 AI 산업에 10조원 투자...한국 경제 체질 대전환 나선다", + "created_at": "2025-09-13T01:09:43.892704", + "summary": "정부가 2025년 인공지능 산업 육성을 위해 10조원 규모의 대규모 투자를 단행하며 디지털 경제 전환 가속화에 나선다", + "subtopics": [ + { + "title": "정부의 AI 산업 육성 청사진", + "content": [ + "정부가 2025년 인공지능(AI) 산업 육성을 위해 10조원 규모의 투자를 단행한다. 이는 한국 경제의 디지털 전환을 가속화하고 글로벌 AI 강국으로 도약하기 위한 전략적 결정이다.", + "투자의 주요 방향은 AI 기술 개발, 인프라 구축, 전문인력 양성 등으로, 특히 반도체와 같은 핵심 산업과의 시너지 창출에 중점을 둘 예정이다." + ] + }, + { + "title": "민관 협력 체계 구축", + "content": [ + "정부는 AI 산업 육성을 위해 대기업, 스타트업, 연구기관 등과의 협력 체계를 강화한다. 소버린AI를 비롯한 국내 AI 기업들과의 협력을 통해 실질적인 세계 2위 AI 강국 도약을 목표로 하고 있다.", + "특히 AI 전문가 공모와 전담 조직 신설 등을 통해 체계적인 산업 육성 기반을 마련할 계획이다." + ] + }, + { + "title": "글로벌 경쟁력 강화 전략", + "content": [ + "정부는 국내 AI 기업들의 글로벌 경쟁력 강화를 위해 기술 개발 지원, 해외 시장 진출 지원, 규제 개선 등 다각적인 지원책을 마련한다.", + "특히 AI 산업의 핵심 인프라인 반도체 분야에서 SK하이닉스의 HBM4 개발 완료 등 가시적인 성과가 나타나고 있어, 이를 기반으로 한 시너지 효과가 기대된다." 
+ ] + } + ], + "categories": [ + "경제", + "기술", + "산업정책" + ], + "entities": { + "people": [ + "하정우 소버린AI 대표" + ], + "organizations": [ + "소버린AI", + "SK하이닉스", + "과학기술정보통신부" + ], + "groups": [ + "AI 기업", + "스타트업" + ], + "countries": [ + "대한민국", + "미국" + ], + "events": [ + { + "name": "2025년 AI 산업 육성 계획 발표", + "date": "2025년", + "location": "대한민국" + } + ], + "keywords": [ + "인공지능", + "AI 산업", + "디지털 전환", + "10조원 투자", + "반도체", + "HBM4", + "글로벌 경쟁력", + "민관협력", + "전문인력 양성", + "기술개발" + ] + }, + "source_keyword": "인공지능", + "source_count": 5 +} \ No newline at end of file diff --git a/backup-services/ai-writer/backend/requirements.txt b/backup-services/ai-writer/backend/requirements.txt new file mode 100644 index 0000000..8696605 --- /dev/null +++ b/backup-services/ai-writer/backend/requirements.txt @@ -0,0 +1,9 @@ +fastapi==0.104.1 +uvicorn[standard]==0.24.0 +httpx==0.25.2 +pydantic==2.5.0 +motor==3.1.1 +pymongo==4.3.3 +anthropic==0.39.0 +python-multipart==0.0.6 +redis[hiredis]==5.0.1 \ No newline at end of file diff --git a/backup-services/ai-writer/backend/test_ai_writer.py b/backup-services/ai-writer/backend/test_ai_writer.py new file mode 100755 index 0000000..3b45bbf --- /dev/null +++ b/backup-services/ai-writer/backend/test_ai_writer.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +""" +AI Writer Service Test +Claude API를 사용한 전문적인 뉴스 기사 생성 테스트 +""" +import asyncio +import httpx +import json +from datetime import datetime + +# Service URL +SERVICE_URL = "http://localhost:8019" + +async def test_article_generation(): + """인공지능 키워드로 기사 생성 테스트""" + async with httpx.AsyncClient(timeout=120.0) as client: + print("\n" + "="*70) + print(" AI Writer Service - 전문 기사 생성 테스트 ") + print("="*70) + + print("\n📰 '인공지능' 키워드로 전문 기사 생성 중...") + print("-" * 50) + + # Generate article + response = await client.post( + f"{SERVICE_URL}/api/generate", + json={ + "keyword": "인공지능", + "limit": 5, + "google_results_per_title": 3, + "lang": "ko", + "country": "KR", + "style": "professional" + } + ) 
+ + if response.status_code == 200: + article = response.json() + + print(f"\n✅ 기사 생성 완료!") + print(f"\n📌 기사 ID: {article['news_id']}") + print(f"📅 생성 시간: {article['created_at']}") + print(f"\n📰 제목: {article['title']}") + print(f"📝 요약: {article['summary']}") + + print(f"\n🔍 카테고리: {', '.join(article['categories'])}") + + # Print subtopics + print(f"\n📚 소주제 ({len(article['subtopics'])}개):") + for i, subtopic in enumerate(article['subtopics'], 1): + print(f"\n [{i}] {subtopic['title']}") + print(f" 문단 수: {len(subtopic['content'])}개") + for j, paragraph in enumerate(subtopic['content'][:1], 1): # Show first paragraph only + print(f" 미리보기: {paragraph[:150]}...") + + # Print entities + entities = article['entities'] + print(f"\n🏷️ 추출된 개체:") + if entities['people']: + print(f" 👤 인물: {', '.join(entities['people'])}") + if entities['organizations']: + print(f" 🏢 기관: {', '.join(entities['organizations'])}") + if entities['groups']: + print(f" 👥 단체: {', '.join(entities['groups'])}") + if entities['countries']: + print(f" 🌍 국가: {', '.join(entities['countries'])}") + if entities.get('events'): + events = entities['events'] + if events: + print(f" 📅 이벤트 ({len(events)}개):") + for evt in events[:3]: # 처음 3개만 표시 + if isinstance(evt, dict): + evt_str = f" - {evt.get('name', '')}" + if evt.get('date'): + evt_str += f" [{evt['date']}]" + if evt.get('location'): + evt_str += f" @{evt['location']}" + print(evt_str) + else: + # 이전 형식 (문자열) 지원 + print(f" - {evt}") + if entities.get('keywords'): + keywords = entities['keywords'] + if keywords: + print(f" 🔑 키워드: {', '.join(keywords[:5])}" + + ("..." 
if len(keywords) > 5 else "")) + + print(f"\n📊 참조 소스: {article.get('source_count', 0)}개") + + # Save full article to file + with open('generated_article.json', 'w', encoding='utf-8') as f: + json.dump(article, f, ensure_ascii=False, indent=2) + print(f"\n💾 전체 기사가 'generated_article.json'에 저장되었습니다.") + + else: + print(f"❌ 오류: {response.status_code}") + print(f" 상세: {response.text}") + +async def test_health_check(): + """서비스 상태 확인""" + async with httpx.AsyncClient() as client: + print("\n" + "="*60) + print("서비스 Health Check") + print("="*60) + + response = await client.get(f"{SERVICE_URL}/health") + if response.status_code == 200: + data = response.json() + print(f"✓ AI Writer 서비스 상태: {data.get('status', 'unknown')}") + if 'services' in data: + print(f" - News Aggregator: {data['services'].get('news_aggregator', 'unknown')}") + print(f" - MongoDB: {data['services'].get('mongodb', 'unknown')}") + print(f" - Claude API: {data['services'].get('claude_api', 'unknown')}") + if 'error' in data: + print(f" - Error: {data['error']}") + else: + print(f"✗ Health check 실패: {response.status_code}") + +async def test_batch_generation(): + """여러 키워드 일괄 처리 테스트""" + async with httpx.AsyncClient(timeout=180.0) as client: + print("\n" + "="*60) + print("일괄 기사 생성 테스트") + print("="*60) + + keywords = ["AI 혁신", "디지털 전환", "스마트시티"] + print(f"\n키워드: {', '.join(keywords)}") + + response = await client.post( + f"{SERVICE_URL}/api/generate/batch", + json=keywords, + params={"style": "analytical"} + ) + + if response.status_code == 200: + data = response.json() + print(f"\n✅ 처리 완료: {data['total_processed']}개") + + if data['success']: + print("\n성공한 기사:") + for item in data['success']: + print(f" - {item['keyword']}: {item['title'][:50]}...") + + if data['errors']: + print("\n실패한 항목:") + for item in data['errors']: + print(f" - {item['keyword']}: {item['error']}") + else: + print(f"❌ 오류: {response.status_code}") + +async def main(): + """메인 테스트 실행""" + print("\n" + "="*70) + print(" AI Writer 
Service Test Suite ") + print(" RSS → Google Search → Claude AI 기사 생성 ") + print("="*70) + + # Run tests + await test_health_check() + await test_article_generation() + # await test_batch_generation() # Optional: batch test + + print("\n" + "="*70) + print(" 테스트 완료 ") + print("="*70) + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/backup-services/ai-writer/backend/test_prompt_generation.py b/backup-services/ai-writer/backend/test_prompt_generation.py new file mode 100644 index 0000000..12d4764 --- /dev/null +++ b/backup-services/ai-writer/backend/test_prompt_generation.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 +""" +AI Writer Service - 프롬프트 기반 기사 생성 테스트 +다양한 스타일과 키워드로 기사를 생성하는 테스트 +""" +import asyncio +import httpx +import json +from datetime import datetime + +# Service URL +SERVICE_URL = "http://localhost:8019" + +async def test_different_styles(): + """다양한 스타일로 기사 생성 테스트""" + + test_cases = [ + { + "keyword": "전기차", + "style": "professional", + "description": "전통적인 뉴스 기사 스타일" + }, + { + "keyword": "전기차", + "style": "analytical", + "description": "분석적이고 심층적인 스타일" + }, + { + "keyword": "전기차", + "style": "investigative", + "description": "탐사보도 스타일" + } + ] + + async with httpx.AsyncClient(timeout=180.0) as client: + for test_case in test_cases: + print("\n" + "="*70) + print(f" {test_case['description']} 테스트") + print("="*70) + print(f"키워드: {test_case['keyword']}") + print(f"스타일: {test_case['style']}") + print("-" * 50) + + try: + response = await client.post( + f"{SERVICE_URL}/api/generate", + json={ + "keyword": test_case["keyword"], + "limit": 3, # RSS 항목 수 줄여서 빠른 테스트 + "google_results_per_title": 2, + "lang": "ko", + "country": "KR", + "style": test_case["style"] + } + ) + + if response.status_code == 200: + article = response.json() + print(f"\n✅ 기사 생성 성공!") + print(f"📰 제목: {article['title']}") + print(f"📝 요약: {article['summary']}") + print(f"🔍 카테고리: {', '.join(article['categories'])}") + print(f"📚 소주제 수: 
{len(article['subtopics'])}") + + # 키워드 출력 + if 'entities' in article and 'keywords' in article['entities']: + keywords = article['entities']['keywords'] + print(f"🔑 키워드 ({len(keywords)}개): {', '.join(keywords[:5])}" + + ("..." if len(keywords) > 5 else "")) + + # 이벤트 정보 출력 + if 'entities' in article and 'events' in article['entities']: + events = article['entities']['events'] + if events: + print(f"📅 이벤트 ({len(events)}개):") + for evt in events[:2]: # 처음 2개만 표시 + if isinstance(evt, dict): + evt_str = f" - {evt.get('name', '')}" + if evt.get('date'): + evt_str += f" [{evt['date']}]" + if evt.get('location'): + evt_str += f" @{evt['location']}" + print(evt_str) + + # 첫 번째 소주제의 첫 문단만 출력 + if article['subtopics']: + first_topic = article['subtopics'][0] + print(f"\n첫 번째 소주제: {first_topic['title']}") + if first_topic['content']: + print(f"미리보기: {first_topic['content'][0][:200]}...") + + # 파일로 저장 + filename = f"article_{test_case['keyword']}_{test_case['style']}.json" + with open(filename, 'w', encoding='utf-8') as f: + json.dump(article, f, ensure_ascii=False, indent=2) + print(f"\n💾 '{filename}'에 저장됨") + + else: + print(f"❌ 오류: {response.status_code}") + print(f"상세: {response.text}") + + except Exception as e: + print(f"❌ 테스트 실패: {e}") + + # 다음 테스트 전 잠시 대기 + await asyncio.sleep(2) + +async def test_different_keywords(): + """다양한 키워드로 기사 생성 테스트""" + + keywords = ["블록체인", "메타버스", "우주개발", "기후변화", "K-POP"] + + async with httpx.AsyncClient(timeout=180.0) as client: + print("\n" + "="*70) + print(" 다양한 키워드 테스트") + print("="*70) + + for keyword in keywords: + print(f"\n🔍 키워드: {keyword}") + print("-" * 30) + + try: + response = await client.post( + f"{SERVICE_URL}/api/generate", + json={ + "keyword": keyword, + "limit": 2, # 빠른 테스트를 위해 줄임 + "google_results_per_title": 2, + "lang": "ko", + "country": "KR", + "style": "professional" + } + ) + + if response.status_code == 200: + article = response.json() + print(f"✅ 성공: {article['title'][:50]}...") + print(f" 카테고리: {', 
'.join(article['categories'][:3])}") + else: + print(f"❌ 실패: {response.status_code}") + + except Exception as e: + print(f"❌ 오류: {e}") + + await asyncio.sleep(1) + +async def test_custom_prompt(): + """커스텀 프롬프트 테스트 - 직접 aggregated 데이터 제공""" + + # 미리 수집된 데이터를 시뮬레이션 + custom_news_data = { + "keyword": "2025년 기술 트렌드", + "news_items": [ + { + "rss_title": "AI와 로봇이 바꾸는 2025년 일상", + "google_results": [ + { + "title": "전문가들이 예측하는 2025년 AI 혁명", + "snippet": "2025년 AI 기술이 일상생활 전반을 혁신할 전망...", + "full_content": { + "url": "https://example.com/ai-2025", + "content": "2025년에는 AI가 의료, 교육, 업무 등 모든 분야에서 인간과 협업하는 시대가 열릴 것으로 전망된다. 특히 생성형 AI의 발전으로 창의적 작업에서도 AI의 역할이 크게 확대될 것이다." + } + } + ] + }, + { + "rss_title": "양자컴퓨터 상용화 임박", + "google_results": [ + { + "title": "IBM, 2025년 1000큐비트 양자컴퓨터 출시 예정", + "snippet": "IBM이 2025년 상용 양자컴퓨터 출시를 앞두고...", + "full_content": { + "url": "https://example.com/quantum-2025", + "content": "양자컴퓨팅이 드디어 실용화 단계에 접어들었다. 2025년에는 금융, 제약, 물류 등 다양한 산업에서 양자컴퓨터를 활용한 혁신이 시작될 전망이다." + } + } + ] + } + ] + } + + async with httpx.AsyncClient(timeout=180.0) as client: + print("\n" + "="*70) + print(" 커스텀 데이터로 기사 생성") + print("="*70) + + for style in ["professional", "analytical"]: + print(f"\n스타일: {style}") + print("-" * 30) + + try: + response = await client.post( + f"{SERVICE_URL}/api/generate/from-aggregated", + json=custom_news_data, + params={"style": style} + ) + + if response.status_code == 200: + article = response.json() + print(f"✅ 제목: {article['title']}") + print(f" 요약: {article['summary']}") + + # 스타일별로 저장 + filename = f"custom_article_{style}.json" + with open(filename, 'w', encoding='utf-8') as f: + json.dump(article, f, ensure_ascii=False, indent=2) + print(f" 💾 '{filename}'에 저장됨") + else: + print(f"❌ 실패: {response.text}") + + except Exception as e: + print(f"❌ 오류: {e}") + + await asyncio.sleep(2) + +async def main(): + """메인 테스트 실행""" + print("\n" + "="*70) + print(" AI Writer 프롬프트 기반 기사 생성 테스트") + print("="*70) + + # 1. 
다양한 스타일 테스트 + print("\n[1] 스타일별 기사 생성 테스트") + await test_different_styles() + + # 2. 다양한 키워드 테스트 + print("\n[2] 키워드별 기사 생성 테스트") + await test_different_keywords() + + # 3. 커스텀 데이터 테스트 + print("\n[3] 커스텀 데이터 기사 생성 테스트") + await test_custom_prompt() + + print("\n" + "="*70) + print(" 모든 테스트 완료!") + print("="*70) + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/backup-services/ai-writer/worker/Dockerfile b/backup-services/ai-writer/worker/Dockerfile new file mode 100644 index 0000000..7869505 --- /dev/null +++ b/backup-services/ai-writer/worker/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Copy requirements +COPY backend/requirements.txt . + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY backend/app /app + +# Set environment variables +ENV PYTHONUNBUFFERED=1 +ENV WORKER_COUNT=3 + +# Run worker +CMD ["python", "worker.py"] \ No newline at end of file diff --git a/backup-services/google-search/README.md b/backup-services/google-search/README.md new file mode 100644 index 0000000..26c4a52 --- /dev/null +++ b/backup-services/google-search/README.md @@ -0,0 +1,153 @@ +# Google Search Service + +키워드를 구글에서 검색한 결과를 수신하는 서비스입니다. + +## 주요 기능 + +### 1. 다중 검색 방법 지원 +- **Google Custom Search API**: 공식 구글 API (권장) +- **SerpAPI**: 대체 검색 API +- **웹 스크래핑**: 폴백 옵션 (제한적) + +### 2. 검색 옵션 +- 최대 20개 검색 결과 지원 +- 언어별/국가별 검색 +- 날짜 기준 필터링 및 정렬 +- 전체 콘텐츠 가져오기 + +## API 엔드포인트 + +### 기본 검색 +``` +GET /api/search?q=키워드&num=20&lang=ko&country=kr +``` + +**파라미터:** +- `q`: 검색 키워드 (필수) +- `num`: 결과 개수 (1-20, 기본값: 10) +- `lang`: 언어 코드 (ko, en 등) +- `country`: 국가 코드 (kr, us 등) +- `date_restrict`: 날짜 제한 + - `d7`: 일주일 이내 + - `m1`: 한달 이내 + - `m3`: 3개월 이내 + - `y1`: 1년 이내 +- `sort_by_date`: 최신순 정렬 (true/false) + +### 전체 콘텐츠 검색 +``` +GET /api/search/full?q=키워드&num=5 +``` +각 검색 결과 페이지의 전체 내용을 가져옵니다 (시간이 오래 걸릴 수 있음). 
+ +### 실시간 트렌딩 +``` +GET /api/trending?country=kr +``` + +## 사용 예제 + +### 1. 한국어 검색 (최신순) +```bash +curl "http://localhost:8016/api/search?q=인공지능&num=20&lang=ko&country=kr&sort_by_date=true" +``` + +### 2. 영어 검색 (미국) +```bash +curl "http://localhost:8016/api/search?q=artificial%20intelligence&num=10&lang=en&country=us" +``` + +### 3. 최근 일주일 내 결과만 +```bash +curl "http://localhost:8016/api/search?q=뉴스&date_restrict=d7&lang=ko" +``` + +### 4. 전체 콘텐츠 가져오기 +```bash +curl "http://localhost:8016/api/search/full?q=python%20tutorial&num=3" +``` + +## 환경 설정 + +### 필수 API 키 설정 + +1. **Google Custom Search API** + - [Google Cloud Console](https://console.cloud.google.com/apis/credentials)에서 API 키 발급 + - [Programmable Search Engine](https://programmablesearchengine.google.com/)에서 검색 엔진 ID 생성 + +2. **SerpAPI (선택사항)** + - [SerpAPI](https://serpapi.com/)에서 API 키 발급 + +### .env 파일 설정 +```env +# Google Custom Search API +GOOGLE_API_KEY=your_api_key_here +GOOGLE_SEARCH_ENGINE_ID=your_search_engine_id_here + +# SerpAPI (선택사항) +SERPAPI_KEY=your_serpapi_key_here + +# Redis 캐시 +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_DB=2 + +# 기본 설정 +DEFAULT_LANGUAGE=ko +DEFAULT_COUNTRY=kr +CACHE_TTL=3600 +``` + +## Docker 실행 + +```bash +# 빌드 및 실행 +docker-compose build google-search-backend +docker-compose up -d google-search-backend + +# 로그 확인 +docker-compose logs -f google-search-backend +``` + +## 제한 사항 + +### Google Custom Search API +- 무료 계정: 일일 100회 쿼리 제한 +- 검색당 최대 100개 결과 +- snippet 길이는 서버에서 제한 (변경 불가) + +### 해결 방법 +- 20개 이상 결과 필요 시: 페이지네이션 사용 +- 긴 내용 필요 시: `/api/search/full` 엔드포인트 사용 +- API 제한 도달 시: SerpAPI 또는 웹 스크래핑으로 자동 폴백 + +## 캐시 관리 + +Redis를 사용하여 검색 결과를 캐싱합니다: +- 기본 TTL: 3600초 (1시간) +- 캐시 초기화: `POST /api/clear-cache` + +## 헬스 체크 + +```bash +curl http://localhost:8016/health +``` + +## 문제 해결 + +### 1. 한글 검색 안될 때 +URL 인코딩 사용: +```bash +# "인공지능" → %EC%9D%B8%EA%B3%B5%EC%A7%80%EB%8A%A5 +curl "http://localhost:8016/api/search?q=%EC%9D%B8%EA%B3%B5%EC%A7%80%EB%8A%A5" +``` + +### 2. 
API 제한 에러 +- Google API 일일 제한 확인 +- SerpAPI 키 설정으로 대체 +- 웹 스크래핑 자동 폴백 활용 + +### 3. 느린 응답 시간 +- Redis 캐시 활성화 확인 +- 결과 개수 줄이기 +- 전체 콘텐츠 대신 기본 검색 사용 \ No newline at end of file diff --git a/backup-services/google-search/backend/.env.example b/backup-services/google-search/backend/.env.example new file mode 100644 index 0000000..0d4f463 --- /dev/null +++ b/backup-services/google-search/backend/.env.example @@ -0,0 +1,21 @@ +# Google Custom Search API Configuration +# Get your API key from: https://console.cloud.google.com/apis/credentials +GOOGLE_API_KEY= + +# Get your Search Engine ID from: https://programmablesearchengine.google.com/ +GOOGLE_SEARCH_ENGINE_ID= + +# Alternative: SerpAPI Configuration +# Get your API key from: https://serpapi.com/ +SERPAPI_KEY= + +# Redis Configuration +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_DB=2 + +# Search Settings +DEFAULT_LANGUAGE=ko +DEFAULT_COUNTRY=kr +CACHE_TTL=3600 +MAX_RESULTS=10 \ No newline at end of file diff --git a/backup-services/google-search/backend/Dockerfile b/backup-services/google-search/backend/Dockerfile new file mode 100644 index 0000000..800c70b --- /dev/null +++ b/backup-services/google-search/backend/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . 
+ +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] \ No newline at end of file diff --git a/backup-services/google-search/backend/app/__init__.py b/backup-services/google-search/backend/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backup-services/google-search/backend/app/config.py b/backup-services/google-search/backend/app/config.py new file mode 100644 index 0000000..1b06392 --- /dev/null +++ b/backup-services/google-search/backend/app/config.py @@ -0,0 +1,30 @@ +from pydantic_settings import BaseSettings +from typing import Optional + +class Settings(BaseSettings): + # Google Custom Search API 설정 + google_api_key: Optional[str] = None + google_search_engine_id: Optional[str] = None + + # SerpAPI 설정 (대안) + serpapi_key: Optional[str] = None + + # Redis 캐싱 설정 + redis_host: str = "redis" + redis_port: int = 6379 + redis_db: int = 2 + cache_ttl: int = 3600 # 1시간 + + # 검색 설정 + max_results: int = 10 + default_language: str = "ko" + default_country: str = "kr" + + # 서비스 설정 + service_name: str = "Google Search Service" + debug: bool = True + + class Config: + env_file = ".env" + +settings = Settings() \ No newline at end of file diff --git a/backup-services/google-search/backend/app/main.py b/backup-services/google-search/backend/app/main.py new file mode 100644 index 0000000..83a29ba --- /dev/null +++ b/backup-services/google-search/backend/app/main.py @@ -0,0 +1,188 @@ +from fastapi import FastAPI, Query, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from typing import Optional +from datetime import datetime +from contextlib import asynccontextmanager + +from .search_service import GoogleSearchService +from .config import settings + +@asynccontextmanager +async def lifespan(app: FastAPI): + # 시작 시 + print("Google Search Service starting...") + yield + # 종료 시 + print("Google Search Service stopping...") + +app = FastAPI( + title="Google Search Service", + description="구글 검색 결과를 수신하는 
서비스", + version="1.0.0", + lifespan=lifespan +) + +# CORS 설정 +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# 검색 서비스 초기화 +search_service = GoogleSearchService() + +@app.get("/") +async def root(): + return { + "service": "Google Search Service", + "version": "1.0.0", + "timestamp": datetime.now().isoformat(), + "endpoints": { + "search": "/api/search?q=keyword", + "custom_search": "/api/search/custom?q=keyword", + "serpapi_search": "/api/search/serpapi?q=keyword", + "scraping_search": "/api/search/scraping?q=keyword", + "trending": "/api/trending", + "health": "/health" + } + } + +@app.get("/health") +async def health_check(): + return { + "status": "healthy", + "service": "google-search", + "timestamp": datetime.now().isoformat() + } + +@app.get("/api/search") +async def search( + q: str = Query(..., description="검색 키워드"), + num: int = Query(10, description="결과 개수", ge=1, le=20), + lang: Optional[str] = Query(None, description="언어 코드 (ko, en 등)"), + country: Optional[str] = Query(None, description="국가 코드 (kr, us 등)"), + date_restrict: Optional[str] = Query(None, description="날짜 제한 (d7=일주일, m1=한달, m3=3개월, y1=1년)"), + sort_by_date: bool = Query(False, description="최신순 정렬") +): + """ + 자동으로 최적의 방법을 선택하여 구글 검색 + 1. Google Custom Search API (설정된 경우) + 2. SerpAPI (설정된 경우) + 3. 
웹 스크래핑 (폴백) + """ + # Google Custom Search API 시도 + if settings.google_api_key and settings.google_search_engine_id: + result = await search_service.search_with_custom_api(q, num, lang, country, date_restrict, sort_by_date) + if "error" not in result or not result["error"]: + result["method"] = "google_custom_search" + return result + + # SerpAPI 시도 + if settings.serpapi_key: + result = await search_service.search_with_serpapi(q, num, lang, country) + if "error" not in result or not result["error"]: + result["method"] = "serpapi" + return result + + # 웹 스크래핑 폴백 + result = await search_service.search_with_scraping(q, num, lang) + result["method"] = "web_scraping" + result["warning"] = "API 키가 설정되지 않아 웹 스크래핑을 사용합니다. 제한적이고 불안정할 수 있습니다." + return result + +@app.get("/api/search/custom") +async def search_custom( + q: str = Query(..., description="검색 키워드"), + num: int = Query(10, description="결과 개수", ge=1, le=10), + lang: Optional[str] = Query(None, description="언어 코드"), + country: Optional[str] = Query(None, description="국가 코드") +): + """Google Custom Search API를 사용한 검색""" + if not settings.google_api_key or not settings.google_search_engine_id: + raise HTTPException( + status_code=503, + detail="Google Custom Search API credentials not configured" + ) + + result = await search_service.search_with_custom_api(q, num, lang, country) + if "error" in result and result["error"]: + raise HTTPException(status_code=500, detail=result["error"]) + + return result + +@app.get("/api/search/serpapi") +async def search_serpapi( + q: str = Query(..., description="검색 키워드"), + num: int = Query(10, description="결과 개수", ge=1, le=50), + lang: Optional[str] = Query(None, description="언어 코드"), + country: Optional[str] = Query(None, description="국가 코드") +): + """SerpAPI를 사용한 검색""" + if not settings.serpapi_key: + raise HTTPException( + status_code=503, + detail="SerpAPI key not configured" + ) + + result = await search_service.search_with_serpapi(q, num, lang, country) + if "error" in result 
and result["error"]: + raise HTTPException(status_code=500, detail=result["error"]) + + return result + +@app.get("/api/search/scraping") +async def search_scraping( + q: str = Query(..., description="검색 키워드"), + num: int = Query(10, description="결과 개수", ge=1, le=20), + lang: Optional[str] = Query(None, description="언어 코드") +): + """웹 스크래핑을 사용한 검색 (제한적)""" + result = await search_service.search_with_scraping(q, num, lang) + if "error" in result and result["error"]: + raise HTTPException(status_code=500, detail=result["error"]) + + result["warning"] = "웹 스크래핑은 제한적이고 불안정할 수 있습니다" + return result + +@app.get("/api/search/full") +async def search_with_full_content( + q: str = Query(..., description="검색 키워드"), + num: int = Query(5, description="결과 개수", ge=1, le=10), + lang: Optional[str] = Query(None, description="언어 코드 (ko, en 등)"), + country: Optional[str] = Query(None, description="국가 코드 (kr, us 등)") +): + """ + Google 검색 후 각 결과 페이지의 전체 내용을 가져오기 + 주의: 시간이 오래 걸릴 수 있음 + """ + result = await search_service.search_with_full_content(q, num, lang, country) + if "error" in result and result["error"]: + raise HTTPException(status_code=500, detail=result["error"]) + + return result + +@app.get("/api/trending") +async def get_trending( + country: Optional[str] = Query(None, description="국가 코드 (kr, us 등)") +): + """실시간 트렌딩 검색어 조회""" + result = await search_service.get_trending_searches(country) + if "error" in result and result["error"]: + raise HTTPException(status_code=500, detail=result["error"]) + + return result + +@app.post("/api/clear-cache") +async def clear_cache(): + """캐시 초기화""" + try: + search_service.redis_client.flushdb() + return { + "status": "success", + "message": "캐시가 초기화되었습니다" + } + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/backup-services/google-search/backend/app/search_service.py b/backup-services/google-search/backend/app/search_service.py new file mode 100644 index 
0000000..708765b --- /dev/null +++ b/backup-services/google-search/backend/app/search_service.py @@ -0,0 +1,540 @@ +import httpx +import json +import redis +from typing import List, Dict, Optional +from datetime import datetime +import hashlib +from bs4 import BeautifulSoup +from .config import settings + +class GoogleSearchService: + def __init__(self): + # Redis 연결 + self.redis_client = redis.Redis( + host=settings.redis_host, + port=settings.redis_port, + db=settings.redis_db, + decode_responses=True + ) + + def _get_cache_key(self, query: str, **kwargs) -> str: + """캐시 키 생성""" + cache_data = f"{query}_{kwargs}" + return f"google_search:{hashlib.md5(cache_data.encode()).hexdigest()}" + + async def search_with_custom_api( + self, + query: str, + num_results: int = 10, + language: str = None, + country: str = None, + date_restrict: str = None, + sort_by_date: bool = False + ) -> Dict: + """Google Custom Search API 사용""" + if not settings.google_api_key or not settings.google_search_engine_id: + return { + "error": "Google API credentials not configured", + "results": [] + } + + # 캐시 확인 + cache_key = self._get_cache_key(query, num=num_results, lang=language, country=country) + cached = self.redis_client.get(cache_key) + if cached: + return json.loads(cached) + + url = "https://www.googleapis.com/customsearch/v1" + + all_results = [] + total_results_info = None + + # Google API는 한 번에 최대 10개만 반환, 20개를 원하면 2번 요청 + num_requests = min((num_results + 9) // 10, 2) # 최대 2번 요청 (20개까지) + + async with httpx.AsyncClient() as client: + for page in range(num_requests): + start_index = page * 10 + 1 + current_num = min(10, num_results - page * 10) + + params = { + "key": settings.google_api_key, + "cx": settings.google_search_engine_id, + "q": query, + "num": current_num, + "start": start_index, # 시작 인덱스 + "hl": language or settings.default_language, + "gl": country or settings.default_country + } + + # 날짜 제한 추가 (d7 = 일주일, m1 = 한달, y1 = 1년) + if date_restrict: + 
params["dateRestrict"] = date_restrict + + # 날짜순 정렬 (Google Custom Search API에서는 sort=date 옵션) + if sort_by_date: + params["sort"] = "date" + + try: + response = await client.get(url, params=params) + response.raise_for_status() + + data = response.json() + + # 첫 번째 요청에서만 전체 정보 저장 + if page == 0: + total_results_info = { + "total_results": data.get("searchInformation", {}).get("totalResults"), + "search_time": data.get("searchInformation", {}).get("searchTime"), + "query": data.get("queries", {}).get("request", [{}])[0].get("searchTerms") + } + + # 결과 추가 + for item in data.get("items", []): + all_results.append({ + "title": item.get("title"), + "link": item.get("link"), + "snippet": item.get("snippet"), + "display_link": item.get("displayLink"), + "thumbnail": item.get("pagemap", {}).get("cse_thumbnail", [{}])[0].get("src") if "pagemap" in item else None + }) + + except Exception as e: + # 첫 번째 요청이 실패하면 에러 반환 + if page == 0: + return { + "error": str(e), + "results": [] + } + # 두 번째 요청이 실패하면 첫 번째 결과만 반환 + break + + results = { + "query": total_results_info.get("query") if total_results_info else query, + "total_results": total_results_info.get("total_results") if total_results_info else "0", + "search_time": total_results_info.get("search_time") if total_results_info else 0, + "results": all_results[:num_results], # 요청한 개수만큼만 반환 + "timestamp": datetime.utcnow().isoformat() + } + + # 캐시 저장 + self.redis_client.setex( + cache_key, + settings.cache_ttl, + json.dumps(results) + ) + + return results + + async def search_with_serpapi( + self, + query: str, + num_results: int = 10, + language: str = None, + country: str = None + ) -> Dict: + """SerpAPI 사용 (유료 서비스)""" + if not settings.serpapi_key: + return { + "error": "SerpAPI key not configured", + "results": [] + } + + # 캐시 확인 + cache_key = self._get_cache_key(query, num=num_results, lang=language, country=country) + cached = self.redis_client.get(cache_key) + if cached: + return json.loads(cached) + + from serpapi 
import GoogleSearch + + params = { + "q": query, + "api_key": settings.serpapi_key, + "num": num_results, + "hl": language or settings.default_language, + "gl": country or settings.default_country + } + + try: + search = GoogleSearch(params) + results = search.get_dict() + + formatted_results = self._format_serpapi_results(results) + + # 캐시 저장 + self.redis_client.setex( + cache_key, + settings.cache_ttl, + json.dumps(formatted_results) + ) + + return formatted_results + + except Exception as e: + return { + "error": str(e), + "results": [] + } + + async def search_with_scraping( + self, + query: str, + num_results: int = 10, + language: str = None + ) -> Dict: + """웹 스크래핑으로 검색 (비추천, 제한적)""" + # 캐시 확인 + cache_key = self._get_cache_key(query, num=num_results, lang=language) + cached = self.redis_client.get(cache_key) + if cached: + return json.loads(cached) + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + } + + params = { + "q": query, + "num": num_results, + "hl": language or settings.default_language + } + + async with httpx.AsyncClient() as client: + try: + response = await client.get( + "https://www.google.com/search", + params=params, + headers=headers, + follow_redirects=True + ) + + soup = BeautifulSoup(response.text, 'html.parser') + results = self._parse_google_html(soup) + + formatted_results = { + "query": query, + "total_results": len(results), + "results": results, + "timestamp": datetime.utcnow().isoformat() + } + + # 캐시 저장 + self.redis_client.setex( + cache_key, + settings.cache_ttl, + json.dumps(formatted_results) + ) + + return formatted_results + + except Exception as e: + return { + "error": str(e), + "results": [] + } + + def _format_google_results(self, data: Dict) -> Dict: + """Google API 결과 포맷팅""" + results = [] + + for item in data.get("items", []): + results.append({ + "title": item.get("title"), + "link": item.get("link"), + "snippet": item.get("snippet"), + "display_link": 
item.get("displayLink"), + "thumbnail": item.get("pagemap", {}).get("cse_thumbnail", [{}])[0].get("src") if "pagemap" in item else None + }) + + return { + "query": data.get("queries", {}).get("request", [{}])[0].get("searchTerms"), + "total_results": data.get("searchInformation", {}).get("totalResults"), + "search_time": data.get("searchInformation", {}).get("searchTime"), + "results": results, + "timestamp": datetime.utcnow().isoformat() + } + + def _format_serpapi_results(self, data: Dict) -> Dict: + """SerpAPI 결과 포맷팅""" + results = [] + + for item in data.get("organic_results", []): + results.append({ + "title": item.get("title"), + "link": item.get("link"), + "snippet": item.get("snippet"), + "position": item.get("position"), + "thumbnail": item.get("thumbnail"), + "date": item.get("date") + }) + + # 관련 검색어 + related_searches = [ + item.get("query") for item in data.get("related_searches", []) + ] + + return { + "query": data.get("search_parameters", {}).get("q"), + "total_results": data.get("search_information", {}).get("total_results"), + "search_time": data.get("search_information", {}).get("time_taken_displayed"), + "results": results, + "related_searches": related_searches, + "timestamp": datetime.utcnow().isoformat() + } + + def _parse_google_html(self, soup: BeautifulSoup) -> List[Dict]: + """HTML 파싱으로 검색 결과 추출""" + results = [] + + # 검색 결과 컨테이너 찾기 + for g in soup.find_all('div', class_='g'): + anchors = g.find_all('a') + if anchors: + link = anchors[0].get('href', '') + title_elem = g.find('h3') + snippet_elem = g.find('span', class_='st') or g.find('div', class_='s') + + if title_elem and link: + results.append({ + "title": title_elem.get_text(), + "link": link, + "snippet": snippet_elem.get_text() if snippet_elem else "" + }) + + return results + + async def fetch_page_content(self, url: str) -> Dict: + """웹 페이지의 전체 내용을 가져오기""" + try: + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + } + + async with 
httpx.AsyncClient(timeout=10.0) as client: + response = await client.get(url, headers=headers, follow_redirects=True) + response.raise_for_status() + + soup = BeautifulSoup(response.text, 'html.parser') + + # 불필요한 태그 제거 + for script in soup(["script", "style", "nav", "header", "footer"]): + script.decompose() + + # 본문 내용 추출 시도 + main_content = None + + # 1. article 태그 찾기 + article = soup.find('article') + if article: + main_content = article.get_text() + + # 2. main 태그 찾기 + if not main_content: + main = soup.find('main') + if main: + main_content = main.get_text() + + # 3. 일반적인 콘텐츠 div 찾기 + if not main_content: + content_divs = soup.find_all('div', class_=lambda x: x and ('content' in x.lower() or 'article' in x.lower() or 'post' in x.lower())) + if content_divs: + main_content = ' '.join([div.get_text() for div in content_divs[:3]]) + + # 4. 전체 body에서 텍스트 추출 + if not main_content: + body = soup.find('body') + if body: + main_content = body.get_text() + else: + main_content = soup.get_text() + + # 텍스트 정리 + main_content = ' '.join(main_content.split()) + + # 제목 추출 + title = soup.find('title') + title_text = title.get_text() if title else "" + + # 메타 설명 추출 + meta_desc = soup.find('meta', attrs={'name': 'description'}) + description = meta_desc.get('content', '') if meta_desc else "" + + return { + "url": url, + "title": title_text, + "description": description, + "content": main_content[:5000], # 최대 5000자 + "content_length": len(main_content), + "success": True + } + + except Exception as e: + return { + "url": url, + "error": str(e), + "success": False + } + + async def search_with_extended_snippet( + self, + query: str, + num_results: int = 10, + language: str = None, + country: str = None + ) -> Dict: + """검색 후 확장된 snippet 가져오기 (메타 설명 + 첫 500자)""" + # 먼저 일반 검색 수행 + search_results = await self.search_with_custom_api( + query, num_results, language, country + ) + + if "error" in search_results: + return search_results + + # 각 결과의 확장된 snippet 가져오기 + import asyncio + 
+ async def fetch_extended_snippet(result): + """개별 페이지의 확장된 snippet 가져오기""" + enhanced_result = result.copy() + + if result.get("link"): + try: + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + } + + async with httpx.AsyncClient(timeout=5.0) as client: + response = await client.get(result["link"], headers=headers, follow_redirects=True) + response.raise_for_status() + + soup = BeautifulSoup(response.text, 'html.parser') + + # 메타 설명 추출 + meta_desc = soup.find('meta', attrs={'name': 'description'}) + if not meta_desc: + meta_desc = soup.find('meta', attrs={'property': 'og:description'}) + + description = meta_desc.get('content', '') if meta_desc else "" + + # 본문 첫 부분 추출 + for script in soup(["script", "style"]): + script.decompose() + + # 본문 텍스트 찾기 + text_content = "" + for tag in ['article', 'main', 'div']: + elements = soup.find_all(tag) + for elem in elements: + text = elem.get_text().strip() + if len(text) > 200: # 의미있는 텍스트만 + text_content = ' '.join(text.split())[:1000] + break + if text_content: + break + + # 기존 snippet과 병합 + extended_snippet = result.get("snippet", "") + if description and description not in extended_snippet: + extended_snippet = description + " ... " + extended_snippet + if text_content and len(extended_snippet) < 500: + extended_snippet = extended_snippet + " ... 
" + text_content[:500-len(extended_snippet)] + + enhanced_result["snippet"] = extended_snippet[:1000] # 최대 1000자 + enhanced_result["extended"] = True + + except Exception as e: + # 실패 시 원본 snippet 유지 + enhanced_result["extended"] = False + enhanced_result["fetch_error"] = str(e) + + return enhanced_result + + # 병렬로 모든 페이지 처리 + tasks = [fetch_extended_snippet(result) for result in search_results.get("results", [])] + enhanced_results = await asyncio.gather(*tasks) + + return { + **search_results, + "results": enhanced_results, + "snippet_extended": True + } + + async def search_with_full_content( + self, + query: str, + num_results: int = 5, + language: str = None, + country: str = None + ) -> Dict: + """검색 후 각 결과의 전체 내용 가져오기""" + # 먼저 일반 검색 수행 + search_results = await self.search_with_custom_api( + query, num_results, language, country + ) + + if "error" in search_results: + return search_results + + # 각 결과의 전체 내용 가져오기 + enhanced_results = [] + for result in search_results.get("results", [])[:num_results]: + # 원본 검색 결과 복사 + enhanced_result = result.copy() + + # 페이지 내용 가져오기 + if result.get("link"): + content_data = await self.fetch_page_content(result["link"]) + enhanced_result["full_content"] = content_data + + enhanced_results.append(enhanced_result) + + return { + **search_results, + "results": enhanced_results, + "content_fetched": True + } + + async def get_trending_searches(self, country: str = None) -> Dict: + """트렌딩 검색어 가져오기""" + # Google Trends 비공식 API 사용 + url = f"https://trends.google.com/trends/api/dailytrends" + params = { + "geo": country or settings.default_country.upper() + } + + async with httpx.AsyncClient() as client: + try: + response = await client.get(url, params=params) + # Google Trends API는 ")]}',\n"로 시작하는 응답을 반환 + json_data = response.text[6:] + data = json.loads(json_data) + + trending = [] + for date_data in data.get("default", {}).get("trendingSearchesDays", []): + for search in date_data.get("trendingSearches", []): + trending.append({ 
+ "title": search.get("title", {}).get("query"), + "traffic": search.get("formattedTraffic"), + "articles": [ + { + "title": article.get("title"), + "url": article.get("url"), + "source": article.get("source") + } + for article in search.get("articles", [])[:3] + ] + }) + + return { + "country": country or settings.default_country, + "trending": trending[:10], + "timestamp": datetime.utcnow().isoformat() + } + + except Exception as e: + return { + "error": str(e), + "trending": [] + } \ No newline at end of file diff --git a/backup-services/google-search/backend/requirements.txt b/backup-services/google-search/backend/requirements.txt new file mode 100644 index 0000000..36fd2a3 --- /dev/null +++ b/backup-services/google-search/backend/requirements.txt @@ -0,0 +1,9 @@ +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +httpx==0.26.0 +pydantic==2.5.3 +pydantic-settings==2.1.0 +google-api-python-client==2.108.0 +beautifulsoup4==4.12.2 +redis==5.0.1 +serpapi==0.1.5 \ No newline at end of file diff --git a/backup-services/news-aggregator/backend/Dockerfile b/backup-services/news-aggregator/backend/Dockerfile new file mode 100644 index 0000000..a296111 --- /dev/null +++ b/backup-services/news-aggregator/backend/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application +COPY . . 
+ +# Run the application +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] \ No newline at end of file diff --git a/backup-services/news-aggregator/backend/app/__init__.py b/backup-services/news-aggregator/backend/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backup-services/news-aggregator/backend/app/main.py b/backup-services/news-aggregator/backend/app/main.py new file mode 100644 index 0000000..625101f --- /dev/null +++ b/backup-services/news-aggregator/backend/app/main.py @@ -0,0 +1,365 @@ +""" +News Aggregator Service +RSS 피드 제목을 구글 검색으로 확장하는 통합 서비스 +""" +from fastapi import FastAPI, HTTPException, Query, BackgroundTasks +from fastapi.middleware.cors import CORSMiddleware +from typing import List, Optional, Dict, Any +from datetime import datetime +import httpx +import asyncio +from pydantic import BaseModel +import logging + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +app = FastAPI( + title="News Aggregator Service", + description="RSS 피드와 구글 검색을 통합한 뉴스 수집 서비스", + version="1.0.0" +) + +# CORS 설정 +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Configuration +RSS_SERVICE_URL = "http://rss-feed-backend:8000" +GOOGLE_SEARCH_SERVICE_URL = "http://google-search-backend:8000" + +# Response Models +class NewsItem(BaseModel): + """뉴스 항목""" + rss_title: str + rss_link: Optional[str] = None + google_results: List[Dict[str, Any]] = [] + search_keyword: str + timestamp: datetime = None + +class AggregatedNews(BaseModel): + """통합 뉴스 결과""" + keyword: str + rss_feed_url: str + total_rss_entries: int + processed_entries: int + news_items: List[NewsItem] + processing_time: float + +# HTTP Client +client = httpx.AsyncClient(timeout=30.0) + +@app.on_event("startup") +async def startup(): + """서비스 시작""" + logger.info("News Aggregator Service starting...") + 
+@app.on_event("shutdown") +async def shutdown(): + """서비스 종료""" + await client.aclose() + logger.info("News Aggregator Service stopped") + +@app.get("/") +async def root(): + return { + "service": "News Aggregator Service", + "version": "1.0.0", + "description": "RSS 피드와 구글 검색 통합 서비스", + "endpoints": { + "aggregate": "GET /api/aggregate", + "aggregate_by_location": "GET /api/aggregate/location", + "aggregate_by_topic": "GET /api/aggregate/topic", + "health": "GET /health" + } + } + +@app.get("/health") +async def health_check(): + """헬스 체크""" + try: + # Check RSS service + rss_response = await client.get(f"{RSS_SERVICE_URL}/health") + rss_healthy = rss_response.status_code == 200 + + # Check Google Search service + google_response = await client.get(f"{GOOGLE_SEARCH_SERVICE_URL}/health") + google_healthy = google_response.status_code == 200 + + return { + "status": "healthy" if (rss_healthy and google_healthy) else "degraded", + "services": { + "rss_feed": "healthy" if rss_healthy else "unhealthy", + "google_search": "healthy" if google_healthy else "unhealthy" + }, + "timestamp": datetime.now().isoformat() + } + except Exception as e: + return { + "status": "unhealthy", + "error": str(e), + "timestamp": datetime.now().isoformat() + } + +@app.get("/api/aggregate", response_model=AggregatedNews) +async def aggregate_news( + q: str = Query(..., description="검색 키워드"), + limit: int = Query(10, description="처리할 RSS 항목 수", ge=1, le=50), + google_results_per_title: int = Query(5, description="각 제목당 구글 검색 결과 수", ge=1, le=10), + lang: str = Query("ko", description="언어 코드"), + country: str = Query("KR", description="국가 코드") +): + """ + 키워드로 RSS 피드를 검색하고, 각 제목을 구글에서 재검색 + + 1. 키워드로 Google News RSS 피드 가져오기 + 2. RSS 피드의 각 제목을 구글 검색 + 3. 
통합 결과 반환 + """ + start_time = datetime.now() + + try: + # Step 1: Get RSS feed from keyword + logger.info(f"Fetching RSS feed for keyword: {q}") + rss_response = await client.get( + f"{RSS_SERVICE_URL}/api/google-rss/search", + params={"q": q, "lang": lang, "country": country} + ) + rss_response.raise_for_status() + rss_data = rss_response.json() + + if not rss_data.get("success"): + raise HTTPException(status_code=500, detail=f"RSS 피드 가져오기 실패: {rss_data.get('error')}") + + # Step 2: Process each RSS entry with Google search + news_items = [] + entries = rss_data.get("entries", []) + + # If no entries field, fallback to sample_titles + if not entries: + titles = rss_data.get("sample_titles", [])[:limit] + entries = [{"title": title, "link": "", "published": ""} for title in titles] + else: + entries = entries[:limit] + + # Create tasks for parallel processing + search_tasks = [] + for entry in entries: + title = entry.get("title", "") + # Clean title for better search results + clean_title = title.split(" - ")[-1] if " - " in title else title + search_tasks.append( + search_google(clean_title, google_results_per_title, lang, country) + ) + + # Execute searches in parallel + logger.info(f"Searching Google for {len(search_tasks)} RSS entries") + search_results = await asyncio.gather(*search_tasks, return_exceptions=True) + + # Combine results + for i, entry in enumerate(entries): + google_results = [] + if not isinstance(search_results[i], Exception): + google_results = search_results[i] + + title = entry.get("title", "") + news_items.append(NewsItem( + rss_title=title, + rss_link=entry.get("link", ""), + google_results=google_results, + search_keyword=title.split(" - ")[-1] if " - " in title else title, + timestamp=datetime.now() + )) + + # Calculate processing time + processing_time = (datetime.now() - start_time).total_seconds() + + return AggregatedNews( + keyword=q, + rss_feed_url=rss_data.get("feed_url", ""), + total_rss_entries=rss_data.get("entry_count", 0), 
+ processed_entries=len(news_items), + news_items=news_items, + processing_time=processing_time + ) + + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error: {e}") + raise HTTPException(status_code=e.response.status_code, detail=str(e)) + except Exception as e: + logger.error(f"Error in aggregate_news: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +async def search_google(query: str, num_results: int, lang: str, country: str) -> List[Dict[str, Any]]: + """구글 검색 서비스 호출 - 전체 콘텐츠 포함""" + try: + # Full content API 직접 호출 + response = await client.get( + f"{GOOGLE_SEARCH_SERVICE_URL}/api/search/full", + params={ + "q": query, + "num": num_results, + "lang": lang, + "country": country + } + ) + response.raise_for_status() + data = response.json() + results = data.get("results", []) + + # full_content가 이미 포함되어 있으므로 그대로 반환 + logger.info(f"Google search for '{query}' returned {len(results)} results with full content") + + return results + except Exception as e: + logger.error(f"Google search error for '{query}': {e}") + # Fallback to basic search without full content + try: + response = await client.get( + f"{GOOGLE_SEARCH_SERVICE_URL}/api/search", + params={ + "q": query, + "num": num_results, + "lang": lang, + "country": country + } + ) + response.raise_for_status() + data = response.json() + return data.get("results", []) + except: + return [] + +@app.get("/api/aggregate/location", response_model=AggregatedNews) +async def aggregate_news_by_location( + location: str = Query(..., description="지역명 (예: Seoul, Tokyo)"), + limit: int = Query(10, description="처리할 RSS 항목 수", ge=1, le=50), + google_results_per_title: int = Query(5, description="각 제목당 구글 검색 결과 수", ge=1, le=10), + lang: str = Query("ko", description="언어 코드"), + country: str = Query("KR", description="국가 코드") +): + """지역 기반 RSS 피드를 가져와서 각 제목을 구글 검색""" + start_time = datetime.now() + + try: + # Get location-based RSS feed + logger.info(f"Fetching RSS feed for location: {location}") + 
rss_response = await client.get( + f"{RSS_SERVICE_URL}/api/google-rss/location", + params={"location": location, "lang": lang, "country": country} + ) + rss_response.raise_for_status() + rss_data = rss_response.json() + + if not rss_data.get("success"): + raise HTTPException(status_code=500, detail=f"RSS 피드 가져오기 실패: {rss_data.get('error')}") + + # Process titles + news_items = [] + titles = rss_data.get("sample_titles", [])[:limit] + + search_tasks = [] + for title in titles: + clean_title = title.split(" - ")[-1] if " - " in title else title + search_tasks.append( + search_google(clean_title, google_results_per_title, lang, country) + ) + + search_results = await asyncio.gather(*search_tasks, return_exceptions=True) + + for i, title in enumerate(titles): + google_results = [] + if not isinstance(search_results[i], Exception): + google_results = search_results[i] + + news_items.append(NewsItem( + rss_title=title, + google_results=google_results, + search_keyword=title.split(" - ")[-1] if " - " in title else title, + timestamp=datetime.now() + )) + + processing_time = (datetime.now() - start_time).total_seconds() + + return AggregatedNews( + keyword=f"Location: {location}", + rss_feed_url=rss_data.get("feed_url", ""), + total_rss_entries=rss_data.get("entry_count", 0), + processed_entries=len(news_items), + news_items=news_items, + processing_time=processing_time + ) + + except Exception as e: + logger.error(f"Error in aggregate_news_by_location: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/aggregate/topic", response_model=AggregatedNews) +async def aggregate_news_by_topic( + category: str = Query(..., description="카테고리 (TECHNOLOGY, BUSINESS, HEALTH 등)"), + limit: int = Query(10, description="처리할 RSS 항목 수", ge=1, le=50), + google_results_per_title: int = Query(5, description="각 제목당 구글 검색 결과 수", ge=1, le=10), + lang: str = Query("ko", description="언어 코드"), + country: str = Query("KR", description="국가 코드") +): + """주제별 RSS 피드를 가져와서 각 
제목을 구글 검색""" + start_time = datetime.now() + + try: + # Get topic-based RSS feed + logger.info(f"Fetching RSS feed for topic: {category}") + rss_response = await client.get( + f"{RSS_SERVICE_URL}/api/google-rss/topic", + params={"category": category, "lang": lang, "country": country} + ) + rss_response.raise_for_status() + rss_data = rss_response.json() + + if not rss_data.get("success"): + raise HTTPException(status_code=500, detail=f"RSS 피드 가져오기 실패: {rss_data.get('error')}") + + # Process titles + news_items = [] + titles = rss_data.get("sample_titles", [])[:limit] + + search_tasks = [] + for title in titles: + clean_title = title.split(" - ")[-1] if " - " in title else title + search_tasks.append( + search_google(clean_title, google_results_per_title, lang, country) + ) + + search_results = await asyncio.gather(*search_tasks, return_exceptions=True) + + for i, title in enumerate(titles): + google_results = [] + if not isinstance(search_results[i], Exception): + google_results = search_results[i] + + news_items.append(NewsItem( + rss_title=title, + google_results=google_results, + search_keyword=title.split(" - ")[-1] if " - " in title else title, + timestamp=datetime.now() + )) + + processing_time = (datetime.now() - start_time).total_seconds() + + return AggregatedNews( + keyword=f"Topic: {category}", + rss_feed_url=rss_data.get("feed_url", ""), + total_rss_entries=rss_data.get("entry_count", 0), + processed_entries=len(news_items), + news_items=news_items, + processing_time=processing_time + ) + + except Exception as e: + logger.error(f"Error in aggregate_news_by_topic: {e}") + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/backup-services/news-aggregator/backend/requirements.txt b/backup-services/news-aggregator/backend/requirements.txt new file mode 100644 index 0000000..5881f23 --- /dev/null +++ b/backup-services/news-aggregator/backend/requirements.txt @@ -0,0 +1,5 @@ +fastapi==0.104.1 
+uvicorn[standard]==0.24.0 +httpx==0.25.2 +pydantic==2.5.0 +python-multipart==0.0.6 \ No newline at end of file diff --git a/backup-services/news-aggregator/backend/test_aggregator.py b/backup-services/news-aggregator/backend/test_aggregator.py new file mode 100755 index 0000000..cffea4d --- /dev/null +++ b/backup-services/news-aggregator/backend/test_aggregator.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +""" +News Aggregator Service Test +RSS 피드 제목을 구글 full content 검색으로 확장하는 통합 테스트 +""" +import asyncio +import httpx +import json +from datetime import datetime +from typing import Dict, Any + +# Service URL +SERVICE_URL = "http://localhost:8018" + +async def test_aggregate_with_full_content(): + """키워드로 RSS 피드를 검색하고 full content 구글 검색 테스트""" + async with httpx.AsyncClient(timeout=60.0) as client: + print("\n" + "="*60) + print("뉴스 통합 서비스 Full Content 테스트") + print("="*60) + + # Test with keyword "인공지능" + print("\n1. 키워드 '인공지능'으로 RSS 피드 검색 및 구글 full content 검색") + print("-" * 40) + + response = await client.get( + f"{SERVICE_URL}/api/aggregate", + params={ + "q": "인공지능", + "limit": 3, # 테스트용으로 3개만 + "google_results_per_title": 2, # 각 제목당 2개 구글 결과 + "lang": "ko", + "country": "KR" + } + ) + + if response.status_code == 200: + data = response.json() + print(f"✓ RSS 피드 URL: {data['rss_feed_url']}") + print(f"✓ 전체 RSS 항목 수: {data['total_rss_entries']}") + print(f"✓ 처리된 항목 수: {data['processed_entries']}") + print(f"✓ 처리 시간: {data['processing_time']:.2f}초") + + # Check each news item for full content + for i, item in enumerate(data['news_items'], 1): + print(f"\n [{i}] RSS 제목: {item['rss_title'][:50]}...") + print(f" 검색 키워드: {item['search_keyword'][:50]}...") + print(f" 구글 검색 결과 수: {len(item['google_results'])}") + + # Check if google results have full_content + for j, result in enumerate(item['google_results'], 1): + has_full_content = 'full_content' in result + if has_full_content: + full_content = result.get('full_content', '') + if isinstance(full_content, str): + 
content_length = len(full_content) + else: + content_length = len(str(full_content)) + else: + content_length = 0 + + print(f" - 결과 {j}: {result.get('title', 'N/A')[:40]}...") + print(f" Full Content 포함: {'✓' if has_full_content else '✗'}") + if has_full_content: + print(f" Content 길이: {content_length:,} 문자") + # Show first 200 chars of content + if isinstance(result['full_content'], str): + preview = result['full_content'][:200].replace('\n', ' ') + print(f" 미리보기: {preview}...") + else: + print(f" Content 타입: {type(result['full_content'])}") + print(f" Content 데이터: {str(result['full_content'])[:200]}...") + else: + print(f"✗ 오류: {response.status_code}") + print(f" 상세: {response.text}") + +async def test_aggregate_by_location(): + """지역 기반 RSS 피드 및 full content 테스트""" + async with httpx.AsyncClient(timeout=60.0) as client: + print("\n" + "="*60) + print("지역 기반 뉴스 통합 Full Content 테스트") + print("="*60) + + print("\n2. 지역 'Seoul'로 RSS 피드 검색 및 구글 full content 검색") + print("-" * 40) + + response = await client.get( + f"{SERVICE_URL}/api/aggregate/location", + params={ + "location": "Seoul", + "limit": 2, + "google_results_per_title": 2, + "lang": "ko", + "country": "KR" + } + ) + + if response.status_code == 200: + data = response.json() + print(f"✓ 지역: {data['keyword']}") + print(f"✓ RSS 피드 URL: {data['rss_feed_url']}") + print(f"✓ 처리된 항목 수: {data['processed_entries']}") + + # Check full content availability + full_content_count = 0 + total_content_size = 0 + + for item in data['news_items']: + for result in item['google_results']: + if 'full_content' in result: + full_content_count += 1 + content = result['full_content'] + if isinstance(content, str): + total_content_size += len(content) + else: + total_content_size += len(str(content)) + + print(f"\n📊 Full Content 통계:") + print(f" - Full Content 포함 결과: {full_content_count}개") + print(f" - 전체 Content 크기: {total_content_size:,} 문자") + print(f" - 평균 Content 크기: {total_content_size//max(full_content_count, 1):,} 문자") + 
else: + print(f"✗ 오류: {response.status_code}") + +async def test_aggregate_by_topic(): + """주제별 RSS 피드 및 full content 테스트""" + async with httpx.AsyncClient(timeout=60.0) as client: + print("\n" + "="*60) + print("주제별 뉴스 통합 Full Content 테스트") + print("="*60) + + print("\n3. 주제 'TECHNOLOGY'로 RSS 피드 검색 및 구글 full content 검색") + print("-" * 40) + + response = await client.get( + f"{SERVICE_URL}/api/aggregate/topic", + params={ + "category": "TECHNOLOGY", + "limit": 2, + "google_results_per_title": 3, + "lang": "ko", + "country": "KR" + } + ) + + if response.status_code == 200: + data = response.json() + print(f"✓ 주제: {data['keyword']}") + print(f"✓ 처리 시간: {data['processing_time']:.2f}초") + + # Analyze content quality for AI summarization + print("\n📝 AI 요약을 위한 Content 품질 분석:") + for i, item in enumerate(data['news_items'], 1): + print(f"\n 뉴스 항목 {i}:") + for j, result in enumerate(item['google_results'], 1): + if 'full_content' in result: + content = result['full_content'] + if isinstance(content, str): + # Check content quality indicators + has_paragraphs = '\n\n' in content or '

' in content + has_sufficient_length = len(content) > 500 + has_korean = any(ord(char) >= 0xAC00 and ord(char) <= 0xD7A3 for char in content[:min(100, len(content))]) + else: + content_str = str(content) + has_paragraphs = '\n\n' in content_str or '

' in content_str + has_sufficient_length = len(content_str) > 500 + has_korean = any(ord(char) >= 0xAC00 and ord(char) <= 0xD7A3 for char in content_str[:min(100, len(content_str))]) + + print(f" 결과 {j} 품질 체크:") + print(f" - 충분한 길이 (>500자): {'✓' if has_sufficient_length else '✗'}") + print(f" - 단락 구조 포함: {'✓' if has_paragraphs else '✗'}") + print(f" - 한국어 콘텐츠: {'✓' if has_korean else '✗'}") + print(f" - AI 요약 가능: {'✓' if (has_sufficient_length and has_paragraphs) else '✗'}") + else: + print(f"✗ 오류: {response.status_code}") + +async def test_health_check(): + """서비스 상태 확인""" + async with httpx.AsyncClient() as client: + print("\n" + "="*60) + print("서비스 Health Check") + print("="*60) + + response = await client.get(f"{SERVICE_URL}/health") + if response.status_code == 200: + data = response.json() + print(f"✓ 통합 서비스 상태: {data['status']}") + print(f" - RSS 서비스: {data['services']['rss_feed']}") + print(f" - Google 검색 서비스: {data['services']['google_search']}") + else: + print(f"✗ Health check 실패: {response.status_code}") + +async def main(): + """메인 테스트 실행""" + print("\n" + "="*70) + print(" News Aggregator Full Content Integration Test ") + print(" RSS 피드 + Google Full Content 통합 테스트 ") + print("="*70) + + # Run tests + await test_health_check() + await test_aggregate_with_full_content() + await test_aggregate_by_location() + await test_aggregate_by_topic() + + print("\n" + "="*70) + print(" 테스트 완료 - Full Content 통합 확인 ") + print("="*70) + print("\n✅ 모든 테스트가 완료되었습니다.") + print(" RSS 피드 제목을 구글 full content로 검색하는 기능이 정상 작동합니다.") + print(" AI 요약을 위한 충분한 콘텐츠가 수집되고 있습니다.") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/backup-services/rss-feed/README.md b/backup-services/rss-feed/README.md new file mode 100644 index 0000000..bac1761 --- /dev/null +++ b/backup-services/rss-feed/README.md @@ -0,0 +1,204 @@ +# RSS Feed Subscription Service + +RSS/Atom 피드를 구독하고 관리하는 서비스입니다. + +## 주요 기능 + +### 1. 
피드 구독 관리 +- RSS/Atom 피드 URL 구독 +- 카테고리별 분류 (뉴스, 기술, 비즈니스 등) +- 자동 업데이트 스케줄링 +- 피드 상태 모니터링 + +### 2. 엔트리 관리 +- 새로운 글 자동 수집 +- 읽음/안읽음 상태 관리 +- 별표 표시 기능 +- 전체 내용 저장 + +### 3. 자동 업데이트 +- 설정 가능한 업데이트 주기 (기본 15분) +- 백그라운드 스케줄러 +- 에러 처리 및 재시도 + +## API 엔드포인트 + +### 피드 구독 +``` +POST /api/feeds +{ + "url": "https://example.com/rss", + "title": "Example Blog", + "category": "tech", + "update_interval": 900 +} +``` + +### 피드 목록 조회 +``` +GET /api/feeds?category=tech&status=active +``` + +### 엔트리 조회 +``` +GET /api/entries?feed_id=xxx&is_read=false&limit=50 +``` + +### 읽음 표시 +``` +PUT /api/entries/{entry_id}/read?is_read=true +``` + +### 별표 표시 +``` +PUT /api/entries/{entry_id}/star?is_starred=true +``` + +### 통계 조회 +``` +GET /api/stats?feed_id=xxx +``` + +### OPML 내보내기 +``` +GET /api/export/opml +``` + +## 사용 예제 + +### 1. 기술 블로그 구독 +```bash +curl -X POST http://localhost:8017/api/feeds \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://techcrunch.com/feed/", + "category": "tech" + }' +``` + +### 2. 한국 뉴스 RSS 구독 +```bash +curl -X POST http://localhost:8017/api/feeds \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://www.hani.co.kr/rss/", + "category": "news", + "update_interval": 600 + }' +``` + +### 3. 안읽은 엔트리 조회 +```bash +curl "http://localhost:8017/api/entries?is_read=false&limit=20" +``` + +### 4. 
모든 엔트리 읽음 처리 +```bash +curl -X POST "http://localhost:8017/api/entries/mark-all-read?feed_id=xxx" +``` + +## 지원 카테고리 + +- `news`: 뉴스 +- `tech`: 기술 +- `business`: 비즈니스 +- `science`: 과학 +- `health`: 건강 +- `sports`: 스포츠 +- `entertainment`: 엔터테인먼트 +- `lifestyle`: 라이프스타일 +- `politics`: 정치 +- `other`: 기타 + +## 환경 설정 + +### 필수 설정 +```env +MONGODB_URL=mongodb://mongodb:27017 +DB_NAME=rss_feed_db +REDIS_URL=redis://redis:6379 +REDIS_DB=3 +``` + +### 선택 설정 +```env +DEFAULT_UPDATE_INTERVAL=900 # 기본 업데이트 주기 (초) +MAX_ENTRIES_PER_FEED=100 # 피드당 최대 엔트리 수 +ENABLE_SCHEDULER=true # 자동 업데이트 활성화 +SCHEDULER_TIMEZONE=Asia/Seoul # 스케줄러 타임존 +``` + +## Docker 실행 + +```bash +# 빌드 및 실행 +docker-compose build rss-feed-backend +docker-compose up -d rss-feed-backend + +# 로그 확인 +docker-compose logs -f rss-feed-backend +``` + +## 데이터 구조 + +### FeedSubscription +- `title`: 피드 제목 +- `url`: RSS/Atom URL +- `description`: 설명 +- `category`: 카테고리 +- `status`: 상태 (active/inactive/error) +- `update_interval`: 업데이트 주기 +- `last_fetch`: 마지막 업데이트 시간 +- `error_count`: 에러 횟수 + +### FeedEntry +- `feed_id`: 피드 ID +- `title`: 글 제목 +- `link`: 원문 링크 +- `summary`: 요약 +- `content`: 전체 내용 +- `author`: 작성자 +- `published`: 발행일 +- `categories`: 태그/카테고리 +- `thumbnail`: 썸네일 이미지 +- `is_read`: 읽음 상태 +- `is_starred`: 별표 상태 + +## 추천 RSS 피드 + +### 한국 뉴스 +- 한겨레: `https://www.hani.co.kr/rss/` +- 조선일보: `https://www.chosun.com/arc/outboundfeeds/rss/` +- 중앙일보: `https://rss.joins.com/joins_news_list.xml` + +### 기술 블로그 +- TechCrunch: `https://techcrunch.com/feed/` +- The Verge: `https://www.theverge.com/rss/index.xml` +- Ars Technica: `https://feeds.arstechnica.com/arstechnica/index` + +### 개발자 블로그 +- GitHub Blog: `https://github.blog/feed/` +- Stack Overflow Blog: `https://stackoverflow.blog/feed/` +- Dev.to: `https://dev.to/feed` + +## 헬스 체크 + +```bash +curl http://localhost:8017/health +``` + +## 문제 해결 + +### 1. 피드 파싱 실패 +- RSS/Atom 형식이 올바른지 확인 +- URL이 접근 가능한지 확인 +- 피드 인코딩 확인 (UTF-8 권장) + +### 2. 
업데이트 안됨 +- 스케줄러 활성화 확인 (`ENABLE_SCHEDULER=true`) +- MongoDB 연결 상태 확인 +- 피드 상태가 `active`인지 확인 + +### 3. 중복 엔트리 +- 피드에서 고유 ID를 제공하는지 확인 +- 엔트리 ID 생성 로직 확인 \ No newline at end of file diff --git a/backup-services/rss-feed/backend/Dockerfile b/backup-services/rss-feed/backend/Dockerfile new file mode 100644 index 0000000..80919ad --- /dev/null +++ b/backup-services/rss-feed/backend/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements and install Python dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . + +# Expose port +EXPOSE 8000 + +# Run the application +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] \ No newline at end of file diff --git a/backup-services/rss-feed/backend/app/__init__.py b/backup-services/rss-feed/backend/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backup-services/rss-feed/backend/app/config.py b/backup-services/rss-feed/backend/app/config.py new file mode 100644 index 0000000..6dc2564 --- /dev/null +++ b/backup-services/rss-feed/backend/app/config.py @@ -0,0 +1,26 @@ +from pydantic_settings import BaseSettings +from typing import Optional + +class Settings(BaseSettings): + # MongoDB Configuration + mongodb_url: str = "mongodb://mongodb:27017" + db_name: str = "rss_feed_db" + + # Redis Configuration + redis_url: str = "redis://redis:6379" + redis_db: int = 3 + + # Feed Settings + default_update_interval: int = 900 # 15 minutes in seconds + max_entries_per_feed: int = 100 + fetch_timeout: int = 30 + + # Scheduler Settings + enable_scheduler: bool = True + scheduler_timezone: str = "Asia/Seoul" + + class Config: + env_file = ".env" + env_file_encoding = "utf-8" + +settings = Settings() \ No newline at end of file diff --git 
a/backup-services/rss-feed/backend/app/feed_parser.py b/backup-services/rss-feed/backend/app/feed_parser.py new file mode 100644 index 0000000..fe04223 --- /dev/null +++ b/backup-services/rss-feed/backend/app/feed_parser.py @@ -0,0 +1,222 @@ +import feedparser +import httpx +from typing import List, Dict, Any, Optional +from datetime import datetime +from dateutil import parser as date_parser +from bs4 import BeautifulSoup +import re +import hashlib +from .models import FeedEntry + +class FeedParser: + def __init__(self): + self.client = httpx.AsyncClient( + timeout=30.0, + follow_redirects=True, + headers={ + "User-Agent": "Mozilla/5.0 (compatible; RSS Feed Reader/1.0)" + } + ) + + async def parse_feed(self, url: str) -> Dict[str, Any]: + """Parse RSS/Atom feed from URL""" + try: + response = await self.client.get(url) + response.raise_for_status() + + # Parse the feed + feed = feedparser.parse(response.content) + + if feed.bozo and feed.bozo_exception: + raise Exception(f"Feed parsing error: {feed.bozo_exception}") + + return { + "success": True, + "feed": feed.feed, + "entries": feed.entries, + "error": None + } + except Exception as e: + return { + "success": False, + "feed": None, + "entries": [], + "error": str(e) + } + + def extract_entry_data(self, entry: Any, feed_id: str) -> FeedEntry: + """Extract and normalize entry data""" + # Generate unique entry ID + entry_id = self._generate_entry_id(entry) + + # Extract title + title = entry.get("title", "Untitled") + + # Extract link + link = entry.get("link", "") + + # Extract summary/description + summary = self._extract_summary(entry) + + # Extract content + content = self._extract_content(entry) + + # Extract author + author = entry.get("author", "") + + # Extract published date + published = self._parse_date(entry.get("published", entry.get("updated"))) + + # Extract updated date + updated = self._parse_date(entry.get("updated", entry.get("published"))) + + # Extract categories + categories = 
self._extract_categories(entry) + + # Extract thumbnail + thumbnail = self._extract_thumbnail(entry) + + # Extract enclosures (media attachments) + enclosures = self._extract_enclosures(entry) + + return FeedEntry( + feed_id=feed_id, + entry_id=entry_id, + title=title, + link=link, + summary=summary, + content=content, + author=author, + published=published, + updated=updated, + categories=categories, + thumbnail=thumbnail, + enclosures=enclosures + ) + + def _generate_entry_id(self, entry: Any) -> str: + """Generate unique ID for entry""" + # Try to use entry's unique ID first + if hasattr(entry, "id"): + return entry.id + + # Generate from link and title + unique_str = f"{entry.get('link', '')}{entry.get('title', '')}" + return hashlib.md5(unique_str.encode()).hexdigest() + + def _extract_summary(self, entry: Any) -> Optional[str]: + """Extract and clean summary""" + summary = entry.get("summary", entry.get("description", "")) + if summary: + # Clean HTML tags + soup = BeautifulSoup(summary, "html.parser") + text = soup.get_text(separator=" ", strip=True) + # Limit length + if len(text) > 500: + text = text[:497] + "..." 
+ return text + return None + + def _extract_content(self, entry: Any) -> Optional[str]: + """Extract full content""" + content = "" + + # Try content field + if hasattr(entry, "content"): + for c in entry.content: + if c.get("type") in ["text/html", "text/plain"]: + content = c.get("value", "") + break + + # Fallback to summary detail + if not content and hasattr(entry, "summary_detail"): + content = entry.summary_detail.get("value", "") + + # Clean excessive whitespace + if content: + content = re.sub(r'\s+', ' ', content).strip() + return content + + return None + + def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]: + """Parse date string to datetime""" + if not date_str: + return None + + try: + # Try parsing with dateutil + return date_parser.parse(date_str) + except: + try: + # Try feedparser's time structure + if hasattr(date_str, "tm_year"): + import time + return datetime.fromtimestamp(time.mktime(date_str)) + except: + pass + + return None + + def _extract_categories(self, entry: Any) -> List[str]: + """Extract categories/tags""" + categories = [] + + if hasattr(entry, "tags"): + for tag in entry.tags: + if hasattr(tag, "term"): + categories.append(tag.term) + elif isinstance(tag, str): + categories.append(tag) + + return categories + + def _extract_thumbnail(self, entry: Any) -> Optional[str]: + """Extract thumbnail image URL""" + # Check media thumbnail + if hasattr(entry, "media_thumbnail"): + for thumb in entry.media_thumbnail: + if thumb.get("url"): + return thumb["url"] + + # Check media content + if hasattr(entry, "media_content"): + for media in entry.media_content: + if media.get("type", "").startswith("image/"): + return media.get("url") + + # Check enclosures + if hasattr(entry, "enclosures"): + for enc in entry.enclosures: + if enc.get("type", "").startswith("image/"): + return enc.get("href", enc.get("url")) + + # Extract from content/summary + content = entry.get("summary", "") + entry.get("content", [{}])[0].get("value", 
"") if hasattr(entry, "content") else "" + if content: + soup = BeautifulSoup(content, "html.parser") + img = soup.find("img") + if img and img.get("src"): + return img["src"] + + return None + + def _extract_enclosures(self, entry: Any) -> List[Dict[str, Any]]: + """Extract media enclosures""" + enclosures = [] + + if hasattr(entry, "enclosures"): + for enc in entry.enclosures: + enclosure = { + "url": enc.get("href", enc.get("url", "")), + "type": enc.get("type", ""), + "length": enc.get("length", 0) + } + if enclosure["url"]: + enclosures.append(enclosure) + + return enclosures + + async def close(self): + """Close HTTP client""" + await self.client.aclose() \ No newline at end of file diff --git a/backup-services/rss-feed/backend/app/google_rss.py b/backup-services/rss-feed/backend/app/google_rss.py new file mode 100644 index 0000000..b4fd24f --- /dev/null +++ b/backup-services/rss-feed/backend/app/google_rss.py @@ -0,0 +1,115 @@ +""" +Google News RSS Feed Generator +구글 뉴스 RSS 피드 URL 생성 및 구독 지원 +""" +from typing import Optional, List +from urllib.parse import quote_plus +from enum import Enum + +class GoogleNewsCategory(str, Enum): + """구글 뉴스 카테고리""" + WORLD = "WORLD" + NATION = "NATION" + BUSINESS = "BUSINESS" + TECHNOLOGY = "TECHNOLOGY" + ENTERTAINMENT = "ENTERTAINMENT" + SPORTS = "SPORTS" + SCIENCE = "SCIENCE" + HEALTH = "HEALTH" + +class GoogleNewsRSS: + """Google News RSS 피드 URL 생성기""" + + BASE_URL = "https://news.google.com/rss" + + @staticmethod + def search_feed(query: str, lang: str = "ko", country: str = "KR") -> str: + """ + 키워드 검색 RSS 피드 URL 생성 + + Args: + query: 검색 키워드 + lang: 언어 코드 (ko, en, ja, zh-CN 등) + country: 국가 코드 (KR, US, JP, CN 등) + + Returns: + RSS 피드 URL + """ + encoded_query = quote_plus(query) + return f"{GoogleNewsRSS.BASE_URL}/search?q={encoded_query}&hl={lang}&gl={country}&ceid={country}:{lang}" + + @staticmethod + def topic_feed(category: GoogleNewsCategory, lang: str = "ko", country: str = "KR") -> str: + """ + 카테고리별 RSS 피드 URL 생성 
+ + Args: + category: 뉴스 카테고리 + lang: 언어 코드 + country: 국가 코드 + + Returns: + RSS 피드 URL + """ + return f"{GoogleNewsRSS.BASE_URL}/headlines/section/topic/{category.value}?hl={lang}&gl={country}&ceid={country}:{lang}" + + @staticmethod + def location_feed(location: str, lang: str = "ko", country: str = "KR") -> str: + """ + 지역 뉴스 RSS 피드 URL 생성 + + Args: + location: 지역명 (예: Seoul, 서울, New York) + lang: 언어 코드 + country: 국가 코드 + + Returns: + RSS 피드 URL + """ + encoded_location = quote_plus(location) + return f"{GoogleNewsRSS.BASE_URL}/headlines/section/geo/{encoded_location}?hl={lang}&gl={country}&ceid={country}:{lang}" + + @staticmethod + def trending_feed(lang: str = "ko", country: str = "KR") -> str: + """ + 트렌딩 뉴스 RSS 피드 URL 생성 + + Args: + lang: 언어 코드 + country: 국가 코드 + + Returns: + RSS 피드 URL + """ + return f"{GoogleNewsRSS.BASE_URL}?hl={lang}&gl={country}&ceid={country}:{lang}" + + @staticmethod + def get_common_feeds() -> List[dict]: + """ + 자주 사용되는 RSS 피드 목록 반환 + + Returns: + 피드 정보 리스트 + """ + return [ + { + "title": "구글 뉴스 - 한국 헤드라인", + "url": GoogleNewsRSS.trending_feed("ko", "KR"), + "description": "한국 주요 뉴스" + }, + { + "title": "구글 뉴스 - 기술", + "url": GoogleNewsRSS.topic_feed(GoogleNewsCategory.TECHNOLOGY, "ko", "KR"), + "description": "기술 관련 뉴스" + }, + { + "title": "구글 뉴스 - 비즈니스", + "url": GoogleNewsRSS.topic_feed(GoogleNewsCategory.BUSINESS, "ko", "KR"), + "description": "비즈니스 뉴스" + }, + { + "title": "Google News - World", + "url": GoogleNewsRSS.topic_feed(GoogleNewsCategory.WORLD, "en", "US"), + "description": "World news in English" + } + ] \ No newline at end of file diff --git a/backup-services/rss-feed/backend/app/main.py b/backup-services/rss-feed/backend/app/main.py new file mode 100644 index 0000000..b8f4fe2 --- /dev/null +++ b/backup-services/rss-feed/backend/app/main.py @@ -0,0 +1,596 @@ +from fastapi import FastAPI, HTTPException, Query, Path, BackgroundTasks +from fastapi.middleware.cors import CORSMiddleware +from typing import List, Optional 
+from datetime import datetime +from contextlib import asynccontextmanager +import motor.motor_asyncio +from apscheduler.schedulers.asyncio import AsyncIOScheduler +from apscheduler.triggers.interval import IntervalTrigger +import pytz +import redis.asyncio as redis +import json + +from .config import settings +from .models import ( + FeedSubscription, FeedEntry, CreateFeedRequest, + UpdateFeedRequest, FeedStatistics, FeedStatus, FeedCategory +) +from .feed_parser import FeedParser +from .google_rss import GoogleNewsRSS, GoogleNewsCategory + +# Database connection +db_client = None +db = None +redis_client = None +scheduler = None +parser = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + global db_client, db, redis_client, scheduler, parser + + # Connect to MongoDB + db_client = motor.motor_asyncio.AsyncIOMotorClient(settings.mongodb_url) + db = db_client[settings.db_name] + + # Connect to Redis + redis_client = redis.from_url(settings.redis_url, db=settings.redis_db) + + # Initialize feed parser + parser = FeedParser() + + # Initialize scheduler + if settings.enable_scheduler: + scheduler = AsyncIOScheduler(timezone=pytz.timezone(settings.scheduler_timezone)) + scheduler.add_job( + update_all_feeds, + trigger=IntervalTrigger(seconds=60), + id="update_feeds", + replace_existing=True + ) + scheduler.start() + print("RSS Feed scheduler started") + + print("RSS Feed Service starting...") + yield + + # Cleanup + if scheduler: + scheduler.shutdown() + if parser: + await parser.close() + if redis_client: + await redis_client.close() + db_client.close() + print("RSS Feed Service stopping...") + +app = FastAPI( + title="RSS Feed Service", + description="RSS/Atom 피드 구독 및 관리 서비스", + version="1.0.0", + lifespan=lifespan +) + +# CORS 설정 +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Helper functions +async def update_feed(feed_id: str): + """Update a single feed""" 
+ feed = await db.feeds.find_one({"_id": feed_id}) + if not feed: + return + + # Parse feed + result = await parser.parse_feed(feed["url"]) + + if result["success"]: + # Update feed metadata + await db.feeds.update_one( + {"_id": feed_id}, + { + "$set": { + "last_fetch": datetime.now(), + "status": FeedStatus.ACTIVE, + "error_count": 0, + "last_error": None, + "updated_at": datetime.now() + } + } + ) + + # Process entries + for entry_data in result["entries"][:settings.max_entries_per_feed]: + entry = parser.extract_entry_data(entry_data, feed_id) + + # Check if entry already exists + existing = await db.entries.find_one({ + "feed_id": feed_id, + "entry_id": entry.entry_id + }) + + if not existing: + # Insert new entry + await db.entries.insert_one(entry.dict()) + else: + # Update existing entry if newer + if entry.updated and existing.get("updated"): + if entry.updated > existing["updated"]: + await db.entries.update_one( + {"_id": existing["_id"]}, + {"$set": entry.dict(exclude={"id", "created_at"})} + ) + else: + # Update error status + await db.feeds.update_one( + {"_id": feed_id}, + { + "$set": { + "status": FeedStatus.ERROR, + "last_error": result["error"], + "updated_at": datetime.now() + }, + "$inc": {"error_count": 1} + } + ) + +async def update_all_feeds(): + """Update all active feeds that need updating""" + now = datetime.now() + + # Find feeds that need updating + feeds = await db.feeds.find({ + "status": FeedStatus.ACTIVE, + "$or": [ + {"last_fetch": None}, + {"last_fetch": {"$lt": now}} + ] + }).to_list(100) + + for feed in feeds: + # Check if it's time to update + if feed.get("last_fetch"): + time_diff = (now - feed["last_fetch"]).total_seconds() + if time_diff < feed.get("update_interval", settings.default_update_interval): + continue + + # Update feed in background + await update_feed(str(feed["_id"])) + +# API Endpoints + +@app.get("/") +async def root(): + return { + "service": "RSS Feed Service", + "version": "1.0.0", + "timestamp": 
datetime.now().isoformat(), + "endpoints": { + "subscribe": "POST /api/feeds", + "list_feeds": "GET /api/feeds", + "get_entries": "GET /api/entries", + "mark_read": "PUT /api/entries/{entry_id}/read", + "mark_starred": "PUT /api/entries/{entry_id}/star", + "statistics": "GET /api/stats" + } + } + +@app.get("/health") +async def health_check(): + return { + "status": "healthy", + "service": "rss-feed", + "timestamp": datetime.now().isoformat() + } + +@app.post("/api/feeds", response_model=FeedSubscription) +async def subscribe_to_feed(request: CreateFeedRequest, background_tasks: BackgroundTasks): + """RSS/Atom 피드 구독""" + # Check if already subscribed + existing = await db.feeds.find_one({"url": str(request.url)}) + if existing: + raise HTTPException(status_code=400, detail="이미 구독 중인 피드입니다") + + # Parse feed to get metadata + result = await parser.parse_feed(str(request.url)) + if not result["success"]: + raise HTTPException(status_code=400, detail=f"피드 파싱 실패: {result['error']}") + + # Create subscription + feed = FeedSubscription( + title=request.title or result["feed"].get("title", "Untitled Feed"), + url=request.url, + description=result["feed"].get("description", ""), + category=request.category, + update_interval=request.update_interval or settings.default_update_interval + ) + + # Save to database - convert URL to string + feed_dict = feed.dict() + feed_dict["url"] = str(feed_dict["url"]) + result = await db.feeds.insert_one(feed_dict) + feed.id = str(result.inserted_id) + + # Fetch entries in background + background_tasks.add_task(update_feed, feed.id) + + return feed + +@app.get("/api/feeds", response_model=List[FeedSubscription]) +async def list_feeds( + category: Optional[str] = Query(None, description="카테고리 필터"), + status: Optional[FeedStatus] = Query(None, description="상태 필터") +): + """구독 중인 피드 목록 조회""" + query = {} + if category: + query["category"] = category + if status: + query["status"] = status + + feeds = await db.feeds.find(query).to_list(100) + 
for feed in feeds: + feed["_id"] = str(feed["_id"]) + + return feeds + +@app.get("/api/feeds/{feed_id}", response_model=FeedSubscription) +async def get_feed(feed_id: str = Path(..., description="피드 ID")): + """특정 피드 정보 조회""" + feed = await db.feeds.find_one({"_id": feed_id}) + if not feed: + raise HTTPException(status_code=404, detail="피드를 찾을 수 없습니다") + + feed["_id"] = str(feed["_id"]) + return feed + +@app.put("/api/feeds/{feed_id}", response_model=FeedSubscription) +async def update_feed_subscription( + feed_id: str = Path(..., description="피드 ID"), + request: UpdateFeedRequest = ... +): + """피드 구독 정보 수정""" + update_data = request.dict(exclude_unset=True) + if update_data: + update_data["updated_at"] = datetime.now() + + result = await db.feeds.update_one( + {"_id": feed_id}, + {"$set": update_data} + ) + + if result.matched_count == 0: + raise HTTPException(status_code=404, detail="피드를 찾을 수 없습니다") + + feed = await db.feeds.find_one({"_id": feed_id}) + feed["_id"] = str(feed["_id"]) + return feed + +@app.delete("/api/feeds/{feed_id}") +async def unsubscribe_from_feed(feed_id: str = Path(..., description="피드 ID")): + """피드 구독 취소""" + # Delete feed + result = await db.feeds.delete_one({"_id": feed_id}) + if result.deleted_count == 0: + raise HTTPException(status_code=404, detail="피드를 찾을 수 없습니다") + + # Delete associated entries + await db.entries.delete_many({"feed_id": feed_id}) + + return {"message": "구독이 취소되었습니다"} + +@app.post("/api/feeds/{feed_id}/refresh") +async def refresh_feed( + feed_id: str = Path(..., description="피드 ID"), + background_tasks: BackgroundTasks = ... 
+): + """피드 수동 새로고침""" + feed = await db.feeds.find_one({"_id": feed_id}) + if not feed: + raise HTTPException(status_code=404, detail="피드를 찾을 수 없습니다") + + background_tasks.add_task(update_feed, feed_id) + + return {"message": "피드 새로고침이 시작되었습니다"} + +@app.get("/api/entries", response_model=List[FeedEntry]) +async def get_entries( + feed_id: Optional[str] = Query(None, description="피드 ID"), + is_read: Optional[bool] = Query(None, description="읽음 상태 필터"), + is_starred: Optional[bool] = Query(None, description="별표 상태 필터"), + limit: int = Query(50, ge=1, le=100, description="결과 개수"), + offset: int = Query(0, ge=0, description="오프셋") +): + """피드 엔트리 목록 조회""" + query = {} + if feed_id: + query["feed_id"] = feed_id + if is_read is not None: + query["is_read"] = is_read + if is_starred is not None: + query["is_starred"] = is_starred + + entries = await db.entries.find(query) \ + .sort("published", -1) \ + .skip(offset) \ + .limit(limit) \ + .to_list(limit) + + for entry in entries: + entry["_id"] = str(entry["_id"]) + + return entries + +@app.get("/api/entries/{entry_id}", response_model=FeedEntry) +async def get_entry(entry_id: str = Path(..., description="엔트리 ID")): + """특정 엔트리 조회""" + entry = await db.entries.find_one({"_id": entry_id}) + if not entry: + raise HTTPException(status_code=404, detail="엔트리를 찾을 수 없습니다") + + entry["_id"] = str(entry["_id"]) + return entry + +@app.put("/api/entries/{entry_id}/read") +async def mark_entry_as_read( + entry_id: str = Path(..., description="엔트리 ID"), + is_read: bool = Query(True, description="읽음 상태") +): + """엔트리 읽음 상태 변경""" + result = await db.entries.update_one( + {"_id": entry_id}, + {"$set": {"is_read": is_read}} + ) + + if result.matched_count == 0: + raise HTTPException(status_code=404, detail="엔트리를 찾을 수 없습니다") + + return {"message": f"읽음 상태가 {is_read}로 변경되었습니다"} + +@app.put("/api/entries/{entry_id}/star") +async def mark_entry_as_starred( + entry_id: str = Path(..., description="엔트리 ID"), + is_starred: bool = Query(True, 
description="별표 상태") +): + """엔트리 별표 상태 변경""" + result = await db.entries.update_one( + {"_id": entry_id}, + {"$set": {"is_starred": is_starred}} + ) + + if result.matched_count == 0: + raise HTTPException(status_code=404, detail="엔트리를 찾을 수 없습니다") + + return {"message": f"별표 상태가 {is_starred}로 변경되었습니다"} + +@app.post("/api/entries/mark-all-read") +async def mark_all_as_read(feed_id: Optional[str] = Query(None, description="피드 ID")): + """모든 엔트리를 읽음으로 표시""" + query = {} + if feed_id: + query["feed_id"] = feed_id + + result = await db.entries.update_many( + query, + {"$set": {"is_read": True}} + ) + + return {"message": f"{result.modified_count}개 엔트리가 읽음으로 표시되었습니다"} + +@app.get("/api/stats", response_model=List[FeedStatistics]) +async def get_statistics(feed_id: Optional[str] = Query(None, description="피드 ID")): + """피드 통계 조회""" + if feed_id: + feeds = [await db.feeds.find_one({"_id": feed_id})] + if not feeds[0]: + raise HTTPException(status_code=404, detail="피드를 찾을 수 없습니다") + else: + feeds = await db.feeds.find().to_list(100) + + stats = [] + for feed in feeds: + feed_id = str(feed["_id"]) + + # Count entries + total = await db.entries.count_documents({"feed_id": feed_id}) + unread = await db.entries.count_documents({"feed_id": feed_id, "is_read": False}) + starred = await db.entries.count_documents({"feed_id": feed_id, "is_starred": True}) + + # Calculate error rate + error_rate = 0 + if feed.get("error_count", 0) > 0: + total_fetches = feed.get("error_count", 0) + (1 if feed.get("last_fetch") else 0) + error_rate = feed.get("error_count", 0) / total_fetches + + stats.append(FeedStatistics( + feed_id=feed_id, + total_entries=total, + unread_entries=unread, + starred_entries=starred, + last_update=feed.get("last_fetch"), + error_rate=error_rate + )) + + return stats + +@app.get("/api/export/opml") +async def export_opml(): + """피드 목록을 OPML 형식으로 내보내기""" + feeds = await db.feeds.find().to_list(100) + + opml = """ + + + RSS Feed Subscriptions + {} + 
+""".format(datetime.now().isoformat()) + + for feed in feeds: + opml += f'\n ' + + opml += "\n\n" + + return { + "opml": opml, + "feed_count": len(feeds) + } + +# Google News RSS Endpoints + +@app.get("/api/google-rss/search") +async def get_google_search_rss( + q: str = Query(..., description="검색 키워드"), + lang: str = Query("ko", description="언어 코드 (ko, en, ja, zh-CN 등)"), + country: str = Query("KR", description="국가 코드 (KR, US, JP, CN 등)") +): + """Google News 검색 RSS 피드 URL 생성""" + feed_url = GoogleNewsRSS.search_feed(q, lang, country) + + # 피드 파싱 테스트 + result = await parser.parse_feed(feed_url) + + return { + "keyword": q, + "feed_url": feed_url, + "success": result["success"], + "feed_title": result["feed"].get("title", "Google News") if result["success"] else None, + "entry_count": len(result["entries"]) if result["success"] else 0, + "sample_titles": [entry.get("title", "") for entry in result["entries"][:5]] if result["success"] else [], + "entries": [ + { + "title": entry.get("title", ""), + "link": entry.get("link", ""), + "published": entry.get("published", ""), + "summary": entry.get("summary", "")[:200] if entry.get("summary") else "" + } for entry in result["entries"][:20] + ] if result["success"] else [], + "error": result.get("error") + } + +@app.get("/api/google-rss/topic") +async def get_google_topic_rss( + category: GoogleNewsCategory = Query(..., description="뉴스 카테고리"), + lang: str = Query("ko", description="언어 코드"), + country: str = Query("KR", description="국가 코드") +): + """Google News 카테고리별 RSS 피드 URL 생성""" + feed_url = GoogleNewsRSS.topic_feed(category, lang, country) + + # 피드 파싱 테스트 + result = await parser.parse_feed(feed_url) + + return { + "category": category, + "feed_url": feed_url, + "success": result["success"], + "feed_title": result["feed"].get("title", "Google News") if result["success"] else None, + "entry_count": len(result["entries"]) if result["success"] else 0, + "sample_titles": [entry.get("title", "") for entry in 
result["entries"][:5]] if result["success"] else [], + "error": result.get("error") + } + +@app.get("/api/google-rss/location") +async def get_google_location_rss( + location: str = Query(..., description="지역명 (예: Seoul, 서울, New York)"), + lang: str = Query("ko", description="언어 코드"), + country: str = Query("KR", description="국가 코드") +): + """Google News 지역 뉴스 RSS 피드 URL 생성""" + feed_url = GoogleNewsRSS.location_feed(location, lang, country) + + # 피드 파싱 테스트 + result = await parser.parse_feed(feed_url) + + return { + "location": location, + "feed_url": feed_url, + "success": result["success"], + "feed_title": result["feed"].get("title", "Google News") if result["success"] else None, + "entry_count": len(result["entries"]) if result["success"] else 0, + "sample_titles": [entry.get("title", "") for entry in result["entries"][:5]] if result["success"] else [], + "error": result.get("error") + } + +@app.get("/api/google-rss/trending") +async def get_google_trending_rss( + lang: str = Query("ko", description="언어 코드"), + country: str = Query("KR", description="국가 코드") +): + """Google News 트렌딩 RSS 피드 URL 생성""" + feed_url = GoogleNewsRSS.trending_feed(lang, country) + + # 피드 파싱 테스트 + result = await parser.parse_feed(feed_url) + + return { + "feed_url": feed_url, + "success": result["success"], + "feed_title": result["feed"].get("title", "Google News") if result["success"] else None, + "entry_count": len(result["entries"]) if result["success"] else 0, + "sample_titles": [entry.get("title", "") for entry in result["entries"][:5]] if result["success"] else [], + "error": result.get("error") + } + +@app.post("/api/google-rss/subscribe") +async def subscribe_google_rss( + q: Optional[str] = Query(None, description="검색 키워드"), + category: Optional[GoogleNewsCategory] = Query(None, description="카테고리"), + location: Optional[str] = Query(None, description="지역명"), + trending: bool = Query(False, description="트렌딩 뉴스"), + lang: str = Query("ko", description="언어 코드"), + country: str = 
Query("KR", description="국가 코드"), + background_tasks: BackgroundTasks = ... +): + """Google News RSS 피드 구독""" + # URL 생성 + if q: + feed_url = GoogleNewsRSS.search_feed(q, lang, country) + feed_title = f"Google News - {q}" + elif category: + feed_url = GoogleNewsRSS.topic_feed(category, lang, country) + feed_title = f"Google News - {category.value}" + elif location: + feed_url = GoogleNewsRSS.location_feed(location, lang, country) + feed_title = f"Google News - {location}" + elif trending: + feed_url = GoogleNewsRSS.trending_feed(lang, country) + feed_title = f"Google News - Trending ({country})" + else: + raise HTTPException(status_code=400, detail="검색어, 카테고리, 지역 중 하나를 지정해주세요") + + # 중복 확인 + existing = await db.feeds.find_one({"url": feed_url}) + if existing: + raise HTTPException(status_code=400, detail="이미 구독 중인 피드입니다") + + # 피드 파싱 + result = await parser.parse_feed(feed_url) + if not result["success"]: + raise HTTPException(status_code=400, detail=f"피드 파싱 실패: {result['error']}") + + # 구독 생성 + feed = FeedSubscription( + title=feed_title, + url=feed_url, + description=result["feed"].get("description", "Google News Feed"), + category=FeedCategory.NEWS, + update_interval=900 # 15분 + ) + + # DB 저장 + feed_dict = feed.dict() + feed_dict["url"] = str(feed_dict["url"]) + result = await db.feeds.insert_one(feed_dict) + feed.id = str(result.inserted_id) + + # 백그라운드 업데이트 + background_tasks.add_task(update_feed, feed.id) + + return feed \ No newline at end of file diff --git a/backup-services/rss-feed/backend/app/models.py b/backup-services/rss-feed/backend/app/models.py new file mode 100644 index 0000000..9d867fe --- /dev/null +++ b/backup-services/rss-feed/backend/app/models.py @@ -0,0 +1,74 @@ +from pydantic import BaseModel, Field, HttpUrl +from typing import Optional, List, Dict, Any +from datetime import datetime +from enum import Enum + +class FeedStatus(str, Enum): + ACTIVE = "active" + INACTIVE = "inactive" + ERROR = "error" + +class FeedCategory(str, Enum): + NEWS 
= "news" + TECH = "tech" + BUSINESS = "business" + SCIENCE = "science" + HEALTH = "health" + SPORTS = "sports" + ENTERTAINMENT = "entertainment" + LIFESTYLE = "lifestyle" + POLITICS = "politics" + OTHER = "other" + +class FeedSubscription(BaseModel): + id: Optional[str] = Field(None, alias="_id") + title: str + url: HttpUrl + description: Optional[str] = None + category: FeedCategory = FeedCategory.OTHER + status: FeedStatus = FeedStatus.ACTIVE + update_interval: int = 900 # seconds + last_fetch: Optional[datetime] = None + last_error: Optional[str] = None + error_count: int = 0 + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + metadata: Dict[str, Any] = {} + +class FeedEntry(BaseModel): + id: Optional[str] = Field(None, alias="_id") + feed_id: str + entry_id: str # RSS entry unique ID + title: str + link: str + summary: Optional[str] = None + content: Optional[str] = None + author: Optional[str] = None + published: Optional[datetime] = None + updated: Optional[datetime] = None + categories: List[str] = [] + thumbnail: Optional[str] = None + enclosures: List[Dict[str, Any]] = [] + is_read: bool = False + is_starred: bool = False + created_at: datetime = Field(default_factory=datetime.now) + +class CreateFeedRequest(BaseModel): + url: HttpUrl + title: Optional[str] = None + category: FeedCategory = FeedCategory.OTHER + update_interval: Optional[int] = 900 + +class UpdateFeedRequest(BaseModel): + title: Optional[str] = None + category: Optional[FeedCategory] = None + update_interval: Optional[int] = None + status: Optional[FeedStatus] = None + +class FeedStatistics(BaseModel): + feed_id: str + total_entries: int + unread_entries: int + starred_entries: int + last_update: Optional[datetime] + error_rate: float \ No newline at end of file diff --git a/backup-services/rss-feed/backend/requirements.txt b/backup-services/rss-feed/backend/requirements.txt new file mode 100644 index 0000000..a162152 
--- /dev/null +++ b/backup-services/rss-feed/backend/requirements.txt @@ -0,0 +1,14 @@ +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +pydantic==2.5.3 +pydantic-settings==2.1.0 +feedparser==6.0.11 +httpx==0.26.0 +pymongo==4.6.1 +motor==3.3.2 +redis==5.0.1 +python-dateutil==2.8.2 +beautifulsoup4==4.12.3 +lxml==5.1.0 +apscheduler==3.10.4 +pytz==2024.1 \ No newline at end of file diff --git a/check_mongodb.py b/check_mongodb.py new file mode 100644 index 0000000..efc2787 --- /dev/null +++ b/check_mongodb.py @@ -0,0 +1,38 @@ +import pymongo +from datetime import datetime +import json + +# MongoDB 연결 +client = pymongo.MongoClient("mongodb://localhost:27017/") +db = client["ai_writer_db"] +collection = db["articles"] + +# 최근 생성된 기사 조회 +articles = collection.find().sort("created_at", -1).limit(2) + +for article in articles: + print("=" * 80) + print(f"기사 ID: {article['article_id']}") + print(f"키워드: {article['keyword']}") + print(f"제목: {article['title']}") + print(f"요약: {article['summary']}") + print(f"처리 시간: {article['processing_time']:.2f}초") + print(f"생성 시각: {article['created_at']}") + print(f"파이프라인 단계: {', '.join(article['pipeline_stages'])}") + print(f"카테고리: {', '.join(article['categories'])}") + print(f"태그: {', '.join(article['tags'])}") + print(f"\n내용 (첫 500자):\n{article['content'][:500]}...") + print("=" * 80) + print() + +# 저장된 기사 생성 +with open('generated_article.json', 'w', encoding='utf-8') as f: + # 최신 기사 하나를 다시 조회 + latest = collection.find_one(sort=[("created_at", -1)]) + if latest: + # ObjectId를 문자열로 변환 + latest['_id'] = str(latest['_id']) + # datetime 객체를 문자열로 변환 + latest['created_at'] = latest['created_at'].isoformat() + json.dump(latest, f, ensure_ascii=False, indent=2) + print(f"✅ 최신 기사가 generated_article.json 파일로 저장되었습니다.") \ No newline at end of file diff --git a/config/api-keys-backup.env b/config/api-keys-backup.env new file mode 100644 index 0000000..4cd3975 --- /dev/null +++ b/config/api-keys-backup.env @@ -0,0 +1,18 @@ +# API Keys Backup - Created 
on 2025-01-13 +# 이 파일은 중요한 API 키를 백업한 것입니다. 안전하게 보관하세요. + +# Claude API Key +CLAUDE_API_KEY=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 + +# Google APIs +GOOGLE_API_KEY=AIzaSyBakoCsDP_oF5V4oq_eEKs4eQb-ekqxnRM +GOOGLE_SEARCH_ENGINE_ID=35bfbdb7b6f244569 + +# Translation (DeepL) +DEEPL_API_KEY=3abbc796-2515-44a8-972d-22dcf27ab54a + +# Image Generation (Replicate) +REPLICATE_API_TOKEN=r8_AR4puLJQYD4eeuPljw2yJvKCWKT72k119pEyp + +# Additional APIs (필요시 추가) +# SERPAPI_KEY= \ No newline at end of file diff --git a/console/backend/Dockerfile b/console/backend/Dockerfile new file mode 100644 index 0000000..2515968 --- /dev/null +++ b/console/backend/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first for better caching +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . 
+ +# Expose port +EXPOSE 8000 + +# Run the application +CMD ["python", "main.py"] \ No newline at end of file diff --git a/console/backend/auth.py b/console/backend/auth.py new file mode 100644 index 0000000..2225944 --- /dev/null +++ b/console/backend/auth.py @@ -0,0 +1,65 @@ +from datetime import datetime, timedelta +from typing import Optional +from jose import JWTError, jwt +from passlib.context import CryptContext +from fastapi import Depends, HTTPException, status +from fastapi.security import OAuth2PasswordBearer +from pydantic import BaseModel +import os + +SECRET_KEY = os.getenv("JWT_SECRET_KEY", "your-secret-key-change-in-production") +ALGORITHM = "HS256" +ACCESS_TOKEN_EXPIRE_MINUTES = 30 + +pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") +oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/login") + +class Token(BaseModel): + access_token: str + token_type: str + +class TokenData(BaseModel): + username: Optional[str] = None + +class UserLogin(BaseModel): + username: str + password: str + +class UserInDB(BaseModel): + username: str + hashed_password: str + email: str + full_name: Optional[str] = None + is_active: bool = True + +def verify_password(plain_password, hashed_password): + return pwd_context.verify(plain_password, hashed_password) + +def get_password_hash(password): + return pwd_context.hash(password) + +def create_access_token(data: dict, expires_delta: Optional[timedelta] = None): + to_encode = data.copy() + if expires_delta: + expire = datetime.utcnow() + expires_delta + else: + expire = datetime.utcnow() + timedelta(minutes=15) + to_encode.update({"exp": expire}) + encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM) + return encoded_jwt + +async def get_current_user(token: str = Depends(oauth2_scheme)): + credentials_exception = HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Could not validate credentials", + headers={"WWW-Authenticate": "Bearer"}, + ) + try: + payload = 
jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) + username: str = payload.get("sub") + if username is None: + raise credentials_exception + token_data = TokenData(username=username) + except JWTError: + raise credentials_exception + return token_data \ No newline at end of file diff --git a/console/backend/event_consumer.py b/console/backend/event_consumer.py new file mode 100644 index 0000000..9c4c0f3 --- /dev/null +++ b/console/backend/event_consumer.py @@ -0,0 +1,358 @@ +""" +고급 이벤트 컨슈머 with DLQ and Retry +""" +import asyncio +import json +import logging +from typing import Dict, Any, Optional, List +from datetime import datetime, timedelta +from redis import asyncio as aioredis +from aiokafka import AIOKafkaProducer + +import sys +sys.path.append('/app') +from shared.kafka import KafkaConsumer, Event, EventType +from event_handlers import EventHandlers + +logger = logging.getLogger(__name__) + +class RetryPolicy: + """재시도 정책""" + def __init__( + self, + max_retries: int = 3, + initial_delay: float = 1.0, + max_delay: float = 60.0, + exponential_base: float = 2.0 + ): + self.max_retries = max_retries + self.initial_delay = initial_delay + self.max_delay = max_delay + self.exponential_base = exponential_base + + def get_delay(self, retry_count: int) -> float: + """재시도 지연 시간 계산 (exponential backoff)""" + delay = self.initial_delay * (self.exponential_base ** retry_count) + return min(delay, self.max_delay) + +class AdvancedEventConsumer: + def __init__( + self, + topics: List[str], + group_id: str, + redis_url: str = "redis://redis:6379", + bootstrap_servers: str = "kafka:9092", + enable_dlq: bool = True, + dlq_topic: str = "dead-letter-queue" + ): + self.topics = topics + self.group_id = group_id + self.bootstrap_servers = bootstrap_servers + self.enable_dlq = enable_dlq + self.dlq_topic = dlq_topic + + # Kafka Consumer + self.consumer = KafkaConsumer( + topics=topics, + group_id=group_id, + bootstrap_servers=bootstrap_servers + ) + + # DLQ Producer + 
self.dlq_producer: Optional[AIOKafkaProducer] = None + + # Redis for retry tracking + self.redis: Optional[aioredis.Redis] = None + self.redis_url = redis_url + + # Event handlers + self.handlers: Optional[EventHandlers] = None + + # Retry policies per event type + self.retry_policies = { + EventType.USER_CREATED: RetryPolicy(max_retries=3), + EventType.USER_UPDATED: RetryPolicy(max_retries=2), + EventType.USER_DELETED: RetryPolicy(max_retries=5), # 중요한 이벤트 + EventType.OAUTH_APP_CREATED: RetryPolicy(max_retries=3), + EventType.OAUTH_TOKEN_ISSUED: RetryPolicy(max_retries=1), + } + + # Processing statistics + self.stats = { + "processed": 0, + "failed": 0, + "retried": 0, + "dlq_sent": 0 + } + + async def start(self): + """컨슈머 시작""" + try: + # Redis 연결 + self.redis = await aioredis.from_url( + self.redis_url, + encoding="utf-8", + decode_responses=True + ) + + # Event handlers 초기화 + self.handlers = EventHandlers(redis_client=self.redis) + + # DLQ Producer 초기화 + if self.enable_dlq: + self.dlq_producer = AIOKafkaProducer( + bootstrap_servers=self.bootstrap_servers, + value_serializer=lambda v: json.dumps(v).encode() + ) + await self.dlq_producer.start() + logger.info(f"DLQ Producer started for topic: {self.dlq_topic}") + + # 이벤트 핸들러 등록 + self._register_event_handlers() + + # Kafka Consumer 시작 + await self.consumer.start() + + logger.info(f"Advanced Event Consumer started: {self.topics}") + + # 통계 리포팅 태스크 시작 + asyncio.create_task(self._report_stats()) + + except Exception as e: + logger.error(f"Failed to start Advanced Event Consumer: {e}") + raise + + async def stop(self): + """컨슈머 종료""" + await self.consumer.stop() + + if self.dlq_producer: + await self.dlq_producer.stop() + + if self.redis: + await self.redis.close() + + logger.info("Advanced Event Consumer stopped") + + def _register_event_handlers(self): + """이벤트 핸들러 등록""" + # 각 이벤트 타입에 대한 핸들러를 래퍼로 감싸서 등록 + self.consumer.register_handler( + EventType.USER_CREATED, + self._create_handler_with_retry( + 
self.handlers.handle_user_created, + EventType.USER_CREATED + ) + ) + + self.consumer.register_handler( + EventType.USER_UPDATED, + self._create_handler_with_retry( + self.handlers.handle_user_updated, + EventType.USER_UPDATED + ) + ) + + self.consumer.register_handler( + EventType.USER_DELETED, + self._create_handler_with_retry( + self.handlers.handle_user_deleted, + EventType.USER_DELETED + ) + ) + + self.consumer.register_handler( + EventType.OAUTH_APP_CREATED, + self._create_handler_with_retry( + self.handlers.handle_oauth_app_created, + EventType.OAUTH_APP_CREATED + ) + ) + + self.consumer.register_handler( + EventType.OAUTH_TOKEN_ISSUED, + self._create_handler_with_retry( + self.handlers.handle_oauth_token_issued, + EventType.OAUTH_TOKEN_ISSUED + ) + ) + + def _create_handler_with_retry(self, handler_func, event_type: EventType): + """재시도 로직이 포함된 핸들러 래퍼 생성""" + async def wrapper(event: Event): + event_id = f"{event.event_id}:{event.event_type}" + retry_key = f"retry:{event_id}" + + try: + # 재시도 횟수 확인 + retry_count = 0 + if self.redis: + retry_count_str = await self.redis.get(retry_key) + retry_count = int(retry_count_str) if retry_count_str else 0 + + # 핸들러 실행 + await handler_func(event.dict()) + + # 성공 시 재시도 카운터 삭제 + if self.redis and retry_count > 0: + await self.redis.delete(retry_key) + + self.stats["processed"] += 1 + + except Exception as e: + logger.error(f"Error processing {event_type}: {e}") + self.stats["failed"] += 1 + + # 재시도 처리 + retry_policy = self.retry_policies.get(event_type) + if retry_policy and retry_count < retry_policy.max_retries: + await self._handle_retry(event, retry_count, retry_policy, retry_key) + else: + # 최대 재시도 초과 -> DLQ로 전송 + await self._send_to_dlq(event, str(e), retry_count) + + return wrapper + + async def _handle_retry( + self, + event: Event, + retry_count: int, + retry_policy: RetryPolicy, + retry_key: str + ): + """재시도 처리""" + retry_count += 1 + delay = retry_policy.get_delay(retry_count) + + logger.warning( + 
f"Retrying event {event.event_id} " + f"(attempt {retry_count}/{retry_policy.max_retries}) " + f"after {delay}s" + ) + + # 재시도 카운터 저장 + if self.redis: + await self.redis.setex( + retry_key, + timedelta(hours=24), # 24시간 후 자동 삭제 + retry_count + ) + + # 지연 후 재처리를 위해 다시 큐에 추가 + # 실제 프로덕션에서는 별도의 재시도 토픽 사용 권장 + self.stats["retried"] += 1 + + # 지연 실행 + await asyncio.sleep(delay) + + # 이벤트 재발행 (재시도 토픽으로) + if hasattr(self, 'retry_producer'): + await self._republish_for_retry(event, retry_count) + + async def _send_to_dlq(self, event: Event, error: str, retry_count: int): + """Dead Letter Queue로 전송""" + if not self.enable_dlq or not self.dlq_producer: + logger.error(f"Failed to process event {event.event_id} after {retry_count} retries") + return + + try: + dlq_message = { + "original_event": event.dict(), + "error": error, + "retry_count": retry_count, + "failed_at": datetime.now().isoformat(), + "consumer_group": self.group_id, + "topic": self.topics[0] if self.topics else None + } + + await self.dlq_producer.send( + self.dlq_topic, + value=dlq_message + ) + + self.stats["dlq_sent"] += 1 + + logger.error( + f"Event {event.event_id} sent to DLQ after {retry_count} retries. 
" + f"Error: {error}" + ) + + # Redis에 DLQ 전송 기록 + if self.redis: + dlq_key = f"dlq:{event.event_id}" + await self.redis.setex( + dlq_key, + timedelta(days=7), # 7일 보관 + json.dumps({ + "error": error, + "retry_count": retry_count, + "sent_at": datetime.now().isoformat() + }) + ) + + except Exception as e: + logger.critical(f"Failed to send event to DLQ: {e}") + + async def _republish_for_retry(self, event: Event, retry_count: int): + """재시도를 위한 이벤트 재발행""" + # 실제 구현에서는 별도의 재시도 토픽 사용 + # 여기서는 로깅만 수행 + logger.info(f"Would republish event {event.event_id} for retry #{retry_count}") + + async def _report_stats(self): + """통계 리포팅 (1분마다)""" + while True: + await asyncio.sleep(60) + + logger.info( + f"Event Consumer Stats - " + f"Processed: {self.stats['processed']}, " + f"Failed: {self.stats['failed']}, " + f"Retried: {self.stats['retried']}, " + f"DLQ: {self.stats['dlq_sent']}" + ) + + # Redis에 통계 저장 + if self.redis: + stats_key = f"consumer:stats:{self.group_id}" + await self.redis.hset( + stats_key, + mapping={ + **self.stats, + "updated_at": datetime.now().isoformat() + } + ) + + async def get_dlq_messages(self, limit: int = 10) -> List[Dict[str, Any]]: + """DLQ 메시지 조회 (관리 목적)""" + if not self.redis: + return [] + + dlq_keys = await self.redis.keys("dlq:*") + messages = [] + + for key in dlq_keys[:limit]: + data = await self.redis.get(key) + if data: + event_id = key.replace("dlq:", "") + message = json.loads(data) + message["event_id"] = event_id + messages.append(message) + + return messages + + async def retry_dlq_message(self, event_id: str) -> bool: + """DLQ 메시지 수동 재시도""" + # 실제 구현에서는 DLQ에서 메시지를 읽어 재처리 + logger.info(f"Manual retry requested for event: {event_id}") + + if self.redis: + # 재시도 카운터 리셋 + retry_key = f"retry:{event_id}:*" + keys = await self.redis.keys(retry_key) + if keys: + await self.redis.delete(*keys) + + return True + + return False \ No newline at end of file diff --git a/console/backend/event_handlers.py b/console/backend/event_handlers.py new 
file mode 100644 index 0000000..78895ea --- /dev/null +++ b/console/backend/event_handlers.py @@ -0,0 +1,213 @@ +""" +이벤트 핸들러 모듈 +각 이벤트 타입별 처리 로직 구현 +""" +import logging +from typing import Dict, Any, Optional +from datetime import datetime +import json +import asyncio +from redis import asyncio as aioredis + +logger = logging.getLogger(__name__) + +class EventHandlers: + def __init__(self, redis_client: Optional[aioredis.Redis] = None): + self.redis = redis_client + self.retry_counts: Dict[str, int] = {} + + async def handle_user_created(self, event: Dict[str, Any]): + """사용자 생성 이벤트 처리""" + try: + user_id = event.get('data', {}).get('user_id') + username = event.get('data', {}).get('username') + email = event.get('data', {}).get('email') + + logger.info(f"Processing USER_CREATED: {username} ({user_id})") + + # Redis 캐시 무효화 + if self.redis: + await self.redis.delete(f"user:{user_id}") + await self.redis.delete("users:list") + + # 추가 처리 로직 + # - 환영 이메일 발송 준비 + # - 초기 설정 생성 + # - 분석 데이터 기록 + + await self._publish_notification({ + "type": "user.welcome", + "user_id": user_id, + "email": email, + "username": username, + "timestamp": datetime.now().isoformat() + }) + + logger.info(f"Successfully processed USER_CREATED for {username}") + + except Exception as e: + logger.error(f"Error handling USER_CREATED: {e}") + raise + + async def handle_user_updated(self, event: Dict[str, Any]): + """사용자 업데이트 이벤트 처리""" + try: + user_id = event.get('data', {}).get('user_id') + updated_fields = event.get('data', {}).get('updated_fields', []) + + logger.info(f"Processing USER_UPDATED: {user_id}, fields: {updated_fields}") + + # Redis 캐시 무효화 + if self.redis: + await self.redis.delete(f"user:{user_id}") + await self.redis.delete("users:list") + + # 프로필 사진 변경 시 이미지 캐시도 무효화 + if 'profile_picture' in updated_fields: + await self.redis.delete(f"user:profile_picture:{user_id}") + + # 프로필 완성도 계산 + if 'profile_picture' in updated_fields or 'bio' in updated_fields: + await 
self._calculate_profile_completeness(user_id) + + logger.info(f"Successfully processed USER_UPDATED for {user_id}") + + except Exception as e: + logger.error(f"Error handling USER_UPDATED: {e}") + raise + + async def handle_user_deleted(self, event: Dict[str, Any]): + """사용자 삭제 이벤트 처리""" + try: + user_id = event.get('data', {}).get('user_id') + username = event.get('data', {}).get('username') + + logger.info(f"Processing USER_DELETED: {username} ({user_id})") + + # Redis에서 모든 관련 데이터 삭제 + if self.redis: + # 사용자 캐시 삭제 + await self.redis.delete(f"user:{user_id}") + await self.redis.delete("users:list") + + # 세션 삭제 + session_keys = await self.redis.keys(f"session:*:{user_id}") + if session_keys: + await self.redis.delete(*session_keys) + + # 프로필 이미지 캐시 삭제 + await self.redis.delete(f"user:profile_picture:{user_id}") + + # 관련 데이터 정리 이벤트 발행 + await self._publish_cleanup_event({ + "user_id": user_id, + "username": username, + "timestamp": datetime.now().isoformat() + }) + + logger.info(f"Successfully processed USER_DELETED for {username}") + + except Exception as e: + logger.error(f"Error handling USER_DELETED: {e}") + raise + + async def handle_oauth_app_created(self, event: Dict[str, Any]): + """OAuth 앱 생성 이벤트 처리""" + try: + app_id = event.get('data', {}).get('app_id') + app_name = event.get('data', {}).get('name') + owner_id = event.get('data', {}).get('owner_id') + + logger.info(f"Processing OAUTH_APP_CREATED: {app_name} ({app_id})") + + # 앱 생성 알림 + await self._publish_notification({ + "type": "oauth.app_created", + "app_id": app_id, + "app_name": app_name, + "owner_id": owner_id, + "timestamp": datetime.now().isoformat() + }) + + logger.info(f"Successfully processed OAUTH_APP_CREATED for {app_name}") + + except Exception as e: + logger.error(f"Error handling OAUTH_APP_CREATED: {e}") + raise + + async def handle_oauth_token_issued(self, event: Dict[str, Any]): + """OAuth 토큰 발급 이벤트 처리""" + try: + client_id = event.get('data', {}).get('client_id') + user_id = 
event.get('data', {}).get('user_id') + scopes = event.get('data', {}).get('scopes', []) + + logger.info(f"Processing OAUTH_TOKEN_ISSUED: client={client_id}, user={user_id}") + + # 보안 감사 로그 + await self._log_security_event({ + "type": "oauth.token_issued", + "client_id": client_id, + "user_id": user_id, + "scopes": scopes, + "timestamp": datetime.now().isoformat() + }) + + # 사용 통계 업데이트 + if self.redis: + await self.redis.hincrby(f"oauth:stats:{client_id}", "tokens_issued", 1) + await self.redis.sadd(f"oauth:users:{client_id}", user_id) + + logger.info(f"Successfully processed OAUTH_TOKEN_ISSUED") + + except Exception as e: + logger.error(f"Error handling OAUTH_TOKEN_ISSUED: {e}") + raise + + async def _publish_notification(self, notification: Dict[str, Any]): + """알림 이벤트 발행""" + # 향후 Notification 서비스로 이벤트 발행 + logger.debug(f"Publishing notification: {notification}") + + if self.redis: + await self.redis.lpush( + "notifications:queue", + json.dumps(notification) + ) + + async def _publish_cleanup_event(self, cleanup_data: Dict[str, Any]): + """정리 이벤트 발행""" + # 향후 각 서비스로 정리 이벤트 발행 + logger.debug(f"Publishing cleanup event: {cleanup_data}") + + if self.redis: + await self.redis.lpush( + "cleanup:queue", + json.dumps(cleanup_data) + ) + + async def _calculate_profile_completeness(self, user_id: str): + """프로필 완성도 계산""" + # 향후 프로필 완성도 계산 로직 + logger.debug(f"Calculating profile completeness for user: {user_id}") + + if self.redis: + # 임시로 Redis에 저장 + await self.redis.hset( + f"user:stats:{user_id}", + "profile_updated_at", + datetime.now().isoformat() + ) + + async def _log_security_event(self, event_data: Dict[str, Any]): + """보안 이벤트 로깅""" + logger.info(f"Security event: {event_data}") + + if self.redis: + await self.redis.lpush( + "security:audit_log", + json.dumps(event_data) + ) + + # 최근 100개만 유지 + await self.redis.ltrim("security:audit_log", 0, 99) \ No newline at end of file diff --git a/console/backend/main.py b/console/backend/main.py new file mode 100644 index 
0000000..2a55a32 --- /dev/null +++ b/console/backend/main.py @@ -0,0 +1,328 @@ +from fastapi import FastAPI, HTTPException, Request, Response, Depends, status +from fastapi.middleware.cors import CORSMiddleware +from fastapi.security import OAuth2PasswordRequestForm +import uvicorn +from datetime import datetime, timedelta +import httpx +import os +import asyncio +import logging +from typing import Any +from contextlib import asynccontextmanager +from auth import ( + Token, UserLogin, UserInDB, + verify_password, get_password_hash, + create_access_token, get_current_user, + ACCESS_TOKEN_EXPIRE_MINUTES +) + +# Import event consumer +from event_consumer import AdvancedEventConsumer + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Global event consumer instance +event_consumer = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Startup + global event_consumer + + try: + # Initialize and start event consumer + event_consumer = AdvancedEventConsumer( + topics=["user-events", "oauth-events"], + group_id="console-consumer-group", + redis_url=os.getenv("REDIS_URL", "redis://redis:6379"), + bootstrap_servers=os.getenv("KAFKA_BOOTSTRAP_SERVERS", "kafka:9092"), + enable_dlq=True, + dlq_topic="dead-letter-queue" + ) + + await event_consumer.start() + logger.info("Event consumer started successfully") + + except Exception as e: + logger.error(f"Failed to start event consumer: {e}") + # Continue without event consumer (degraded mode) + event_consumer = None + + yield + + # Shutdown + if event_consumer: + await event_consumer.stop() + logger.info("Event consumer stopped") + +app = FastAPI( + title="Console API Gateway", + description="Central orchestrator for microservices", + version="0.1.0", + lifespan=lifespan +) + +# Service URLs from environment +USERS_SERVICE_URL = os.getenv("USERS_SERVICE_URL", "http://users-backend:8000") +IMAGES_SERVICE_URL = os.getenv("IMAGES_SERVICE_URL", "http://images-backend:8000") + +# CORS 
middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +@app.get("/") +async def root(): + return { + "message": "Console API Gateway", + "status": "running", + "timestamp": datetime.now().isoformat() + } + +@app.get("/health") +async def health_check(): + return { + "status": "healthy", + "service": "console", + "timestamp": datetime.now().isoformat(), + "event_consumer": "running" if event_consumer else "not running" + } + +# Event Management Endpoints +@app.get("/api/events/stats") +async def get_event_stats(current_user = Depends(get_current_user)): + """Get event consumer statistics""" + if not event_consumer: + raise HTTPException(status_code=503, detail="Event consumer not available") + + return { + "stats": event_consumer.stats, + "timestamp": datetime.now().isoformat() + } + +@app.get("/api/events/dlq") +async def get_dlq_messages( + limit: int = 10, + current_user = Depends(get_current_user) +): + """Get messages from Dead Letter Queue""" + if not event_consumer: + raise HTTPException(status_code=503, detail="Event consumer not available") + + messages = await event_consumer.get_dlq_messages(limit=limit) + return { + "messages": messages, + "count": len(messages), + "timestamp": datetime.now().isoformat() + } + +@app.post("/api/events/dlq/{event_id}/retry") +async def retry_dlq_message( + event_id: str, + current_user = Depends(get_current_user) +): + """Manually retry a message from DLQ""" + if not event_consumer: + raise HTTPException(status_code=503, detail="Event consumer not available") + + success = await event_consumer.retry_dlq_message(event_id) + if not success: + raise HTTPException(status_code=404, detail="Event not found in DLQ") + + return { + "status": "retry_initiated", + "event_id": event_id, + "timestamp": datetime.now().isoformat() + } + +@app.get("/api/events/schemas") +async def get_event_schemas(): + """Get all event schemas documentation""" 
+ from shared.kafka.schema_registry import SchemaRegistry + + schemas = SchemaRegistry.get_all_schemas() + return { + "schemas": schemas, + "version": "1.0.0", + "timestamp": datetime.now().isoformat() + } + +# Authentication endpoints +@app.post("/api/auth/login", response_model=Token) +async def login(form_data: OAuth2PasswordRequestForm = Depends()): + """Login endpoint for authentication""" + # For demo purposes - in production, check against database + # This is temporary until we integrate with Users service + demo_users = { + "admin": { + "username": "admin", + "hashed_password": get_password_hash("admin123"), + "email": "admin@site11.com", + "full_name": "Administrator", + "is_active": True + }, + "user": { + "username": "user", + "hashed_password": get_password_hash("user123"), + "email": "user@site11.com", + "full_name": "Test User", + "is_active": True + } + } + + user = demo_users.get(form_data.username) + if not user or not verify_password(form_data.password, user["hashed_password"]): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + headers={"WWW-Authenticate": "Bearer"}, + ) + + access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES) + access_token = create_access_token( + data={"sub": user["username"]}, expires_delta=access_token_expires + ) + return {"access_token": access_token, "token_type": "bearer"} + +@app.get("/api/auth/me") +async def get_me(current_user = Depends(get_current_user)): + """Get current user information""" + return { + "username": current_user.username, + "email": f"{current_user.username}@site11.com", + "is_active": True + } + +@app.post("/api/auth/logout") +async def logout(current_user = Depends(get_current_user)): + """Logout endpoint""" + # In a real application, you might want to blacklist the token + return {"message": "Successfully logged out"} + +@app.get("/api/status") +async def system_status(): + services_status = {} + + # Check Users 
service + try: + async with httpx.AsyncClient() as client: + response = await client.get(f"{USERS_SERVICE_URL}/health", timeout=2.0) + services_status["users"] = "online" if response.status_code == 200 else "error" + except: + services_status["users"] = "offline" + + # Check Images service + try: + async with httpx.AsyncClient() as client: + response = await client.get(f"{IMAGES_SERVICE_URL}/health", timeout=2.0) + services_status["images"] = "online" if response.status_code == 200 else "error" + except: + services_status["images"] = "offline" + + # Other services (not yet implemented) + services_status["oauth"] = "pending" + services_status["applications"] = "pending" + services_status["data"] = "pending" + services_status["statistics"] = "pending" + + return { + "console": "online", + "services": services_status, + "timestamp": datetime.now().isoformat() + } + +# Protected endpoint example +@app.get("/api/protected") +async def protected_route(current_user = Depends(get_current_user)): + """Example of a protected route""" + return { + "message": "This is a protected route", + "user": current_user.username + } + +# API Gateway - Route to Images service +@app.api_route("/api/images/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"]) +async def proxy_to_images(path: str, request: Request): + """Proxy requests to Images service (public for image proxy)""" + try: + async with httpx.AsyncClient() as client: + # Build the target URL + url = f"{IMAGES_SERVICE_URL}/api/v1/{path}" + + # Get request body if exists + body = None + if request.method in ["POST", "PUT", "PATCH"]: + body = await request.body() + + # Forward the request + response = await client.request( + method=request.method, + url=url, + headers={ + key: value for key, value in request.headers.items() + if key.lower() not in ["host", "content-length"] + }, + content=body, + params=request.query_params + ) + + # Return the response + return Response( + content=response.content, + 
status_code=response.status_code, + headers=dict(response.headers) + ) + except httpx.ConnectError: + raise HTTPException(status_code=503, detail="Images service unavailable") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# API Gateway - Route to Users service +@app.api_route("/api/users/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"]) +async def proxy_to_users(path: str, request: Request, current_user = Depends(get_current_user)): + """Proxy requests to Users service (protected)""" + try: + async with httpx.AsyncClient() as client: + # Build the target URL + url = f"{USERS_SERVICE_URL}/{path}" + + # Get request body if exists + body = None + if request.method in ["POST", "PUT", "PATCH"]: + body = await request.body() + + # Forward the request + response = await client.request( + method=request.method, + url=url, + headers={ + key: value for key, value in request.headers.items() + if key.lower() not in ["host", "content-length"] + }, + content=body, + params=request.query_params + ) + + # Return the response + return Response( + content=response.content, + status_code=response.status_code, + headers=dict(response.headers) + ) + except httpx.ConnectError: + raise HTTPException(status_code=503, detail="Users service unavailable") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + uvicorn.run( + "main:app", + host="0.0.0.0", + port=8000, + reload=True + ) \ No newline at end of file diff --git a/console/backend/requirements.txt b/console/backend/requirements.txt new file mode 100644 index 0000000..37d8b9b --- /dev/null +++ b/console/backend/requirements.txt @@ -0,0 +1,10 @@ +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +python-dotenv==1.0.0 +pydantic==2.5.3 +httpx==0.26.0 +python-jose[cryptography]==3.3.0 +passlib[bcrypt]==1.7.4 +python-multipart==0.0.6 +redis==5.0.1 +aiokafka==0.10.0 \ No newline at end of file diff --git 
a/console/backend/shared/kafka/__init__.py b/console/backend/shared/kafka/__init__.py new file mode 100644 index 0000000..7c78f53 --- /dev/null +++ b/console/backend/shared/kafka/__init__.py @@ -0,0 +1,6 @@ +from .producer import KafkaProducer +from .consumer import KafkaConsumer +from .events import Event, EventType +from .schema_registry import SchemaRegistry + +__all__ = ['KafkaProducer', 'KafkaConsumer', 'Event', 'EventType', 'SchemaRegistry'] \ No newline at end of file diff --git a/console/backend/shared/kafka/consumer.py b/console/backend/shared/kafka/consumer.py new file mode 100644 index 0000000..746e79b --- /dev/null +++ b/console/backend/shared/kafka/consumer.py @@ -0,0 +1,125 @@ +import json +import asyncio +from typing import Optional, Callable, Dict, Any, List +from aiokafka import AIOKafkaConsumer +from aiokafka.errors import KafkaError +import logging + +from .events import Event, EventType + +logger = logging.getLogger(__name__) + +class KafkaConsumer: + def __init__( + self, + topics: List[str], + group_id: str, + bootstrap_servers: str = "kafka:9092" + ): + self.topics = topics + self.group_id = group_id + self.bootstrap_servers = bootstrap_servers + self._consumer: Optional[AIOKafkaConsumer] = None + self._handlers: Dict[EventType, List[Callable]] = {} + self._running = False + + def register_handler(self, event_type: EventType, handler: Callable): + """이벤트 타입별 핸들러 등록""" + if event_type not in self._handlers: + self._handlers[event_type] = [] + self._handlers[event_type].append(handler) + logger.info(f"Registered handler for {event_type}") + + async def start(self): + """Kafka Consumer 시작""" + try: + self._consumer = AIOKafkaConsumer( + *self.topics, + bootstrap_servers=self.bootstrap_servers, + group_id=self.group_id, + value_deserializer=lambda v: json.loads(v.decode()), + auto_offset_reset='earliest', + enable_auto_commit=True, + auto_commit_interval_ms=1000, + session_timeout_ms=30000, + heartbeat_interval_ms=10000 + ) + await 
self._consumer.start() + self._running = True + logger.info(f"Kafka Consumer started: {self.topics} (group: {self.group_id})") + + # 메시지 처리 루프 시작 + asyncio.create_task(self._consume_messages()) + + except Exception as e: + logger.error(f"Failed to start Kafka Consumer: {e}") + raise + + async def stop(self): + """Kafka Consumer 종료""" + self._running = False + if self._consumer: + await self._consumer.stop() + logger.info("Kafka Consumer stopped") + + async def _consume_messages(self): + """메시지 소비 루프""" + if not self._consumer: + return + + while self._running: + try: + # 메시지 배치로 가져오기 (최대 100ms 대기) + msg_batch = await self._consumer.getmany(timeout_ms=100) + + for tp, messages in msg_batch.items(): + for msg in messages: + await self._process_message(msg.value) + + except KafkaError as e: + logger.error(f"Kafka error: {e}") + await asyncio.sleep(1) + except Exception as e: + logger.error(f"Error processing messages: {e}") + await asyncio.sleep(1) + + async def _process_message(self, message: Dict[str, Any]): + """개별 메시지 처리""" + try: + # Event 객체로 변환 + event = Event(**message) + + # 등록된 핸들러 실행 + handlers = self._handlers.get(event.event_type, []) + + for handler in handlers: + try: + if asyncio.iscoroutinefunction(handler): + await handler(event) + else: + handler(event) + except Exception as e: + logger.error(f"Handler error for {event.event_type}: {e}") + + if not handlers: + logger.debug(f"No handlers for event type: {event.event_type}") + + except Exception as e: + logger.error(f"Failed to process message: {e}") + + async def consume_one(self, timeout: float = 1.0) -> Optional[Event]: + """단일 메시지 소비 (테스트/디버깅용)""" + if not self._consumer: + return None + + try: + msg = await asyncio.wait_for( + self._consumer.getone(), + timeout=timeout + ) + return Event(**msg.value) + except asyncio.TimeoutError: + return None + except Exception as e: + logger.error(f"Error consuming message: {e}") + return None \ No newline at end of file diff --git 
class EventType(str, Enum):
    """Names of the domain events exchanged over Kafka.

    Members subclass ``str``, so ``json.dumps`` writes them as their value.
    """

    USER_CREATED = "user.created"
    USER_UPDATED = "user.updated"
    USER_DELETED = "user.deleted"
    USER_LOGIN = "user.login"

    IMAGE_UPLOADED = "image.uploaded"
    IMAGE_CACHED = "image.cached"
    IMAGE_DELETED = "image.deleted"

    TASK_CREATED = "task.created"
    TASK_COMPLETED = "task.completed"
    TASK_FAILED = "task.failed"


class Event(BaseModel):
    """Envelope for one event published to or read from Kafka."""

    event_type: EventType
    # NOTE(review): datetime.now() yields a naive local timestamp; confirm
    # that producers and consumers agree on the time zone.
    timestamp: datetime = Field(default_factory=datetime.now)
    # Name of the emitting service.
    service: str
    # Free-form payload; must be JSON-serializable to be sent.
    data: Dict[str, Any]
    # Used as the Kafka message key by KafkaProducer when present.
    correlation_id: Optional[str] = None
    user_id: Optional[str] = None

    class Config:
        json_encoders = {
            datetime: lambda v: v.isoformat()
        }


class KafkaProducer:
    """Asynchronous Kafka producer for ``Event`` objects (wraps aiokafka)."""

    def __init__(self, bootstrap_servers: str = "kafka:9092"):
        """Store the broker address; no network I/O happens here."""
        self.bootstrap_servers = bootstrap_servers
        self._producer: Optional[AIOKafkaProducer] = None

    async def start(self):
        """Create and start the underlying ``AIOKafkaProducer``.

        Raises:
            Exception: whatever aiokafka raised; the half-started client is
                closed before the error is re-raised.
        """
        try:
            self._producer = AIOKafkaProducer(
                bootstrap_servers=self.bootstrap_servers,
                value_serializer=lambda v: json.dumps(v).encode(),
                compression_type="gzip",
                acks='all',
                retry_backoff_ms=100
            )
            await self._producer.start()
            logger.info(f"Kafka Producer started: {self.bootstrap_servers}")
        except Exception as e:
            logger.error(f"Failed to start Kafka Producer: {e}")
            # Do not leak the client's connections when start-up fails.
            producer, self._producer = self._producer, None
            if producer is not None:
                try:
                    await producer.stop()
                except Exception:
                    logger.exception("Error while closing Kafka Producer")
            raise

    async def stop(self):
        """Flush pending messages and close the producer, if started."""
        producer, self._producer = self._producer, None
        if producer is not None:
            await producer.stop()
            logger.info("Kafka Producer stopped")

    @staticmethod
    def _event_payload(event: Event) -> Dict[str, Any]:
        """Return the event as a dict with an ISO-8601 ``timestamp`` string."""
        payload = event.dict()
        payload['timestamp'] = event.timestamp.isoformat()
        return payload

    @staticmethod
    def _event_key(event: Event) -> Optional[bytes]:
        """Return the message key: the encoded correlation id, or ``None``."""
        return event.correlation_id.encode() if event.correlation_id else None

    async def send_event(self, topic: str, event: Event) -> bool:
        """Send one event and wait for the broker acknowledgement.

        Returns:
            ``True`` on success; ``False`` when the producer is not started
            or the send failed (the error is logged, not raised).
        """
        if not self._producer:
            logger.error("Producer not started")
            return False

        try:
            await self._producer.send_and_wait(
                topic,
                value=self._event_payload(event),
                key=self._event_key(event)
            )

            logger.info(f"Event sent to {topic}: {event.event_type}")
            return True

        except KafkaError as e:
            logger.error(f"Failed to send event to {topic}: {e}")
            return False
        except Exception as e:
            logger.error(f"Unexpected error sending event: {e}")
            return False

    async def _flush_batch(self, batch, topic: str, partition: int) -> int:
        """Submit ``batch`` to ``partition`` and wait for delivery.

        Returns the number of records that were in the batch (0 if empty,
        in which case nothing is sent).
        """
        count = batch.record_count()
        if not count:
            return 0
        # send_batch() needs an explicit partition and returns a future that
        # resolves once the broker has processed the batch.
        delivery = await self._producer.send_batch(batch, topic, partition=partition)
        await delivery
        return count

    async def send_batch(self, topic: str, events: list[Event]) -> int:
        """Send several events using explicitly built record batches.

        Each batch is written to a single partition; full batches are spread
        round-robin over the topic's partitions.

        Returns:
            Number of events delivered (0 if the producer is not started,
            ``events`` is empty, or the topic has no known partitions).

        Raises:
            Exception: serialization and Kafka errors propagate to the
                caller (unlike ``send_event``).
        """
        if not self._producer:
            logger.error("Producer not started")
            return 0
        if not events:
            return 0

        producer = self._producer
        partitions = sorted(await producer.partitions_for(topic))
        if not partitions:
            logger.error(f"No partitions available for {topic}")
            return 0

        sent_count = 0
        flushed = 0
        batch = producer.create_batch()

        for event in events:
            # NOTE(review): the raw dict is appended on the assumption that
            # the batch builder applies the producer's value_serializer (as
            # send_event relies on); confirm against the pinned aiokafka.
            record = {
                "key": self._event_key(event),
                "value": self._event_payload(event),
                "timestamp": None,
            }

            if batch.append(**record) is None:
                # Batch is full: send it and continue with a fresh one.
                target = partitions[flushed % len(partitions)]
                sent_count += await self._flush_batch(batch, topic, target)
                flushed += 1
                batch = producer.create_batch()
                if batch.append(**record) is None:
                    logger.error(f"Event does not fit into a batch, skipped: {event.event_type}")

        # Send whatever is left in the last batch.
        target = partitions[flushed % len(partitions)]
        sent_count += await self._flush_batch(batch, topic, target)

        logger.info(f"Sent {sent_count} events to {topic}")
        return sent_count
def _require_fields(data: Dict[str, Any], required: List[str]) -> Dict[str, Any]:
    """Return ``data`` unchanged, or raise ``ValueError`` naming the first missing key."""
    for name in required:
        if name not in data:
            raise ValueError(f"Missing required field: {name}")
    return data


class SchemaVersion(str, Enum):
    """Known event schema versions."""

    V1 = "1.0.0"
    V2 = "2.0.0"


class EventSchemaBase(BaseModel):
    """Fields shared by every event schema."""

    event_id: str = Field(..., description="고유 이벤트 ID")
    event_type: str = Field(..., description="이벤트 타입")
    # NOTE(review): datetime.now() yields a naive local timestamp.
    timestamp: datetime = Field(default_factory=datetime.now, description="이벤트 발생 시간")
    # Plain string default so the value matches the ``str`` annotation.
    version: str = Field(default=SchemaVersion.V1.value, description="스키마 버전")
    service: str = Field(..., description="이벤트 발생 서비스")

    class Config:
        json_encoders = {
            datetime: lambda v: v.isoformat()
        }


# User Events Schemas
class UserCreatedSchema(EventSchemaBase):
    """User created event schema."""

    event_type: Literal["USER_CREATED"] = "USER_CREATED"
    data: Dict[str, Any] = Field(..., description="이벤트 데이터")

    @field_validator('data')
    @classmethod
    def validate_data(cls, v):
        """Require ``user_id``, ``username`` and ``email``."""
        return _require_fields(v, ['user_id', 'username', 'email'])


class UserUpdatedSchema(EventSchemaBase):
    """User updated event schema."""

    event_type: Literal["USER_UPDATED"] = "USER_UPDATED"
    data: Dict[str, Any] = Field(..., description="이벤트 데이터")

    @field_validator('data')
    @classmethod
    def validate_data(cls, v):
        """Require ``user_id``; ``updated_fields``, when present, must be a list.

        Optional keys: username, email, full_name, profile_picture, bio,
        location, website, updated_fields.
        """
        _require_fields(v, ['user_id'])

        if 'updated_fields' in v and not isinstance(v['updated_fields'], list):
            raise ValueError("updated_fields must be a list")

        return v


class UserDeletedSchema(EventSchemaBase):
    """User deleted event schema."""

    event_type: Literal["USER_DELETED"] = "USER_DELETED"
    data: Dict[str, Any] = Field(..., description="이벤트 데이터")

    @field_validator('data')
    @classmethod
    def validate_data(cls, v):
        """Require ``user_id`` and ``username``."""
        return _require_fields(v, ['user_id', 'username'])


# OAuth Events Schemas
class OAuthAppCreatedSchema(EventSchemaBase):
    """OAuth application created event schema."""

    event_type: Literal["OAUTH_APP_CREATED"] = "OAUTH_APP_CREATED"
    data: Dict[str, Any] = Field(..., description="이벤트 데이터")

    @field_validator('data')
    @classmethod
    def validate_data(cls, v):
        """Require ``app_id``, ``name``, ``owner_id`` and ``client_id``."""
        return _require_fields(v, ['app_id', 'name', 'owner_id', 'client_id'])


class OAuthTokenIssuedSchema(EventSchemaBase):
    """OAuth token issued event schema."""

    event_type: Literal["OAUTH_TOKEN_ISSUED"] = "OAUTH_TOKEN_ISSUED"
    data: Dict[str, Any] = Field(..., description="이벤트 데이터")

    @field_validator('data')
    @classmethod
    def validate_data(cls, v):
        """Require ``client_id`` and ``grant_type``; ``scopes``, when present, must be a list.

        Optional keys: user_id, scopes, expires_in.
        """
        _require_fields(v, ['client_id', 'grant_type'])

        if 'scopes' in v and not isinstance(v['scopes'], list):
            raise ValueError("scopes must be a list")

        return v


class OAuthTokenRevokedSchema(EventSchemaBase):
    """OAuth token revoked event schema."""

    event_type: Literal["OAUTH_TOKEN_REVOKED"] = "OAUTH_TOKEN_REVOKED"
    data: Dict[str, Any] = Field(..., description="이벤트 데이터")

    @field_validator('data')
    @classmethod
    def validate_data(cls, v):
        """Require ``token_id`` and ``client_id``.

        Optional keys: user_id, revoked_by.
        """
        return _require_fields(v, ['token_id', 'client_id'])


# Image Events Schemas
class ImageUploadedSchema(EventSchemaBase):
    """Image uploaded event schema."""

    event_type: Literal["IMAGE_UPLOADED"] = "IMAGE_UPLOADED"
    data: Dict[str, Any] = Field(..., description="이벤트 데이터")

    @field_validator('data')
    @classmethod
    def validate_data(cls, v):
        """Require ``image_id``, ``user_id`` and ``url``.

        Optional keys: size, mime_type, width, height, thumbnail_url.
        """
        return _require_fields(v, ['image_id', 'user_id', 'url'])


class ImageProcessedSchema(EventSchemaBase):
    """Image processing finished event schema."""

    event_type: Literal["IMAGE_PROCESSED"] = "IMAGE_PROCESSED"
    data: Dict[str, Any] = Field(..., description="이벤트 데이터")

    @field_validator('data')
    @classmethod
    def validate_data(cls, v):
        """Require ``image_id`` and ``process_type``.

        Optional keys: original_url, processed_url, processing_time_ms.
        """
        return _require_fields(v, ['image_id', 'process_type'])


class SchemaRegistry:
    """Lookup, validation, migration and export of event schemas.

    NOTE(review): keys here are upper-case names ("USER_CREATED") while
    ``EventType`` in events.py uses dotted values ("user.created"); events
    built from ``EventType`` will be reported as unknown - confirm which
    naming is authoritative.
    """

    # Event type name -> schema class.
    SCHEMAS = {
        "USER_CREATED": UserCreatedSchema,
        "USER_UPDATED": UserUpdatedSchema,
        "USER_DELETED": UserDeletedSchema,
        "OAUTH_APP_CREATED": OAuthAppCreatedSchema,
        "OAUTH_TOKEN_ISSUED": OAuthTokenIssuedSchema,
        "OAUTH_TOKEN_REVOKED": OAuthTokenRevokedSchema,
        "IMAGE_UPLOADED": ImageUploadedSchema,
        "IMAGE_PROCESSED": ImageProcessedSchema,
    }

    # Target version -> source versions that can be read as that target.
    COMPATIBILITY_MATRIX = {
        SchemaVersion.V1: [SchemaVersion.V1],
        SchemaVersion.V2: [SchemaVersion.V1, SchemaVersion.V2],  # V2 accepts V1
    }

    @classmethod
    def get_schema(cls, event_type: str) -> Optional[type]:
        """Return the schema class for ``event_type``, or ``None`` if unknown."""
        return cls.SCHEMAS.get(event_type)

    @classmethod
    def validate_event(cls, event_data: Dict[str, Any]) -> tuple[bool, Optional[str]]:
        """Validate ``event_data`` against the schema of its ``event_type``.

        Returns:
            ``(True, None)`` when valid, otherwise ``(False, reason)``.
            Never raises: any validation error is returned as the reason.
        """
        try:
            event_type = event_data.get('event_type')
            if not event_type:
                return False, "Missing event_type"

            schema_class = cls.get_schema(event_type)
            if not schema_class:
                return False, f"Unknown event type: {event_type}"

            # Instantiating the model runs the schema validation.
            schema_class(**event_data)
            return True, None

        except Exception as e:
            return False, str(e)

    @classmethod
    def is_compatible(cls, from_version: str, to_version: str) -> bool:
        """Return whether data at ``from_version`` can be used as ``to_version``.

        Unknown version strings are reported as incompatible.
        """
        try:
            from_v = SchemaVersion(from_version)
            to_v = SchemaVersion(to_version)
        except ValueError:
            return False

        return from_v in cls.COMPATIBILITY_MATRIX.get(to_v, [])

    @classmethod
    def migrate_event(
        cls,
        event_data: Dict[str, Any],
        from_version: str,
        to_version: str
    ) -> Dict[str, Any]:
        """Migrate ``event_data`` between schema versions.

        The dict is updated in place and also returned.

        Raises:
            ValueError: if the two versions are not compatible.
        """
        if from_version == to_version:
            return event_data

        if not cls.is_compatible(from_version, to_version):
            raise ValueError(f"Cannot migrate from {from_version} to {to_version}")

        # Per-version migration steps.
        if from_version == SchemaVersion.V1 and to_version == SchemaVersion.V2:
            # Store the plain string so the payload stays a simple dict.
            event_data['version'] = SchemaVersion.V2.value

            # Field introduced by V2, filled with its default.
            event_data.setdefault('metadata', {})

        return event_data

    @classmethod
    def get_all_schemas(cls) -> Dict[str, Dict[str, Any]]:
        """Return description, JSON schema, version and example for every schema."""
        return {
            event_type: {
                "description": schema_class.__doc__,
                "fields": schema_class.model_json_schema(),
                # Plain string: formatting a (str, Enum) member yields
                # "SchemaVersion.V1" on newer Python versions.
                "version": SchemaVersion.V1.value,
                "example": cls._generate_example(schema_class),
            }
            for event_type, schema_class in cls.SCHEMAS.items()
        }

    @classmethod
    def _generate_example(cls, schema_class: type) -> Dict[str, Any]:
        """Return a sample payload for ``schema_class`` (``{}`` if none is defined)."""
        examples = {
            "USER_CREATED": {
                "event_id": "evt_123456",
                "event_type": "USER_CREATED",
                "timestamp": datetime.now().isoformat(),
                "version": "1.0.0",
                "service": "users",
                "data": {
                    "user_id": "usr_abc123",
                    "username": "johndoe",
                    "email": "john@example.com"
                }
            },
            "USER_UPDATED": {
                "event_id": "evt_123457",
                "event_type": "USER_UPDATED",
                "timestamp": datetime.now().isoformat(),
                "version": "1.0.0",
                "service": "users",
                "data": {
                    "user_id": "usr_abc123",
                    "updated_fields": ["profile_picture", "bio"],
                    "profile_picture": "https://example.com/pic.jpg",
                    "bio": "Updated bio"
                }
            },
            "OAUTH_TOKEN_ISSUED": {
                "event_id": "evt_123458",
                "event_type": "OAUTH_TOKEN_ISSUED",
                "timestamp": datetime.now().isoformat(),
                "version": "1.0.0",
                "service": "oauth",
                "data": {
                    "client_id": "app_xyz789",
                    "user_id": "usr_abc123",
                    "grant_type": "authorization_code",
                    "scopes": ["profile", "email"],
                    "expires_in": 3600
                }
            }
        }

        # The Literal default of ``event_type`` doubles as the registry key.
        return examples.get(schema_class.model_fields['event_type'].default, {})

    @classmethod
    def export_schemas(cls, format: str = "json") -> str:
        """Export all schemas as ``"json"`` or ``"markdown"`` text.

        Raises:
            ValueError: for any other ``format``.
        """
        schemas = cls.get_all_schemas()

        if format == "json":
            return json.dumps(schemas, indent=2, default=str)
        if format == "markdown":
            return cls._export_as_markdown(schemas)
        raise ValueError(f"Unsupported format: {format}")

    @classmethod
    def _export_as_markdown(cls, schemas: Dict[str, Dict[str, Any]]) -> str:
        """Render ``schemas`` (as built by ``get_all_schemas``) as markdown."""
        parts = ["# Event Schema Registry\n\n"]

        for event_type, info in schemas.items():
            parts.append(f"## {event_type}\n\n")
            parts.append(f"{info['description']}\n\n")
            parts.append(f"**Version:** {info['version']}\n\n")
            parts.append("**Example:**\n```json\n")
            parts.append(json.dumps(info['example'], indent=2, default=str))
            parts.append("\n```\n\n")

        return "".join(parts)
+RUN npm run build + +# Production stage +FROM nginx:alpine + +COPY --from=builder /app/dist /usr/share/nginx/html +COPY nginx.conf /etc/nginx/conf.d/default.conf + +EXPOSE 80 + +CMD ["nginx", "-g", "daemon off;"] \ No newline at end of file diff --git a/console/frontend/index.html b/console/frontend/index.html new file mode 100644 index 0000000..9e817c5 --- /dev/null +++ b/console/frontend/index.html @@ -0,0 +1,13 @@ + + + + + + + Console - Microservices Dashboard + + +
+ + + \ No newline at end of file diff --git a/console/frontend/nginx.conf b/console/frontend/nginx.conf new file mode 100644 index 0000000..9db5c76 --- /dev/null +++ b/console/frontend/nginx.conf @@ -0,0 +1,22 @@ +server { + listen 80; + server_name localhost; + root /usr/share/nginx/html; + index index.html; + + location / { + try_files $uri $uri/ /index.html; + } + + location /api { + proxy_pass http://console-backend:8000; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +} \ No newline at end of file diff --git a/console/frontend/package.json b/console/frontend/package.json new file mode 100644 index 0000000..8085dec --- /dev/null +++ b/console/frontend/package.json @@ -0,0 +1,33 @@ +{ + "name": "console-frontend", + "private": true, + "version": "0.1.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc && vite build", + "preview": "vite preview" + }, + "dependencies": { + "@emotion/react": "^11.11.3", + "@emotion/styled": "^11.11.0", + "@mui/material": "^5.15.2", + "@mui/icons-material": "^5.15.2", + "axios": "^1.6.3", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "react-router-dom": "^6.21.1" + }, + "devDependencies": { + "@types/react": "^18.2.43", + "@types/react-dom": "^18.2.17", + "@typescript-eslint/eslint-plugin": "^6.14.0", + "@typescript-eslint/parser": "^6.14.0", + "@vitejs/plugin-react": "^4.2.1", + "eslint": "^8.55.0", + "eslint-plugin-react-hooks": "^4.6.0", + "eslint-plugin-react-refresh": "^0.4.5", + "typescript": "^5.2.2", + "vite": "^5.0.8" + } +} \ No newline at end of file diff --git a/console/frontend/src/App.tsx b/console/frontend/src/App.tsx new file mode 100644 index 0000000..7a5285e --- /dev/null +++ b/console/frontend/src/App.tsx @@ 
-0,0 +1,19 @@ +import { Routes, Route } from 'react-router-dom' +import Layout from './components/Layout' +import Dashboard from './pages/Dashboard' +import Services from './pages/Services' +import Users from './pages/Users' + +function App() { + return ( + + }> + } /> + } /> + } /> + + + ) +} + +export default App \ No newline at end of file diff --git a/console/frontend/src/components/Layout.tsx b/console/frontend/src/components/Layout.tsx new file mode 100644 index 0000000..5c93367 --- /dev/null +++ b/console/frontend/src/components/Layout.tsx @@ -0,0 +1,102 @@ +import { useState } from 'react' +import { Outlet, Link as RouterLink } from 'react-router-dom' +import { + AppBar, + Box, + Drawer, + IconButton, + List, + ListItem, + ListItemButton, + ListItemIcon, + ListItemText, + Toolbar, + Typography, +} from '@mui/material' +import { + Menu as MenuIcon, + Dashboard as DashboardIcon, + Cloud as CloudIcon, + People as PeopleIcon, +} from '@mui/icons-material' + +const drawerWidth = 240 + +const menuItems = [ + { text: 'Dashboard', icon: , path: '/' }, + { text: 'Services', icon: , path: '/services' }, + { text: 'Users', icon: , path: '/users' }, +] + +function Layout() { + const [open, setOpen] = useState(true) + + const handleDrawerToggle = () => { + setOpen(!open) + } + + return ( + + theme.zIndex.drawer + 1 }} + > + + + + + + Microservices Console + + + + + + + + {menuItems.map((item) => ( + + + {item.icon} + + + + ))} + + + + + + + + + ) +} + +export default Layout \ No newline at end of file diff --git a/console/frontend/src/main.tsx b/console/frontend/src/main.tsx new file mode 100644 index 0000000..e59a55f --- /dev/null +++ b/console/frontend/src/main.tsx @@ -0,0 +1,29 @@ +import React from 'react' +import ReactDOM from 'react-dom/client' +import { BrowserRouter } from 'react-router-dom' +import { ThemeProvider, createTheme } from '@mui/material/styles' +import CssBaseline from '@mui/material/CssBaseline' +import App from './App' + +const theme = 
createTheme({ + palette: { + mode: 'light', + primary: { + main: '#1976d2', + }, + secondary: { + main: '#dc004e', + }, + }, +}) + +ReactDOM.createRoot(document.getElementById('root')!).render( + + + + + + + + , +) \ No newline at end of file diff --git a/console/frontend/src/pages/Dashboard.tsx b/console/frontend/src/pages/Dashboard.tsx new file mode 100644 index 0000000..86eb4e8 --- /dev/null +++ b/console/frontend/src/pages/Dashboard.tsx @@ -0,0 +1,153 @@ +import { useEffect, useState } from 'react' +import { + Grid, + Paper, + Typography, + Box, + Card, + CardContent, + Chip, +} from '@mui/material' +import { + CheckCircle as CheckCircleIcon, + Error as ErrorIcon +} from '@mui/icons-material' +import axios from 'axios' + +interface ServiceStatus { + name: string + status: 'healthy' | 'unhealthy' + endpoint: string + lastChecked: string +} + +function Dashboard() { + const [services, setServices] = useState([]) + const [stats, setStats] = useState({ + totalServices: 0, + healthyServices: 0, + unhealthyServices: 0, + }) + + useEffect(() => { + checkServices() + const interval = setInterval(checkServices, 10000) + return () => clearInterval(interval) + }, []) + + const checkServices = async () => { + const serviceChecks = [ + { name: 'Console Backend', endpoint: '/api/health' }, + { name: 'Users Service', endpoint: '/api/users/health' }, + ] + + const results = await Promise.all( + serviceChecks.map(async (service) => { + try { + await axios.get(service.endpoint) + return { + ...service, + status: 'healthy' as const, + lastChecked: new Date().toLocaleTimeString(), + } + } catch { + return { + ...service, + status: 'unhealthy' as const, + lastChecked: new Date().toLocaleTimeString(), + } + } + }) + ) + + setServices(results) + + const healthy = results.filter(s => s.status === 'healthy').length + setStats({ + totalServices: results.length, + healthyServices: healthy, + unhealthyServices: results.length - healthy, + }) + } + + return ( + + + Dashboard + + + + + + + 
+ Total Services + + + {stats.totalServices} + + + + + + + + + Healthy Services + + + {stats.healthyServices} + + + + + + + + + Unhealthy Services + + + {stats.unhealthyServices} + + + + + + + + + Service Status + + + {services.map((service) => ( + + + + + + {service.name} + + {service.endpoint} + + + Last checked: {service.lastChecked} + + + : } + /> + + + + + ))} + + + + ) +} + +export default Dashboard \ No newline at end of file diff --git a/console/frontend/src/pages/Services.tsx b/console/frontend/src/pages/Services.tsx new file mode 100644 index 0000000..f492f7d --- /dev/null +++ b/console/frontend/src/pages/Services.tsx @@ -0,0 +1,98 @@ +import { + Box, + Typography, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Paper, + Chip, +} from '@mui/material' + +const servicesData = [ + { + id: 1, + name: 'Console', + type: 'API Gateway', + port: 8011, + status: 'Running', + description: 'Central orchestrator and API gateway', + }, + { + id: 2, + name: 'Users', + type: 'Microservice', + port: 8001, + status: 'Running', + description: 'User management service', + }, + { + id: 3, + name: 'MongoDB', + type: 'Database', + port: 27017, + status: 'Running', + description: 'Document database for persistence', + }, + { + id: 4, + name: 'Redis', + type: 'Cache', + port: 6379, + status: 'Running', + description: 'In-memory cache and pub/sub', + }, +] + +function Services() { + return ( + + + Services + + + + + + + Service Name + Type + Port + Status + Description + + + + {servicesData.map((service) => ( + + + {service.name} + + + + + {service.port} + + + + {service.description} + + ))} + +
+
+
+ ) +} + +export default Services \ No newline at end of file diff --git a/console/frontend/src/pages/Users.tsx b/console/frontend/src/pages/Users.tsx new file mode 100644 index 0000000..07c4e4e --- /dev/null +++ b/console/frontend/src/pages/Users.tsx @@ -0,0 +1,208 @@ +import { useState, useEffect } from 'react' +import { + Box, + Typography, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Paper, + Button, + IconButton, + TextField, + Dialog, + DialogTitle, + DialogContent, + DialogActions, + Stack, +} from '@mui/material' +import { + Add as AddIcon, + Edit as EditIcon, + Delete as DeleteIcon, +} from '@mui/icons-material' +import axios from 'axios' + +interface User { + _id: string + username: string + email: string + full_name?: string + created_at: string +} + +function Users() { + const [users, setUsers] = useState([]) + const [openDialog, setOpenDialog] = useState(false) + const [editingUser, setEditingUser] = useState(null) + const [formData, setFormData] = useState({ + username: '', + email: '', + full_name: '', + }) + + useEffect(() => { + fetchUsers() + }, []) + + const fetchUsers = async () => { + try { + const response = await axios.get('/api/users/') + setUsers(response.data) + } catch (error) { + console.error('Failed to fetch users:', error) + } + } + + const handleOpenDialog = (user?: User) => { + if (user) { + setEditingUser(user) + setFormData({ + username: user.username, + email: user.email, + full_name: user.full_name || '', + }) + } else { + setEditingUser(null) + setFormData({ + username: '', + email: '', + full_name: '', + }) + } + setOpenDialog(true) + } + + const handleCloseDialog = () => { + setOpenDialog(false) + setEditingUser(null) + setFormData({ + username: '', + email: '', + full_name: '', + }) + } + + const handleSubmit = async () => { + try { + if (editingUser) { + await axios.put(`/api/users/${editingUser._id}`, formData) + } else { + await axios.post('/api/users/', formData) + } + fetchUsers() + 
handleCloseDialog() + } catch (error) { + console.error('Failed to save user:', error) + } + } + + const handleDelete = async (id: string) => { + if (confirm('Are you sure you want to delete this user?')) { + try { + await axios.delete(`/api/users/${id}`) + fetchUsers() + } catch (error) { + console.error('Failed to delete user:', error) + } + } + } + + return ( + + + + Users + + + + + + + + + Username + Email + Full Name + Created At + Actions + + + + {users.map((user) => ( + + {user.username} + {user.email} + {user.full_name || '-'} + + {new Date(user.created_at).toLocaleDateString()} + + + handleOpenDialog(user)} + > + + + handleDelete(user._id)} + > + + + + + ))} + +
+
+ + + + {editingUser ? 'Edit User' : 'Add New User'} + + + + setFormData({ ...formData, username: e.target.value })} + fullWidth + required + /> + setFormData({ ...formData, email: e.target.value })} + fullWidth + required + /> + setFormData({ ...formData, full_name: e.target.value })} + fullWidth + /> + + + + + + + +
+ ) +} + +export default Users \ No newline at end of file diff --git a/console/frontend/tsconfig.json b/console/frontend/tsconfig.json new file mode 100644 index 0000000..7a7611e --- /dev/null +++ b/console/frontend/tsconfig.json @@ -0,0 +1,25 @@ +{ + "compilerOptions": { + "target": "ES2020", + "useDefineForClassFields": true, + "lib": ["ES2020", "DOM", "DOM.Iterable"], + "module": "ESNext", + "skipLibCheck": true, + + /* Bundler mode */ + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "resolveJsonModule": true, + "isolatedModules": true, + "noEmit": true, + "jsx": "react-jsx", + + /* Linting */ + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noFallthroughCasesInSwitch": true + }, + "include": ["src"], + "references": [{ "path": "./tsconfig.node.json" }] +} \ No newline at end of file diff --git a/console/frontend/tsconfig.node.json b/console/frontend/tsconfig.node.json new file mode 100644 index 0000000..099658c --- /dev/null +++ b/console/frontend/tsconfig.node.json @@ -0,0 +1,10 @@ +{ + "compilerOptions": { + "composite": true, + "skipLibCheck": true, + "module": "ESNext", + "moduleResolution": "bundler", + "allowSyntheticDefaultImports": true + }, + "include": ["vite.config.ts"] +} \ No newline at end of file diff --git a/console/frontend/vite.config.ts b/console/frontend/vite.config.ts new file mode 100644 index 0000000..62f7534 --- /dev/null +++ b/console/frontend/vite.config.ts @@ -0,0 +1,17 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +// https://vitejs.dev/config/ +export default defineConfig({ + plugins: [react()], + server: { + host: '0.0.0.0', + port: 3000, + proxy: { + '/api': { + target: 'http://console-backend:8000', + changeOrigin: true + } + } + } +}) \ No newline at end of file diff --git a/docker-compose-scheduler.yml b/docker-compose-scheduler.yml new file mode 100644 index 0000000..a199d52 --- /dev/null +++ b/docker-compose-scheduler.yml @@ -0,0 +1,85 
@@ +version: '3.8' + +services: + # 키워드별 전용 스케줄러 (AI 키워드) + pipeline-scheduler-ai: + build: + context: ./services/pipeline + dockerfile: scheduler/Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_scheduler_ai + restart: unless-stopped + depends_on: + - redis + - mongodb + - pipeline-rss-collector + environment: + - REDIS_URL=redis://redis:6379 + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - KEYWORD=AI + - INTERVAL_MINUTES=60 + - PRIORITY=1 + - MAX_ARTICLES=100 + - LOG_LEVEL=INFO + command: python single_keyword_scheduler.py + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + + # 키워드별 전용 스케줄러 (경제 키워드) + pipeline-scheduler-economy: + build: + context: ./services/pipeline + dockerfile: scheduler/Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_scheduler_economy + restart: unless-stopped + depends_on: + - redis + - mongodb + - pipeline-rss-collector + environment: + - REDIS_URL=redis://redis:6379 + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - KEYWORD=경제 + - INTERVAL_MINUTES=120 + - PRIORITY=0 + - MAX_ARTICLES=100 + - LOG_LEVEL=INFO + command: python single_keyword_scheduler.py + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + + # 키워드별 전용 스케줄러 (테크놀로지 키워드) + pipeline-scheduler-tech: + build: + context: ./services/pipeline + dockerfile: scheduler/Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_scheduler_tech + restart: unless-stopped + depends_on: + - redis + - mongodb + - pipeline-rss-collector + environment: + - REDIS_URL=redis://redis:6379 + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - KEYWORD=테크놀로지 + - INTERVAL_MINUTES=60 + - PRIORITY=1 + - MAX_ARTICLES=100 + - LOG_LEVEL=INFO + command: python single_keyword_scheduler.py + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + +networks: + site11_network: + external: true + name: site11_network \ No newline at end of file diff --git 
a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..10ad421 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,685 @@ +services: + console-frontend: + build: + context: ./console/frontend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_console_frontend + ports: + - "${CONSOLE_FRONTEND_PORT}:80" + networks: + - site11_network + restart: unless-stopped + depends_on: + - console-backend + + console-backend: + build: + context: ./console/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_console_backend + ports: + - "${CONSOLE_BACKEND_PORT}:8000" + environment: + - ENV=${ENV} + - PORT=8000 + - USERS_SERVICE_URL=${USERS_SERVICE_URL} + - JWT_SECRET_KEY=${JWT_SECRET_KEY} + - JWT_ALGORITHM=${JWT_ALGORITHM} + - ACCESS_TOKEN_EXPIRE_MINUTES=${ACCESS_TOKEN_EXPIRE_MINUTES} + volumes: + - ./console/backend:/app + networks: + - site11_network + restart: unless-stopped + depends_on: + - users-backend + + users-backend: + build: + context: ./services/users/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_users_backend + ports: + - "${USERS_BACKEND_PORT}:8000" + environment: + - ENV=${ENV} + - PORT=8000 + - MONGODB_URL=${MONGODB_URL} + - DB_NAME=${USERS_DB_NAME} + - KAFKA_BOOTSTRAP_SERVERS=${KAFKA_BOOTSTRAP_SERVERS} + - KAFKA_GROUP_ID=${KAFKA_GROUP_ID} + volumes: + - ./services/users/backend:/app + - ./shared:/app/shared + networks: + - site11_network + restart: unless-stopped + depends_on: + - mongodb + - kafka + + images-backend: + build: + context: ./services/images/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_images_backend + ports: + - "${IMAGES_SERVICE_PORT}:8000" + environment: + - ENV=${ENV} + - PORT=8000 + - REDIS_URL=${REDIS_URL} + - MONGODB_URL=${MONGODB_URL} + - CACHE_DIR=/app/cache + - CONVERT_TO_WEBP=true + volumes: + - ./services/images/backend:/app + - ./data/images-cache:/app/cache + networks: + - site11_network + restart: unless-stopped + 
depends_on: + - redis + - mongodb + + oauth-backend: + build: + context: ./services/oauth/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_oauth_backend + ports: + - "${OAUTH_SERVICE_PORT}:8000" + environment: + - ENV=${ENV} + - PORT=8000 + - MONGODB_URL=${MONGODB_URL} + - OAUTH_DB_NAME=${OAUTH_DB_NAME} + - JWT_SECRET_KEY=${JWT_SECRET_KEY} + - JWT_ALGORITHM=${JWT_ALGORITHM} + - KAFKA_BOOTSTRAP_SERVERS=${KAFKA_BOOTSTRAP_SERVERS} + - KAFKA_GROUP_ID=${KAFKA_GROUP_ID} + volumes: + - ./services/oauth/backend:/app + - ./shared:/app/shared + networks: + - site11_network + restart: unless-stopped + depends_on: + - mongodb + - kafka + + mongodb: + image: mongo:7.0 + container_name: ${COMPOSE_PROJECT_NAME}_mongodb + environment: + - MONGO_INITDB_DATABASE=${MONGODB_DATABASE} + ports: + - "${MONGODB_PORT}:27017" + volumes: + - ./data/mongodb:/data/db + - ./data/mongodb/configdb:/data/configdb + networks: + - site11_network + restart: unless-stopped + healthcheck: + test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet + interval: 10s + timeout: 5s + retries: 5 + + redis: + image: redis:7-alpine + container_name: ${COMPOSE_PROJECT_NAME}_redis + ports: + - "${REDIS_PORT}:6379" + volumes: + - ./data/redis:/data + networks: + - site11_network + restart: unless-stopped + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + + zookeeper: + image: confluentinc/cp-zookeeper:7.5.0 + container_name: ${COMPOSE_PROJECT_NAME}_zookeeper + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_TICK_TIME: 2000 + ports: + - "${KAFKA_ZOOKEEPER_PORT}:2181" + volumes: + - ./data/zookeeper/data:/var/lib/zookeeper/data + - ./data/zookeeper/logs:/var/lib/zookeeper/log + networks: + - site11_network + restart: unless-stopped + + kafka: + image: confluentinc/cp-kafka:7.5.0 + container_name: ${COMPOSE_PROJECT_NAME}_kafka + depends_on: + - zookeeper + ports: + - "${KAFKA_PORT}:9092" + - "9101:9101" + environment: + 
KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092 + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 + KAFKA_JMX_PORT: 9101 + KAFKA_JMX_HOSTNAME: localhost + KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true' + volumes: + - ./data/kafka:/var/lib/kafka/data + networks: + - site11_network + restart: unless-stopped + healthcheck: + test: ["CMD", "kafka-broker-api-versions", "--bootstrap-server", "localhost:9092"] + interval: 10s + timeout: 5s + retries: 5 + + # Notifications Service + notifications-backend: + build: + context: ./services/notifications/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_notifications_backend + ports: + - "8013:8000" + environment: + - MONGODB_URL=mongodb://mongodb:27017 + - REDIS_URL=redis://redis:6379 + - KAFKA_BOOTSTRAP_SERVERS=kafka:9092 + - SMTP_HOST=${SMTP_HOST:-smtp.gmail.com} + - SMTP_PORT=${SMTP_PORT:-587} + - SMTP_USER=${SMTP_USER:-} + - SMTP_PASSWORD=${SMTP_PASSWORD:-} + - SMS_API_KEY=${SMS_API_KEY:-} + - SMS_API_URL=${SMS_API_URL:-} + - FCM_SERVER_KEY=${FCM_SERVER_KEY:-} + depends_on: + - mongodb + - redis + - kafka + networks: + - site11_network + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + + # MinIO Object Storage + minio: + image: minio/minio:latest + container_name: ${COMPOSE_PROJECT_NAME}_minio + ports: + - "9000:9000" + - "9001:9001" + environment: + - MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin} + - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin} + volumes: + - ./data/minio:/data + command: server /data --console-address ":9001" + networks: + - site11_network + restart: unless-stopped + 
healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + # File Management Service + files-backend: + build: + context: ./services/files/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_files_backend + ports: + - "8014:8000" + environment: + - ENV=${ENV} + - PORT=8000 + - MONGODB_URL=${MONGODB_URL} + - FILES_DB_NAME=${FILES_DB_NAME:-files_db} + - MINIO_ENDPOINT=minio:9000 + - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-minioadmin} + - MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-minioadmin} + - MINIO_SECURE=false + volumes: + - ./services/files/backend:/app + - ./data/files-temp:/tmp + networks: + - site11_network + restart: unless-stopped + depends_on: + - mongodb + - minio + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + + # Apache Solr Search Engine + solr: + image: solr:9.4 + container_name: ${COMPOSE_PROJECT_NAME}_solr + ports: + - "8983:8983" + volumes: + - ./data/solr:/var/solr + - ./services/search/solr-config:/opt/solr/server/solr/configsets/site11_config + command: + - solr-precreate + - site11 + - /opt/solr/server/solr/configsets/site11_config + networks: + - site11_network + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8983/solr/site11/admin/ping"] + interval: 30s + timeout: 10s + retries: 3 + + # Search Service + search-backend: + build: + context: ./services/search/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_search_backend + ports: + - "8015:8000" + environment: + - ENV=${ENV} + - PORT=8000 + - SOLR_URL=http://solr:8983/solr + - MONGODB_URL=${MONGODB_URL} + - KAFKA_BOOTSTRAP_SERVERS=${KAFKA_BOOTSTRAP_SERVERS} + volumes: + - ./services/search/backend:/app + networks: + - site11_network + restart: unless-stopped + depends_on: + - solr + - mongodb + - kafka + healthcheck: + test: ["CMD", "curl", "-f", 
"http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + + # Statistics Service + statistics-backend: + build: + context: ./services/statistics/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_statistics_backend + ports: + - "8012:8000" + environment: + - REDIS_URL=redis://redis:6379 + - KAFKA_BOOTSTRAP_SERVERS=kafka:9092 + - INFLUXDB_HOST=influxdb + - INFLUXDB_PORT=8086 + - INFLUXDB_DATABASE=statistics + depends_on: + - redis + - kafka + networks: + - site11_network + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + + # Google Search Service + google-search-backend: + build: + context: ./services/google-search/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_google_search_backend + ports: + - "8016:8000" + environment: + - REDIS_HOST=redis + - REDIS_PORT=6379 + - REDIS_DB=2 + - GOOGLE_API_KEY=AIzaSyBakoCsDP_oF5V4oq_eEKs4eQb-ekqxnRM + - GOOGLE_SEARCH_ENGINE_ID=35bfbdb7b6f244569 + - SERPAPI_KEY=${SERPAPI_KEY:-} + - DEFAULT_LANGUAGE=ko + - DEFAULT_COUNTRY=kr + - CACHE_TTL=3600 + depends_on: + - redis + networks: + - site11_network + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + + # RSS Feed Service + rss-feed-backend: + build: + context: ./services/rss-feed/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_rss_feed_backend + ports: + - "8017:8000" + environment: + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=rss_feed_db + - REDIS_URL=redis://redis:6379 + - REDIS_DB=3 + - DEFAULT_UPDATE_INTERVAL=900 + - MAX_ENTRIES_PER_FEED=100 + - ENABLE_SCHEDULER=true + - SCHEDULER_TIMEZONE=Asia/Seoul + depends_on: + - mongodb + - redis + networks: + - site11_network + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + 
timeout: 10s + retries: 3 + + # News Aggregator Service + news-aggregator-backend: + build: + context: ./services/news-aggregator/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_news_aggregator_backend + ports: + - "8018:8000" + environment: + - RSS_SERVICE_URL=http://rss-feed-backend:8000 + - GOOGLE_SEARCH_SERVICE_URL=http://google-search-backend:8000 + depends_on: + - rss-feed-backend + - google-search-backend + networks: + - site11_network + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + + # AI Writer Service + ai-writer-backend: + build: + context: ./services/ai-writer/backend + dockerfile: Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_ai_writer_backend + ports: + - "8019:8000" + environment: + - NEWS_AGGREGATOR_URL=http://news-aggregator-backend:8000 + - CLAUDE_API_KEY=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 + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - REDIS_URL=redis://redis:6379 + depends_on: + - mongodb + - redis + - news-aggregator-backend + networks: + - site11_network + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + + # AI Writer Worker Service + ai-writer-worker: + build: + context: ./services/ai-writer + dockerfile: worker/Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_ai_writer_worker + environment: + - CLAUDE_API_KEY=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 + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - REDIS_URL=redis://redis:6379 + - WORKER_COUNT=3 + depends_on: + - mongodb + - redis + - ai-writer-backend + networks: + - site11_network + restart: unless-stopped + + # ============ Pipeline Services ============ + # Pipeline 
Multi-threaded Scheduler Service + pipeline-scheduler: + build: + context: ./services/pipeline + dockerfile: scheduler/Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_pipeline_scheduler + restart: unless-stopped + depends_on: + - redis + - mongodb + environment: + - REDIS_URL=redis://redis:6379 + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - LOG_LEVEL=INFO + command: python multi_thread_scheduler.py + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + + # Keyword Manager API Service + keyword-manager: + build: + context: ./services/pipeline + dockerfile: scheduler/Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_keyword_manager + restart: unless-stopped + depends_on: + - mongodb + environment: + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - API_PORT=8100 + command: python keyword_manager.py + ports: + - "8100:8100" + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + + # Pipeline RSS Collector Worker + pipeline-rss-collector: + build: + context: ./services/pipeline + dockerfile: rss-collector/Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_pipeline_rss_collector + restart: unless-stopped + depends_on: + - redis + environment: + - REDIS_URL=redis://redis:6379 + - LOG_LEVEL=INFO + - RSS_ENQUEUE_DELAY=1.0 + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + + # Pipeline Google Search Worker + pipeline-google-search: + build: + context: ./services/pipeline + dockerfile: google-search/Dockerfile + restart: unless-stopped + depends_on: + - redis + environment: + - REDIS_URL=redis://redis:6379 + - GOOGLE_API_KEY=AIzaSyBakoCsDP_oF5V4oq_eEKs4eQb-ekqxnRM + - GOOGLE_SEARCH_ENGINE_ID=35bfbdb7b6f244569 + - LOG_LEVEL=INFO + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + + # Pipeline AI Article Generator Worker + pipeline-ai-article-generator: + build: + context: 
./services/pipeline + dockerfile: ai-article-generator/Dockerfile + restart: unless-stopped + depends_on: + - redis + - mongodb + environment: + - REDIS_URL=redis://redis:6379 + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - CLAUDE_API_KEY=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 + - LOG_LEVEL=INFO + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + + + # Pipeline Monitor (optional dashboard) + pipeline-monitor: + build: + context: ./services/pipeline + dockerfile: monitor/Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_pipeline_monitor + restart: unless-stopped + depends_on: + - redis + - mongodb + ports: + - "8100:8000" + environment: + - REDIS_URL=redis://redis:6379 + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - LOG_LEVEL=INFO + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + + # Pipeline Translator + pipeline-translator: + build: + context: ./services/pipeline + dockerfile: translator/Dockerfile + restart: unless-stopped + depends_on: + - redis + - mongodb + environment: + - REDIS_URL=redis://redis:6379 + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - DEEPL_API_KEY=3abbc796-2515-44a8-972d-22dcf27ab54a + - LOG_LEVEL=INFO + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + + # Pipeline Language Sync Service + pipeline-language-sync: + build: + context: ./services/pipeline + dockerfile: translator/Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_pipeline_language_sync + restart: unless-stopped + depends_on: + - mongodb + environment: + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - DEEPL_API_KEY=3abbc796-2515-44a8-972d-22dcf27ab54a + - LOG_LEVEL=INFO + command: ["python", "language_sync.py"] + volumes: + - ./services/pipeline/shared:/app/shared:ro + - ./services/pipeline/config:/app/config:ro + 
networks: + - site11_network + + # Pipeline Image Generator + pipeline-image-generator: + build: + context: ./services/pipeline + dockerfile: image-generator/Dockerfile + container_name: ${COMPOSE_PROJECT_NAME}_pipeline_image_generator + restart: unless-stopped + depends_on: + - redis + - mongodb + env_file: + - ./services/pipeline/.env + environment: + - REDIS_URL=redis://redis:6379 + - MONGODB_URL=mongodb://mongodb:27017 + - DB_NAME=ai_writer_db + - LOG_LEVEL=INFO + volumes: + - ./services/pipeline/shared:/app/shared:ro + networks: + - site11_network + + +networks: + site11_network: + driver: bridge + name: site11_network + +# Named volumes are replaced with bind mounts in ./data/ directory +# volumes: +# mongodb_data: +# mongodb_config: +# redis_data: +# images_cache: +# zookeeper_data: +# zookeeper_logs: +# kafka_data: +# minio_data: +# files_temp: +# solr_data: \ No newline at end of file diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..e152c63 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,182 @@ +# Site11 Microservices Architecture + +## 시스템 아키텍처 개요 + +### 메시징 및 데이터 처리 시스템 + +#### 1. **Apache Kafka** - 통합 메시징 플랫폼 +- **역할**: 이벤트 스트리밍 + 작업 큐 + 메시지 버스 +- **사용 사례**: + - 서비스 간 이벤트 발행/구독 + - 비동기 작업 큐 (Celery 대체) + - 사용자 활동 로그 스트리밍 + - 실시간 데이터 파이프라인 + - 이벤트 소싱 패턴 구현 + - CQRS (Command Query Responsibility Segregation) + - 백그라운드 작업 처리 + +#### 2. **Redis** - 인메모리 데이터 스토어 +- **역할**: 캐싱 및 세션 관리 전용 +- **사용 사례**: + - API 응답 캐싱 + - 사용자 세션 저장 + - Rate limiting + - 실시간 리더보드/카운터 + - 임시 데이터 저장 + +#### 3. 
**MongoDB** - Document Database +- **역할**: 주요 데이터 영속성 +- **사용 사례**: + - 서비스별 도메인 데이터 + - 유연한 스키마 관리 + - 이벤트 저장소 + +## 서비스 통신 패턴 + +### 동기 통신 (REST API) +``` +Client → Nginx → Console (API Gateway) → Microservice +``` +- 즉각적인 응답이 필요한 경우 +- CRUD 작업 +- 실시간 데이터 조회 + +### 비동기 통신 (Kafka Events) +``` +Service A → Kafka Topic → Service B, C, D +``` +- 서비스 간 느슨한 결합 +- 이벤트 기반 아키텍처 +- 확장 가능한 처리 + +### 캐싱 전략 (Redis) +``` +Request → Check Redis Cache → Hit? Return : Fetch from DB → Store in Redis → Return +``` +- 응답 시간 개선 +- 데이터베이스 부하 감소 +- 세션 관리 + +## 이벤트 플로우 예시 + +### 사용자 등록 플로우 +1. **API Request**: Client → Console → Users Service +2. **User Created Event**: Users Service → Kafka +3. **Event Consumers**: + - Statistics Service: 사용자 통계 업데이트 + - Email Service: 환영 이메일 발송 + - Analytics Service: 가입 분석 +4. **Cache Update**: Redis에 사용자 정보 캐싱 + +### 이미지 업로드 플로우 +1. **Upload Request**: Client → Console → Images Service +2. **Image Uploaded Event**: Images Service → Kafka +3. **Event Processing**: + - Thumbnail Service: 썸네일 생성 + - ML Service: 이미지 분석 + - Statistics Service: 업로드 통계 +4. **Job Queue**: Redis/Celery로 백그라운드 처리 + +## Kafka Topics 구조 (예정) + +### Event Topics (이벤트 스트리밍) +``` +# User Domain +user.created +user.updated +user.deleted +user.login + +# Image Domain +image.uploaded +image.processed +image.deleted + +# Application Domain +app.registered +app.updated +app.deployed + +# System Events +service.health +service.error +audit.log +``` + +### Task Queue Topics (작업 큐) +``` +# Background Jobs +tasks.email.send +tasks.image.resize +tasks.report.generate +tasks.data.export +tasks.notification.push + +# Scheduled Jobs +tasks.cleanup.expired +tasks.backup.database +tasks.analytics.aggregate +``` + +## Redis 사용 패턴 + +### 1. 캐싱 계층 +- Key: `cache:users:{user_id}` +- TTL: 3600초 +- 패턴: Cache-Aside + +### 2. 세션 관리 +- Key: `session:{token}` +- TTL: 1800초 +- 데이터: 사용자 정보, 권한 + +### 3. 
Rate Limiting +- Key: `rate_limit:{user_id}:{endpoint}` +- Window: Sliding window +- Limit: 100 requests/minute + +### 4. 작업 큐 (Celery) +- Queue: `celery:tasks` +- Priority Queue 지원 +- Dead Letter Queue + +## 구현 로드맵 + +### Phase 1 (현재) +- ✅ 기본 서비스 구조 +- ✅ MongoDB 연동 +- ✅ Redis 설치 +- 🔄 JWT 인증 + +### Phase 2 (Step 6-7) +- Kafka 클러스터 설정 +- 기본 Producer/Consumer 구현 +- Event Schema 정의 +- Redis 캐싱 전략 구현 + +### Phase 3 (Step 8+) +- Event Sourcing 패턴 +- CQRS 구현 +- Saga 패턴 (분산 트랜잭션) +- 모니터링 대시보드 + +## 기술 스택 + +### 메시징 & 스트리밍 +- **Kafka**: Event streaming +- **Redis**: Caching, Queue, Pub/Sub +- **Confluent Schema Registry**: Schema 관리 (향후) + +### 백엔드 +- **FastAPI**: REST API +- **Celery**: 비동기 작업 처리 +- **kafka-python**: Kafka 클라이언트 + +### 데이터베이스 +- **MongoDB**: Document store +- **Redis**: In-memory cache + +### 모니터링 (향후) +- **Kafka Manager**: Kafka 클러스터 관리 +- **RedisInsight**: Redis 모니터링 +- **Prometheus + Grafana**: 메트릭 수집/시각화 \ No newline at end of file diff --git a/docs/DATA_PERSISTENCE.md b/docs/DATA_PERSISTENCE.md new file mode 100644 index 0000000..eccb185 --- /dev/null +++ b/docs/DATA_PERSISTENCE.md @@ -0,0 +1,140 @@ +# Data Persistence Configuration + +## Overview +All data services are configured to use bind mounts to local directories for data persistence. This ensures data survives container restarts and rebuilds. 
+ +## Directory Structure +``` +data/ +├── mongodb/ # MongoDB database files +├── redis/ # Redis persistence files +├── kafka/ # Kafka log data +├── zookeeper/ # Zookeeper data and logs +│ ├── data/ +│ └── logs/ +├── minio/ # MinIO object storage +├── solr/ # Solr search index +├── files-temp/ # Temporary file storage +└── images-cache/ # Image processing cache +``` + +## Volume Mappings + +### MongoDB +- `./data/mongodb:/data/db` - Database files +- `./data/mongodb/configdb:/data/configdb` - Configuration database + +### Redis +- `./data/redis:/data` - RDB snapshots and AOF logs + +### Kafka +- `./data/kafka:/var/lib/kafka/data` - Message logs + +### Zookeeper +- `./data/zookeeper/data:/var/lib/zookeeper/data` - Coordination data +- `./data/zookeeper/logs:/var/lib/zookeeper/log` - Transaction logs + +### MinIO +- `./data/minio:/data` - Object storage buckets + +### Solr +- `./data/solr:/var/solr` - Search index and configuration + +### Application Caches +- `./data/files-temp:/tmp` - Temporary file processing +- `./data/images-cache:/app/cache` - Processed image cache + +## Backup and Restore + +### Backup All Data +```bash +# Stop services +docker-compose down + +# Create backup +tar -czf backup-$(date +%Y%m%d).tar.gz data/ + +# Restart services +docker-compose up -d +``` + +### Restore Data +```bash +# Stop services +docker-compose down + +# Extract backup +tar -xzf backup-YYYYMMDD.tar.gz + +# Restart services +docker-compose up -d +``` + +### Individual Service Backups + +#### MongoDB Backup +```bash +docker exec site11_mongodb mongodump --out /data/db/backup +tar -czf mongodb-backup.tar.gz data/mongodb/backup/ +``` + +#### Redis Backup +```bash +docker exec site11_redis redis-cli BGSAVE +# Wait for completion +cp data/redis/dump.rdb redis-backup-$(date +%Y%m%d).rdb +``` + +## Permissions +Ensure proper permissions for data directories: +```bash +# Set appropriate permissions +chmod -R 755 data/ +``` + +## Disk Space Monitoring +Monitor disk usage regularly: 
+```bash +# Check data directory size +du -sh data/* + +# Check individual services +du -sh data/mongodb +du -sh data/minio +du -sh data/kafka +``` + +## Clean Up Old Data + +### Inspect Kafka Log Usage (Kafka deletes segments older than 7 days by default) +```bash +docker exec site11_kafka kafka-log-dirs --describe --bootstrap-server localhost:9092 +``` + +### Clear Image Cache +```bash +rm -rf data/images-cache/* +``` + +### Clear Temporary Files +```bash +rm -rf data/files-temp/* +``` + +## Migration from Docker Volumes +If migrating from named Docker volumes to bind mounts: + +1. Export data from Docker volumes: +```bash +docker run --rm -v site11_mongodb_data:/source -v $(pwd)/data/mongodb:/dest alpine cp -av /source/. /dest/ +``` + +2. Update docker-compose.yml (already done) + +3. Restart services with new configuration + +## Notes +- The `data/` directory is excluded from git via .gitignore +- Ensure sufficient disk space for data growth +- Consider setting up automated backups for production +- Monitor disk I/O performance for database services \ No newline at end of file diff --git a/docs/PIPELINE_SCHEDULER_GUIDE.md b/docs/PIPELINE_SCHEDULER_GUIDE.md new file mode 100644 index 0000000..17ffa0e --- /dev/null +++ b/docs/PIPELINE_SCHEDULER_GUIDE.md @@ -0,0 +1,238 @@ +# Pipeline Scheduler Guide + +## 개요 +Pipeline Scheduler는 등록된 키워드를 주기적으로 실행하여 자동으로 뉴스를 수집하고 AI 기사를 생성하는 시스템입니다. + +## 아키텍처 + +### 1. 
구성 요소 + +#### 1.1 Multi-Thread Scheduler (pipeline-scheduler) +- **역할**: 키워드별 스레드 관리 및 주기적 실행 +- **특징**: + - 단일 Docker 컨테이너에서 여러 스레드 동시 실행 + - 각 키워드당 하나의 독립 스레드 + - 30초마다 새 키워드 체크 및 스레드 관리 +- **위치**: `services/pipeline/scheduler/multi_thread_scheduler.py` + +#### 1.2 Keyword Manager API (keyword-manager) +- **역할**: 키워드 CRUD 및 스레드 모니터링 +- **포트**: 8100 +- **주요 엔드포인트**: + - `GET /threads/status` - 모든 스레드 상태 조회 + - `GET /keywords` - 모든 키워드 목록 + - `POST /keywords` - 새 키워드 추가 + - `PUT /keywords/{keyword}` - 키워드 수정 + - `DELETE /keywords/{keyword}` - 키워드 삭제 + - `POST /keywords/{keyword}/activate` - 키워드 활성화 + - `POST /keywords/{keyword}/deactivate` - 키워드 비활성화 + - `POST /keywords/{keyword}/trigger` - 즉시 실행 +- **위치**: `services/pipeline/scheduler/keyword_manager.py` + +### 2. 데이터 모델 + +```python +class Keyword: + keyword_id: str # UUID + keyword: str # 검색 키워드 + interval_minutes: int # 실행 주기 (분) + is_active: bool # 활성 상태 + priority: int # 우선순위 (높을수록 우선) + rss_feeds: List[str] # RSS 피드 URL 목록 + max_articles_per_run: int # 실행당 최대 기사 수 + last_run: datetime # 마지막 실행 시간 + next_run: datetime # 다음 실행 예정 시간 +``` + +## 사용 방법 + +### 1. 서비스 시작 + +```bash +# 스케줄러와 매니저 시작 +docker-compose up -d pipeline-scheduler keyword-manager + +# 로그 확인 +docker-compose logs -f pipeline-scheduler +``` + +### 2. 
키워드 관리 + +#### 2.1 키워드 추가 +```bash +curl -X POST http://localhost:8100/keywords \ + -H "Content-Type: application/json" \ + -d '{ + "keyword": "딥러닝", + "interval_minutes": 60, + "priority": 1, + "rss_feeds": [], + "max_articles_per_run": 100, + "is_active": true + }' +``` + +#### 2.2 키워드 수정 +```bash +curl -X PUT http://localhost:8100/keywords/딥러닝 \ + -H "Content-Type: application/json" \ + -d '{ + "interval_minutes": 30, + "priority": 2 + }' +``` + +#### 2.3 키워드 활성화/비활성화 +```bash +# 활성화 +curl -X POST http://localhost:8100/keywords/딥러닝/activate + +# 비활성화 +curl -X POST http://localhost:8100/keywords/딥러닝/deactivate +``` + +#### 2.4 즉시 실행 +```bash +curl -X POST http://localhost:8100/keywords/딥러닝/trigger +``` + +#### 2.5 키워드 삭제 +```bash +curl -X DELETE http://localhost:8100/keywords/딥러닝 +``` + +### 3. 모니터링 + +#### 3.1 스레드 상태 확인 +```bash +curl http://localhost:8100/threads/status | python3 -m json.tool +``` + +응답 예시: +```json +{ + "total_threads": 4, + "active_threads": 4, + "threads": [ + { + "keyword": "블록체인", + "keyword_id": "5c7ac9a9-c56f-4878-94ec-adb13f105c8a", + "is_active": true, + "interval_minutes": 30, + "priority": 2, + "last_run": "2025-09-15T08:05:58.807000", + "next_run": "2025-09-15T08:35:58.807000", + "thread_status": "active", + "minutes_until_next_run": 25.3 + } + ] +} +``` + +#### 3.2 키워드 목록 조회 +```bash +curl http://localhost:8100/keywords | python3 -m json.tool +``` + +## 작동 방식 + +### 1. 실행 흐름 + +1. **키워드 스레드 시작** + - 스케줄러 시작 시 활성 키워드 로드 + - 각 키워드별 독립 스레드 생성 + +2. **주기적 실행** + - 각 스레드는 설정된 주기마다 실행 + - 실행 시 PipelineJob 생성 후 Redis 큐에 추가 + - RSS 수집 → Google 검색 → AI 기사 생성 → 번역 파이프라인 자동 진행 + +3. **동적 스레드 관리** + - 30초마다 새 키워드 확인 + - 새 키워드 추가 시 자동으로 스레드 생성 + - 비활성화/삭제 시 스레드 자동 중지 + +### 2. 우선순위 처리 + +- 높은 우선순위(priority) 키워드가 먼저 처리 +- Redis 큐에서 우선순위별 정렬 + +### 3. 
오류 처리 + +- 각 스레드는 독립적으로 오류 처리 +- 오류 발생 시 1분 대기 후 재시도 +- 스레드별 error_count, last_error 추적 + +## 현재 설정된 키워드 + +| 키워드 | 실행 주기 | 우선순위 | 상태 | +|--------|-----------|----------|------| +| 블록체인 | 30분 | 2 | 활성 | +| AI | 60분 | 1 | 활성 | +| 테크놀로지 | 60분 | 1 | 활성 | +| 경제 | 60분 | 0 | 활성 | + +## 주의사항 + +1. **스레드 관리** + - 키워드 추가/삭제는 30초 이내 자동 반영 + - 스레드 상태는 keyword-manager API로 실시간 확인 가능 + +2. **실행 주기** + - 최소 실행 주기: 제한 없음 (권장: 30분 이상) + - interval_minutes 변경 시 다음 실행 시간 자동 재계산 + +3. **중복 방지** + - 동일 키워드 중복 등록 불가 + - RSS 수집 시 중복 URL 자동 필터링 + +## 트러블슈팅 + +### 스레드가 시작되지 않을 때 +```bash +# 스케줄러 재시작 +docker-compose restart pipeline-scheduler + +# 로그 확인 +docker-compose logs --tail=50 pipeline-scheduler +``` + +### 키워드가 실행되지 않을 때 +```bash +# 키워드 상태 확인 +curl http://localhost:8100/keywords/키워드명 | python3 -m json.tool + +# 즉시 실행 트리거 +curl -X POST http://localhost:8100/keywords/키워드명/trigger +``` + +### MongoDB 연결 오류 +```bash +# MongoDB 상태 확인 +docker-compose ps mongodb + +# MongoDB 재시작 +docker-compose restart mongodb +``` + +## 파일 구조 + +``` +services/pipeline/scheduler/ +├── multi_thread_scheduler.py # 멀티스레드 스케줄러 +├── keyword_manager.py # 키워드 관리 API +├── single_keyword_scheduler.py # (deprecated) 단일 키워드 스케줄러 +├── requirements.txt # Python 의존성 +└── Dockerfile # Docker 이미지 정의 + +services/pipeline/shared/ +└── models.py # Keyword, PipelineJob 모델 정의 +``` + +## 향후 개선사항 + +1. **웹 대시보드**: 실시간 모니터링 UI +2. **알림 시스템**: 오류 발생 시 이메일/Slack 알림 +3. **통계 기능**: 키워드별 수집 통계 및 분석 +4. **스케줄 템플릿**: 자주 사용하는 설정 저장/불러오기 +5. **백업/복구**: 키워드 설정 백업 및 복구 기능 \ No newline at end of file diff --git a/docs/PLAN.md b/docs/PLAN.md new file mode 100644 index 0000000..16a0695 --- /dev/null +++ b/docs/PLAN.md @@ -0,0 +1,401 @@ +# Microservices Architecture Implementation Plan + +## Project Overview +Build a microservices-based platform with a central Console service acting as an orchestrator and API Gateway, managing multiple domain-specific services. 
+ +## Architecture Decision +**Selected Pattern**: API Gateway Pattern with Console as the central orchestrator +- Console handles authentication, routing, and monitoring +- Each microservice focuses on domain-specific logic +- Services communicate via REST APIs and Redis pub/sub + +## Progressive Implementation Strategy + +### Step-by-Step Approach +큰 그림을 먼저 구성하고, 핵심 기능부터 점진적으로 확장하는 전략 + +## Implementation Phases + +### Step 1: Minimal Foundation (Day 1-2) +**목표**: 가장 기본적인 구조 확립 +``` +site11/ +├── docker-compose.yml # 최소 구성 (Console만) +├── console/ +│ └── backend/ +│ └── main.py # Hello World API +└── README.md +``` + +**Tasks**: +- [ ] 간단한 docker-compose.yml 생성 +- [ ] Console FastAPI "Hello World" +- [ ] 기본 health check endpoint +- [ ] Docker로 실행 확인 + +### Step 2: Add First Service (Day 3-4) +**목표**: Console과 하나의 서비스 연결 +``` +site11/ +├── docker-compose.yml +├── console/ +│ └── backend/ +│ └── main.py # Gateway 역할 추가 +└── services/ + └── users/ + └── backend/ + └── main.py # Users 서비스 +``` + +**Tasks**: +- [ ] Users 서비스 생성 +- [ ] Console에서 Users로 라우팅 +- [ ] 서비스 간 통신 테스트 +- [ ] 간단한 CRUD API + +### Step 3: Database Integration (Day 5-6) +**목표**: MongoDB 연결 및 기본 데이터 저장 + +**Tasks**: +- [ ] MongoDB 컨테이너 추가 +- [ ] Console과 Users 서비스 DB 연결 +- [ ] 기본 데이터 모델 생성 +- [ ] 실제 데이터 CRUD 테스트 + +### Step 4: Frontend Skeleton (Week 2) +**목표**: 최소한의 UI 구성 + +**Tasks**: +- [ ] Console Frontend 생성 (React + Vite) +- [ ] 기본 레이아웃 +- [ ] 서비스 상태 표시 +- [ ] Nginx 설정 + +### Step 5: Authentication Basic (Week 2) +**목표**: 간단한 인증 시스템 + +**Tasks**: +- [ ] JWT 토큰 생성 +- [ ] Login endpoint +- [ ] Token 검증 미들웨어 +- [ ] Protected routes + +### Step 6: Second Service (Week 3) +**목표**: 두 번째 서비스 추가로 패턴 확립 + +**Tasks**: +- [ ] OAuth 또는 Images 서비스 추가 +- [ ] Console 라우팅 확장 +- [ ] 서비스 간 통신 패턴 확립 +- [ ] Service registry 기초 + +### Step 7: Service Communication (Week 3) +**목표**: 서비스 간 통신 패턴 구현 + +**Tasks**: +- [ ] Redis pub/sub 설정 +- [ ] Event 기반 통신 예제 +- [ ] Service discovery 구현 +- [ ] Health check 자동화 + +### 
Step 8: Gradual Service Addition (Week 4-5) +**목표**: 나머지 서비스 점진적 추가 + +**각 서비스별로**: +- [ ] 기본 구조 생성 +- [ ] Console 연결 +- [ ] 핵심 API 구현 +- [ ] Frontend 컴포넌트 추가 + +## 현재 시작점 (NOW) + +### 즉시 시작할 수 있는 첫 걸음 + +#### 1. 최소 Docker 환경 구성 +```bash +# 실행 명령 +docker-compose up -d console +curl http://localhost:8000/health +``` + +#### 2. Console 서비스만으로 시작 +- Health endpoint +- 간단한 API Gateway 구조 +- 서비스 등록 준비 + +#### 3. 하나씩 추가하며 테스트 +- Users 서비스 하나만 추가 +- 통신 확인 +- 패턴 확립 후 확장 + +### 핵심 원칙 +1. **작동하는 코드 우선** - 완벽한 설계보다 동작하는 MVP +2. **점진적 복잡도** - 간단한 것부터 시작해서 기능 추가 +3. **빠른 피드백** - 각 단계마다 실행하고 확인 +4. **패턴 확립** - 첫 서비스로 패턴을 만들고 복제 + +--- + +## 상세 구현 계획 (참고용) + +### Phase 1: Foundation Setup (Week 1) +#### Goals +- Set up project structure +- Configure Docker environment +- Establish basic infrastructure + +#### Tasks +- [ ] Initialize Git repository +- [ ] Create Docker Compose configuration +- [ ] Set up Nginx reverse proxy +- [ ] Configure MongoDB and Redis containers +- [ ] Create base directory structure for all services + +#### Deliverables +- Working Docker environment +- Basic networking between containers +- Database and cache ready + +### Phase 2: Console Service - Core (Week 2) +#### Goals +- Implement Console as API Gateway +- Set up authentication system +- Create service registry + +#### Tasks +- [ ] Console Backend + - [ ] FastAPI application setup + - [ ] JWT authentication implementation + - [ ] Service registry and discovery + - [ ] API routing mechanism + - [ ] Health check endpoints +- [ ] Console Frontend + - [ ] React + Vite setup + - [ ] Login/Register pages + - [ ] Admin dashboard layout + - [ ] Service status dashboard + +#### Deliverables +- Working authentication system +- Basic API Gateway functionality +- Service health monitoring dashboard + +### Phase 3: OAuth Service (Week 3) +#### Goals +- Centralized authentication service +- OAuth2 implementation +- User session management + +#### Tasks +- [ ] OAuth Backend + - [ ] OAuth2 server implementation + - 
[ ] Token generation and validation + - [ ] User authentication endpoints + - [ ] Integration with Console +- [ ] OAuth Frontend + - [ ] OAuth consent screens + - [ ] Token management UI + - [ ] Application registration + +#### Deliverables +- OAuth2 server +- Token-based authentication +- Integration with Console + +### Phase 4: Users Service (Week 4) +#### Goals +- User management microservice +- Profile management +- User data CRUD operations + +#### Tasks +- [ ] Users Backend + - [ ] User model and database schema + - [ ] CRUD APIs for user management + - [ ] Profile management endpoints + - [ ] Integration with OAuth service +- [ ] Users Frontend + - [ ] User list and search + - [ ] Profile editing interface + - [ ] User details view + +#### Deliverables +- Complete user management system +- Profile management features +- Admin user interface + +### Phase 5: Core Microservices (Weeks 5-6) +#### Goals +- Implement remaining core services +- Establish inter-service communication + +#### Services to Implement +1. **Images Service** + - Image upload/download + - Image processing + - Storage management + +2. **Applications Service** + - Application registration + - Configuration management + - Version control + +3. **Data Service** + - Data import/export + - Data transformation + - API for data access + +4. 
**Statistics Service** + - Metrics collection + - Analytics dashboard + - Report generation + +#### Tasks per Service +- [ ] Backend implementation + - [ ] Domain models + - [ ] Business logic + - [ ] REST APIs + - [ ] Event publishing +- [ ] Frontend implementation + - [ ] Service-specific UI + - [ ] Integration with Console + - [ ] Dashboard widgets + +### Phase 6: Integration & Testing (Week 7) +#### Goals +- End-to-end integration +- Performance optimization +- Security hardening + +#### Tasks +- [ ] Integration Testing + - [ ] Service communication tests + - [ ] Load testing + - [ ] Security testing +- [ ] Optimization + - [ ] Redis caching implementation + - [ ] Database indexing + - [ ] API response optimization +- [ ] Documentation + - [ ] API documentation (OpenAPI) + - [ ] Deployment guide + - [ ] Developer documentation + +#### Deliverables +- Fully integrated system +- Performance benchmarks +- Complete documentation + +### Phase 7: Monitoring & DevOps (Week 8) +#### Goals +- Production readiness +- Monitoring and alerting +- CI/CD pipeline + +#### Tasks +- [ ] Monitoring Setup + - [ ] Prometheus metrics + - [ ] Grafana dashboards + - [ ] Log aggregation (ELK stack) +- [ ] DevOps + - [ ] GitHub Actions CI/CD + - [ ] Automated testing + - [ ] Docker image optimization +- [ ] Production Configuration + - [ ] Environment variables + - [ ] Secrets management + - [ ] Backup strategies + +#### Deliverables +- Production-ready deployment +- Monitoring dashboards +- Automated deployment pipeline + +## Technical Implementation Details + +### Service Communication Flow +``` +Client Request → Nginx → Console (API Gateway) → Microservice + ↓ + Authentication Check + ↓ + Request Routing + ↓ + Response Aggregation +``` + +### Database Strategy +``` +MongoDB Instance +├── console_db # Console service data +├── users_db # Users service data +├── oauth_db # OAuth tokens and sessions +├── images_db # Image metadata +├── applications_db # Application data +├── data_db # 
Generic data storage +└── statistics_db # Analytics data +``` + +### API Versioning Strategy +- All APIs follow `/api/v1/` pattern +- Version in URL path for major versions +- Header-based versioning for minor updates + +### Security Implementation +1. **Authentication Flow** + - User login → OAuth service + - OAuth service issues JWT + - Console validates JWT on each request + - Console forwards validated requests to services + +2. **Service-to-Service Auth** + - Internal service tokens + - mTLS for production + - Network isolation via Docker networks + +### Development Workflow +1. **Local Development** + ```bash + docker-compose up -d [service-name] + docker-compose logs -f [service-name] + ``` + +2. **Testing** + ```bash + docker-compose exec [service-name] pytest + ``` + +3. **Deployment** + ```bash + docker-compose build + docker-compose up -d + ``` + +## Success Criteria +- [ ] All services independently deployable +- [ ] Console successfully routes to all services +- [ ] Authentication works across all services +- [ ] Health monitoring shows all services green +- [ ] Load testing shows <100ms p95 latency +- [ ] Zero downtime deployments possible + +## Risk Mitigation +1. **Service Failure**: Circuit breakers in Console +2. **Data Consistency**: Event sourcing for critical operations +3. **Performance**: Redis caching layer +4. **Security**: Regular security audits, dependency updates + +## Timeline Summary +- **Week 1**: Foundation and infrastructure +- **Week 2**: Console core implementation +- **Week 3**: OAuth service +- **Week 4**: Users service +- **Weeks 5-6**: Remaining microservices +- **Week 7**: Integration and testing +- **Week 8**: Monitoring and production setup + +## Next Steps +1. Review and approve plan +2. Set up Git repository +3. Begin Phase 1 implementation +4. 
Schedule weekly progress reviews \ No newline at end of file diff --git a/docs/PROGRESS.md b/docs/PROGRESS.md new file mode 100644 index 0000000..e1d2872 --- /dev/null +++ b/docs/PROGRESS.md @@ -0,0 +1,127 @@ +# Progress Tracking & Context Management + +## Purpose +이 파일은 Claude의 컨텍스트가 리셋되어도 빠르게 현재 진행 상황을 파악하고 이어서 작업할 수 있도록 돕는 체크포인트 문서입니다. + +## Current Status +- **Date Started**: 2025-09-09 +- **Current Phase**: Step 3 Complete ✅ +- **Next Action**: Step 4 - Frontend Skeleton + +## Completed Checkpoints +✅ Project structure planning (CLAUDE.md) +✅ Implementation plan created (docs/PLAN.md) +✅ Progressive approach defined +✅ Step 1: Minimal Foundation - Docker + Console Hello World + - docker-compose.yml created + - console/backend with FastAPI + - Running on port 8011 +✅ Step 2: Add First Service (Users) + - Users service with CRUD operations + - Console API Gateway routing to Users + - Service communication verified + - Test: curl http://localhost:8011/api/users/users +✅ Step 3: Database Integration + - MongoDB and Redis containers added + - Users service using MongoDB with Beanie ODM + - Data persistence verified + - MongoDB IDs: 68c126c0bbbe52be68495933 + +## Active Working Files +``` +현재 작업 중인 주요 파일: +- /docs/PLAN.md (구현 계획) +- /CLAUDE.md (아키텍처 가이드) +- /docs/PROGRESS.md (이 파일) +``` + +## Next Immediate Steps +```bash +# 다음 작업 시작 명령 +# Step 1: Create docker-compose.yml +# Step 2: Create console/backend/main.py +# Step 3: Test with docker-compose up +``` + +## Code Snippets Ready to Use + +### 1. Minimal docker-compose.yml +```yaml +version: '3.8' +services: + console: + build: ./console/backend + ports: + - "8000:8000" + environment: + - ENV=development +``` + +### 2. Console main.py starter +```python +from fastapi import FastAPI +app = FastAPI(title="Console API Gateway") + +@app.get("/health") +async def health(): + return {"status": "healthy", "service": "console"} +``` + +## Important Decisions Made +1. 
**Architecture**: API Gateway Pattern with Console as orchestrator +2. **Tech Stack**: FastAPI + React + MongoDB + Redis + Docker +3. **Approach**: Progressive implementation (simple to complex) +4. **First Service**: Users service after Console + +## Questions to Ask When Resuming +새로운 세션에서 이어서 작업할 때 확인할 사항: +1. "PROGRESS.md 파일을 확인했나요?" +2. "마지막으로 완료한 Step은 무엇인가요?" +3. "현재 에러나 블로킹 이슈가 있나요?" + +## Git Commits Pattern +각 Step 완료 시 커밋 메시지: +``` +Step X: [간단한 설명] +- 구현 내용 1 +- 구현 내용 2 +``` + +## Directory Structure Snapshot +``` +site11/ +├── CLAUDE.md ✅ Created +├── docs/ +│ ├── PLAN.md ✅ Created +│ └── PROGRESS.md ✅ Created (this file) +├── console/ 🔄 Next +│ └── backend/ +│ └── main.py +└── docker-compose.yml 🔄 Next +``` + +## Context Recovery Commands +새 세션에서 빠르게 상황 파악하기: +```bash +# 1. 현재 구조 확인 +ls -la + +# 2. 진행 상황 확인 +cat docs/PROGRESS.md + +# 3. 다음 단계 확인 +grep "Step" docs/PLAN.md | head -5 + +# 4. 실행 중인 컨테이너 확인 +docker ps +``` + +## Error Log +문제 발생 시 여기에 기록: +- (아직 없음) + +## Notes for Next Session +- Step 1부터 시작 +- docker-compose.yml 생성 필요 +- console/backend/main.py 생성 필요 +- 모든 문서 파일은 대문자.md 형식으로 생성 (예: README.md, SETUP.md) \ No newline at end of file diff --git a/docs/TEST_AUTH.md b/docs/TEST_AUTH.md new file mode 100644 index 0000000..216951a --- /dev/null +++ b/docs/TEST_AUTH.md @@ -0,0 +1,170 @@ +# 인증 시스템 테스트 가이드 + +## 테스트 계정 +- **관리자**: admin / admin123 +- **일반 사용자**: user / user123 + +## 1. Terminal에서 테스트 + +### 로그인 테스트 +```bash +# 관리자로 로그인 +curl -X POST http://localhost:8011/api/auth/login \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=admin&password=admin123" + +# 응답 예시: +# {"access_token":"eyJhbGci...","token_type":"bearer"} +``` + +### 토큰 저장 및 사용 +```bash +# 토큰을 변수에 저장 +export TOKEN="eyJhbGci..." 
# 위에서 받은 토큰 + +# 인증된 요청 - 사용자 정보 조회 +curl -X GET http://localhost:8011/api/auth/me \ + -H "Authorization: Bearer $TOKEN" + +# 인증된 요청 - 보호된 엔드포인트 +curl -X GET http://localhost:8011/api/protected \ + -H "Authorization: Bearer $TOKEN" + +# 인증된 요청 - Users 서비스 접근 +curl -X GET http://localhost:8011/api/users/ \ + -H "Authorization: Bearer $TOKEN" +``` + +### 로그아웃 +```bash +curl -X POST http://localhost:8011/api/auth/logout \ + -H "Authorization: Bearer $TOKEN" +``` + +## 2. Postman/Insomnia에서 테스트 + +### Postman 설정 +1. **로그인 요청** + - Method: POST + - URL: `http://localhost:8011/api/auth/login` + - Body: x-www-form-urlencoded + - username: admin + - password: admin123 + +2. **토큰 사용** + - Authorization 탭에서 Type: Bearer Token 선택 + - Token 필드에 받은 토큰 붙여넣기 + +## 3. Python 스크립트로 테스트 + +```python +import requests + +# 로그인 +login_response = requests.post( + "http://localhost:8011/api/auth/login", + data={"username": "admin", "password": "admin123"} +) +token = login_response.json()["access_token"] + +# 인증된 요청 +headers = {"Authorization": f"Bearer {token}"} +me_response = requests.get( + "http://localhost:8011/api/auth/me", + headers=headers +) +print(me_response.json()) + +# Users 서비스 접근 +users_response = requests.get( + "http://localhost:8011/api/users/", + headers=headers +) +print(users_response.json()) +``` + +## 4. JavaScript (브라우저 콘솔)에서 테스트 + +```javascript +// 로그인 +const loginResponse = await fetch('http://localhost:8011/api/auth/login', { + method: 'POST', + headers: {'Content-Type': 'application/x-www-form-urlencoded'}, + body: 'username=admin&password=admin123' +}); +const { access_token } = await loginResponse.json(); +console.log('Token:', access_token); + +// 인증된 요청 +const meResponse = await fetch('http://localhost:8011/api/auth/me', { + headers: {'Authorization': `Bearer ${access_token}`} +}); +const userData = await meResponse.json(); +console.log('User:', userData); +``` + +## 5. 
Frontend에서 테스트 (React) + +브라우저에서 http://localhost:3000 접속 후 개발자 도구 콘솔에서: + +```javascript +// 로그인 함수 +async function testLogin() { + const response = await fetch('/api/auth/login', { + method: 'POST', + headers: {'Content-Type': 'application/x-www-form-urlencoded'}, + body: 'username=admin&password=admin123' + }); + const data = await response.json(); + localStorage.setItem('token', data.access_token); + console.log('Logged in!', data); + return data.access_token; +} + +// 인증 테스트 +async function testAuth() { + const token = localStorage.getItem('token'); + const response = await fetch('/api/auth/me', { + headers: {'Authorization': `Bearer ${token}`} + }); + const data = await response.json(); + console.log('User info:', data); +} + +// 실행 +await testLogin(); +await testAuth(); +``` + +## 오류 테스트 + +### 잘못된 비밀번호 +```bash +curl -X POST http://localhost:8011/api/auth/login \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=admin&password=wrong" +# 응답: 401 Unauthorized +``` + +### 토큰 없이 보호된 엔드포인트 접근 +```bash +curl -X GET http://localhost:8011/api/auth/me +# 응답: 401 Unauthorized +``` + +### 잘못된 토큰 +```bash +curl -X GET http://localhost:8011/api/auth/me \ + -H "Authorization: Bearer invalid_token" +# 응답: 401 Unauthorized +``` + +## 토큰 정보 + +- **유효 기간**: 30분 (환경 변수 ACCESS_TOKEN_EXPIRE_MINUTES로 설정 가능) +- **알고리즘**: HS256 +- **페이로드**: username 정보 포함 + +## 다음 단계 + +Frontend에 로그인 페이지를 추가하면 UI에서 직접 테스트 가능합니다. 
\ No newline at end of file diff --git a/generated_article.json b/generated_article.json new file mode 100644 index 0000000..d30b308 --- /dev/null +++ b/generated_article.json @@ -0,0 +1,36 @@ +{ + "_id": "68c5798162bde7a1947d35a7", + "article_id": "17ee889c-a01e-4791-9f51-9336074c842b", + "job_id": "616c1c65-6b43-42a8-98e0-9547208106c8", + "keyword_id": "test_starcraft_001", + "keyword": "스타크래프트", + "title": "Is StarCraft a new tipping point after 15 years?", + "content": "**Nexon secures new StarCraft development rights**\n\nSouth Korean gaming company Nexon has announced that it has won the rights to develop a new StarCraft game, beating out Blizzard in a bidding war. This is expected to mark a new turning point for the StarCraft IP. Nexon also secured the distribution rights to Blizzard's mobile game called Overwatch 3.\n\n**StarCraft 2, still the benchmark for RTS games**\n\nFifteen years after its release, StarCraft 2 is still considered the pinnacle of the real-time strategy (RTS) gaming genre. According to a recently published analysis, today's RTS games are still being developed with the same standards of polish and playability that StarCraft 2 set. Blizzard recently released the 5.0.14 PTR update for StarCraft 2, demonstrating its continued support for the game.\n\n**Breaking records on the esports stage**\n\nAt the Esports World Cup 2025, Joel \"Serral\" Larsson of Finland won the StarCraft 2 category, setting a new viewership record. This shows that StarCraft 2 still has a strong appeal to gamers around the world.\n\n**The failure of Stormgate, a classic for sequels**\n\nOn the other hand, the spiritual successor to StarCraft, Stormgate, created by ex-Blizzard developers, has been criticized as a flop. The developers controversially blamed gamers for its failure. 
This shows how difficult it is to replicate StarCraft's winning formula.\n\n**Current events and outlook**\n\nNexon's acquisition of StarCraft development rights is significant for the Korean gaming industry. It increases the likelihood of new StarCraft titles being developed in Korea, the home of the franchise. However, it will be a challenge to create a game that surpasses StarCraft 2, which has been a standard in the industry for 15 years.", + "summary": "Nexon's acquisition of the new StarCraft development rights marks a turning point for the StarCraft IP, which has been a benchmark in RTS gaming for 15 years.", + "source_items": [], + "images": [ + "https://replicate.delivery/xezq/tYOKIl3SG35WPBVBg3ipefCOLUJPm9FCXwyOOrppS1WztyUVA/out-0.png" + ], + "categories": [ + "游戏", + "Esports" + ], + "tags": [ + "StarCraft", + "Nexon", + "RTS games", + "Blizzard", + "Esports" + ], + "created_at": "2025-09-13T14:02:41.857000", + "pipeline_stages": [ + "rss_collection", + "search_enrichment", + "ai_article_generation", + "image_generation", + "translation" + ], + "processing_time": 19.447839, + "language": "en", + "ref_news_id": "20a6bb85-8c61-41db-82fc-d52e0d88204d" +} \ No newline at end of file diff --git a/services/files/backend/Dockerfile b/services/files/backend/Dockerfile new file mode 100644 index 0000000..7671914 --- /dev/null +++ b/services/files/backend/Dockerfile @@ -0,0 +1,27 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies for Pillow and file type detection +RUN apt-get update && apt-get install -y \ + gcc \ + libmagic1 \ + libjpeg-dev \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first for better caching +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . 
+ +# Create directories for thumbnails cache +RUN mkdir -p /tmp/thumbnails + +# Expose port +EXPOSE 8000 + +# Run the application +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] \ No newline at end of file diff --git a/services/files/backend/file_processor.py b/services/files/backend/file_processor.py new file mode 100644 index 0000000..c00ca21 --- /dev/null +++ b/services/files/backend/file_processor.py @@ -0,0 +1,247 @@ +""" +File Processor for handling file uploads and processing +""" +import hashlib +import mimetypes +from datetime import datetime +from typing import Dict, Any, Optional +import logging +import uuid +from fastapi import UploadFile +from models import FileType, FileStatus + +logger = logging.getLogger(__name__) + +class FileProcessor: + def __init__(self, minio_client, metadata_manager, thumbnail_generator): + self.minio_client = minio_client + self.metadata_manager = metadata_manager + self.thumbnail_generator = thumbnail_generator + + def _determine_file_type(self, content_type: str) -> FileType: + """Determine file type from content type""" + if content_type.startswith('image/'): + return FileType.IMAGE + elif content_type.startswith('video/'): + return FileType.VIDEO + elif content_type.startswith('audio/'): + return FileType.AUDIO + elif content_type in ['application/pdf', 'application/msword', + 'application/vnd.openxmlformats-officedocument', + 'text/plain', 'text/html', 'text/csv']: + return FileType.DOCUMENT + elif content_type in ['application/zip', 'application/x-rar-compressed', + 'application/x-tar', 'application/gzip']: + return FileType.ARCHIVE + else: + return FileType.OTHER + + def _calculate_file_hash(self, file_data: bytes) -> str: + """Calculate SHA256 hash of file data""" + return hashlib.sha256(file_data).hexdigest() + + async def process_upload(self, file: UploadFile, user_id: str, + bucket: str = "default", + public: bool = False, + generate_thumbnail: bool = True, + tags: Optional[Dict[str, 
Any]] = None) -> Dict[str, Any]: + """Process file upload""" + try: + # Read file data + file_data = await file.read() + file_size = len(file_data) + + # Get content type + content_type = file.content_type or mimetypes.guess_type(file.filename)[0] or 'application/octet-stream' + + # Generate file ID and object name + file_id = str(uuid.uuid4()) + timestamp = datetime.now().strftime('%Y%m%d') + file_extension = file.filename.split('.')[-1] if '.' in file.filename else '' + object_name = f"{timestamp}/{user_id}/{file_id}.{file_extension}" if file_extension else f"{timestamp}/{user_id}/{file_id}" + + # Calculate file hash + file_hash = self._calculate_file_hash(file_data) + + # Check for duplicates + duplicates = await self.metadata_manager.find_duplicate_files(file_hash) + if duplicates and not public: # Allow duplicates for public files + # Return existing file info + existing = duplicates[0] + logger.info(f"Duplicate file detected: {existing['id']}") + return { + "file_id": existing["id"], + "filename": existing["filename"], + "size": existing["size"], + "content_type": existing["content_type"], + "file_type": existing["file_type"], + "bucket": existing["bucket"], + "public": existing["public"], + "has_thumbnail": existing.get("has_thumbnail", False), + "thumbnail_url": existing.get("thumbnail_url"), + "created_at": existing["created_at"], + "duplicate": True + } + + # Upload to MinIO + upload_result = await self.minio_client.upload_file( + bucket=bucket, + object_name=object_name, + file_data=file_data, + content_type=content_type, + metadata={ + "user_id": user_id, + "original_name": file.filename, + "upload_date": datetime.now().isoformat() + } + ) + + # Determine file type + file_type = self._determine_file_type(content_type) + + # Generate thumbnail if applicable + has_thumbnail = False + thumbnail_url = None + + if generate_thumbnail and file_type == FileType.IMAGE: + thumbnail_data = await self.thumbnail_generator.generate_thumbnail( + file_data=file_data, + 
content_type=content_type + ) + + if thumbnail_data: + has_thumbnail = True + # Generate multiple sizes + await self.thumbnail_generator.generate_multiple_sizes( + file_data=file_data, + content_type=content_type, + file_id=file_id + ) + + if public: + thumbnail_url = await self.minio_client.generate_presigned_download_url( + bucket="thumbnails", + object_name=f"thumbnails/{file_id}_medium.jpg", + expires_in=86400 * 30 # 30 days + ) + + # Create metadata + metadata = { + "id": file_id, + "filename": file.filename, + "original_name": file.filename, + "size": file_size, + "content_type": content_type, + "file_type": file_type.value, + "bucket": bucket, + "object_name": object_name, + "user_id": user_id, + "hash": file_hash, + "public": public, + "has_thumbnail": has_thumbnail, + "thumbnail_url": thumbnail_url, + "tags": tags or {}, + "metadata": { + "etag": upload_result.get("etag"), + "version_id": upload_result.get("version_id") + } + } + + # Save metadata to database + await self.metadata_manager.create_file_metadata(metadata) + + # Generate download URL if public + download_url = None + if public: + download_url = await self.minio_client.generate_presigned_download_url( + bucket=bucket, + object_name=object_name, + expires_in=86400 * 30 # 30 days + ) + + logger.info(f"File uploaded successfully: {file_id}") + + return { + "file_id": file_id, + "filename": file.filename, + "size": file_size, + "content_type": content_type, + "file_type": file_type.value, + "bucket": bucket, + "public": public, + "has_thumbnail": has_thumbnail, + "thumbnail_url": thumbnail_url, + "download_url": download_url, + "created_at": datetime.now() + } + + except Exception as e: + logger.error(f"File processing error: {e}") + raise + + async def process_large_file(self, file: UploadFile, user_id: str, + bucket: str = "default", + chunk_size: int = 1024 * 1024 * 5) -> Dict[str, Any]: + """Process large file upload in chunks""" + try: + file_id = str(uuid.uuid4()) + timestamp = 
datetime.now().strftime('%Y%m%d') + file_extension = file.filename.split('.')[-1] if '.' in file.filename else '' + object_name = f"{timestamp}/{user_id}/{file_id}.{file_extension}" + + # Initialize multipart upload + hasher = hashlib.sha256() + total_size = 0 + + # Process file in chunks + chunks = [] + while True: + chunk = await file.read(chunk_size) + if not chunk: + break + + chunks.append(chunk) + hasher.update(chunk) + total_size += len(chunk) + + # Combine chunks and upload + file_data = b''.join(chunks) + file_hash = hasher.hexdigest() + + # Upload to MinIO + content_type = file.content_type or 'application/octet-stream' + await self.minio_client.upload_file( + bucket=bucket, + object_name=object_name, + file_data=file_data, + content_type=content_type + ) + + # Create metadata + metadata = { + "id": file_id, + "filename": file.filename, + "original_name": file.filename, + "size": total_size, + "content_type": content_type, + "file_type": self._determine_file_type(content_type).value, + "bucket": bucket, + "object_name": object_name, + "user_id": user_id, + "hash": file_hash, + "public": False, + "has_thumbnail": False + } + + await self.metadata_manager.create_file_metadata(metadata) + + return { + "file_id": file_id, + "filename": file.filename, + "size": total_size, + "message": "Large file uploaded successfully" + } + + except Exception as e: + logger.error(f"Large file processing error: {e}") + raise \ No newline at end of file diff --git a/services/files/backend/main.py b/services/files/backend/main.py new file mode 100644 index 0000000..e4fb358 --- /dev/null +++ b/services/files/backend/main.py @@ -0,0 +1,541 @@ +""" +File Management Service - S3-compatible Object Storage with MinIO +""" +from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, Query, Form +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import StreamingResponse, FileResponse +import uvicorn +from datetime import datetime, timedelta +from 
typing import Optional, List, Dict, Any +import asyncio +import os +import hashlib +import magic +import io +from contextlib import asynccontextmanager +import logging +from pathlib import Path +import json + +# Import custom modules +from models import FileMetadata, FileUploadResponse, FileListResponse, StorageStats +from minio_client import MinIOManager +from thumbnail_generator import ThumbnailGenerator +from metadata_manager import MetadataManager +from file_processor import FileProcessor + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Global instances +minio_manager = None +thumbnail_generator = None +metadata_manager = None +file_processor = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Startup + global minio_manager, thumbnail_generator, metadata_manager, file_processor + + try: + # Initialize MinIO client + minio_manager = MinIOManager( + endpoint=os.getenv("MINIO_ENDPOINT", "minio:9000"), + access_key=os.getenv("MINIO_ACCESS_KEY", "minioadmin"), + secret_key=os.getenv("MINIO_SECRET_KEY", "minioadmin"), + secure=os.getenv("MINIO_SECURE", "false").lower() == "true" + ) + await minio_manager.initialize() + logger.info("MinIO client initialized") + + # Initialize Metadata Manager (MongoDB) + metadata_manager = MetadataManager( + mongodb_url=os.getenv("MONGODB_URL", "mongodb://mongodb:27017"), + database=os.getenv("FILES_DB_NAME", "files_db") + ) + await metadata_manager.connect() + logger.info("Metadata manager connected to MongoDB") + + # Initialize Thumbnail Generator + thumbnail_generator = ThumbnailGenerator( + minio_client=minio_manager, + cache_dir="/tmp/thumbnails" + ) + logger.info("Thumbnail generator initialized") + + # Initialize File Processor + file_processor = FileProcessor( + minio_client=minio_manager, + metadata_manager=metadata_manager, + thumbnail_generator=thumbnail_generator + ) + logger.info("File processor initialized") + + except Exception as e: + logger.error(f"Failed to start 
File service: {e}") + raise + + yield + + # Shutdown + if metadata_manager: + await metadata_manager.close() + + logger.info("File service shutdown complete") + +app = FastAPI( + title="File Management Service", + description="S3-compatible object storage with MinIO", + version="1.0.0", + lifespan=lifespan +) + +# CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +@app.get("/") +async def root(): + return { + "service": "File Management Service", + "status": "running", + "timestamp": datetime.now().isoformat() + } + +@app.get("/health") +async def health_check(): + return { + "status": "healthy", + "service": "files", + "components": { + "minio": "connected" if minio_manager and minio_manager.is_connected else "disconnected", + "mongodb": "connected" if metadata_manager and metadata_manager.is_connected else "disconnected", + "thumbnail_generator": "ready" if thumbnail_generator else "not_initialized" + }, + "timestamp": datetime.now().isoformat() + } + +# File Upload Endpoints +@app.post("/api/files/upload") +async def upload_file( + file: UploadFile = File(...), + user_id: str = Form(...), + bucket: str = Form("default"), + public: bool = Form(False), + generate_thumbnail: bool = Form(True), + tags: Optional[str] = Form(None) +): + """Upload a file to object storage""" + try: + # Validate file + if not file.filename: + raise HTTPException(status_code=400, detail="No file provided") + + # Process file upload + result = await file_processor.process_upload( + file=file, + user_id=user_id, + bucket=bucket, + public=public, + generate_thumbnail=generate_thumbnail, + tags=json.loads(tags) if tags else {} + ) + + return FileUploadResponse(**result) + except Exception as e: + logger.error(f"File upload error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/files/upload-multiple") +async def upload_multiple_files( + files: 
List[UploadFile] = File(...), + user_id: str = Form(...), + bucket: str = Form("default"), + public: bool = Form(False) +): + """Upload multiple files""" + try: + results = [] + for file in files: + result = await file_processor.process_upload( + file=file, + user_id=user_id, + bucket=bucket, + public=public, + generate_thumbnail=True + ) + results.append(result) + + return { + "uploaded": len(results), + "files": results + } + except Exception as e: + logger.error(f"Multiple file upload error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +# File Retrieval Endpoints +@app.get("/api/files/{file_id}") +async def get_file(file_id: str): + """Get file by ID""" + try: + # Get metadata + metadata = await metadata_manager.get_file_metadata(file_id) + if not metadata: + raise HTTPException(status_code=404, detail="File not found") + + # Get file from MinIO + file_stream = await minio_manager.get_file( + bucket=metadata["bucket"], + object_name=metadata["object_name"] + ) + + return StreamingResponse( + file_stream, + media_type=metadata.get("content_type", "application/octet-stream"), + headers={ + "Content-Disposition": f'attachment; filename="{metadata["filename"]}"' + } + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"File retrieval error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/files/{file_id}/metadata") +async def get_file_metadata(file_id: str): + """Get file metadata""" + try: + metadata = await metadata_manager.get_file_metadata(file_id) + if not metadata: + raise HTTPException(status_code=404, detail="File not found") + + return FileMetadata(**metadata) + except HTTPException: + raise + except Exception as e: + logger.error(f"Metadata retrieval error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/files/{file_id}/thumbnail") +async def get_thumbnail( + file_id: str, + width: int = Query(200, ge=50, le=1000), + height: int = Query(200, ge=50, le=1000) 
+): + """Get file thumbnail""" + try: + # Get metadata + metadata = await metadata_manager.get_file_metadata(file_id) + if not metadata: + raise HTTPException(status_code=404, detail="File not found") + + # Check if file has thumbnail + if not metadata.get("has_thumbnail"): + raise HTTPException(status_code=404, detail="No thumbnail available") + + # Get or generate thumbnail + thumbnail = await thumbnail_generator.get_thumbnail( + file_id=file_id, + bucket=metadata["bucket"], + object_name=metadata["object_name"], + width=width, + height=height + ) + + return StreamingResponse( + io.BytesIO(thumbnail), + media_type="image/jpeg" + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"Thumbnail retrieval error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/files/{file_id}/download") +async def download_file(file_id: str): + """Download file with proper headers""" + try: + # Get metadata + metadata = await metadata_manager.get_file_metadata(file_id) + if not metadata: + raise HTTPException(status_code=404, detail="File not found") + + # Update download count + await metadata_manager.increment_download_count(file_id) + + # Get file from MinIO + file_stream = await minio_manager.get_file( + bucket=metadata["bucket"], + object_name=metadata["object_name"] + ) + + return StreamingResponse( + file_stream, + media_type=metadata.get("content_type", "application/octet-stream"), + headers={ + "Content-Disposition": f'attachment; filename="{metadata["filename"]}"', + "Content-Length": str(metadata["size"]) + } + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"File download error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +# File Management Endpoints +@app.delete("/api/files/{file_id}") +async def delete_file(file_id: str, user_id: str): + """Delete a file""" + try: + # Get metadata + metadata = await metadata_manager.get_file_metadata(file_id) + if not metadata: + raise 
HTTPException(status_code=404, detail="File not found") + + # Check ownership + if metadata["user_id"] != user_id: + raise HTTPException(status_code=403, detail="Permission denied") + + # Delete from MinIO + await minio_manager.delete_file( + bucket=metadata["bucket"], + object_name=metadata["object_name"] + ) + + # Delete thumbnail if exists + if metadata.get("has_thumbnail"): + await thumbnail_generator.delete_thumbnail(file_id) + + # Delete metadata + await metadata_manager.delete_file_metadata(file_id) + + return {"status": "deleted", "file_id": file_id} + except HTTPException: + raise + except Exception as e: + logger.error(f"File deletion error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.patch("/api/files/{file_id}") +async def update_file_metadata( + file_id: str, + user_id: str, + updates: Dict[str, Any] +): + """Update file metadata""" + try: + # Get existing metadata + metadata = await metadata_manager.get_file_metadata(file_id) + if not metadata: + raise HTTPException(status_code=404, detail="File not found") + + # Check ownership + if metadata["user_id"] != user_id: + raise HTTPException(status_code=403, detail="Permission denied") + + # Update metadata + updated = await metadata_manager.update_file_metadata(file_id, updates) + + return {"status": "updated", "file_id": file_id, "metadata": updated} + except HTTPException: + raise + except Exception as e: + logger.error(f"Metadata update error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +# File Listing Endpoints +@app.get("/api/files") +async def list_files( + user_id: Optional[str] = None, + bucket: str = Query("default"), + limit: int = Query(20, le=100), + offset: int = Query(0), + search: Optional[str] = None, + file_type: Optional[str] = None, + sort_by: str = Query("created_at", pattern="^(created_at|filename|size)$"), + order: str = Query("desc", pattern="^(asc|desc)$") +): + """List files with filtering and pagination""" + try: + files = await 
metadata_manager.list_files( + user_id=user_id, + bucket=bucket, + limit=limit, + offset=offset, + search=search, + file_type=file_type, + sort_by=sort_by, + order=order + ) + + return FileListResponse(**files) + except Exception as e: + logger.error(f"File listing error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/files/user/{user_id}") +async def get_user_files( + user_id: str, + limit: int = Query(20, le=100), + offset: int = Query(0) +): + """Get all files for a specific user""" + try: + files = await metadata_manager.list_files( + user_id=user_id, + limit=limit, + offset=offset + ) + + return FileListResponse(**files) + except Exception as e: + logger.error(f"User files listing error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +# Storage Management Endpoints +@app.get("/api/storage/stats") +async def get_storage_stats(): + """Get storage statistics""" + try: + stats = await minio_manager.get_storage_stats() + db_stats = await metadata_manager.get_storage_stats() + + return StorageStats( + total_files=db_stats["total_files"], + total_size=db_stats["total_size"], + buckets=stats["buckets"], + users_count=db_stats["users_count"], + file_types=db_stats["file_types"] + ) + except Exception as e: + logger.error(f"Storage stats error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/storage/buckets") +async def create_bucket(bucket_name: str, public: bool = False): + """Create a new storage bucket""" + try: + await minio_manager.create_bucket(bucket_name, public=public) + return {"status": "created", "bucket": bucket_name} + except Exception as e: + logger.error(f"Bucket creation error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/storage/buckets") +async def list_buckets(): + """List all storage buckets""" + try: + buckets = await minio_manager.list_buckets() + return {"buckets": buckets} + except Exception as e: + logger.error(f"Bucket listing error: 
{e}") + raise HTTPException(status_code=500, detail=str(e)) + +# Presigned URL Endpoints +@app.post("/api/files/presigned-upload") +async def generate_presigned_upload_url( + filename: str, + content_type: str, + bucket: str = "default", + expires_in: int = Query(3600, ge=60, le=86400) +): + """Generate presigned URL for direct upload to MinIO""" + try: + url = await minio_manager.generate_presigned_upload_url( + bucket=bucket, + object_name=f"{datetime.now().strftime('%Y%m%d')}/{filename}", + expires_in=expires_in + ) + + return { + "upload_url": url, + "expires_in": expires_in, + "method": "PUT" + } + except Exception as e: + logger.error(f"Presigned URL generation error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/files/{file_id}/share") +async def generate_share_link( + file_id: str, + expires_in: int = Query(86400, ge=60, le=604800) # 1 day default, max 7 days +): + """Generate a shareable link for a file""" + try: + # Get metadata + metadata = await metadata_manager.get_file_metadata(file_id) + if not metadata: + raise HTTPException(status_code=404, detail="File not found") + + # Generate presigned URL + url = await minio_manager.generate_presigned_download_url( + bucket=metadata["bucket"], + object_name=metadata["object_name"], + expires_in=expires_in + ) + + return { + "share_url": url, + "expires_in": expires_in, + "file_id": file_id, + "filename": metadata["filename"] + } + except HTTPException: + raise + except Exception as e: + logger.error(f"Share link generation error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +# Batch Operations +@app.post("/api/files/batch-delete") +async def batch_delete_files(file_ids: List[str], user_id: str): + """Delete multiple files at once""" + try: + deleted = [] + errors = [] + + for file_id in file_ids: + try: + # Get metadata + metadata = await metadata_manager.get_file_metadata(file_id) + if metadata and metadata["user_id"] == user_id: + # Delete from MinIO + await 
minio_manager.delete_file( + bucket=metadata["bucket"], + object_name=metadata["object_name"] + ) + # Delete metadata + await metadata_manager.delete_file_metadata(file_id) + deleted.append(file_id) + else: + errors.append({"file_id": file_id, "error": "Not found or permission denied"}) + except Exception as e: + errors.append({"file_id": file_id, "error": str(e)}) + + return { + "deleted": deleted, + "errors": errors, + "total_deleted": len(deleted) + } + except Exception as e: + logger.error(f"Batch delete error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + uvicorn.run( + "main:app", + host="0.0.0.0", + port=8000, + reload=True + ) \ No newline at end of file diff --git a/services/files/backend/metadata_manager.py b/services/files/backend/metadata_manager.py new file mode 100644 index 0000000..dd28811 --- /dev/null +++ b/services/files/backend/metadata_manager.py @@ -0,0 +1,331 @@ +""" +Metadata Manager for file information storage in MongoDB +""" +from motor.motor_asyncio import AsyncIOMotorClient +from datetime import datetime +from typing import Optional, Dict, Any, List +import logging +import uuid +from models import FileType, FileStatus + +logger = logging.getLogger(__name__) + +class MetadataManager: + def __init__(self, mongodb_url: str, database: str = "files_db"): + self.mongodb_url = mongodb_url + self.database_name = database + self.client = None + self.db = None + self.collection = None + self.is_connected = False + + async def connect(self): + """Connect to MongoDB""" + try: + self.client = AsyncIOMotorClient(self.mongodb_url) + self.db = self.client[self.database_name] + self.collection = self.db.files + + # Create indexes + await self._create_indexes() + + # Test connection + await self.client.admin.command('ping') + self.is_connected = True + logger.info(f"Connected to MongoDB at {self.mongodb_url}") + + except Exception as e: + logger.error(f"Failed to connect to MongoDB: {e}") + self.is_connected = 
False + raise + + async def _create_indexes(self): + """Create database indexes for better performance""" + try: + # Create indexes + await self.collection.create_index("user_id") + await self.collection.create_index("bucket") + await self.collection.create_index("created_at") + await self.collection.create_index("file_type") + await self.collection.create_index([("filename", "text")]) + await self.collection.create_index([("user_id", 1), ("created_at", -1)]) + + logger.info("Database indexes created") + + except Exception as e: + logger.error(f"Failed to create indexes: {e}") + + async def create_file_metadata(self, metadata: Dict[str, Any]) -> str: + """Create new file metadata""" + try: + # Add timestamps + metadata["created_at"] = datetime.now() + metadata["updated_at"] = datetime.now() + metadata["download_count"] = 0 + metadata["status"] = FileStatus.READY.value + + # Generate unique ID if not provided + if "id" not in metadata: + metadata["id"] = str(uuid.uuid4()) + + # Insert document + result = await self.collection.insert_one(metadata) + + logger.info(f"Created metadata for file: {metadata['id']}") + return metadata["id"] + + except Exception as e: + logger.error(f"Failed to create file metadata: {e}") + raise + + async def get_file_metadata(self, file_id: str) -> Optional[Dict[str, Any]]: + """Get file metadata by ID""" + try: + metadata = await self.collection.find_one({"id": file_id}) + + if metadata: + # Remove MongoDB's _id field + metadata.pop("_id", None) + + return metadata + + except Exception as e: + logger.error(f"Failed to get file metadata: {e}") + raise + + async def update_file_metadata(self, file_id: str, updates: Dict[str, Any]) -> Dict[str, Any]: + """Update file metadata""" + try: + # Add update timestamp + updates["updated_at"] = datetime.now() + + # Update document + result = await self.collection.update_one( + {"id": file_id}, + {"$set": updates} + ) + + if result.modified_count == 0: + raise Exception(f"File {file_id} not found") + 
+ # Return updated metadata + return await self.get_file_metadata(file_id) + + except Exception as e: + logger.error(f"Failed to update file metadata: {e}") + raise + + async def delete_file_metadata(self, file_id: str) -> bool: + """Delete file metadata (soft delete)""" + try: + # Soft delete by marking as deleted + updates = { + "status": FileStatus.DELETED.value, + "deleted_at": datetime.now(), + "updated_at": datetime.now() + } + + result = await self.collection.update_one( + {"id": file_id}, + {"$set": updates} + ) + + return result.modified_count > 0 + + except Exception as e: + logger.error(f"Failed to delete file metadata: {e}") + raise + + async def list_files(self, user_id: Optional[str] = None, + bucket: Optional[str] = None, + limit: int = 20, + offset: int = 0, + search: Optional[str] = None, + file_type: Optional[str] = None, + sort_by: str = "created_at", + order: str = "desc") -> Dict[str, Any]: + """List files with filtering and pagination""" + try: + # Build query + query = {"status": {"$ne": FileStatus.DELETED.value}} + + if user_id: + query["user_id"] = user_id + + if bucket: + query["bucket"] = bucket + + if file_type: + query["file_type"] = file_type + + if search: + query["$text"] = {"$search": search} + + # Count total documents + total = await self.collection.count_documents(query) + + # Sort order + sort_order = -1 if order == "desc" else 1 + + # Execute query with pagination + cursor = self.collection.find(query)\ + .sort(sort_by, sort_order)\ + .skip(offset)\ + .limit(limit) + + files = [] + async for doc in cursor: + doc.pop("_id", None) + files.append(doc) + + return { + "files": files, + "total": total, + "limit": limit, + "offset": offset, + "has_more": (offset + limit) < total + } + + except Exception as e: + logger.error(f"Failed to list files: {e}") + raise + + async def increment_download_count(self, file_id: str): + """Increment download counter for a file""" + try: + await self.collection.update_one( + {"id": file_id}, + { + 
"$inc": {"download_count": 1}, + "$set": {"last_accessed": datetime.now()} + } + ) + + except Exception as e: + logger.error(f"Failed to increment download count: {e}") + + async def get_storage_stats(self) -> Dict[str, Any]: + """Get storage statistics""" + try: + # Aggregation pipeline for statistics + pipeline = [ + {"$match": {"status": {"$ne": FileStatus.DELETED.value}}}, + { + "$group": { + "_id": None, + "total_files": {"$sum": 1}, + "total_size": {"$sum": "$size"}, + "users": {"$addToSet": "$user_id"} + } + } + ] + + cursor = self.collection.aggregate(pipeline) + result = await cursor.to_list(length=1) + + if result: + stats = result[0] + users_count = len(stats.get("users", [])) + else: + stats = {"total_files": 0, "total_size": 0} + users_count = 0 + + # Get file type distribution + type_pipeline = [ + {"$match": {"status": {"$ne": FileStatus.DELETED.value}}}, + { + "$group": { + "_id": "$file_type", + "count": {"$sum": 1} + } + } + ] + + type_cursor = self.collection.aggregate(type_pipeline) + type_results = await type_cursor.to_list(length=None) + + file_types = { + item["_id"]: item["count"] + for item in type_results if item["_id"] + } + + return { + "total_files": stats.get("total_files", 0), + "total_size": stats.get("total_size", 0), + "users_count": users_count, + "file_types": file_types + } + + except Exception as e: + logger.error(f"Failed to get storage stats: {e}") + raise + + async def find_duplicate_files(self, file_hash: str) -> List[Dict[str, Any]]: + """Find duplicate files by hash""" + try: + cursor = self.collection.find({ + "hash": file_hash, + "status": {"$ne": FileStatus.DELETED.value} + }) + + duplicates = [] + async for doc in cursor: + doc.pop("_id", None) + duplicates.append(doc) + + return duplicates + + except Exception as e: + logger.error(f"Failed to find duplicate files: {e}") + raise + + async def get_user_storage_usage(self, user_id: str) -> Dict[str, Any]: + """Get storage usage for a specific user""" + try: + pipeline = 
[ + { + "$match": { + "user_id": user_id, + "status": {"$ne": FileStatus.DELETED.value} + } + }, + { + "$group": { + "_id": "$file_type", + "count": {"$sum": 1}, + "size": {"$sum": "$size"} + } + } + ] + + cursor = self.collection.aggregate(pipeline) + results = await cursor.to_list(length=None) + + total_size = sum(item["size"] for item in results) + total_files = sum(item["count"] for item in results) + + breakdown = { + item["_id"]: { + "count": item["count"], + "size": item["size"] + } + for item in results if item["_id"] + } + + return { + "user_id": user_id, + "total_files": total_files, + "total_size": total_size, + "breakdown": breakdown + } + + except Exception as e: + logger.error(f"Failed to get user storage usage: {e}") + raise + + async def close(self): + """Close MongoDB connection""" + if self.client: + self.client.close() + self.is_connected = False + logger.info("MongoDB connection closed") \ No newline at end of file diff --git a/services/files/backend/minio_client.py b/services/files/backend/minio_client.py new file mode 100644 index 0000000..c10555b --- /dev/null +++ b/services/files/backend/minio_client.py @@ -0,0 +1,333 @@ +""" +MinIO Client for S3-compatible object storage +""" +from minio import Minio +from minio.error import S3Error +import asyncio +import io +from typing import Optional, Dict, Any, List +import logging +from datetime import timedelta + +logger = logging.getLogger(__name__) + +class MinIOManager: + def __init__(self, endpoint: str, access_key: str, secret_key: str, secure: bool = False): + self.endpoint = endpoint + self.access_key = access_key + self.secret_key = secret_key + self.secure = secure + self.client = None + self.is_connected = False + + async def initialize(self): + """Initialize MinIO client and create default buckets""" + try: + self.client = Minio( + self.endpoint, + access_key=self.access_key, + secret_key=self.secret_key, + secure=self.secure + ) + + # Create default buckets + default_buckets = ["default", 
"public", "thumbnails", "temp"] + for bucket in default_buckets: + await self.create_bucket(bucket, public=(bucket == "public")) + + self.is_connected = True + logger.info(f"Connected to MinIO at {self.endpoint}") + + except Exception as e: + logger.error(f"Failed to initialize MinIO: {e}") + self.is_connected = False + raise + + async def create_bucket(self, bucket_name: str, public: bool = False): + """Create a new bucket""" + try: + # Run in executor to avoid blocking + loop = asyncio.get_event_loop() + + # Check if bucket exists + exists = await loop.run_in_executor( + None, + self.client.bucket_exists, + bucket_name + ) + + if not exists: + await loop.run_in_executor( + None, + self.client.make_bucket, + bucket_name + ) + logger.info(f"Created bucket: {bucket_name}") + + # Set bucket policy if public + if public: + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"AWS": ["*"]}, + "Action": ["s3:GetObject"], + "Resource": [f"arn:aws:s3:::{bucket_name}/*"] + } + ] + } + import json + await loop.run_in_executor( + None, + self.client.set_bucket_policy, + bucket_name, + json.dumps(policy) + ) + logger.info(f"Set public policy for bucket: {bucket_name}") + + except Exception as e: + logger.error(f"Failed to create bucket {bucket_name}: {e}") + raise + + async def upload_file(self, bucket: str, object_name: str, file_data: bytes, + content_type: str = "application/octet-stream", + metadata: Optional[Dict[str, str]] = None): + """Upload a file to MinIO""" + try: + loop = asyncio.get_event_loop() + + # Convert bytes to BytesIO + file_stream = io.BytesIO(file_data) + length = len(file_data) + + # Upload file + result = await loop.run_in_executor( + None, + self.client.put_object, + bucket, + object_name, + file_stream, + length, + content_type, + metadata + ) + + logger.info(f"Uploaded {object_name} to {bucket}") + return { + "bucket": bucket, + "object_name": object_name, + "etag": result.etag, + "version_id": 
result.version_id + } + + except Exception as e: + logger.error(f"Failed to upload file: {e}") + raise + + async def get_file(self, bucket: str, object_name: str) -> io.BytesIO: + """Get a file from MinIO""" + try: + loop = asyncio.get_event_loop() + + # Get object + response = await loop.run_in_executor( + None, + self.client.get_object, + bucket, + object_name + ) + + # Read data + data = response.read() + response.close() + response.release_conn() + + return io.BytesIO(data) + + except Exception as e: + logger.error(f"Failed to get file: {e}") + raise + + async def delete_file(self, bucket: str, object_name: str): + """Delete a file from MinIO""" + try: + loop = asyncio.get_event_loop() + + await loop.run_in_executor( + None, + self.client.remove_object, + bucket, + object_name + ) + + logger.info(f"Deleted {object_name} from {bucket}") + + except Exception as e: + logger.error(f"Failed to delete file: {e}") + raise + + async def list_files(self, bucket: str, prefix: Optional[str] = None, + recursive: bool = True) -> List[Dict[str, Any]]: + """List files in a bucket""" + try: + loop = asyncio.get_event_loop() + + objects = await loop.run_in_executor( + None, + lambda: list(self.client.list_objects( + bucket, + prefix=prefix, + recursive=recursive + )) + ) + + files = [] + for obj in objects: + files.append({ + "name": obj.object_name, + "size": obj.size, + "last_modified": obj.last_modified, + "etag": obj.etag, + "content_type": obj.content_type + }) + + return files + + except Exception as e: + logger.error(f"Failed to list files: {e}") + raise + + async def get_file_info(self, bucket: str, object_name: str) -> Dict[str, Any]: + """Get file information""" + try: + loop = asyncio.get_event_loop() + + stat = await loop.run_in_executor( + None, + self.client.stat_object, + bucket, + object_name + ) + + return { + "size": stat.size, + "etag": stat.etag, + "content_type": stat.content_type, + "last_modified": stat.last_modified, + "metadata": stat.metadata + } + + 
except Exception as e: + logger.error(f"Failed to get file info: {e}") + raise + + async def generate_presigned_download_url(self, bucket: str, object_name: str, + expires_in: int = 3600) -> str: + """Generate a presigned URL for downloading""" + try: + loop = asyncio.get_event_loop() + + url = await loop.run_in_executor( + None, + self.client.presigned_get_object, + bucket, + object_name, + timedelta(seconds=expires_in) + ) + + return url + + except Exception as e: + logger.error(f"Failed to generate presigned URL: {e}") + raise + + async def generate_presigned_upload_url(self, bucket: str, object_name: str, + expires_in: int = 3600) -> str: + """Generate a presigned URL for uploading""" + try: + loop = asyncio.get_event_loop() + + url = await loop.run_in_executor( + None, + self.client.presigned_put_object, + bucket, + object_name, + timedelta(seconds=expires_in) + ) + + return url + + except Exception as e: + logger.error(f"Failed to generate presigned upload URL: {e}") + raise + + async def copy_file(self, source_bucket: str, source_object: str, + dest_bucket: str, dest_object: str): + """Copy a file within MinIO""" + try: + loop = asyncio.get_event_loop() + + await loop.run_in_executor( + None, + self.client.copy_object, + dest_bucket, + dest_object, + f"/{source_bucket}/{source_object}" + ) + + logger.info(f"Copied {source_object} to {dest_object}") + + except Exception as e: + logger.error(f"Failed to copy file: {e}") + raise + + async def list_buckets(self) -> List[str]: + """List all buckets""" + try: + loop = asyncio.get_event_loop() + + buckets = await loop.run_in_executor( + None, + self.client.list_buckets + ) + + return [bucket.name for bucket in buckets] + + except Exception as e: + logger.error(f"Failed to list buckets: {e}") + raise + + async def get_storage_stats(self) -> Dict[str, Any]: + """Get storage statistics""" + try: + buckets = await self.list_buckets() + + stats = { + "buckets": buckets, + "bucket_count": len(buckets), + "bucket_stats": 
{} + } + + # Get stats for each bucket + for bucket in buckets: + files = await self.list_files(bucket) + total_size = sum(f["size"] for f in files) + stats["bucket_stats"][bucket] = { + "file_count": len(files), + "total_size": total_size + } + + return stats + + except Exception as e: + logger.error(f"Failed to get storage stats: {e}") + raise + + async def check_file_exists(self, bucket: str, object_name: str) -> bool: + """Check if a file exists""" + try: + await self.get_file_info(bucket, object_name) + return True + except: + return False \ No newline at end of file diff --git a/services/files/backend/models.py b/services/files/backend/models.py new file mode 100644 index 0000000..4e11235 --- /dev/null +++ b/services/files/backend/models.py @@ -0,0 +1,112 @@ +""" +Data models for File Management Service +""" +from pydantic import BaseModel, Field +from datetime import datetime +from typing import Optional, List, Dict, Any +from enum import Enum + +class FileType(str, Enum): + IMAGE = "image" + VIDEO = "video" + AUDIO = "audio" + DOCUMENT = "document" + ARCHIVE = "archive" + OTHER = "other" + +class FileStatus(str, Enum): + PENDING = "pending" + PROCESSING = "processing" + READY = "ready" + ERROR = "error" + DELETED = "deleted" + +class FileMetadata(BaseModel): + id: str + filename: str + original_name: str + size: int + content_type: str + file_type: FileType + bucket: str + object_name: str + user_id: str + hash: str + status: FileStatus = FileStatus.READY + public: bool = False + has_thumbnail: bool = False + thumbnail_url: Optional[str] = None + tags: Dict[str, Any] = {} + metadata: Dict[str, Any] = {} + download_count: int = 0 + created_at: datetime + updated_at: datetime + deleted_at: Optional[datetime] = None + +class FileUploadResponse(BaseModel): + file_id: str + filename: str + size: int + content_type: str + file_type: FileType + bucket: str + public: bool + has_thumbnail: bool + thumbnail_url: Optional[str] = None + download_url: Optional[str] = 
None + created_at: datetime + message: str = "File uploaded successfully" + +class FileListResponse(BaseModel): + files: List[FileMetadata] + total: int + limit: int + offset: int + has_more: bool + +class StorageStats(BaseModel): + total_files: int + total_size: int + buckets: List[str] + users_count: int + file_types: Dict[str, int] + storage_used_percentage: Optional[float] = None + +class ThumbnailRequest(BaseModel): + file_id: str + width: int = Field(200, ge=50, le=1000) + height: int = Field(200, ge=50, le=1000) + quality: int = Field(85, ge=50, le=100) + format: str = Field("jpeg", pattern="^(jpeg|png|webp)$") + +class PresignedUrlResponse(BaseModel): + url: str + expires_in: int + method: str + headers: Optional[Dict[str, str]] = None + +class BatchOperationResult(BaseModel): + successful: List[str] + failed: List[Dict[str, str]] + total_processed: int + total_successful: int + total_failed: int + +class FileShareLink(BaseModel): + share_url: str + expires_in: int + file_id: str + filename: str + created_at: datetime + expires_at: datetime + +class FileProcessingJob(BaseModel): + job_id: str + file_id: str + job_type: str # thumbnail, compress, convert, etc. 
+ status: str # pending, processing, completed, failed + progress: Optional[float] = None + result: Optional[Dict[str, Any]] = None + error: Optional[str] = None + created_at: datetime + completed_at: Optional[datetime] = None \ No newline at end of file diff --git a/services/files/backend/requirements.txt b/services/files/backend/requirements.txt new file mode 100644 index 0000000..6e3abc6 --- /dev/null +++ b/services/files/backend/requirements.txt @@ -0,0 +1,11 @@ +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +pydantic==2.5.3 +python-dotenv==1.0.0 +motor==3.5.1 +pymongo==4.6.1 +minio==7.2.3 +pillow==10.2.0 +python-magic==0.4.27 +aiofiles==23.2.1 +python-multipart==0.0.6 \ No newline at end of file diff --git a/services/files/backend/test_files.py b/services/files/backend/test_files.py new file mode 100755 index 0000000..b590eeb --- /dev/null +++ b/services/files/backend/test_files.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 +""" +Test script for File Management Service +""" +import asyncio +import httpx +import os +import json +from datetime import datetime +import base64 + +BASE_URL = "http://localhost:8014" + +# Sample image for testing (1x1 pixel PNG) +TEST_IMAGE_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" +TEST_IMAGE_DATA = base64.b64decode(TEST_IMAGE_BASE64) + +async def test_file_api(): + """Test file management API endpoints""" + async with httpx.AsyncClient() as client: + print("\n📁 Testing File Management Service API...") + + # Test health check + print("\n1. Testing health check...") + response = await client.get(f"{BASE_URL}/health") + print(f"Health check: {response.json()}") + + # Test file upload + print("\n2. 
Testing file upload...") + files = { + 'file': ('test_image.png', TEST_IMAGE_DATA, 'image/png') + } + data = { + 'user_id': 'test_user_123', + 'bucket': 'default', + 'public': 'false', + 'generate_thumbnail': 'true', + 'tags': json.dumps({"test": "true", "category": "sample"}) + } + + response = await client.post( + f"{BASE_URL}/api/files/upload", + files=files, + data=data + ) + + if response.status_code == 200: + upload_result = response.json() + print(f"File uploaded: {upload_result}") + file_id = upload_result.get("file_id") + else: + print(f"Upload failed: {response.status_code} - {response.text}") + file_id = None + + # Test multiple file upload + print("\n3. Testing multiple file upload...") + files = [ + ('files', ('test1.png', TEST_IMAGE_DATA, 'image/png')), + ('files', ('test2.png', TEST_IMAGE_DATA, 'image/png')), + ('files', ('test3.png', TEST_IMAGE_DATA, 'image/png')) + ] + data = { + 'user_id': 'test_user_123', + 'bucket': 'default', + 'public': 'false' + } + + response = await client.post( + f"{BASE_URL}/api/files/upload-multiple", + files=files, + data=data + ) + + if response.status_code == 200: + print(f"Multiple files uploaded: {response.json()}") + else: + print(f"Multiple upload failed: {response.status_code}") + + # Test file metadata retrieval + if file_id: + print("\n4. Testing file metadata retrieval...") + response = await client.get(f"{BASE_URL}/api/files/{file_id}/metadata") + if response.status_code == 200: + print(f"File metadata: {response.json()}") + else: + print(f"Metadata retrieval failed: {response.status_code}") + + # Test thumbnail generation + print("\n5. Testing thumbnail retrieval...") + response = await client.get( + f"{BASE_URL}/api/files/{file_id}/thumbnail", + params={"width": 100, "height": 100} + ) + if response.status_code == 200: + print(f"Thumbnail retrieved: {len(response.content)} bytes") + else: + print(f"Thumbnail retrieval failed: {response.status_code}") + + # Test file download + print("\n6. 
Testing file download...") + response = await client.get(f"{BASE_URL}/api/files/{file_id}/download") + if response.status_code == 200: + print(f"File downloaded: {len(response.content)} bytes") + else: + print(f"Download failed: {response.status_code}") + + # Test share link generation + print("\n7. Testing share link generation...") + response = await client.get( + f"{BASE_URL}/api/files/{file_id}/share", + params={"expires_in": 3600} + ) + if response.status_code == 200: + share_result = response.json() + print(f"Share link generated: {share_result.get('share_url', 'N/A')[:50]}...") + else: + print(f"Share link generation failed: {response.status_code}") + + # Test file listing + print("\n8. Testing file listing...") + response = await client.get( + f"{BASE_URL}/api/files", + params={ + "user_id": "test_user_123", + "limit": 10, + "offset": 0 + } + ) + if response.status_code == 200: + files_list = response.json() + print(f"Files found: {files_list.get('total', 0)} files") + else: + print(f"File listing failed: {response.status_code}") + + # Test storage statistics + print("\n9. Testing storage statistics...") + response = await client.get(f"{BASE_URL}/api/storage/stats") + if response.status_code == 200: + stats = response.json() + print(f"Storage stats: {stats}") + else: + print(f"Storage stats failed: {response.status_code}") + + # Test bucket operations + print("\n10. Testing bucket operations...") + response = await client.post( + f"{BASE_URL}/api/storage/buckets", + params={"bucket_name": "test-bucket", "public": False} + ) + if response.status_code == 200: + print(f"Bucket created: {response.json()}") + else: + print(f"Bucket creation: {response.status_code}") + + response = await client.get(f"{BASE_URL}/api/storage/buckets") + if response.status_code == 200: + print(f"Available buckets: {response.json()}") + else: + print(f"Bucket listing failed: {response.status_code}") + + # Test presigned URL generation + print("\n11. 
Testing presigned URL generation...") + response = await client.post( + f"{BASE_URL}/api/files/presigned-upload", + params={ + "filename": "test_upload.txt", + "content_type": "text/plain", + "bucket": "default", + "expires_in": 3600 + } + ) + if response.status_code == 200: + presigned = response.json() + print(f"Presigned upload URL generated: {presigned.get('upload_url', 'N/A')[:50]}...") + else: + print(f"Presigned URL generation failed: {response.status_code}") + + # Test file deletion + if file_id: + print("\n12. Testing file deletion...") + response = await client.delete( + f"{BASE_URL}/api/files/{file_id}", + params={"user_id": "test_user_123"} + ) + if response.status_code == 200: + print(f"File deleted: {response.json()}") + else: + print(f"File deletion failed: {response.status_code}") + +async def test_large_file_upload(): + """Test large file upload""" + print("\n\n📦 Testing Large File Upload...") + + # Create a larger test file (1MB) + large_data = b"x" * (1024 * 1024) # 1MB of data + + async with httpx.AsyncClient(timeout=30.0) as client: + files = { + 'file': ('large_test.bin', large_data, 'application/octet-stream') + } + data = { + 'user_id': 'test_user_123', + 'bucket': 'default', + 'public': 'false' + } + + print("Uploading 1MB file...") + response = await client.post( + f"{BASE_URL}/api/files/upload", + files=files, + data=data + ) + + if response.status_code == 200: + result = response.json() + print(f"Large file uploaded successfully: {result.get('file_id')}") + print(f"File size: {result.get('size')} bytes") + else: + print(f"Large file upload failed: {response.status_code}") + +async def test_duplicate_detection(): + """Test duplicate file detection""" + print("\n\n🔍 Testing Duplicate Detection...") + + async with httpx.AsyncClient() as client: + # Upload the same file twice + files = { + 'file': ('duplicate_test.png', TEST_IMAGE_DATA, 'image/png') + } + data = { + 'user_id': 'test_user_123', + 'bucket': 'default', + 'public': 'false' + } + 
+ print("Uploading file first time...") + response1 = await client.post( + f"{BASE_URL}/api/files/upload", + files=files, + data=data + ) + + if response1.status_code == 200: + result1 = response1.json() + print(f"First upload: {result1.get('file_id')}") + + print("Uploading same file again...") + response2 = await client.post( + f"{BASE_URL}/api/files/upload", + files=files, + data=data + ) + + if response2.status_code == 200: + result2 = response2.json() + print(f"Second upload: {result2.get('file_id')}") + + if result2.get('duplicate'): + print("✅ Duplicate detected successfully!") + else: + print("❌ Duplicate not detected") + +async def main(): + """Run all tests""" + print("=" * 60) + print("FILE MANAGEMENT SERVICE TEST SUITE") + print("=" * 60) + print(f"Started at: {datetime.now().isoformat()}") + + # Run tests + await test_file_api() + await test_large_file_upload() + await test_duplicate_detection() + + print("\n" + "=" * 60) + print("✅ All tests completed!") + print(f"Finished at: {datetime.now().isoformat()}") + print("=" * 60) + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/services/files/backend/thumbnail_generator.py b/services/files/backend/thumbnail_generator.py new file mode 100644 index 0000000..0b03de7 --- /dev/null +++ b/services/files/backend/thumbnail_generator.py @@ -0,0 +1,236 @@ +""" +Thumbnail Generator for image and video files +""" +from PIL import Image, ImageOps +import io +import os +import hashlib +import logging +from typing import Optional, Tuple +import asyncio +from pathlib import Path + +logger = logging.getLogger(__name__) + +class ThumbnailGenerator: + def __init__(self, minio_client, cache_dir: str = "/tmp/thumbnails"): + self.minio_client = minio_client + self.cache_dir = Path(cache_dir) + self.cache_dir.mkdir(parents=True, exist_ok=True) + + # Supported image formats for thumbnail generation + self.supported_formats = { + 'image/jpeg', 'image/jpg', 'image/png', 'image/gif', + 
'image/webp', 'image/bmp', 'image/tiff' + } + + def _get_cache_path(self, file_id: str, width: int, height: int) -> Path: + """Generate cache file path for thumbnail""" + cache_key = f"{file_id}_{width}x{height}" + cache_hash = hashlib.md5(cache_key.encode()).hexdigest() + return self.cache_dir / f"{cache_hash[:2]}" / f"{cache_hash}.jpg" + + async def generate_thumbnail(self, file_data: bytes, content_type: str, + width: int = 200, height: int = 200) -> Optional[bytes]: + """Generate a thumbnail from file data""" + try: + if content_type not in self.supported_formats: + logger.warning(f"Unsupported format for thumbnail: {content_type}") + return None + + loop = asyncio.get_event_loop() + + # Generate thumbnail in thread pool + thumbnail_data = await loop.run_in_executor( + None, + self._create_thumbnail, + file_data, + width, + height + ) + + return thumbnail_data + + except Exception as e: + logger.error(f"Failed to generate thumbnail: {e}") + return None + + def _create_thumbnail(self, file_data: bytes, width: int, height: int) -> bytes: + """Create thumbnail using PIL""" + try: + # Open image + image = Image.open(io.BytesIO(file_data)) + + # Convert RGBA to RGB if necessary + if image.mode in ('RGBA', 'LA', 'P'): + # Create a white background + background = Image.new('RGB', image.size, (255, 255, 255)) + if image.mode == 'P': + image = image.convert('RGBA') + background.paste(image, mask=image.split()[-1] if image.mode == 'RGBA' else None) + image = background + elif image.mode not in ('RGB', 'L'): + image = image.convert('RGB') + + # Calculate thumbnail size maintaining aspect ratio + image.thumbnail((width, height), Image.Resampling.LANCZOS) + + # Apply EXIF orientation if present + image = ImageOps.exif_transpose(image) + + # Save thumbnail to bytes + output = io.BytesIO() + image.save(output, format='JPEG', quality=85, optimize=True) + output.seek(0) + + return output.read() + + except Exception as e: + logger.error(f"Thumbnail creation failed: {e}") + raise 
+ + async def get_thumbnail(self, file_id: str, bucket: str, object_name: str, + width: int = 200, height: int = 200) -> Optional[bytes]: + """Get or generate thumbnail for a file""" + try: + # Check cache first + cache_path = self._get_cache_path(file_id, width, height) + + if cache_path.exists(): + logger.info(f"Thumbnail found in cache: {cache_path}") + with open(cache_path, 'rb') as f: + return f.read() + + # Check if thumbnail exists in MinIO + thumbnail_object = f"thumbnails/{file_id}_{width}x{height}.jpg" + try: + thumbnail_stream = await self.minio_client.get_file( + bucket="thumbnails", + object_name=thumbnail_object + ) + thumbnail_data = thumbnail_stream.read() + + # Save to cache + await self._save_to_cache(cache_path, thumbnail_data) + + return thumbnail_data + except: + pass # Thumbnail doesn't exist, generate it + + # Get original file + file_stream = await self.minio_client.get_file(bucket, object_name) + file_data = file_stream.read() + + # Get file info for content type + file_info = await self.minio_client.get_file_info(bucket, object_name) + content_type = file_info.get("content_type", "") + + # Generate thumbnail + thumbnail_data = await self.generate_thumbnail( + file_data, content_type, width, height + ) + + if thumbnail_data: + # Save to MinIO + await self.minio_client.upload_file( + bucket="thumbnails", + object_name=thumbnail_object, + file_data=thumbnail_data, + content_type="image/jpeg" + ) + + # Save to cache + await self._save_to_cache(cache_path, thumbnail_data) + + return thumbnail_data + + except Exception as e: + logger.error(f"Failed to get thumbnail: {e}") + return None + + async def _save_to_cache(self, cache_path: Path, data: bytes): + """Save thumbnail to cache""" + try: + cache_path.parent.mkdir(parents=True, exist_ok=True) + + loop = asyncio.get_event_loop() + await loop.run_in_executor( + None, + lambda: cache_path.write_bytes(data) + ) + + logger.info(f"Thumbnail saved to cache: {cache_path}") + + except Exception as e: + 
logger.error(f"Failed to save to cache: {e}") + + async def delete_thumbnail(self, file_id: str): + """Delete all thumbnails for a file""" + try: + # Delete from cache + for cache_file in self.cache_dir.rglob(f"*{file_id}*"): + try: + cache_file.unlink() + logger.info(f"Deleted cache file: {cache_file}") + except: + pass + + # Delete from MinIO (list and delete all sizes) + files = await self.minio_client.list_files( + bucket="thumbnails", + prefix=f"thumbnails/{file_id}_" + ) + + for file in files: + await self.minio_client.delete_file( + bucket="thumbnails", + object_name=file["name"] + ) + logger.info(f"Deleted thumbnail: {file['name']}") + + except Exception as e: + logger.error(f"Failed to delete thumbnails: {e}") + + async def generate_multiple_sizes(self, file_data: bytes, content_type: str, + file_id: str) -> dict: + """Generate thumbnails in multiple sizes""" + sizes = { + "small": (150, 150), + "medium": (300, 300), + "large": (600, 600) + } + + results = {} + + for size_name, (width, height) in sizes.items(): + thumbnail = await self.generate_thumbnail( + file_data, content_type, width, height + ) + + if thumbnail: + # Save to MinIO + object_name = f"thumbnails/{file_id}_{size_name}.jpg" + await self.minio_client.upload_file( + bucket="thumbnails", + object_name=object_name, + file_data=thumbnail, + content_type="image/jpeg" + ) + + results[size_name] = { + "size": len(thumbnail), + "dimensions": f"{width}x{height}", + "object_name": object_name + } + + return results + + def clear_cache(self): + """Clear thumbnail cache""" + try: + import shutil + shutil.rmtree(self.cache_dir) + self.cache_dir.mkdir(parents=True, exist_ok=True) + logger.info("Thumbnail cache cleared") + except Exception as e: + logger.error(f"Failed to clear cache: {e}") \ No newline at end of file diff --git a/services/images/backend/Dockerfile b/services/images/backend/Dockerfile new file mode 100644 index 0000000..0d3642a --- /dev/null +++ b/services/images/backend/Dockerfile @@ -0,0 
+1,26 @@ +FROM python:3.11-slim + +WORKDIR /app + +# 시스템 패키지 설치 +RUN apt-get update && apt-get install -y \ + gcc \ + libheif-dev \ + libde265-dev \ + libjpeg-dev \ + libpng-dev \ + && rm -rf /var/lib/apt/lists/* + +# Python 패키지 설치 +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# 애플리케이션 코드 복사 +COPY . . + +# 캐시 디렉토리 생성 +RUN mkdir -p /app/cache + +EXPOSE 8000 + +CMD ["python", "main.py"] \ No newline at end of file diff --git a/services/images/backend/app/__init__.py b/services/images/backend/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/services/images/backend/app/api/endpoints.py b/services/images/backend/app/api/endpoints.py new file mode 100644 index 0000000..fb78cbe --- /dev/null +++ b/services/images/backend/app/api/endpoints.py @@ -0,0 +1,197 @@ +from fastapi import APIRouter, Query, HTTPException, Body +from fastapi.responses import Response +from typing import Optional, Dict +import mimetypes +from pathlib import Path +import hashlib + +from ..core.config import settings + +# MinIO 사용 여부에 따라 적절한 캐시 모듈 선택 +if settings.use_minio: + from ..core.minio_cache import cache +else: + from ..core.cache import cache + +router = APIRouter() + +@router.get("/image") +async def get_image( + url: str = Query(..., description="원본 이미지 URL"), + size: Optional[str] = Query(None, description="이미지 크기 (thumb, card, list, detail, hero)") +): + """ + 이미지 프록시 엔드포인트 + + - 외부 URL의 이미지를 가져와서 캐싱 + - 선택적으로 리사이징 및 최적화 + - WebP 포맷으로 자동 변환 (설정에 따라) + """ + try: + # 캐시 확인 + cached_data = await cache.get(url, size) + + if cached_data: + # 캐시된 이미지 반환 + # SVG 체크 + if url.lower().endswith('.svg') or cache._is_svg(cached_data): + content_type = 'image/svg+xml' + # GIF 체크 (GIF는 WebP로 변환하지 않음) + elif url.lower().endswith('.gif'): + content_type = 'image/gif' + # WebP 변환이 활성화된 경우 항상 WebP로 제공 (GIF 제외) + elif settings.convert_to_webp and size: + content_type = 'image/webp' + else: + content_type = mimetypes.guess_type(url)[0] or 'image/jpeg' 
+ return Response( + content=cached_data, + media_type=content_type, + headers={ + "Cache-Control": f"public, max-age={86400 * 7}", # 7일 브라우저 캐시 + "X-Cache": "HIT", + "X-Image-Format": content_type.split('/')[-1].upper() + } + ) + + # 캐시 미스 - 이미지 다운로드 + image_data = await cache.download_image(url) + + # URL에서 MIME 타입 추측 + guessed_type = mimetypes.guess_type(url)[0] + + # SVG 확장자 체크 (mimetypes가 SVG를 제대로 인식하지 못할 수 있음) + if url.lower().endswith('.svg') or cache._is_svg(image_data): + content_type = 'image/svg+xml' + # GIF 체크 + elif url.lower().endswith('.gif') or (guessed_type and 'gif' in guessed_type.lower()): + content_type = 'image/gif' + else: + content_type = guessed_type or 'image/jpeg' + + # 리사이징 및 최적화 (SVG와 GIF는 특별 처리) + if size and content_type != 'image/svg+xml': + # GIF는 특별 처리 + if content_type == 'image/gif': + image_data, content_type = cache._process_gif(image_data, settings.thumbnail_sizes[size]) + else: + image_data, content_type = cache.resize_and_optimize_image(image_data, size) + + # 캐시에 저장 + await cache.set(url, image_data, size) + + # 백그라운드에서 다른 크기들도 생성하도록 트리거 + await cache.trigger_background_generation(url) + + # 이미지 반환 + return Response( + content=image_data, + media_type=content_type, + headers={ + "Cache-Control": f"public, max-age={86400 * 7}", + "X-Cache": "MISS", + "X-Image-Format": content_type.split('/')[-1].upper() + } + ) + + except HTTPException: + raise + except Exception as e: + import traceback + print(f"Error processing image from {url}: {str(e)}") + traceback.print_exc() + + # 403 에러를 명확히 처리 + if "403" in str(e): + raise HTTPException( + status_code=403, + detail=f"이미지 접근 거부됨: {url}" + ) + + raise HTTPException( + status_code=500, + detail=f"이미지 처리 실패: {str(e)}" + ) + +@router.get("/stats") +async def get_stats(): + """캐시 통계 정보""" + cache_size = await cache.get_cache_size() + + # 디렉토리 구조 통계 추가 (MinIO 또는 파일시스템) + dir_stats = await cache.get_directory_stats() + + return { + "cache_size_gb": round(cache_size, 2), + 
"max_cache_size_gb": settings.max_cache_size_gb, + "cache_usage_percent": round((cache_size / settings.max_cache_size_gb) * 100, 2), + "directory_stats": dir_stats + } + +@router.post("/cleanup") +async def cleanup_cache(): + """오래된 캐시 정리""" + await cache.cleanup_old_cache() + + return {"message": "캐시 정리 완료"} + +@router.post("/cache/delete") +async def delete_cache(request: Dict = Body(...)): + """특정 URL의 캐시 삭제""" + url = request.get("url") + if not url: + raise HTTPException(status_code=400, detail="URL이 필요합니다") + + try: + # URL의 모든 크기 버전 삭제 + sizes = ["thumb", "card", "list", "detail", "hero", None] # None은 원본 + deleted_count = 0 + + for size in sizes: + # 캐시 경로 계산 + url_hash = hashlib.md5(url.encode()).hexdigest() + + # 3단계 디렉토리 구조 + level1 = url_hash[:2] + level2 = url_hash[2:4] + level3 = url_hash[4:6] + + # 크기별 파일명 + if size: + patterns = [ + f"{url_hash}_{size}.webp", + f"{url_hash}_{size}.jpg", + f"{url_hash}_{size}.jpeg", + f"{url_hash}_{size}.png", + f"{url_hash}_{size}.gif" + ] + else: + patterns = [ + f"{url_hash}", + f"{url_hash}.jpg", + f"{url_hash}.jpeg", + f"{url_hash}.png", + f"{url_hash}.gif", + f"{url_hash}.webp" + ] + + # 각 패턴에 대해 파일 삭제 시도 + for filename in patterns: + cache_path = settings.cache_dir / level1 / level2 / level3 / filename + if cache_path.exists(): + cache_path.unlink() + deleted_count += 1 + print(f"✅ 캐시 파일 삭제: {cache_path}") + + return { + "status": "success", + "message": f"{deleted_count}개의 캐시 파일이 삭제되었습니다", + "url": url + } + + except Exception as e: + print(f"❌ 캐시 삭제 오류: {e}") + raise HTTPException( + status_code=500, + detail=f"캐시 삭제 실패: {str(e)}" + ) \ No newline at end of file diff --git a/services/images/backend/app/core/background_tasks.py b/services/images/backend/app/core/background_tasks.py new file mode 100644 index 0000000..f5ec0f2 --- /dev/null +++ b/services/images/backend/app/core/background_tasks.py @@ -0,0 +1,91 @@ +import asyncio +import logging +from typing import Set, Optional +from pathlib import Path + 
+logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class BackgroundTaskManager: + """백그라운드 작업 관리자""" + + def __init__(self): + self.processing_urls: Set[str] = set() # 현재 처리 중인 URL 목록 + self.task_queue: asyncio.Queue = None + self.worker_task: Optional[asyncio.Task] = None + + async def start(self): + """백그라운드 워커 시작""" + self.task_queue = asyncio.Queue(maxsize=100) + self.worker_task = asyncio.create_task(self._worker()) + logger.info("백그라운드 작업 관리자 시작됨") + + async def stop(self): + """백그라운드 워커 정지""" + if self.worker_task: + self.worker_task.cancel() + try: + await self.worker_task + except asyncio.CancelledError: + pass + logger.info("백그라운드 작업 관리자 정지됨") + + async def add_task(self, url: str): + """작업 큐에 URL 추가""" + if url not in self.processing_urls and self.task_queue: + try: + self.processing_urls.add(url) + await self.task_queue.put(url) + logger.info(f"백그라운드 작업 추가: {url}") + except asyncio.QueueFull: + self.processing_urls.discard(url) + logger.warning(f"작업 큐가 가득 참: {url}") + + async def _worker(self): + """백그라운드 워커 - 큐에서 작업을 가져와 처리""" + from .cache import cache + + while True: + try: + # 큐에서 URL 가져오기 + url = await self.task_queue.get() + + try: + # 원본 이미지가 캐시에 있는지 확인 + original_data = await cache.get(url, None) + + if not original_data: + # 원본 이미지 다운로드 + original_data = await cache.download_image(url) + await cache.set(url, original_data, None) + + # 모든 크기의 이미지 생성 + sizes = ['thumb', 'card', 'list', 'detail', 'hero'] + for size in sizes: + # 이미 존재하는지 확인 + existing = await cache.get(url, size) + if not existing: + try: + # 리사이징 및 최적화 - cache.resize_and_optimize_image가 WebP를 처리함 + resized_data, _ = cache.resize_and_optimize_image(original_data, size) + await cache.set(url, resized_data, size) + logger.info(f"백그라운드 생성 완료: {url} ({size})") + except Exception as e: + logger.error(f"백그라운드 리사이징 실패: {url} ({size}) - {str(e)}") + import traceback + logger.error(f"Traceback: {traceback.format_exc()}") + + except Exception as e: + 
logger.error(f"백그라운드 작업 실패: {url} - {str(e)}") + finally: + # 처리 완료된 URL 제거 + self.processing_urls.discard(url) + + except asyncio.CancelledError: + break + except Exception as e: + logger.error(f"백그라운드 워커 오류: {str(e)}") + await asyncio.sleep(1) # 오류 발생 시 잠시 대기 + +# 전역 백그라운드 작업 관리자 +background_manager = BackgroundTaskManager() \ No newline at end of file diff --git a/services/images/backend/app/core/cache.py b/services/images/backend/app/core/cache.py new file mode 100644 index 0000000..e679beb --- /dev/null +++ b/services/images/backend/app/core/cache.py @@ -0,0 +1,796 @@ +import hashlib +import aiofiles +import os +from pathlib import Path +from datetime import datetime, timedelta +from typing import Optional +import httpx +from PIL import Image +try: + from pillow_heif import register_heif_opener, register_avif_opener + register_heif_opener() # HEIF/HEIC 지원 + register_avif_opener() # AVIF 지원 + print("HEIF/AVIF support enabled successfully") +except ImportError: + print("Warning: pillow_heif not installed, HEIF/AVIF support disabled") +import io +import asyncio +import ssl + +from .config import settings + +class ImageCache: + def __init__(self): + self.cache_dir = settings.cache_dir + self.cache_dir.mkdir(parents=True, exist_ok=True) + + def _get_cache_path(self, url: str, size: Optional[str] = None) -> Path: + """URL을 기반으로 캐시 파일 경로 생성""" + # URL을 해시하여 파일명 생성 + url_hash = hashlib.md5(url.encode()).hexdigest() + + # 3단계 디렉토리 구조 생성 + # 예: 10f8a8f96aa1377e86fdbc6bf3c631cf -> 10/f8/a8/ + level1 = url_hash[:2] # 첫 2자리 + level2 = url_hash[2:4] # 다음 2자리 + level3 = url_hash[4:6] # 다음 2자리 + + # 크기별로 다른 파일명 사용 + if size: + filename = f"{url_hash}_{size}" + else: + filename = url_hash + + # 확장자 추출 (WebP로 저장되는 경우 .webp 사용) + if settings.convert_to_webp and size: + filename = f"{filename}.webp" + else: + ext = self._get_extension_from_url(url) + if ext: + filename = f"{filename}.{ext}" + + # 3단계 디렉토리 경로 생성 + path = self.cache_dir / level1 / level2 / level3 / filename + 
path.parent.mkdir(parents=True, exist_ok=True) + + return path + + def _get_extension_from_url(self, url: str) -> Optional[str]: + """URL에서 파일 확장자 추출""" + path = url.split('?')[0] # 쿼리 파라미터 제거 + parts = path.split('.') + if len(parts) > 1: + ext = parts[-1].lower() + if ext in settings.allowed_formats: + return ext + return None + + def _is_svg(self, data: bytes) -> bool: + """SVG 파일인지 확인""" + # SVG 파일의 시작 부분 확인 + if len(data) < 100: + return False + + # 처음 1000바이트만 확인 (성능 최적화) + header = data[:1000].lower() + + # SVG 시그니처 확인 + svg_signatures = [ + b' tuple[bytes, str]: + """GIF 처리 - JPEG로 변환하여 안정적으로 처리""" + try: + from PIL import Image + + # GIF 열기 + img = Image.open(io.BytesIO(gif_data)) + + # 모든 GIF를 RGB로 변환 (팔레트 모드 문제 해결) + # 팔레트 모드(P)를 RGB로 직접 변환 + if img.mode != 'RGB': + img = img.convert('RGB') + + # 리사이징 + img = img.resize(target_size, Image.Resampling.LANCZOS) + + # JPEG로 저장 (안정적) + output = io.BytesIO() + img.save(output, format='JPEG', quality=85, optimize=True) + return output.getvalue(), 'image/jpeg' + + except Exception as e: + print(f"GIF 처리 중 오류: {e}") + import traceback + traceback.print_exc() + # 오류 발생 시 원본 반환 + return gif_data, 'image/gif' + + async def get(self, url: str, size: Optional[str] = None) -> Optional[bytes]: + """캐시에서 이미지 가져오기""" + cache_path = self._get_cache_path(url, size) + + if cache_path.exists(): + # 캐시 만료 확인 + stat = cache_path.stat() + age = datetime.now() - datetime.fromtimestamp(stat.st_mtime) + + if age < timedelta(days=settings.cache_ttl_days): + async with aiofiles.open(cache_path, 'rb') as f: + return await f.read() + else: + # 만료된 캐시 삭제 + cache_path.unlink() + + return None + + async def set(self, url: str, data: bytes, size: Optional[str] = None): + """캐시에 이미지 저장""" + cache_path = self._get_cache_path(url, size) + + async with aiofiles.open(cache_path, 'wb') as f: + await f.write(data) + + async def download_image(self, url: str) -> bytes: + """외부 URL에서 이미지 다운로드""" + from urllib.parse import urlparse + + # URL에서 도메인 
추출 + parsed_url = urlparse(url) + domain = parsed_url.netloc + base_url = f"{parsed_url.scheme}://{parsed_url.netloc}/" + + # 기본 헤더 설정 + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Accept': 'image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8', + 'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7', + 'Cache-Control': 'no-cache', + 'Pragma': 'no-cache', + 'Sec-Fetch-Dest': 'image', + 'Sec-Fetch-Mode': 'no-cors', + 'Sec-Fetch-Site': 'cross-site', + 'Referer': base_url # 항상 기본 Referer 설정 + } + + # 특정 사이트별 Referer 오버라이드 + if 'yna.co.kr' in url: + headers['Referer'] = 'https://www.yna.co.kr/' + client = httpx.AsyncClient( + verify=False, # SSL 검증 비활성화 + timeout=30.0, + follow_redirects=True + ) + elif 'investing.com' in url: + headers['Referer'] = 'https://www.investing.com/' + client = httpx.AsyncClient() + elif 'naver.com' in url: + headers['Referer'] = 'https://news.naver.com/' + client = httpx.AsyncClient() + elif 'daum.net' in url: + headers['Referer'] = 'https://news.daum.net/' + client = httpx.AsyncClient() + elif 'chosun.com' in url: + headers['Referer'] = 'https://www.chosun.com/' + client = httpx.AsyncClient() + elif 'vietnam.vn' in url or 'vstatic.vietnam.vn' in url: + headers['Referer'] = 'https://vietnam.vn/' + client = httpx.AsyncClient() + elif 'ddaily.co.kr' in url: + # ddaily는 /photos/ 경로를 사용해야 함 + headers['Referer'] = 'https://www.ddaily.co.kr/' + # URL이 잘못된 경로를 사용하는 경우 수정 + if '/2025/' in url and '/photos/' not in url: + url = url.replace('/2025/', '/photos/2025/') + print(f"Fixed ddaily URL: {url}") + client = httpx.AsyncClient() + else: + # 기본적으로 도메인 기반 Referer 사용 + client = httpx.AsyncClient() + + async with client: + try: + response = await client.get( + url, + headers=headers, + timeout=settings.request_timeout, + follow_redirects=True + ) + response.raise_for_status() + except Exception as e: + # 모든 에러에 대해 Playwright 사용 시도 + error_msg = 
str(e) + if isinstance(e, httpx.HTTPStatusError): + error_type = f"HTTP {e.response.status_code}" + elif isinstance(e, httpx.ConnectError): + error_type = "Connection Error" + elif isinstance(e, ssl.SSLError): + error_type = "SSL Error" + elif "resolve" in error_msg.lower() or "dns" in error_msg.lower(): + error_type = "DNS Resolution Error" + else: + error_type = "Network Error" + + print(f"{error_type} for {url}, trying with Playwright...") + + # Playwright로 이미지 가져오기 시도 + try: + from playwright.async_api import async_playwright + from PIL import Image + import io + + async with async_playwright() as p: + # 브라우저 실행 + browser = await p.chromium.launch( + headless=True, + args=['--no-sandbox', '--disable-setuid-sandbox'] + ) + + # Referer 설정을 위한 도메인 추출 + from urllib.parse import urlparse + parsed = urlparse(url) + referer_url = f"{parsed.scheme}://{parsed.netloc}/" + + context = await browser.new_context( + viewport={'width': 1920, 'height': 1080}, + user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + extra_http_headers={ + 'Referer': referer_url + } + ) + + page = await context.new_page() + + try: + # Response를 가로채기 위한 설정 + image_data = None + + async def handle_response(response): + nonlocal image_data + # 이미지 URL에 대한 응답 가로채기 + if url in response.url or response.url == url: + try: + image_data = await response.body() + print(f"✅ Image intercepted: {len(image_data)} bytes") + except: + pass + + # Response 이벤트 리스너 등록 + page.on('response', handle_response) + + # 이미지 URL로 이동 (에러 무시) + try: + await page.goto(url, wait_until='networkidle', timeout=30000) + except Exception as goto_error: + print(f"⚠️ Direct navigation failed: {goto_error}") + # 직접 이동 실패 시 HTML에 img 태그 삽입 + await page.set_content(f''' + + + + + + ''') + await page.wait_for_timeout(3000) # 이미지 로딩 대기 + + # 이미지 데이터가 없으면 JavaScript로 직접 fetch + if not image_data: + # JavaScript로 이미지 fetch + image_data_base64 = await page.evaluate(''' 
+ async (url) => { + try { + const response = await fetch(url); + const blob = await response.blob(); + return new Promise((resolve) => { + const reader = new FileReader(); + reader.onloadend = () => resolve(reader.result.split(',')[1]); + reader.readAsDataURL(blob); + }); + } catch (e) { + return null; + } + } + ''', url) + + if image_data_base64: + import base64 + image_data = base64.b64decode(image_data_base64) + print(f"✅ Image fetched via JavaScript: {len(image_data)} bytes") + + # 여전히 데이터가 없으면 스크린샷 사용 + if not image_data: + # 이미지 요소 찾기 + img_element = await page.query_selector('img') + if img_element: + # 이미지가 로드되었는지 확인 + is_loaded = await img_element.evaluate('(img) => img.complete && img.naturalHeight > 0') + if is_loaded: + image_data = await img_element.screenshot() + print(f"✅ Screenshot from loaded image: {len(image_data)} bytes") + else: + # 이미지 로드 대기 + try: + await img_element.evaluate('(img) => new Promise(r => img.onload = r)') + image_data = await img_element.screenshot() + print(f"✅ Screenshot after waiting: {len(image_data)} bytes") + except: + # 전체 페이지 스크린샷 + image_data = await page.screenshot(full_page=True) + print(f"⚠️ Full page screenshot: {len(image_data)} bytes") + else: + image_data = await page.screenshot(full_page=True) + print(f"⚠️ No image element, full screenshot: {len(image_data)} bytes") + + print(f"✅ Successfully fetched image with Playwright: {url}") + return image_data + + finally: + await page.close() + await context.close() + await browser.close() + + except Exception as pw_error: + print(f"Playwright failed: {pw_error}, returning placeholder") + + # Playwright도 실패하면 세련된 placeholder 반환 + from PIL import Image, ImageDraw, ImageFont + import io + import random + + # 그라디언트 배경색 선택 (부드러운 색상) + gradients = [ + ('#667eea', '#764ba2'), # 보라 그라디언트 + ('#f093fb', '#f5576c'), # 핑크 그라디언트 + ('#4facfe', '#00f2fe'), # 하늘색 그라디언트 + ('#43e97b', '#38f9d7'), # 민트 그라디언트 + ('#fa709a', '#fee140'), # 선셋 그라디언트 + ('#30cfd0', '#330867'), # 딥 오션 + 
('#a8edea', '#fed6e3'), # 파스텔 + ('#ffecd2', '#fcb69f'), # 피치 + ] + + # 랜덤 그라디언트 선택 + color1, color2 = random.choice(gradients) + + # 이미지 생성 (16:9 비율) + width, height = 800, 450 + img = Image.new('RGB', (width, height)) + draw = ImageDraw.Draw(img) + + # 그라디언트 배경 생성 + def hex_to_rgb(hex_color): + hex_color = hex_color.lstrip('#') + return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4)) + + rgb1 = hex_to_rgb(color1) + rgb2 = hex_to_rgb(color2) + + # 세로 그라디언트 + for y in range(height): + ratio = y / height + r = int(rgb1[0] * (1 - ratio) + rgb2[0] * ratio) + g = int(rgb1[1] * (1 - ratio) + rgb2[1] * ratio) + b = int(rgb1[2] * (1 - ratio) + rgb2[2] * ratio) + draw.rectangle([(0, y), (width, y + 1)], fill=(r, g, b)) + + # 반투명 오버레이 추가 (깊이감) + overlay = Image.new('RGBA', (width, height), (0, 0, 0, 0)) + overlay_draw = ImageDraw.Draw(overlay) + + # 중앙 원형 그라디언트 효과 + center_x, center_y = width // 2, height // 2 + max_radius = min(width, height) // 3 + + for radius in range(max_radius, 0, -2): + opacity = int(255 * (1 - radius / max_radius) * 0.3) + overlay_draw.ellipse( + [(center_x - radius, center_y - radius), + (center_x + radius, center_y + radius)], + fill=(255, 255, 255, opacity) + ) + + # 이미지 아이콘 그리기 (산 모양) + icon_color = (255, 255, 255, 200) + icon_size = 80 + icon_x = center_x + icon_y = center_y - 20 + + # 산 아이콘 (사진 이미지를 나타냄) + mountain_points = [ + (icon_x - icon_size, icon_y + icon_size//2), + (icon_x - icon_size//2, icon_y - icon_size//4), + (icon_x - icon_size//4, icon_y), + (icon_x + icon_size//4, icon_y - icon_size//2), + (icon_x + icon_size, icon_y + icon_size//2), + ] + overlay_draw.polygon(mountain_points, fill=icon_color) + + # 태양/달 원 + sun_radius = icon_size // 4 + overlay_draw.ellipse( + [(icon_x - icon_size//2, icon_y - icon_size//2 - sun_radius), + (icon_x - icon_size//2 + sun_radius*2, icon_y - icon_size//2 + sun_radius)], + fill=icon_color + ) + + # 프레임 테두리 + frame_margin = 40 + overlay_draw.rectangle( + [(frame_margin, frame_margin), + (width - 
frame_margin, height - frame_margin)], + outline=(255, 255, 255, 150), + width=3 + ) + + # 코너 장식 + corner_size = 20 + corner_width = 4 + corners = [ + (frame_margin, frame_margin), + (width - frame_margin - corner_size, frame_margin), + (frame_margin, height - frame_margin - corner_size), + (width - frame_margin - corner_size, height - frame_margin - corner_size) + ] + + for x, y in corners: + # 가로선 + overlay_draw.rectangle( + [(x, y), (x + corner_size, y + corner_width)], + fill=(255, 255, 255, 200) + ) + # 세로선 + overlay_draw.rectangle( + [(x, y), (x + corner_width, y + corner_size)], + fill=(255, 255, 255, 200) + ) + + # "Image Loading..." 텍스트 (작게) + try: + # 시스템 폰트 시도 + font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 16) + except: + font = ImageFont.load_default() + + text = "Image Loading..." + bbox = draw.textbbox((0, 0), text, font=font) + text_width = bbox[2] - bbox[0] + text_height = bbox[3] - bbox[1] + text_x = (width - text_width) // 2 + text_y = center_y + icon_size + + # 텍스트 그림자 + for offset in [(2, 2), (-1, -1)]: + overlay_draw.text( + (text_x + offset[0], text_y + offset[1]), + text, + font=font, + fill=(0, 0, 0, 100) + ) + + # 텍스트 본체 + overlay_draw.text( + (text_x, text_y), + text, + font=font, + fill=(255, 255, 255, 220) + ) + + # 오버레이 합성 + img = Image.alpha_composite(img.convert('RGBA'), overlay).convert('RGB') + + # 약간의 노이즈 추가 (텍스처) + pixels = img.load() + for _ in range(1000): + x = random.randint(0, width - 1) + y = random.randint(0, height - 1) + r, g, b = pixels[x, y] + brightness = random.randint(-20, 20) + pixels[x, y] = ( + max(0, min(255, r + brightness)), + max(0, min(255, g + brightness)), + max(0, min(255, b + brightness)) + ) + + # JPEG로 변환 (높은 품질) + output = io.BytesIO() + img.save(output, format='JPEG', quality=85, optimize=True) + return output.getvalue() + raise + + # 이미지 크기 확인 + content_length = int(response.headers.get('content-length', 0)) + max_size = settings.max_image_size_mb * 1024 * 1024 + + if 
content_length > max_size: + raise ValueError(f"Image too large: {content_length} bytes") + + # 응답 데이터 확인 + content = response.content + print(f"Downloaded {len(content)} bytes from {url[:50]}...") + + # gzip 압축 확인 및 해제 + import gzip + if len(content) > 2 and content[:2] == b'\x1f\x8b': + print("📦 Gzip compressed data detected, decompressing...") + try: + content = gzip.decompress(content) + print(f"✅ Decompressed to {len(content)} bytes") + except Exception as e: + print(f"❌ Failed to decompress gzip: {e}") + + # 처음 몇 바이트로 이미지 형식 확인 + if len(content) > 10: + header = content[:12] + if header[:2] == b'\xff\xd8': + print("✅ JPEG image detected") + elif header[:8] == b'\x89PNG\r\n\x1a\n': + print("✅ PNG image detected") + elif header[:6] in (b'GIF87a', b'GIF89a'): + print("✅ GIF image detected") + elif header[:4] == b'RIFF' and header[8:12] == b'WEBP': + print("✅ WebP image detected") + elif b' tuple[bytes, str]: + """이미지 리사이징 및 최적화""" + if size not in settings.thumbnail_sizes: + raise ValueError(f"Invalid size: {size}") + + target_size = settings.thumbnail_sizes[size] + + # SVG 체크 - SVG는 리사이징하지 않고 그대로 반환 + if self._is_svg(image_data): + return image_data, 'image/svg+xml' + + # PIL로 이미지 열기 + try: + img = Image.open(io.BytesIO(image_data)) + except Exception as e: + # WebP 헤더 체크 (RIFF....WEBP) + header = image_data[:12] if len(image_data) >= 12 else image_data + if header[:4] == b'RIFF' and header[8:12] == b'WEBP': + print("🎨 WebP 이미지 감지됨, 변환 시도") + # WebP 형식이지만 PIL이 열지 못하는 경우 + # Pillow-SIMD 또는 추가 라이브러리가 필요할 수 있음 + try: + # 재시도 + from PIL import WebPImagePlugin + img = Image.open(io.BytesIO(image_data)) + except: + print("❌ WebP 이미지를 열 수 없음, 원본 반환") + return image_data, 'image/webp' + else: + raise e + + # GIF 애니메이션 체크 및 처리 + if getattr(img, "format", None) == "GIF": + return self._process_gif(image_data, target_size) + + # WebP 형식 체크 + original_format = getattr(img, "format", None) + is_webp = original_format == "WEBP" + + # 원본 모드와 투명도 정보 저장 + original_mode = 
img.mode + original_has_transparency = img.mode in ('RGBA', 'LA') + original_has_palette = img.mode == 'P' + + # 팔레트 모드(P) 처리 - 간단하게 PIL의 기본 변환 사용 + if img.mode == 'P': + # 팔레트 모드는 RGB로 직접 변환 + # PIL의 convert 메서드가 팔레트를 올바르게 처리함 + img = img.convert('RGB') + + # 투명도가 있는 이미지 처리 + if img.mode == 'RGBA': + # RGBA는 흰색 배경과 합성 + background = Image.new('RGB', img.size, (255, 255, 255)) + background.paste(img, mask=img.split()[-1]) + img = background + elif img.mode == 'LA': + # LA(그레이스케일+알파)는 RGBA를 거쳐 RGB로 + img = img.convert('RGBA') + background = Image.new('RGB', img.size, (255, 255, 255)) + background.paste(img, mask=img.split()[-1]) + img = background + elif img.mode == 'L': + # 그레이스케일은 RGB로 변환 + img = img.convert('RGB') + elif img.mode not in ('RGB',): + # 기타 모드는 모두 RGB로 변환 + img = img.convert('RGB') + + # EXIF 방향 정보 처리 (RGB 변환 후에 수행) + try: + from PIL import ImageOps + img = ImageOps.exif_transpose(img) + except: + pass + + # 메타데이터 제거는 스킵 (팔레트 모드 이미지에서 문제 발생) + # RGB로 변환되었으므로 이미 메타데이터는 대부분 제거됨 + + # 비율 유지하며 리사이징 (크롭 없이) + img_ratio = img.width / img.height + target_width = target_size[0] + target_height = target_size[1] + + # 원본 비율을 유지하면서 목표 크기에 맞추기 + # 너비 또는 높이 중 하나를 기준으로 비율 계산 + if img.width > target_width or img.height > target_height: + # 너비 기준 리사이징 + width_ratio = target_width / img.width + # 높이 기준 리사이징 + height_ratio = target_height / img.height + # 둘 중 작은 비율 사용 (목표 크기를 넘지 않도록) + ratio = min(width_ratio, height_ratio) + + new_width = int(img.width * ratio) + new_height = int(img.height * ratio) + + # 큰 이미지를 작게 만들 때는 2단계 리샘플링으로 품질 향상 + if img.width > new_width * 2 or img.height > new_height * 2: + # 1단계: 목표 크기의 2배로 먼저 축소 + intermediate_width = new_width * 2 + intermediate_height = new_height * 2 + img = img.resize((intermediate_width, intermediate_height), Image.Resampling.LANCZOS) + + # 최종 목표 크기로 리샘플링 + img = img.resize((new_width, new_height), Image.Resampling.LANCZOS) + + # 샤프닝 적용 (작은 이미지에만) + if target_size[0] <= 400: + from PIL import ImageEnhance + enhancer 
def get_adaptive_quality(base_quality: int, target_width: int) -> int:
    """Return the encoder quality to use for an output ``target_width`` pixels wide.

    Narrow outputs get a small boost over ``base_quality`` (capped per tier);
    anything wider than the largest tier uses ``base_quality`` unchanged.
    """
    # (widest output in the tier, quality bonus, upper cap), narrowest first.
    tiers = (
        (150, 10, 95),  # thumbnail
        (360, 5, 90),   # card
    )
    for widest, bonus, cap in tiers:
        if target_width <= widest:
            return min(base_quality + bonus, cap)
    # detail and hero sizes
    return base_quality
async def get_cache_size(self) -> float:
    """Return the total size of every file under ``self.cache_dir``, in GiB.

    Files that cannot be stat'ed are skipped instead of aborting the whole
    walk. This happens when a file is removed while the walk is in progress
    (for example by ``cleanup_old_cache``) or when an entry is a dangling
    symlink.

    Returns:
        Sum of the file sizes divided by 1024**3.
    """
    total_size = 0

    for dirpath, _dirnames, filenames in os.walk(self.cache_dir):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            try:
                total_size += os.path.getsize(filepath)
            except OSError:
                # The entry vanished or is unreadable; it occupies no cache space we can count.
                continue

    return total_size / (1024 ** 3)  # bytes -> GiB
class Settings(BaseSettings):
    """Configuration for the image proxy service.

    Each attribute below is a field with a default value; ``Config.env_file``
    points pydantic-settings at a ``.env`` file as an additional source of
    values.
    """

    # Basic settings
    app_name: str = "Image Proxy Service"
    debug: bool = True

    # Cache settings (still used for local temporary files after the switch to MinIO)
    cache_dir: Path = Path("/app/cache")
    max_cache_size_gb: int = 10
    cache_ttl_days: int = 30  # cache entries older than this many days are removed by cleanup

    # MinIO settings
    use_minio: bool = True  # whether to use MinIO
    minio_endpoint: str = "minio:9000"
    # NOTE(review): the access/secret key defaults look like MinIO's stock
    # credentials - confirm they are overridden outside development.
    minio_access_key: str = "minioadmin"
    minio_secret_key: str = "minioadmin"
    minio_bucket_name: str = "image-cache"
    minio_secure: bool = False

    # Image settings
    max_image_size_mb: int = 20
    allowed_formats: list = ["jpg", "jpeg", "png", "gif", "webp", "svg"]

    # Resize settings - tuned per news-card use case; values are (width, height)
    thumbnail_sizes: dict = {
        "thumb": (150, 100),  # small thumbnail (3:2 ratio)
        "card": (360, 240),  # news card (3:2 ratio)
        "list": (300, 200),  # list view (3:2 ratio)
        "detail": (800, 533),  # detail page (original aspect ratio kept)
        "hero": (1200, 800)  # hero image (original aspect ratio kept)
    }

    # Image optimization settings - smallest size that still preserves quality
    jpeg_quality: int = 85  # JPEG quality (raised for better quality)
    webp_quality: int = 85  # WebP quality (raised to work around the black-image problem)
    webp_lossless: bool = False  # lossless compression disabled (size optimization)
    png_compress_level: int = 9  # maximum PNG compression (0-9, 9 compresses most)
    convert_to_webp: bool = False  # WebP conversion temporarily disabled (black-image problem)

    # Advanced optimization settings
    progressive_jpeg: bool = True  # progressive JPEG (better perceived loading)
    strip_metadata: bool = True  # strip EXIF and other metadata (saves space)
    optimize_png: bool = True  # PNG palette optimization

    # Outbound request settings
    request_timeout: int = 30  # passed as the httpx client timeout
    user_agent: str = "ImageProxyService/1.0"

    class Config:
        env_file = ".env"
def resize_and_optimize_image(self, image_data: bytes, size: str) -> tuple[bytes, str]:
    """Resize an image to a configured thumbnail size and re-encode it.

    Args:
        image_data: Raw bytes of the source image.
        size: Key into ``settings.thumbnail_sizes``; an unknown key falls
            back to the ``"thumb"`` entry.

    Returns:
        ``(encoded_bytes, content_type)``. When ``settings.convert_to_webp``
        is enabled the image is flattened onto a white background and saved
        as WebP. Otherwise PNG sources are saved as PNG and everything else
        is converted to RGB and saved as JPEG. If any step raises, the
        error is printed and the input bytes are returned unchanged with the
        content type ``image/jpeg``.
    """
    try:
        target_size = settings.thumbnail_sizes.get(size, settings.thumbnail_sizes["thumb"])

        # Open the image
        img = Image.open(io.BytesIO(image_data))

        # Capture the container format before any transformation.
        # ImageOps.exif_transpose() returns a new Image whose ``format`` is
        # None, so testing ``img.format`` afterwards never matched 'PNG' and
        # every PNG was re-encoded as JPEG, losing its transparency.
        source_format = img.format

        # Apply the EXIF orientation; best effort, a malformed EXIF block
        # must not prevent resizing.
        try:
            from PIL import ImageOps
            img = ImageOps.exif_transpose(img)
        except Exception:
            pass

        # Resize in place, keeping the original aspect ratio
        img.thumbnail(target_size, Image.Resampling.LANCZOS)

        # Output buffer
        output = io.BytesIO()

        if settings.convert_to_webp:
            # Flatten transparency onto white before encoding WebP
            if img.mode in ('RGBA', 'LA', 'P'):
                background = Image.new('RGB', img.size, (255, 255, 255))
                if img.mode == 'P':
                    img = img.convert('RGBA')
                background.paste(img, mask=img.split()[-1] if 'A' in img.mode else None)
                img = background
            elif img.mode != 'RGB':
                img = img.convert('RGB')

            img.save(
                output,
                format='WEBP',
                quality=settings.webp_quality,
                lossless=settings.webp_lossless,
                method=6  # slowest / best compression
            )
            content_type = 'image/webp'
        elif source_format == 'PNG':
            # Keep PNG sources as PNG
            img.save(
                output,
                format='PNG',
                compress_level=settings.png_compress_level,
                optimize=settings.optimize_png
            )
            content_type = 'image/png'
        else:
            # Everything else is encoded as JPEG, which requires RGB
            if img.mode != 'RGB':
                img = img.convert('RGB')
            img.save(
                output,
                format='JPEG',
                quality=settings.jpeg_quality,
                optimize=True,
                progressive=settings.progressive_jpeg
            )
            content_type = 'image/jpeg'

        return output.getvalue(), content_type

    except Exception as e:
        print(f"이미지 최적화 오류: {e}")
        import traceback
        traceback.print_exc()
        return image_data, 'image/jpeg'
async def download_image(self, url: str) -> bytes:
    """Download an image from an external URL and return its raw bytes.

    The request sends ``settings.user_agent`` and a Referer built from the
    URL's scheme and host. A 403 response is retried once with a
    browser-like User-Agent.

    Args:
        url: Absolute URL of the image.

    Returns:
        The response body.

    Raises:
        httpx.HTTPStatusError: The final response has an error status.
        ValueError: The declared or actual body size exceeds
            ``settings.max_image_size_mb``.
    """
    max_bytes = settings.max_image_size_mb * 1024 * 1024

    # Referer is "<scheme>//<host>" when the URL has at least that many parts.
    pieces = url.split('/')
    referer = pieces[0] + '//' + pieces[2] if len(pieces) > 2 else url

    # Accept-Encoding is deliberately left to httpx: it advertises only the
    # encodings it can decode. Forcing "br" without a brotli package
    # installed can yield an undecoded body that is not a valid image.
    headers = {
        "User-Agent": settings.user_agent,
        "Accept": "image/webp,image/apng,image/*,*/*;q=0.8",
        "Cache-Control": "no-cache",
        "Referer": referer
    }

    # NOTE(security): verify=False disables TLS certificate verification
    # (the original comment marks this as a development-only setting).
    async with httpx.AsyncClient(
        timeout=settings.request_timeout,
        verify=False,
        follow_redirects=True
    ) as client:
        response = await client.get(url, headers=headers)

        if response.status_code == 403:
            headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
            response = await client.get(url, headers=headers)

        response.raise_for_status()

        content_length = response.headers.get("content-length")
        if content_length and int(content_length) > max_bytes:
            raise ValueError(f"이미지 크기가 {settings.max_image_size_mb}MB를 초과합니다")

        # Content-Length is optional (chunked responses) and describes the
        # encoded size, so the limit is also checked on the decoded body.
        body = response.content
        if len(body) > max_bytes:
            raise ValueError(f"이미지 크기가 {settings.max_image_size_mb}MB를 초과합니다")

        return body
async def trigger_background_generation(self, url: str):
    """Schedule generation of every size variant for ``url`` and return at once.

    The work runs in ``self._generate_all_sizes(url)`` as an asyncio task.
    The event loop holds only a weak reference to tasks, so a task whose
    handle is discarded may be garbage-collected before it finishes. The
    task is therefore kept in ``self._background_tasks`` until it completes.

    Args:
        url: Source image URL whose size variants should be generated.
    """
    task = asyncio.create_task(self._generate_all_sizes(url))

    # The set is created lazily so instances built before this attribute
    # existed keep working.
    pending = getattr(self, "_background_tasks", None)
    if pending is None:
        pending = set()
        self._background_tasks = pending

    pending.add(task)
    # Drop the strong reference once the task is done.
    task.add_done_callback(pending.discard)
@app.get("/")
async def root():
    """Describe the service: name, version, current time, and main routes."""
    api_prefix = "/api/v1"

    # Route listing, in the order it is presented to clients.
    endpoints = {"proxy": f"{api_prefix}/image?url=&size="}
    for route in ("upload", "stats", "cleanup"):
        endpoints[route] = f"{api_prefix}/{route}"

    description = {
        "service": "Images Service",
        "version": "2.0.0",
        "timestamp": datetime.now().isoformat(),
        "endpoints": endpoints,
    }
    return description
class BaseChannelHandler:
    """Common interface for notification delivery channels.

    Subclasses override :meth:`send`; :meth:`verify_delivery` has a default
    implementation that reports success.
    """

    async def send(self, notification: "Notification") -> bool:
        """Send ``notification`` through this channel.

        Returns:
            ``True`` when the notification was handed off successfully.

        Raises:
            NotImplementedError: The concrete handler does not override
                this method. The message names the handler class so the
                missing implementation is easy to locate.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement send()")

    async def verify_delivery(self, notification: "Notification") -> bool:
        """Report whether ``notification`` was delivered; always ``True`` here."""
        return True
class SMSHandler(BaseChannelHandler):
    """Delivers notifications as SMS messages.

    The provider call itself is still stubbed out: with or without API
    configuration, ``send`` waits briefly and reports success.
    """

    def __init__(self, api_key: str, api_url: str):
        self.api_key = api_key
        self.api_url = api_url
        self.client = httpx.AsyncClient()

    async def send(self, notification: Notification) -> bool:
        """Send ``notification`` as an SMS; return ``False`` if anything raises."""
        try:
            # In production this would integrate with an SMS provider (Twilio, etc.)
            logger.info(f"Sending SMS to user {notification.user_id}")

            configured = self.api_key and self.api_url
            if not configured:
                # No API configuration: simulate the network round trip.
                await asyncio.sleep(0.1)
                logger.info(f"SMS sent (simulated) to user {notification.user_id}")
                return True

            # The phone number would normally come from the user database;
            # fall back to a demo number when the notification carries none.
            details = notification.data
            phone_number = details.get("phone") if details else None
            phone_number = phone_number or "+1234567890"

            # Request body for the provider API (example structure).
            payload = {
                "to": phone_number,
                "message": f"{notification.title}\n{notification.message}",
                "api_key": self.api_key
            }

            # response = await self.client.post(self.api_url, json=payload)
            # return response.status_code == 200

            # Simulated success
            await asyncio.sleep(0.1)
            logger.info(f"SMS sent successfully to user {notification.user_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to send SMS: {e}")
            return False
class InAppHandler(BaseChannelHandler):
    """Delivers notifications inside the application.

    When a WebSocket server has been attached, the notification is pushed to
    the user in real time; otherwise ``send`` only logs.
    """

    def __init__(self):
        self.ws_server = None

    def set_ws_server(self, ws_server):
        """Attach the WebSocket server used for real-time delivery."""
        self.ws_server = ws_server

    async def send(self, notification: Notification) -> bool:
        """Push ``notification`` to its user; return ``False`` if anything raises."""
        try:
            logger.info(f"Sending in-app notification to user {notification.user_id}")

            # Persisting the notification is the manager's job; this handler
            # only covers the real-time push.
            if self.ws_server:
                recipient = notification.user_id

                body = {
                    "id": notification.id,
                    "title": notification.title,
                    "message": notification.message,
                    "priority": notification.priority.value,
                }
                if hasattr(notification, 'category'):
                    body["category"] = notification.category.value
                else:
                    body["category"] = "system"
                body["timestamp"] = notification.created_at.isoformat()
                body["data"] = notification.data

                envelope = {"type": "notification", "notification": body}
                await self.ws_server.send_to_user(recipient, envelope)

            logger.info(f"In-app notification sent successfully to user {notification.user_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to send in-app notification: {e}")
            return False
class WebhookHandler(BaseChannelHandler):
    """Delivers notifications to a generic HTTP webhook.

    The target URL comes from the notification's ``data["webhook_url"]`` when
    present, otherwise from the handler's default. The HTTP call itself is
    still stubbed out.
    """

    def __init__(self, default_webhook_url: Optional[str] = None):
        self.default_webhook_url = default_webhook_url
        self.client = httpx.AsyncClient()

    async def send(self, notification: Notification) -> bool:
        """Send ``notification`` to the webhook.

        Returns ``False`` when no URL is available or anything raises.
        """
        try:
            # A per-notification URL takes precedence over the default.
            extra = notification.data
            has_override = bool(extra) and "webhook_url" in extra
            webhook_url = extra["webhook_url"] if has_override else self.default_webhook_url

            if not webhook_url:
                logger.warning("No webhook URL configured")
                return False

            logger.info(f"Sending webhook notification for user {notification.user_id}")

            # Body that would be posted to the webhook.
            payload = {
                "notification_id": notification.id,
                "user_id": notification.user_id,
                "title": notification.title,
                "message": notification.message,
                "priority": notification.priority.value,
                "timestamp": notification.created_at.isoformat(),
                "data": notification.data
            }

            # response = await self.client.post(webhook_url, json=payload)
            # return response.status_code in [200, 201, 202, 204]

            # Simulated success
            await asyncio.sleep(0.1)
            logger.info("Webhook notification sent successfully")
            return True

        except Exception as e:
            logger.error(f"Failed to send webhook notification: {e}")
            return False
import EmailHandler, SMSHandler, PushHandler, InAppHandler +from websocket_server import WebSocketNotificationServer +from queue_manager import NotificationQueueManager +from template_engine import TemplateEngine +from preference_manager import PreferenceManager + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Global instances +notification_manager = None +ws_server = None +queue_manager = None +template_engine = None +preference_manager = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Startup + global notification_manager, ws_server, queue_manager, template_engine, preference_manager + + try: + # Initialize Template Engine + template_engine = TemplateEngine() + await template_engine.load_templates() + logger.info("Template engine initialized") + + # Initialize Preference Manager + preference_manager = PreferenceManager( + mongodb_url=os.getenv("MONGODB_URL", "mongodb://mongodb:27017"), + database_name="notifications" + ) + await preference_manager.connect() + logger.info("Preference manager connected") + + # Initialize Notification Queue Manager + queue_manager = NotificationQueueManager( + redis_url=os.getenv("REDIS_URL", "redis://redis:6379") + ) + await queue_manager.connect() + logger.info("Queue manager connected") + + # Initialize Channel Handlers + email_handler = EmailHandler( + smtp_host=os.getenv("SMTP_HOST", "smtp.gmail.com"), + smtp_port=int(os.getenv("SMTP_PORT", 587)), + smtp_user=os.getenv("SMTP_USER", ""), + smtp_password=os.getenv("SMTP_PASSWORD", "") + ) + + sms_handler = SMSHandler( + api_key=os.getenv("SMS_API_KEY", ""), + api_url=os.getenv("SMS_API_URL", "") + ) + + push_handler = PushHandler( + fcm_server_key=os.getenv("FCM_SERVER_KEY", "") + ) + + in_app_handler = InAppHandler() + + # Initialize Notification Manager + notification_manager = NotificationManager( + channel_handlers={ + NotificationChannel.EMAIL: email_handler, + NotificationChannel.SMS: sms_handler, + 
NotificationChannel.PUSH: push_handler, + NotificationChannel.IN_APP: in_app_handler + }, + queue_manager=queue_manager, + template_engine=template_engine, + preference_manager=preference_manager + ) + await notification_manager.start() + logger.info("Notification manager started") + + # Initialize WebSocket Server + ws_server = WebSocketNotificationServer() + logger.info("WebSocket server initialized") + + # Register in-app handler with WebSocket server + in_app_handler.set_ws_server(ws_server) + + except Exception as e: + logger.error(f"Failed to start Notification service: {e}") + raise + + yield + + # Shutdown + if notification_manager: + await notification_manager.stop() + if queue_manager: + await queue_manager.close() + if preference_manager: + await preference_manager.close() + + logger.info("Notification service shutdown complete") + +app = FastAPI( + title="Notification Service", + description="Real-time Multi-channel Notification Service", + version="1.0.0", + lifespan=lifespan +) + +# CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +@app.get("/") +async def root(): + return { + "service": "Notification Service", + "status": "running", + "timestamp": datetime.now().isoformat() + } + +@app.get("/health") +async def health_check(): + return { + "status": "healthy", + "service": "notifications", + "components": { + "queue_manager": "connected" if queue_manager and queue_manager.is_connected else "disconnected", + "preference_manager": "connected" if preference_manager and preference_manager.is_connected else "disconnected", + "notification_manager": "running" if notification_manager and notification_manager.is_running else "stopped", + "websocket_connections": len(ws_server.active_connections) if ws_server else 0 + }, + "timestamp": datetime.now().isoformat() + } + +# Notification Endpoints +@app.post("/api/notifications/send") +async def 
send_notification( + request: CreateNotificationRequest, + background_tasks: BackgroundTasks +): + """Send a single notification""" + try: + notification = await notification_manager.create_notification( + user_id=request.user_id, + title=request.title, + message=request.message, + channels=request.channels, + priority=request.priority, + data=request.data, + template_id=request.template_id, + schedule_at=request.schedule_at + ) + + if request.schedule_at and request.schedule_at > datetime.now(): + # Schedule for later + await queue_manager.schedule_notification(notification, request.schedule_at) + return { + "notification_id": notification.id, + "status": "scheduled", + "scheduled_at": request.schedule_at.isoformat() + } + else: + # Send immediately + background_tasks.add_task( + notification_manager.send_notification, + notification + ) + return { + "notification_id": notification.id, + "status": "queued" + } + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/notifications/send-bulk") +async def send_bulk_notifications( + request: BulkNotificationRequest, + background_tasks: BackgroundTasks +): + """Send notifications to multiple users""" + try: + notifications = [] + for user_id in request.user_ids: + notification = await notification_manager.create_notification( + user_id=user_id, + title=request.title, + message=request.message, + channels=request.channels, + priority=request.priority, + data=request.data, + template_id=request.template_id + ) + notifications.append(notification) + + # Queue all notifications + background_tasks.add_task( + notification_manager.send_bulk_notifications, + notifications + ) + + return { + "count": len(notifications), + "notification_ids": [n.id for n in notifications], + "status": "queued" + } + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/notifications/user/{user_id}") +async def get_user_notifications( + user_id: str, + status: 
Optional[NotificationStatus] = None, + channel: Optional[NotificationChannel] = None, + limit: int = Query(50, le=200), + offset: int = Query(0, ge=0) +): + """Get notifications for a specific user""" + try: + notifications = await notification_manager.get_user_notifications( + user_id=user_id, + status=status, + channel=channel, + limit=limit, + offset=offset + ) + + return { + "notifications": notifications, + "count": len(notifications), + "limit": limit, + "offset": offset + } + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.patch("/api/notifications/{notification_id}/read") +async def mark_notification_read(notification_id: str): + """Mark a notification as read""" + try: + success = await notification_manager.mark_as_read(notification_id) + if success: + return {"status": "marked_as_read", "notification_id": notification_id} + else: + raise HTTPException(status_code=404, detail="Notification not found") + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.delete("/api/notifications/{notification_id}") +async def delete_notification(notification_id: str): + """Delete a notification""" + try: + success = await notification_manager.delete_notification(notification_id) + if success: + return {"status": "deleted", "notification_id": notification_id} + else: + raise HTTPException(status_code=404, detail="Notification not found") + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# Template Endpoints +@app.get("/api/templates") +async def get_templates(): + """Get all notification templates""" + templates = await template_engine.get_all_templates() + return {"templates": templates} + +@app.post("/api/templates") +async def create_template(template: NotificationTemplate): + """Create a new notification template""" + try: + template_id = await template_engine.create_template(template) + return {"template_id": template_id, "status": "created"} + except Exception 
as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.put("/api/templates/{template_id}") +async def update_template(template_id: str, template: NotificationTemplate): + """Update an existing template""" + try: + success = await template_engine.update_template(template_id, template) + if success: + return {"status": "updated", "template_id": template_id} + else: + raise HTTPException(status_code=404, detail="Template not found") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# Preference Endpoints +@app.get("/api/preferences/{user_id}") +async def get_user_preferences(user_id: str): + """Get notification preferences for a user""" + try: + preferences = await preference_manager.get_user_preferences(user_id) + return {"user_id": user_id, "preferences": preferences} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.put("/api/preferences/{user_id}") +async def update_user_preferences( + user_id: str, + preferences: NotificationPreference +): + """Update notification preferences for a user""" + try: + success = await preference_manager.update_user_preferences(user_id, preferences) + if success: + return {"status": "updated", "user_id": user_id} + else: + return {"status": "created", "user_id": user_id} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/preferences/{user_id}/unsubscribe/{category}") +async def unsubscribe_from_category(user_id: str, category: str): + """Unsubscribe user from a notification category""" + try: + success = await preference_manager.unsubscribe_category(user_id, category) + if success: + return {"status": "unsubscribed", "user_id": user_id, "category": category} + else: + raise HTTPException(status_code=404, detail="User preferences not found") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# History and Analytics Endpoints +@app.get("/api/history") +async def 
get_notification_history( + user_id: Optional[str] = None, + channel: Optional[NotificationChannel] = None, + status: Optional[NotificationStatus] = None, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + limit: int = Query(100, le=1000) +): + """Get notification history with filters""" + try: + history = await notification_manager.get_notification_history( + user_id=user_id, + channel=channel, + status=status, + start_date=start_date, + end_date=end_date, + limit=limit + ) + + return { + "history": history, + "count": len(history), + "filters": { + "user_id": user_id, + "channel": channel, + "status": status, + "date_range": f"{start_date} to {end_date}" if start_date and end_date else None + } + } + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/analytics") +async def get_notification_analytics( + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None +): + """Get notification analytics""" + try: + if not start_date: + start_date = datetime.now() - timedelta(days=7) + if not end_date: + end_date = datetime.now() + + analytics = await notification_manager.get_analytics(start_date, end_date) + return analytics + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# Queue Management Endpoints +@app.get("/api/queue/status") +async def get_queue_status(): + """Get current queue status""" + try: + status = await queue_manager.get_queue_status() + return status + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/queue/retry/{notification_id}") +async def retry_failed_notification( + notification_id: str, + background_tasks: BackgroundTasks +): + """Retry a failed notification""" + try: + notification = await notification_manager.get_notification(notification_id) + if not notification: + raise HTTPException(status_code=404, detail="Notification not found") + + if notification.status != 
NotificationStatus.FAILED: + raise HTTPException(status_code=400, detail="Only failed notifications can be retried") + + background_tasks.add_task( + notification_manager.retry_notification, + notification + ) + + return {"status": "retry_queued", "notification_id": notification_id} + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# WebSocket Endpoint +from fastapi import WebSocket, WebSocketDisconnect + +@app.websocket("/ws/notifications/{user_id}") +async def websocket_notifications(websocket: WebSocket, user_id: str): + """WebSocket endpoint for real-time notifications""" + await ws_server.connect(websocket, user_id) + try: + while True: + # Keep connection alive and handle incoming messages + data = await websocket.receive_text() + + # Handle different message types + if data == "ping": + await websocket.send_text("pong") + elif data.startswith("read:"): + # Mark notification as read + notification_id = data.split(":")[1] + await notification_manager.mark_as_read(notification_id) + + except WebSocketDisconnect: + ws_server.disconnect(user_id) + except Exception as e: + logger.error(f"WebSocket error for user {user_id}: {e}") + ws_server.disconnect(user_id) + +# Device Token Management +@app.post("/api/devices/register") +async def register_device_token( + user_id: str, + device_token: str, + device_type: str = Query(..., regex="^(ios|android|web)$") +): + """Register a device token for push notifications""" + try: + success = await notification_manager.register_device_token( + user_id=user_id, + device_token=device_token, + device_type=device_type + ) + + if success: + return { + "status": "registered", + "user_id": user_id, + "device_type": device_type + } + else: + raise HTTPException(status_code=500, detail="Failed to register device token") + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.delete("/api/devices/{device_token}") +async def 
unregister_device_token(device_token: str): + """Unregister a device token""" + try: + success = await notification_manager.unregister_device_token(device_token) + + if success: + return {"status": "unregistered", "device_token": device_token} + else: + raise HTTPException(status_code=404, detail="Device token not found") + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + uvicorn.run( + "main:app", + host="0.0.0.0", + port=8000, + reload=True + ) \ No newline at end of file diff --git a/services/notifications/backend/models.py b/services/notifications/backend/models.py new file mode 100644 index 0000000..eff2864 --- /dev/null +++ b/services/notifications/backend/models.py @@ -0,0 +1,201 @@ +""" +Data models for Notification Service +""" +from pydantic import BaseModel, Field +from datetime import datetime +from typing import Optional, List, Dict, Any, Literal +from enum import Enum + +class NotificationChannel(str, Enum): + """Notification delivery channels""" + EMAIL = "email" + SMS = "sms" + PUSH = "push" + IN_APP = "in_app" + +class NotificationStatus(str, Enum): + """Notification status""" + PENDING = "pending" + SENT = "sent" + DELIVERED = "delivered" + READ = "read" + FAILED = "failed" + CANCELLED = "cancelled" + +class NotificationPriority(str, Enum): + """Notification priority levels""" + LOW = "low" + NORMAL = "normal" + HIGH = "high" + URGENT = "urgent" + +class NotificationCategory(str, Enum): + """Notification categories""" + SYSTEM = "system" + MARKETING = "marketing" + TRANSACTION = "transaction" + SOCIAL = "social" + SECURITY = "security" + UPDATE = "update" + +class Notification(BaseModel): + """Notification model""" + id: Optional[str] = Field(None, description="Unique notification ID") + user_id: str = Field(..., description="Target user ID") + title: str = Field(..., description="Notification title") + message: str = Field(..., description="Notification message") + channel: 
NotificationChannel = Field(..., description="Delivery channel") + status: NotificationStatus = Field(default=NotificationStatus.PENDING) + priority: NotificationPriority = Field(default=NotificationPriority.NORMAL) + category: NotificationCategory = Field(default=NotificationCategory.SYSTEM) + data: Optional[Dict[str, Any]] = Field(default=None, description="Additional data") + template_id: Optional[str] = Field(None, description="Template ID if using template") + scheduled_at: Optional[datetime] = Field(None, description="Scheduled delivery time") + sent_at: Optional[datetime] = Field(None, description="Actual sent time") + delivered_at: Optional[datetime] = Field(None, description="Delivery confirmation time") + read_at: Optional[datetime] = Field(None, description="Read time") + retry_count: int = Field(default=0, description="Number of retry attempts") + error_message: Optional[str] = Field(None, description="Error message if failed") + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class NotificationTemplate(BaseModel): + """Notification template model""" + id: Optional[str] = Field(None, description="Template ID") + name: str = Field(..., description="Template name") + channel: NotificationChannel = Field(..., description="Target channel") + category: NotificationCategory = Field(..., description="Template category") + subject_template: Optional[str] = Field(None, description="Subject template (for email)") + body_template: str = Field(..., description="Body template with variables") + variables: List[str] = Field(default_factory=list, description="List of required variables") + metadata: Dict[str, Any] = Field(default_factory=dict, description="Template metadata") + is_active: bool = Field(default=True, description="Template active status") + created_at: datetime = Field(default_factory=datetime.now) + 
updated_at: datetime = Field(default_factory=datetime.now) + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class NotificationPreference(BaseModel): + """User notification preferences""" + user_id: str = Field(..., description="User ID") + channels: Dict[NotificationChannel, bool] = Field( + default_factory=lambda: { + NotificationChannel.EMAIL: True, + NotificationChannel.SMS: False, + NotificationChannel.PUSH: True, + NotificationChannel.IN_APP: True + } + ) + categories: Dict[NotificationCategory, bool] = Field( + default_factory=lambda: { + NotificationCategory.SYSTEM: True, + NotificationCategory.MARKETING: False, + NotificationCategory.TRANSACTION: True, + NotificationCategory.SOCIAL: True, + NotificationCategory.SECURITY: True, + NotificationCategory.UPDATE: True + } + ) + quiet_hours: Optional[Dict[str, str]] = Field( + default=None, + description="Quiet hours configuration {start: 'HH:MM', end: 'HH:MM'}" + ) + timezone: str = Field(default="UTC", description="User timezone") + language: str = Field(default="en", description="Preferred language") + email_frequency: Literal["immediate", "daily", "weekly"] = Field(default="immediate") + updated_at: datetime = Field(default_factory=datetime.now) + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class NotificationHistory(BaseModel): + """Notification history entry""" + notification_id: str + user_id: str + channel: NotificationChannel + status: NotificationStatus + title: str + message: str + sent_at: Optional[datetime] + delivered_at: Optional[datetime] + read_at: Optional[datetime] + error_message: Optional[str] + metadata: Dict[str, Any] = Field(default_factory=dict) + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class CreateNotificationRequest(BaseModel): + """Request model for creating notification""" + user_id: str + title: str + message: str + channels: List[NotificationChannel] = 
Field(default=[NotificationChannel.IN_APP]) + priority: NotificationPriority = Field(default=NotificationPriority.NORMAL) + category: NotificationCategory = Field(default=NotificationCategory.SYSTEM) + data: Optional[Dict[str, Any]] = None + template_id: Optional[str] = None + schedule_at: Optional[datetime] = None + +class BulkNotificationRequest(BaseModel): + """Request model for bulk notifications""" + user_ids: List[str] + title: str + message: str + channels: List[NotificationChannel] = Field(default=[NotificationChannel.IN_APP]) + priority: NotificationPriority = Field(default=NotificationPriority.NORMAL) + category: NotificationCategory = Field(default=NotificationCategory.SYSTEM) + data: Optional[Dict[str, Any]] = None + template_id: Optional[str] = None + +class DeviceToken(BaseModel): + """Device token for push notifications""" + user_id: str + token: str + device_type: Literal["ios", "android", "web"] + app_version: Optional[str] = None + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class NotificationStats(BaseModel): + """Notification statistics""" + total_sent: int + total_delivered: int + total_read: int + total_failed: int + delivery_rate: float + read_rate: float + channel_stats: Dict[str, Dict[str, int]] + category_stats: Dict[str, Dict[str, int]] + period: str + +class NotificationEvent(BaseModel): + """Notification event for tracking""" + event_type: Literal["sent", "delivered", "read", "failed", "clicked"] + notification_id: str + user_id: str + channel: NotificationChannel + timestamp: datetime = Field(default_factory=datetime.now) + metadata: Dict[str, Any] = Field(default_factory=dict) + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } \ No newline at end of file diff --git a/services/notifications/backend/notification_manager.py 
b/services/notifications/backend/notification_manager.py new file mode 100644 index 0000000..528871c --- /dev/null +++ b/services/notifications/backend/notification_manager.py @@ -0,0 +1,375 @@ +""" +Notification Manager - Core notification orchestration +""" +import asyncio +import logging +from datetime import datetime +from typing import List, Optional, Dict, Any +import uuid +from models import ( + Notification, NotificationChannel, NotificationStatus, + NotificationPriority, NotificationHistory, NotificationPreference +) + +logger = logging.getLogger(__name__) + +class NotificationManager: + """Manages notification creation, delivery, and tracking""" + + def __init__( + self, + channel_handlers: Dict[NotificationChannel, Any], + queue_manager: Any, + template_engine: Any, + preference_manager: Any + ): + self.channel_handlers = channel_handlers + self.queue_manager = queue_manager + self.template_engine = template_engine + self.preference_manager = preference_manager + self.is_running = False + self.notification_store = {} # In-memory store for demo + self.history_store = [] # In-memory history for demo + self.device_tokens = {} # In-memory device tokens for demo + + async def start(self): + """Start notification manager""" + self.is_running = True + # Start background tasks for processing queued notifications + asyncio.create_task(self._process_notification_queue()) + asyncio.create_task(self._process_scheduled_notifications()) + logger.info("Notification manager started") + + async def stop(self): + """Stop notification manager""" + self.is_running = False + logger.info("Notification manager stopped") + + async def create_notification( + self, + user_id: str, + title: str, + message: str, + channels: List[NotificationChannel], + priority: NotificationPriority = NotificationPriority.NORMAL, + data: Optional[Dict[str, Any]] = None, + template_id: Optional[str] = None, + schedule_at: Optional[datetime] = None + ) -> Notification: + """Create a new 
notification""" + + # Check user preferences + preferences = await self.preference_manager.get_user_preferences(user_id) + if preferences: + # Filter channels based on user preferences + channels = [ch for ch in channels if preferences.channels.get(ch, True)] + + # Apply template if provided + if template_id: + template = await self.template_engine.get_template(template_id) + if template: + message = await self.template_engine.render_template(template, data or {}) + + # Create notification objects for each channel + notification = Notification( + id=str(uuid.uuid4()), + user_id=user_id, + title=title, + message=message, + channel=channels[0] if channels else NotificationChannel.IN_APP, + priority=priority, + data=data, + template_id=template_id, + scheduled_at=schedule_at, + created_at=datetime.now() + ) + + # Store notification + self.notification_store[notification.id] = notification + + logger.info(f"Created notification {notification.id} for user {user_id}") + return notification + + async def send_notification(self, notification: Notification): + """Send a single notification""" + try: + # Check if notification should be sent now + if notification.scheduled_at and notification.scheduled_at > datetime.now(): + await self.queue_manager.schedule_notification(notification, notification.scheduled_at) + return + + # Get the appropriate handler + handler = self.channel_handlers.get(notification.channel) + if not handler: + raise ValueError(f"No handler for channel {notification.channel}") + + # Send through the channel + success = await handler.send(notification) + + if success: + notification.status = NotificationStatus.SENT + notification.sent_at = datetime.now() + logger.info(f"Notification {notification.id} sent successfully") + else: + notification.status = NotificationStatus.FAILED + notification.retry_count += 1 + logger.error(f"Failed to send notification {notification.id}") + + # Retry if needed + if notification.retry_count < 
self._get_max_retries(notification.priority): + await self.queue_manager.enqueue_notification(notification) + + # Update notification + self.notification_store[notification.id] = notification + + # Add to history + await self._add_to_history(notification) + + except Exception as e: + logger.error(f"Error sending notification {notification.id}: {e}") + notification.status = NotificationStatus.FAILED + notification.error_message = str(e) + self.notification_store[notification.id] = notification + + async def send_bulk_notifications(self, notifications: List[Notification]): + """Send multiple notifications""" + tasks = [] + for notification in notifications: + tasks.append(self.send_notification(notification)) + + await asyncio.gather(*tasks, return_exceptions=True) + + async def mark_as_read(self, notification_id: str) -> bool: + """Mark notification as read""" + notification = self.notification_store.get(notification_id) + if notification: + notification.status = NotificationStatus.READ + notification.read_at = datetime.now() + self.notification_store[notification_id] = notification + logger.info(f"Notification {notification_id} marked as read") + return True + return False + + async def delete_notification(self, notification_id: str) -> bool: + """Delete a notification""" + if notification_id in self.notification_store: + del self.notification_store[notification_id] + logger.info(f"Notification {notification_id} deleted") + return True + return False + + async def get_notification(self, notification_id: str) -> Optional[Notification]: + """Get a notification by ID""" + return self.notification_store.get(notification_id) + + async def get_user_notifications( + self, + user_id: str, + status: Optional[NotificationStatus] = None, + channel: Optional[NotificationChannel] = None, + limit: int = 50, + offset: int = 0 + ) -> List[Notification]: + """Get notifications for a user""" + notifications = [] + + for notification in self.notification_store.values(): + if 
notification.user_id != user_id: + continue + if status and notification.status != status: + continue + if channel and notification.channel != channel: + continue + notifications.append(notification) + + # Sort by created_at descending + notifications.sort(key=lambda x: x.created_at, reverse=True) + + # Apply pagination + return notifications[offset:offset + limit] + + async def retry_notification(self, notification: Notification): + """Retry a failed notification""" + notification.retry_count += 1 + notification.status = NotificationStatus.PENDING + notification.error_message = None + await self.send_notification(notification) + + async def get_notification_history( + self, + user_id: Optional[str] = None, + channel: Optional[NotificationChannel] = None, + status: Optional[NotificationStatus] = None, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + limit: int = 100 + ) -> List[NotificationHistory]: + """Get notification history""" + history = [] + + for entry in self.history_store: + if user_id and entry.user_id != user_id: + continue + if channel and entry.channel != channel: + continue + if status and entry.status != status: + continue + if start_date and entry.sent_at and entry.sent_at < start_date: + continue + if end_date and entry.sent_at and entry.sent_at > end_date: + continue + history.append(entry) + + # Sort by sent_at descending and limit + history.sort(key=lambda x: x.sent_at or datetime.min, reverse=True) + return history[:limit] + + async def get_analytics(self, start_date: datetime, end_date: datetime) -> Dict[str, Any]: + """Get notification analytics""" + total_sent = 0 + total_delivered = 0 + total_read = 0 + total_failed = 0 + channel_stats = {} + + for notification in self.notification_store.values(): + if notification.created_at < start_date or notification.created_at > end_date: + continue + + if notification.status == NotificationStatus.SENT: + total_sent += 1 + elif notification.status == 
NotificationStatus.DELIVERED: + total_delivered += 1 + elif notification.status == NotificationStatus.READ: + total_read += 1 + elif notification.status == NotificationStatus.FAILED: + total_failed += 1 + + # Channel stats + channel_name = notification.channel.value + if channel_name not in channel_stats: + channel_stats[channel_name] = { + "sent": 0, + "delivered": 0, + "read": 0, + "failed": 0 + } + + if notification.status == NotificationStatus.SENT: + channel_stats[channel_name]["sent"] += 1 + elif notification.status == NotificationStatus.DELIVERED: + channel_stats[channel_name]["delivered"] += 1 + elif notification.status == NotificationStatus.READ: + channel_stats[channel_name]["read"] += 1 + elif notification.status == NotificationStatus.FAILED: + channel_stats[channel_name]["failed"] += 1 + + total = total_sent + total_delivered + total_read + total_failed + + return { + "period": f"{start_date.isoformat()} to {end_date.isoformat()}", + "total_notifications": total, + "total_sent": total_sent, + "total_delivered": total_delivered, + "total_read": total_read, + "total_failed": total_failed, + "delivery_rate": (total_delivered / total * 100) if total > 0 else 0, + "read_rate": (total_read / total * 100) if total > 0 else 0, + "channel_stats": channel_stats + } + + async def register_device_token( + self, + user_id: str, + device_token: str, + device_type: str + ) -> bool: + """Register a device token for push notifications""" + if user_id not in self.device_tokens: + self.device_tokens[user_id] = [] + + # Check if token already exists + for token in self.device_tokens[user_id]: + if token["token"] == device_token: + # Update existing token + token["device_type"] = device_type + token["updated_at"] = datetime.now() + return True + + # Add new token + self.device_tokens[user_id].append({ + "token": device_token, + "device_type": device_type, + "created_at": datetime.now(), + "updated_at": datetime.now() + }) + + logger.info(f"Registered device token for user 
{user_id}") + return True + + async def unregister_device_token(self, device_token: str) -> bool: + """Unregister a device token""" + for user_id, tokens in self.device_tokens.items(): + for i, token in enumerate(tokens): + if token["token"] == device_token: + del self.device_tokens[user_id][i] + logger.info(f"Unregistered device token for user {user_id}") + return True + return False + + def _get_max_retries(self, priority: NotificationPriority) -> int: + """Get max retries based on priority""" + retry_map = { + NotificationPriority.LOW: 1, + NotificationPriority.NORMAL: 3, + NotificationPriority.HIGH: 5, + NotificationPriority.URGENT: 10 + } + return retry_map.get(priority, 3) + + async def _add_to_history(self, notification: Notification): + """Add notification to history""" + history_entry = NotificationHistory( + notification_id=notification.id, + user_id=notification.user_id, + channel=notification.channel, + status=notification.status, + title=notification.title, + message=notification.message, + sent_at=notification.sent_at, + delivered_at=notification.delivered_at, + read_at=notification.read_at, + error_message=notification.error_message, + metadata={"priority": notification.priority.value} + ) + self.history_store.append(history_entry) + + async def _process_notification_queue(self): + """Process queued notifications""" + while self.is_running: + try: + # Get notification from queue + notification_data = await self.queue_manager.dequeue_notification() + if notification_data: + notification = Notification(**notification_data) + await self.send_notification(notification) + except Exception as e: + logger.error(f"Error processing notification queue: {e}") + + await asyncio.sleep(1) + + async def _process_scheduled_notifications(self): + """Process scheduled notifications""" + while self.is_running: + try: + # Check for scheduled notifications + now = datetime.now() + for notification in self.notification_store.values(): + if (notification.scheduled_at and + 
notification.scheduled_at <= now and + notification.status == NotificationStatus.PENDING): + await self.send_notification(notification) + except Exception as e: + logger.error(f"Error processing scheduled notifications: {e}") + + await asyncio.sleep(10) # Check every 10 seconds \ No newline at end of file diff --git a/services/notifications/backend/preference_manager.py b/services/notifications/backend/preference_manager.py new file mode 100644 index 0000000..cf51bbe --- /dev/null +++ b/services/notifications/backend/preference_manager.py @@ -0,0 +1,340 @@ +""" +Preference Manager for user notification preferences +""" +import logging +from typing import Optional, Dict, Any, List +from datetime import datetime +import motor.motor_asyncio +from models import NotificationPreference, NotificationChannel, NotificationCategory + +logger = logging.getLogger(__name__) + +class PreferenceManager: + """Manages user notification preferences""" + + def __init__(self, mongodb_url: str = "mongodb://mongodb:27017", database_name: str = "notifications"): + self.mongodb_url = mongodb_url + self.database_name = database_name + self.client = None + self.db = None + self.preferences_collection = None + self.is_connected = False + + # In-memory cache for demo + self.preferences_cache = {} + + async def connect(self): + """Connect to MongoDB""" + try: + self.client = motor.motor_asyncio.AsyncIOMotorClient(self.mongodb_url) + self.db = self.client[self.database_name] + self.preferences_collection = self.db["preferences"] + + # Test connection + await self.client.admin.command('ping') + self.is_connected = True + + # Create indexes + await self._create_indexes() + + logger.info("Connected to MongoDB for preferences") + + except Exception as e: + logger.error(f"Failed to connect to MongoDB: {e}") + # Fallback to in-memory storage + self.is_connected = False + logger.warning("Using in-memory storage for preferences") + + async def close(self): + """Close MongoDB connection""" + if 
self.client: + self.client.close() + self.is_connected = False + logger.info("Disconnected from MongoDB") + + async def _create_indexes(self): + """Create database indexes""" + if self.preferences_collection: + try: + await self.preferences_collection.create_index("user_id", unique=True) + logger.info("Created indexes for preferences collection") + except Exception as e: + logger.error(f"Failed to create indexes: {e}") + + async def get_user_preferences(self, user_id: str) -> Optional[NotificationPreference]: + """Get notification preferences for a user""" + try: + # Check cache first + if user_id in self.preferences_cache: + return self.preferences_cache[user_id] + + if self.is_connected and self.preferences_collection: + # Get from MongoDB + doc = await self.preferences_collection.find_one({"user_id": user_id}) + + if doc: + # Convert document to model + doc.pop('_id', None) # Remove MongoDB ID + preference = NotificationPreference(**doc) + + # Update cache + self.preferences_cache[user_id] = preference + + return preference + + # Return default preferences if not found + return self._get_default_preferences(user_id) + + except Exception as e: + logger.error(f"Failed to get preferences for user {user_id}: {e}") + return self._get_default_preferences(user_id) + + async def update_user_preferences( + self, + user_id: str, + preferences: NotificationPreference + ) -> bool: + """Update notification preferences for a user""" + try: + preferences.user_id = user_id + preferences.updated_at = datetime.now() + + # Update cache + self.preferences_cache[user_id] = preferences + + if self.is_connected and self.preferences_collection: + # Convert to dict for MongoDB + pref_dict = preferences.dict() + + # Upsert in MongoDB + result = await self.preferences_collection.update_one( + {"user_id": user_id}, + {"$set": pref_dict}, + upsert=True + ) + + logger.info(f"Updated preferences for user {user_id}") + return result.modified_count > 0 or result.upserted_id is not None + + # If 
not connected, just use cache + return True + + except Exception as e: + logger.error(f"Failed to update preferences for user {user_id}: {e}") + return False + + async def unsubscribe_category(self, user_id: str, category: str) -> bool: + """Unsubscribe user from a notification category""" + try: + preferences = await self.get_user_preferences(user_id) + + if not preferences: + preferences = self._get_default_preferences(user_id) + + # Update category preference + if hasattr(NotificationCategory, category.upper()): + cat_enum = NotificationCategory(category.lower()) + preferences.categories[cat_enum] = False + + # Save updated preferences + return await self.update_user_preferences(user_id, preferences) + + return False + + except Exception as e: + logger.error(f"Failed to unsubscribe user {user_id} from {category}: {e}") + return False + + async def subscribe_category(self, user_id: str, category: str) -> bool: + """Subscribe user to a notification category""" + try: + preferences = await self.get_user_preferences(user_id) + + if not preferences: + preferences = self._get_default_preferences(user_id) + + # Update category preference + if hasattr(NotificationCategory, category.upper()): + cat_enum = NotificationCategory(category.lower()) + preferences.categories[cat_enum] = True + + # Save updated preferences + return await self.update_user_preferences(user_id, preferences) + + return False + + except Exception as e: + logger.error(f"Failed to subscribe user {user_id} to {category}: {e}") + return False + + async def enable_channel(self, user_id: str, channel: NotificationChannel) -> bool: + """Enable a notification channel for user""" + try: + preferences = await self.get_user_preferences(user_id) + + if not preferences: + preferences = self._get_default_preferences(user_id) + + preferences.channels[channel] = True + + return await self.update_user_preferences(user_id, preferences) + + except Exception as e: + logger.error(f"Failed to enable channel {channel} for 
user {user_id}: {e}") + return False + + async def disable_channel(self, user_id: str, channel: NotificationChannel) -> bool: + """Disable a notification channel for user""" + try: + preferences = await self.get_user_preferences(user_id) + + if not preferences: + preferences = self._get_default_preferences(user_id) + + preferences.channels[channel] = False + + return await self.update_user_preferences(user_id, preferences) + + except Exception as e: + logger.error(f"Failed to disable channel {channel} for user {user_id}: {e}") + return False + + async def set_quiet_hours( + self, + user_id: str, + start_time: str, + end_time: str + ) -> bool: + """Set quiet hours for user""" + try: + preferences = await self.get_user_preferences(user_id) + + if not preferences: + preferences = self._get_default_preferences(user_id) + + preferences.quiet_hours = { + "start": start_time, + "end": end_time + } + + return await self.update_user_preferences(user_id, preferences) + + except Exception as e: + logger.error(f"Failed to set quiet hours for user {user_id}: {e}") + return False + + async def clear_quiet_hours(self, user_id: str) -> bool: + """Clear quiet hours for user""" + try: + preferences = await self.get_user_preferences(user_id) + + if not preferences: + preferences = self._get_default_preferences(user_id) + + preferences.quiet_hours = None + + return await self.update_user_preferences(user_id, preferences) + + except Exception as e: + logger.error(f"Failed to clear quiet hours for user {user_id}: {e}") + return False + + async def set_email_frequency(self, user_id: str, frequency: str) -> bool: + """Set email notification frequency""" + try: + if frequency not in ["immediate", "daily", "weekly"]: + return False + + preferences = await self.get_user_preferences(user_id) + + if not preferences: + preferences = self._get_default_preferences(user_id) + + preferences.email_frequency = frequency + + return await self.update_user_preferences(user_id, preferences) + + except 
Exception as e: + logger.error(f"Failed to set email frequency for user {user_id}: {e}") + return False + + async def batch_get_preferences(self, user_ids: List[str]) -> Dict[str, NotificationPreference]: + """Get preferences for multiple users""" + results = {} + + for user_id in user_ids: + pref = await self.get_user_preferences(user_id) + if pref: + results[user_id] = pref + + return results + + async def delete_user_preferences(self, user_id: str) -> bool: + """Delete all preferences for a user""" + try: + # Remove from cache + if user_id in self.preferences_cache: + del self.preferences_cache[user_id] + + if self.is_connected and self.preferences_collection: + # Delete from MongoDB + result = await self.preferences_collection.delete_one({"user_id": user_id}) + logger.info(f"Deleted preferences for user {user_id}") + return result.deleted_count > 0 + + return True + + except Exception as e: + logger.error(f"Failed to delete preferences for user {user_id}: {e}") + return False + + def _get_default_preferences(self, user_id: str) -> NotificationPreference: + """Get default notification preferences""" + return NotificationPreference( + user_id=user_id, + channels={ + NotificationChannel.EMAIL: True, + NotificationChannel.SMS: False, + NotificationChannel.PUSH: True, + NotificationChannel.IN_APP: True + }, + categories={ + NotificationCategory.SYSTEM: True, + NotificationCategory.MARKETING: False, + NotificationCategory.TRANSACTION: True, + NotificationCategory.SOCIAL: True, + NotificationCategory.SECURITY: True, + NotificationCategory.UPDATE: True + }, + email_frequency="immediate", + timezone="UTC", + language="en" + ) + + async def is_notification_allowed( + self, + user_id: str, + channel: NotificationChannel, + category: NotificationCategory + ) -> bool: + """Check if notification is allowed based on preferences""" + preferences = await self.get_user_preferences(user_id) + + if not preferences: + return True # Allow by default if no preferences + + # Check 
channel preference + if not preferences.channels.get(channel, True): + return False + + # Check category preference + if not preferences.categories.get(category, True): + return False + + # Check quiet hours + if preferences.quiet_hours and channel != NotificationChannel.IN_APP: + # Would need to check current time against quiet hours + # For demo, we'll allow all + pass + + return True \ No newline at end of file diff --git a/services/notifications/backend/queue_manager.py b/services/notifications/backend/queue_manager.py new file mode 100644 index 0000000..37c2c82 --- /dev/null +++ b/services/notifications/backend/queue_manager.py @@ -0,0 +1,304 @@ +""" +Notification Queue Manager with priority support +""" +import logging +import json +import asyncio +from typing import Optional, Dict, Any, List +from datetime import datetime +import redis.asyncio as redis +from models import NotificationPriority + +logger = logging.getLogger(__name__) + +class NotificationQueueManager: + """Manages notification queues with priority levels""" + + def __init__(self, redis_url: str = "redis://redis:6379"): + self.redis_url = redis_url + self.redis_client = None + self.is_connected = False + + # Queue names by priority + self.queue_names = { + NotificationPriority.URGENT: "notifications:queue:urgent", + NotificationPriority.HIGH: "notifications:queue:high", + NotificationPriority.NORMAL: "notifications:queue:normal", + NotificationPriority.LOW: "notifications:queue:low" + } + + # Scheduled notifications sorted set + self.scheduled_key = "notifications:scheduled" + + # Failed notifications queue (DLQ) + self.dlq_key = "notifications:dlq" + + async def connect(self): + """Connect to Redis""" + try: + self.redis_client = await redis.from_url(self.redis_url) + await self.redis_client.ping() + self.is_connected = True + logger.info("Connected to Redis for notification queue") + except Exception as e: + logger.error(f"Failed to connect to Redis: {e}") + self.is_connected = False + raise 
+ + async def close(self): + """Close Redis connection""" + if self.redis_client: + await self.redis_client.close() + self.is_connected = False + logger.info("Disconnected from Redis") + + async def enqueue_notification(self, notification: Any, priority: Optional[NotificationPriority] = None): + """Add notification to queue based on priority""" + if not self.is_connected: + logger.error("Redis not connected") + return False + + try: + # Use notification's priority or provided priority + if priority is None: + priority = notification.priority if hasattr(notification, 'priority') else NotificationPriority.NORMAL + + queue_name = self.queue_names.get(priority, self.queue_names[NotificationPriority.NORMAL]) + + # Serialize notification + notification_data = notification.dict() if hasattr(notification, 'dict') else notification + notification_json = json.dumps(notification_data, default=str) + + # Add to appropriate queue + await self.redis_client.lpush(queue_name, notification_json) + + logger.info(f"Enqueued notification to {queue_name}") + return True + + except Exception as e: + logger.error(f"Failed to enqueue notification: {e}") + return False + + async def dequeue_notification(self, timeout: int = 1) -> Optional[Dict[str, Any]]: + """Dequeue notification with priority order""" + if not self.is_connected: + return None + + try: + # Check queues in priority order + for priority in [NotificationPriority.URGENT, NotificationPriority.HIGH, + NotificationPriority.NORMAL, NotificationPriority.LOW]: + queue_name = self.queue_names[priority] + + # Try to get from this queue + result = await self.redis_client.brpop(queue_name, timeout=timeout) + + if result: + _, notification_json = result + notification_data = json.loads(notification_json) + logger.debug(f"Dequeued notification from {queue_name}") + return notification_data + + return None + + except Exception as e: + logger.error(f"Failed to dequeue notification: {e}") + return None + + async def 
schedule_notification(self, notification: Any, scheduled_time: datetime): + """Schedule a notification for future delivery""" + if not self.is_connected: + return False + + try: + # Serialize notification + notification_data = notification.dict() if hasattr(notification, 'dict') else notification + notification_json = json.dumps(notification_data, default=str) + + # Add to scheduled set with timestamp as score + timestamp = scheduled_time.timestamp() + await self.redis_client.zadd(self.scheduled_key, {notification_json: timestamp}) + + logger.info(f"Scheduled notification for {scheduled_time}") + return True + + except Exception as e: + logger.error(f"Failed to schedule notification: {e}") + return False + + async def get_due_notifications(self) -> List[Dict[str, Any]]: + """Get notifications that are due for delivery""" + if not self.is_connected: + return [] + + try: + # Get current timestamp + now = datetime.now().timestamp() + + # Get all notifications with score <= now + results = await self.redis_client.zrangebyscore( + self.scheduled_key, + min=0, + max=now, + withscores=False + ) + + notifications = [] + for notification_json in results: + notification_data = json.loads(notification_json) + notifications.append(notification_data) + + # Remove from scheduled set + await self.redis_client.zrem(self.scheduled_key, notification_json) + + if notifications: + logger.info(f"Retrieved {len(notifications)} due notifications") + + return notifications + + except Exception as e: + logger.error(f"Failed to get due notifications: {e}") + return [] + + async def add_to_dlq(self, notification: Any, error_message: str): + """Add failed notification to Dead Letter Queue""" + if not self.is_connected: + return False + + try: + # Add error information + notification_data = notification.dict() if hasattr(notification, 'dict') else notification + notification_data['dlq_error'] = error_message + notification_data['dlq_timestamp'] = datetime.now().isoformat() + + 
notification_json = json.dumps(notification_data, default=str) + + # Add to DLQ + await self.redis_client.lpush(self.dlq_key, notification_json) + + logger.info(f"Added notification to DLQ: {error_message}") + return True + + except Exception as e: + logger.error(f"Failed to add to DLQ: {e}") + return False + + async def get_dlq_notifications(self, limit: int = 10) -> List[Dict[str, Any]]: + """Get notifications from Dead Letter Queue""" + if not self.is_connected: + return [] + + try: + # Get from DLQ + results = await self.redis_client.lrange(self.dlq_key, 0, limit - 1) + + notifications = [] + for notification_json in results: + notification_data = json.loads(notification_json) + notifications.append(notification_data) + + return notifications + + except Exception as e: + logger.error(f"Failed to get DLQ notifications: {e}") + return [] + + async def retry_dlq_notification(self, index: int) -> bool: + """Retry a notification from DLQ""" + if not self.is_connected: + return False + + try: + # Get notification at index + notification_json = await self.redis_client.lindex(self.dlq_key, index) + + if not notification_json: + return False + + # Parse and remove DLQ info + notification_data = json.loads(notification_json) + notification_data.pop('dlq_error', None) + notification_data.pop('dlq_timestamp', None) + + # Re-enqueue + priority = NotificationPriority(notification_data.get('priority', 'normal')) + queue_name = self.queue_names[priority] + + new_json = json.dumps(notification_data, default=str) + await self.redis_client.lpush(queue_name, new_json) + + # Remove from DLQ + await self.redis_client.lrem(self.dlq_key, 1, notification_json) + + logger.info(f"Retried DLQ notification at index {index}") + return True + + except Exception as e: + logger.error(f"Failed to retry DLQ notification: {e}") + return False + + async def get_queue_status(self) -> Dict[str, Any]: + """Get current queue status""" + if not self.is_connected: + return {"status": "disconnected"} + + 
try: + status = { + "status": "connected", + "queues": {}, + "scheduled": 0, + "dlq": 0 + } + + # Get queue lengths + for priority, queue_name in self.queue_names.items(): + length = await self.redis_client.llen(queue_name) + status["queues"][priority.value] = length + + # Get scheduled count + status["scheduled"] = await self.redis_client.zcard(self.scheduled_key) + + # Get DLQ count + status["dlq"] = await self.redis_client.llen(self.dlq_key) + + return status + + except Exception as e: + logger.error(f"Failed to get queue status: {e}") + return {"status": "error", "error": str(e)} + + async def clear_queue(self, priority: NotificationPriority) -> bool: + """Clear a specific priority queue""" + if not self.is_connected: + return False + + try: + queue_name = self.queue_names[priority] + await self.redis_client.delete(queue_name) + logger.info(f"Cleared queue: {queue_name}") + return True + + except Exception as e: + logger.error(f"Failed to clear queue: {e}") + return False + + async def clear_all_queues(self) -> bool: + """Clear all notification queues""" + if not self.is_connected: + return False + + try: + # Clear all priority queues + for queue_name in self.queue_names.values(): + await self.redis_client.delete(queue_name) + + # Clear scheduled and DLQ + await self.redis_client.delete(self.scheduled_key) + await self.redis_client.delete(self.dlq_key) + + logger.info("Cleared all notification queues") + return True + + except Exception as e: + logger.error(f"Failed to clear all queues: {e}") + return False \ No newline at end of file diff --git a/services/notifications/backend/requirements.txt b/services/notifications/backend/requirements.txt new file mode 100644 index 0000000..d0501d7 --- /dev/null +++ b/services/notifications/backend/requirements.txt @@ -0,0 +1,11 @@ +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +pydantic==2.5.3 +python-dotenv==1.0.0 +redis==5.0.1 +motor==3.5.1 +pymongo==4.6.1 +httpx==0.26.0 +websockets==12.0 +aiofiles==23.2.1 
+python-multipart==0.0.6 \ No newline at end of file diff --git a/services/notifications/backend/template_engine.py b/services/notifications/backend/template_engine.py new file mode 100644 index 0000000..9c2be75 --- /dev/null +++ b/services/notifications/backend/template_engine.py @@ -0,0 +1,334 @@ +""" +Template Engine for notification templates +""" +import logging +import re +from typing import Dict, Any, List, Optional +from datetime import datetime +import uuid +from models import NotificationTemplate, NotificationChannel, NotificationCategory + +logger = logging.getLogger(__name__) + +class TemplateEngine: + """Manages and renders notification templates""" + + def __init__(self): + self.templates = {} # In-memory storage for demo + self._load_default_templates() + + async def load_templates(self): + """Load templates from storage""" + # In production, would load from database + logger.info(f"Loaded {len(self.templates)} templates") + + def _load_default_templates(self): + """Load default system templates""" + default_templates = [ + NotificationTemplate( + id="welcome", + name="Welcome Email", + channel=NotificationChannel.EMAIL, + category=NotificationCategory.SYSTEM, + subject_template="Welcome to {{app_name}}!", + body_template=""" + Hi {{user_name}}, + + Welcome to {{app_name}}! We're excited to have you on board. + + Here are some things you can do to get started: + - Complete your profile + - Explore our features + - Connect with other users + + If you have any questions, feel free to reach out to our support team. + + Best regards, + The {{app_name}} Team + """, + variables=["user_name", "app_name"] + ), + NotificationTemplate( + id="password_reset", + name="Password Reset", + channel=NotificationChannel.EMAIL, + category=NotificationCategory.SECURITY, + subject_template="Password Reset Request", + body_template=""" + Hi {{user_name}}, + + We received a request to reset your password for {{app_name}}. 
+ + Click the link below to reset your password: + {{reset_link}} + + This link will expire in {{expiry_hours}} hours. + + If you didn't request this, please ignore this email or contact support. + + Best regards, + The {{app_name}} Team + """, + variables=["user_name", "app_name", "reset_link", "expiry_hours"] + ), + NotificationTemplate( + id="order_confirmation", + name="Order Confirmation", + channel=NotificationChannel.EMAIL, + category=NotificationCategory.TRANSACTION, + subject_template="Order #{{order_id}} Confirmed", + body_template=""" + Hi {{user_name}}, + + Your order #{{order_id}} has been confirmed! + + Order Details: + - Total: {{order_total}} + - Items: {{item_count}} + - Estimated Delivery: {{delivery_date}} + + You can track your order status at: {{tracking_link}} + + Thank you for your purchase! + + Best regards, + The {{app_name}} Team + """, + variables=["user_name", "app_name", "order_id", "order_total", "item_count", "delivery_date", "tracking_link"] + ), + NotificationTemplate( + id="sms_verification", + name="SMS Verification", + channel=NotificationChannel.SMS, + category=NotificationCategory.SECURITY, + body_template="Your {{app_name}} verification code is: {{code}}. 
Valid for {{expiry_minutes}} minutes.", + variables=["app_name", "code", "expiry_minutes"] + ), + NotificationTemplate( + id="push_reminder", + name="Push Reminder", + channel=NotificationChannel.PUSH, + category=NotificationCategory.UPDATE, + body_template="{{reminder_text}}", + variables=["reminder_text"] + ), + NotificationTemplate( + id="in_app_alert", + name="In-App Alert", + channel=NotificationChannel.IN_APP, + category=NotificationCategory.SYSTEM, + body_template="{{alert_message}}", + variables=["alert_message"] + ), + NotificationTemplate( + id="weekly_digest", + name="Weekly Digest", + channel=NotificationChannel.EMAIL, + category=NotificationCategory.MARKETING, + subject_template="Your Weekly {{app_name}} Digest", + body_template=""" + Hi {{user_name}}, + + Here's what happened this week on {{app_name}}: + + 📊 Stats: + - New connections: {{new_connections}} + - Messages received: {{messages_count}} + - Activities completed: {{activities_count}} + + 🔥 Trending: + {{trending_items}} + + 💡 Tip of the week: + {{weekly_tip}} + + See you next week! + The {{app_name}} Team + """, + variables=["user_name", "app_name", "new_connections", "messages_count", "activities_count", "trending_items", "weekly_tip"] + ), + NotificationTemplate( + id="friend_request", + name="Friend Request", + channel=NotificationChannel.IN_APP, + category=NotificationCategory.SOCIAL, + body_template="{{sender_name}} sent you a friend request. 
{{personal_message}}", + variables=["sender_name", "personal_message"] + ) + ] + + for template in default_templates: + self.templates[template.id] = template + + async def create_template(self, template: NotificationTemplate) -> str: + """Create a new template""" + if not template.id: + template.id = str(uuid.uuid4()) + + # Validate template + if not self._validate_template(template): + raise ValueError("Invalid template format") + + # Extract variables from template + template.variables = self._extract_variables(template.body_template) + if template.subject_template: + template.variables.extend(self._extract_variables(template.subject_template)) + template.variables = list(set(template.variables)) # Remove duplicates + + # Store template + self.templates[template.id] = template + + logger.info(f"Created template: {template.id}") + return template.id + + async def update_template(self, template_id: str, template: NotificationTemplate) -> bool: + """Update an existing template""" + if template_id not in self.templates: + return False + + # Validate template + if not self._validate_template(template): + raise ValueError("Invalid template format") + + # Update template + template.id = template_id + template.updated_at = datetime.now() + + # Re-extract variables + template.variables = self._extract_variables(template.body_template) + if template.subject_template: + template.variables.extend(self._extract_variables(template.subject_template)) + template.variables = list(set(template.variables)) + + self.templates[template_id] = template + + logger.info(f"Updated template: {template_id}") + return True + + async def get_template(self, template_id: str) -> Optional[NotificationTemplate]: + """Get a template by ID""" + return self.templates.get(template_id) + + async def get_all_templates(self) -> List[NotificationTemplate]: + """Get all templates""" + return list(self.templates.values()) + + async def delete_template(self, template_id: str) -> bool: + """Delete a 
template""" + if template_id in self.templates: + del self.templates[template_id] + logger.info(f"Deleted template: {template_id}") + return True + return False + + async def render_template(self, template: NotificationTemplate, variables: Dict[str, Any]) -> str: + """Render a template with variables""" + if not template: + raise ValueError("Template not provided") + + # Start with body template + rendered = template.body_template + + # Replace variables + for var_name in template.variables: + placeholder = f"{{{{{var_name}}}}}" + value = variables.get(var_name, f"[{var_name}]") # Default to placeholder if not provided + + # Convert non-string values to string + if not isinstance(value, str): + value = str(value) + + rendered = rendered.replace(placeholder, value) + + # Clean up extra whitespace + rendered = re.sub(r'\n\s*\n', '\n\n', rendered.strip()) + + return rendered + + async def render_subject(self, template: NotificationTemplate, variables: Dict[str, Any]) -> Optional[str]: + """Render a template subject with variables""" + if not template or not template.subject_template: + return None + + rendered = template.subject_template + + # Replace variables + for var_name in self._extract_variables(template.subject_template): + placeholder = f"{{{{{var_name}}}}}" + value = variables.get(var_name, f"[{var_name}]") + + if not isinstance(value, str): + value = str(value) + + rendered = rendered.replace(placeholder, value) + + return rendered + + def _validate_template(self, template: NotificationTemplate) -> bool: + """Validate template format""" + if not template.name or not template.body_template: + return False + + # Check for basic template syntax + try: + # Check for balanced braces + open_count = template.body_template.count("{{") + close_count = template.body_template.count("}}") + if open_count != close_count: + return False + + if template.subject_template: + open_count = template.subject_template.count("{{") + close_count = 
template.subject_template.count("}}") + if open_count != close_count: + return False + + return True + + except Exception as e: + logger.error(f"Template validation error: {e}") + return False + + def _extract_variables(self, template_text: str) -> List[str]: + """Extract variable names from template text""" + if not template_text: + return [] + + # Find all {{variable_name}} patterns + pattern = r'\{\{(\w+)\}\}' + matches = re.findall(pattern, template_text) + + return list(set(matches)) # Return unique variable names + + async def get_templates_by_channel(self, channel: NotificationChannel) -> List[NotificationTemplate]: + """Get templates for a specific channel""" + return [t for t in self.templates.values() if t.channel == channel] + + async def get_templates_by_category(self, category: NotificationCategory) -> List[NotificationTemplate]: + """Get templates for a specific category""" + return [t for t in self.templates.values() if t.category == category] + + async def clone_template(self, template_id: str, new_name: str) -> str: + """Clone an existing template""" + original = self.templates.get(template_id) + if not original: + raise ValueError(f"Template {template_id} not found") + + # Create new template + new_template = NotificationTemplate( + id=str(uuid.uuid4()), + name=new_name, + channel=original.channel, + category=original.category, + subject_template=original.subject_template, + body_template=original.body_template, + variables=original.variables.copy(), + metadata=original.metadata.copy(), + is_active=True, + created_at=datetime.now() + ) + + self.templates[new_template.id] = new_template + + logger.info(f"Cloned template {template_id} to {new_template.id}") + return new_template.id \ No newline at end of file diff --git a/services/notifications/backend/test_notifications.py b/services/notifications/backend/test_notifications.py new file mode 100644 index 0000000..3a820dc --- /dev/null +++ b/services/notifications/backend/test_notifications.py @@ 
-0,0 +1,268 @@ +""" +Test script for Notification Service +""" +import asyncio +import httpx +import websockets +import json +from datetime import datetime, timedelta + +BASE_URL = "http://localhost:8013" +WS_URL = "ws://localhost:8013/ws/notifications" + +async def test_notification_api(): + """Test notification API endpoints""" + async with httpx.AsyncClient() as client: + print("\n🔔 Testing Notification Service API...") + + # Test health check + print("\n1. Testing health check...") + response = await client.get(f"{BASE_URL}/health") + print(f"Health check: {response.json()}") + + # Test sending single notification + print("\n2. Testing single notification...") + notification_data = { + "user_id": "test_user_123", + "title": "Welcome to Our App!", + "message": "Thank you for joining our platform. We're excited to have you!", + "channels": ["in_app", "email"], + "priority": "high", + "category": "system", + "data": { + "action_url": "https://example.com/welcome", + "icon": "welcome" + } + } + + response = await client.post( + f"{BASE_URL}/api/notifications/send", + json=notification_data + ) + notification_result = response.json() + print(f"Notification sent: {notification_result}") + notification_id = notification_result.get("notification_id") + + # Test bulk notifications + print("\n3. Testing bulk notifications...") + bulk_data = { + "user_ids": ["user1", "user2", "user3"], + "title": "System Maintenance Notice", + "message": "We will be performing system maintenance tonight from 2-4 AM.", + "channels": ["in_app", "push"], + "priority": "normal", + "category": "update" + } + + response = await client.post( + f"{BASE_URL}/api/notifications/send-bulk", + json=bulk_data + ) + print(f"Bulk notifications: {response.json()}") + + # Test scheduled notification + print("\n4. 
Testing scheduled notification...") + scheduled_time = datetime.now() + timedelta(minutes=5) + scheduled_data = { + "user_id": "test_user_123", + "title": "Reminder: Meeting in 5 minutes", + "message": "Your scheduled meeting is about to start.", + "channels": ["in_app", "push"], + "priority": "urgent", + "category": "system", + "schedule_at": scheduled_time.isoformat() + } + + response = await client.post( + f"{BASE_URL}/api/notifications/send", + json=scheduled_data + ) + print(f"Scheduled notification: {response.json()}") + + # Test get user notifications + print("\n5. Testing get user notifications...") + response = await client.get( + f"{BASE_URL}/api/notifications/user/test_user_123" + ) + notifications = response.json() + print(f"User notifications: Found {notifications['count']} notifications") + + # Test mark as read + if notification_id: + print("\n6. Testing mark as read...") + response = await client.patch( + f"{BASE_URL}/api/notifications/{notification_id}/read" + ) + print(f"Mark as read: {response.json()}") + + # Test templates + print("\n7. Testing templates...") + response = await client.get(f"{BASE_URL}/api/templates") + templates = response.json() + print(f"Available templates: {len(templates['templates'])} templates") + + # Test preferences + print("\n8. 
Testing user preferences...") + + # Get preferences + response = await client.get( + f"{BASE_URL}/api/preferences/test_user_123" + ) + print(f"Current preferences: {response.json()}") + + # Update preferences + new_preferences = { + "user_id": "test_user_123", + "channels": { + "email": True, + "sms": False, + "push": True, + "in_app": True + }, + "categories": { + "system": True, + "marketing": False, + "transaction": True, + "social": True, + "security": True, + "update": True + }, + "email_frequency": "daily", + "timezone": "America/New_York", + "language": "en" + } + + response = await client.put( + f"{BASE_URL}/api/preferences/test_user_123", + json=new_preferences + ) + print(f"Update preferences: {response.json()}") + + # Test unsubscribe + response = await client.post( + f"{BASE_URL}/api/preferences/test_user_123/unsubscribe/marketing" + ) + print(f"Unsubscribe from marketing: {response.json()}") + + # Test notification with template + print("\n9. Testing notification with template...") + template_notification = { + "user_id": "test_user_123", + "title": "Password Reset Request", + "message": "", # Will be filled by template + "channels": ["email"], + "priority": "high", + "category": "security", + "template_id": "password_reset", + "data": { + "user_name": "John Doe", + "app_name": "Our App", + "reset_link": "https://example.com/reset/abc123", + "expiry_hours": 24 + } + } + + response = await client.post( + f"{BASE_URL}/api/notifications/send", + json=template_notification + ) + print(f"Template notification: {response.json()}") + + # Test queue status + print("\n10. Testing queue status...") + response = await client.get(f"{BASE_URL}/api/queue/status") + print(f"Queue status: {response.json()}") + + # Test analytics + print("\n11. Testing analytics...") + response = await client.get(f"{BASE_URL}/api/analytics") + analytics = response.json() + print(f"Analytics overview: {analytics}") + + # Test notification history + print("\n12. 
Testing notification history...") + response = await client.get( + f"{BASE_URL}/api/history", + params={"user_id": "test_user_123", "limit": 10} + ) + history = response.json() + print(f"Notification history: {history['count']} entries") + + # Test device registration + print("\n13. Testing device registration...") + response = await client.post( + f"{BASE_URL}/api/devices/register", + params={ + "user_id": "test_user_123", + "device_token": "dummy_token_12345", + "device_type": "ios" + } + ) + print(f"Device registration: {response.json()}") + +async def test_websocket(): + """Test WebSocket connection for real-time notifications""" + print("\n\n🌐 Testing WebSocket Connection...") + + try: + uri = f"{WS_URL}/test_user_123" + async with websockets.connect(uri) as websocket: + print(f"Connected to WebSocket at {uri}") + + # Listen for welcome message + message = await websocket.recv() + data = json.loads(message) + print(f"Welcome message: {data}") + + # Send ping + await websocket.send("ping") + pong = await websocket.recv() + print(f"Ping response: {pong}") + + # Send notification via API while connected + async with httpx.AsyncClient() as client: + notification_data = { + "user_id": "test_user_123", + "title": "Real-time Test", + "message": "This should appear in WebSocket!", + "channels": ["in_app"], + "priority": "normal" + } + + response = await client.post( + f"{BASE_URL}/api/notifications/send", + json=notification_data + ) + print(f"Sent notification: {response.json()}") + + # Wait for real-time notification + print("Waiting for real-time notification...") + try: + notification = await asyncio.wait_for(websocket.recv(), timeout=5.0) + print(f"Received real-time notification: {json.loads(notification)}") + except asyncio.TimeoutError: + print("No real-time notification received (timeout)") + + print("WebSocket test completed") + + except Exception as e: + print(f"WebSocket error: {e}") + +async def main(): + """Run all tests""" + print("=" * 60) + 
print("NOTIFICATION SERVICE TEST SUITE") + print("=" * 60) + + # Test API endpoints + await test_notification_api() + + # Test WebSocket + await test_websocket() + + print("\n" + "=" * 60) + print("✅ All tests completed!") + print("=" * 60) + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/services/notifications/backend/websocket_server.py b/services/notifications/backend/websocket_server.py new file mode 100644 index 0000000..0295af5 --- /dev/null +++ b/services/notifications/backend/websocket_server.py @@ -0,0 +1,194 @@ +""" +WebSocket Server for real-time notifications +""" +import logging +import json +from typing import Dict, List +from fastapi import WebSocket +from datetime import datetime + +logger = logging.getLogger(__name__) + +class WebSocketNotificationServer: + """Manages WebSocket connections for real-time notifications""" + + def __init__(self): + # Store connections by user_id + self.active_connections: Dict[str, List[WebSocket]] = {} + self.connection_metadata: Dict[WebSocket, Dict] = {} + + async def connect(self, websocket: WebSocket, user_id: str): + """Accept a new WebSocket connection""" + await websocket.accept() + + # Add to active connections + if user_id not in self.active_connections: + self.active_connections[user_id] = [] + + self.active_connections[user_id].append(websocket) + + # Store metadata + self.connection_metadata[websocket] = { + "user_id": user_id, + "connected_at": datetime.now(), + "last_activity": datetime.now() + } + + logger.info(f"WebSocket connected for user {user_id}. 
Total connections: {len(self.active_connections[user_id])}") + + # Send welcome message + await self.send_welcome_message(websocket, user_id) + + def disconnect(self, user_id: str): + """Remove a WebSocket connection""" + if user_id in self.active_connections: + # Remove all connections for this user + for websocket in self.active_connections[user_id]: + if websocket in self.connection_metadata: + del self.connection_metadata[websocket] + + del self.active_connections[user_id] + logger.info(f"WebSocket disconnected for user {user_id}") + + async def send_to_user(self, user_id: str, message: Dict): + """Send a message to all connections for a specific user""" + if user_id not in self.active_connections: + logger.debug(f"No active connections for user {user_id}") + return False + + disconnected = [] + for websocket in self.active_connections[user_id]: + try: + await websocket.send_json(message) + # Update last activity + if websocket in self.connection_metadata: + self.connection_metadata[websocket]["last_activity"] = datetime.now() + except Exception as e: + logger.error(f"Error sending to WebSocket for user {user_id}: {e}") + disconnected.append(websocket) + + # Remove disconnected websockets + for ws in disconnected: + self.active_connections[user_id].remove(ws) + if ws in self.connection_metadata: + del self.connection_metadata[ws] + + # Clean up if no more connections + if not self.active_connections[user_id]: + del self.active_connections[user_id] + + return True + + async def broadcast(self, message: Dict): + """Broadcast a message to all connected users""" + for user_id in list(self.active_connections.keys()): + await self.send_to_user(user_id, message) + + async def send_notification(self, user_id: str, notification: Dict): + """Send a notification to a specific user""" + message = { + "type": "notification", + "timestamp": datetime.now().isoformat(), + "data": notification + } + return await self.send_to_user(user_id, message) + + async def 
send_welcome_message(self, websocket: WebSocket, user_id: str): + """Send a welcome message to newly connected user""" + welcome_message = { + "type": "connection", + "status": "connected", + "user_id": user_id, + "timestamp": datetime.now().isoformat(), + "message": "Connected to notification service" + } + + try: + await websocket.send_json(welcome_message) + except Exception as e: + logger.error(f"Error sending welcome message: {e}") + + def get_connection_count(self, user_id: str = None) -> int: + """Get the number of active connections""" + if user_id: + return len(self.active_connections.get(user_id, [])) + + total = 0 + for connections in self.active_connections.values(): + total += len(connections) + return total + + def get_connected_users(self) -> List[str]: + """Get list of connected user IDs""" + return list(self.active_connections.keys()) + + async def send_system_message(self, user_id: str, message: str, severity: str = "info"): + """Send a system message to a user""" + system_message = { + "type": "system", + "severity": severity, + "message": message, + "timestamp": datetime.now().isoformat() + } + return await self.send_to_user(user_id, system_message) + + async def send_presence_update(self, user_id: str, status: str): + """Send presence update to user's connections""" + presence_message = { + "type": "presence", + "user_id": user_id, + "status": status, + "timestamp": datetime.now().isoformat() + } + + # Could send to friends/contacts if implemented + return await self.send_to_user(user_id, presence_message) + + async def handle_ping(self, websocket: WebSocket): + """Handle ping message from client""" + try: + await websocket.send_json({ + "type": "pong", + "timestamp": datetime.now().isoformat() + }) + + # Update last activity + if websocket in self.connection_metadata: + self.connection_metadata[websocket]["last_activity"] = datetime.now() + except Exception as e: + logger.error(f"Error handling ping: {e}") + + async def 
cleanup_stale_connections(self, timeout_minutes: int = 30): + """Clean up stale connections that haven't been active""" + now = datetime.now() + stale_connections = [] + + for websocket, metadata in self.connection_metadata.items(): + last_activity = metadata.get("last_activity") + if last_activity: + time_diff = (now - last_activity).total_seconds() / 60 + if time_diff > timeout_minutes: + stale_connections.append({ + "websocket": websocket, + "user_id": metadata.get("user_id") + }) + + # Remove stale connections + for conn in stale_connections: + user_id = conn["user_id"] + websocket = conn["websocket"] + + if user_id in self.active_connections: + if websocket in self.active_connections[user_id]: + self.active_connections[user_id].remove(websocket) + + # Clean up if no more connections + if not self.active_connections[user_id]: + del self.active_connections[user_id] + + if websocket in self.connection_metadata: + del self.connection_metadata[websocket] + + logger.info(f"Cleaned up stale connection for user {user_id}") + + return len(stale_connections) \ No newline at end of file diff --git a/services/oauth/backend/Dockerfile b/services/oauth/backend/Dockerfile new file mode 100644 index 0000000..2515968 --- /dev/null +++ b/services/oauth/backend/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first for better caching +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . 
+ +# Expose port +EXPOSE 8000 + +# Run the application +CMD ["python", "main.py"] \ No newline at end of file diff --git a/services/oauth/backend/database.py b/services/oauth/backend/database.py new file mode 100644 index 0000000..0b6dfea --- /dev/null +++ b/services/oauth/backend/database.py @@ -0,0 +1,142 @@ +from motor.motor_asyncio import AsyncIOMotorClient +from beanie import init_beanie +import os +from models import OAuthApplication, AuthorizationCode, AccessToken, OAuthScope, UserConsent + +async def init_db(): + client = AsyncIOMotorClient(os.getenv("MONGODB_URL", "mongodb://mongodb:27017")) + database = client[os.getenv("OAUTH_DB_NAME", "oauth_db")] + + await init_beanie( + database=database, + document_models=[ + OAuthApplication, + AuthorizationCode, + AccessToken, + OAuthScope, + UserConsent + ] + ) + + # 기본 스코프 생성 + await create_default_scopes() + +async def create_default_scopes(): + """기본 OAuth 스코프 생성""" + default_scopes = [ + # 기본 인증 스코프 + { + "name": "openid", + "display_name": "OpenID Connect", + "description": "기본 사용자 인증 정보", + "is_default": True, + "requires_approval": False + }, + { + "name": "profile", + "display_name": "프로필 정보", + "description": "이름, 프로필 이미지, 기본 정보 접근", + "is_default": True, + "requires_approval": True + }, + { + "name": "email", + "display_name": "이메일 주소", + "description": "이메일 주소 및 인증 상태 확인", + "is_default": False, + "requires_approval": True + }, + { + "name": "picture", + "display_name": "프로필 사진", + "description": "프로필 사진 및 썸네일 접근", + "is_default": False, + "requires_approval": True + }, + + # 사용자 데이터 접근 스코프 + { + "name": "user:read", + "display_name": "사용자 정보 읽기", + "description": "사용자 프로필 및 설정 읽기", + "is_default": False, + "requires_approval": True + }, + { + "name": "user:write", + "display_name": "사용자 정보 수정", + "description": "사용자 프로필 및 설정 수정", + "is_default": False, + "requires_approval": True + }, + + # 애플리케이션 관리 스코프 + { + "name": "app:read", + "display_name": "애플리케이션 정보 읽기", + "description": "OAuth 애플리케이션 정보 조회", 
+ "is_default": False, + "requires_approval": True + }, + { + "name": "app:write", + "display_name": "애플리케이션 관리", + "description": "OAuth 애플리케이션 생성 및 수정", + "is_default": False, + "requires_approval": True + }, + + # 조직/팀 관련 스코프 + { + "name": "org:read", + "display_name": "조직 정보 읽기", + "description": "소속 조직 및 팀 정보 조회", + "is_default": False, + "requires_approval": True + }, + { + "name": "org:write", + "display_name": "조직 관리", + "description": "조직 설정 및 멤버 관리", + "is_default": False, + "requires_approval": True + }, + + # API 접근 스코프 + { + "name": "api:read", + "display_name": "API 데이터 읽기", + "description": "API를 통한 데이터 조회", + "is_default": False, + "requires_approval": True + }, + { + "name": "api:write", + "display_name": "API 데이터 쓰기", + "description": "API를 통한 데이터 생성/수정/삭제", + "is_default": False, + "requires_approval": True + }, + + # 특수 스코프 + { + "name": "offline_access", + "display_name": "오프라인 액세스", + "description": "리프레시 토큰 발급 (장기 액세스)", + "is_default": False, + "requires_approval": True + }, + { + "name": "admin", + "display_name": "관리자 권한", + "description": "전체 시스템 관리 권한", + "is_default": False, + "requires_approval": True + } + ] + + for scope_data in default_scopes: + existing = await OAuthScope.find_one(OAuthScope.name == scope_data["name"]) + if not existing: + scope = OAuthScope(**scope_data) + await scope.create() \ No newline at end of file diff --git a/services/oauth/backend/main.py b/services/oauth/backend/main.py new file mode 100644 index 0000000..a6f51a2 --- /dev/null +++ b/services/oauth/backend/main.py @@ -0,0 +1,591 @@ +from fastapi import FastAPI, HTTPException, Depends, Form, Query, Request, Response +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import RedirectResponse, JSONResponse +from contextlib import asynccontextmanager +from datetime import datetime, timedelta +from typing import Optional, List, Dict +import uvicorn +import os +import sys +import logging + +from database import init_db +from models 
import ( + OAuthApplication, AuthorizationCode, AccessToken, + OAuthScope, UserConsent, GrantType, ResponseType +) +from utils import OAuthUtils, TokenGenerator, ScopeValidator +from pydantic import BaseModel, Field +from beanie import PydanticObjectId + +sys.path.append('/app') +from shared.kafka import KafkaProducer, Event, EventType + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Pydantic models +class ApplicationCreate(BaseModel): + name: str + description: Optional[str] = None + redirect_uris: List[str] + website_url: Optional[str] = None + logo_url: Optional[str] = None + privacy_policy_url: Optional[str] = None + terms_url: Optional[str] = None + sso_enabled: Optional[bool] = False + sso_provider: Optional[str] = None + sso_config: Optional[Dict] = None + allowed_domains: Optional[List[str]] = None + +class ApplicationUpdate(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + redirect_uris: Optional[List[str]] = None + website_url: Optional[str] = None + logo_url: Optional[str] = None + privacy_policy_url: Optional[str] = None + terms_url: Optional[str] = None + is_active: Optional[bool] = None + sso_enabled: Optional[bool] = None + sso_provider: Optional[str] = None + sso_config: Optional[Dict] = None + allowed_domains: Optional[List[str]] = None + +class ApplicationResponse(BaseModel): + id: str + client_id: str + name: str + description: Optional[str] + redirect_uris: List[str] + allowed_scopes: List[str] + grant_types: List[str] + is_active: bool + is_trusted: bool + sso_enabled: bool + sso_provider: Optional[str] + allowed_domains: List[str] + website_url: Optional[str] + logo_url: Optional[str] + created_at: datetime + +class TokenRequest(BaseModel): + grant_type: str + code: Optional[str] = None + redirect_uri: Optional[str] = None + client_id: Optional[str] = None + client_secret: Optional[str] = None + refresh_token: Optional[str] = None + scope: Optional[str] = None + code_verifier: 
Optional[str] = None + +# Global Kafka producer +kafka_producer: Optional[KafkaProducer] = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Startup + global kafka_producer + + await init_db() + + # Initialize Kafka producer + try: + kafka_producer = KafkaProducer( + bootstrap_servers=os.getenv('KAFKA_BOOTSTRAP_SERVERS', 'kafka:9092') + ) + await kafka_producer.start() + logger.info("Kafka producer initialized") + except Exception as e: + logger.warning(f"Failed to initialize Kafka producer: {e}") + kafka_producer = None + + yield + + # Shutdown + if kafka_producer: + await kafka_producer.stop() + +app = FastAPI( + title="OAuth 2.0 Service", + description="OAuth 2.0 인증 서버 및 애플리케이션 관리", + version="1.0.0", + lifespan=lifespan +) + +# CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Health check +@app.get("/health") +async def health_check(): + return { + "status": "healthy", + "service": "oauth", + "timestamp": datetime.now().isoformat() + } + +# OAuth Application Management +@app.post("/applications", response_model=ApplicationResponse, status_code=201) +async def create_application( + app_data: ApplicationCreate, + current_user_id: str = "test_user" # TODO: Get from JWT token +): + """새로운 OAuth 애플리케이션 등록""" + client_id = OAuthUtils.generate_client_id() + client_secret = OAuthUtils.generate_client_secret() + hashed_secret = OAuthUtils.hash_client_secret(client_secret) + + # 기본 스코프 가져오기 + default_scopes = await OAuthScope.find(OAuthScope.is_default == True).to_list() + allowed_scopes = [scope.name for scope in default_scopes] + + application = OAuthApplication( + client_id=client_id, + client_secret=hashed_secret, + name=app_data.name, + description=app_data.description, + owner_id=current_user_id, + redirect_uris=app_data.redirect_uris, + allowed_scopes=allowed_scopes, + grant_types=[GrantType.AUTHORIZATION_CODE, GrantType.REFRESH_TOKEN], + 
sso_enabled=app_data.sso_enabled or False, + sso_provider=app_data.sso_provider, + sso_config=app_data.sso_config or {}, + allowed_domains=app_data.allowed_domains or [], + website_url=app_data.website_url, + logo_url=app_data.logo_url, + privacy_policy_url=app_data.privacy_policy_url, + terms_url=app_data.terms_url + ) + + await application.create() + + # 이벤트 발행 + if kafka_producer: + event = Event( + event_type=EventType.TASK_CREATED, + service="oauth", + data={ + "app_id": str(application.id), + "client_id": client_id, + "name": application.name, + "owner_id": current_user_id + } + ) + await kafka_producer.send_event("oauth-events", event) + + # 클라이언트 시크릿은 생성 시에만 반환 + return { + **ApplicationResponse( + id=str(application.id), + client_id=application.client_id, + name=application.name, + description=application.description, + redirect_uris=application.redirect_uris, + allowed_scopes=application.allowed_scopes, + grant_types=[gt.value for gt in application.grant_types], + is_active=application.is_active, + is_trusted=application.is_trusted, + sso_enabled=application.sso_enabled, + sso_provider=application.sso_provider, + allowed_domains=application.allowed_domains, + website_url=application.website_url, + logo_url=application.logo_url, + created_at=application.created_at + ).dict(), + "client_secret": client_secret # 최초 생성 시에만 반환 + } + +@app.get("/applications", response_model=List[ApplicationResponse]) +async def list_applications( + owner_id: Optional[str] = None, + is_active: Optional[bool] = None +): + """OAuth 애플리케이션 목록 조회""" + query = {} + if owner_id: + query["owner_id"] = owner_id + if is_active is not None: + query["is_active"] = is_active + + applications = await OAuthApplication.find(query).to_list() + + return [ + ApplicationResponse( + id=str(app.id), + client_id=app.client_id, + name=app.name, + description=app.description, + redirect_uris=app.redirect_uris, + allowed_scopes=app.allowed_scopes, + grant_types=[gt.value for gt in app.grant_types], + 
is_active=app.is_active, + is_trusted=app.is_trusted, + sso_enabled=app.sso_enabled, + sso_provider=app.sso_provider, + allowed_domains=app.allowed_domains, + website_url=app.website_url, + logo_url=app.logo_url, + created_at=app.created_at + ) + for app in applications + ] + +@app.get("/applications/{client_id}", response_model=ApplicationResponse) +async def get_application(client_id: str): + """OAuth 애플리케이션 상세 조회""" + application = await OAuthApplication.find_one(OAuthApplication.client_id == client_id) + if not application: + raise HTTPException(status_code=404, detail="Application not found") + + return ApplicationResponse( + id=str(application.id), + client_id=application.client_id, + name=application.name, + description=application.description, + redirect_uris=application.redirect_uris, + allowed_scopes=application.allowed_scopes, + grant_types=[gt.value for gt in application.grant_types], + is_active=application.is_active, + is_trusted=application.is_trusted, + sso_enabled=application.sso_enabled, + sso_provider=application.sso_provider, + allowed_domains=application.allowed_domains, + website_url=application.website_url, + logo_url=application.logo_url, + created_at=application.created_at + ) + +# OAuth 2.0 Authorization Endpoint +@app.get("/authorize") +async def authorize( + response_type: str = Query(..., description="응답 타입 (code, token)"), + client_id: str = Query(..., description="클라이언트 ID"), + redirect_uri: str = Query(..., description="리다이렉트 URI"), + scope: str = Query("", description="요청 스코프"), + state: Optional[str] = Query(None, description="상태 값"), + code_challenge: Optional[str] = Query(None, description="PKCE challenge"), + code_challenge_method: Optional[str] = Query("S256", description="PKCE method"), + current_user_id: str = "test_user" # TODO: Get from session/JWT +): + """OAuth 2.0 인증 엔드포인트""" + + # 애플리케이션 확인 + application = await OAuthApplication.find_one(OAuthApplication.client_id == client_id) + if not application or not 
# OAuth 2.0 Token Endpoint
@app.post("/token")
async def token(
    grant_type: str = Form(...),
    code: Optional[str] = Form(None),
    redirect_uri: Optional[str] = Form(None),
    client_id: Optional[str] = Form(None),
    client_secret: Optional[str] = Form(None),
    refresh_token: Optional[str] = Form(None),
    scope: Optional[str] = Form(None),
    code_verifier: Optional[str] = Form(None)
):
    """OAuth 2.0 token endpoint.

    Authenticates the client from the form-posted ``client_id`` /
    ``client_secret`` and then dispatches on ``grant_type``:

    * ``authorization_code`` - redeems a one-time code (with PKCE check when
      the code was issued with a challenge) for an access + refresh token.
    * ``refresh_token`` - revokes the presented token document and issues a
      fresh access + refresh token pair with the same user and scopes.
    * ``client_credentials`` - issues an access token (no refresh token, no
      user) limited to the application's allowed scopes.

    Raises:
        HTTPException(401): missing or invalid client credentials, or the
            client application has been deactivated.
        HTTPException(400): missing parameters, invalid/used/expired code,
            redirect URI mismatch, PKCE failure, invalid/revoked/expired
            refresh token, or an unsupported grant type.
    """

    # Client authentication
    if not client_id or not client_secret:
        raise HTTPException(
            status_code=401,
            detail="Client authentication required",
            headers={"WWW-Authenticate": "Basic"}
        )

    application = await OAuthApplication.find_one(OAuthApplication.client_id == client_id)
    # A deactivated application must not obtain tokens either; the
    # authorization endpoint already rejects inactive clients, so the token
    # endpoint has to agree with it.
    if (
        not application
        or not application.is_active
        or not OAuthUtils.verify_client_secret(client_secret, application.client_secret)
    ):
        raise HTTPException(status_code=401, detail="Invalid client credentials")

    if grant_type == "authorization_code":
        # Authorization Code Grant
        if not code or not redirect_uri:
            raise HTTPException(status_code=400, detail="Missing required parameters")

        auth_code = await AuthorizationCode.find_one(
            AuthorizationCode.code == code,
            AuthorizationCode.client_id == client_id
        )

        if not auth_code:
            raise HTTPException(status_code=400, detail="Invalid authorization code")

        if auth_code.used:
            raise HTTPException(status_code=400, detail="Authorization code already used")

        if auth_code.expires_at < datetime.now():
            raise HTTPException(status_code=400, detail="Authorization code expired")

        if auth_code.redirect_uri != redirect_uri:
            raise HTTPException(status_code=400, detail="Redirect URI mismatch")

        # PKCE verification (only when the code was issued with a challenge)
        if auth_code.code_challenge:
            if not code_verifier:
                raise HTTPException(status_code=400, detail="Code verifier required")

            if not OAuthUtils.verify_pkce_challenge(
                code_verifier,
                auth_code.code_challenge,
                auth_code.code_challenge_method
            ):
                raise HTTPException(status_code=400, detail="Invalid code verifier")

        # Mark the code as used so it cannot be redeemed a second time
        auth_code.used = True
        auth_code.used_at = datetime.now()
        await auth_code.save()

        # Token generation. The new refresh token gets its own name so it does
        # not shadow the ``refresh_token`` form parameter.
        access_token = OAuthUtils.generate_access_token()
        issued_refresh_token = OAuthUtils.generate_refresh_token()

        token_doc = AccessToken(
            token=access_token,
            refresh_token=issued_refresh_token,
            client_id=client_id,
            user_id=auth_code.user_id,
            scopes=auth_code.scopes,
            expires_at=datetime.now() + timedelta(hours=1),
            refresh_expires_at=datetime.now() + timedelta(days=30)
        )

        await token_doc.create()

        return TokenGenerator.generate_token_response(
            access_token=access_token,
            expires_in=3600,
            refresh_token=issued_refresh_token,
            scope=" ".join(auth_code.scopes)
        )

    elif grant_type == "refresh_token":
        # Refresh Token Grant
        if not refresh_token:
            raise HTTPException(status_code=400, detail="Refresh token required")

        token_doc = await AccessToken.find_one(
            AccessToken.refresh_token == refresh_token,
            AccessToken.client_id == client_id
        )

        if not token_doc:
            raise HTTPException(status_code=400, detail="Invalid refresh token")

        if token_doc.revoked:
            raise HTTPException(status_code=400, detail="Token has been revoked")

        if token_doc.refresh_expires_at and token_doc.refresh_expires_at < datetime.now():
            raise HTTPException(status_code=400, detail="Refresh token expired")

        # Revoke the old token document (refresh-token rotation)
        token_doc.revoked = True
        token_doc.revoked_at = datetime.now()
        await token_doc.save()

        # Issue the replacement pair
        new_access_token = OAuthUtils.generate_access_token()
        new_refresh_token = OAuthUtils.generate_refresh_token()

        new_token_doc = AccessToken(
            token=new_access_token,
            refresh_token=new_refresh_token,
            client_id=client_id,
            user_id=token_doc.user_id,
            scopes=token_doc.scopes,
            expires_at=datetime.now() + timedelta(hours=1),
            refresh_expires_at=datetime.now() + timedelta(days=30)
        )

        await new_token_doc.create()

        return TokenGenerator.generate_token_response(
            access_token=new_access_token,
            expires_in=3600,
            refresh_token=new_refresh_token,
            scope=" ".join(token_doc.scopes)
        )

    elif grant_type == "client_credentials":
        # Client Credentials Grant: scopes are limited to what the
        # application is allowed to request; no user and no refresh token.
        requested_scopes = ScopeValidator.parse_scope_string(scope) if scope else []
        valid_scopes = ScopeValidator.validate_scopes(requested_scopes, application.allowed_scopes)

        access_token = OAuthUtils.generate_access_token()

        token_doc = AccessToken(
            token=access_token,
            client_id=client_id,
            scopes=valid_scopes,
            expires_at=datetime.now() + timedelta(hours=1)
        )

        await token_doc.create()

        return TokenGenerator.generate_token_response(
            access_token=access_token,
            expires_in=3600,
            scope=" ".join(valid_scopes)
        )

    else:
        raise HTTPException(status_code=400, detail="Unsupported grant type")
# Scopes Management
@app.get("/scopes")
async def list_scopes():
    """Return every stored OAuth scope as a list of plain dictionaries."""
    catalogue = []
    for entry in await OAuthScope.find_all().to_list():
        # Expose only the public, descriptive fields of each scope document.
        catalogue.append(
            {
                "name": entry.name,
                "display_name": entry.display_name,
                "description": entry.description,
                "is_default": entry.is_default,
                "requires_approval": entry.requires_approval
            }
        )
    return catalogue
class OAuthApplication(Document):
    """OAuth 2.0 client application registered with this service."""
    # Public identifier of the client.
    # NOTE(review): `unique=True` is passed to pydantic's `Field`; confirm this
    # really produces a unique MongoDB index (Beanie documents
    # `Indexed(str, unique=True)` for that purpose).
    client_id: str = Field(..., unique=True, description="클라이언트 ID")
    # Client secret; per the field description it is stored hashed.
    client_secret: str = Field(..., description="클라이언트 시크릿 (해시됨)")
    name: str = Field(..., description="애플리케이션 이름")
    description: Optional[str] = Field(None, description="애플리케이션 설명")

    # ID of the user who owns the application.
    owner_id: str = Field(..., description="애플리케이션 소유자 ID")

    # Allowed redirect URIs, allowed scopes and allowed grant types.
    # Grant types default to the authorization-code grant only.
    redirect_uris: List[str] = Field(default_factory=list, description="허용된 리다이렉트 URI들")
    allowed_scopes: List[str] = Field(default_factory=list, description="허용된 스코프들")
    grant_types: List[GrantType] = Field(default_factory=lambda: [GrantType.AUTHORIZATION_CODE], description="허용된 grant types")

    # Activation flag, and "trusted" flag (described as auto-approval).
    is_active: bool = Field(default=True, description="활성화 상태")
    is_trusted: bool = Field(default=False, description="신뢰할 수 있는 앱 (자동 승인)")

    # SSO settings
    sso_enabled: bool = Field(default=False, description="SSO 활성화 여부")
    sso_provider: Optional[str] = Field(None, description="SSO 제공자 (google, github, saml 등)")
    sso_config: Optional[Dict] = Field(default_factory=dict, description="SSO 설정 (provider별 설정)")
    allowed_domains: List[str] = Field(default_factory=list, description="SSO 허용 도메인 (예: @company.com)")

    # Optional presentation / legal links for the application.
    website_url: Optional[str] = Field(None, description="애플리케이션 웹사이트")
    logo_url: Optional[str] = Field(None, description="애플리케이션 로고 URL")
    privacy_policy_url: Optional[str] = Field(None, description="개인정보 처리방침 URL")
    terms_url: Optional[str] = Field(None, description="이용약관 URL")

    # Both timestamps default to the naive local time at construction;
    # nothing in this class refreshes `updated_at` on later saves.
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: datetime = Field(default_factory=datetime.now)

    class Settings:
        # NOTE(review): intended MongoDB collection name. Verify that the
        # installed Beanie version reads `collection` here (recent releases
        # document the attribute as `name`).
        collection = "oauth_applications"
class AccessToken(Document):
    """OAuth 2.0 access token record, optionally paired with a refresh token."""
    # Access token value.
    # NOTE(review): `unique=True` on a pydantic `Field` - confirm it creates a
    # unique index in the installed Beanie version.
    token: str = Field(..., unique=True, description="액세스 토큰")
    # Refresh token value; optional, defaults to None.
    refresh_token: Optional[str] = Field(None, description="리프레시 토큰")

    client_id: str = Field(..., description="클라이언트 ID")
    # User ID; per the field description it is absent for the
    # client-credentials flow.
    user_id: Optional[str] = Field(None, description="사용자 ID (client credentials flow에서는 없음)")

    token_type: TokenType = Field(default=TokenType.BEARER)
    # Scopes granted to this token.
    scopes: List[str] = Field(default_factory=list, description="부여된 스코프")

    # Expiry of the access token, and (optionally) of the refresh token.
    expires_at: datetime = Field(..., description="액세스 토큰 만료 시간")
    refresh_expires_at: Optional[datetime] = Field(None, description="리프레시 토큰 만료 시간")

    # Revocation flag and the time of revocation.
    revoked: bool = Field(default=False, description="폐기 여부")
    revoked_at: Optional[datetime] = Field(None, description="폐기 시간")

    # `created_at` defaults to the naive local time at construction.
    created_at: datetime = Field(default_factory=datetime.now)
    # Time the token was last used; None until something sets it.
    last_used_at: Optional[datetime] = Field(None, description="마지막 사용 시간")

    class Settings:
        # NOTE(review): intended MongoDB collection name. Verify that the
        # installed Beanie version reads `collection` here (recent releases
        # document the attribute as `name`).
        collection = "access_tokens"
UserConsent(Document): + """사용자 동의 기록""" + user_id: str = Field(..., description="사용자 ID") + client_id: str = Field(..., description="클라이언트 ID") + + granted_scopes: List[str] = Field(default_factory=list, description="승인된 스코프") + + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + expires_at: Optional[datetime] = Field(None, description="동의 만료 시간") + + class Settings: + collection = "user_consents" + indexes = [ + [("user_id", 1), ("client_id", 1)] + ] \ No newline at end of file diff --git a/services/oauth/backend/requirements.txt b/services/oauth/backend/requirements.txt new file mode 100644 index 0000000..79d6f3a --- /dev/null +++ b/services/oauth/backend/requirements.txt @@ -0,0 +1,11 @@ +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +pydantic[email]==2.5.3 +pymongo==4.6.1 +motor==3.3.2 +beanie==1.23.6 +authlib==1.3.0 +python-jose[cryptography]==3.3.0 +passlib[bcrypt]==1.7.4 +python-multipart==0.0.6 +aiokafka==0.10.0 \ No newline at end of file diff --git a/services/oauth/backend/utils.py b/services/oauth/backend/utils.py new file mode 100644 index 0000000..5b62d5e --- /dev/null +++ b/services/oauth/backend/utils.py @@ -0,0 +1,131 @@ +import secrets +import hashlib +import base64 +from datetime import datetime, timedelta +from typing import Optional, List +from passlib.context import CryptContext +from jose import JWTError, jwt +import os + +pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") + +class OAuthUtils: + @staticmethod + def generate_client_id() -> str: + """클라이언트 ID 생성""" + return secrets.token_urlsafe(24) + + @staticmethod + def generate_client_secret() -> str: + """클라이언트 시크릿 생성""" + return secrets.token_urlsafe(32) + + @staticmethod + def hash_client_secret(secret: str) -> str: + """클라이언트 시크릿 해싱""" + return pwd_context.hash(secret) + + @staticmethod + def verify_client_secret(plain_secret: str, hashed_secret: str) -> bool: + """클라이언트 시크릿 검증""" + return 
pwd_context.verify(plain_secret, hashed_secret) + + @staticmethod + def generate_authorization_code() -> str: + """인증 코드 생성""" + return secrets.token_urlsafe(32) + + @staticmethod + def generate_access_token() -> str: + """액세스 토큰 생성""" + return secrets.token_urlsafe(32) + + @staticmethod + def generate_refresh_token() -> str: + """리프레시 토큰 생성""" + return secrets.token_urlsafe(48) + + @staticmethod + def verify_pkce_challenge(verifier: str, challenge: str, method: str = "S256") -> bool: + """PKCE challenge 검증""" + if method == "plain": + return verifier == challenge + elif method == "S256": + verifier_hash = hashlib.sha256(verifier.encode()).digest() + verifier_challenge = base64.urlsafe_b64encode(verifier_hash).decode().rstrip("=") + return verifier_challenge == challenge + return False + + @staticmethod + def create_jwt_token(data: dict, expires_delta: Optional[timedelta] = None) -> str: + """JWT 토큰 생성""" + to_encode = data.copy() + if expires_delta: + expire = datetime.utcnow() + expires_delta + else: + expire = datetime.utcnow() + timedelta(minutes=15) + to_encode.update({"exp": expire}) + + secret_key = os.getenv("JWT_SECRET_KEY", "your-secret-key") + algorithm = os.getenv("JWT_ALGORITHM", "HS256") + + encoded_jwt = jwt.encode(to_encode, secret_key, algorithm=algorithm) + return encoded_jwt + + @staticmethod + def decode_jwt_token(token: str) -> Optional[dict]: + """JWT 토큰 디코딩""" + try: + secret_key = os.getenv("JWT_SECRET_KEY", "your-secret-key") + algorithm = os.getenv("JWT_ALGORITHM", "HS256") + + payload = jwt.decode(token, secret_key, algorithms=[algorithm]) + return payload + except JWTError: + return None + +class TokenGenerator: + @staticmethod + def generate_token_response( + access_token: str, + token_type: str = "Bearer", + expires_in: int = 3600, + refresh_token: Optional[str] = None, + scope: Optional[str] = None, + id_token: Optional[str] = None + ) -> dict: + """OAuth 2.0 토큰 응답 생성""" + response = { + "access_token": access_token, + "token_type": 
class ScopeValidator:
    """Helpers for parsing and checking OAuth scope lists."""

    @staticmethod
    def validate_scopes(requested_scopes: List[str], allowed_scopes: List[str]) -> List[str]:
        """Return the requested scopes that are allowed, in request order.

        Scopes that are not in ``allowed_scopes`` are dropped silently;
        duplicates in ``requested_scopes`` are kept.
        """
        granted = []
        for candidate in requested_scopes:
            if candidate not in allowed_scopes:
                continue
            granted.append(candidate)
        return granted

    @staticmethod
    def has_scope(token_scopes: List[str], required_scope: str) -> bool:
        """Tell whether ``required_scope`` is among ``token_scopes``."""
        present = required_scope in token_scopes
        return present

    @staticmethod
    def parse_scope_string(scope_string: str) -> List[str]:
        """Split a whitespace-separated scope string into a list.

        An empty or missing string yields an empty list.
        """
        return scope_string.strip().split() if scope_string else []
+ curl -X POST http://localhost:8100/api/keywords \ + -H "Content-Type: application/json" \ + -d '{"keyword": "테스트", "schedule": "30min"}' + @echo "\nTriggering immediate processing..." + curl -X POST http://localhost:8100/api/trigger/테스트 + +# Service-specific commands +scheduler-logs: + docker-compose logs -f scheduler + +rss-logs: + docker-compose logs -f rss-collector + +search-logs: + docker-compose logs -f google-search + +summarizer-logs: + docker-compose logs -f ai-summarizer + +assembly-logs: + docker-compose logs -f article-assembly + +monitor-logs: + docker-compose logs -f monitor + +# Database commands +redis-cli: + docker-compose exec redis redis-cli + +mongo-shell: + docker-compose exec mongodb mongosh -u admin -p password123 + +# Queue management +queue-status: + @echo "Checking queue status..." + docker-compose exec redis redis-cli --raw LLEN queue:keyword + docker-compose exec redis redis-cli --raw LLEN queue:rss + docker-compose exec redis redis-cli --raw LLEN queue:search + docker-compose exec redis redis-cli --raw LLEN queue:summarize + docker-compose exec redis redis-cli --raw LLEN queue:assembly + +queue-clear: + @echo "Clearing all queues..." + docker-compose exec redis redis-cli FLUSHDB + +# Health check +health: + @echo "Checking service health..." + curl -s http://localhost:8100/api/health | python3 -m json.tool \ No newline at end of file diff --git a/services/pipeline/README.md b/services/pipeline/README.md new file mode 100644 index 0000000..e8bf455 --- /dev/null +++ b/services/pipeline/README.md @@ -0,0 +1,154 @@ +# News Pipeline System + +비동기 큐 기반 뉴스 생성 파이프라인 시스템 + +## 아키텍처 + +``` +Scheduler → RSS Collector → Google Search → AI Summarizer → Article Assembly → MongoDB + ↓ ↓ ↓ ↓ ↓ + Redis Queue Redis Queue Redis Queue Redis Queue Redis Queue +``` + +## 서비스 구성 + +### 1. Scheduler +- 30분마다 등록된 키워드 처리 +- 오전 7시, 낮 12시, 저녁 6시 우선 처리 +- MongoDB에서 키워드 로드 후 큐에 작업 생성 + +### 2. 
RSS Collector +- RSS 피드 수집 (Google News RSS) +- 7일간 중복 방지 (Redis Set) +- 키워드 관련성 필터링 + +### 3. Google Search +- RSS 아이템별 추가 검색 결과 수집 +- 아이템당 최대 3개 결과 +- 작업당 최대 5개 아이템 처리 + +### 4. AI Summarizer +- Claude Haiku로 빠른 요약 생성 +- 200자 이내 한국어 요약 +- 병렬 처리 지원 (3 workers) + +### 5. Article Assembly +- Claude Sonnet으로 종합 기사 작성 +- 1500자 이내 전문 기사 +- MongoDB 저장 및 통계 업데이트 + +### 6. Monitor +- 실시간 파이프라인 모니터링 +- 큐 상태, 워커 상태 확인 +- REST API 제공 (포트 8100) + +## 시작하기 + +### 1. 환경 변수 설정 +```bash +# .env 파일 확인 +CLAUDE_API_KEY=your_claude_api_key +GOOGLE_API_KEY=your_google_api_key +GOOGLE_SEARCH_ENGINE_ID=your_search_engine_id +``` + +### 2. 서비스 시작 +```bash +cd pipeline +docker-compose up -d +``` + +### 3. 모니터링 +```bash +# 로그 확인 +docker-compose logs -f + +# 특정 서비스 로그 +docker-compose logs -f scheduler + +# 모니터 API +curl http://localhost:8100/api/stats +``` + +## API 엔드포인트 + +### Monitor API (포트 8100) + +- `GET /api/stats` - 전체 통계 +- `GET /api/queues/{queue_name}` - 큐 상세 정보 +- `GET /api/keywords` - 키워드 목록 +- `POST /api/keywords` - 키워드 등록 +- `DELETE /api/keywords/{id}` - 키워드 삭제 +- `GET /api/articles` - 기사 목록 +- `GET /api/articles/{id}` - 기사 상세 +- `GET /api/workers` - 워커 상태 +- `POST /api/trigger/{keyword}` - 수동 처리 트리거 +- `GET /api/health` - 헬스 체크 + +## 키워드 등록 예시 + +```bash +# 새 키워드 등록 +curl -X POST http://localhost:8100/api/keywords \ + -H "Content-Type: application/json" \ + -d '{"keyword": "인공지능", "schedule": "30min"}' + +# 수동 처리 트리거 +curl -X POST http://localhost:8100/api/trigger/인공지능 +``` + +## 데이터베이스 + +### MongoDB Collections +- `keywords` - 등록된 키워드 +- `articles` - 생성된 기사 +- `keyword_stats` - 키워드별 통계 + +### Redis Keys +- `queue:*` - 작업 큐 +- `processing:*` - 처리 중 작업 +- `failed:*` - 실패한 작업 +- `dedup:rss:*` - RSS 중복 방지 +- `workers:*:active` - 활성 워커 + +## 트러블슈팅 + +### 큐 초기화 +```bash +docker-compose exec redis redis-cli FLUSHDB +``` + +### 워커 재시작 +```bash +docker-compose restart rss-collector +``` + +### 데이터베이스 접속 +```bash +# MongoDB +docker-compose exec mongodb mongosh -u admin -p 
password123 + +# Redis +docker-compose exec redis redis-cli +``` + +## 스케일링 + +워커 수 조정: +```yaml +# docker-compose.yml +ai-summarizer: + deploy: + replicas: 5 # 워커 수 증가 +``` + +## 모니터링 대시보드 + +브라우저에서 http://localhost:8100 접속하여 파이프라인 상태 확인 + +## 로그 레벨 설정 + +`.env` 파일에서 조정: +``` +LOG_LEVEL=DEBUG # INFO, WARNING, ERROR +``` \ No newline at end of file diff --git a/services/pipeline/ai-article-generator/Dockerfile b/services/pipeline/ai-article-generator/Dockerfile new file mode 100644 index 0000000..ad59a8c --- /dev/null +++ b/services/pipeline/ai-article-generator/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.11-slim + +WORKDIR /app + +# 의존성 설치 +COPY ./ai-article-generator/requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# 공통 모듈 복사 +COPY ./shared /app/shared + +# AI Article Generator 코드 복사 +COPY ./ai-article-generator /app + +# 환경변수 +ENV PYTHONUNBUFFERED=1 + +# 실행 +CMD ["python", "ai_article_generator.py"] \ No newline at end of file diff --git a/services/pipeline/ai-article-generator/ai_article_generator.py b/services/pipeline/ai-article-generator/ai_article_generator.py new file mode 100644 index 0000000..363df41 --- /dev/null +++ b/services/pipeline/ai-article-generator/ai_article_generator.py @@ -0,0 +1,300 @@ +""" +AI Article Generator Service +Claude API를 사용한 뉴스 기사 생성 서비스 +""" +import asyncio +import logging +import os +import sys +import json +from datetime import datetime +from typing import List, Dict, Any +from anthropic import AsyncAnthropic +from motor.motor_asyncio import AsyncIOMotorClient + +# Import from shared module +from shared.models import PipelineJob, EnrichedItem, FinalArticle, Subtopic, Entities, NewsReference +from shared.queue_manager import QueueManager + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class AIArticleGeneratorWorker: + def __init__(self): + self.queue_manager = QueueManager( + redis_url=os.getenv("REDIS_URL", "redis://redis:6379") + ) + self.claude_api_key = 
async def start(self):
    """Start the worker (method of ``AIArticleGeneratorWorker``).

    Connects the queue manager, validates the Claude API key, opens the
    MongoDB client and then polls the ``ai_article_generation`` queue
    forever, handing each dequeued job to ``process_job``.

    Returns early (after logging an error) when no Claude API key is
    configured. Errors raised while handling a single job are logged and the
    loop continues after a one-second pause.
    """
    logger.info("Starting AI Article Generator Worker")

    # Redis connection
    await self.queue_manager.connect()

    # Check the Claude configuration before opening a MongoDB client, so the
    # early return below does not leave an unused client behind.
    if not self.claude_api_key:
        logger.error("Claude API key not configured")
        return
    self.claude_client = AsyncAnthropic(api_key=self.claude_api_key)

    # MongoDB connection
    client = AsyncIOMotorClient(self.mongodb_url)
    self.db = client[self.db_name]

    try:
        # Main processing loop
        while True:
            try:
                # Fetch the next job from the queue
                job = await self.queue_manager.dequeue('ai_article_generation', timeout=5)

                if job:
                    await self.process_job(job)

            except Exception as e:
                # Keep the worker alive; include the traceback for diagnosis.
                logger.error(f"Error in worker loop: {e}", exc_info=True)
                await asyncio.sleep(1)
    finally:
        # The loop only exits through cancellation or a non-Exception error;
        # release the MongoDB client in that case.
        client.close()
with _id: {mongodb_id}") + + # 다음 단계로 전달 (이미지 생성) + job.data['news_id'] = article.news_id + job.data['mongodb_id'] = mongodb_id + job.stages_completed.append('ai_article_generation') + job.stage = 'image_generation' + + await self.queue_manager.enqueue('image_generation', job) + await self.queue_manager.mark_completed('ai_article_generation', job.job_id) + + except Exception as e: + logger.error(f"Error processing job {job.job_id}: {e}") + await self.queue_manager.mark_failed('ai_article_generation', job, str(e)) + + async def _generate_article(self, job: PipelineJob, enriched_item: EnrichedItem) -> FinalArticle: + """Claude를 사용한 기사 생성""" + + # RSS 아이템 정보 + rss_item = enriched_item.rss_item + search_results = enriched_item.search_results + + # 검색 결과 텍스트 준비 (최대 10개) + search_text = "" + if search_results: + search_text = "\n관련 검색 결과:\n" + for idx, result in enumerate(search_results[:10], 1): + search_text += f"{idx}. {result.title}\n" + if result.snippet: + search_text += f" {result.snippet}\n" + + # Claude로 기사 작성 + prompt = f"""다음 뉴스 정보를 바탕으로 상세한 기사를 작성해주세요. 
+ +키워드: {job.keyword} + +뉴스 정보: +제목: {rss_item.title} +요약: {rss_item.summary or '내용 없음'} +링크: {rss_item.link} +{search_text} + +다음 JSON 형식으로 작성해주세요: +{{ + "title": "기사 제목 (50자 이내)", + "summary": "한 줄 요약 (100자 이내)", + "subtopics": [ + {{ + "title": "소제목1", + "content": ["문단1", "문단2", "문단3"] + }}, + {{ + "title": "소제목2", + "content": ["문단1", "문단2"] + }}, + {{ + "title": "소제목3", + "content": ["문단1", "문단2"] + }} + ], + "categories": ["카테고리1", "카테고리2"], + "entities": {{ + "people": ["인물1", "인물2"], + "organizations": ["조직1", "조직2"], + "groups": ["그룹1"], + "countries": ["국가1"], + "events": ["이벤트1"] + }} +}} + +요구사항: +- 3개의 소제목로 구성 +- 각 소제목별로 2-3개 문단 +- 전문적이고 객관적인 톤 +- 한국어로 작성 +- 실제 정보를 바탕으로 구체적으로 작성""" + + try: + response = await self.claude_client.messages.create( + model="claude-sonnet-4-20250514", + max_tokens=4000, + temperature=0.7, + messages=[ + {"role": "user", "content": prompt} + ] + ) + + # JSON 파싱 + content_text = response.content[0].text + json_start = content_text.find('{') + json_end = content_text.rfind('}') + 1 + + if json_start != -1 and json_end > json_start: + article_data = json.loads(content_text[json_start:json_end]) + else: + raise ValueError("No valid JSON in response") + + # Subtopic 객체 생성 + subtopics = [] + for subtopic_data in article_data.get('subtopics', []): + subtopics.append(Subtopic( + title=subtopic_data.get('title', ''), + content=subtopic_data.get('content', []) + )) + + # Entities 객체 생성 + entities_data = article_data.get('entities', {}) + entities = Entities( + people=entities_data.get('people', []), + organizations=entities_data.get('organizations', []), + groups=entities_data.get('groups', []), + countries=entities_data.get('countries', []), + events=entities_data.get('events', []) + ) + + # 레퍼런스 생성 + references = [] + # RSS 원본 추가 + references.append(NewsReference( + title=rss_item.title, + link=rss_item.link, + source=rss_item.source_feed, + published=rss_item.published + )) + + # 검색 결과 레퍼런스 추가 (최대 9개 - RSS 원본과 합쳐 총 10개) + for 
async def main():
    """Entry point: run one AI article generator worker until interrupted."""
    generator_worker = AIArticleGeneratorWorker()

    try:
        try:
            await generator_worker.start()
        except KeyboardInterrupt:
            logger.info("Received interrupt signal")
    finally:
        # Always release the worker's connections, however start() ended.
        await generator_worker.stop()
async def check_keywords():
    """Print every keyword document stored in ``ai_writer_db.keywords``.

    Connects to the MongoDB instance at ``mongodb://localhost:27017`` and, for
    each keyword, prints its ID, interval, active flag, priority, RSS feed
    count and (when present) the last and next run times. The client is
    closed even if the query or the printing of a document fails.
    """
    client = AsyncIOMotorClient("mongodb://localhost:27017")
    try:
        db = client.ai_writer_db

        # Load all keyword documents
        keywords = await db.keywords.find().to_list(None)

        print(f"\n=== 등록된 키워드: {len(keywords)}개 ===\n")

        for kw in keywords:
            print(f"키워드: {kw['keyword']}")
            print(f" - ID: {kw['keyword_id']}")
            print(f" - 간격: {kw['interval_minutes']}분")
            print(f" - 활성화: {kw['is_active']}")
            print(f" - 우선순위: {kw['priority']}")
            print(f" - RSS 피드: {len(kw.get('rss_feeds', []))}개")

            if kw.get('last_run'):
                print(f" - 마지막 실행: {kw['last_run']}")

            if kw.get('next_run'):
                next_run = kw['next_run']
                # Minutes until the next scheduled run (negative if overdue).
                remaining = (next_run - datetime.now()).total_seconds() / 60
                print(f" - 다음 실행: {next_run} ({remaining:.1f}분 후)")

            print()
    finally:
        # Release the connection on every exit path.
        client.close()
def fix_imports(filepath):
    """Rewrite legacy ``shared`` imports in one Python file, in place.

    Two legacy forms are migrated:

    * the three-line header (Korean marker comment, ``sys.path.append`` hack,
      ``from models|queue_manager import ...``) becomes a
      ``from shared.<module> import ...`` line under a new marker comment;
    * a bare ``sys.path.append(...)`` + ``from models import`` pair, and any
      line starting with ``from queue_manager import``.

    A header whose module is neither ``models`` nor ``queue_manager`` is left
    untouched rather than having its import line deleted.

    Args:
        filepath: Path of the Python source file to fix.

    Returns:
        True if the file was modified and rewritten, False otherwise.
    """
    # The marker comment is Korean, so the file must be read and written as
    # UTF-8 regardless of the platform's default encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Pattern to match the old import style
    old_pattern = r"# 상위 디렉토리의 shared 모듈 import\nsys\.path\.append\(os\.path\.join\(os\.path\.dirname\(__file__\), '\.\.', 'shared'\)\)\nfrom ([\w, ]+) import ([\w, ]+)"

    def replace_imports(match):
        """Build the new-style import lines for one matched legacy header."""
        modules = match.group(1)
        items = match.group(2)

        imports = []
        if 'models' in modules:
            imports.append(f"from shared.models import {items}")
        if 'queue_manager' in modules:
            imports.append(f"from shared.queue_manager import {items}")

        if not imports:
            # Unknown module: keep the original text instead of dropping the
            # import statement.
            return match.group(0)

        return "# Import from shared module\n" + "\n".join(imports)

    # Apply the replacement
    new_content = re.sub(old_pattern, replace_imports, content)

    # Also handle simpler patterns
    new_content = new_content.replace(
        "sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'shared'))\nfrom models import",
        "from shared.models import"
    )
    new_content = new_content.replace(
        "\nfrom queue_manager import",
        "\nfrom shared.queue_manager import"
    )

    # Write back only if something changed
    if new_content == content:
        return False

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(new_content)
    print(f"Fixed imports in {filepath}")
    return True
class GoogleSearchWorker:
    """Queue worker that enriches one RSS item with Google search results.

    Jobs are read from the ``search_enrichment`` queue, the item's title is
    looked up through the Google Custom Search JSON API, and the job is then
    forwarded to the ``ai_article_generation`` queue.
    """

    def __init__(self):
        """Read the Redis URL and the Google credentials from the environment."""
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        self.google_api_key = os.getenv("GOOGLE_API_KEY")
        self.search_engine_id = os.getenv("GOOGLE_SEARCH_ENGINE_ID")
        self.max_results_per_item = 3

    async def start(self):
        """Connect to Redis and process jobs until the task is cancelled."""
        logger.info("Starting Google Search Worker")

        await self.queue_manager.connect()

        while True:
            try:
                # Wait up to 5 seconds for the next job.
                job = await self.queue_manager.dequeue('search_enrichment', timeout=5)

                if job:
                    await self.process_job(job)

            except Exception as e:
                # Keep the worker alive; back off briefly before retrying.
                logger.error(f"Error in worker loop: {e}")
                await asyncio.sleep(1)

    async def process_job(self, job: PipelineJob):
        """Enrich the single RSS item carried by ``job`` and forward the job.

        The item is read from ``job.data['rss_item']``; for jobs in the older
        format only the first entry of ``job.data['rss_items']`` is used.  Any
        failure marks the job as failed on the ``search_enrichment`` queue.
        """
        try:
            logger.info(f"Processing job {job.job_id} for search enrichment")

            rss_item_data = job.data.get('rss_item')
            if not rss_item_data:
                # Backward compatibility: older jobs carried a list of items.
                rss_items = job.data.get('rss_items', [])
                if rss_items:
                    rss_item_data = rss_items[0]

            if not rss_item_data:
                logger.warning(f"No RSS item in job {job.job_id}")
                await self.queue_manager.mark_failed(
                    'search_enrichment',
                    job,
                    "No RSS item to process"
                )
                return

            rss_item = RSSItem(**rss_item_data)

            # Search Google using the item's title.
            search_results = await self._search_google(rss_item.title)

            enriched_item = EnrichedItem(
                rss_item=rss_item,
                search_results=search_results
            )

            logger.info(f"Enriched item with {len(search_results)} search results")

            # Hand the single enriched item to the next stage.
            job.data['enriched_item'] = enriched_item.dict()
            job.stages_completed.append('search_enrichment')
            job.stage = 'ai_article_generation'

            await self.queue_manager.enqueue('ai_article_generation', job)
            await self.queue_manager.mark_completed('search_enrichment', job.job_id)

        except Exception as e:
            logger.error(f"Error processing job {job.job_id}: {e}")
            await self.queue_manager.mark_failed('search_enrichment', job, str(e))

    async def _search_google(self, query: str) -> List[SearchResult]:
        """Call the Google Custom Search API and return the hits for ``query``.

        Returns an empty list when credentials are missing, when the query is
        blank, or when the request fails; errors are logged, never raised.
        """
        results = []

        if not self.google_api_key or not self.search_engine_id:
            logger.warning("Google API credentials not configured")
            return results

        # A blank query can only produce an API error, so skip the request.
        if not query or not query.strip():
            logger.warning("Empty search query - skipping Google search")
            return results

        try:
            url = "https://www.googleapis.com/customsearch/v1"
            params = {
                "key": self.google_api_key,
                "cx": self.search_engine_id,
                "q": query,
                # The API only accepts 1..10 results per request.
                "num": max(1, min(10, self.max_results_per_item)),
                "hl": "ko",
                "gl": "kr"
            }

            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(url, params=params) as response:
                    if response.status == 200:
                        data = await response.json()

                        for item in data.get('items', []):
                            result = SearchResult(
                                title=item.get('title', ''),
                                link=item.get('link', ''),
                                snippet=item.get('snippet', ''),
                                source='google'
                            )
                            results.append(result)
                    else:
                        # Include the start of the body: it names the cause
                        # (quota exhausted, invalid key, bad parameter, ...).
                        body = await response.text()
                        logger.error(f"Google API error: {response.status} - {body[:200]}")

        except Exception as e:
            logger.error(f"Error searching Google for '{query}': {e}")

        return results

    async def stop(self):
        """Disconnect from Redis."""
        await self.queue_manager.disconnect()
        logger.info("Google Search Worker stopped")
class ImageGeneratorWorker:
    """Queue worker that attaches generated images to Korean articles.

    Jobs are read from the ``image_generation`` queue.  For each job the
    article is loaded from ``articles_ko``, an image is requested from the
    Replicate predictions API (or a placeholder URL is used when no API token
    is configured), the article document is updated, and the job is forwarded
    to the ``translation`` queue.
    """

    # Statuses after which a prediction will never produce an image.
    _FAILED_STATUSES = ('failed', 'canceled')

    def __init__(self):
        """Read connection settings and the Replicate token from the environment."""
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        self.replicate_api_key = os.getenv("REPLICATE_API_TOKEN")
        self.replicate_api_url = "https://api.replicate.com/v1/predictions"
        # Stable Diffusion (SDXL) model version
        self.model_version = "stability-ai/sdxl:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b"
        self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        self.db_name = os.getenv("DB_NAME", "ai_writer_db")
        self.mongo_client = None
        self.db = None
        # Images generated per article; the pipeline currently produces one.
        self.images_per_article = 1

    async def start(self):
        """Open the Redis and MongoDB connections and process jobs forever."""
        logger.info("Starting Image Generator Worker")

        await self.queue_manager.connect()

        # Keep a handle on the client so stop() can close it.
        self.mongo_client = AsyncIOMotorClient(self.mongodb_url)
        self.db = self.mongo_client[self.db_name]

        if not self.replicate_api_key:
            logger.warning("Replicate API key not configured - using placeholder images")

        while True:
            try:
                job = await self.queue_manager.dequeue('image_generation', timeout=5)

                if job:
                    await self.process_job(job)

            except Exception as e:
                # Keep the worker alive; back off briefly before retrying.
                logger.error(f"Error in worker loop: {e}")
                await asyncio.sleep(1)

    async def process_job(self, job: PipelineJob):
        """Generate images for the job's article and store them in MongoDB.

        The article is identified by ``job.data['news_id']``.  Jobs without a
        ``news_id`` or whose article does not exist are marked as failed.
        """
        try:
            logger.info(f"Processing job {job.job_id} for image generation")

            news_id = job.data.get('news_id')

            if not news_id:
                logger.error(f"No news_id in job {job.job_id}")
                await self.queue_manager.mark_failed('image_generation', job, "No news_id")
                return

            # Look up the Korean source article.
            article = await self.db.articles_ko.find_one({"news_id": news_id})
            if not article:
                logger.error(f"Article {news_id} not found in MongoDB")
                await self.queue_manager.mark_failed('image_generation', job, "Article not found")
                return

            prompt = self._create_image_prompt_from_article(article)

            image_count = max(1, self.images_per_article)
            image_urls = []
            for i in range(image_count):
                image_urls.append(await self._generate_image(prompt))

                # Rate limiting: pause only between real API calls, never
                # after the last image.
                if self.replicate_api_key and i < image_count - 1:
                    await asyncio.sleep(2)

            await self.db.articles_ko.update_one(
                {"news_id": news_id},
                {
                    "$set": {
                        "images": image_urls,
                        "image_prompt": prompt
                    },
                    "$addToSet": {
                        "pipeline_stages": "image_generation"
                    }
                }
            )

            logger.info(f"Updated article {news_id} with {len(image_urls)} images")

            # Forward to the translation stage.
            job.stages_completed.append('image_generation')
            job.stage = 'translation'

            await self.queue_manager.enqueue('translation', job)
            await self.queue_manager.mark_completed('image_generation', job.job_id)

        except Exception as e:
            logger.error(f"Error processing job {job.job_id}: {e}")
            await self.queue_manager.mark_failed('image_generation', job, str(e))

    def _create_image_prompt_from_article(self, article: Dict) -> str:
        """Build an English image prompt from the article's keyword and categories.

        At most the first two categories are used; Korean category names with
        a known translation are mapped to English, others are passed through.
        When there are no categories the word ``news`` is used instead.
        """
        keyword = article.get('keyword', '')
        # Tolerate documents that store an explicit null for categories.
        categories = article.get('categories') or []

        # Category mapping (Korean -> English)
        category_map = {
            '기술': 'technology',
            '경제': 'business',
            '정치': 'politics',
            '교육': 'education',
            '사회': 'society',
            '문화': 'culture',
            '과학': 'science'
        }

        eng_categories = [category_map.get(cat, cat) for cat in categories]
        category_str = ', '.join(eng_categories[:2]) if eng_categories else 'news'

        return f"News illustration for {keyword} {category_str}, professional, modern, clean design, high quality, 4k, no text"

    async def _generate_image(self, prompt: str) -> str:
        """Create a Replicate prediction for ``prompt`` and return the image URL.

        Never raises: a placeholder URL is returned when no API token is
        configured or when any step of the generation fails.
        """
        try:
            if not self.replicate_api_key:
                # No token configured: fall back to a placeholder image.
                return "https://via.placeholder.com/800x600.png?text=News+Image"

            async with httpx.AsyncClient() as client:
                response = await client.post(
                    self.replicate_api_url,
                    headers={
                        "Authorization": f"Token {self.replicate_api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "version": self.model_version,
                        "input": {
                            "prompt": prompt,
                            "width": 768,
                            "height": 768,
                            "num_outputs": 1,
                            "scheduler": "K_EULER",
                            "num_inference_steps": 25,
                            "guidance_scale": 7.5,
                            "prompt_strength": 0.8,
                            "refine": "expert_ensemble_refiner",
                            "high_noise_frac": 0.8
                        }
                    },
                    timeout=60
                )

            if response.status_code not in (200, 201):
                logger.error(f"Replicate API error: {response.status_code}")
                return "https://via.placeholder.com/800x600.png?text=Generation+Failed"

            prediction_id = response.json().get('id')
            if not prediction_id:
                # Without an id there is nothing to poll.
                logger.error("Replicate API response did not contain a prediction id")
                return "https://via.placeholder.com/800x600.png?text=Generation+Failed"

            return await self._poll_prediction(prediction_id)

        except Exception as e:
            logger.error(f"Error generating image: {e}")
            return "https://via.placeholder.com/800x600.png?text=Error"

    async def _poll_prediction(self, prediction_id: str, max_attempts: int = 30) -> str:
        """Poll a prediction every 2 seconds until it finishes.

        Args:
            prediction_id: Id returned when the prediction was created.
            max_attempts: Maximum number of status requests before giving up.

        Returns:
            The first output URL on success, otherwise a placeholder URL whose
            text names the failure (failed/canceled, no output, poll error,
            timeout).
        """
        try:
            async with httpx.AsyncClient() as client:
                for _ in range(max_attempts):
                    response = await client.get(
                        f"{self.replicate_api_url}/{prediction_id}",
                        headers={
                            "Authorization": f"Token {self.replicate_api_key}"
                        },
                        timeout=30
                    )

                    if response.status_code != 200:
                        logger.error(f"Error polling prediction: {response.status_code}")
                        return "https://via.placeholder.com/800x600.png?text=Poll+Error"

                    result = response.json()
                    status = result.get('status')

                    if status == 'succeeded':
                        output = result.get('output')
                        if isinstance(output, list) and output:
                            return output[0]
                        # Some models return a single URL instead of a list.
                        if isinstance(output, str) and output:
                            return output
                        return "https://via.placeholder.com/800x600.png?text=No+Output"

                    if status in self._FAILED_STATUSES:
                        # A canceled prediction is as final as a failed one;
                        # polling it further would only run into the timeout.
                        logger.error(f"Prediction {status}: {result.get('error')}")
                        return "https://via.placeholder.com/800x600.png?text=Failed"

                    # Still starting/processing: wait before the next poll.
                    await asyncio.sleep(2)

            # Maximum number of attempts exceeded.
            return "https://via.placeholder.com/800x600.png?text=Timeout"

        except Exception as e:
            logger.error(f"Error polling prediction: {e}")
            return "https://via.placeholder.com/800x600.png?text=Poll+Exception"

    async def stop(self):
        """Disconnect from Redis and close the MongoDB client."""
        await self.queue_manager.disconnect()
        if self.mongo_client is not None:
            self.mongo_client.close()
            self.mongo_client = None
        logger.info("Image Generator Worker stopped")
# Module-wide logging setup and the logger used by every route handler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application object that all monitor routes are registered on.
app = FastAPI(title="Pipeline Monitor", version="1.0.0")

# CORS configuration: every origin, method and header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global connections; they stay None until startup_event() assigns them.
redis_client = None
mongodb_client = None
db = None
@app.get("/api/stats")
async def get_stats():
    """Return pipeline-wide statistics.

    The response holds the length of each monitored Redis list, the number of
    articles created since local midnight, the number of active keywords, the
    total number of articles and an ISO timestamp.  Any failure is logged and
    reported as HTTP 500.
    """
    # Redis lists whose lengths are reported.
    # NOTE(review): confirm these keys match the ones the workers actually use.
    queue_names = (
        "queue:keyword",
        "queue:rss",
        "queue:search",
        "queue:summarize",
        "queue:assembly",
    )

    try:
        # Pending jobs per queue, queried one after another.
        pending = {name: await redis_client.llen(name) for name in queue_names}

        # Articles created since the start of the current local day.
        midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        created_today = await db.articles.count_documents(
            {"created_at": {"$gte": midnight}}
        )

        enabled_keywords = await db.keywords.count_documents({"is_active": True})
        article_total = await db.articles.count_documents({})

        return {
            "queues": pending,
            "articles_today": created_today,
            "active_keywords": enabled_keywords,
            "total_articles": article_total,
            "timestamp": datetime.now().isoformat()
        }

    except Exception as exc:
        logger.error(f"Error getting stats: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@app.get("/api/keywords")
async def get_keywords():
    """List the active keywords together with their article statistics.

    For every keyword document with ``is_active`` set, the newest article
    (by ``created_at``) and the article count are looked up using the
    stringified ``_id`` as ``keyword_id``.  Failures are logged and reported
    as HTTP 500.
    """
    try:
        listing = []

        async for doc in db.keywords.find({"is_active": True}):
            # Articles reference their keyword through the stringified _id.
            article_filter = {"keyword_id": str(doc["_id"])}

            # Most recent article of this keyword, if any.
            newest = await db.articles.find_one(
                article_filter,
                sort=[("created_at", -1)]
            )

            listing.append({
                "id": str(doc["_id"]),
                "keyword": doc["keyword"],
                "schedule": doc.get("schedule", "30분마다"),
                "created_at": doc.get("created_at"),
                "last_article": newest["created_at"] if newest else None,
                "article_count": await db.articles.count_documents(article_filter)
            })

        return listing

    except Exception as exc:
        logger.error(f"Error getting keywords: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@app.get("/api/articles/{article_id}")
async def get_article(article_id: str):
    """Return the full document of a single article.

    Args:
        article_id: The article's ``_id`` as handed out by the list endpoint
            (the string form of the stored id).

    Raises:
        HTTPException: 404 when no article has that id, 500 when the database
            lookup itself fails.
    """
    # bson ships with pymongo, which this service already pins as a dependency.
    from bson import ObjectId

    # The list endpoint exposes str(_id), so a plain string comparison can
    # never match a stored ObjectId.  Match both representations so that
    # documents with string ids keep working as well.
    candidates = [article_id]
    if ObjectId.is_valid(article_id):
        candidates.append(ObjectId(article_id))

    try:
        article = await db.articles.find_one({"_id": {"$in": candidates}})
    except Exception as e:
        logger.error(f"Error getting article: {e}")
        raise HTTPException(status_code=500, detail=str(e))

    # Raised outside the try block so the 404 is not converted into a 500.
    if not article:
        raise HTTPException(status_code=404, detail="Article not found")

    # ObjectId is not JSON serialisable; expose it as a string like the list
    # endpoint does.
    article["_id"] = str(article["_id"])
    return article
@app.post("/api/trigger/{keyword}")
async def trigger_keyword_processing(keyword: str):
    """Manually enqueue a processing job for an active keyword.

    Args:
        keyword: Keyword text; it must exist in ``keywords`` with
            ``is_active`` set.

    Returns:
        A confirmation message and the id of the job that was pushed onto the
        ``queue:keyword`` Redis list.

    Raises:
        HTTPException: 404 when the keyword is unknown or inactive, 500 for
            any other failure.
    """
    try:
        # Look up the keyword.
        keyword_doc = await db.keywords.find_one({
            "keyword": keyword,
            "is_active": True
        })

        if not keyword_doc:
            raise HTTPException(status_code=404, detail="Keyword not found or inactive")

        # Create the job.
        job = PipelineJob(
            keyword_id=str(keyword_doc["_id"]),
            keyword=keyword,
            stage="keyword_processing",
            created_at=datetime.now()
        )

        # Append it to the keyword queue.
        await redis_client.rpush("queue:keyword", job.json())

        return {
            "message": f"Processing triggered for keyword: {keyword}",
            "job_id": job.job_id
        }

    except HTTPException:
        # Deliberate HTTP errors (the 404 above) must reach the client
        # unchanged instead of being rewritten to a 500 below.
        raise
    except Exception as e:
        logger.error(f"Error triggering keyword: {e}")
        raise HTTPException(status_code=500, detail=str(e))
class RSSCollectorWorker:
    """Queue worker that collects RSS items for a keyword.

    Jobs are read from the ``rss_collection`` queue.  The configured (or
    default) feeds are fetched, items already stored in ``articles_ko`` are
    dropped, and every remaining item is forwarded as its own job to the
    ``search_enrichment`` queue.
    """

    def __init__(self):
        """Read connection settings from the environment."""
        self.queue_manager = QueueManager(
            redis_url=os.getenv("REDIS_URL", "redis://redis:6379")
        )
        self.redis_client = None
        self.redis_url = os.getenv("REDIS_URL", "redis://redis:6379")
        self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        self.db_name = os.getenv("DB_NAME", "ai_writer_db")
        self.mongo_client = None
        self.db = None
        self.dedup_ttl = 86400 * 7  # duplicate-suppression window: 7 days
        self.max_items_per_feed = 100  # per-feed cap (Google News returns at most 100)

    async def start(self):
        """Open the Redis and MongoDB connections and process jobs forever."""
        logger.info("Starting RSS Collector Worker")

        await self.queue_manager.connect()
        self.redis_client = await redis.from_url(
            self.redis_url,
            encoding="utf-8",
            decode_responses=True
        )

        # Keep a handle on the client so stop() can close it.
        self.mongo_client = AsyncIOMotorClient(self.mongodb_url)
        self.db = self.mongo_client[self.db_name]

        while True:
            try:
                # Wait up to 5 seconds for the next job.
                job = await self.queue_manager.dequeue('rss_collection', timeout=5)

                if job:
                    await self.process_job(job)

            except Exception as e:
                # Keep the worker alive; back off briefly before retrying.
                logger.error(f"Error in worker loop: {e}")
                await asyncio.sleep(1)

    async def process_job(self, job: PipelineJob):
        """Collect, de-duplicate and fan out the RSS items for one keyword job.

        Feeds come from ``job.data['rss_feeds']``; when that list is empty a
        default set (two Google News searches plus BBC and NYT technology) is
        used.  The job is marked failed when no item at all was collected and
        completed otherwise, including when every item was a duplicate.
        """
        try:
            logger.info(f"Processing job {job.job_id} for keyword '{job.keyword}'")

            keyword = job.keyword
            rss_feeds = job.data.get('rss_feeds', [])

            if not rss_feeds:
                # Default feeds.  The {keyword} placeholder is substituted,
                # URL-encoded, by _prepare_feeds().
                rss_feeds = [
                    "https://news.google.com/rss/search?q={keyword}&hl=en-US&gl=US&ceid=US:en",
                    "https://news.google.com/rss/search?q={keyword}&hl=ko&gl=KR&ceid=KR:ko",
                    "https://feeds.bbci.co.uk/news/technology/rss.xml",
                    "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml"
                ]
                logger.info(f"Using default RSS feeds for keyword: {keyword}")

            processed_feeds = self._prepare_feeds(rss_feeds, keyword)

            all_items = []
            for feed_url in processed_feeds:
                try:
                    all_items.extend(await self._fetch_rss_feed(feed_url, keyword))
                except Exception as e:
                    logger.error(f"Error fetching feed {feed_url}: {e}")

            if not all_items:
                logger.warning(f"No RSS items collected for '{keyword}'")
                await self.queue_manager.mark_failed(
                    'rss_collection',
                    job,
                    "No RSS items collected"
                )
                return

            unique_items = await self._deduplicate_items(all_items, keyword)

            if not unique_items:
                logger.info(f"No new items found for '{keyword}' after deduplication")
                await self.queue_manager.mark_completed('rss_collection', job.job_id)
                return

            logger.info(f"Collected {len(unique_items)} unique items for '{keyword}'")

            # One job per item; a delay between enqueues spreads the API
            # calls made by the downstream stages.
            enqueue_delay = float(os.getenv("RSS_ENQUEUE_DELAY", "1.0"))
            total = len(unique_items)

            for idx, item in enumerate(unique_items):
                # Hash the GUID when present, otherwise title + link.
                if item.guid:
                    hash_source = f"{keyword}:guid:{item.guid}"
                else:
                    hash_source = f"{keyword}:title:{item.title}:link:{item.link}"

                item_job = PipelineJob(
                    keyword_id=f"{job.keyword_id}_{idx}",
                    keyword=job.keyword,
                    stage='search_enrichment',
                    data={
                        'rss_item': item.dict(),
                        'original_job_id': job.job_id,
                        'item_index': idx,
                        'total_items': total,
                        'item_hash': hashlib.md5(hash_source.encode()).hexdigest()
                    },
                    stages_completed=['rss_collection']
                )

                await self.queue_manager.enqueue('search_enrichment', item_job)
                logger.info(f"Enqueued item {idx+1}/{total} for keyword '{keyword}'")

                # Pause before the next item, but not after the last one.
                if idx < total - 1:
                    logger.debug(f"Waiting {enqueue_delay}s before next item...")
                    await asyncio.sleep(enqueue_delay)

            # The original job is done once every item has been forwarded.
            await self.queue_manager.mark_completed('rss_collection', job.job_id)
            logger.info(f"Completed RSS collection for job {job.job_id}: {total} items processed")

        except Exception as e:
            logger.error(f"Error processing job {job.job_id}: {e}")
            await self.queue_manager.mark_failed('rss_collection', job, str(e))

    def _prepare_feeds(self, feeds: List[str], keyword: str) -> List[str]:
        """Substitute the ``{keyword}`` placeholder in every feed URL.

        The keyword is percent-encoded first so that spaces, ``&``, ``#`` and
        non-ASCII text cannot corrupt the URL.  Feeds without the placeholder
        are returned unchanged.
        """
        from urllib.parse import quote

        encoded_keyword = quote(keyword, safe='')
        return [feed.replace('{keyword}', encoded_keyword) for feed in feeds]

    async def _fetch_rss_feed(self, feed_url: str, keyword: str) -> List[RSSItem]:
        """Download one feed and return the entries relevant to ``keyword``.

        Entries from ``news.google.com`` are all accepted; for other feeds the
        keyword must occur (case-insensitively) in the title or summary.
        Errors are logged and result in the items collected so far (possibly
        none) being returned.
        """
        items = []

        try:
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(feed_url) as response:
                    if response.status != 200:
                        # Do not feed an error page to the parser.
                        logger.error(f"Feed {feed_url} returned HTTP {response.status}")
                        return items
                    # Pass raw bytes so feedparser can apply the encoding
                    # declared by the document itself.
                    content = await response.read()

            feed = feedparser.parse(content)

            logger.info(f"Found {len(feed.entries)} entries in feed {feed_url}")

            # Google News feeds are already keyword search results.
            is_google_news = "news.google.com" in feed_url
            needle = keyword.lower()

            for entry in feed.entries[:self.max_items_per_feed]:
                title = entry.get('title', '')
                summary = entry.get('summary', '')

                relevant = (
                    is_google_news
                    or needle in title.lower()
                    or needle in summary.lower()
                )
                if not relevant:
                    continue

                guid = entry.get('id', entry.get('guid', ''))

                items.append(RSSItem(
                    title=title,
                    link=entry.get('link', ''),
                    guid=guid,
                    published=entry.get('published', ''),
                    summary=summary[:500] if summary else '',
                    source_feed=feed_url
                ))
                logger.debug(f"Added item: {title[:50]}... (guid: {guid[:30] if guid else 'no-guid'})")

        except Exception as e:
            logger.error(f"Error fetching RSS feed {feed_url}: {e}")

        return items

    async def _deduplicate_items(self, items: List[RSSItem], keyword: str) -> List[RSSItem]:
        """Drop items seen earlier in this batch or already stored in MongoDB.

        Items with a GUID are compared by GUID (``articles_ko.rss_guid``);
        items without one are compared by link
        (``articles_ko.references.link``).
        """
        unique_items = []
        seen_guids = set()  # GUIDs seen in the current batch
        seen_links = set()  # links seen in the current batch

        for item in items:
            if item.guid:
                if item.guid in seen_guids:
                    logger.debug(f"Duplicate GUID in batch: {item.guid[:30]}")
                    continue

                # Has an article with this GUID already been produced?
                existing_article = await self.db.articles_ko.find_one({"rss_guid": item.guid})
                if existing_article:
                    logger.info(f"Article with GUID {item.guid[:30]} already processed, skipping")
                    continue

                seen_guids.add(item.guid)
            else:
                if item.link in seen_links:
                    logger.debug(f"Duplicate link in batch: {item.link[:50]}")
                    continue

                # Has an article referencing this link already been produced?
                existing_article = await self.db.articles_ko.find_one({"references.link": item.link})
                if existing_article:
                    logger.info(f"Article with link {item.link[:50]} already processed, skipping")
                    continue

                seen_links.add(item.link)

            unique_items.append(item)
            logger.debug(f"New item added: {item.title[:50]}...")

        logger.info(f"Deduplication result: {len(unique_items)} new items out of {len(items)} total")

        return unique_items

    async def stop(self):
        """Disconnect from Redis and close the MongoDB client."""
        await self.queue_manager.disconnect()
        if self.redis_client:
            await self.redis_client.close()
        if self.mongo_client is not None:
            self.mongo_client.close()
            self.mongo_client = None
        logger.info("RSS Collector Worker stopped")
100644 index 0000000..e4faabe --- /dev/null +++ b/services/pipeline/scheduler/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install dependencies +COPY ./scheduler/requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy shared module +COPY ./shared /app/shared + +# Copy scheduler code +COPY ./scheduler /app + +# Run scheduler +CMD ["python", "keyword_scheduler.py"] diff --git a/services/pipeline/scheduler/keyword_manager.py b/services/pipeline/scheduler/keyword_manager.py new file mode 100644 index 0000000..1dc8e32 --- /dev/null +++ b/services/pipeline/scheduler/keyword_manager.py @@ -0,0 +1,336 @@ +""" +Keyword Manager API +키워드를 추가/수정/삭제하는 관리 API +""" +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +from typing import List, Optional +from datetime import datetime, timedelta +from motor.motor_asyncio import AsyncIOMotorClient +import uvicorn +import os +import sys +import uuid + +# Import from shared module +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from shared.models import Keyword + +app = FastAPI(title="Keyword Manager API") + +# MongoDB 연결 +mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") +db_name = os.getenv("DB_NAME", "ai_writer_db") + +@app.on_event("startup") +async def startup_event(): + """앱 시작 시 MongoDB 연결""" + app.mongodb_client = AsyncIOMotorClient(mongodb_url) + app.db = app.mongodb_client[db_name] + +@app.on_event("shutdown") +async def shutdown_event(): + """앱 종료 시 연결 해제""" + app.mongodb_client.close() + +class KeywordCreate(BaseModel): + """키워드 생성 요청 모델""" + keyword: str + interval_minutes: int = 60 + priority: int = 0 + rss_feeds: List[str] = [] + max_articles_per_run: int = 100 + is_active: bool = True + +class KeywordUpdate(BaseModel): + """키워드 업데이트 요청 모델""" + interval_minutes: Optional[int] = None + priority: Optional[int] = None + rss_feeds: Optional[List[str]] = None + max_articles_per_run: Optional[int] = None + 
is_active: Optional[bool] = None + +@app.get("/") +async def root(): + """API 상태 확인""" + return {"status": "Keyword Manager API is running"} + +@app.get("/threads/status") +async def get_threads_status(): + """모든 스레드 상태 조회""" + try: + # MongoDB에서 키워드 정보와 함께 상태 반환 + cursor = app.db.keywords.find() + keywords = await cursor.to_list(None) + + threads_status = [] + for kw in keywords: + status = { + "keyword": kw.get("keyword"), + "keyword_id": kw.get("keyword_id"), + "is_active": kw.get("is_active"), + "interval_minutes": kw.get("interval_minutes"), + "priority": kw.get("priority"), + "last_run": kw.get("last_run").isoformat() if kw.get("last_run") else None, + "next_run": kw.get("next_run").isoformat() if kw.get("next_run") else None, + "thread_status": "active" if kw.get("is_active") else "inactive" + } + + # 다음 실행까지 남은 시간 계산 + if kw.get("next_run"): + remaining = (kw.get("next_run") - datetime.now()).total_seconds() + if remaining > 0: + status["minutes_until_next_run"] = round(remaining / 60, 1) + else: + status["minutes_until_next_run"] = 0 + status["thread_status"] = "pending_execution" + + threads_status.append(status) + + # 우선순위 순으로 정렬 + threads_status.sort(key=lambda x: x.get("priority", 0), reverse=True) + + return { + "total_threads": len(threads_status), + "active_threads": sum(1 for t in threads_status if t.get("is_active")), + "threads": threads_status + } + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/keywords") +async def list_keywords(): + """모든 키워드 조회""" + try: + cursor = app.db.keywords.find() + keywords = await cursor.to_list(None) + + # 각 키워드 정보 정리 + result = [] + for kw in keywords: + result.append({ + "keyword_id": kw.get("keyword_id"), + "keyword": kw.get("keyword"), + "interval_minutes": kw.get("interval_minutes"), + "priority": kw.get("priority"), + "is_active": kw.get("is_active"), + "last_run": kw.get("last_run").isoformat() if kw.get("last_run") else None, + "next_run": 
kw.get("next_run").isoformat() if kw.get("next_run") else None, + "rss_feeds": kw.get("rss_feeds", []), + "max_articles_per_run": kw.get("max_articles_per_run", 100) + }) + + return { + "total": len(result), + "keywords": result + } + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/keywords/{keyword_text}") +async def get_keyword(keyword_text: str): + """특정 키워드 조회""" + try: + keyword = await app.db.keywords.find_one({"keyword": keyword_text}) + if not keyword: + raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found") + + return { + "keyword_id": keyword.get("keyword_id"), + "keyword": keyword.get("keyword"), + "interval_minutes": keyword.get("interval_minutes"), + "priority": keyword.get("priority"), + "is_active": keyword.get("is_active"), + "last_run": keyword.get("last_run").isoformat() if keyword.get("last_run") else None, + "next_run": keyword.get("next_run").isoformat() if keyword.get("next_run") else None, + "rss_feeds": keyword.get("rss_feeds", []), + "max_articles_per_run": keyword.get("max_articles_per_run", 100) + } + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/keywords") +async def create_keyword(keyword_data: KeywordCreate): + """새 키워드 생성""" + try: + # 중복 체크 + existing = await app.db.keywords.find_one({"keyword": keyword_data.keyword}) + if existing: + raise HTTPException(status_code=400, detail=f"Keyword '{keyword_data.keyword}' already exists") + + # 새 키워드 생성 + keyword = Keyword( + keyword_id=str(uuid.uuid4()), + keyword=keyword_data.keyword, + interval_minutes=keyword_data.interval_minutes, + priority=keyword_data.priority, + rss_feeds=keyword_data.rss_feeds, + max_articles_per_run=keyword_data.max_articles_per_run, + is_active=keyword_data.is_active, + next_run=datetime.now() + timedelta(minutes=1), # 1분 후 첫 실행 + created_at=datetime.now(), + updated_at=datetime.now() + ) + + await 
app.db.keywords.insert_one(keyword.model_dump()) + + return { + "message": f"Keyword '{keyword_data.keyword}' created successfully", + "keyword_id": keyword.keyword_id, + "note": "The scheduler will automatically detect and start processing this keyword within 30 seconds" + } + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.put("/keywords/{keyword_text}") +async def update_keyword(keyword_text: str, update_data: KeywordUpdate): + """키워드 업데이트""" + try: + # 키워드 존재 확인 + existing = await app.db.keywords.find_one({"keyword": keyword_text}) + if not existing: + raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found") + + # 업데이트 데이터 준비 + update_dict = {} + if update_data.interval_minutes is not None: + update_dict["interval_minutes"] = update_data.interval_minutes + if update_data.priority is not None: + update_dict["priority"] = update_data.priority + if update_data.rss_feeds is not None: + update_dict["rss_feeds"] = update_data.rss_feeds + if update_data.max_articles_per_run is not None: + update_dict["max_articles_per_run"] = update_data.max_articles_per_run + if update_data.is_active is not None: + update_dict["is_active"] = update_data.is_active + + if update_dict: + update_dict["updated_at"] = datetime.now() + + # 만약 interval이 변경되면 next_run도 재계산 + if "interval_minutes" in update_dict: + update_dict["next_run"] = datetime.now() + timedelta(minutes=update_dict["interval_minutes"]) + + result = await app.db.keywords.update_one( + {"keyword": keyword_text}, + {"$set": update_dict} + ) + + if result.modified_count > 0: + action_note = "" + if update_data.is_active is False: + action_note = "The scheduler will stop the thread for this keyword within 30 seconds." + elif update_data.is_active is True and not existing.get("is_active"): + action_note = "The scheduler will start a new thread for this keyword within 30 seconds." 
+ + return { + "message": f"Keyword '{keyword_text}' updated successfully", + "updated_fields": list(update_dict.keys()), + "note": action_note + } + else: + return {"message": "No changes made"} + else: + return {"message": "No update data provided"} + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.delete("/keywords/{keyword_text}") +async def delete_keyword(keyword_text: str): + """키워드 삭제""" + try: + # 키워드 존재 확인 + existing = await app.db.keywords.find_one({"keyword": keyword_text}) + if not existing: + raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found") + + # 삭제 + result = await app.db.keywords.delete_one({"keyword": keyword_text}) + + if result.deleted_count > 0: + return { + "message": f"Keyword '{keyword_text}' deleted successfully", + "note": "The scheduler will stop the thread for this keyword within 30 seconds" + } + else: + raise HTTPException(status_code=500, detail="Failed to delete keyword") + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/keywords/{keyword_text}/activate") +async def activate_keyword(keyword_text: str): + """키워드 활성화""" + try: + result = await app.db.keywords.update_one( + {"keyword": keyword_text}, + {"$set": {"is_active": True, "updated_at": datetime.now()}} + ) + + if result.matched_count == 0: + raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found") + + return { + "message": f"Keyword '{keyword_text}' activated", + "note": "The scheduler will start processing this keyword within 30 seconds" + } + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/keywords/{keyword_text}/deactivate") +async def deactivate_keyword(keyword_text: str): + """키워드 비활성화""" + try: + result = await app.db.keywords.update_one( + {"keyword": keyword_text}, + {"$set": {"is_active": 
False, "updated_at": datetime.now()}} + ) + + if result.matched_count == 0: + raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found") + + return { + "message": f"Keyword '{keyword_text}' deactivated", + "note": "The scheduler will stop processing this keyword within 30 seconds" + } + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/keywords/{keyword_text}/trigger") +async def trigger_keyword(keyword_text: str): + """키워드 즉시 실행 트리거""" + try: + # next_run을 현재 시간으로 설정하여 즉시 실행되도록 함 + result = await app.db.keywords.update_one( + {"keyword": keyword_text}, + {"$set": {"next_run": datetime.now(), "updated_at": datetime.now()}} + ) + + if result.matched_count == 0: + raise HTTPException(status_code=404, detail=f"Keyword '{keyword_text}' not found") + + return { + "message": f"Keyword '{keyword_text}' triggered for immediate execution", + "note": "The scheduler will execute this keyword within the next minute" + } + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + port = int(os.getenv("API_PORT", "8100")) + uvicorn.run(app, host="0.0.0.0", port=port) \ No newline at end of file diff --git a/services/pipeline/scheduler/keyword_scheduler.py b/services/pipeline/scheduler/keyword_scheduler.py new file mode 100644 index 0000000..2b7ce03 --- /dev/null +++ b/services/pipeline/scheduler/keyword_scheduler.py @@ -0,0 +1,245 @@ +""" +Keyword Scheduler Service +데이터베이스에 등록된 키워드를 주기적으로 실행하는 스케줄러 +""" +import asyncio +import logging +import os +import sys +from datetime import datetime, timedelta +from motor.motor_asyncio import AsyncIOMotorClient +from typing import List, Optional +import uuid + +# Import from shared module +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from shared.models import Keyword, PipelineJob +from shared.queue_manager import QueueManager + 
+logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class KeywordScheduler: + def __init__(self): + self.queue_manager = QueueManager( + redis_url=os.getenv("REDIS_URL", "redis://redis:6379") + ) + self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") + self.db_name = os.getenv("DB_NAME", "ai_writer_db") + self.db = None + self.check_interval = int(os.getenv("SCHEDULER_CHECK_INTERVAL", "60")) # 1분마다 체크 + self.default_interval = int(os.getenv("DEFAULT_KEYWORD_INTERVAL", "60")) # 기본 1시간 + + async def start(self): + """스케줄러 시작""" + logger.info("Starting Keyword Scheduler") + + # Redis 연결 + await self.queue_manager.connect() + + # MongoDB 연결 + client = AsyncIOMotorClient(self.mongodb_url) + self.db = client[self.db_name] + + # 초기 키워드 설정 + await self.initialize_keywords() + + # 메인 루프 + while True: + try: + await self.check_and_execute_keywords() + await asyncio.sleep(self.check_interval) + except Exception as e: + logger.error(f"Error in scheduler loop: {e}") + await asyncio.sleep(10) + + async def initialize_keywords(self): + """초기 키워드 설정 (없으면 생성)""" + try: + # keywords 컬렉션 확인 + count = await self.db.keywords.count_documents({}) + + if count == 0: + logger.info("No keywords found. 
Creating default keywords...") + + # 기본 키워드 생성 + default_keywords = [ + { + "keyword": "AI", + "interval_minutes": 60, + "is_active": True, + "priority": 1, + "rss_feeds": [] + }, + { + "keyword": "경제", + "interval_minutes": 120, + "is_active": True, + "priority": 0, + "rss_feeds": [] + }, + { + "keyword": "테크놀로지", + "interval_minutes": 60, + "is_active": True, + "priority": 1, + "rss_feeds": [] + } + ] + + for kw_data in default_keywords: + keyword = Keyword(**kw_data) + # 다음 실행 시간 설정 + keyword.next_run = datetime.now() + timedelta(minutes=5) # 5분 후 첫 실행 + await self.db.keywords.insert_one(keyword.dict()) + logger.info(f"Created keyword: {keyword.keyword}") + + logger.info(f"Found {count} keywords in database") + + except Exception as e: + logger.error(f"Error initializing keywords: {e}") + + async def check_and_execute_keywords(self): + """실행할 키워드 체크 및 실행""" + try: + # 현재 시간 + now = datetime.now() + + # 실행할 키워드 조회 (활성화되고 next_run이 현재 시간 이전인 것) + query = { + "is_active": True, + "$or": [ + {"next_run": {"$lte": now}}, + {"next_run": None} # next_run이 설정되지 않은 경우 + ] + } + + # 우선순위 순으로 정렬 + cursor = self.db.keywords.find(query).sort("priority", -1) + keywords = await cursor.to_list(None) + + for keyword_data in keywords: + keyword = Keyword(**keyword_data) + await self.execute_keyword(keyword) + + except Exception as e: + logger.error(f"Error checking keywords: {e}") + + async def execute_keyword(self, keyword: Keyword): + """키워드 실행""" + try: + logger.info(f"Executing keyword: {keyword.keyword}") + + # PipelineJob 생성 + job = PipelineJob( + keyword_id=keyword.keyword_id, + keyword=keyword.keyword, + stage='rss_collection', + data={ + 'rss_feeds': keyword.rss_feeds if keyword.rss_feeds else [], + 'max_articles': keyword.max_articles_per_run, + 'scheduled': True + }, + priority=keyword.priority + ) + + # 큐에 작업 추가 + await self.queue_manager.enqueue('rss_collection', job) + logger.info(f"Enqueued job for keyword '{keyword.keyword}' with job_id: {job.job_id}") + + # 키워드 
업데이트 + update_data = { + "last_run": datetime.now(), + "next_run": datetime.now() + timedelta(minutes=keyword.interval_minutes), + "updated_at": datetime.now() + } + + await self.db.keywords.update_one( + {"keyword_id": keyword.keyword_id}, + {"$set": update_data} + ) + + logger.info(f"Updated keyword '{keyword.keyword}' - next run at {update_data['next_run']}") + + except Exception as e: + logger.error(f"Error executing keyword {keyword.keyword}: {e}") + + async def add_keyword(self, keyword_text: str, interval_minutes: int = None, + rss_feeds: List[str] = None, priority: int = 0): + """새 키워드 추가""" + try: + # 중복 체크 + existing = await self.db.keywords.find_one({"keyword": keyword_text}) + if existing: + logger.warning(f"Keyword '{keyword_text}' already exists") + return None + + # 새 키워드 생성 + keyword = Keyword( + keyword=keyword_text, + interval_minutes=interval_minutes or self.default_interval, + rss_feeds=rss_feeds or [], + priority=priority, + next_run=datetime.now() + timedelta(minutes=1) # 1분 후 첫 실행 + ) + + result = await self.db.keywords.insert_one(keyword.dict()) + logger.info(f"Added new keyword: {keyword_text}") + return keyword + + except Exception as e: + logger.error(f"Error adding keyword: {e}") + return None + + async def update_keyword(self, keyword_id: str, **kwargs): + """키워드 업데이트""" + try: + # 업데이트할 필드 + update_data = {k: v for k, v in kwargs.items() if v is not None} + update_data["updated_at"] = datetime.now() + + result = await self.db.keywords.update_one( + {"keyword_id": keyword_id}, + {"$set": update_data} + ) + + if result.modified_count > 0: + logger.info(f"Updated keyword {keyword_id}") + return True + return False + + except Exception as e: + logger.error(f"Error updating keyword: {e}") + return False + + async def delete_keyword(self, keyword_id: str): + """키워드 삭제""" + try: + result = await self.db.keywords.delete_one({"keyword_id": keyword_id}) + if result.deleted_count > 0: + logger.info(f"Deleted keyword {keyword_id}") + return True + 
return False + + except Exception as e: + logger.error(f"Error deleting keyword: {e}") + return False + + async def stop(self): + """스케줄러 중지""" + await self.queue_manager.disconnect() + logger.info("Keyword Scheduler stopped") + +async def main(): + """메인 함수""" + scheduler = KeywordScheduler() + + try: + await scheduler.start() + except KeyboardInterrupt: + logger.info("Received interrupt signal") + finally: + await scheduler.stop() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/services/pipeline/scheduler/multi_thread_scheduler.py b/services/pipeline/scheduler/multi_thread_scheduler.py new file mode 100644 index 0000000..2aa17ca --- /dev/null +++ b/services/pipeline/scheduler/multi_thread_scheduler.py @@ -0,0 +1,361 @@ +""" +Multi-threaded Keyword Scheduler Service +하나의 프로세스에서 여러 스레드로 키워드를 관리하는 스케줄러 +""" +import asyncio +import logging +import os +import sys +from datetime import datetime, timedelta +from motor.motor_asyncio import AsyncIOMotorClient +from typing import Dict +import threading +import time + +# Import from shared module +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from shared.models import Keyword, PipelineJob +from shared.queue_manager import QueueManager + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# 전역 변수로 스케줄러 인스턴스 참조 저장 +scheduler_instance = None + +class KeywordThread(threading.Thread): + """개별 키워드를 관리하는 스레드""" + + def __init__(self, keyword_text: str, mongodb_url: str, db_name: str, redis_url: str): + super().__init__(name=f"Thread-{keyword_text}") + self.keyword_text = keyword_text + self.mongodb_url = mongodb_url + self.db_name = db_name + self.redis_url = redis_url + self.running = True + self.keyword = None + self.status = "initializing" + self.last_execution = None + self.execution_count = 0 + self.error_count = 0 + self.last_error = None + + def run(self): + """스레드 실행""" + # 새로운 이벤트 루프 생성 + loop = 
asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + try: + loop.run_until_complete(self.run_scheduler()) + finally: + loop.close() + + async def run_scheduler(self): + """비동기 스케줄러 실행""" + # Redis 연결 + self.queue_manager = QueueManager(redis_url=self.redis_url) + await self.queue_manager.connect() + + # MongoDB 연결 + client = AsyncIOMotorClient(self.mongodb_url) + self.db = client[self.db_name] + + logger.info(f"[{self.keyword_text}] Thread started") + + # 키워드 로드 + await self.load_keyword() + + if not self.keyword: + logger.error(f"[{self.keyword_text}] Failed to load keyword") + return + + # 메인 루프 + while self.running: + try: + # 키워드 상태 체크 + await self.reload_keyword() + + if not self.keyword.is_active: + self.status = "inactive" + logger.info(f"[{self.keyword_text}] Keyword is inactive, sleeping...") + await asyncio.sleep(60) + continue + + # 실행 시간 체크 + now = datetime.now() + if self.keyword.next_run and self.keyword.next_run <= now: + self.status = "executing" + await self.execute_keyword() + # 다음 실행 시간까지 대기 + sleep_seconds = self.keyword.interval_minutes * 60 + self.status = "waiting" + else: + # 다음 체크까지 1분 대기 + sleep_seconds = 60 + self.status = "waiting" + + await asyncio.sleep(sleep_seconds) + + except Exception as e: + self.error_count += 1 + self.last_error = str(e) + self.status = "error" + logger.error(f"[{self.keyword_text}] Error in thread loop: {e}") + await asyncio.sleep(60) + + await self.queue_manager.disconnect() + logger.info(f"[{self.keyword_text}] Thread stopped") + + async def load_keyword(self): + """키워드 초기 로드""" + try: + keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text}) + if keyword_doc: + self.keyword = Keyword(**keyword_doc) + logger.info(f"[{self.keyword_text}] Loaded keyword") + except Exception as e: + logger.error(f"[{self.keyword_text}] Error loading keyword: {e}") + + async def reload_keyword(self): + """키워드 정보 재로드""" + try: + keyword_doc = await self.db.keywords.find_one({"keyword": 
self.keyword_text}) + if keyword_doc: + self.keyword = Keyword(**keyword_doc) + except Exception as e: + logger.error(f"[{self.keyword_text}] Error reloading keyword: {e}") + + async def execute_keyword(self): + """키워드 실행""" + try: + logger.info(f"[{self.keyword_text}] Executing keyword") + + # PipelineJob 생성 + job = PipelineJob( + keyword_id=self.keyword.keyword_id, + keyword=self.keyword.keyword, + stage='rss_collection', + data={ + 'rss_feeds': self.keyword.rss_feeds if self.keyword.rss_feeds else [], + 'max_articles': self.keyword.max_articles_per_run, + 'scheduled': True, + 'thread_name': self.name + }, + priority=self.keyword.priority + ) + + # 큐에 작업 추가 + await self.queue_manager.enqueue('rss_collection', job) + logger.info(f"[{self.keyword_text}] Enqueued job {job.job_id}") + + # 키워드 업데이트 + update_data = { + "last_run": datetime.now(), + "next_run": datetime.now() + timedelta(minutes=self.keyword.interval_minutes), + "updated_at": datetime.now() + } + + await self.db.keywords.update_one( + {"keyword_id": self.keyword.keyword_id}, + {"$set": update_data} + ) + + self.last_execution = datetime.now() + self.execution_count += 1 + logger.info(f"[{self.keyword_text}] Next run at {update_data['next_run']}") + + except Exception as e: + self.error_count += 1 + self.last_error = str(e) + logger.error(f"[{self.keyword_text}] Error executing keyword: {e}") + + def stop(self): + """스레드 중지""" + self.running = False + self.status = "stopped" + + def get_status(self): + """스레드 상태 반환""" + return { + "keyword": self.keyword_text, + "thread_name": self.name, + "status": self.status, + "is_alive": self.is_alive(), + "execution_count": self.execution_count, + "last_execution": self.last_execution.isoformat() if self.last_execution else None, + "error_count": self.error_count, + "last_error": self.last_error, + "next_run": self.keyword.next_run.isoformat() if self.keyword and self.keyword.next_run else None + } + + +class MultiThreadScheduler: + """멀티스레드 키워드 스케줄러""" + + def 
__init__(self): + self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") + self.db_name = os.getenv("DB_NAME", "ai_writer_db") + self.redis_url = os.getenv("REDIS_URL", "redis://redis:6379") + self.threads: Dict[str, KeywordThread] = {} + self.running = True + # Singleton 인스턴스를 전역 변수로 저장 + global scheduler_instance + scheduler_instance = self + + async def start(self): + """스케줄러 시작""" + logger.info("Starting Multi-threaded Keyword Scheduler") + + # MongoDB 연결 + client = AsyncIOMotorClient(self.mongodb_url) + self.db = client[self.db_name] + + # 초기 키워드 설정 + await self.initialize_keywords() + + # 키워드 로드 및 스레드 시작 + await self.load_and_start_threads() + + # 메인 루프 - 새로운 키워드 체크 + while self.running: + try: + await self.check_new_keywords() + await asyncio.sleep(30) # 30초마다 새 키워드 체크 + except Exception as e: + logger.error(f"Error in main loop: {e}") + await asyncio.sleep(30) + + async def initialize_keywords(self): + """초기 키워드 설정 (없으면 생성)""" + try: + count = await self.db.keywords.count_documents({}) + + if count == 0: + logger.info("No keywords found. 
Creating default keywords...") + + default_keywords = [ + { + "keyword": "AI", + "interval_minutes": 60, + "is_active": True, + "priority": 1, + "rss_feeds": [], + "next_run": datetime.now() + timedelta(minutes=1) + }, + { + "keyword": "경제", + "interval_minutes": 120, + "is_active": True, + "priority": 0, + "rss_feeds": [], + "next_run": datetime.now() + timedelta(minutes=1) + }, + { + "keyword": "테크놀로지", + "interval_minutes": 60, + "is_active": True, + "priority": 1, + "rss_feeds": [], + "next_run": datetime.now() + timedelta(minutes=1) + } + ] + + for kw_data in default_keywords: + keyword = Keyword(**kw_data) + await self.db.keywords.insert_one(keyword.model_dump()) + logger.info(f"Created keyword: {keyword.keyword}") + + logger.info(f"Found {count} keywords in database") + + except Exception as e: + logger.error(f"Error initializing keywords: {e}") + + async def load_and_start_threads(self): + """키워드 로드 및 스레드 시작""" + try: + # 활성 키워드 조회 + cursor = self.db.keywords.find({"is_active": True}) + keywords = await cursor.to_list(None) + + for keyword_doc in keywords: + keyword = Keyword(**keyword_doc) + if keyword.keyword not in self.threads: + self.start_keyword_thread(keyword.keyword) + + logger.info(f"Started {len(self.threads)} keyword threads") + + except Exception as e: + logger.error(f"Error loading keywords: {e}") + + def start_keyword_thread(self, keyword_text: str): + """키워드 스레드 시작""" + if keyword_text not in self.threads: + thread = KeywordThread( + keyword_text=keyword_text, + mongodb_url=self.mongodb_url, + db_name=self.db_name, + redis_url=self.redis_url + ) + thread.start() + self.threads[keyword_text] = thread + logger.info(f"Started thread for keyword: {keyword_text}") + + async def check_new_keywords(self): + """새로운 키워드 체크 및 스레드 관리""" + try: + # 현재 활성 키워드 조회 + cursor = self.db.keywords.find({"is_active": True}) + active_keywords = await cursor.to_list(None) + active_keyword_texts = {kw['keyword'] for kw in active_keywords} + + # 새 키워드 시작 + for 
keyword_text in active_keyword_texts: + if keyword_text not in self.threads: + self.start_keyword_thread(keyword_text) + + # 비활성화된 키워드 스레드 중지 + for keyword_text in list(self.threads.keys()): + if keyword_text not in active_keyword_texts: + thread = self.threads[keyword_text] + thread.stop() + del self.threads[keyword_text] + logger.info(f"Stopped thread for keyword: {keyword_text}") + + except Exception as e: + logger.error(f"Error checking new keywords: {e}") + + def stop(self): + """모든 스레드 중지""" + self.running = False + for thread in self.threads.values(): + thread.stop() + + # 모든 스레드가 종료될 때까지 대기 + for thread in self.threads.values(): + thread.join(timeout=5) + + logger.info("Multi-threaded Keyword Scheduler stopped") + + def get_threads_status(self): + """모든 스레드 상태 반환""" + status_list = [] + for thread in self.threads.values(): + status_list.append(thread.get_status()) + return status_list + + +async def main(): + """메인 함수""" + scheduler = MultiThreadScheduler() + + try: + await scheduler.start() + except KeyboardInterrupt: + logger.info("Received interrupt signal") + finally: + scheduler.stop() + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/services/pipeline/scheduler/requirements.txt b/services/pipeline/scheduler/requirements.txt new file mode 100644 index 0000000..f7e3e79 --- /dev/null +++ b/services/pipeline/scheduler/requirements.txt @@ -0,0 +1,5 @@ +motor==3.6.0 +redis[hiredis]==5.0.1 +pydantic==2.5.0 +fastapi==0.104.1 +uvicorn==0.24.0 diff --git a/services/pipeline/scheduler/scheduler.py b/services/pipeline/scheduler/scheduler.py new file mode 100644 index 0000000..15c96f5 --- /dev/null +++ b/services/pipeline/scheduler/scheduler.py @@ -0,0 +1,203 @@ +""" +News Pipeline Scheduler +뉴스 파이프라인 스케줄러 서비스 +""" +import asyncio +import logging +import os +import sys +from datetime import datetime, timedelta +from apscheduler.schedulers.asyncio import AsyncIOScheduler +from motor.motor_asyncio import 
AsyncIOMotorClient + +# Import from shared module +from shared.models import KeywordSubscription, PipelineJob +from shared.queue_manager import QueueManager + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class NewsScheduler: + def __init__(self): + self.scheduler = AsyncIOScheduler() + self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") + self.db_name = os.getenv("DB_NAME", "ai_writer_db") + self.db = None + self.queue_manager = QueueManager( + redis_url=os.getenv("REDIS_URL", "redis://redis:6379") + ) + + async def start(self): + """스케줄러 시작""" + logger.info("Starting News Pipeline Scheduler") + + # MongoDB 연결 + client = AsyncIOMotorClient(self.mongodb_url) + self.db = client[self.db_name] + + # Redis 연결 + await self.queue_manager.connect() + + # 기본 스케줄 설정 + # 매 30분마다 실행 + self.scheduler.add_job( + self.process_keywords, + 'interval', + minutes=30, + id='keyword_processor', + name='Process Active Keywords' + ) + + # 특정 시간대 강화 스케줄 (아침 7시, 점심 12시, 저녁 6시) + for hour in [7, 12, 18]: + self.scheduler.add_job( + self.process_priority_keywords, + 'cron', + hour=hour, + minute=0, + id=f'priority_processor_{hour}', + name=f'Process Priority Keywords at {hour}:00' + ) + + # 매일 자정 통계 초기화 + self.scheduler.add_job( + self.reset_daily_stats, + 'cron', + hour=0, + minute=0, + id='stats_reset', + name='Reset Daily Statistics' + ) + + self.scheduler.start() + logger.info("Scheduler started successfully") + + # 시작 즉시 한 번 실행 + await self.process_keywords() + + async def process_keywords(self): + """활성 키워드 처리""" + try: + logger.info("Processing active keywords") + + # MongoDB에서 활성 키워드 로드 + now = datetime.now() + thirty_minutes_ago = now - timedelta(minutes=30) + + keywords = await self.db.keywords.find({ + "is_active": True, + "$or": [ + {"last_processed": {"$lt": thirty_minutes_ago}}, + {"last_processed": None} + ] + }).to_list(None) + + logger.info(f"Found {len(keywords)} keywords to process") + + for keyword_doc in keywords: + 
await self._create_job(keyword_doc) + + # 처리 시간 업데이트 + await self.db.keywords.update_one( + {"keyword_id": keyword_doc['keyword_id']}, + {"$set": {"last_processed": now}} + ) + + logger.info(f"Created jobs for {len(keywords)} keywords") + + except Exception as e: + logger.error(f"Error processing keywords: {e}") + + async def process_priority_keywords(self): + """우선순위 키워드 처리""" + try: + logger.info("Processing priority keywords") + + keywords = await self.db.keywords.find({ + "is_active": True, + "is_priority": True + }).to_list(None) + + for keyword_doc in keywords: + await self._create_job(keyword_doc, priority=1) + + logger.info(f"Created priority jobs for {len(keywords)} keywords") + + except Exception as e: + logger.error(f"Error processing priority keywords: {e}") + + async def _create_job(self, keyword_doc: dict, priority: int = 0): + """파이프라인 작업 생성""" + try: + # KeywordSubscription 모델로 변환 + keyword = KeywordSubscription(**keyword_doc) + + # PipelineJob 생성 + job = PipelineJob( + keyword_id=keyword.keyword_id, + keyword=keyword.keyword, + stage='rss_collection', + stages_completed=[], + priority=priority, + data={ + 'keyword': keyword.keyword, + 'language': keyword.language, + 'rss_feeds': keyword.rss_feeds or self._get_default_rss_feeds(), + 'categories': keyword.categories + } + ) + + # 첫 번째 큐에 추가 + await self.queue_manager.enqueue( + 'rss_collection', + job, + priority=priority + ) + + logger.info(f"Created job {job.job_id} for keyword '{keyword.keyword}'") + + except Exception as e: + logger.error(f"Error creating job for keyword: {e}") + + def _get_default_rss_feeds(self) -> list: + """기본 RSS 피드 목록""" + return [ + "https://news.google.com/rss/search?q={keyword}&hl=ko&gl=KR&ceid=KR:ko", + "https://trends.google.com/trends/trendingsearches/daily/rss?geo=KR", + "https://www.mk.co.kr/rss/40300001/", # 매일경제 + "https://www.hankyung.com/feed/all-news", # 한국경제 + "https://www.zdnet.co.kr/news/news_rss.xml", # ZDNet Korea + ] + + async def reset_daily_stats(self): 
+ """일일 통계 초기화""" + try: + logger.info("Resetting daily statistics") + # Redis 통계 초기화 + # 구현 필요 + pass + except Exception as e: + logger.error(f"Error resetting stats: {e}") + + async def stop(self): + """스케줄러 중지""" + self.scheduler.shutdown() + await self.queue_manager.disconnect() + logger.info("Scheduler stopped") + +async def main(): + """메인 함수""" + scheduler = NewsScheduler() + + try: + await scheduler.start() + # 계속 실행 + while True: + await asyncio.sleep(60) + except KeyboardInterrupt: + logger.info("Received interrupt signal") + finally: + await scheduler.stop() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/services/pipeline/scheduler/single_keyword_scheduler.py b/services/pipeline/scheduler/single_keyword_scheduler.py new file mode 100644 index 0000000..385bbd2 --- /dev/null +++ b/services/pipeline/scheduler/single_keyword_scheduler.py @@ -0,0 +1,173 @@ +""" +Single Keyword Scheduler Service +단일 키워드를 전담하는 스케줄러 +""" +import asyncio +import logging +import os +import sys +from datetime import datetime, timedelta +from motor.motor_asyncio import AsyncIOMotorClient +import uuid + +# Import from shared module +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from shared.models import Keyword, PipelineJob +from shared.queue_manager import QueueManager + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class SingleKeywordScheduler: + def __init__(self): + self.queue_manager = QueueManager( + redis_url=os.getenv("REDIS_URL", "redis://redis:6379") + ) + self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") + self.db_name = os.getenv("DB_NAME", "ai_writer_db") + self.keyword_text = os.getenv("KEYWORD") # 환경변수로 키워드 지정 + self.interval_minutes = int(os.getenv("INTERVAL_MINUTES", "60")) + self.db = None + self.keyword = None + + async def start(self): + """스케줄러 시작""" + if not self.keyword_text: + logger.error("KEYWORD environment variable is 
required") + return + + logger.info(f"Starting Single Keyword Scheduler for '{self.keyword_text}'") + + # Redis 연결 + await self.queue_manager.connect() + + # MongoDB 연결 + client = AsyncIOMotorClient(self.mongodb_url) + self.db = client[self.db_name] + + # 키워드 초기화 또는 로드 + await self.initialize_keyword() + + if not self.keyword: + logger.error(f"Failed to initialize keyword '{self.keyword_text}'") + return + + # 메인 루프 - 이 키워드만 처리 + while True: + try: + await self.check_and_execute() + # 다음 실행까지 대기 + sleep_seconds = self.keyword.interval_minutes * 60 + logger.info(f"Sleeping for {self.keyword.interval_minutes} minutes until next execution") + await asyncio.sleep(sleep_seconds) + except Exception as e: + logger.error(f"Error in scheduler loop: {e}") + await asyncio.sleep(60) # 에러 발생시 1분 후 재시도 + + async def initialize_keyword(self): + """키워드 초기화 또는 로드""" + try: + # 기존 키워드 찾기 + keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text}) + + if keyword_doc: + self.keyword = Keyword(**keyword_doc) + logger.info(f"Loaded existing keyword: {self.keyword_text}") + else: + # 새 키워드 생성 + self.keyword = Keyword( + keyword=self.keyword_text, + interval_minutes=self.interval_minutes, + is_active=True, + priority=int(os.getenv("PRIORITY", "0")), + rss_feeds=os.getenv("RSS_FEEDS", "").split(",") if os.getenv("RSS_FEEDS") else [], + max_articles_per_run=int(os.getenv("MAX_ARTICLES", "100")) + ) + + await self.db.keywords.insert_one(self.keyword.model_dump()) + logger.info(f"Created new keyword: {self.keyword_text}") + + except Exception as e: + logger.error(f"Error initializing keyword: {e}") + + async def check_and_execute(self): + """키워드 실행 체크 및 실행""" + try: + # 최신 키워드 정보 다시 로드 + keyword_doc = await self.db.keywords.find_one({"keyword": self.keyword_text}) + + if not keyword_doc: + logger.error(f"Keyword '{self.keyword_text}' not found in database") + return + + self.keyword = Keyword(**keyword_doc) + + # 비활성화된 경우 스킵 + if not self.keyword.is_active: + 
logger.info(f"Keyword '{self.keyword_text}' is inactive, skipping") + return + + # 실행 + await self.execute_keyword() + + except Exception as e: + logger.error(f"Error checking keyword: {e}") + + async def execute_keyword(self): + """키워드 실행""" + try: + logger.info(f"Executing keyword: {self.keyword.keyword}") + + # PipelineJob 생성 + job = PipelineJob( + keyword_id=self.keyword.keyword_id, + keyword=self.keyword.keyword, + stage='rss_collection', + data={ + 'rss_feeds': self.keyword.rss_feeds if self.keyword.rss_feeds else [], + 'max_articles': self.keyword.max_articles_per_run, + 'scheduled': True, + 'scheduler_instance': f"single-{self.keyword_text}" + }, + priority=self.keyword.priority + ) + + # 큐에 작업 추가 + await self.queue_manager.enqueue('rss_collection', job) + logger.info(f"Enqueued job for keyword '{self.keyword.keyword}' with job_id: {job.job_id}") + + # 키워드 업데이트 + update_data = { + "last_run": datetime.now(), + "next_run": datetime.now() + timedelta(minutes=self.keyword.interval_minutes), + "updated_at": datetime.now() + } + + await self.db.keywords.update_one( + {"keyword_id": self.keyword.keyword_id}, + {"$set": update_data} + ) + + logger.info(f"Updated keyword '{self.keyword.keyword}' - next run at {update_data['next_run']}") + + except Exception as e: + logger.error(f"Error executing keyword {self.keyword.keyword}: {e}") + + async def stop(self): + """스케줄러 중지""" + await self.queue_manager.disconnect() + logger.info(f"Single Keyword Scheduler for '{self.keyword_text}' stopped") + +async def main(): + """메인 함수""" + scheduler = SingleKeywordScheduler() + + try: + await scheduler.start() + except KeyboardInterrupt: + logger.info("Received interrupt signal") + finally: + await scheduler.stop() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/services/pipeline/shared/__init__.py b/services/pipeline/shared/__init__.py new file mode 100644 index 0000000..5f6ffd9 --- /dev/null +++ b/services/pipeline/shared/__init__.py 
@@ -0,0 +1 @@ +# Shared modules for pipeline services \ No newline at end of file diff --git a/services/pipeline/shared/models.py b/services/pipeline/shared/models.py new file mode 100644 index 0000000..34abbcd --- /dev/null +++ b/services/pipeline/shared/models.py @@ -0,0 +1,159 @@ +""" +Pipeline Data Models +파이프라인 전체에서 사용되는 공통 데이터 모델 +""" +from datetime import datetime +from typing import List, Dict, Any, Optional +from pydantic import BaseModel, Field +import uuid + +class KeywordSubscription(BaseModel): + """키워드 구독 모델""" + keyword_id: str = Field(default_factory=lambda: str(uuid.uuid4())) + keyword: str + language: str = "ko" + schedule: str = "0 */30 * * *" # Cron expression (30분마다) + is_active: bool = True + is_priority: bool = False + last_processed: Optional[datetime] = None + rss_feeds: List[str] = Field(default_factory=list) + categories: List[str] = Field(default_factory=list) + created_at: datetime = Field(default_factory=datetime.now) + owner: Optional[str] = None + +class PipelineJob(BaseModel): + """파이프라인 작업 모델""" + job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) + keyword_id: str + keyword: str + stage: str # current stage + stages_completed: List[str] = Field(default_factory=list) + data: Dict[str, Any] = Field(default_factory=dict) + retry_count: int = 0 + max_retries: int = 3 + priority: int = 0 + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + +class RSSItem(BaseModel): + """RSS 피드 아이템""" + item_id: str = Field(default_factory=lambda: str(uuid.uuid4())) + title: str + link: str + guid: Optional[str] = None # RSS GUID for deduplication + published: Optional[str] = None + summary: Optional[str] = None + source_feed: str + +class SearchResult(BaseModel): + """검색 결과""" + title: str + link: str + snippet: Optional[str] = None + source: str = "google" + +class EnrichedItem(BaseModel): + """강화된 뉴스 아이템""" + rss_item: RSSItem + search_results: List[SearchResult] = 
Field(default_factory=list) + +class SummarizedItem(BaseModel): + """요약된 아이템""" + enriched_item: EnrichedItem + ai_summary: str + summary_language: str = "ko" + +class TranslatedItem(BaseModel): + """번역된 아이템""" + summarized_item: SummarizedItem + title_en: str + summary_en: str + +class ItemWithImage(BaseModel): + """이미지가 추가된 아이템""" + translated_item: TranslatedItem + image_url: str + image_prompt: str + +class Subtopic(BaseModel): + """기사 소주제""" + title: str + content: List[str] # 문단별 내용 + +class Entities(BaseModel): + """개체명""" + people: List[str] = Field(default_factory=list) + organizations: List[str] = Field(default_factory=list) + groups: List[str] = Field(default_factory=list) + countries: List[str] = Field(default_factory=list) + events: List[str] = Field(default_factory=list) + +class NewsReference(BaseModel): + """뉴스 레퍼런스""" + title: str + link: str + source: str + published: Optional[str] = None + +class FinalArticle(BaseModel): + """최종 기사 - ai_writer_db.articles 스키마와 일치""" + news_id: str = Field(default_factory=lambda: str(uuid.uuid4())) + title: str + created_at: str = Field(default_factory=lambda: datetime.now().isoformat()) + summary: str + subtopics: List[Subtopic] = Field(default_factory=list) + categories: List[str] = Field(default_factory=list) + entities: Entities = Field(default_factory=Entities) + source_keyword: str + source_count: int = 1 + # 레퍼런스 뉴스 정보 + references: List[NewsReference] = Field(default_factory=list) + # 파이프라인 관련 추가 필드 + job_id: Optional[str] = None + keyword_id: Optional[str] = None + pipeline_stages: List[str] = Field(default_factory=list) + processing_time: Optional[float] = None + # 다국어 지원 + language: str = 'ko' + ref_news_id: Optional[str] = None + # RSS 중복 체크용 GUID + rss_guid: Optional[str] = None + # 이미지 관련 필드 + image_prompt: Optional[str] = None + images: List[str] = Field(default_factory=list) + # 번역 추적 + translated_languages: List[str] = Field(default_factory=list) + +class TranslatedItem(BaseModel): + """번역된 아이템""" 
+ summarized_item: Dict[str, Any] # SummarizedItem as dict + translated_title: str + translated_summary: str + target_language: str = 'en' + +class GeneratedImageItem(BaseModel): + """이미지 생성된 아이템""" + translated_item: Dict[str, Any] # TranslatedItem as dict + image_url: str + image_prompt: str + +class QueueMessage(BaseModel): + """큐 메시지""" + message_id: str = Field(default_factory=lambda: str(uuid.uuid4())) + queue_name: str + job: PipelineJob + timestamp: datetime = Field(default_factory=datetime.now) + retry_count: int = 0 +class Keyword(BaseModel): + """스케줄러용 키워드 모델""" + keyword_id: str = Field(default_factory=lambda: str(uuid.uuid4())) + keyword: str + interval_minutes: int = Field(default=60) # 기본 1시간 + is_active: bool = Field(default=True) + last_run: Optional[datetime] = None + next_run: Optional[datetime] = None + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + rss_feeds: List[str] = Field(default_factory=list) # 커스텀 RSS 피드 + priority: int = Field(default=0) # 우선순위 (높을수록 우선) + max_articles_per_run: int = Field(default=100) # 실행당 최대 기사 수 diff --git a/services/pipeline/shared/queue_manager.py b/services/pipeline/shared/queue_manager.py new file mode 100644 index 0000000..063421e --- /dev/null +++ b/services/pipeline/shared/queue_manager.py @@ -0,0 +1,176 @@ +""" +Queue Manager +Redis 기반 큐 관리 시스템 +""" +import redis.asyncio as redis +import json +import logging +from typing import Optional, Dict, Any, List +from datetime import datetime + +from .models import PipelineJob, QueueMessage + +logger = logging.getLogger(__name__) + +class QueueManager: + """Redis 기반 큐 매니저""" + + QUEUES = { + "keyword_processing": "queue:keyword_processing", + "rss_collection": "queue:rss_collection", + "search_enrichment": "queue:search_enrichment", + "google_search": "queue:google_search", + "ai_article_generation": "queue:ai_article_generation", + "image_generation": "queue:image_generation", + 
"translation": "queue:translation", + "failed": "queue:failed", + "scheduled": "queue:scheduled" + } + + def __init__(self, redis_url: str = "redis://redis:6379"): + self.redis_url = redis_url + self.redis_client: Optional[redis.Redis] = None + + async def connect(self): + """Redis 연결""" + if not self.redis_client: + self.redis_client = await redis.from_url( + self.redis_url, + encoding="utf-8", + decode_responses=True + ) + logger.info("Connected to Redis") + + async def disconnect(self): + """Redis 연결 해제""" + if self.redis_client: + await self.redis_client.close() + self.redis_client = None + + async def enqueue(self, queue_name: str, job: PipelineJob, priority: int = 0) -> str: + """작업을 큐에 추가""" + try: + queue_key = self.QUEUES.get(queue_name, f"queue:{queue_name}") + + message = QueueMessage( + queue_name=queue_name, + job=job + ) + + # 우선순위에 따라 추가 + if priority > 0: + await self.redis_client.lpush(queue_key, message.json()) + else: + await self.redis_client.rpush(queue_key, message.json()) + + # 통계 업데이트 + await self.redis_client.hincrby("stats:queues", queue_name, 1) + + logger.info(f"Job {job.job_id} enqueued to {queue_name}") + return job.job_id + + except Exception as e: + logger.error(f"Failed to enqueue job: {e}") + raise + + async def dequeue(self, queue_name: str, timeout: int = 0) -> Optional[PipelineJob]: + """큐에서 작업 가져오기""" + try: + queue_key = self.QUEUES.get(queue_name, f"queue:{queue_name}") + logger.info(f"Attempting to dequeue from {queue_key} with timeout={timeout}") + + if timeout > 0: + result = await self.redis_client.blpop(queue_key, timeout) + if result: + _, data = result + logger.info(f"Dequeued item from {queue_key}") + else: + logger.debug(f"No item available in {queue_key}") + return None + else: + data = await self.redis_client.lpop(queue_key) + + if data: + message = QueueMessage.parse_raw(data) + + # 처리 중 목록에 추가 + processing_key = f"processing:{queue_name}" + await self.redis_client.hset( + processing_key, + message.job.job_id, + 
message.json() + ) + + return message.job + + return None + + except Exception as e: + logger.error(f"Failed to dequeue job: {e}") + return None + + async def mark_completed(self, queue_name: str, job_id: str): + """작업 완료 표시""" + try: + processing_key = f"processing:{queue_name}" + await self.redis_client.hdel(processing_key, job_id) + + # 통계 업데이트 + await self.redis_client.hincrby("stats:completed", queue_name, 1) + + logger.info(f"Job {job_id} completed in {queue_name}") + + except Exception as e: + logger.error(f"Failed to mark job as completed: {e}") + + async def mark_failed(self, queue_name: str, job: PipelineJob, error: str): + """작업 실패 처리""" + try: + processing_key = f"processing:{queue_name}" + await self.redis_client.hdel(processing_key, job.job_id) + + # 재시도 확인 + if job.retry_count < job.max_retries: + job.retry_count += 1 + await self.enqueue(queue_name, job) + logger.info(f"Job {job.job_id} requeued (retry {job.retry_count}/{job.max_retries})") + else: + # 실패 큐로 이동 + job.data["error"] = error + job.data["failed_stage"] = queue_name + await self.enqueue("failed", job) + + # 통계 업데이트 + await self.redis_client.hincrby("stats:failed", queue_name, 1) + logger.error(f"Job {job.job_id} failed: {error}") + + except Exception as e: + logger.error(f"Failed to mark job as failed: {e}") + + async def get_queue_stats(self) -> Dict[str, Any]: + """큐 통계 조회""" + try: + stats = {} + + for name, key in self.QUEUES.items(): + stats[name] = { + "pending": await self.redis_client.llen(key), + "processing": await self.redis_client.hlen(f"processing:{name}"), + } + + # 완료/실패 통계 + stats["completed"] = await self.redis_client.hgetall("stats:completed") or {} + stats["failed"] = await self.redis_client.hgetall("stats:failed") or {} + + return stats + + except Exception as e: + logger.error(f"Failed to get queue stats: {e}") + return {} + + async def clear_queue(self, queue_name: str): + """큐 초기화 (테스트용)""" + queue_key = self.QUEUES.get(queue_name, f"queue:{queue_name}") + await 
self.redis_client.delete(queue_key) + await self.redis_client.delete(f"processing:{queue_name}") + logger.info(f"Queue {queue_name} cleared") \ No newline at end of file diff --git a/services/pipeline/shared/requirements.txt b/services/pipeline/shared/requirements.txt new file mode 100644 index 0000000..cc100bf --- /dev/null +++ b/services/pipeline/shared/requirements.txt @@ -0,0 +1,5 @@ +redis[hiredis]==5.0.1 +motor==3.1.1 +pymongo==4.3.3 +pydantic==2.5.0 +python-dateutil==2.8.2 \ No newline at end of file diff --git a/services/pipeline/simple_test.py b/services/pipeline/simple_test.py new file mode 100644 index 0000000..d629edc --- /dev/null +++ b/services/pipeline/simple_test.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +""" +Simple pipeline test - direct queue injection +""" +import asyncio +import json +import redis.asyncio as redis +from datetime import datetime +import uuid + +async def test(): + # Redis 연결 + r = await redis.from_url("redis://redis:6379", decode_responses=True) + + # 작업 생성 + job = { + "job_id": str(uuid.uuid4()), + "keyword_id": str(uuid.uuid4()), + "keyword": "전기차", + "stage": "rss_collection", + "stages_completed": [], + "data": { + "rss_feeds": [ + "https://news.google.com/rss/search?q=전기차&hl=ko&gl=KR&ceid=KR:ko" + ], + "categories": ["technology", "automotive"] + }, + "priority": 1, + "retry_count": 0, + "max_retries": 3, + "created_at": datetime.now().isoformat(), + "updated_at": datetime.now().isoformat() + } + + # QueueMessage 형식으로 래핑 + message = { + "message_id": str(uuid.uuid4()), + "queue_name": "rss_collection", + "job": job, + "timestamp": datetime.now().isoformat() + } + + # 큐에 추가 + await r.lpush("queue:rss_collection", json.dumps(message)) + print(f"✅ Job {job['job_id']} added to queue:rss_collection") + + # 큐 상태 확인 + length = await r.llen("queue:rss_collection") + print(f"📊 Queue length: {length}") + + await r.aclose() + +if __name__ == "__main__": + asyncio.run(test()) \ No newline at end of file diff --git 
a/services/pipeline/test_dequeue.py b/services/pipeline/test_dequeue.py new file mode 100644 index 0000000..b8d6313 --- /dev/null +++ b/services/pipeline/test_dequeue.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +Direct dequeue test +""" +import asyncio +import redis.asyncio as redis +import json + +async def test_dequeue(): + """Test dequeue directly""" + + # Connect to Redis + redis_client = await redis.from_url( + "redis://redis:6379", + encoding="utf-8", + decode_responses=True + ) + + print("Connected to Redis") + + # Check queue length + length = await redis_client.llen("queue:rss_collection") + print(f"Queue length: {length}") + + if length > 0: + # Get the first item + item = await redis_client.lrange("queue:rss_collection", 0, 0) + print(f"First item preview: {item[0][:200]}...") + + # Try blpop with timeout + print("Trying blpop with timeout=5...") + result = await redis_client.blpop("queue:rss_collection", 5) + if result: + queue, data = result + print(f"Successfully dequeued from {queue}") + print(f"Data: {data[:200]}...") + + # Parse the message + try: + message = json.loads(data) + print(f"Message ID: {message.get('message_id')}") + print(f"Queue Name: {message.get('queue_name')}") + if 'job' in message: + job = message['job'] + print(f"Job ID: {job.get('job_id')}") + print(f"Keyword: {job.get('keyword')}") + except Exception as e: + print(f"Failed to parse message: {e}") + else: + print("blpop timed out - no result") + else: + print("Queue is empty") + + await redis_client.close() + +if __name__ == "__main__": + asyncio.run(test_dequeue()) \ No newline at end of file diff --git a/services/pipeline/test_pipeline.py b/services/pipeline/test_pipeline.py new file mode 100644 index 0000000..ce893f1 --- /dev/null +++ b/services/pipeline/test_pipeline.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +""" +Pipeline Test Script +파이프라인 전체 플로우를 테스트하는 스크립트 +""" +import asyncio +import json +from datetime import datetime +from motor.motor_asyncio import 
AsyncIOMotorClient +import redis.asyncio as redis +from shared.models import KeywordSubscription, PipelineJob + +async def test_pipeline(): + """파이프라인 테스트""" + + # MongoDB 연결 + mongo_client = AsyncIOMotorClient("mongodb://mongodb:27017") + db = mongo_client.pipeline + + # Redis 연결 + redis_client = redis.Redis(host='redis', port=6379, decode_responses=True) + + # 1. 테스트 키워드 추가 + test_keyword = KeywordSubscription( + keyword="전기차", + language="ko", + schedule="*/1 * * * *", # 1분마다 (테스트용) + is_active=True, + is_priority=True, + rss_feeds=[ + "https://news.google.com/rss/search?q=전기차&hl=ko&gl=KR&ceid=KR:ko", + "https://news.google.com/rss/search?q=electric+vehicle&hl=en&gl=US&ceid=US:en" + ], + categories=["technology", "automotive", "environment"], + owner="test_user" + ) + + # MongoDB에 저장 + await db.keyword_subscriptions.replace_one( + {"keyword": test_keyword.keyword}, + test_keyword.dict(), + upsert=True + ) + print(f"✅ 키워드 '{test_keyword.keyword}' 추가 완료") + + # 2. 즉시 파이프라인 트리거 (스케줄러를 거치지 않고 직접) + job = PipelineJob( + keyword_id=test_keyword.keyword_id, + keyword=test_keyword.keyword, + stage="rss_collection", + data={ + "rss_feeds": test_keyword.rss_feeds, + "categories": test_keyword.categories + }, + priority=1 if test_keyword.is_priority else 0 + ) + + # Redis 큐에 직접 추가 (QueueMessage 형식으로) + from shared.queue_manager import QueueMessage + message = QueueMessage( + queue_name="rss_collection", + job=job + ) + await redis_client.lpush("queue:rss_collection", message.json()) + print(f"✅ 작업을 RSS Collection 큐에 추가: {job.job_id}") + + # 3. 
파이프라인 상태 모니터링 + print("\n📊 파이프라인 실행 모니터링 중...") + print("각 단계별 로그를 확인하려면 다음 명령을 실행하세요:") + print(" docker-compose logs -f pipeline-rss-collector") + print(" docker-compose logs -f pipeline-google-search") + print(" docker-compose logs -f pipeline-ai-summarizer") + print(" docker-compose logs -f pipeline-translator") + print(" docker-compose logs -f pipeline-image-generator") + print(" docker-compose logs -f pipeline-article-assembly") + + # 큐 상태 확인 + for i in range(10): + await asyncio.sleep(5) + + # 각 큐의 길이 확인 + queues = [ + "queue:rss_collection", + "queue:google_search", + "queue:ai_summarization", + "queue:translation", + "queue:image_generation", + "queue:article_assembly" + ] + + print(f"\n[{datetime.now().strftime('%H:%M:%S')}] 큐 상태:") + for queue in queues: + length = await redis_client.llen(queue) + if length > 0: + print(f" {queue}: {length} 작업 대기 중") + + # 4. 최종 결과 확인 + print("\n📄 MongoDB에서 생성된 기사 확인 중...") + articles = await db.articles.find({"keyword": test_keyword.keyword}).to_list(length=5) + + if articles: + print(f"✅ {len(articles)}개의 기사 생성 완료!") + for article in articles: + print(f"\n제목: {article.get('title', 'N/A')}") + print(f"ID: {article.get('article_id', 'N/A')}") + print(f"생성 시간: {article.get('created_at', 'N/A')}") + print(f"처리 시간: {article.get('processing_time', 'N/A')}초") + print(f"이미지 수: {len(article.get('images', []))}") + else: + print("⚠️ 아직 기사가 생성되지 않았습니다. 
조금 더 기다려주세요.") + + # 연결 종료 + await redis_client.close() + mongo_client.close() + +if __name__ == "__main__": + print("🚀 파이프라인 테스트 시작") + asyncio.run(test_pipeline()) \ No newline at end of file diff --git a/services/pipeline/test_starcraft.py b/services/pipeline/test_starcraft.py new file mode 100644 index 0000000..0f1d2c8 --- /dev/null +++ b/services/pipeline/test_starcraft.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +""" +스타크래프트 키워드로 파이프라인 테스트 +""" +import asyncio +import sys +import os +sys.path.append(os.path.dirname(__file__)) + +from shared.queue_manager import QueueManager +from shared.models import PipelineJob + +async def test_starcraft_pipeline(): + """스타크래프트 키워드로 파이프라인 테스트""" + + # Queue manager 초기화 + queue_manager = QueueManager(redis_url="redis://redis:6379") + await queue_manager.connect() + + try: + # 스타크래프트 파이프라인 작업 생성 + job = PipelineJob( + keyword_id="test_starcraft_001", + keyword="스타크래프트", + stage="rss_collection", + data={} + ) + + print(f"🚀 스타크래프트 파이프라인 작업 시작") + print(f" 작업 ID: {job.job_id}") + print(f" 키워드: {job.keyword}") + print(f" 키워드 ID: {job.keyword_id}") + + # RSS 수집 큐에 작업 추가 + await queue_manager.enqueue('rss_collection', job) + print(f"✅ 작업이 rss_collection 큐에 추가되었습니다") + + # 큐 상태 확인 + stats = await queue_manager.get_queue_stats() + print(f"\n📊 현재 큐 상태:") + for queue_name, stat in stats.items(): + if queue_name not in ['completed', 'failed']: + pending = stat.get('pending', 0) + processing = stat.get('processing', 0) + if pending > 0 or processing > 0: + print(f" {queue_name}: 대기={pending}, 처리중={processing}") + + print(f"\n⏳ 파이프라인 실행을 모니터링하세요:") + print(f" docker logs site11_pipeline_rss_collector --tail 20 -f") + print(f" python3 check_mongodb.py") + + finally: + await queue_manager.disconnect() + +if __name__ == "__main__": + asyncio.run(test_starcraft_pipeline()) \ No newline at end of file diff --git a/services/pipeline/test_submit_job.py b/services/pipeline/test_submit_job.py new file mode 100644 index 0000000..60995c2 --- 
/dev/null +++ b/services/pipeline/test_submit_job.py @@ -0,0 +1,54 @@ +""" +파이프라인 테스트 작업 제출 스크립트 +""" +import redis +import json +from datetime import datetime +import uuid +import sys + +def submit_test_job(keyword='나스닥'): + # Redis 연결 + redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True) + + # 테스트 작업 생성 + job_id = str(uuid.uuid4()) + keyword_id = f'test_{job_id[:8]}' + + job_data = { + 'job_id': job_id, + 'keyword_id': keyword_id, + 'keyword': keyword, + 'created_at': datetime.now().isoformat(), + 'stage': 'rss_collection', + 'stages_completed': [], + 'data': {} + } + + # QueueMessage 래퍼 생성 + queue_message = { + 'message_id': str(uuid.uuid4()), + 'queue_name': 'rss_collection', + 'job': job_data, + 'timestamp': datetime.now().isoformat(), + 'attempts': 0 + } + + # 큐에 작업 추가 (rpush 사용 - priority=0인 경우) + redis_client.rpush('queue:rss_collection', json.dumps(queue_message)) + print(f'✅ 파이프라인 시작: job_id={job_id}') + print(f'✅ 키워드: {keyword}') + print(f'✅ RSS Collection 큐에 작업 추가 완료') + + # 큐 상태 확인 + queue_len = redis_client.llen('queue:rss_collection') + print(f'✅ 현재 큐 길이: {queue_len}') + + redis_client.close() + +if __name__ == "__main__": + if len(sys.argv) > 1: + keyword = sys.argv[1] + else: + keyword = '나스닥' + submit_test_job(keyword) \ No newline at end of file diff --git a/services/pipeline/translator/Dockerfile b/services/pipeline/translator/Dockerfile new file mode 100644 index 0000000..80950d6 --- /dev/null +++ b/services/pipeline/translator/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install dependencies +COPY ./translator/requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt + +# Copy shared modules +COPY ./shared /app/shared + +# Copy config directory +COPY ./config /app/config + +# Copy application code +COPY ./translator /app + +# Use multi_translator.py as the main service +CMD ["python", "multi_translator.py"] \ No newline at end of file diff --git a/services/pipeline/translator/language_sync.py b/services/pipeline/translator/language_sync.py new file mode 100644 index 0000000..e4eacf4 --- /dev/null +++ b/services/pipeline/translator/language_sync.py @@ -0,0 +1,329 @@ +""" +Language Sync Service +기존 기사를 새로운 언어로 번역하는 백그라운드 서비스 +""" +import asyncio +import logging +import os +import sys +import json +from typing import List, Dict, Any +import httpx +from motor.motor_asyncio import AsyncIOMotorClient +from datetime import datetime + +# Add parent directory to path for shared module +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# Import from shared module +from shared.models import FinalArticle, Subtopic, Entities, NewsReference + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class LanguageSyncService: + def __init__(self): + self.deepl_api_key = os.getenv("DEEPL_API_KEY", "3abbc796-2515-44a8-972d-22dcf27ab54a") + self.deepl_api_url = "https://api.deepl.com/v2/translate" + self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") + self.db_name = os.getenv("DB_NAME", "ai_writer_db") + self.db = None + self.languages_config = None + self.config_path = "/app/config/languages.json" + self.sync_batch_size = 10 + self.sync_delay = 2.0 # 언어 간 지연 + + async def load_config(self): + """언어 설정 파일 로드""" + try: + if os.path.exists(self.config_path): + with open(self.config_path, 'r', encoding='utf-8') as f: + self.languages_config = json.load(f) + logger.info(f"Loaded language config") + else: + raise FileNotFoundError(f"Config file not found: {self.config_path}") + except Exception as e: + logger.error(f"Error 
loading config: {e}") + raise + + async def start(self): + """백그라운드 싱크 서비스 시작""" + logger.info("Starting Language Sync Service") + + # 설정 로드 + await self.load_config() + + # MongoDB 연결 + client = AsyncIOMotorClient(self.mongodb_url) + self.db = client[self.db_name] + + # 주기적으로 싱크 체크 (10분마다) + while True: + try: + await self.sync_missing_translations() + await asyncio.sleep(600) # 10분 대기 + except Exception as e: + logger.error(f"Error in sync loop: {e}") + await asyncio.sleep(60) # 에러 시 1분 후 재시도 + + async def sync_missing_translations(self): + """누락된 번역 싱크""" + try: + # 활성화된 언어 목록 + enabled_languages = [ + lang for lang in self.languages_config["enabled_languages"] + if lang["enabled"] + ] + + if not enabled_languages: + logger.info("No enabled languages for sync") + return + + # 원본 언어 컬렉션 + source_collection = self.languages_config["source_language"]["collection"] + + for lang_config in enabled_languages: + await self.sync_language(source_collection, lang_config) + await asyncio.sleep(self.sync_delay) + + except Exception as e: + logger.error(f"Error in sync_missing_translations: {e}") + + async def sync_language(self, source_collection: str, lang_config: Dict): + """특정 언어로 누락된 기사 번역""" + try: + target_collection = lang_config["collection"] + + # 번역되지 않은 기사 찾기 + # 원본에는 있지만 대상 컬렉션에는 없는 기사 + source_articles = await self.db[source_collection].find( + {}, + {"news_id": 1} + ).to_list(None) + + source_ids = {article["news_id"] for article in source_articles} + + translated_articles = await self.db[target_collection].find( + {}, + {"news_id": 1} + ).to_list(None) + + translated_ids = {article["news_id"] for article in translated_articles} + + # 누락된 news_id + missing_ids = source_ids - translated_ids + + if not missing_ids: + logger.info(f"No missing translations for {lang_config['name']}") + return + + logger.info(f"Found {len(missing_ids)} missing translations for {lang_config['name']}") + + # 배치로 처리 + missing_list = list(missing_ids) + for i in range(0, 
len(missing_list), self.sync_batch_size): + batch = missing_list[i:i+self.sync_batch_size] + + for news_id in batch: + try: + # 원본 기사 조회 + korean_article = await self.db[source_collection].find_one( + {"news_id": news_id} + ) + + if not korean_article: + continue + + # 번역 수행 + await self.translate_and_save( + korean_article, + lang_config + ) + + logger.info(f"Synced article {news_id} to {lang_config['code']}") + + # API 속도 제한 + await asyncio.sleep(0.5) + + except Exception as e: + logger.error(f"Error translating {news_id} to {lang_config['code']}: {e}") + continue + + # 배치 간 지연 + if i + self.sync_batch_size < len(missing_list): + await asyncio.sleep(self.sync_delay) + + except Exception as e: + logger.error(f"Error syncing language {lang_config['code']}: {e}") + + async def translate_and_save(self, korean_article: Dict, lang_config: Dict): + """기사 번역 및 저장""" + try: + # 제목 번역 + translated_title = await self._translate_text( + korean_article.get('title', ''), + target_lang=lang_config["deepl_code"] + ) + + # 요약 번역 + translated_summary = await self._translate_text( + korean_article.get('summary', ''), + target_lang=lang_config["deepl_code"] + ) + + # Subtopics 번역 + translated_subtopics = [] + for subtopic in korean_article.get('subtopics', []): + translated_subtopic_title = await self._translate_text( + subtopic.get('title', ''), + target_lang=lang_config["deepl_code"] + ) + + translated_content_list = [] + for content_para in subtopic.get('content', []): + translated_para = await self._translate_text( + content_para, + target_lang=lang_config["deepl_code"] + ) + translated_content_list.append(translated_para) + + translated_subtopics.append(Subtopic( + title=translated_subtopic_title, + content=translated_content_list + )) + + # 카테고리 번역 + translated_categories = [] + for category in korean_article.get('categories', []): + translated_cat = await self._translate_text( + category, + target_lang=lang_config["deepl_code"] + ) + 
translated_categories.append(translated_cat) + + # Entities와 References는 원본 유지 + entities_data = korean_article.get('entities', {}) + translated_entities = Entities(**entities_data) if entities_data else Entities() + + references = [] + for ref_data in korean_article.get('references', []): + references.append(NewsReference(**ref_data)) + + # 번역된 기사 생성 + translated_article = FinalArticle( + news_id=korean_article.get('news_id'), + title=translated_title, + summary=translated_summary, + subtopics=translated_subtopics, + categories=translated_categories, + entities=translated_entities, + source_keyword=korean_article.get('source_keyword'), + source_count=korean_article.get('source_count', 1), + references=references, + job_id=korean_article.get('job_id'), + keyword_id=korean_article.get('keyword_id'), + pipeline_stages=korean_article.get('pipeline_stages', []) + ['sync_translation'], + processing_time=korean_article.get('processing_time', 0), + language=lang_config["code"], + ref_news_id=None, + rss_guid=korean_article.get('rss_guid'), # RSS GUID 유지 + image_prompt=korean_article.get('image_prompt'), # 이미지 프롬프트 유지 + images=korean_article.get('images', []), # 이미지 URL 리스트 유지 + translated_languages=korean_article.get('translated_languages', []) # 번역 언어 목록 유지 + ) + + # MongoDB에 저장 + collection_name = lang_config["collection"] + result = await self.db[collection_name].insert_one(translated_article.model_dump()) + + # 원본 기사에 번역 완료 표시 + await self.db[self.languages_config["source_language"]["collection"]].update_one( + {"news_id": korean_article.get('news_id')}, + { + "$addToSet": { + "translated_languages": lang_config["code"] + } + } + ) + + logger.info(f"Synced article to {collection_name}: {result.inserted_id}") + + except Exception as e: + logger.error(f"Error in translate_and_save: {e}") + raise + + async def _translate_text(self, text: str, target_lang: str = 'EN') -> str: + """DeepL API를 사용한 텍스트 번역""" + try: + if not text: + return "" + + async with 
httpx.AsyncClient() as client: + response = await client.post( + self.deepl_api_url, + data={ + 'auth_key': self.deepl_api_key, + 'text': text, + 'target_lang': target_lang, + 'source_lang': 'KO' + }, + timeout=30 + ) + + if response.status_code == 200: + result = response.json() + return result['translations'][0]['text'] + else: + logger.error(f"DeepL API error: {response.status_code}") + return text + + except Exception as e: + logger.error(f"Error translating text: {e}") + return text + + async def manual_sync(self, language_code: str = None): + """수동 싱크 실행""" + logger.info(f"Manual sync requested for language: {language_code or 'all'}") + + await self.load_config() + + client = AsyncIOMotorClient(self.mongodb_url) + self.db = client[self.db_name] + + if language_code: + # 특정 언어만 싱크 + lang_config = next( + (lang for lang in self.languages_config["enabled_languages"] + if lang["code"] == language_code and lang["enabled"]), + None + ) + if lang_config: + source_collection = self.languages_config["source_language"]["collection"] + await self.sync_language(source_collection, lang_config) + else: + logger.error(f"Language {language_code} not found or not enabled") + else: + # 모든 활성 언어 싱크 + await self.sync_missing_translations() + +async def main(): + """메인 함수""" + service = LanguageSyncService() + + # 명령줄 인수 확인 + if len(sys.argv) > 1: + if sys.argv[1] == "sync": + # 수동 싱크 모드 + language = sys.argv[2] if len(sys.argv) > 2 else None + await service.manual_sync(language) + else: + logger.error(f"Unknown command: {sys.argv[1]}") + else: + # 백그라운드 서비스 모드 + try: + await service.start() + except KeyboardInterrupt: + logger.info("Received interrupt signal") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/services/pipeline/translator/multi_translator.py b/services/pipeline/translator/multi_translator.py new file mode 100644 index 0000000..86849ae --- /dev/null +++ b/services/pipeline/translator/multi_translator.py @@ -0,0 +1,320 @@ 
+""" +Multi-Language Translation Service +다국어 번역 서비스 - 설정 기반 다중 언어 지원 +""" +import asyncio +import logging +import os +import sys +import json +from typing import List, Dict, Any +import httpx +import redis.asyncio as redis +from motor.motor_asyncio import AsyncIOMotorClient +from datetime import datetime + +# Import from shared module +from shared.models import PipelineJob, FinalArticle +from shared.queue_manager import QueueManager + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class MultiLanguageTranslator: + def __init__(self): + self.queue_manager = QueueManager( + redis_url=os.getenv("REDIS_URL", "redis://redis:6379") + ) + self.deepl_api_key = os.getenv("DEEPL_API_KEY", "3abbc796-2515-44a8-972d-22dcf27ab54a") + self.deepl_api_url = "https://api.deepl.com/v2/translate" + self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") + self.db_name = os.getenv("DB_NAME", "ai_writer_db") + self.db = None + self.languages_config = None + self.config_path = "/app/config/languages.json" + + async def load_config(self): + """언어 설정 파일 로드""" + try: + if os.path.exists(self.config_path): + with open(self.config_path, 'r', encoding='utf-8') as f: + self.languages_config = json.load(f) + else: + # 기본 설정 (영어만) + self.languages_config = { + "enabled_languages": [ + { + "code": "en", + "name": "English", + "deepl_code": "EN", + "collection": "articles_en", + "enabled": True + } + ], + "source_language": { + "code": "ko", + "name": "Korean", + "collection": "articles_ko" + }, + "translation_settings": { + "batch_size": 5, + "delay_between_languages": 2.0, + "delay_between_articles": 0.5, + "max_retries": 3 + } + } + logger.info(f"Loaded language config: {len(self.get_enabled_languages())} languages enabled") + except Exception as e: + logger.error(f"Error loading config: {e}") + raise + + def get_enabled_languages(self) -> List[Dict]: + """활성화된 언어 목록 반환""" + return [lang for lang in self.languages_config["enabled_languages"] if 
lang["enabled"]] + + async def start(self): + """워커 시작""" + logger.info("Starting Multi-Language Translator Worker") + + # 설정 로드 + await self.load_config() + + # Redis 연결 + await self.queue_manager.connect() + + # MongoDB 연결 + client = AsyncIOMotorClient(self.mongodb_url) + self.db = client[self.db_name] + + # DeepL API 키 확인 + if not self.deepl_api_key: + logger.error("DeepL API key not configured") + return + + # 메인 처리 루프 + while True: + try: + # 큐에서 작업 가져오기 + job = await self.queue_manager.dequeue('translation', timeout=5) + + if job: + await self.process_job(job) + + except Exception as e: + logger.error(f"Error in worker loop: {e}") + await asyncio.sleep(1) + + async def process_job(self, job: PipelineJob): + """모든 활성 언어로 번역""" + try: + logger.info(f"Processing job {job.job_id} for multi-language translation") + + # MongoDB에서 한국어 기사 가져오기 + news_id = job.data.get('news_id') + if not news_id: + logger.error(f"No news_id in job {job.job_id}") + await self.queue_manager.mark_failed('translation', job, "No news_id") + return + + # 원본 컬렉션에서 기사 조회 + source_collection = self.languages_config["source_language"]["collection"] + korean_article = await self.db[source_collection].find_one({"news_id": news_id}) + + if not korean_article: + logger.error(f"Article {news_id} not found in {source_collection}") + await self.queue_manager.mark_failed('translation', job, "Article not found") + return + + # 활성화된 모든 언어로 번역 + enabled_languages = self.get_enabled_languages() + settings = self.languages_config["translation_settings"] + + for lang_config in enabled_languages: + try: + logger.info(f"Translating article {news_id} to {lang_config['name']}") + + # 이미 번역되었는지 확인 + existing = await self.db[lang_config["collection"]].find_one({"news_id": news_id}) + if existing: + logger.info(f"Article {news_id} already translated to {lang_config['code']}") + continue + + # 번역 수행 + await self.translate_article( + korean_article, + lang_config, + job + ) + + # 언어 간 지연 + if 
settings.get("delay_between_languages"): + await asyncio.sleep(settings["delay_between_languages"]) + + except Exception as e: + logger.error(f"Error translating to {lang_config['code']}: {e}") + continue + + # 파이프라인 완료 로그 + logger.info(f"Translation pipeline completed for news_id: {news_id}") + + # 완료 표시 + job.stages_completed.append('translation') + await self.queue_manager.mark_completed('translation', job.job_id) + + logger.info(f"Multi-language translation completed for job {job.job_id}") + + except Exception as e: + logger.error(f"Error processing job {job.job_id}: {e}") + await self.queue_manager.mark_failed('translation', job, str(e)) + + async def translate_article(self, korean_article: Dict, lang_config: Dict, job: PipelineJob): + """특정 언어로 기사 번역""" + try: + # 제목 번역 + translated_title = await self._translate_text( + korean_article.get('title', ''), + target_lang=lang_config["deepl_code"] + ) + + # 요약 번역 + translated_summary = await self._translate_text( + korean_article.get('summary', ''), + target_lang=lang_config["deepl_code"] + ) + + # Subtopics 번역 + from shared.models import Subtopic + translated_subtopics = [] + + for subtopic in korean_article.get('subtopics', []): + translated_subtopic_title = await self._translate_text( + subtopic.get('title', ''), + target_lang=lang_config["deepl_code"] + ) + + translated_content_list = [] + for content_para in subtopic.get('content', []): + translated_para = await self._translate_text( + content_para, + target_lang=lang_config["deepl_code"] + ) + translated_content_list.append(translated_para) + + # API 속도 제한 + settings = self.languages_config["translation_settings"] + if settings.get("delay_between_articles"): + await asyncio.sleep(settings["delay_between_articles"]) + + translated_subtopics.append(Subtopic( + title=translated_subtopic_title, + content=translated_content_list + )) + + # 카테고리 번역 + translated_categories = [] + for category in korean_article.get('categories', []): + translated_cat = await 
self._translate_text( + category, + target_lang=lang_config["deepl_code"] + ) + translated_categories.append(translated_cat) + + # Entities와 References는 원본 유지 + from shared.models import Entities, NewsReference + entities_data = korean_article.get('entities', {}) + translated_entities = Entities(**entities_data) if entities_data else Entities() + + references = [] + for ref_data in korean_article.get('references', []): + references.append(NewsReference(**ref_data)) + + # 번역된 기사 생성 + translated_article = FinalArticle( + news_id=korean_article.get('news_id'), # 같은 news_id 사용 + title=translated_title, + summary=translated_summary, + subtopics=translated_subtopics, + categories=translated_categories, + entities=translated_entities, + source_keyword=job.keyword if hasattr(job, 'keyword') else korean_article.get('source_keyword'), + source_count=korean_article.get('source_count', 1), + references=references, + job_id=job.job_id, + keyword_id=job.keyword_id if hasattr(job, 'keyword_id') else None, + pipeline_stages=korean_article.get('pipeline_stages', []) + ['translation'], + processing_time=korean_article.get('processing_time', 0), + language=lang_config["code"], + ref_news_id=None, # 같은 news_id 사용하므로 불필요 + rss_guid=korean_article.get('rss_guid'), # RSS GUID 유지 + image_prompt=korean_article.get('image_prompt'), # 이미지 프롬프트 유지 + images=korean_article.get('images', []), # 이미지 URL 리스트 유지 + translated_languages=korean_article.get('translated_languages', []) # 번역 언어 목록 유지 + ) + + # MongoDB에 저장 + collection_name = lang_config["collection"] + result = await self.db[collection_name].insert_one(translated_article.model_dump()) + + logger.info(f"Article saved to {collection_name} with _id: {result.inserted_id}, language: {lang_config['code']}") + + # 원본 기사에 번역 완료 표시 + await self.db[self.languages_config["source_language"]["collection"]].update_one( + {"news_id": korean_article.get('news_id')}, + { + "$addToSet": { + "translated_languages": lang_config["code"] + } + } + ) + + 
except Exception as e: + logger.error(f"Error translating article to {lang_config['code']}: {e}") + raise + + async def _translate_text(self, text: str, target_lang: str = 'EN') -> str: + """DeepL API를 사용한 텍스트 번역""" + try: + if not text: + return "" + + async with httpx.AsyncClient() as client: + response = await client.post( + self.deepl_api_url, + data={ + 'auth_key': self.deepl_api_key, + 'text': text, + 'target_lang': target_lang, + 'source_lang': 'KO' + }, + timeout=30 + ) + + if response.status_code == 200: + result = response.json() + return result['translations'][0]['text'] + else: + logger.error(f"DeepL API error: {response.status_code}") + return text # 번역 실패시 원본 반환 + + except Exception as e: + logger.error(f"Error translating text: {e}") + return text # 번역 실패시 원본 반환 + + async def stop(self): + """워커 중지""" + await self.queue_manager.disconnect() + logger.info("Multi-Language Translator Worker stopped") + +async def main(): + """메인 함수""" + worker = MultiLanguageTranslator() + + try: + await worker.start() + except KeyboardInterrupt: + logger.info("Received interrupt signal") + finally: + await worker.stop() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/services/pipeline/translator/requirements.txt b/services/pipeline/translator/requirements.txt new file mode 100644 index 0000000..51a197c --- /dev/null +++ b/services/pipeline/translator/requirements.txt @@ -0,0 +1,5 @@ +httpx==0.25.0 +redis[hiredis]==5.0.1 +pydantic==2.5.0 +motor==3.1.1 +pymongo==4.3.3 \ No newline at end of file diff --git a/services/pipeline/translator/translator.py b/services/pipeline/translator/translator.py new file mode 100644 index 0000000..0dd7e56 --- /dev/null +++ b/services/pipeline/translator/translator.py @@ -0,0 +1,230 @@ +""" +Translation Service +DeepL API를 사용한 번역 서비스 +""" +import asyncio +import logging +import os +import sys +from typing import List, Dict, Any +import httpx +from motor.motor_asyncio import AsyncIOMotorClient 
+from datetime import datetime + +# Import from shared module +from shared.models import PipelineJob, FinalArticle +from shared.queue_manager import QueueManager + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class TranslatorWorker: + def __init__(self): + self.queue_manager = QueueManager( + redis_url=os.getenv("REDIS_URL", "redis://redis:6379") + ) + self.deepl_api_key = os.getenv("DEEPL_API_KEY", "3abbc796-2515-44a8-972d-22dcf27ab54a") + # DeepL Pro API 엔드포인트 사용 + self.deepl_api_url = "https://api.deepl.com/v2/translate" + self.mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") + self.db_name = os.getenv("DB_NAME", "ai_writer_db") + self.db = None + + async def start(self): + """워커 시작""" + logger.info("Starting Translator Worker") + + # Redis 연결 + await self.queue_manager.connect() + + # MongoDB 연결 + client = AsyncIOMotorClient(self.mongodb_url) + self.db = client[self.db_name] + + # DeepL API 키 확인 + if not self.deepl_api_key: + logger.error("DeepL API key not configured") + return + + # 메인 처리 루프 + while True: + try: + # 큐에서 작업 가져오기 + job = await self.queue_manager.dequeue('translation', timeout=5) + + if job: + await self.process_job(job) + + except Exception as e: + logger.error(f"Error in worker loop: {e}") + await asyncio.sleep(1) + + async def process_job(self, job: PipelineJob): + """영어 버전 기사 생성 및 저장""" + try: + logger.info(f"Processing job {job.job_id} for translation") + + # MongoDB에서 한국어 기사 가져오기 + news_id = job.data.get('news_id') + if not news_id: + logger.error(f"No news_id in job {job.job_id}") + await self.queue_manager.mark_failed('translation', job, "No news_id") + return + + # MongoDB에서 한국어 기사 조회 (articles_ko) + korean_article = await self.db.articles_ko.find_one({"news_id": news_id}) + if not korean_article: + logger.error(f"Article {news_id} not found in MongoDB") + await self.queue_manager.mark_failed('translation', job, "Article not found") + return + + # 영어로 번역 + translated_title = await 
self._translate_text( + korean_article.get('title', ''), + target_lang='EN' + ) + + translated_summary = await self._translate_text( + korean_article.get('summary', ''), + target_lang='EN' + ) + + # Subtopics 번역 + from shared.models import Subtopic + translated_subtopics = [] + for subtopic in korean_article.get('subtopics', []): + translated_subtopic_title = await self._translate_text( + subtopic.get('title', ''), + target_lang='EN' + ) + + translated_content_list = [] + for content_para in subtopic.get('content', []): + translated_para = await self._translate_text( + content_para, + target_lang='EN' + ) + translated_content_list.append(translated_para) + await asyncio.sleep(0.2) # API 속도 제한 + + translated_subtopics.append(Subtopic( + title=translated_subtopic_title, + content=translated_content_list + )) + + # 카테고리 번역 + translated_categories = [] + for category in korean_article.get('categories', []): + translated_cat = await self._translate_text(category, target_lang='EN') + translated_categories.append(translated_cat) + await asyncio.sleep(0.2) # API 속도 제한 + + # Entities 번역 (선택적) + from shared.models import Entities + entities_data = korean_article.get('entities', {}) + translated_entities = Entities( + people=entities_data.get('people', []), # 인명은 번역하지 않음 + organizations=entities_data.get('organizations', []), # 조직명은 번역하지 않음 + groups=entities_data.get('groups', []), + countries=entities_data.get('countries', []), + events=entities_data.get('events', []) + ) + + # 레퍼런스 가져오기 (번역하지 않음) + from shared.models import NewsReference + references = [] + for ref_data in korean_article.get('references', []): + references.append(NewsReference(**ref_data)) + + # 영어 버전 기사 생성 - 같은 news_id 사용 + english_article = FinalArticle( + news_id=news_id, # 원본과 같은 news_id 사용 + title=translated_title, + summary=translated_summary, + subtopics=translated_subtopics, + categories=translated_categories, + entities=translated_entities, + source_keyword=job.keyword, + 
source_count=korean_article.get('source_count', 1), + references=references, # 원본 레퍼런스 그대로 사용 + job_id=job.job_id, + keyword_id=job.keyword_id, + pipeline_stages=job.stages_completed.copy() + ['translation'], + processing_time=korean_article.get('processing_time', 0), + language='en', # 영어 + ref_news_id=None # 같은 news_id를 사용하므로 ref 불필요 + ) + + # MongoDB에 영어 버전 저장 (articles_en) + result = await self.db.articles_en.insert_one(english_article.model_dump()) + english_article_id = str(result.inserted_id) + + logger.info(f"English article saved with _id: {english_article_id}, news_id: {news_id}, language: en") + + # 원본 한국어 기사 업데이트 - 번역 완료 표시 + await self.db.articles_ko.update_one( + {"news_id": news_id}, + { + "$addToSet": { + "pipeline_stages": "translation" + } + } + ) + + # 완료 표시 + job.stages_completed.append('translation') + await self.queue_manager.mark_completed('translation', job.job_id) + + logger.info(f"Translation completed for job {job.job_id}") + + except Exception as e: + logger.error(f"Error processing job {job.job_id}: {e}") + await self.queue_manager.mark_failed('translation', job, str(e)) + + async def _translate_text(self, text: str, target_lang: str = 'EN') -> str: + """DeepL API를 사용한 텍스트 번역""" + try: + if not text: + return "" + + async with httpx.AsyncClient() as client: + response = await client.post( + self.deepl_api_url, + data={ + 'auth_key': self.deepl_api_key, + 'text': text, + 'target_lang': target_lang, + 'source_lang': 'KO' + }, + timeout=30 + ) + + if response.status_code == 200: + result = response.json() + return result['translations'][0]['text'] + else: + logger.error(f"DeepL API error: {response.status_code}") + return text # 번역 실패시 원본 반환 + + except Exception as e: + logger.error(f"Error translating text: {e}") + return text # 번역 실패시 원본 반환 + + async def stop(self): + """워커 중지""" + await self.queue_manager.disconnect() + logger.info("Translator Worker stopped") + +async def main(): + """메인 함수""" + worker = TranslatorWorker() + + try: + 
await worker.start() + except KeyboardInterrupt: + logger.info("Received interrupt signal") + finally: + await worker.stop() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/services/search/backend/Dockerfile b/services/search/backend/Dockerfile new file mode 100644 index 0000000..f1904f4 --- /dev/null +++ b/services/search/backend/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first for better caching +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . + +# Create necessary directories +RUN mkdir -p /app/logs + +# Run the application +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] \ No newline at end of file diff --git a/services/search/backend/indexer.py b/services/search/backend/indexer.py new file mode 100644 index 0000000..f989c0b --- /dev/null +++ b/services/search/backend/indexer.py @@ -0,0 +1,286 @@ +""" +Data indexer for synchronizing data from other services to Solr +""" +import asyncio +import logging +from typing import Dict, Any, List +from motor.motor_asyncio import AsyncIOMotorClient +from aiokafka import AIOKafkaConsumer +import json +from solr_client import SolrClient +from datetime import datetime + +logger = logging.getLogger(__name__) + +class DataIndexer: + def __init__(self, solr_client: SolrClient, mongodb_url: str, kafka_servers: str): + self.solr = solr_client + self.mongodb_url = mongodb_url + self.kafka_servers = kafka_servers + self.mongo_client = None + self.kafka_consumer = None + self.running = False + + async def start(self): + """Start the indexer""" + try: + # Connect to MongoDB + self.mongo_client = AsyncIOMotorClient(self.mongodb_url) + + # Initialize Kafka consumer + await self._init_kafka_consumer() + + # Start background tasks + 
self.running = True + asyncio.create_task(self._consume_kafka_events()) + asyncio.create_task(self._periodic_sync()) + + logger.info("Data indexer started") + + except Exception as e: + logger.error(f"Failed to start indexer: {e}") + + async def stop(self): + """Stop the indexer""" + self.running = False + + if self.kafka_consumer: + await self.kafka_consumer.stop() + + if self.mongo_client: + self.mongo_client.close() + + logger.info("Data indexer stopped") + + async def _init_kafka_consumer(self): + """Initialize Kafka consumer""" + try: + self.kafka_consumer = AIOKafkaConsumer( + 'user_events', + 'file_events', + 'content_events', + bootstrap_servers=self.kafka_servers, + value_deserializer=lambda m: json.loads(m.decode('utf-8')), + group_id='search_indexer', + auto_offset_reset='latest' + ) + await self.kafka_consumer.start() + logger.info("Kafka consumer initialized") + + except Exception as e: + logger.warning(f"Kafka consumer initialization failed: {e}") + self.kafka_consumer = None + + async def _consume_kafka_events(self): + """Consume events from Kafka and index them""" + if not self.kafka_consumer: + return + + while self.running: + try: + async for msg in self.kafka_consumer: + await self._handle_kafka_event(msg.topic, msg.value) + + except Exception as e: + logger.error(f"Kafka consumption error: {e}") + await asyncio.sleep(5) + + async def _handle_kafka_event(self, topic: str, event: Dict[str, Any]): + """Handle a Kafka event""" + try: + event_type = event.get('type') + data = event.get('data', {}) + + if topic == 'user_events': + await self._index_user_event(event_type, data) + elif topic == 'file_events': + await self._index_file_event(event_type, data) + elif topic == 'content_events': + await self._index_content_event(event_type, data) + + except Exception as e: + logger.error(f"Failed to handle event: {e}") + + async def _index_user_event(self, event_type: str, data: Dict): + """Index user-related events""" + if event_type == 'user_created' or 
event_type == 'user_updated': + user_doc = { + 'id': f"user_{data.get('user_id')}", + 'doc_type': 'user', + 'user_id': data.get('user_id'), + 'username': data.get('username'), + 'email': data.get('email'), + 'name': data.get('name', ''), + 'bio': data.get('bio', ''), + 'tags': data.get('tags', []), + 'created_at': data.get('created_at'), + 'updated_at': datetime.utcnow().isoformat() + } + self.solr.index_document(user_doc) + + elif event_type == 'user_deleted': + self.solr.delete_document(f"user_{data.get('user_id')}") + + async def _index_file_event(self, event_type: str, data: Dict): + """Index file-related events""" + if event_type == 'file_uploaded': + file_doc = { + 'id': f"file_{data.get('file_id')}", + 'doc_type': 'file', + 'file_id': data.get('file_id'), + 'filename': data.get('filename'), + 'content_type': data.get('content_type'), + 'size': data.get('size'), + 'user_id': data.get('user_id'), + 'tags': data.get('tags', []), + 'description': data.get('description', ''), + 'created_at': data.get('created_at'), + 'updated_at': datetime.utcnow().isoformat() + } + self.solr.index_document(file_doc) + + elif event_type == 'file_deleted': + self.solr.delete_document(f"file_{data.get('file_id')}") + + async def _index_content_event(self, event_type: str, data: Dict): + """Index content-related events""" + if event_type in ['content_created', 'content_updated']: + content_doc = { + 'id': f"content_{data.get('content_id')}", + 'doc_type': 'content', + 'content_id': data.get('content_id'), + 'title': data.get('title'), + 'content': data.get('content', ''), + 'summary': data.get('summary', ''), + 'author_id': data.get('author_id'), + 'tags': data.get('tags', []), + 'category': data.get('category'), + 'status': data.get('status', 'draft'), + 'created_at': data.get('created_at'), + 'updated_at': datetime.utcnow().isoformat() + } + self.solr.index_document(content_doc) + + elif event_type == 'content_deleted': + 
self.solr.delete_document(f"content_{data.get('content_id')}") + + async def _periodic_sync(self): + """Periodically sync data from MongoDB""" + while self.running: + try: + # Sync every 5 minutes + await asyncio.sleep(300) + await self.sync_all_data() + + except Exception as e: + logger.error(f"Periodic sync error: {e}") + + async def sync_all_data(self): + """Sync all data from MongoDB to Solr""" + try: + logger.info("Starting full data sync") + + # Sync users + await self._sync_users() + + # Sync files + await self._sync_files() + + # Optimize index after bulk sync + self.solr.optimize_index() + + logger.info("Full data sync completed") + + except Exception as e: + logger.error(f"Full sync failed: {e}") + + async def _sync_users(self): + """Sync users from MongoDB""" + try: + db = self.mongo_client['users_db'] + collection = db['users'] + + users = [] + async for user in collection.find({'deleted_at': None}): + user_doc = { + 'id': f"user_{str(user['_id'])}", + 'doc_type': 'user', + 'user_id': str(user['_id']), + 'username': user.get('username'), + 'email': user.get('email'), + 'name': user.get('name', ''), + 'bio': user.get('bio', ''), + 'tags': user.get('tags', []), + 'created_at': user.get('created_at').isoformat() if user.get('created_at') else None, + 'updated_at': datetime.utcnow().isoformat() + } + users.append(user_doc) + + # Bulk index every 100 documents + if len(users) >= 100: + self.solr.bulk_index(users, 'user') + users = [] + + # Index remaining users + if users: + self.solr.bulk_index(users, 'user') + + logger.info(f"Synced users to Solr") + + except Exception as e: + logger.error(f"Failed to sync users: {e}") + + async def _sync_files(self): + """Sync files from MongoDB""" + try: + db = self.mongo_client['files_db'] + collection = db['file_metadata'] + + files = [] + async for file in collection.find({'deleted_at': None}): + file_doc = { + 'id': f"file_{str(file['_id'])}", + 'doc_type': 'file', + 'file_id': str(file['_id']), + 'filename': 
file.get('filename'), + 'original_name': file.get('original_name'), + 'content_type': file.get('content_type'), + 'size': file.get('size'), + 'user_id': file.get('user_id'), + 'tags': list(file.get('tags', {}).keys()), + 'description': file.get('metadata', {}).get('description', ''), + 'created_at': file.get('created_at').isoformat() if file.get('created_at') else None, + 'updated_at': datetime.utcnow().isoformat() + } + files.append(file_doc) + + # Bulk index every 100 documents + if len(files) >= 100: + self.solr.bulk_index(files, 'file') + files = [] + + # Index remaining files + if files: + self.solr.bulk_index(files, 'file') + + logger.info(f"Synced files to Solr") + + except Exception as e: + logger.error(f"Failed to sync files: {e}") + + async def reindex_collection(self, collection_name: str, doc_type: str): + """Reindex a specific collection""" + try: + # Delete existing documents of this type + self.solr.delete_by_query(f'doc_type:{doc_type}') + + # Sync the collection + if collection_name == 'users': + await self._sync_users() + elif collection_name == 'files': + await self._sync_files() + + logger.info(f"Reindexed {collection_name}") + + except Exception as e: + logger.error(f"Failed to reindex {collection_name}: {e}") \ No newline at end of file diff --git a/services/search/backend/main.py b/services/search/backend/main.py new file mode 100644 index 0000000..db4e25a --- /dev/null +++ b/services/search/backend/main.py @@ -0,0 +1,362 @@ +""" +Search Service with Apache Solr +""" +from fastapi import FastAPI, Query, HTTPException +from fastapi.responses import JSONResponse +from contextlib import asynccontextmanager +import logging +import os +from typing import Optional, List, Dict, Any +from datetime import datetime +from solr_client import SolrClient +from indexer import DataIndexer +import asyncio +import time + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = 
logging.getLogger(__name__) + +# Global instances +solr_client = None +data_indexer = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Manage application lifecycle""" + global solr_client, data_indexer + + # Startup + logger.info("Starting Search Service...") + + # Wait for Solr to be ready + solr_url = os.getenv("SOLR_URL", "http://solr:8983/solr") + max_retries = 30 + + for i in range(max_retries): + try: + solr_client = SolrClient(solr_url=solr_url, core_name="site11") + logger.info("Connected to Solr") + break + except Exception as e: + logger.warning(f"Waiting for Solr... ({i+1}/{max_retries})") + await asyncio.sleep(2) + + if solr_client: + # Initialize data indexer + mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") + kafka_servers = os.getenv("KAFKA_BOOTSTRAP_SERVERS", "kafka:9092") + + data_indexer = DataIndexer(solr_client, mongodb_url, kafka_servers) + await data_indexer.start() + + # Initial data sync + asyncio.create_task(data_indexer.sync_all_data()) + + yield + + # Shutdown + if data_indexer: + await data_indexer.stop() + + logger.info("Search Service stopped") + +app = FastAPI( + title="Search Service", + description="Full-text search with Apache Solr", + version="1.0.0", + lifespan=lifespan +) + +@app.get("/health") +async def health_check(): + """Health check endpoint""" + return { + "status": "healthy", + "service": "search", + "timestamp": datetime.utcnow().isoformat(), + "solr_connected": solr_client is not None + } + +@app.get("/api/search") +async def search( + q: str = Query(..., description="Search query"), + doc_type: Optional[str] = Query(None, description="Filter by document type"), + start: int = Query(0, ge=0, description="Starting offset"), + rows: int = Query(10, ge=1, le=100, description="Number of results"), + sort: Optional[str] = Query(None, description="Sort order (e.g., 'created_at desc')"), + facet: bool = Query(False, description="Enable faceting"), + facet_field: Optional[List[str]] = 
Query(None, description="Fields to facet on") +): + """ + Search documents across all indexed content + """ + if not solr_client: + raise HTTPException(status_code=503, detail="Search service unavailable") + + try: + # Build filter query + fq = [] + if doc_type: + fq.append(f"doc_type:{doc_type}") + + # Prepare search parameters + search_params = { + 'start': start, + 'rows': rows, + 'facet': facet + } + + if fq: + search_params['fq'] = fq + + if sort: + search_params['sort'] = sort + + if facet_field: + search_params['facet_field'] = facet_field + + # Execute search + results = solr_client.search(q, **search_params) + + return { + "query": q, + "total": results['total'], + "start": start, + "rows": rows, + "documents": results['documents'], + "facets": results.get('facets', {}), + "highlighting": results.get('highlighting', {}) + } + + except Exception as e: + logger.error(f"Search failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/search/suggest") +async def suggest( + q: str = Query(..., min_length=1, description="Query prefix"), + field: str = Query("title", description="Field to search in"), + limit: int = Query(10, ge=1, le=50, description="Maximum suggestions") +): + """ + Get autocomplete suggestions + """ + if not solr_client: + raise HTTPException(status_code=503, detail="Search service unavailable") + + try: + suggestions = solr_client.suggest(q, field, limit) + + return { + "query": q, + "suggestions": suggestions, + "count": len(suggestions) + } + + except Exception as e: + logger.error(f"Suggest failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/search/similar/{doc_id}") +async def find_similar( + doc_id: str, + rows: int = Query(5, ge=1, le=20, description="Number of similar documents") +): + """ + Find documents similar to the given document + """ + if not solr_client: + raise HTTPException(status_code=503, detail="Search service unavailable") + + try: + similar_docs = 
solr_client.more_like_this(doc_id, rows=rows) + + return { + "source_document": doc_id, + "similar_documents": similar_docs, + "count": len(similar_docs) + } + + except Exception as e: + logger.error(f"Similar search failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/search/index") +async def index_document(document: Dict[str, Any]): + """ + Index a single document + """ + if not solr_client: + raise HTTPException(status_code=503, detail="Search service unavailable") + + try: + doc_type = document.get('doc_type', 'general') + success = solr_client.index_document(document, doc_type) + + if success: + return { + "status": "success", + "message": "Document indexed", + "document_id": document.get('id') + } + else: + raise HTTPException(status_code=500, detail="Failed to index document") + + except Exception as e: + logger.error(f"Indexing failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/search/bulk-index") +async def bulk_index(documents: List[Dict[str, Any]]): + """ + Bulk index multiple documents + """ + if not solr_client: + raise HTTPException(status_code=503, detail="Search service unavailable") + + try: + indexed = solr_client.bulk_index(documents) + + return { + "status": "success", + "message": f"Indexed {indexed} documents", + "count": indexed + } + + except Exception as e: + logger.error(f"Bulk indexing failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.delete("/api/search/document/{doc_id}") +async def delete_document(doc_id: str): + """ + Delete a document from the index + """ + if not solr_client: + raise HTTPException(status_code=503, detail="Search service unavailable") + + try: + success = solr_client.delete_document(doc_id) + + if success: + return { + "status": "success", + "message": "Document deleted", + "document_id": doc_id + } + else: + raise HTTPException(status_code=500, detail="Failed to delete document") + + except Exception as e: + 
logger.error(f"Deletion failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/search/stats") +async def get_stats(): + """ + Get search index statistics + """ + if not solr_client: + raise HTTPException(status_code=503, detail="Search service unavailable") + + try: + stats = solr_client.get_stats() + + return { + "status": "success", + "statistics": stats, + "timestamp": datetime.utcnow().isoformat() + } + + except Exception as e: + logger.error(f"Failed to get stats: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/search/reindex/{collection}") +async def reindex_collection( + collection: str, + doc_type: Optional[str] = Query(None, description="Document type for the collection") +): + """ + Reindex a specific collection + """ + if not data_indexer: + raise HTTPException(status_code=503, detail="Indexer service unavailable") + + try: + if not doc_type: + # Map collection to doc_type + doc_type_map = { + 'users': 'user', + 'files': 'file', + 'content': 'content' + } + doc_type = doc_type_map.get(collection, collection) + + asyncio.create_task(data_indexer.reindex_collection(collection, doc_type)) + + return { + "status": "success", + "message": f"Reindexing {collection} started", + "collection": collection, + "doc_type": doc_type + } + + except Exception as e: + logger.error(f"Reindex failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/search/optimize") +async def optimize_index(): + """ + Optimize the search index + """ + if not solr_client: + raise HTTPException(status_code=503, detail="Search service unavailable") + + try: + success = solr_client.optimize_index() + + if success: + return { + "status": "success", + "message": "Index optimization started" + } + else: + raise HTTPException(status_code=500, detail="Failed to optimize index") + + except Exception as e: + logger.error(f"Optimization failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + 
+@app.post("/api/search/clear") +async def clear_index(): + """ + Clear all documents from the index (DANGER!) + """ + if not solr_client: + raise HTTPException(status_code=503, detail="Search service unavailable") + + try: + success = solr_client.clear_index() + + if success: + return { + "status": "success", + "message": "Index cleared", + "warning": "All documents have been deleted!" + } + else: + raise HTTPException(status_code=500, detail="Failed to clear index") + + except Exception as e: + logger.error(f"Clear index failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/services/search/backend/requirements.txt b/services/search/backend/requirements.txt new file mode 100644 index 0000000..08411d3 --- /dev/null +++ b/services/search/backend/requirements.txt @@ -0,0 +1,10 @@ +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +pydantic==2.5.3 +python-dotenv==1.0.0 +pysolr==3.9.0 +httpx==0.25.2 +motor==3.5.1 +pymongo==4.6.1 +aiokafka==0.10.0 +redis==5.0.1 \ No newline at end of file diff --git a/services/search/backend/solr_client.py b/services/search/backend/solr_client.py new file mode 100644 index 0000000..4c555b0 --- /dev/null +++ b/services/search/backend/solr_client.py @@ -0,0 +1,303 @@ +""" +Apache Solr client for search operations +""" +import pysolr +import logging +from typing import Dict, List, Any, Optional +from datetime import datetime +import json + +logger = logging.getLogger(__name__) + +class SolrClient: + def __init__(self, solr_url: str = "http://solr:8983/solr", core_name: str = "site11"): + self.solr_url = f"{solr_url}/{core_name}" + self.core_name = core_name + self.solr = None + self.connect() + + def connect(self): + """Connect to Solr instance""" + try: + self.solr = pysolr.Solr( + self.solr_url, + always_commit=True, + timeout=10 + ) + # Test connection + self.solr.ping() + logger.info(f"Connected 
to Solr at {self.solr_url}") + except Exception as e: + logger.error(f"Failed to connect to Solr: {e}") + raise + + def index_document(self, document: Dict[str, Any], doc_type: str = None) -> bool: + """Index a single document""" + try: + # Add metadata + if doc_type: + document["doc_type"] = doc_type + + if "id" not in document: + document["id"] = f"{doc_type}_{document.get('_id', '')}" + + # Add indexing timestamp + document["indexed_at"] = datetime.utcnow().isoformat() + + # Index the document + self.solr.add([document]) + logger.info(f"Indexed document: {document.get('id')}") + return True + + except Exception as e: + logger.error(f"Failed to index document: {e}") + return False + + def bulk_index(self, documents: List[Dict[str, Any]], doc_type: str = None) -> int: + """Bulk index multiple documents""" + try: + indexed = 0 + for doc in documents: + if doc_type: + doc["doc_type"] = doc_type + + if "id" not in doc: + doc["id"] = f"{doc_type}_{doc.get('_id', '')}" + + doc["indexed_at"] = datetime.utcnow().isoformat() + + self.solr.add(documents) + indexed = len(documents) + logger.info(f"Bulk indexed {indexed} documents") + return indexed + + except Exception as e: + logger.error(f"Failed to bulk index: {e}") + return 0 + + def search(self, query: str, **kwargs) -> Dict[str, Any]: + """ + Search documents + + Args: + query: Search query string + **kwargs: Additional search parameters + - fq: Filter queries + - fl: Fields to return + - start: Starting offset + - rows: Number of rows + - sort: Sort order + - facet: Enable faceting + - facet.field: Fields to facet on + """ + try: + # Default parameters + params = { + 'q': query, + 'start': kwargs.get('start', 0), + 'rows': kwargs.get('rows', 10), + 'fl': kwargs.get('fl', '*,score'), + 'defType': 'edismax', + 'qf': 'title^3 content^2 tags description name', # Boost fields + 'mm': '2<-25%', # Minimum match + 'hl': 'true', # Highlighting + 'hl.fl': 'title,content,description', + 'hl.simple.pre': '', + 'hl.simple.post': 
'' + } + + # Add filter queries + if 'fq' in kwargs: + params['fq'] = kwargs['fq'] + + # Add sorting + if 'sort' in kwargs: + params['sort'] = kwargs['sort'] + + # Add faceting + if kwargs.get('facet'): + params.update({ + 'facet': 'true', + 'facet.field': kwargs.get('facet_field', ['doc_type', 'tags', 'status']), + 'facet.mincount': 1 + }) + + # Execute search + results = self.solr.search(**params) + + # Format response + response = { + 'total': results.hits, + 'documents': [], + 'facets': {}, + 'highlighting': {} + } + + # Add documents + for doc in results.docs: + response['documents'].append(doc) + + # Add facets if available + if hasattr(results, 'facets') and results.facets: + if 'facet_fields' in results.facets: + for field, values in results.facets['facet_fields'].items(): + response['facets'][field] = [ + {'value': values[i], 'count': values[i+1]} + for i in range(0, len(values), 2) + ] + + # Add highlighting if available + if hasattr(results, 'highlighting'): + response['highlighting'] = results.highlighting + + return response + + except Exception as e: + logger.error(f"Search failed: {e}") + return {'total': 0, 'documents': [], 'error': str(e)} + + def suggest(self, prefix: str, field: str = "suggest", limit: int = 10) -> List[str]: + """Get autocomplete suggestions""" + try: + params = { + 'q': f'{field}:{prefix}*', + 'fl': field, + 'rows': limit, + 'start': 0 + } + + results = self.solr.search(**params) + suggestions = [] + + for doc in results.docs: + if field in doc: + value = doc[field] + if isinstance(value, list): + suggestions.extend(value) + else: + suggestions.append(value) + + # Remove duplicates and limit + seen = set() + unique_suggestions = [] + for s in suggestions: + if s not in seen: + seen.add(s) + unique_suggestions.append(s) + if len(unique_suggestions) >= limit: + break + + return unique_suggestions + + except Exception as e: + logger.error(f"Suggest failed: {e}") + return [] + + def more_like_this(self, doc_id: str, mlt_fields: 
List[str] = None, rows: int = 5) -> List[Dict]: + """Find similar documents""" + try: + if not mlt_fields: + mlt_fields = ['title', 'content', 'tags', 'description'] + + params = { + 'q': f'id:{doc_id}', + 'mlt': 'true', + 'mlt.fl': ','.join(mlt_fields), + 'mlt.mindf': 1, + 'mlt.mintf': 1, + 'mlt.count': rows, + 'fl': '*,score' + } + + results = self.solr.search(**params) + + if results.docs: + # The MLT results are in the moreLikeThis section + if hasattr(results, 'moreLikeThis'): + mlt_results = results.moreLikeThis.get(doc_id, {}) + if 'docs' in mlt_results: + return mlt_results['docs'] + + return [] + + except Exception as e: + logger.error(f"More like this failed: {e}") + return [] + + def delete_document(self, doc_id: str) -> bool: + """Delete a document by ID""" + try: + self.solr.delete(id=doc_id) + logger.info(f"Deleted document: {doc_id}") + return True + except Exception as e: + logger.error(f"Failed to delete document: {e}") + return False + + def delete_by_query(self, query: str) -> bool: + """Delete documents matching a query""" + try: + self.solr.delete(q=query) + logger.info(f"Deleted documents matching: {query}") + return True + except Exception as e: + logger.error(f"Failed to delete by query: {e}") + return False + + def clear_index(self) -> bool: + """Clear all documents from index""" + try: + self.solr.delete(q='*:*') + logger.info("Cleared all documents from index") + return True + except Exception as e: + logger.error(f"Failed to clear index: {e}") + return False + + def get_stats(self) -> Dict[str, Any]: + """Get index statistics""" + try: + # Get document count + results = self.solr.search(q='*:*', rows=0) + + # Get facet counts for doc_type + facet_results = self.solr.search( + q='*:*', + rows=0, + facet='true', + **{'facet.field': ['doc_type', 'status']} + ) + + stats = { + 'total_documents': results.hits, + 'doc_types': {}, + 'status_counts': {} + } + + if hasattr(facet_results, 'facets') and facet_results.facets: + if 'facet_fields' in 
facet_results.facets: + # Parse doc_type facets + doc_type_facets = facet_results.facets['facet_fields'].get('doc_type', []) + for i in range(0, len(doc_type_facets), 2): + stats['doc_types'][doc_type_facets[i]] = doc_type_facets[i+1] + + # Parse status facets + status_facets = facet_results.facets['facet_fields'].get('status', []) + for i in range(0, len(status_facets), 2): + stats['status_counts'][status_facets[i]] = status_facets[i+1] + + return stats + + except Exception as e: + logger.error(f"Failed to get stats: {e}") + return {'error': str(e)} + + def optimize_index(self) -> bool: + """Optimize the Solr index""" + try: + self.solr.optimize() + logger.info("Index optimized") + return True + except Exception as e: + logger.error(f"Failed to optimize index: {e}") + return False \ No newline at end of file diff --git a/services/search/backend/test_search.py b/services/search/backend/test_search.py new file mode 100644 index 0000000..095a2b5 --- /dev/null +++ b/services/search/backend/test_search.py @@ -0,0 +1,292 @@ +#!/usr/bin/env python3 +""" +Test script for Search Service with Apache Solr +""" +import asyncio +import httpx +import json +from datetime import datetime + +BASE_URL = "http://localhost:8015" + +async def test_search_api(): + """Test search API endpoints""" + async with httpx.AsyncClient() as client: + print("\n🔍 Testing Search Service API...") + + # Test health check + print("\n1. Testing health check...") + response = await client.get(f"{BASE_URL}/health") + print(f"Health check: {response.json()}") + + # Test index sample documents + print("\n2. 
Indexing sample documents...") + + # Index user document + user_doc = { + "id": "user_test_001", + "doc_type": "user", + "user_id": "test_001", + "username": "john_doe", + "email": "john@example.com", + "name": "John Doe", + "bio": "Software developer passionate about Python and microservices", + "tags": ["python", "developer", "backend"], + "created_at": datetime.utcnow().isoformat() + } + + response = await client.post(f"{BASE_URL}/api/search/index", json=user_doc) + print(f"Indexed user: {response.json()}") + + # Index file documents + file_docs = [ + { + "id": "file_test_001", + "doc_type": "file", + "file_id": "test_file_001", + "filename": "architecture_diagram.png", + "content_type": "image/png", + "size": 1024000, + "user_id": "test_001", + "tags": ["architecture", "design", "documentation"], + "description": "System architecture diagram showing microservices", + "created_at": datetime.utcnow().isoformat() + }, + { + "id": "file_test_002", + "doc_type": "file", + "file_id": "test_file_002", + "filename": "user_manual.pdf", + "content_type": "application/pdf", + "size": 2048000, + "user_id": "test_001", + "tags": ["documentation", "manual", "guide"], + "description": "Complete user manual for the application", + "created_at": datetime.utcnow().isoformat() + } + ] + + response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=file_docs) + print(f"Bulk indexed files: {response.json()}") + + # Index content documents + content_docs = [ + { + "id": "content_test_001", + "doc_type": "content", + "content_id": "test_content_001", + "title": "Getting Started with Microservices", + "content": "Microservices architecture is a method of developing software applications as a suite of independently deployable services.", + "summary": "Introduction to microservices architecture patterns", + "author_id": "test_001", + "tags": ["microservices", "architecture", "tutorial"], + "category": "technology", + "status": "published", + "created_at": 
datetime.utcnow().isoformat() + }, + { + "id": "content_test_002", + "doc_type": "content", + "content_id": "test_content_002", + "title": "Python Best Practices", + "content": "Learn the best practices for writing clean, maintainable Python code including PEP 8 style guide.", + "summary": "Essential Python coding standards and practices", + "author_id": "test_001", + "tags": ["python", "programming", "best-practices"], + "category": "programming", + "status": "published", + "created_at": datetime.utcnow().isoformat() + } + ] + + response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=content_docs) + print(f"Bulk indexed content: {response.json()}") + + # Wait for indexing + await asyncio.sleep(2) + + # Test basic search + print("\n3. Testing basic search...") + response = await client.get( + f"{BASE_URL}/api/search", + params={"q": "microservices"} + ) + results = response.json() + print(f"Search for 'microservices': Found {results['total']} results") + if results['documents']: + print(f"First result: {results['documents'][0].get('title', results['documents'][0].get('filename', 'N/A'))}") + + # Test search with filters + print("\n4. Testing filtered search...") + response = await client.get( + f"{BASE_URL}/api/search", + params={ + "q": "*:*", + "doc_type": "file", + "rows": 5 + } + ) + results = response.json() + print(f"Files search: Found {results['total']} files") + + # Test faceted search + print("\n5. Testing faceted search...") + response = await client.get( + f"{BASE_URL}/api/search", + params={ + "q": "*:*", + "facet": "true", + "facet_field": ["doc_type", "tags", "category", "status"] + } + ) + results = response.json() + print(f"Facets: {json.dumps(results['facets'], indent=2)}") + + # Test autocomplete/suggest + print("\n6. 
Testing autocomplete...") + response = await client.get( + f"{BASE_URL}/api/search/suggest", + params={ + "q": "micro", + "field": "title", + "limit": 5 + } + ) + suggestions = response.json() + print(f"Suggestions for 'micro': {suggestions['suggestions']}") + + # Test similar documents + print("\n7. Testing similar documents...") + response = await client.get(f"{BASE_URL}/api/search/similar/content_test_001") + if response.status_code == 200: + similar = response.json() + print(f"Found {similar['count']} similar documents") + else: + print(f"Similar search: {response.status_code}") + + # Test search with highlighting + print("\n8. Testing search with highlighting...") + response = await client.get( + f"{BASE_URL}/api/search", + params={"q": "Python"} + ) + results = response.json() + if results['highlighting']: + print(f"Highlighting results: {len(results['highlighting'])} documents highlighted") + + # Test search statistics + print("\n9. Testing search statistics...") + response = await client.get(f"{BASE_URL}/api/search/stats") + if response.status_code == 200: + stats = response.json() + print(f"Index stats: {stats['statistics']}") + + # Test complex query + print("\n10. Testing complex query...") + response = await client.get( + f"{BASE_URL}/api/search", + params={ + "q": "architecture OR python", + "doc_type": "content", + "sort": "created_at desc", + "rows": 10 + } + ) + results = response.json() + print(f"Complex query: Found {results['total']} results") + + # Test delete document + print("\n11. 
Testing document deletion...") + response = await client.delete(f"{BASE_URL}/api/search/document/content_test_002") + if response.status_code == 200: + print(f"Deleted document: {response.json()}") + + # Verify deletion + await asyncio.sleep(1) + response = await client.get( + f"{BASE_URL}/api/search", + params={"q": "id:content_test_002"} + ) + results = response.json() + print(f"Verify deletion: Found {results['total']} results (should be 0)") + +async def test_performance(): + """Test search performance""" + print("\n\n⚡ Testing Search Performance...") + + async with httpx.AsyncClient(timeout=30.0) as client: + # Index many documents + print("Indexing 100 test documents...") + docs = [] + for i in range(100): + docs.append({ + "id": f"perf_test_{i}", + "doc_type": "content", + "title": f"Test Document {i}", + "content": f"This is test content for document {i} with various keywords like search, Solr, Python, microservices", + "tags": [f"tag{i%10}", f"category{i%5}"], + "created_at": datetime.utcnow().isoformat() + }) + + response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=docs) + print(f"Indexed {response.json().get('count', 0)} documents") + + # Wait for indexing + await asyncio.sleep(2) + + # Test search speed + print("\nTesting search response times...") + import time + + queries = ["search", "Python", "document", "test", "microservices"] + for query in queries: + start = time.time() + response = await client.get( + f"{BASE_URL}/api/search", + params={"q": query, "rows": 20} + ) + elapsed = time.time() - start + results = response.json() + print(f"Query '{query}': {results['total']} results in {elapsed:.3f}s") + +async def test_reindex(): + """Test reindexing from MongoDB""" + print("\n\n🔄 Testing Reindex Functionality...") + + async with httpx.AsyncClient() as client: + # Trigger reindex for users collection + print("Triggering reindex for users collection...") + response = await client.post( + f"{BASE_URL}/api/search/reindex/users", + 
params={"doc_type": "user"} + ) + if response.status_code == 200: + print(f"Reindex started: {response.json()}") + else: + print(f"Reindex failed: {response.status_code}") + + # Test index optimization + print("\nTesting index optimization...") + response = await client.post(f"{BASE_URL}/api/search/optimize") + if response.status_code == 200: + print(f"Optimization: {response.json()}") + +async def main(): + """Run all tests""" + print("=" * 60) + print("SEARCH SERVICE TEST SUITE (Apache Solr)") + print("=" * 60) + print(f"Started at: {datetime.now().isoformat()}") + + # Run tests + await test_search_api() + await test_performance() + await test_reindex() + + print("\n" + "=" * 60) + print("✅ All search tests completed!") + print(f"Finished at: {datetime.now().isoformat()}") + print("=" * 60) + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/services/search/solr-config/conf/managed-schema.xml b/services/search/solr-config/conf/managed-schema.xml new file mode 100644 index 0000000..e3a02ef --- /dev/null +++ b/services/search/solr-config/conf/managed-schema.xml @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + \ No newline at end of file diff --git a/services/search/solr-config/conf/solrconfig.xml b/services/search/solr-config/conf/solrconfig.xml new file mode 100644 index 0000000..43c3403 --- /dev/null +++ b/services/search/solr-config/conf/solrconfig.xml @@ -0,0 +1,152 @@ + + + 9.4.0 + + + ${solr.data.dir:} + + + + 100 + 1000 + + 10 + 10 + + + + + + + ${solr.ulog.dir:} + ${solr.ulog.numVersionBuckets:65536} + + + ${solr.autoCommit.maxTime:15000} + false + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + 1024 + + + + true + 20 + 200 + + + + + + + + + + + + + + explicit + 10 + content + OR + edismax + + title^3.0 name^2.5 content^2.0 
description^1.5 summary^1.5 + filename^1.5 tags^1.2 category username email bio + + + title^4.0 name^3.0 content^2.5 description^2.0 + + 2<-25% + true + title,content,description,summary + <mark> + </mark> + true + 1 + + + + + + + + + + true + + + + + + + solrpingquery + + + all + + + + + + + true + 10 + suggest + + + suggest + + + + + + text_general + + default + content + solr.DirectSolrSpellChecker + internal + 0.5 + 2 + 1 + 5 + 4 + 0.01 + + + + + + + suggest + FuzzyLookupFactory + DocumentDictionaryFactory + suggest + text_suggest + false + + + + + + + title,content,description,tags + 1 + 1 + 10 + + + + + + + \ No newline at end of file diff --git a/services/search/solr-config/conf/stopwords.txt b/services/search/solr-config/conf/stopwords.txt new file mode 100644 index 0000000..3f50366 --- /dev/null +++ b/services/search/solr-config/conf/stopwords.txt @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) +# Standard English stop words +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with \ No newline at end of file diff --git a/services/search/solr-config/conf/synonyms.txt b/services/search/solr-config/conf/synonyms.txt new file mode 100644 index 0000000..c167f27 --- /dev/null +++ b/services/search/solr-config/conf/synonyms.txt @@ -0,0 +1,38 @@ +# Synonyms for site11 search +# Format: term1, term2, term3 => all are synonyms +# Or: term1, term2 => term1 is replaced by term2 + +# Technology synonyms +javascript, js +typescript, ts +python, py +golang, go +database, db +kubernetes, k8s +docker, container, containerization + +# Common terms +search, find, query, lookup +upload, import, add +download, export, get +delete, remove, erase +update, modify, edit, change +create, make, new, add + +# File related +document, doc, file +image, picture, photo, img +video, movie, clip +audio, sound, music + +# User related +user, member, account +admin, 
administrator, moderator +profile, account, user + +# Status +active, enabled, live +inactive, disabled, offline +pending, waiting, processing +complete, done, finished +error, failed, failure \ No newline at end of file diff --git a/services/statistics/backend/Dockerfile b/services/statistics/backend/Dockerfile new file mode 100644 index 0000000..2515968 --- /dev/null +++ b/services/statistics/backend/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first for better caching +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . + +# Expose port +EXPOSE 8000 + +# Run the application +CMD ["python", "main.py"] \ No newline at end of file diff --git a/services/statistics/backend/aggregator.py b/services/statistics/backend/aggregator.py new file mode 100644 index 0000000..df20f60 --- /dev/null +++ b/services/statistics/backend/aggregator.py @@ -0,0 +1,617 @@ +""" +Data Aggregator - Performs data aggregation and analytics +""" +import asyncio +import logging +from typing import Dict, Any, List, Optional +from datetime import datetime, timedelta +from models import ( + AggregatedMetric, AggregationType, Granularity, + UserAnalytics, SystemAnalytics, EventAnalytics, + AlertRule, Alert +) +import uuid +import io +import csv + +logger = logging.getLogger(__name__) + +class DataAggregator: + """Performs data aggregation and analytics operations""" + + def __init__(self, ts_db, cache): + self.ts_db = ts_db + self.cache = cache + self.is_running = False + self.alert_rules = {} + self.active_alerts = {} + self.aggregation_jobs = [] + + async def start_aggregation_jobs(self): + """Start background aggregation jobs""" + self.is_running = True + + # Schedule periodic aggregation jobs + self.aggregation_jobs = [ + 
asyncio.create_task(self._aggregate_hourly_metrics()), + asyncio.create_task(self._aggregate_daily_metrics()), + asyncio.create_task(self._check_alert_rules()), + asyncio.create_task(self._cleanup_old_data()) + ] + + logger.info("Data aggregation jobs started") + + async def stop(self): + """Stop aggregation jobs""" + self.is_running = False + + # Cancel all jobs + for job in self.aggregation_jobs: + job.cancel() + + # Wait for jobs to complete + await asyncio.gather(*self.aggregation_jobs, return_exceptions=True) + + logger.info("Data aggregation jobs stopped") + + async def _aggregate_hourly_metrics(self): + """Aggregate metrics every hour""" + while self.is_running: + try: + await asyncio.sleep(3600) # Run every hour + + end_time = datetime.now() + start_time = end_time - timedelta(hours=1) + + # Aggregate different metric types + await self._aggregate_metric_type("user.event", start_time, end_time, Granularity.HOUR) + await self._aggregate_metric_type("system.cpu", start_time, end_time, Granularity.HOUR) + await self._aggregate_metric_type("system.memory", start_time, end_time, Granularity.HOUR) + + logger.info("Completed hourly metrics aggregation") + + except Exception as e: + logger.error(f"Error in hourly aggregation: {e}") + + async def _aggregate_daily_metrics(self): + """Aggregate metrics every day""" + while self.is_running: + try: + await asyncio.sleep(86400) # Run every 24 hours + + end_time = datetime.now() + start_time = end_time - timedelta(days=1) + + # Aggregate different metric types + await self._aggregate_metric_type("user.event", start_time, end_time, Granularity.DAY) + await self._aggregate_metric_type("system", start_time, end_time, Granularity.DAY) + + logger.info("Completed daily metrics aggregation") + + except Exception as e: + logger.error(f"Error in daily aggregation: {e}") + + async def _aggregate_metric_type( + self, + metric_prefix: str, + start_time: datetime, + end_time: datetime, + granularity: Granularity + ): + """Aggregate a 
specific metric type""" + try: + # Query raw metrics + metrics = await self.ts_db.query_metrics( + metric_type=metric_prefix, + start_time=start_time, + end_time=end_time + ) + + if not metrics: + return + + # Calculate aggregations + aggregations = { + AggregationType.AVG: sum(m['value'] for m in metrics) / len(metrics), + AggregationType.SUM: sum(m['value'] for m in metrics), + AggregationType.MIN: min(m['value'] for m in metrics), + AggregationType.MAX: max(m['value'] for m in metrics), + AggregationType.COUNT: len(metrics) + } + + # Store aggregated results + for agg_type, value in aggregations.items(): + aggregated = AggregatedMetric( + metric_name=metric_prefix, + aggregation_type=agg_type, + value=value, + start_time=start_time, + end_time=end_time, + granularity=granularity, + count=len(metrics) + ) + + await self.ts_db.store_aggregated_metric(aggregated) + + except Exception as e: + logger.error(f"Error aggregating {metric_prefix}: {e}") + + async def aggregate_metrics( + self, + metric_type: str, + aggregation: str, + group_by: Optional[str], + start_time: datetime, + end_time: datetime + ) -> Dict[str, Any]: + """Perform custom metric aggregation""" + try: + # Query metrics + metrics = await self.ts_db.query_metrics( + metric_type=metric_type, + start_time=start_time, + end_time=end_time + ) + + if not metrics: + return {"result": 0, "count": 0} + + # Group metrics if requested + if group_by: + grouped = {} + for metric in metrics: + key = metric.get('tags', {}).get(group_by, 'unknown') + if key not in grouped: + grouped[key] = [] + grouped[key].append(metric['value']) + + # Aggregate each group + results = {} + for key, values in grouped.items(): + results[key] = self._calculate_aggregation(values, aggregation) + + return {"grouped_results": results, "count": len(metrics)} + else: + # Single aggregation + values = [m['value'] for m in metrics] + result = self._calculate_aggregation(values, aggregation) + return {"result": result, "count": len(metrics)} 
+ + except Exception as e: + logger.error(f"Error in custom aggregation: {e}") + raise + + def _calculate_aggregation(self, values: List[float], aggregation: str) -> float: + """Calculate aggregation on values""" + if not values: + return 0 + + if aggregation == "avg": + return sum(values) / len(values) + elif aggregation == "sum": + return sum(values) + elif aggregation == "min": + return min(values) + elif aggregation == "max": + return max(values) + elif aggregation == "count": + return len(values) + else: + return 0 + + async def get_overview(self) -> Dict[str, Any]: + """Get analytics overview""" + try: + now = datetime.now() + last_hour = now - timedelta(hours=1) + last_day = now - timedelta(days=1) + last_week = now - timedelta(weeks=1) + + # Get various metrics + hourly_events = await self.ts_db.count_metrics("user.event", last_hour, now) + daily_events = await self.ts_db.count_metrics("user.event", last_day, now) + weekly_events = await self.ts_db.count_metrics("user.event", last_week, now) + + # Get system status + cpu_avg = await self.ts_db.get_average("system.cpu.usage", last_hour, now) + memory_avg = await self.ts_db.get_average("system.memory.usage", last_hour, now) + + # Get active users (approximate from events) + active_users = await self.ts_db.count_distinct_tags("user.event", "user_id", last_day, now) + + return { + "events": { + "last_hour": hourly_events, + "last_day": daily_events, + "last_week": weekly_events + }, + "system": { + "cpu_usage": cpu_avg, + "memory_usage": memory_avg + }, + "users": { + "active_daily": active_users + }, + "alerts": { + "active": len(self.active_alerts) + }, + "timestamp": now.isoformat() + } + + except Exception as e: + logger.error(f"Error getting overview: {e}") + return {} + + async def get_user_analytics( + self, + start_date: datetime, + end_date: datetime, + granularity: str + ) -> UserAnalytics: + """Get user analytics""" + try: + # Get user metrics + total_users = await self.ts_db.count_distinct_tags( + 
"user.event.user_created", + "user_id", + datetime.min, + end_date + ) + + active_users = await self.ts_db.count_distinct_tags( + "user.event", + "user_id", + start_date, + end_date + ) + + new_users = await self.ts_db.count_metrics( + "user.event.user_created", + start_date, + end_date + ) + + # Calculate growth rate + prev_period_start = start_date - (end_date - start_date) + prev_users = await self.ts_db.count_distinct_tags( + "user.event", + "user_id", + prev_period_start, + start_date + ) + + growth_rate = ((active_users - prev_users) / max(prev_users, 1)) * 100 + + # Get top actions + top_actions = await self.ts_db.get_top_metrics( + "user.event", + "event_type", + start_date, + end_date, + limit=10 + ) + + return UserAnalytics( + total_users=total_users, + active_users=active_users, + new_users=new_users, + user_growth_rate=growth_rate, + average_session_duration=0, # Would need session tracking + top_actions=top_actions, + user_distribution={}, # Would need geographic data + period=f"{start_date.date()} to {end_date.date()}" + ) + + except Exception as e: + logger.error(f"Error getting user analytics: {e}") + raise + + async def get_system_analytics(self) -> SystemAnalytics: + """Get system performance analytics""" + try: + now = datetime.now() + last_hour = now - timedelta(hours=1) + last_day = now - timedelta(days=1) + + # Calculate uptime (simplified - would need actual downtime tracking) + total_checks = await self.ts_db.count_metrics("system.health", last_day, now) + successful_checks = await self.ts_db.count_metrics_with_value( + "system.health", + 1, + last_day, + now + ) + uptime = (successful_checks / max(total_checks, 1)) * 100 + + # Get averages + cpu_usage = await self.ts_db.get_average("system.cpu.usage", last_hour, now) + memory_usage = await self.ts_db.get_average("system.memory.usage", last_hour, now) + disk_usage = await self.ts_db.get_average("system.disk.usage", last_hour, now) + response_time = await 
self.ts_db.get_average("api.response_time", last_hour, now) + + # Get error rate + total_requests = await self.ts_db.count_metrics("api.request", last_hour, now) + error_requests = await self.ts_db.count_metrics("api.error", last_hour, now) + error_rate = (error_requests / max(total_requests, 1)) * 100 + + # Throughput + throughput = total_requests / 3600 # requests per second + + return SystemAnalytics( + uptime_percentage=uptime, + average_response_time=response_time or 0, + error_rate=error_rate, + throughput=throughput, + cpu_usage=cpu_usage or 0, + memory_usage=memory_usage or 0, + disk_usage=disk_usage or 0, + active_connections=0, # Would need connection tracking + services_health={} # Would need service health checks + ) + + except Exception as e: + logger.error(f"Error getting system analytics: {e}") + raise + + async def get_event_analytics( + self, + event_type: Optional[str], + limit: int + ) -> EventAnalytics: + """Get event analytics""" + try: + now = datetime.now() + last_hour = now - timedelta(hours=1) + + # Get total events + total_events = await self.ts_db.count_metrics( + event_type or "user.event", + last_hour, + now + ) + + # Events per second + events_per_second = total_events / 3600 + + # Get event types distribution + event_types = await self.ts_db.get_metric_distribution( + "user.event", + "event_type", + last_hour, + now + ) + + # Top events + top_events = await self.ts_db.get_top_metrics( + event_type or "user.event", + "event_type", + last_hour, + now, + limit=limit + ) + + # Error events + error_events = await self.ts_db.count_metrics( + "user.event.error", + last_hour, + now + ) + + # Success rate + success_rate = ((total_events - error_events) / max(total_events, 1)) * 100 + + return EventAnalytics( + total_events=total_events, + events_per_second=events_per_second, + event_types=event_types, + top_events=top_events, + error_events=error_events, + success_rate=success_rate, + processing_time={} # Would need timing metrics + ) + + 
except Exception as e: + logger.error(f"Error getting event analytics: {e}") + raise + + async def get_dashboard_configs(self) -> List[Dict[str, Any]]: + """Get available dashboard configurations""" + return [ + { + "id": "overview", + "name": "Overview Dashboard", + "description": "General system overview" + }, + { + "id": "users", + "name": "User Analytics", + "description": "User behavior and statistics" + }, + { + "id": "system", + "name": "System Performance", + "description": "System health and performance metrics" + }, + { + "id": "events", + "name": "Event Analytics", + "description": "Event processing and statistics" + } + ] + + async def get_dashboard_data(self, dashboard_id: str) -> Dict[str, Any]: + """Get data for a specific dashboard""" + if dashboard_id == "overview": + return await self.get_overview() + elif dashboard_id == "users": + end_date = datetime.now() + start_date = end_date - timedelta(days=7) + analytics = await self.get_user_analytics(start_date, end_date, "day") + return analytics.dict() + elif dashboard_id == "system": + analytics = await self.get_system_analytics() + return analytics.dict() + elif dashboard_id == "events": + analytics = await self.get_event_analytics(None, 100) + return analytics.dict() + else: + raise ValueError(f"Unknown dashboard: {dashboard_id}") + + async def create_alert_rule(self, rule_data: Dict[str, Any]) -> str: + """Create a new alert rule""" + rule = AlertRule(**rule_data) + rule.id = str(uuid.uuid4()) + self.alert_rules[rule.id] = rule + + # Store in cache + await self.cache.set( + f"alert_rule:{rule.id}", + rule.json(), + expire=None # Permanent + ) + + return rule.id + + async def _check_alert_rules(self): + """Check alert rules periodically""" + while self.is_running: + try: + await asyncio.sleep(60) # Check every minute + + for rule_id, rule in self.alert_rules.items(): + if not rule.enabled: + continue + + await self._evaluate_alert_rule(rule) + + except Exception as e: + logger.error(f"Error 
checking alert rules: {e}") + + async def _evaluate_alert_rule(self, rule: AlertRule): + """Evaluate a single alert rule""" + try: + # Get recent metric values + end_time = datetime.now() + start_time = end_time - timedelta(seconds=rule.duration) + + avg_value = await self.ts_db.get_average( + rule.metric_name, + start_time, + end_time + ) + + if avg_value is None: + return + + # Check condition + triggered = False + if rule.condition == "gt" and avg_value > rule.threshold: + triggered = True + elif rule.condition == "lt" and avg_value < rule.threshold: + triggered = True + elif rule.condition == "gte" and avg_value >= rule.threshold: + triggered = True + elif rule.condition == "lte" and avg_value <= rule.threshold: + triggered = True + elif rule.condition == "eq" and avg_value == rule.threshold: + triggered = True + elif rule.condition == "neq" and avg_value != rule.threshold: + triggered = True + + # Handle alert state + alert_key = f"{rule.id}:{rule.metric_name}" + + if triggered: + if alert_key not in self.active_alerts: + # New alert + alert = Alert( + id=str(uuid.uuid4()), + rule_id=rule.id, + rule_name=rule.name, + metric_name=rule.metric_name, + current_value=avg_value, + threshold=rule.threshold, + severity=rule.severity, + triggered_at=datetime.now(), + status="active" + ) + self.active_alerts[alert_key] = alert + + # Send notifications + await self._send_alert_notifications(alert, rule) + + else: + if alert_key in self.active_alerts: + # Alert resolved + alert = self.active_alerts[alert_key] + alert.resolved_at = datetime.now() + alert.status = "resolved" + del self.active_alerts[alert_key] + + logger.info(f"Alert resolved: {rule.name}") + + except Exception as e: + logger.error(f"Error evaluating alert rule {rule.id}: {e}") + + async def _send_alert_notifications(self, alert: Alert, rule: AlertRule): + """Send alert notifications""" + logger.warning(f"ALERT: {rule.name} - {alert.metric_name} = {alert.current_value} (threshold: {alert.threshold})") + # 
Would implement actual notification channels here + + async def get_active_alerts(self) -> List[Dict[str, Any]]: + """Get currently active alerts""" + return [alert.dict() for alert in self.active_alerts.values()] + + async def export_to_csv( + self, + metric_type: str, + start_time: datetime, + end_time: datetime + ): + """Export metrics to CSV""" + try: + # Get metrics + metrics = await self.ts_db.query_metrics( + metric_type=metric_type, + start_time=start_time, + end_time=end_time + ) + + # Create CSV + output = io.StringIO() + writer = csv.DictWriter( + output, + fieldnames=['timestamp', 'metric_name', 'value', 'tags', 'service'] + ) + writer.writeheader() + + for metric in metrics: + writer.writerow({ + 'timestamp': metric.get('timestamp'), + 'metric_name': metric.get('name'), + 'value': metric.get('value'), + 'tags': str(metric.get('tags', {})), + 'service': metric.get('service') + }) + + output.seek(0) + return output + + except Exception as e: + logger.error(f"Error exporting to CSV: {e}") + raise + + async def _cleanup_old_data(self): + """Clean up old data periodically""" + while self.is_running: + try: + await asyncio.sleep(86400) # Run daily + + # Delete data older than 30 days + cutoff_date = datetime.now() - timedelta(days=30) + await self.ts_db.delete_old_data(cutoff_date) + + logger.info("Completed old data cleanup") + + except Exception as e: + logger.error(f"Error in data cleanup: {e}") \ No newline at end of file diff --git a/services/statistics/backend/cache_manager.py b/services/statistics/backend/cache_manager.py new file mode 100644 index 0000000..51d5981 --- /dev/null +++ b/services/statistics/backend/cache_manager.py @@ -0,0 +1,32 @@ +"""Cache Manager for Redis""" +import json +import logging +from typing import Optional, Any + +logger = logging.getLogger(__name__) + +class CacheManager: + """Redis cache manager""" + + def __init__(self, redis_url: str): + self.redis_url = redis_url + self.is_connected = False + self.cache = {} # 
Simplified in-memory cache + + async def connect(self): + """Connect to Redis""" + self.is_connected = True + logger.info("Connected to cache") + + async def close(self): + """Close Redis connection""" + self.is_connected = False + logger.info("Disconnected from cache") + + async def get(self, key: str) -> Optional[str]: + """Get value from cache""" + return self.cache.get(key) + + async def set(self, key: str, value: str, expire: Optional[int] = None): + """Set value in cache""" + self.cache[key] = value \ No newline at end of file diff --git a/services/statistics/backend/main.py b/services/statistics/backend/main.py new file mode 100644 index 0000000..f1a3c53 --- /dev/null +++ b/services/statistics/backend/main.py @@ -0,0 +1,396 @@ +""" +Statistics Service - Real-time Analytics and Metrics +""" +from fastapi import FastAPI, HTTPException, Depends, Query +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import StreamingResponse +import uvicorn +from datetime import datetime, timedelta +from typing import Optional, List, Dict, Any +import asyncio +import json +import os +from contextlib import asynccontextmanager +import logging + +# Import custom modules +from models import Metric, AggregatedMetric, TimeSeriesData, DashboardConfig +from metrics_collector import MetricsCollector +from aggregator import DataAggregator +from websocket_manager import WebSocketManager +from time_series_db import TimeSeriesDB +from cache_manager import CacheManager + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Global instances +metrics_collector = None +data_aggregator = None +ws_manager = None +ts_db = None +cache_manager = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Startup + global metrics_collector, data_aggregator, ws_manager, ts_db, cache_manager + + try: + # Initialize TimeSeriesDB (using InfluxDB) + ts_db = TimeSeriesDB( + host=os.getenv("INFLUXDB_HOST", "influxdb"), + 
port=int(os.getenv("INFLUXDB_PORT", 8086)), + database=os.getenv("INFLUXDB_DATABASE", "statistics") + ) + await ts_db.connect() + logger.info("Connected to InfluxDB") + + # Initialize Cache Manager + cache_manager = CacheManager( + redis_url=os.getenv("REDIS_URL", "redis://redis:6379") + ) + await cache_manager.connect() + logger.info("Connected to Redis cache") + + # Initialize Metrics Collector (optional Kafka connection) + try: + metrics_collector = MetricsCollector( + kafka_bootstrap_servers=os.getenv("KAFKA_BOOTSTRAP_SERVERS", "kafka:9092"), + ts_db=ts_db, + cache=cache_manager + ) + await metrics_collector.start() + logger.info("Metrics collector started") + except Exception as e: + logger.warning(f"Metrics collector failed to start (Kafka not available): {e}") + metrics_collector = None + + # Initialize Data Aggregator + data_aggregator = DataAggregator( + ts_db=ts_db, + cache=cache_manager + ) + asyncio.create_task(data_aggregator.start_aggregation_jobs()) + logger.info("Data aggregator started") + + # Initialize WebSocket Manager + ws_manager = WebSocketManager() + logger.info("WebSocket manager initialized") + + except Exception as e: + logger.error(f"Failed to start Statistics service: {e}") + raise + + yield + + # Shutdown + if metrics_collector: + await metrics_collector.stop() + if data_aggregator: + await data_aggregator.stop() + if ts_db: + await ts_db.close() + if cache_manager: + await cache_manager.close() + + logger.info("Statistics service shutdown complete") + +app = FastAPI( + title="Statistics Service", + description="Real-time Analytics and Metrics Service", + version="1.0.0", + lifespan=lifespan +) + +# CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +@app.get("/") +async def root(): + return { + "service": "Statistics Service", + "status": "running", + "timestamp": datetime.now().isoformat() + } + +@app.get("/health") +async def 
health_check(): + return { + "status": "healthy", + "service": "statistics", + "components": { + "influxdb": "connected" if ts_db and ts_db.is_connected else "disconnected", + "redis": "connected" if cache_manager and cache_manager.is_connected else "disconnected", + "metrics_collector": "running" if metrics_collector and metrics_collector.is_running else "stopped", + "aggregator": "running" if data_aggregator and data_aggregator.is_running else "stopped" + }, + "timestamp": datetime.now().isoformat() + } + +# Metrics Endpoints +@app.post("/api/metrics") +async def record_metric(metric: Metric): + """Record a single metric""" + try: + await metrics_collector.record_metric(metric) + return {"status": "recorded", "metric_id": metric.id} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/metrics/batch") +async def record_metrics_batch(metrics: List[Metric]): + """Record multiple metrics in batch""" + try: + await metrics_collector.record_metrics_batch(metrics) + return {"status": "recorded", "count": len(metrics)} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/metrics/realtime/{metric_type}") +async def get_realtime_metrics( + metric_type: str, + duration: int = Query(60, description="Duration in seconds") +): + """Get real-time metrics for the specified type""" + try: + end_time = datetime.now() + start_time = end_time - timedelta(seconds=duration) + + metrics = await ts_db.query_metrics( + metric_type=metric_type, + start_time=start_time, + end_time=end_time + ) + + return { + "metric_type": metric_type, + "duration": duration, + "data": metrics, + "timestamp": datetime.now().isoformat() + } + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# Analytics Endpoints +@app.get("/api/analytics/overview") +async def get_analytics_overview(): + """Get overall analytics overview""" + try: + # Try to get from cache first + cached = await 
cache_manager.get("analytics:overview") + if cached: + return json.loads(cached) + + # Calculate analytics + overview = await data_aggregator.get_overview() + + # Cache for 1 minute + await cache_manager.set( + "analytics:overview", + json.dumps(overview), + expire=60 + ) + + return overview + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/analytics/users") +async def get_user_analytics( + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + granularity: str = Query("hour", regex="^(minute|hour|day|week|month)$") +): + """Get user analytics""" + try: + if not start_date: + start_date = datetime.now() - timedelta(days=7) + if not end_date: + end_date = datetime.now() + + analytics = await data_aggregator.get_user_analytics( + start_date=start_date, + end_date=end_date, + granularity=granularity + ) + + return analytics + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/analytics/system") +async def get_system_analytics(): + """Get system performance analytics""" + try: + analytics = await data_aggregator.get_system_analytics() + return analytics + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/analytics/events") +async def get_event_analytics( + event_type: Optional[str] = None, + limit: int = Query(100, le=1000) +): + """Get event analytics""" + try: + analytics = await data_aggregator.get_event_analytics( + event_type=event_type, + limit=limit + ) + return analytics + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# Time Series Endpoints +@app.get("/api/timeseries/{metric_name}") +async def get_time_series( + metric_name: str, + start_time: datetime, + end_time: datetime, + interval: str = Query("1m", regex="^\\d+[smhd]$") +): + """Get time series data for a specific metric""" + try: + data = await ts_db.get_time_series( + metric_name=metric_name, + 
start_time=start_time, + end_time=end_time, + interval=interval + ) + + return TimeSeriesData( + metric_name=metric_name, + start_time=start_time, + end_time=end_time, + interval=interval, + data=data + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# Aggregation Endpoints +@app.get("/api/aggregates/{metric_type}") +async def get_aggregated_metrics( + metric_type: str, + aggregation: str = Query("avg", regex="^(avg|sum|min|max|count)$"), + group_by: Optional[str] = None, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None +): + """Get aggregated metrics""" + try: + if not start_time: + start_time = datetime.now() - timedelta(hours=24) + if not end_time: + end_time = datetime.now() + + result = await data_aggregator.aggregate_metrics( + metric_type=metric_type, + aggregation=aggregation, + group_by=group_by, + start_time=start_time, + end_time=end_time + ) + + return result + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# Dashboard Endpoints +@app.get("/api/dashboard/configs") +async def get_dashboard_configs(): + """Get available dashboard configurations""" + configs = await data_aggregator.get_dashboard_configs() + return {"configs": configs} + +@app.get("/api/dashboard/{dashboard_id}") +async def get_dashboard_data(dashboard_id: str): + """Get data for a specific dashboard""" + try: + data = await data_aggregator.get_dashboard_data(dashboard_id) + return data + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# WebSocket Endpoint for Real-time Updates +from fastapi import WebSocket, WebSocketDisconnect + +@app.websocket("/ws/metrics") +async def websocket_metrics(websocket: WebSocket): + """WebSocket endpoint for real-time metrics streaming""" + await ws_manager.connect(websocket) + try: + while True: + # Send metrics updates every second + metrics = await metrics_collector.get_latest_metrics() + await websocket.send_json({ + "type": 
"metrics_update", + "data": metrics, + "timestamp": datetime.now().isoformat() + }) + await asyncio.sleep(1) + except WebSocketDisconnect: + ws_manager.disconnect(websocket) + except Exception as e: + logger.error(f"WebSocket error: {e}") + ws_manager.disconnect(websocket) + +# Alert Management Endpoints +@app.post("/api/alerts/rules") +async def create_alert_rule(rule: Dict[str, Any]): + """Create a new alert rule""" + try: + rule_id = await data_aggregator.create_alert_rule(rule) + return {"rule_id": rule_id, "status": "created"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/alerts/active") +async def get_active_alerts(): + """Get currently active alerts""" + try: + alerts = await data_aggregator.get_active_alerts() + return {"alerts": alerts, "count": len(alerts)} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +# Export Endpoints +@app.get("/api/export/csv") +async def export_metrics_csv( + metric_type: str, + start_time: datetime, + end_time: datetime +): + """Export metrics as CSV""" + try: + csv_data = await data_aggregator.export_to_csv( + metric_type=metric_type, + start_time=start_time, + end_time=end_time + ) + + return StreamingResponse( + csv_data, + media_type="text/csv", + headers={ + "Content-Disposition": f"attachment; filename=metrics_{metric_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" + } + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + uvicorn.run( + "main:app", + host="0.0.0.0", + port=8000, + reload=True + ) \ No newline at end of file diff --git a/services/statistics/backend/metrics_collector.py b/services/statistics/backend/metrics_collector.py new file mode 100644 index 0000000..7a86cc4 --- /dev/null +++ b/services/statistics/backend/metrics_collector.py @@ -0,0 +1,242 @@ +""" +Metrics Collector - Collects metrics from Kafka and other sources +""" +import asyncio +import json +import 
logging +from typing import List, Dict, Any, Optional +from datetime import datetime +from aiokafka import AIOKafkaConsumer +from models import Metric, MetricType +import uuid + +logger = logging.getLogger(__name__) + +class MetricsCollector: + """Collects and processes metrics from various sources""" + + def __init__(self, kafka_bootstrap_servers: str, ts_db, cache): + self.kafka_servers = kafka_bootstrap_servers + self.ts_db = ts_db + self.cache = cache + self.consumer = None + self.is_running = False + self.latest_metrics = {} + self.metrics_buffer = [] + self.buffer_size = 100 + self.flush_interval = 5 # seconds + + async def start(self): + """Start the metrics collector""" + try: + # Start Kafka consumer for event metrics + self.consumer = AIOKafkaConsumer( + 'metrics-events', + 'user-events', + 'system-metrics', + bootstrap_servers=self.kafka_servers, + group_id='statistics-consumer-group', + value_deserializer=lambda m: json.loads(m.decode('utf-8')) + ) + await self.consumer.start() + self.is_running = True + + # Start background tasks + asyncio.create_task(self._consume_metrics()) + asyncio.create_task(self._flush_metrics_periodically()) + + logger.info("Metrics collector started") + except Exception as e: + logger.error(f"Failed to start metrics collector: {e}") + raise + + async def stop(self): + """Stop the metrics collector""" + self.is_running = False + if self.consumer: + await self.consumer.stop() + + # Flush remaining metrics + if self.metrics_buffer: + await self._flush_metrics() + + logger.info("Metrics collector stopped") + + async def _consume_metrics(self): + """Consume metrics from Kafka""" + while self.is_running: + try: + async for msg in self.consumer: + if not self.is_running: + break + + metric = self._parse_kafka_message(msg) + if metric: + await self.record_metric(metric) + + except Exception as e: + logger.error(f"Error consuming metrics: {e}") + await asyncio.sleep(5) + + def _parse_kafka_message(self, msg) -> Optional[Metric]: + 
"""Parse Kafka message into Metric""" + try: + data = msg.value + topic = msg.topic + + # Create metric based on topic + if topic == 'user-events': + return self._create_user_metric(data) + elif topic == 'system-metrics': + return self._create_system_metric(data) + elif topic == 'metrics-events': + return Metric(**data) + else: + return None + + except Exception as e: + logger.error(f"Failed to parse Kafka message: {e}") + return None + + def _create_user_metric(self, data: Dict) -> Metric: + """Create metric from user event""" + event_type = data.get('event_type', 'unknown') + + return Metric( + id=str(uuid.uuid4()), + name=f"user.event.{event_type.lower()}", + type=MetricType.COUNTER, + value=1, + tags={ + "event_type": event_type, + "user_id": data.get('data', {}).get('user_id', 'unknown'), + "service": data.get('service', 'unknown') + }, + timestamp=datetime.fromisoformat(data.get('timestamp', datetime.now().isoformat())), + service=data.get('service', 'users') + ) + + def _create_system_metric(self, data: Dict) -> Metric: + """Create metric from system event""" + return Metric( + id=str(uuid.uuid4()), + name=data.get('metric_name', 'system.unknown'), + type=MetricType.GAUGE, + value=float(data.get('value', 0)), + tags=data.get('tags', {}), + timestamp=datetime.fromisoformat(data.get('timestamp', datetime.now().isoformat())), + service=data.get('service', 'system') + ) + + async def record_metric(self, metric: Metric): + """Record a single metric""" + try: + # Add to buffer + self.metrics_buffer.append(metric) + + # Update latest metrics cache + self.latest_metrics[metric.name] = { + "value": metric.value, + "timestamp": metric.timestamp.isoformat(), + "tags": metric.tags + } + + # Flush if buffer is full + if len(self.metrics_buffer) >= self.buffer_size: + await self._flush_metrics() + + except Exception as e: + logger.error(f"Failed to record metric: {e}") + raise + + async def record_metrics_batch(self, metrics: List[Metric]): + """Record multiple metrics""" 
+ for metric in metrics: + await self.record_metric(metric) + + async def _flush_metrics(self): + """Flush metrics buffer to time series database""" + if not self.metrics_buffer: + return + + try: + # Write to time series database + await self.ts_db.write_metrics(self.metrics_buffer) + + # Clear buffer + self.metrics_buffer.clear() + + logger.debug(f"Flushed {len(self.metrics_buffer)} metrics to database") + + except Exception as e: + logger.error(f"Failed to flush metrics: {e}") + + async def _flush_metrics_periodically(self): + """Periodically flush metrics buffer""" + while self.is_running: + await asyncio.sleep(self.flush_interval) + await self._flush_metrics() + + async def get_latest_metrics(self) -> Dict[str, Any]: + """Get latest metrics for real-time display""" + return self.latest_metrics + + async def collect_system_metrics(self): + """Collect system-level metrics""" + import psutil + + try: + # CPU metrics + cpu_percent = psutil.cpu_percent(interval=1) + await self.record_metric(Metric( + name="system.cpu.usage", + type=MetricType.GAUGE, + value=cpu_percent, + tags={"host": "localhost"}, + service="statistics" + )) + + # Memory metrics + memory = psutil.virtual_memory() + await self.record_metric(Metric( + name="system.memory.usage", + type=MetricType.GAUGE, + value=memory.percent, + tags={"host": "localhost"}, + service="statistics" + )) + + # Disk metrics + disk = psutil.disk_usage('/') + await self.record_metric(Metric( + name="system.disk.usage", + type=MetricType.GAUGE, + value=disk.percent, + tags={"host": "localhost", "mount": "/"}, + service="statistics" + )) + + # Network metrics + net_io = psutil.net_io_counters() + await self.record_metric(Metric( + name="system.network.bytes_sent", + type=MetricType.COUNTER, + value=net_io.bytes_sent, + tags={"host": "localhost"}, + service="statistics" + )) + await self.record_metric(Metric( + name="system.network.bytes_recv", + type=MetricType.COUNTER, + value=net_io.bytes_recv, + tags={"host": 
"localhost"}, + service="statistics" + )) + + except Exception as e: + logger.error(f"Failed to collect system metrics: {e}") + + async def collect_application_metrics(self): + """Collect application-level metrics""" + # This would be called by other services to report their metrics + pass \ No newline at end of file diff --git a/services/statistics/backend/models.py b/services/statistics/backend/models.py new file mode 100644 index 0000000..a772e69 --- /dev/null +++ b/services/statistics/backend/models.py @@ -0,0 +1,159 @@ +""" +Data models for Statistics Service +""" +from pydantic import BaseModel, Field +from datetime import datetime +from typing import Optional, List, Dict, Any, Literal +from enum import Enum + +class MetricType(str, Enum): + """Types of metrics""" + COUNTER = "counter" + GAUGE = "gauge" + HISTOGRAM = "histogram" + SUMMARY = "summary" + +class AggregationType(str, Enum): + """Types of aggregation""" + AVG = "avg" + SUM = "sum" + MIN = "min" + MAX = "max" + COUNT = "count" + PERCENTILE = "percentile" + +class Granularity(str, Enum): + """Time granularity for aggregation""" + MINUTE = "minute" + HOUR = "hour" + DAY = "day" + WEEK = "week" + MONTH = "month" + +class Metric(BaseModel): + """Single metric data point""" + id: Optional[str] = Field(None, description="Unique metric ID") + name: str = Field(..., description="Metric name") + type: MetricType = Field(..., description="Metric type") + value: float = Field(..., description="Metric value") + tags: Dict[str, str] = Field(default_factory=dict, description="Metric tags") + timestamp: datetime = Field(default_factory=datetime.now, description="Metric timestamp") + service: str = Field(..., description="Source service") + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class AggregatedMetric(BaseModel): + """Aggregated metric result""" + metric_name: str + aggregation_type: AggregationType + value: float + start_time: datetime + end_time: datetime + granularity: 
Optional[Granularity] = None + group_by: Optional[str] = None + count: int = Field(..., description="Number of data points aggregated") + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class TimeSeriesData(BaseModel): + """Time series data response""" + metric_name: str + start_time: datetime + end_time: datetime + interval: str + data: List[Dict[str, Any]] + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class DashboardConfig(BaseModel): + """Dashboard configuration""" + id: str + name: str + description: Optional[str] = None + widgets: List[Dict[str, Any]] + refresh_interval: int = Field(60, description="Refresh interval in seconds") + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class AlertRule(BaseModel): + """Alert rule configuration""" + id: Optional[str] = None + name: str + metric_name: str + condition: Literal["gt", "lt", "gte", "lte", "eq", "neq"] + threshold: float + duration: int = Field(..., description="Duration in seconds") + severity: Literal["low", "medium", "high", "critical"] + enabled: bool = True + notification_channels: List[str] = Field(default_factory=list) + created_at: datetime = Field(default_factory=datetime.now) + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class Alert(BaseModel): + """Active alert""" + id: str + rule_id: str + rule_name: str + metric_name: str + current_value: float + threshold: float + severity: str + triggered_at: datetime + resolved_at: Optional[datetime] = None + status: Literal["active", "resolved", "acknowledged"] + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +class UserAnalytics(BaseModel): + """User analytics data""" + total_users: int + active_users: int + new_users: int + user_growth_rate: float + average_session_duration: 
float + top_actions: List[Dict[str, Any]] + user_distribution: Dict[str, int] + period: str + +class SystemAnalytics(BaseModel): + """System performance analytics""" + uptime_percentage: float + average_response_time: float + error_rate: float + throughput: float + cpu_usage: float + memory_usage: float + disk_usage: float + active_connections: int + services_health: Dict[str, str] + +class EventAnalytics(BaseModel): + """Event analytics data""" + total_events: int + events_per_second: float + event_types: Dict[str, int] + top_events: List[Dict[str, Any]] + error_events: int + success_rate: float + processing_time: Dict[str, float] \ No newline at end of file diff --git a/services/statistics/backend/requirements.txt b/services/statistics/backend/requirements.txt new file mode 100644 index 0000000..7f2930c --- /dev/null +++ b/services/statistics/backend/requirements.txt @@ -0,0 +1,9 @@ +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +pydantic==2.5.3 +python-dotenv==1.0.0 +aiokafka==0.10.0 +redis==5.0.1 +psutil==5.9.8 +httpx==0.26.0 +websockets==12.0 \ No newline at end of file diff --git a/services/statistics/backend/time_series_db.py b/services/statistics/backend/time_series_db.py new file mode 100644 index 0000000..de3e593 --- /dev/null +++ b/services/statistics/backend/time_series_db.py @@ -0,0 +1,165 @@ +""" +Time Series Database Interface (Simplified for InfluxDB) +""" +import logging +from typing import List, Dict, Any, Optional +from datetime import datetime +from models import Metric, AggregatedMetric + +logger = logging.getLogger(__name__) + +class TimeSeriesDB: + """Time series database interface""" + + def __init__(self, host: str, port: int, database: str): + self.host = host + self.port = port + self.database = database + self.is_connected = False + # In production, would use actual InfluxDB client + self.data_store = [] # Simplified in-memory storage + + async def connect(self): + """Connect to database""" + # Simplified connection + self.is_connected = 
True + logger.info(f"Connected to time series database at {self.host}:{self.port}") + + async def close(self): + """Close database connection""" + self.is_connected = False + logger.info("Disconnected from time series database") + + async def write_metrics(self, metrics: List[Metric]): + """Write metrics to database""" + for metric in metrics: + self.data_store.append({ + "name": metric.name, + "value": metric.value, + "timestamp": metric.timestamp, + "tags": metric.tags, + "service": metric.service + }) + + async def query_metrics( + self, + metric_type: str, + start_time: datetime, + end_time: datetime + ) -> List[Dict[str, Any]]: + """Query metrics from database""" + results = [] + for data in self.data_store: + if (data["name"].startswith(metric_type) and + start_time <= data["timestamp"] <= end_time): + results.append(data) + return results + + async def get_time_series( + self, + metric_name: str, + start_time: datetime, + end_time: datetime, + interval: str + ) -> List[Dict[str, Any]]: + """Get time series data""" + return await self.query_metrics(metric_name, start_time, end_time) + + async def store_aggregated_metric(self, metric: AggregatedMetric): + """Store aggregated metric""" + self.data_store.append({ + "name": f"agg.{metric.metric_name}", + "value": metric.value, + "timestamp": metric.end_time, + "tags": {"aggregation": metric.aggregation_type}, + "service": "statistics" + }) + + async def count_metrics( + self, + metric_type: str, + start_time: datetime, + end_time: datetime + ) -> int: + """Count metrics""" + metrics = await self.query_metrics(metric_type, start_time, end_time) + return len(metrics) + + async def get_average( + self, + metric_name: str, + start_time: datetime, + end_time: datetime + ) -> Optional[float]: + """Get average value""" + metrics = await self.query_metrics(metric_name, start_time, end_time) + if not metrics: + return None + values = [m["value"] for m in metrics] + return sum(values) / len(values) + + async def 
count_distinct_tags( + self, + metric_type: str, + tag_name: str, + start_time: datetime, + end_time: datetime + ) -> int: + """Count distinct tag values""" + metrics = await self.query_metrics(metric_type, start_time, end_time) + unique_values = set() + for metric in metrics: + if tag_name in metric.get("tags", {}): + unique_values.add(metric["tags"][tag_name]) + return len(unique_values) + + async def get_top_metrics( + self, + metric_type: str, + group_by: str, + start_time: datetime, + end_time: datetime, + limit: int = 10 + ) -> List[Dict[str, Any]]: + """Get top metrics grouped by tag""" + metrics = await self.query_metrics(metric_type, start_time, end_time) + grouped = {} + for metric in metrics: + key = metric.get("tags", {}).get(group_by, "unknown") + grouped[key] = grouped.get(key, 0) + 1 + + sorted_items = sorted(grouped.items(), key=lambda x: x[1], reverse=True) + return [{"name": k, "count": v} for k, v in sorted_items[:limit]] + + async def count_metrics_with_value( + self, + metric_name: str, + value: float, + start_time: datetime, + end_time: datetime + ) -> int: + """Count metrics with specific value""" + metrics = await self.query_metrics(metric_name, start_time, end_time) + return sum(1 for m in metrics if m["value"] == value) + + async def get_metric_distribution( + self, + metric_type: str, + tag_name: str, + start_time: datetime, + end_time: datetime + ) -> Dict[str, int]: + """Get metric distribution by tag""" + metrics = await self.query_metrics(metric_type, start_time, end_time) + distribution = {} + for metric in metrics: + key = metric.get("tags", {}).get(tag_name, "unknown") + distribution[key] = distribution.get(key, 0) + 1 + return distribution + + async def delete_old_data(self, cutoff_date: datetime): + """Delete old data""" + self.data_store = [ + d for d in self.data_store + if d["timestamp"] >= cutoff_date + ] \ No newline at end of file diff --git a/services/statistics/backend/websocket_manager.py 
b/services/statistics/backend/websocket_manager.py new file mode 100644 index 0000000..7db10ee --- /dev/null +++ b/services/statistics/backend/websocket_manager.py @@ -0,0 +1,33 @@ +"""WebSocket Manager for real-time updates""" +from typing import List +from fastapi import WebSocket +import logging + +logger = logging.getLogger(__name__) + +class WebSocketManager: + """Manages WebSocket connections""" + + def __init__(self): + self.active_connections: List[WebSocket] = [] + + async def connect(self, websocket: WebSocket): + """Accept WebSocket connection""" + await websocket.accept() + self.active_connections.append(websocket) + logger.info(f"WebSocket connected. Total connections: {len(self.active_connections)}") + + def disconnect(self, websocket: WebSocket): + """Remove WebSocket connection""" + if websocket in self.active_connections: + self.active_connections.remove(websocket) + logger.info(f"WebSocket disconnected. Total connections: {len(self.active_connections)}") + + async def broadcast(self, message: dict): + """Broadcast message to all connected clients""" + for connection in self.active_connections: + try: + await connection.send_json(message) + except Exception as e: + logger.error(f"Error broadcasting to WebSocket: {e}") + self.disconnect(connection) \ No newline at end of file diff --git a/services/users/backend/Dockerfile b/services/users/backend/Dockerfile new file mode 100644 index 0000000..2515968 --- /dev/null +++ b/services/users/backend/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first for better caching +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . 
+ +# Expose port +EXPOSE 8000 + +# Run the application +CMD ["python", "main.py"] \ No newline at end of file diff --git a/services/users/backend/database.py b/services/users/backend/database.py new file mode 100644 index 0000000..2409c81 --- /dev/null +++ b/services/users/backend/database.py @@ -0,0 +1,22 @@ +from motor.motor_asyncio import AsyncIOMotorClient +from beanie import init_beanie +import os +from models import User + + +async def init_db(): + """Initialize database connection""" + # Get MongoDB URL from environment + mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017") + db_name = os.getenv("DB_NAME", "users_db") + + # Create Motor client + client = AsyncIOMotorClient(mongodb_url) + + # Initialize beanie with the User model + await init_beanie( + database=client[db_name], + document_models=[User] + ) + + print(f"Connected to MongoDB: {mongodb_url}/{db_name}") \ No newline at end of file diff --git a/services/users/backend/main.py b/services/users/backend/main.py new file mode 100644 index 0000000..6ebcb14 --- /dev/null +++ b/services/users/backend/main.py @@ -0,0 +1,334 @@ +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel +from typing import List, Optional +from datetime import datetime +import uvicorn +import os +import sys +import logging +from contextlib import asynccontextmanager +from database import init_db +from models import User +from beanie import PydanticObjectId + +sys.path.append('/app') +from shared.kafka import KafkaProducer, Event, EventType + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +# Pydantic models for requests +class UserCreate(BaseModel): + username: str + email: str + full_name: Optional[str] = None + profile_picture: Optional[str] = None + bio: Optional[str] = None + location: Optional[str] = None + website: Optional[str] = None + +class UserUpdate(BaseModel): + username: Optional[str] = None + 
email: Optional[str] = None + full_name: Optional[str] = None + profile_picture: Optional[str] = None + profile_picture_thumbnail: Optional[str] = None + bio: Optional[str] = None + location: Optional[str] = None + website: Optional[str] = None + is_email_verified: Optional[bool] = None + is_active: Optional[bool] = None + +class UserResponse(BaseModel): + id: str + username: str + email: str + full_name: Optional[str] = None + profile_picture: Optional[str] = None + profile_picture_thumbnail: Optional[str] = None + bio: Optional[str] = None + location: Optional[str] = None + website: Optional[str] = None + is_email_verified: bool + is_active: bool + created_at: datetime + updated_at: datetime + +class UserPublicResponse(BaseModel): + """공개 프로필용 응답 (민감한 정보 제외)""" + id: str + username: str + full_name: Optional[str] = None + profile_picture: Optional[str] = None + profile_picture_thumbnail: Optional[str] = None + bio: Optional[str] = None + location: Optional[str] = None + website: Optional[str] = None + created_at: datetime + + +# Global Kafka producer +kafka_producer: Optional[KafkaProducer] = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Startup + global kafka_producer + + await init_db() + + # Initialize Kafka producer + try: + kafka_producer = KafkaProducer( + bootstrap_servers=os.getenv('KAFKA_BOOTSTRAP_SERVERS', 'kafka:9092') + ) + await kafka_producer.start() + logger.info("Kafka producer initialized") + except Exception as e: + logger.warning(f"Failed to initialize Kafka producer: {e}") + kafka_producer = None + + yield + + # Shutdown + if kafka_producer: + await kafka_producer.stop() + + +app = FastAPI( + title="Users Service", + description="User management microservice with MongoDB", + version="0.2.0", + lifespan=lifespan +) + +# CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Health check +@app.get("/health") +async def 
health_check(): + return { + "status": "healthy", + "service": "users", + "timestamp": datetime.now().isoformat() + } + +# CRUD Operations +@app.get("/users", response_model=List[UserResponse]) +async def get_users(): + users = await User.find_all().to_list() + return [UserResponse( + id=str(user.id), + username=user.username, + email=user.email, + full_name=user.full_name, + profile_picture=user.profile_picture, + profile_picture_thumbnail=user.profile_picture_thumbnail, + bio=user.bio, + location=user.location, + website=user.website, + is_email_verified=user.is_email_verified, + is_active=user.is_active, + created_at=user.created_at, + updated_at=user.updated_at + ) for user in users] + +@app.get("/users/{user_id}", response_model=UserResponse) +async def get_user(user_id: str): + try: + user = await User.get(PydanticObjectId(user_id)) + if not user: + raise HTTPException(status_code=404, detail="User not found") + return UserResponse( + id=str(user.id), + username=user.username, + email=user.email, + full_name=user.full_name, + profile_picture=user.profile_picture, + profile_picture_thumbnail=user.profile_picture_thumbnail, + bio=user.bio, + location=user.location, + website=user.website, + is_email_verified=user.is_email_verified, + is_active=user.is_active, + created_at=user.created_at, + updated_at=user.updated_at + ) + except Exception: + raise HTTPException(status_code=404, detail="User not found") + +@app.post("/users", response_model=UserResponse, status_code=201) +async def create_user(user_data: UserCreate): + # Check if username already exists + existing_user = await User.find_one(User.username == user_data.username) + if existing_user: + raise HTTPException(status_code=400, detail="Username already exists") + + # Create new user + user = User( + username=user_data.username, + email=user_data.email, + full_name=user_data.full_name, + profile_picture=user_data.profile_picture, + bio=user_data.bio, + location=user_data.location, + 
website=user_data.website + ) + + await user.create() + + # Publish event + if kafka_producer: + event = Event( + event_type=EventType.USER_CREATED, + service="users", + data={ + "user_id": str(user.id), + "username": user.username, + "email": user.email + }, + user_id=str(user.id) + ) + await kafka_producer.send_event("user-events", event) + + return UserResponse( + id=str(user.id), + username=user.username, + email=user.email, + full_name=user.full_name, + profile_picture=user.profile_picture, + profile_picture_thumbnail=user.profile_picture_thumbnail, + bio=user.bio, + location=user.location, + website=user.website, + is_email_verified=user.is_email_verified, + is_active=user.is_active, + created_at=user.created_at, + updated_at=user.updated_at + ) + +@app.put("/users/{user_id}", response_model=UserResponse) +async def update_user(user_id: str, user_update: UserUpdate): + try: + user = await User.get(PydanticObjectId(user_id)) + if not user: + raise HTTPException(status_code=404, detail="User not found") + except Exception: + raise HTTPException(status_code=404, detail="User not found") + + if user_update.username is not None: + # Check if new username already exists + existing_user = await User.find_one( + User.username == user_update.username, + User.id != user.id + ) + if existing_user: + raise HTTPException(status_code=400, detail="Username already exists") + user.username = user_update.username + + if user_update.email is not None: + user.email = user_update.email + + if user_update.full_name is not None: + user.full_name = user_update.full_name + + if user_update.profile_picture is not None: + user.profile_picture = user_update.profile_picture + + if user_update.profile_picture_thumbnail is not None: + user.profile_picture_thumbnail = user_update.profile_picture_thumbnail + + if user_update.bio is not None: + user.bio = user_update.bio + + if user_update.location is not None: + user.location = user_update.location + + if user_update.website is not None: + 
user.website = user_update.website + + if user_update.is_email_verified is not None: + user.is_email_verified = user_update.is_email_verified + + if user_update.is_active is not None: + user.is_active = user_update.is_active + + user.updated_at = datetime.now() + await user.save() + + # Publish event + if kafka_producer: + event = Event( + event_type=EventType.USER_UPDATED, + service="users", + data={ + "user_id": str(user.id), + "username": user.username, + "email": user.email, + "updated_fields": list(user_update.dict(exclude_unset=True).keys()) + }, + user_id=str(user.id) + ) + await kafka_producer.send_event("user-events", event) + + return UserResponse( + id=str(user.id), + username=user.username, + email=user.email, + full_name=user.full_name, + profile_picture=user.profile_picture, + profile_picture_thumbnail=user.profile_picture_thumbnail, + bio=user.bio, + location=user.location, + website=user.website, + is_email_verified=user.is_email_verified, + is_active=user.is_active, + created_at=user.created_at, + updated_at=user.updated_at + ) + +@app.delete("/users/{user_id}") +async def delete_user(user_id: str): + try: + user = await User.get(PydanticObjectId(user_id)) + if not user: + raise HTTPException(status_code=404, detail="User not found") + + user_id_str = str(user.id) + username = user.username + + await user.delete() + + # Publish event + if kafka_producer: + event = Event( + event_type=EventType.USER_DELETED, + service="users", + data={ + "user_id": user_id_str, + "username": username + }, + user_id=user_id_str + ) + await kafka_producer.send_event("user-events", event) + + return {"message": "User deleted successfully"} + except Exception: + raise HTTPException(status_code=404, detail="User not found") + +if __name__ == "__main__": + uvicorn.run( + "main:app", + host="0.0.0.0", + port=8000, + reload=True + ) \ No newline at end of file diff --git a/services/users/backend/models.py b/services/users/backend/models.py new file mode 100644 index 
0000000..e4e6b3b --- /dev/null +++ b/services/users/backend/models.py @@ -0,0 +1,31 @@ +from beanie import Document +from pydantic import EmailStr, Field, HttpUrl +from datetime import datetime +from typing import Optional + + +class User(Document): + username: str = Field(..., unique=True) + email: EmailStr + full_name: Optional[str] = None + profile_picture: Optional[str] = Field(None, description="프로필 사진 URL") + profile_picture_thumbnail: Optional[str] = Field(None, description="프로필 사진 썸네일 URL") + bio: Optional[str] = Field(None, max_length=500, description="자기소개") + location: Optional[str] = Field(None, description="위치") + website: Optional[str] = Field(None, description="개인 웹사이트") + is_email_verified: bool = Field(default=False, description="이메일 인증 여부") + is_active: bool = Field(default=True, description="계정 활성화 상태") + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + + class Settings: + collection = "users" + + class Config: + json_schema_extra = { + "example": { + "username": "john_doe", + "email": "john@example.com", + "full_name": "John Doe" + } + } \ No newline at end of file diff --git a/services/users/backend/requirements.txt b/services/users/backend/requirements.txt new file mode 100644 index 0000000..6d4db67 --- /dev/null +++ b/services/users/backend/requirements.txt @@ -0,0 +1,7 @@ +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +pydantic[email]==2.5.3 +pymongo==4.6.1 +motor==3.3.2 +beanie==1.23.6 +aiokafka==0.10.0 \ No newline at end of file diff --git a/shared/kafka/__init__.py b/shared/kafka/__init__.py new file mode 100644 index 0000000..7c78f53 --- /dev/null +++ b/shared/kafka/__init__.py @@ -0,0 +1,6 @@ +from .producer import KafkaProducer +from .consumer import KafkaConsumer +from .events import Event, EventType +from .schema_registry import SchemaRegistry + +__all__ = ['KafkaProducer', 'KafkaConsumer', 'Event', 'EventType', 'SchemaRegistry'] \ No newline at end of file diff 
--git a/shared/kafka/consumer.py b/shared/kafka/consumer.py new file mode 100644 index 0000000..746e79b --- /dev/null +++ b/shared/kafka/consumer.py @@ -0,0 +1,125 @@ +import json +import asyncio +from typing import Optional, Callable, Dict, Any, List +from aiokafka import AIOKafkaConsumer +from aiokafka.errors import KafkaError +import logging + +from .events import Event, EventType + +logger = logging.getLogger(__name__) + +class KafkaConsumer: + def __init__( + self, + topics: List[str], + group_id: str, + bootstrap_servers: str = "kafka:9092" + ): + self.topics = topics + self.group_id = group_id + self.bootstrap_servers = bootstrap_servers + self._consumer: Optional[AIOKafkaConsumer] = None + self._handlers: Dict[EventType, List[Callable]] = {} + self._running = False + + def register_handler(self, event_type: EventType, handler: Callable): + """이벤트 타입별 핸들러 등록""" + if event_type not in self._handlers: + self._handlers[event_type] = [] + self._handlers[event_type].append(handler) + logger.info(f"Registered handler for {event_type}") + + async def start(self): + """Kafka Consumer 시작""" + try: + self._consumer = AIOKafkaConsumer( + *self.topics, + bootstrap_servers=self.bootstrap_servers, + group_id=self.group_id, + value_deserializer=lambda v: json.loads(v.decode()), + auto_offset_reset='earliest', + enable_auto_commit=True, + auto_commit_interval_ms=1000, + session_timeout_ms=30000, + heartbeat_interval_ms=10000 + ) + await self._consumer.start() + self._running = True + logger.info(f"Kafka Consumer started: {self.topics} (group: {self.group_id})") + + # 메시지 처리 루프 시작 + asyncio.create_task(self._consume_messages()) + + except Exception as e: + logger.error(f"Failed to start Kafka Consumer: {e}") + raise + + async def stop(self): + """Kafka Consumer 종료""" + self._running = False + if self._consumer: + await self._consumer.stop() + logger.info("Kafka Consumer stopped") + + async def _consume_messages(self): + """메시지 소비 루프""" + if not self._consumer: + return + + 
while self._running: + try: + # 메시지 배치로 가져오기 (최대 100ms 대기) + msg_batch = await self._consumer.getmany(timeout_ms=100) + + for tp, messages in msg_batch.items(): + for msg in messages: + await self._process_message(msg.value) + + except KafkaError as e: + logger.error(f"Kafka error: {e}") + await asyncio.sleep(1) + except Exception as e: + logger.error(f"Error processing messages: {e}") + await asyncio.sleep(1) + + async def _process_message(self, message: Dict[str, Any]): + """개별 메시지 처리""" + try: + # Event 객체로 변환 + event = Event(**message) + + # 등록된 핸들러 실행 + handlers = self._handlers.get(event.event_type, []) + + for handler in handlers: + try: + if asyncio.iscoroutinefunction(handler): + await handler(event) + else: + handler(event) + except Exception as e: + logger.error(f"Handler error for {event.event_type}: {e}") + + if not handlers: + logger.debug(f"No handlers for event type: {event.event_type}") + + except Exception as e: + logger.error(f"Failed to process message: {e}") + + async def consume_one(self, timeout: float = 1.0) -> Optional[Event]: + """단일 메시지 소비 (테스트/디버깅용)""" + if not self._consumer: + return None + + try: + msg = await asyncio.wait_for( + self._consumer.getone(), + timeout=timeout + ) + return Event(**msg.value) + except asyncio.TimeoutError: + return None + except Exception as e: + logger.error(f"Error consuming message: {e}") + return None \ No newline at end of file diff --git a/shared/kafka/events.py b/shared/kafka/events.py new file mode 100644 index 0000000..2121a2f --- /dev/null +++ b/shared/kafka/events.py @@ -0,0 +1,31 @@ +from enum import Enum +from pydantic import BaseModel, Field +from datetime import datetime +from typing import Any, Optional, Dict + +class EventType(str, Enum): + USER_CREATED = "user.created" + USER_UPDATED = "user.updated" + USER_DELETED = "user.deleted" + USER_LOGIN = "user.login" + + IMAGE_UPLOADED = "image.uploaded" + IMAGE_CACHED = "image.cached" + IMAGE_DELETED = "image.deleted" + + TASK_CREATED = 
"task.created" + TASK_COMPLETED = "task.completed" + TASK_FAILED = "task.failed" + +class Event(BaseModel): + event_type: EventType + timestamp: datetime = Field(default_factory=datetime.now) + service: str + data: Dict[str, Any] + correlation_id: Optional[str] = None + user_id: Optional[str] = None + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } \ No newline at end of file diff --git a/shared/kafka/producer.py b/shared/kafka/producer.py new file mode 100644 index 0000000..0a33ba0 --- /dev/null +++ b/shared/kafka/producer.py @@ -0,0 +1,101 @@ +import json +import asyncio +from typing import Optional, Dict, Any +from aiokafka import AIOKafkaProducer +from aiokafka.errors import KafkaError +import logging + +from .events import Event + +logger = logging.getLogger(__name__) + +class KafkaProducer: + def __init__(self, bootstrap_servers: str = "kafka:9092"): + self.bootstrap_servers = bootstrap_servers + self._producer: Optional[AIOKafkaProducer] = None + + async def start(self): + """Kafka Producer 시작""" + try: + self._producer = AIOKafkaProducer( + bootstrap_servers=self.bootstrap_servers, + value_serializer=lambda v: json.dumps(v).encode(), + compression_type="gzip", + acks='all', + retry_backoff_ms=100 + ) + await self._producer.start() + logger.info(f"Kafka Producer started: {self.bootstrap_servers}") + except Exception as e: + logger.error(f"Failed to start Kafka Producer: {e}") + raise + + async def stop(self): + """Kafka Producer 종료""" + if self._producer: + await self._producer.stop() + logger.info("Kafka Producer stopped") + + async def send_event(self, topic: str, event: Event) -> bool: + """이벤트 전송""" + if not self._producer: + logger.error("Producer not started") + return False + + try: + event_dict = event.dict() + event_dict['timestamp'] = event.timestamp.isoformat() + + await self._producer.send_and_wait( + topic, + value=event_dict, + key=event.correlation_id.encode() if event.correlation_id else None + ) + + 
logger.info(f"Event sent to {topic}: {event.event_type}") + return True + + except KafkaError as e: + logger.error(f"Failed to send event to {topic}: {e}") + return False + except Exception as e: + logger.error(f"Unexpected error sending event: {e}") + return False + + async def send_batch(self, topic: str, events: list[Event]) -> int: + """여러 이벤트를 배치로 전송""" + if not self._producer: + logger.error("Producer not started") + return 0 + + sent_count = 0 + batch = self._producer.create_batch() + + for event in events: + event_dict = event.dict() + event_dict['timestamp'] = event.timestamp.isoformat() + + metadata = batch.append( + key=event.correlation_id.encode() if event.correlation_id else None, + value=json.dumps(event_dict).encode(), + timestamp=None + ) + + if metadata is None: + # 배치가 가득 찼으면 전송하고 새 배치 생성 + await self._producer.send_batch(batch, topic) + sent_count += len(batch) + batch = self._producer.create_batch() + batch.append( + key=event.correlation_id.encode() if event.correlation_id else None, + value=json.dumps(event_dict).encode(), + timestamp=None + ) + + # 남은 배치 전송 + if batch: + await self._producer.send_batch(batch, topic) + sent_count += len(batch) + + logger.info(f"Sent {sent_count} events to {topic}") + return sent_count \ No newline at end of file diff --git a/shared/kafka/schema_registry.py b/shared/kafka/schema_registry.py new file mode 100644 index 0000000..676306d --- /dev/null +++ b/shared/kafka/schema_registry.py @@ -0,0 +1,333 @@ +""" +이벤트 스키마 레지스트리 +이벤트 스키마 정의 및 버전 관리 +""" +from typing import Dict, Any, Optional, List, Literal +from enum import Enum +from pydantic import BaseModel, Field, field_validator +from datetime import datetime +import json + +class SchemaVersion(str, Enum): + V1 = "1.0.0" + V2 = "2.0.0" + +class EventSchemaBase(BaseModel): + """이벤트 스키마 베이스""" + event_id: str = Field(..., description="고유 이벤트 ID") + event_type: str = Field(..., description="이벤트 타입") + timestamp: datetime = Field(default_factory=datetime.now, 
description="이벤트 발생 시간") + version: str = Field(default=SchemaVersion.V1, description="스키마 버전") + service: str = Field(..., description="이벤트 발생 서비스") + + class Config: + json_encoders = { + datetime: lambda v: v.isoformat() + } + +# User Events Schemas +class UserCreatedSchema(EventSchemaBase): + """사용자 생성 이벤트 스키마""" + event_type: Literal["USER_CREATED"] = "USER_CREATED" + data: Dict[str, Any] = Field(..., description="이벤트 데이터") + + @field_validator('data') + @classmethod + def validate_data(cls, v): + required_fields = ['user_id', 'username', 'email'] + for field in required_fields: + if field not in v: + raise ValueError(f"Missing required field: {field}") + return v + +class UserUpdatedSchema(EventSchemaBase): + """사용자 업데이트 이벤트 스키마""" + event_type: Literal["USER_UPDATED"] = "USER_UPDATED" + data: Dict[str, Any] = Field(..., description="이벤트 데이터") + + @field_validator('data') + @classmethod + def validate_data(cls, v): + required_fields = ['user_id'] + optional_fields = ['username', 'email', 'full_name', 'profile_picture', + 'bio', 'location', 'website', 'updated_fields'] + + for field in required_fields: + if field not in v: + raise ValueError(f"Missing required field: {field}") + + # updated_fields가 있으면 검증 + if 'updated_fields' in v and not isinstance(v['updated_fields'], list): + raise ValueError("updated_fields must be a list") + + return v + +class UserDeletedSchema(EventSchemaBase): + """사용자 삭제 이벤트 스키마""" + event_type: Literal["USER_DELETED"] = "USER_DELETED" + data: Dict[str, Any] = Field(..., description="이벤트 데이터") + + @field_validator('data') + @classmethod + def validate_data(cls, v): + required_fields = ['user_id', 'username'] + for field in required_fields: + if field not in v: + raise ValueError(f"Missing required field: {field}") + return v + +# OAuth Events Schemas +class OAuthAppCreatedSchema(EventSchemaBase): + """OAuth 앱 생성 이벤트 스키마""" + event_type: Literal["OAUTH_APP_CREATED"] = "OAUTH_APP_CREATED" + data: Dict[str, Any] = Field(..., 
description="이벤트 데이터") + + @field_validator('data') + @classmethod + def validate_data(cls, v): + required_fields = ['app_id', 'name', 'owner_id', 'client_id'] + for field in required_fields: + if field not in v: + raise ValueError(f"Missing required field: {field}") + return v + +class OAuthTokenIssuedSchema(EventSchemaBase): + """OAuth 토큰 발급 이벤트 스키마""" + event_type: Literal["OAUTH_TOKEN_ISSUED"] = "OAUTH_TOKEN_ISSUED" + data: Dict[str, Any] = Field(..., description="이벤트 데이터") + + @field_validator('data') + @classmethod + def validate_data(cls, v): + required_fields = ['client_id', 'grant_type'] + optional_fields = ['user_id', 'scopes', 'expires_in'] + + for field in required_fields: + if field not in v: + raise ValueError(f"Missing required field: {field}") + + # scopes가 있으면 리스트여야 함 + if 'scopes' in v and not isinstance(v['scopes'], list): + raise ValueError("scopes must be a list") + + return v + +class OAuthTokenRevokedSchema(EventSchemaBase): + """OAuth 토큰 폐기 이벤트 스키마""" + event_type: Literal["OAUTH_TOKEN_REVOKED"] = "OAUTH_TOKEN_REVOKED" + data: Dict[str, Any] = Field(..., description="이벤트 데이터") + + @field_validator('data') + @classmethod + def validate_data(cls, v): + required_fields = ['token_id', 'client_id'] + optional_fields = ['user_id', 'revoked_by'] + + for field in required_fields: + if field not in v: + raise ValueError(f"Missing required field: {field}") + + return v + +# Image Events Schemas +class ImageUploadedSchema(EventSchemaBase): + """이미지 업로드 이벤트 스키마""" + event_type: Literal["IMAGE_UPLOADED"] = "IMAGE_UPLOADED" + data: Dict[str, Any] = Field(..., description="이벤트 데이터") + + @field_validator('data') + @classmethod + def validate_data(cls, v): + required_fields = ['image_id', 'user_id', 'url'] + optional_fields = ['size', 'mime_type', 'width', 'height', 'thumbnail_url'] + + for field in required_fields: + if field not in v: + raise ValueError(f"Missing required field: {field}") + + return v + +class ImageProcessedSchema(EventSchemaBase): + 
"""이미지 처리 완료 이벤트 스키마""" + event_type: Literal["IMAGE_PROCESSED"] = "IMAGE_PROCESSED" + data: Dict[str, Any] = Field(..., description="이벤트 데이터") + + @field_validator('data') + @classmethod + def validate_data(cls, v): + required_fields = ['image_id', 'process_type'] + optional_fields = ['original_url', 'processed_url', 'processing_time_ms'] + + for field in required_fields: + if field not in v: + raise ValueError(f"Missing required field: {field}") + + return v + +class SchemaRegistry: + """스키마 레지스트리""" + + # 스키마 매핑 + SCHEMAS = { + "USER_CREATED": UserCreatedSchema, + "USER_UPDATED": UserUpdatedSchema, + "USER_DELETED": UserDeletedSchema, + "OAUTH_APP_CREATED": OAuthAppCreatedSchema, + "OAUTH_TOKEN_ISSUED": OAuthTokenIssuedSchema, + "OAUTH_TOKEN_REVOKED": OAuthTokenRevokedSchema, + "IMAGE_UPLOADED": ImageUploadedSchema, + "IMAGE_PROCESSED": ImageProcessedSchema, + } + + # 스키마 버전 호환성 매트릭스 + COMPATIBILITY_MATRIX = { + SchemaVersion.V1: [SchemaVersion.V1], + SchemaVersion.V2: [SchemaVersion.V1, SchemaVersion.V2], # V2는 V1과 호환 + } + + @classmethod + def get_schema(cls, event_type: str) -> Optional[type]: + """이벤트 타입에 대한 스키마 반환""" + return cls.SCHEMAS.get(event_type) + + @classmethod + def validate_event(cls, event_data: Dict[str, Any]) -> tuple[bool, Optional[str]]: + """이벤트 데이터 검증""" + try: + event_type = event_data.get('event_type') + if not event_type: + return False, "Missing event_type" + + schema_class = cls.get_schema(event_type) + if not schema_class: + return False, f"Unknown event type: {event_type}" + + # 스키마 검증 + schema_class(**event_data) + return True, None + + except Exception as e: + return False, str(e) + + @classmethod + def is_compatible(cls, from_version: str, to_version: str) -> bool: + """버전 호환성 확인""" + from_v = SchemaVersion(from_version) + to_v = SchemaVersion(to_version) + + compatible_versions = cls.COMPATIBILITY_MATRIX.get(to_v, []) + return from_v in compatible_versions + + @classmethod + def migrate_event( + cls, + event_data: Dict[str, 
Any], + from_version: str, + to_version: str + ) -> Dict[str, Any]: + """이벤트 데이터 마이그레이션""" + if from_version == to_version: + return event_data + + if not cls.is_compatible(from_version, to_version): + raise ValueError(f"Cannot migrate from {from_version} to {to_version}") + + # 버전별 마이그레이션 로직 + if from_version == SchemaVersion.V1 and to_version == SchemaVersion.V2: + # V1 -> V2 마이그레이션 예시 + event_data['version'] = SchemaVersion.V2 + + # 새로운 필드 추가 (기본값) + if 'metadata' not in event_data: + event_data['metadata'] = {} + + return event_data + + @classmethod + def get_all_schemas(cls) -> Dict[str, Dict[str, Any]]: + """모든 스키마 정보 반환 (문서화용)""" + schemas_info = {} + + for event_type, schema_class in cls.SCHEMAS.items(): + schemas_info[event_type] = { + "description": schema_class.__doc__, + "fields": schema_class.schema(), + "version": SchemaVersion.V1, + "example": cls._generate_example(schema_class) + } + + return schemas_info + + @classmethod + def _generate_example(cls, schema_class: type) -> Dict[str, Any]: + """스키마 예시 생성""" + examples = { + "USER_CREATED": { + "event_id": "evt_123456", + "event_type": "USER_CREATED", + "timestamp": datetime.now().isoformat(), + "version": "1.0.0", + "service": "users", + "data": { + "user_id": "usr_abc123", + "username": "johndoe", + "email": "john@example.com" + } + }, + "USER_UPDATED": { + "event_id": "evt_123457", + "event_type": "USER_UPDATED", + "timestamp": datetime.now().isoformat(), + "version": "1.0.0", + "service": "users", + "data": { + "user_id": "usr_abc123", + "updated_fields": ["profile_picture", "bio"], + "profile_picture": "https://example.com/pic.jpg", + "bio": "Updated bio" + } + }, + "OAUTH_TOKEN_ISSUED": { + "event_id": "evt_123458", + "event_type": "OAUTH_TOKEN_ISSUED", + "timestamp": datetime.now().isoformat(), + "version": "1.0.0", + "service": "oauth", + "data": { + "client_id": "app_xyz789", + "user_id": "usr_abc123", + "grant_type": "authorization_code", + "scopes": ["profile", "email"], + "expires_in": 
3600 + } + } + } + + return examples.get(schema_class.__fields__['event_type'].default, {}) + + @classmethod + def export_schemas(cls, format: str = "json") -> str: + """스키마 내보내기""" + schemas = cls.get_all_schemas() + + if format == "json": + return json.dumps(schemas, indent=2, default=str) + elif format == "markdown": + return cls._export_as_markdown(schemas) + else: + raise ValueError(f"Unsupported format: {format}") + + @classmethod + def _export_as_markdown(cls, schemas: Dict[str, Dict[str, Any]]) -> str: + """마크다운 형식으로 내보내기""" + md = "# Event Schema Registry\n\n" + + for event_type, info in schemas.items(): + md += f"## {event_type}\n\n" + md += f"{info['description']}\n\n" + md += f"**Version:** {info['version']}\n\n" + md += "**Example:**\n```json\n" + md += json.dumps(info['example'], indent=2, default=str) + md += "\n```\n\n" + + return md \ No newline at end of file diff --git a/test_all_services.py b/test_all_services.py new file mode 100644 index 0000000..7f0147c --- /dev/null +++ b/test_all_services.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +""" +Complete test suite for all backend services +""" +import asyncio +import httpx +import json +from datetime import datetime +import base64 +import os + +# Service endpoints +SERVICES = { + "users": "http://localhost:8001", + "images": "http://localhost:8002", + "oauth": "http://localhost:8003", + "console": "http://localhost:8011", + "statistics": "http://localhost:8012", + "notifications": "http://localhost:8013", + "files": "http://localhost:8014", + "search": "http://localhost:8015" +} + +def print_section(title): + """Print section header""" + print(f"\n{'='*60}") + print(f" {title}") + print(f"{'='*60}") + +def print_test(name, status, details=""): + """Print test result""" + icon = "✅" if status else "❌" + print(f"{icon} {name}: {details}") + +async def test_health_endpoints(): + """Test all health endpoints""" + print_section("1. 
HEALTH CHECK ENDPOINTS") + + async with httpx.AsyncClient(timeout=10.0) as client: + results = {} + for service, url in SERVICES.items(): + try: + response = await client.get(f"{url}/health") + if response.status_code == 200: + data = response.json() + status = data.get("status", "unknown") + print_test(f"{service.upper()} Health", True, f"Status: {status}") + results[service] = True + else: + print_test(f"{service.upper()} Health", False, f"HTTP {response.status_code}") + results[service] = False + except Exception as e: + print_test(f"{service.upper()} Health", False, f"Error: {str(e)}") + results[service] = False + return results + +async def test_users_service(): + """Test Users Service API""" + print_section("2. USERS SERVICE TESTS") + + async with httpx.AsyncClient(timeout=10.0) as client: + base_url = SERVICES["users"] + + # Create user + user_data = { + "username": f"testuser_{datetime.now().timestamp()}", + "email": f"test_{datetime.now().timestamp()}@example.com", + "password": "Test123!@#", + "full_name": "Test User" + } + + try: + response = await client.post(f"{base_url}/api/users/register", json=user_data) + if response.status_code == 200: + user = response.json() + print_test("Create User", True, f"User ID: {user.get('id')}") + + # Get user + response = await client.get(f"{base_url}/api/users/{user['id']}") + print_test("Get User", response.status_code == 200, f"Username: {user.get('username')}") + + # List users + response = await client.get(f"{base_url}/api/users") + data = response.json() + print_test("List Users", response.status_code == 200, f"Total: {data.get('total', 0)} users") + + # Update user + update_data = {"bio": "Updated bio"} + response = await client.put(f"{base_url}/api/users/{user['id']}", json=update_data) + print_test("Update User", response.status_code == 200) + + # Delete user + response = await client.delete(f"{base_url}/api/users/{user['id']}") + print_test("Delete User", response.status_code == 200) + else: + 
print_test("Create User", False, f"HTTP {response.status_code}") + except Exception as e: + print_test("Users Service", False, f"Error: {str(e)}") + +async def test_oauth_service(): + """Test OAuth Service""" + print_section("3. OAUTH SERVICE TESTS") + + async with httpx.AsyncClient(timeout=10.0) as client: + base_url = SERVICES["oauth"] + + try: + # Test OAuth providers + response = await client.get(f"{base_url}/api/oauth/providers") + if response.status_code == 200: + providers = response.json() + print_test("Get OAuth Providers", True, f"Providers: {', '.join(providers.get('providers', []))}") + else: + print_test("Get OAuth Providers", False, f"HTTP {response.status_code}") + + # Test Google OAuth URL + response = await client.get(f"{base_url}/api/oauth/google/authorize") + print_test("Google OAuth URL", response.status_code == 200, "Authorization URL generated") + + # Test GitHub OAuth URL + response = await client.get(f"{base_url}/api/oauth/github/authorize") + print_test("GitHub OAuth URL", response.status_code == 200, "Authorization URL generated") + + except Exception as e: + print_test("OAuth Service", False, f"Error: {str(e)}") + +async def test_images_service(): + """Test Images Service""" + print_section("4. 
IMAGES SERVICE TESTS") + + async with httpx.AsyncClient(timeout=10.0) as client: + base_url = SERVICES["images"] + + try: + # Create test image (1x1 pixel PNG) + image_data = base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" + ) + + # Upload image + files = {"file": ("test.png", image_data, "image/png")} + response = await client.post(f"{base_url}/api/images/upload", files=files) + + if response.status_code == 200: + image = response.json() + print_test("Upload Image", True, f"Image ID: {image.get('id')}") + + # Get image metadata + response = await client.get(f"{base_url}/api/images/{image['id']}/metadata") + print_test("Get Image Metadata", response.status_code == 200) + + # List images + response = await client.get(f"{base_url}/api/images") + data = response.json() + print_test("List Images", response.status_code == 200, f"Total: {data.get('total', 0)} images") + + # Delete image + response = await client.delete(f"{base_url}/api/images/{image['id']}") + print_test("Delete Image", response.status_code == 200) + else: + print_test("Upload Image", False, f"HTTP {response.status_code}") + + except Exception as e: + print_test("Images Service", False, f"Error: {str(e)}") + +async def test_files_service(): + """Test Files Service""" + print_section("5. 
FILES SERVICE TESTS") + + async with httpx.AsyncClient(timeout=30.0) as client: + base_url = SERVICES["files"] + + try: + # Upload test file + test_content = b"Test file content for MinIO storage" + files = {"file": ("test.txt", test_content, "text/plain")} + + response = await client.post(f"{base_url}/api/files/upload", files=files) + + if response.status_code == 200: + file_info = response.json() + print_test("Upload File", True, f"File ID: {file_info.get('file_id')}") + + # Get file info + response = await client.get(f"{base_url}/api/files/{file_info['file_id']}") + print_test("Get File Info", response.status_code == 200) + + # List files + response = await client.get(f"{base_url}/api/files") + data = response.json() + print_test("List Files", response.status_code == 200, f"Total: {data.get('total', 0)} files") + + # Delete file + response = await client.delete(f"{base_url}/api/files/{file_info['file_id']}") + print_test("Delete File", response.status_code == 200) + else: + print_test("Upload File", False, f"HTTP {response.status_code}") + + except Exception as e: + print_test("Files Service", False, f"Error: {str(e)}") + +async def test_notifications_service(): + """Test Notifications Service""" + print_section("6. 
NOTIFICATIONS SERVICE TESTS") + + async with httpx.AsyncClient(timeout=10.0) as client: + base_url = SERVICES["notifications"] + + try: + # Send email notification + email_data = { + "to": "test@example.com", + "subject": "Test Email", + "body": "This is a test email", + "template": "default" + } + + response = await client.post(f"{base_url}/api/notifications/email", json=email_data) + print_test("Send Email", response.status_code == 200, "Email queued") + + # Send SMS notification + sms_data = { + "to": "+1234567890", + "message": "Test SMS message" + } + + response = await client.post(f"{base_url}/api/notifications/sms", json=sms_data) + print_test("Send SMS", response.status_code == 200, "SMS queued") + + # Send push notification + push_data = { + "user_id": "test_user", + "title": "Test Push", + "body": "Test push notification", + "data": {"key": "value"} + } + + response = await client.post(f"{base_url}/api/notifications/push", json=push_data) + print_test("Send Push", response.status_code == 200, "Push notification queued") + + # Get notification history + response = await client.get(f"{base_url}/api/notifications/history?user_id=test_user") + data = response.json() + print_test("Get History", response.status_code == 200, f"Total: {data.get('total', 0)} notifications") + + except Exception as e: + print_test("Notifications Service", False, f"Error: {str(e)}") + +async def test_statistics_service(): + """Test Statistics Service""" + print_section("7. 
STATISTICS SERVICE TESTS") + + async with httpx.AsyncClient(timeout=10.0) as client: + base_url = SERVICES["statistics"] + + try: + # Track event + event_data = { + "event_type": "page_view", + "user_id": "test_user", + "metadata": { + "page": "/home", + "referrer": "google.com" + } + } + + response = await client.post(f"{base_url}/api/statistics/events", json=event_data) + print_test("Track Event", response.status_code == 200, "Event tracked") + + # Get user statistics + response = await client.get(f"{base_url}/api/statistics/users/test_user") + print_test("Get User Stats", response.status_code == 200) + + # Get system statistics + response = await client.get(f"{base_url}/api/statistics/system") + if response.status_code == 200: + stats = response.json() + print_test("System Stats", True, f"Total events: {stats.get('total_events', 0)}") + else: + print_test("System Stats", False, f"HTTP {response.status_code}") + + # Get analytics + response = await client.get(f"{base_url}/api/statistics/analytics?period=day") + print_test("Get Analytics", response.status_code == 200, "Daily analytics retrieved") + + except Exception as e: + print_test("Statistics Service", False, f"Error: {str(e)}") + +async def test_search_service(): + """Test Search Service""" + print_section("8. 
SEARCH SERVICE TESTS") + + async with httpx.AsyncClient(timeout=10.0) as client: + base_url = SERVICES["search"] + + try: + # Index test document + doc = { + "id": f"test_doc_{datetime.now().timestamp()}", + "doc_type": "test", + "title": "Test Document", + "content": "This is a test document for search functionality", + "tags": ["test", "search", "solr"], + "created_at": datetime.utcnow().isoformat() + } + + response = await client.post(f"{base_url}/api/search/index", json=doc) + print_test("Index Document", response.status_code == 200, f"Document ID: {doc['id']}") + + # Wait for indexing + await asyncio.sleep(2) + + # Search documents + response = await client.get(f"{base_url}/api/search", params={"q": "test"}) + if response.status_code == 200: + data = response.json() + print_test("Search Documents", True, f"Found {data.get('total', 0)} results") + else: + print_test("Search Documents", False, f"HTTP {response.status_code}") + + # Get suggestions + response = await client.get(f"{base_url}/api/search/suggest", params={"q": "tes"}) + if response.status_code == 200: + data = response.json() + suggestions = data.get("suggestions", []) + print_test("Get Suggestions", True, f"{len(suggestions)} suggestions") + else: + print_test("Get Suggestions", False, f"HTTP {response.status_code}") + + # Get statistics + response = await client.get(f"{base_url}/api/search/stats") + if response.status_code == 200: + stats = response.json() + print_test("Search Stats", True, f"Total docs: {stats.get('statistics', {}).get('total_documents', 0)}") + else: + print_test("Search Stats", False, f"HTTP {response.status_code}") + + # Delete document + response = await client.delete(f"{base_url}/api/search/document/{doc['id']}") + print_test("Delete Document", response.status_code == 200) + + except Exception as e: + print_test("Search Service", False, f"Error: {str(e)}") + +async def test_console_service(): + """Test Console Service""" + print_section("9. 
CONSOLE SERVICE TESTS") + + async with httpx.AsyncClient(timeout=10.0) as client: + base_url = SERVICES["console"] + + try: + # Get services status + response = await client.get(f"{base_url}/api/console/services") + if response.status_code == 200: + services = response.json() + print_test("Get Services", True, f"{len(services.get('services', []))} services") + else: + print_test("Get Services", False, f"HTTP {response.status_code}") + + # Get system info + response = await client.get(f"{base_url}/api/console/system") + print_test("System Info", response.status_code == 200, "System information retrieved") + + # Get logs + response = await client.get(f"{base_url}/api/console/logs?service=users&limit=10") + print_test("Get Logs", response.status_code == 200, "Logs retrieved") + + # Get metrics + response = await client.get(f"{base_url}/api/console/metrics") + if response.status_code == 200: + metrics = response.json() + print_test("Get Metrics", True, f"CPU: {metrics.get('metrics', {}).get('cpu_usage', 'N/A')}%") + else: + print_test("Get Metrics", False, f"HTTP {response.status_code}") + + except Exception as e: + print_test("Console Service", False, f"Error: {str(e)}") + +async def main(): + """Run all tests""" + print("="*60) + print(" BACKEND SERVICES COMPREHENSIVE TEST SUITE") + print("="*60) + print(f"Started at: {datetime.now().isoformat()}") + + # Test health endpoints first + health_results = await test_health_endpoints() + + # Test individual services + await test_users_service() + await test_oauth_service() + await test_images_service() + await test_files_service() + await test_notifications_service() + await test_statistics_service() + await test_search_service() + await test_console_service() + + # Summary + print_section("TEST SUMMARY") + healthy_count = sum(1 for v in health_results.values() if v) + total_count = len(health_results) + + print(f"\n📊 Services Health: {healthy_count}/{total_count} services are healthy") + print(f"✅ Healthy Services: {', 
'.join([k for k, v in health_results.items() if v])}") + + if healthy_count < total_count: + unhealthy = [k for k, v in health_results.items() if not v] + print(f"❌ Unhealthy Services: {', '.join(unhealthy)}") + + print(f"\n🎉 Test suite completed at: {datetime.now().isoformat()}") + print("="*60) + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/test_event_flow.py b/test_event_flow.py new file mode 100644 index 0000000..797bfa2 --- /dev/null +++ b/test_event_flow.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +""" +Step 9 이벤트 흐름 테스트 스크립트 +""" +import asyncio +import httpx +import json +from datetime import datetime +import time + +# Service URLs +CONSOLE_URL = "http://localhost:8011" +USERS_URL = "http://localhost:8001" +OAUTH_URL = "http://localhost:8003" + +# Test credentials +TEST_USERNAME = "admin" +TEST_PASSWORD = "admin123" + +async def get_auth_token(): + """Console에서 인증 토큰 획득""" + async with httpx.AsyncClient() as client: + response = await client.post( + f"{CONSOLE_URL}/api/auth/login", + data={ + "username": TEST_USERNAME, + "password": TEST_PASSWORD + } + ) + if response.status_code == 200: + token_data = response.json() + return token_data["access_token"] + else: + print(f"Failed to get auth token: {response.status_code}") + return None + +async def test_user_create_event(): + """사용자 생성 이벤트 테스트""" + print("\n=== Testing User Create Event ===") + + # Create a new user + async with httpx.AsyncClient() as client: + user_data = { + "username": f"test_user_{int(time.time())}", + "email": f"test_{int(time.time())}@example.com", + "full_name": "Test User for Event", + "profile_picture": "https://example.com/test.jpg", + "bio": "Testing event system", + "location": "Test City" + } + + response = await client.post( + f"{USERS_URL}/users", + json=user_data + ) + + if response.status_code == 201: + user = response.json() + print(f"✅ User created: {user['username']} (ID: {user['id']})") + + # Wait for event processing + 
await asyncio.sleep(2) + + return user['id'] + else: + print(f"❌ Failed to create user: {response.status_code}") + print(response.text) + return None + +async def test_user_update_event(user_id: str): + """사용자 업데이트 이벤트 테스트""" + print("\n=== Testing User Update Event ===") + + async with httpx.AsyncClient() as client: + update_data = { + "bio": "Updated bio for event testing", + "profile_picture": "https://example.com/updated.jpg", + "location": "Updated City" + } + + response = await client.put( + f"{USERS_URL}/users/{user_id}", + json=update_data + ) + + if response.status_code == 200: + user = response.json() + print(f"✅ User updated: {user['username']}") + + # Wait for event processing + await asyncio.sleep(2) + + return True + else: + print(f"❌ Failed to update user: {response.status_code}") + return False + +async def test_oauth_app_create_event(): + """OAuth 앱 생성 이벤트 테스트""" + print("\n=== Testing OAuth App Create Event ===") + + async with httpx.AsyncClient() as client: + app_data = { + "name": f"Test App {int(time.time())}", + "description": "Testing event system", + "redirect_uris": ["http://localhost:3000/callback"], + "owner_id": "test_owner_123" + } + + response = await client.post( + f"{OAUTH_URL}/applications", + json=app_data + ) + + if response.status_code in [200, 201]: + app = response.json() + print(f"✅ OAuth app created: {app['name']} (ID: {app['id']})") + + # Wait for event processing + await asyncio.sleep(2) + + return app['client_id'] + else: + print(f"❌ Failed to create OAuth app: {response.status_code}") + print(response.text) + return None + +async def check_event_stats(token: str): + """이벤트 통계 확인""" + print("\n=== Checking Event Statistics ===") + + async with httpx.AsyncClient() as client: + headers = {"Authorization": f"Bearer {token}"} + + # Get event stats + response = await client.get( + f"{CONSOLE_URL}/api/events/stats", + headers=headers + ) + + if response.status_code == 200: + stats = response.json() + print(f"✅ Event 
Statistics:") + print(f" - Processed: {stats['stats']['processed']}") + print(f" - Failed: {stats['stats']['failed']}") + print(f" - Retried: {stats['stats']['retried']}") + print(f" - DLQ: {stats['stats']['dlq_sent']}") + else: + print(f"❌ Failed to get event stats: {response.status_code}") + +async def check_dlq_messages(token: str): + """DLQ 메시지 확인""" + print("\n=== Checking Dead Letter Queue ===") + + async with httpx.AsyncClient() as client: + headers = {"Authorization": f"Bearer {token}"} + + response = await client.get( + f"{CONSOLE_URL}/api/events/dlq?limit=5", + headers=headers + ) + + if response.status_code == 200: + dlq_data = response.json() + print(f"✅ DLQ Messages: {dlq_data['count']} messages") + + for msg in dlq_data['messages']: + print(f" - Event ID: {msg.get('event_id', 'N/A')}") + print(f" Error: {msg.get('error', 'N/A')}") + print(f" Retry Count: {msg.get('retry_count', 0)}") + else: + print(f"❌ Failed to get DLQ messages: {response.status_code}") + +async def check_event_schemas(): + """이벤트 스키마 확인""" + print("\n=== Checking Event Schemas ===") + + async with httpx.AsyncClient() as client: + response = await client.get(f"{CONSOLE_URL}/api/events/schemas") + + if response.status_code == 200: + schemas_data = response.json() + print(f"✅ Available Event Schemas:") + + for schema_name in schemas_data['schemas'].keys(): + print(f" - {schema_name}") + else: + print(f"❌ Failed to get event schemas: {response.status_code}") + +async def test_user_delete_event(user_id: str): + """사용자 삭제 이벤트 테스트""" + print("\n=== Testing User Delete Event ===") + + async with httpx.AsyncClient() as client: + response = await client.delete(f"{USERS_URL}/users/{user_id}") + + if response.status_code == 200: + print(f"✅ User deleted: {user_id}") + + # Wait for event processing + await asyncio.sleep(2) + + return True + else: + print(f"❌ Failed to delete user: {response.status_code}") + return False + +async def main(): + """메인 테스트 실행""" + print("=" * 50) + print("Step 9: 
Advanced Event Processing Test") + print("=" * 50) + + # Wait for services to be ready + print("\nWaiting for services to be ready...") + await asyncio.sleep(5) + + # Get auth token + token = await get_auth_token() + if not token: + print("Failed to authenticate. Exiting.") + return + + print(f"✅ Authentication successful") + + # Check event schemas first + await check_event_schemas() + + # Test user events + user_id = await test_user_create_event() + if user_id: + await test_user_update_event(user_id) + + # Check stats after user events + await check_event_stats(token) + + # Delete user + await test_user_delete_event(user_id) + + # Test OAuth events + client_id = await test_oauth_app_create_event() + + # Final statistics + await asyncio.sleep(3) # Wait for all events to process + await check_event_stats(token) + await check_dlq_messages(token) + + print("\n" + "=" * 50) + print("Test completed!") + print("=" * 50) + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/test_integration.py b/test_integration.py new file mode 100755 index 0000000..eed6419 --- /dev/null +++ b/test_integration.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 +""" +Integration Test Suite for Site11 Services +Tests the interaction between Console, Statistics, and Notification services +""" +import asyncio +import httpx +import json +from datetime import datetime, timedelta +import time + +BASE_URLS = { + "console": "http://localhost:8011", + "statistics": "http://localhost:8012", + "notifications": "http://localhost:8013", + "users": "http://localhost:8001", + "oauth": "http://localhost:8003", + "images": "http://localhost:8002" +} + +async def test_service_health(): + """Test health checks for all services""" + print("\n🏥 Testing Service Health Checks...") + print("=" * 60) + + async with httpx.AsyncClient() as client: + for service, url in BASE_URLS.items(): + try: + response = await client.get(f"{url}/health") + status = "✅ HEALTHY" if 
response.status_code == 200 else f"❌ UNHEALTHY ({response.status_code})" + print(f"{service.ljust(15)}: {status}") + if response.status_code == 200: + data = response.json() + if "components" in data: + for comp, comp_status in data["components"].items(): + print(f" └─ {comp}: {comp_status}") + except Exception as e: + print(f"{service.ljust(15)}: ❌ ERROR - {str(e)}") + + print("=" * 60) + +async def test_notification_to_statistics_flow(): + """Test flow from notification creation to statistics recording""" + print("\n📊 Testing Notification → Statistics Flow...") + print("=" * 60) + + async with httpx.AsyncClient() as client: + # 1. Send a notification + print("1. Sending notification...") + notification_data = { + "user_id": "integration_test_user", + "title": "Integration Test Alert", + "message": "Testing integration between services", + "channels": ["in_app"], + "priority": "high", + "category": "system" + } + + try: + response = await client.post( + f"{BASE_URLS['notifications']}/api/notifications/send", + json=notification_data + ) + result = response.json() + print(f" Notification sent: {result}") + notification_id = result.get("notification_id") + + # 2. Wait a moment for processing + await asyncio.sleep(2) + + # 3. Check if statistics recorded the event + print("\n2. Checking statistics for notification events...") + response = await client.get( + f"{BASE_URLS['statistics']}/api/analytics/events", + params={"event_type": "notification", "limit": 5} + ) + + if response.status_code == 200: + events = response.json() + print(f" Found {events.get('count', 0)} notification events in statistics") + else: + print(f" Statistics check failed: {response.status_code}") + + # 4. Check analytics overview + print("\n3. 
Getting analytics overview...") + response = await client.get( + f"{BASE_URLS['statistics']}/api/analytics/overview" + ) + + if response.status_code == 200: + overview = response.json() + print(f" Total events: {overview.get('total_events', 'N/A')}") + print(f" Active users: {overview.get('active_users', 'N/A')}") + print(f" System health: {overview.get('system_health', 'N/A')}") + + except Exception as e: + print(f" Error: {e}") + + print("=" * 60) + +async def test_user_action_flow(): + """Test a complete user action flow across services""" + print("\n👤 Testing User Action Flow...") + print("=" * 60) + + async with httpx.AsyncClient() as client: + # 1. Create a test user (if Users service supports it) + print("1. Creating/verifying test user...") + try: + # Try to get user first + response = await client.get(f"{BASE_URLS['users']}/api/users/test_user_integration") + if response.status_code == 404: + # Create user if doesn't exist + user_data = { + "username": "test_user_integration", + "email": "integration@test.com", + "full_name": "Integration Test User" + } + response = await client.post( + f"{BASE_URLS['users']}/api/users", + json=user_data + ) + print(f" User status: {response.status_code}") + except Exception as e: + print(f" User service not fully implemented or accessible: {e}") + + # 2. Record user activity in statistics + print("\n2. Recording user activity metrics...") + try: + metric_data = { + "id": f"metric_{int(time.time())}", + "metric_type": "user_activity", + "value": 1, + "timestamp": datetime.now().isoformat(), + "labels": { + "user_id": "test_user_integration", + "action": "login", + "source": "web" + } + } + + response = await client.post( + f"{BASE_URLS['statistics']}/api/metrics", + json=metric_data + ) + print(f" Metric recorded: {response.status_code}") + except Exception as e: + print(f" Statistics error: {e}") + + # 3. Send a notification about the user action + print("\n3. 
Sending user action notification...") + try: + notification_data = { + "user_id": "test_user_integration", + "title": "Welcome Back!", + "message": "Your login was successful", + "channels": ["in_app"], + "priority": "normal", + "category": "system" + } + + response = await client.post( + f"{BASE_URLS['notifications']}/api/notifications/send", + json=notification_data + ) + print(f" Notification sent: {response.json()}") + except Exception as e: + print(f" Notification error: {e}") + + print("=" * 60) + +async def test_real_time_metrics(): + """Test real-time metrics collection and retrieval""" + print("\n⚡ Testing Real-time Metrics...") + print("=" * 60) + + async with httpx.AsyncClient() as client: + # 1. Send batch metrics + print("1. Sending batch metrics...") + metrics = [] + for i in range(5): + metrics.append({ + "id": f"realtime_{int(time.time())}_{i}", + "metric_type": "api_request", + "value": 100 + i * 10, + "timestamp": datetime.now().isoformat(), + "labels": { + "endpoint": f"/api/test_{i}", + "method": "GET", + "status": "200" + } + }) + + try: + response = await client.post( + f"{BASE_URLS['statistics']}/api/metrics/batch", + json=metrics + ) + print(f" Batch metrics sent: {response.json()}") + except Exception as e: + print(f" Error sending metrics: {e}") + + # 2. Wait and retrieve real-time metrics + await asyncio.sleep(1) + + print("\n2. 
Retrieving real-time metrics...") + try: + response = await client.get( + f"{BASE_URLS['statistics']}/api/metrics/realtime/api_request", + params={"duration": 60} + ) + + if response.status_code == 200: + data = response.json() + print(f" Metric type: {data.get('metric_type')}") + print(f" Duration: {data.get('duration')}s") + print(f" Data points: {len(data.get('data', []))}") + except Exception as e: + print(f" Error retrieving metrics: {e}") + + print("=" * 60) + +async def test_notification_preferences(): + """Test notification preference management""" + print("\n⚙️ Testing Notification Preferences...") + print("=" * 60) + + async with httpx.AsyncClient() as client: + user_id = "pref_test_user" + + # 1. Set user preferences + print("1. Setting user preferences...") + preferences = { + "user_id": user_id, + "channels": { + "email": True, + "sms": False, + "push": True, + "in_app": True + }, + "categories": { + "system": True, + "marketing": False, + "transaction": True, + "social": False, + "security": True, + "update": True + }, + "email_frequency": "daily", + "timezone": "America/New_York", + "language": "en" + } + + try: + response = await client.put( + f"{BASE_URLS['notifications']}/api/preferences/{user_id}", + json=preferences + ) + print(f" Preferences updated: {response.json()}") + except Exception as e: + print(f" Error setting preferences: {e}") + + # 2. Test notification with preferences + print("\n2. 
Sending notification respecting preferences...") + try: + # This should be blocked due to marketing=False + notification_data = { + "user_id": user_id, + "title": "Special Offer!", + "message": "Get 50% off today", + "channels": ["email", "in_app"], + "priority": "normal", + "category": "marketing" + } + + response = await client.post( + f"{BASE_URLS['notifications']}/api/notifications/send", + json=notification_data + ) + print(f" Marketing notification (should be filtered): {response.json()}") + + # This should go through due to system=True + notification_data["category"] = "system" + notification_data["title"] = "System Update" + notification_data["message"] = "New features available" + + response = await client.post( + f"{BASE_URLS['notifications']}/api/notifications/send", + json=notification_data + ) + print(f" System notification (should be sent): {response.json()}") + except Exception as e: + print(f" Error sending notifications: {e}") + + print("=" * 60) + +async def main(): + """Run all integration tests""" + print("=" * 60) + print("🚀 SITE11 INTEGRATION TEST SUITE") + print("=" * 60) + print(f"Started at: {datetime.now().isoformat()}") + + # Run tests + await test_service_health() + await test_notification_to_statistics_flow() + await test_user_action_flow() + await test_real_time_metrics() + await test_notification_preferences() + + print("\n" + "=" * 60) + print("✅ Integration tests completed!") + print(f"Finished at: {datetime.now().isoformat()}") + print("=" * 60) + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file