feat: 3-mode inspection with tabbed UI + batch upload
- Add batch inspection backend (multipart upload, SSE streaming, MongoDB) - Add tabbed UI (single page / site crawling / batch upload) on home and history pages - Add batch inspection progress, result pages with 2-panel layout - Rename "사이트 전체" to "사이트 크롤링" across codebase - Add python-multipart dependency for file upload - Consolidate nginx SSE location for all inspection types Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@ -26,6 +26,10 @@ class Settings(BaseSettings):
|
||||
SITE_MAX_DEPTH: int = 2
|
||||
SITE_CONCURRENCY: int = 8
|
||||
|
||||
# Batch inspection
|
||||
BATCH_MAX_URLS: int = 200
|
||||
BATCH_CONCURRENCY: int = 8
|
||||
|
||||
# Application
|
||||
PROJECT_NAME: str = "Web Inspector API"
|
||||
|
||||
|
||||
@ -29,6 +29,11 @@ async def connect_db() -> None:
|
||||
await _db.site_inspections.create_index([("domain", 1), ("created_at", -1)])
|
||||
await _db.site_inspections.create_index([("created_at", -1)])
|
||||
|
||||
# Create indexes - batch_inspections
|
||||
await _db.batch_inspections.create_index("batch_inspection_id", unique=True)
|
||||
await _db.batch_inspections.create_index([("name", 1), ("created_at", -1)])
|
||||
await _db.batch_inspections.create_index([("created_at", -1)])
|
||||
|
||||
# Verify connection
|
||||
await _client.admin.command("ping")
|
||||
logger.info("MongoDB connected successfully: %s", settings.DB_NAME)
|
||||
|
||||
@ -10,7 +10,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.core.database import connect_db, close_db
|
||||
from app.core.redis import connect_redis, close_redis
|
||||
from app.routers import health, inspections, reports, site_inspections
|
||||
from app.routers import health, inspections, reports, site_inspections, batch_inspections
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
@ -57,3 +57,4 @@ app.include_router(health.router, prefix="/api", tags=["Health"])
|
||||
app.include_router(inspections.router, prefix="/api", tags=["Inspections"])
|
||||
app.include_router(reports.router, prefix="/api", tags=["Reports"])
|
||||
app.include_router(site_inspections.router, prefix="/api", tags=["Site Inspections"])
|
||||
app.include_router(batch_inspections.router, prefix="/api", tags=["Batch Inspections"])
|
||||
|
||||
86
backend/app/models/batch_schemas.py
Normal file
86
backend/app/models/batch_schemas.py
Normal file
@ -0,0 +1,86 @@
|
||||
"""
|
||||
Pydantic models for batch inspection request/response validation.
|
||||
|
||||
Batch inspection allows inspecting multiple URLs from a file upload
|
||||
without any crawling phase - URLs are inspected directly.
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
from app.models.site_schemas import AggregateScores, PageStatus
|
||||
|
||||
|
||||
# --- Enums ---
|
||||
|
||||
class BatchInspectionStatus(str, Enum):
    """Batch inspection status (no crawling phase)."""
    INSPECTING = "inspecting"  # pages are currently being inspected
    COMPLETED = "completed"    # all pages processed and aggregates stored
    ERROR = "error"            # the batch failed as a whole
||||
|
||||
|
||||
# --- Core Data Models ---
|
||||
|
||||
class BatchPage(BaseModel):
    """A single page in a batch inspection (URL supplied directly, no crawling)."""
    url: str
    # depth / parent_url mirror the site-crawl page shape; batch pages are
    # always top-level (depth 0, no parent).
    depth: int = 0
    parent_url: Optional[str] = None
    # ID of the per-page inspection document once inspection has started.
    inspection_id: Optional[str] = None
    status: PageStatus = PageStatus.PENDING
    title: Optional[str] = None
    overall_score: Optional[int] = None
    grade: Optional[str] = None
|
||||
|
||||
class BatchInspectionConfig(BaseModel):
    """Batch inspection configuration."""
    # Number of pages inspected in parallel; the server clamps this to
    # settings.BATCH_CONCURRENCY when starting a batch.
    concurrency: int = 4
||||
|
||||
|
||||
# --- Response Models ---
|
||||
|
||||
class StartBatchInspectionResponse(BaseModel):
    """Response returned immediately (202 Accepted) when a batch inspection starts."""
    batch_inspection_id: str
    status: str = "inspecting"
    name: str
    total_urls: int
    # SSE endpoint the client should connect to for progress events.
    stream_url: str
||||
|
||||
|
||||
class BatchInspectionResult(BaseModel):
    """Full result document for one batch inspection."""
    batch_inspection_id: str
    name: str
    status: BatchInspectionStatus
    created_at: datetime
    completed_at: Optional[datetime] = None
    config: BatchInspectionConfig
    # URL list exactly as uploaded (pydantic copies list defaults per instance,
    # so the shared-mutable-default pitfall does not apply here).
    source_urls: list[str] = []
    # Per-page state; name kept parallel to the site-crawl result shape.
    discovered_pages: list[BatchPage] = []
    # Averaged scores across completed pages; None until first computed.
    aggregate_scores: Optional[AggregateScores] = None
||||
|
||||
|
||||
class BatchInspectionListItem(BaseModel):
    """Batch inspection list item (summary for history views)."""
    batch_inspection_id: str
    name: str
    status: BatchInspectionStatus
    created_at: datetime
    total_urls: int = 0
    # Count of pages whose status is "completed".
    pages_inspected: int = 0
    # Aggregate score/grade; None while no page has completed yet.
    overall_score: Optional[int] = None
    grade: Optional[str] = None
||||
|
||||
|
||||
class BatchInspectionPaginatedResponse(BaseModel):
    """Paginated response for the batch inspection list."""
    items: list[BatchInspectionListItem]
    total: int        # total matching documents (across all pages)
    page: int         # 1-based current page
    limit: int        # page size used
    total_pages: int  # ceil(total / limit), minimum 1
||||
286
backend/app/routers/batch_inspections.py
Normal file
286
backend/app/routers/batch_inspections.py
Normal file
@ -0,0 +1,286 @@
|
||||
"""
|
||||
Batch inspections router.
|
||||
Handles batch inspection lifecycle:
|
||||
- Start batch inspection (multipart file upload + inspect all URLs)
|
||||
- SSE stream for real-time progress
|
||||
- Get batch inspection result
|
||||
- List batch inspections (history)
|
||||
|
||||
IMPORTANT: Static paths (/batch-inspections) must be registered BEFORE
|
||||
dynamic paths (/batch-inspections/{id}) to avoid routing conflicts.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from fastapi import APIRouter, File, Form, HTTPException, Query, UploadFile
|
||||
from sse_starlette.sse import EventSourceResponse
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.core.database import get_db
|
||||
from app.core.redis import get_redis
|
||||
from app.models.batch_schemas import StartBatchInspectionResponse
|
||||
from app.services.batch_inspection_service import BatchInspectionService
|
||||
|
||||
logger = logging.getLogger(__name__)

router = APIRouter()

# Maximum upload file size: 1MB (checked against the raw byte length of the
# uploaded URL file before decoding).
MAX_FILE_SIZE = 1 * 1024 * 1024  # 1MB
|
||||
|
||||
|
||||
def _get_service() -> BatchInspectionService:
    """Build a BatchInspectionService wired to the app's DB and Redis handles."""
    return BatchInspectionService(db=get_db(), redis=get_redis())
|
||||
|
||||
|
||||
def _validate_url(url: str) -> bool:
|
||||
"""Validate that a URL has http or https scheme."""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
return parsed.scheme in ("http", "https") and bool(parsed.netloc)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _parse_url_file(content: str) -> list[str]:
|
||||
"""
|
||||
Parse URL file content into a list of valid URLs.
|
||||
- One URL per line
|
||||
- Skip empty lines
|
||||
- Skip lines starting with # (comments)
|
||||
- Strip whitespace
|
||||
"""
|
||||
urls = []
|
||||
for line in content.splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
urls.append(line)
|
||||
return urls
|
||||
|
||||
|
||||
# ============================================================
|
||||
# POST /api/batch-inspections -- Start batch inspection
|
||||
# ============================================================
|
||||
|
||||
@router.post("/batch-inspections", status_code=202)
async def start_batch_inspection(
    file: UploadFile = File(...),
    name: str = Form(...),
    concurrency: int = Form(default=4),
):
    """
    Start a new batch inspection from an uploaded URL file.

    Returns 202 Accepted with batch_inspection_id immediately.
    Inspection runs asynchronously in the background.

    File format: .txt, one URL per line, max 1MB, max BATCH_MAX_URLS URLs.
    Lines starting with # are treated as comments and ignored.

    Raises:
        HTTPException 422: invalid name, file type, size, encoding, or URLs.
        HTTPException 400: the service failed to start the inspection.
    """
    settings = get_settings()

    # Validate name first -- cheapest check, fail fast before reading the file.
    name = name.strip()
    if not name:
        raise HTTPException(
            status_code=422,
            detail="배치 검사 이름을 입력해주세요",
        )

    # Validate file extension
    if file.filename and not file.filename.lower().endswith(".txt"):
        raise HTTPException(
            status_code=422,
            detail="텍스트 파일(.txt)만 업로드 가능합니다",
        )

    # Bounded read: request at most MAX_FILE_SIZE + 1 bytes, so an oversized
    # upload is rejected without buffering the whole payload in memory.
    content_bytes = await file.read(MAX_FILE_SIZE + 1)
    if len(content_bytes) > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=422,
            detail="파일 크기가 1MB를 초과합니다",
        )

    # Parse file content (UTF-8 only)
    try:
        content = content_bytes.decode("utf-8")
    except UnicodeDecodeError as exc:
        raise HTTPException(
            status_code=422,
            detail="파일 인코딩이 올바르지 않습니다 (UTF-8만 지원)",
        ) from exc

    urls = _parse_url_file(content)

    if not urls:
        raise HTTPException(
            status_code=422,
            detail="파일에 유효한 URL이 없습니다",
        )

    # Validate URL count
    if len(urls) > settings.BATCH_MAX_URLS:
        raise HTTPException(
            status_code=422,
            detail=f"URL 수가 최대 허용치({settings.BATCH_MAX_URLS}개)를 초과합니다 (입력: {len(urls)}개)",
        )

    # Validate each URL; report at most 5 offenders plus a remainder count.
    invalid_urls = [url for url in urls if not _validate_url(url)]
    if invalid_urls:
        sample = invalid_urls[:5]
        detail = f"유효하지 않은 URL이 포함되어 있습니다: {', '.join(sample)}"
        if len(invalid_urls) > 5:
            detail += f" 외 {len(invalid_urls) - 5}건"
        raise HTTPException(
            status_code=422,
            detail=detail,
        )

    # Clamp concurrency to the server-side limit (never below 1).
    concurrency = max(1, min(concurrency, settings.BATCH_CONCURRENCY))

    service = _get_service()

    try:
        batch_inspection_id = await service.start_batch_inspection(
            name=name,
            urls=urls,
            concurrency=concurrency,
        )
    except Exception as exc:
        logger.error("Failed to start batch inspection: %s", str(exc))
        raise HTTPException(
            status_code=400,
            detail="배치 검사를 시작할 수 없습니다",
        ) from exc

    return StartBatchInspectionResponse(
        batch_inspection_id=batch_inspection_id,
        status="inspecting",
        name=name,
        total_urls=len(urls),
        stream_url=f"/api/batch-inspections/{batch_inspection_id}/stream",
    )
|
||||
|
||||
|
||||
# ============================================================
|
||||
# GET /api/batch-inspections -- List batch inspections (history)
|
||||
# IMPORTANT: This MUST be before /{batch_inspection_id} routes
|
||||
# ============================================================
|
||||
|
||||
@router.get("/batch-inspections")
async def list_batch_inspections(
    page: int = Query(default=1, ge=1),
    limit: int = Query(default=20, ge=1, le=100),
    # NOTE(review): default is None, so the effective type is Optional[str];
    # consider annotating it as such once typing.Optional is imported here.
    name: str = Query(default=None, description="이름 검색 필터"),
):
    """Get paginated batch inspection history.

    Optional case-insensitive name filter is applied by the service layer.
    Returns the service's pagination dict (items/total/page/limit/total_pages).
    """
    service = _get_service()
    result = await service.get_batch_inspection_list(
        page=page,
        limit=limit,
        name_filter=name,
    )
    return result
|
||||
|
||||
|
||||
# ============================================================
|
||||
# GET /api/batch-inspections/{batch_inspection_id}/stream -- SSE
|
||||
# ============================================================
|
||||
|
||||
@router.get("/batch-inspections/{batch_inspection_id}/stream")
async def batch_inspection_stream(batch_inspection_id: str):
    """
    Stream batch inspection progress via Server-Sent Events.

    Events:
    - connected: { batch_inspection_id, message }
    - page_start: { batch_inspection_id, page_url, page_index }
    - page_complete: { batch_inspection_id, page_url, inspection_id, overall_score, grade }
    - aggregate_update: { batch_inspection_id, pages_inspected, pages_total, overall_score, grade }
    - complete: { batch_inspection_id, status: "completed", aggregate_scores: {...} }
    - error: { batch_inspection_id, message }
    """

    async def event_generator():
        redis = get_redis()
        pubsub = redis.pubsub()
        # Channel name must stay in sync with
        # BatchInspectionService._publish_batch_event.
        channel = f"batch-inspection:{batch_inspection_id}:events"

        await pubsub.subscribe(channel)

        try:
            # Send initial connected event so the client knows the
            # subscription is live before any progress arrives.
            yield {
                "event": "connected",
                "data": json.dumps({
                    "batch_inspection_id": batch_inspection_id,
                    "message": "SSE 연결 완료",
                }, ensure_ascii=False),
            }

            # Listen for Pub/Sub messages; non-"message" entries (e.g.
            # subscribe confirmations) are skipped.
            async for message in pubsub.listen():
                if message["type"] == "message":
                    event_data = json.loads(message["data"])
                    # The service embeds the SSE event name in the payload;
                    # pop it so it is not duplicated in "data".
                    event_type = event_data.pop("event_type", "progress")

                    yield {
                        "event": event_type,
                        "data": json.dumps(event_data, ensure_ascii=False),
                    }

                    # End stream on complete or error
                    if event_type in ("complete", "error"):
                        break

        except Exception as e:
            logger.error(
                "SSE stream error for batch %s: %s",
                batch_inspection_id, str(e),
            )
            yield {
                "event": "error",
                "data": json.dumps({
                    "batch_inspection_id": batch_inspection_id,
                    "status": "error",
                    "message": "스트리밍 중 오류가 발생했습니다",
                }, ensure_ascii=False),
            }
        finally:
            # Always release the Redis subscription, even on client disconnect.
            await pubsub.unsubscribe(channel)
            await pubsub.aclose()

    return EventSourceResponse(
        event_generator(),
        media_type="text/event-stream",
    )
|
||||
|
||||
|
||||
# ============================================================
|
||||
# GET /api/batch-inspections/{batch_inspection_id} -- Get result
|
||||
# ============================================================
|
||||
|
||||
@router.get("/batch-inspections/{batch_inspection_id}")
async def get_batch_inspection(batch_inspection_id: str):
    """Get batch inspection result by ID; 404 if unknown."""
    result = await _get_service().get_batch_inspection(batch_inspection_id)

    if result is None:
        raise HTTPException(
            status_code=404,
            detail="배치 검사 결과를 찾을 수 없습니다",
        )

    # Defensive: strip Mongo's internal _id in case it slips through.
    result.pop("_id", None)
    return result
|
||||
537
backend/app/services/batch_inspection_service.py
Normal file
537
backend/app/services/batch_inspection_service.py
Normal file
@ -0,0 +1,537 @@
|
||||
"""
|
||||
Batch inspection orchestration service.
|
||||
|
||||
Manages batch inspection lifecycle without crawling:
|
||||
1. Accept a list of URLs (from uploaded file)
|
||||
2. Parallel inspection of each URL (semaphore-controlled)
|
||||
3. Aggregate score computation
|
||||
4. Progress tracking via Redis Pub/Sub (SSE events)
|
||||
5. Result storage in MongoDB (batch_inspections collection)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
from redis.asyncio import Redis
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.models.schemas import calculate_grade
|
||||
from app.services.inspection_service import InspectionService
|
||||
|
||||
logger = logging.getLogger(__name__)

# Redis key TTLs
# Applies to cached completed/error batch results (see get_batch_inspection).
BATCH_RESULT_CACHE_TTL = 3600  # 1 hour
||||
|
||||
class BatchInspectionService:
    """Batch inspection orchestration service.

    Runs a fixed list of URLs through the per-page InspectionService in
    parallel (semaphore-bounded), tracks per-page state in the
    ``batch_inspections`` MongoDB collection, publishes progress events over
    Redis Pub/Sub (channel ``batch-inspection:<id>:events``), and maintains
    aggregate scores across all completed pages.
    """

    def __init__(self, db: AsyncIOMotorDatabase, redis: Redis):
        # Keep handles and build the single-page inspection service on the
        # same DB/Redis connections.
        self.db = db
        self.redis = redis
        self.inspection_service = InspectionService(db=db, redis=redis)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def start_batch_inspection(
        self,
        name: str,
        urls: list[str],
        concurrency: int = 4,
    ) -> str:
        """
        Start a batch inspection.

        1. Generate batch_inspection_id
        2. Create initial MongoDB document with status "inspecting"
        3. Build discovered_pages array (depth=0, parent_url=None)
        4. Launch background inspect-all task
        5. Return batch_inspection_id

        Args:
            name: human-readable batch name (already validated by the router).
            urls: URLs to inspect, one page each, no crawling.
            concurrency: requested parallelism; clamped to BATCH_CONCURRENCY.

        Returns:
            The new batch_inspection_id (UUID4 string).
        """
        settings = get_settings()

        # Clamp concurrency to server-side limit
        concurrency = min(concurrency, settings.BATCH_CONCURRENCY)

        batch_inspection_id = str(uuid.uuid4())

        # Build discovered_pages documents (same shape as site-crawl pages,
        # but always depth 0 with no parent).
        discovered_pages = []
        for url in urls:
            discovered_pages.append({
                "url": url,
                "depth": 0,
                "parent_url": None,
                "inspection_id": None,
                "status": "pending",
                "title": None,
                "overall_score": None,
                "grade": None,
            })

        # Create initial document
        doc = {
            "batch_inspection_id": batch_inspection_id,
            "name": name,
            "status": "inspecting",
            "created_at": datetime.now(timezone.utc),
            "completed_at": None,
            "config": {
                "concurrency": concurrency,
            },
            "source_urls": urls,
            "discovered_pages": discovered_pages,
            "aggregate_scores": None,
        }
        await self.db.batch_inspections.insert_one(doc)

        logger.info(
            "Batch inspection started: id=%s, name=%s, total_urls=%d, concurrency=%d",
            batch_inspection_id, name, len(urls), concurrency,
        )

        # Launch background task
        # NOTE(review): the Task reference is not retained; the event loop
        # keeps only a weak reference to running tasks, so this task could in
        # principle be garbage-collected mid-run. Consider storing a strong
        # reference (e.g. in a module-level set) -- verify.
        asyncio.create_task(
            self._inspect_all(batch_inspection_id, urls, concurrency)
        )

        return batch_inspection_id

    async def get_batch_inspection(self, batch_inspection_id: str) -> Optional[dict]:
        """Get batch inspection result by ID (cache-first).

        Returns the result dict without Mongo's _id, or None if unknown.
        """
        # Try Redis cache first
        cache_key = f"batch-inspection:result:{batch_inspection_id}"
        cached = await self.redis.get(cache_key)
        if cached:
            return json.loads(cached)

        # Fetch from MongoDB
        doc = await self.db.batch_inspections.find_one(
            {"batch_inspection_id": batch_inspection_id},
            {"_id": 0},
        )
        if doc:
            # Only cache completed results -- in-progress documents change on
            # every page completion and would go stale within the TTL.
            if doc.get("status") in ("completed", "error"):
                await self.redis.set(
                    cache_key,
                    # default=str stringifies datetimes (and anything else
                    # non-JSON-native) for the cached copy.
                    json.dumps(doc, ensure_ascii=False, default=str),
                    ex=BATCH_RESULT_CACHE_TTL,
                )
            return doc
        return None

    async def get_batch_inspection_list(
        self,
        page: int = 1,
        limit: int = 20,
        name_filter: Optional[str] = None,
    ) -> dict:
        """Get paginated list of batch inspections.

        Args:
            page: 1-based page number.
            limit: page size, capped at 100.
            name_filter: optional case-insensitive regex filter on name.

        Returns:
            Dict with items/total/page/limit/total_pages (summary rows only).
        """
        limit = min(limit, 100)
        skip = (page - 1) * limit

        # Build query
        query = {}
        if name_filter:
            query["name"] = {"$regex": name_filter, "$options": "i"}

        total = await self.db.batch_inspections.count_documents(query)

        cursor = self.db.batch_inspections.find(
            query,
            {
                "_id": 0,
                "batch_inspection_id": 1,
                "name": 1,
                "status": 1,
                "created_at": 1,
                "discovered_pages": 1,
                "aggregate_scores": 1,
                "source_urls": 1,
            },
        ).sort("created_at", -1).skip(skip).limit(limit)

        items = []
        async for doc in cursor:
            pages = doc.get("discovered_pages", [])
            total_urls = len(doc.get("source_urls", []))
            pages_inspected = sum(
                1 for p in pages if p.get("status") == "completed"
            )
            agg = doc.get("aggregate_scores")

            items.append({
                "batch_inspection_id": doc.get("batch_inspection_id"),
                "name": doc.get("name"),
                "status": doc.get("status"),
                "created_at": doc.get("created_at"),
                "total_urls": total_urls,
                "pages_inspected": pages_inspected,
                "overall_score": agg.get("overall_score") if agg else None,
                "grade": agg.get("grade") if agg else None,
            })

        # Ceiling division via negation; always report at least one page.
        total_pages = max(1, -(-total // limit))

        return {
            "items": items,
            "total": total,
            "page": page,
            "limit": limit,
            "total_pages": total_pages,
        }

    # ------------------------------------------------------------------
    # Background task: Inspect All URLs
    # ------------------------------------------------------------------

    async def _inspect_all(
        self,
        batch_inspection_id: str,
        urls: list[str],
        concurrency: int = 4,
    ) -> None:
        """
        Background task that inspects all URLs in parallel.
        No crawling phase - URLs are inspected directly.

        Per-page failures are absorbed inside _inspect_single_page (and by
        gather's return_exceptions=True); only infrastructure-level failures
        reach the except branch here and mark the whole batch as "error".
        """
        try:
            logger.info(
                "Batch inspection started: %s, urls=%d",
                batch_inspection_id, len(urls),
            )

            semaphore = asyncio.Semaphore(concurrency)

            tasks = [
                self._inspect_page_with_semaphore(
                    semaphore=semaphore,
                    batch_inspection_id=batch_inspection_id,
                    page_url=url,
                    page_index=idx,
                    total_pages=len(urls),
                )
                for idx, url in enumerate(urls)
            ]

            # return_exceptions=True keeps one failed page from cancelling
            # the rest of the batch.
            await asyncio.gather(*tasks, return_exceptions=True)

            # ==============================
            # Finalize: Compute aggregates
            # ==============================
            aggregate_scores = await self._compute_and_store_aggregates(batch_inspection_id)

            # Mark as completed
            await self.db.batch_inspections.update_one(
                {"batch_inspection_id": batch_inspection_id},
                {
                    "$set": {
                        "status": "completed",
                        "completed_at": datetime.now(timezone.utc),
                    }
                },
            )

            # Publish complete event (ends the SSE stream on the client side)
            await self._publish_batch_event(batch_inspection_id, {
                "event_type": "complete",
                "batch_inspection_id": batch_inspection_id,
                "status": "completed",
                "aggregate_scores": aggregate_scores,
            })

            logger.info("Batch inspection completed: %s", batch_inspection_id)

        except Exception as e:
            logger.error(
                "Batch inspection %s failed: %s",
                batch_inspection_id, str(e), exc_info=True,
            )

            await self.db.batch_inspections.update_one(
                {"batch_inspection_id": batch_inspection_id},
                {
                    "$set": {
                        "status": "error",
                        "completed_at": datetime.now(timezone.utc),
                    }
                },
            )

            await self._publish_batch_event(batch_inspection_id, {
                "event_type": "error",
                "batch_inspection_id": batch_inspection_id,
                "status": "error",
                "message": f"배치 검사 중 오류가 발생했습니다: {str(e)[:200]}",
            })

    async def _inspect_page_with_semaphore(
        self,
        semaphore: asyncio.Semaphore,
        batch_inspection_id: str,
        page_url: str,
        page_index: int,
        total_pages: int,
    ) -> None:
        """Inspect a single page with semaphore-controlled concurrency."""
        async with semaphore:
            await self._inspect_single_page(
                batch_inspection_id=batch_inspection_id,
                page_url=page_url,
                page_index=page_index,
                total_pages=total_pages,
            )

    async def _inspect_single_page(
        self,
        batch_inspection_id: str,
        page_url: str,
        page_index: int,
        total_pages: int,
    ) -> None:
        """Run inspection for a single page in the batch.

        Publishes page_start / page_progress / page_complete / aggregate_update
        events and keeps the page's entry in discovered_pages up to date.
        Failures are recorded on the page and never propagate to the batch.
        """
        inspection_id = str(uuid.uuid4())

        # Publish page_start event
        await self._publish_batch_event(batch_inspection_id, {
            "event_type": "page_start",
            "batch_inspection_id": batch_inspection_id,
            "page_url": page_url,
            "page_index": page_index,
        })

        # Mark page as inspecting in MongoDB.
        # NOTE(review): the positional $ operator updates the FIRST array
        # element whose url matches, so duplicate URLs within one batch would
        # collide on the same entry -- consider de-duplicating upstream.
        await self.db.batch_inspections.update_one(
            {
                "batch_inspection_id": batch_inspection_id,
                "discovered_pages.url": page_url,
            },
            {
                "$set": {
                    "discovered_pages.$.status": "inspecting",
                    "discovered_pages.$.inspection_id": inspection_id,
                }
            },
        )

        try:
            # Progress callback for per-page SSE updates
            async def page_progress_callback(category: str, progress: int, current_step: str):
                await self._publish_batch_event(batch_inspection_id, {
                    "event_type": "page_progress",
                    "batch_inspection_id": batch_inspection_id,
                    "page_url": page_url,
                    "page_index": page_index,
                    "category": category,
                    "progress": progress,
                    "current_step": current_step,
                })

            # Run the inspection (first tuple element is unused here)
            _, result = await self.inspection_service.run_inspection_inline(
                url=page_url,
                inspection_id=inspection_id,
                progress_callback=page_progress_callback,
            )

            overall_score = result.get("overall_score", 0)
            grade = result.get("grade", "F")

            # Update page status in MongoDB
            await self.db.batch_inspections.update_one(
                {
                    "batch_inspection_id": batch_inspection_id,
                    "discovered_pages.url": page_url,
                },
                {
                    "$set": {
                        "discovered_pages.$.status": "completed",
                        "discovered_pages.$.overall_score": overall_score,
                        "discovered_pages.$.grade": grade,
                    }
                },
            )

            # Publish page_complete event
            await self._publish_batch_event(batch_inspection_id, {
                "event_type": "page_complete",
                "batch_inspection_id": batch_inspection_id,
                "page_url": page_url,
                "inspection_id": inspection_id,
                "overall_score": overall_score,
                "grade": grade,
            })

            # Compute and publish aggregate update after every page; with
            # concurrent pages the last writer wins, which is acceptable for
            # a monotonically-refreshing progress display.
            aggregate_scores = await self._compute_and_store_aggregates(batch_inspection_id)
            await self._publish_batch_event(batch_inspection_id, {
                "event_type": "aggregate_update",
                "batch_inspection_id": batch_inspection_id,
                "pages_inspected": aggregate_scores.get("pages_inspected", 0),
                "pages_total": aggregate_scores.get("pages_total", total_pages),
                "overall_score": aggregate_scores.get("overall_score", 0),
                "grade": aggregate_scores.get("grade", "F"),
            })

            logger.info(
                "Page inspection completed: batch=%s, page=%s, score=%d",
                batch_inspection_id, page_url, overall_score,
            )

        except Exception as e:
            logger.error(
                "Page inspection failed: batch=%s, page=%s, error=%s",
                batch_inspection_id, page_url, str(e),
            )

            # Mark page as error
            await self.db.batch_inspections.update_one(
                {
                    "batch_inspection_id": batch_inspection_id,
                    "discovered_pages.url": page_url,
                },
                {
                    "$set": {
                        "discovered_pages.$.status": "error",
                    }
                },
            )

            # Publish page error (non-fatal, continue with other pages);
            # reuses the page_complete event type with an error field.
            await self._publish_batch_event(batch_inspection_id, {
                "event_type": "page_complete",
                "batch_inspection_id": batch_inspection_id,
                "page_url": page_url,
                "inspection_id": None,
                "overall_score": 0,
                "grade": "F",
                "error": str(e)[:200],
            })

    # ------------------------------------------------------------------
    # Aggregate computation
    # ------------------------------------------------------------------

    async def _compute_and_store_aggregates(self, batch_inspection_id: str) -> dict:
        """
        Compute aggregate scores from all completed page inspections.

        Fetches each completed page's full inspection result from the
        inspections collection, averages category scores, and stores
        the aggregate in the batch_inspections document.

        Returns the aggregate_scores dict (empty dict if the batch is unknown).
        """
        doc = await self.db.batch_inspections.find_one(
            {"batch_inspection_id": batch_inspection_id},
        )
        if not doc:
            return {}

        pages = doc.get("discovered_pages", [])
        total_pages = len(pages)

        # Collect inspection IDs for completed pages
        completed_ids = [
            p["inspection_id"]
            for p in pages
            if p.get("status") == "completed" and p.get("inspection_id")
        ]

        # No completed pages yet: store and return an all-zero aggregate.
        if not completed_ids:
            aggregate = {
                "overall_score": 0,
                "grade": "F",
                "html_css": 0,
                "accessibility": 0,
                "seo": 0,
                "performance_security": 0,
                "total_issues": 0,
                "pages_inspected": 0,
                "pages_total": total_pages,
            }
            await self._store_aggregates(batch_inspection_id, aggregate)
            return aggregate

        # Fetch all completed inspection results (projected to just the
        # score fields needed for averaging).
        cursor = self.db.inspections.find(
            {"inspection_id": {"$in": completed_ids}},
            {
                "_id": 0,
                "overall_score": 1,
                "categories.html_css.score": 1,
                "categories.accessibility.score": 1,
                "categories.seo.score": 1,
                "categories.performance_security.score": 1,
                "summary.total_issues": 1,
            },
        )

        scores_overall = []
        scores_html_css = []
        scores_accessibility = []
        scores_seo = []
        scores_perf = []
        total_issues = 0

        async for insp in cursor:
            scores_overall.append(insp.get("overall_score", 0))

            cats = insp.get("categories", {})
            scores_html_css.append(cats.get("html_css", {}).get("score", 0))
            scores_accessibility.append(cats.get("accessibility", {}).get("score", 0))
            scores_seo.append(cats.get("seo", {}).get("score", 0))
            scores_perf.append(cats.get("performance_security", {}).get("score", 0))

            total_issues += insp.get("summary", {}).get("total_issues", 0)

        pages_inspected = len(scores_overall)

        def safe_avg(values: list[int]) -> int:
            # Rounded mean; 0 for an empty list (cannot happen here since
            # completed_ids is non-empty, but kept defensive).
            return round(sum(values) / len(values)) if values else 0

        overall_score = safe_avg(scores_overall)
        grade = calculate_grade(overall_score)

        aggregate = {
            "overall_score": overall_score,
            "grade": grade,
            "html_css": safe_avg(scores_html_css),
            "accessibility": safe_avg(scores_accessibility),
            "seo": safe_avg(scores_seo),
            "performance_security": safe_avg(scores_perf),
            "total_issues": total_issues,
            "pages_inspected": pages_inspected,
            "pages_total": total_pages,
        }

        await self._store_aggregates(batch_inspection_id, aggregate)
        return aggregate

    async def _store_aggregates(self, batch_inspection_id: str, aggregate: dict) -> None:
        """Store aggregate scores in MongoDB."""
        await self.db.batch_inspections.update_one(
            {"batch_inspection_id": batch_inspection_id},
            {"$set": {"aggregate_scores": aggregate}},
        )

    # ------------------------------------------------------------------
    # SSE event publishing
    # ------------------------------------------------------------------

    async def _publish_batch_event(self, batch_inspection_id: str, event_data: dict) -> None:
        """Publish an SSE event for batch inspection via Redis Pub/Sub.

        Channel name must stay in sync with the router's SSE subscriber.
        """
        channel = f"batch-inspection:{batch_inspection_id}:events"
        await self.redis.publish(
            channel,
            # default=str covers datetimes embedded in event payloads.
            json.dumps(event_data, ensure_ascii=False, default=str),
        )
|
||||
@ -33,5 +33,8 @@ Jinja2>=3.1.0
|
||||
# Rules (YAML)
|
||||
PyYAML>=6.0.0
|
||||
|
||||
# Multipart (file upload)
|
||||
python-multipart>=0.0.7
|
||||
|
||||
# Utilities
|
||||
python-slugify>=8.0.0
|
||||
|
||||
Reference in New Issue
Block a user