feat: 사이트 전체 검사 기능 추가
도메인 하위 링크를 BFS로 자동 크롤링하여 페이지별 검사 수행. - BFS 링크 크롤러 (같은 도메인 필터링, max_pages/max_depth 설정) - 사이트 검사 오케스트레이션 (크롤링→순차 검사→집계) - SSE 실시간 진행 상태 (크롤링/검사/완료) - 페이지 트리 + 집계 결과 UI - UrlInputForm에 "사이트 전체 검사" 버튼 추가 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
249
backend/app/routers/site_inspections.py
Normal file
249
backend/app/routers/site_inspections.py
Normal file
@ -0,0 +1,249 @@
|
||||
"""
|
||||
Site inspections router.
|
||||
Handles site-wide inspection lifecycle:
|
||||
- Start site inspection (crawl + inspect all pages)
|
||||
- SSE stream for real-time progress
|
||||
- Get site inspection result
|
||||
- List site inspections (history)
|
||||
- Trigger single page inspection within a site
|
||||
|
||||
IMPORTANT: Static paths (/site-inspections) must be registered BEFORE
|
||||
dynamic paths (/site-inspections/{id}) to avoid routing conflicts.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from sse_starlette.sse import EventSourceResponse
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.core.redis import get_redis
|
||||
from app.models.site_schemas import (
|
||||
StartSiteInspectionRequest,
|
||||
StartSiteInspectionResponse,
|
||||
InspectPageRequest,
|
||||
)
|
||||
from app.services.site_inspection_service import SiteInspectionService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _get_service() -> SiteInspectionService:
    """Build a SiteInspectionService from the current DB and Redis handles.

    Returns:
        A SiteInspectionService constructed with the objects returned by
        get_db() and get_redis().
    """
    # Keyword arguments are evaluated left to right: get_db() first, then get_redis().
    return SiteInspectionService(db=get_db(), redis=get_redis())
|
||||
|
||||
|
||||
# ============================================================
|
||||
# POST /api/site-inspections -- Start site inspection
|
||||
# ============================================================
|
||||
|
||||
@router.post("/site-inspections", status_code=202)
async def start_site_inspection(request: StartSiteInspectionRequest):
    """
    Start a new site-wide inspection.

    Returns 202 Accepted with site_inspection_id immediately.
    Crawling and inspection run asynchronously in the background.

    Args:
        request: Payload carrying the root ``url`` plus the ``max_pages`` and
            ``max_depth`` crawl limits that are forwarded to the service.

    Returns:
        StartSiteInspectionResponse with the new id, status "crawling",
        the root URL and the SSE stream URL for this inspection.

    Raises:
        HTTPException: 422 when the URL does not start with http:// or
            https://; 400 when the service call raises (httpx status,
            timeout or request errors, or any other exception).
    """
    url = str(request.url)

    # Validate URL scheme
    if not url.startswith(("http://", "https://")):
        raise HTTPException(
            status_code=422,
            detail="유효한 URL을 입력해주세요 (http:// 또는 https://로 시작해야 합니다)",
        )

    service = _get_service()

    # Every handler chains the original exception (``from exc``) so the root
    # cause stays attached to the HTTPException for logs and debuggers.
    try:
        site_inspection_id = await service.start_site_inspection(
            url=url,
            max_pages=request.max_pages,
            max_depth=request.max_depth,
        )
    except httpx.HTTPStatusError as exc:
        raise HTTPException(
            status_code=400,
            detail=f"해당 URL에 접근할 수 없습니다 (HTTP {exc.response.status_code})",
        ) from exc
    except httpx.TimeoutException as exc:
        # Must stay ahead of RequestError: timeouts are a subclass of it.
        raise HTTPException(
            status_code=400,
            detail="해당 URL에 접근할 수 없습니다 (응답 시간 초과)",
        ) from exc
    except httpx.RequestError as exc:
        raise HTTPException(
            status_code=400,
            detail="해당 URL에 접근할 수 없습니다",
        ) from exc
    except Exception as exc:
        # logger.exception records the traceback, not just the message text.
        logger.exception("Failed to start site inspection: %s", exc)
        raise HTTPException(
            status_code=400,
            detail="사이트 검사를 시작할 수 없습니다",
        ) from exc

    return StartSiteInspectionResponse(
        site_inspection_id=site_inspection_id,
        status="crawling",
        root_url=url,
        stream_url=f"/api/site-inspections/{site_inspection_id}/stream",
    )
|
||||
|
||||
|
||||
# ============================================================
|
||||
# GET /api/site-inspections -- List site inspections (history)
|
||||
# IMPORTANT: This MUST be before /{site_inspection_id} routes
|
||||
# ============================================================
|
||||
|
||||
@router.get("/site-inspections")
async def list_site_inspections(
    page: int = Query(default=1, ge=1),
    limit: int = Query(default=20, ge=1, le=100),
):
    """Return one page of the site inspection history.

    Args:
        page: 1-based page number (minimum 1).
        limit: Page size, between 1 and 100 (default 20).

    Returns:
        Whatever the service's get_site_inspection_list() returns for the
        requested page and limit.
    """
    return await _get_service().get_site_inspection_list(page=page, limit=limit)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# GET /api/site-inspections/{site_inspection_id}/stream -- SSE
|
||||
# ============================================================
|
||||
|
||||
@router.get("/site-inspections/{site_inspection_id}/stream")
async def stream_site_progress(site_inspection_id: str):
    """
    Stream site inspection progress via Server-Sent Events.

    Subscribes to the Redis Pub/Sub channel
    ``site-inspection:{site_inspection_id}:events`` and relays each published
    message as an SSE event until a ``complete`` or ``error`` event is seen.

    Events:
    - connected: { site_inspection_id, message } (sent once after subscribing)
    - crawl_progress: { pages_found, current_url }
    - crawl_complete: { total_pages, pages: [...] }
    - page_start: { page_url, page_index }
    - page_progress: { page_url, category, progress, current_step }
    - page_complete: { page_url, inspection_id, score, grade }
    - aggregate_update: { pages_inspected, pages_total, overall_score }
    - complete: { status, aggregate_scores }
    - error: { message }

    Args:
        site_inspection_id: ID of the site inspection whose channel to follow.

    Returns:
        EventSourceResponse wrapping the event generator.
    """

    async def event_generator():
        """Yield SSE event dicts relayed from the inspection's Pub/Sub channel."""
        redis = get_redis()
        pubsub = redis.pubsub()
        channel = f"site-inspection:{site_inspection_id}:events"

        try:
            # Subscribing inside the try means a failed subscribe is reported
            # to the client as an "error" event and still reaches the cleanup
            # in ``finally``.
            await pubsub.subscribe(channel)

            # Send initial connected event
            yield {
                "event": "connected",
                "data": json.dumps({
                    "site_inspection_id": site_inspection_id,
                    "message": "SSE 연결 완료",
                }, ensure_ascii=False),
            }

            # Listen for Pub/Sub messages
            async for message in pubsub.listen():
                # Skip anything that is not a published payload
                # (e.g. subscribe confirmations).
                if message["type"] != "message":
                    continue

                event_data = json.loads(message["data"])
                # The publisher's event_type becomes the SSE event name and
                # is removed from the payload; default is "progress".
                event_type = event_data.pop("event_type", "progress")

                yield {
                    "event": event_type,
                    "data": json.dumps(event_data, ensure_ascii=False),
                }

                # End stream on complete or error
                if event_type in ("complete", "error"):
                    break

        except Exception as exc:
            # logger.exception keeps the traceback alongside the message.
            logger.exception(
                "SSE stream error for site %s: %s",
                site_inspection_id, exc,
            )
            yield {
                "event": "error",
                "data": json.dumps({
                    "site_inspection_id": site_inspection_id,
                    "status": "error",
                    "message": "스트리밍 중 오류가 발생했습니다",
                }, ensure_ascii=False),
            }
        finally:
            # Nested so the pubsub object is closed even if unsubscribe raises.
            try:
                await pubsub.unsubscribe(channel)
            finally:
                await pubsub.aclose()

    return EventSourceResponse(
        event_generator(),
        media_type="text/event-stream",
    )
|
||||
|
||||
|
||||
# ============================================================
|
||||
# GET /api/site-inspections/{site_inspection_id} -- Get result
|
||||
# ============================================================
|
||||
|
||||
@router.get("/site-inspections/{site_inspection_id}")
async def get_site_inspection(site_inspection_id: str):
    """Fetch a single site inspection result.

    Args:
        site_inspection_id: ID of the site inspection to look up.

    Returns:
        The result returned by the service, with any ``_id`` key removed.

    Raises:
        HTTPException: 404 when the service returns None for this ID.
    """
    document = await _get_service().get_site_inspection(site_inspection_id)

    if document is not None:
        # Drop the MongoDB _id field, if present, before returning.
        document.pop("_id", None)
        return document

    raise HTTPException(
        status_code=404,
        detail="사이트 검사 결과를 찾을 수 없습니다",
    )
|
||||
|
||||
|
||||
# ============================================================
|
||||
# POST /api/site-inspections/{site_inspection_id}/inspect-page
|
||||
# -- Trigger single page inspection
|
||||
# ============================================================
|
||||
|
||||
@router.post("/site-inspections/{site_inspection_id}/inspect-page")
async def inspect_page(
    site_inspection_id: str,
    request: InspectPageRequest,
):
    """
    Run an inspection for one page that belongs to a site inspection.

    Intended for re-inspecting a single page, or for manually inspecting a
    page whose earlier inspection failed.

    Args:
        site_inspection_id: ID of the parent site inspection.
        request: Payload carrying the ``url`` of the page to inspect.

    Returns:
        Dict with site_inspection_id, page_url, the resulting inspection_id
        and status "completed".

    Raises:
        HTTPException: 404 when the service returns None.
    """
    target_url = str(request.url)

    new_inspection_id = await _get_service().inspect_single_page(
        site_inspection_id=site_inspection_id,
        page_url=target_url,
    )

    # The service returns None when there is nothing to inspect; the route
    # reports that as a missing site inspection or page.
    if new_inspection_id is None:
        raise HTTPException(
            status_code=404,
            detail="사이트 검사 또는 해당 페이지를 찾을 수 없습니다",
        )

    response_body = {
        "site_inspection_id": site_inspection_id,
        "page_url": target_url,
        "inspection_id": new_inspection_id,
        "status": "completed",
    }
    return response_body
|
||||
Reference in New Issue
Block a user