"""
Site inspections router.

Handles site-wide inspection lifecycle:
- Start site inspection (crawl + inspect all pages)
- SSE stream for real-time progress
- Get site inspection result
- List site inspections (history)
- Trigger single page inspection within a site

IMPORTANT: Static paths (/site-inspections) must be registered BEFORE
dynamic paths (/site-inspections/{id}) to avoid routing conflicts.
"""

import json
import logging

import httpx
from fastapi import APIRouter, HTTPException, Query
from sse_starlette.sse import EventSourceResponse

from app.core.database import get_db
from app.core.redis import get_redis
from app.models.site_schemas import (
    StartSiteInspectionRequest,
    StartSiteInspectionResponse,
    InspectPageRequest,
)
from app.services.site_inspection_service import SiteInspectionService

logger = logging.getLogger(__name__)
router = APIRouter()


def _get_service() -> SiteInspectionService:
    """Build a SiteInspectionService wired to the shared DB and Redis handles."""
    db = get_db()
    redis = get_redis()
    return SiteInspectionService(db=db, redis=redis)


# ============================================================
# POST /api/site-inspections -- Start site inspection
# ============================================================

@router.post("/site-inspections", status_code=202)
async def start_site_inspection(request: StartSiteInspectionRequest):
    """
    Start a new site-wide inspection.

    Responds with 202 Accepted and the new site_inspection_id; progress is
    then reported on the stream URL included in the response.

    Raises:
        HTTPException(422): the URL does not start with http:// or https://.
        HTTPException(400): the service could not reach the URL (HTTP error
            status, timeout, other request error) or failed for any other
            reason while starting the inspection.
    """
    url = str(request.url)

    # Validate URL scheme
    if not url.startswith(("http://", "https://")):
        raise HTTPException(
            status_code=422,
            detail="유효한 URL을 입력해주세요 (http:// 또는 https://로 시작해야 합니다)",
        )

    service = _get_service()

    try:
        site_inspection_id = await service.start_site_inspection(
            url=url,
            max_pages=request.max_pages,
            max_depth=request.max_depth,
            concurrency=request.concurrency,
            accessibility_standard=request.accessibility_standard,
        )
    except HTTPException:
        # An HTTP error raised on purpose by a lower layer must not be
        # rewritten into the generic 400 by the catch-all handler below.
        raise
    except httpx.HTTPStatusError as e:
        raise HTTPException(
            status_code=400,
            detail=f"해당 URL에 접근할 수 없습니다 (HTTP {e.response.status_code})",
        ) from e
    except httpx.TimeoutException as e:
        # Must stay ahead of RequestError: timeouts are a subclass of it.
        raise HTTPException(
            status_code=400,
            detail="해당 URL에 접근할 수 없습니다 (응답 시간 초과)",
        ) from e
    except httpx.RequestError as e:
        raise HTTPException(
            status_code=400,
            detail="해당 URL에 접근할 수 없습니다",
        ) from e
    except Exception as e:
        # logger.exception keeps the traceback, which the client never sees.
        logger.exception("Failed to start site inspection: %s", e)
        raise HTTPException(
            status_code=400,
            detail="사이트 검사를 시작할 수 없습니다",
        ) from e

    return StartSiteInspectionResponse(
        site_inspection_id=site_inspection_id,
        status="crawling",
        root_url=url,
        stream_url=f"/api/site-inspections/{site_inspection_id}/stream",
    )


# ============================================================
# GET /api/site-inspections -- List site inspections (history)
# IMPORTANT: This MUST be before /{site_inspection_id} routes
# ============================================================

@router.get("/site-inspections")
async def list_site_inspections(
    page: int = Query(default=1, ge=1),
    limit: int = Query(default=20, ge=1, le=100),
):
    """
    Get paginated site inspection history.

    Args:
        page: 1-based page number (>= 1).
        limit: page size, between 1 and 100.

    Returns:
        Whatever SiteInspectionService.get_site_inspection_list returns,
        passed through unchanged.
    """
    service = _get_service()
    return await service.get_site_inspection_list(
        page=page,
        limit=limit,
    )


# ============================================================
# GET /api/site-inspections/{site_inspection_id}/stream -- SSE
# ============================================================

@router.get("/site-inspections/{site_inspection_id}/stream")
async def stream_site_progress(site_inspection_id: str):
    """
    Stream site inspection progress via Server-Sent Events.

    Messages published on the Redis channel
    ``site-inspection:{site_inspection_id}:events`` are forwarded to the
    client. Each message is a JSON object; its ``event_type`` key (default
    ``"progress"``) becomes the SSE event name and the rest is the payload.
    The stream ends after a ``complete`` or ``error`` event.

    Events:
    - connected: { site_inspection_id, message } (sent once on connect)
    - crawl_progress: { pages_found, current_url }
    - crawl_complete: { total_pages, pages: [...] }
    - page_start: { page_url, page_index }
    - page_progress: { page_url, category, progress, current_step }
    - page_complete: { page_url, inspection_id, score, grade }
    - aggregate_update: { pages_inspected, pages_total, overall_score }
    - complete: { status, aggregate_scores }
    - error: { message }
    """

    async def event_generator():
        redis = get_redis()
        pubsub = redis.pubsub()
        channel = f"site-inspection:{site_inspection_id}:events"

        try:
            # Subscribe inside the try so that a failed subscribe still
            # reaches the cleanup in `finally`.
            await pubsub.subscribe(channel)

            # Send initial connected event
            yield {
                "event": "connected",
                "data": json.dumps({
                    "site_inspection_id": site_inspection_id,
                    "message": "SSE 연결 완료",
                }, ensure_ascii=False),
            }

            # Listen for Pub/Sub messages
            async for message in pubsub.listen():
                # Skip subscribe/unsubscribe confirmations and the like.
                if message["type"] != "message":
                    continue

                event_data = json.loads(message["data"])
                event_type = event_data.pop("event_type", "progress")

                yield {
                    "event": event_type,
                    "data": json.dumps(event_data, ensure_ascii=False),
                }

                # End stream on complete or error
                if event_type in ("complete", "error"):
                    break

        except Exception as e:
            logger.exception(
                "SSE stream error for site %s: %s",
                site_inspection_id, e,
            )
            yield {
                "event": "error",
                "data": json.dumps({
                    "site_inspection_id": site_inspection_id,
                    "status": "error",
                    "message": "스트리밍 중 오류가 발생했습니다",
                }, ensure_ascii=False),
            }
        finally:
            # Close the Pub/Sub object even when unsubscribing itself fails;
            # otherwise its connection would be leaked.
            try:
                await pubsub.unsubscribe(channel)
            finally:
                await pubsub.aclose()

    return EventSourceResponse(
        event_generator(),
        media_type="text/event-stream",
    )


# ============================================================
# GET /api/site-inspections/{site_inspection_id} -- Get result
# ============================================================

@router.get("/site-inspections/{site_inspection_id}")
async def get_site_inspection(site_inspection_id: str):
    """
    Get site inspection result by ID.

    Raises:
        HTTPException(404): the service returned None for this ID.
    """
    service = _get_service()
    result = await service.get_site_inspection(site_inspection_id)

    if result is None:
        raise HTTPException(
            status_code=404,
            detail="사이트 검사 결과를 찾을 수 없습니다",
        )

    # Remove MongoDB _id field if present
    result.pop("_id", None)

    return result


# ============================================================
# POST /api/site-inspections/{site_inspection_id}/inspect-page
# -- Trigger single page inspection
# ============================================================

@router.post("/site-inspections/{site_inspection_id}/inspect-page")
async def inspect_page(
    site_inspection_id: str,
    request: InspectPageRequest,
):
    """
    Trigger inspection for a specific page within a site inspection.

    Useful for re-inspecting a single page or manually triggering
    inspection of a page that failed previously.

    Returns:
        A dict with site_inspection_id, page_url, the inspection_id returned
        by the service, and status "completed".

    Raises:
        HTTPException(404): the service returned None (unknown site
            inspection or page).
    """
    page_url = str(request.url)

    service = _get_service()
    inspection_id = await service.inspect_single_page(
        site_inspection_id=site_inspection_id,
        page_url=page_url,
    )

    if inspection_id is None:
        raise HTTPException(
            status_code=404,
            detail="사이트 검사 또는 해당 페이지를 찾을 수 없습니다",
        )

    return {
        "site_inspection_id": site_inspection_id,
        "page_url": page_url,
        "inspection_id": inspection_id,
        "status": "completed",
    }