diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 116222e..aa7e048 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -21,6 +21,11 @@ class Settings(BaseSettings): CATEGORY_TIMEOUT: int = 60 MAX_HTML_SIZE: int = 10485760 # 10MB + # Site inspection + SITE_MAX_PAGES: int = 20 + SITE_MAX_DEPTH: int = 2 + SITE_CONCURRENCY: int = 2 + # Application PROJECT_NAME: str = "Web Inspector API" diff --git a/backend/app/core/database.py b/backend/app/core/database.py index 706da25..048dd5d 100644 --- a/backend/app/core/database.py +++ b/backend/app/core/database.py @@ -19,11 +19,16 @@ async def connect_db() -> None: _client = AsyncIOMotorClient(settings.MONGODB_URL) _db = _client[settings.DB_NAME] - # Create indexes + # Create indexes - inspections await _db.inspections.create_index("inspection_id", unique=True) await _db.inspections.create_index([("url", 1), ("created_at", -1)]) await _db.inspections.create_index([("created_at", -1)]) + # Create indexes - site_inspections + await _db.site_inspections.create_index("site_inspection_id", unique=True) + await _db.site_inspections.create_index([("domain", 1), ("created_at", -1)]) + await _db.site_inspections.create_index([("created_at", -1)]) + # Verify connection await _client.admin.command("ping") logger.info("MongoDB connected successfully: %s", settings.DB_NAME) diff --git a/backend/app/main.py b/backend/app/main.py index 2d7ae8a..48ba6d8 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -10,7 +10,7 @@ from fastapi.middleware.cors import CORSMiddleware from app.core.database import connect_db, close_db from app.core.redis import connect_redis, close_redis -from app.routers import health, inspections, reports +from app.routers import health, inspections, reports, site_inspections # Configure logging logging.basicConfig( @@ -56,3 +56,4 @@ app.add_middleware( app.include_router(health.router, prefix="/api", tags=["Health"]) app.include_router(inspections.router, 
prefix="/api", tags=["Inspections"]) app.include_router(reports.router, prefix="/api", tags=["Reports"]) +app.include_router(site_inspections.router, prefix="/api", tags=["Site Inspections"]) diff --git a/backend/app/models/site_schemas.py b/backend/app/models/site_schemas.py new file mode 100644 index 0000000..39e3469 --- /dev/null +++ b/backend/app/models/site_schemas.py @@ -0,0 +1,113 @@ +""" +Pydantic models for site-wide inspection request/response validation. +""" + +from pydantic import BaseModel, Field, HttpUrl +from typing import Optional +from datetime import datetime +from enum import Enum + + +# --- Enums --- + +class SiteInspectionStatus(str, Enum): + CRAWLING = "crawling" + INSPECTING = "inspecting" + COMPLETED = "completed" + ERROR = "error" + + +class PageStatus(str, Enum): + PENDING = "pending" + INSPECTING = "inspecting" + COMPLETED = "completed" + ERROR = "error" + + +# --- Request --- + +class StartSiteInspectionRequest(BaseModel): + url: HttpUrl + max_pages: int = Field(default=20, ge=1, le=50, description="최대 크롤링 페이지 수") + max_depth: int = Field(default=2, ge=1, le=3, description="최대 크롤링 깊이") + + +class InspectPageRequest(BaseModel): + url: HttpUrl + + +# --- Core Data Models --- + +class DiscoveredPage(BaseModel): + """크롤링으로 발견된 개별 페이지.""" + url: str + depth: int = 0 + parent_url: Optional[str] = None + inspection_id: Optional[str] = None + status: PageStatus = PageStatus.PENDING + title: Optional[str] = None + overall_score: Optional[int] = None + grade: Optional[str] = None + + +class AggregateScores(BaseModel): + """사이트 전체 집계 점수.""" + overall_score: int = Field(ge=0, le=100, default=0) + grade: str = "F" + html_css: int = Field(ge=0, le=100, default=0) + accessibility: int = Field(ge=0, le=100, default=0) + seo: int = Field(ge=0, le=100, default=0) + performance_security: int = Field(ge=0, le=100, default=0) + total_issues: int = 0 + pages_inspected: int = 0 + pages_total: int = 0 + + +class SiteInspectionConfig(BaseModel): + """사이트 검사 
설정.""" + max_pages: int = 20 + max_depth: int = 2 + + +# --- Response Models --- + +class StartSiteInspectionResponse(BaseModel): + site_inspection_id: str + status: str = "crawling" + root_url: str + stream_url: str + + +class SiteInspectionResult(BaseModel): + """사이트 검사 전체 결과.""" + site_inspection_id: str + root_url: str + domain: str + status: SiteInspectionStatus + created_at: datetime + completed_at: Optional[datetime] = None + config: SiteInspectionConfig + discovered_pages: list[DiscoveredPage] = [] + aggregate_scores: Optional[AggregateScores] = None + + +class SiteInspectionListItem(BaseModel): + """사이트 검사 목록 항목 (요약).""" + site_inspection_id: str + root_url: str + domain: str + status: SiteInspectionStatus + created_at: datetime + pages_total: int = 0 + pages_inspected: int = 0 + overall_score: Optional[int] = None + grade: Optional[str] = None + + +class SiteInspectionPaginatedResponse(BaseModel): + """사이트 검사 목록 페이지네이션 응답.""" + items: list[SiteInspectionListItem] + total: int + page: int + limit: int + total_pages: int diff --git a/backend/app/routers/site_inspections.py b/backend/app/routers/site_inspections.py new file mode 100644 index 0000000..76a31cf --- /dev/null +++ b/backend/app/routers/site_inspections.py @@ -0,0 +1,249 @@ +""" +Site inspections router. +Handles site-wide inspection lifecycle: + - Start site inspection (crawl + inspect all pages) + - SSE stream for real-time progress + - Get site inspection result + - List site inspections (history) + - Trigger single page inspection within a site + +IMPORTANT: Static paths (/site-inspections) must be registered BEFORE +dynamic paths (/site-inspections/{id}) to avoid routing conflicts. 
+""" + +import json +import logging + +import httpx +from fastapi import APIRouter, HTTPException, Query +from sse_starlette.sse import EventSourceResponse + +from app.core.database import get_db +from app.core.redis import get_redis +from app.models.site_schemas import ( + StartSiteInspectionRequest, + StartSiteInspectionResponse, + InspectPageRequest, +) +from app.services.site_inspection_service import SiteInspectionService + +logger = logging.getLogger(__name__) + +router = APIRouter() + + +def _get_service() -> SiteInspectionService: + """Get SiteInspectionService instance.""" + db = get_db() + redis = get_redis() + return SiteInspectionService(db=db, redis=redis) + + +# ============================================================ +# POST /api/site-inspections -- Start site inspection +# ============================================================ + +@router.post("/site-inspections", status_code=202) +async def start_site_inspection(request: StartSiteInspectionRequest): + """ + Start a new site-wide inspection. + Returns 202 Accepted with site_inspection_id immediately. + Crawling and inspection run asynchronously in the background. 
+ """ + url = str(request.url) + + # Validate URL scheme + if not url.startswith(("http://", "https://")): + raise HTTPException( + status_code=422, + detail="유효한 URL을 입력해주세요 (http:// 또는 https://로 시작해야 합니다)", + ) + + service = _get_service() + + try: + site_inspection_id = await service.start_site_inspection( + url=url, + max_pages=request.max_pages, + max_depth=request.max_depth, + ) + except httpx.HTTPStatusError as e: + raise HTTPException( + status_code=400, + detail=f"해당 URL에 접근할 수 없습니다 (HTTP {e.response.status_code})", + ) + except httpx.TimeoutException: + raise HTTPException( + status_code=400, + detail="해당 URL에 접근할 수 없습니다 (응답 시간 초과)", + ) + except httpx.RequestError: + raise HTTPException( + status_code=400, + detail="해당 URL에 접근할 수 없습니다", + ) + except Exception as e: + logger.error("Failed to start site inspection: %s", str(e)) + raise HTTPException( + status_code=400, + detail="사이트 검사를 시작할 수 없습니다", + ) + + return StartSiteInspectionResponse( + site_inspection_id=site_inspection_id, + status="crawling", + root_url=url, + stream_url=f"/api/site-inspections/{site_inspection_id}/stream", + ) + + +# ============================================================ +# GET /api/site-inspections -- List site inspections (history) +# IMPORTANT: This MUST be before /{site_inspection_id} routes +# ============================================================ + +@router.get("/site-inspections") +async def list_site_inspections( + page: int = Query(default=1, ge=1), + limit: int = Query(default=20, ge=1, le=100), +): + """Get paginated site inspection history.""" + service = _get_service() + result = await service.get_site_inspection_list( + page=page, + limit=limit, + ) + return result + + +# ============================================================ +# GET /api/site-inspections/{site_inspection_id}/stream -- SSE +# ============================================================ + +@router.get("/site-inspections/{site_inspection_id}/stream") +async def 
stream_site_progress(site_inspection_id: str): + """ + Stream site inspection progress via Server-Sent Events. + + Events: + - crawl_progress: { pages_found, current_url } + - crawl_complete: { total_pages, pages: [...] } + - page_start: { page_url, page_index } + - page_progress: { page_url, category, progress, current_step } + - page_complete: { page_url, inspection_id, score, grade } + - aggregate_update: { pages_inspected, pages_total, overall_score } + - complete: { status, aggregate_scores } + - error: { message } + """ + + async def event_generator(): + redis = get_redis() + pubsub = redis.pubsub() + channel = f"site-inspection:{site_inspection_id}:events" + + await pubsub.subscribe(channel) + + try: + # Send initial connected event + yield { + "event": "connected", + "data": json.dumps({ + "site_inspection_id": site_inspection_id, + "message": "SSE 연결 완료", + }, ensure_ascii=False), + } + + # Listen for Pub/Sub messages + async for message in pubsub.listen(): + if message["type"] == "message": + event_data = json.loads(message["data"]) + event_type = event_data.pop("event_type", "progress") + + yield { + "event": event_type, + "data": json.dumps(event_data, ensure_ascii=False), + } + + # End stream on complete or error + if event_type in ("complete", "error"): + break + + except Exception as e: + logger.error( + "SSE stream error for site %s: %s", + site_inspection_id, str(e), + ) + yield { + "event": "error", + "data": json.dumps({ + "site_inspection_id": site_inspection_id, + "status": "error", + "message": "스트리밍 중 오류가 발생했습니다", + }, ensure_ascii=False), + } + finally: + await pubsub.unsubscribe(channel) + await pubsub.aclose() + + return EventSourceResponse( + event_generator(), + media_type="text/event-stream", + ) + + +# ============================================================ +# GET /api/site-inspections/{site_inspection_id} -- Get result +# ============================================================ + 
+@router.get("/site-inspections/{site_inspection_id}") +async def get_site_inspection(site_inspection_id: str): + """Get site inspection result by ID.""" + service = _get_service() + result = await service.get_site_inspection(site_inspection_id) + + if result is None: + raise HTTPException( + status_code=404, + detail="사이트 검사 결과를 찾을 수 없습니다", + ) + + # Remove MongoDB _id field if present + result.pop("_id", None) + return result + + +# ============================================================ +# POST /api/site-inspections/{site_inspection_id}/inspect-page +# -- Trigger single page inspection +# ============================================================ + +@router.post("/site-inspections/{site_inspection_id}/inspect-page") +async def inspect_page( + site_inspection_id: str, + request: InspectPageRequest, +): + """ + Trigger inspection for a specific page within a site inspection. + Useful for re-inspecting a single page or manually triggering + inspection of a page that failed previously. 
+ """ + page_url = str(request.url) + service = _get_service() + + inspection_id = await service.inspect_single_page( + site_inspection_id=site_inspection_id, + page_url=page_url, + ) + + if inspection_id is None: + raise HTTPException( + status_code=404, + detail="사이트 검사 또는 해당 페이지를 찾을 수 없습니다", + ) + + return { + "site_inspection_id": site_inspection_id, + "page_url": page_url, + "inspection_id": inspection_id, + "status": "completed", + } diff --git a/backend/app/services/inspection_service.py b/backend/app/services/inspection_service.py index 808200a..f6430c4 100644 --- a/backend/app/services/inspection_service.py +++ b/backend/app/services/inspection_service.py @@ -75,20 +75,162 @@ class InspectionService: return inspection_id - async def _run_inspection( - self, inspection_id: str, url: str, response: httpx.Response - ) -> None: - """Execute 4 category checks in parallel and store results.""" + async def run_inspection_inline( + self, + url: str, + inspection_id: Optional[str] = None, + progress_callback: Optional[object] = None, + ) -> tuple[str, dict]: + """ + Run a full inspection synchronously (inline) and return the result. + + This is the core inspection logic extracted for reuse by both: + - Single-page inspection (_run_inspection wrapper with SSE/Redis) + - Site-wide inspection (site_inspection_service calling per-page) + + Args: + url: Target URL to inspect. + inspection_id: Optional pre-generated ID. If None, a new UUID is generated. + progress_callback: Optional async callback(category, progress, current_step). + If None, progress is not reported. + + Returns: + (inspection_id, result_dict) where result_dict is the MongoDB document. + + Raises: + Exception: On fetch failure or unrecoverable errors. 
+ """ + settings = get_settings() + + if inspection_id is None: + inspection_id = str(uuid.uuid4()) + + # Fetch URL + response = await self._fetch_url(url, timeout=settings.URL_FETCH_TIMEOUT) html_content = response.text headers = dict(response.headers) start_time = time.time() created_at = datetime.now(timezone.utc) + # Use provided callback or a no-op + if progress_callback is None: + async def progress_callback(category: str, progress: int, current_step: str): + pass + + # Create 4 checker engines + checkers = [ + HtmlCssChecker(progress_callback=progress_callback), + AccessibilityChecker(progress_callback=progress_callback), + SeoChecker(progress_callback=progress_callback), + PerformanceSecurityChecker(progress_callback=progress_callback), + ] + + # Parallel execution with per-category timeout + results = await asyncio.gather( + *[ + asyncio.wait_for( + checker.check(url, html_content, headers), + timeout=settings.CATEGORY_TIMEOUT, + ) + for checker in checkers + ], + return_exceptions=True, + ) + + # Process results (handle timeouts/errors per category) + categories = {} + category_names = ["html_css", "accessibility", "seo", "performance_security"] + + for i, result in enumerate(results): + cat_name = category_names[i] + if isinstance(result, Exception): + logger.error( + "Category %s failed for inspection %s: %s", + cat_name, inspection_id, str(result), + ) + categories[cat_name] = CategoryResult( + score=0, + grade="F", + total_issues=0, + issues=[], + ) + else: + categories[cat_name] = result + + # Calculate overall score + overall_score = calculate_overall_score(categories) + grade = calculate_grade(overall_score) + duration = round(time.time() - start_time, 1) + + # Build summary + total_critical = sum(c.critical for c in categories.values()) + total_major = sum(c.major for c in categories.values()) + total_minor = sum(c.minor for c in categories.values()) + total_info = sum(c.info for c in categories.values()) + total_issues = sum(c.total_issues for c 
in categories.values()) + + summary = IssueSummary( + total_issues=total_issues, + critical=total_critical, + major=total_major, + minor=total_minor, + info=total_info, + ) + + # Build inspection result + completed_at = datetime.now(timezone.utc) + inspection_result = InspectionResult( + inspection_id=inspection_id, + url=url, + status="completed", + created_at=created_at, + completed_at=completed_at, + duration_seconds=duration, + overall_score=overall_score, + grade=grade, + categories=categories, + summary=summary, + ) + + # Store in MongoDB + doc = inspection_result.model_dump(mode="json") + await self.db.inspections.insert_one(doc) + + # Enforce URL history limit (max 100 per URL) + await self._enforce_history_limit(url, max_count=100) + + # Cache in Redis + await cache_result(inspection_id, doc) + + logger.info( + "Inspection %s completed (inline): score=%d, duration=%.1fs", + inspection_id, overall_score, duration, + ) + + return inspection_id, doc + + async def _run_inspection( + self, inspection_id: str, url: str, response: httpx.Response + ) -> None: + """ + Execute 4 category checks in parallel and store results. + + This is the background-task wrapper that adds SSE/Redis progress + tracking on top of run_inspection_inline(). + """ try: - # Progress callback factory + # Progress callback that publishes to Redis + SSE async def progress_callback(category: str, progress: int, current_step: str): await self._update_progress(inspection_id, category, progress, current_step) + # Use inline runner (fetches URL internally, so we pass the pre-fetched response data) + # Since run_inspection_inline fetches the URL again, we use the lower-level approach + # to avoid double-fetching. We replicate the core logic with SSE event publishing. 
+ html_content = response.text + headers = dict(response.headers) + start_time = time.time() + created_at = datetime.now(timezone.utc) + # Create 4 checker engines checkers = [ HtmlCssChecker(progress_callback=progress_callback), @@ -122,14 +264,13 @@ class InspectionService: "Category %s failed for inspection %s: %s", cat_name, inspection_id, str(result), ) - # Create error result for failed category categories[cat_name] = CategoryResult( score=0, grade="F", total_issues=0, issues=[], ) - # Publish category error + # Publish category error event await publish_event(inspection_id, { "event_type": "category_complete", "inspection_id": inspection_id, @@ -139,7 +280,7 @@ class InspectionService: }) else: categories[cat_name] = result - # Publish category completion + # Publish category completion event await publish_event(inspection_id, { "event_type": "category_complete", "inspection_id": inspection_id, diff --git a/backend/app/services/link_crawler.py b/backend/app/services/link_crawler.py new file mode 100644 index 0000000..46c3522 --- /dev/null +++ b/backend/app/services/link_crawler.py @@ -0,0 +1,291 @@ +""" +BFS link crawler for same-domain page discovery. + +Crawls a root URL using BFS (Breadth-First Search), extracting same-domain +links up to configurable max_pages and max_depth limits. Used by the +site-wide inspection feature to discover pages before inspection. 
+""" + +import logging +from collections import deque +from typing import Callable, Awaitable, Optional +from urllib.parse import urljoin, urlparse, urlunparse + +import httpx +from bs4 import BeautifulSoup + +logger = logging.getLogger(__name__) + +# Schemes to skip when extracting links +_SKIP_SCHEMES = {"javascript", "mailto", "tel", "data", "blob", "ftp"} + +# File extensions that are not HTML pages +_SKIP_EXTENSIONS = { + ".pdf", ".jpg", ".jpeg", ".png", ".gif", ".svg", ".webp", ".ico", + ".css", ".js", ".json", ".xml", ".zip", ".tar", ".gz", ".mp4", + ".mp3", ".wav", ".avi", ".mov", ".woff", ".woff2", ".ttf", ".eot", +} + +# Type alias for progress callback: (pages_found, current_url) -> None +ProgressCallback = Callable[[int, str], Awaitable[None]] + + +def normalize_url(url: str) -> str: + """ + Normalize a URL for deduplication: + - Remove fragment (#...) + - Remove trailing slash (except for root path) + - Lowercase scheme and netloc + """ + parsed = urlparse(url) + + # Remove fragment + normalized = parsed._replace(fragment="") + + # Lowercase scheme and netloc + normalized = normalized._replace( + scheme=normalized.scheme.lower(), + netloc=normalized.netloc.lower(), + ) + + # Remove trailing slash (but keep "/" for root path) + path = normalized.path + if path != "/" and path.endswith("/"): + path = path.rstrip("/") + normalized = normalized._replace(path=path) + + return urlunparse(normalized) + + +def is_same_domain(url: str, root_domain: str) -> bool: + """Check if a URL belongs to the same domain as the root.""" + parsed = urlparse(url) + url_domain = parsed.netloc.lower() + + # Handle www prefix: treat example.com and www.example.com as same domain + root_clean = root_domain.lower().removeprefix("www.") + url_clean = url_domain.removeprefix("www.") + + return root_clean == url_clean + + +def should_skip_url(href: str) -> bool: + """Check if a URL should be skipped based on scheme or extension.""" + if not href or href.strip() == "": + return True + 
+ # Skip anchors-only links + if href.startswith("#"): + return True + + # Skip non-HTTP schemes + parsed = urlparse(href) + if parsed.scheme and parsed.scheme.lower() in _SKIP_SCHEMES: + return True + + # Skip non-HTML file extensions + path = parsed.path.lower() + for ext in _SKIP_EXTENSIONS: + if path.endswith(ext): + return True + + return False + + +class LinkCrawler: + """ + BFS link crawler that discovers same-domain pages. + + Usage: + crawler = LinkCrawler( + root_url="https://example.com", + max_pages=20, + max_depth=2, + ) + pages = await crawler.crawl(progress_callback=callback) + """ + + def __init__( + self, + root_url: str, + max_pages: int = 20, + max_depth: int = 2, + ): + self.root_url = normalize_url(root_url) + self.max_pages = max_pages + self.max_depth = max_depth + + parsed = urlparse(self.root_url) + self.root_domain = parsed.netloc.lower() + self.root_scheme = parsed.scheme + + async def crawl( + self, + progress_callback: Optional[ProgressCallback] = None, + ) -> list[dict]: + """ + BFS crawl starting from root_url. + + Returns list of dicts: + [ + { + "url": "https://example.com/", + "depth": 0, + "parent_url": None, + "title": "Example Page", + "status": "discovered", + }, + ... 
+ ] + """ + visited: set[str] = set() + results: list[dict] = [] + + # BFS queue: (url, depth, parent_url) + queue: deque[tuple[str, int, Optional[str]]] = deque() + queue.append((self.root_url, 0, None)) + visited.add(self.root_url) + + async with httpx.AsyncClient( + follow_redirects=True, + timeout=httpx.Timeout(10.0), + verify=False, + headers={ + "User-Agent": "WebInspector/1.0 (Site Crawler)", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7", + }, + ) as client: + while queue and len(results) < self.max_pages: + url, depth, parent_url = queue.popleft() + + # Fetch the page + title = None + status_code = None + links: list[str] = [] + + try: + response = await client.get(url) + status_code = response.status_code + + # Only parse HTML content + content_type = response.headers.get("content-type", "") + if "text/html" not in content_type and "application/xhtml" not in content_type: + logger.debug("Skipping non-HTML content: %s (%s)", url, content_type) + # Still record it but don't extract links + results.append({ + "url": url, + "depth": depth, + "parent_url": parent_url, + "title": None, + "status": "discovered", + }) + if progress_callback: + await progress_callback(len(results), url) + continue + + html = response.text + title, links = self._extract_links_and_title(url, html) + + except httpx.TimeoutException: + logger.warning("Timeout crawling %s", url) + results.append({ + "url": url, + "depth": depth, + "parent_url": parent_url, + "title": None, + "status": "discovered", + }) + if progress_callback: + await progress_callback(len(results), url) + continue + + except httpx.RequestError as e: + logger.warning("Request error crawling %s: %s", url, str(e)) + results.append({ + "url": url, + "depth": depth, + "parent_url": parent_url, + "title": None, + "status": "discovered", + }) + if progress_callback: + await progress_callback(len(results), url) + continue + + # Record 
this page + results.append({ + "url": url, + "depth": depth, + "parent_url": parent_url, + "title": title, + "status": "discovered", + }) + + # Notify progress + if progress_callback: + await progress_callback(len(results), url) + + # Only enqueue child links if we haven't reached max_depth + if depth < self.max_depth: + for link in links: + normalized = normalize_url(link) + + if normalized in visited: + continue + + if not is_same_domain(normalized, self.root_domain): + continue + + if len(visited) >= self.max_pages: + break + + visited.add(normalized) + queue.append((normalized, depth + 1, url)) + + logger.info( + "Crawl completed: root=%s, pages_found=%d, max_pages=%d, max_depth=%d", + self.root_url, len(results), self.max_pages, self.max_depth, + ) + + return results + + def _extract_links_and_title( + self, base_url: str, html: str + ) -> tuple[Optional[str], list[str]]: + """ + Extract page title and same-domain links from HTML. + + Returns: + (title, list_of_absolute_urls) + """ + soup = BeautifulSoup(html, "html.parser") + + # Extract title + title = None + title_tag = soup.find("title") + if title_tag and title_tag.string: + title = title_tag.string.strip() + # Truncate very long titles + if len(title) > 200: + title = title[:200] + "..." + + # Extract links + links: list[str] = [] + for a_tag in soup.find_all("a", href=True): + href = a_tag["href"].strip() + + if should_skip_url(href): + continue + + # Resolve relative URLs + absolute_url = urljoin(base_url, href) + + # Verify it's HTTP(S) + parsed = urlparse(absolute_url) + if parsed.scheme not in ("http", "https"): + continue + + links.append(absolute_url) + + return title, links diff --git a/backend/app/services/site_inspection_service.py b/backend/app/services/site_inspection_service.py new file mode 100644 index 0000000..814c4b8 --- /dev/null +++ b/backend/app/services/site_inspection_service.py @@ -0,0 +1,678 @@ +""" +Site-wide inspection orchestration service. 
+ +Manages the full site inspection lifecycle: + 1. BFS crawling to discover same-domain pages + 2. Sequential/parallel inspection of each discovered page + 3. Aggregate score computation + 4. Progress tracking via Redis Pub/Sub (SSE events) + 5. Result storage in MongoDB (site_inspections collection) +""" + +import asyncio +import json +import logging +import uuid +from datetime import datetime, timezone +from typing import Optional +from urllib.parse import urlparse + +from motor.motor_asyncio import AsyncIOMotorDatabase +from redis.asyncio import Redis + +from app.core.config import get_settings +from app.core.redis import get_redis +from app.models.schemas import calculate_grade +from app.services.link_crawler import LinkCrawler +from app.services.inspection_service import InspectionService + +logger = logging.getLogger(__name__) + +# Redis key TTLs +SITE_RESULT_CACHE_TTL = 3600 # 1 hour + + +class SiteInspectionService: + """Site-wide inspection orchestration service.""" + + def __init__(self, db: AsyncIOMotorDatabase, redis: Redis): + self.db = db + self.redis = redis + self.inspection_service = InspectionService(db=db, redis=redis) + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + async def start_site_inspection( + self, + url: str, + max_pages: int = 20, + max_depth: int = 2, + ) -> str: + """ + Start a site-wide inspection. + + 1. Validate URL + 2. Generate site_inspection_id + 3. Create initial MongoDB document with status "crawling" + 4. Launch background crawl-and-inspect task + 5. 
Return site_inspection_id + """ + settings = get_settings() + + # Clamp to server-side limits + max_pages = min(max_pages, settings.SITE_MAX_PAGES) + max_depth = min(max_depth, settings.SITE_MAX_DEPTH) + + site_inspection_id = str(uuid.uuid4()) + parsed = urlparse(url) + domain = parsed.netloc.lower() + + # Create initial document + doc = { + "site_inspection_id": site_inspection_id, + "root_url": url, + "domain": domain, + "status": "crawling", + "created_at": datetime.now(timezone.utc), + "completed_at": None, + "config": { + "max_pages": max_pages, + "max_depth": max_depth, + }, + "discovered_pages": [], + "aggregate_scores": None, + } + await self.db.site_inspections.insert_one(doc) + + logger.info( + "Site inspection started: id=%s, url=%s, max_pages=%d, max_depth=%d", + site_inspection_id, url, max_pages, max_depth, + ) + + # Launch background task + asyncio.create_task( + self._crawl_and_inspect(site_inspection_id, url, max_pages, max_depth) + ) + + return site_inspection_id + + async def get_site_inspection(self, site_inspection_id: str) -> Optional[dict]: + """Get site inspection result by ID (cache-first).""" + # Try Redis cache first + cache_key = f"site-inspection:result:{site_inspection_id}" + cached = await self.redis.get(cache_key) + if cached: + return json.loads(cached) + + # Fetch from MongoDB + doc = await self.db.site_inspections.find_one( + {"site_inspection_id": site_inspection_id}, + {"_id": 0}, + ) + if doc: + # Only cache completed results + if doc.get("status") in ("completed", "error"): + await self.redis.set( + cache_key, + json.dumps(doc, ensure_ascii=False, default=str), + ex=SITE_RESULT_CACHE_TTL, + ) + return doc + return None + + async def get_site_inspection_list( + self, + page: int = 1, + limit: int = 20, + ) -> dict: + """Get paginated list of site inspections.""" + limit = min(limit, 100) + skip = (page - 1) * limit + + total = await self.db.site_inspections.count_documents({}) + + cursor = self.db.site_inspections.find( + {}, 
+ { + "_id": 0, + "site_inspection_id": 1, + "root_url": 1, + "domain": 1, + "status": 1, + "created_at": 1, + "discovered_pages": 1, + "aggregate_scores": 1, + }, + ).sort("created_at", -1).skip(skip).limit(limit) + + items = [] + async for doc in cursor: + pages = doc.get("discovered_pages", []) + pages_total = len(pages) + pages_inspected = sum( + 1 for p in pages if p.get("status") == "completed" + ) + agg = doc.get("aggregate_scores") + + items.append({ + "site_inspection_id": doc.get("site_inspection_id"), + "root_url": doc.get("root_url"), + "domain": doc.get("domain"), + "status": doc.get("status"), + "created_at": doc.get("created_at"), + "pages_total": pages_total, + "pages_inspected": pages_inspected, + "overall_score": agg.get("overall_score") if agg else None, + "grade": agg.get("grade") if agg else None, + }) + + total_pages = max(1, -(-total // limit)) + + return { + "items": items, + "total": total, + "page": page, + "limit": limit, + "total_pages": total_pages, + } + + async def inspect_single_page( + self, + site_inspection_id: str, + page_url: str, + ) -> Optional[str]: + """ + Trigger inspection for a single page within a site inspection. + Returns the inspection_id if successful, None if site inspection not found. 
+ """ + doc = await self.db.site_inspections.find_one( + {"site_inspection_id": site_inspection_id}, + ) + if not doc: + return None + + # Find the page in discovered_pages + page_found = False + for page in doc.get("discovered_pages", []): + if page["url"] == page_url: + page_found = True + break + + if not page_found: + return None + + # Run inspection inline + inspection_id = str(uuid.uuid4()) + try: + inspection_id, result = await self.inspection_service.run_inspection_inline( + url=page_url, + inspection_id=inspection_id, + ) + + # Update page status in site inspection + overall_score = result.get("overall_score", 0) + grade = result.get("grade", "F") + + await self.db.site_inspections.update_one( + { + "site_inspection_id": site_inspection_id, + "discovered_pages.url": page_url, + }, + { + "$set": { + "discovered_pages.$.inspection_id": inspection_id, + "discovered_pages.$.status": "completed", + "discovered_pages.$.overall_score": overall_score, + "discovered_pages.$.grade": grade, + } + }, + ) + + # Recompute aggregates + await self._compute_and_store_aggregates(site_inspection_id) + + # Invalidate cache + cache_key = f"site-inspection:result:{site_inspection_id}" + await self.redis.delete(cache_key) + + return inspection_id + + except Exception as e: + logger.error( + "Failed to inspect page %s in site %s: %s", + page_url, site_inspection_id, str(e), + ) + await self.db.site_inspections.update_one( + { + "site_inspection_id": site_inspection_id, + "discovered_pages.url": page_url, + }, + { + "$set": { + "discovered_pages.$.status": "error", + } + }, + ) + return None + + # ------------------------------------------------------------------ + # Background task: Crawl + Inspect + # ------------------------------------------------------------------ + + async def _crawl_and_inspect( + self, + site_inspection_id: str, + url: str, + max_pages: int, + max_depth: int, + ) -> None: + """ + Background task that runs in two phases: + Phase 1: BFS crawling to discover 
pages + Phase 2: Parallel inspection of discovered pages (with semaphore) + """ + try: + # ============================== + # Phase 1: Crawling + # ============================== + logger.info("Phase 1 (crawling) started: %s", site_inspection_id) + + async def crawl_progress(pages_found: int, current_url: str): + await self._publish_site_event(site_inspection_id, { + "event_type": "crawl_progress", + "site_inspection_id": site_inspection_id, + "pages_found": pages_found, + "current_url": current_url, + }) + + crawler = LinkCrawler( + root_url=url, + max_pages=max_pages, + max_depth=max_depth, + ) + discovered = await crawler.crawl(progress_callback=crawl_progress) + + if not discovered: + raise ValueError("크롤링 결과가 없습니다. URL을 확인해주세요.") + + # Build discovered_pages documents + discovered_pages = [] + for page in discovered: + discovered_pages.append({ + "url": page["url"], + "depth": page["depth"], + "parent_url": page["parent_url"], + "inspection_id": None, + "status": "pending", + "title": page.get("title"), + "overall_score": None, + "grade": None, + }) + + # Store discovered pages in MongoDB + await self.db.site_inspections.update_one( + {"site_inspection_id": site_inspection_id}, + { + "$set": { + "status": "inspecting", + "discovered_pages": discovered_pages, + } + }, + ) + + # Publish crawl_complete event + await self._publish_site_event(site_inspection_id, { + "event_type": "crawl_complete", + "site_inspection_id": site_inspection_id, + "total_pages": len(discovered_pages), + "pages": [ + { + "url": p["url"], + "depth": p["depth"], + "parent_url": p["parent_url"], + "title": p.get("title"), + } + for p in discovered_pages + ], + }) + + logger.info( + "Phase 1 completed: %s, pages=%d", + site_inspection_id, len(discovered_pages), + ) + + # ============================== + # Phase 2: Page-by-page inspection + # ============================== + logger.info("Phase 2 (inspection) started: %s", site_inspection_id) + + settings = get_settings() + semaphore = 
asyncio.Semaphore(settings.SITE_CONCURRENCY) + + tasks = [ + self._inspect_page_with_semaphore( + semaphore=semaphore, + site_inspection_id=site_inspection_id, + page_url=page["url"], + page_index=idx, + total_pages=len(discovered_pages), + ) + for idx, page in enumerate(discovered_pages) + ] + + await asyncio.gather(*tasks, return_exceptions=True) + + # ============================== + # Finalize: Compute aggregates + # ============================== + aggregate_scores = await self._compute_and_store_aggregates(site_inspection_id) + + # Mark as completed + await self.db.site_inspections.update_one( + {"site_inspection_id": site_inspection_id}, + { + "$set": { + "status": "completed", + "completed_at": datetime.now(timezone.utc), + } + }, + ) + + # Publish complete event + await self._publish_site_event(site_inspection_id, { + "event_type": "complete", + "site_inspection_id": site_inspection_id, + "status": "completed", + "aggregate_scores": aggregate_scores, + }) + + logger.info("Site inspection completed: %s", site_inspection_id) + + except Exception as e: + logger.error( + "Site inspection %s failed: %s", + site_inspection_id, str(e), exc_info=True, + ) + + await self.db.site_inspections.update_one( + {"site_inspection_id": site_inspection_id}, + { + "$set": { + "status": "error", + "completed_at": datetime.now(timezone.utc), + } + }, + ) + + await self._publish_site_event(site_inspection_id, { + "event_type": "error", + "site_inspection_id": site_inspection_id, + "status": "error", + "message": f"사이트 검사 중 오류가 발생했습니다: {str(e)[:200]}", + }) + + async def _inspect_page_with_semaphore( + self, + semaphore: asyncio.Semaphore, + site_inspection_id: str, + page_url: str, + page_index: int, + total_pages: int, + ) -> None: + """Inspect a single page with semaphore-controlled concurrency.""" + async with semaphore: + await self._inspect_single_page( + site_inspection_id=site_inspection_id, + page_url=page_url, + page_index=page_index, + total_pages=total_pages, + ) + + 
async def _inspect_single_page( + self, + site_inspection_id: str, + page_url: str, + page_index: int, + total_pages: int, + ) -> None: + """Run inspection for a single discovered page.""" + inspection_id = str(uuid.uuid4()) + + # Publish page_start event + await self._publish_site_event(site_inspection_id, { + "event_type": "page_start", + "site_inspection_id": site_inspection_id, + "page_url": page_url, + "page_index": page_index, + }) + + # Mark page as inspecting in MongoDB + await self.db.site_inspections.update_one( + { + "site_inspection_id": site_inspection_id, + "discovered_pages.url": page_url, + }, + { + "$set": { + "discovered_pages.$.status": "inspecting", + "discovered_pages.$.inspection_id": inspection_id, + } + }, + ) + + try: + # Progress callback for per-page SSE updates + async def page_progress_callback(category: str, progress: int, current_step: str): + await self._publish_site_event(site_inspection_id, { + "event_type": "page_progress", + "site_inspection_id": site_inspection_id, + "page_url": page_url, + "page_index": page_index, + "category": category, + "progress": progress, + "current_step": current_step, + }) + + # Run the inspection + _, result = await self.inspection_service.run_inspection_inline( + url=page_url, + inspection_id=inspection_id, + progress_callback=page_progress_callback, + ) + + overall_score = result.get("overall_score", 0) + grade = result.get("grade", "F") + + # Update page status in MongoDB + await self.db.site_inspections.update_one( + { + "site_inspection_id": site_inspection_id, + "discovered_pages.url": page_url, + }, + { + "$set": { + "discovered_pages.$.status": "completed", + "discovered_pages.$.overall_score": overall_score, + "discovered_pages.$.grade": grade, + } + }, + ) + + # Publish page_complete event + await self._publish_site_event(site_inspection_id, { + "event_type": "page_complete", + "site_inspection_id": site_inspection_id, + "page_url": page_url, + "page_index": page_index, + "inspection_id": 
inspection_id, + "score": overall_score, + "grade": grade, + }) + + # Compute and publish aggregate update + aggregate_scores = await self._compute_and_store_aggregates(site_inspection_id) + await self._publish_site_event(site_inspection_id, { + "event_type": "aggregate_update", + "site_inspection_id": site_inspection_id, + "pages_inspected": aggregate_scores.get("pages_inspected", 0), + "pages_total": aggregate_scores.get("pages_total", total_pages), + "overall_score": aggregate_scores.get("overall_score", 0), + "grade": aggregate_scores.get("grade", "F"), + }) + + logger.info( + "Page inspection completed: site=%s, page=%s, score=%d", + site_inspection_id, page_url, overall_score, + ) + + except Exception as e: + logger.error( + "Page inspection failed: site=%s, page=%s, error=%s", + site_inspection_id, page_url, str(e), + ) + + # Mark page as error + await self.db.site_inspections.update_one( + { + "site_inspection_id": site_inspection_id, + "discovered_pages.url": page_url, + }, + { + "$set": { + "discovered_pages.$.status": "error", + } + }, + ) + + # Publish page error (non-fatal, continue with other pages) + await self._publish_site_event(site_inspection_id, { + "event_type": "page_complete", + "site_inspection_id": site_inspection_id, + "page_url": page_url, + "page_index": page_index, + "inspection_id": None, + "score": 0, + "grade": "F", + "error": str(e)[:200], + }) + + # ------------------------------------------------------------------ + # Aggregate computation + # ------------------------------------------------------------------ + + async def _compute_and_store_aggregates(self, site_inspection_id: str) -> dict: + """ + Compute aggregate scores from all completed page inspections. + + Fetches each completed page's full inspection result from the + inspections collection, averages category scores, and stores + the aggregate in the site_inspections document. + + Returns the aggregate_scores dict. 
+ """ + doc = await self.db.site_inspections.find_one( + {"site_inspection_id": site_inspection_id}, + ) + if not doc: + return {} + + pages = doc.get("discovered_pages", []) + total_pages = len(pages) + + # Collect inspection IDs for completed pages + completed_ids = [ + p["inspection_id"] + for p in pages + if p.get("status") == "completed" and p.get("inspection_id") + ] + + if not completed_ids: + aggregate = { + "overall_score": 0, + "grade": "F", + "html_css": 0, + "accessibility": 0, + "seo": 0, + "performance_security": 0, + "total_issues": 0, + "pages_inspected": 0, + "pages_total": total_pages, + } + await self._store_aggregates(site_inspection_id, aggregate) + return aggregate + + # Fetch all completed inspection results + cursor = self.db.inspections.find( + {"inspection_id": {"$in": completed_ids}}, + { + "_id": 0, + "overall_score": 1, + "categories.html_css.score": 1, + "categories.accessibility.score": 1, + "categories.seo.score": 1, + "categories.performance_security.score": 1, + "summary.total_issues": 1, + }, + ) + + scores_overall = [] + scores_html_css = [] + scores_accessibility = [] + scores_seo = [] + scores_perf = [] + total_issues = 0 + + async for insp in cursor: + scores_overall.append(insp.get("overall_score", 0)) + + cats = insp.get("categories", {}) + scores_html_css.append(cats.get("html_css", {}).get("score", 0)) + scores_accessibility.append(cats.get("accessibility", {}).get("score", 0)) + scores_seo.append(cats.get("seo", {}).get("score", 0)) + scores_perf.append(cats.get("performance_security", {}).get("score", 0)) + + total_issues += insp.get("summary", {}).get("total_issues", 0) + + pages_inspected = len(scores_overall) + + def safe_avg(values: list[int]) -> int: + return round(sum(values) / len(values)) if values else 0 + + overall_score = safe_avg(scores_overall) + grade = calculate_grade(overall_score) + + aggregate = { + "overall_score": overall_score, + "grade": grade, + "html_css": safe_avg(scores_html_css), + 
"accessibility": safe_avg(scores_accessibility), + "seo": safe_avg(scores_seo), + "performance_security": safe_avg(scores_perf), + "total_issues": total_issues, + "pages_inspected": pages_inspected, + "pages_total": total_pages, + } + + await self._store_aggregates(site_inspection_id, aggregate) + return aggregate + + async def _store_aggregates(self, site_inspection_id: str, aggregate: dict) -> None: + """Store aggregate scores in MongoDB.""" + await self.db.site_inspections.update_one( + {"site_inspection_id": site_inspection_id}, + {"$set": {"aggregate_scores": aggregate}}, + ) + + # ------------------------------------------------------------------ + # SSE event publishing + # ------------------------------------------------------------------ + + async def _publish_site_event(self, site_inspection_id: str, event_data: dict) -> None: + """Publish an SSE event for site inspection via Redis Pub/Sub.""" + channel = f"site-inspection:{site_inspection_id}:events" + await self.redis.publish( + channel, + json.dumps(event_data, ensure_ascii=False, default=str), + ) diff --git a/frontend/src/app/globals.css b/frontend/src/app/globals.css index 03302f9..74ab900 100644 --- a/frontend/src/app/globals.css +++ b/frontend/src/app/globals.css @@ -35,3 +35,17 @@ @apply bg-background text-foreground; } } + +/* 크롤링 진행 바 애니메이션 */ +@keyframes crawl-progress { + 0% { + transform: translateX(-100%); + } + 100% { + transform: translateX(400%); + } +} + +.animate-crawl-progress { + animation: crawl-progress 1.5s ease-in-out infinite; +} diff --git a/frontend/src/app/site-inspections/[id]/page.tsx b/frontend/src/app/site-inspections/[id]/page.tsx new file mode 100644 index 0000000..2cef576 --- /dev/null +++ b/frontend/src/app/site-inspections/[id]/page.tsx @@ -0,0 +1,283 @@ +"use client"; + +import { use, useState, useCallback } from "react"; +import { useRouter } from "next/navigation"; +import { useSiteInspectionResult, useInspectionResult } from "@/lib/queries"; +import { PageTree } 
from "@/components/site-inspection/PageTree"; +import { AggregateScorePanel } from "@/components/site-inspection/AggregateScorePanel"; +import { OverallScoreGauge } from "@/components/dashboard/OverallScoreGauge"; +import { CategoryScoreCard } from "@/components/dashboard/CategoryScoreCard"; +import { IssueSummaryBar } from "@/components/dashboard/IssueSummaryBar"; +import { InspectionMeta } from "@/components/dashboard/InspectionMeta"; +import { LoadingSpinner } from "@/components/common/LoadingSpinner"; +import { ErrorState } from "@/components/common/ErrorState"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { Button } from "@/components/ui/button"; +import { CATEGORY_LABELS, CATEGORY_KEYS } from "@/lib/constants"; +import { ApiError } from "@/lib/api"; +import { Clock, Menu, X } from "lucide-react"; +import type { CategoryKey } from "@/types/inspection"; + +export default function SiteInspectionResultPage({ + params, +}: { + params: Promise<{ id: string }>; +}) { + const { id } = use(params); + const [selectedPageUrl, setSelectedPageUrl] = useState(null); + const [isSidebarOpen, setIsSidebarOpen] = useState(false); + + const { + data: siteResult, + isLoading, + isError, + error, + refetch, + } = useSiteInspectionResult(id); + + if (isLoading) { + return ( +
+ +
+ ); + } + + if (isError || !siteResult) { + return ( +
+ refetch()} + /> +
+ ); + } + + // 선택된 페이지의 inspection_id 찾기 + const selectedPage = selectedPageUrl + ? siteResult.discovered_pages.find((p) => p.url === selectedPageUrl) + : null; + + const handleSelectPage = (url: string | null) => { + setSelectedPageUrl(url); + // 모바일: 사이드바 닫기 + setIsSidebarOpen(false); + }; + + return ( +
+ {/* 모바일 사이드바 토글 */} +
+ + {selectedPageUrl && ( + + {selectedPageUrl} + + )} +
+ +
+ {/* 왼쪽 사이드바: 페이지 트리 */} + + + {/* 모바일 오버레이 */} + {isSidebarOpen && ( +
setIsSidebarOpen(false)} + /> + )} + + {/* 오른쪽 패널: 결과 표시 */} +
+ {selectedPageUrl === null ? ( + // 전체 집계 보기 + siteResult.aggregate_scores ? ( + + ) : ( +
+ +

아직 집계 결과가 없습니다

+
+ ) + ) : selectedPage?.inspection_id ? ( + // 개별 페이지 결과 (기존 대시보드 재사용) + + ) : ( + // 검사 대기 중인 페이지 +
+ +

검사 대기 중

+

+ 이 페이지는 아직 검사가 완료되지 않았습니다 +

+
+ )} +
+
+
+ ); +} + +/** + * 개별 페이지 대시보드 컴포넌트. + * 기존 inspection 결과 컴포넌트들을 재사용하여 특정 페이지의 검사 결과를 표시한다. + */ +function PageDashboard({ + inspectionId, + pageUrl, +}: { + inspectionId: string; + pageUrl: string; +}) { + const router = useRouter(); + + const { + data: result, + isLoading, + isError, + error, + refetch, + } = useInspectionResult(inspectionId); + + const handleCategoryClick = useCallback( + (category: CategoryKey) => { + router.push(`/inspections/${inspectionId}/issues?category=${category}`); + }, + [inspectionId, router] + ); + + const handleViewIssues = useCallback(() => { + router.push(`/inspections/${inspectionId}/issues`); + }, [inspectionId, router]); + + if (isLoading) { + return ; + } + + if (isError || !result) { + return ( + refetch()} + /> + ); + } + + return ( +
+ {/* 페이지 URL 표시 */} +
+

페이지 검사 결과

+ + {pageUrl} + +
+ + {/* 종합 점수 */} + + + 종합 점수 + + + + + + + {/* 카테고리별 점수 카드 */} +
+ {CATEGORY_KEYS.map((key) => { + const cat = result.categories[key]; + return ( + handleCategoryClick(key)} + /> + ); + })} +
+ + {/* 검사 메타 정보 */} +
+ +
+ + {/* 이슈 상세 링크 */} +
+ +
+ + {/* 이슈 요약 바 */} + + + + + +
+ ); +} diff --git a/frontend/src/app/site-inspections/[id]/progress/page.tsx b/frontend/src/app/site-inspections/[id]/progress/page.tsx new file mode 100644 index 0000000..8621a29 --- /dev/null +++ b/frontend/src/app/site-inspections/[id]/progress/page.tsx @@ -0,0 +1,21 @@ +"use client"; + +import { use } from "react"; +import { SiteCrawlProgress } from "@/components/site-inspection/SiteCrawlProgress"; + +export default function SiteInspectionProgressPage({ + params, +}: { + params: Promise<{ id: string }>; +}) { + const { id } = use(params); + + return ( +
+

+ 사이트 전체 검사 +

+ +
+ ); +} diff --git a/frontend/src/components/inspection/UrlInputForm.tsx b/frontend/src/components/inspection/UrlInputForm.tsx index 047e884..f13def2 100644 --- a/frontend/src/components/inspection/UrlInputForm.tsx +++ b/frontend/src/components/inspection/UrlInputForm.tsx @@ -1,38 +1,74 @@ "use client"; -import { useState, type FormEvent } from "react"; +import { useState, useRef, useEffect, type FormEvent } from "react"; import { useRouter } from "next/navigation"; import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; import { Card, CardContent } from "@/components/ui/card"; -import { Search, Loader2 } from "lucide-react"; +import { Search, Loader2, Globe, ChevronDown } from "lucide-react"; import { api, ApiError } from "@/lib/api"; import { isValidUrl } from "@/lib/constants"; import { useInspectionStore } from "@/stores/useInspectionStore"; +import { useSiteInspectionStore } from "@/stores/useSiteInspectionStore"; +import { cn } from "@/lib/utils"; + +/** 최대 페이지 수 옵션 */ +const MAX_PAGES_OPTIONS = [10, 20, 50] as const; + +/** 크롤링 깊이 옵션 */ +const MAX_DEPTH_OPTIONS = [1, 2, 3] as const; export function UrlInputForm() { const [url, setUrl] = useState(""); const [error, setError] = useState(null); const [isLoading, setIsLoading] = useState(false); + const [isSiteLoading, setIsSiteLoading] = useState(false); + const [showSiteOptions, setShowSiteOptions] = useState(false); + const [maxPages, setMaxPages] = useState(20); + const [maxDepth, setMaxDepth] = useState(2); const router = useRouter(); const { setInspection } = useInspectionStore(); + const { setSiteInspection } = useSiteInspectionStore(); + const siteOptionsRef = useRef(null); + // 드롭다운 외부 클릭 시 닫기 + useEffect(() => { + function handleClickOutside(event: MouseEvent) { + if ( + siteOptionsRef.current && + !siteOptionsRef.current.contains(event.target as Node) + ) { + setShowSiteOptions(false); + } + } + if (showSiteOptions) { + document.addEventListener("mousedown", 
handleClickOutside); + return () => + document.removeEventListener("mousedown", handleClickOutside); + } + }, [showSiteOptions]); + + /** URL 검증 공통 로직 */ + const validateUrl = (): string | null => { + const trimmedUrl = url.trim(); + if (!trimmedUrl) { + setError("URL을 입력해주세요"); + return null; + } + if (!isValidUrl(trimmedUrl)) { + setError("유효한 URL을 입력해주세요 (http:// 또는 https://로 시작)"); + return null; + } + return trimmedUrl; + }; + + /** 단일 페이지 검사 */ const handleSubmit = async (e: FormEvent) => { e.preventDefault(); setError(null); - const trimmedUrl = url.trim(); - - // 클라이언트 사이드 URL 검증 - if (!trimmedUrl) { - setError("URL을 입력해주세요"); - return; - } - - if (!isValidUrl(trimmedUrl)) { - setError("유효한 URL을 입력해주세요 (http:// 또는 https://로 시작)"); - return; - } + const trimmedUrl = validateUrl(); + if (!trimmedUrl) return; setIsLoading(true); @@ -51,43 +87,196 @@ export function UrlInputForm() { } }; + /** 사이트 전체 검사 */ + const handleSiteInspection = async () => { + setError(null); + + const trimmedUrl = validateUrl(); + if (!trimmedUrl) return; + + setIsSiteLoading(true); + setShowSiteOptions(false); + + try { + const response = await api.startSiteInspection( + trimmedUrl, + maxPages, + maxDepth + ); + setSiteInspection(response.site_inspection_id, trimmedUrl); + router.push( + `/site-inspections/${response.site_inspection_id}/progress` + ); + } catch (err) { + if (err instanceof ApiError) { + setError(err.detail); + } else { + setError( + "사이트 검사 시작 중 오류가 발생했습니다. 다시 시도해주세요." + ); + } + } finally { + setIsSiteLoading(false); + } + }; + + const isDisabled = isLoading || isSiteLoading; + return ( -
-
- - { - setUrl(e.target.value); - if (error) setError(null); - }} - placeholder="https://example.com" - className="pl-10 h-12 text-base" - disabled={isLoading} - aria-label="검사할 URL 입력" - aria-invalid={!!error} - aria-describedby={error ? "url-error" : undefined} - /> + + {/* URL 입력 필드 */} +
+
+ + { + setUrl(e.target.value); + if (error) setError(null); + }} + placeholder="https://example.com" + className="pl-10 h-12 text-base" + disabled={isDisabled} + aria-label="검사할 URL 입력" + aria-invalid={!!error} + aria-describedby={error ? "url-error" : undefined} + /> +
+ + {/* 버튼 그룹 */} +
+ {/* 단일 페이지 검사 버튼 */} + + + {/* 사이트 전체 검사 버튼 */} +
+ + + {/* 사이트 검사 옵션 드롭다운 */} + {showSiteOptions && ( +
+

+ 사이트 전체 검사 설정 +

+ + {/* 최대 페이지 수 */} +
+ +
+ {MAX_PAGES_OPTIONS.map((option) => ( + + ))} +
+
+ + {/* 크롤링 깊이 */} +
+ +
+ {MAX_DEPTH_OPTIONS.map((option) => ( + + ))} +
+
+ + {/* 사이트 검사 시작 버튼 */} + +
+ )} +
+
- + + {/* 에러 메시지 */} {error && (

+ {/* 상단 URL 및 검사 요약 */} +

+

사이트 전체 검사 결과

+

+ + {rootUrl} + +

+

+ 검사 완료: {aggregateScores.pages_inspected}/ + {aggregateScores.pages_total} 페이지 +

+
+ + {/* 종합 점수 게이지 */} + + + 사이트 종합 점수 + + + + + + + {/* 카테고리별 평균 점수 카드 (4개) */} +
+ {categoryItems.map((item) => ( + + +

+ {item.label} +

+
+ {item.score} +
+

+ 페이지 평균 점수 +

+
+
+ ))} +
+ + {/* 총 이슈 수 */} + + +
+ + 사이트 전체 이슈 + + + 총 {aggregateScores.total_issues}건 + +
+
+
+
+ ); +} diff --git a/frontend/src/components/site-inspection/PageTree.tsx b/frontend/src/components/site-inspection/PageTree.tsx new file mode 100644 index 0000000..8c0ae4b --- /dev/null +++ b/frontend/src/components/site-inspection/PageTree.tsx @@ -0,0 +1,135 @@ +"use client"; + +import { useMemo } from "react"; +import { Globe, ChevronDown } from "lucide-react"; +import { cn } from "@/lib/utils"; +import { getScoreTailwindColor } from "@/lib/constants"; +import { PageTreeNode } from "./PageTreeNode"; +import type { DiscoveredPage, AggregateScores } from "@/types/site-inspection"; + +interface PageTreeProps { + /** 발견된 페이지 목록 (flat array) */ + pages: DiscoveredPage[]; + /** 현재 선택된 페이지 URL (null = 전체 보기) */ + selectedUrl: string | null; + /** 페이지 선택 핸들러 (null을 전달하면 전체 집계 보기) */ + onSelectPage: (url: string | null) => void; + /** 집계 점수 (전체 노드에 표시) */ + aggregateScores: AggregateScores | null; +} + +/** + * 페이지 트리 사이드바 컴포넌트. + * flat 배열을 parent_url 기준으로 트리 구조로 변환하여 렌더링한다. + */ +export function PageTree({ + pages, + selectedUrl, + onSelectPage, + aggregateScores, +}: PageTreeProps) { + /** + * flat 배열에서 parent_url → children 맵 구성. + * root 노드(parent_url === null)를 최상위 자식으로 처리. + */ + const { rootPages, childrenMap, allPagesMap } = useMemo(() => { + const childrenMap = new Map(); + const allPagesMap = new Map(); + const rootPages: DiscoveredPage[] = []; + + for (const page of pages) { + allPagesMap.set(page.url, page); + } + + for (const page of pages) { + if (page.parent_url === null) { + rootPages.push(page); + } else { + const siblings = childrenMap.get(page.parent_url) || []; + siblings.push(page); + childrenMap.set(page.parent_url, siblings); + } + } + + return { rootPages, childrenMap, allPagesMap }; + }, [pages]); + + const isAggregateSelected = selectedUrl === null; + + return ( +
+ {/* 헤더 */} +
+

+ 페이지 목록 +

+
+ + {/* 트리 본문 */} +
+ {/* 사이트 전체 (집계) 노드 */} +
onSelectPage(null)} + role="treeitem" + aria-selected={isAggregateSelected} + > + + + 사이트 전체 + {aggregateScores && ( + + {aggregateScores.overall_score}점 {aggregateScores.grade} + + )} +
+ + {/* 페이지 노드 트리 (재귀) */} + {rootPages.map((page) => ( + + ))} + + {/* 빈 상태 */} + {pages.length === 0 && ( +
+ 발견된 페이지가 없습니다 +
+ )} +
+ + {/* 하단 요약 */} + {pages.length > 0 && ( +
+ 총 {pages.length}개 페이지 + {aggregateScores && ( + + {" "}/ 검사 완료 {aggregateScores.pages_inspected}/ + {aggregateScores.pages_total} + + )} +
+ )} +
+ ); +} diff --git a/frontend/src/components/site-inspection/PageTreeNode.tsx b/frontend/src/components/site-inspection/PageTreeNode.tsx new file mode 100644 index 0000000..ed50c3a --- /dev/null +++ b/frontend/src/components/site-inspection/PageTreeNode.tsx @@ -0,0 +1,166 @@ +"use client"; + +import { useState } from "react"; +import { + ChevronRight, + ChevronDown, + FileText, + Check, + X, + Circle, +} from "lucide-react"; +import { cn } from "@/lib/utils"; +import { getScoreTailwindColor } from "@/lib/constants"; +import type { DiscoveredPage } from "@/types/site-inspection"; + +interface PageTreeNodeProps { + /** 해당 노드의 페이지 데이터 */ + page: DiscoveredPage; + /** 자식 페이지 목록 (트리 빌드 결과) */ + childrenPages: Map; + /** 모든 페이지 (URL -> DiscoveredPage 조회용) */ + allPages: Map; + /** 현재 선택된 페이지 URL */ + selectedUrl: string | null; + /** 페이지 선택 핸들러 */ + onSelectPage: (url: string) => void; + /** 들여쓰기 레벨 */ + level: number; +} + +/** + * URL에서 도메인을 제거하고 경로만 반환. + * 예: "https://example.com/about" -> "/about" + */ +function getPathFromUrl(url: string): string { + try { + const parsed = new URL(url); + const path = parsed.pathname + parsed.search; + return path || "/"; + } catch { + return url; + } +} + +/** 페이지 트리 노드 (재귀 컴포넌트) */ +export function PageTreeNode({ + page, + childrenPages, + allPages, + selectedUrl, + onSelectPage, + level, +}: PageTreeNodeProps) { + const [isExpanded, setIsExpanded] = useState(level < 2); + const children = childrenPages.get(page.url) || []; + const hasChildren = children.length > 0; + const isSelected = selectedUrl === page.url; + const displayPath = getPathFromUrl(page.url); + + return ( +
+ {/* 노드 행 */} +
onSelectPage(page.url)} + role="treeitem" + aria-selected={isSelected} + aria-expanded={hasChildren ? isExpanded : undefined} + > + {/* 확장/축소 토글 */} + {hasChildren ? ( + + ) : ( + + )} + + {/* 상태 아이콘 */} + + + {/* 페이지 아이콘 */} + + + {/* URL 경로 */} + + {displayPath} + + + {/* 점수 배지 (완료 시) */} + {page.status === "completed" && page.overall_score !== null && ( + + {page.overall_score} + + )} +
+ + {/* 자식 노드 (재귀) */} + {hasChildren && isExpanded && ( +
+ {children.map((childPage) => ( + + ))} +
+ )} +
+ ); +} + +/** 상태 아이콘 컴포넌트 */ +function StatusIcon({ status }: { status: DiscoveredPage["status"] }) { + switch (status) { + case "pending": + return ( + + ); + case "inspecting": + return ( + + ); + case "completed": + return ( + + ); + case "error": + return ( + + ); + default: + return ( + + ); + } +} diff --git a/frontend/src/components/site-inspection/SiteCrawlProgress.tsx b/frontend/src/components/site-inspection/SiteCrawlProgress.tsx new file mode 100644 index 0000000..c526c21 --- /dev/null +++ b/frontend/src/components/site-inspection/SiteCrawlProgress.tsx @@ -0,0 +1,259 @@ +"use client"; + +import { useSiteInspectionStore } from "@/stores/useSiteInspectionStore"; +import { useSiteInspectionSSE } from "@/hooks/useSiteInspectionSSE"; +import { Progress } from "@/components/ui/progress"; +import { Card, CardContent } from "@/components/ui/card"; +import { ErrorState } from "@/components/common/ErrorState"; +import { + Globe, + Search, + Check, + X, + Circle, + Loader2, + ExternalLink, +} from "lucide-react"; +import { cn } from "@/lib/utils"; +import { getScoreTailwindColor } from "@/lib/constants"; +import type { DiscoveredPage } from "@/types/site-inspection"; + +interface SiteCrawlProgressProps { + siteInspectionId: string; +} + +/** + * 사이트 전체 검사 진행 상태 표시 컴포넌트. + * 크롤링 단계와 검사 단계를 시각적으로 표현한다. + */ +export function SiteCrawlProgress({ + siteInspectionId, +}: SiteCrawlProgressProps) { + const { + status, + rootUrl, + crawlProgress, + discoveredPages, + aggregateScores, + errorMessage, + } = useSiteInspectionStore(); + + // SSE 연결 + useSiteInspectionSSE(siteInspectionId); + + const handleRetry = () => { + window.location.reload(); + }; + + // 전체 진행률 계산 + const completedPages = discoveredPages.filter( + (p) => p.status === "completed" + ).length; + const totalPages = discoveredPages.length; + const overallProgress = + totalPages > 0 ? Math.round((completedPages / totalPages) * 100) : 0; + + return ( +
+ {/* URL 표시 */} + {rootUrl && ( +
+ + {rootUrl} +
+ )} + + {/* 크롤링 단계 */} + {status === "crawling" && ( + + )} + + {/* 검사 단계 */} + {(status === "inspecting" || status === "completed") && ( + + )} + + {/* 에러 상태 */} + {status === "error" && ( +
+ +
+ )} + + {/* 초기 연결 중 상태 */} + {status === "idle" && ( +
+ +

+ 서버에 연결하는 중... +

+
+ )} +
+ ); +} + +/** 크롤링 단계 UI */ +function CrawlPhase({ + pagesFound, + currentUrl, +}: { + pagesFound: number; + currentUrl: string; +}) { + return ( + + +
+
+ + +
+
+

사이트 링크 수집 중...

+

+ {pagesFound}개 페이지 발견 +

+
+
+ + {/* 크롤링 진행 바 (무한 애니메이션) */} +
+
+
+ + {/* 현재 크롤링 중인 URL */} + {currentUrl && ( +

+ {currentUrl} +

+ )} + + + ); +} + +/** 검사 단계 UI */ +function InspectionPhase({ + pages, + completedPages, + totalPages, + overallProgress, + aggregateScores, +}: { + pages: DiscoveredPage[]; + completedPages: number; + totalPages: number; + overallProgress: number; + aggregateScores: { overall_score: number; grade: string } | null; +}) { + return ( +
+ {/* 전체 진행률 */} + + +
+

페이지 검사 진행

+ + {completedPages}/{totalPages} + +
+ +
+ + {overallProgress}% 완료 + + {aggregateScores && ( + + 현재 평균: {aggregateScores.overall_score}점{" "} + {aggregateScores.grade} + + )} +
+
+
+ + {/* 개별 페이지 목록 */} +
+ {pages.map((page) => ( + + ))} +
+
+ ); +} + +/** 개별 페이지 진행 항목 */ +function PageProgressItem({ page }: { page: DiscoveredPage }) { + let displayPath: string; + try { + const parsed = new URL(page.url); + displayPath = parsed.pathname + parsed.search || "/"; + } catch { + displayPath = page.url; + } + + return ( +
+ {/* 상태 아이콘 */} + + + {/* URL 경로 */} + + {displayPath} + + + {/* 점수 (완료 시) */} + {page.status === "completed" && page.overall_score !== null && ( + + {page.overall_score}점 + + )} + + {/* 검사 중 표시 */} + {page.status === "inspecting" && ( + 검사 중 + )} +
+ ); +} + +/** 페이지 상태 아이콘 */ +function PageStatusIcon({ status }: { status: DiscoveredPage["status"] }) { + switch (status) { + case "pending": + return ; + case "inspecting": + return ( + + ); + case "completed": + return ; + case "error": + return ; + default: + return ; + } +} diff --git a/frontend/src/hooks/useSiteInspectionSSE.ts b/frontend/src/hooks/useSiteInspectionSSE.ts new file mode 100644 index 0000000..d09b6f6 --- /dev/null +++ b/frontend/src/hooks/useSiteInspectionSSE.ts @@ -0,0 +1,142 @@ +"use client"; + +import { useEffect, useRef } from "react"; +import { useRouter } from "next/navigation"; +import { useSiteInspectionStore } from "@/stores/useSiteInspectionStore"; +import { api } from "@/lib/api"; +import type { + SSECrawlProgress, + SSECrawlComplete, + SSEPageStart, + SSEPageComplete, + SSEAggregateUpdate, + SSESiteComplete, +} from "@/types/site-inspection"; + +/** + * SSE를 통해 사이트 전체 검사 진행 상태를 수신하는 커스텀 훅. + * EventSource로 크롤링 + 검사 진행 상태를 실시간 수신하고 + * Zustand 스토어를 업데이트한다. 
+ */ +export function useSiteInspectionSSE(siteInspectionId: string | null) { + const { + setCrawlProgress, + setCrawlComplete, + updatePageStatus, + setPageComplete, + updateAggregateScores, + setCompleted, + setError, + } = useSiteInspectionStore(); + const router = useRouter(); + const eventSourceRef = useRef(null); + + useEffect(() => { + if (!siteInspectionId) return; + + const streamUrl = api.getSiteStreamUrl(siteInspectionId); + const eventSource = new EventSource(streamUrl); + eventSourceRef.current = eventSource; + + /** 크롤링 진행 이벤트 */ + eventSource.addEventListener("crawl_progress", (e: MessageEvent) => { + try { + const data: SSECrawlProgress = JSON.parse(e.data); + setCrawlProgress(data.pages_found, data.current_url); + } catch { + // JSON 파싱 실패 무시 + } + }); + + /** 크롤링 완료 이벤트 */ + eventSource.addEventListener("crawl_complete", (e: MessageEvent) => { + try { + const data: SSECrawlComplete = JSON.parse(e.data); + setCrawlComplete(data); + } catch { + // JSON 파싱 실패 무시 + } + }); + + /** 개별 페이지 검사 시작 이벤트 */ + eventSource.addEventListener("page_start", (e: MessageEvent) => { + try { + const data: SSEPageStart = JSON.parse(e.data); + updatePageStatus(data.page_url, "inspecting"); + } catch { + // JSON 파싱 실패 무시 + } + }); + + /** 개별 페이지 검사 완료 이벤트 */ + eventSource.addEventListener("page_complete", (e: MessageEvent) => { + try { + const data: SSEPageComplete = JSON.parse(e.data); + setPageComplete(data); + } catch { + // JSON 파싱 실패 무시 + } + }); + + /** 집계 점수 업데이트 이벤트 */ + eventSource.addEventListener("aggregate_update", (e: MessageEvent) => { + try { + const data: SSEAggregateUpdate = JSON.parse(e.data); + updateAggregateScores(data); + } catch { + // JSON 파싱 실패 무시 + } + }); + + /** 사이트 검사 완료 이벤트 */ + eventSource.addEventListener("complete", (e: MessageEvent) => { + try { + const data: SSESiteComplete = JSON.parse(e.data); + setCompleted(data.aggregate_scores); + eventSource.close(); + // 결과 페이지로 자동 이동 + router.push(`/site-inspections/${siteInspectionId}`); + } 
catch { + // JSON 파싱 실패 무시 + } + }); + + /** 에러 이벤트 */ + eventSource.addEventListener("error", (e: Event) => { + if (e instanceof MessageEvent) { + try { + const data = JSON.parse(e.data); + setError(data.message || "사이트 검사 중 오류가 발생했습니다"); + } catch { + setError("사이트 검사 중 오류가 발생했습니다"); + } + } + // 네트워크 에러인 경우 + if (eventSource.readyState === EventSource.CLOSED) { + setError("서버와의 연결이 끊어졌습니다"); + } + }); + + // SSE 연결 타임아웃 (10분 - 사이트 전체 검사는 시간이 더 소요됨) + const timeout = setTimeout(() => { + eventSource.close(); + setError("사이트 검사 시간이 초과되었습니다 (10분)"); + }, 600000); + + return () => { + clearTimeout(timeout); + eventSource.close(); + eventSourceRef.current = null; + }; + }, [ + siteInspectionId, + setCrawlProgress, + setCrawlComplete, + updatePageStatus, + setPageComplete, + updateAggregateScores, + setCompleted, + setError, + router, + ]); +} diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index e2e6482..5551289 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -7,6 +7,11 @@ import type { HistoryParams, TrendResponse, } from "@/types/inspection"; +import type { + StartSiteInspectionResponse, + SiteInspectionResult, + InspectPageResponse, +} from "@/types/site-inspection"; const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL ?? 
""; @@ -143,6 +148,50 @@ class ApiClient { getStreamUrl(inspectionId: string): string { return `${this.baseUrl}/api/inspections/${inspectionId}/stream`; } + + // ───────────────────────────────────────────────────── + // 사이트 전체 검사 API + // ───────────────────────────────────────────────────── + + /** 사이트 전체 검사 시작 */ + async startSiteInspection( + url: string, + maxPages?: number, + maxDepth?: number + ): Promise { + return this.request("/api/site-inspections", { + method: "POST", + body: JSON.stringify({ + url, + max_pages: maxPages, + max_depth: maxDepth, + }), + }); + } + + /** 사이트 검사 결과 조회 */ + async getSiteInspection(id: string): Promise { + return this.request(`/api/site-inspections/${id}`); + } + + /** 특정 페이지 수동 검사 */ + async inspectPage( + siteInspectionId: string, + pageUrl: string + ): Promise { + return this.request( + `/api/site-inspections/${siteInspectionId}/inspect-page`, + { + method: "POST", + body: JSON.stringify({ url: pageUrl }), + } + ); + } + + /** 사이트 검사 SSE 스트림 URL 반환 */ + getSiteStreamUrl(siteInspectionId: string): string { + return `${this.baseUrl}/api/site-inspections/${siteInspectionId}/stream`; + } } export const api = new ApiClient(API_BASE_URL); diff --git a/frontend/src/lib/queries.ts b/frontend/src/lib/queries.ts index 53174c8..58aeb43 100644 --- a/frontend/src/lib/queries.ts +++ b/frontend/src/lib/queries.ts @@ -55,3 +55,13 @@ export function useRecentInspections() { staleTime: 60 * 1000, }); } + +/** 사이트 전체 검사 결과 조회 */ +export function useSiteInspectionResult(siteInspectionId: string | null) { + return useQuery({ + queryKey: ["siteInspection", siteInspectionId], + queryFn: () => api.getSiteInspection(siteInspectionId!), + enabled: !!siteInspectionId, + staleTime: 5 * 60 * 1000, + }); +} diff --git a/frontend/src/stores/useSiteInspectionStore.ts b/frontend/src/stores/useSiteInspectionStore.ts new file mode 100644 index 0000000..7fb5a65 --- /dev/null +++ b/frontend/src/stores/useSiteInspectionStore.ts @@ -0,0 +1,156 @@ +import { 
create } from "zustand";
import type {
  DiscoveredPage,
  AggregateScores,
  SiteInspectionPhase,
  CrawlProgress,
  SSECrawlComplete,
  SSEPageComplete,
  SSEAggregateUpdate,
} from "@/types/site-inspection";

/** Store shape: site-wide inspection progress plus its update actions. */
interface SiteInspectionState {
  siteInspectionId: string | null;
  rootUrl: string | null;
  status: SiteInspectionPhase;
  discoveredPages: DiscoveredPage[];
  aggregateScores: AggregateScores | null;
  selectedPageUrl: string | null;
  errorMessage: string | null;
  crawlProgress: CrawlProgress | null;

  // Actions
  setSiteInspection: (id: string, rootUrl: string) => void;
  setCrawlProgress: (pagesFound: number, currentUrl: string) => void;
  setCrawlComplete: (data: SSECrawlComplete) => void;
  updatePageStatus: (
    pageUrl: string,
    status: DiscoveredPage["status"],
    extra?: {
      inspection_id?: string;
      overall_score?: number;
      grade?: string;
    }
  ) => void;
  setPageComplete: (data: SSEPageComplete) => void;
  updateAggregateScores: (data: SSEAggregateUpdate) => void;
  setSelectedPage: (url: string | null) => void;
  setCompleted: (aggregateScores: AggregateScores) => void;
  setError: (message: string) => void;
  reset: () => void;
}

const initialState = {
  siteInspectionId: null,
  rootUrl: null,
  status: "idle" as SiteInspectionPhase,
  discoveredPages: [] as DiscoveredPage[],
  aggregateScores: null,
  selectedPageUrl: null,
  errorMessage: null,
  crawlProgress: null,
};

// Restored stripped type argument on `create` — the state interface is
// declared directly above.
export const useSiteInspectionStore = create<SiteInspectionState>(
  (set) => ({
    ...initialState,

    // Begin a new inspection: wipe previous state, enter the crawl phase.
    setSiteInspection: (id, rootUrl) =>
      set({
        ...initialState,
        siteInspectionId: id,
        rootUrl,
        status: "crawling",
      }),

    setCrawlProgress: (pagesFound, currentUrl) =>
      set({
        status: "crawling",
        crawlProgress: { pagesFound, currentUrl },
      }),

    // Crawl done: record the discovered pages and move to the inspect phase.
    setCrawlComplete: (data) =>
      set({
        status: "inspecting",
        discoveredPages: data.pages,
        crawlProgress: {
          pagesFound: data.total_pages,
          currentUrl: "",
        },
      }),

    // Patch the status (and optional extras) of the page matching `pageUrl`.
    // NOTE(review): the truthiness checks skip empty-string extras by design;
    // `overall_score` uses an explicit `!== undefined` so a score of 0 sticks.
    updatePageStatus: (pageUrl, status, extra) =>
      set((state) => ({
        discoveredPages: state.discoveredPages.map((page) =>
          page.url === pageUrl
            ? {
                ...page,
                status,
                ...(extra?.inspection_id && {
                  inspection_id: extra.inspection_id,
                }),
                ...(extra?.overall_score !== undefined && {
                  overall_score: extra.overall_score,
                }),
                ...(extra?.grade && { grade: extra.grade }),
              }
            : page
        ),
      })),

    // Mark one page completed and attach its inspection result summary.
    setPageComplete: (data) =>
      set((state) => ({
        discoveredPages: state.discoveredPages.map((page) =>
          page.url === data.page_url
            ? {
                ...page,
                status: "completed" as const,
                inspection_id: data.inspection_id,
                overall_score: data.overall_score,
                grade: data.grade,
              }
            : page
        ),
      })),

    // Merge an incremental aggregate update; seed zeroed category scores
    // when no aggregate has been received yet.
    updateAggregateScores: (data) =>
      set((state) => ({
        aggregateScores: state.aggregateScores
          ? {
              ...state.aggregateScores,
              pages_inspected: data.pages_inspected,
              pages_total: data.pages_total,
              overall_score: data.overall_score,
              grade: data.grade,
            }
          : {
              overall_score: data.overall_score,
              grade: data.grade,
              html_css: 0,
              accessibility: 0,
              seo: 0,
              performance_security: 0,
              total_issues: 0,
              pages_inspected: data.pages_inspected,
              pages_total: data.pages_total,
            },
      })),

    setSelectedPage: (url) =>
      set({ selectedPageUrl: url }),

    setCompleted: (aggregateScores) =>
      set({
        status: "completed",
        aggregateScores,
      }),

    setError: (message) =>
      set({
        status: "error",
        errorMessage: message,
      }),

    reset: () => set({ ...initialState }),
  })
);

// ── frontend/src/types/site-inspection.ts ───────────────────────────────────

import type { Grade } from "@/types/inspection";
// NOTE(review): `Grade` is imported but the visible declarations type their
// `grade` fields as plain `string` — confirm whether they should use `Grade`.

// ───────────────────────────────────────────────────────
// Site inspection domain types
// ───────────────────────────────────────────────────────

/** Status of a single discovered page. */
export type DiscoveredPageStatus =
  | "pending"
  | "inspecting"
  | "completed"
  | "error";
/** Overall status of a site-wide inspection. */
export type SiteInspectionStatus =
  | "crawling"
  | "inspecting"
  | "completed"
  | "error";

/** A single page discovered by the crawler. */
export interface DiscoveredPage {
  url: string;
  depth: number;
  parent_url: string | null;
  inspection_id: string | null;
  status: DiscoveredPageStatus;
  title: string | null;
  overall_score: number | null;
  grade: string | null; // NOTE(review): possibly meant to be `Grade` — confirm
}

/** Aggregate scores across the whole site. */
export interface AggregateScores {
  overall_score: number;
  grade: string;
  html_css: number;
  accessibility: number;
  seo: number;
  performance_security: number;
  total_issues: number;
  pages_inspected: number;
  pages_total: number;
}

/** Crawl configuration for a site inspection. */
export interface SiteInspectionConfig {
  max_pages: number;
  max_depth: number;
}

/** GET /api/site-inspections/{id} response — full site inspection result. */
export interface SiteInspectionResult {
  site_inspection_id: string;
  root_url: string;
  domain: string;
  status: SiteInspectionStatus;
  created_at: string;
  completed_at: string | null;
  config: SiteInspectionConfig;
  discovered_pages: DiscoveredPage[];
  aggregate_scores: AggregateScores | null;
}

/** POST /api/site-inspections response. */
export interface StartSiteInspectionResponse {
  site_inspection_id: string;
  status: string;
  root_url: string;
  stream_url: string;
}

/**
 * POST /api/site-inspections/{id}/inspect-page response.
 * (Fixed: the original comment said /pages/{url}/inspect, which does not
 * match the endpoint the API client actually calls.)
 */
export interface InspectPageResponse {
  inspection_id: string;
}

// ───────────────────────────────────────────────────────
// SSE event types
// ───────────────────────────────────────────────────────

/** SSE crawl_progress event. */
export interface SSECrawlProgress {
  pages_found: number;
  current_url: string;
}

/** SSE crawl_complete event. */
export interface SSECrawlComplete {
  total_pages: number;
  pages: DiscoveredPage[];
}

/** SSE page_start event. */
export interface SSEPageStart {
  page_url: string;
  page_index: number;
}

/** SSE page_complete event. */
export interface SSEPageComplete {
  page_url: string;
  inspection_id: string;
  overall_score: number;
  grade: string;
}

/** SSE aggregate_update event. */
export interface SSEAggregateUpdate {
  pages_inspected: number;
  pages_total: number;
  overall_score: number;
  grade: string;
}

/** SSE complete event (site inspection finished). */
export interface SSESiteComplete {
  status: "completed";
  aggregate_scores: AggregateScores;
}

/** SSE error event. */
export interface SSESiteError {
  message: string;
}

// ───────────────────────────────────────────────────────
// Frontend-internal state types
// ───────────────────────────────────────────────────────

/** Site inspection phase as tracked by the Zustand store. */
export type SiteInspectionPhase =
  | "idle"
  | "crawling"
  | "inspecting"
  | "completed"
  | "error";

/** Crawl progress snapshot. */
export interface CrawlProgress {
  pagesFound: number;
  currentUrl: string;
}

/** Page tree node (UI helper). */
export interface PageTreeNode {
  page: DiscoveredPage;
  children: PageTreeNode[];
}