feat: 웹사이트 표준화 검사 도구 구현

- 4개 검사 엔진: HTML/CSS, 접근성(WCAG), SEO, 성능/보안 (총 50개 항목)
- FastAPI 백엔드 (9개 API, SSE 실시간 진행, PDF/JSON 리포트)
- Next.js 15 프론트엔드 (6개 페이지, 29개 컴포넌트, 반원 게이지 차트)
- Docker Compose 배포 (Backend:8011, Frontend:3011, MongoDB:27022, Redis:6392)
- 전체 테스트 32/32 PASS

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2026-02-13 13:57:27 +09:00
parent c37cda5b13
commit b5fa5d96b9
93 changed files with 18735 additions and 22 deletions

View File

View File

@ -0,0 +1,493 @@
"""
Inspection orchestration service.
Manages the full inspection lifecycle:
- URL validation and fetching
- Parallel execution of 4 checker engines
- Progress tracking via Redis
- Result aggregation and storage in MongoDB
"""
import asyncio
import json
import logging
import time
import uuid
from datetime import datetime, timezone
from typing import Optional
import httpx
from motor.motor_asyncio import AsyncIOMotorDatabase
from redis.asyncio import Redis
from app.core.config import get_settings
from app.core.redis import (
set_inspection_status,
update_category_progress,
publish_event,
cache_result,
)
from app.engines.html_css import HtmlCssChecker
from app.engines.accessibility import AccessibilityChecker
from app.engines.seo import SeoChecker
from app.engines.performance_security import PerformanceSecurityChecker
from app.models.schemas import (
CategoryResult,
InspectionResult,
IssueSummary,
Severity,
calculate_grade,
calculate_overall_score,
)
logger = logging.getLogger(__name__)
class InspectionService:
    """Inspection orchestration service."""

    def __init__(self, db: AsyncIOMotorDatabase, redis: Redis):
        # MongoDB handle: stores inspection result documents.
        self.db = db
        # Redis handle: progress state, SSE pub/sub, and result caching.
        self.redis = redis
async def start_inspection(self, url: str) -> str:
"""
Start an inspection and return the inspection_id.
1. Validate URL accessibility (timeout 10s)
2. Generate inspection_id (UUID v4)
3. Initialize progress state in Redis
4. Launch background inspection task
"""
settings = get_settings()
# 1. Fetch URL to verify accessibility
response = await self._fetch_url(url, timeout=settings.URL_FETCH_TIMEOUT)
# 2. Generate inspection_id
inspection_id = str(uuid.uuid4())
# 3. Initialize Redis state
await self._init_progress(inspection_id, url)
# 4. Run inspection as background task
asyncio.create_task(
self._run_inspection(inspection_id, url, response)
)
return inspection_id
    async def _run_inspection(
        self, inspection_id: str, url: str, response: httpx.Response
    ) -> None:
        """
        Execute the 4 category checks in parallel and store results.

        Runs as a fire-and-forget background task: every failure is caught,
        logged, published as an "error" SSE event, and persisted as an error
        document — nothing propagates to the caller.
        """
        html_content = response.text
        headers = dict(response.headers)
        start_time = time.time()
        created_at = datetime.now(timezone.utc)
        try:
            # Progress callback factory: the closure pins updates from every
            # engine to this inspection_id.
            async def progress_callback(category: str, progress: int, current_step: str):
                await self._update_progress(inspection_id, category, progress, current_step)
            # Create 4 checker engines
            checkers = [
                HtmlCssChecker(progress_callback=progress_callback),
                AccessibilityChecker(progress_callback=progress_callback),
                SeoChecker(progress_callback=progress_callback),
                PerformanceSecurityChecker(progress_callback=progress_callback),
            ]
            settings = get_settings()
            # Parallel execution with per-category timeout.
            # return_exceptions=True keeps one failing/timed-out category from
            # cancelling the other three.
            results = await asyncio.gather(
                *[
                    asyncio.wait_for(
                        checker.check(url, html_content, headers),
                        timeout=settings.CATEGORY_TIMEOUT,
                    )
                    for checker in checkers
                ],
                return_exceptions=True,
            )
            # Process results (handle timeouts/errors per category).
            # `results` order matches the `checkers` list above.
            categories = {}
            category_names = ["html_css", "accessibility", "seo", "performance_security"]
            for i, result in enumerate(results):
                cat_name = category_names[i]
                if isinstance(result, Exception):
                    logger.error(
                        "Category %s failed for inspection %s: %s",
                        cat_name, inspection_id, str(result),
                    )
                    # Create error result for failed category (score 0, grade F)
                    categories[cat_name] = CategoryResult(
                        score=0,
                        grade="F",
                        total_issues=0,
                        issues=[],
                    )
                    # Publish category error
                    await publish_event(inspection_id, {
                        "event_type": "category_complete",
                        "inspection_id": inspection_id,
                        "category": cat_name,
                        "score": 0,
                        "total_issues": 0,
                    })
                else:
                    categories[cat_name] = result
                    # Publish category completion
                    await publish_event(inspection_id, {
                        "event_type": "category_complete",
                        "inspection_id": inspection_id,
                        "category": cat_name,
                        "score": result.score,
                        "total_issues": result.total_issues,
                    })
            # Calculate overall score and letter grade from category results
            overall_score = calculate_overall_score(categories)
            grade = calculate_grade(overall_score)
            duration = round(time.time() - start_time, 1)
            # Build summary.
            # NOTE(review): assumes CategoryResult exposes per-severity counts
            # (critical/major/minor/info) — defined in app.models.schemas.
            total_critical = sum(c.critical for c in categories.values())
            total_major = sum(c.major for c in categories.values())
            total_minor = sum(c.minor for c in categories.values())
            total_info = sum(c.info for c in categories.values())
            total_issues = sum(c.total_issues for c in categories.values())
            summary = IssueSummary(
                total_issues=total_issues,
                critical=total_critical,
                major=total_major,
                minor=total_minor,
                info=total_info,
            )
            # Build inspection result
            completed_at = datetime.now(timezone.utc)
            inspection_result = InspectionResult(
                inspection_id=inspection_id,
                url=url,
                status="completed",
                created_at=created_at,
                completed_at=completed_at,
                duration_seconds=duration,
                overall_score=overall_score,
                grade=grade,
                categories=categories,
                summary=summary,
            )
            # Store in MongoDB (mode="json" makes the dump BSON/JSON friendly)
            doc = inspection_result.model_dump(mode="json")
            await self.db.inspections.insert_one(doc)
            # Enforce URL history limit (max 100 per URL)
            await self._enforce_history_limit(url, max_count=100)
            # Cache in Redis
            await cache_result(inspection_id, doc)
            # Mark as completed
            await set_inspection_status(inspection_id, "completed")
            # Publish complete event so SSE clients can redirect to the result page
            await publish_event(inspection_id, {
                "event_type": "complete",
                "inspection_id": inspection_id,
                "status": "completed",
                "overall_score": overall_score,
                "redirect_url": f"/inspections/{inspection_id}",
            })
            logger.info(
                "Inspection %s completed: score=%d, duration=%.1fs",
                inspection_id, overall_score, duration,
            )
        except Exception as e:
            # Background-task boundary: log with traceback, flip the Redis
            # status, notify SSE subscribers, and persist an error record.
            logger.error(
                "Inspection %s failed: %s", inspection_id, str(e), exc_info=True
            )
            await set_inspection_status(inspection_id, "error")
            await publish_event(inspection_id, {
                "event_type": "error",
                "inspection_id": inspection_id,
                "status": "error",
                "message": "검사 중 오류가 발생했습니다",
            })
            # Store error record in MongoDB (message truncated to 500 chars)
            error_doc = {
                "inspection_id": inspection_id,
                "url": url,
                "status": "error",
                "created_at": datetime.now(timezone.utc),
                "error_message": str(e)[:500],
                "overall_score": 0,
                "grade": "F",
                "categories": {},
                "summary": {
                    "total_issues": 0,
                    "critical": 0,
                    "major": 0,
                    "minor": 0,
                    "info": 0,
                },
            }
            await self.db.inspections.insert_one(error_doc)
    async def _fetch_url(self, url: str, timeout: int = 10) -> httpx.Response:
        """
        Fetch URL content with timeout.

        Follows redirects and raises for non-2xx responses.

        Raises:
            httpx.HTTPStatusError: on 4xx/5xx responses (raise_for_status).
            httpx.TimeoutException: if the request exceeds `timeout` seconds.
        """
        async with httpx.AsyncClient(
            follow_redirects=True,
            timeout=httpx.Timeout(float(timeout)),
            # NOTE(review): TLS verification is disabled, presumably so sites
            # with self-signed/expired certs can still be inspected. Confirm
            # this is intentional — the fetch is open to MITM as written.
            verify=False,
        ) as client:
            response = await client.get(url, headers={
                "User-Agent": "WebInspector/1.0 (Inspection Bot)",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
            })
            response.raise_for_status()
            return response
async def _init_progress(self, inspection_id: str, url: str) -> None:
"""Initialize inspection progress in Redis."""
await set_inspection_status(inspection_id, "running")
# Initialize all category progresses
for cat in ["html_css", "accessibility", "seo", "performance_security"]:
await update_category_progress(inspection_id, cat, 0, "대기 중...")
async def _update_progress(
self, inspection_id: str, category: str, progress: int, current_step: str
) -> None:
"""Update category progress and publish SSE event."""
await update_category_progress(inspection_id, category, progress, current_step)
# Build full progress state
progress_data = await self._build_progress_event(inspection_id, category, progress, current_step)
await publish_event(inspection_id, progress_data)
async def _build_progress_event(
self, inspection_id: str, updated_category: str, progress: int, current_step: str
) -> dict:
"""Build progress event data including all categories."""
from app.core.redis import get_current_progress
raw = await get_current_progress(inspection_id)
categories = {}
category_list = ["html_css", "accessibility", "seo", "performance_security"]
for cat in category_list:
if raw:
cat_progress = int(raw.get(f"{cat}_progress", 0))
cat_step = raw.get(f"{cat}_step", "")
cat_status = raw.get(f"{cat}_status", "pending")
else:
cat_progress = 0
cat_step = ""
cat_status = "pending"
# Override with just-updated values
if cat == updated_category:
cat_progress = progress
cat_step = current_step
cat_status = "completed" if progress >= 100 else "running"
categories[cat] = {
"status": cat_status,
"progress": cat_progress,
"current_step": cat_step,
}
# Calculate overall progress
total_progress = sum(c["progress"] for c in categories.values())
overall_progress = round(total_progress / len(categories))
return {
"event_type": "progress",
"inspection_id": inspection_id,
"status": "running",
"overall_progress": overall_progress,
"categories": categories,
}
async def _enforce_history_limit(self, url: str, max_count: int = 100) -> None:
"""Delete oldest inspection records if URL exceeds max_count."""
count = await self.db.inspections.count_documents({"url": url})
if count > max_count:
excess = count - max_count
oldest = self.db.inspections.find(
{"url": url}
).sort("created_at", 1).limit(excess)
ids_to_delete = []
async for doc in oldest:
ids_to_delete.append(doc["_id"])
if ids_to_delete:
await self.db.inspections.delete_many({"_id": {"$in": ids_to_delete}})
logger.info(
"Deleted %d oldest inspections for URL %s",
len(ids_to_delete), url,
)
async def get_inspection(self, inspection_id: str) -> Optional[dict]:
"""Get inspection result by ID (cache-first)."""
from app.core.redis import get_cached_result, cache_result
# Try cache first
cached = await get_cached_result(inspection_id)
if cached:
return cached
# Fetch from MongoDB
doc = await self.db.inspections.find_one(
{"inspection_id": inspection_id},
{"_id": 0},
)
if doc:
await cache_result(inspection_id, doc)
return doc
return None
async def get_issues(
self,
inspection_id: str,
category: Optional[str] = None,
severity: Optional[str] = None,
) -> Optional[dict]:
"""Get filtered issues for an inspection."""
doc = await self.get_inspection(inspection_id)
if not doc:
return None
all_issues = []
categories = doc.get("categories", {})
for cat_name, cat_data in categories.items():
if category and category != "all" and cat_name != category:
continue
for issue in cat_data.get("issues", []):
if severity and severity != "all" and issue.get("severity") != severity:
continue
all_issues.append(issue)
# Sort by severity priority
severity_order = {"critical": 0, "major": 1, "minor": 2, "info": 3}
all_issues.sort(key=lambda x: severity_order.get(x.get("severity", "info"), 4))
return {
"inspection_id": inspection_id,
"total": len(all_issues),
"filters": {
"category": category or "all",
"severity": severity or "all",
},
"issues": all_issues,
}
async def get_inspection_list(
self,
page: int = 1,
limit: int = 20,
url_filter: Optional[str] = None,
sort: str = "-created_at",
) -> dict:
"""Get paginated inspection list."""
limit = min(limit, 100)
skip = (page - 1) * limit
# Build query
query = {}
if url_filter:
query["url"] = {"$regex": url_filter, "$options": "i"}
# Sort direction
if sort.startswith("-"):
sort_field = sort[1:]
sort_dir = -1
else:
sort_field = sort
sort_dir = 1
# Count total
total = await self.db.inspections.count_documents(query)
# Fetch items
cursor = self.db.inspections.find(
query,
{
"_id": 0,
"inspection_id": 1,
"url": 1,
"created_at": 1,
"overall_score": 1,
"grade": 1,
"summary.total_issues": 1,
},
).sort(sort_field, sort_dir).skip(skip).limit(limit)
items = []
async for doc in cursor:
items.append({
"inspection_id": doc.get("inspection_id"),
"url": doc.get("url"),
"created_at": doc.get("created_at"),
"overall_score": doc.get("overall_score", 0),
"grade": doc.get("grade", "F"),
"total_issues": doc.get("summary", {}).get("total_issues", 0),
})
total_pages = max(1, -(-total // limit)) # Ceiling division
return {
"items": items,
"total": total,
"page": page,
"limit": limit,
"total_pages": total_pages,
}
async def get_trend(self, url: str, limit: int = 10) -> dict:
"""Get trend data for a specific URL."""
cursor = self.db.inspections.find(
{"url": url, "status": "completed"},
{
"_id": 0,
"inspection_id": 1,
"created_at": 1,
"overall_score": 1,
"categories.html_css.score": 1,
"categories.accessibility.score": 1,
"categories.seo.score": 1,
"categories.performance_security.score": 1,
},
).sort("created_at", 1).limit(limit)
data_points = []
async for doc in cursor:
cats = doc.get("categories", {})
data_points.append({
"inspection_id": doc.get("inspection_id"),
"created_at": doc.get("created_at"),
"overall_score": doc.get("overall_score", 0),
"html_css": cats.get("html_css", {}).get("score", 0),
"accessibility": cats.get("accessibility", {}).get("score", 0),
"seo": cats.get("seo", {}).get("score", 0),
"performance_security": cats.get("performance_security", {}).get("score", 0),
})
return {
"url": url,
"data_points": data_points,
}

View File

@ -0,0 +1,95 @@
"""
Report generation service.
Generates PDF and JSON reports from inspection results.
"""
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse
from jinja2 import Environment, FileSystemLoader
from slugify import slugify
logger = logging.getLogger(__name__)

# Jinja2 templates directory, resolved relative to this module (../templates)
TEMPLATES_DIR = Path(__file__).parent.parent / "templates"

# Grade -> hex color used in rendered reports (A+ and A share green)
GRADE_COLORS = {
    "A+": "#22C55E",
    "A": "#22C55E",
    "B": "#3B82F6",
    "C": "#F59E0B",
    "D": "#F97316",
    "F": "#EF4444",
}

# Issue severity -> hex color (matches the frontend severity palette)
SEVERITY_COLORS = {
    "critical": "#EF4444",
    "major": "#F97316",
    "minor": "#EAB308",
    "info": "#3B82F6",
}

# Internal category key -> human-readable (Korean) report label
CATEGORY_LABELS = {
    "html_css": "HTML/CSS 표준",
    "accessibility": "접근성 (WCAG)",
    "seo": "SEO 최적화",
    "performance_security": "성능/보안",
}
class ReportService:
    """PDF and JSON report generation service."""

    def __init__(self):
        # Autoescaping is on: inspection data may contain HTML fragments.
        self.env = Environment(
            loader=FileSystemLoader(str(TEMPLATES_DIR)),
            autoescape=True,
        )
        # Register custom filters used by report.html.
        self.env.filters["grade_color"] = lambda g: GRADE_COLORS.get(g, "#6B7280")
        self.env.filters["severity_color"] = lambda s: SEVERITY_COLORS.get(s, "#6B7280")
        self.env.filters["category_label"] = lambda c: CATEGORY_LABELS.get(c, c)

    async def generate_pdf(self, inspection: dict) -> bytes:
        """
        Render the inspection result into a PDF report.

        Raises:
            RuntimeError: if WeasyPrint is not installed or rendering fails
                (the original exception is chained for debugging).
        """
        # Timezone-aware "now" — datetime.utcnow() is deprecated and the rest
        # of the codebase already uses aware datetimes.
        from datetime import timezone
        try:
            from weasyprint import HTML

            template = self.env.get_template("report.html")
            html_string = template.render(
                inspection=inspection,
                generated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
                grade_colors=GRADE_COLORS,
                severity_colors=SEVERITY_COLORS,
                category_labels=CATEGORY_LABELS,
            )
            return HTML(string=html_string).write_pdf()
        except ImportError as exc:
            logger.error("WeasyPrint is not installed")
            # Chain the cause so the import failure stays in the traceback.
            raise RuntimeError("PDF generation is not available (WeasyPrint not installed)") from exc
        except Exception as exc:
            logger.error("PDF generation failed: %s", str(exc))
            raise RuntimeError(f"PDF generation failed: {str(exc)}") from exc

    async def generate_json(self, inspection: dict) -> bytes:
        """Serialize the inspection result to pretty-printed UTF-8 JSON bytes."""
        # Drop MongoDB's internal _id; default=str stringifies datetimes etc.
        clean_data = {k: v for k, v in inspection.items() if k != "_id"}
        json_str = json.dumps(clean_data, ensure_ascii=False, indent=2, default=str)
        return json_str.encode("utf-8")

    @staticmethod
    def generate_filename(url: str, extension: str) -> str:
        """Generate download filename: web-inspector-{url-slug}-{date}.{ext}"""
        from datetime import timezone  # aware replacement for deprecated utcnow()
        parsed = urlparse(url)
        hostname = parsed.hostname or "unknown"
        url_slug = slugify(hostname, max_length=50)
        date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        return f"web-inspector-{url_slug}-{date_str}.{extension}"