feat: 웹사이트 표준화 검사 도구 구현

- 4개 검사 엔진: HTML/CSS, 접근성(WCAG), SEO, 성능/보안 (총 50개 항목)
- FastAPI 백엔드 (9개 API, SSE 실시간 진행, PDF/JSON 리포트)
- Next.js 15 프론트엔드 (6개 페이지, 29개 컴포넌트, 반원 게이지 차트)
- Docker Compose 배포 (Backend:8011, Frontend:3011, MongoDB:27022, Redis:6392)
- 전체 테스트 32/32 PASS

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2026-02-13 13:57:27 +09:00
parent c37cda5b13
commit b5fa5d96b9
93 changed files with 18735 additions and 22 deletions

View File

View File

@ -0,0 +1,493 @@
"""
Inspection orchestration service.
Manages the full inspection lifecycle:
- URL validation and fetching
- Parallel execution of 4 checker engines
- Progress tracking via Redis
- Result aggregation and storage in MongoDB
"""
import asyncio
import json
import logging
import time
import uuid
from datetime import datetime, timezone
from typing import Optional
import httpx
from motor.motor_asyncio import AsyncIOMotorDatabase
from redis.asyncio import Redis
from app.core.config import get_settings
from app.core.redis import (
set_inspection_status,
update_category_progress,
publish_event,
cache_result,
)
from app.engines.html_css import HtmlCssChecker
from app.engines.accessibility import AccessibilityChecker
from app.engines.seo import SeoChecker
from app.engines.performance_security import PerformanceSecurityChecker
from app.models.schemas import (
CategoryResult,
InspectionResult,
IssueSummary,
Severity,
calculate_grade,
calculate_overall_score,
)
logger = logging.getLogger(__name__)
class InspectionService:
    """Inspection orchestration service."""

    def __init__(self, db: AsyncIOMotorDatabase, redis: Redis):
        # MongoDB handle: stores inspection result documents.
        self.db = db
        # Redis handle: progress state, SSE pub/sub, and result caching.
        self.redis = redis
async def start_inspection(self, url: str) -> str:
"""
Start an inspection and return the inspection_id.
1. Validate URL accessibility (timeout 10s)
2. Generate inspection_id (UUID v4)
3. Initialize progress state in Redis
4. Launch background inspection task
"""
settings = get_settings()
# 1. Fetch URL to verify accessibility
response = await self._fetch_url(url, timeout=settings.URL_FETCH_TIMEOUT)
# 2. Generate inspection_id
inspection_id = str(uuid.uuid4())
# 3. Initialize Redis state
await self._init_progress(inspection_id, url)
# 4. Run inspection as background task
asyncio.create_task(
self._run_inspection(inspection_id, url, response)
)
return inspection_id
    async def _run_inspection(
        self, inspection_id: str, url: str, response: httpx.Response
    ) -> None:
        """
        Execute the 4 category checks in parallel and store results.

        Runs as a fire-and-forget background task: every failure is caught,
        logged, published as an "error" SSE event, and persisted as an error
        document — nothing propagates to the caller.
        """
        html_content = response.text
        headers = dict(response.headers)
        start_time = time.time()
        created_at = datetime.now(timezone.utc)
        try:
            # Progress callback factory: the closure pins updates from every
            # engine to this inspection_id.
            async def progress_callback(category: str, progress: int, current_step: str):
                await self._update_progress(inspection_id, category, progress, current_step)
            # Create 4 checker engines
            checkers = [
                HtmlCssChecker(progress_callback=progress_callback),
                AccessibilityChecker(progress_callback=progress_callback),
                SeoChecker(progress_callback=progress_callback),
                PerformanceSecurityChecker(progress_callback=progress_callback),
            ]
            settings = get_settings()
            # Parallel execution with per-category timeout.
            # return_exceptions=True keeps one failing/timed-out category from
            # cancelling the other three.
            results = await asyncio.gather(
                *[
                    asyncio.wait_for(
                        checker.check(url, html_content, headers),
                        timeout=settings.CATEGORY_TIMEOUT,
                    )
                    for checker in checkers
                ],
                return_exceptions=True,
            )
            # Process results (handle timeouts/errors per category).
            # `results` order matches the `checkers` list above.
            categories = {}
            category_names = ["html_css", "accessibility", "seo", "performance_security"]
            for i, result in enumerate(results):
                cat_name = category_names[i]
                if isinstance(result, Exception):
                    logger.error(
                        "Category %s failed for inspection %s: %s",
                        cat_name, inspection_id, str(result),
                    )
                    # Create error result for failed category (score 0, grade F)
                    categories[cat_name] = CategoryResult(
                        score=0,
                        grade="F",
                        total_issues=0,
                        issues=[],
                    )
                    # Publish category error
                    await publish_event(inspection_id, {
                        "event_type": "category_complete",
                        "inspection_id": inspection_id,
                        "category": cat_name,
                        "score": 0,
                        "total_issues": 0,
                    })
                else:
                    categories[cat_name] = result
                    # Publish category completion
                    await publish_event(inspection_id, {
                        "event_type": "category_complete",
                        "inspection_id": inspection_id,
                        "category": cat_name,
                        "score": result.score,
                        "total_issues": result.total_issues,
                    })
            # Calculate overall score and letter grade from category results
            overall_score = calculate_overall_score(categories)
            grade = calculate_grade(overall_score)
            duration = round(time.time() - start_time, 1)
            # Build summary.
            # NOTE(review): assumes CategoryResult exposes per-severity counts
            # (critical/major/minor/info) — defined in app.models.schemas.
            total_critical = sum(c.critical for c in categories.values())
            total_major = sum(c.major for c in categories.values())
            total_minor = sum(c.minor for c in categories.values())
            total_info = sum(c.info for c in categories.values())
            total_issues = sum(c.total_issues for c in categories.values())
            summary = IssueSummary(
                total_issues=total_issues,
                critical=total_critical,
                major=total_major,
                minor=total_minor,
                info=total_info,
            )
            # Build inspection result
            completed_at = datetime.now(timezone.utc)
            inspection_result = InspectionResult(
                inspection_id=inspection_id,
                url=url,
                status="completed",
                created_at=created_at,
                completed_at=completed_at,
                duration_seconds=duration,
                overall_score=overall_score,
                grade=grade,
                categories=categories,
                summary=summary,
            )
            # Store in MongoDB (mode="json" makes the dump BSON/JSON friendly)
            doc = inspection_result.model_dump(mode="json")
            await self.db.inspections.insert_one(doc)
            # Enforce URL history limit (max 100 per URL)
            await self._enforce_history_limit(url, max_count=100)
            # Cache in Redis
            await cache_result(inspection_id, doc)
            # Mark as completed
            await set_inspection_status(inspection_id, "completed")
            # Publish complete event so SSE clients can redirect to the result page
            await publish_event(inspection_id, {
                "event_type": "complete",
                "inspection_id": inspection_id,
                "status": "completed",
                "overall_score": overall_score,
                "redirect_url": f"/inspections/{inspection_id}",
            })
            logger.info(
                "Inspection %s completed: score=%d, duration=%.1fs",
                inspection_id, overall_score, duration,
            )
        except Exception as e:
            # Background-task boundary: log with traceback, flip the Redis
            # status, notify SSE subscribers, and persist an error record.
            logger.error(
                "Inspection %s failed: %s", inspection_id, str(e), exc_info=True
            )
            await set_inspection_status(inspection_id, "error")
            await publish_event(inspection_id, {
                "event_type": "error",
                "inspection_id": inspection_id,
                "status": "error",
                "message": "검사 중 오류가 발생했습니다",
            })
            # Store error record in MongoDB (message truncated to 500 chars)
            error_doc = {
                "inspection_id": inspection_id,
                "url": url,
                "status": "error",
                "created_at": datetime.now(timezone.utc),
                "error_message": str(e)[:500],
                "overall_score": 0,
                "grade": "F",
                "categories": {},
                "summary": {
                    "total_issues": 0,
                    "critical": 0,
                    "major": 0,
                    "minor": 0,
                    "info": 0,
                },
            }
            await self.db.inspections.insert_one(error_doc)
    async def _fetch_url(self, url: str, timeout: int = 10) -> httpx.Response:
        """
        Fetch URL content with timeout.

        Follows redirects and raises for non-2xx responses.

        Raises:
            httpx.HTTPStatusError: on 4xx/5xx responses (raise_for_status).
            httpx.TimeoutException: if the request exceeds `timeout` seconds.
        """
        async with httpx.AsyncClient(
            follow_redirects=True,
            timeout=httpx.Timeout(float(timeout)),
            # NOTE(review): TLS verification is disabled, presumably so sites
            # with self-signed/expired certs can still be inspected. Confirm
            # this is intentional — the fetch is open to MITM as written.
            verify=False,
        ) as client:
            response = await client.get(url, headers={
                "User-Agent": "WebInspector/1.0 (Inspection Bot)",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
            })
            response.raise_for_status()
            return response
async def _init_progress(self, inspection_id: str, url: str) -> None:
"""Initialize inspection progress in Redis."""
await set_inspection_status(inspection_id, "running")
# Initialize all category progresses
for cat in ["html_css", "accessibility", "seo", "performance_security"]:
await update_category_progress(inspection_id, cat, 0, "대기 중...")
async def _update_progress(
self, inspection_id: str, category: str, progress: int, current_step: str
) -> None:
"""Update category progress and publish SSE event."""
await update_category_progress(inspection_id, category, progress, current_step)
# Build full progress state
progress_data = await self._build_progress_event(inspection_id, category, progress, current_step)
await publish_event(inspection_id, progress_data)
async def _build_progress_event(
self, inspection_id: str, updated_category: str, progress: int, current_step: str
) -> dict:
"""Build progress event data including all categories."""
from app.core.redis import get_current_progress
raw = await get_current_progress(inspection_id)
categories = {}
category_list = ["html_css", "accessibility", "seo", "performance_security"]
for cat in category_list:
if raw:
cat_progress = int(raw.get(f"{cat}_progress", 0))
cat_step = raw.get(f"{cat}_step", "")
cat_status = raw.get(f"{cat}_status", "pending")
else:
cat_progress = 0
cat_step = ""
cat_status = "pending"
# Override with just-updated values
if cat == updated_category:
cat_progress = progress
cat_step = current_step
cat_status = "completed" if progress >= 100 else "running"
categories[cat] = {
"status": cat_status,
"progress": cat_progress,
"current_step": cat_step,
}
# Calculate overall progress
total_progress = sum(c["progress"] for c in categories.values())
overall_progress = round(total_progress / len(categories))
return {
"event_type": "progress",
"inspection_id": inspection_id,
"status": "running",
"overall_progress": overall_progress,
"categories": categories,
}
async def _enforce_history_limit(self, url: str, max_count: int = 100) -> None:
"""Delete oldest inspection records if URL exceeds max_count."""
count = await self.db.inspections.count_documents({"url": url})
if count > max_count:
excess = count - max_count
oldest = self.db.inspections.find(
{"url": url}
).sort("created_at", 1).limit(excess)
ids_to_delete = []
async for doc in oldest:
ids_to_delete.append(doc["_id"])
if ids_to_delete:
await self.db.inspections.delete_many({"_id": {"$in": ids_to_delete}})
logger.info(
"Deleted %d oldest inspections for URL %s",
len(ids_to_delete), url,
)
async def get_inspection(self, inspection_id: str) -> Optional[dict]:
"""Get inspection result by ID (cache-first)."""
from app.core.redis import get_cached_result, cache_result
# Try cache first
cached = await get_cached_result(inspection_id)
if cached:
return cached
# Fetch from MongoDB
doc = await self.db.inspections.find_one(
{"inspection_id": inspection_id},
{"_id": 0},
)
if doc:
await cache_result(inspection_id, doc)
return doc
return None
async def get_issues(
self,
inspection_id: str,
category: Optional[str] = None,
severity: Optional[str] = None,
) -> Optional[dict]:
"""Get filtered issues for an inspection."""
doc = await self.get_inspection(inspection_id)
if not doc:
return None
all_issues = []
categories = doc.get("categories", {})
for cat_name, cat_data in categories.items():
if category and category != "all" and cat_name != category:
continue
for issue in cat_data.get("issues", []):
if severity and severity != "all" and issue.get("severity") != severity:
continue
all_issues.append(issue)
# Sort by severity priority
severity_order = {"critical": 0, "major": 1, "minor": 2, "info": 3}
all_issues.sort(key=lambda x: severity_order.get(x.get("severity", "info"), 4))
return {
"inspection_id": inspection_id,
"total": len(all_issues),
"filters": {
"category": category or "all",
"severity": severity or "all",
},
"issues": all_issues,
}
async def get_inspection_list(
self,
page: int = 1,
limit: int = 20,
url_filter: Optional[str] = None,
sort: str = "-created_at",
) -> dict:
"""Get paginated inspection list."""
limit = min(limit, 100)
skip = (page - 1) * limit
# Build query
query = {}
if url_filter:
query["url"] = {"$regex": url_filter, "$options": "i"}
# Sort direction
if sort.startswith("-"):
sort_field = sort[1:]
sort_dir = -1
else:
sort_field = sort
sort_dir = 1
# Count total
total = await self.db.inspections.count_documents(query)
# Fetch items
cursor = self.db.inspections.find(
query,
{
"_id": 0,
"inspection_id": 1,
"url": 1,
"created_at": 1,
"overall_score": 1,
"grade": 1,
"summary.total_issues": 1,
},
).sort(sort_field, sort_dir).skip(skip).limit(limit)
items = []
async for doc in cursor:
items.append({
"inspection_id": doc.get("inspection_id"),
"url": doc.get("url"),
"created_at": doc.get("created_at"),
"overall_score": doc.get("overall_score", 0),
"grade": doc.get("grade", "F"),
"total_issues": doc.get("summary", {}).get("total_issues", 0),
})
total_pages = max(1, -(-total // limit)) # Ceiling division
return {
"items": items,
"total": total,
"page": page,
"limit": limit,
"total_pages": total_pages,
}
async def get_trend(self, url: str, limit: int = 10) -> dict:
"""Get trend data for a specific URL."""
cursor = self.db.inspections.find(
{"url": url, "status": "completed"},
{
"_id": 0,
"inspection_id": 1,
"created_at": 1,
"overall_score": 1,
"categories.html_css.score": 1,
"categories.accessibility.score": 1,
"categories.seo.score": 1,
"categories.performance_security.score": 1,
},
).sort("created_at", 1).limit(limit)
data_points = []
async for doc in cursor:
cats = doc.get("categories", {})
data_points.append({
"inspection_id": doc.get("inspection_id"),
"created_at": doc.get("created_at"),
"overall_score": doc.get("overall_score", 0),
"html_css": cats.get("html_css", {}).get("score", 0),
"accessibility": cats.get("accessibility", {}).get("score", 0),
"seo": cats.get("seo", {}).get("score", 0),
"performance_security": cats.get("performance_security", {}).get("score", 0),
})
return {
"url": url,
"data_points": data_points,
}

View File

@ -0,0 +1,95 @@
"""
Report generation service.
Generates PDF and JSON reports from inspection results.
"""
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse
from jinja2 import Environment, FileSystemLoader
from slugify import slugify
logger = logging.getLogger(__name__)

# Jinja2 templates directory, resolved relative to this module (../templates)
TEMPLATES_DIR = Path(__file__).parent.parent / "templates"

# Grade -> hex color used in rendered reports (A+ and A share green)
GRADE_COLORS = {
    "A+": "#22C55E",
    "A": "#22C55E",
    "B": "#3B82F6",
    "C": "#F59E0B",
    "D": "#F97316",
    "F": "#EF4444",
}

# Issue severity -> hex color (matches the frontend severity palette)
SEVERITY_COLORS = {
    "critical": "#EF4444",
    "major": "#F97316",
    "minor": "#EAB308",
    "info": "#3B82F6",
}

# Internal category key -> human-readable (Korean) report label
CATEGORY_LABELS = {
    "html_css": "HTML/CSS 표준",
    "accessibility": "접근성 (WCAG)",
    "seo": "SEO 최적화",
    "performance_security": "성능/보안",
}
class ReportService:
    """PDF and JSON report generation service."""

    def __init__(self):
        # Autoescaping is on: inspection data may contain HTML fragments.
        self.env = Environment(
            loader=FileSystemLoader(str(TEMPLATES_DIR)),
            autoescape=True,
        )
        # Register custom filters used by report.html.
        self.env.filters["grade_color"] = lambda g: GRADE_COLORS.get(g, "#6B7280")
        self.env.filters["severity_color"] = lambda s: SEVERITY_COLORS.get(s, "#6B7280")
        self.env.filters["category_label"] = lambda c: CATEGORY_LABELS.get(c, c)

    async def generate_pdf(self, inspection: dict) -> bytes:
        """
        Render the inspection result into a PDF report.

        Raises:
            RuntimeError: if WeasyPrint is not installed or rendering fails
                (the original exception is chained for debugging).
        """
        # Timezone-aware "now" — datetime.utcnow() is deprecated and the rest
        # of the codebase already uses aware datetimes.
        from datetime import timezone
        try:
            from weasyprint import HTML

            template = self.env.get_template("report.html")
            html_string = template.render(
                inspection=inspection,
                generated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
                grade_colors=GRADE_COLORS,
                severity_colors=SEVERITY_COLORS,
                category_labels=CATEGORY_LABELS,
            )
            return HTML(string=html_string).write_pdf()
        except ImportError as exc:
            logger.error("WeasyPrint is not installed")
            # Chain the cause so the import failure stays in the traceback.
            raise RuntimeError("PDF generation is not available (WeasyPrint not installed)") from exc
        except Exception as exc:
            logger.error("PDF generation failed: %s", str(exc))
            raise RuntimeError(f"PDF generation failed: {str(exc)}") from exc

    async def generate_json(self, inspection: dict) -> bytes:
        """Serialize the inspection result to pretty-printed UTF-8 JSON bytes."""
        # Drop MongoDB's internal _id; default=str stringifies datetimes etc.
        clean_data = {k: v for k, v in inspection.items() if k != "_id"}
        json_str = json.dumps(clean_data, ensure_ascii=False, indent=2, default=str)
        return json_str.encode("utf-8")

    @staticmethod
    def generate_filename(url: str, extension: str) -> str:
        """Generate download filename: web-inspector-{url-slug}-{date}.{ext}"""
        from datetime import timezone  # aware replacement for deprecated utcnow()
        parsed = urlparse(url)
        hostname = parsed.hostname or "unknown"
        url_slug = slugify(hostname, max_length=50)
        date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        return f"web-inspector-{url_slug}-{date_str}.{extension}"