Files
web-inspector/backend/app/services/inspection_service.py
jungwoo choi b5fa5d96b9 feat: 웹사이트 표준화 검사 도구 구현
- 4개 검사 엔진: HTML/CSS, 접근성(WCAG), SEO, 성능/보안 (총 50개 항목)
- FastAPI 백엔드 (9개 API, SSE 실시간 진행, PDF/JSON 리포트)
- Next.js 15 프론트엔드 (6개 페이지, 29개 컴포넌트, 반원 게이지 차트)
- Docker Compose 배포 (Backend:8011, Frontend:3011, MongoDB:27022, Redis:6392)
- 전체 테스트 32/32 PASS

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 13:57:27 +09:00

494 lines
17 KiB
Python

"""
Inspection orchestration service.
Manages the full inspection lifecycle:
- URL validation and fetching
- Parallel execution of 4 checker engines
- Progress tracking via Redis
- Result aggregation and storage in MongoDB
"""
import asyncio
import json
import logging
import time
import uuid
from datetime import datetime, timezone
from typing import Optional
import httpx
from motor.motor_asyncio import AsyncIOMotorDatabase
from redis.asyncio import Redis
from app.core.config import get_settings
from app.core.redis import (
set_inspection_status,
update_category_progress,
publish_event,
cache_result,
)
from app.engines.html_css import HtmlCssChecker
from app.engines.accessibility import AccessibilityChecker
from app.engines.seo import SeoChecker
from app.engines.performance_security import PerformanceSecurityChecker
from app.models.schemas import (
CategoryResult,
InspectionResult,
IssueSummary,
Severity,
calculate_grade,
calculate_overall_score,
)
logger = logging.getLogger(__name__)
class InspectionService:
    """Orchestrates the full lifecycle of a website inspection.

    Responsibilities:
    - Validate and fetch the target URL.
    - Run the 4 checker engines in parallel with per-category timeouts.
    - Track progress in Redis and publish SSE events.
    - Aggregate results, persist them in MongoDB, and cache them in Redis.
    """

    # Single source of truth for the category names used for progress
    # initialization, result aggregation, and event payloads.
    CATEGORY_NAMES = ("html_css", "accessibility", "seo", "performance_security")

    def __init__(self, db: AsyncIOMotorDatabase, redis: Redis):
        self.db = db
        self.redis = redis
        # asyncio.create_task keeps only a weak reference to its task, so a
        # running inspection could otherwise be garbage-collected mid-flight.
        # Hold strong references here until each task completes.
        self._background_tasks: set = set()

    async def start_inspection(self, url: str) -> str:
        """Start an inspection and return the new inspection_id.

        1. Fetch the URL to verify accessibility (timeout from settings).
        2. Generate an inspection_id (UUID v4).
        3. Initialize the progress state in Redis.
        4. Launch the inspection as a background task.

        Raises:
            httpx.HTTPError: if the target URL cannot be fetched.
        """
        settings = get_settings()
        # 1. Fetch up front so unreachable URLs fail fast for the caller.
        response = await self._fetch_url(url, timeout=settings.URL_FETCH_TIMEOUT)
        # 2. Generate inspection_id
        inspection_id = str(uuid.uuid4())
        # 3. Initialize Redis state
        await self._init_progress(inspection_id, url)
        # 4. Run the inspection in the background. Keep a strong reference so
        # the task is not garbage-collected before it finishes (see __init__).
        task = asyncio.create_task(
            self._run_inspection(inspection_id, url, response)
        )
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return inspection_id

    async def _run_inspection(
        self, inspection_id: str, url: str, response: httpx.Response
    ) -> None:
        """Execute the 4 category checks in parallel and store results.

        Each category runs under its own timeout; a failed or timed-out
        category is recorded as a score-0 "F" placeholder instead of failing
        the whole inspection. Any unexpected error marks the inspection as
        "error" and stores a minimal error record.
        """
        html_content = response.text
        headers = dict(response.headers)
        start_time = time.time()
        created_at = datetime.now(timezone.utc)
        try:
            # Progress callback shared by all engines; forwards updates to
            # Redis and the SSE channel.
            async def progress_callback(category: str, progress: int, current_step: str):
                await self._update_progress(inspection_id, category, progress, current_step)

            # Engine order matches CATEGORY_NAMES — results are zipped below.
            checkers = [
                HtmlCssChecker(progress_callback=progress_callback),
                AccessibilityChecker(progress_callback=progress_callback),
                SeoChecker(progress_callback=progress_callback),
                PerformanceSecurityChecker(progress_callback=progress_callback),
            ]
            settings = get_settings()
            # return_exceptions=True: one failing/timed-out category must not
            # cancel the others.
            results = await asyncio.gather(
                *[
                    asyncio.wait_for(
                        checker.check(url, html_content, headers),
                        timeout=settings.CATEGORY_TIMEOUT,
                    )
                    for checker in checkers
                ],
                return_exceptions=True,
            )

            categories = {}
            for cat_name, result in zip(self.CATEGORY_NAMES, results):
                if isinstance(result, Exception):
                    logger.error(
                        "Category %s failed for inspection %s: %s",
                        cat_name, inspection_id, str(result),
                    )
                    # Record a zero-score placeholder for the failed category.
                    categories[cat_name] = CategoryResult(
                        score=0,
                        grade="F",
                        total_issues=0,
                        issues=[],
                    )
                    await publish_event(inspection_id, {
                        "event_type": "category_complete",
                        "inspection_id": inspection_id,
                        "category": cat_name,
                        "score": 0,
                        "total_issues": 0,
                    })
                else:
                    categories[cat_name] = result
                    await publish_event(inspection_id, {
                        "event_type": "category_complete",
                        "inspection_id": inspection_id,
                        "category": cat_name,
                        "score": result.score,
                        "total_issues": result.total_issues,
                    })

            # Aggregate scores and per-severity issue counts.
            overall_score = calculate_overall_score(categories)
            grade = calculate_grade(overall_score)
            duration = round(time.time() - start_time, 1)
            summary = IssueSummary(
                total_issues=sum(c.total_issues for c in categories.values()),
                critical=sum(c.critical for c in categories.values()),
                major=sum(c.major for c in categories.values()),
                minor=sum(c.minor for c in categories.values()),
                info=sum(c.info for c in categories.values()),
            )

            completed_at = datetime.now(timezone.utc)
            inspection_result = InspectionResult(
                inspection_id=inspection_id,
                url=url,
                status="completed",
                created_at=created_at,
                completed_at=completed_at,
                duration_seconds=duration,
                overall_score=overall_score,
                grade=grade,
                categories=categories,
                summary=summary,
            )

            # Persist, trim per-URL history, cache, then announce completion.
            doc = inspection_result.model_dump(mode="json")
            await self.db.inspections.insert_one(doc)
            await self._enforce_history_limit(url, max_count=100)
            await cache_result(inspection_id, doc)
            await set_inspection_status(inspection_id, "completed")
            await publish_event(inspection_id, {
                "event_type": "complete",
                "inspection_id": inspection_id,
                "status": "completed",
                "overall_score": overall_score,
                "redirect_url": f"/inspections/{inspection_id}",
            })
            logger.info(
                "Inspection %s completed: score=%d, duration=%.1fs",
                inspection_id, overall_score, duration,
            )
        except Exception as e:
            # Top-level boundary for the background task: log, mark the
            # inspection as errored, and store a minimal record so the ID
            # remains queryable.
            logger.error(
                "Inspection %s failed: %s", inspection_id, str(e), exc_info=True
            )
            await set_inspection_status(inspection_id, "error")
            await publish_event(inspection_id, {
                "event_type": "error",
                "inspection_id": inspection_id,
                "status": "error",
                "message": "검사 중 오류가 발생했습니다",
            })
            error_doc = {
                "inspection_id": inspection_id,
                "url": url,
                "status": "error",
                "created_at": datetime.now(timezone.utc),
                "error_message": str(e)[:500],  # cap stored message length
                "overall_score": 0,
                "grade": "F",
                "categories": {},
                "summary": {
                    "total_issues": 0,
                    "critical": 0,
                    "major": 0,
                    "minor": 0,
                    "info": 0,
                },
            }
            await self.db.inspections.insert_one(error_doc)

    async def _fetch_url(self, url: str, timeout: int = 10) -> httpx.Response:
        """Fetch the URL and return the response; raises on non-2xx status.

        NOTE(review/security): TLS verification is disabled (verify=False),
        apparently so sites with broken certificates can still be inspected.
        Do not reuse this client for anything security-sensitive.
        """
        async with httpx.AsyncClient(
            follow_redirects=True,
            timeout=httpx.Timeout(float(timeout)),
            verify=False,
        ) as client:
            response = await client.get(url, headers={
                "User-Agent": "WebInspector/1.0 (Inspection Bot)",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
            })
            response.raise_for_status()
            return response

    async def _init_progress(self, inspection_id: str, url: str) -> None:
        """Initialize inspection progress in Redis (all categories at 0%)."""
        await set_inspection_status(inspection_id, "running")
        for cat in self.CATEGORY_NAMES:
            await update_category_progress(inspection_id, cat, 0, "대기 중...")

    async def _update_progress(
        self, inspection_id: str, category: str, progress: int, current_step: str
    ) -> None:
        """Persist a category progress update and publish it as an SSE event."""
        await update_category_progress(inspection_id, category, progress, current_step)
        progress_data = await self._build_progress_event(
            inspection_id, category, progress, current_step
        )
        await publish_event(inspection_id, progress_data)

    async def _build_progress_event(
        self, inspection_id: str, updated_category: str, progress: int, current_step: str
    ) -> dict:
        """Build a progress event payload covering all categories.

        Reads the latest per-category state from Redis, then overrides the
        just-updated category with the in-flight values (the Redis read may
        lag the write that triggered this event).
        """
        # Local import kept as in the original — presumably avoids a circular
        # import with app.core.redis; confirm before hoisting to module level.
        from app.core.redis import get_current_progress

        raw = await get_current_progress(inspection_id)
        categories = {}
        for cat in self.CATEGORY_NAMES:
            if raw:
                cat_progress = int(raw.get(f"{cat}_progress", 0))
                cat_step = raw.get(f"{cat}_step", "")
                cat_status = raw.get(f"{cat}_status", "pending")
            else:
                cat_progress = 0
                cat_step = ""
                cat_status = "pending"
            # Override with the just-updated values.
            if cat == updated_category:
                cat_progress = progress
                cat_step = current_step
                cat_status = "completed" if progress >= 100 else "running"
            categories[cat] = {
                "status": cat_status,
                "progress": cat_progress,
                "current_step": cat_step,
            }
        # Overall progress = simple average across the categories.
        total_progress = sum(c["progress"] for c in categories.values())
        overall_progress = round(total_progress / len(categories))
        return {
            "event_type": "progress",
            "inspection_id": inspection_id,
            "status": "running",
            "overall_progress": overall_progress,
            "categories": categories,
        }

    async def _enforce_history_limit(self, url: str, max_count: int = 100) -> None:
        """Delete the oldest inspection records for *url* beyond max_count."""
        count = await self.db.inspections.count_documents({"url": url})
        if count <= max_count:
            return
        excess = count - max_count
        oldest = self.db.inspections.find(
            {"url": url}
        ).sort("created_at", 1).limit(excess)
        ids_to_delete = [doc["_id"] async for doc in oldest]
        if ids_to_delete:
            await self.db.inspections.delete_many({"_id": {"$in": ids_to_delete}})
            logger.info(
                "Deleted %d oldest inspections for URL %s",
                len(ids_to_delete), url,
            )

    async def get_inspection(self, inspection_id: str) -> Optional[dict]:
        """Return an inspection result by ID (Redis cache first, then MongoDB).

        Returns None when the inspection does not exist.
        """
        from app.core.redis import get_cached_result, cache_result

        cached = await get_cached_result(inspection_id)
        if cached:
            return cached
        doc = await self.db.inspections.find_one(
            {"inspection_id": inspection_id},
            {"_id": 0},
        )
        if doc:
            # Backfill the cache so subsequent reads are served from Redis.
            await cache_result(inspection_id, doc)
            return doc
        return None

    async def get_issues(
        self,
        inspection_id: str,
        category: Optional[str] = None,
        severity: Optional[str] = None,
    ) -> Optional[dict]:
        """Return issues for an inspection, optionally filtered.

        category / severity accept None or "all" to mean "no filter".
        Returns None when the inspection does not exist.
        """
        doc = await self.get_inspection(inspection_id)
        if not doc:
            return None
        all_issues = []
        for cat_name, cat_data in doc.get("categories", {}).items():
            if category and category != "all" and cat_name != category:
                continue
            for issue in cat_data.get("issues", []):
                if severity and severity != "all" and issue.get("severity") != severity:
                    continue
                all_issues.append(issue)
        # Order: critical → major → minor → info (unknown severities last).
        severity_order = {"critical": 0, "major": 1, "minor": 2, "info": 3}
        all_issues.sort(key=lambda x: severity_order.get(x.get("severity", "info"), 4))
        return {
            "inspection_id": inspection_id,
            "total": len(all_issues),
            "filters": {
                "category": category or "all",
                "severity": severity or "all",
            },
            "issues": all_issues,
        }

    async def get_inspection_list(
        self,
        page: int = 1,
        limit: int = 20,
        url_filter: Optional[str] = None,
        sort: str = "-created_at",
    ) -> dict:
        """Return a paginated inspection list.

        sort uses "-field" for descending, "field" for ascending.
        limit is capped at 100; url_filter is a case-insensitive substring.
        """
        limit = min(limit, 100)
        skip = (page - 1) * limit
        query = {}
        if url_filter:
            query["url"] = {"$regex": url_filter, "$options": "i"}
        if sort.startswith("-"):
            sort_field, sort_dir = sort[1:], -1
        else:
            sort_field, sort_dir = sort, 1
        total = await self.db.inspections.count_documents(query)
        # Projection keeps the list payload small (no per-issue data).
        cursor = self.db.inspections.find(
            query,
            {
                "_id": 0,
                "inspection_id": 1,
                "url": 1,
                "created_at": 1,
                "overall_score": 1,
                "grade": 1,
                "summary.total_issues": 1,
            },
        ).sort(sort_field, sort_dir).skip(skip).limit(limit)
        items = []
        async for doc in cursor:
            items.append({
                "inspection_id": doc.get("inspection_id"),
                "url": doc.get("url"),
                "created_at": doc.get("created_at"),
                "overall_score": doc.get("overall_score", 0),
                "grade": doc.get("grade", "F"),
                "total_issues": doc.get("summary", {}).get("total_issues", 0),
            })
        total_pages = max(1, -(-total // limit))  # ceiling division, min 1 page
        return {
            "items": items,
            "total": total,
            "page": page,
            "limit": limit,
            "total_pages": total_pages,
        }

    async def get_trend(self, url: str, limit: int = 10) -> dict:
        """Return score trend data for a URL.

        Yields the most recent *limit* completed inspections in
        chronological (oldest → newest) order, ready for charting.
        """
        # FIX: fetch the NEWEST `limit` records (sort desc + limit) and then
        # reverse into chronological order. The previous ascending sort with
        # limit always returned the oldest records, so the trend stopped
        # reflecting new inspections once more than `limit` existed.
        cursor = self.db.inspections.find(
            {"url": url, "status": "completed"},
            {
                "_id": 0,
                "inspection_id": 1,
                "created_at": 1,
                "overall_score": 1,
                "categories.html_css.score": 1,
                "categories.accessibility.score": 1,
                "categories.seo.score": 1,
                "categories.performance_security.score": 1,
            },
        ).sort("created_at", -1).limit(limit)
        data_points = []
        async for doc in cursor:
            cats = doc.get("categories", {})
            data_points.append({
                "inspection_id": doc.get("inspection_id"),
                "created_at": doc.get("created_at"),
                "overall_score": doc.get("overall_score", 0),
                "html_css": cats.get("html_css", {}).get("score", 0),
                "accessibility": cats.get("accessibility", {}).get("score", 0),
                "seo": cats.get("seo", {}).get("score", 0),
                "performance_security": cats.get("performance_security", {}).get("score", 0),
            })
        data_points.reverse()  # oldest → newest
        return {
            "url": url,
            "data_points": data_points,
        }