feat: 사이트 전체 검사 기능 추가

도메인 하위 링크를 BFS로 자동 크롤링하여 페이지별 검사 수행.
- BFS 링크 크롤러 (같은 도메인 필터링, max_pages/max_depth 설정)
- 사이트 검사 오케스트레이션 (크롤링→순차 검사→집계)
- SSE 실시간 진행 상태 (크롤링/검사/완료)
- 페이지 트리 + 집계 결과 UI
- UrlInputForm에 "사이트 전체 검사" 버튼 추가

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2026-02-13 16:46:49 +09:00
parent 44ad36e2ab
commit 81b9104aea
21 changed files with 3238 additions and 56 deletions

View File

@ -75,20 +75,162 @@ class InspectionService:
return inspection_id
async def _run_inspection(
self, inspection_id: str, url: str, response: httpx.Response
) -> None:
"""Execute 4 category checks in parallel and store results."""
async def run_inspection_inline(
    self,
    url: str,
    inspection_id: Optional[str] = None,
    progress_callback: Optional[object] = None,
) -> tuple[str, dict]:
    """
    Run a full inspection synchronously (inline) and return the result.

    This is the core inspection logic extracted for reuse by both:
    - Single-page inspection (_run_inspection wrapper with SSE/Redis)
    - Site-wide inspection (site_inspection_service calling per-page)

    Args:
        url: Target URL to inspect.
        inspection_id: Optional pre-generated ID. If None, a new UUID is
            generated.
        progress_callback: Optional async callable with signature
            ``(category: str, progress: int, current_step: str) -> None``.
            If None, progress is not reported.

    Returns:
        (inspection_id, result_dict) where result_dict is the MongoDB document.

    Raises:
        Exception: On fetch failure or unrecoverable errors.
    """
    settings = get_settings()

    if inspection_id is None:
        inspection_id = str(uuid.uuid4())

    # Fetch the URL up front so fetch failures propagate before any
    # checker work or persistence happens.
    response = await self._fetch_url(url, timeout=settings.URL_FETCH_TIMEOUT)
    html_content = response.text
    headers = dict(response.headers)

    # perf_counter() is monotonic: the measured duration cannot be skewed
    # (or go negative) if the wall clock is adjusted mid-inspection.
    # created_at still uses wall-clock UTC, which is what gets stored.
    start_time = time.perf_counter()
    created_at = datetime.now(timezone.utc)

    # Fall back to a no-op reporter. Distinctly named so the parameter is
    # not shadowed by a nested function definition of the same name.
    if progress_callback is None:
        async def _noop_progress(category: str, progress: int, current_step: str):
            pass

        progress_callback = _noop_progress

    # Create 4 checker engines
    checkers = [
        HtmlCssChecker(progress_callback=progress_callback),
        AccessibilityChecker(progress_callback=progress_callback),
        SeoChecker(progress_callback=progress_callback),
        PerformanceSecurityChecker(progress_callback=progress_callback),
    ]

    # Parallel execution with a per-category timeout. return_exceptions=True
    # keeps one failing/timed-out category from cancelling the others.
    results = await asyncio.gather(
        *[
            asyncio.wait_for(
                checker.check(url, html_content, headers),
                timeout=settings.CATEGORY_TIMEOUT,
            )
            for checker in checkers
        ],
        return_exceptions=True,
    )

    # Pair each category name with its outcome; a failed category degrades
    # to a zero-score "F" placeholder rather than failing the inspection.
    categories = {}
    category_names = ["html_css", "accessibility", "seo", "performance_security"]
    for cat_name, result in zip(category_names, results):
        if isinstance(result, Exception):
            logger.error(
                "Category %s failed for inspection %s: %s",
                cat_name, inspection_id, str(result),
            )
            categories[cat_name] = CategoryResult(
                score=0,
                grade="F",
                total_issues=0,
                issues=[],
            )
        else:
            categories[cat_name] = result

    # Calculate overall score
    overall_score = calculate_overall_score(categories)
    grade = calculate_grade(overall_score)
    duration = round(time.perf_counter() - start_time, 1)

    # Build summary by aggregating per-severity counts across categories.
    total_critical = sum(c.critical for c in categories.values())
    total_major = sum(c.major for c in categories.values())
    total_minor = sum(c.minor for c in categories.values())
    total_info = sum(c.info for c in categories.values())
    total_issues = sum(c.total_issues for c in categories.values())

    summary = IssueSummary(
        total_issues=total_issues,
        critical=total_critical,
        major=total_major,
        minor=total_minor,
        info=total_info,
    )

    # Build inspection result
    completed_at = datetime.now(timezone.utc)
    inspection_result = InspectionResult(
        inspection_id=inspection_id,
        url=url,
        status="completed",
        created_at=created_at,
        completed_at=completed_at,
        duration_seconds=duration,
        overall_score=overall_score,
        grade=grade,
        categories=categories,
        summary=summary,
    )

    # Store in MongoDB
    doc = inspection_result.model_dump(mode="json")
    await self.db.inspections.insert_one(doc)

    # Enforce URL history limit (max 100 per URL)
    await self._enforce_history_limit(url, max_count=100)

    # Cache in Redis
    await cache_result(inspection_id, doc)

    logger.info(
        "Inspection %s completed (inline): score=%d, duration=%.1fs",
        inspection_id, overall_score, duration,
    )

    return inspection_id, doc
async def _run_inspection(
self, inspection_id: str, url: str, response: httpx.Response
) -> None:
"""
Execute 4 category checks in parallel and store results.
This is the background-task wrapper that adds SSE/Redis progress
tracking on top of run_inspection_inline().
"""
try:
# Progress callback factory
# Progress callback that publishes to Redis + SSE
async def progress_callback(category: str, progress: int, current_step: str):
await self._update_progress(inspection_id, category, progress, current_step)
# Use inline runner (fetches URL internally, so we pass the pre-fetched response data)
# Since run_inspection_inline fetches the URL again, we use the lower-level approach
# to avoid double-fetching. We replicate the core logic with SSE event publishing.
html_content = response.text
headers = dict(response.headers)
start_time = time.time()
created_at = datetime.now(timezone.utc)
# Create 4 checker engines
checkers = [
HtmlCssChecker(progress_callback=progress_callback),
@ -122,14 +264,13 @@ class InspectionService:
"Category %s failed for inspection %s: %s",
cat_name, inspection_id, str(result),
)
# Create error result for failed category
categories[cat_name] = CategoryResult(
score=0,
grade="F",
total_issues=0,
issues=[],
)
# Publish category error
# Publish category error event
await publish_event(inspection_id, {
"event_type": "category_complete",
"inspection_id": inspection_id,
@ -139,7 +280,7 @@ class InspectionService:
})
else:
categories[cat_name] = result
# Publish category completion
# Publish category completion event
await publish_event(inspection_id, {
"event_type": "category_complete",
"inspection_id": inspection_id,