feat: 웹사이트 표준화 검사 도구 구현

- 4개 검사 엔진: HTML/CSS, 접근성(WCAG), SEO, 성능/보안 (총 50개 항목)
- FastAPI 백엔드 (9개 API, SSE 실시간 진행, PDF/JSON 리포트)
- Next.js 15 프론트엔드 (6개 페이지, 29개 컴포넌트, 반원 게이지 차트)
- Docker Compose 배포 (Backend:8011, Frontend:3011, MongoDB:27022, Redis:6392)
- 전체 테스트 32/32 PASS

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2026-02-13 13:57:27 +09:00
parent c37cda5b13
commit b5fa5d96b9
93 changed files with 18735 additions and 22 deletions

View File

View File

@@ -0,0 +1,422 @@
"""
Accessibility (WCAG 2.1 AA) Checker Engine (F-003).
Uses Playwright + axe-core for comprehensive accessibility testing.
Falls back to BeautifulSoup-based checks if Playwright is unavailable.
"""
import json
import logging
import os
from pathlib import Path
from typing import Optional
from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
logger = logging.getLogger(__name__)
# Path to the vendored axe-core bundle; the checker falls back to the CDN
# when this file is missing or is only a small placeholder stub.
AXE_CORE_JS_PATH = Path(__file__).parent / "axe_core" / "axe.min.js"
# Maps axe-core rule id -> (internal issue code, Korean message, WCAG 2.1 criterion).
# Rules not listed here fall back to the generic A-06 bucket in _parse_axe_results.
AXE_RULE_MESSAGES = {
    "image-alt": ("A-01", "이미지에 대체 텍스트(alt)가 없습니다", "1.1.1"),
    "color-contrast": ("A-02", "텍스트와 배경의 색상 대비가 부족합니다", "1.4.3"),
    "keyboard": ("A-03", "키보드로 접근할 수 없는 요소가 있습니다", "2.1.1"),
    "focus-visible": ("A-04", "키보드 포커스가 시각적으로 표시되지 않습니다", "2.4.7"),
    "label": ("A-05", "폼 요소에 레이블이 연결되지 않았습니다", "1.3.1"),
    "input-label": ("A-05", "입력 요소에 레이블이 없습니다", "1.3.1"),
    "aria-valid-attr": ("A-06", "유효하지 않은 ARIA 속성이 사용되었습니다", "4.1.2"),
    "aria-roles": ("A-06", "유효하지 않은 ARIA 역할이 사용되었습니다", "4.1.2"),
    "aria-required-attr": ("A-06", "필수 ARIA 속성이 누락되었습니다", "4.1.2"),
    "aria-valid-attr-value": ("A-06", "ARIA 속성 값이 올바르지 않습니다", "4.1.2"),
    "link-name": ("A-07", "링크 텍스트가 목적을 설명하지 않습니다", "2.4.4"),
    "html-has-lang": ("A-08", "HTML 요소에 lang 속성이 없습니다", "3.1.1"),
    "html-lang-valid": ("A-08", "HTML lang 속성 값이 올바르지 않습니다", "3.1.1"),
    "bypass": ("A-09", "건너뛰기 링크(skip navigation)가 없습니다", "2.4.1"),
    "no-autoplay-audio": ("A-10", "자동 재생 미디어에 정지/음소거 컨트롤이 없습니다", "1.4.2"),
    "audio-caption": ("A-10", "오디오/비디오에 자막이 없습니다", "1.2.2"),
    "video-caption": ("A-10", "비디오에 자막이 없습니다", "1.2.2"),
}
# Maps axe-core "impact" levels onto this project's severity scale.
IMPACT_TO_SEVERITY = {
    "critical": "critical",
    "serious": "major",
    "moderate": "minor",
    "minor": "info",
}
class AccessibilityChecker(BaseChecker):
    """Accessibility (WCAG 2.1 AA) checker engine.

    Primary strategy runs axe-core inside a Playwright headless browser;
    any failure there (missing browser binary, navigation timeout, script
    injection error) falls back to a reduced set of static BeautifulSoup
    checks so the category still produces a result.
    """

    @property
    def category_name(self) -> str:
        """Category identifier used in results and progress callbacks."""
        return "accessibility"

    async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult:
        """
        Primary: Playwright + axe-core.
        Fallback: BeautifulSoup-based basic checks.
        """
        try:
            return await self._check_with_playwright(url)
        except Exception as e:
            # Any Playwright-path failure downgrades to the static fallback
            # rather than failing the whole category.
            logger.warning(
                "Playwright accessibility check failed, falling back to basic checks: %s",
                str(e),
            )
            return await self._check_with_beautifulsoup(url, html_content)

    async def _check_with_playwright(self, url: str) -> CategoryResult:
        """Run axe-core via a Playwright headless browser and score the result."""
        # Imported lazily so environments without Playwright installed can
        # still import this module and use the BeautifulSoup fallback.
        from playwright.async_api import async_playwright

        await self.update_progress(10, "브라우저 시작 중...")
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                page = await browser.new_page()
                await self.update_progress(20, "페이지 로드 중...")
                await page.goto(url, wait_until="networkidle", timeout=30000)
                await self.update_progress(40, "axe-core 주입 중...")
                # Prefer the vendored axe-core bundle; the size guard skips
                # placeholder files that would not actually define `axe`.
                if AXE_CORE_JS_PATH.exists() and AXE_CORE_JS_PATH.stat().st_size > 1000:
                    axe_js = AXE_CORE_JS_PATH.read_text(encoding="utf-8")
                    await page.evaluate(axe_js)
                else:
                    # Fallback: load axe-core from the CDN inside the page.
                    await page.evaluate("""
                        async () => {
                            const script = document.createElement('script');
                            script.src = 'https://cdnjs.cloudflare.com/ajax/libs/axe-core/4.10.2/axe.min.js';
                            document.head.appendChild(script);
                            await new Promise((resolve, reject) => {
                                script.onload = resolve;
                                script.onerror = reject;
                            });
                        }
                    """)
                await self.update_progress(60, "접근성 검사 실행 중...")
                # Restrict axe to WCAG 2.x A/AA rules plus best practices.
                axe_results = await page.evaluate("""
                    () => {
                        return new Promise((resolve, reject) => {
                            if (typeof axe === 'undefined') {
                                reject(new Error('axe-core not loaded'));
                                return;
                            }
                            axe.run(document, {
                                runOnly: {
                                    type: 'tag',
                                    values: ['wcag2a', 'wcag2aa', 'best-practice']
                                }
                            }).then(resolve).catch(reject);
                        });
                    }
                """)
                await self.update_progress(80, "결과 분석 중...")
                issues = self._parse_axe_results(axe_results)
                score = self._calculate_axe_score(axe_results)
            finally:
                # Always release the browser, even when a step above raised.
                await browser.close()
        await self.update_progress(100, "완료")
        return self._build_result(
            category="accessibility",
            score=score,
            issues=issues,
            wcag_level="AA",
        )

    async def _check_with_beautifulsoup(self, url: str, html_content: str) -> CategoryResult:
        """Fallback: basic accessibility checks using BeautifulSoup only.

        Covers a static subset of the axe-core rules (A-01, A-05..A-10)
        against the already-fetched HTML; no page scripts are executed.
        The ``url`` parameter is unused but kept for interface symmetry.
        """
        soup = BeautifulSoup(html_content, "html5lib")
        issues: list[Issue] = []
        await self.update_progress(20, "이미지 대체 텍스트 검사 중...")
        issues += self._bs_check_img_alt(soup)
        await self.update_progress(35, "폼 레이블 검사 중...")
        issues += self._bs_check_form_labels(soup)
        await self.update_progress(50, "ARIA 속성 검사 중...")
        issues += self._bs_check_aria(soup)
        await self.update_progress(60, "링크 텍스트 검사 중...")
        issues += self._bs_check_link_text(soup)
        await self.update_progress(70, "언어 속성 검사 중...")
        issues += self._bs_check_lang(soup)
        await self.update_progress(80, "건너뛰기 링크 검사 중...")
        issues += self._bs_check_skip_nav(soup)
        await self.update_progress(90, "자동 재생 검사 중...")
        issues += self._bs_check_autoplay(soup)
        score = self._calculate_score_by_deduction(issues)
        await self.update_progress(100, "완료")
        return self._build_result(
            category="accessibility",
            score=score,
            issues=issues,
            wcag_level="AA",
        )

    def _parse_axe_results(self, axe_results: dict) -> list[Issue]:
        """Convert axe-core violations into Issue objects with Korean messages."""
        issues = []
        for violation in axe_results.get("violations", []):
            rule_id = violation.get("id", "")
            impact = violation.get("impact", "minor")
            severity = IMPACT_TO_SEVERITY.get(impact, "info")
            # Map the axe rule to our internal code; unknown rules fall back
            # to the generic ARIA bucket with axe's own English description.
            if rule_id in AXE_RULE_MESSAGES:
                code, korean_msg, wcag = AXE_RULE_MESSAGES[rule_id]
            else:
                code = "A-06"
                korean_msg = violation.get("description", "접근성 위반 사항이 발견되었습니다")
                wcag = "4.1.2"
            # Attach the first offending element (truncated) as evidence.
            nodes = violation.get("nodes", [])
            element = None
            if nodes:
                html_snippet = nodes[0].get("html", "")
                if html_snippet:
                    element = html_snippet[:200]
            # For contrast failures, surface the measured ratio when axe
            # provides one. (Fix: previously also read fg/bg colors into
            # locals that were never used.)
            detail = ""
            if rule_id == "color-contrast" and nodes:
                data = nodes[0].get("any", [{}])
                if data and isinstance(data, list) and len(data) > 0:
                    msg_data = data[0].get("data", {})
                    if isinstance(msg_data, dict):
                        ratio = msg_data.get("contrastRatio", "")
                        if ratio:
                            detail = f" (대비율: {ratio}:1, 최소 4.5:1 필요)"
            # Append how many elements are affected when more than one.
            node_count = len(nodes)
            count_info = f" ({node_count}개 요소)" if node_count > 1 else ""
            issues.append(self._create_issue(
                code=code,
                severity=severity,
                message=f"{korean_msg}{detail}{count_info}",
                element=element,
                suggestion=violation.get("helpUrl", "해당 WCAG 기준을 확인하고 수정하세요"),
                wcag_criterion=wcag,
            ))
        return issues

    def _calculate_axe_score(self, axe_results: dict) -> int:
        """
        Calculate score based on axe-core violations.
        critical=-20, serious=-10, moderate=-5, minor=-2; clamped to [0, 100].
        """
        severity_weights = {
            "critical": 20,
            "serious": 10,
            "moderate": 5,
            "minor": 2,
        }
        deduction = 0
        for violation in axe_results.get("violations", []):
            impact = violation.get("impact", "minor")
            deduction += severity_weights.get(impact, 2)
        return max(0, 100 - deduction)

    # --- BeautifulSoup fallback checks ---

    def _bs_check_img_alt(self, soup: BeautifulSoup) -> list[Issue]:
        """A-01: Every <img> needs an alt attribute (empty alt is allowed)."""
        issues = []
        images = soup.find_all("img")
        # alt="" is valid for decorative images; only a truly absent
        # attribute fails (clearer than the old double-condition check).
        missing = [img for img in images if img.get("alt") is None]
        if missing:
            issues.append(self._create_issue(
                code="A-01",
                severity="critical",
                message=f"alt 속성이 없는 이미지가 {len(missing)}개 발견되었습니다",
                element=str(missing[0])[:200],
                suggestion="모든 이미지에 설명적인 대체 텍스트를 추가하세요",
                wcag_criterion="1.1.1",
            ))
        return issues

    def _bs_check_form_labels(self, soup: BeautifulSoup) -> list[Issue]:
        """A-05: Form controls need an associated label.

        A control counts as labelled if it has a <label for=...> match,
        an aria-label/aria-labelledby/title attribute, or a <label> ancestor.
        Hidden and button-like inputs are exempt.
        """
        issues = []
        inputs = soup.find_all(["input", "select", "textarea"])
        unlabeled = []
        for inp in inputs:
            input_type = inp.get("type", "text")
            if input_type in ("hidden", "submit", "button", "reset", "image"):
                continue
            inp_id = inp.get("id")
            has_label = False
            if inp_id:
                label = soup.find("label", attrs={"for": inp_id})
                if label:
                    has_label = True
            if inp.get("aria-label") or inp.get("aria-labelledby") or inp.get("title"):
                has_label = True
            # Implicit association: control nested inside a <label>.
            parent_label = inp.find_parent("label")
            if parent_label:
                has_label = True
            if not has_label:
                unlabeled.append(inp)
        if unlabeled:
            issues.append(self._create_issue(
                code="A-05",
                severity="critical",
                message=f"레이블이 연결되지 않은 폼 요소가 {len(unlabeled)}개 발견되었습니다",
                element=str(unlabeled[0])[:200],
                suggestion="<label for='id'>를 사용하거나 aria-label 속성을 추가하세요",
                wcag_criterion="1.3.1",
            ))
        return issues

    def _bs_check_aria(self, soup: BeautifulSoup) -> list[Issue]:
        """A-06: Basic ARIA validation — flag role values outside the
        WAI-ARIA 1.1 role vocabulary."""
        issues = []
        valid_roles = {
            "alert", "alertdialog", "application", "article", "banner", "button",
            "cell", "checkbox", "columnheader", "combobox", "complementary",
            "contentinfo", "definition", "dialog", "directory", "document",
            "feed", "figure", "form", "grid", "gridcell", "group", "heading",
            "img", "link", "list", "listbox", "listitem", "log", "main",
            "marquee", "math", "menu", "menubar", "menuitem", "menuitemcheckbox",
            "menuitemradio", "navigation", "none", "note", "option", "presentation",
            "progressbar", "radio", "radiogroup", "region", "row", "rowgroup",
            "rowheader", "scrollbar", "search", "searchbox", "separator",
            "slider", "spinbutton", "status", "switch", "tab", "table",
            "tablist", "tabpanel", "term", "textbox", "timer", "toolbar",
            "tooltip", "tree", "treegrid", "treeitem",
        }
        elements_with_role = soup.find_all(attrs={"role": True})
        invalid_roles = []
        for el in elements_with_role:
            role = el.get("role", "").strip().lower()
            if role and role not in valid_roles:
                invalid_roles.append(el)
        if invalid_roles:
            issues.append(self._create_issue(
                code="A-06",
                severity="major",
                message=f"유효하지 않은 ARIA 역할이 {len(invalid_roles)}개 발견되었습니다",
                element=str(invalid_roles[0])[:200],
                suggestion="올바른 ARIA 역할을 사용하세요 (WAI-ARIA 명세 참조)",
                wcag_criterion="4.1.2",
            ))
        return issues

    def _bs_check_link_text(self, soup: BeautifulSoup) -> list[Issue]:
        """A-07: Flag links whose full text is a known vague phrase."""
        issues = []
        vague_texts = {"click here", "here", "more", "read more", "link", "여기", "더보기", "클릭"}
        links = soup.find_all("a")
        vague_links = []
        for link in links:
            text = link.get_text(strip=True).lower()
            if text in vague_texts:
                vague_links.append(link)
        if vague_links:
            issues.append(self._create_issue(
                code="A-07",
                severity="minor",
                message=f"목적이 불분명한 링크 텍스트가 {len(vague_links)}개 발견되었습니다",
                element=str(vague_links[0])[:200],
                suggestion="'여기를 클릭하세요' 대신 구체적인 링크 목적을 설명하는 텍스트를 사용하세요",
                wcag_criterion="2.4.4",
            ))
        return issues

    def _bs_check_lang(self, soup: BeautifulSoup) -> list[Issue]:
        """A-08: The <html> element must declare a lang attribute."""
        html_tag = soup.find("html")
        if html_tag is None or not html_tag.get("lang"):
            return [self._create_issue(
                code="A-08",
                severity="major",
                message="HTML 요소에 lang 속성이 없습니다",
                suggestion='<html lang="ko">와 같이 페이지 언어를 명시하세요',
                wcag_criterion="3.1.1",
            )]
        return []

    def _bs_check_skip_nav(self, soup: BeautifulSoup) -> list[Issue]:
        """A-09: Look for a skip-navigation link near the top of the page."""
        # Heuristic: an in-page anchor among the first 10 links whose text
        # mentions skipping / main content (English or Korean).
        skip_links = soup.find_all("a", href=True)
        has_skip = False
        for link in skip_links[:10]:  # Check first 10 links
            href = link.get("href", "")
            text = link.get_text(strip=True).lower()
            if href.startswith("#") and any(
                keyword in text
                for keyword in ["skip", "본문", "건너뛰기", "main", "content"]
            ):
                has_skip = True
                break
        if not has_skip:
            return [self._create_issue(
                code="A-09",
                severity="minor",
                message="건너뛰기 링크(skip navigation)가 없습니다",
                suggestion='페이지 상단에 <a href="#main-content">본문으로 건너뛰기</a> 링크를 추가하세요',
                wcag_criterion="2.4.1",
            )]
        return []

    def _bs_check_autoplay(self, soup: BeautifulSoup) -> list[Issue]:
        """A-10: Autoplaying media must expose controls (or be muted).

        Only the first offending element is reported to avoid flooding the
        result with duplicates.
        """
        issues = []
        media = soup.find_all(["video", "audio"])
        for el in media:
            if el.get("autoplay") is not None:
                has_controls = el.get("controls") is not None or el.get("muted") is not None
                if not has_controls:
                    issues.append(self._create_issue(
                        code="A-10",
                        severity="major",
                        message="자동 재생 미디어에 정지/음소거 컨트롤이 없습니다",
                        element=str(el)[:200],
                        suggestion="autoplay 미디어에 controls 속성을 추가하거나 muted 속성을 사용하세요",
                        wcag_criterion="1.4.2",
                    ))
                    break  # Report only first
        return issues

12
backend/app/engines/axe_core/axe.min.js vendored Normal file

File diff suppressed because one or more lines are too long

108
backend/app/engines/base.py Normal file
View File

@@ -0,0 +1,108 @@
"""
BaseChecker abstract class - foundation for all inspection engines.
"""
from abc import ABC, abstractmethod
from typing import Callable, Optional
from app.models.schemas import CategoryResult, Issue, Severity, calculate_grade
class BaseChecker(ABC):
    """
    Abstract base class for all inspection engines.

    Subclasses supply :attr:`category_name` and :meth:`check`; this base
    contributes progress reporting plus shared helpers for building issues,
    deduction-based scores, and category results.
    """

    def __init__(self, progress_callback: Optional[Callable] = None):
        # Async callable invoked as (category=..., progress=..., current_step=...).
        self.progress_callback = progress_callback

    async def update_progress(self, progress: int, current_step: str) -> None:
        """Report progress through the configured callback, if any."""
        if self.progress_callback is None:
            return
        await self.progress_callback(
            category=self.category_name,
            progress=progress,
            current_step=current_step,
        )

    @property
    @abstractmethod
    def category_name(self) -> str:
        """Category identifier (e.g., 'html_css')."""

    @abstractmethod
    async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult:
        """Execute inspection and return results."""

    def _create_issue(
        self,
        code: str,
        severity: str,
        message: str,
        suggestion: str,
        element: Optional[str] = None,
        line: Optional[int] = None,
        wcag_criterion: Optional[str] = None,
    ) -> Issue:
        """Build a standardized Issue tagged with this engine's category."""
        return Issue(
            code=code,
            category=self.category_name,
            severity=Severity(severity),
            message=message,
            element=element,
            line=line,
            suggestion=suggestion,
            wcag_criterion=wcag_criterion,
        )

    def _calculate_score_by_deduction(self, issues: list[Issue]) -> int:
        """
        Deduction-based score:
        100 - (critical*15 + major*8 + minor*3 + info*1), clamped to [0, 100].
        Severities outside the table deduct nothing.
        """
        weights = {"critical": 15, "major": 8, "minor": 3, "info": 1}
        total_deduction = 0
        for issue in issues:
            total_deduction += weights.get(issue.severity.value, 0)
        return max(0, 100 - total_deduction)

    def _build_result(
        self,
        category: str,
        score: int,
        issues: list[Issue],
        wcag_level: Optional[str] = None,
        meta_info: Optional[dict] = None,
        sub_scores: Optional[dict] = None,
        metrics: Optional[dict] = None,
    ) -> CategoryResult:
        """Assemble a CategoryResult, tallying issues per severity level."""
        tally = {
            Severity.CRITICAL: 0,
            Severity.MAJOR: 0,
            Severity.MINOR: 0,
            Severity.INFO: 0,
        }
        for issue in issues:
            if issue.severity in tally:
                tally[issue.severity] += 1
        return CategoryResult(
            score=score,
            grade=calculate_grade(score),
            total_issues=len(issues),
            critical=tally[Severity.CRITICAL],
            major=tally[Severity.MAJOR],
            minor=tally[Severity.MINOR],
            info=tally[Severity.INFO],
            issues=issues,
            wcag_level=wcag_level,
            meta_info=meta_info,
            sub_scores=sub_scores,
            metrics=metrics,
        )

View File

@@ -0,0 +1,308 @@
"""
HTML/CSS Standards Checker Engine (F-002).
Checks HTML5 validity, semantic tags, CSS inline usage, etc.
Uses BeautifulSoup4 + html5lib for parsing.
"""
import re
import logging
from collections import Counter
from typing import Optional
from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
logger = logging.getLogger(__name__)
# Presentational tags that are obsolete in HTML5; any usage is flagged by H-10.
DEPRECATED_TAGS = [
    "font", "center", "marquee", "blink", "strike", "big", "tt",
    "basefont", "applet", "dir", "isindex",
]
# Structural HTML5 tags whose presence is verified by H-05.
SEMANTIC_TAGS = ["header", "nav", "main", "footer", "section", "article"]
class HtmlCssChecker(BaseChecker):
    """HTML/CSS standards checker engine.

    Runs twelve static checks (H-01..H-12) over the fetched document using
    BeautifulSoup with the html5lib parser (the closest to browser parsing).
    """

    @property
    def category_name(self) -> str:
        """Category identifier used in results and progress callbacks."""
        return "html_css"

    async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult:
        """Execute all HTML/CSS checks and return the scored category result."""
        soup = BeautifulSoup(html_content, "html5lib")
        issues: list[Issue] = []
        await self.update_progress(10, "DOCTYPE 검사 중...")
        issues += self._check_doctype(html_content)
        await self.update_progress(20, "문자 인코딩 검사 중...")
        issues += self._check_charset(soup)
        await self.update_progress(30, "언어 속성 검사 중...")
        issues += self._check_lang(soup)
        await self.update_progress(40, "title 태그 검사 중...")
        issues += self._check_title(soup)
        await self.update_progress(50, "시맨틱 태그 검사 중...")
        issues += self._check_semantic_tags(soup)
        await self.update_progress(60, "이미지 alt 속성 검사 중...")
        issues += self._check_img_alt(soup)
        await self.update_progress(70, "중복 ID 검사 중...")
        issues += self._check_duplicate_ids(soup)
        await self.update_progress(80, "링크 및 스타일 검사 중...")
        issues += self._check_empty_links(soup)
        issues += self._check_inline_styles(soup)
        issues += self._check_deprecated_tags(soup)
        await self.update_progress(90, "heading 구조 검사 중...")
        issues += self._check_heading_hierarchy(soup)
        issues += self._check_viewport_meta(soup)
        score = self._calculate_score_by_deduction(issues)
        await self.update_progress(100, "완료")
        return self._build_result(
            category="html_css",
            score=score,
            issues=issues,
        )

    def _check_doctype(self, html_content: str) -> list[Issue]:
        """H-01: The raw document must start with <!DOCTYPE html>.

        Checked on the raw text because html5lib normalizes the doctype away.
        NOTE(review): a comment placed before the doctype would trip this
        check — acceptable for a lint-style tool.
        """
        stripped = html_content.lstrip()
        if not stripped.lower().startswith("<!doctype html"):
            return [self._create_issue(
                code="H-01",
                severity="major",
                message="DOCTYPE 선언이 없습니다",
                suggestion="문서 최상단에 <!DOCTYPE html>을 추가하세요",
            )]
        return []

    def _check_charset(self, soup: BeautifulSoup) -> list[Issue]:
        """H-02: Require either <meta charset> or the legacy
        http-equiv="Content-Type" declaration."""
        meta_charset = soup.find("meta", attrs={"charset": True})
        meta_content_type = soup.find("meta", attrs={"http-equiv": re.compile(r"content-type", re.I)})
        if meta_charset is None and meta_content_type is None:
            return [self._create_issue(
                code="H-02",
                severity="major",
                message="문자 인코딩(charset) 선언이 없습니다",
                suggestion='<meta charset="utf-8">을 <head> 태그 안에 추가하세요',
            )]
        return []

    def _check_lang(self, soup: BeautifulSoup) -> list[Issue]:
        """H-03: The <html> element should declare a lang attribute."""
        html_tag = soup.find("html")
        if html_tag is None or not html_tag.get("lang"):
            return [self._create_issue(
                code="H-03",
                severity="minor",
                message="HTML 언어 속성(lang)이 설정되지 않았습니다",
                suggestion='<html lang="ko"> 또는 해당 언어 코드를 추가하세요',
            )]
        return []

    def _check_title(self, soup: BeautifulSoup) -> list[Issue]:
        """H-04: <title> must exist and contain non-whitespace text."""
        title = soup.find("title")
        if title is None:
            return [self._create_issue(
                code="H-04",
                severity="major",
                message="<title> 태그가 없습니다",
                suggestion="<head> 안에 <title> 태그를 추가하세요",
            )]
        # Fix: use get_text() rather than .string — .string is None whenever
        # the title contains nested markup, which previously produced a
        # false "empty title" report for e.g. <title>Foo <b>Bar</b></title>.
        if not title.get_text(strip=True):
            return [self._create_issue(
                code="H-04",
                severity="major",
                message="<title> 태그가 비어있습니다",
                element=str(title),
                suggestion="<title> 태그에 페이지 제목을 입력하세요",
            )]
        return []

    def _check_semantic_tags(self, soup: BeautifulSoup) -> list[Issue]:
        """H-05: Encourage semantic HTML5 structure.

        Reports one issue when no semantic tag is used at all, or when
        <main> (the most important landmark) is among the missing tags.
        """
        found_tags = set()
        for tag_name in SEMANTIC_TAGS:
            if soup.find(tag_name):
                found_tags.add(tag_name)
        if not found_tags:
            return [self._create_issue(
                code="H-05",
                severity="minor",
                message="시맨틱 태그가 사용되지 않았습니다 (header, nav, main, footer, section, article)",
                suggestion="적절한 시맨틱 태그를 사용하여 문서 구조를 명확히 하세요",
            )]
        missing = set(SEMANTIC_TAGS) - found_tags
        # Only report if major structural elements are missing (main is most important)
        if "main" in missing:
            return [self._create_issue(
                code="H-05",
                severity="minor",
                message=f"주요 시맨틱 태그가 누락되었습니다: {', '.join(sorted(missing))}",
                suggestion="<main> 태그를 사용하여 주요 콘텐츠 영역을 표시하세요",
            )]
        return []

    def _check_img_alt(self, soup: BeautifulSoup) -> list[Issue]:
        """H-06: Every <img> must carry an alt attribute (empty alt allowed)."""
        issues = []
        images = soup.find_all("img")
        for img in images:
            # alt="" is a valid (decorative) value; only a truly absent
            # attribute fails — clearer than the old double-condition check.
            if img.get("alt") is None:
                line = self._get_line_number(img)
                issues.append(self._create_issue(
                    code="H-06",
                    severity="major",
                    message="이미지에 alt 속성이 없습니다",
                    element=self._truncate_element(str(img)),
                    line=line,
                    suggestion="이미지에 설명을 위한 alt 속성을 추가하세요",
                ))
        return issues

    def _check_duplicate_ids(self, soup: BeautifulSoup) -> list[Issue]:
        """H-07: IDs must be document-unique; one issue per duplicated id.

        Elements are grouped by id in a single pass (the previous version
        rescanned the full element list for every duplicated id).
        """
        issues = []
        elements_by_id: dict = {}
        for el in soup.find_all(id=True):
            elements_by_id.setdefault(el.get("id"), []).append(el)
        for id_val, elements in elements_by_id.items():
            count = len(elements)
            if count > 1:
                first_el = elements[0]
                issues.append(self._create_issue(
                    code="H-07",
                    severity="critical",
                    message=f"중복 ID 발견: '{id_val}' ({count}회 사용)",
                    element=self._truncate_element(str(first_el)),
                    line=self._get_line_number(first_el),
                    suggestion="각 요소에 고유한 ID를 부여하세요",
                ))
        return issues

    def _check_empty_links(self, soup: BeautifulSoup) -> list[Issue]:
        """H-08: Count anchors whose href is empty or just '#'; one summary
        issue is emitted referencing the first offender."""
        issues = []
        links = soup.find_all("a")
        empty_count = 0
        first_element = None
        first_line = None
        for link in links:
            href = link.get("href", "")
            if href == "" or href == "#":
                empty_count += 1
                if first_element is None:
                    first_element = self._truncate_element(str(link))
                    first_line = self._get_line_number(link)
        if empty_count > 0:
            issues.append(self._create_issue(
                code="H-08",
                severity="minor",
                message=f"빈 링크(href가 비어있거나 '#')가 {empty_count}개 발견되었습니다",
                element=first_element,
                line=first_line,
                suggestion="링크에 유효한 URL을 설정하거나, 버튼이 필요한 경우 <button>을 사용하세요",
            ))
        return issues

    def _check_inline_styles(self, soup: BeautifulSoup) -> list[Issue]:
        """H-09: Informational count of elements using a style attribute."""
        issues = []
        styled_elements = soup.find_all(style=True)
        if styled_elements:
            first_el = styled_elements[0]
            issues.append(self._create_issue(
                code="H-09",
                severity="info",
                message=f"인라인 스타일이 {len(styled_elements)}개 요소에서 사용되고 있습니다",
                element=self._truncate_element(str(first_el)),
                line=self._get_line_number(first_el),
                suggestion="인라인 스타일 대신 외부 CSS 파일 또는 <style> 태그를 사용하세요",
            ))
        return issues

    def _check_deprecated_tags(self, soup: BeautifulSoup) -> list[Issue]:
        """H-10: One issue per deprecated tag name found in the document."""
        issues = []
        for tag_name in DEPRECATED_TAGS:
            found = soup.find_all(tag_name)
            if found:
                first_el = found[0]
                issues.append(self._create_issue(
                    code="H-10",
                    severity="major",
                    message=f"사용 중단된(deprecated) 태그 <{tag_name}>이(가) {len(found)}회 사용되었습니다",
                    element=self._truncate_element(str(first_el)),
                    line=self._get_line_number(first_el),
                    suggestion=f"<{tag_name}> 대신 CSS를 사용하여 스타일을 적용하세요",
                ))
        return issues

    def _check_heading_hierarchy(self, soup: BeautifulSoup) -> list[Issue]:
        """H-11: Headings must not skip levels (e.g. h2 followed by h4).

        Only the first skip is reported to keep the result concise.
        """
        issues = []
        headings = soup.find_all(re.compile(r"^h[1-6]$"))
        if not headings:
            return []
        prev_level = 0
        for heading in headings:
            level = int(heading.name[1])
            if prev_level > 0 and level > prev_level + 1:
                issues.append(self._create_issue(
                    code="H-11",
                    severity="minor",
                    message=f"heading 계층 구조가 건너뛰어졌습니다: h{prev_level} 다음에 h{level}",
                    element=self._truncate_element(str(heading)),
                    line=self._get_line_number(heading),
                    suggestion=f"h{prev_level} 다음에는 h{prev_level + 1}을 사용하세요",
                ))
                break  # Only report first skip
            prev_level = level
        return issues

    def _check_viewport_meta(self, soup: BeautifulSoup) -> list[Issue]:
        """H-12: A viewport meta tag is required for responsive rendering."""
        viewport = soup.find("meta", attrs={"name": re.compile(r"viewport", re.I)})
        if viewport is None:
            return [self._create_issue(
                code="H-12",
                severity="major",
                message="viewport meta 태그가 없습니다",
                suggestion='<meta name="viewport" content="width=device-width, initial-scale=1.0">을 추가하세요',
            )]
        return []

    @staticmethod
    def _get_line_number(element) -> Optional[int]:
        """Extract the source line number html5lib attached to an element."""
        if element and hasattr(element, "sourceline"):
            return element.sourceline
        return None

    @staticmethod
    def _truncate_element(element_str: str, max_len: int = 200) -> str:
        """Truncate an element's string form for display in reports."""
        if len(element_str) > max_len:
            return element_str[:max_len] + "..."
        return element_str

View File

@@ -0,0 +1,454 @@
"""
Performance/Security Checker Engine (F-005).
Checks security headers, HTTPS, SSL certificate, response time, page size, etc.
"""
import re
import ssl
import socket
import logging
import time
from datetime import datetime, timezone
from urllib.parse import urlparse
from typing import Optional
import httpx
from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue, calculate_grade
logger = logging.getLogger(__name__)
class PerformanceSecurityChecker(BaseChecker):
"""Performance and security checker engine."""
    @property
    def category_name(self) -> str:
        """Category identifier used in results and progress callbacks."""
        return "performance_security"
async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult:
issues: list[Issue] = []
metrics: dict = {}
await self.update_progress(10, "HTTPS 검사 중...")
issues += self._check_https(url, metrics)
await self.update_progress(20, "SSL 인증서 검사 중...")
issues += await self._check_ssl(url, metrics)
await self.update_progress(35, "보안 헤더 검사 중...")
issues += self._check_hsts(headers)
issues += self._check_csp(headers)
issues += self._check_x_content_type(headers)
issues += self._check_x_frame_options(headers)
issues += self._check_x_xss_protection(headers)
issues += self._check_referrer_policy(headers)
issues += self._check_permissions_policy(headers)
await self.update_progress(60, "응답 시간 측정 중...")
issues += await self._check_ttfb(url, metrics)
await self.update_progress(70, "페이지 크기 분석 중...")
issues += self._check_page_size(html_content, metrics)
await self.update_progress(80, "리다이렉트 검사 중...")
issues += await self._check_redirects(url, metrics)
await self.update_progress(85, "압축 검사 중...")
issues += self._check_compression(headers, metrics)
await self.update_progress(90, "혼합 콘텐츠 검사 중...")
issues += self._check_mixed_content(url, html_content)
score, sub_scores = self._calculate_composite_score(issues, metrics)
await self.update_progress(100, "완료")
return self._build_result(
category="performance_security",
score=score,
issues=issues,
sub_scores=sub_scores,
metrics=metrics,
)
def _check_https(self, url: str, metrics: dict) -> list[Issue]:
"""P-01: Check HTTPS usage."""
parsed = urlparse(url)
is_https = parsed.scheme == "https"
metrics["https"] = is_https
if not is_https:
return [self._create_issue(
code="P-01",
severity="critical",
message="HTTPS를 사용하지 않고 있습니다",
suggestion="사이트 보안을 위해 HTTPS를 적용하세요",
)]
return []
async def _check_ssl(self, url: str, metrics: dict) -> list[Issue]:
"""P-02: Check SSL certificate validity and expiry."""
parsed = urlparse(url)
if parsed.scheme != "https":
metrics["ssl_valid"] = False
metrics["ssl_expiry_days"] = None
return [self._create_issue(
code="P-02",
severity="critical",
message="HTTPS를 사용하지 않아 SSL 인증서를 확인할 수 없습니다",
suggestion="SSL 인증서를 설치하고 HTTPS를 적용하세요",
)]
hostname = parsed.hostname
port = parsed.port or 443
try:
ctx = ssl.create_default_context()
conn = ctx.wrap_socket(
socket.socket(socket.AF_INET),
server_hostname=hostname,
)
conn.settimeout(5)
conn.connect((hostname, port))
cert = conn.getpeercert()
conn.close()
# Check expiry
not_after = cert.get("notAfter")
if not_after:
expiry_date = datetime.strptime(not_after, "%b %d %H:%M:%S %Y %Z")
days_remaining = (expiry_date - datetime.now()).days
metrics["ssl_valid"] = True
metrics["ssl_expiry_days"] = days_remaining
if days_remaining < 0:
return [self._create_issue(
code="P-02",
severity="critical",
message="SSL 인증서가 만료되었습니다",
suggestion="SSL 인증서를 즉시 갱신하세요",
)]
elif days_remaining < 30:
return [self._create_issue(
code="P-02",
severity="major",
message=f"SSL 인증서가 {days_remaining}일 후 만료됩니다",
suggestion="인증서 만료 전에 갱신하세요",
)]
else:
metrics["ssl_valid"] = True
metrics["ssl_expiry_days"] = None
except ssl.SSLError as e:
metrics["ssl_valid"] = False
metrics["ssl_expiry_days"] = None
return [self._create_issue(
code="P-02",
severity="critical",
message=f"SSL 인증서가 유효하지 않습니다: {str(e)[:100]}",
suggestion="유효한 SSL 인증서를 설치하세요",
)]
except Exception as e:
logger.warning("SSL check failed for %s: %s", url, str(e))
metrics["ssl_valid"] = None
metrics["ssl_expiry_days"] = None
return [self._create_issue(
code="P-02",
severity="minor",
message="SSL 인증서를 확인할 수 없습니다",
suggestion="서버의 SSL 설정을 점검하세요",
)]
return []
def _check_hsts(self, headers: dict) -> list[Issue]:
"""P-03: Check Strict-Transport-Security header."""
hsts = self._get_header(headers, "Strict-Transport-Security")
if not hsts:
return [self._create_issue(
code="P-03",
severity="major",
message="Strict-Transport-Security(HSTS) 헤더가 설정되지 않았습니다",
suggestion="HSTS 헤더를 추가하세요: Strict-Transport-Security: max-age=31536000; includeSubDomains",
)]
return []
def _check_csp(self, headers: dict) -> list[Issue]:
"""P-04: Check Content-Security-Policy header."""
csp = self._get_header(headers, "Content-Security-Policy")
if not csp:
return [self._create_issue(
code="P-04",
severity="major",
message="Content-Security-Policy(CSP) 헤더가 설정되지 않았습니다",
suggestion="CSP 헤더를 추가하여 XSS 공격을 방지하세요",
)]
return []
def _check_x_content_type(self, headers: dict) -> list[Issue]:
"""P-05: Check X-Content-Type-Options header."""
xcto = self._get_header(headers, "X-Content-Type-Options")
if not xcto or "nosniff" not in xcto.lower():
return [self._create_issue(
code="P-05",
severity="minor",
message="X-Content-Type-Options 헤더가 설정되지 않았습니다",
suggestion="X-Content-Type-Options: nosniff 헤더를 추가하세요",
)]
return []
def _check_x_frame_options(self, headers: dict) -> list[Issue]:
"""P-06: Check X-Frame-Options header."""
xfo = self._get_header(headers, "X-Frame-Options")
if not xfo:
return [self._create_issue(
code="P-06",
severity="minor",
message="X-Frame-Options 헤더가 설정되지 않았습니다",
suggestion="클릭재킹 방지를 위해 X-Frame-Options: DENY 또는 SAMEORIGIN을 설정하세요",
)]
return []
def _check_x_xss_protection(self, headers: dict) -> list[Issue]:
"""P-07: Check X-XSS-Protection header (deprecated notice)."""
xxp = self._get_header(headers, "X-XSS-Protection")
if xxp:
return [self._create_issue(
code="P-07",
severity="info",
message="X-XSS-Protection 헤더가 설정되어 있습니다 (현재 deprecated)",
suggestion="X-XSS-Protection 대신 Content-Security-Policy를 사용하세요",
)]
return []
def _check_referrer_policy(self, headers: dict) -> list[Issue]:
"""P-08: Check Referrer-Policy header."""
rp = self._get_header(headers, "Referrer-Policy")
if not rp:
return [self._create_issue(
code="P-08",
severity="minor",
message="Referrer-Policy 헤더가 설정되지 않았습니다",
suggestion="Referrer-Policy: strict-origin-when-cross-origin을 설정하세요",
)]
return []
def _check_permissions_policy(self, headers: dict) -> list[Issue]:
"""P-09: Check Permissions-Policy header."""
pp = self._get_header(headers, "Permissions-Policy")
if not pp:
return [self._create_issue(
code="P-09",
severity="minor",
message="Permissions-Policy 헤더가 설정되지 않았습니다",
suggestion="Permissions-Policy 헤더를 추가하여 브라우저 기능 접근을 제한하세요",
)]
return []
async def _check_ttfb(self, url: str, metrics: dict) -> list[Issue]:
"""P-10: Check Time To First Byte (TTFB)."""
try:
start = time.monotonic()
async with httpx.AsyncClient(
timeout=httpx.Timeout(10.0),
follow_redirects=True,
verify=False,
) as client:
resp = await client.get(url, headers={
"User-Agent": "WebInspector/1.0 (Inspection Bot)",
})
ttfb_ms = round((time.monotonic() - start) * 1000)
metrics["ttfb_ms"] = ttfb_ms
if ttfb_ms > 2000:
return [self._create_issue(
code="P-10",
severity="major",
message=f"응답 시간(TTFB)이 느립니다: {ttfb_ms}ms (권장 < 1000ms)",
suggestion="서버 응답 속도를 개선하세요 (캐싱, CDN, 서버 최적화)",
)]
elif ttfb_ms > 1000:
return [self._create_issue(
code="P-10",
severity="minor",
message=f"응답 시간(TTFB)이 다소 느립니다: {ttfb_ms}ms (권장 < 1000ms)",
suggestion="서버 응답 속도 개선을 고려하세요",
)]
except Exception as e:
logger.warning("TTFB check failed for %s: %s", url, str(e))
metrics["ttfb_ms"] = None
return [self._create_issue(
code="P-10",
severity="major",
message="응답 시간(TTFB)을 측정할 수 없습니다",
suggestion="서버 접근성을 확인하세요",
)]
return []
def _check_page_size(self, html_content: str, metrics: dict) -> list[Issue]:
"""P-11: Check HTML page size."""
size_bytes = len(html_content.encode("utf-8"))
metrics["page_size_bytes"] = size_bytes
if size_bytes > 3 * 1024 * 1024: # 3MB
return [self._create_issue(
code="P-11",
severity="minor",
message=f"페이지 크기가 큽니다: {round(size_bytes / 1024 / 1024, 1)}MB (권장 < 3MB)",
suggestion="페이지 크기를 줄이세요 (불필요한 코드 제거, 이미지 최적화, 코드 분할)",
)]
return []
async def _check_redirects(self, url: str, metrics: dict) -> list[Issue]:
    """P-12: Count redirect hops and flag chains of 3 or more."""
    try:
        async with httpx.AsyncClient(
            timeout=httpx.Timeout(10.0),
            follow_redirects=True,
            verify=False,
        ) as client:
            response = await client.get(url, headers={
                "User-Agent": "WebInspector/1.0 (Inspection Bot)",
            })
        # response.history holds one entry per followed redirect.
        hops = len(response.history)
        metrics["redirect_count"] = hops
        if hops >= 3:
            return [self._create_issue(
                code="P-12",
                severity="minor",
                message=f"리다이렉트가 {hops}회 발생합니다 (권장 < 3회)",
                suggestion="리다이렉트 체인을 줄여 로딩 속도를 개선하세요",
            )]
    except Exception as exc:
        # Best-effort check: record failure, do not raise an issue.
        logger.warning("Redirect check failed for %s: %s", url, str(exc))
        metrics["redirect_count"] = None
    return []
def _check_compression(self, headers: dict, metrics: dict) -> list[Issue]:
    """P-13: Verify the response body was served compressed (Gzip/Brotli)."""
    encoding = self._get_header(headers, "Content-Encoding")
    if not encoding:
        metrics["compression"] = None
        return [self._create_issue(
            code="P-13",
            severity="minor",
            message="응답 압축(Gzip/Brotli)이 적용되지 않았습니다",
            suggestion="서버에서 Gzip 또는 Brotli 압축을 활성화하세요",
        )]
    # Normalize for the metrics record (e.g. "gzip", "br").
    metrics["compression"] = encoding.lower()
    return []
def _check_mixed_content(self, url: str, html_content: str) -> list[Issue]:
"""P-14: Check for mixed content (HTTP resources on HTTPS page)."""
parsed = urlparse(url)
if parsed.scheme != "https":
return []
soup = BeautifulSoup(html_content, "html5lib")
mixed_elements = []
# Check src attributes
for tag in soup.find_all(["img", "script", "link", "iframe", "audio", "video", "source"]):
src = tag.get("src") or tag.get("href")
if src and src.startswith("http://"):
mixed_elements.append(tag)
if mixed_elements:
return [self._create_issue(
code="P-14",
severity="major",
message=f"혼합 콘텐츠 발견: HTTPS 페이지에서 HTTP 리소스 {len(mixed_elements)}개 로드",
element=self._truncate_element(str(mixed_elements[0])) if mixed_elements else None,
suggestion="모든 리소스를 HTTPS로 변경하세요",
)]
return []
def _calculate_composite_score(self, issues: list[Issue], metrics: dict) -> tuple[int, dict]:
"""
Calculate composite score:
Security (70%): HTTPS/SSL (30%) + Security Headers (40%)
Performance (30%): Response time (40%) + Page size (30%) + Compression (30%)
"""
# Security score
security_score = 100
# HTTPS/SSL component (30% of security)
https_ssl_score = 100
for issue in issues:
if issue.code in ("P-01", "P-02"):
if issue.severity.value == "critical":
https_ssl_score -= 50
elif issue.severity.value == "major":
https_ssl_score -= 25
https_ssl_score = max(0, https_ssl_score)
# Security headers component (40% of security)
header_issues = [i for i in issues if i.code in ("P-03", "P-04", "P-05", "P-06", "P-07", "P-08", "P-09")]
total_header_checks = 7
passed_headers = total_header_checks - len(header_issues)
header_score = round(passed_headers / total_header_checks * 100) if total_header_checks else 100
security_score = round(https_ssl_score * 0.43 + header_score * 0.57)
# Performance score
perf_score = 100
# TTFB component (40% of performance)
ttfb = metrics.get("ttfb_ms")
if ttfb is not None:
if ttfb <= 500:
ttfb_score = 100
elif ttfb <= 1000:
ttfb_score = 80
elif ttfb <= 2000:
ttfb_score = 60
else:
ttfb_score = 30
else:
ttfb_score = 50
# Page size component (30% of performance)
page_size = metrics.get("page_size_bytes", 0)
if page_size <= 1024 * 1024: # 1MB
size_score = 100
elif page_size <= 2 * 1024 * 1024: # 2MB
size_score = 80
elif page_size <= 3 * 1024 * 1024: # 3MB
size_score = 60
else:
size_score = 30
# Compression component (30% of performance)
compression = metrics.get("compression")
compression_score = 100 if compression else 50
perf_score = round(ttfb_score * 0.4 + size_score * 0.3 + compression_score * 0.3)
# Composite
overall = round(security_score * 0.7 + perf_score * 0.3)
overall = max(0, min(100, overall))
sub_scores = {
"security": security_score,
"performance": perf_score,
}
return overall, sub_scores
@staticmethod
def _get_header(headers: dict, name: str) -> Optional[str]:
"""Case-insensitive header lookup."""
for key, value in headers.items():
if key.lower() == name.lower():
return value
return None
@staticmethod
def _truncate_element(element_str: str, max_len: int = 200) -> str:
if len(element_str) > max_len:
return element_str[:max_len] + "..."
return element_str

382
backend/app/engines/seo.py Normal file
View File

@ -0,0 +1,382 @@
"""
SEO Optimization Checker Engine (F-004).
Checks meta tags, OG tags, robots.txt, sitemap.xml, structured data, etc.
"""
import re
import json
import logging
from urllib.parse import urlparse, urljoin
from typing import Optional
import httpx
from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
logger = logging.getLogger(__name__)
class SeoChecker(BaseChecker):
    """SEO optimization checker engine.

    Runs the S-01..S-14 checks against a fetched page: title/meta tags,
    Open Graph and Twitter Card tags, canonical URL, robots.txt,
    sitemap.xml, H1 usage, structured data, favicon, viewport, URL
    structure and image alt attributes.
    """
    @property
    def category_name(self) -> str:
        # Category key under which this engine's results are reported.
        return "seo"
async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult:
    """Run every SEO check (S-01..S-14) and return the category result.

    Progress is reported through update_progress; the collected issues
    are turned into a score via severity deduction.
    """
    soup = BeautifulSoup(html_content, "html5lib")
    issues: list[Issue] = []
    meta_info: dict = {}

    await self.update_progress(10, "title 태그 검사 중...")
    issues.extend(self._check_title(soup, meta_info))

    await self.update_progress(20, "meta description 검사 중...")
    issues.extend(self._check_meta_description(soup, meta_info))
    issues.extend(self._check_meta_keywords(soup, meta_info))

    await self.update_progress(30, "OG 태그 검사 중...")
    issues.extend(self._check_og_tags(soup))
    issues.extend(self._check_twitter_card(soup))

    await self.update_progress(40, "canonical URL 검사 중...")
    issues.extend(self._check_canonical(soup))

    await self.update_progress(50, "robots.txt 확인 중...")
    issues.extend(await self._check_robots_txt(url, meta_info))

    await self.update_progress(60, "sitemap.xml 확인 중...")
    issues.extend(await self._check_sitemap(url, meta_info))

    await self.update_progress(70, "H1 태그 검사 중...")
    issues.extend(self._check_h1(soup))

    await self.update_progress(80, "구조화 데이터 검사 중...")
    issues.extend(self._check_structured_data(soup, html_content, meta_info))

    await self.update_progress(90, "기타 항목 검사 중...")
    issues.extend(self._check_favicon(soup))
    issues.extend(self._check_viewport(soup))
    issues.extend(self._check_url_structure(url))
    issues.extend(self._check_img_alt_seo(soup))

    score = self._calculate_score_by_deduction(issues)
    await self.update_progress(100, "완료")
    return self._build_result(
        category="seo",
        score=score,
        issues=issues,
        meta_info=meta_info,
    )
def _check_title(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
    """S-01: Validate that <title> exists and is 10-60 characters long."""
    found: list[Issue] = []
    tag = soup.find("title")
    # Missing tag, empty .string, or whitespace-only text all count as absent.
    text = tag.string.strip() if tag is not None and tag.string else ""
    if not text:
        meta_info["title"] = None
        meta_info["title_length"] = 0
        found.append(self._create_issue(
            code="S-01",
            severity="critical",
            message="<title> 태그가 없거나 비어있습니다",
            suggestion="검색 결과에 표시될 10-60자 길이의 페이지 제목을 설정하세요",
        ))
        return found
    length = len(text)
    meta_info["title"] = text
    meta_info["title_length"] = length
    if length < 10:
        found.append(self._create_issue(
            code="S-01",
            severity="critical",
            message=f"title이 너무 짧습니다 ({length}자, 권장 10-60자)",
            element=f"<title>{text}</title>",
            suggestion="검색 결과에 효과적으로 표시되도록 10자 이상의 제목을 작성하세요",
        ))
    elif length > 60:
        found.append(self._create_issue(
            code="S-01",
            severity="minor",
            message=f"title이 너무 깁니다 ({length}자, 권장 10-60자)",
            element=f"<title>{text[:50]}...</title>",
            suggestion="검색 결과에서 잘리지 않도록 60자 이내로 제목을 줄이세요",
        ))
    return found
def _check_meta_description(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
    """S-02: Validate meta description presence and length (50-160 chars)."""
    tag = soup.find("meta", attrs={"name": re.compile(r"^description$", re.I)})
    if tag is None or not tag.get("content"):
        meta_info["description"] = None
        meta_info["description_length"] = 0
        return [self._create_issue(
            code="S-02",
            severity="major",
            message="meta description이 없습니다",
            suggestion='<meta name="description" content="페이지 설명">을 추가하세요 (50-160자 권장)',
        )]
    text = tag["content"].strip()
    length = len(text)
    meta_info["description"] = text
    meta_info["description_length"] = length
    if length < 50:
        return [self._create_issue(
            code="S-02",
            severity="major",
            message=f"meta description이 너무 짧습니다 ({length}자, 권장 50-160자)",
            suggestion="검색 결과에서 페이지를 효과적으로 설명하도록 50자 이상으로 작성하세요",
        )]
    if length > 160:
        return [self._create_issue(
            code="S-02",
            severity="minor",
            message=f"meta description이 너무 깁니다 ({length}자, 권장 50-160자)",
            suggestion="검색 결과에서 잘리지 않도록 160자 이내로 줄이세요",
        )]
    return []
def _check_meta_keywords(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
    """S-03: Note absence of meta keywords (informational only)."""
    tag = soup.find("meta", attrs={"name": re.compile(r"^keywords$", re.I)})
    has_keywords = bool(tag is not None and tag.get("content"))
    meta_info["has_keywords"] = has_keywords
    if has_keywords:
        return []
    return [self._create_issue(
        code="S-03",
        severity="info",
        message="meta keywords가 없습니다 (현재 대부분의 검색엔진에서 무시됨)",
        suggestion="meta keywords는 SEO에 큰 영향이 없지만, 참고용으로 추가할 수 있습니다",
    )]
def _check_og_tags(self, soup: BeautifulSoup) -> list[Issue]:
    """S-04: Require og:title, og:description and og:image meta tags."""
    missing = [
        prop
        for prop in ("og:title", "og:description", "og:image")
        if (tag := soup.find("meta", attrs={"property": prop})) is None
        or not tag.get("content")
    ]
    if not missing:
        return []
    return [self._create_issue(
        code="S-04",
        severity="major",
        message=f"Open Graph 태그가 누락되었습니다: {', '.join(missing)}",
        suggestion=f'누락된 OG 태그를 추가하세요. 예: <meta property="{missing[0]}" content="">',
    )]
def _check_twitter_card(self, soup: BeautifulSoup) -> list[Issue]:
    """S-05: Check for Twitter Card meta tags (card type or title suffices)."""
    has_any = (
        soup.find("meta", attrs={"name": "twitter:card"}) is not None
        or soup.find("meta", attrs={"name": "twitter:title"}) is not None
    )
    if has_any:
        return []
    return [self._create_issue(
        code="S-05",
        severity="minor",
        message="Twitter Card 태그가 없습니다",
        suggestion='<meta name="twitter:card" content="summary_large_image">를 추가하세요',
    )]
def _check_canonical(self, soup: BeautifulSoup) -> list[Issue]:
    """S-06: Ensure a canonical link with a non-empty href is declared."""
    link = soup.find("link", attrs={"rel": "canonical"})
    if link is not None and link.get("href"):
        return []
    return [self._create_issue(
        code="S-06",
        severity="major",
        message="canonical URL이 설정되지 않았습니다",
        suggestion='<link rel="canonical" href="현재페이지URL">을 추가하여 중복 콘텐츠 문제를 방지하세요',
    )]
async def _check_robots_txt(self, url: str, meta_info: dict) -> list[Issue]:
    """S-07: Probe /robots.txt at the site root and record reachability."""
    origin = urlparse(url)
    robots_url = f"{origin.scheme}://{origin.netloc}/robots.txt"
    status = None
    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(5.0), verify=False) as client:
            response = await client.get(robots_url)
        status = response.status_code
    except Exception as exc:
        logger.warning("robots.txt check failed for %s: %s", url, str(exc))
    if status == 200:
        meta_info["has_robots_txt"] = True
        return []
    meta_info["has_robots_txt"] = False
    if status is not None:
        # Reached the server but got a non-200 answer: include the code.
        message = f"robots.txt에 접근할 수 없습니다 (HTTP {status})"
    else:
        message = "robots.txt에 접근할 수 없습니다"
    return [self._create_issue(
        code="S-07",
        severity="major",
        message=message,
        suggestion="검색엔진 크롤링을 제어하기 위해 /robots.txt 파일을 생성하세요",
    )]
async def _check_sitemap(self, url: str, meta_info: dict) -> list[Issue]:
    """S-08: Probe /sitemap.xml at the site root and record reachability."""
    origin = urlparse(url)
    sitemap_url = f"{origin.scheme}://{origin.netloc}/sitemap.xml"
    status = None
    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(5.0), verify=False) as client:
            response = await client.get(sitemap_url)
        status = response.status_code
    except Exception as exc:
        logger.warning("sitemap.xml check failed for %s: %s", url, str(exc))
    if status == 200:
        meta_info["has_sitemap"] = True
        return []
    meta_info["has_sitemap"] = False
    if status is not None:
        message = f"sitemap.xml에 접근할 수 없습니다 (HTTP {status})"
    else:
        message = "sitemap.xml에 접근할 수 없습니다"
    return [self._create_issue(
        code="S-08",
        severity="major",
        message=message,
        suggestion="검색엔진이 사이트 구조를 이해할 수 있도록 /sitemap.xml을 생성하세요",
    )]
def _check_h1(self, soup: BeautifulSoup) -> list[Issue]:
    """S-09: Require exactly one H1 heading on the page."""
    headings = soup.find_all("h1")
    count = len(headings)
    if count == 1:
        return []
    if count == 0:
        return [self._create_issue(
            code="S-09",
            severity="critical",
            message="H1 태그가 없습니다",
            suggestion="페이지의 주요 제목을 <h1> 태그로 추가하세요",
        )]
    return [self._create_issue(
        code="S-09",
        severity="critical",
        message=f"H1 태그가 {count}개 발견되었습니다 (1개 권장)",
        element=self._truncate_element(str(headings[0])),
        suggestion="페이지당 H1 태그는 1개만 사용하세요",
    )]
def _check_structured_data(self, soup: BeautifulSoup, html_content: str, meta_info: dict) -> list[Issue]:
    """S-10: Detect structured data in JSON-LD, Microdata or RDFa form.

    html_content is accepted for interface compatibility but the detection
    works entirely on the parsed soup.
    """
    detected = []
    if soup.find_all("script", attrs={"type": "application/ld+json"}):
        detected.append("JSON-LD")
    if soup.find_all(attrs={"itemscope": True}):
        detected.append("Microdata")
    if soup.find_all(attrs={"typeof": True}):
        detected.append("RDFa")
    meta_info["structured_data_types"] = detected
    if detected:
        return []
    return [self._create_issue(
        code="S-10",
        severity="minor",
        message="구조화 데이터(JSON-LD, Microdata, RDFa)가 없습니다",
        suggestion='<script type="application/ld+json">을 사용하여 구조화 데이터를 추가하세요',
    )]
def _check_favicon(self, soup: BeautifulSoup) -> list[Issue]:
    """S-11: Check that a favicon link is declared (rel containing "icon")."""
    if soup.find("link", attrs={"rel": re.compile(r"icon", re.I)}) is not None:
        return []
    return [self._create_issue(
        code="S-11",
        severity="minor",
        message="favicon이 설정되지 않았습니다",
        suggestion='<link rel="icon" href="/favicon.ico">를 추가하세요',
    )]
def _check_viewport(self, soup: BeautifulSoup) -> list[Issue]:
    """S-12: Check for a viewport meta tag (mobile friendliness signal)."""
    if soup.find("meta", attrs={"name": re.compile(r"^viewport$", re.I)}) is not None:
        return []
    return [self._create_issue(
        code="S-12",
        severity="major",
        message="viewport meta 태그가 없습니다 (모바일 친화성 부족)",
        suggestion='<meta name="viewport" content="width=device-width, initial-scale=1.0">을 추가하세요',
    )]
def _check_url_structure(self, url: str) -> list[Issue]:
"""S-13: Check URL structure for SEO friendliness."""
parsed = urlparse(url)
path = parsed.path
# Check for special characters (excluding common ones like /, -, _)
special_chars = re.findall(r"[^a-zA-Z0-9/\-_.]", path)
if len(special_chars) > 3:
return [self._create_issue(
code="S-13",
severity="minor",
message=f"URL에 특수 문자가 많습니다 ({len(special_chars)}개)",
suggestion="URL은 영문, 숫자, 하이픈(-)을 사용하여 깔끔하게 구성하세요",
)]
return []
def _check_img_alt_seo(self, soup: BeautifulSoup) -> list[Issue]:
    """S-14: Flag <img> tags that have no alt attribute at all.

    An explicitly empty alt ("") is treated as intentional (decorative
    image) and is not reported.
    """
    images = soup.find_all("img")
    if not images:
        return []
    # get("alt") is None only when the attribute is absent entirely.
    no_alt = [img for img in images if img.get("alt") is None]
    if not no_alt:
        return []
    return [self._create_issue(
        code="S-14",
        severity="major",
        message=f"alt 속성이 없는 이미지가 {len(no_alt)}개 발견되었습니다",
        element=self._truncate_element(str(no_alt[0])),
        suggestion="검색엔진이 이미지를 이해할 수 있도록 모든 이미지에 설명적인 alt 속성을 추가하세요",
    )]
@staticmethod
def _truncate_element(element_str: str, max_len: int = 200) -> str:
if len(element_str) > max_len:
return element_str[:max_len] + "..."
return element_str