refactor: 4개 검사 엔진을 YAML 기반 표준 규칙으로 리팩토링

- YAML 규칙 파일 4개 신규 생성 (html_css, accessibility, seo, performance_security)
  W3C, WCAG 2.0/2.1/2.2, OWASP, Google Search Essentials 공식 표준 기반
- rules/__init__.py: YAML 로더 + 캐싱 + 리로드 모듈
- html_css.py: 30개 폐기 요소, 100+개 폐기 속성을 YAML에서 동적 로드
- accessibility.py: WCAG 버전 선택 지원 (wcag_version 파라미터)
- seo.py: title/description 길이, OG 필수 태그 등 임계값 YAML 로드
- performance_security.py: COOP/COEP/CORP 검사 추가, 정보 노출 헤더 검사 추가,
  TTFB/페이지 크기 임계값 YAML 로드
- PyYAML 의존성 추가

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2026-02-13 15:49:57 +09:00
parent cdb6405714
commit 44ad36e2ab
10 changed files with 3393 additions and 92 deletions

View File

@ -1,7 +1,8 @@
"""
Accessibility (WCAG 2.1 AA) Checker Engine (F-003).
Accessibility (WCAG) Checker Engine (F-003).
Uses Playwright + axe-core for comprehensive accessibility testing.
Falls back to BeautifulSoup-based checks if Playwright is unavailable.
Supports WCAG version selection (2.0/2.1/2.2) via rules/accessibility.yaml.
"""
import json
@ -14,6 +15,7 @@ from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
from app.rules import get_rules
logger = logging.getLogger(__name__)
@ -24,21 +26,42 @@ AXE_CORE_JS_PATH = Path(__file__).parent / "axe_core" / "axe.min.js"
AXE_RULE_MESSAGES = {
"image-alt": ("A-01", "이미지에 대체 텍스트(alt)가 없습니다", "1.1.1"),
"color-contrast": ("A-02", "텍스트와 배경의 색상 대비가 부족합니다", "1.4.3"),
"color-contrast-enhanced": ("A-02", "텍스트와 배경의 색상 대비가 향상된 기준을 충족하지 않습니다", "1.4.6"),
"keyboard": ("A-03", "키보드로 접근할 수 없는 요소가 있습니다", "2.1.1"),
"focus-visible": ("A-04", "키보드 포커스가 시각적으로 표시되지 않습니다", "2.4.7"),
"label": ("A-05", "폼 요소에 레이블이 연결되지 않았습니다", "1.3.1"),
"input-label": ("A-05", "입력 요소에 레이블이 없습니다", "1.3.1"),
"input-button-name": ("A-05", "입력 버튼에 접근 가능한 이름이 없습니다", "4.1.2"),
"select-name": ("A-05", "select 요소에 접근 가능한 이름이 없습니다", "4.1.2"),
"aria-valid-attr": ("A-06", "유효하지 않은 ARIA 속성이 사용되었습니다", "4.1.2"),
"aria-roles": ("A-06", "유효하지 않은 ARIA 역할이 사용되었습니다", "4.1.2"),
"aria-required-attr": ("A-06", "필수 ARIA 속성이 누락되었습니다", "4.1.2"),
"aria-valid-attr-value": ("A-06", "ARIA 속성 값이 올바르지 않습니다", "4.1.2"),
"aria-allowed-attr": ("A-06", "허용되지 않는 ARIA 속성이 사용되었습니다", "4.1.2"),
"aria-allowed-role": ("A-06", "허용되지 않는 ARIA 역할이 사용되었습니다", "4.1.2"),
"aria-hidden-body": ("A-06", "body 요소에 aria-hidden이 설정되어 있습니다", "4.1.2"),
"aria-hidden-focus": ("A-06", "aria-hidden 요소 내에 포커스 가능한 요소가 있습니다", "4.1.2"),
"link-name": ("A-07", "링크 텍스트가 목적을 설명하지 않습니다", "2.4.4"),
"html-has-lang": ("A-08", "HTML 요소에 lang 속성이 없습니다", "3.1.1"),
"html-lang-valid": ("A-08", "HTML lang 속성 값이 올바르지 않습니다", "3.1.1"),
"valid-lang": ("A-08", "lang 속성 값이 올바르지 않습니다", "3.1.2"),
"bypass": ("A-09", "건너뛰기 링크(skip navigation)가 없습니다", "2.4.1"),
"region": ("A-09", "랜드마크 영역 밖에 콘텐츠가 있습니다", "2.4.1"),
"no-autoplay-audio": ("A-10", "자동 재생 미디어에 정지/음소거 컨트롤이 없습니다", "1.4.2"),
"audio-caption": ("A-10", "오디오/비디오에 자막이 없습니다", "1.2.2"),
"video-caption": ("A-10", "비디오에 자막이 없습니다", "1.2.2"),
"document-title": ("A-11", "페이지에 제목(title)이 없습니다", "2.4.2"),
"empty-heading": ("A-12", "빈 heading 요소가 있습니다", "2.4.6"),
"frame-title": ("A-13", "iframe에 접근 가능한 제목이 없습니다", "4.1.2"),
"button-name": ("A-14", "버튼에 접근 가능한 이름이 없습니다", "4.1.2"),
"meta-refresh": ("A-15", "meta refresh로 시간 제한이 설정되어 있습니다", "2.2.1"),
"meta-viewport-large": ("A-16", "사용자의 확대/축소가 제한되어 있습니다", "1.4.4"),
"autocomplete-valid": ("A-17", "autocomplete 속성이 올바르지 않습니다", "1.3.5"),
"target-size": ("A-18", "터치 대상 크기가 최소 기준을 충족하지 않습니다", "2.5.8"),
"scrollable-region-focusable": ("A-19", "스크롤 가능한 영역에 키보드 접근이 불가합니다", "2.1.1"),
"tabindex": ("A-20", "tabindex 값이 0보다 크게 설정되어 있습니다", "2.4.3"),
"blink": ("A-21", "깜빡이는 콘텐츠가 있습니다", "2.2.2"),
"marquee": ("A-21", "자동 스크롤 콘텐츠(marquee)가 있습니다", "2.2.2"),
}
# axe-core impact to severity mapping
@ -49,9 +72,56 @@ IMPACT_TO_SEVERITY = {
"minor": "info",
}
# Built-in mapping from WCAG version preset name to axe-core tag list.
# This table is hard-coded (it is not read from YAML); the tag-lookup helper
# falls back to it when the "accessibility" rules carry no matching preset.
WCAG_VERSION_PRESETS = {
"2.0_A": ["wcag2a"],
"2.0_AA": ["wcag2a", "wcag2aa"],
"2.1_AA": ["wcag2a", "wcag2aa", "wcag21a", "wcag21aa"],
"2.2_AA": ["wcag2a", "wcag2aa", "wcag21a", "wcag21aa", "wcag22aa"],
"2.2_full": ["wcag2a", "wcag2aa", "wcag2aaa", "wcag21a", "wcag21aa", "wcag22aa"],
}
def _get_axe_tags_for_version(wcag_version: str = "2.1_AA") -> list[str]:
    """Return the axe-core tag list for a WCAG version preset.

    Lookup order:
      1. the ``tags`` list of the matching preset under ``compliance_presets``
         in the "accessibility" rules,
      2. the built-in ``WCAG_VERSION_PRESETS`` entry for ``wcag_version``,
      3. the WCAG 2.1 AA tag set (used for unknown preset names).

    Args:
        wcag_version: Preset name such as ``"2.0_A"``, ``"2.1_AA"`` or
            ``"2.2_full"``.

    Returns:
        A new list of axe-core tags; callers may mutate it freely without
        touching the cached rules or the module-level preset table.
    """
    # Map user-friendly names to YAML preset keys
    version_map = {
        "2.0_A": "wcag_20_a",
        "2.0_AA": "wcag_20_aa",
        "2.1_AA": "wcag_21_aa",
        "2.2_AA": "wcag_22_aa",
        "2.2_full": "wcag_22_full",
    }

    # One fallback for every path, so an unknown or incomplete preset always
    # degrades to the same tag set.
    fallback = WCAG_VERSION_PRESETS.get(
        wcag_version, ["wcag2a", "wcag2aa", "wcag21a", "wcag21aa"]
    )

    # ``or {}`` guards against a YAML key that is present but null.
    presets = get_rules("accessibility").get("compliance_presets") or {}
    yaml_key = version_map.get(wcag_version)
    preset = presets.get(yaml_key) if yaml_key else None
    tags = preset.get("tags") if isinstance(preset, dict) else None

    # An empty/missing tag list would make axe-core run no WCAG rules at all,
    # so treat it like a missing preset. Copy to avoid aliasing shared data.
    return list(tags) if tags else list(fallback)
def _get_wcag_level_label(wcag_version: str) -> str:
    """Translate a WCAG preset name into its display label.

    Unknown preset names yield the WCAG 2.1 Level AA label.
    """
    label_by_version = dict(
        [
            ("2.0_A", "WCAG 2.0 Level A"),
            ("2.0_AA", "WCAG 2.0 Level AA"),
            ("2.1_AA", "WCAG 2.1 Level AA"),
            ("2.2_AA", "WCAG 2.2 Level AA"),
            ("2.2_full", "WCAG 2.2 All Levels"),
        ]
    )
    try:
        return label_by_version[wcag_version]
    except KeyError:
        return "WCAG 2.1 Level AA"
class AccessibilityChecker(BaseChecker):
"""Accessibility (WCAG 2.1 AA) checker engine."""
"""Accessibility (WCAG) checker engine with version selection."""
def __init__(self, progress_callback=None, wcag_version: str = "2.1_AA"):
    """Set up the checker for one WCAG preset.

    Args:
        progress_callback: Forwarded unchanged to the base checker.
        wcag_version: Preset name used to pick the axe-core tags.
    """
    super().__init__(progress_callback)
    resolved_tags = _get_axe_tags_for_version(wcag_version)
    self.wcag_version = wcag_version
    self.axe_tags = resolved_tags
@property
def category_name(self) -> str:
@ -86,12 +156,10 @@ class AccessibilityChecker(BaseChecker):
await page.goto(url, wait_until="networkidle", timeout=30000)
await self.update_progress(40, "axe-core 주입 중...")
# Load axe-core JS
if AXE_CORE_JS_PATH.exists() and AXE_CORE_JS_PATH.stat().st_size > 1000:
axe_js = AXE_CORE_JS_PATH.read_text(encoding="utf-8")
await page.evaluate(axe_js)
else:
# Fallback: load from CDN
await page.evaluate("""
async () => {
const script = document.createElement('script');
@ -104,22 +172,27 @@ class AccessibilityChecker(BaseChecker):
}
""")
await self.update_progress(60, "접근성 검사 실행 중...")
axe_results = await page.evaluate("""
() => {
return new Promise((resolve, reject) => {
if (typeof axe === 'undefined') {
await self.update_progress(60, f"접근성 검사 실행 중 ({_get_wcag_level_label(self.wcag_version)})...")
# Build axe-core tag list including best-practice
axe_tag_list = self.axe_tags + ["best-practice"]
tags_js = json.dumps(axe_tag_list)
axe_results = await page.evaluate(f"""
() => {{
return new Promise((resolve, reject) => {{
if (typeof axe === 'undefined') {{
reject(new Error('axe-core not loaded'));
return;
}
axe.run(document, {
runOnly: {
}}
axe.run(document, {{
runOnly: {{
type: 'tag',
values: ['wcag2a', 'wcag2aa', 'best-practice']
}
}).then(resolve).catch(reject);
});
}
values: {tags_js}
}}
}}).then(resolve).catch(reject);
}});
}}
""")
await self.update_progress(80, "결과 분석 중...")
@ -134,7 +207,7 @@ class AccessibilityChecker(BaseChecker):
category="accessibility",
score=score,
issues=issues,
wcag_level="AA",
wcag_level=_get_wcag_level_label(self.wcag_version),
)
async def _check_with_beautifulsoup(self, url: str, html_content: str) -> CategoryResult:
@ -170,7 +243,7 @@ class AccessibilityChecker(BaseChecker):
category="accessibility",
score=score,
issues=issues,
wcag_level="AA",
wcag_level=_get_wcag_level_label(self.wcag_version),
)
def _parse_axe_results(self, axe_results: dict) -> list[Issue]:
@ -182,15 +255,15 @@ class AccessibilityChecker(BaseChecker):
impact = violation.get("impact", "minor")
severity = IMPACT_TO_SEVERITY.get(impact, "info")
# Map to our issue codes
if rule_id in AXE_RULE_MESSAGES:
code, korean_msg, wcag = AXE_RULE_MESSAGES[rule_id]
else:
code = "A-06"
code = "A-99"
korean_msg = violation.get("description", "접근성 위반 사항이 발견되었습니다")
wcag = "4.1.2"
# Try to extract WCAG criterion from tags
tags = violation.get("tags", [])
wcag = self._extract_wcag_from_tags(tags)
# Get affected elements
nodes = violation.get("nodes", [])
element = None
if nodes:
@ -211,7 +284,6 @@ class AccessibilityChecker(BaseChecker):
if ratio:
detail = f" (대비율: {ratio}:1, 최소 4.5:1 필요)"
# Create the issue with node count info
node_count = len(nodes)
count_info = f" ({node_count}개 요소)" if node_count > 1 else ""
@ -226,11 +298,19 @@ class AccessibilityChecker(BaseChecker):
return issues
@staticmethod
def _extract_wcag_from_tags(tags: list[str]) -> str:
"""Extract WCAG criterion number from axe-core tags (e.g., 'wcag111' -> '1.1.1')."""
for tag in tags:
if tag.startswith("wcag") and not tag.startswith("wcag2"):
# e.g., "wcag111" -> "1.1.1"
digits = tag[4:]
if len(digits) >= 3:
return f"{digits[0]}.{digits[1]}.{digits[2:]}"
return "4.1.2"
def _calculate_axe_score(self, axe_results: dict) -> int:
"""
Calculate score based on axe-core violations.
critical=-20, serious=-10, moderate=-5, minor=-2
"""
"""Calculate score based on axe-core violations."""
severity_weights = {
"critical": 20,
"serious": 10,
@ -284,7 +364,6 @@ class AccessibilityChecker(BaseChecker):
if inp.get("aria-label") or inp.get("aria-labelledby") or inp.get("title"):
has_label = True
# Check if wrapped in label
parent_label = inp.find_parent("label")
if parent_label:
has_label = True
@ -377,10 +456,9 @@ class AccessibilityChecker(BaseChecker):
def _bs_check_skip_nav(self, soup: BeautifulSoup) -> list[Issue]:
"""A-09: Check for skip navigation link."""
# Look for skip nav patterns
skip_links = soup.find_all("a", href=True)
has_skip = False
for link in skip_links[:10]: # Check first 10 links
for link in skip_links[:10]:
href = link.get("href", "")
text = link.get_text(strip=True).lower()
if href.startswith("#") and any(
@ -417,6 +495,6 @@ class AccessibilityChecker(BaseChecker):
suggestion="autoplay 미디어에 controls 속성을 추가하거나 muted 속성을 사용하세요",
wcag_criterion="1.4.2",
))
break # Report only first
break
return issues

View File

@ -2,6 +2,7 @@
HTML/CSS Standards Checker Engine (F-002).
Checks HTML5 validity, semantic tags, CSS inline usage, etc.
Uses BeautifulSoup4 + html5lib for parsing.
Rules loaded from rules/html_css.yaml.
"""
import re
@ -13,15 +14,28 @@ from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
from app.rules import get_rules
logger = logging.getLogger(__name__)
DEPRECATED_TAGS = [
"font", "center", "marquee", "blink", "strike", "big", "tt",
"basefont", "applet", "dir", "isindex",
]
SEMANTIC_TAGS = ["header", "nav", "main", "footer", "section", "article"]
def _load_obsolete_elements() -> list[dict]:
    """Return the ``obsolete_elements`` entries of the "html_css" rules.

    Yields an empty list when the rules have no such key.
    """
    return get_rules("html_css").get("obsolete_elements", [])
def _load_obsolete_attributes() -> dict[str, list[dict]]:
    """Return the ``obsolete_attributes`` mapping of the "html_css" rules.

    The mapping is keyed by element (or element-group) name; an empty dict is
    returned when the rules have no such key.
    """
    return get_rules("html_css").get("obsolete_attributes", {})
def _load_semantic_tags() -> list[str]:
    """Return the structural semantic tag names from the "html_css" rules.

    Reads ``semantic_elements.structural`` and collects each entry's ``tag``.
    Entries that are not mappings or have no ``tag`` are skipped. When the
    rules provide no usable tag, the standard HTML5 structural tags are
    returned so the semantic-tag check never runs against an empty list.
    """
    rules = get_rules("html_css")
    # ``or`` guards against YAML keys that are present but null.
    structural = (rules.get("semantic_elements") or {}).get("structural") or []
    tags = [
        item["tag"]
        for item in structural
        if isinstance(item, dict) and item.get("tag")
    ]
    return tags or ["header", "nav", "main", "footer", "section", "article"]
class HtmlCssChecker(BaseChecker):
@ -50,16 +64,21 @@ class HtmlCssChecker(BaseChecker):
await self.update_progress(50, "시맨틱 태그 검사 중...")
issues += self._check_semantic_tags(soup)
await self.update_progress(60, "이미지 alt 속성 검사 중...")
await self.update_progress(55, "이미지 alt 속성 검사 중...")
issues += self._check_img_alt(soup)
await self.update_progress(70, "중복 ID 검사 중...")
await self.update_progress(60, "중복 ID 검사 중...")
issues += self._check_duplicate_ids(soup)
await self.update_progress(80, "링크 및 스타일 검사 중...")
await self.update_progress(65, "링크 및 스타일 검사 중...")
issues += self._check_empty_links(soup)
issues += self._check_inline_styles(soup)
issues += self._check_deprecated_tags(soup)
await self.update_progress(75, "Obsolete 태그 검사 중...")
issues += self._check_obsolete_tags(soup)
await self.update_progress(80, "Obsolete 속성 검사 중...")
issues += self._check_obsolete_attributes(soup)
await self.update_progress(90, "heading 구조 검사 중...")
issues += self._check_heading_hierarchy(soup)
@ -134,21 +153,22 @@ class HtmlCssChecker(BaseChecker):
def _check_semantic_tags(self, soup: BeautifulSoup) -> list[Issue]:
"""H-05: Check for semantic HTML5 tag usage."""
semantic_tags = _load_semantic_tags()
found_tags = set()
for tag_name in SEMANTIC_TAGS:
for tag_name in semantic_tags:
if soup.find(tag_name):
found_tags.add(tag_name)
if not found_tags:
tag_list = ", ".join(semantic_tags)
return [self._create_issue(
code="H-05",
severity="minor",
message="시맨틱 태그가 사용되지 않았습니다 (header, nav, main, footer, section, article)",
message=f"시맨틱 태그가 사용되지 않았습니다 ({tag_list})",
suggestion="적절한 시맨틱 태그를 사용하여 문서 구조를 명확히 하세요",
)]
missing = set(SEMANTIC_TAGS) - found_tags
# Only report if major structural elements are missing (main is most important)
missing = set(semantic_tags) - found_tags
if "main" in missing:
return [self._create_issue(
code="H-05",
@ -240,23 +260,105 @@ class HtmlCssChecker(BaseChecker):
))
return issues
def _check_deprecated_tags(self, soup: BeautifulSoup) -> list[Issue]:
"""H-10: Check for deprecated HTML tags."""
def _check_obsolete_tags(self, soup: BeautifulSoup) -> list[Issue]:
"""H-10: Check for obsolete HTML tags (loaded from YAML)."""
issues = []
for tag_name in DEPRECATED_TAGS:
obsolete = _load_obsolete_elements()
for entry in obsolete:
tag_name = entry["tag"]
found = soup.find_all(tag_name)
if found:
replacement = entry.get("replacement", "CSS")
severity = entry.get("severity", "major")
first_el = found[0]
issues.append(self._create_issue(
code="H-10",
severity="major",
message=f"사용 중단된(deprecated) 태그 <{tag_name}>이(가) {len(found)}회 사용되었습니다",
severity=severity,
message=f"사용 중단된(obsolete) 태그 <{tag_name}>이(가) {len(found)}회 사용되었습니다",
element=self._truncate_element(str(first_el)),
line=self._get_line_number(first_el),
suggestion=f"<{tag_name}> 대신 CSS를 사용하여 스타일을 적용하세요",
suggestion=f"<{tag_name}> 대신 {replacement}을(를) 사용하세요",
))
return issues
def _check_obsolete_attributes(self, soup: BeautifulSoup) -> list[Issue]:
    """H-13: Report obsolete HTML attributes listed in the YAML rules.

    Every (element, attribute) occurrence is counted once, even when the
    same attribute is listed under several rule groups (for example both an
    element-specific group and ``global``). All occurrences are folded into
    a single issue that carries the first hit as its example.

    Args:
        soup: Parsed document to inspect.

    Returns:
        A list with one H-13 issue, or an empty list when nothing was found.
    """
    obsolete_attrs = _load_obsolete_attributes()

    # YAML group key -> HTML tag names covered by that group. ``None`` means
    # "any element". Each group appears exactly once so no element is scanned
    # twice for the same rule list. Order decides which hit becomes the
    # example: single elements, then element groups, then global attributes.
    tag_groups = {
        "a": ["a"],
        "body": ["body"],
        "br": ["br"],
        "form": ["form"],
        "hr": ["hr"],
        "html": ["html"],
        "iframe": ["iframe"],
        "img": ["img"],
        "input": ["input"],
        "link": ["link"],
        "meta": ["meta"],
        "script": ["script"],
        "style": ["style"],
        "table": ["table"],
        "embed": ["embed"],
        "td_th": ["td", "th"],
        "tr": ["tr"],
        "thead_tbody_tfoot": ["thead", "tbody", "tfoot"],
        "ol_ul": ["ol", "ul"],
        "heading": ["h1", "h2", "h3", "h4", "h5", "h6"],
        "global": None,
    }

    # (id(element), attribute name) pairs already counted.
    seen = set()
    first_element = None
    first_line = None
    first_attr = None

    for yaml_key, html_tags in tag_groups.items():
        for attr_entry in obsolete_attrs.get(yaml_key) or []:
            attr_name = attr_entry["attr"]
            for html_tag in html_tags or [None]:
                for element in soup.find_all(html_tag, attrs={attr_name: True}):
                    hit = (id(element), attr_name)
                    if hit in seen:
                        continue
                    seen.add(hit)
                    if first_attr is None:
                        first_element = self._truncate_element(str(element))
                        first_line = self._get_line_number(element)
                        first_attr = (
                            f"{html_tag}[{attr_name}]" if html_tag else attr_name
                        )

    if not seen:
        return []

    found_count = len(seen)
    return [self._create_issue(
        code="H-13",
        severity="minor",
        message=f"사용 중단된(obsolete) HTML 속성이 {found_count}개 발견되었습니다 (예: {first_attr})",
        element=first_element,
        line=first_line,
        suggestion="사용 중단된 HTML 속성 대신 CSS를 사용하세요 (W3C HTML Living Standard 참조)",
    )]
def _check_heading_hierarchy(self, soup: BeautifulSoup) -> list[Issue]:
"""H-11: Check heading hierarchy (h1-h6 should not skip levels)."""
issues = []
@ -277,7 +379,7 @@ class HtmlCssChecker(BaseChecker):
line=self._get_line_number(heading),
suggestion=f"h{prev_level} 다음에는 h{prev_level + 1}을 사용하세요",
))
break # Only report first skip
break
prev_level = level
return issues
@ -295,14 +397,12 @@ class HtmlCssChecker(BaseChecker):
@staticmethod
def _get_line_number(element) -> Optional[int]:
"""Extract source line number from a BeautifulSoup element."""
if element and hasattr(element, "sourceline"):
return element.sourceline
return None
@staticmethod
def _truncate_element(element_str: str, max_len: int = 200) -> str:
"""Truncate element string for display."""
if len(element_str) > max_len:
return element_str[:max_len] + "..."
return element_str

View File

@ -10,13 +10,14 @@ import logging
import time
from datetime import datetime, timezone
from urllib.parse import urlparse
from typing import Optional
from typing import Any, Optional
import httpx
from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue, calculate_grade
from app.rules import get_rules
logger = logging.getLogger(__name__)
@ -24,6 +25,32 @@ logger = logging.getLogger(__name__)
class PerformanceSecurityChecker(BaseChecker):
"""Performance and security checker engine."""
def __init__(self, **kwargs):
    """Initialise the base checker and keep the "performance_security" rules."""
    super().__init__(**kwargs)
    loaded_rules = get_rules("performance_security")
    self._rules_data = loaded_rules
def _get_security_headers(self) -> list[dict[str, Any]]:
"""Load required security headers from YAML."""
return self._rules_data.get("security", {}).get("headers", [])
def _get_headers_to_remove(self) -> list[dict[str, Any]]:
"""Load information disclosure headers from YAML."""
return self._rules_data.get("security", {}).get("headers_to_remove", [])
def _get_ttfb_thresholds(self) -> dict[str, int]:
"""Load TTFB thresholds from YAML."""
for metric in self._rules_data.get("performance", {}).get("additional_metrics", []):
if metric.get("id") == "perf-ttfb":
return metric.get("thresholds", {})
return {"good": 800, "needs_improvement": 1800}
def _get_page_size_thresholds(self) -> dict[str, int]:
"""Load total page size thresholds from YAML."""
for check in self._rules_data.get("performance", {}).get("resource_checks", []):
if check.get("id") == "perf-total-page-size":
return check.get("thresholds", {})
return {"good": 1500, "needs_improvement": 3000, "poor": 5000}
@property
def category_name(self) -> str:
return "performance_security"
@ -46,6 +73,12 @@ class PerformanceSecurityChecker(BaseChecker):
issues += self._check_x_xss_protection(headers)
issues += self._check_referrer_policy(headers)
issues += self._check_permissions_policy(headers)
issues += self._check_coop(headers)
issues += self._check_coep(headers)
issues += self._check_corp(headers)
await self.update_progress(50, "정보 노출 헤더 검사 중...")
issues += self._check_info_disclosure(headers)
await self.update_progress(60, "응답 시간 측정 중...")
issues += await self._check_ttfb(url, metrics)
@ -163,15 +196,24 @@ class PerformanceSecurityChecker(BaseChecker):
return []
def _get_security_header_rule(self, rule_id: str) -> dict[str, Any]:
"""Find a specific security header rule from YAML."""
for h in self._get_security_headers():
if h.get("id") == rule_id:
return h
return {}
def _check_hsts(self, headers: dict) -> list[Issue]:
"""P-03: Check Strict-Transport-Security header."""
rule = self._get_security_header_rule("sec-strict-transport-security")
recommended = rule.get("details", {}).get("recommended_value", "max-age=31536000; includeSubDomains")
hsts = self._get_header(headers, "Strict-Transport-Security")
if not hsts:
return [self._create_issue(
code="P-03",
severity="major",
message="Strict-Transport-Security(HSTS) 헤더가 설정되지 않았습니다",
suggestion="HSTS 헤더를 추가하세요: Strict-Transport-Security: max-age=31536000; includeSubDomains",
suggestion=f"HSTS 헤더를 추가하세요: Strict-Transport-Security: {recommended}",
)]
return []
@ -225,13 +267,15 @@ class PerformanceSecurityChecker(BaseChecker):
def _check_referrer_policy(self, headers: dict) -> list[Issue]:
"""P-08: Check Referrer-Policy header."""
rule = self._get_security_header_rule("sec-referrer-policy")
recommended = rule.get("details", {}).get("recommended_value", "strict-origin-when-cross-origin")
rp = self._get_header(headers, "Referrer-Policy")
if not rp:
return [self._create_issue(
code="P-08",
severity="minor",
message="Referrer-Policy 헤더가 설정되지 않았습니다",
suggestion="Referrer-Policy: strict-origin-when-cross-origin을 설정하세요",
suggestion=f"Referrer-Policy: {recommended}을 설정하세요",
)]
return []
@ -247,8 +291,69 @@ class PerformanceSecurityChecker(BaseChecker):
)]
return []
def _check_coop(self, headers: dict) -> list[Issue]:
    """P-15: Flag a missing Cross-Origin-Opener-Policy header.

    The suggested value comes from the ``sec-cross-origin-opener-policy``
    rule, defaulting to ``same-origin``.
    """
    details = self._get_security_header_rule("sec-cross-origin-opener-policy").get("details", {})
    recommended = details.get("recommended_value", "same-origin")

    if self._get_header(headers, "Cross-Origin-Opener-Policy"):
        return []

    issue = self._create_issue(
        code="P-15",
        severity="minor",
        message="Cross-Origin-Opener-Policy(COOP) 헤더가 설정되지 않았습니다",
        suggestion=f"COOP 헤더를 추가하세요: Cross-Origin-Opener-Policy: {recommended}",
    )
    return [issue]
def _check_coep(self, headers: dict) -> list[Issue]:
    """P-16: Flag a missing Cross-Origin-Embedder-Policy header.

    The suggested value comes from the ``sec-cross-origin-embedder-policy``
    rule, defaulting to ``require-corp``.
    """
    details = self._get_security_header_rule("sec-cross-origin-embedder-policy").get("details", {})
    recommended = details.get("recommended_value", "require-corp")

    if self._get_header(headers, "Cross-Origin-Embedder-Policy"):
        return []

    issue = self._create_issue(
        code="P-16",
        severity="minor",
        message="Cross-Origin-Embedder-Policy(COEP) 헤더가 설정되지 않았습니다",
        suggestion=f"COEP 헤더를 추가하세요: Cross-Origin-Embedder-Policy: {recommended}",
    )
    return [issue]
def _check_corp(self, headers: dict) -> list[Issue]:
    """P-17: Flag a missing Cross-Origin-Resource-Policy header.

    The suggested value comes from the ``sec-cross-origin-resource-policy``
    rule, defaulting to ``same-site``.
    """
    details = self._get_security_header_rule("sec-cross-origin-resource-policy").get("details", {})
    recommended = details.get("recommended_value", "same-site")

    if self._get_header(headers, "Cross-Origin-Resource-Policy"):
        return []

    issue = self._create_issue(
        code="P-17",
        severity="minor",
        message="Cross-Origin-Resource-Policy(CORP) 헤더가 설정되지 않았습니다",
        suggestion=f"CORP 헤더를 추가하세요: Cross-Origin-Resource-Policy: {recommended}",
    )
    return [issue]
def _check_info_disclosure(self, headers: dict) -> list[Issue]:
    """P-18: Report response headers that disclose server information.

    The header names come from the ``headers_to_remove`` rules (each rule's
    ``details.header``). Rules without a header name are skipped instead of
    being looked up with an empty name.

    Args:
        headers: Response headers of the checked page.

    Returns:
        One P-18 issue per disclosing header that is present and non-empty.
    """
    issues = []
    for rule in self._get_headers_to_remove():
        # ``or {}`` tolerates a rule whose ``details`` key is present but null.
        header_name = (rule.get("details") or {}).get("header", "")
        if not header_name:
            continue
        value = self._get_header(headers, header_name)
        if value:
            issues.append(self._create_issue(
                code="P-18",
                severity="info",
                # Cap the echoed value so an oversized header cannot bloat the report.
                message=f"{header_name} 헤더가 서버 정보를 노출하고 있습니다: {value[:80]}",
                suggestion=f"{header_name} 헤더를 제거하여 서버 기술 스택 노출을 방지하세요",
            ))
    return issues
async def _check_ttfb(self, url: str, metrics: dict) -> list[Issue]:
"""P-10: Check Time To First Byte (TTFB)."""
"""P-10: Check Time To First Byte (TTFB) using YAML thresholds."""
thresholds = self._get_ttfb_thresholds()
good_ms = thresholds.get("good", 800)
needs_improvement_ms = thresholds.get("needs_improvement", 1800)
try:
start = time.monotonic()
async with httpx.AsyncClient(
@ -262,18 +367,18 @@ class PerformanceSecurityChecker(BaseChecker):
ttfb_ms = round((time.monotonic() - start) * 1000)
metrics["ttfb_ms"] = ttfb_ms
if ttfb_ms > 2000:
if ttfb_ms > needs_improvement_ms:
return [self._create_issue(
code="P-10",
severity="major",
message=f"응답 시간(TTFB)이 느립니다: {ttfb_ms}ms (권장 < 1000ms)",
message=f"응답 시간(TTFB)이 느립니다: {ttfb_ms}ms (권장 < {good_ms}ms)",
suggestion="서버 응답 속도를 개선하세요 (캐싱, CDN, 서버 최적화)",
)]
elif ttfb_ms > 1000:
elif ttfb_ms > good_ms:
return [self._create_issue(
code="P-10",
severity="minor",
message=f"응답 시간(TTFB)이 다소 느립니다: {ttfb_ms}ms (권장 < 1000ms)",
message=f"응답 시간(TTFB)이 다소 느립니다: {ttfb_ms}ms (권장 < {good_ms}ms)",
suggestion="서버 응답 속도 개선을 고려하세요",
)]
except Exception as e:
@ -288,15 +393,19 @@ class PerformanceSecurityChecker(BaseChecker):
return []
def _check_page_size(self, html_content: str, metrics: dict) -> list[Issue]:
"""P-11: Check HTML page size."""
"""P-11: Check HTML page size using YAML thresholds."""
thresholds = self._get_page_size_thresholds()
poor_kb = thresholds.get("poor", 5000)
poor_bytes = poor_kb * 1024
size_bytes = len(html_content.encode("utf-8"))
metrics["page_size_bytes"] = size_bytes
if size_bytes > 3 * 1024 * 1024: # 3MB
if size_bytes > poor_bytes:
return [self._create_issue(
code="P-11",
severity="minor",
message=f"페이지 크기가 큽니다: {round(size_bytes / 1024 / 1024, 1)}MB (권장 < 3MB)",
message=f"페이지 크기가 큽니다: {round(size_bytes / 1024 / 1024, 1)}MB (권장 < {poor_kb // 1024}MB)",
suggestion="페이지 크기를 줄이세요 (불필요한 코드 제거, 이미지 최적화, 코드 분할)",
)]
return []
@ -387,8 +496,9 @@ class PerformanceSecurityChecker(BaseChecker):
https_ssl_score = max(0, https_ssl_score)
# Security headers component (40% of security)
header_issues = [i for i in issues if i.code in ("P-03", "P-04", "P-05", "P-06", "P-07", "P-08", "P-09")]
total_header_checks = 7
header_codes = {"P-03", "P-04", "P-05", "P-06", "P-07", "P-08", "P-09", "P-15", "P-16", "P-17"}
header_issues = [i for i in issues if i.code in header_codes]
total_header_checks = len(header_codes)
passed_headers = total_header_checks - len(header_issues)
header_score = round(passed_headers / total_header_checks * 100) if total_header_checks else 100
@ -398,13 +508,16 @@ class PerformanceSecurityChecker(BaseChecker):
perf_score = 100
# TTFB component (40% of performance)
ttfb_thresholds = self._get_ttfb_thresholds()
ttfb_good = ttfb_thresholds.get("good", 800)
ttfb_ni = ttfb_thresholds.get("needs_improvement", 1800)
ttfb = metrics.get("ttfb_ms")
if ttfb is not None:
if ttfb <= 500:
if ttfb <= ttfb_good // 2:
ttfb_score = 100
elif ttfb <= 1000:
elif ttfb <= ttfb_good:
ttfb_score = 80
elif ttfb <= 2000:
elif ttfb <= ttfb_ni:
ttfb_score = 60
else:
ttfb_score = 30
@ -412,12 +525,16 @@ class PerformanceSecurityChecker(BaseChecker):
ttfb_score = 50
# Page size component (30% of performance)
size_thresholds = self._get_page_size_thresholds()
good_kb = size_thresholds.get("good", 1500)
ni_kb = size_thresholds.get("needs_improvement", 3000)
poor_kb = size_thresholds.get("poor", 5000)
page_size = metrics.get("page_size_bytes", 0)
if page_size <= 1024 * 1024: # 1MB
if page_size <= good_kb * 1024:
size_score = 100
elif page_size <= 2 * 1024 * 1024: # 2MB
elif page_size <= ni_kb * 1024:
size_score = 80
elif page_size <= 3 * 1024 * 1024: # 3MB
elif page_size <= poor_kb * 1024:
size_score = 60
else:
size_score = 30

View File

@ -7,13 +7,14 @@ import re
import json
import logging
from urllib.parse import urlparse, urljoin
from typing import Optional
from typing import Any, Optional
import httpx
from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
from app.rules import get_rules
logger = logging.getLogger(__name__)
@ -21,6 +22,23 @@ logger = logging.getLogger(__name__)
class SeoChecker(BaseChecker):
"""SEO optimization checker engine."""
def __init__(self, **kwargs):
    """Initialise the base checker and keep the "seo" rules."""
    super().__init__(**kwargs)
    loaded_rules = get_rules("seo")
    self._rules_data = loaded_rules
def _get_seo_rule(self, rule_id: str) -> dict[str, Any]:
"""Lookup a rule by id from YAML data."""
for rule in self._rules_data.get("rules", []):
if rule.get("id") == rule_id:
return rule
return {}
def _get_threshold(self, rule_id: str, key: str, default: Any = None) -> Any:
"""Get a specific threshold from a rule's details."""
rule = self._get_seo_rule(rule_id)
details = rule.get("details", {})
return details.get(key, default)
@property
def category_name(self) -> str:
return "seo"
@ -73,9 +91,11 @@ class SeoChecker(BaseChecker):
)
def _check_title(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
"""S-01: Check title tag existence and length (10-60 chars)."""
"""S-01: Check title tag existence and length."""
issues = []
title = soup.find("title")
min_len = self._get_threshold("seo-title-tag", "min_length", 10)
max_len = self._get_threshold("seo-title-tag", "max_length", 60)
if title is None or not title.string or title.string.strip() == "":
meta_info["title"] = None
@ -84,7 +104,7 @@ class SeoChecker(BaseChecker):
code="S-01",
severity="critical",
message="<title> 태그가 없거나 비어있습니다",
suggestion="검색 결과에 표시될 10-60자 길이의 페이지 제목을 설정하세요",
suggestion=f"검색 결과에 표시될 {min_len}-{max_len}자 길이의 페이지 제목을 설정하세요",
))
return issues
@ -93,28 +113,30 @@ class SeoChecker(BaseChecker):
meta_info["title"] = title_text
meta_info["title_length"] = title_len
if title_len < 10:
if title_len < min_len:
issues.append(self._create_issue(
code="S-01",
severity="critical",
message=f"title이 너무 짧습니다 ({title_len}자, 권장 10-60자)",
message=f"title이 너무 짧습니다 ({title_len}자, 권장 {min_len}-{max_len}자)",
element=f"<title>{title_text}</title>",
suggestion="검색 결과에 효과적으로 표시되도록 10자 이상의 제목을 작성하세요",
suggestion=f"검색 결과에 효과적으로 표시되도록 {min_len}자 이상의 제목을 작성하세요",
))
elif title_len > 60:
elif title_len > max_len:
issues.append(self._create_issue(
code="S-01",
severity="minor",
message=f"title이 너무 깁니다 ({title_len}자, 권장 10-60자)",
message=f"title이 너무 깁니다 ({title_len}자, 권장 {min_len}-{max_len}자)",
element=f"<title>{title_text[:50]}...</title>",
suggestion="검색 결과에서 잘리지 않도록 60자 이내로 제목을 줄이세요",
suggestion=f"검색 결과에서 잘리지 않도록 {max_len}자 이내로 제목을 줄이세요",
))
return issues
def _check_meta_description(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
"""S-02: Check meta description existence and length (50-160 chars)."""
"""S-02: Check meta description existence and length."""
issues = []
desc = soup.find("meta", attrs={"name": re.compile(r"^description$", re.I)})
min_len = self._get_threshold("seo-meta-description", "min_length", 50)
max_len = self._get_threshold("seo-meta-description", "max_length", 160)
if desc is None or not desc.get("content"):
meta_info["description"] = None
@ -123,7 +145,7 @@ class SeoChecker(BaseChecker):
code="S-02",
severity="major",
message="meta description이 없습니다",
suggestion='<meta name="description" content="페이지 설명">을 추가하세요 (50-160자 권장)',
suggestion=f'<meta name="description" content="페이지 설명">을 추가하세요 ({min_len}-{max_len}자 권장)',
))
return issues
@ -132,19 +154,19 @@ class SeoChecker(BaseChecker):
meta_info["description"] = content
meta_info["description_length"] = content_len
if content_len < 50:
if content_len < min_len:
issues.append(self._create_issue(
code="S-02",
severity="major",
message=f"meta description이 너무 짧습니다 ({content_len}자, 권장 50-160자)",
suggestion="검색 결과에서 페이지를 효과적으로 설명하도록 50자 이상으로 작성하세요",
message=f"meta description이 너무 짧습니다 ({content_len}자, 권장 {min_len}-{max_len}자)",
suggestion=f"검색 결과에서 페이지를 효과적으로 설명하도록 {min_len}자 이상으로 작성하세요",
))
elif content_len > 160:
elif content_len > max_len:
issues.append(self._create_issue(
code="S-02",
severity="minor",
message=f"meta description이 너무 깁니다 ({content_len}자, 권장 50-160자)",
suggestion="검색 결과에서 잘리지 않도록 160자 이내로 줄이세요",
message=f"meta description이 너무 깁니다 ({content_len}자, 권장 {min_len}-{max_len}자)",
suggestion=f"검색 결과에서 잘리지 않도록 {max_len}자 이내로 줄이세요",
))
return issues
@ -163,9 +185,13 @@ class SeoChecker(BaseChecker):
return []
def _check_og_tags(self, soup: BeautifulSoup) -> list[Issue]:
"""S-04: Check Open Graph tags (og:title, og:description, og:image)."""
"""S-04: Check Open Graph tags from YAML rule definitions."""
issues = []
required_og = ["og:title", "og:description", "og:image"]
rule = self._get_seo_rule("seo-open-graph")
required_tags = rule.get("details", {}).get("required_tags", [])
required_og = [t["property"] for t in required_tags] if required_tags else [
"og:title", "og:description", "og:image",
]
missing = []
for prop in required_og: