diff --git a/backend/app/engines/accessibility.py b/backend/app/engines/accessibility.py
index d20a7dc..2f32ca4 100644
--- a/backend/app/engines/accessibility.py
+++ b/backend/app/engines/accessibility.py
@@ -1,7 +1,8 @@
"""
-Accessibility (WCAG 2.1 AA) Checker Engine (F-003).
+Accessibility (WCAG) Checker Engine (F-003).
Uses Playwright + axe-core for comprehensive accessibility testing.
Falls back to BeautifulSoup-based checks if Playwright is unavailable.
+Supports WCAG version selection (2.0/2.1/2.2) via rules/accessibility.yaml.
"""
import json
@@ -14,6 +15,7 @@ from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
+from app.rules import get_rules
logger = logging.getLogger(__name__)
@@ -24,21 +26,42 @@ AXE_CORE_JS_PATH = Path(__file__).parent / "axe_core" / "axe.min.js"
+# Maps an axe-core rule id to (issue code, Korean user-facing message, WCAG criterion);
+# _parse_axe_results unpacks each entry in exactly that order.
+# NOTE(review): some ids here (e.g. "region", "tabindex", "empty-heading") look like
+# axe-core best-practice rules with no WCAG criterion of their own - confirm the
+# criterion column against the axe-core rule descriptions.
AXE_RULE_MESSAGES = {
"image-alt": ("A-01", "이미지에 대체 텍스트(alt)가 없습니다", "1.1.1"),
"color-contrast": ("A-02", "텍스트와 배경의 색상 대비가 부족합니다", "1.4.3"),
+ "color-contrast-enhanced": ("A-02", "텍스트와 배경의 색상 대비가 향상된 기준을 충족하지 않습니다", "1.4.6"),
"keyboard": ("A-03", "키보드로 접근할 수 없는 요소가 있습니다", "2.1.1"),
"focus-visible": ("A-04", "키보드 포커스가 시각적으로 표시되지 않습니다", "2.4.7"),
"label": ("A-05", "폼 요소에 레이블이 연결되지 않았습니다", "1.3.1"),
"input-label": ("A-05", "입력 요소에 레이블이 없습니다", "1.3.1"),
+ "input-button-name": ("A-05", "입력 버튼에 접근 가능한 이름이 없습니다", "4.1.2"),
+ "select-name": ("A-05", "select 요소에 접근 가능한 이름이 없습니다", "4.1.2"),
"aria-valid-attr": ("A-06", "유효하지 않은 ARIA 속성이 사용되었습니다", "4.1.2"),
"aria-roles": ("A-06", "유효하지 않은 ARIA 역할이 사용되었습니다", "4.1.2"),
"aria-required-attr": ("A-06", "필수 ARIA 속성이 누락되었습니다", "4.1.2"),
"aria-valid-attr-value": ("A-06", "ARIA 속성 값이 올바르지 않습니다", "4.1.2"),
+ "aria-allowed-attr": ("A-06", "허용되지 않는 ARIA 속성이 사용되었습니다", "4.1.2"),
+ "aria-allowed-role": ("A-06", "허용되지 않는 ARIA 역할이 사용되었습니다", "4.1.2"),
+ "aria-hidden-body": ("A-06", "body 요소에 aria-hidden이 설정되어 있습니다", "4.1.2"),
+ "aria-hidden-focus": ("A-06", "aria-hidden 요소 내에 포커스 가능한 요소가 있습니다", "4.1.2"),
"link-name": ("A-07", "링크 텍스트가 목적을 설명하지 않습니다", "2.4.4"),
"html-has-lang": ("A-08", "HTML 요소에 lang 속성이 없습니다", "3.1.1"),
"html-lang-valid": ("A-08", "HTML lang 속성 값이 올바르지 않습니다", "3.1.1"),
+ "valid-lang": ("A-08", "lang 속성 값이 올바르지 않습니다", "3.1.2"),
"bypass": ("A-09", "건너뛰기 링크(skip navigation)가 없습니다", "2.4.1"),
+ "region": ("A-09", "랜드마크 영역 밖에 콘텐츠가 있습니다", "2.4.1"),
"no-autoplay-audio": ("A-10", "자동 재생 미디어에 정지/음소거 컨트롤이 없습니다", "1.4.2"),
"audio-caption": ("A-10", "오디오/비디오에 자막이 없습니다", "1.2.2"),
"video-caption": ("A-10", "비디오에 자막이 없습니다", "1.2.2"),
+ "document-title": ("A-11", "페이지에 제목(title)이 없습니다", "2.4.2"),
+ "empty-heading": ("A-12", "빈 heading 요소가 있습니다", "2.4.6"),
+ "frame-title": ("A-13", "iframe에 접근 가능한 제목이 없습니다", "4.1.2"),
+ "button-name": ("A-14", "버튼에 접근 가능한 이름이 없습니다", "4.1.2"),
+ "meta-refresh": ("A-15", "meta refresh로 시간 제한이 설정되어 있습니다", "2.2.1"),
+ "meta-viewport-large": ("A-16", "사용자의 확대/축소가 제한되어 있습니다", "1.4.4"),
+ "autocomplete-valid": ("A-17", "autocomplete 속성이 올바르지 않습니다", "1.3.5"),
+ "target-size": ("A-18", "터치 대상 크기가 최소 기준을 충족하지 않습니다", "2.5.8"),
+ "scrollable-region-focusable": ("A-19", "스크롤 가능한 영역에 키보드 접근이 불가합니다", "2.1.1"),
+ "tabindex": ("A-20", "tabindex 값이 0보다 크게 설정되어 있습니다", "2.4.3"),
+ "blink": ("A-21", "깜빡이는 콘텐츠가 있습니다", "2.2.2"),
+ "marquee": ("A-21", "자동 스크롤 콘텐츠(marquee)가 있습니다", "2.2.2"),
}
# axe-core impact to severity mapping
@@ -49,9 +72,56 @@ IMPACT_TO_SEVERITY = {
"minor": "info",
}
+# Built-in WCAG preset name -> axe-core tag list. _get_axe_tags_for_version reads
+# this table only as a fallback, when rules/accessibility.yaml supplies no tags
+# for the requested preset.
+WCAG_VERSION_PRESETS = {
+ "2.0_A": ["wcag2a"],
+ "2.0_AA": ["wcag2a", "wcag2aa"],
+ "2.1_AA": ["wcag2a", "wcag2aa", "wcag21a", "wcag21aa"],
+ "2.2_AA": ["wcag2a", "wcag2aa", "wcag21a", "wcag21aa", "wcag22aa"],
+ "2.2_full": ["wcag2a", "wcag2aa", "wcag2aaa", "wcag21a", "wcag21aa", "wcag22aa"],
+}
+
+
+def _get_axe_tags_for_version(wcag_version: str = "2.1_AA") -> list[str]:
+    """Return the axe-core tag list for a WCAG version preset.
+
+    Lookup order:
+      1. the ``tags`` of the matching ``compliance_presets`` entry in the
+         accessibility rule set, when present and non-empty;
+      2. the built-in ``WCAG_VERSION_PRESETS`` entry for ``wcag_version``;
+      3. the built-in 2.1 AA tags, for an unknown ``wcag_version``.
+
+    A fresh list is returned on every call, so callers may modify the result
+    without touching the rule data or the module-level table.
+    """
+    # Map user-friendly names to YAML preset keys
+    version_map = {
+        "2.0_A": "wcag_20_a",
+        "2.0_AA": "wcag_20_aa",
+        "2.1_AA": "wcag_21_aa",
+        "2.2_AA": "wcag_22_aa",
+        "2.2_full": "wcag_22_full",
+    }
+
+    # A single fallback for every "YAML has nothing usable" path.
+    fallback = WCAG_VERSION_PRESETS.get(wcag_version, WCAG_VERSION_PRESETS["2.1_AA"])
+
+    # ``or {}`` covers keys that exist in the YAML but carry a null value.
+    presets = get_rules("accessibility").get("compliance_presets") or {}
+    preset = presets.get(version_map.get(wcag_version)) or {}
+    tags = preset.get("tags")
+
+    return list(tags) if tags else list(fallback)
+
+
+def _get_wcag_level_label(wcag_version: str) -> str:
+    """Return the display label for a WCAG preset name.
+
+    Preset names that are not listed get the "WCAG 2.1 Level AA" label.
+    """
+    known_labels = {
+        "2.0_A": "WCAG 2.0 Level A",
+        "2.0_AA": "WCAG 2.0 Level AA",
+        "2.1_AA": "WCAG 2.1 Level AA",
+        "2.2_AA": "WCAG 2.2 Level AA",
+        "2.2_full": "WCAG 2.2 All Levels",
+    }
+    if wcag_version in known_labels:
+        return known_labels[wcag_version]
+    return "WCAG 2.1 Level AA"
+
class AccessibilityChecker(BaseChecker):
- """Accessibility (WCAG 2.1 AA) checker engine."""
+ """Accessibility (WCAG) checker engine with version selection."""
+
+ def __init__(self, progress_callback=None, wcag_version: str = "2.1_AA"):
+ """Remember the requested WCAG preset and resolve its axe-core tags.
+
+ progress_callback is passed through to BaseChecker.__init__ unchanged.
+ wcag_version is a preset name such as "2.1_AA"; it is stored as given and
+ also handed to _get_axe_tags_for_version to compute self.axe_tags.
+ """
+ super().__init__(progress_callback)
+ self.wcag_version = wcag_version
+ self.axe_tags = _get_axe_tags_for_version(wcag_version)
@property
def category_name(self) -> str:
@@ -86,12 +156,10 @@ class AccessibilityChecker(BaseChecker):
await page.goto(url, wait_until="networkidle", timeout=30000)
await self.update_progress(40, "axe-core 주입 중...")
- # Load axe-core JS
if AXE_CORE_JS_PATH.exists() and AXE_CORE_JS_PATH.stat().st_size > 1000:
axe_js = AXE_CORE_JS_PATH.read_text(encoding="utf-8")
await page.evaluate(axe_js)
else:
- # Fallback: load from CDN
await page.evaluate("""
async () => {
const script = document.createElement('script');
@@ -104,22 +172,27 @@ class AccessibilityChecker(BaseChecker):
}
""")
- await self.update_progress(60, "접근성 검사 실행 중...")
- axe_results = await page.evaluate("""
- () => {
- return new Promise((resolve, reject) => {
- if (typeof axe === 'undefined') {
+ await self.update_progress(60, f"접근성 검사 실행 중 ({_get_wcag_level_label(self.wcag_version)})...")
+
+ # Build axe-core tag list including best-practice
+ axe_tag_list = self.axe_tags + ["best-practice"]
+ tags_js = json.dumps(axe_tag_list)
+
+ axe_results = await page.evaluate(f"""
+ () => {{
+ return new Promise((resolve, reject) => {{
+ if (typeof axe === 'undefined') {{
reject(new Error('axe-core not loaded'));
return;
- }
- axe.run(document, {
- runOnly: {
+ }}
+ axe.run(document, {{
+ runOnly: {{
type: 'tag',
- values: ['wcag2a', 'wcag2aa', 'best-practice']
- }
- }).then(resolve).catch(reject);
- });
- }
+ values: {tags_js}
+ }}
+ }}).then(resolve).catch(reject);
+ }});
+ }}
""")
await self.update_progress(80, "결과 분석 중...")
@@ -134,7 +207,7 @@ class AccessibilityChecker(BaseChecker):
category="accessibility",
score=score,
issues=issues,
- wcag_level="AA",
+ wcag_level=_get_wcag_level_label(self.wcag_version),
)
async def _check_with_beautifulsoup(self, url: str, html_content: str) -> CategoryResult:
@@ -170,7 +243,7 @@ class AccessibilityChecker(BaseChecker):
category="accessibility",
score=score,
issues=issues,
- wcag_level="AA",
+ wcag_level=_get_wcag_level_label(self.wcag_version),
)
def _parse_axe_results(self, axe_results: dict) -> list[Issue]:
@@ -182,15 +255,15 @@ class AccessibilityChecker(BaseChecker):
impact = violation.get("impact", "minor")
severity = IMPACT_TO_SEVERITY.get(impact, "info")
- # Map to our issue codes
if rule_id in AXE_RULE_MESSAGES:
code, korean_msg, wcag = AXE_RULE_MESSAGES[rule_id]
else:
- code = "A-06"
+ code = "A-99"
korean_msg = violation.get("description", "접근성 위반 사항이 발견되었습니다")
- wcag = "4.1.2"
+ # Try to extract WCAG criterion from tags
+ tags = violation.get("tags", [])
+ wcag = self._extract_wcag_from_tags(tags)
- # Get affected elements
nodes = violation.get("nodes", [])
element = None
if nodes:
@@ -211,7 +284,6 @@ class AccessibilityChecker(BaseChecker):
if ratio:
detail = f" (대비율: {ratio}:1, 최소 4.5:1 필요)"
- # Create the issue with node count info
node_count = len(nodes)
count_info = f" ({node_count}개 요소)" if node_count > 1 else ""
@@ -226,11 +298,19 @@ class AccessibilityChecker(BaseChecker):
return issues
+    @staticmethod
+    def _extract_wcag_from_tags(tags: list[str]) -> str:
+        """Return the WCAG criterion encoded in axe-core tags, e.g. 'wcag111' -> '1.1.1'.
+
+        The first tag of the form ``wcag`` + three or more digits wins; the
+        first two digits are the principle and guideline, the rest is the
+        criterion number ('wcag1410' -> '1.4.10'). Returns "4.1.2" when no
+        tag has that form.
+        """
+        for tag in tags:
+            if not tag.startswith("wcag"):
+                continue
+            digits = tag[4:]
+            # Criterion tags are digits only. Conformance-level tags such as
+            # "wcag2aa" or "wcag21a" contain letters and are skipped here; a
+            # bare "wcag2" prefix test would also drop every Principle 2
+            # criterion ("wcag211", "wcag244", ...).
+            if len(digits) >= 3 and digits.isascii() and digits.isdigit():
+                return f"{digits[0]}.{digits[1]}.{digits[2:]}"
+        return "4.1.2"
+
def _calculate_axe_score(self, axe_results: dict) -> int:
- """
- Calculate score based on axe-core violations.
- critical=-20, serious=-10, moderate=-5, minor=-2
- """
+ """Calculate score based on axe-core violations."""
severity_weights = {
"critical": 20,
"serious": 10,
@@ -284,7 +364,6 @@ class AccessibilityChecker(BaseChecker):
if inp.get("aria-label") or inp.get("aria-labelledby") or inp.get("title"):
has_label = True
- # Check if wrapped in label
parent_label = inp.find_parent("label")
if parent_label:
has_label = True
@@ -377,10 +456,9 @@ class AccessibilityChecker(BaseChecker):
def _bs_check_skip_nav(self, soup: BeautifulSoup) -> list[Issue]:
"""A-09: Check for skip navigation link."""
- # Look for skip nav patterns
skip_links = soup.find_all("a", href=True)
has_skip = False
- for link in skip_links[:10]: # Check first 10 links
+ for link in skip_links[:10]:
href = link.get("href", "")
text = link.get_text(strip=True).lower()
if href.startswith("#") and any(
@@ -417,6 +495,6 @@ class AccessibilityChecker(BaseChecker):
suggestion="autoplay 미디어에 controls 속성을 추가하거나 muted 속성을 사용하세요",
wcag_criterion="1.4.2",
))
- break # Report only first
+ break
return issues
diff --git a/backend/app/engines/html_css.py b/backend/app/engines/html_css.py
index 110161c..754ef67 100644
--- a/backend/app/engines/html_css.py
+++ b/backend/app/engines/html_css.py
@@ -2,6 +2,7 @@
HTML/CSS Standards Checker Engine (F-002).
Checks HTML5 validity, semantic tags, CSS inline usage, etc.
Uses BeautifulSoup4 + html5lib for parsing.
+Rules loaded from rules/html_css.yaml.
"""
import re
@@ -13,15 +14,28 @@ from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
+from app.rules import get_rules
logger = logging.getLogger(__name__)
-DEPRECATED_TAGS = [
- "font", "center", "marquee", "blink", "strike", "big", "tt",
- "basefont", "applet", "dir", "isindex",
-]
-SEMANTIC_TAGS = ["header", "nav", "main", "footer", "section", "article"]
+def _load_obsolete_elements() -> list[dict]:
+    """Return the ``obsolete_elements`` entries of the html_css rule set.
+
+    A missing key and a key whose YAML value is null or empty both yield
+    ``[]``, so callers can always iterate the result.
+    """
+    return get_rules("html_css").get("obsolete_elements") or []
+
+
+def _load_obsolete_attributes() -> dict[str, list[dict]]:
+    """Return the ``obsolete_attributes`` mapping of the html_css rule set.
+
+    The mapping is keyed by element (or element-group) name. A missing key
+    and a key whose YAML value is null or empty both yield ``{}``.
+    """
+    return get_rules("html_css").get("obsolete_attributes") or {}
+
+
+def _load_semantic_tags() -> list[str]:
+    """Return the structural semantic tag names to look for.
+
+    Names come from ``semantic_elements.structural[*].tag`` in the html_css
+    rule set; entries without a usable ``tag`` are skipped. When the rule set
+    provides no names at all, the previous hard-coded list is returned, since
+    an empty list would make the H-05 check report a violation on every page.
+    """
+    semantic = get_rules("html_css").get("semantic_elements") or {}
+    structural = semantic.get("structural") or []
+    tags = [
+        item["tag"]
+        for item in structural
+        if isinstance(item, dict) and item.get("tag")
+    ]
+    return tags or ["header", "nav", "main", "footer", "section", "article"]
class HtmlCssChecker(BaseChecker):
@@ -50,16 +64,21 @@ class HtmlCssChecker(BaseChecker):
await self.update_progress(50, "시맨틱 태그 검사 중...")
issues += self._check_semantic_tags(soup)
- await self.update_progress(60, "이미지 alt 속성 검사 중...")
+ await self.update_progress(55, "이미지 alt 속성 검사 중...")
issues += self._check_img_alt(soup)
- await self.update_progress(70, "중복 ID 검사 중...")
+ await self.update_progress(60, "중복 ID 검사 중...")
issues += self._check_duplicate_ids(soup)
- await self.update_progress(80, "링크 및 스타일 검사 중...")
+ await self.update_progress(65, "링크 및 스타일 검사 중...")
issues += self._check_empty_links(soup)
issues += self._check_inline_styles(soup)
- issues += self._check_deprecated_tags(soup)
+
+ await self.update_progress(75, "Obsolete 태그 검사 중...")
+ issues += self._check_obsolete_tags(soup)
+
+ await self.update_progress(80, "Obsolete 속성 검사 중...")
+ issues += self._check_obsolete_attributes(soup)
await self.update_progress(90, "heading 구조 검사 중...")
issues += self._check_heading_hierarchy(soup)
@@ -134,21 +153,22 @@ class HtmlCssChecker(BaseChecker):
def _check_semantic_tags(self, soup: BeautifulSoup) -> list[Issue]:
"""H-05: Check for semantic HTML5 tag usage."""
+ semantic_tags = _load_semantic_tags()
found_tags = set()
- for tag_name in SEMANTIC_TAGS:
+ for tag_name in semantic_tags:
if soup.find(tag_name):
found_tags.add(tag_name)
if not found_tags:
+ tag_list = ", ".join(semantic_tags)
return [self._create_issue(
code="H-05",
severity="minor",
- message="시맨틱 태그가 사용되지 않았습니다 (header, nav, main, footer, section, article)",
+ message=f"시맨틱 태그가 사용되지 않았습니다 ({tag_list})",
suggestion="적절한 시맨틱 태그를 사용하여 문서 구조를 명확히 하세요",
)]
- missing = set(SEMANTIC_TAGS) - found_tags
- # Only report if major structural elements are missing (main is most important)
+ missing = set(semantic_tags) - found_tags
if "main" in missing:
return [self._create_issue(
code="H-05",
@@ -240,23 +260,105 @@ class HtmlCssChecker(BaseChecker):
))
return issues
- def _check_deprecated_tags(self, soup: BeautifulSoup) -> list[Issue]:
- """H-10: Check for deprecated HTML tags."""
+ def _check_obsolete_tags(self, soup: BeautifulSoup) -> list[Issue]:
+ """H-10: Report obsolete HTML tags listed in the YAML rules.
+
+ Emits one issue per obsolete tag name that occurs in soup; the issue
+ carries the occurrence count and points at the first occurrence.
+ """
issues = []
- for tag_name in DEPRECATED_TAGS:
+ obsolete = _load_obsolete_elements()
+
+ for entry in obsolete:
+ # Every entry needs a "tag" key (KeyError otherwise); the other keys are optional.
+ tag_name = entry["tag"]
found = soup.find_all(tag_name)
if found:
+ # Defaults reproduce the previous hard-coded suggestion ("CSS") and severity.
+ replacement = entry.get("replacement", "CSS")
+ severity = entry.get("severity", "major")
first_el = found[0]
issues.append(self._create_issue(
code="H-10",
- severity="major",
- message=f"사용 중단된(deprecated) 태그 <{tag_name}>이(가) {len(found)}회 사용되었습니다",
+ severity=severity,
+ message=f"사용 중단된(obsolete) 태그 <{tag_name}>이(가) {len(found)}회 사용되었습니다",
element=self._truncate_element(str(first_el)),
line=self._get_line_number(first_el),
- suggestion=f"<{tag_name}> 대신 CSS를 사용하여 스타일을 적용하세요",
+ suggestion=f"<{tag_name}> 대신 {replacement}을(를) 사용하세요",
))
return issues
+    def _check_obsolete_attributes(self, soup: BeautifulSoup) -> list[Issue]:
+        """H-13: Report obsolete HTML attributes listed in the YAML rules.
+
+        Every (element, obsolete attribute) occurrence is counted. At most one
+        issue is returned: it carries the total count and uses the first
+        occurrence found, in scan order, as the example element and line.
+        Returns ``[]`` when nothing is found.
+        """
+        obsolete_attrs = _load_obsolete_attributes()
+
+        # YAML key -> HTML tag names that key applies to. Scan order is:
+        # single-element keys, then multi-element groups, then global.
+        # Each tag appears exactly once so that no occurrence is counted twice.
+        single_tags = (
+            "a", "body", "br", "form", "hr", "html", "iframe", "img",
+            "input", "link", "meta", "script", "style", "table", "embed",
+        )
+        tag_groups: dict[str, list] = {tag: [tag] for tag in single_tags}
+        tag_groups.update({
+            "td_th": ["td", "th"],
+            "tr": ["tr"],
+            "thead_tbody_tfoot": ["thead", "tbody", "tfoot"],
+            "ol_ul": ["ol", "ul"],
+            "heading": ["h1", "h2", "h3", "h4", "h5", "h6"],
+            # None means "no tag filter": the attribute is obsolete on any element.
+            "global": [None],
+        })
+
+        found_count = 0
+        first_hit = None  # (element, example label) of the first occurrence
+
+        for yaml_key, html_tags in tag_groups.items():
+            for attr_entry in obsolete_attrs.get(yaml_key) or []:
+                attr_name = attr_entry["attr"]
+                for html_tag in html_tags:
+                    elements = soup.find_all(html_tag, attrs={attr_name: True})
+                    if not elements:
+                        continue
+                    found_count += len(elements)
+                    if first_hit is None:
+                        label = f"{html_tag}[{attr_name}]" if html_tag else attr_name
+                        first_hit = (elements[0], label)
+
+        if first_hit is None:
+            return []
+
+        first_el, first_attr = first_hit
+        return [self._create_issue(
+            code="H-13",
+            severity="minor",
+            message=f"사용 중단된(obsolete) HTML 속성이 {found_count}개 발견되었습니다 (예: {first_attr})",
+            element=self._truncate_element(str(first_el)),
+            line=self._get_line_number(first_el),
+            suggestion="사용 중단된 HTML 속성 대신 CSS를 사용하세요 (W3C HTML Living Standard 참조)",
+        )]
+
def _check_heading_hierarchy(self, soup: BeautifulSoup) -> list[Issue]:
"""H-11: Check heading hierarchy (h1-h6 should not skip levels)."""
issues = []
@@ -277,7 +379,7 @@ class HtmlCssChecker(BaseChecker):
line=self._get_line_number(heading),
suggestion=f"h{prev_level} 다음에는 h{prev_level + 1}을 사용하세요",
))
- break # Only report first skip
+ break
prev_level = level
return issues
@@ -295,14 +397,12 @@ class HtmlCssChecker(BaseChecker):
@staticmethod
def _get_line_number(element) -> Optional[int]:
- """Extract source line number from a BeautifulSoup element."""
if element and hasattr(element, "sourceline"):
return element.sourceline
return None
@staticmethod
def _truncate_element(element_str: str, max_len: int = 200) -> str:
- """Truncate element string for display."""
if len(element_str) > max_len:
return element_str[:max_len] + "..."
return element_str
diff --git a/backend/app/engines/performance_security.py b/backend/app/engines/performance_security.py
index fdfa10d..db02ee3 100644
--- a/backend/app/engines/performance_security.py
+++ b/backend/app/engines/performance_security.py
@@ -10,13 +10,14 @@ import logging
import time
from datetime import datetime, timezone
from urllib.parse import urlparse
-from typing import Optional
+from typing import Any, Optional
import httpx
from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue, calculate_grade
+from app.rules import get_rules
logger = logging.getLogger(__name__)
@@ -24,6 +25,32 @@ logger = logging.getLogger(__name__)
class PerformanceSecurityChecker(BaseChecker):
"""Performance and security checker engine."""
+ def __init__(self, **kwargs):
+ """Pass all keyword arguments to the base class and load this engine's rules.
+
+ The value returned by get_rules("performance_security") is kept in
+ self._rules_data, which the _get_* helper methods read.
+ """
+ super().__init__(**kwargs)
+ self._rules_data = get_rules("performance_security")
+
+    def _get_security_headers(self) -> list[dict[str, Any]]:
+        """Return the ``security.headers`` rule list, or ``[]`` if either key is absent."""
+        security_section = self._rules_data.get("security", {})
+        return security_section.get("headers", [])
+
+    def _get_headers_to_remove(self) -> list[dict[str, Any]]:
+        """Return the ``security.headers_to_remove`` rule list, or ``[]`` if either key is absent."""
+        security_section = self._rules_data.get("security", {})
+        return security_section.get("headers_to_remove", [])
+
+    def _get_ttfb_thresholds(self) -> dict[str, int]:
+        """Return the TTFB thresholds from the rule data.
+
+        Uses the first ``performance.additional_metrics`` entry whose id is
+        "perf-ttfb": its ``thresholds`` value, or ``{}`` when it has none.
+        Without such an entry the built-in 800 / 1800 values are returned.
+        """
+        candidates = self._rules_data.get("performance", {}).get("additional_metrics", [])
+        ttfb_rule = next((m for m in candidates if m.get("id") == "perf-ttfb"), None)
+        if ttfb_rule is None:
+            return {"good": 800, "needs_improvement": 1800}
+        return ttfb_rule.get("thresholds", {})
+
+    def _get_page_size_thresholds(self) -> dict[str, int]:
+        """Return the total-page-size thresholds from the rule data.
+
+        Uses the first ``performance.resource_checks`` entry whose id is
+        "perf-total-page-size": its ``thresholds`` value, or ``{}`` when it
+        has none. Without such an entry the built-in 1500 / 3000 / 5000
+        values are returned.
+        """
+        candidates = self._rules_data.get("performance", {}).get("resource_checks", [])
+        size_rule = next((c for c in candidates if c.get("id") == "perf-total-page-size"), None)
+        if size_rule is None:
+            return {"good": 1500, "needs_improvement": 3000, "poor": 5000}
+        return size_rule.get("thresholds", {})
+
@property
def category_name(self) -> str:
return "performance_security"
@@ -46,6 +73,12 @@ class PerformanceSecurityChecker(BaseChecker):
issues += self._check_x_xss_protection(headers)
issues += self._check_referrer_policy(headers)
issues += self._check_permissions_policy(headers)
+ issues += self._check_coop(headers)
+ issues += self._check_coep(headers)
+ issues += self._check_corp(headers)
+
+ await self.update_progress(50, "정보 노출 헤더 검사 중...")
+ issues += self._check_info_disclosure(headers)
await self.update_progress(60, "응답 시간 측정 중...")
issues += await self._check_ttfb(url, metrics)
@@ -163,15 +196,24 @@ class PerformanceSecurityChecker(BaseChecker):
return []
+    def _get_security_header_rule(self, rule_id: str) -> dict[str, Any]:
+        """Return the first security-header rule whose ``id`` equals rule_id, else ``{}``."""
+        return next(
+            (rule for rule in self._get_security_headers() if rule.get("id") == rule_id),
+            {},
+        )
+
def _check_hsts(self, headers: dict) -> list[Issue]:
"""P-03: Check Strict-Transport-Security header."""
+ # The suggested header value is read from the YAML rule; the literal below is
+ # used when the rule or its details.recommended_value is missing.
+ rule = self._get_security_header_rule("sec-strict-transport-security")
+ recommended = rule.get("details", {}).get("recommended_value", "max-age=31536000; includeSubDomains")
hsts = self._get_header(headers, "Strict-Transport-Security")
if not hsts:
return [self._create_issue(
code="P-03",
severity="major",
message="Strict-Transport-Security(HSTS) 헤더가 설정되지 않았습니다",
- suggestion="HSTS 헤더를 추가하세요: Strict-Transport-Security: max-age=31536000; includeSubDomains",
+ suggestion=f"HSTS 헤더를 추가하세요: Strict-Transport-Security: {recommended}",
)]
return []
@@ -225,13 +267,15 @@ class PerformanceSecurityChecker(BaseChecker):
def _check_referrer_policy(self, headers: dict) -> list[Issue]:
"""P-08: Check Referrer-Policy header."""
+ # The suggested policy is read from the YAML rule; the literal below is used
+ # when the rule or its details.recommended_value is missing.
+ rule = self._get_security_header_rule("sec-referrer-policy")
+ recommended = rule.get("details", {}).get("recommended_value", "strict-origin-when-cross-origin")
rp = self._get_header(headers, "Referrer-Policy")
if not rp:
return [self._create_issue(
code="P-08",
severity="minor",
message="Referrer-Policy 헤더가 설정되지 않았습니다",
- suggestion="Referrer-Policy: strict-origin-when-cross-origin을 설정하세요",
+ suggestion=f"Referrer-Policy: {recommended}을 설정하세요",
)]
return []
@@ -247,8 +291,69 @@ class PerformanceSecurityChecker(BaseChecker):
)]
return []
+    def _check_coop(self, headers: dict) -> list[Issue]:
+        """P-15: Flag a missing Cross-Origin-Opener-Policy header.
+
+        Returns one minor issue when _get_header yields a falsy value for the
+        header, otherwise an empty list. The suggested value is the YAML
+        rule's details.recommended_value, defaulting to "same-origin".
+        """
+        coop_rule = self._get_security_header_rule("sec-cross-origin-opener-policy")
+        suggested_value = coop_rule.get("details", {}).get("recommended_value", "same-origin")
+
+        if self._get_header(headers, "Cross-Origin-Opener-Policy"):
+            return []
+
+        missing_header_issue = self._create_issue(
+            code="P-15",
+            severity="minor",
+            message="Cross-Origin-Opener-Policy(COOP) 헤더가 설정되지 않았습니다",
+            suggestion=f"COOP 헤더를 추가하세요: Cross-Origin-Opener-Policy: {suggested_value}",
+        )
+        return [missing_header_issue]
+
+    def _check_coep(self, headers: dict) -> list[Issue]:
+        """P-16: Flag a missing Cross-Origin-Embedder-Policy header.
+
+        Returns one minor issue when _get_header yields a falsy value for the
+        header, otherwise an empty list. The suggested value is the YAML
+        rule's details.recommended_value, defaulting to "require-corp".
+        """
+        coep_rule = self._get_security_header_rule("sec-cross-origin-embedder-policy")
+        suggested_value = coep_rule.get("details", {}).get("recommended_value", "require-corp")
+
+        if self._get_header(headers, "Cross-Origin-Embedder-Policy"):
+            return []
+
+        missing_header_issue = self._create_issue(
+            code="P-16",
+            severity="minor",
+            message="Cross-Origin-Embedder-Policy(COEP) 헤더가 설정되지 않았습니다",
+            suggestion=f"COEP 헤더를 추가하세요: Cross-Origin-Embedder-Policy: {suggested_value}",
+        )
+        return [missing_header_issue]
+
+    def _check_corp(self, headers: dict) -> list[Issue]:
+        """P-17: Flag a missing Cross-Origin-Resource-Policy header.
+
+        Returns one minor issue when _get_header yields a falsy value for the
+        header, otherwise an empty list. The suggested value is the YAML
+        rule's details.recommended_value, defaulting to "same-site".
+        """
+        corp_rule = self._get_security_header_rule("sec-cross-origin-resource-policy")
+        suggested_value = corp_rule.get("details", {}).get("recommended_value", "same-site")
+
+        if self._get_header(headers, "Cross-Origin-Resource-Policy"):
+            return []
+
+        missing_header_issue = self._create_issue(
+            code="P-17",
+            severity="minor",
+            message="Cross-Origin-Resource-Policy(CORP) 헤더가 설정되지 않았습니다",
+            suggestion=f"CORP 헤더를 추가하세요: Cross-Origin-Resource-Policy: {suggested_value}",
+        )
+        return [missing_header_issue]
+
+    def _check_info_disclosure(self, headers: dict) -> list[Issue]:
+        """P-18: Report response headers that disclose server information.
+
+        The header names come from the ``details.header`` field of each
+        ``headers_to_remove`` rule. One info-level issue is emitted per listed
+        header that is present with a truthy value; the reported value is cut
+        to 80 characters. Rules without a header name are ignored.
+        """
+        issues = []
+        for rule in self._get_headers_to_remove():
+            header_name = rule.get("details", {}).get("header", "")
+            if not header_name:
+                # Nothing to look up; also avoids an issue text with a blank name.
+                continue
+            value = self._get_header(headers, header_name)
+            if not value:
+                continue
+            issues.append(self._create_issue(
+                code="P-18",
+                severity="info",
+                message=f"{header_name} 헤더가 서버 정보를 노출하고 있습니다: {value[:80]}",
+                suggestion=f"{header_name} 헤더를 제거하여 서버 기술 스택 노출을 방지하세요",
+            ))
+        return issues
+
async def _check_ttfb(self, url: str, metrics: dict) -> list[Issue]:
- """P-10: Check Time To First Byte (TTFB)."""
+ """P-10: Check Time To First Byte (TTFB) using YAML thresholds."""
+ thresholds = self._get_ttfb_thresholds()
+ good_ms = thresholds.get("good", 800)
+ needs_improvement_ms = thresholds.get("needs_improvement", 1800)
+
try:
start = time.monotonic()
async with httpx.AsyncClient(
@@ -262,18 +367,18 @@ class PerformanceSecurityChecker(BaseChecker):
ttfb_ms = round((time.monotonic() - start) * 1000)
metrics["ttfb_ms"] = ttfb_ms
- if ttfb_ms > 2000:
+ if ttfb_ms > needs_improvement_ms:
return [self._create_issue(
code="P-10",
severity="major",
- message=f"응답 시간(TTFB)이 느립니다: {ttfb_ms}ms (권장 < 1000ms)",
+ message=f"응답 시간(TTFB)이 느립니다: {ttfb_ms}ms (권장 < {good_ms}ms)",
suggestion="서버 응답 속도를 개선하세요 (캐싱, CDN, 서버 최적화)",
)]
- elif ttfb_ms > 1000:
+ elif ttfb_ms > good_ms:
return [self._create_issue(
code="P-10",
severity="minor",
- message=f"응답 시간(TTFB)이 다소 느립니다: {ttfb_ms}ms (권장 < 1000ms)",
+ message=f"응답 시간(TTFB)이 다소 느립니다: {ttfb_ms}ms (권장 < {good_ms}ms)",
suggestion="서버 응답 속도 개선을 고려하세요",
)]
except Exception as e:
@@ -288,15 +393,19 @@ class PerformanceSecurityChecker(BaseChecker):
return []
def _check_page_size(self, html_content: str, metrics: dict) -> list[Issue]:
- """P-11: Check HTML page size."""
+ """P-11: Check HTML page size against the YAML "poor" threshold.
+
+ Records the UTF-8 byte size of html_content in metrics["page_size_bytes"]
+ and returns one minor issue when it exceeds the threshold, else [].
+ """
+ thresholds = self._get_page_size_thresholds()
+ # The threshold is handled as KiB: it is multiplied by 1024 to get bytes.
+ poor_kb = thresholds.get("poor", 5000)
+ poor_bytes = poor_kb * 1024
+
size_bytes = len(html_content.encode("utf-8"))
metrics["page_size_bytes"] = size_bytes
- if size_bytes > 3 * 1024 * 1024: # 3MB
+ if size_bytes > poor_bytes:
return [self._create_issue(
code="P-11",
severity="minor",
- message=f"페이지 크기가 큽니다: {round(size_bytes / 1024 / 1024, 1)}MB (권장 < 3MB)",
+ # Print the limit with the same rounding as the measured size; integer
+ # division would show the default 5000 KB as "4MB" although the check
+ # above only triggers at about 4.9 MB.
+ message=f"페이지 크기가 큽니다: {round(size_bytes / 1024 / 1024, 1)}MB (권장 < {round(poor_kb / 1024, 1)}MB)",
suggestion="페이지 크기를 줄이세요 (불필요한 코드 제거, 이미지 최적화, 코드 분할)",
)]
return []
@@ -387,8 +496,9 @@ class PerformanceSecurityChecker(BaseChecker):
https_ssl_score = max(0, https_ssl_score)
# Security headers component (40% of security)
- header_issues = [i for i in issues if i.code in ("P-03", "P-04", "P-05", "P-06", "P-07", "P-08", "P-09")]
- total_header_checks = 7
+ header_codes = {"P-03", "P-04", "P-05", "P-06", "P-07", "P-08", "P-09", "P-15", "P-16", "P-17"}
+ header_issues = [i for i in issues if i.code in header_codes]
+ total_header_checks = len(header_codes)
passed_headers = total_header_checks - len(header_issues)
header_score = round(passed_headers / total_header_checks * 100) if total_header_checks else 100
@@ -398,13 +508,16 @@ class PerformanceSecurityChecker(BaseChecker):
perf_score = 100
# TTFB component (40% of performance)
+ ttfb_thresholds = self._get_ttfb_thresholds()
+ ttfb_good = ttfb_thresholds.get("good", 800)
+ ttfb_ni = ttfb_thresholds.get("needs_improvement", 1800)
ttfb = metrics.get("ttfb_ms")
if ttfb is not None:
- if ttfb <= 500:
+ if ttfb <= ttfb_good // 2:
ttfb_score = 100
- elif ttfb <= 1000:
+ elif ttfb <= ttfb_good:
ttfb_score = 80
- elif ttfb <= 2000:
+ elif ttfb <= ttfb_ni:
ttfb_score = 60
else:
ttfb_score = 30
@@ -412,12 +525,16 @@ class PerformanceSecurityChecker(BaseChecker):
ttfb_score = 50
# Page size component (30% of performance)
+ size_thresholds = self._get_page_size_thresholds()
+ good_kb = size_thresholds.get("good", 1500)
+ ni_kb = size_thresholds.get("needs_improvement", 3000)
+ poor_kb = size_thresholds.get("poor", 5000)
page_size = metrics.get("page_size_bytes", 0)
- if page_size <= 1024 * 1024: # 1MB
+ if page_size <= good_kb * 1024:
size_score = 100
- elif page_size <= 2 * 1024 * 1024: # 2MB
+ elif page_size <= ni_kb * 1024:
size_score = 80
- elif page_size <= 3 * 1024 * 1024: # 3MB
+ elif page_size <= poor_kb * 1024:
size_score = 60
else:
size_score = 30
diff --git a/backend/app/engines/seo.py b/backend/app/engines/seo.py
index 64bd350..46c87c0 100644
--- a/backend/app/engines/seo.py
+++ b/backend/app/engines/seo.py
@@ -7,13 +7,14 @@ import re
import json
import logging
from urllib.parse import urlparse, urljoin
-from typing import Optional
+from typing import Any, Optional
import httpx
from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
+from app.rules import get_rules
logger = logging.getLogger(__name__)
@@ -21,6 +22,23 @@ logger = logging.getLogger(__name__)
class SeoChecker(BaseChecker):
"""SEO optimization checker engine."""
+ def __init__(self, **kwargs):
+ """Pass all keyword arguments to the base class and load the SEO rules.
+
+ The value returned by get_rules("seo") is kept in self._rules_data, which
+ _get_seo_rule searches.
+ """
+ super().__init__(**kwargs)
+ self._rules_data = get_rules("seo")
+
+    def _get_seo_rule(self, rule_id: str) -> dict[str, Any]:
+        """Return the first entry of ``rules`` whose ``id`` equals rule_id, else ``{}``."""
+        all_rules = self._rules_data.get("rules", [])
+        return next((entry for entry in all_rules if entry.get("id") == rule_id), {})
+
+    def _get_threshold(self, rule_id: str, key: str, default: Any = None) -> Any:
+        """Return ``details[key]`` of the rule with the given id.
+
+        ``default`` is returned when the rule, its ``details`` mapping or the
+        key is missing, and also when the stored value is null. Callers compare
+        the result with integers, so a null must not leak through.
+        """
+        details = self._get_seo_rule(rule_id).get("details") or {}
+        value = details.get(key)
+        return default if value is None else value
+
@property
def category_name(self) -> str:
return "seo"
@@ -73,9 +91,11 @@ class SeoChecker(BaseChecker):
)
def _check_title(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
- """S-01: Check title tag existence and length (10-60 chars)."""
+ """S-01: Check title tag existence and length."""
issues = []
title = soup.find("title")
+ min_len = self._get_threshold("seo-title-tag", "min_length", 10)
+ max_len = self._get_threshold("seo-title-tag", "max_length", 60)
if title is None or not title.string or title.string.strip() == "":
meta_info["title"] = None
@@ -84,7 +104,7 @@ class SeoChecker(BaseChecker):
code="S-01",
severity="critical",
message="
태그가 없거나 비어있습니다",
- suggestion="검색 결과에 표시될 10-60자 길이의 페이지 제목을 설정하세요",
+ suggestion=f"검색 결과에 표시될 {min_len}-{max_len}자 길이의 페이지 제목을 설정하세요",
))
return issues
@@ -93,28 +113,30 @@ class SeoChecker(BaseChecker):
meta_info["title"] = title_text
meta_info["title_length"] = title_len
- if title_len < 10:
+ if title_len < min_len:
issues.append(self._create_issue(
code="S-01",
severity="critical",
- message=f"title이 너무 짧습니다 ({title_len}자, 권장 10-60자)",
+ message=f"title이 너무 짧습니다 ({title_len}자, 권장 {min_len}-{max_len}자)",
element=f"{title_text}",
- suggestion="검색 결과에 효과적으로 표시되도록 10자 이상의 제목을 작성하세요",
+ suggestion=f"검색 결과에 효과적으로 표시되도록 {min_len}자 이상의 제목을 작성하세요",
))
- elif title_len > 60:
+ elif title_len > max_len:
issues.append(self._create_issue(
code="S-01",
severity="minor",
- message=f"title이 너무 깁니다 ({title_len}자, 권장 10-60자)",
+ message=f"title이 너무 깁니다 ({title_len}자, 권장 {min_len}-{max_len}자)",
element=f"{title_text[:50]}...",
- suggestion="검색 결과에서 잘리지 않도록 60자 이내로 제목을 줄이세요",
+ suggestion=f"검색 결과에서 잘리지 않도록 {max_len}자 이내로 제목을 줄이세요",
))
return issues
def _check_meta_description(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
- """S-02: Check meta description existence and length (50-160 chars)."""
+ """S-02: Check meta description existence and length."""
issues = []
desc = soup.find("meta", attrs={"name": re.compile(r"^description$", re.I)})
+ min_len = self._get_threshold("seo-meta-description", "min_length", 50)
+ max_len = self._get_threshold("seo-meta-description", "max_length", 160)
if desc is None or not desc.get("content"):
meta_info["description"] = None
@@ -123,7 +145,7 @@ class SeoChecker(BaseChecker):
code="S-02",
severity="major",
message="meta description이 없습니다",
- suggestion='을 추가하세요 (50-160자 권장)',
+ suggestion=f'을 추가하세요 ({min_len}-{max_len}자 권장)',
))
return issues
@@ -132,19 +154,19 @@ class SeoChecker(BaseChecker):
meta_info["description"] = content
meta_info["description_length"] = content_len
- if content_len < 50:
+ if content_len < min_len:
issues.append(self._create_issue(
code="S-02",
severity="major",
- message=f"meta description이 너무 짧습니다 ({content_len}자, 권장 50-160자)",
- suggestion="검색 결과에서 페이지를 효과적으로 설명하도록 50자 이상으로 작성하세요",
+ message=f"meta description이 너무 짧습니다 ({content_len}자, 권장 {min_len}-{max_len}자)",
+ suggestion=f"검색 결과에서 페이지를 효과적으로 설명하도록 {min_len}자 이상으로 작성하세요",
))
- elif content_len > 160:
+ elif content_len > max_len:
issues.append(self._create_issue(
code="S-02",
severity="minor",
- message=f"meta description이 너무 깁니다 ({content_len}자, 권장 50-160자)",
- suggestion="검색 결과에서 잘리지 않도록 160자 이내로 줄이세요",
+ message=f"meta description이 너무 깁니다 ({content_len}자, 권장 {min_len}-{max_len}자)",
+ suggestion=f"검색 결과에서 잘리지 않도록 {max_len}자 이내로 줄이세요",
))
return issues
@@ -163,9 +185,13 @@ class SeoChecker(BaseChecker):
return []
def _check_og_tags(self, soup: BeautifulSoup) -> list[Issue]:
- """S-04: Check Open Graph tags (og:title, og:description, og:image)."""
+ """S-04: Check Open Graph tags from YAML rule definitions."""
issues = []
- required_og = ["og:title", "og:description", "og:image"]
+ rule = self._get_seo_rule("seo-open-graph")
+ required_tags = rule.get("details", {}).get("required_tags", [])
+ required_og = [t["property"] for t in required_tags] if required_tags else [
+ "og:title", "og:description", "og:image",
+ ]
missing = []
for prop in required_og:
diff --git a/backend/app/rules/__init__.py b/backend/app/rules/__init__.py
new file mode 100644
index 0000000..1f46af4
--- /dev/null
+++ b/backend/app/rules/__init__.py
@@ -0,0 +1,67 @@
+"""
+Rules Loader - YAML 기반 표준 규칙 데이터 로드 및 캐싱.
+
+Usage:
+ from app.rules import get_rules
+ rules = get_rules("html_css") # html_css.yaml 전체 로드
+ rules = get_rules("accessibility") # accessibility.yaml 전체 로드
+"""
+
+import logging
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+logger = logging.getLogger(__name__)
+
+RULES_DIR = Path(__file__).parent  # directory searched for <category>.yaml rule files
+_cache: dict[str, dict[str, Any]] = {}  # category name -> parsed YAML, filled by get_rules()
+
+
+def get_rules(category: str) -> dict[str, Any]:
+ """
+ Return the parsed rules for `category`, loading `<category>.yaml` on first use.
+
+ Args:
+ category: Stem of a YAML file in RULES_DIR, e.g. "html_css" or "accessibility".
+
+ Returns:
+ The cached parsed YAML object itself (not a copy); {} if the file is missing or empty.
+ """
+ if category in _cache:
+ return _cache[category]
+
+ yaml_path = RULES_DIR / f"{category}.yaml"
+ if not yaml_path.exists():
+ logger.error("Rules file not found: %s", yaml_path)
+ return {}  # not cached: the file is looked up again on the next call
+
+ with open(yaml_path, "r", encoding="utf-8") as f:
+ data = yaml.safe_load(f) or {}  # a falsy parse result (e.g. an empty file) becomes {}
+
+ _cache[category] = data
+ logger.info("Loaded rules: %s (%d bytes)", category, yaml_path.stat().st_size)
+ return data
+
+
+def reload_rules(category: str | None = None) -> None:
+ """
+ Drop cached rules and re-read them from disk through get_rules().
+ If category is falsy (None or ""), every currently cached category is reloaded.
+ """
+ if category:
+ _cache.pop(category, None)
+ get_rules(category)
+ else:
+ categories = list(_cache.keys())  # snapshot the keys before the cache is cleared
+ _cache.clear()
+ for cat in categories:
+ get_rules(cat)
+
+
+def get_all_categories() -> list[str]:
+ """Return the stem of every *.yaml file directly inside RULES_DIR (unsorted)."""
+ return [
+ p.stem for p in RULES_DIR.glob("*.yaml")
+ ]
diff --git a/backend/app/rules/accessibility.yaml b/backend/app/rules/accessibility.yaml
new file mode 100644
index 0000000..ba2c6b6
--- /dev/null
+++ b/backend/app/rules/accessibility.yaml
@@ -0,0 +1,830 @@
+# ============================================================
+# WCAG Accessibility Rules
+# Based on: W3C WCAG 2.0, 2.1, 2.2 + axe-core Rule Mapping
+# ============================================================
+
+metadata:
+ name: "WCAG Accessibility Standards"
+ version: "1.0.0"
+ last_updated: "2026-02-13"
+ sources:
+ - name: "Web Content Accessibility Guidelines (WCAG) 2.2"
+ url: "https://www.w3.org/TR/WCAG22/"
+ version: "2.2"
+ date: "2023-10-05"
+ - name: "Web Content Accessibility Guidelines (WCAG) 2.1"
+ url: "https://www.w3.org/TR/WCAG21/"
+ version: "2.1"
+ date: "2018-06-05"
+ - name: "Web Content Accessibility Guidelines (WCAG) 2.0"
+ url: "https://www.w3.org/TR/WCAG20/"
+ version: "2.0"
+ date: "2008-12-11"
+ - name: "axe-core Rule Descriptions"
+ url: "https://github.com/dequelabs/axe-core/blob/develop/doc/rule-descriptions.md"
+ - name: "axe-core API Documentation"
+ url: "https://www.deque.com/axe/core-documentation/api-documentation/"
+
+# ============================================================
+# axe-core Tag Mapping
+# These tags control which rules axe-core runs
+# IMPORTANT: Tags are NOT inclusive - wcag2aa only runs AA rules,
+# NOT A rules. Combine tags for full compliance testing.
+# ============================================================
+axe_core_tags:
+ wcag20:
+ - tag: "wcag2a"
+ description: "WCAG 2.0 Level A rules only"
+ - tag: "wcag2aa"
+ description: "WCAG 2.0 Level AA rules only"
+ - tag: "wcag2aaa"
+ description: "WCAG 2.0 Level AAA rules only"
+ wcag21:
+ - tag: "wcag21a"
+ description: "WCAG 2.1 Level A rules only (new in 2.1)"
+ - tag: "wcag21aa"
+ description: "WCAG 2.1 Level AA rules only (new in 2.1)"
+ wcag22:
+ - tag: "wcag22aa"
+ description: "WCAG 2.2 Level AA rules only (new in 2.2)"
+ other:
+ - tag: "best-practice"
+ description: "Common accessibility best practices (not WCAG-specific)"
+ - tag: "section508"
+ description: "Section 508 compliance rules"
+
+# Compliance presets (combine tags for full testing)
+compliance_presets:
+ wcag_20_a:
+ tags: ["wcag2a"]
+ description: "WCAG 2.0 Level A compliance"
+ wcag_20_aa:
+ tags: ["wcag2a", "wcag2aa"]
+ description: "WCAG 2.0 Level AA compliance"
+ wcag_21_aa:
+ tags: ["wcag2a", "wcag2aa", "wcag21a", "wcag21aa"]
+ description: "WCAG 2.1 Level AA compliance (most common requirement)"
+ wcag_22_aa:
+ tags: ["wcag2a", "wcag2aa", "wcag21a", "wcag21aa", "wcag22aa"]
+ description: "WCAG 2.2 Level AA compliance (latest standard)"
+ wcag_22_full:
+ tags: ["wcag2a", "wcag2aa", "wcag2aaa", "wcag21a", "wcag21aa", "wcag22aa"]
+ description: "WCAG 2.2 all levels including AAA"
+
+# ============================================================
+# WCAG 2.2 Complete Success Criteria
+# Total: 86 criteria (4.1.1 Parsing removed in 2.2)
+# Distribution: Level A (31), Level AA (24), Level AAA (31)
+# ============================================================
+
+principles:
+ # ========================================================
+ # Principle 1: PERCEIVABLE
+ # ========================================================
+ - id: "perceivable"
+ name: "Perceivable"
+ description: "Information and user interface components must be presentable to users in ways they can perceive"
+
+ guidelines:
+ # --- 1.1 Text Alternatives ---
+ - id: "1.1"
+ name: "Text Alternatives"
+ description: "Provide text alternatives for any non-text content"
+ criteria:
+ - id: "1.1.1"
+ name: "Non-text Content"
+ level: "A"
+ since: "2.0"
+ description: "All non-text content has a text alternative that serves the equivalent purpose"
+ axe_rules: ["image-alt", "input-image-alt", "area-alt", "object-alt", "svg-img-alt"]
+
+ # --- 1.2 Time-based Media ---
+ - id: "1.2"
+ name: "Time-based Media"
+ description: "Provide alternatives for time-based media"
+ criteria:
+ - id: "1.2.1"
+ name: "Audio-only and Video-only (Prerecorded)"
+ level: "A"
+ since: "2.0"
+ description: "An alternative is provided for prerecorded audio-only and video-only media"
+ axe_rules: ["audio-caption", "video-caption"]
+
+ - id: "1.2.2"
+ name: "Captions (Prerecorded)"
+ level: "A"
+ since: "2.0"
+ description: "Captions are provided for all prerecorded audio content in synchronized media"
+ axe_rules: ["video-caption"]
+
+ - id: "1.2.3"
+ name: "Audio Description or Media Alternative (Prerecorded)"
+ level: "A"
+ since: "2.0"
+ description: "An alternative for time-based media or audio description is provided"
+ axe_rules: ["video-description"]
+
+ - id: "1.2.4"
+ name: "Captions (Live)"
+ level: "AA"
+ since: "2.0"
+ description: "Captions are provided for all live audio content in synchronized media"
+
+ - id: "1.2.5"
+ name: "Audio Description (Prerecorded)"
+ level: "AA"
+ since: "2.0"
+ description: "Audio description is provided for all prerecorded video content"
+
+ - id: "1.2.6"
+ name: "Sign Language (Prerecorded)"
+ level: "AAA"
+ since: "2.0"
+ description: "Sign language interpretation is provided for prerecorded audio"
+
+ - id: "1.2.7"
+ name: "Extended Audio Description (Prerecorded)"
+ level: "AAA"
+ since: "2.0"
+ description: "Extended audio description is provided when pauses are insufficient"
+
+ - id: "1.2.8"
+ name: "Media Alternative (Prerecorded)"
+ level: "AAA"
+ since: "2.0"
+ description: "A text alternative is provided for all prerecorded synchronized media"
+
+ - id: "1.2.9"
+ name: "Audio-only (Live)"
+ level: "AAA"
+ since: "2.0"
+ description: "A text alternative is provided for live audio-only content"
+
+ # --- 1.3 Adaptable ---
+ - id: "1.3"
+ name: "Adaptable"
+ description: "Create content that can be presented in different ways without losing information"
+ criteria:
+ - id: "1.3.1"
+ name: "Info and Relationships"
+ level: "A"
+ since: "2.0"
+ description: "Information, structure, and relationships can be programmatically determined"
+ axe_rules: ["aria-required-parent", "aria-required-children", "definition-list", "dlitem", "list", "listitem", "th-has-data-cells", "td-headers-attr", "p-as-heading"]
+
+ - id: "1.3.2"
+ name: "Meaningful Sequence"
+ level: "A"
+ since: "2.0"
+ description: "Correct reading sequence can be programmatically determined"
+
+ - id: "1.3.3"
+ name: "Sensory Characteristics"
+ level: "A"
+ since: "2.0"
+ description: "Instructions do not rely solely on sensory characteristics"
+
+ - id: "1.3.4"
+ name: "Orientation"
+ level: "AA"
+ since: "2.1"
+ description: "Content does not restrict viewing to a single display orientation"
+
+ - id: "1.3.5"
+ name: "Identify Input Purpose"
+ level: "AA"
+ since: "2.1"
+ description: "Input field purpose can be programmatically determined"
+ axe_rules: ["autocomplete-valid"]
+
+ - id: "1.3.6"
+ name: "Identify Purpose"
+ level: "AAA"
+ since: "2.1"
+ description: "The purpose of UI components, icons, and regions can be programmatically determined"
+
+ # --- 1.4 Distinguishable ---
+ - id: "1.4"
+ name: "Distinguishable"
+ description: "Make it easier for users to see and hear content"
+ criteria:
+ - id: "1.4.1"
+ name: "Use of Color"
+ level: "A"
+ since: "2.0"
+ description: "Color is not the only visual means of conveying information"
+ axe_rules: ["link-in-text-block"]
+
+ - id: "1.4.2"
+ name: "Audio Control"
+ level: "A"
+ since: "2.0"
+ description: "Mechanism to pause/stop/control volume of auto-playing audio"
+ axe_rules: ["no-autoplay-audio"]
+
+ - id: "1.4.3"
+ name: "Contrast (Minimum)"
+ level: "AA"
+ since: "2.0"
+ description: "Text has contrast ratio of at least 4.5:1 (3:1 for large text)"
+ axe_rules: ["color-contrast"]
+
+ - id: "1.4.4"
+ name: "Resize Text"
+ level: "AA"
+ since: "2.0"
+ description: "Text can be resized up to 200% without loss of content or functionality"
+ axe_rules: ["meta-viewport-large"]
+
+ - id: "1.4.5"
+ name: "Images of Text"
+ level: "AA"
+ since: "2.0"
+ description: "Text is used instead of images of text where possible"
+
+ - id: "1.4.6"
+ name: "Contrast (Enhanced)"
+ level: "AAA"
+ since: "2.0"
+ description: "Text has contrast ratio of at least 7:1 (4.5:1 for large text)"
+ axe_rules: ["color-contrast-enhanced"]
+
+ - id: "1.4.7"
+ name: "Low or No Background Audio"
+ level: "AAA"
+ since: "2.0"
+ description: "Prerecorded speech audio has low or no background noise"
+
+ - id: "1.4.8"
+ name: "Visual Presentation"
+ level: "AAA"
+ since: "2.0"
+ description: "Text blocks have configurable visual presentation"
+
+ - id: "1.4.9"
+ name: "Images of Text (No Exception)"
+ level: "AAA"
+ since: "2.0"
+ description: "Images of text are only used for pure decoration or essential cases"
+
+ - id: "1.4.10"
+ name: "Reflow"
+ level: "AA"
+ since: "2.1"
+ description: "Content can reflow without scrolling in two dimensions at 320px/256px"
+
+ - id: "1.4.11"
+ name: "Non-text Contrast"
+ level: "AA"
+ since: "2.1"
+ description: "UI components and graphics have contrast ratio of at least 3:1"
+
+ - id: "1.4.12"
+ name: "Text Spacing"
+ level: "AA"
+ since: "2.1"
+ description: "Content adapts to specified text spacing without loss"
+
+ - id: "1.4.13"
+ name: "Content on Hover or Focus"
+ level: "AA"
+ since: "2.1"
+ description: "Hoverable/focusable additional content is dismissible, hoverable, persistent"
+
+ # ========================================================
+ # Principle 2: OPERABLE
+ # ========================================================
+ - id: "operable"
+ name: "Operable"
+ description: "User interface components and navigation must be operable"
+
+ guidelines:
+ # --- 2.1 Keyboard Accessible ---
+ - id: "2.1"
+ name: "Keyboard Accessible"
+ description: "Make all functionality available from a keyboard"
+ criteria:
+ - id: "2.1.1"
+ name: "Keyboard"
+ level: "A"
+ since: "2.0"
+ description: "All functionality is operable through a keyboard interface"
+ axe_rules: ["scrollable-region-focusable"]
+
+ - id: "2.1.2"
+ name: "No Keyboard Trap"
+ level: "A"
+ since: "2.0"
+ description: "Keyboard focus can be moved away from any component"
+
+ - id: "2.1.3"
+ name: "Keyboard (No Exception)"
+ level: "AAA"
+ since: "2.0"
+ description: "All functionality is operable through keyboard without exception"
+
+ - id: "2.1.4"
+ name: "Character Key Shortcuts"
+ level: "A"
+ since: "2.1"
+ description: "Single character key shortcuts can be turned off or remapped"
+ axe_rules: ["accesskeys"]
+
+ # --- 2.2 Enough Time ---
+ - id: "2.2"
+ name: "Enough Time"
+ description: "Provide users enough time to read and use content"
+ criteria:
+ - id: "2.2.1"
+ name: "Timing Adjustable"
+ level: "A"
+ since: "2.0"
+ description: "Time limits can be turned off, adjusted, or extended"
+ axe_rules: ["meta-refresh"]
+
+ - id: "2.2.2"
+ name: "Pause, Stop, Hide"
+ level: "A"
+ since: "2.0"
+ description: "Moving, blinking, scrolling, or auto-updating content can be controlled"
+ axe_rules: ["blink", "marquee"]
+
+ - id: "2.2.3"
+ name: "No Timing"
+ level: "AAA"
+ since: "2.0"
+ description: "Timing is not an essential part of the activity"
+
+ - id: "2.2.4"
+ name: "Interruptions"
+ level: "AAA"
+ since: "2.0"
+ description: "Interruptions can be postponed or suppressed"
+
+ - id: "2.2.5"
+ name: "Re-authenticating"
+ level: "AAA"
+ since: "2.0"
+ description: "Data is preserved when re-authenticating after session expiry"
+
+ - id: "2.2.6"
+ name: "Timeouts"
+ level: "AAA"
+ since: "2.1"
+ description: "Users are warned about data loss from inactivity timeouts"
+
+ # --- 2.3 Seizures and Physical Reactions ---
+ - id: "2.3"
+ name: "Seizures and Physical Reactions"
+ description: "Do not design content that causes seizures or physical reactions"
+ criteria:
+ - id: "2.3.1"
+ name: "Three Flashes or Below Threshold"
+ level: "A"
+ since: "2.0"
+ description: "Pages do not contain content that flashes more than three times per second"
+
+ - id: "2.3.2"
+ name: "Three Flashes"
+ level: "AAA"
+ since: "2.0"
+ description: "Pages do not contain any content that flashes more than three times per second"
+
+ - id: "2.3.3"
+ name: "Animation from Interactions"
+ level: "AAA"
+ since: "2.1"
+ description: "Motion animation triggered by interaction can be disabled"
+
+ # --- 2.4 Navigable ---
+ - id: "2.4"
+ name: "Navigable"
+ description: "Provide ways to help users navigate, find content, and determine where they are"
+ criteria:
+ - id: "2.4.1"
+ name: "Bypass Blocks"
+ level: "A"
+ since: "2.0"
+ description: "Mechanism available to bypass blocks of content repeated on pages"
+ axe_rules: ["bypass", "region"]
+
+ - id: "2.4.2"
+ name: "Page Titled"
+ level: "A"
+ since: "2.0"
+ description: "Web pages have titles that describe topic or purpose"
+ axe_rules: ["document-title"]
+
+ - id: "2.4.3"
+ name: "Focus Order"
+ level: "A"
+ since: "2.0"
+ description: "Focus order preserves meaning and operability"
+ axe_rules: ["tabindex"]
+
+ - id: "2.4.4"
+ name: "Link Purpose (In Context)"
+ level: "A"
+ since: "2.0"
+ description: "Link purpose can be determined from link text or context"
+ axe_rules: ["link-name"]
+
+ - id: "2.4.5"
+ name: "Multiple Ways"
+ level: "AA"
+ since: "2.0"
+ description: "More than one way available to locate a page in a set"
+
+ - id: "2.4.6"
+ name: "Headings and Labels"
+ level: "AA"
+ since: "2.0"
+ description: "Headings and labels describe topic or purpose"
+ axe_rules: ["empty-heading"]
+
+ - id: "2.4.7"
+ name: "Focus Visible"
+ level: "AA"
+ since: "2.0"
+ description: "Keyboard focus indicator is visible"
+
+ - id: "2.4.8"
+ name: "Location"
+ level: "AAA"
+ since: "2.0"
+ description: "Information about the user's location within a set of pages is available"
+
+ - id: "2.4.9"
+ name: "Link Purpose (Link Only)"
+ level: "AAA"
+ since: "2.0"
+ description: "Link purpose can be determined from link text alone"
+
+ - id: "2.4.10"
+ name: "Section Headings"
+ level: "AAA"
+ since: "2.0"
+ description: "Section headings are used to organize content"
+
+ - id: "2.4.11"
+ name: "Focus Not Obscured (Minimum)"
+ level: "AA"
+ since: "2.2"
+ description: "Focused component is not entirely hidden by author-created content"
+
+ - id: "2.4.12"
+ name: "Focus Not Obscured (Enhanced)"
+ level: "AAA"
+ since: "2.2"
+ description: "No part of the focused component is hidden by author-created content"
+
+ - id: "2.4.13"
+ name: "Focus Appearance"
+ level: "AAA"
+ since: "2.2"
+ description: "Focus indicator meets minimum area and contrast requirements"
+
+ # --- 2.5 Input Modalities ---
+ - id: "2.5"
+ name: "Input Modalities"
+ description: "Make it easier to operate through various inputs beyond keyboard"
+ criteria:
+ - id: "2.5.1"
+ name: "Pointer Gestures"
+ level: "A"
+ since: "2.1"
+ description: "Multipoint/path-based gestures have single-pointer alternatives"
+
+ - id: "2.5.2"
+ name: "Pointer Cancellation"
+ level: "A"
+ since: "2.1"
+ description: "Functions using single pointer can be cancelled"
+
+ - id: "2.5.3"
+ name: "Label in Name"
+ level: "A"
+ since: "2.1"
+ description: "Visible label is part of the accessible name"
+ axe_rules: ["label-title-only"]
+
+ - id: "2.5.4"
+ name: "Motion Actuation"
+ level: "A"
+ since: "2.1"
+ description: "Motion-activated functions have UI alternatives and can be disabled"
+
+ - id: "2.5.5"
+ name: "Target Size (Enhanced)"
+ level: "AAA"
+ since: "2.1"
+ description: "Target size is at least 44 by 44 CSS pixels"
+
+ - id: "2.5.6"
+ name: "Concurrent Input Mechanisms"
+ level: "AAA"
+ since: "2.1"
+ description: "Content does not restrict use of available input modalities"
+
+ - id: "2.5.7"
+ name: "Dragging Movements"
+ level: "AA"
+ since: "2.2"
+ description: "Drag functions have single-pointer alternatives"
+
+ - id: "2.5.8"
+ name: "Target Size (Minimum)"
+ level: "AA"
+ since: "2.2"
+ description: "Target size is at least 24 by 24 CSS pixels"
+ axe_rules: ["target-size"]
+
+ # ========================================================
+ # Principle 3: UNDERSTANDABLE
+ # ========================================================
+ - id: "understandable"
+ name: "Understandable"
+ description: "Information and the operation of user interface must be understandable"
+
+ guidelines:
+ # --- 3.1 Readable ---
+ - id: "3.1"
+ name: "Readable"
+ description: "Make text content readable and understandable"
+ criteria:
+ - id: "3.1.1"
+ name: "Language of Page"
+ level: "A"
+ since: "2.0"
+ description: "Default human language of each page can be programmatically determined"
+ axe_rules: ["html-has-lang", "html-lang-valid"]
+
+ - id: "3.1.2"
+ name: "Language of Parts"
+ level: "AA"
+ since: "2.0"
+ description: "Language of each passage or phrase can be programmatically determined"
+ axe_rules: ["valid-lang"]
+
+ - id: "3.1.3"
+ name: "Unusual Words"
+ level: "AAA"
+ since: "2.0"
+ description: "Mechanism is available for unusual words or jargon"
+
+ - id: "3.1.4"
+ name: "Abbreviations"
+ level: "AAA"
+ since: "2.0"
+ description: "Mechanism for identifying expanded form of abbreviations"
+
+ - id: "3.1.5"
+ name: "Reading Level"
+ level: "AAA"
+ since: "2.0"
+ description: "Supplemental content for text beyond lower secondary education level"
+
+ - id: "3.1.6"
+ name: "Pronunciation"
+ level: "AAA"
+ since: "2.0"
+ description: "Mechanism for identifying pronunciation of ambiguous words"
+
+ # --- 3.2 Predictable ---
+ - id: "3.2"
+ name: "Predictable"
+ description: "Make web pages appear and operate in predictable ways"
+ criteria:
+ - id: "3.2.1"
+ name: "On Focus"
+ level: "A"
+ since: "2.0"
+ description: "Receiving focus does not initiate a change of context"
+
+ - id: "3.2.2"
+ name: "On Input"
+ level: "A"
+ since: "2.0"
+ description: "Changing a UI component setting does not automatically cause a change of context"
+
+ - id: "3.2.3"
+ name: "Consistent Navigation"
+ level: "AA"
+ since: "2.0"
+ description: "Navigation repeated on pages occurs in the same relative order"
+
+ - id: "3.2.4"
+ name: "Consistent Identification"
+ level: "AA"
+ since: "2.0"
+ description: "Components with the same functionality are identified consistently"
+
+ - id: "3.2.5"
+ name: "Change on Request"
+ level: "AAA"
+ since: "2.0"
+ description: "Changes of context are initiated only by user request"
+
+ - id: "3.2.6"
+ name: "Consistent Help"
+ level: "A"
+ since: "2.2"
+ description: "Help mechanisms occur in the same relative order across pages"
+
+ # --- 3.3 Input Assistance ---
+ - id: "3.3"
+ name: "Input Assistance"
+ description: "Help users avoid and correct mistakes"
+ criteria:
+ - id: "3.3.1"
+ name: "Error Identification"
+ level: "A"
+ since: "2.0"
+ description: "Input errors are automatically detected and described to the user"
+ axe_rules: ["aria-input-field-name"]
+
+ - id: "3.3.2"
+ name: "Labels or Instructions"
+ level: "A"
+ since: "2.0"
+ description: "Labels or instructions are provided when content requires user input"
+ axe_rules: ["label", "input-button-name", "select-name"]
+
+ - id: "3.3.3"
+ name: "Error Suggestion"
+ level: "AA"
+ since: "2.0"
+ description: "Error suggestions are provided when errors are detected and suggestions are known"
+
+ - id: "3.3.4"
+ name: "Error Prevention (Legal, Financial, Data)"
+ level: "AA"
+ since: "2.0"
+ description: "Submissions are reversible, checked, or confirmed for legal/financial/data"
+
+ - id: "3.3.5"
+ name: "Help"
+ level: "AAA"
+ since: "2.0"
+ description: "Context-sensitive help is available"
+
+ - id: "3.3.6"
+ name: "Error Prevention (All)"
+ level: "AAA"
+ since: "2.0"
+ description: "Submissions are reversible, checked, or confirmed for all user input"
+
+ - id: "3.3.7"
+ name: "Redundant Entry"
+ level: "A"
+ since: "2.2"
+ description: "Previously entered information is auto-populated or available for selection"
+
+ - id: "3.3.8"
+ name: "Accessible Authentication (Minimum)"
+ level: "AA"
+ since: "2.2"
+ description: "Cognitive function test is not required for authentication"
+
+ - id: "3.3.9"
+ name: "Accessible Authentication (Enhanced)"
+ level: "AAA"
+ since: "2.2"
+ description: "No cognitive function test is required for authentication (no exceptions)"
+
+ # ========================================================
+ # Principle 4: ROBUST
+ # ========================================================
+ - id: "robust"
+ name: "Robust"
+ description: "Content must be robust enough to be interpreted by a wide variety of user agents"
+
+ guidelines:
+ # --- 4.1 Compatible ---
+ - id: "4.1"
+ name: "Compatible"
+ description: "Maximize compatibility with current and future user agents"
+ criteria:
+ # Note: 4.1.1 Parsing was REMOVED in WCAG 2.2
+ # It was deemed obsolete as modern browsers handle parsing errors gracefully
+
+ - id: "4.1.2"
+ name: "Name, Role, Value"
+ level: "A"
+ since: "2.0"
+ description: "Name, role, and value of all UI components can be programmatically determined"
+ axe_rules: ["aria-allowed-attr", "aria-allowed-role", "aria-hidden-body", "aria-hidden-focus", "aria-roles", "aria-valid-attr", "aria-valid-attr-value", "button-name", "frame-title", "image-alt", "input-button-name", "input-image-alt", "label", "link-name", "select-name"]
+
+ - id: "4.1.3"
+ name: "Status Messages"
+ level: "AA"
+ since: "2.1"
+ description: "Status messages can be programmatically determined without receiving focus"
+ axe_rules: ["aria-progressbar-name"]
+
+# ============================================================
+# Version Diff Summary
+# What's new in each version
+# ============================================================
+version_diff:
+ removed_in_22:
+ - id: "4.1.1"
+ name: "Parsing"
+ reason: "Modern browsers handle parsing errors; criterion was obsolete"
+
+ new_in_21:
+ level_a:
+ - "2.1.4 Character Key Shortcuts"
+ - "2.5.1 Pointer Gestures"
+ - "2.5.2 Pointer Cancellation"
+ - "2.5.3 Label in Name"
+ - "2.5.4 Motion Actuation"
+ level_aa:
+ - "1.3.4 Orientation"
+ - "1.3.5 Identify Input Purpose"
+ - "1.4.10 Reflow"
+ - "1.4.11 Non-text Contrast"
+ - "1.4.12 Text Spacing"
+ - "1.4.13 Content on Hover or Focus"
+ - "4.1.3 Status Messages"
+ level_aaa:
+ - "1.3.6 Identify Purpose"
+ - "2.2.6 Timeouts"
+ - "2.3.3 Animation from Interactions"
+ - "2.5.5 Target Size (Enhanced)"
+ - "2.5.6 Concurrent Input Mechanisms"
+
+ new_in_22:
+ level_a:
+ - "3.2.6 Consistent Help"
+ - "3.3.7 Redundant Entry"
+ level_aa:
+ - "2.4.11 Focus Not Obscured (Minimum)"
+ - "2.5.7 Dragging Movements"
+ - "2.5.8 Target Size (Minimum)"
+ - "3.3.8 Accessible Authentication (Minimum)"
+ level_aaa:
+ - "2.4.12 Focus Not Obscured (Enhanced)"
+ - "2.4.13 Focus Appearance"
+ - "3.3.9 Accessible Authentication (Enhanced)"
+
+# ============================================================
+# Additional axe-core Best Practice Rules
+# (Not mapped to specific WCAG criteria but recommended)
+# ============================================================
+best_practices:
+ - id: "landmark-one-main"
+ description: "Document should have one main landmark"
+ severity: "major"
+
+ - id: "landmark-complementary-is-top-level"
+ description: "Aside/complementary should be top-level"
+ severity: "minor"
+
+ - id: "landmark-no-duplicate-banner"
+ description: "Document should have at most one banner landmark"
+ severity: "major"
+
+ - id: "landmark-no-duplicate-contentinfo"
+ description: "Document should have at most one contentinfo landmark"
+ severity: "major"
+
+ - id: "landmark-no-duplicate-main"
+ description: "Document should have at most one main landmark"
+ severity: "major"
+
+ - id: "page-has-heading-one"
+ description: "Page should contain a level-one heading"
+ severity: "major"
+
+ - id: "heading-order"
+ description: "Heading levels should increase by one"
+ severity: "minor"
+
+ - id: "scope-attr-valid"
+ description: "scope attribute should be used correctly"
+ severity: "minor"
+
+ - id: "skip-link"
+ description: "Skip navigation link should be provided"
+ severity: "minor"
+
+ - id: "tabindex"
+ description: "Tabindex should not be greater than zero"
+ severity: "major"
+
+ - id: "duplicate-id-active"
+ description: "Active elements should not have duplicate IDs"
+ severity: "critical"
+
+ - id: "duplicate-id-aria"
+ description: "ARIA IDs should be unique"
+ severity: "critical"
+
+ - id: "frame-tested"
+ description: "Frames should be tested with axe-core"
+ severity: "minor"
+
+ - id: "aria-text"
+ description: "Elements with role=text should have no focusable descendants"
+ severity: "minor"
diff --git a/backend/app/rules/html_css.yaml b/backend/app/rules/html_css.yaml
new file mode 100644
index 0000000..4d16a1a
--- /dev/null
+++ b/backend/app/rules/html_css.yaml
@@ -0,0 +1,821 @@
+# ============================================================
+# HTML/CSS Web Standards Rules
+# Based on: W3C HTML Living Standard (WHATWG), CSS Specifications
+# ============================================================
+
+metadata:
+ name: "HTML/CSS Web Standards"
+ version: "1.0.0"
+ last_updated: "2026-02-13"
+ sources:
+ - name: "HTML Living Standard (WHATWG)"
+ url: "https://html.spec.whatwg.org/multipage/"
+ section: "16 Obsolete features"
+ - name: "HTML Living Standard - Obsolete Features"
+ url: "https://html.spec.whatwg.org/multipage/obsolete.html"
+ - name: "MDN Web Docs - HTML Elements Reference"
+ url: "https://developer.mozilla.org/en-US/docs/Web/HTML/Element"
+ - name: "W3C CSS Specifications"
+ url: "https://www.w3.org/Style/CSS/"
+
+# ============================================================
+# 1. Obsolete (Non-Conforming) HTML Elements
+# Source: HTML Living Standard Section 16
+# ============================================================
+obsolete_elements:
+ # --- Entirely Obsolete (must not be used) ---
+ - tag: "applet"
+ replacement: "embed or object"
+ reason: "Outdated plugin technology (Java applets)"
+ severity: "critical"
+
+ - tag: "acronym"
+ replacement: "abbr"
+ reason: "Redundant; abbr covers both abbreviations and acronyms"
+ severity: "major"
+
+ - tag: "bgsound"
+ replacement: "audio"
+ reason: "Proprietary (IE-only) audio element"
+ severity: "critical"
+
+ - tag: "dir"
+ replacement: "ul"
+ reason: "Non-standard directory list"
+ severity: "major"
+
+ - tag: "frame"
+ replacement: "iframe with CSS, or server-side includes"
+ reason: "Frame-based layouts are obsolete"
+ severity: "critical"
+
+ - tag: "frameset"
+ replacement: "iframe with CSS, or server-side includes"
+ reason: "Frame-based layouts are obsolete"
+ severity: "critical"
+
+ - tag: "noframes"
+ replacement: "N/A (remove with frame/frameset)"
+ reason: "Related to obsolete frames"
+ severity: "major"
+
+ - tag: "isindex"
+ replacement: "form with input[type=text]"
+ reason: "Outdated form method"
+ severity: "critical"
+
+ - tag: "keygen"
+ replacement: "Web Cryptography API"
+ reason: "Certificate enrollment; use Web Crypto API"
+ severity: "major"
+
+ - tag: "listing"
+ replacement: "pre + code"
+ reason: "Obsolete code presentation element"
+ severity: "major"
+
+ - tag: "menuitem"
+ replacement: "Script handling contextmenu event"
+ reason: "Context menu item (never widely supported)"
+ severity: "minor"
+
+ - tag: "nextid"
+ replacement: "GUIDs or UUIDs"
+ reason: "Obsolete identifier generation"
+ severity: "minor"
+
+ - tag: "noembed"
+ replacement: "object instead of embed"
+ reason: "Fallback for embed; use object element"
+ severity: "minor"
+
+ - tag: "param"
+ replacement: "data attribute on object"
+ reason: "Object parameter passing"
+ severity: "minor"
+
+ - tag: "plaintext"
+ replacement: "MIME type text/plain"
+ reason: "Obsolete text rendering mode"
+ severity: "major"
+
+ - tag: "rb"
+ replacement: "ruby element directly"
+ reason: "Ruby base text (use ruby directly)"
+ severity: "minor"
+
+ - tag: "rtc"
+ replacement: "Nested ruby elements"
+ reason: "Ruby text container (use nested ruby)"
+ severity: "minor"
+
+ - tag: "strike"
+ replacement: "del (edits) or s (no longer relevant)"
+ reason: "Presentational strikethrough"
+ severity: "major"
+
+ - tag: "xmp"
+ replacement: "pre + code with escaped entities"
+ reason: "Obsolete code display element"
+ severity: "major"
+
+ # --- Presentational Elements (use CSS instead) ---
+ - tag: "basefont"
+ replacement: "CSS font properties"
+ reason: "Base font styling (presentational)"
+ severity: "critical"
+
+ - tag: "big"
+ replacement: "CSS font-size or semantic elements (h1-h6, strong, mark)"
+ reason: "Presentational text sizing"
+ severity: "major"
+
+ - tag: "blink"
+ replacement: "CSS animations/transitions"
+ reason: "Presentational text animation"
+ severity: "critical"
+
+ - tag: "center"
+ replacement: "CSS text-align or margin auto"
+ reason: "Presentational centering"
+ severity: "major"
+
+ - tag: "font"
+ replacement: "CSS font properties"
+ reason: "Presentational font styling"
+ severity: "critical"
+
+ - tag: "marquee"
+ replacement: "CSS animations/transitions"
+ reason: "Presentational scrolling text"
+ severity: "critical"
+
+ - tag: "multicol"
+ replacement: "CSS columns"
+ reason: "Presentational multi-column layout"
+ severity: "major"
+
+ - tag: "nobr"
+ replacement: "CSS white-space: nowrap"
+ reason: "Presentational no-break text"
+ severity: "minor"
+
+ - tag: "spacer"
+ replacement: "CSS margin/padding"
+ reason: "Presentational spacing"
+ severity: "major"
+
+ - tag: "tt"
+ replacement: "kbd, var, code, or samp (context-dependent)"
+ reason: "Presentational monospace text"
+ severity: "major"
+
+# ============================================================
+# 2. Obsolete HTML Attributes
+# Source: HTML Living Standard Section 16
+# ============================================================
+obsolete_attributes:
+ # --- Global Attributes ---
+ global:
+ - attr: "contextmenu"
+ replacement: "Script for contextmenu event"
+ severity: "minor"
+ - attr: "datasrc"
+ replacement: "XMLHttpRequest / Fetch API"
+ severity: "minor"
+ - attr: "datafld"
+ replacement: "XMLHttpRequest / Fetch API"
+ severity: "minor"
+ - attr: "dataformatas"
+ replacement: "XMLHttpRequest / Fetch API"
+ severity: "minor"
+ - attr: "dropzone"
+ replacement: "Script for drag/drop events"
+ severity: "minor"
+
+ # --- Element-Specific Attributes ---
+ a:
+ - attr: "charset"
+ replacement: "HTTP Content-Type header"
+ severity: "minor"
+ - attr: "coords"
+ replacement: "area element for image maps"
+ severity: "minor"
+ - attr: "shape"
+ replacement: "area element for image maps"
+ severity: "minor"
+ - attr: "methods"
+ replacement: "HTTP OPTIONS"
+ severity: "minor"
+ - attr: "name"
+ replacement: "id attribute"
+ severity: "major"
+ - attr: "rev"
+ replacement: "rel with opposite term"
+ severity: "minor"
+ - attr: "urn"
+ replacement: "href attribute"
+ severity: "minor"
+
+ body:
+ - attr: "alink"
+ replacement: "CSS :active pseudo-class"
+ severity: "major"
+ - attr: "bgcolor"
+ replacement: "CSS background-color"
+ severity: "major"
+ - attr: "bottommargin"
+ replacement: "CSS margin-bottom"
+ severity: "major"
+ - attr: "leftmargin"
+ replacement: "CSS margin-left"
+ severity: "major"
+ - attr: "link"
+ replacement: "CSS color for links"
+ severity: "major"
+ - attr: "marginheight"
+ replacement: "CSS margin"
+ severity: "major"
+ - attr: "marginwidth"
+ replacement: "CSS margin"
+ severity: "major"
+ - attr: "rightmargin"
+ replacement: "CSS margin-right"
+ severity: "major"
+ - attr: "text"
+ replacement: "CSS color"
+ severity: "major"
+ - attr: "topmargin"
+ replacement: "CSS margin-top"
+ severity: "major"
+ - attr: "vlink"
+ replacement: "CSS :visited pseudo-class"
+ severity: "major"
+
+ br:
+ - attr: "clear"
+ replacement: "CSS clear property"
+ severity: "minor"
+
+ form:
+ - attr: "accept"
+ replacement: "accept attribute on individual input elements"
+ severity: "minor"
+
+ head:
+ - attr: "profile"
+ replacement: "Omit (unnecessary)"
+ severity: "minor"
+
+ hr:
+ - attr: "align"
+ replacement: "CSS margin"
+ severity: "minor"
+ - attr: "color"
+ replacement: "CSS border-color / background-color"
+ severity: "minor"
+ - attr: "noshade"
+ replacement: "CSS border/background"
+ severity: "minor"
+ - attr: "size"
+ replacement: "CSS height"
+ severity: "minor"
+ - attr: "width"
+ replacement: "CSS width"
+ severity: "minor"
+
+ html:
+ - attr: "manifest"
+ replacement: "Service Workers"
+ severity: "major"
+ - attr: "version"
+ replacement: "Omit (unnecessary)"
+ severity: "minor"
+
+ iframe:
+ - attr: "align"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "allowtransparency"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "frameborder"
+ replacement: "CSS border"
+ severity: "minor"
+ - attr: "framespacing"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "hspace"
+ replacement: "CSS margin"
+ severity: "minor"
+ - attr: "longdesc"
+ replacement: "Link to description page"
+ severity: "minor"
+ - attr: "marginheight"
+ replacement: "CSS padding"
+ severity: "minor"
+ - attr: "marginwidth"
+ replacement: "CSS padding"
+ severity: "minor"
+ - attr: "scrolling"
+ replacement: "CSS overflow"
+ severity: "minor"
+ - attr: "vspace"
+ replacement: "CSS margin"
+ severity: "minor"
+
+ img:
+ - attr: "align"
+ replacement: "CSS float or vertical-align"
+ severity: "minor"
+ - attr: "border"
+ replacement: "CSS border"
+ severity: "major"
+ - attr: "hspace"
+ replacement: "CSS margin"
+ severity: "minor"
+ - attr: "lowsrc"
+ replacement: "Progressive JPEG or srcset"
+ severity: "minor"
+ - attr: "name"
+ replacement: "id attribute"
+ severity: "minor"
+ - attr: "vspace"
+ replacement: "CSS margin"
+ severity: "minor"
+
+ input:
+ - attr: "align"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "border"
+ replacement: "CSS border"
+ severity: "minor"
+ - attr: "hspace"
+ replacement: "CSS margin"
+ severity: "minor"
+ - attr: "vspace"
+ replacement: "CSS margin"
+ severity: "minor"
+
+ link:
+ - attr: "charset"
+ replacement: "HTTP Content-Type header"
+ severity: "minor"
+ - attr: "methods"
+ replacement: "HTTP OPTIONS"
+ severity: "minor"
+ - attr: "rev"
+ replacement: "rel with opposite term"
+ severity: "minor"
+ - attr: "target"
+ replacement: "Omit"
+ severity: "minor"
+
+ meta:
+ - attr: "scheme"
+ replacement: "Include scheme in value"
+ severity: "minor"
+
+ object:
+ - attr: "align"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "archive"
+ replacement: "data and type attributes"
+ severity: "minor"
+ - attr: "border"
+ replacement: "CSS border"
+ severity: "minor"
+ - attr: "classid"
+ replacement: "data and type attributes"
+ severity: "minor"
+ - attr: "code"
+ replacement: "data and type attributes"
+ severity: "minor"
+ - attr: "codebase"
+ replacement: "data and type attributes"
+ severity: "minor"
+ - attr: "codetype"
+ replacement: "data and type attributes"
+ severity: "minor"
+ - attr: "declare"
+ replacement: "Repeat element"
+ severity: "minor"
+ - attr: "hspace"
+ replacement: "CSS margin"
+ severity: "minor"
+ - attr: "standby"
+ replacement: "Optimize resource loading"
+ severity: "minor"
+ - attr: "typemustmatch"
+ replacement: "Avoid untrusted resources"
+ severity: "minor"
+ - attr: "vspace"
+ replacement: "CSS margin"
+ severity: "minor"
+
+ script:
+ - attr: "charset"
+ replacement: "Omit (UTF-8 required)"
+ severity: "minor"
+ - attr: "event"
+ replacement: "DOM event listeners"
+ severity: "minor"
+ - attr: "for"
+ replacement: "DOM event listeners"
+ severity: "minor"
+ - attr: "language"
+ replacement: "Omit for JavaScript"
+ severity: "minor"
+
+ style:
+ - attr: "type"
+ replacement: "Omit for CSS (default)"
+ severity: "info"
+
+ table:
+ - attr: "align"
+ replacement: "CSS margin"
+ severity: "major"
+ - attr: "bgcolor"
+ replacement: "CSS background-color"
+ severity: "major"
+ - attr: "border"
+ replacement: "CSS border"
+ severity: "major"
+ - attr: "bordercolor"
+ replacement: "CSS border-color"
+ severity: "minor"
+ - attr: "cellpadding"
+ replacement: "CSS padding on td/th"
+ severity: "major"
+ - attr: "cellspacing"
+ replacement: "CSS border-spacing"
+ severity: "major"
+ - attr: "frame"
+ replacement: "CSS border"
+ severity: "minor"
+ - attr: "height"
+ replacement: "CSS height"
+ severity: "minor"
+ - attr: "rules"
+ replacement: "CSS border on td/th"
+ severity: "minor"
+ - attr: "summary"
+ replacement: "caption element or aria-describedby"
+ severity: "minor"
+ - attr: "width"
+ replacement: "CSS width"
+ severity: "minor"
+
+ td_th:
+ - attr: "abbr"
+ applies_to: "td only"
+ replacement: "Descriptive text or title attribute"
+ severity: "minor"
+ - attr: "align"
+ replacement: "CSS text-align"
+ severity: "minor"
+ - attr: "axis"
+ replacement: "scope on th"
+ severity: "minor"
+ - attr: "bgcolor"
+ replacement: "CSS background-color"
+ severity: "minor"
+ - attr: "char"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "charoff"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "height"
+ replacement: "CSS height"
+ severity: "minor"
+ - attr: "nowrap"
+ replacement: "CSS white-space"
+ severity: "minor"
+ - attr: "valign"
+ replacement: "CSS vertical-align"
+ severity: "minor"
+ - attr: "width"
+ replacement: "CSS width"
+ severity: "minor"
+
+ tr:
+ - attr: "align"
+ replacement: "CSS text-align"
+ severity: "minor"
+ - attr: "bgcolor"
+ replacement: "CSS background-color"
+ severity: "minor"
+ - attr: "char"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "charoff"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "height"
+ replacement: "CSS height"
+ severity: "minor"
+ - attr: "valign"
+ replacement: "CSS vertical-align"
+ severity: "minor"
+
+ thead_tbody_tfoot:
+ - attr: "align"
+ replacement: "CSS text-align"
+ severity: "minor"
+ - attr: "char"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "charoff"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "height"
+ replacement: "CSS height"
+ severity: "minor"
+ - attr: "valign"
+ replacement: "CSS vertical-align"
+ severity: "minor"
+
+ ol_ul:
+ - attr: "compact"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "type"
+ applies_to: "ul only (ol type is valid)"
+ replacement: "CSS list-style-type"
+ severity: "minor"
+
+ heading:
+ - attr: "align"
+ applies_to: "h1-h6"
+ replacement: "CSS text-align"
+ severity: "minor"
+
+ embed:
+ - attr: "align"
+ replacement: "CSS"
+ severity: "minor"
+ - attr: "hspace"
+ replacement: "CSS margin"
+ severity: "minor"
+ - attr: "name"
+ replacement: "id attribute"
+ severity: "minor"
+ - attr: "vspace"
+ replacement: "CSS margin"
+ severity: "minor"
+
+# ============================================================
+# 3. Semantic HTML5 Elements
+# Source: HTML Living Standard - Sections, Grouping
+# ============================================================
+semantic_elements:
+ structural:
+ - tag: "header"
+ description: "Introductory content or navigational aids for a section/page"
+ typical_use: "Site header, section header"
+ - tag: "nav"
+ description: "Section with navigation links"
+ typical_use: "Main navigation, breadcrumbs, table of contents"
+ - tag: "main"
+ description: "Dominant content of the body (unique per page)"
+ typical_use: "Primary content area (one per page)"
+ - tag: "footer"
+ description: "Footer for its nearest sectioning content/root"
+ typical_use: "Site footer, section footer"
+ - tag: "aside"
+ description: "Content tangentially related to surrounding content"
+ typical_use: "Sidebar, pull quotes, related links"
+ - tag: "section"
+ description: "Generic standalone section of a document"
+ typical_use: "Thematic grouping with heading"
+ - tag: "article"
+ description: "Self-contained composition independently distributable"
+ typical_use: "Blog post, news article, forum post, comment"
+
+ text_level:
+ - tag: "figure"
+ description: "Self-contained content with optional caption"
+ typical_use: "Images, diagrams, code listings with captions"
+ - tag: "figcaption"
+ description: "Caption for a figure element"
+ typical_use: "Image caption, diagram description"
+ - tag: "details"
+ description: "Disclosure widget for additional information"
+ typical_use: "FAQ, expandable sections"
+ - tag: "summary"
+ description: "Visible heading for a details element"
+ typical_use: "Click-to-expand label"
+ - tag: "mark"
+ description: "Text highlighted for reference or notation"
+ typical_use: "Search result highlighting"
+ - tag: "time"
+ description: "Machine-readable date/time"
+ typical_use: "Publication dates, event times"
+ - tag: "address"
+ description: "Contact information for author/owner"
+ typical_use: "Author contact info in article/footer"
+ - tag: "search"
+ description: "Container for search functionality"
+ typical_use: "Search form wrapper (added to the HTML Living Standard in 2023)"
+
+ interactive:
+ - tag: "dialog"
+ description: "Dialog box or interactive component"
+ typical_use: "Modal dialogs, alerts"
+ - tag: "menu"
+ description: "List of commands or options"
+ typical_use: "Context menus, toolbars"
+
+# ============================================================
+# 4. Required/Recommended Meta Tags
+# Source: HTML Living Standard, MDN Web Docs
+# ============================================================
+meta_tags:
+ required:
+ - name: "charset"
+ element: '<meta charset="UTF-8">'
+ description: "Character encoding declaration (must be UTF-8)"
+ severity: "critical"
+ standard: "HTML Living Standard"
+
+ - name: "viewport"
+ element: '<meta name="viewport" content="width=device-width, initial-scale=1">'
+ description: "Viewport configuration for responsive design"
+ severity: "critical"
+ standard: "CSS Device Adaptation"
+
+ recommended:
+ - name: "description"
+ element: '<meta name="description" content="...">'
+ description: "Page description for search engines (150-160 chars)"
+ severity: "major"
+ standard: "HTML Living Standard"
+
+ - name: "title"
+ element: "<title>Page Title</title>"
+ description: "Document title (required by spec, shown in browser tab)"
+ severity: "critical"
+ standard: "HTML Living Standard"
+
+ - name: "lang"
+ element: '<html lang="...">'
+ description: "Document language declaration"
+ severity: "major"
+ standard: "HTML Living Standard"
+
+ - name: "content-type"
+ element: 'Content-Type HTTP header or <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'
+ description: "MIME type and encoding declaration"
+ severity: "major"
+ standard: "HTML Living Standard"
+
+ social_media:
+ - name: "og:title"
+ element: '<meta property="og:title" content="...">'
+ description: "Open Graph title for social sharing"
+ severity: "minor"
+ standard: "Open Graph Protocol"
+
+ - name: "og:description"
+ element: '<meta property="og:description" content="...">'
+ description: "Open Graph description for social sharing"
+ severity: "minor"
+ standard: "Open Graph Protocol"
+
+ - name: "og:image"
+ element: '<meta property="og:image" content="...">'
+ description: "Open Graph image for social sharing"
+ severity: "minor"
+ standard: "Open Graph Protocol"
+
+ - name: "og:url"
+ element: '<meta property="og:url" content="...">'
+ description: "Canonical URL for Open Graph"
+ severity: "minor"
+ standard: "Open Graph Protocol"
+
+ - name: "og:type"
+ element: '<meta property="og:type" content="...">'
+ description: "Content type for Open Graph"
+ severity: "minor"
+ standard: "Open Graph Protocol"
+
+ - name: "twitter:card"
+ element: '<meta name="twitter:card" content="...">'
+ description: "Twitter Card type"
+ severity: "info"
+ standard: "Twitter Cards"
+
+ - name: "twitter:title"
+ element: '<meta name="twitter:title" content="...">'
+ description: "Twitter Card title"
+ severity: "info"
+ standard: "Twitter Cards"
+
+ - name: "twitter:description"
+ element: '<meta name="twitter:description" content="...">'
+ description: "Twitter Card description"
+ severity: "info"
+ standard: "Twitter Cards"
+
+# ============================================================
+# 5. Document Structure Rules
+# Source: HTML Living Standard
+# ============================================================
+document_structure:
+ doctype:
+ rule: "Document must start with <!DOCTYPE html>"
+ severity: "critical"
+ description: "HTML5 doctype declaration required"
+
+ heading_hierarchy:
+ rule: "Headings must follow proper hierarchy (h1 > h2 > h3...)"
+ severity: "major"
+ checks:
+ - id: "single-h1"
+ description: "Page should have exactly one h1 element"
+ severity: "major"
+ - id: "no-skipped-levels"
+ description: "Heading levels should not be skipped (e.g., h1 to h3)"
+ severity: "major"
+ - id: "logical-order"
+ description: "Headings should follow logical document outline"
+ severity: "minor"
+
+ image_alt:
+ rule: "All img elements must have alt attribute"
+ severity: "critical"
+ description: "Alternative text for images (accessibility + validity)"
+ exceptions:
+ - "Decorative images may use alt=''"
+ - "Images with role='presentation' may omit alt"
+
+ inline_styles:
+ rule: "Avoid inline style attributes"
+ severity: "minor"
+ description: "Inline styles reduce maintainability and violate separation of concerns"
+
+ duplicate_ids:
+ rule: "Element id attributes must be unique within the document"
+ severity: "critical"
+ description: "Duplicate IDs cause accessibility and JavaScript issues"
+
+ empty_links:
+ rule: "Anchor elements should have accessible content"
+ severity: "major"
+ description: "Links without text or aria-label are not accessible"
+
+ table_structure:
+ rule: "Data tables should have proper structure"
+ severity: "major"
+ checks:
+ - id: "table-has-thead"
+ description: "Data tables should have thead with th elements"
+ - id: "table-has-caption"
+ description: "Complex tables should have caption or aria-label"
+ - id: "th-has-scope"
+ description: "th elements should have scope attribute"
+
+ form_structure:
+ rule: "Form elements should have proper labels"
+ severity: "major"
+ checks:
+ - id: "input-has-label"
+ description: "Every form input should have an associated label"
+ - id: "form-has-submit"
+ description: "Forms should have a submit mechanism"
+ - id: "fieldset-has-legend"
+ description: "Fieldset elements should have a legend"
+
+ link_integrity:
+ rule: "Links and resources should be valid"
+ severity: "minor"
+ checks:
+ - id: "no-empty-href"
+ description: "Links should not have empty href attributes"
+ - id: "valid-rel"
+ description: "Link rel values should be valid"
+
+# ============================================================
+# 6. CSS Best Practices
+# Source: CSS Specifications, MDN Web Docs
+# ============================================================
+css_checks:
+ - id: "no-important-overuse"
+ description: "Avoid excessive use of !important declarations"
+ severity: "minor"
+ standard: "CSS Cascading and Inheritance"
+
+ - id: "vendor-prefix-check"
+ description: "Check for unnecessary vendor prefixes on well-supported properties"
+ severity: "info"
+ standard: "CSS Specifications"
+
+ - id: "no-universal-selector-performance"
+ description: "Avoid universal selector (*) in complex selectors for performance"
+ severity: "info"
+ standard: "CSS Selectors Level 4"
diff --git a/backend/app/rules/performance_security.yaml b/backend/app/rules/performance_security.yaml
new file mode 100644
index 0000000..64d5490
--- /dev/null
+++ b/backend/app/rules/performance_security.yaml
@@ -0,0 +1,730 @@
+# ============================================================
+# Performance & Security Rules
+# Based on: Core Web Vitals, Lighthouse, OWASP, Mozilla Observatory
+# ============================================================
+
+metadata:
+ name: "Performance & Security Standards"
+ version: "1.0.0"
+ last_updated: "2026-02-13"
+ sources:
+ - name: "Google Core Web Vitals"
+ url: "https://developers.google.com/search/docs/appearance/core-web-vitals"
+ - name: "Lighthouse Performance Audits"
+ url: "https://developer.chrome.com/docs/lighthouse/performance/"
+ - name: "OWASP Secure Headers Project"
+ url: "https://owasp.org/www-project-secure-headers/"
+ - name: "OWASP HTTP Headers Cheat Sheet"
+ url: "https://cheatsheetseries.owasp.org/cheatsheets/HTTP_Headers_Cheat_Sheet.html"
+ - name: "Mozilla Observatory"
+ url: "https://observatory.mozilla.org/"
+ - name: "OWASP Top 10 (2021)"
+ url: "https://owasp.org/www-project-top-ten/"
+
+# ============================================================
+# PERFORMANCE RULES
+# ============================================================
+
+performance:
+ # --- Core Web Vitals ---
+ core_web_vitals:
+ - id: "perf-lcp"
+ name: "Largest Contentful Paint (LCP)"
+ description: "Measures loading performance - time to render the largest content element"
+ severity: "critical"
+ category: "loading"
+ standard: "Google Core Web Vitals"
+ thresholds:
+ good: 2500 # ms
+ needs_improvement: 4000
+ poor: 4001 # above this
+ unit: "milliseconds"
+ tips:
+ - "Optimize and compress images (WebP/AVIF format)"
+ - "Preload critical resources"
+ - "Remove render-blocking resources"
+ - "Use a CDN for static assets"
+ - "Optimize server response time (TTFB < 800ms)"
+
+ - id: "perf-inp"
+ name: "Interaction to Next Paint (INP)"
+ description: "Measures responsiveness - latency of all user interactions"
+ severity: "critical"
+ category: "interactivity"
+ standard: "Google Core Web Vitals"
+ note: "Replaced FID (First Input Delay) in March 2024"
+ thresholds:
+ good: 200 # ms
+ needs_improvement: 500
+ poor: 501
+ unit: "milliseconds"
+ tips:
+ - "Break up long tasks (> 50ms)"
+ - "Reduce JavaScript execution time"
+ - "Use web workers for heavy computation"
+ - "Minimize main thread work"
+ - "Optimize event handlers"
+
+ - id: "perf-cls"
+ name: "Cumulative Layout Shift (CLS)"
+ description: "Measures visual stability - unexpected layout shifts during page life"
+ severity: "critical"
+ category: "visual_stability"
+ standard: "Google Core Web Vitals"
+ thresholds:
+ good: 0.1
+ needs_improvement: 0.25
+ poor: 0.26
+ unit: "score"
+ tips:
+ - "Set explicit width/height on images and video"
+ - "Reserve space for ads and embeds"
+ - "Avoid inserting content above existing content"
+ - "Use CSS contain for dynamic content"
+ - "Preload web fonts and use font-display: swap"
+
+ # --- Additional Performance Metrics ---
+ additional_metrics:
+ - id: "perf-fcp"
+ name: "First Contentful Paint (FCP)"
+ description: "Time to render the first piece of DOM content"
+ severity: "major"
+ category: "loading"
+ standard: "Lighthouse"
+ thresholds:
+ good: 1800 # ms
+ needs_improvement: 3000
+ poor: 3001
+ unit: "milliseconds"
+
+ - id: "perf-ttfb"
+ name: "Time to First Byte (TTFB)"
+ description: "Time from request to first byte of response"
+ severity: "major"
+ category: "server"
+ standard: "Lighthouse"
+ thresholds:
+ good: 800 # ms
+ needs_improvement: 1800
+ poor: 1801
+ unit: "milliseconds"
+ tips:
+ - "Use a CDN"
+ - "Optimize server-side rendering"
+ - "Enable HTTP/2 or HTTP/3"
+ - "Optimize database queries"
+
+ - id: "perf-si"
+ name: "Speed Index"
+ description: "How quickly content is visually displayed during page load"
+ severity: "major"
+ category: "loading"
+ standard: "Lighthouse"
+ thresholds:
+ good: 3400 # ms
+ needs_improvement: 5800
+ poor: 5801
+ unit: "milliseconds"
+
+ - id: "perf-tbt"
+ name: "Total Blocking Time (TBT)"
+ description: "Total time where main thread was blocked for > 50ms between FCP and TTI"
+ severity: "major"
+ category: "interactivity"
+ standard: "Lighthouse"
+ thresholds:
+ good: 200 # ms
+ needs_improvement: 600
+ poor: 601
+ unit: "milliseconds"
+
+ # --- Resource Optimization ---
+ resource_checks:
+ - id: "perf-total-page-size"
+ name: "Total Page Size"
+ description: "Total size of all resources loaded by the page"
+ severity: "major"
+ category: "resources"
+ standard: "Web Performance Best Practice"
+ thresholds:
+ good: 1500 # KB
+ needs_improvement: 3000
+ poor: 5000
+ unit: "kilobytes"
+
+ - id: "perf-total-requests"
+ name: "Total HTTP Requests"
+ description: "Total number of HTTP requests made by the page"
+ severity: "major"
+ category: "resources"
+ standard: "Web Performance Best Practice"
+ thresholds:
+ good: 50
+ needs_improvement: 80
+ poor: 100
+ unit: "count"
+
+ - id: "perf-image-optimization"
+ name: "Image Optimization"
+ description: "Images should be properly optimized"
+ severity: "major"
+ category: "resources"
+ standard: "Lighthouse"
+ checks:
+ - id: "uses-webp-avif"
+ description: "Use modern image formats (WebP, AVIF)"
+ severity: "minor"
+ - id: "responsive-images"
+ description: "Use srcset for responsive images"
+ severity: "minor"
+ - id: "lazy-loading"
+ description: "Offscreen images should use lazy loading"
+ severity: "minor"
+ - id: "image-dimensions"
+ description: "Images should have explicit width and height"
+ severity: "major"
+ - id: "oversized-images"
+ description: "Images should not be larger than their display size"
+ severity: "minor"
+
+ - id: "perf-js-optimization"
+ name: "JavaScript Optimization"
+ description: "JavaScript should be properly optimized"
+ severity: "major"
+ category: "resources"
+ standard: "Lighthouse"
+ checks:
+ - id: "minified-js"
+ description: "JavaScript should be minified"
+ severity: "minor"
+ - id: "no-render-blocking-js"
+ description: "Non-critical JS should use async or defer"
+ severity: "major"
+ - id: "unused-js"
+ description: "Remove unused JavaScript"
+ severity: "minor"
+ - id: "js-bundle-size"
+ description: "Individual JS bundles should be under 250KB (compressed)"
+ max_size_kb: 250
+ severity: "major"
+
+ - id: "perf-css-optimization"
+ name: "CSS Optimization"
+ description: "CSS should be properly optimized"
+ severity: "minor"
+ category: "resources"
+ standard: "Lighthouse"
+ checks:
+ - id: "minified-css"
+ description: "CSS should be minified"
+ severity: "minor"
+ - id: "no-render-blocking-css"
+ description: "Non-critical CSS should be deferred"
+ severity: "major"
+ - id: "unused-css"
+ description: "Remove unused CSS rules"
+ severity: "minor"
+ - id: "critical-css-inlined"
+ description: "Critical CSS should be inlined"
+ severity: "info"
+
+ - id: "perf-font-optimization"
+ name: "Font Optimization"
+ description: "Web fonts should be properly optimized"
+ severity: "minor"
+ category: "resources"
+ standard: "Web Performance Best Practice"
+ checks:
+ - id: "font-display"
+ description: "Use font-display: swap or optional"
+ severity: "minor"
+ - id: "preload-fonts"
+ description: "Preload critical fonts"
+ severity: "minor"
+ - id: "font-subsetting"
+ description: "Use font subsetting for CJK fonts"
+ severity: "info"
+ - id: "woff2-format"
+ description: "Use WOFF2 format for web fonts"
+ severity: "minor"
+
+ # --- Caching & Compression ---
+ caching:
+ - id: "perf-compression"
+ name: "Text Compression"
+ description: "Text resources should be served with compression"
+ severity: "major"
+ category: "network"
+ standard: "Lighthouse"
+ details:
+ supported_encodings:
+ - "gzip"
+ - "br (Brotli - preferred)"
+ - "zstd"
+ applies_to:
+ - "text/html"
+ - "text/css"
+ - "application/javascript"
+ - "application/json"
+ - "image/svg+xml"
+
+ - id: "perf-cache-headers"
+ name: "Cache Headers"
+ description: "Static resources should have proper cache headers"
+ severity: "major"
+ category: "network"
+ standard: "HTTP Caching (RFC 9111)"
+ details:
+ checks:
+ - id: "has-cache-control"
+ description: "Static assets should have Cache-Control header"
+ - id: "long-cache-lifetime"
+ description: "Static assets should have cache lifetime >= 1 year"
+ recommended: "Cache-Control: public, max-age=31536000, immutable"
+ - id: "etag"
+ description: "Resources should have ETag for validation"
+
+ - id: "perf-http2"
+ name: "HTTP/2 or HTTP/3"
+ description: "Site should use HTTP/2 or HTTP/3 protocol"
+ severity: "minor"
+ category: "network"
+ standard: "IETF RFC 9113 (HTTP/2), RFC 9114 (HTTP/3)"
+ details:
+ description: "HTTP/2+ provides multiplexing, header compression, and server push"
+
+# ============================================================
+# SECURITY RULES
+# ============================================================
+
+security:
+ # --- HTTP Security Headers (OWASP) ---
+ headers:
+ - id: "sec-strict-transport-security"
+ name: "Strict-Transport-Security (HSTS)"
+ description: "Enforces HTTPS-only access to prevent protocol downgrade attacks"
+ severity: "critical"
+ category: "transport"
+ standard: "OWASP Secure Headers Project"
+ standard_ref: "RFC 6797"
+ check_type: "header_check"
+ details:
+ header: "Strict-Transport-Security"
+ recommended_value: "max-age=63072000; includeSubDomains; preload"
+ directives:
+ - name: "max-age"
+ description: "Time in seconds browser should remember HTTPS-only"
+ recommended: 63072000 # 2 years
+ minimum: 31536000 # 1 year
+ - name: "includeSubDomains"
+ description: "Apply to all subdomains"
+ recommended: true
+ - name: "preload"
+ description: "Allow inclusion in browser HSTS preload list"
+ recommended: true
+ note: "Only effective over HTTPS connections"
+
+ - id: "sec-content-security-policy"
+ name: "Content-Security-Policy (CSP)"
+ description: "Restricts content origins to prevent XSS and injection attacks"
+ severity: "critical"
+ category: "injection"
+ standard: "OWASP Secure Headers Project"
+ standard_ref: "W3C CSP Level 3"
+ check_type: "header_check"
+ details:
+ header: "Content-Security-Policy"
+ recommended_directives:
+ - directive: "default-src"
+ description: "Fallback for other directives"
+ recommended: "'self'"
+ - directive: "script-src"
+ description: "Valid sources for JavaScript"
+ recommended: "'self'"
+ avoid: "'unsafe-inline', 'unsafe-eval'"
+ - directive: "style-src"
+ description: "Valid sources for stylesheets"
+ recommended: "'self'"
+ - directive: "img-src"
+ description: "Valid sources for images"
+ recommended: "'self' data:"
+ - directive: "font-src"
+ description: "Valid sources for fonts"
+ recommended: "'self'"
+ - directive: "connect-src"
+ description: "Valid targets for XMLHttpRequest, Fetch, WebSocket"
+ recommended: "'self'"
+ - directive: "frame-ancestors"
+ description: "Valid parents for embedding (replaces X-Frame-Options)"
+ recommended: "'none'"
+ - directive: "base-uri"
+ description: "Restricts URLs for base element"
+ recommended: "'self'"
+ - directive: "form-action"
+ description: "Restricts form submission targets"
+ recommended: "'self'"
+ - directive: "object-src"
+ description: "Valid sources for plugins"
+ recommended: "'none'"
+ - directive: "upgrade-insecure-requests"
+ description: "Upgrade HTTP requests to HTTPS"
+ recommended: true
+
+ - id: "sec-x-frame-options"
+ name: "X-Frame-Options"
+ description: "Prevents page from being displayed in frames (clickjacking protection)"
+ severity: "critical"
+ category: "clickjacking"
+ standard: "OWASP Secure Headers Project"
+ standard_ref: "RFC 7034"
+ check_type: "header_check"
+ details:
+ header: "X-Frame-Options"
+ recommended_value: "DENY"
+ valid_values:
+ - value: "DENY"
+ description: "Page cannot be displayed in any frame"
+ - value: "SAMEORIGIN"
+ description: "Page can only be displayed in frame on same origin"
+ note: "CSP frame-ancestors is the modern replacement"
+
+ - id: "sec-x-content-type-options"
+ name: "X-Content-Type-Options"
+ description: "Prevents MIME type sniffing attacks"
+ severity: "major"
+ category: "injection"
+ standard: "OWASP Secure Headers Project"
+ check_type: "header_check"
+ details:
+ header: "X-Content-Type-Options"
+ recommended_value: "nosniff"
+ description: "Blocks browsers from guessing MIME types, preventing XSS via MIME confusion"
+
+ - id: "sec-referrer-policy"
+ name: "Referrer-Policy"
+ description: "Controls referrer information sent with requests"
+ severity: "major"
+ category: "privacy"
+ standard: "OWASP Secure Headers Project"
+ standard_ref: "W3C Referrer Policy"
+ check_type: "header_check"
+ details:
+ header: "Referrer-Policy"
+ recommended_value: "strict-origin-when-cross-origin"
+ valid_values:
+ - value: "no-referrer"
+ description: "Never send referrer"
+ security: "highest"
+ - value: "no-referrer-when-downgrade"
+ description: "Don't send referrer on HTTPS → HTTP"
+ security: "medium"
+ - value: "origin"
+ description: "Only send the origin"
+ security: "high"
+ - value: "origin-when-cross-origin"
+ description: "Full URL for same-origin, origin for cross-origin"
+ security: "medium"
+ - value: "same-origin"
+ description: "Only send referrer for same-origin requests"
+ security: "high"
+ - value: "strict-origin"
+ description: "Send only the origin; send nothing on HTTPS → HTTP downgrade"
+ security: "high"
+ - value: "strict-origin-when-cross-origin"
+ description: "Full URL for same-origin, origin for cross-origin (same protocol)"
+ security: "medium-high"
+ - value: "unsafe-url"
+ description: "Always send full URL"
+ security: "none"
+
+ - id: "sec-permissions-policy"
+ name: "Permissions-Policy"
+ description: "Controls browser feature access (geolocation, camera, etc.)"
+ severity: "major"
+ category: "privacy"
+ standard: "OWASP Secure Headers Project"
+ standard_ref: "W3C Permissions Policy"
+ check_type: "header_check"
+ details:
+ header: "Permissions-Policy"
+ recommended_value: "geolocation=(), camera=(), microphone=(), payment=(), usb=(), magnetometer=(), gyroscope=(), accelerometer=()"
+ controllable_features:
+ - feature: "geolocation"
+ description: "Access to user's location"
+ default_recommendation: "()" # deny all
+ - feature: "camera"
+ description: "Access to device camera"
+ default_recommendation: "()"
+ - feature: "microphone"
+ description: "Access to device microphone"
+ default_recommendation: "()"
+ - feature: "payment"
+ description: "Payment Request API"
+ default_recommendation: "()"
+ - feature: "usb"
+ description: "WebUSB API"
+ default_recommendation: "()"
+ - feature: "magnetometer"
+ description: "Magnetometer sensor"
+ default_recommendation: "()"
+ - feature: "gyroscope"
+ description: "Gyroscope sensor"
+ default_recommendation: "()"
+ - feature: "accelerometer"
+ description: "Accelerometer sensor"
+ default_recommendation: "()"
+ - feature: "autoplay"
+ description: "Auto-play media"
+ default_recommendation: "(self)"
+ - feature: "fullscreen"
+ description: "Fullscreen API"
+ default_recommendation: "(self)"
+ - feature: "interest-cohort"
+ description: "FLoC cohort calculation (ad tracking; superseded by the Topics API)"
+ default_recommendation: "()"
+
+ - id: "sec-cross-origin-opener-policy"
+ name: "Cross-Origin-Opener-Policy (COOP)"
+ description: "Isolates browsing context to prevent Spectre-type attacks"
+ severity: "minor"
+ category: "isolation"
+ standard: "OWASP Secure Headers Project"
+ check_type: "header_check"
+ details:
+ header: "Cross-Origin-Opener-Policy"
+ recommended_value: "same-origin"
+ valid_values:
+ - "unsafe-none"
+ - "same-origin-allow-popups"
+ - "same-origin"
+
+ - id: "sec-cross-origin-embedder-policy"
+ name: "Cross-Origin-Embedder-Policy (COEP)"
+ description: "Restricts cross-origin resource loading"
+ severity: "minor"
+ category: "isolation"
+ standard: "OWASP Secure Headers Project"
+ check_type: "header_check"
+ details:
+ header: "Cross-Origin-Embedder-Policy"
+ recommended_value: "require-corp"
+ valid_values:
+ - "unsafe-none"
+ - "require-corp"
+ - "credentialless"
+
+ - id: "sec-cross-origin-resource-policy"
+ name: "Cross-Origin-Resource-Policy (CORP)"
+ description: "Blocks resource loading from unauthorized origins"
+ severity: "minor"
+ category: "isolation"
+ standard: "OWASP Secure Headers Project"
+ check_type: "header_check"
+ details:
+ header: "Cross-Origin-Resource-Policy"
+ recommended_value: "same-site"
+ valid_values:
+ - "same-site"
+ - "same-origin"
+ - "cross-origin"
+
+ # --- Headers to Remove (Information Disclosure) ---
+ headers_to_remove:
+ - id: "sec-remove-server"
+ name: "Remove Server Header"
+ description: "Server header exposes web server technology"
+ severity: "minor"
+ category: "information_disclosure"
+ standard: "OWASP Secure Headers Project"
+ details:
+ header: "Server"
+ action: "Remove or set to non-informative value"
+ reason: "Prevents fingerprinting of web server software"
+
+ - id: "sec-remove-x-powered-by"
+ name: "Remove X-Powered-By Header"
+ description: "X-Powered-By exposes application framework"
+ severity: "minor"
+ category: "information_disclosure"
+ standard: "OWASP Secure Headers Project"
+ details:
+ header: "X-Powered-By"
+ action: "Remove entirely"
+ reason: "Prevents fingerprinting of application framework (Express, PHP, ASP.NET)"
+
+ - id: "sec-remove-x-aspnet-version"
+ name: "Remove X-AspNet-Version Header"
+ description: "Exposes .NET framework version"
+ severity: "minor"
+ category: "information_disclosure"
+ standard: "OWASP Secure Headers Project"
+ details:
+ header: "X-AspNet-Version"
+ action: "Remove entirely"
+
+ - id: "sec-remove-x-aspnetmvc-version"
+ name: "Remove X-AspNetMvc-Version Header"
+ description: "Exposes ASP.NET MVC version"
+ severity: "minor"
+ category: "information_disclosure"
+ standard: "OWASP Secure Headers Project"
+ details:
+ header: "X-AspNetMvc-Version"
+ action: "Remove entirely"
+
+ # --- Deprecated Headers ---
+ deprecated_headers:
+ - id: "sec-no-x-xss-protection"
+ name: "X-XSS-Protection (Deprecated)"
+ description: "Legacy XSS filter - should be disabled in favor of CSP"
+ severity: "info"
+ category: "legacy"
+ standard: "OWASP Secure Headers Project"
+ details:
+ header: "X-XSS-Protection"
+ recommended_value: "0"
+ reason: "Modern browsers have removed XSS auditor; use CSP instead"
+ note: "Setting to 1; mode=block can introduce vulnerabilities in older browsers"
+
+ - id: "sec-no-public-key-pins"
+ name: "Public-Key-Pins (HPKP) - Removed"
+ description: "HTTP Public Key Pinning is deprecated and should not be used"
+ severity: "info"
+ category: "legacy"
+ standard: "OWASP Secure Headers Project"
+ details:
+ header: "Public-Key-Pins"
+ action: "Do not use"
+ reason: "Risk of permanent site lockout; replaced by Certificate Transparency"
+
+ # --- Transport Security ---
+ transport:
+ - id: "sec-https"
+ name: "HTTPS Enforcement"
+ description: "Site must be served over HTTPS"
+ severity: "critical"
+ category: "transport"
+ standard: "OWASP / Google"
+ checks:
+ - id: "uses-https"
+ description: "Page is served over HTTPS"
+ severity: "critical"
+ - id: "no-mixed-content"
+ description: "No HTTP resources loaded on HTTPS page"
+ severity: "critical"
+ - id: "http-redirects-to-https"
+ description: "HTTP requests redirect to HTTPS"
+ severity: "major"
+ - id: "valid-certificate"
+ description: "SSL/TLS certificate is valid and not expired"
+ severity: "critical"
+ - id: "strong-tls-version"
+ description: "Uses TLS 1.2 or higher"
+ severity: "major"
+ details:
+ minimum_version: "TLS 1.2"
+ recommended_version: "TLS 1.3"
+ deprecated_versions:
+ - "SSL 2.0"
+ - "SSL 3.0"
+ - "TLS 1.0"
+ - "TLS 1.1"
+
+ # --- Cookie Security ---
+ cookies:
+ - id: "sec-cookie-secure"
+ name: "Secure Cookie Flag"
+ description: "Cookies set over HTTPS should have the Secure flag"
+ severity: "major"
+ category: "cookies"
+ standard: "OWASP Session Management"
+ details:
+ flag: "Secure"
+ description: "Cookie only sent over HTTPS connections"
+
+ - id: "sec-cookie-httponly"
+ name: "HttpOnly Cookie Flag"
+ description: "Session cookies should have HttpOnly flag"
+ severity: "major"
+ category: "cookies"
+ standard: "OWASP Session Management"
+ details:
+ flag: "HttpOnly"
+ description: "Cookie not accessible via JavaScript (prevents XSS theft)"
+
+ - id: "sec-cookie-samesite"
+ name: "SameSite Cookie Attribute"
+ description: "Cookies should have SameSite attribute"
+ severity: "major"
+ category: "cookies"
+ standard: "OWASP Session Management"
+ details:
+ attribute: "SameSite"
+ recommended_value: "Lax"
+ valid_values:
+ - value: "Strict"
+ description: "Cookie not sent in any cross-site request"
+ - value: "Lax"
+ description: "Cookie sent in top-level navigations (recommended default)"
+ - value: "None"
+ description: "Cookie sent in all contexts (requires Secure flag)"
+
+ # --- Content Security ---
+ content:
+ - id: "sec-subresource-integrity"
+ name: "Subresource Integrity (SRI)"
+ description: "External scripts/styles should use integrity attribute"
+ severity: "minor"
+ category: "supply_chain"
+ standard: "W3C Subresource Integrity"
+ check_type: "attribute_check"
+ details:
+ applies_to:
+ - "script[src] from CDN"
+ - "link[rel=stylesheet] from CDN"
+ attribute: "integrity"
+ description: "Hash-based verification of external resources"
+
+ - id: "sec-form-action-https"
+ name: "Form Action HTTPS"
+ description: "Form actions should use HTTPS"
+ severity: "major"
+ category: "transport"
+ standard: "OWASP"
+ check_type: "form_check"
+ details:
+ description: "Form submissions should always go to HTTPS endpoints"
+
+ - id: "sec-target-blank-rel"
+ name: "Target Blank Security"
+ description: "Links with target=_blank should have rel=noopener"
+ severity: "minor"
+ category: "injection"
+ standard: "Web Security Best Practice"
+ check_type: "link_check"
+ details:
+ description: "Prevents tab-napping attacks via window.opener"
+ recommended: 'rel="noopener noreferrer"'
+ note: "Modern browsers set noopener by default, but explicit is safer"
+
+ - id: "sec-no-inline-event-handlers"
+ name: "No Inline Event Handlers"
+ description: "Avoid inline event handlers (onclick, onload, etc.)"
+ severity: "minor"
+ category: "injection"
+ standard: "OWASP / CSP"
+ check_type: "attribute_check"
+ details:
+ description: "Inline event handlers are incompatible with strict CSP"
+ blocked_attributes:
+ - "onclick"
+ - "onload"
+ - "onerror"
+ - "onmouseover"
+ - "onsubmit"
+ - "onfocus"
+ - "onblur"
+ - "onchange"
+ - "onkeydown"
+ - "onkeyup"
+ - "onkeypress"
diff --git a/backend/app/rules/seo.yaml b/backend/app/rules/seo.yaml
new file mode 100644
index 0000000..f58783a
--- /dev/null
+++ b/backend/app/rules/seo.yaml
@@ -0,0 +1,529 @@
+# ============================================================
+# SEO (Search Engine Optimization) Rules
+# Based on: Google Search Essentials, Schema.org, Core Web Vitals
+# ============================================================
+
+metadata:
+ name: "SEO Standards"
+ version: "1.0.0"
+ last_updated: "2026-02-13"
+ sources:
+ - name: "Google Search Essentials"
+ url: "https://developers.google.com/search/docs/essentials"
+ description: "Google's core guidelines for search visibility"
+ - name: "Google Search Central - Technical Requirements"
+ url: "https://developers.google.com/search/docs/crawling-indexing"
+ - name: "Core Web Vitals"
+ url: "https://developers.google.com/search/docs/appearance/core-web-vitals"
+ - name: "Schema.org"
+ url: "https://schema.org/"
+ description: "Structured data vocabulary"
+ - name: "Open Graph Protocol"
+ url: "https://ogp.me/"
+ - name: "Lighthouse SEO Audit"
+ url: "https://developer.chrome.com/docs/lighthouse/seo/"
+
+# ============================================================
+# 1. Title & Meta Tags
+# ============================================================
+rules:
+ # --- Essential Meta Tags ---
+ - id: "seo-title-tag"
+ name: "Title Tag"
+ description: "Page must have a unique, descriptive <title> tag"
+ severity: "critical"
+ category: "meta"
+ standard: "Google Search Essentials"
+ check_type: "meta_tag_check"
+ details:
+ tag: "title"
+ requirements:
+ - "Must be present and non-empty"
+ - "Should be 30-60 characters for optimal display"
+ - "Should be unique across the site"
+ - "Should accurately describe page content"
+ max_length: 60
+ min_length: 10
+
+ - id: "seo-meta-description"
+ name: "Meta Description"
+ description: "Page should have a descriptive meta description"
+ severity: "major"
+ category: "meta"
+ standard: "Google Search Essentials"
+ check_type: "meta_tag_check"
+ details:
+ tag: '<meta name="description">'
+ requirements:
+ - "Should be 120-160 characters for optimal display"
+ - "Should be unique across the site"
+ - "Should accurately summarize page content"
+ max_length: 160
+ min_length: 50
+
+ - id: "seo-meta-viewport"
+ name: "Viewport Meta Tag"
+ description: "Page must have viewport meta tag for mobile compatibility"
+ severity: "critical"
+ category: "meta"
+ standard: "Google Mobile-First Indexing"
+ check_type: "meta_tag_check"
+ details:
+ tag: '<meta name="viewport">'
+ recommended_value: "width=device-width, initial-scale=1"
+
+ - id: "seo-charset"
+ name: "Character Encoding"
+ description: "Page must declare character encoding"
+ severity: "major"
+ category: "meta"
+ standard: "HTML Living Standard"
+ check_type: "meta_tag_check"
+ details:
+ tag: '<meta charset="utf-8">'
+
+ - id: "seo-lang-attribute"
+ name: "HTML Language Attribute"
+ description: "HTML element should have lang attribute"
+ severity: "major"
+ category: "meta"
+ standard: "Google Search Essentials"
+ check_type: "attribute_check"
+ details:
+ element: "html"
+ attribute: "lang"
+ description: "Helps search engines serve language-appropriate results"
+
+ # --- Canonical & Duplicate Content ---
+ - id: "seo-canonical-url"
+ name: "Canonical URL"
+ description: "Page should have a canonical URL to prevent duplicate content"
+ severity: "major"
+ category: "meta"
+ standard: "Google Search Central"
+ check_type: "link_tag_check"
+ details:
+ tag: '<link rel="canonical">'
+ requirements:
+ - "Should be an absolute URL"
+ - "Should point to the preferred version of the page"
+ - "Must be self-referencing or point to a valid page"
+
+ - id: "seo-hreflang"
+ name: "Hreflang Tags"
+ description: "Multilingual pages should have hreflang annotations"
+ severity: "minor"
+ category: "meta"
+ standard: "Google Search Central - Internationalization"
+ check_type: "link_tag_check"
+ details:
+ tag: '<link rel="alternate" hreflang="...">'
+ description: "Tells Google which language versions exist for a page"
+
+ # --- Robots Control ---
+ - id: "seo-meta-robots"
+ name: "Meta Robots Tag"
+ description: "Check for meta robots directives"
+ severity: "major"
+ category: "crawling"
+ standard: "Google Search Central"
+ check_type: "meta_tag_check"
+ details:
+ tag: '<meta name="robots">'
+ valid_values:
+ - "index"
+ - "noindex"
+ - "follow"
+ - "nofollow"
+ - "noarchive"
+ - "nosnippet"
+ - "max-snippet"
+ - "max-image-preview"
+ - "max-video-preview"
+ warning_values:
+ - value: "noindex"
+ message: "Page is blocked from indexing"
+ - value: "nofollow"
+ message: "Links on this page will not be followed"
+ - value: "none"
+ message: "Page is blocked from indexing and links won't be followed"
+
+# ============================================================
+# 2. Content Structure
+# ============================================================
+ - id: "seo-heading-structure"
+ name: "Heading Structure"
+ description: "Page should have proper heading hierarchy for SEO"
+ severity: "major"
+ category: "content"
+ standard: "Google Search Essentials"
+ check_type: "heading_check"
+ details:
+ checks:
+ - id: "has-h1"
+ description: "Page should have an H1 tag"
+ severity: "critical"
+ - id: "h1-not-empty"
+ description: "H1 tag should not be empty"
+ severity: "critical"
+ - id: "heading-hierarchy"
+ description: "Headings should follow logical hierarchy (no skipping levels)"
+ severity: "major"
+ - id: "heading-keywords"
+ description: "Headings should contain relevant keywords"
+ severity: "minor"
+ - id: "no-multiple-h1"
+ description: "Page should not have multiple H1 tags"
+ severity: "major"
+
+ - id: "seo-image-alt"
+ name: "Image Alt Text"
+ description: "Images should have descriptive alt attributes for SEO"
+ severity: "major"
+ category: "content"
+ standard: "Google Search Essentials - Images"
+ check_type: "image_check"
+ details:
+ checks:
+ - id: "has-alt"
+ description: "All images should have alt attribute"
+ severity: "critical"
+ - id: "alt-not-empty"
+ description: "Alt text should not be empty (unless decorative)"
+ severity: "major"
+ - id: "alt-not-filename"
+ description: "Alt text should not be just a filename"
+ severity: "minor"
+ - id: "alt-not-too-long"
+ description: "Alt text should be under 125 characters"
+ max_length: 125
+ severity: "minor"
+
+ - id: "seo-internal-links"
+ name: "Internal Linking"
+ description: "Page should have internal links for crawlability"
+ severity: "minor"
+ category: "content"
+ standard: "Google Search Central - Links"
+ check_type: "link_check"
+ details:
+ checks:
+ - id: "has-internal-links"
+ description: "Page should contain internal links"
+ - id: "no-broken-links"
+ description: "Internal links should not return 404"
+ - id: "descriptive-anchor"
+ description: "Link anchor text should be descriptive (not 'click here')"
+
+ - id: "seo-content-length"
+ name: "Content Length"
+ description: "Page should have sufficient text content"
+ severity: "minor"
+ category: "content"
+ standard: "SEO Best Practice"
+ check_type: "content_check"
+ details:
+ min_word_count: 300
+ description: "Pages with thin content may rank poorly"
+
+# ============================================================
+# 3. Technical SEO
+# ============================================================
+ - id: "seo-robots-txt"
+ name: "Robots.txt"
+ description: "Site should have a valid robots.txt file"
+ severity: "major"
+ category: "crawling"
+ standard: "Google Search Central - Robots.txt"
+ check_type: "file_check"
+ details:
+ path: "/robots.txt"
+ checks:
+ - id: "exists"
+ description: "robots.txt file should exist"
+ - id: "valid-syntax"
+ description: "robots.txt should have valid syntax"
+ - id: "not-blocking-important"
+ description: "Should not block important resources (CSS, JS, images)"
+ - id: "has-sitemap-reference"
+ description: "Should reference XML sitemap"
+
+ - id: "seo-sitemap-xml"
+ name: "XML Sitemap"
+ description: "Site should have a valid XML sitemap"
+ severity: "major"
+ category: "crawling"
+ standard: "Google Search Central - Sitemaps"
+ check_type: "file_check"
+ details:
+ paths:
+ - "/sitemap.xml"
+ - "/sitemap_index.xml"
+ checks:
+ - id: "exists"
+ description: "XML sitemap should exist"
+ - id: "valid-xml"
+ description: "Sitemap should be valid XML"
+ - id: "referenced-in-robots"
+ description: "Sitemap should be referenced in robots.txt"
+
+ - id: "seo-https"
+ name: "HTTPS"
+ description: "Site should be served over HTTPS"
+ severity: "critical"
+ category: "security_seo"
+ standard: "Google Search Essentials"
+ check_type: "protocol_check"
+ details:
+ description: "HTTPS is a confirmed Google ranking signal"
+
+ - id: "seo-mobile-friendly"
+ name: "Mobile Friendliness"
+ description: "Page should be mobile-friendly (mobile-first indexing)"
+ severity: "critical"
+ category: "mobile"
+ standard: "Google Mobile-First Indexing"
+ check_type: "mobile_check"
+ details:
+ checks:
+ - id: "viewport-meta"
+ description: "Has viewport meta tag"
+ - id: "responsive-design"
+ description: "Uses responsive CSS (media queries or fluid layout)"
+ - id: "no-horizontal-scroll"
+ description: "No horizontal scrolling at mobile widths"
+ - id: "readable-font-size"
+ description: "Font size is readable without zooming (>= 16px base)"
+ - id: "tap-targets"
+ description: "Tap targets are at least 48x48 CSS pixels"
+
+ - id: "seo-page-speed"
+ name: "Page Load Speed"
+ description: "Page should load quickly for better SEO"
+ severity: "major"
+ category: "performance_seo"
+ standard: "Google Core Web Vitals"
+ check_type: "performance_check"
+ details:
+ metrics:
+ - name: "LCP"
+ description: "Largest Contentful Paint"
+ good: "<= 2.5s"
+ needs_improvement: "<= 4.0s"
+ poor: "> 4.0s"
+ - name: "INP"
+ description: "Interaction to Next Paint"
+ good: "<= 200ms"
+ needs_improvement: "<= 500ms"
+ poor: "> 500ms"
+ - name: "CLS"
+ description: "Cumulative Layout Shift"
+ good: "<= 0.1"
+ needs_improvement: "<= 0.25"
+ poor: "> 0.25"
+
+ - id: "seo-url-structure"
+ name: "URL Structure"
+ description: "URLs should be clean and descriptive"
+ severity: "minor"
+ category: "technical"
+ standard: "Google Search Essentials - URL Structure"
+ check_type: "url_check"
+ details:
+ checks:
+ - id: "readable-url"
+ description: "URL should be human-readable (not query strings)"
+ - id: "no-underscores"
+ description: "URLs should use hyphens, not underscores"
+ - id: "lowercase"
+ description: "URLs should be lowercase"
+ - id: "no-excessive-depth"
+ description: "URL path should not be excessively deep (> 4 levels)"
+ max_depth: 4
+
+ - id: "seo-redirect-check"
+ name: "Redirect Handling"
+ description: "Check for proper redirect implementation"
+ severity: "major"
+ category: "technical"
+ standard: "Google Search Central - Redirects"
+ check_type: "redirect_check"
+ details:
+ checks:
+ - id: "no-redirect-chains"
+ description: "Avoid redirect chains (>2 hops)"
+ - id: "use-301"
+ description: "Permanent redirects should use 301 status"
+ - id: "no-meta-refresh-redirect"
+ description: "Avoid meta refresh redirects"
+
+# ============================================================
+# 4. Structured Data
+# ============================================================
+ - id: "seo-structured-data"
+ name: "Structured Data (Schema.org)"
+ description: "Page should include structured data for rich results"
+ severity: "minor"
+ category: "structured_data"
+ standard: "Schema.org / Google Structured Data"
+ check_type: "structured_data_check"
+ details:
+ formats:
+ - "JSON-LD (recommended)"
+ - "Microdata"
+ - "RDFa"
+ common_types:
+ - type: "WebSite"
+ description: "Site-level information with search action"
+ - type: "Organization"
+ description: "Organization/company information"
+ - type: "BreadcrumbList"
+ description: "Breadcrumb navigation structure"
+ - type: "Article"
+ description: "News article, blog post"
+ - type: "Product"
+ description: "Product information with reviews/pricing"
+ - type: "FAQPage"
+ description: "Frequently asked questions"
+ - type: "LocalBusiness"
+ description: "Local business information"
+ - type: "Event"
+ description: "Event information"
+ - type: "Recipe"
+ description: "Recipe with ingredients and instructions"
+ - type: "HowTo"
+ description: "Step-by-step instructions"
+ - type: "VideoObject"
+ description: "Video content metadata"
+
+ - id: "seo-json-ld-valid"
+ name: "JSON-LD Validity"
+ description: "JSON-LD structured data should be valid"
+ severity: "minor"
+ category: "structured_data"
+ standard: "Schema.org"
+ check_type: "structured_data_check"
+ details:
+ checks:
+ - id: "valid-json"
+ description: "JSON-LD must be valid JSON"
+ - id: "has-context"
+ description: "Must include @context: https://schema.org"
+ - id: "has-type"
+ description: "Must include @type property"
+ - id: "required-properties"
+ description: "Must include required properties for the type"
+
+# ============================================================
+# 5. Social Media / Open Graph
+# ============================================================
+ - id: "seo-open-graph"
+ name: "Open Graph Tags"
+ description: "Page should have Open Graph meta tags for social sharing"
+ severity: "minor"
+ category: "social"
+ standard: "Open Graph Protocol"
+ check_type: "meta_tag_check"
+ details:
+ required_tags:
+ - property: "og:title"
+ description: "Title for social sharing"
+ - property: "og:description"
+ description: "Description for social sharing"
+ - property: "og:image"
+ description: "Image for social sharing (min 1200x630px recommended)"
+ - property: "og:url"
+ description: "Canonical URL for social sharing"
+ - property: "og:type"
+ description: "Content type (website, article, etc.)"
+ recommended_tags:
+ - property: "og:site_name"
+ description: "Website name"
+ - property: "og:locale"
+ description: "Locale for the content"
+
+ - id: "seo-twitter-cards"
+ name: "Twitter Card Tags"
+ description: "Page should have Twitter Card meta tags"
+ severity: "info"
+ category: "social"
+ standard: "Twitter Cards"
+ check_type: "meta_tag_check"
+ details:
+ tags:
+ - name: "twitter:card"
+ description: "Card type (summary, summary_large_image, player)"
+ required: true
+ - name: "twitter:title"
+ description: "Title for Twitter sharing"
+ required: false
+ - name: "twitter:description"
+ description: "Description for Twitter sharing"
+ required: false
+ - name: "twitter:image"
+ description: "Image for Twitter sharing"
+ required: false
+
+# ============================================================
+# 6. Crawling & Indexing
+# ============================================================
+ - id: "seo-crawlability"
+ name: "Page Crawlability"
+ description: "Page should be crawlable by search engines"
+ severity: "critical"
+ category: "crawling"
+ standard: "Google Search Central"
+ check_type: "crawl_check"
+ details:
+ checks:
+ - id: "status-200"
+ description: "Page should return HTTP 200 status"
+ - id: "not-blocked-robots"
+ description: "Page should not be blocked by robots.txt"
+ - id: "not-noindex"
+ description: "Page should not have noindex directive (unless intended)"
+ - id: "content-type-html"
+ description: "Content-Type should be text/html"
+
+ - id: "seo-favicon"
+ name: "Favicon"
+ description: "Site should have a favicon"
+ severity: "info"
+ category: "technical"
+ standard: "Google Search Central"
+ check_type: "file_check"
+ details:
+ description: "Favicons appear in search results and browser tabs"
+ check_locations:
+ - '<link rel="icon">'
+ - '<link rel="shortcut icon">'
+ - "/favicon.ico"
+
+ - id: "seo-404-page"
+ name: "Custom 404 Page"
+ description: "Site should have a custom 404 error page"
+ severity: "minor"
+ category: "technical"
+ standard: "Google Search Essentials"
+ check_type: "http_check"
+ details:
+ description: "Custom 404 pages help users navigate back to working pages"
+
+ - id: "seo-nofollow-usage"
+ name: "Nofollow Link Usage"
+ description: "Check for proper use of rel=nofollow on links"
+ severity: "info"
+ category: "links"
+ standard: "Google Search Central - Links"
+ check_type: "link_check"
+ details:
+ rel_values:
+ - value: "nofollow"
+ description: "Do not follow this link"
+ use_case: "User-generated content, untrusted links"
+ - value: "ugc"
+ description: "User-generated content"
+ use_case: "Comments, forum posts"
+ - value: "sponsored"
+ description: "Paid/sponsored link"
+ use_case: "Advertisements, sponsored content"
diff --git a/backend/requirements.txt b/backend/requirements.txt
index acca931..9812ae1 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -30,5 +30,8 @@ playwright>=1.49.0
weasyprint>=62.0
Jinja2>=3.1.0
+# Rules (YAML)
+PyYAML>=6.0.0
+
# Utilities
python-slugify>=8.0.0