""" Accessibility (WCAG/KWCAG) Checker Engine (F-003). Uses Playwright + axe-core for comprehensive accessibility testing. Falls back to BeautifulSoup-based checks if Playwright is unavailable. Supports WCAG version selection (2.0/2.1/2.2) via rules/accessibility.yaml. Supports KWCAG (Korean) standard selection (2.1/2.2) via kwcag_mapping module. """ import json import logging import os from pathlib import Path from typing import Optional from bs4 import BeautifulSoup from app.engines.base import BaseChecker from app.engines.kwcag_mapping import ( convert_wcag_issue_to_kwcag, get_kwcag_axe_tags, get_kwcag_label, ) from app.models.schemas import CategoryResult, Issue from app.rules import get_rules logger = logging.getLogger(__name__) # axe-core JS file path AXE_CORE_JS_PATH = Path(__file__).parent / "axe_core" / "axe.min.js" # Korean message mapping for axe-core rules AXE_RULE_MESSAGES = { "image-alt": ("A-01", "이미지에 대체 텍스트(alt)가 없습니다", "1.1.1"), "color-contrast": ("A-02", "텍스트와 배경의 색상 대비가 부족합니다", "1.4.3"), "color-contrast-enhanced": ("A-02", "텍스트와 배경의 색상 대비가 향상된 기준을 충족하지 않습니다", "1.4.6"), "keyboard": ("A-03", "키보드로 접근할 수 없는 요소가 있습니다", "2.1.1"), "focus-visible": ("A-04", "키보드 포커스가 시각적으로 표시되지 않습니다", "2.4.7"), "label": ("A-05", "폼 요소에 레이블이 연결되지 않았습니다", "1.3.1"), "input-label": ("A-05", "입력 요소에 레이블이 없습니다", "1.3.1"), "input-button-name": ("A-05", "입력 버튼에 접근 가능한 이름이 없습니다", "4.1.2"), "select-name": ("A-05", "select 요소에 접근 가능한 이름이 없습니다", "4.1.2"), "aria-valid-attr": ("A-06", "유효하지 않은 ARIA 속성이 사용되었습니다", "4.1.2"), "aria-roles": ("A-06", "유효하지 않은 ARIA 역할이 사용되었습니다", "4.1.2"), "aria-required-attr": ("A-06", "필수 ARIA 속성이 누락되었습니다", "4.1.2"), "aria-valid-attr-value": ("A-06", "ARIA 속성 값이 올바르지 않습니다", "4.1.2"), "aria-allowed-attr": ("A-06", "허용되지 않는 ARIA 속성이 사용되었습니다", "4.1.2"), "aria-allowed-role": ("A-06", "허용되지 않는 ARIA 역할이 사용되었습니다", "4.1.2"), "aria-hidden-body": ("A-06", "body 요소에 aria-hidden이 설정되어 있습니다", "4.1.2"), "aria-hidden-focus": ("A-06", "aria-hidden 요소 내에 포커스 가능한 요소가 있습니다", "4.1.2"), "link-name": ("A-07", "링크 텍스트가 목적을 설명하지 않습니다", "2.4.4"), "html-has-lang": ("A-08", "HTML 요소에 lang 속성이 없습니다", "3.1.1"), "html-lang-valid": ("A-08", "HTML lang 속성 값이 올바르지 않습니다", "3.1.1"), "valid-lang": ("A-08", "lang 속성 값이 올바르지 않습니다", "3.1.2"), "bypass": ("A-09", "건너뛰기 링크(skip navigation)가 없습니다", "2.4.1"), "region": ("A-09", "랜드마크 영역 밖에 콘텐츠가 있습니다", "2.4.1"), "no-autoplay-audio": ("A-10", "자동 재생 미디어에 정지/음소거 컨트롤이 없습니다", "1.4.2"), "audio-caption": ("A-10", "오디오/비디오에 자막이 없습니다", "1.2.2"), "video-caption": ("A-10", "비디오에 자막이 없습니다", "1.2.2"), "document-title": ("A-11", "페이지에 제목(title)이 없습니다", "2.4.2"), "empty-heading": ("A-12", "빈 heading 요소가 있습니다", "2.4.6"), "frame-title": ("A-13", "iframe에 접근 가능한 제목이 없습니다", "4.1.2"), "button-name": ("A-14", "버튼에 접근 가능한 이름이 없습니다", "4.1.2"), "meta-refresh": ("A-15", "meta refresh로 시간 제한이 설정되어 있습니다", "2.2.1"), "meta-viewport-large": ("A-16", "사용자의 확대/축소가 제한되어 있습니다", "1.4.4"), "autocomplete-valid": ("A-17", "autocomplete 속성이 올바르지 않습니다", "1.3.5"), "target-size": ("A-18", "터치 대상 크기가 최소 기준을 충족하지 않습니다", "2.5.8"), "scrollable-region-focusable": ("A-19", "스크롤 가능한 영역에 키보드 접근이 불가합니다", "2.1.1"), "tabindex": ("A-20", "tabindex 값이 0보다 크게 설정되어 있습니다", "2.4.3"), "blink": ("A-21", "깜빡이는 콘텐츠가 있습니다", "2.2.2"), "marquee": ("A-21", "자동 스크롤 콘텐츠(marquee)가 있습니다", "2.2.2"), } # axe-core impact to severity mapping IMPACT_TO_SEVERITY = { "critical": "critical", "serious": "major", "moderate": "minor", "minor": "info", } # WCAG version to axe-core tags mapping (loaded from YAML at runtime) WCAG_VERSION_PRESETS = { "2.0_A": ["wcag2a"], "2.0_AA": ["wcag2a", "wcag2aa"], "2.1_AA": ["wcag2a", "wcag2aa", "wcag21a", "wcag21aa"], "2.2_AA": ["wcag2a", "wcag2aa", "wcag21a", "wcag21aa", "wcag22aa"], "2.2_full": ["wcag2a", "wcag2aa", "wcag2aaa", "wcag21a", "wcag21aa", "wcag22aa"], } def _get_axe_tags_for_version(wcag_version: str = "2.1_AA") -> list[str]: """Get axe-core tags for a given WCAG version preset.""" rules = get_rules("accessibility") presets = rules.get("compliance_presets", {}) # Map user-friendly names to YAML preset keys version_map = { "2.0_A": "wcag_20_a", "2.0_AA": "wcag_20_aa", "2.1_AA": "wcag_21_aa", "2.2_AA": "wcag_22_aa", "2.2_full": "wcag_22_full", } yaml_key = version_map.get(wcag_version) if yaml_key and yaml_key in presets: return presets[yaml_key].get("tags", WCAG_VERSION_PRESETS.get(wcag_version, ["wcag2a", "wcag2aa"])) return WCAG_VERSION_PRESETS.get(wcag_version, ["wcag2a", "wcag2aa", "wcag21a", "wcag21aa"]) def _get_wcag_level_label(wcag_version: str) -> str: """Get human-readable WCAG level label.""" labels = { "2.0_A": "WCAG 2.0 Level A", "2.0_AA": "WCAG 2.0 Level AA", "2.1_AA": "WCAG 2.1 Level AA", "2.2_AA": "WCAG 2.2 Level AA", "2.2_full": "WCAG 2.2 All Levels", } return labels.get(wcag_version, "WCAG 2.1 Level AA") # Standard parameter to internal version mapping # Maps API-facing standard values to (is_kwcag, internal_version) _STANDARD_MAP: dict[str, tuple[bool, str]] = { "wcag_2.0_a": (False, "2.0_A"), "wcag_2.0_aa": (False, "2.0_AA"), "wcag_2.1_aa": (False, "2.1_AA"), "wcag_2.2_aa": (False, "2.2_AA"), "kwcag_2.1": (True, "kwcag_2.1"), "kwcag_2.2": (True, "kwcag_2.2"), } def _parse_standard(standard: str) -> tuple[bool, str]: """ Parse the accessibility standard parameter. Args: standard: API-facing standard string, e.g. "wcag_2.1_aa" or "kwcag_2.2" Returns: (is_kwcag, version_key) where version_key is the internal key for axe tag selection and label generation. """ return _STANDARD_MAP.get(standard, (False, "2.1_AA")) class AccessibilityChecker(BaseChecker): """Accessibility (WCAG/KWCAG) checker engine with standard selection.""" def __init__( self, progress_callback=None, wcag_version: str = "2.1_AA", standard: str = "wcag_2.1_aa", ): super().__init__(progress_callback) # Parse the standard parameter to determine mode self.is_kwcag, self._version_key = _parse_standard(standard) self.standard = standard if self.is_kwcag: # KWCAG mode: use KWCAG-specific axe tags self.wcag_version = wcag_version # Keep for internal compatibility self.axe_tags = get_kwcag_axe_tags(self._version_key) else: # WCAG mode: use the internal version key for axe tags self.wcag_version = self._version_key self.axe_tags = _get_axe_tags_for_version(self._version_key) @property def category_name(self) -> str: return "accessibility" async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult: """ Primary: Playwright + axe-core. Fallback: BeautifulSoup-based basic checks. """ try: return await self._check_with_playwright(url) except Exception as e: logger.warning( "Playwright accessibility check failed, falling back to basic checks: %s", str(e), ) return await self._check_with_beautifulsoup(url, html_content) async def _check_with_playwright(self, url: str) -> CategoryResult: """Run axe-core via Playwright headless browser.""" from playwright.async_api import async_playwright await self.update_progress(10, "브라우저 시작 중...") async with async_playwright() as p: browser = await p.chromium.launch(headless=True) try: page = await browser.new_page() await self.update_progress(20, "페이지 로드 중...") await page.goto(url, wait_until="networkidle", timeout=30000) await self.update_progress(40, "axe-core 주입 중...") if AXE_CORE_JS_PATH.exists() and AXE_CORE_JS_PATH.stat().st_size > 1000: axe_js = AXE_CORE_JS_PATH.read_text(encoding="utf-8") await page.evaluate(axe_js) else: await page.evaluate(""" async () => { const script = document.createElement('script'); script.src = 'https://cdnjs.cloudflare.com/ajax/libs/axe-core/4.10.2/axe.min.js'; document.head.appendChild(script); await new Promise((resolve, reject) => { script.onload = resolve; script.onerror = reject; }); } """) level_label = ( get_kwcag_label(self._version_key) if self.is_kwcag else _get_wcag_level_label(self.wcag_version) ) await self.update_progress(60, f"접근성 검사 실행 중 ({level_label})...") # Build axe-core tag list including best-practice axe_tag_list = self.axe_tags + ["best-practice"] tags_js = json.dumps(axe_tag_list) axe_results = await page.evaluate(f""" () => {{ return new Promise((resolve, reject) => {{ if (typeof axe === 'undefined') {{ reject(new Error('axe-core not loaded')); return; }} axe.run(document, {{ runOnly: {{ type: 'tag', values: {tags_js} }} }}).then(resolve).catch(reject); }}); }} """) await self.update_progress(80, "결과 분석 중...") issues = self._parse_axe_results(axe_results) score = self._calculate_axe_score(axe_results) finally: await browser.close() await self.update_progress(100, "완료") wcag_level = ( get_kwcag_label(self._version_key) if self.is_kwcag else _get_wcag_level_label(self.wcag_version) ) return self._build_result( category="accessibility", score=score, issues=issues, wcag_level=wcag_level, ) async def _check_with_beautifulsoup(self, url: str, html_content: str) -> CategoryResult: """Fallback: basic accessibility checks using BeautifulSoup.""" soup = BeautifulSoup(html_content, "html5lib") issues: list[Issue] = [] await self.update_progress(20, "이미지 대체 텍스트 검사 중...") issues += self._bs_check_img_alt(soup) await self.update_progress(35, "폼 레이블 검사 중...") issues += self._bs_check_form_labels(soup) await self.update_progress(50, "ARIA 속성 검사 중...") issues += self._bs_check_aria(soup) await self.update_progress(60, "링크 텍스트 검사 중...") issues += self._bs_check_link_text(soup) await self.update_progress(70, "언어 속성 검사 중...") issues += self._bs_check_lang(soup) await self.update_progress(80, "건너뛰기 링크 검사 중...") issues += self._bs_check_skip_nav(soup) await self.update_progress(90, "자동 재생 검사 중...") issues += self._bs_check_autoplay(soup) score = self._calculate_score_by_deduction(issues) await self.update_progress(100, "완료") wcag_level = ( get_kwcag_label(self._version_key) if self.is_kwcag else _get_wcag_level_label(self.wcag_version) ) return self._build_result( category="accessibility", score=score, issues=issues, wcag_level=wcag_level, ) def _parse_axe_results(self, axe_results: dict) -> list[Issue]: """Convert axe-core violations to Issue list with Korean messages.""" issues = [] for violation in axe_results.get("violations", []): rule_id = violation.get("id", "") impact = violation.get("impact", "minor") severity = IMPACT_TO_SEVERITY.get(impact, "info") if rule_id in AXE_RULE_MESSAGES: code, korean_msg, wcag = AXE_RULE_MESSAGES[rule_id] else: code = "A-99" korean_msg = violation.get("description", "접근성 위반 사항이 발견되었습니다") # Try to extract WCAG criterion from tags tags = violation.get("tags", []) wcag = self._extract_wcag_from_tags(tags) nodes = violation.get("nodes", []) element = None if nodes: html_snippet = nodes[0].get("html", "") if html_snippet: element = html_snippet[:200] # Additional context for color contrast detail = "" if rule_id == "color-contrast" and nodes: data = nodes[0].get("any", [{}]) if data and isinstance(data, list) and len(data) > 0: msg_data = data[0].get("data", {}) if isinstance(msg_data, dict): fg = msg_data.get("fgColor", "") bg = msg_data.get("bgColor", "") ratio = msg_data.get("contrastRatio", "") if ratio: detail = f" (대비율: {ratio}:1, 최소 4.5:1 필요)" node_count = len(nodes) count_info = f" ({node_count}개 요소)" if node_count > 1 else "" issue = self._create_issue( code=code, severity=severity, message=f"{korean_msg}{detail}{count_info}", element=element, suggestion=violation.get("helpUrl", "해당 WCAG 기준을 확인하고 수정하세요"), wcag_criterion=wcag, ) # Convert to KWCAG criterion if in KWCAG mode if self.is_kwcag: issue_dict = issue.model_dump() issue_dict = convert_wcag_issue_to_kwcag(issue_dict, self._version_key) issue = Issue(**issue_dict) issues.append(issue) return issues @staticmethod def _extract_wcag_from_tags(tags: list[str]) -> str: """Extract WCAG criterion number from axe-core tags (e.g., 'wcag111' -> '1.1.1').""" for tag in tags: if tag.startswith("wcag") and not tag.startswith("wcag2"): # e.g., "wcag111" -> "1.1.1" digits = tag[4:] if len(digits) >= 3: return f"{digits[0]}.{digits[1]}.{digits[2:]}" return "4.1.2" def _calculate_axe_score(self, axe_results: dict) -> int: """Calculate score based on axe-core violations.""" severity_weights = { "critical": 20, "serious": 10, "moderate": 5, "minor": 2, } deduction = 0 for violation in axe_results.get("violations", []): impact = violation.get("impact", "minor") deduction += severity_weights.get(impact, 2) return max(0, 100 - deduction) # --- BeautifulSoup fallback checks --- def _bs_check_img_alt(self, soup: BeautifulSoup) -> list[Issue]: """A-01: Check images for alt text.""" issues = [] images = soup.find_all("img") missing = [img for img in images if not img.get("alt") and img.get("alt") != ""] if missing: issues.append(self._create_issue( code="A-01", severity="critical", message=f"alt 속성이 없는 이미지가 {len(missing)}개 발견되었습니다", element=str(missing[0])[:200] if missing else None, suggestion="모든 이미지에 설명적인 대체 텍스트를 추가하세요", wcag_criterion="1.1.1", )) return issues def _bs_check_form_labels(self, soup: BeautifulSoup) -> list[Issue]: """A-05: Check form elements for associated labels.""" issues = [] inputs = soup.find_all(["input", "select", "textarea"]) unlabeled = [] for inp in inputs: input_type = inp.get("type", "text") if input_type in ("hidden", "submit", "button", "reset", "image"): continue inp_id = inp.get("id") has_label = False if inp_id: label = soup.find("label", attrs={"for": inp_id}) if label: has_label = True if inp.get("aria-label") or inp.get("aria-labelledby") or inp.get("title"): has_label = True parent_label = inp.find_parent("label") if parent_label: has_label = True if not has_label: unlabeled.append(inp) if unlabeled: issues.append(self._create_issue( code="A-05", severity="critical", message=f"레이블이 연결되지 않은 폼 요소가 {len(unlabeled)}개 발견되었습니다", element=str(unlabeled[0])[:200] if unlabeled else None, suggestion="