""" HTML/CSS Standards Checker Engine (F-002). Checks HTML5 validity, semantic tags, CSS inline usage, etc. Uses BeautifulSoup4 + html5lib for parsing. Rules loaded from rules/html_css.yaml. """ import re import logging from collections import Counter from typing import Optional from bs4 import BeautifulSoup from app.engines.base import BaseChecker from app.models.schemas import CategoryResult, Issue from app.rules import get_rules logger = logging.getLogger(__name__) def _load_obsolete_elements() -> list[dict]: """Load obsolete elements from YAML.""" rules = get_rules("html_css") return rules.get("obsolete_elements", []) def _load_obsolete_attributes() -> dict[str, list[dict]]: """Load obsolete attributes from YAML, keyed by element name.""" rules = get_rules("html_css") return rules.get("obsolete_attributes", {}) def _load_semantic_tags() -> list[str]: """Load structural semantic tag names from YAML.""" rules = get_rules("html_css") structural = rules.get("semantic_elements", {}).get("structural", []) return [item["tag"] for item in structural] class HtmlCssChecker(BaseChecker): """HTML/CSS standards checker engine.""" @property def category_name(self) -> str: return "html_css" async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult: soup = BeautifulSoup(html_content, "html5lib") issues: list[Issue] = [] await self.update_progress(10, "DOCTYPE 검사 중...") issues += self._check_doctype(html_content) await self.update_progress(20, "문자 인코딩 검사 중...") issues += self._check_charset(soup) await self.update_progress(30, "언어 속성 검사 중...") issues += self._check_lang(soup) await self.update_progress(40, "title 태그 검사 중...") issues += self._check_title(soup) await self.update_progress(50, "시맨틱 태그 검사 중...") issues += self._check_semantic_tags(soup) await self.update_progress(55, "이미지 alt 속성 검사 중...") issues += self._check_img_alt(soup) await self.update_progress(60, "중복 ID 검사 중...") issues += self._check_duplicate_ids(soup) await self.update_progress(65, "링크 및 스타일 검사 중...") issues += self._check_empty_links(soup) issues += self._check_inline_styles(soup) await self.update_progress(75, "Obsolete 태그 검사 중...") issues += self._check_obsolete_tags(soup) await self.update_progress(80, "Obsolete 속성 검사 중...") issues += self._check_obsolete_attributes(soup) await self.update_progress(90, "heading 구조 검사 중...") issues += self._check_heading_hierarchy(soup) issues += self._check_viewport_meta(soup) score = self._calculate_score_by_deduction(issues) await self.update_progress(100, "완료") return self._build_result( category="html_css", score=score, issues=issues, ) def _check_doctype(self, html_content: str) -> list[Issue]: """H-01: Check for declaration.""" stripped = html_content.lstrip() if not stripped.lower().startswith("을 추가하세요", )] return [] def _check_charset(self, soup: BeautifulSoup) -> list[Issue]: """H-02: Check for .""" meta_charset = soup.find("meta", attrs={"charset": True}) meta_content_type = soup.find("meta", attrs={"http-equiv": re.compile(r"content-type", re.I)}) if meta_charset is None and meta_content_type is None: return [self._create_issue( code="H-02", severity="major", message="문자 인코딩(charset) 선언이 없습니다", suggestion='을
태그 안에 추가하세요', )] return [] def _check_lang(self, soup: BeautifulSoup) -> list[Issue]: """H-03: Check for attribute.""" html_tag = soup.find("html") if html_tag is None or not html_tag.get("lang"): return [self._create_issue( code="H-03", severity="minor", message="HTML 언어 속성(lang)이 설정되지 않았습니다", suggestion=' 또는 해당 언어 코드를 추가하세요', )] return [] def _check_title(self, soup: BeautifulSoup) -> list[Issue]: """H-04: Check for