Files
web-inspector/backend/app/engines/html_css.py
jungwoo choi 44ad36e2ab refactor: 4개 검사 엔진을 YAML 기반 표준 규칙으로 리팩토링
- YAML 규칙 파일 4개 신규 생성 (html_css, accessibility, seo, performance_security)
  W3C, WCAG 2.0/2.1/2.2, OWASP, Google Search Essentials 공식 표준 기반
- rules/__init__.py: YAML 로더 + 캐싱 + 리로드 모듈
- html_css.py: 30개 폐기 요소, 100+개 폐기 속성을 YAML에서 동적 로드
- accessibility.py: WCAG 버전 선택 지원 (wcag_version 파라미터)
- seo.py: title/description 길이, OG 필수 태그 등 임계값 YAML 로드
- performance_security.py: COOP/COEP/CORP 검사 추가, 정보 노출 헤더 검사 추가,
  TTFB/페이지 크기 임계값 YAML 로드
- PyYAML 의존성 추가

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 15:49:57 +09:00

409 lines
16 KiB
Python

"""
HTML/CSS Standards Checker Engine (F-002).
Checks HTML5 validity, semantic tags, CSS inline usage, etc.
Uses BeautifulSoup4 + html5lib for parsing.
Rules loaded from rules/html_css.yaml.
"""
import re
import logging
from collections import Counter
from typing import Optional
from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
from app.rules import get_rules
logger = logging.getLogger(__name__)
def _load_obsolete_elements() -> list[dict]:
    """Return the ``obsolete_elements`` entries of the ``html_css`` rule set.

    Returns:
        The list stored under ``obsolete_elements``, or an empty list when
        the rule set has no such key.
    """
    return get_rules("html_css").get("obsolete_elements", [])
def _load_obsolete_attributes() -> dict[str, list[dict]]:
    """Return the ``obsolete_attributes`` mapping of the ``html_css`` rule set.

    Returns:
        The mapping stored under ``obsolete_attributes`` (group key ->
        list of attribute entries), or an empty dict when the key is absent.
    """
    return get_rules("html_css").get("obsolete_attributes", {})
def _load_semantic_tags() -> list[str]:
    """Return the tag names listed under ``semantic_elements.structural``.

    Returns:
        The ``tag`` value of every structural entry, in rule-file order.
        Empty when either ``semantic_elements`` or ``structural`` is absent.
    """
    semantic_section = get_rules("html_css").get("semantic_elements", {})
    tag_names: list[str] = []
    for entry in semantic_section.get("structural", []):
        tag_names.append(entry["tag"])
    return tag_names
class HtmlCssChecker(BaseChecker):
    """HTML/CSS standards checker engine.

    Parses a page with BeautifulSoup (html5lib) and runs a fixed series of
    markup checks, each reporting issues under an ``H-xx`` code. The
    collected issues are scored and returned as a ``CategoryResult``.
    """
@property
def category_name(self) -> str:
    """Return the identifier of this checker's category (``"html_css"``)."""
    return "html_css"
async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult:
    """Run every HTML/CSS check against a page and build the category result.

    Args:
        url: Address of the inspected page (not used by these checks).
        html_content: Raw HTML source of the page.
        headers: Response headers of the page (not used by these checks).

    Returns:
        The result built from all collected issues and the deduction score.
    """
    soup = BeautifulSoup(html_content, "html5lib")

    # Each stage: (progress percent, progress message, (check, argument) pairs).
    # Stages run strictly in this order; progress is reported before the
    # checks of the stage are executed.
    stages = (
        (10, "DOCTYPE 검사 중...", ((self._check_doctype, html_content),)),
        (20, "문자 인코딩 검사 중...", ((self._check_charset, soup),)),
        (30, "언어 속성 검사 중...", ((self._check_lang, soup),)),
        (40, "title 태그 검사 중...", ((self._check_title, soup),)),
        (50, "시맨틱 태그 검사 중...", ((self._check_semantic_tags, soup),)),
        (55, "이미지 alt 속성 검사 중...", ((self._check_img_alt, soup),)),
        (60, "중복 ID 검사 중...", ((self._check_duplicate_ids, soup),)),
        (65, "링크 및 스타일 검사 중...", (
            (self._check_empty_links, soup),
            (self._check_inline_styles, soup),
        )),
        (75, "Obsolete 태그 검사 중...", ((self._check_obsolete_tags, soup),)),
        (80, "Obsolete 속성 검사 중...", ((self._check_obsolete_attributes, soup),)),
        (90, "heading 구조 검사 중...", (
            (self._check_heading_hierarchy, soup),
            (self._check_viewport_meta, soup),
        )),
    )

    issues: list[Issue] = []
    for percent, message, calls in stages:
        await self.update_progress(percent, message)
        for run_check, argument in calls:
            issues += run_check(argument)

    score = self._calculate_score_by_deduction(issues)
    await self.update_progress(100, "완료")
    return self._build_result(
        category="html_css",
        score=score,
        issues=issues,
    )
def _check_doctype(self, html_content: str) -> list[Issue]:
"""H-01: Check for <!DOCTYPE html> declaration."""
stripped = html_content.lstrip()
if not stripped.lower().startswith("<!doctype html"):
return [self._create_issue(
code="H-01",
severity="major",
message="DOCTYPE 선언이 없습니다",
suggestion="문서 최상단에 <!DOCTYPE html>을 추가하세요",
)]
return []
def _check_charset(self, soup: BeautifulSoup) -> list[Issue]:
    """H-02: Check that the page declares a character encoding.

    Either a ``<meta charset=...>`` tag or a ``<meta http-equiv=...>`` tag
    whose ``http-equiv`` value contains ``content-type`` (case-insensitive)
    counts as a declaration. The declared value itself is not inspected.

    Returns:
        An empty list when a declaration exists, otherwise one ``H-02`` issue.
    """
    content_type_pattern = re.compile(r"content-type", re.I)
    declarations = (
        soup.find("meta", attrs={"charset": True}),
        soup.find("meta", attrs={"http-equiv": content_type_pattern}),
    )
    if any(tag is not None for tag in declarations):
        return []

    return [self._create_issue(
        code="H-02",
        severity="major",
        message="문자 인코딩(charset) 선언이 없습니다",
        suggestion='<meta charset="utf-8">을 <head> 태그 안에 추가하세요',
    )]
def _check_lang(self, soup: BeautifulSoup) -> list[Issue]:
    """H-03: Check that the ``<html>`` element carries a non-empty ``lang``.

    Returns:
        An empty list when ``lang`` is set, otherwise one ``H-03`` issue.
    """
    root = soup.find("html")
    if root is not None and root.get("lang"):
        return []

    return [self._create_issue(
        code="H-03",
        severity="minor",
        message="HTML 언어 속성(lang)이 설정되지 않았습니다",
        suggestion='<html lang="ko"> 또는 해당 언어 코드를 추가하세요',
    )]
def _check_title(self, soup: BeautifulSoup) -> list[Issue]:
    """H-04: Check that a ``<title>`` tag exists and has non-blank text.

    Returns:
        An empty list when the first ``<title>`` has text, otherwise one
        ``H-04`` issue describing whether the tag is missing or empty.
    """
    title_tag = soup.find("title")
    if title_tag is None:
        return [self._create_issue(
            code="H-04",
            severity="major",
            message="<title> 태그가 없습니다",
            suggestion="<head> 안에 <title> 태그를 추가하세요",
        )]

    title_text = title_tag.string
    if title_text is not None and title_text.strip() != "":
        return []

    # The tag exists but holds no usable text.
    return [self._create_issue(
        code="H-04",
        severity="major",
        message="<title> 태그가 비어있습니다",
        element=str(title_tag),
        suggestion="<title> 태그에 페이지 제목을 입력하세요",
    )]
def _check_semantic_tags(self, soup: BeautifulSoup) -> list[Issue]:
    """H-05: Check that structural semantic tags from the rule file are used.

    Two situations are reported, each as a single ``H-05`` issue:
    none of the configured tags appears in the document, or some appear
    but ``main`` is among the missing ones.

    Returns:
        A list with at most one issue.
    """
    expected = _load_semantic_tags()
    present = {name for name in expected if soup.find(name)}

    if not present:
        tag_list = ", ".join(expected)
        return [self._create_issue(
            code="H-05",
            severity="minor",
            message=f"시맨틱 태그가 사용되지 않았습니다 ({tag_list})",
            suggestion="적절한 시맨틱 태그를 사용하여 문서 구조를 명확히 하세요",
        )]

    absent = set(expected) - present
    if "main" not in absent:
        return []

    return [self._create_issue(
        code="H-05",
        severity="minor",
        message=f"주요 시맨틱 태그가 누락되었습니다: {', '.join(sorted(absent))}",
        suggestion="<main> 태그를 사용하여 주요 콘텐츠 영역을 표시하세요",
    )]
def _check_img_alt(self, soup: BeautifulSoup) -> list[Issue]:
    """H-06: Report every ``<img>`` that has no ``alt`` attribute.

    An explicitly empty ``alt=""`` is accepted; only a missing attribute
    is reported.

    Returns:
        One ``H-06`` issue per offending image, in document order.
    """
    findings = []
    for image in soup.find_all("img"):
        alt_value = image.get("alt")
        # Any non-empty value, or an explicit empty string, is acceptable.
        if alt_value or alt_value == "":
            continue
        findings.append(self._create_issue(
            code="H-06",
            severity="major",
            message="이미지에 alt 속성이 없습니다",
            element=self._truncate_element(str(image)),
            line=self._get_line_number(image),
            suggestion="이미지에 설명을 위한 alt 속성을 추가하세요",
        ))
    return findings
def _check_duplicate_ids(self, soup: BeautifulSoup) -> list[Issue]:
    """H-07: Report every ``id`` value used by more than one element.

    The document is walked once: occurrences are counted and the first
    element carrying each id is remembered, so no rescan of the element
    list is needed per duplicated id.

    Returns:
        One ``H-07`` issue per duplicated id, ordered by first appearance.
        Each issue points at the first element that uses the id.
    """
    id_counts: Counter = Counter()
    first_seen: dict = {}
    for el in soup.find_all(id=True):
        id_val = el.get("id")
        id_counts[id_val] += 1
        # setdefault keeps the earliest element for each id value.
        first_seen.setdefault(id_val, el)

    issues = []
    for id_val, count in id_counts.items():
        if count <= 1:
            continue
        first_el = first_seen[id_val]
        issues.append(self._create_issue(
            code="H-07",
            severity="critical",
            message=f"중복 ID 발견: '{id_val}' ({count}회 사용)",
            element=self._truncate_element(str(first_el)),
            line=self._get_line_number(first_el),
            suggestion="각 요소에 고유한 ID를 부여하세요",
        ))
    return issues
def _check_empty_links(self, soup: BeautifulSoup) -> list[Issue]:
    """H-08: Report ``<a>`` tags whose ``href`` is missing, empty, or ``#``.

    Returns:
        An empty list when no such link exists, otherwise a single
        ``H-08`` issue carrying the total count and the first offender.
    """
    # A missing href defaults to "" and is therefore counted as empty.
    empty_links = [
        anchor for anchor in soup.find_all("a")
        if anchor.get("href", "") in ("", "#")
    ]
    if not empty_links:
        return []

    empty_count = len(empty_links)
    first_link = empty_links[0]
    return [self._create_issue(
        code="H-08",
        severity="minor",
        message=f"빈 링크(href가 비어있거나 '#')가 {empty_count}개 발견되었습니다",
        element=self._truncate_element(str(first_link)),
        line=self._get_line_number(first_link),
        suggestion="링크에 유효한 URL을 설정하거나, 버튼이 필요한 경우 <button>을 사용하세요",
    )]
def _check_inline_styles(self, soup: BeautifulSoup) -> list[Issue]:
    """H-09: Report use of inline ``style`` attributes.

    Returns:
        An empty list when no element has a ``style`` attribute, otherwise
        a single ``H-09`` issue with the element count and the first match.
    """
    styled = soup.find_all(style=True)
    if not styled:
        return []

    sample = styled[0]
    return [self._create_issue(
        code="H-09",
        severity="info",
        message=f"인라인 스타일이 {len(styled)}개 요소에서 사용되고 있습니다",
        element=self._truncate_element(str(sample)),
        line=self._get_line_number(sample),
        suggestion="인라인 스타일 대신 외부 CSS 파일 또는 <style> 태그를 사용하세요",
    )]
def _check_obsolete_tags(self, soup: BeautifulSoup) -> list[Issue]:
    """H-10: Report obsolete HTML tags listed in the rule file.

    Each rule entry supplies the ``tag`` to look for and may supply a
    ``replacement`` (default ``"CSS"``) and a ``severity`` (default
    ``"major"``).

    Returns:
        One ``H-10`` issue per obsolete tag found, in rule-file order,
        each with the usage count and the first matching element.
    """
    findings = []
    for rule in _load_obsolete_elements():
        tag_name = rule["tag"]
        matches = soup.find_all(tag_name)
        if not matches:
            continue

        replacement = rule.get("replacement", "CSS")
        severity = rule.get("severity", "major")
        sample = matches[0]
        findings.append(self._create_issue(
            code="H-10",
            severity=severity,
            message=f"사용 중단된(obsolete) 태그 <{tag_name}>이(가) {len(matches)}회 사용되었습니다",
            element=self._truncate_element(str(sample)),
            line=self._get_line_number(sample),
            suggestion=f"<{tag_name}> 대신 {replacement}을(를) 사용하세요",
        ))
    return findings
def _check_obsolete_attributes(self, soup: BeautifulSoup) -> list[Issue]:
    """H-13: Report obsolete HTML attributes listed in the rule file.

    The rule file groups attributes under keys that map to one or more
    HTML tags, plus a ``global`` group that applies to any tag. Every
    (element, attribute) occurrence is counted exactly once, even when it
    is matched by more than one group (for example an element-specific
    rule and a global rule for the same attribute).

    Returns:
        An empty list when nothing is found, otherwise a single ``H-13``
        issue with the total count and the first occurrence as an example.
    """
    obsolete_attrs = _load_obsolete_attributes() or {}

    # Rule-file group key -> tags the group applies to (None = any tag).
    # Order matters: the first occurrence found becomes the reported
    # example. "embed" is listed once so its attributes are not scanned
    # (and counted) twice.
    tag_groups = {
        "a": ("a",), "body": ("body",), "br": ("br",), "form": ("form",),
        "hr": ("hr",), "html": ("html",), "iframe": ("iframe",), "img": ("img",),
        "input": ("input",), "link": ("link",), "meta": ("meta",), "script": ("script",),
        "style": ("style",), "table": ("table",), "embed": ("embed",),
        "td_th": ("td", "th"),
        "tr": ("tr",),
        "thead_tbody_tfoot": ("thead", "tbody", "tfoot"),
        "ol_ul": ("ol", "ul"),
        "heading": ("h1", "h2", "h3", "h4", "h5", "h6"),
        "global": (None,),
    }

    # (id(element), attribute name) pairs already counted.
    counted: set = set()
    found_count = 0
    first_el = None
    first_attr = None

    for yaml_key, html_tags in tag_groups.items():
        # "or []" tolerates a group that is present but empty (null) in YAML.
        for attr_entry in obsolete_attrs.get(yaml_key) or []:
            attr_name = attr_entry["attr"]
            for html_tag in html_tags:
                for el in soup.find_all(html_tag, attrs={attr_name: True}):
                    occurrence = (id(el), attr_name)
                    if occurrence in counted:
                        continue
                    counted.add(occurrence)
                    found_count += 1
                    if first_el is None:
                        first_el = el
                        first_attr = (
                            f"{html_tag}[{attr_name}]" if html_tag else attr_name
                        )

    if found_count == 0:
        return []

    return [self._create_issue(
        code="H-13",
        severity="minor",
        message=f"사용 중단된(obsolete) HTML 속성이 {found_count}개 발견되었습니다 (예: {first_attr})",
        element=self._truncate_element(str(first_el)),
        line=self._get_line_number(first_el),
        suggestion="사용 중단된 HTML 속성 대신 CSS를 사용하세요 (W3C HTML Living Standard 참조)",
    )]
def _check_heading_hierarchy(self, soup: BeautifulSoup) -> list[Issue]:
    """H-11: Check that consecutive headings do not skip a level.

    Headings are compared pairwise in document order; a heading more than
    one level deeper than its predecessor is a skip. Only the first skip
    is reported.

    Returns:
        An empty list when no level is skipped (or there are no headings),
        otherwise a single ``H-11`` issue for the first skip.
    """
    headings = soup.find_all(re.compile(r"^h[1-6]$"))
    levels = [int(tag.name[1]) for tag in headings]

    for prev_level, level, heading in zip(levels, levels[1:], headings[1:]):
        if level <= prev_level + 1:
            continue
        return [self._create_issue(
            code="H-11",
            severity="minor",
            message=f"heading 계층 구조가 건너뛰어졌습니다: h{prev_level} 다음에 h{level}",
            element=self._truncate_element(str(heading)),
            line=self._get_line_number(heading),
            suggestion=f"h{prev_level} 다음에는 h{prev_level + 1}을 사용하세요",
        )]
    return []
def _check_viewport_meta(self, soup: BeautifulSoup) -> list[Issue]:
    """H-12: Check that a viewport ``<meta>`` tag is present.

    A ``<meta>`` whose ``name`` contains ``viewport`` (case-insensitive)
    satisfies the check.

    Returns:
        An empty list when the tag exists, otherwise one ``H-12`` issue.
    """
    name_pattern = re.compile(r"viewport", re.I)
    if soup.find("meta", attrs={"name": name_pattern}) is not None:
        return []

    return [self._create_issue(
        code="H-12",
        severity="major",
        message="viewport meta 태그가 없습니다",
        suggestion='<meta name="viewport" content="width=device-width, initial-scale=1.0">을 추가하세요',
    )]
@staticmethod
def _get_line_number(element) -> Optional[int]:
if element and hasattr(element, "sourceline"):
return element.sourceline
return None
@staticmethod
def _truncate_element(element_str: str, max_len: int = 200) -> str:
if len(element_str) > max_len:
return element_str[:max_len] + "..."
return element_str