""" HTML/CSS Standards Checker Engine (F-002). Checks HTML5 validity, semantic tags, CSS inline usage, etc. Uses BeautifulSoup4 + html5lib for parsing. """ import re import logging from collections import Counter from typing import Optional from bs4 import BeautifulSoup from app.engines.base import BaseChecker from app.models.schemas import CategoryResult, Issue logger = logging.getLogger(__name__) DEPRECATED_TAGS = [ "font", "center", "marquee", "blink", "strike", "big", "tt", "basefont", "applet", "dir", "isindex", ] SEMANTIC_TAGS = ["header", "nav", "main", "footer", "section", "article"] class HtmlCssChecker(BaseChecker): """HTML/CSS standards checker engine.""" @property def category_name(self) -> str: return "html_css" async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult: soup = BeautifulSoup(html_content, "html5lib") issues: list[Issue] = [] await self.update_progress(10, "DOCTYPE 검사 중...") issues += self._check_doctype(html_content) await self.update_progress(20, "문자 인코딩 검사 중...") issues += self._check_charset(soup) await self.update_progress(30, "언어 속성 검사 중...") issues += self._check_lang(soup) await self.update_progress(40, "title 태그 검사 중...") issues += self._check_title(soup) await self.update_progress(50, "시맨틱 태그 검사 중...") issues += self._check_semantic_tags(soup) await self.update_progress(60, "이미지 alt 속성 검사 중...") issues += self._check_img_alt(soup) await self.update_progress(70, "중복 ID 검사 중...") issues += self._check_duplicate_ids(soup) await self.update_progress(80, "링크 및 스타일 검사 중...") issues += self._check_empty_links(soup) issues += self._check_inline_styles(soup) issues += self._check_deprecated_tags(soup) await self.update_progress(90, "heading 구조 검사 중...") issues += self._check_heading_hierarchy(soup) issues += self._check_viewport_meta(soup) score = self._calculate_score_by_deduction(issues) await self.update_progress(100, "완료") return self._build_result( category="html_css", score=score, issues=issues, ) def _check_doctype(self, html_content: str) -> list[Issue]: """H-01: Check for declaration.""" stripped = html_content.lstrip() if not stripped.lower().startswith("을 추가하세요", )] return [] def _check_charset(self, soup: BeautifulSoup) -> list[Issue]: """H-02: Check for .""" meta_charset = soup.find("meta", attrs={"charset": True}) meta_content_type = soup.find("meta", attrs={"http-equiv": re.compile(r"content-type", re.I)}) if meta_charset is None and meta_content_type is None: return [self._create_issue( code="H-02", severity="major", message="문자 인코딩(charset) 선언이 없습니다", suggestion='을 태그 안에 추가하세요', )] return [] def _check_lang(self, soup: BeautifulSoup) -> list[Issue]: """H-03: Check for attribute.""" html_tag = soup.find("html") if html_tag is None or not html_tag.get("lang"): return [self._create_issue( code="H-03", severity="minor", message="HTML 언어 속성(lang)이 설정되지 않았습니다", suggestion=' 또는 해당 언어 코드를 추가하세요', )] return [] def _check_title(self, soup: BeautifulSoup) -> list[Issue]: """H-04: Check for tag existence and content.""" title = soup.find("title") if title is None: return [self._create_issue( code="H-04", severity="major", message="<title> 태그가 없습니다", suggestion="<head> 안에 <title> 태그를 추가하세요", )] if title.string is None or title.string.strip() == "": return [self._create_issue( code="H-04", severity="major", message="<title> 태그가 비어있습니다", element=str(title), suggestion="<title> 태그에 페이지 제목을 입력하세요", )] return [] def _check_semantic_tags(self, soup: BeautifulSoup) -> list[Issue]: """H-05: Check for semantic HTML5 tag usage.""" found_tags = set() for tag_name in SEMANTIC_TAGS: if soup.find(tag_name): found_tags.add(tag_name) if not found_tags: return [self._create_issue( code="H-05", severity="minor", message="시맨틱 태그가 사용되지 않았습니다 (header, nav, main, footer, section, article)", suggestion="적절한 시맨틱 태그를 사용하여 문서 구조를 명확히 하세요", )] missing = set(SEMANTIC_TAGS) - found_tags # Only report if major structural elements are missing (main is most important) if "main" in missing: return [self._create_issue( code="H-05", severity="minor", message=f"주요 시맨틱 태그가 누락되었습니다: {', '.join(sorted(missing))}", suggestion="<main> 태그를 사용하여 주요 콘텐츠 영역을 표시하세요", )] return [] def _check_img_alt(self, soup: BeautifulSoup) -> list[Issue]: """H-06: Check all <img> tags have alt attributes.""" issues = [] images = soup.find_all("img") for img in images: if not img.get("alt") and img.get("alt") != "": line = self._get_line_number(img) issues.append(self._create_issue( code="H-06", severity="major", message="이미지에 alt 속성이 없습니다", element=self._truncate_element(str(img)), line=line, suggestion="이미지에 설명을 위한 alt 속성을 추가하세요", )) return issues def _check_duplicate_ids(self, soup: BeautifulSoup) -> list[Issue]: """H-07: Check for duplicate ID attributes.""" issues = [] id_elements = soup.find_all(id=True) id_counter = Counter(el.get("id") for el in id_elements) for id_val, count in id_counter.items(): if count > 1: elements = [el for el in id_elements if el.get("id") == id_val] first_el = elements[0] if elements else None line = self._get_line_number(first_el) if first_el else None issues.append(self._create_issue( code="H-07", severity="critical", message=f"중복 ID 발견: '{id_val}' ({count}회 사용)", element=self._truncate_element(str(first_el)) if first_el else None, line=line, suggestion="각 요소에 고유한 ID를 부여하세요", )) return issues def _check_empty_links(self, soup: BeautifulSoup) -> list[Issue]: """H-08: Check for empty or '#' href links.""" issues = [] links = soup.find_all("a") empty_count = 0 first_element = None first_line = None for link in links: href = link.get("href", "") if href == "" or href == "#": empty_count += 1 if first_element is None: first_element = self._truncate_element(str(link)) first_line = self._get_line_number(link) if empty_count > 0: issues.append(self._create_issue( code="H-08", severity="minor", message=f"빈 링크(href가 비어있거나 '#')가 {empty_count}개 발견되었습니다", element=first_element, line=first_line, suggestion="링크에 유효한 URL을 설정하거나, 버튼이 필요한 경우 <button>을 사용하세요", )) return issues def _check_inline_styles(self, soup: BeautifulSoup) -> list[Issue]: """H-09: Check for inline style attributes.""" issues = [] styled_elements = soup.find_all(style=True) if styled_elements: first_el = styled_elements[0] issues.append(self._create_issue( code="H-09", severity="info", message=f"인라인 스타일이 {len(styled_elements)}개 요소에서 사용되고 있습니다", element=self._truncate_element(str(first_el)), line=self._get_line_number(first_el), suggestion="인라인 스타일 대신 외부 CSS 파일 또는 <style> 태그를 사용하세요", )) return issues def _check_deprecated_tags(self, soup: BeautifulSoup) -> list[Issue]: """H-10: Check for deprecated HTML tags.""" issues = [] for tag_name in DEPRECATED_TAGS: found = soup.find_all(tag_name) if found: first_el = found[0] issues.append(self._create_issue( code="H-10", severity="major", message=f"사용 중단된(deprecated) 태그 <{tag_name}>이(가) {len(found)}회 사용되었습니다", element=self._truncate_element(str(first_el)), line=self._get_line_number(first_el), suggestion=f"<{tag_name}> 대신 CSS를 사용하여 스타일을 적용하세요", )) return issues def _check_heading_hierarchy(self, soup: BeautifulSoup) -> list[Issue]: """H-11: Check heading hierarchy (h1-h6 should not skip levels).""" issues = [] headings = soup.find_all(re.compile(r"^h[1-6]$")) if not headings: return [] prev_level = 0 for heading in headings: level = int(heading.name[1]) if prev_level > 0 and level > prev_level + 1: issues.append(self._create_issue( code="H-11", severity="minor", message=f"heading 계층 구조가 건너뛰어졌습니다: h{prev_level} 다음에 h{level}", element=self._truncate_element(str(heading)), line=self._get_line_number(heading), suggestion=f"h{prev_level} 다음에는 h{prev_level + 1}을 사용하세요", )) break # Only report first skip prev_level = level return issues def _check_viewport_meta(self, soup: BeautifulSoup) -> list[Issue]: """H-12: Check for viewport meta tag.""" viewport = soup.find("meta", attrs={"name": re.compile(r"viewport", re.I)}) if viewport is None: return [self._create_issue( code="H-12", severity="major", message="viewport meta 태그가 없습니다", suggestion='<meta name="viewport" content="width=device-width, initial-scale=1.0">을 추가하세요', )] return [] @staticmethod def _get_line_number(element) -> Optional[int]: """Extract source line number from a BeautifulSoup element.""" if element and hasattr(element, "sourceline"): return element.sourceline return None @staticmethod def _truncate_element(element_str: str, max_len: int = 200) -> str: """Truncate element string for display.""" if len(element_str) > max_len: return element_str[:max_len] + "..." return element_str