feat: 웹사이트 표준화 검사 도구 구현

- 4개 검사 엔진: HTML/CSS, 접근성(WCAG), SEO, 성능/보안 (총 50개 항목)
- FastAPI 백엔드 (9개 API, SSE 실시간 진행, PDF/JSON 리포트)
- Next.js 15 프론트엔드 (6개 페이지, 29개 컴포넌트, 반원 게이지 차트)
- Docker Compose 배포 (Backend:8011, Frontend:3011, MongoDB:27022, Redis:6392)
- 전체 테스트 32/32 PASS

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 13:57:27 +09:00
parent c37cda5b13
commit b5fa5d96b9
93 changed files with 18735 additions and 22 deletions
--- a/backend/app/engines/seo.py
+++ b/backend/app/engines/seo.py
@ -0,0 +1,382 @@
+"""
+SEO Optimization Checker Engine (F-004).
+Checks meta tags, OG tags, robots.txt, sitemap.xml, structured data, etc.
+"""
+
+import re
+import json
+import logging
+from urllib.parse import urlparse, urljoin
+from typing import Optional
+
+import httpx
+from bs4 import BeautifulSoup
+
+from app.engines.base import BaseChecker
+from app.models.schemas import CategoryResult, Issue
+
+logger = logging.getLogger(__name__)
+
+
+class SeoChecker(BaseChecker):
+    """SEO optimization checker engine."""
+
+    @property
+    def category_name(self) -> str:
+        """Return the identifier of this checker's category, the string ``"seo"``."""
+        return "seo"
+
+    async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult:
+        soup = BeautifulSoup(html_content, "html5lib")
+        issues: list[Issue] = []
+        meta_info: dict = {}
+
+        await self.update_progress(10, "title 태그 검사 중...")
+        issues += self._check_title(soup, meta_info)
+
+        await self.update_progress(20, "meta description 검사 중...")
+        issues += self._check_meta_description(soup, meta_info)
+        issues += self._check_meta_keywords(soup, meta_info)
+
+        await self.update_progress(30, "OG 태그 검사 중...")
+        issues += self._check_og_tags(soup)
+        issues += self._check_twitter_card(soup)
+
+        await self.update_progress(40, "canonical URL 검사 중...")
+        issues += self._check_canonical(soup)
+
+        await self.update_progress(50, "robots.txt 확인 중...")
+        issues += await self._check_robots_txt(url, meta_info)
+
+        await self.update_progress(60, "sitemap.xml 확인 중...")
+        issues += await self._check_sitemap(url, meta_info)
+
+        await self.update_progress(70, "H1 태그 검사 중...")
+        issues += self._check_h1(soup)
+
+        await self.update_progress(80, "구조화 데이터 검사 중...")
+        issues += self._check_structured_data(soup, html_content, meta_info)
+
+        await self.update_progress(90, "기타 항목 검사 중...")
+        issues += self._check_favicon(soup)
+        issues += self._check_viewport(soup)
+        issues += self._check_url_structure(url)
+        issues += self._check_img_alt_seo(soup)
+
+        score = self._calculate_score_by_deduction(issues)
+        await self.update_progress(100, "완료")
+
+        return self._build_result(
+            category="seo",
+            score=score,
+            issues=issues,
+            meta_info=meta_info,
+        )
+
+    def _check_title(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
+        """S-01: Check title tag existence and length (10-60 chars)."""
+        issues = []
+        title = soup.find("title")
+
+        if title is None or not title.string or title.string.strip() == "":
+            meta_info["title"] = None
+            meta_info["title_length"] = 0
+            issues.append(self._create_issue(
+                code="S-01",
+                severity="critical",
+                message="<title> 태그가 없거나 비어있습니다",
+                suggestion="검색 결과에 표시될 10-60자 길이의 페이지 제목을 설정하세요",
+            ))
+            return issues
+
+        title_text = title.string.strip()
+        title_len = len(title_text)
+        meta_info["title"] = title_text
+        meta_info["title_length"] = title_len
+
+        if title_len < 10:
+            issues.append(self._create_issue(
+                code="S-01",
+                severity="critical",
+                message=f"title이 너무 짧습니다 ({title_len}자, 권장 10-60자)",
+                element=f"<title>{title_text}</title>",
+                suggestion="검색 결과에 효과적으로 표시되도록 10자 이상의 제목을 작성하세요",
+            ))
+        elif title_len > 60:
+            issues.append(self._create_issue(
+                code="S-01",
+                severity="minor",
+                message=f"title이 너무 깁니다 ({title_len}자, 권장 10-60자)",
+                element=f"<title>{title_text[:50]}...</title>",
+                suggestion="검색 결과에서 잘리지 않도록 60자 이내로 제목을 줄이세요",
+            ))
+        return issues
+
+    def _check_meta_description(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
+        """S-02: Check meta description existence and length (50-160 chars)."""
+        issues = []
+        desc = soup.find("meta", attrs={"name": re.compile(r"^description$", re.I)})
+
+        if desc is None or not desc.get("content"):
+            meta_info["description"] = None
+            meta_info["description_length"] = 0
+            issues.append(self._create_issue(
+                code="S-02",
+                severity="major",
+                message="meta description이 없습니다",
+                suggestion='<meta name="description" content="페이지 설명">을 추가하세요 (50-160자 권장)',
+            ))
+            return issues
+
+        content = desc["content"].strip()
+        content_len = len(content)
+        meta_info["description"] = content
+        meta_info["description_length"] = content_len
+
+        if content_len < 50:
+            issues.append(self._create_issue(
+                code="S-02",
+                severity="major",
+                message=f"meta description이 너무 짧습니다 ({content_len}자, 권장 50-160자)",
+                suggestion="검색 결과에서 페이지를 효과적으로 설명하도록 50자 이상으로 작성하세요",
+            ))
+        elif content_len > 160:
+            issues.append(self._create_issue(
+                code="S-02",
+                severity="minor",
+                message=f"meta description이 너무 깁니다 ({content_len}자, 권장 50-160자)",
+                suggestion="검색 결과에서 잘리지 않도록 160자 이내로 줄이세요",
+            ))
+        return issues
+
+    def _check_meta_keywords(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
+        """S-03: Check meta keywords (informational only)."""
+        keywords = soup.find("meta", attrs={"name": re.compile(r"^keywords$", re.I)})
+        if keywords is None or not keywords.get("content"):
+            meta_info["has_keywords"] = False
+            return [self._create_issue(
+                code="S-03",
+                severity="info",
+                message="meta keywords가 없습니다 (현재 대부분의 검색엔진에서 무시됨)",
+                suggestion="meta keywords는 SEO에 큰 영향이 없지만, 참고용으로 추가할 수 있습니다",
+            )]
+        meta_info["has_keywords"] = True
+        return []
+
+    def _check_og_tags(self, soup: BeautifulSoup) -> list[Issue]:
+        """S-04: Check Open Graph tags (og:title, og:description, og:image)."""
+        issues = []
+        required_og = ["og:title", "og:description", "og:image"]
+        missing = []
+
+        for prop in required_og:
+            og = soup.find("meta", attrs={"property": prop})
+            if og is None or not og.get("content"):
+                missing.append(prop)
+
+        if missing:
+            issues.append(self._create_issue(
+                code="S-04",
+                severity="major",
+                message=f"Open Graph 태그가 누락되었습니다: {', '.join(missing)}",
+                suggestion=f'누락된 OG 태그를 추가하세요. 예: <meta property="{missing[0]}" content="값">',
+            ))
+        return issues
+
+    def _check_twitter_card(self, soup: BeautifulSoup) -> list[Issue]:
+        """S-05: Check Twitter Card tags."""
+        twitter_card = soup.find("meta", attrs={"name": "twitter:card"})
+        twitter_title = soup.find("meta", attrs={"name": "twitter:title"})
+
+        if twitter_card is None and twitter_title is None:
+            return [self._create_issue(
+                code="S-05",
+                severity="minor",
+                message="Twitter Card 태그가 없습니다",
+                suggestion='<meta name="twitter:card" content="summary_large_image">를 추가하세요',
+            )]
+        return []
+
+    def _check_canonical(self, soup: BeautifulSoup) -> list[Issue]:
+        """S-06: Check canonical URL."""
+        canonical = soup.find("link", attrs={"rel": "canonical"})
+        if canonical is None or not canonical.get("href"):
+            return [self._create_issue(
+                code="S-06",
+                severity="major",
+                message="canonical URL이 설정되지 않았습니다",
+                suggestion='<link rel="canonical" href="현재페이지URL">을 추가하여 중복 콘텐츠 문제를 방지하세요',
+            )]
+        return []
+
+    async def _check_robots_txt(self, url: str, meta_info: dict) -> list[Issue]:
+        """S-07: Check robots.txt accessibility."""
+        parsed = urlparse(url)
+        robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
+
+        try:
+            async with httpx.AsyncClient(timeout=httpx.Timeout(5.0), verify=False) as client:
+                resp = await client.get(robots_url)
+                if resp.status_code == 200:
+                    meta_info["has_robots_txt"] = True
+                    return []
+                else:
+                    meta_info["has_robots_txt"] = False
+                    return [self._create_issue(
+                        code="S-07",
+                        severity="major",
+                        message=f"robots.txt에 접근할 수 없습니다 (HTTP {resp.status_code})",
+                        suggestion="검색엔진 크롤링을 제어하기 위해 /robots.txt 파일을 생성하세요",
+                    )]
+        except Exception as e:
+            logger.warning("robots.txt check failed for %s: %s", url, str(e))
+            meta_info["has_robots_txt"] = False
+            return [self._create_issue(
+                code="S-07",
+                severity="major",
+                message="robots.txt에 접근할 수 없습니다",
+                suggestion="검색엔진 크롤링을 제어하기 위해 /robots.txt 파일을 생성하세요",
+            )]
+
+    async def _check_sitemap(self, url: str, meta_info: dict) -> list[Issue]:
+        """S-08: Check sitemap.xml accessibility."""
+        parsed = urlparse(url)
+        sitemap_url = f"{parsed.scheme}://{parsed.netloc}/sitemap.xml"
+
+        try:
+            async with httpx.AsyncClient(timeout=httpx.Timeout(5.0), verify=False) as client:
+                resp = await client.get(sitemap_url)
+                if resp.status_code == 200:
+                    meta_info["has_sitemap"] = True
+                    return []
+                else:
+                    meta_info["has_sitemap"] = False
+                    return [self._create_issue(
+                        code="S-08",
+                        severity="major",
+                        message=f"sitemap.xml에 접근할 수 없습니다 (HTTP {resp.status_code})",
+                        suggestion="검색엔진이 사이트 구조를 이해할 수 있도록 /sitemap.xml을 생성하세요",
+                    )]
+        except Exception as e:
+            logger.warning("sitemap.xml check failed for %s: %s", url, str(e))
+            meta_info["has_sitemap"] = False
+            return [self._create_issue(
+                code="S-08",
+                severity="major",
+                message="sitemap.xml에 접근할 수 없습니다",
+                suggestion="검색엔진이 사이트 구조를 이해할 수 있도록 /sitemap.xml을 생성하세요",
+            )]
+
+    def _check_h1(self, soup: BeautifulSoup) -> list[Issue]:
+        """S-09: Check H1 tag existence and uniqueness."""
+        h1_tags = soup.find_all("h1")
+        issues = []
+
+        if len(h1_tags) == 0:
+            issues.append(self._create_issue(
+                code="S-09",
+                severity="critical",
+                message="H1 태그가 없습니다",
+                suggestion="페이지의 주요 제목을 <h1> 태그로 추가하세요",
+            ))
+        elif len(h1_tags) > 1:
+            issues.append(self._create_issue(
+                code="S-09",
+                severity="critical",
+                message=f"H1 태그가 {len(h1_tags)}개 발견되었습니다 (1개 권장)",
+                element=self._truncate_element(str(h1_tags[0])),
+                suggestion="페이지당 H1 태그는 1개만 사용하세요",
+            ))
+        return issues
+
+    def _check_structured_data(self, soup: BeautifulSoup, html_content: str, meta_info: dict) -> list[Issue]:
+        """S-10: Check for structured data (JSON-LD, Microdata, RDFa)."""
+        structured_types = []
+
+        # JSON-LD
+        json_ld_scripts = soup.find_all("script", attrs={"type": "application/ld+json"})
+        if json_ld_scripts:
+            structured_types.append("JSON-LD")
+
+        # Microdata
+        microdata = soup.find_all(attrs={"itemscope": True})
+        if microdata:
+            structured_types.append("Microdata")
+
+        # RDFa
+        rdfa = soup.find_all(attrs={"typeof": True})
+        if rdfa:
+            structured_types.append("RDFa")
+
+        meta_info["structured_data_types"] = structured_types
+
+        if not structured_types:
+            return [self._create_issue(
+                code="S-10",
+                severity="minor",
+                message="구조화 데이터(JSON-LD, Microdata, RDFa)가 없습니다",
+                suggestion='<script type="application/ld+json">을 사용하여 구조화 데이터를 추가하세요',
+            )]
+        return []
+
+    def _check_favicon(self, soup: BeautifulSoup) -> list[Issue]:
+        """S-11: Check favicon existence."""
+        favicon = soup.find("link", attrs={"rel": re.compile(r"icon", re.I)})
+        if favicon is None:
+            return [self._create_issue(
+                code="S-11",
+                severity="minor",
+                message="favicon이 설정되지 않았습니다",
+                suggestion='<link rel="icon" href="/favicon.ico">를 추가하세요',
+            )]
+        return []
+
+    def _check_viewport(self, soup: BeautifulSoup) -> list[Issue]:
+        """S-12: Check viewport meta tag for mobile friendliness."""
+        viewport = soup.find("meta", attrs={"name": re.compile(r"^viewport$", re.I)})
+        if viewport is None:
+            return [self._create_issue(
+                code="S-12",
+                severity="major",
+                message="viewport meta 태그가 없습니다 (모바일 친화성 부족)",
+                suggestion='<meta name="viewport" content="width=device-width, initial-scale=1.0">을 추가하세요',
+            )]
+        return []
+
+    def _check_url_structure(self, url: str) -> list[Issue]:
+        """S-13: Check URL structure for SEO friendliness."""
+        parsed = urlparse(url)
+        path = parsed.path
+
+        # Check for special characters (excluding common ones like /, -, _)
+        special_chars = re.findall(r"[^a-zA-Z0-9/\-_.]", path)
+        if len(special_chars) > 3:
+            return [self._create_issue(
+                code="S-13",
+                severity="minor",
+                message=f"URL에 특수 문자가 많습니다 ({len(special_chars)}개)",
+                suggestion="URL은 영문, 숫자, 하이픈(-)을 사용하여 깔끔하게 구성하세요",
+            )]
+        return []
+
+    def _check_img_alt_seo(self, soup: BeautifulSoup) -> list[Issue]:
+        """S-14: Check image alt attributes from SEO perspective."""
+        images = soup.find_all("img")
+        if not images:
+            return []
+
+        missing_alt = [img for img in images if not img.get("alt") and img.get("alt") != ""]
+        if missing_alt:
+            return [self._create_issue(
+                code="S-14",
+                severity="major",
+                message=f"alt 속성이 없는 이미지가 {len(missing_alt)}개 발견되었습니다",
+                element=self._truncate_element(str(missing_alt[0])) if missing_alt else None,
+                suggestion="검색엔진이 이미지를 이해할 수 있도록 모든 이미지에 설명적인 alt 속성을 추가하세요",
+            )]
+        return []
+
+    @staticmethod
+    def _truncate_element(element_str: str, max_len: int = 200) -> str:
+        if len(element_str) > max_len:
+            return element_str[:max_len] + "..."
+        return element_str