feat: 웹사이트 표준화 검사 도구 구현

- 4개 검사 엔진: HTML/CSS, 접근성(WCAG), SEO, 성능/보안 (총 50개 항목)
- FastAPI 백엔드 (9개 API, SSE 실시간 진행, PDF/JSON 리포트)
- Next.js 15 프론트엔드 (6개 페이지, 29개 컴포넌트, 반원 게이지 차트)
- Docker Compose 배포 (Backend:8011, Frontend:3011, MongoDB:27022, Redis:6392)
- 전체 테스트 32/32 PASS

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2026-02-13 13:57:27 +09:00
parent c37cda5b13
commit b5fa5d96b9
93 changed files with 18735 additions and 22 deletions

382
backend/app/engines/seo.py Normal file
View File

@ -0,0 +1,382 @@
"""
SEO Optimization Checker Engine (F-004).
Checks meta tags, OG tags, robots.txt, sitemap.xml, structured data, etc.
"""
import re
import json
import logging
from urllib.parse import urlparse, urljoin
from typing import Optional
import httpx
from bs4 import BeautifulSoup
from app.engines.base import BaseChecker
from app.models.schemas import CategoryResult, Issue
logger = logging.getLogger(__name__)
class SeoChecker(BaseChecker):
    """SEO optimization checker engine (F-004).

    Runs 14 checks (S-01..S-14) against a fetched page: title/meta tags,
    Open Graph and Twitter Card tags, canonical URL, robots.txt,
    sitemap.xml, H1 structure, structured data, favicon, viewport,
    URL structure, and image alt attributes.  Each check returns a list
    of Issue objects; the final score is computed by deduction.
    """

    @property
    def category_name(self) -> str:
        """Category identifier used in reports and progress events."""
        return "seo"

    async def check(self, url: str, html_content: str, headers: dict) -> CategoryResult:
        """Run every SEO check against *html_content* fetched from *url*.

        Args:
            url: Page URL; also used to derive /robots.txt and /sitemap.xml.
            html_content: Raw HTML of the page.
            headers: HTTP response headers (part of the shared checker
                interface; not used by this engine).

        Returns:
            CategoryResult carrying the deduction-based score, all
            collected issues, and the meta_info gathered along the way.
        """
        soup = BeautifulSoup(html_content, "html5lib")
        issues: list[Issue] = []
        meta_info: dict = {}

        await self.update_progress(10, "title 태그 검사 중...")
        issues += self._check_title(soup, meta_info)
        await self.update_progress(20, "meta description 검사 중...")
        issues += self._check_meta_description(soup, meta_info)
        issues += self._check_meta_keywords(soup, meta_info)
        await self.update_progress(30, "OG 태그 검사 중...")
        issues += self._check_og_tags(soup)
        issues += self._check_twitter_card(soup)
        await self.update_progress(40, "canonical URL 검사 중...")
        issues += self._check_canonical(soup)
        await self.update_progress(50, "robots.txt 확인 중...")
        issues += await self._check_robots_txt(url, meta_info)
        await self.update_progress(60, "sitemap.xml 확인 중...")
        issues += await self._check_sitemap(url, meta_info)
        await self.update_progress(70, "H1 태그 검사 중...")
        issues += self._check_h1(soup)
        await self.update_progress(80, "구조화 데이터 검사 중...")
        issues += self._check_structured_data(soup, html_content, meta_info)
        await self.update_progress(90, "기타 항목 검사 중...")
        issues += self._check_favicon(soup)
        issues += self._check_viewport(soup)
        issues += self._check_url_structure(url)
        issues += self._check_img_alt_seo(soup)

        score = self._calculate_score_by_deduction(issues)
        await self.update_progress(100, "완료")
        return self._build_result(
            category="seo",
            score=score,
            issues=issues,
            meta_info=meta_info,
        )

    def _check_title(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
        """S-01: Check title tag existence and length (10-60 chars).

        Records the title text and its length in *meta_info*.
        """
        issues: list[Issue] = []
        title = soup.find("title")
        # get_text() instead of .string: .string is None when the <title>
        # contains a comment or nested node, which would falsely report a
        # perfectly good title as missing.
        title_text = title.get_text().strip() if title is not None else ""
        if not title_text:
            meta_info["title"] = None
            meta_info["title_length"] = 0
            issues.append(self._create_issue(
                code="S-01",
                severity="critical",
                message="<title> 태그가 없거나 비어있습니다",
                suggestion="검색 결과에 표시될 10-60자 길이의 페이지 제목을 설정하세요",
            ))
            return issues
        title_len = len(title_text)
        meta_info["title"] = title_text
        meta_info["title_length"] = title_len
        if title_len < 10:
            issues.append(self._create_issue(
                code="S-01",
                severity="critical",
                message=f"title이 너무 짧습니다 ({title_len}자, 권장 10-60자)",
                element=f"<title>{title_text}</title>",
                suggestion="검색 결과에 효과적으로 표시되도록 10자 이상의 제목을 작성하세요",
            ))
        elif title_len > 60:
            issues.append(self._create_issue(
                code="S-01",
                severity="minor",
                message=f"title이 너무 깁니다 ({title_len}자, 권장 10-60자)",
                element=f"<title>{title_text[:50]}...</title>",
                suggestion="검색 결과에서 잘리지 않도록 60자 이내로 제목을 줄이세요",
            ))
        return issues

    def _check_meta_description(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
        """S-02: Check meta description existence and length (50-160 chars).

        Records the description and its length in *meta_info*.
        """
        issues: list[Issue] = []
        desc = soup.find("meta", attrs={"name": re.compile(r"^description$", re.I)})
        # Strip before the emptiness test so a whitespace-only content=""
        # is reported as "missing" rather than "too short (0자)".
        content = desc.get("content", "").strip() if desc is not None else ""
        if not content:
            meta_info["description"] = None
            meta_info["description_length"] = 0
            issues.append(self._create_issue(
                code="S-02",
                severity="major",
                message="meta description이 없습니다",
                suggestion='<meta name="description" content="페이지 설명">을 추가하세요 (50-160자 권장)',
            ))
            return issues
        content_len = len(content)
        meta_info["description"] = content
        meta_info["description_length"] = content_len
        if content_len < 50:
            issues.append(self._create_issue(
                code="S-02",
                severity="major",
                message=f"meta description이 너무 짧습니다 ({content_len}자, 권장 50-160자)",
                suggestion="검색 결과에서 페이지를 효과적으로 설명하도록 50자 이상으로 작성하세요",
            ))
        elif content_len > 160:
            issues.append(self._create_issue(
                code="S-02",
                severity="minor",
                message=f"meta description이 너무 깁니다 ({content_len}자, 권장 50-160자)",
                suggestion="검색 결과에서 잘리지 않도록 160자 이내로 줄이세요",
            ))
        return issues

    def _check_meta_keywords(self, soup: BeautifulSoup, meta_info: dict) -> list[Issue]:
        """S-03: Check meta keywords (informational only — search engines
        largely ignore this tag, so absence is reported as severity=info)."""
        keywords = soup.find("meta", attrs={"name": re.compile(r"^keywords$", re.I)})
        if keywords is None or not keywords.get("content"):
            meta_info["has_keywords"] = False
            return [self._create_issue(
                code="S-03",
                severity="info",
                message="meta keywords가 없습니다 (현재 대부분의 검색엔진에서 무시됨)",
                suggestion="meta keywords는 SEO에 큰 영향이 없지만, 참고용으로 추가할 수 있습니다",
            )]
        meta_info["has_keywords"] = True
        return []

    def _check_og_tags(self, soup: BeautifulSoup) -> list[Issue]:
        """S-04: Check required Open Graph tags (og:title, og:description,
        og:image).  A single issue lists every missing property."""
        required_og = ["og:title", "og:description", "og:image"]
        missing = []
        for prop in required_og:
            og = soup.find("meta", attrs={"property": prop})
            if og is None or not og.get("content"):
                missing.append(prop)
        if not missing:
            return []
        return [self._create_issue(
            code="S-04",
            severity="major",
            message=f"Open Graph 태그가 누락되었습니다: {', '.join(missing)}",
            suggestion=f'누락된 OG 태그를 추가하세요. 예: <meta property="{missing[0]}" content="">',
        )]

    def _check_twitter_card(self, soup: BeautifulSoup) -> list[Issue]:
        """S-05: Check Twitter Card tags.  Either twitter:card or
        twitter:title counts as "present"."""
        twitter_card = soup.find("meta", attrs={"name": "twitter:card"})
        twitter_title = soup.find("meta", attrs={"name": "twitter:title"})
        if twitter_card is None and twitter_title is None:
            return [self._create_issue(
                code="S-05",
                severity="minor",
                message="Twitter Card 태그가 없습니다",
                suggestion='<meta name="twitter:card" content="summary_large_image">를 추가하세요',
            )]
        return []

    def _check_canonical(self, soup: BeautifulSoup) -> list[Issue]:
        """S-06: Check canonical URL link element."""
        canonical = soup.find("link", attrs={"rel": "canonical"})
        if canonical is None or not canonical.get("href"):
            return [self._create_issue(
                code="S-06",
                severity="major",
                message="canonical URL이 설정되지 않았습니다",
                suggestion='<link rel="canonical" href="현재페이지URL">을 추가하여 중복 콘텐츠 문제를 방지하세요',
            )]
        return []

    async def _fetch_status(self, file_url: str) -> int | None:
        """GET *file_url* and return the HTTP status code, or None on any
        network/timeout/SSL error.

        NOTE(review): TLS verification is disabled (verify=False) —
        presumably deliberate so sites with broken certificates can still
        be scanned; confirm this is intended before reuse elsewhere.
        """
        try:
            async with httpx.AsyncClient(timeout=httpx.Timeout(5.0), verify=False) as client:
                resp = await client.get(file_url)
            return resp.status_code
        except Exception as e:
            logger.warning("fetch failed for %s: %s", file_url, str(e))
            return None

    async def _check_robots_txt(self, url: str, meta_info: dict) -> list[Issue]:
        """S-07: Check robots.txt accessibility at the site root.

        Records the result in meta_info["has_robots_txt"].
        """
        parsed = urlparse(url)
        robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
        status = await self._fetch_status(robots_url)
        meta_info["has_robots_txt"] = status == 200
        if status == 200:
            return []
        # Include the status code only when the request itself succeeded.
        detail = f" (HTTP {status})" if status is not None else ""
        return [self._create_issue(
            code="S-07",
            severity="major",
            message=f"robots.txt에 접근할 수 없습니다{detail}",
            suggestion="검색엔진 크롤링을 제어하기 위해 /robots.txt 파일을 생성하세요",
        )]

    async def _check_sitemap(self, url: str, meta_info: dict) -> list[Issue]:
        """S-08: Check sitemap.xml accessibility at the site root.

        Records the result in meta_info["has_sitemap"].
        """
        parsed = urlparse(url)
        sitemap_url = f"{parsed.scheme}://{parsed.netloc}/sitemap.xml"
        status = await self._fetch_status(sitemap_url)
        meta_info["has_sitemap"] = status == 200
        if status == 200:
            return []
        detail = f" (HTTP {status})" if status is not None else ""
        return [self._create_issue(
            code="S-08",
            severity="major",
            message=f"sitemap.xml에 접근할 수 없습니다{detail}",
            suggestion="검색엔진이 사이트 구조를 이해할 수 있도록 /sitemap.xml을 생성하세요",
        )]

    def _check_h1(self, soup: BeautifulSoup) -> list[Issue]:
        """S-09: Check H1 tag existence and uniqueness (exactly one)."""
        h1_tags = soup.find_all("h1")
        issues: list[Issue] = []
        if len(h1_tags) == 0:
            issues.append(self._create_issue(
                code="S-09",
                severity="critical",
                message="H1 태그가 없습니다",
                suggestion="페이지의 주요 제목을 <h1> 태그로 추가하세요",
            ))
        elif len(h1_tags) > 1:
            issues.append(self._create_issue(
                code="S-09",
                severity="critical",
                message=f"H1 태그가 {len(h1_tags)}개 발견되었습니다 (1개 권장)",
                element=self._truncate_element(str(h1_tags[0])),
                suggestion="페이지당 H1 태그는 1개만 사용하세요",
            ))
        return issues

    def _check_structured_data(self, soup: BeautifulSoup, html_content: str, meta_info: dict) -> list[Issue]:
        """S-10: Check for structured data in any of the three common
        formats (JSON-LD, Microdata, RDFa).  The detected format names are
        stored in meta_info["structured_data_types"]."""
        structured_types = []
        if soup.find_all("script", attrs={"type": "application/ld+json"}):
            structured_types.append("JSON-LD")
        if soup.find_all(attrs={"itemscope": True}):
            structured_types.append("Microdata")
        if soup.find_all(attrs={"typeof": True}):
            structured_types.append("RDFa")
        meta_info["structured_data_types"] = structured_types
        if not structured_types:
            return [self._create_issue(
                code="S-10",
                severity="minor",
                message="구조화 데이터(JSON-LD, Microdata, RDFa)가 없습니다",
                suggestion='<script type="application/ld+json">을 사용하여 구조화 데이터를 추가하세요',
            )]
        return []

    def _check_favicon(self, soup: BeautifulSoup) -> list[Issue]:
        """S-11: Check favicon existence (any link rel containing "icon",
        e.g. icon / shortcut icon / apple-touch-icon)."""
        favicon = soup.find("link", attrs={"rel": re.compile(r"icon", re.I)})
        if favicon is None:
            return [self._create_issue(
                code="S-11",
                severity="minor",
                message="favicon이 설정되지 않았습니다",
                suggestion='<link rel="icon" href="/favicon.ico">를 추가하세요',
            )]
        return []

    def _check_viewport(self, soup: BeautifulSoup) -> list[Issue]:
        """S-12: Check viewport meta tag for mobile friendliness."""
        viewport = soup.find("meta", attrs={"name": re.compile(r"^viewport$", re.I)})
        if viewport is None:
            return [self._create_issue(
                code="S-12",
                severity="major",
                message="viewport meta 태그가 없습니다 (모바일 친화성 부족)",
                suggestion='<meta name="viewport" content="width=device-width, initial-scale=1.0">을 추가하세요',
            )]
        return []

    def _check_url_structure(self, url: str) -> list[Issue]:
        """S-13: Check the URL path for SEO friendliness.  Flags paths
        with more than three characters outside [a-zA-Z0-9/-_.]."""
        parsed = urlparse(url)
        special_chars = re.findall(r"[^a-zA-Z0-9/\-_.]", parsed.path)
        if len(special_chars) > 3:
            return [self._create_issue(
                code="S-13",
                severity="minor",
                message=f"URL에 특수 문자가 많습니다 ({len(special_chars)}개)",
                suggestion="URL은 영문, 숫자, 하이픈(-)을 사용하여 깔끔하게 구성하세요",
            )]
        return []

    def _check_img_alt_seo(self, soup: BeautifulSoup) -> list[Issue]:
        """S-14: Check image alt attributes from an SEO perspective.

        alt="" is a valid marker for decorative images, so only images
        with no alt attribute at all are flagged.
        """
        images = soup.find_all("img")
        missing_alt = [img for img in images if img.get("alt") is None]
        if not missing_alt:
            return []
        return [self._create_issue(
            code="S-14",
            severity="major",
            message=f"alt 속성이 없는 이미지가 {len(missing_alt)}개 발견되었습니다",
            element=self._truncate_element(str(missing_alt[0])),
            suggestion="검색엔진이 이미지를 이해할 수 있도록 모든 이미지에 설명적인 alt 속성을 추가하세요",
        )]

    @staticmethod
    def _truncate_element(element_str: str, max_len: int = 200) -> str:
        """Truncate an element's HTML representation to *max_len* chars,
        appending an ellipsis when it was cut."""
        if len(element_str) > max_len:
            return element_str[:max_len] + "..."
        return element_str