From 96ff721ea6c4328c13d4cc3116cc3e8732e7723c Mon Sep 17 00:00:00 2001 From: jungwoo choi Date: Sat, 14 Feb 2026 08:00:16 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20=EC=BF=BC=EB=A6=AC=20=ED=8C=8C=EB=9D=BC?= =?UTF-8?q?=EB=AF=B8=ED=84=B0=20=EB=8B=A4=EB=A5=B4=EB=A9=B4=20=EC=9C=A0?= =?UTF-8?q?=EB=8B=88=ED=81=AC=20URL=EB=A1=9C=20=ED=8C=90=EB=8B=A8=20?= =?UTF-8?q?=E2=80=94=20=ED=8A=B8=EB=9E=98=ED=82=B9=20=ED=8C=8C=EB=9D=BC?= =?UTF-8?q?=EB=AF=B8=ED=84=B0=20=EC=A0=9C=EA=B1=B0=20=EB=A1=9C=EC=A7=81=20?= =?UTF-8?q?=EC=82=AD=EC=A0=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 모든 쿼리 파라미터를 보존하여 파라미터가 다른 URL은 별도 페이지로 취급. Co-Authored-By: Claude Opus 4.6 --- backend/app/services/link_crawler.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/backend/app/services/link_crawler.py b/backend/app/services/link_crawler.py index 8138f53..868c258 100644 --- a/backend/app/services/link_crawler.py +++ b/backend/app/services/link_crawler.py @@ -40,7 +40,7 @@ def normalize_url(url: str) -> str: - Remove trailing slash (except for root path) - Lowercase scheme and netloc - Strip www. prefix for consistent deduplication - - Remove common tracking query parameters + - Query parameters are preserved as-is (different params = different page) """ parsed = urlparse(url) @@ -62,17 +62,6 @@ def normalize_url(url: str) -> str: path = path.rstrip("/") normalized = normalized._replace(path=path) - # Remove common tracking query parameters - if normalized.query: - from urllib.parse import parse_qs, urlencode - _TRACKING_PARAMS = { - "utm_source", "utm_medium", "utm_campaign", "utm_term", - "utm_content", "ref", "fbclid", "gclid", "mc_cid", "mc_eid", - } - params = parse_qs(normalized.query, keep_blank_values=True) - filtered = {k: v for k, v in params.items() if k.lower() not in _TRACKING_PARAMS} - normalized = normalized._replace(query=urlencode(filtered, doseq=True)) - return urlunparse(normalized)