From 96ff721ea6c4328c13d4cc3116cc3e8732e7723c Mon Sep 17 00:00:00 2001
From: jungwoo choi <jungwoochoi@MacBook-Pro-2.local>
Date: Sat, 14 Feb 2026 08:00:16 +0900
Subject: [PATCH] =?UTF-8?q?fix:=20=EC=BF=BC=EB=A6=AC=20=ED=8C=8C=EB=9D=BC?=
 =?UTF-8?q?=EB=AF=B8=ED=84=B0=20=EB=8B=A4=EB=A5=B4=EB=A9=B4=20=EC=9C=A0?=
 =?UTF-8?q?=EB=8B=88=ED=81=AC=20URL=EB=A1=9C=20=ED=8C=90=EB=8B=A8=20?=
 =?UTF-8?q?=E2=80=94=20=ED=8A=B8=EB=9E=98=ED=82=B9=20=ED=8C=8C=EB=9D=BC?=
 =?UTF-8?q?=EB=AF=B8=ED=84=B0=20=EC=A0=9C=EA=B1=B0=20=EB=A1=9C=EC=A7=81=20?=
 =?UTF-8?q?=EC=82=AD=EC=A0=9C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

모든 쿼리 파라미터를 보존하여 파라미터가 다른 URL은 별도 페이지로 취급.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/app/services/link_crawler.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/backend/app/services/link_crawler.py b/backend/app/services/link_crawler.py
index 8138f53..868c258 100644
--- a/backend/app/services/link_crawler.py
+++ b/backend/app/services/link_crawler.py
@@ -40,7 +40,7 @@ def normalize_url(url: str) -> str:
     - Remove trailing slash (except for root path)
     - Lowercase scheme and netloc
     - Strip www. prefix for consistent deduplication
-    - Remove common tracking query parameters
+    - Preserve query parameters as-is (URLs that differ only in their query are different pages)
     """
     parsed = urlparse(url)
 
@@ -62,17 +62,6 @@ def normalize_url(url: str) -> str:
         path = path.rstrip("/")
     normalized = normalized._replace(path=path)
 
-    # Remove common tracking query parameters
-    if normalized.query:
-        from urllib.parse import parse_qs, urlencode
-        _TRACKING_PARAMS = {
-            "utm_source", "utm_medium", "utm_campaign", "utm_term",
-            "utm_content", "ref", "fbclid", "gclid", "mc_cid", "mc_eid",
-        }
-        params = parse_qs(normalized.query, keep_blank_values=True)
-        filtered = {k: v for k, v in params.items() if k.lower() not in _TRACKING_PARAMS}
-        normalized = normalized._replace(query=urlencode(filtered, doseq=True))
-
     return urlunparse(normalized)