aio-libs · rodrigobnogueira · Apr 12, 2026 · Apr 12, 2026 · Apr 15, 2026 · Apr 19, 2026
diff --git a/CHANGES/1655.bugfix.1.rst b/CHANGES/1655.bugfix.1.rst
@@ -0,0 +1,4 @@
+Fixed :meth:`~yarl.URL.build` failing to validate characters in the zone ID
+portion of IPv6 addresses when ``validate_host=True``, allowing control
+characters such as CR and LF to pass through into ``url.host``
+-- by :user:`rodrigobnogueira`.
diff --git a/CHANGES/1655.bugfix.2.rst b/CHANGES/1655.bugfix.2.rst
@@ -0,0 +1,5 @@
+Fixed ``_check_netloc()`` missing ``%`` from its NFKC normalization character
+check, which allowed Unicode characters U+FF05 (FULLWIDTH PERCENT SIGN) and
+U+FE6A (SMALL PERCENT SIGN) to produce a literal ``%`` in ``url.host`` via
+the standard library IDNA fallback
+-- by :user:`rodrigobnogueira`.
diff --git a/tests/test_url.py b/tests/test_url.py
@@ -12,6 +12,8 @@
 _VERTICAL_COLON = "\ufe13"  # normalizes to ":"
 _FULL_WITH_NUMBER_SIGN = "\uff03"  # normalizes to "#"
 _ACCOUNT_OF = "\u2100"  # normalizes to "a/c"
+_FULLWIDTH_PERCENT = "\uff05"  # normalizes to "%"
+_SMALL_PERCENT = "\ufe6a"  # normalizes to "%"
 
 
 def test_inheritance() -> None:
@@ -2465,3 +2467,16 @@ def test_url_with_invalid_unicode(disallowed_unicode: str) -> None:
         ValueError, match="contains invalid characters under NFKC normalization"
     ):
         URL(f"http://example.{disallowed_unicode}.com/frag")
+
+
+@pytest.mark.parametrize(
+    "percent_char",
+    [_FULLWIDTH_PERCENT, _SMALL_PERCENT],
+    ids=["fullwidth-percent-U+FF05", "small-percent-U+FE6A"],
+)
+def test_url_with_fullwidth_percent_rejected(percent_char: str) -> None:
+    """Hosts whose NFKC form contains '%' (U+FF05, U+FE6A) must be rejected."""
+    with pytest.raises(
+        ValueError, match="contains invalid characters under NFKC normalization"
+    ):
+        URL(f"http://evil.com{percent_char}2e.internal/")
diff --git a/tests/test_url_build.py b/tests/test_url_build.py
@@ -35,6 +35,21 @@ def test_url_ipv4_in_ipv6() -> None:
     assert str(u) == "http://[2001:db8:122:344::c000:221]"
 
 
+@pytest.mark.parametrize(
+    "zone",
+    (
+        "\r\nX-Injected: evil",
+        "\x00evil",
+        "zone with spaces",
+    ),
+    ids=("crlf-injection", "null-byte", "spaces"),
+)
+def test_url_build_ipv6_zone_id_invalid_chars(zone: str) -> None:
+    """validate_host must reject zone IDs with control characters or spaces."""
+    with pytest.raises(ValueError, match="Invalid characters in IPv6 zone ID"):
+        URL.build(scheme="http", host=f"::1%{zone}", path="/")
+
+
 def test_build_with_scheme() -> None:
     u = URL.build(scheme="blob", path="path")
     assert str(u) == "blob:path"

diff --git a/yarl/_parse.py b/yarl/_parse.py
@@ -96,7 +96,7 @@ def _check_netloc(netloc: str) -> None:
     # Note that there are no unicode decompositions for the character '@' so
     # its currently impossible to have test coverage for this branch, however if the
     # one should be added in the future we want to make sure its still checked.
-    for c in "/?#@:":  # pragma: no branch
+    for c in "/?#@:%":  # pragma: no branch
         if c in normalized_netloc:
             raise ValueError(
                 f"netloc '{netloc}' contains invalid "

diff --git a/yarl/_url.py b/yarl/_url.py
@@ -89,6 +89,12 @@
     re.VERBOSE,
 )
 
+# Zone IDs in URIs are defined by RFC 6874 (obsoleted by RFC 9844 for UI usage):
+# ZoneID = 1*( unreserved / pct-encoded )
+# https://www.rfc-editor.org/rfc/rfc6874#section-2
+# Sub-delimiters also occur in practice (e.g. Ethernet+1); \Z rejects a final LF.
+_ZONE_ID_RE = re.compile(r"^[A-Za-z0-9._~!$&'()*+,;=%-]+\Z")
+
 _T = TypeVar("_T")
 
 if sys.version_info >= (3, 11):
@@ -1574,6 +1580,8 @@ def _encode_host(host: str, validate_host: bool) -> str:
         except ValueError:
             pass
         else:
+            if sep and validate_host and not _ZONE_ID_RE.match(zone):
+                raise ValueError(f"Invalid characters in IPv6 zone ID: {zone!r}")
             # These checks should not happen in the
             # LRU to keep the cache size small
             host = ip.compressed