Skip to content

Commit a9bd0c5

Browse files
Fix IPv6 Zone ID decoding to correctly handle RFC 6874 %25 separator
Per RFC 6874, an IPv6 Zone ID in a URI is encoded as:

    IPv6addrz = IPv6address "%25" ZoneID

So in 'http://[fe80::1%251]/', the zone ID is '1', not '251'.

Previously, _encode_host() split the host on a bare '%', treating '251' as the zone ID. The host property also returned the raw (encoded) value unchanged for IP addresses, so '%25' was never decoded.

Fix _encode_host() to partition on '%25' (the RFC 6874 separator) when present, preserving it verbatim in raw_host / str(url), and update the host property to decode '%25' -> '%' so that callers receive the human-readable zone identifier (e.g. 'fe80::1%1' / 'fe80::1%eth0').

Tests added for:
- Numeric zone ID: http://[fe80::1%251]/ -> host='fe80::1%1'
- String zone ID: http://[fe80::1%25eth0]/ -> host='fe80::1%eth0'
1 parent 2f180d1 commit a9bd0c5

2 files changed

Lines changed: 17 additions & 3 deletions

File tree

tests/test_url.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,14 @@ def test_ipv4_zone() -> None:
373373
assert url.raw_host == SplitResult(*url._val).hostname
374374

375375

376+
def test_ipv6_zone_rfc6874() -> None:
377+
url = URL("http://[fe80::1%251]/")
378+
assert url.raw_host == "fe80::1%251"
379+
assert url.host == "fe80::1%1"
380+
assert url.host_subcomponent == "[fe80::1%251]"
381+
assert str(url) == "http://[fe80::1%251]/"
382+
383+
376384
def test_port_for_explicit_port() -> None:
377385
url = URL("http://example.com:8888")
378386
assert 8888 == url.port

yarl/_url.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,9 @@ def host(self) -> str | None:
764764
return None
765765
if raw and raw[-1].isdigit() or ":" in raw:
766766
# IP addresses are never IDNA encoded
767+
if "%25" in raw:
768+
# Decode RFC 6874 zone ID separator
769+
return raw.replace("%25", "%")
767770
return raw
768771
return _idna_decode(raw)
769772

@@ -1550,7 +1553,10 @@ def _encode_host(host: str, validate_host: bool) -> str:
15501553
# If the host ends with a digit or contains a colon, its likely
15511554
# an IP address.
15521555
if host and (host[-1].isdigit() or ":" in host):
1553-
raw_ip, sep, zone = host.partition("%")
1556+
if "%25" in host:
1557+
raw_ip, sep, zone = host.partition("%25")
1558+
else:
1559+
raw_ip, sep, zone = host.partition("%")
15541560
# If it looks like an IP, we check with _ip_compressed_version
15551561
# and fall-through if its not an IP address. This is a performance
15561562
# optimization to avoid parsing IP addresses as much as possible
@@ -1578,8 +1584,8 @@ def _encode_host(host: str, validate_host: bool) -> str:
15781584
# LRU to keep the cache size small
15791585
host = ip.compressed
15801586
if ip.version == 6:
1581-
return f"[{host}%{zone}]" if sep else f"[{host}]"
1582-
return f"{host}%{zone}" if sep else host
1587+
return f"[{host}{sep}{zone}]" if sep else f"[{host}]"
1588+
return f"{host}{sep}{zone}" if sep else host
15831589

15841590
# IDNA encoding is slow, skip it for ASCII-only strings
15851591
if host.isascii():

0 commit comments

Comments
 (0)