Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES/1653.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fixed the :attr:`~yarl.URL.host` property incorrectly returning the
percent-encoded zone ID separator ``%25`` instead of decoding it to ``%``
for IPv6 Zone ID URLs. For example, ``http://[fe80::1%251]/`` now correctly
exposes ``.host`` as ``fe80::1%1``, per :rfc:`6874`
-- by :user:`rodrigobnogueira`.
8 changes: 8 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,14 @@ def test_ipv4_zone() -> None:
assert url.raw_host == SplitResult(*url._val).hostname


def test_ipv6_zone_rfc6874() -> None:
    """Check host accessors for an IPv6 URL with a ``%25``-encoded zone ID.

    The raw and bracketed forms are expected to keep the percent-encoded
    separator, while ``host`` is expected to expose it as a plain ``%``.
    """
    parsed = URL("http://[fe80::1%251]/")

    # Encoded representation is preserved as-is.
    assert parsed.raw_host == "fe80::1%251"
    # Human-readable host carries the decoded separator.
    assert parsed.host == "fe80::1%1"
    # Bracketed subcomponent and full serialization stay percent-encoded.
    assert parsed.host_subcomponent == "[fe80::1%251]"
    assert str(parsed) == "http://[fe80::1%251]/"


def test_port_for_explicit_port() -> None:
url = URL("http://example.com:8888")
assert 8888 == url.port
Expand Down
12 changes: 9 additions & 3 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,9 @@ def host(self) -> str | None:
return None
if raw and raw[-1].isdigit() or ":" in raw:
# IP addresses are never IDNA encoded
if "%25" in raw:
# Decode RFC 6874 zone ID separator
return raw.replace("%25", "%")
return raw
return _idna_decode(raw)

Expand Down Expand Up @@ -1550,7 +1553,10 @@ def _encode_host(host: str, validate_host: bool) -> str:
# If the host ends with a digit or contains a colon, it's likely
# an IP address.
if host and (host[-1].isdigit() or ":" in host):
raw_ip, sep, zone = host.partition("%")
if "%25" in host:
raw_ip, sep, zone = host.partition("%25")
else:
raw_ip, sep, zone = host.partition("%")
# If it looks like an IP, we check with _ip_compressed_version
# and fall through if it's not an IP address. This is a performance
# optimization to avoid parsing IP addresses as much as possible
Expand Down Expand Up @@ -1578,8 +1584,8 @@ def _encode_host(host: str, validate_host: bool) -> str:
# LRU to keep the cache size small
host = ip.compressed
if ip.version == 6:
return f"[{host}%{zone}]" if sep else f"[{host}]"
return f"{host}%{zone}" if sep else host
return f"[{host}{sep}{zone}]" if sep else f"[{host}]"
return f"{host}{sep}{zone}" if sep else host

# IDNA encoding is slow, skip it for ASCII-only strings
if host.isascii():
Expand Down
Loading