Skip to content

Commit a52a3dd

Browse files
Reject URLs with multiple brackets in host component
Fixes host-confusion parsing where URLs containing multiple bracket characters in the authority (e.g. http://[:localhost[]].google:80) were silently canonicalized to an unintended host.

Both split_url() and split_netloc() now raise ValueError when:
- more than one '[' or ']' appears in the netloc/hostinfo, or
- '[' does not start the host subcomponent (per RFC 3986 IP-literal).

Adds 7 regression tests covering the affected code paths.
1 parent 2f180d1 commit a52a3dd

3 files changed

Lines changed: 48 additions & 0 deletions

File tree

CHANGES/1661.bugfix.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Fixed a host-confusion parsing bug where URLs containing multiple bracket
2+
characters in the host component (e.g. ``http://[:localhost[]].google:80``)
3+
were silently canonicalized to an unintended host. Both ``split_url()`` and
4+
``split_netloc()`` now raise :exc:`ValueError` when more than one ``[`` or
5+
``]`` is found in the authority, or when ``[`` does not appear at the start of
6+
the host subcomponent, in compliance with :rfc:`3986` -- by
7+
:user:`rodrigobnogueira`.

tests/test_url.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,37 @@ def test_ipfuture_brackets_not_allowed() -> None:
365365
URL("http://[v10]/")
366366

367367

368+
@pytest.mark.parametrize(
369+
"url",
370+
(
371+
"http://[:localhost[]].google:80",
372+
"http://[:localhost[]].google",
373+
"http://[:attacker.com[]]:80",
374+
"http://[:evil.com[]].bank.com:443",
375+
"http://[:127.0.0.1[]]:80",
376+
"http://[v1.:attacker[]].bank.com:80",
377+
),
378+
ids=(
379+
"host-confusion-with-port",
380+
"host-confusion-without-port",
381+
"attacker-host-injection",
382+
"domain-allowlist-bypass",
383+
"private-ip-injection",
384+
"ipvfuture-bracket-abuse",
385+
),
386+
)
387+
def test_malformed_bracketed_host_rejected(url: str) -> None:
388+
"""Reject URLs with multiple brackets to prevent host confusion (SSRF)."""
389+
with pytest.raises(ValueError, match="Invalid IPv6 URL"):
390+
URL(url)
391+
392+
393+
def test_malformed_bracketed_host_in_authority() -> None:
394+
"""Reject malformed brackets via URL.build(authority=...) path."""
395+
with pytest.raises(ValueError, match="Invalid IPv6 URL"):
396+
URL.build(scheme="http", authority="[:localhost[]].google:80")
397+
398+
368399
def test_ipv4_zone() -> None:
369400
# I'm unsure if it is correct.
370401
url = URL("http://1.2.3.4%тест%42:123")

yarl/_parse.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ def split_url(url: str) -> SplitURLType:
6363
has_right_bracket and not has_left_bracket
6464
):
6565
raise ValueError("Invalid IPv6 URL")
66+
if has_left_bracket and (
67+
netloc.count("[") != 1 or netloc.count("]") != 1
68+
):
69+
raise ValueError("Invalid IPv6 URL")
6670
if has_left_bracket:
6771
bracketed_host = netloc.partition("[")[2].partition("]")[0]
6872
# Valid bracketed hosts are defined in
@@ -120,6 +124,12 @@ def split_netloc(
120124
password = None
121125

122126
if "[" in hostinfo:
127+
if (
128+
hostinfo.count("[") != 1
129+
or hostinfo.count("]") != 1
130+
or not hostinfo.startswith("[")
131+
):
132+
raise ValueError("Invalid IPv6 URL")
123133
_, _, bracketed = hostinfo.partition("[")
124134
hostname, _, port_str = bracketed.partition("]")
125135
_, _, port_str = port_str.partition(":")

0 commit comments

Comments
 (0)