diff --git a/CHANGES/1661.bugfix.rst b/CHANGES/1661.bugfix.rst new file mode 100644 index 000000000..6329c9f4d --- /dev/null +++ b/CHANGES/1661.bugfix.rst @@ -0,0 +1,7 @@ +Fixed a host-confusion parsing bug where URLs containing multiple bracket +characters in the host component (e.g. ``http://[:localhost[]].google:80``) +were silently canonicalized to an unintended host. Both ``split_url()`` and +``split_netloc()`` now raise :exc:`ValueError` when more than one ``[`` or +``]`` is found in the authority, or when ``[`` does not appear at the start of +the host subcomponent, in compliance with :rfc:`3986` -- by +:user:`rodrigobnogueira`. diff --git a/tests/test_url.py b/tests/test_url.py index 37871fedb..5fb66c56b 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -365,6 +365,37 @@ def test_ipfuture_brackets_not_allowed() -> None: URL("http://[v10]/") +@pytest.mark.parametrize( + "url", + ( + "http://[:localhost[]].google:80", + "http://[:localhost[]].google", + "http://[:attacker.com[]]:80", + "http://[:evil.com[]].bank.com:443", + "http://[:127.0.0.1[]]:80", + "http://[v1.:attacker[]].bank.com:80", + ), + ids=( + "host-confusion-with-port", + "host-confusion-without-port", + "attacker-host-injection", + "domain-allowlist-bypass", + "private-ip-injection", + "ipvfuture-bracket-abuse", + ), +) +def test_malformed_bracketed_host_rejected(url: str) -> None: + """Reject URLs with multiple brackets to prevent host confusion (SSRF).""" + with pytest.raises(ValueError, match="Invalid IPv6 URL"): + URL(url) + + +def test_malformed_bracketed_host_in_authority() -> None: + """Reject malformed brackets via URL.build(authority=...) path.""" + with pytest.raises(ValueError, match="Invalid IPv6 URL"): + URL.build(scheme="http", authority="[:localhost[]].google:80") + + def test_ipv4_zone() -> None: # I'm unsure if it is correct. 
url = URL("http://1.2.3.4%тест%42:123") diff --git a/yarl/_parse.py b/yarl/_parse.py index bb64165c7..3f7b9ebf5 100644 --- a/yarl/_parse.py +++ b/yarl/_parse.py @@ -63,6 +63,8 @@ def split_url(url: str) -> SplitURLType: has_right_bracket and not has_left_bracket ): raise ValueError("Invalid IPv6 URL") + if has_left_bracket and (netloc.count("[") != 1 or netloc.count("]") != 1): + raise ValueError("Invalid IPv6 URL") if has_left_bracket: bracketed_host = netloc.partition("[")[2].partition("]")[0] # Valid bracketed hosts are defined in @@ -120,6 +122,12 @@ def split_netloc( password = None if "[" in hostinfo: + if ( + hostinfo.count("[") != 1 + or hostinfo.count("]") != 1 + or not hostinfo.startswith("[") + ): + raise ValueError("Invalid IPv6 URL") _, _, bracketed = hostinfo.partition("[") hostname, _, port_str = bracketed.partition("]") _, _, port_str = port_str.partition(":")