Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGES/1655.bugfix.1.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Fixed :meth:`~yarl.URL.build` failing to validate characters in the zone ID
portion of IPv6 addresses when ``validate_host=True``, which allowed control
characters such as CR, LF, and NUL to pass through into ``url.host``.
Zone IDs containing ASCII control characters are now rejected per
`RFC 9844 §6.3 <https://datatracker.ietf.org/doc/html/rfc9844#section-6-3>`_
-- by :user:`rodrigobnogueira`.
5 changes: 5 additions & 0 deletions CHANGES/1655.bugfix.2.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fixed ``_check_netloc()`` missing ``%`` from its NFKC normalization character
check, which allowed Unicode characters U+FF05 (FULLWIDTH PERCENT SIGN) and
U+FE6A (SMALL PERCENT SIGN) to produce a literal ``%`` in ``url.host`` via
the standard library IDNA fallback
-- by :user:`rodrigobnogueira`.
51 changes: 29 additions & 22 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
_VERTICAL_COLON = "\ufe13" # normalizes to ":"
_FULL_WITH_NUMBER_SIGN = "\uff03" # normalizes to "#"
_ACCOUNT_OF = "\u2100" # normalizes to "a/c"
_FULLWIDTH_PERCENT = "\uff05" # normalizes to "%"
_SMALL_PERCENT = "\ufe6a" # normalizes to "%"


def test_inheritance() -> None:
Expand Down Expand Up @@ -1806,8 +1808,8 @@ def test_to_idna() -> None:


def test_from_ascii_login() -> None:
url = URL("http://" "%D0%B2%D0%B0%D1%81%D1%8F" "@host:1234/")
assert ("http://" "%D0%B2%D0%B0%D1%81%D1%8F" "@host:1234/") == str(url)
url = URL("http://%D0%B2%D0%B0%D1%81%D1%8F@host:1234/")
assert ("http://%D0%B2%D0%B0%D1%81%D1%8F@host:1234/") == str(url)


def test_from_non_ascii_login() -> None:
Expand Down Expand Up @@ -1841,16 +1843,16 @@ def test_from_non_ascii_login_and_password() -> None:


def test_from_ascii_path() -> None:
url = URL("http://example.com/" "%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0")
url = URL("http://example.com/%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0")
assert (
"http://example.com/" "%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0"
"http://example.com/%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0"
) == str(url)


def test_from_ascii_path_lower_case() -> None:
url = URL("http://example.com/" "%d0%bf%d1%83%d1%82%d1%8c/%d1%82%d1%83%d0%b4%d0%b0")
url = URL("http://example.com/%d0%bf%d1%83%d1%82%d1%8c/%d1%82%d1%83%d0%b4%d0%b0")
assert (
"http://example.com/" "%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0"
"http://example.com/%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0"
) == str(url)


Expand All @@ -1871,23 +1873,17 @@ def test_bytes() -> None:

def test_from_ascii_query_parts() -> None:
url = URL(
"http://example.com/"
"?%D0%BF%D0%B0%D1%80%D0%B0%D0%BC"
"=%D0%B7%D0%BD%D0%B0%D1%87"
"http://example.com/?%D0%BF%D0%B0%D1%80%D0%B0%D0%BC=%D0%B7%D0%BD%D0%B0%D1%87"
)
assert (
"http://example.com/"
"?%D0%BF%D0%B0%D1%80%D0%B0%D0%BC"
"=%D0%B7%D0%BD%D0%B0%D1%87"
"http://example.com/?%D0%BF%D0%B0%D1%80%D0%B0%D0%BC=%D0%B7%D0%BD%D0%B0%D1%87"
) == str(url)


def test_from_non_ascii_query_parts() -> None:
url = URL("http://example.com/?парам=знач")
assert (
"http://example.com/"
"?%D0%BF%D0%B0%D1%80%D0%B0%D0%BC"
"=%D0%B7%D0%BD%D0%B0%D1%87"
"http://example.com/?%D0%BF%D0%B0%D1%80%D0%B0%D0%BC=%D0%B7%D0%BD%D0%B0%D1%87"
) == str(url)


Expand All @@ -1897,16 +1893,16 @@ def test_from_non_ascii_query_parts2() -> None:


def test_from_ascii_fragment() -> None:
url = URL("http://example.com/" "#%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82")
url = URL("http://example.com/#%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82")
assert (
"http://example.com/" "#%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82"
"http://example.com/#%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82"
) == str(url)


def test_from_bytes_with_non_ascii_fragment() -> None:
url = URL("http://example.com/#фрагмент")
assert (
"http://example.com/" "#%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82"
"http://example.com/#%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82"
) == str(url)


Expand All @@ -1917,12 +1913,10 @@ def test_to_str() -> None:

def test_to_str_long() -> None:
url = URL(
"https://host-12345678901234567890123456789012345678901234567890" "-name:8888/"
"https://host-12345678901234567890123456789012345678901234567890-name:8888/"
)
expected = (
"https://host-"
"12345678901234567890123456789012345678901234567890"
"-name:8888/"
"https://host-12345678901234567890123456789012345678901234567890-name:8888/"
)
assert expected == str(url)

Expand Down Expand Up @@ -2465,3 +2459,16 @@ def test_url_with_invalid_unicode(disallowed_unicode: str) -> None:
ValueError, match="contains invalid characters under NFKC normalization"
):
URL(f"http://example.{disallowed_unicode}.com/frag")


@pytest.mark.parametrize(
    "percent_char",
    [
        pytest.param(_FULLWIDTH_PERCENT, id="fullwidth-percent-U+FF05"),
        pytest.param(_SMALL_PERCENT, id="small-percent-U+FE6A"),
    ],
)
def test_url_with_fullwidth_percent_rejected(percent_char: str) -> None:
    """A netloc holding a percent look-alike must fail the NFKC check.

    Both parametrized characters normalize to ``%``, so constructing a URL
    with one of them in the netloc is expected to raise ``ValueError``.
    """
    expected_error = "contains invalid characters under NFKC normalization"
    spoofed_url = f"http://evil.com{percent_char}2e.internal/"
    with pytest.raises(ValueError, match=expected_error):
        URL(spoofed_url)
31 changes: 31 additions & 0 deletions tests/test_url_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,37 @@ def test_url_ipv4_in_ipv6() -> None:
assert str(u) == "http://[2001:db8:122:344::c000:221]"


@pytest.mark.parametrize(
    "zone",
    (
        "\r\nX-Injected: evil",
        "\x00evil",
        # DEL (0x7f) sits outside the 0x00-0x1f range but is still a CTL.
        "eth0\x7f",
        # A bare "%" with nothing after it is rejected as well.
        "",
    ),
    ids=("crlf-injection", "null-byte", "del-char", "empty-zone"),
)
def test_url_build_ipv6_zone_id_invalid_chars(zone: str) -> None:
    """Zone IDs that are empty or contain control characters must be rejected.

    Every case builds the host ``::1%<zone>`` and expects ``URL.build`` to
    raise ``ValueError`` with the zone-ID validation message.
    """
    with pytest.raises(ValueError, match="Invalid characters in IPv6 zone ID"):
        URL.build(scheme="http", host=f"::1%{zone}", path="/")


@pytest.mark.parametrize(
    "zone",
    (
        pytest.param("eth0", id="iface-name"),
        pytest.param("1", id="numeric"),
        pytest.param("zone with spaces", id="spaces"),
        pytest.param("Ethernet (LAN)", id="parens"),
        pytest.param("日本語", id="unicode"),
    ),
)
def test_url_build_ipv6_zone_id_valid(zone: str) -> None:
    """Zone IDs free of control characters survive ``URL.build`` unchanged.

    The zone text is OS-specific (RFC 4007 §11.2), so spaces, parentheses
    and non-ASCII text are all expected to round-trip through ``url.host``.
    """
    host_with_zone = f"::1%{zone}"
    built = URL.build(scheme="http", host=host_with_zone, path="/")
    assert built.host == host_with_zone


def test_build_with_scheme() -> None:
u = URL.build(scheme="blob", path="path")
assert str(u) == "blob:path"
Expand Down
2 changes: 1 addition & 1 deletion yarl/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def _check_netloc(netloc: str) -> None:
# Note that there are no unicode decompositions for the character '@' so
# it's currently impossible to have test coverage for this branch; however, if
# one should be added in the future we want to make sure it's still checked.
for c in "/?#@:": # pragma: no branch
for c in "/?#@:%": # pragma: no branch
if c in normalized_netloc:
raise ValueError(
f"netloc '{netloc}' contains invalid "
Expand Down
9 changes: 9 additions & 0 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,13 @@
re.VERBOSE,
)

# Zone IDs are OS-specific text strings with no format defined by the RFCs:
# https://datatracker.ietf.org/doc/html/rfc4007#section-11.2
# RFC 9844 §6.3 recommends rejecting characters inappropriate for the
# environment; for yarl we reject ASCII control characters (CTL):
# https://datatracker.ietf.org/doc/html/rfc9844#section-6-3
_ZONE_ID_UNSAFE_RE = re.compile(r"[\x00-\x1f\x7f]")

_T = TypeVar("_T")

if sys.version_info >= (3, 11):
Expand Down Expand Up @@ -1574,6 +1581,8 @@ def _encode_host(host: str, validate_host: bool) -> str:
except ValueError:
pass
else:
if sep and validate_host and (not zone or _ZONE_ID_UNSAFE_RE.search(zone)):
raise ValueError(f"Invalid characters in IPv6 zone ID: {zone!r}")
# These checks should not happen in the
# LRU to keep the cache size small
host = ip.compressed
Expand Down
Loading