diff --git a/CHANGES/12091.bugfix.rst b/CHANGES/12091.bugfix.rst new file mode 100644 index 00000000000..45ffbc3557f --- /dev/null +++ b/CHANGES/12091.bugfix.rst @@ -0,0 +1,6 @@ +Switched :py:meth:`~aiohttp.CookieJar.save` to use JSON format and +:py:meth:`~aiohttp.CookieJar.load` to try JSON first with a fallback to +a restricted pickle unpickler that only allows cookie-related types +(``SimpleCookie``, ``Morsel``, ``defaultdict``, etc.), preventing +arbitrary code execution via malicious pickle payloads +(CWE-502) -- by :user:`YuvalElbar6`. diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index 016fae94d20..8a11bdd53ec 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -4,6 +4,7 @@ import datetime import heapq import itertools +import json import os # noqa import pathlib import pickle @@ -38,6 +39,41 @@ _SIMPLE_COOKIE = SimpleCookie() +class _RestrictedCookieUnpickler(pickle.Unpickler): + """A restricted unpickler that only allows cookie-related types. + + This prevents arbitrary code execution when loading pickled cookie data + from untrusted sources. Only types that are expected in a serialized + CookieJar are permitted. + + See: https://docs.python.org/3/library/pickle.html#restricting-globals + """ + + _ALLOWED_CLASSES: frozenset[tuple[str, str]] = frozenset( + { + # Core cookie types + ("http.cookies", "SimpleCookie"), + ("http.cookies", "Morsel"), + # Container types used by CookieJar._cookies + ("collections", "defaultdict"), + # builtins that pickle uses for reconstruction + ("builtins", "tuple"), + ("builtins", "set"), + ("builtins", "frozenset"), + ("builtins", "dict"), + } + ) + + def find_class(self, module: str, name: str) -> type: + if (module, name) not in self._ALLOWED_CLASSES: + raise pickle.UnpicklingError( + f"Forbidden class: {module}.{name}. " + "CookieJar.load() only allows cookie-related types for security. " + "See https://docs.python.org/3/library/pickle.html#restricting-globals" + ) + return super().find_class(module, name) # type: ignore[no-any-return] + + class CookieJar(AbstractCookieJar): """Implements cookie storage adhering to RFC 6265.""" @@ -112,14 +148,84 @@ def quote_cookie(self) -> bool: return self._quote_cookie def save(self, file_path: PathLike) -> None: + """Save cookies to a file using JSON format. + + :param file_path: Path to file where cookies will be serialized, + :class:`str` or :class:`pathlib.Path` instance. + """ file_path = pathlib.Path(file_path) - with file_path.open(mode="wb") as f: - pickle.dump(self._cookies, f, pickle.HIGHEST_PROTOCOL) + data: dict[str, dict[str, dict[str, str | bool]]] = {} + for (domain, path), cookie in self._cookies.items(): + key = f"{domain}|{path}" + data[key] = {} + for name, morsel in cookie.items(): + morsel_data: dict[str, str | bool] = { + "key": morsel.key, + "value": morsel.value, + "coded_value": morsel.coded_value, + } + # Save all morsel attributes that have values + for attr in morsel._reserved: # type: ignore[attr-defined] + attr_val = morsel[attr] + if attr_val: + morsel_data[attr] = attr_val + data[key][name] = morsel_data + with file_path.open(mode="w", encoding="utf-8") as f: + json.dump(data, f, indent=2) def load(self, file_path: PathLike) -> None: + """Load cookies from a file. + + Tries to load JSON format first. Falls back to loading legacy + pickle format (using a restricted unpickler) for backward + compatibility with existing cookie files. + + :param file_path: Path to file from where cookies will be + imported, :class:`str` or :class:`pathlib.Path` instance. + """ file_path = pathlib.Path(file_path) - with file_path.open(mode="rb") as f: - self._cookies = pickle.load(f) + # Try JSON format first + try: + with file_path.open(mode="r", encoding="utf-8") as f: + data = json.load(f) + self._cookies = self._load_json_data(data) + except (json.JSONDecodeError, UnicodeDecodeError, ValueError): + # Fall back to legacy pickle format with restricted unpickler + with file_path.open(mode="rb") as f: + self._cookies = _RestrictedCookieUnpickler(f).load() + + def _load_json_data( + self, data: dict[str, dict[str, dict[str, str | bool]]] + ) -> defaultdict[tuple[str, str], SimpleCookie]: + """Load cookies from parsed JSON data.""" + cookies: defaultdict[tuple[str, str], SimpleCookie] = defaultdict(SimpleCookie) + for compound_key, cookie_data in data.items(): + domain, path = compound_key.split("|", 1) + key = (domain, path) + for name, morsel_data in cookie_data.items(): + morsel: Morsel[str] = Morsel() + morsel_key = morsel_data["key"] + morsel_value = morsel_data["value"] + morsel_coded_value = morsel_data["coded_value"] + # Use __setstate__ to bypass validation, same pattern + # used in _build_morsel and _cookie_helpers. + morsel.__setstate__( # type: ignore[attr-defined] + { + "key": morsel_key, + "value": morsel_value, + "coded_value": morsel_coded_value, + } + ) + # Restore morsel attributes + for attr in morsel._reserved: # type: ignore[attr-defined] + if attr in morsel_data and attr not in ( + "key", + "value", + "coded_value", + ): + morsel[attr] = morsel_data[attr] + cookies[key][name] = morsel + return cookies def clear(self, predicate: ClearCookiePredicate | None = None) -> None: if predicate is None: diff --git a/docs/client_reference.rst b/docs/client_reference.rst index 0b1ae5dff19..a8096b01c3d 100644 --- a/docs/client_reference.rst +++ b/docs/client_reference.rst @@ -2474,16 +2474,28 @@ Utilities .. method:: save(file_path) - Write a pickled representation of cookies into the file + Write a JSON representation of cookies into the file at provided path. + .. versionchanged:: 3.14 + + Previously used pickle format. Now uses JSON for safe + serialization. + :param file_path: Path to file where cookies will be serialized, :class:`str` or :class:`pathlib.Path` instance. .. method:: load(file_path) - Load a pickled representation of cookies from the file - at provided path. + Load cookies from the file at provided path. Tries JSON format + first, then falls back to legacy pickle format (using a restricted + unpickler that only allows cookie-related types) for backward + compatibility with existing cookie files. + + .. versionchanged:: 3.14 + + Now loads JSON format by default. Falls back to restricted + pickle for files saved by older versions. :param file_path: Path to file from where cookies will be imported, :class:`str` or :class:`pathlib.Path` instance. diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 9b5eafcea4a..7e633b9b1eb 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -99,6 +99,7 @@ deduplicate defs Dependabot deprecations +deserialization DER dev Dev @@ -213,7 +214,8 @@ Multipart musllinux mypy Nagle -Nagle’s +Nagle's +NFS namedtuple nameservers namespace @@ -235,6 +237,7 @@ param params parsers pathlib +payloads peername performant pickleable @@ -355,6 +358,8 @@ unhandled unicode unittest Unittest +unpickler +untrusted unix unobvious unsets diff --git a/tests/conftest.py b/tests/conftest.py index 661f539a632..71e773ddca8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,6 +95,11 @@ def blockbuster(request: pytest.FixtureRequest) -> Iterator[None]: bb.functions[func].can_block_in( "aiohttp/web_urldispatcher.py", "add_static" ) + # save/load is not async, so we must allow this: + for func in ("io.TextIOWrapper.read", "io.BufferedReader.read"): + bb.functions[func].can_block_in("aiohttp/cookiejar.py", "load") + for func in ("io.TextIOWrapper.write", "io.BufferedWriter.write"): + bb.functions[func].can_block_in("aiohttp/cookiejar.py", "save") # Note: coverage.py uses locking internally which can cause false positives # in blockbuster when it instruments code. This is particularly problematic # on Windows where it can lead to flaky test failures. diff --git a/tests/test_cookiejar.py b/tests/test_cookiejar.py index dc2c9f4cc98..9e4f7500afc 100644 --- a/tests/test_cookiejar.py +++ b/tests/test_cookiejar.py @@ -9,6 +9,7 @@ import unittest from http.cookies import BaseCookie, Morsel, SimpleCookie from operator import not_ +from pathlib import Path from unittest import mock import pytest @@ -1620,3 +1621,191 @@ async def test_shared_cookie_with_multiple_domains() -> None: # Verify cache is reused efficiently assert ("", "") in jar._morsel_cache assert "universal" in jar._morsel_cache[("", "")] + + +# === Security tests for restricted unpickler and JSON save/load === + + +async def test_load_rejects_malicious_pickle(tmp_path: Path) -> None: + """Verify CookieJar.load() blocks arbitrary code execution via pickle. + + A crafted pickle payload using os.system (or any non-cookie class) + must be rejected by the restricted unpickler. + """ + import os + + file_path = tmp_path / "malicious.pkl" + + class RCEPayload: + def __reduce__(self) -> tuple[object, ...]: + return (os.system, ("echo PWNED",)) + + with open(file_path, "wb") as f: + pickle.dump(RCEPayload(), f, pickle.HIGHEST_PROTOCOL) + + jar = CookieJar() + with pytest.raises(pickle.UnpicklingError, match="Forbidden class"): + jar.load(file_path) + + +async def test_load_rejects_eval_payload(tmp_path: Path) -> None: + """Verify CookieJar.load() blocks eval-based pickle payloads.""" + file_path = tmp_path / "eval_payload.pkl" + + class EvalPayload: + def __reduce__(self) -> tuple[object, ...]: + return (eval, ("__import__('os').system('echo PWNED')",)) + + with open(file_path, "wb") as f: + pickle.dump(EvalPayload(), f, pickle.HIGHEST_PROTOCOL) + + jar = CookieJar() + with pytest.raises(pickle.UnpicklingError, match="Forbidden class"): + jar.load(file_path) + + +async def test_load_rejects_subprocess_payload(tmp_path: Path) -> None: + """Verify CookieJar.load() blocks subprocess-based pickle payloads.""" + import subprocess + + file_path = tmp_path / "subprocess_payload.pkl" + + class SubprocessPayload: + def __reduce__(self) -> tuple[object, ...]: + return (subprocess.call, (["echo", "PWNED"],)) + + with open(file_path, "wb") as f: + pickle.dump(SubprocessPayload(), f, pickle.HIGHEST_PROTOCOL) + + jar = CookieJar() + with pytest.raises(pickle.UnpicklingError, match="Forbidden class"): + jar.load(file_path) + + +async def test_load_falls_back_to_pickle( + tmp_path: Path, + cookies_to_receive: SimpleCookie, +) -> None: + """Verify load() falls back to restricted pickle for legacy cookie files. + + Existing cookie files saved with older versions of aiohttp used pickle. + load() should detect that the file is not JSON and fall back to the + restricted pickle unpickler for backward compatibility. + """ + file_path = tmp_path / "legit.pkl" + + # Write a legacy pickle file directly (as old aiohttp save() would) + jar_save = CookieJar() + jar_save.update_cookies(cookies_to_receive) + with file_path.open(mode="wb") as f: + pickle.dump(jar_save._cookies, f, pickle.HIGHEST_PROTOCOL) + + jar_load = CookieJar() + jar_load.load(file_path=file_path) + + jar_test = SimpleCookie() + for cookie in jar_load: + jar_test[cookie.key] = cookie + + assert jar_test == cookies_to_receive + + +async def test_save_load_json_roundtrip( + tmp_path: Path, + cookies_to_receive: SimpleCookie, +) -> None: + """Verify save/load roundtrip preserves cookies via JSON format.""" + file_path = tmp_path / "cookies.json" + + jar_save = CookieJar() + jar_save.update_cookies(cookies_to_receive) + jar_save.save(file_path=file_path) + + jar_load = CookieJar() + jar_load.load(file_path=file_path) + + saved_cookies = SimpleCookie() + for cookie in jar_save: + saved_cookies[cookie.key] = cookie + + loaded_cookies = SimpleCookie() + for cookie in jar_load: + loaded_cookies[cookie.key] = cookie + + assert saved_cookies == loaded_cookies + + +async def test_save_load_json_partitioned_cookies(tmp_path: Path) -> None: + """Verify save/load roundtrip works with partitioned cookies.""" + file_path = tmp_path / "partitioned.json" + + jar_save = CookieJar() + jar_save.update_cookies_from_headers( + ["session=cookie; Partitioned"], URL("https://example.com/") + ) + jar_save.save(file_path=file_path) + + jar_load = CookieJar() + jar_load.load(file_path=file_path) + + # Compare individual cookie values (same approach as test_save_load_partitioned_cookies) + saved = list(jar_save) + loaded = list(jar_load) + assert len(saved) == len(loaded) + for s, lo in zip(saved, loaded): + assert s.key == lo.key + assert s.value == lo.value + assert s["domain"] == lo["domain"] + assert s["path"] == lo["path"] + + +async def test_json_format_is_safe(tmp_path: Path) -> None: + """Verify the JSON file format cannot execute code on load.""" + import json + + file_path = tmp_path / "safe.json" + + # Write something that might look dangerous but is just data + malicious_data = { + "evil.com|/": { + "session": { + "key": "session", + "value": "__import__('os').system('echo PWNED')", + "coded_value": "__import__('os').system('echo PWNED')", + } + } + } + with open(file_path, "w") as f: + json.dump(malicious_data, f) + + jar = CookieJar() + jar.load(file_path=file_path) + + # The "malicious" string is just a cookie value, not executed code + cookies = list(jar) + assert len(cookies) == 1 + assert cookies[0].value == "__import__('os').system('echo PWNED')" + + +async def test_save_load_json_secure_cookies(tmp_path: Path) -> None: + """Verify save/load preserves Secure and HttpOnly flags.""" + file_path = tmp_path / "secure.json" + + jar_save = CookieJar() + jar_save.update_cookies_from_headers( + ["token=abc123; Secure; HttpOnly; Path=/; Domain=example.com"], + URL("https://example.com/"), + ) + jar_save.save(file_path=file_path) + + jar_load = CookieJar() + jar_load.load(file_path=file_path) + + loaded_cookies = list(jar_load) + assert len(loaded_cookies) == 1 + cookie = loaded_cookies[0] + assert cookie.key == "token" + assert cookie.value == "abc123" + assert cookie["secure"] is True + assert cookie["httponly"] is True + assert cookie["domain"] == "example.com"