Skip to content

Commit dcf40f3

Browse files
patchback[bot]YuvalElbar6Dreamsorcerer
authored
[PR #12091/8a631e74 backport][3.14] Restrict pickle deserialization in CookieJar.load() (#12105)
**This is a backport of PR #12091 as merged into master (8a631e7).** --------- Co-authored-by: Yuval Elbar <[email protected]> Co-authored-by: Sam Bull <[email protected]>
1 parent 4f12a66 commit dcf40f3

6 files changed

Lines changed: 331 additions & 8 deletions

File tree

CHANGES/12091.bugfix.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Switched :py:meth:`~aiohttp.CookieJar.save` to use JSON format and
2+
:py:meth:`~aiohttp.CookieJar.load` to try JSON first with a fallback to
3+
a restricted pickle unpickler that only allows cookie-related types
4+
(``SimpleCookie``, ``Morsel``, ``defaultdict``, etc.), preventing
5+
arbitrary code execution via malicious pickle payloads
6+
(CWE-502) -- by :user:`YuvalElbar6`.

aiohttp/cookiejar.py

Lines changed: 110 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import datetime
55
import heapq
66
import itertools
7+
import json
78
import os # noqa
89
import pathlib
910
import pickle
@@ -38,6 +39,41 @@
3839
_SIMPLE_COOKIE = SimpleCookie()
3940

4041

42+
class _RestrictedCookieUnpickler(pickle.Unpickler):
43+
"""A restricted unpickler that only allows cookie-related types.
44+
45+
This prevents arbitrary code execution when loading pickled cookie data
46+
from untrusted sources. Only types that are expected in a serialized
47+
CookieJar are permitted.
48+
49+
See: https://docs.python.org/3/library/pickle.html#restricting-globals
50+
"""
51+
52+
_ALLOWED_CLASSES: frozenset[tuple[str, str]] = frozenset(
53+
{
54+
# Core cookie types
55+
("http.cookies", "SimpleCookie"),
56+
("http.cookies", "Morsel"),
57+
# Container types used by CookieJar._cookies
58+
("collections", "defaultdict"),
59+
# builtins that pickle uses for reconstruction
60+
("builtins", "tuple"),
61+
("builtins", "set"),
62+
("builtins", "frozenset"),
63+
("builtins", "dict"),
64+
}
65+
)
66+
67+
def find_class(self, module: str, name: str) -> type:
68+
if (module, name) not in self._ALLOWED_CLASSES:
69+
raise pickle.UnpicklingError(
70+
f"Forbidden class: {module}.{name}. "
71+
"CookieJar.load() only allows cookie-related types for security. "
72+
"See https://docs.python.org/3/library/pickle.html#restricting-globals"
73+
)
74+
return super().find_class(module, name) # type: ignore[no-any-return]
75+
76+
4177
class CookieJar(AbstractCookieJar):
4278
"""Implements cookie storage adhering to RFC 6265."""
4379

@@ -112,14 +148,84 @@ def quote_cookie(self) -> bool:
112148
return self._quote_cookie
113149

114150
def save(self, file_path: PathLike) -> None:
151+
"""Save cookies to a file using JSON format.
152+
153+
:param file_path: Path to file where cookies will be serialized,
154+
:class:`str` or :class:`pathlib.Path` instance.
155+
"""
115156
file_path = pathlib.Path(file_path)
116-
with file_path.open(mode="wb") as f:
117-
pickle.dump(self._cookies, f, pickle.HIGHEST_PROTOCOL)
157+
data: dict[str, dict[str, dict[str, str | bool]]] = {}
158+
for (domain, path), cookie in self._cookies.items():
159+
key = f"{domain}|{path}"
160+
data[key] = {}
161+
for name, morsel in cookie.items():
162+
morsel_data: dict[str, str | bool] = {
163+
"key": morsel.key,
164+
"value": morsel.value,
165+
"coded_value": morsel.coded_value,
166+
}
167+
# Save all morsel attributes that have values
168+
for attr in morsel._reserved: # type: ignore[attr-defined]
169+
attr_val = morsel[attr]
170+
if attr_val:
171+
morsel_data[attr] = attr_val
172+
data[key][name] = morsel_data
173+
with file_path.open(mode="w", encoding="utf-8") as f:
174+
json.dump(data, f, indent=2)
118175

119176
def load(self, file_path: PathLike) -> None:
177+
"""Load cookies from a file.
178+
179+
Tries to load JSON format first. Falls back to loading legacy
180+
pickle format (using a restricted unpickler) for backward
181+
compatibility with existing cookie files.
182+
183+
:param file_path: Path to file from where cookies will be
184+
imported, :class:`str` or :class:`pathlib.Path` instance.
185+
"""
120186
file_path = pathlib.Path(file_path)
121-
with file_path.open(mode="rb") as f:
122-
self._cookies = pickle.load(f)
187+
# Try JSON format first
188+
try:
189+
with file_path.open(mode="r", encoding="utf-8") as f:
190+
data = json.load(f)
191+
self._cookies = self._load_json_data(data)
192+
except (json.JSONDecodeError, UnicodeDecodeError, ValueError):
193+
# Fall back to legacy pickle format with restricted unpickler
194+
with file_path.open(mode="rb") as f:
195+
self._cookies = _RestrictedCookieUnpickler(f).load()
196+
197+
def _load_json_data(
198+
self, data: dict[str, dict[str, dict[str, str | bool]]]
199+
) -> defaultdict[tuple[str, str], SimpleCookie]:
200+
"""Load cookies from parsed JSON data."""
201+
cookies: defaultdict[tuple[str, str], SimpleCookie] = defaultdict(SimpleCookie)
202+
for compound_key, cookie_data in data.items():
203+
domain, path = compound_key.split("|", 1)
204+
key = (domain, path)
205+
for name, morsel_data in cookie_data.items():
206+
morsel: Morsel[str] = Morsel()
207+
morsel_key = morsel_data["key"]
208+
morsel_value = morsel_data["value"]
209+
morsel_coded_value = morsel_data["coded_value"]
210+
# Use __setstate__ to bypass validation, same pattern
211+
# used in _build_morsel and _cookie_helpers.
212+
morsel.__setstate__( # type: ignore[attr-defined]
213+
{
214+
"key": morsel_key,
215+
"value": morsel_value,
216+
"coded_value": morsel_coded_value,
217+
}
218+
)
219+
# Restore morsel attributes
220+
for attr in morsel._reserved: # type: ignore[attr-defined]
221+
if attr in morsel_data and attr not in (
222+
"key",
223+
"value",
224+
"coded_value",
225+
):
226+
morsel[attr] = morsel_data[attr]
227+
cookies[key][name] = morsel
228+
return cookies
123229

124230
def clear(self, predicate: ClearCookiePredicate | None = None) -> None:
125231
if predicate is None:

docs/client_reference.rst

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2496,16 +2496,28 @@ Utilities
24962496

24972497
.. method:: save(file_path)
24982498

2499-
Write a pickled representation of cookies into the file
2499+
Write a JSON representation of cookies into the file
25002500
at provided path.
25012501

2502+
.. versionchanged:: 3.14
2503+
2504+
Previously used pickle format. Now uses JSON for safe
2505+
serialization.
2506+
25022507
:param file_path: Path to file where cookies will be serialized,
25032508
:class:`str` or :class:`pathlib.Path` instance.
25042509

25052510
.. method:: load(file_path)
25062511

2507-
Load a pickled representation of cookies from the file
2508-
at provided path.
2512+
Load cookies from the file at provided path. Tries JSON format
2513+
first, then falls back to legacy pickle format (using a restricted
2514+
unpickler that only allows cookie-related types) for backward
2515+
compatibility with existing cookie files.
2516+
2517+
.. versionchanged:: 3.14
2518+
2519+
Now loads JSON format by default. Falls back to restricted
2520+
pickle for files saved by older versions.
25092521

25102522
:param file_path: Path to file from where cookies will be
25112523
imported, :class:`str` or :class:`pathlib.Path` instance.

docs/spelling_wordlist.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ deduplicate
9999
defs
100100
Dependabot
101101
deprecations
102+
deserialization
102103
DER
103104
dev
104105
Dev
@@ -213,7 +214,8 @@ Multipart
213214
musllinux
214215
mypy
215216
Nagle
216-
Nagle’s
217+
Nagle's
218+
NFS
217219
namedtuple
218220
nameservers
219221
namespace
@@ -236,6 +238,7 @@ param
236238
params
237239
parsers
238240
pathlib
241+
payloads
239242
peername
240243
performant
241244
pickleable
@@ -356,6 +359,8 @@ unhandled
356359
unicode
357360
unittest
358361
Unittest
362+
unpickler
363+
untrusted
359364
unix
360365
unobvious
361366
unsets

tests/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ def blockbuster(request: pytest.FixtureRequest) -> Iterator[None]:
9595
bb.functions[func].can_block_in(
9696
"aiohttp/web_urldispatcher.py", "add_static"
9797
)
98+
# save/load is not async, so we must allow this:
99+
for func in ("io.TextIOWrapper.read", "io.BufferedReader.read"):
100+
bb.functions[func].can_block_in("aiohttp/cookiejar.py", "load")
101+
for func in ("io.TextIOWrapper.write", "io.BufferedWriter.write"):
102+
bb.functions[func].can_block_in("aiohttp/cookiejar.py", "save")
98103
# Note: coverage.py uses locking internally which can cause false positives
99104
# in blockbuster when it instruments code. This is particularly problematic
100105
# on Windows where it can lead to flaky test failures.

0 commit comments

Comments
 (0)