Skip to content

Commit 2dc02ee

Browse files
Narrow singleton header rejection to security-critical headers (#12302)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent b1cd6a8 commit 2dc02ee

4 files changed

Lines changed: 131 additions & 36 deletions

File tree

CHANGES/12302.bugfix.rst

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Skipped the duplicate singleton header check in lax mode (the default for response
2+
parsing). In strict mode (request parsing, or ``-X dev``), all RFC 9110 singletons
3+
are still enforced -- by :user:`bdraco`.

aiohttp/_http_parser.pyx

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -71,8 +71,11 @@ cdef object StreamReader = _StreamReader
7171
cdef object DeflateBuffer = _DeflateBuffer
7272
cdef bytes EMPTY_BYTES = b""
7373

74-
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
75-
cdef tuple SINGLETON_HEADERS = (
74+
# RFC 9110 singleton headers — duplicates are rejected in strict mode.
75+
# In lax mode (response parser default), the check is skipped entirely
76+
# since real-world servers (e.g. Google APIs, Werkzeug) commonly send
77+
# duplicate headers like Content-Type or Server.
78+
cdef frozenset SINGLETON_HEADERS = frozenset({
7679
hdrs.CONTENT_LENGTH,
7780
hdrs.CONTENT_LOCATION,
7881
hdrs.CONTENT_RANGE,
@@ -83,7 +86,7 @@ cdef tuple SINGLETON_HEADERS = (
8386
hdrs.SERVER,
8487
hdrs.TRANSFER_ENCODING,
8588
hdrs.USER_AGENT,
86-
)
89+
})
8790

8891
cdef inline object extend(object buf, const char* at, size_t length):
8992
cdef Py_ssize_t s
@@ -304,13 +307,15 @@ cdef class HttpParser:
304307
size_t _max_headers
305308
bint _response_with_body
306309
bint _read_until_eof
310+
bint _lax
307311

308312
bint _started
309313
object _url
310314
bytearray _buf
311315
str _path
312316
str _reason
313317
list _headers
318+
set _seen_singletons
314319
list _raw_headers
315320
bint _upgraded
316321
list _messages
@@ -377,6 +382,8 @@ cdef class HttpParser:
377382
self._upgraded = False
378383
self._auto_decompress = auto_decompress
379384
self._content_encoding = None
385+
self._lax = False
386+
self._seen_singletons = set()
380387

381388
self._csettings.on_url = cb_on_url
382389
self._csettings.on_status = cb_on_status
@@ -405,6 +412,10 @@ cdef class HttpParser:
405412
if "\x00" in value:
406413
raise InvalidHeader(self._raw_value)
407414

415+
if not self._lax and name in SINGLETON_HEADERS:
416+
if name in self._seen_singletons:
417+
raise BadHttpMessage(f"Duplicate '{name}' header found.")
418+
self._seen_singletons.add(name)
408419
self._headers.append((name, value))
409420
if len(self._headers) > self._max_headers:
410421
raise BadHttpMessage("Too many headers received")
@@ -444,14 +455,6 @@ cdef class HttpParser:
444455
raw_headers = tuple(self._raw_headers)
445456
headers = CIMultiDictProxy(CIMultiDict(self._headers))
446457

447-
# https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
448-
bad_hdr = next(
449-
(h for h in SINGLETON_HEADERS if len(headers.getall(h, ())) > 1),
450-
None,
451-
)
452-
if bad_hdr is not None:
453-
raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")
454-
455458
if self._cparser.type == cparser.HTTP_REQUEST:
456459
h_upg = headers.get("upgrade", "")
457460
allowed = upgrade and h_upg.isascii() and h_upg.lower() in ALLOWED_UPGRADES
@@ -689,6 +692,7 @@ cdef class HttpResponseParser(HttpParser):
689692
cparser.llhttp_set_lenient_headers(self._cparser, 1)
690693
cparser.llhttp_set_lenient_optional_cr_before_lf(self._cparser, 1)
691694
cparser.llhttp_set_lenient_spaces_after_chunk_size(self._cparser, 1)
695+
self._lax = True
692696

693697
cdef object _on_status_complete(self):
694698
if self._buf:
@@ -702,6 +706,7 @@ cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1:
702706

703707
pyparser._started = True
704708
pyparser._headers = []
709+
pyparser._seen_singletons = set()
705710
pyparser._raw_headers = []
706711
PyByteArray_Resize(pyparser._buf, 0)
707712
pyparser._path = None

aiohttp/http_parser.py

Lines changed: 22 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,26 @@
7575
DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
7676
HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
7777

78+
# RFC 9110 singleton headers — duplicates are rejected in strict mode.
79+
# In lax mode (response parser default), the check is skipped entirely
80+
# since real-world servers (e.g. Google APIs, Werkzeug) commonly send
81+
# duplicate headers like Content-Type or Server.
82+
# Lowercased for case-insensitive matching against wire names.
83+
SINGLETON_HEADERS: Final[frozenset[str]] = frozenset(
84+
{
85+
"content-length",
86+
"content-location",
87+
"content-range",
88+
"content-type",
89+
"etag",
90+
"host",
91+
"max-forwards",
92+
"server",
93+
"transfer-encoding",
94+
"user-agent",
95+
}
96+
)
97+
7898

7999
class RawRequestMessage(NamedTuple):
80100
method: str
@@ -194,6 +214,8 @@ def parse_headers(
194214
elif _FIELD_VALUE_FORBIDDEN_CTL_RE.search(value):
195215
raise InvalidHeader(bvalue)
196216

217+
if not self._lax and name in headers and name.lower() in SINGLETON_HEADERS:
218+
raise BadHttpMessage(f"Duplicate '{name}' header found.")
197219
headers.add(name, value)
198220
raw_headers.append((bname, bvalue))
199221

@@ -502,24 +524,6 @@ def parse_headers(
502524
upgrade = False
503525
chunked = False
504526

505-
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
506-
# https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
507-
singletons = (
508-
hdrs.CONTENT_LENGTH,
509-
hdrs.CONTENT_LOCATION,
510-
hdrs.CONTENT_RANGE,
511-
hdrs.CONTENT_TYPE,
512-
hdrs.ETAG,
513-
hdrs.HOST,
514-
hdrs.MAX_FORWARDS,
515-
hdrs.SERVER,
516-
hdrs.TRANSFER_ENCODING,
517-
hdrs.USER_AGENT,
518-
)
519-
bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
520-
if bad_hdr is not None:
521-
raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")
522-
523527
# keep-alive and protocol switching
524528
# RFC 9110 section 7.6.1 defines Connection as a comma-separated list.
525529
conn_values = headers.getall(hdrs.CONNECTION, ())

tests/test_http_parser.py

Lines changed: 90 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -281,32 +281,76 @@ def test_content_length_transfer_encoding(parser: HttpRequestParser) -> None:
281281
"hdr",
282282
(
283283
"Content-Length",
284+
"Host",
285+
"Transfer-Encoding",
286+
),
287+
)
288+
def test_duplicate_singleton_header_rejected(
289+
parser: HttpRequestParser, hdr: str
290+
) -> None:
291+
val1, val2 = ("1", "2") if hdr == "Content-Length" else ("value1", "value2")
292+
text = (
293+
f"GET /test HTTP/1.1\r\n"
294+
f"Host: example.com\r\n"
295+
f"{hdr}: {val1}\r\n"
296+
f"{hdr}: {val2}\r\n"
297+
"\r\n"
298+
).encode()
299+
with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"):
300+
parser.feed_data(text)
301+
302+
303+
@pytest.mark.parametrize(
304+
"hdr",
305+
(
284306
"Content-Location",
285307
"Content-Range",
286308
"Content-Type",
287309
"ETag",
288-
"Host",
289310
"Max-Forwards",
290311
"Server",
291-
"Transfer-Encoding",
292312
"User-Agent",
293313
),
294314
)
295-
def test_duplicate_singleton_header_rejected(
315+
def test_duplicate_non_security_singleton_header_rejected_strict(
296316
parser: HttpRequestParser, hdr: str
297317
) -> None:
298-
val1, val2 = ("1", "2") if hdr == "Content-Length" else ("value1", "value2")
318+
"""Non-security singletons are rejected in strict mode (requests)."""
299319
text = (
300320
f"GET /test HTTP/1.1\r\n"
301321
f"Host: example.com\r\n"
302-
f"{hdr}: {val1}\r\n"
303-
f"{hdr}: {val2}\r\n"
304-
f"\r\n"
322+
f"{hdr}: value1\r\n"
323+
f"{hdr}: value2\r\n"
324+
"\r\n"
305325
).encode()
306326
with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"):
307327
parser.feed_data(text)
308328

309329

330+
@pytest.mark.parametrize(
331+
"hdr",
332+
(
333+
# Content-Length is excluded because llhttp rejects duplicates
334+
# at the C level before our singleton check runs.
335+
"Content-Location",
336+
"Content-Range",
337+
"Content-Type",
338+
"ETag",
339+
"Max-Forwards",
340+
"Server",
341+
"Transfer-Encoding",
342+
"User-Agent",
343+
),
344+
)
345+
def test_duplicate_singleton_header_accepted_in_lax_mode(
346+
response: HttpResponseParser, hdr: str
347+
) -> None:
348+
"""All singleton duplicates are accepted in lax mode (response parser default)."""
349+
text = (f"HTTP/1.1 200 OK\r\n{hdr}: value1\r\n{hdr}: value2\r\n\r\n").encode()
350+
messages, upgrade, tail = response.feed_data(text)
351+
assert len(messages) == 1
352+
353+
310354
def test_duplicate_host_header_rejected(parser: HttpRequestParser) -> None:
311355
text = (
312356
b"GET /admin HTTP/1.1\r\n"
@@ -318,6 +362,45 @@ def test_duplicate_host_header_rejected(parser: HttpRequestParser) -> None:
318362
parser.feed_data(text)
319363

320364

365+
@pytest.mark.parametrize(
366+
("hdr1", "hdr2"),
367+
(
368+
("content-length", "Content-Length"),
369+
("Content-Length", "content-length"),
370+
("transfer-encoding", "Transfer-Encoding"),
371+
("Transfer-Encoding", "transfer-encoding"),
372+
),
373+
)
374+
def test_duplicate_singleton_header_different_casing_rejected(
375+
parser: HttpRequestParser, hdr1: str, hdr2: str
376+
) -> None:
377+
"""Singleton check must be case-insensitive per RFC 9110."""
378+
val1, val2 = ("1", "2") if "content-length" in hdr1.lower() else ("v1", "v2")
379+
text = (
380+
f"GET /test HTTP/1.1\r\n"
381+
f"Host: example.com\r\n"
382+
f"{hdr1}: {val1}\r\n"
383+
f"{hdr2}: {val2}\r\n"
384+
"\r\n"
385+
).encode()
386+
with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"):
387+
parser.feed_data(text)
388+
389+
390+
def test_duplicate_host_header_different_casing_rejected(
391+
parser: HttpRequestParser,
392+
) -> None:
393+
"""Duplicate Host with different casing must also be rejected."""
394+
text = (
395+
b"GET /test HTTP/1.1\r\n"
396+
b"host: evil.example\r\n"
397+
b"Host: good.example\r\n"
398+
b"\r\n"
399+
)
400+
with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"):
401+
parser.feed_data(text)
402+
403+
321404
def test_bad_chunked(parser: HttpRequestParser) -> None:
322405
"""Test that invalid chunked encoding doesn't allow content-length to be used."""
323406
text = (

0 commit comments

Comments (0)