Skip to content

Commit 8d721a8

Browse files
committed
httputil: Only strip tabs and spaces from header values
The RFC specifies that only tabs and spaces should be stripped. Removing additional whitespace characters can lead to framing errors with certain proxies.
1 parent fb119c7 commit 8d721a8

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

tornado/httputil.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@
6262
from asyncio import Future # noqa: F401
6363
import unittest # noqa: F401
6464

65+
# To be used with str.strip() and related methods.
66+
HTTP_WHITESPACE = " \t"
67+
6568

6669
@lru_cache(1000)
6770
def _normalize_header(name: str) -> str:
@@ -171,15 +174,15 @@ def parse_line(self, line: str) -> None:
171174
# continuation of a multi-line header
172175
if self._last_key is None:
173176
raise HTTPInputError("first header line cannot start with whitespace")
174-
new_part = " " + line.lstrip()
177+
new_part = " " + line.lstrip(HTTP_WHITESPACE)
175178
self._as_list[self._last_key][-1] += new_part
176179
self._dict[self._last_key] += new_part
177180
else:
178181
try:
179182
name, value = line.split(":", 1)
180183
except ValueError:
181184
raise HTTPInputError("no colon in header line")
182-
self.add(name, value.strip())
185+
self.add(name, value.strip(HTTP_WHITESPACE))
183186

184187
@classmethod
185188
def parse(cls, headers: str) -> "HTTPHeaders":

tornado/test/httputil_test.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,25 @@ def test_unicode_newlines(self):
334334
gen_log.warning("failed while trying %r in %s", newline, encoding)
335335
raise
336336

337+
def test_unicode_whitespace(self):
338+
# Only tabs and spaces are to be stripped according to the HTTP standard.
339+
# Other unicode whitespace is to be left as-is. In the context of headers,
340+
# this specifically means the whitespace characters falling within the
341+
# latin1 charset.
342+
whitespace = [
343+
(" ", True), # SPACE
344+
("\t", True), # TAB
345+
("\u00a0", False), # NON-BREAKING SPACE
346+
("\u0085", False), # NEXT LINE
347+
]
348+
for c, stripped in whitespace:
349+
headers = HTTPHeaders.parse("Transfer-Encoding: %schunked" % c)
350+
if stripped:
351+
expected = [("Transfer-Encoding", "chunked")]
352+
else:
353+
expected = [("Transfer-Encoding", "%schunked" % c)]
354+
self.assertEqual(expected, list(headers.get_all()))
355+
337356
def test_optional_cr(self):
338357
# Both CRLF and LF should be accepted as separators. CR should not be
339358
# part of the data when followed by LF, but it is a normal char

0 commit comments

Comments
 (0)