Skip to content

Commit 20f0ef6

Browse files
authored
♻️ Refactor header option parser to use the standard library instead of a custom RegEx (#75)
1 parent d3d16da commit 20f0ef6

File tree

1 file changed

+26
-24
lines changed

1 file changed

+26
-24
lines changed

multipart/multipart.py

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import tempfile
1010
from io import BytesIO
1111
from numbers import Number
12+
from email.message import Message
13+
from typing import Dict, Union, Tuple
1214

1315
# Unique missing object.
1416
_missing = object()
@@ -76,44 +78,44 @@
7678
QUOTE = b'"'[0]
7779

7880

79-
def parse_options_header(value):
81+
def parse_options_header(value: Union[str, bytes]) -> Tuple[bytes, Dict[bytes, bytes]]:
8082
"""
8183
Parses a Content-Type header into a value in the following format:
8284
(content_type, {parameters})
8385
"""
86+
# Uses email.message.Message to parse the header as described in PEP 594.
87+
# Ref: https://peps.python.org/pep-0594/#cgi
8488
if not value:
8589
return (b'', {})
8690

87-
# If we are passed a string, we assume that it conforms to WSGI and does
88-
# not contain any code point that's not in latin-1.
89-
if isinstance(value, str): # pragma: no cover
90-
value = value.encode('latin-1')
91+
# If we are passed bytes, we assume that they conform to WSGI and decode them as latin-1.
92+
if isinstance(value, bytes): # pragma: no cover
93+
value = value.decode('latin-1')
94+
95+
# Narrow the type for static type checkers: value must be a str from here on.
96+
assert isinstance(value, str), 'Value should be a string by now'
9197

9298
# If we have no options, return the string as-is.
93-
if b';' not in value:
94-
return (value.lower().strip(), {})
99+
if ";" not in value:
100+
return (value.lower().strip().encode('latin-1'), {})
95101

96102
# Split at the first semicolon, to get our value and then options.
97-
ctype, rest = value.split(b';', 1)
103+
# ctype, rest = value.split(b';', 1)
104+
message = Message()
105+
message['content-type'] = value
106+
params = message.get_params()
107+
# If there were no parameters, this would have already returned above
108+
assert params, 'At least the content type value should be present'
109+
ctype = params.pop(0)[0].encode('latin-1')
98110
options = {}
99-
100-
# Parse the options.
101-
for match in OPTION_RE.finditer(rest):
102-
key = match.group(1).lower()
103-
value = match.group(2)
104-
if value[0] == QUOTE and value[-1] == QUOTE:
105-
# Unquote the value.
106-
value = value[1:-1]
107-
value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"')
108-
111+
for param in params:
112+
key, value = param
109113
# If the value is a filename, we need to fix a bug on IE6 that sends
110114
# the full file path instead of the filename.
111-
if key == b'filename':
112-
if value[1:3] == b':\\' or value[:2] == b'\\\\':
113-
value = value.split(b'\\')[-1]
114-
115-
options[key] = value
116-
115+
if key == 'filename':
116+
if value[1:3] == ':\\' or value[:2] == '\\\\':
117+
value = value.split('\\')[-1]
118+
options[key.encode('latin-1')] = value.encode('latin-1')
117119
return ctype, options
118120

119121

0 commit comments

Comments
 (0)