|
9 | 9 | import tempfile
|
10 | 10 | from io import BytesIO
|
11 | 11 | from numbers import Number
|
| 12 | +from email.message import Message |
| 13 | +from typing import Dict, Union, Tuple |
12 | 14 |
|
13 | 15 | # Unique missing object.
|
14 | 16 | _missing = object()
|
|
76 | 78 | QUOTE = b'"'[0]
|
77 | 79 |
|
78 | 80 |
|
79 |
def parse_options_header(value: Union[str, bytes]) -> Tuple[bytes, Dict[bytes, bytes]]:
    """
    Parses a Content-Type header into a value in the following format:
        (content_type, {parameters})

    The same parameter syntax is accepted for any header shaped like
    ``main-value; key=value; key="quoted value"``.

    :param value: The raw header value. Bytes are decoded as latin-1 before
                  parsing. A falsy value yields ``(b'', {})``.

    :return: A tuple of the main header value and a dictionary of its
             parameters, all encoded as latin-1 bytes. When the header has
             no parameters, the main value is lowercased and stripped.
    """
    # Uses email.message.Message to parse the header as described in PEP 594.
    # Ref: https://peps.python.org/pep-0594/#cgi
    if not value:
        return (b'', {})

    # If we are passed bytes, we assume that it conforms to WSGI, encoding in latin-1.
    if isinstance(value, bytes):  # pragma: no cover
        value = value.decode('latin-1')

    # For types
    assert isinstance(value, str), 'Value should be a string by now'

    # If we have no options, return the string as-is.
    if ";" not in value:
        return (value.lower().strip().encode('latin-1'), {})

    # Let the standard library split the header into its main value and
    # its (already unquoted) parameters.
    message = Message()
    message['content-type'] = value
    params = message.get_params()
    # If there were no parameters, this would have already returned above
    assert params, 'At least the content type value should be present'
    ctype = params.pop(0)[0].encode('latin-1')

    options: Dict[bytes, bytes] = {}
    for key, param_value in params:
        # RFC 2231 extended parameters (e.g. ``filename*=UTF-8''name.txt``)
        # come back from get_params() as a (charset, language, value)
        # 3-tuple rather than a plain string; the last element is the value.
        if isinstance(param_value, tuple):
            param_value = param_value[-1]

        # If the value is a filename, we need to fix a bug on IE6 that sends
        # the full file path instead of the filename.
        if key == 'filename':
            if param_value[1:3] == ':\\' or param_value[:2] == '\\\\':
                param_value = param_value.split('\\')[-1]

        options[key.encode('latin-1')] = param_value.encode('latin-1')
    return ctype, options
|
118 | 120 |
|
119 | 121 |
|
|
0 commit comments