From 23e23d9aa20bda640860030108045584aa7db7ff Mon Sep 17 00:00:00 2001 From: Ben Cail Date: Fri, 13 Oct 2023 09:04:24 -0400 Subject: [PATCH 1/5] gh-105704: Disallow IPv6 URLs with invalid prefix/suffix --- Lib/test/test_urlparse.py | 33 +++++++++++++++++++++++---------- Lib/urllib/parse.py | 12 +++++++----- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 4516bdea6adb19..f7b5a3cf47bfd3 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1402,16 +1402,29 @@ def test_issue14072(self): self.assertEqual(p2.path, '+31641044153') def test_invalid_bracketed_hosts(self): - self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query') - self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query') - self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query') - self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query') - self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query') - self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query') - self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query') - self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') - self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') - self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path') + cases = [ + 'Scheme://user@[192.0.2.146]/Path?Query', + 'Scheme://user@[important.com:8000]/Path?Query', + 'Scheme://user@[v123r.IP]/Path?Query', + 'Scheme://user@[v12ae]/Path?Query', + 'Scheme://user@[v.IP]/Path?Query', + 'Scheme://user@[v123.]/Path?Query', + 'Scheme://user@[v]/Path?Query', + 
'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query', + 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query', + 'Scheme://user@]v6a.ip[/Path', + 'Scheme://user@[v6a.ip/path?query', + 'Scheme://user@v6a.ip]/path?query', + 'Scheme://user@prefix.[v6a.ip]/path?query', + 'Scheme://user@[v6a.ip].suffix/path?query', + ] + + for case in cases: + with self.subTest(case=case): + with self.assertRaises(ValueError): + urllib.parse.urlsplit(case).hostname + with self.assertRaises(ValueError): + urllib.parse.urlparse(case).hostname def test_splitting_bracketed_hosts(self): p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 8d7631d5693ece..8c4f6b4196cc86 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -206,10 +206,15 @@ def _userinfo(self): def _hostinfo(self): netloc = self.netloc _, _, hostinfo = netloc.rpartition('@') - _, have_open_br, bracketed = hostinfo.partition('[') + bracket_prefix, have_open_br, bracketed = hostinfo.partition('[') if have_open_br: + if bracket_prefix: + raise ValueError('Invalid IPv6 URL') hostname, _, port = bracketed.partition(']') - _, _, port = port.partition(':') + _check_bracketed_host(hostname) + bracket_suffix, _, port = port.partition(':') + if bracket_suffix: + raise ValueError('Invalid IPv6 URL') else: hostname, _, port = hostinfo.partition(':') if not port: @@ -504,9 +509,6 @@ def _urlsplit(url, scheme=None, allow_fragments=True): if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") - if '[' in netloc and ']' in netloc: - bracketed_host = netloc.partition('[')[2].partition(']')[0] - _check_bracketed_host(bracketed_host) if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' 
in url: From f75c6a5a6af9fbdca7fa67f4e5799047a3f0ea80 Mon Sep 17 00:00:00 2001 From: Ben Cail Date: Tue, 24 Oct 2023 11:56:13 -0400 Subject: [PATCH 2/5] Add checking for `bytes` URLs --- Lib/test/test_urlparse.py | 5 +++++ Lib/urllib/parse.py | 9 +++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index f7b5a3cf47bfd3..4fe78a129de275 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1425,6 +1425,11 @@ def test_invalid_bracketed_hosts(self): urllib.parse.urlsplit(case).hostname with self.assertRaises(ValueError): urllib.parse.urlparse(case).hostname + bytes_case = case.encode('utf8') + with self.assertRaises(ValueError): + urllib.parse.urlsplit(bytes_case).hostname + with self.assertRaises(ValueError): + urllib.parse.urlparse(bytes_case).hostname def test_splitting_bracketed_hosts(self): p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 8c4f6b4196cc86..6822f25b3ce7bb 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -241,10 +241,15 @@ def _userinfo(self): def _hostinfo(self): netloc = self.netloc _, _, hostinfo = netloc.rpartition(b'@') - _, have_open_br, bracketed = hostinfo.partition(b'[') + bracket_prefix, have_open_br, bracketed = hostinfo.partition(b'[') if have_open_br: + if bracket_prefix: + raise ValueError('Invalid IPv6 URL') hostname, _, port = bracketed.partition(b']') - _, _, port = port.partition(b':') + _check_bracketed_host(hostname.decode(_implicit_encoding, _implicit_errors)) + bracket_suffix, _, port = port.partition(b':') + if bracket_suffix: + raise ValueError('Invalid IPv6 URL') else: hostname, _, port = hostinfo.partition(b':') if not port: From 9cb076e36a499f7b96253a4a21205847993c7934 Mon Sep 17 00:00:00 2001 From: Ben Cail Date: Tue, 26 Nov 2024 11:20:19 -0500 Subject: [PATCH 3/5] update to not require attribute access --- Lib/test/test_urlparse.py | 8 
++++---- Lib/urllib/parse.py | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 4fe78a129de275..63a9924b26dbeb 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1422,14 +1422,14 @@ def test_invalid_bracketed_hosts(self): for case in cases: with self.subTest(case=case): with self.assertRaises(ValueError): - urllib.parse.urlsplit(case).hostname + urllib.parse.urlsplit(case) with self.assertRaises(ValueError): - urllib.parse.urlparse(case).hostname + urllib.parse.urlparse(case) bytes_case = case.encode('utf8') with self.assertRaises(ValueError): - urllib.parse.urlsplit(bytes_case).hostname + urllib.parse.urlsplit(bytes_case) with self.assertRaises(ValueError): - urllib.parse.urlparse(bytes_case).hostname + urllib.parse.urlparse(bytes_case) def test_splitting_bracketed_hosts(self): p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 6822f25b3ce7bb..be626a74eb823c 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -206,15 +206,10 @@ def _userinfo(self): def _hostinfo(self): netloc = self.netloc _, _, hostinfo = netloc.rpartition('@') - bracket_prefix, have_open_br, bracketed = hostinfo.partition('[') + _, have_open_br, bracketed = hostinfo.partition('[') if have_open_br: - if bracket_prefix: - raise ValueError('Invalid IPv6 URL') hostname, _, port = bracketed.partition(']') - _check_bracketed_host(hostname) - bracket_suffix, _, port = port.partition(':') - if bracket_suffix: - raise ValueError('Invalid IPv6 URL') + _, _, port = port.partition(':') else: hostname, _, port = hostinfo.partition(':') if not port: @@ -241,15 +236,10 @@ def _userinfo(self): def _hostinfo(self): netloc = self.netloc _, _, hostinfo = netloc.rpartition(b'@') - bracket_prefix, have_open_br, bracketed = hostinfo.partition(b'[') + _, have_open_br, bracketed = hostinfo.partition(b'[') if 
have_open_br: - if bracket_prefix: - raise ValueError('Invalid IPv6 URL') hostname, _, port = bracketed.partition(b']') - _check_bracketed_host(hostname.decode(_implicit_encoding, _implicit_errors)) - bracket_suffix, _, port = port.partition(b':') - if bracket_suffix: - raise ValueError('Invalid IPv6 URL') + _, _, port = port.partition(b':') else: hostname, _, port = hostinfo.partition(b':') if not port: @@ -514,6 +504,16 @@ def _urlsplit(url, scheme=None, allow_fragments=True): if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") + if '[' in netloc and ']' in netloc: + _, _, hostinfo = netloc.rpartition('@') + bracket_prefix, have_open_br, bracketed = hostinfo.partition('[') + if bracket_prefix: + raise ValueError('Invalid IPv6 URL') + hostname, _, port = bracketed.partition(']') + _check_bracketed_host(hostname) + bracket_suffix, _, port = port.partition(':') + if bracket_suffix: + raise ValueError('Invalid IPv6 URL') if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' 
in url: From 735c79759f99868d637f7c6f83279c1337ba3c83 Mon Sep 17 00:00:00 2001 From: Ben Cail Date: Tue, 26 Nov 2024 11:25:50 -0500 Subject: [PATCH 4/5] use _ for unused var --- Lib/urllib/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index be626a74eb823c..9ffdbe062c0837 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -511,7 +511,7 @@ def _urlsplit(url, scheme=None, allow_fragments=True): raise ValueError('Invalid IPv6 URL') hostname, _, port = bracketed.partition(']') _check_bracketed_host(hostname) - bracket_suffix, _, port = port.partition(':') + bracket_suffix, _, _ = port.partition(':') if bracket_suffix: raise ValueError('Invalid IPv6 URL') if allow_fragments and '#' in url: From 4374ca4ac45edc5b6fc605d0448c2efaf2a10f6c Mon Sep 17 00:00:00 2001 From: Ben Cail Date: Tue, 26 Nov 2024 15:32:39 -0500 Subject: [PATCH 5/5] add blurb --- .../next/Library/2024-11-26-15-32-19.gh-issue-105704.EPaSIS.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-11-26-15-32-19.gh-issue-105704.EPaSIS.rst diff --git a/Misc/NEWS.d/next/Library/2024-11-26-15-32-19.gh-issue-105704.EPaSIS.rst b/Misc/NEWS.d/next/Library/2024-11-26-15-32-19.gh-issue-105704.EPaSIS.rst new file mode 100644 index 00000000000000..2f03f7cad9fd1e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-26-15-32-19.gh-issue-105704.EPaSIS.rst @@ -0,0 +1,2 @@ +Fix :func:`urllib.parse.urlparse` and :func:`urllib.parse.urlsplit` to +disallow IPv6 URLs with invalid prefix/suffix.