diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 4516bdea6adb19..63a9924b26dbeb 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -1402,16 +1402,34 @@ def test_issue14072(self):
         self.assertEqual(p2.path, '+31641044153')
 
     def test_invalid_bracketed_hosts(self):
-        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query')
-        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query')
-        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query')
-        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query')
-        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query')
-        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query')
-        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query')
-        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
-        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
-        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
+        cases = [
+            'Scheme://user@[192.0.2.146]/Path?Query',
+            'Scheme://user@[important.com:8000]/Path?Query',
+            'Scheme://user@[v123r.IP]/Path?Query',
+            'Scheme://user@[v12ae]/Path?Query',
+            'Scheme://user@[v.IP]/Path?Query',
+            'Scheme://user@[v123.]/Path?Query',
+            'Scheme://user@[v]/Path?Query',
+            'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query',
+            'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query',
+            'Scheme://user@]v6a.ip[/Path',
+            'Scheme://user@[v6a.ip/path?query',
+            'Scheme://user@v6a.ip]/path?query',
+            'Scheme://user@prefix.[v6a.ip]/path?query',
+            'Scheme://user@[v6a.ip].suffix/path?query',
+        ]
+
+        for case in cases:
+            with self.subTest(case=case):
+                with self.assertRaises(ValueError):
+                    urllib.parse.urlsplit(case)
+                with self.assertRaises(ValueError):
+                    urllib.parse.urlparse(case)
+                bytes_case = case.encode('utf8')
+                with self.assertRaises(ValueError):
+                    urllib.parse.urlsplit(bytes_case)
+                with self.assertRaises(ValueError):
+                    urllib.parse.urlparse(bytes_case)
 
     def test_splitting_bracketed_hosts(self):
         p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 8d7631d5693ece..9ffdbe062c0837 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -505,8 +505,15 @@ def _urlsplit(url, scheme=None, allow_fragments=True):
                 (']' in netloc and '[' not in netloc)):
             raise ValueError("Invalid IPv6 URL")
         if '[' in netloc and ']' in netloc:
-            bracketed_host = netloc.partition('[')[2].partition(']')[0]
-            _check_bracketed_host(bracketed_host)
+            _, _, hostinfo = netloc.rpartition('@')
+            bracket_prefix, have_open_br, bracketed = hostinfo.partition('[')
+            if bracket_prefix:
+                raise ValueError('Invalid IPv6 URL')
+            hostname, _, port = bracketed.partition(']')
+            _check_bracketed_host(hostname)
+            bracket_suffix, _, _ = port.partition(':')
+            if bracket_suffix:
+                raise ValueError('Invalid IPv6 URL')
     if allow_fragments and '#' in url:
         url, fragment = url.split('#', 1)
     if '?' in url:
diff --git a/Misc/NEWS.d/next/Library/2024-11-26-15-32-19.gh-issue-105704.EPaSIS.rst b/Misc/NEWS.d/next/Library/2024-11-26-15-32-19.gh-issue-105704.EPaSIS.rst
new file mode 100644
index 00000000000000..2f03f7cad9fd1e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-26-15-32-19.gh-issue-105704.EPaSIS.rst
@@ -0,0 +1,2 @@
+Fix :func:`urllib.parse.urlparse` and :func:`urllib.parse.urlsplit` to
+disallow IPv6 URLs with invalid prefix/suffix