From 6204ab9f989be3841c8c47e1e2cfe6a658fe16d5 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Tue, 28 Jan 2025 14:09:00 -0600 Subject: [PATCH 1/4] gh-105704: Disallow square brackets ( and ) in domain names for parsed URLs --- Lib/test/test_urlparse.py | 14 +++++++++++++ Lib/urllib/parse.py | 20 +++++++++++++++++-- ...-01-28-14-08-03.gh-issue-105704.EnhHxu.rst | 4 ++++ 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 4516bdea6adb19..0f15a0998ff2ea 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1412,6 +1412,20 @@ def test_invalid_bracketed_hosts(self): self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]/') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix/') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]?') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix?') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]/') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix/') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]?') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix?') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@prefix.[v6a.ip]') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@[v6a.ip].suffix') def test_splitting_bracketed_hosts(self): p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index c412c729852272..9d51f4c6812b57 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -439,6 +439,23 @@ def _checknetloc(netloc): raise ValueError("netloc '" + netloc + "' contains invalid " + "characters under NFKC normalization") +def _check_bracketed_netloc(netloc): + # Note that this function must mirror the splitting + # done in NetlocResultMixins._hostinfo(). + hostname_and_port = netloc.rpartition('@')[2] + before_bracket, have_open_br, bracketed = hostname_and_port.partition('[') + if have_open_br: + # No data is allowed before a bracket. + if before_bracket: + raise ValueError("Invalid IPv6 URL") + hostname, _, port = bracketed.partition(']') + # No data is allowed after the bracket but before the port delimiter. + if port and not port.startswith(":"): + raise ValueError("Invalid IPv6 URL") + else: + hostname, _, port = hostname_and_port.partition(':') + _check_bracketed_host(hostname) + # Valid bracketed hosts are defined in # https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/ def _check_bracketed_host(hostname): @@ -505,8 +522,7 @@ def _urlsplit(url, scheme=None, allow_fragments=True): (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if '[' in netloc and ']' in netloc: - bracketed_host = netloc.partition('[')[2].partition(']')[0] - _check_bracketed_host(bracketed_host) + _check_bracketed_netloc(netloc) if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: diff --git a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst new file mode 100644 index 00000000000000..aaeac71678de87 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst @@ -0,0 +1,4 @@ +When using ``urllib.parse.urlsplit()`` and ``urlparse()`` host parsing would +not reject domain names containing square brackets (``[`` and ``]``). Square +brackets are only valid for IPv6 and IPvFuture hosts according to `RFC 3986 +Section 3.2.2 `__. From 3ab35e8d890e2c5d4e6b0c0299f94775a3ded9ae Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 30 Jan 2025 09:50:14 -0600 Subject: [PATCH 2/4] Use Sphinx references Co-authored-by: Peter Bierma --- .../Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst index aaeac71678de87..fb8674f558db59 100644 --- a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst +++ b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst @@ -1,4 +1,4 @@ -When using ``urllib.parse.urlsplit()`` and ``urlparse()`` host parsing would +When using :func:`urllib.parse.urlsplit()` and :func:`urllib.parse.urlparse()` host parsing would not reject domain names containing square brackets (``[`` and ``]``). Square brackets are only valid for IPv6 and IPvFuture hosts according to `RFC 3986 Section 3.2.2 `__. From ebf92bb4d323d41778e5de6df177b26f18ecf7f9 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 30 Jan 2025 11:10:35 -0600 Subject: [PATCH 3/4] Add mismatched bracket test cases, fix news format --- Lib/test/test_urlparse.py | 10 ++++++++++ .../2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst | 8 ++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 0f15a0998ff2ea..f8ce61b2b49621 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1426,6 +1426,16 @@ def test_invalid_bracketed_hosts(self): self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix?') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@prefix.[v6a.ip]') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@[v6a.ip].suffix') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip]') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://]v6a.ip[') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://]v6a.ip') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip].suffix') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip[suffix') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix') def test_splitting_bracketed_hosts(self): p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') diff --git a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst index fb8674f558db59..bff1bc6b0d609c 100644 --- a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst +++ b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst @@ -1,4 +1,4 @@ -When using :func:`urllib.parse.urlsplit()` and :func:`urllib.parse.urlparse()` host parsing would -not reject domain names containing square brackets (``[`` and ``]``). Square -brackets are only valid for IPv6 and IPvFuture hosts according to `RFC 3986 -Section 3.2.2 `__. +When using :func:`urllib.parse.urlsplit` and :func:`urllib.parse.urlparse` host +parsing would not reject domain names containing square brackets (``[`` and +``]``). Square brackets are only valid for IPv6 and IPvFuture hosts according to +`RFC 3986 Section 3.2.2 `__. From 2817b2e29c8b28a24f9eb97abce1e1b60b1162fa Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 30 Jan 2025 13:01:19 -0600 Subject: [PATCH 4/4] Add more test coverage for ports --- Lib/test/test_urlparse.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index f8ce61b2b49621..b51cc006b73280 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1424,6 +1424,15 @@ def test_invalid_bracketed_hosts(self): self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix/') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]?') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix?') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:a') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:a') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:a1') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:a1') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:1a') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:1a') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:/') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:?') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@prefix.[v6a.ip]') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@[v6a.ip].suffix') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip') @@ -1438,14 +1447,16 @@ def test_invalid_bracketed_hosts(self): self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix') def test_splitting_bracketed_hosts(self): - p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') + p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]:1234/path?query') self.assertEqual(p1.hostname, 'v6a.ip') self.assertEqual(p1.username, 'user') self.assertEqual(p1.path, '/path') + self.assertEqual(p1.port, 1234) p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query') self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test') self.assertEqual(p2.username, 'user') self.assertEqual(p2.path, '/path') + self.assertIs(p2.port, None) p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query') self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test') self.assertEqual(p3.username, 'user')