Skip to content

Commit 6c53ad6

Browse files
committed
Raise EmailUndeliverableError for special use domain names and their subdomains, except @test when a new test_environment argument is passed
Some of the domain names used in tests had to be revised because they went from valid to invalid, or the exception message changed.
1 parent fbcf145 commit 6c53ad6

File tree

3 files changed

+127
-50
lines changed

3 files changed

+127
-50
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ shown):
135135

136136
`dns_resolver=None`: Pass an instance of [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to control the DNS resolver including setting a timeout and [a cache](https://dnspython.readthedocs.io/en/latest/resolver-caching.html). The `caching_resolver` function shown above is a helper function to construct a dns.resolver.Resolver with a [LRUCache](https://dnspython.readthedocs.io/en/latest/resolver-caching.html#dns.resolver.LRUCache). Reuse the same resolver instance across calls to `validate_email` to make use of the cache.
137137

138+
In non-production test environments, you may want to allow `@test` or `@mycompany.test` email addresses to be used as placeholder email addresses, which would normally not be permitted. In that case, pass `test_environment=True`. DNS-based deliverability checks will be disabled as well. Other [Special Use Domain Names](https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml) are always considered invalid and raise `EmailUndeliverableError`.
138139

139140
Internationalized email addresses
140141
---------------------------------
@@ -340,8 +341,11 @@ strictly conform to the standards. Many email address forms are obsolete
340341
or likely to cause trouble:
341342

342343
* The validator assumes the email address is intended to be
343-
deliverable on the public Internet using DNS, and so the domain part
344+
deliverable on the public Internet. The domain part
344345
of the email address must be a resolvable domain name.
346+
[Special Use Domain Names](https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml)
347+
and their subdomains are always considered invalid (but see
348+
the `test_environment` parameter above).
345349
* The "quoted string" form of the local part of the email address (RFC
346350
5321 4.1.2) is not permitted --- no one uses this anymore anyway.
347351
Quoted forms allow multiple @-signs, space characters, and other

email_validator/__init__.py

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,25 @@
3636
LOCAL_PART_MAX_LENGTH = 64
3737
DOMAIN_MAX_LENGTH = 255
3838

39+
# IANA Special Use Domain Names
40+
# Last Updated 2021-09-21
41+
# https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.txt
42+
# The domain names without dots would be caught by the check that the domain
43+
# name in an email address must have a period, but this list will also catch
44+
# subdomains of these domains, which are also reserved.
45+
SPECIAL_USE_DOMAIN_NAMES = (
46+
"arpa", # consolidated from a lot of arpa subdomains, we'll assume all subdomains of arpa are actually reserved
47+
"example",
48+
"example.com",
49+
"example.net",
50+
"example.org",
51+
"invalid",
52+
"local",
53+
"localhost",
54+
"onion",
55+
"test", # see special logic for 'test' where this is checked
56+
)
57+
3958
# ease compatibility in type checking
4059
if sys.version_info >= (3,):
4160
unicode_class = str
@@ -194,6 +213,7 @@ def validate_email(
194213
allow_smtputf8=True,
195214
allow_empty_local=False,
196215
check_deliverability=True,
216+
test_environment=False,
197217
timeout=DEFAULT_TIMEOUT,
198218
dns_resolver=None
199219
):
@@ -230,7 +250,7 @@ def validate_email(
230250
ret.smtputf8 = local_part_info["smtputf8"]
231251

232252
# Validate the email address's domain part syntax and get a normalized form.
233-
domain_part_info = validate_email_domain_part(parts[1])
253+
domain_part_info = validate_email_domain_part(parts[1], test_environment=test_environment)
234254
ret.domain = domain_part_info["domain"]
235255
ret.ascii_domain = domain_part_info["ascii_domain"]
236256

@@ -280,9 +300,9 @@ def validate_email(
280300
reason = "(when encoded in bytes)"
281301
raise EmailSyntaxError("The email address is too long {}.".format(reason))
282302

283-
if check_deliverability:
284-
# Validate the email address's deliverability and update the
285-
# return dict with metadata.
303+
if check_deliverability and not test_environment:
304+
# Validate the email address's deliverability using DNS
305+
# and update the return dict with metadata.
286306
deliverability_info = validate_email_deliverability(
287307
ret["domain"], ret["domain_i18n"], timeout, dns_resolver
288308
)
@@ -356,7 +376,7 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals
356376
}
357377

358378

359-
def validate_email_domain_part(domain):
379+
def validate_email_domain_part(domain, test_environment=False):
360380
# Empty?
361381
if len(domain) == 0:
362382
raise EmailSyntaxError("There must be something after the @-sign.")
@@ -435,11 +455,33 @@ def validate_email_domain_part(domain):
435455
raise EmailSyntaxError("The email address contains invalid characters after the @-sign.")
436456

437457
# All publicly deliverable addresses have domain names with at least
438-
# one period. We also know that all TLDs end with a letter.
439-
if "." not in ascii_domain:
458+
# one period, and we'll consider the lack of a period a syntax error
459+
# since that will match people's sense of what an email address looks
460+
# like. We'll skip this in test environments to allow '@test' email
461+
# addresses.
462+
if "." not in ascii_domain and not (ascii_domain == "test" and test_environment):
440463
raise EmailSyntaxError("The domain name %s is not valid. It should have a period." % domain_i18n)
464+
465+
# Check special-use and reserved domain names. Raise these as
466+
# deliverability errors since they are syntactically valid.
467+
# Some might fail DNS-based deliverability checks, but that
468+
# can be turned off, so we should fail them all sooner.
469+
for d in SPECIAL_USE_DOMAIN_NAMES:
470+
# RFC 6761 says that applications should not block use of the 'test'
471+
# domain name, presumably because that would prevent it from being
472+
# used for actual testing. We'll block it, except when a special
473+
# testing flag is used, indicating that the module is being used
474+
# in a test environment.
475+
if d == "test" and test_environment:
476+
continue
477+
478+
if ascii_domain == d or ascii_domain.endswith("." + d):
479+
raise EmailUndeliverableError("The domain name %s is a special-use or reserved name that cannot be used with email." % domain_i18n)
480+
481+
# We also know that all TLDs currently end with a letter, and
482+
# we'll consider that a non-DNS based deliverability check.
441483
if not re.search(r"[A-Za-z]\Z", ascii_domain):
442-
raise EmailSyntaxError(
484+
raise EmailUndeliverableError(
443485
"The domain name %s is not valid. It is not within a valid top-level domain." % domain_i18n
444486
)
445487

tests/test_main.py

Lines changed: 72 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,51 +12,51 @@
1212
'email_input,output',
1313
[
1414
(
15-
'Abc@example.com',
15+
'Abc@example.tld',
1616
ValidatedEmail(
1717
local_part='Abc',
1818
ascii_local_part='Abc',
1919
smtputf8=False,
20-
ascii_domain='example.com',
21-
domain='example.com',
22-
email='Abc@example.com',
23-
ascii_email='Abc@example.com',
20+
ascii_domain='example.tld',
21+
domain='example.tld',
22+
email='Abc@example.tld',
23+
ascii_email='Abc@example.tld',
2424
),
2525
),
2626
(
27-
'Abc.123@example.com',
27+
'Abc.123@test-example.com',
2828
ValidatedEmail(
2929
local_part='Abc.123',
3030
ascii_local_part='Abc.123',
3131
smtputf8=False,
32-
ascii_domain='example.com',
33-
domain='example.com',
34-
email='Abc.123@example.com',
35-
ascii_email='Abc.123@example.com',
32+
ascii_domain='test-example.com',
33+
domain='test-example.com',
34+
email='Abc.123@test-example.com',
35+
ascii_email='Abc.123@test-example.com',
3636
),
3737
),
3838
(
39-
'user+mailbox/department=shipping@example.com',
39+
'user+mailbox/department=shipping@example.tld',
4040
ValidatedEmail(
4141
local_part='user+mailbox/department=shipping',
4242
ascii_local_part='user+mailbox/department=shipping',
4343
smtputf8=False,
44-
ascii_domain='example.com',
45-
domain='example.com',
46-
email='user+mailbox/department=shipping@example.com',
47-
ascii_email='user+mailbox/department=shipping@example.com',
44+
ascii_domain='example.tld',
45+
domain='example.tld',
46+
email='user+mailbox/department=shipping@example.tld',
47+
ascii_email='user+mailbox/department=shipping@example.tld',
4848
),
4949
),
5050
(
51-
"!#$%&'*+-/=?^_`.{|}~@example.com",
51+
"!#$%&'*+-/=?^_`.{|}~@example.tld",
5252
ValidatedEmail(
5353
local_part="!#$%&'*+-/=?^_`.{|}~",
5454
ascii_local_part="!#$%&'*+-/=?^_`.{|}~",
5555
smtputf8=False,
56-
ascii_domain='example.com',
57-
domain='example.com',
58-
email="!#$%&'*+-/=?^_`.{|}~@example.com",
59-
ascii_email="!#$%&'*+-/=?^_`.{|}~@example.com",
56+
ascii_domain='example.tld',
57+
domain='example.tld',
58+
email="!#$%&'*+-/=?^_`.{|}~@example.tld",
59+
ascii_email="!#$%&'*+-/=?^_`.{|}~@example.tld",
6060
),
6161
),
6262
(
@@ -142,43 +142,43 @@
142142
),
143143
),
144144
(
145-
'ñoñó@example.com',
145+
'ñoñó@example.tld',
146146
ValidatedEmail(
147147
local_part='ñoñó',
148148
smtputf8=True,
149-
ascii_domain='example.com',
150-
domain='example.com',
151-
email='ñoñó@example.com',
149+
ascii_domain='example.tld',
150+
domain='example.tld',
151+
email='ñoñó@example.tld',
152152
),
153153
),
154154
(
155-
'我買@example.com',
155+
'我買@example.tld',
156156
ValidatedEmail(
157157
local_part='我買',
158158
smtputf8=True,
159-
ascii_domain='example.com',
160-
domain='example.com',
161-
email='我買@example.com',
159+
ascii_domain='example.tld',
160+
domain='example.tld',
161+
email='我買@example.tld',
162162
),
163163
),
164164
(
165-
'甲斐黒川日本@example.com',
165+
'甲斐黒川日本@example.tld',
166166
ValidatedEmail(
167167
local_part='甲斐黒川日本',
168168
smtputf8=True,
169-
ascii_domain='example.com',
170-
domain='example.com',
171-
email='甲斐黒川日本@example.com',
169+
ascii_domain='example.tld',
170+
domain='example.tld',
171+
email='甲斐黒川日本@example.tld',
172172
),
173173
),
174174
(
175-
'чебурашкаящик-с-апельсинами.рф@example.com',
175+
'чебурашкаящик-с-апельсинами.рф@example.tld',
176176
ValidatedEmail(
177177
local_part='чебурашкаящик-с-апельсинами.рф',
178178
smtputf8=True,
179-
ascii_domain='example.com',
180-
domain='example.com',
181-
email='чебурашкаящик-с-апельсинами.рф@example.com',
179+
ascii_domain='example.tld',
180+
domain='example.tld',
181+
email='чебурашкаящик-с-апельсинами.рф@example.tld',
182182
),
183183
),
184184
(
@@ -211,6 +211,7 @@ def test_email_valid(email_input, output):
211211
@pytest.mark.parametrize(
212212
'email_input,error_msg',
213213
[
214+
('my@localhost', 'The domain name localhost is not valid. It should have a period.'),
214215
('my@.leadingdot.com', 'An email address cannot have a period immediately after the @-sign.'),
215216
('my@..leadingfwdot.com', 'An email address cannot have a period immediately after the @-sign.'),
216217
('my@..twodots.com', 'An email address cannot have a period immediately after the @-sign.'),
@@ -247,15 +248,45 @@ def test_email_valid(email_input, output):
247248
('my.λong.address@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444.info', 'The email address is too long (at least 1 character too many).'),
248249
],
249250
)
250-
def test_email_invalid(email_input, error_msg):
251+
def test_email_invalid_syntax(email_input, error_msg):
252+
# Since these all have syntax errors, deliverability
253+
# checks do not arise.
251254
with pytest.raises(EmailSyntaxError) as exc_info:
252255
validate_email(email_input)
253256
# print(f'({email_input!r}, {str(exc_info.value)!r}),')
254257
assert str(exc_info.value) == error_msg
255258

256259

260+
@pytest.mark.parametrize(
261+
'email_input',
262+
[
263+
('me@anything.arpa'),
264+
('me@anything.example'),
265+
('me@example.com'),
266+
('me@mail.example.com'),
267+
('me@valid.invalid'),
268+
('me@link.local'),
269+
('me@host.localhost'),
270+
('me@onion.onion.onion'),
271+
('me@test.test.test'),
272+
],
273+
)
274+
def test_email_invalid_reserved_domain(email_input):
275+
# Since these all fail deliverability from a static list,
276+
# DNS deliverability checks do not arise.
277+
with pytest.raises(EmailUndeliverableError) as exc_info:
278+
validate_email(email_input)
279+
# print(f'({email_input!r}, {str(exc_info.value)!r}),')
280+
assert "is a special-use or reserved name" in str(exc_info.value)
281+
282+
283+
def test_email_test_domain_name_in_test_environment():
284+
validate_email("anything@test", test_environment=True)
285+
validate_email("anything@mycompany.test", test_environment=True)
286+
287+
257288
def test_dict_accessor():
258-
input_email = "testaddr@example.com"
289+
input_email = "testaddr@example.tld"
259290
valid_email = validate_email(input_email, check_deliverability=False)
260291
assert isinstance(valid_email.as_dict(), dict)
261292
assert valid_email.as_dict()["original_email"] == input_email
@@ -292,7 +323,7 @@ def test_deliverability_dns_timeout():
292323

293324
def test_main_single_good_input(monkeypatch, capsys):
294325
import json
295-
test_email = "test@example.com"
326+
test_email = "google@google.com"
296327
monkeypatch.setattr('sys.argv', ['email_validator', test_email])
297328
validator_main()
298329
stdout, _ = capsys.readouterr()
@@ -311,7 +342,7 @@ def test_main_single_bad_input(monkeypatch, capsys):
311342

312343
def test_main_multi_input(monkeypatch, capsys):
313344
import io
314-
test_cases = ["test@example.com", "test2@example.com", "test@.com", "test3@.com"]
345+
test_cases = ["google1@google.com", "google2@google.com", "test@.com", "test3@.com"]
315346
test_input = io.StringIO("\n".join(test_cases))
316347
monkeypatch.setattr('sys.stdin', test_input)
317348
monkeypatch.setattr('sys.argv', ['email_validator'])
@@ -326,7 +357,7 @@ def test_main_multi_input(monkeypatch, capsys):
326357
def test_main_input_shim(monkeypatch, capsys):
327358
import json
328359
monkeypatch.setattr('sys.version_info', (2, 7))
329-
test_email = b"test@example.com"
360+
test_email = b"google@google.com"
330361
monkeypatch.setattr('sys.argv', ['email_validator', test_email])
331362
validator_main()
332363
stdout, _ = capsys.readouterr()

0 commit comments

Comments
 (0)