From 90fcc31e895eee0e22a27ee4ed3af29b642bd2de Mon Sep 17 00:00:00 2001 From: romanov Date: Tue, 16 Aug 2022 15:04:39 +0300 Subject: [PATCH 1/3] Added 3 soft args to validate method --- email_validator/__init__.py | 57 ++++++++++++++++++++++++++++--------- main.py | 9 ++++++ 2 files changed, 52 insertions(+), 14 deletions(-) create mode 100644 main.py diff --git a/email_validator/__init__.py b/email_validator/__init__.py index 3d295ec..dc3336d 100644 --- a/email_validator/__init__.py +++ b/email_validator/__init__.py @@ -13,7 +13,10 @@ CHECK_DELIVERABILITY = True TEST_ENVIRONMENT = False DEFAULT_TIMEOUT = 15 # secs - +#Soft validation options +ALLOW_SPECIAL_DOMAINS = False +ALLOW_ANY_TOP_LEVEL_DOMAIN = False +ALLOWED_TOP_LEVEL_DOMAINS = [] #type: ignore # Based on RFC 2822 section 3.2.4 / RFC 5322 section 3.2.3, these # characters are permitted in email addresses (not taking into # account internationalization): @@ -265,7 +268,10 @@ def validate_email( check_deliverability=CHECK_DELIVERABILITY, test_environment=TEST_ENVIRONMENT, timeout=DEFAULT_TIMEOUT, - dns_resolver=None + dns_resolver=None, + allow_special_domains=ALLOW_SPECIAL_DOMAINS, + allow_any_top_level_domain=ALLOW_ANY_TOP_LEVEL_DOMAIN, + allowed_top_level_domains=ALLOWED_TOP_LEVEL_DOMAINS #type: ignore ): """ Validates an email address, raising an EmailNotValidError if the address is not valid or returning a dict of @@ -300,7 +306,13 @@ def validate_email( ret.smtputf8 = local_part_info["smtputf8"] # Validate the email address's domain part syntax and get a normalized form. 
- domain_part_info = validate_email_domain_part(parts[1], test_environment=test_environment) + domain_part_info = validate_email_domain_part( + parts[1], + test_environment=test_environment, + allow_special_domains=allow_special_domains, + allow_any_top_level_domain=allow_any_top_level_domain, + allowed_top_level_domains=allowed_top_level_domains + ) ret.domain = domain_part_info["domain"] ret.ascii_domain = domain_part_info["ascii_domain"] @@ -460,7 +472,13 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals } -def validate_email_domain_part(domain, test_environment=False): +def validate_email_domain_part( + domain, + test_environment=False, + allow_special_domains=ALLOW_SPECIAL_DOMAINS, + allow_any_top_level_domain=ALLOW_ANY_TOP_LEVEL_DOMAIN, + allowed_top_level_domains=ALLOWED_TOP_LEVEL_DOMAINS #type: ignore + ): # Empty? if len(domain) == 0: raise EmailSyntaxError("There must be something after the @-sign.") @@ -550,20 +568,31 @@ def validate_email_domain_part(domain, test_environment=False): # deliverability errors since they are syntactically valid. # Some might fail DNS-based deliverability checks, but that # can be turned off, so we should fail them all sooner. - for d in SPECIAL_USE_DOMAIN_NAMES: - # See the note near the definition of SPECIAL_USE_DOMAIN_NAMES. - if d == "test" and test_environment: - continue + if not allow_special_domains: + for d in SPECIAL_USE_DOMAIN_NAMES: + # See the note near the definition of SPECIAL_USE_DOMAIN_NAMES. + if d == "test" and test_environment: + continue - if ascii_domain == d or ascii_domain.endswith("." + d): - raise EmailUndeliverableError("The domain name %s is a special-use or reserved name that cannot be used with email." % domain_i18n) + if ascii_domain == d or ascii_domain.endswith("." + d): + raise EmailUndeliverableError("The domain name %s is a special-use or reserved name that cannot be used with email." 
% domain_i18n) # We also know that all TLDs currently end with a letter, and # we'll consider that a non-DNS based deliverability check. - if not re.search(r"[A-Za-z]\Z", ascii_domain): - raise EmailUndeliverableError( - "The domain name %s is not valid. It is not within a valid top-level domain." % domain_i18n - ) + if not allow_any_top_level_domain: + #We check len() not to slow validating if tld were not allowed + if len(allowed_top_level_domains) > 0: + #We already trust the period of domain name and avoiding IndexError + tld = ascii_domain.split('.')[-1] + if tld not in allowed_top_level_domains: + raise EmailUndeliverableError( + "The domain name %s is not valid. Top-level domain name is not included in allowed_top_level_domains." % domain_i18n + ) + else: + if not re.search(r"[A-Za-z]\Z", ascii_domain): + raise EmailUndeliverableError( + "The domain name %s is not valid. It is not within a valid top-level domain." % domain_i18n + ) # Return the IDNA ASCII-encoded form of the domain, which is how it # would be transmitted on the wire (except when used with SMTPUTF8 diff --git a/main.py b/main.py new file mode 100644 index 0000000..ab4c5e9 --- /dev/null +++ b/main.py @@ -0,0 +1,9 @@ +from email_validator import validate_email + +email1 = 'ewferfwekuh@ekjfjeir1.com' +email2 = 'ewferfwekuh@ekjfjeir.ewrfref.mm71' +email3 = 'ewferfwekuh@ekjfjeir.local' + +v1 = validate_email(email1,check_deliverability=False) +v2 = validate_email(email2,check_deliverability=False,allow_any_top_level_domain=False,allowed_top_level_domains=['mm72','mm74']) +v3 = validate_email(email3,check_deliverability=False,allow_special_domains=True) \ No newline at end of file From 2d6e7c3238e1fa6f86d0f3aeaad35a9b4cd5a6b0 Mon Sep 17 00:00:00 2001 From: northpowered Date: Tue, 16 Aug 2022 18:02:10 +0300 Subject: [PATCH 2/3] README updated --- README.md | 5 +++++ main.py | 9 --------- tests/__init__.py | 1 + 3 files changed, 6 insertions(+), 9 deletions(-) delete mode 100644 main.py create mode 
100644 tests/__init__.py diff --git a/README.md b/README.md index 6a613bc..400dc96 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,11 @@ The `validate_email` function also accepts the following keyword arguments `test_environment=False`: DNS-based deliverability checks are disabled and `test` and `subdomain.test` domain names are permitted (see below). +`allow_special_domains=False`: Set to `True` to turn off the `EmailUndeliverableError` exception for special-use or reserved domain names, such as "arpa", "local" and others. Default is False (restricted). + +`allow_any_top_level_domain=False`: Set to `True` to turn off the `EmailUndeliverableError` exception for top-level domains that do not match the regex **[A-Za-z]\Z** (i.e. do not end with a letter), such as "org123". May be useful for local services in isolated environments with a special local TLD. Default is False (restricted). + +`allowed_top_level_domains=[]`: Similar to `allow_any_top_level_domain`, but works as a whitelist: when the list is not empty, the top-level domain must be one of the listed values, otherwise `EmailUndeliverableError` is raised. Ignored if `allow_any_top_level_domain=True` or if the list is empty. Default is [] (no allowed *bad* domains). ### DNS timeout and cache When validating many email addresses or to control the timeout (the default is 15 seconds), create a caching [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to reuse in each call. 
The `caching_resolver` function returns one easily for you: diff --git a/main.py b/main.py deleted file mode 100644 index ab4c5e9..0000000 --- a/main.py +++ /dev/null @@ -1,9 +0,0 @@ -from email_validator import validate_email - -email1 = 'ewferfwekuh@ekjfjeir1.com' -email2 = 'ewferfwekuh@ekjfjeir.ewrfref.mm71' -email3 = 'ewferfwekuh@ekjfjeir.local' - -v1 = validate_email(email1,check_deliverability=False) -v2 = validate_email(email2,check_deliverability=False,allow_any_top_level_domain=False,allowed_top_level_domains=['mm72','mm74']) -v3 = validate_email(email3,check_deliverability=False,allow_special_domains=True) \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..36d80d3 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +#Just for a right import from email_validator to the test_main.py on modern Python versions \ No newline at end of file From dace4e70c2e7dea3a402450c730947263fd33a26 Mon Sep 17 00:00:00 2001 From: northpowered Date: Tue, 16 Aug 2022 18:25:54 +0300 Subject: [PATCH 3/3] Linter fixes --- Makefile | 2 +- email_validator/__init__.py | 22 +++++++++++----------- tests/__init__.py | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 71f8600..b716cbe 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ install: .PHONY: lint lint: #python setup.py check -rms - flake8 --ignore=E501,E126,W503 email_validator tests + flake8 --ignore=E501,E126,E121,E125,W503 email_validator tests .PHONY: test test: diff --git a/email_validator/__init__.py b/email_validator/__init__.py index dc3336d..59b80fb 100644 --- a/email_validator/__init__.py +++ b/email_validator/__init__.py @@ -13,10 +13,10 @@ CHECK_DELIVERABILITY = True TEST_ENVIRONMENT = False DEFAULT_TIMEOUT = 15 # secs -#Soft validation options +# Soft validation options ALLOW_SPECIAL_DOMAINS = False ALLOW_ANY_TOP_LEVEL_DOMAIN = False -ALLOWED_TOP_LEVEL_DOMAINS = [] #type: ignore +ALLOWED_TOP_LEVEL_DOMAINS 
= [] # type: ignore # Based on RFC 2822 section 3.2.4 / RFC 5322 section 3.2.3, these # characters are permitted in email addresses (not taking into # account internationalization): @@ -271,7 +271,7 @@ def validate_email( dns_resolver=None, allow_special_domains=ALLOW_SPECIAL_DOMAINS, allow_any_top_level_domain=ALLOW_ANY_TOP_LEVEL_DOMAIN, - allowed_top_level_domains=ALLOWED_TOP_LEVEL_DOMAINS #type: ignore + allowed_top_level_domains=ALLOWED_TOP_LEVEL_DOMAINS # type: ignore ): """ Validates an email address, raising an EmailNotValidError if the address is not valid or returning a dict of @@ -312,7 +312,7 @@ def validate_email( allow_special_domains=allow_special_domains, allow_any_top_level_domain=allow_any_top_level_domain, allowed_top_level_domains=allowed_top_level_domains - ) + ) ret.domain = domain_part_info["domain"] ret.ascii_domain = domain_part_info["ascii_domain"] @@ -473,11 +473,11 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals def validate_email_domain_part( - domain, - test_environment=False, - allow_special_domains=ALLOW_SPECIAL_DOMAINS, - allow_any_top_level_domain=ALLOW_ANY_TOP_LEVEL_DOMAIN, - allowed_top_level_domains=ALLOWED_TOP_LEVEL_DOMAINS #type: ignore + domain, + test_environment=False, + allow_special_domains=ALLOW_SPECIAL_DOMAINS, + allow_any_top_level_domain=ALLOW_ANY_TOP_LEVEL_DOMAIN, + allowed_top_level_domains=ALLOWED_TOP_LEVEL_DOMAINS # type: ignore ): # Empty? if len(domain) == 0: @@ -580,9 +580,9 @@ def validate_email_domain_part( # We also know that all TLDs currently end with a letter, and # we'll consider that a non-DNS based deliverability check. 
if not allow_any_top_level_domain: - #We check len() not to slow validating if tld were not allowed + # We check len() not to slow validating if tld were not allowed if len(allowed_top_level_domains) > 0: - #We already trust the period of domain name and avoiding IndexError + # We already trust the period of domain name and avoiding IndexError tld = ascii_domain.split('.')[-1] if tld not in allowed_top_level_domains: raise EmailUndeliverableError( diff --git a/tests/__init__.py b/tests/__init__.py index 36d80d3..7664849 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1 @@ -#Just for a right import from email_validator to the test_main.py on modern Python versions \ No newline at end of file +# Just for a right import from email_validator to the test_main.py on modern Python versions