Move regex checks to corelibs_regex_checks module

This commit is contained in:
Clemens Schwaighofer
2026-02-02 14:56:07 +09:00
parent d098eb58f3
commit 28ab7c6f0c
5 changed files with 43 additions and 665 deletions

View File

@@ -8,6 +8,7 @@ requires-python = ">=3.13"
dependencies = [
"corelibs-datetime>=1.0.1",
"corelibs-enum-base>=1.0.0",
"corelibs-regex-checks>=1.0.0",
"corelibs-text-colors>=1.0.0",
"corelibs-var>=1.0.0",
"cryptography>=46.0.3",

View File

@@ -3,8 +3,20 @@ List of regex compiled strings that can be used
"""
import re
from warnings import warn, deprecated
from corelibs_regex_checks.regex_constants import (
compile_re as compile_re_ng,
SUB_EMAIL_BASIC_REGEX as SUB_EMAIL_BASIC_REGEX_NG,
EMAIL_BASIC_REGEX as EMAIL_BASIC_REGEX_NG,
NAME_EMAIL_SIMPLE_REGEX as NAME_EMAIL_SIMPLE_REGEX_NG,
NAME_EMAIL_BASIC_REGEX as NAME_EMAIL_BASIC_REGEX_NG,
DOMAIN_WITH_LOCALHOST_REGEX as DOMAIN_WITH_LOCALHOST_REGEX_NG,
DOMAIN_WITH_LOCALHOST_PORT_REGEX as DOMAIN_WITH_LOCALHOST_PORT_REGEX_NG,
DOMAIN_REGEX as DOMAIN_REGEX_NG
)
@deprecated("Use corelibs_regex_checks.regex_constants.compile_re instead")
def compile_re(reg: str) -> re.Pattern[str]:
"""
compile a regex with verbose flag
@@ -15,40 +27,25 @@ def compile_re(reg: str) -> re.Pattern[str]:
Returns:
re.Pattern[str] -- the compiled regular expression pattern
"""
return re.compile(reg, re.VERBOSE)
return compile_re_ng(reg)
# email regex
SUB_EMAIL_BASIC_REGEX: str = r"""
[A-Za-z0-9!#$%&'*+\-\/=?^_`{|}~][A-Za-z0-9!#$%:\(\)&'*+\-\/=?^_`{|}~\.]{0,63}
@(?!-)[A-Za-z0-9-]{1,63}(?<!-)(?:\.[A-Za-z0-9-]{1,63}(?<!-))*\.[a-zA-Z]{2,6}
"""
EMAIL_BASIC_REGEX = rf"^{SUB_EMAIL_BASIC_REGEX}$"
SUB_EMAIL_BASIC_REGEX = SUB_EMAIL_BASIC_REGEX_NG
EMAIL_BASIC_REGEX = EMAIL_BASIC_REGEX_NG
# name + email regex for email sending type like "foo bar" <email@mail.com>
NAME_EMAIL_SIMPLE_REGEX = r"""
^\s*(?:"(?P<name1>[^"]+)"\s*<(?P<email1>[^>]+)>|
(?P<name2>.+?)\s*<(?P<email2>[^>]+)>|
<(?P<email3>[^>]+)>|
(?P<email4>[^\s<>]+))\s*$
"""
NAME_EMAIL_SIMPLE_REGEX = NAME_EMAIL_SIMPLE_REGEX_NG
# name + email with the basic regex set
NAME_EMAIL_BASIC_REGEX = rf"""
^\s*(?:
"(?P<name1>[^"]+)"\s*<(?P<email1>{SUB_EMAIL_BASIC_REGEX})>|
(?P<name2>.+?)\s*<(?P<email2>{SUB_EMAIL_BASIC_REGEX})>|
<(?P<email3>{SUB_EMAIL_BASIC_REGEX})>|
(?P<email4>{SUB_EMAIL_BASIC_REGEX})
)\s*$
"""
NAME_EMAIL_BASIC_REGEX = NAME_EMAIL_BASIC_REGEX_NG
# Domain regex with localhost
DOMAIN_WITH_LOCALHOST_REGEX: str = r"""
^(?:localhost|(?!-)[A-Za-z0-9-]{1,63}(?<!-)(?:\.[A-Za-z0-9-]{1,63}(?<!-))*\.[A-Za-z]{2,})$
"""
DOMAIN_WITH_LOCALHOST_REGEX = DOMAIN_WITH_LOCALHOST_REGEX_NG
# domain regex with localhost and optional port
DOMAIN_WITH_LOCALHOST_PORT_REGEX: str = r"""
^(?:localhost|(?!-)[A-Za-z0-9-]{1,63}(?<!-)(?:\.[A-Za-z0-9-]{1,63}(?<!-))*\.[A-Za-z]{2,})(?::\d+)?$
"""
DOMAIN_WITH_LOCALHOST_PORT_REGEX = DOMAIN_WITH_LOCALHOST_PORT_REGEX_NG
# Domain, no localhost
DOMAIN_REGEX: str = r"^(?!-)[A-Za-z0-9-]{1,63}(?<!-)(?:\.[A-Za-z0-9-]{1,63}(?<!-))*\.[A-Za-z]{2,}$"
DOMAIN_REGEX = DOMAIN_REGEX_NG
# At the module level, issue a deprecation warning
warn("Use corelibs_regex_checks.regex_constants instead", DeprecationWarning, stacklevel=2)
# __END__

View File

@@ -2,22 +2,26 @@
List of regex compiled strings that can be used
"""
from corelibs.check_handling.regex_constants import (
compile_re,
EMAIL_BASIC_REGEX,
NAME_EMAIL_SIMPLE_REGEX,
NAME_EMAIL_BASIC_REGEX,
DOMAIN_WITH_LOCALHOST_REGEX,
DOMAIN_WITH_LOCALHOST_PORT_REGEX,
DOMAIN_REGEX
import warnings
from corelibs_regex_checks.regex_constants_compiled import (
COMPILED_EMAIL_BASIC_REGEX as COMPILED_EMAIL_BASIC_REGEX_NG,
COMPILED_NAME_EMAIL_SIMPLE_REGEX as COMPILED_NAME_EMAIL_SIMPLE_REGEX_NG,
COMPILED_NAME_EMAIL_BASIC_REGEX as COMPILED_NAME_EMAIL_BASIC_REGEX_NG,
COMPILED_DOMAIN_WITH_LOCALHOST_REGEX as COMPILED_DOMAIN_WITH_LOCALHOST_REGEX_NG,
COMPILED_DOMAIN_WITH_LOCALHOST_PORT_REGEX as COMPILED_DOMAIN_WITH_LOCALHOST_PORT_REGEX_NG,
COMPILED_DOMAIN_REGEX as COMPILED_DOMAIN_REGEX_NG
)
# all above in compiled form
COMPILED_EMAIL_BASIC_REGEX = compile_re(EMAIL_BASIC_REGEX)
COMPILED_NAME_EMAIL_SIMPLE_REGEX = compile_re(NAME_EMAIL_SIMPLE_REGEX)
COMPILED_NAME_EMAIL_BASIC_REGEX = compile_re(NAME_EMAIL_BASIC_REGEX)
COMPILED_DOMAIN_WITH_LOCALHOST_REGEX = compile_re(DOMAIN_WITH_LOCALHOST_REGEX)
COMPILED_DOMAIN_WITH_LOCALHOST_PORT_REGEX = compile_re(DOMAIN_WITH_LOCALHOST_PORT_REGEX)
COMPILED_DOMAIN_REGEX = compile_re(DOMAIN_REGEX)
COMPILED_EMAIL_BASIC_REGEX = COMPILED_EMAIL_BASIC_REGEX_NG
COMPILED_NAME_EMAIL_SIMPLE_REGEX = COMPILED_NAME_EMAIL_SIMPLE_REGEX_NG
COMPILED_NAME_EMAIL_BASIC_REGEX = COMPILED_NAME_EMAIL_BASIC_REGEX_NG
COMPILED_DOMAIN_WITH_LOCALHOST_REGEX = COMPILED_DOMAIN_WITH_LOCALHOST_REGEX_NG
COMPILED_DOMAIN_WITH_LOCALHOST_PORT_REGEX = COMPILED_DOMAIN_WITH_LOCALHOST_PORT_REGEX_NG
COMPILED_DOMAIN_REGEX = COMPILED_DOMAIN_REGEX_NG
# At the module level, issue a deprecation warning
warnings.warn("Use corelibs_regex_checks.regex_constants_compiled instead", DeprecationWarning, stacklevel=2)
# __END__

View File

@@ -1 +0,0 @@
"""Unit tests for check_handling module."""

View File

@@ -1,623 +0,0 @@
"""
Unit tests for regex_constants module.
Tests all regex patterns defined in the check_handling.regex_constants module.
"""
import re
import pytest
from corelibs.check_handling.regex_constants import (
compile_re,
SUB_EMAIL_BASIC_REGEX,
EMAIL_BASIC_REGEX,
NAME_EMAIL_SIMPLE_REGEX,
NAME_EMAIL_BASIC_REGEX,
DOMAIN_WITH_LOCALHOST_REGEX,
DOMAIN_WITH_LOCALHOST_PORT_REGEX,
DOMAIN_REGEX
)
from corelibs.check_handling.regex_constants_compiled import (
COMPILED_EMAIL_BASIC_REGEX,
COMPILED_NAME_EMAIL_SIMPLE_REGEX,
COMPILED_NAME_EMAIL_BASIC_REGEX,
COMPILED_DOMAIN_WITH_LOCALHOST_REGEX,
COMPILED_DOMAIN_WITH_LOCALHOST_PORT_REGEX,
COMPILED_DOMAIN_REGEX,
)
class TestCompileRe:
    """Checks for the compile_re helper."""

    def test_compile_re_returns_pattern(self) -> None:
        """compile_re must hand back an re.Pattern instance."""
        compiled = compile_re(r"test")
        assert isinstance(compiled, re.Pattern)

    def test_compile_re_with_verbose_flag(self) -> None:
        """Whitespace and inline comments in the regex source are ignored."""
        # This only works when the pattern is compiled in verbose mode
        commented_regex = r"""
        \d+ # digits
        \s+ # whitespace
        """
        compiled = compile_re(commented_regex)
        assert compiled.match("123 ")
        assert not compiled.match("abc")

    def test_compile_re_simple_pattern(self) -> None:
        """A plain anchored pattern accepts exactly three digits."""
        three_digits = compile_re(r"^\d{3}$")
        assert three_digits.match("123")
        # one digit too few / one digit too many
        assert not three_digits.match("12")
        assert not three_digits.match("1234")
class TestEmailBasicRegex:
    """Checks for the anchored EMAIL_BASIC_REGEX pattern (compiled form)."""

    @pytest.fixture
    def email_pattern(self) -> re.Pattern[str]:
        """Provide the pre-compiled basic email pattern."""
        return COMPILED_EMAIL_BASIC_REGEX

    @pytest.mark.parametrize("valid_email", [
        "user@example.com",
        "test.user@example.com",
        "user+tag@example.co.uk",
        "first.last@subdomain.example.com",
        "user123@test-domain.com",
        "a@example.com",
        "user_name@example.com",
        "user-name@example.com",
        "user@sub.domain.example.com",
        "test!#$%&'*+-/=?^_`{|}~@example.com",
        "1234567890@example.com",
        "user@example-domain.com",
        "user@domain.co",
        # Accepted by the regex although not strictly RFC compliant:
        "user.@example.com",  # dot directly before the @
        "user..name@example.com",  # two dots in a row in the local part
    ])
    def test_valid_emails(
        self, email_pattern: re.Pattern[str], valid_email: str
    ) -> None:
        """Every address in the accepted list must match."""
        matched = email_pattern.match(valid_email)
        assert matched, f"Failed to match valid email: {valid_email}"

    @pytest.mark.parametrize("invalid_email", [
        "",  # empty string
        "@example.com",  # missing local part
        "user@",  # missing domain
        "user",  # no @ symbol
        "user@.com",  # domain starts with dot
        "user@domain",  # no TLD
        "user @example.com",  # space in local part
        "user@exam ple.com",  # space in domain
        ".user@example.com",  # starts with dot
        "user@-example.com",  # domain starts with hyphen
        "user@example-.com",  # domain part ends with hyphen
        "user@example.c",  # TLD too short (1 char)
        "user@example.toolong",  # TLD too long (>6 chars)
        "user@@example.com",  # double @
        "user@example@com",  # multiple @
        "user@.example.com",  # domain starts with dot
        "user@example.com.",  # ends with dot
        "user@123.456.789.012",  # numeric TLD not allowed
    ])
    def test_invalid_emails(
        self, email_pattern: re.Pattern[str], invalid_email: str
    ) -> None:
        """No address in the rejected list may match."""
        matched = email_pattern.match(invalid_email)
        assert not matched, f"Incorrectly matched invalid email: {invalid_email}"

    def test_email_max_local_part_length(
        self, email_pattern: re.Pattern[str]
    ) -> None:
        """A 64 character local part is still accepted."""
        # one leading character plus up to 63 following characters
        address = "a" * 64 + "@example.com"
        assert email_pattern.match(address)

    def test_email_exceeds_local_part_length(
        self, email_pattern: re.Pattern[str]
    ) -> None:
        """A 65 character local part is rejected."""
        address = "a" * 65 + "@example.com"
        assert not email_pattern.match(address)
class TestSubEmailBasicRegex:
    """Checks for SUB_EMAIL_BASIC_REGEX, the email pattern without anchors."""

    @pytest.fixture
    def sub_email_pattern(self) -> re.Pattern[str]:
        """Compile the sub pattern wrapped in ^...$ so full strings are tested."""
        anchored = rf"^{SUB_EMAIL_BASIC_REGEX}$"
        return compile_re(anchored)

    @pytest.mark.parametrize("valid_email", [
        "user@example.com",
        "test.user@example.com",
        "user+tag@example.co.uk",
        "first.last@subdomain.example.com",
        "user123@test-domain.com",
        "a@example.com",
        "user_name@example.com",
        "user-name@example.com",
        "user@sub.domain.example.com",
        "test!#$%&'*+-/=?^_`{|}~@example.com",
        "1234567890@example.com",
    ])
    def test_valid_emails_match(
        self, sub_email_pattern: re.Pattern[str], valid_email: str
    ) -> None:
        """Every address in the accepted list must match the anchored sub pattern."""
        matched = sub_email_pattern.match(valid_email)
        assert matched, f"Failed to match valid email: {valid_email}"

    @pytest.mark.parametrize("invalid_email", [
        "",
        "@example.com",
        "user@",
        "user",
        "user@.com",
        "user@domain",
        "user @example.com",
        ".user@example.com",
        "user@-example.com",
        "user@example-.com",
        "user@example.c",
        "user@example.toolong",
    ])
    def test_invalid_emails_no_match(
        self, sub_email_pattern: re.Pattern[str], invalid_email: str
    ) -> None:
        """No address in the rejected list may match the anchored sub pattern."""
        matched = sub_email_pattern.match(invalid_email)
        assert not matched, f"Incorrectly matched invalid email: {invalid_email}"

    def test_sub_email_max_local_part_length(
        self, sub_email_pattern: re.Pattern[str]
    ) -> None:
        """A 64 character local part is still accepted."""
        address = "a" * 64 + "@example.com"
        assert sub_email_pattern.match(address)

    def test_sub_email_exceeds_local_part_length(
        self, sub_email_pattern: re.Pattern[str]
    ) -> None:
        """A 65 character local part is rejected."""
        address = "a" * 65 + "@example.com"
        assert not sub_email_pattern.match(address)
class TestNameEmailSimpleRegex:
    """Checks for NAME_EMAIL_SIMPLE_REGEX (name + loosely matched email)."""

    @pytest.fixture
    def name_email_simple_pattern(self) -> re.Pattern[str]:
        """Provide the pre-compiled simple name+email pattern."""
        return COMPILED_NAME_EMAIL_SIMPLE_REGEX

    @pytest.mark.parametrize("test_input,expected_groups", [
        ('"John Doe" <john@example.com>', {'name1': 'John Doe', 'email1': 'john@example.com'}),
        ('John Doe <john@example.com>', {'name2': 'John Doe', 'email2': 'john@example.com'}),
        ('<john@example.com>', {'email3': 'john@example.com'}),
        ('john@example.com', {'email4': 'john@example.com'}),
        (' "Jane Smith" <jane@test.com> ', {'name1': 'Jane Smith', 'email1': 'jane@test.com'}),
        ('Bob <bob@test.org>', {'name2': 'Bob', 'email2': 'bob@test.org'}),
    ])
    def test_valid_name_email_combinations(
        self,
        name_email_simple_pattern: re.Pattern[str],
        test_input: str,
        expected_groups: dict[str, str],
    ) -> None:
        """Each accepted input matches and fills the expected named groups."""
        match = name_email_simple_pattern.match(test_input)
        assert match is not None, f"Failed to match: {test_input}"
        # Compare every expected named group against what was captured
        for group_name, expected_value in expected_groups.items():
            captured = match.group(group_name)
            assert captured == expected_value, (
                f"Group {group_name} expected '{expected_value}', got '{captured}'"
            )

    @pytest.mark.parametrize("invalid_input", [
        "",
        "not an email",
        "<>",
        '"Name Only"',
        'Name <',
        '<email',
        'Name <<email@test.com>>',
        'Name <email@test.com',
        'Name email@test.com>',
    ])
    def test_invalid_name_email_combinations(
        self, name_email_simple_pattern: re.Pattern[str], invalid_input: str
    ) -> None:
        """Malformed inputs must not match at all."""
        matched = name_email_simple_pattern.match(invalid_input)
        assert not matched, f"Incorrectly matched invalid input: {invalid_input}"

    def test_extract_name_from_quoted(
        self, name_email_simple_pattern: re.Pattern[str]
    ) -> None:
        """Quoted name form fills name1/email1."""
        found = name_email_simple_pattern.match('"Alice Wonder" <alice@example.com>')
        assert found is not None
        assert found.group('name1') == 'Alice Wonder'
        assert found.group('email1') == 'alice@example.com'

    def test_extract_name_from_unquoted(
        self, name_email_simple_pattern: re.Pattern[str]
    ) -> None:
        """Unquoted name form fills name2/email2."""
        found = name_email_simple_pattern.match('Bob Builder <bob@example.com>')
        assert found is not None
        assert found.group('name2') == 'Bob Builder'
        assert found.group('email2') == 'bob@example.com'

    def test_email_only_in_brackets(
        self, name_email_simple_pattern: re.Pattern[str]
    ) -> None:
        """Bare email in angle brackets fills email3."""
        found = name_email_simple_pattern.match('<charlie@example.com>')
        assert found is not None
        assert found.group('email3') == 'charlie@example.com'

    def test_email_only_plain(
        self, name_email_simple_pattern: re.Pattern[str]
    ) -> None:
        """Bare email without brackets fills email4."""
        found = name_email_simple_pattern.match('dave@example.com')
        assert found is not None
        assert found.group('email4') == 'dave@example.com'

    def test_whitespace_handling(
        self, name_email_simple_pattern: re.Pattern[str]
    ) -> None:
        """Surrounding whitespace does not end up in the captured groups."""
        found = name_email_simple_pattern.match(' "User Name" <user@example.com> ')
        assert found is not None
        assert found.group('name1') == 'User Name'
        assert found.group('email1') == 'user@example.com'
class TestNameEmailBasicRegex:
    """Checks for NAME_EMAIL_BASIC_REGEX (name + email validated by the basic email regex)."""

    @pytest.fixture
    def name_email_basic_pattern(self) -> re.Pattern[str]:
        """Provide the pre-compiled basic name+email pattern."""
        return COMPILED_NAME_EMAIL_BASIC_REGEX

    @pytest.mark.parametrize("test_input,expected_name,expected_email", [
        ('"John Doe" <john@example.com>', 'John Doe', 'john@example.com'),
        ('John Doe <john@example.com>', 'John Doe', 'john@example.com'),
        ('<john@example.com>', None, 'john@example.com'),
        ('john@example.com', None, 'john@example.com'),
        (' "Jane Smith" <jane.smith@test.co.uk> ', 'Jane Smith', 'jane.smith@test.co.uk'),
        ('Alice Wonder <alice+tag@example.com>', 'Alice Wonder', 'alice+tag@example.com'),
    ])
    def test_valid_name_email_with_validation(
        self,
        name_email_basic_pattern: re.Pattern[str],
        test_input: str,
        expected_name: str | None,
        expected_email: str,
    ) -> None:
        """Accepted inputs match and yield the expected name and email."""
        match = name_email_basic_pattern.match(test_input)
        assert match is not None, f"Failed to match: {test_input}"
        # Only one alternative of the pattern participates in a match, so take
        # the first non-empty name/email group
        captured = match.groupdict()
        name = captured['name1'] or captured['name2']
        email = (
            captured['email1'] or captured['email2'] or
            captured['email3'] or captured['email4']
        )
        assert name == expected_name, f"Expected name '{expected_name}', got '{name}'"
        assert email == expected_email, f"Expected email '{expected_email}', got '{email}'"

    @pytest.mark.parametrize("invalid_input", [
        '"John Doe" <invalid.email>',  # invalid email format
        'John Doe <@example.com>',  # missing local part
        '<user@>',  # missing domain
        'user@domain',  # no TLD
        '"Name" <user @example.com>',  # space in email
        '<.user@example.com>',  # starts with dot
        'user@-example.com',  # domain starts with hyphen
        'Name <user@example.c>',  # TLD too short
        'Name <user@example.toolongdomain>',  # TLD too long
    ])
    def test_invalid_email_format_rejected(
        self, name_email_basic_pattern: re.Pattern[str], invalid_input: str
    ) -> None:
        """Inputs whose email part fails the basic email check are rejected."""
        matched = name_email_basic_pattern.match(invalid_input)
        assert not matched, f"Incorrectly matched invalid input: {invalid_input}"

    def test_quoted_name_with_valid_email(
        self, name_email_basic_pattern: re.Pattern[str]
    ) -> None:
        """Quoted name form fills name1/email1."""
        found = name_email_basic_pattern.match('"Alice Wonder" <alice@example.com>')
        assert found is not None
        assert found.group('name1') == 'Alice Wonder'
        assert found.group('email1') == 'alice@example.com'

    def test_unquoted_name_with_valid_email(
        self, name_email_basic_pattern: re.Pattern[str]
    ) -> None:
        """Unquoted name form fills name2/email2."""
        found = name_email_basic_pattern.match('Bob Builder <bob@example.com>')
        assert found is not None
        assert found.group('name2') == 'Bob Builder'
        assert found.group('email2') == 'bob@example.com'

    def test_email_only_formats(
        self, name_email_basic_pattern: re.Pattern[str]
    ) -> None:
        """Email without a name works both with and without angle brackets."""
        # bracketed form -> email3
        bracketed = name_email_basic_pattern.match('<charlie@example.com>')
        assert bracketed is not None
        assert bracketed.group('email3') == 'charlie@example.com'
        # plain form -> email4
        plain = name_email_basic_pattern.match('dave@example.com')
        assert plain is not None
        assert plain.group('email4') == 'dave@example.com'

    def test_whitespace_handling(
        self, name_email_basic_pattern: re.Pattern[str]
    ) -> None:
        """Surrounding whitespace does not end up in the captured groups."""
        found = name_email_basic_pattern.match(' "User" <user@example.com> ')
        assert found is not None
        assert found.group('name1') == 'User'
        assert found.group('email1') == 'user@example.com'

    def test_special_characters_in_local_part(
        self, name_email_basic_pattern: re.Pattern[str]
    ) -> None:
        """Special characters in the local part are accepted and captured intact."""
        found = name_email_basic_pattern.match('Test User <test!#$%&\'*+-/=?^_`{|}~@example.com>')
        assert found is not None
        assert found.group('name2') == 'Test User'
        assert found.group('email2') == 'test!#$%&\'*+-/=?^_`{|}~@example.com'
class TestDomainWithLocalhostRegex:
    """Checks for DOMAIN_WITH_LOCALHOST_REGEX (domain names plus "localhost")."""

    @pytest.fixture
    def domain_localhost_pattern(self) -> re.Pattern[str]:
        """Provide the pre-compiled domain-or-localhost pattern."""
        return COMPILED_DOMAIN_WITH_LOCALHOST_REGEX

    @pytest.mark.parametrize("valid_domain", [
        "localhost",
        "example.com",
        "subdomain.example.com",
        "sub.domain.example.com",
        "test-domain.com",
        "example.co.uk",
        "a.com",
        "test123.example.com",
        "my-site.example.org",
        "multi.level.subdomain.example.com",
    ])
    def test_valid_domains(
        self, domain_localhost_pattern: re.Pattern[str], valid_domain: str
    ) -> None:
        """Every accepted domain, localhost included, must match."""
        matched = domain_localhost_pattern.match(valid_domain)
        assert matched, f"Failed to match valid domain: {valid_domain}"

    @pytest.mark.parametrize("invalid_domain", [
        "",  # empty string
        "example",  # no TLD
        "-example.com",  # starts with hyphen
        "example-.com",  # ends with hyphen
        ".example.com",  # starts with dot
        "example.com.",  # ends with dot
        "example..com",  # consecutive dots
        "exam ple.com",  # space in domain
        "example.c",  # TLD too short
        "localhost:8080",  # port not allowed in this pattern
        "example.com:8080",  # port not allowed in this pattern
        "@example.com",  # invalid character
        "example@com",  # invalid character
    ])
    def test_invalid_domains(
        self, domain_localhost_pattern: re.Pattern[str], invalid_domain: str
    ) -> None:
        """No rejected domain may match."""
        matched = domain_localhost_pattern.match(invalid_domain)
        assert not matched, f"Incorrectly matched invalid domain: {invalid_domain}"
class TestDomainWithLocalhostPortRegex:
    """Checks for DOMAIN_WITH_LOCALHOST_PORT_REGEX (domain/localhost, optional port)."""

    @pytest.fixture
    def domain_localhost_port_pattern(self) -> re.Pattern[str]:
        """Provide the pre-compiled domain-or-localhost-with-port pattern."""
        return COMPILED_DOMAIN_WITH_LOCALHOST_PORT_REGEX

    @pytest.mark.parametrize("valid_domain", [
        "localhost",
        "localhost:8080",
        "localhost:3000",
        "localhost:80",
        "localhost:443",
        "localhost:65535",
        "example.com",
        "example.com:8080",
        "subdomain.example.com:3000",
        "test-domain.com:443",
        "example.co.uk",
        "example.co.uk:8000",
        "a.com:1",
        "multi.level.subdomain.example.com:9999",
    ])
    def test_valid_domains_with_port(
        self, domain_localhost_port_pattern: re.Pattern[str], valid_domain: str
    ) -> None:
        """Accepted hosts match with or without a trailing port."""
        matched = domain_localhost_port_pattern.match(valid_domain)
        assert matched, f"Failed to match valid domain: {valid_domain}"

    @pytest.mark.parametrize("invalid_domain", [
        "",  # empty string
        "example",  # no TLD
        "-example.com",  # starts with hyphen
        "example-.com",  # ends with hyphen
        ".example.com",  # starts with dot
        "example.com.",  # ends with dot
        "localhost:",  # port without number
        "example.com:",  # port without number
        "example.com:abc",  # non-numeric port
        "example.com: 8080",  # space before port
        "example.com:80 80",  # space in port
        "exam ple.com",  # space in domain
        "localhost :8080",  # space before colon
    ])
    def test_invalid_domains_with_port(
        self,
        domain_localhost_port_pattern: re.Pattern[str],
        invalid_domain: str,
    ) -> None:
        """Malformed hosts or ports must not match."""
        matched = domain_localhost_port_pattern.match(invalid_domain)
        assert not matched, f"Incorrectly matched invalid domain: {invalid_domain}"

    def test_large_port_number(
        self, domain_localhost_port_pattern: re.Pattern[str]
    ) -> None:
        """Large port numbers are accepted, even beyond 65535."""
        # The regex only requires digits; it does not validate the port range
        for host in ("example.com:65535", "example.com:99999"):
            assert domain_localhost_port_pattern.match(host)
class TestDomainRegex:
    """Checks for DOMAIN_REGEX, the domain pattern that excludes localhost."""

    @pytest.fixture
    def domain_pattern(self) -> re.Pattern[str]:
        """Provide the pre-compiled domain-only pattern."""
        return COMPILED_DOMAIN_REGEX

    @pytest.mark.parametrize("valid_domain", [
        "example.com",
        "subdomain.example.com",
        "sub.domain.example.com",
        "test-domain.com",
        "example.co.uk",
        "a.com",
        "test123.example.com",
        "my-site.example.org",
        "multi.level.subdomain.example.com",
        "example.co",
    ])
    def test_valid_domains_no_localhost(
        self, domain_pattern: re.Pattern[str], valid_domain: str
    ) -> None:
        """Every accepted domain must match."""
        matched = domain_pattern.match(valid_domain)
        assert matched, f"Failed to match valid domain: {valid_domain}"

    @pytest.mark.parametrize("invalid_domain", [
        "",  # empty string
        "localhost",  # localhost not allowed
        "example",  # no TLD
        "-example.com",  # starts with hyphen
        "example-.com",  # ends with hyphen
        ".example.com",  # starts with dot
        "example.com.",  # ends with dot
        "example..com",  # consecutive dots
        "exam ple.com",  # space in domain
        "example.c",  # TLD too short
        "example.com:8080",  # port not allowed
        "@example.com",  # invalid character
        "example@com",  # invalid character
    ])
    def test_invalid_domains_no_localhost(
        self, domain_pattern: re.Pattern[str], invalid_domain: str
    ) -> None:
        """No rejected domain may match."""
        matched = domain_pattern.match(invalid_domain)
        assert not matched, f"Incorrectly matched invalid domain: {invalid_domain}"

    def test_localhost_not_allowed(
        self, domain_pattern: re.Pattern[str]
    ) -> None:
        """The bare host name "localhost" is rejected by DOMAIN_REGEX."""
        assert domain_pattern.match("localhost") is None
class TestRegexPatternConsistency:
    """Cross-pattern sanity checks for the regex constants and their compiled forms."""

    def test_all_patterns_compile(self) -> None:
        """Every regex string constant compiles through compile_re."""
        # SUB_EMAIL_BASIC_REGEX is included so the unanchored building block
        # is covered as well as the patterns derived from it
        patterns = [
            SUB_EMAIL_BASIC_REGEX,
            EMAIL_BASIC_REGEX,
            NAME_EMAIL_SIMPLE_REGEX,
            NAME_EMAIL_BASIC_REGEX,
            DOMAIN_WITH_LOCALHOST_REGEX,
            DOMAIN_WITH_LOCALHOST_PORT_REGEX,
            DOMAIN_REGEX,
        ]
        for pattern in patterns:
            compiled = compile_re(pattern)
            assert isinstance(compiled, re.Pattern)

    def test_compiled_patterns_are_patterns(self) -> None:
        """Every COMPILED_ constant is an re.Pattern object."""
        compiled_patterns = [
            COMPILED_EMAIL_BASIC_REGEX,
            COMPILED_NAME_EMAIL_SIMPLE_REGEX,
            COMPILED_NAME_EMAIL_BASIC_REGEX,
            COMPILED_DOMAIN_WITH_LOCALHOST_REGEX,
            COMPILED_DOMAIN_WITH_LOCALHOST_PORT_REGEX,
            COMPILED_DOMAIN_REGEX,
        ]
        for pattern in compiled_patterns:
            assert isinstance(pattern, re.Pattern)

    def test_domain_patterns_are_strings(self) -> None:
        """Every regex constant (email, name+email and domain) is a str."""
        # Keyed by constant name so a failure names the offending constant
        string_constants = {
            "SUB_EMAIL_BASIC_REGEX": SUB_EMAIL_BASIC_REGEX,
            "EMAIL_BASIC_REGEX": EMAIL_BASIC_REGEX,
            "NAME_EMAIL_SIMPLE_REGEX": NAME_EMAIL_SIMPLE_REGEX,
            "NAME_EMAIL_BASIC_REGEX": NAME_EMAIL_BASIC_REGEX,
            "DOMAIN_WITH_LOCALHOST_REGEX": DOMAIN_WITH_LOCALHOST_REGEX,
            "DOMAIN_WITH_LOCALHOST_PORT_REGEX": DOMAIN_WITH_LOCALHOST_PORT_REGEX,
            "DOMAIN_REGEX": DOMAIN_REGEX,
        }
        for constant_name, value in string_constants.items():
            assert isinstance(value, str), f"{constant_name} is not a str"

    def test_domain_patterns_hierarchy(self) -> None:
        """Anything the localhost pattern accepts is accepted by the port pattern too."""
        # DOMAIN_WITH_LOCALHOST_PORT_REGEX should accept everything that
        # DOMAIN_WITH_LOCALHOST_REGEX accepts (the port part is optional)
        domain_localhost = COMPILED_DOMAIN_WITH_LOCALHOST_REGEX
        domain_localhost_port = COMPILED_DOMAIN_WITH_LOCALHOST_PORT_REGEX
        test_cases = ["example.com", "subdomain.example.com", "localhost"]
        for test_case in test_cases:
            # Implication check: only inputs the narrower pattern accepts
            # are required to match the wider one
            if domain_localhost.match(test_case):
                assert domain_localhost_port.match(test_case), (
                    f"{test_case} should match both patterns"
                )