Add new regex constants for email handling and update related tests

This commit is contained in:
Clemens Schwaighofer
2026-01-08 14:58:14 +09:00
parent d3f61005cf
commit f91e0bb93a
4 changed files with 476 additions and 15 deletions

View File

@@ -8,7 +8,10 @@ import re
import pytest
from corelibs.check_handling.regex_constants import (
compile_re,
SUB_EMAIL_BASIC_REGEX,
EMAIL_BASIC_REGEX,
NAME_EMAIL_SIMPLE_REGEX,
NAME_EMAIL_BASIC_REGEX,
DOMAIN_WITH_LOCALHOST_REGEX,
DOMAIN_WITH_LOCALHOST_PORT_REGEX,
DOMAIN_REGEX,
@@ -123,6 +126,265 @@ class TestEmailBasicRegex:
assert not email_pattern.match(email)
class TestSubEmailBasicRegex:
    """Checks for SUB_EMAIL_BASIC_REGEX, the email pattern that ships without anchors.

    The fixture wraps the constant in ``^`` / ``$`` so each case is anchored
    at both ends of the input.
    """

    @pytest.fixture
    def sub_email_pattern(self) -> re.Pattern[str]:
        """Compile SUB_EMAIL_BASIC_REGEX surrounded by start/end anchors."""
        anchored = rf"^{SUB_EMAIL_BASIC_REGEX}$"
        return compile_re(anchored)

    @pytest.mark.parametrize(
        "valid_email",
        [
            "user@example.com",
            "test.user@example.com",
            "user+tag@example.co.uk",
            "first.last@subdomain.example.com",
            "user123@test-domain.com",
            "a@example.com",
            "user_name@example.com",
            "user-name@example.com",
            "user@sub.domain.example.com",
            "test!#$%&'*+-/=?^_`{|}~@example.com",
            "1234567890@example.com",
        ],
    )
    def test_valid_emails_match(
        self,
        sub_email_pattern: re.Pattern[str],
        valid_email: str,
    ) -> None:
        """Each well-formed address must be accepted by the anchored pattern."""
        found = sub_email_pattern.match(valid_email)
        assert found, f"Failed to match valid email: {valid_email}"

    @pytest.mark.parametrize(
        "invalid_email",
        [
            "",
            "@example.com",
            "user@",
            "user",
            "user@.com",
            "user@domain",
            "user @example.com",
            ".user@example.com",
            "user@-example.com",
            "user@example-.com",
            "user@example.c",
            "user@example.toolong",
        ],
    )
    def test_invalid_emails_no_match(
        self,
        sub_email_pattern: re.Pattern[str],
        invalid_email: str,
    ) -> None:
        """Each malformed address must be rejected by the anchored pattern."""
        found = sub_email_pattern.match(invalid_email)
        assert found is None, f"Incorrectly matched invalid email: {invalid_email}"

    def test_sub_email_max_local_part_length(
        self,
        sub_email_pattern: re.Pattern[str],
    ) -> None:
        """A 64-character local part is still accepted."""
        address = "a" * 64 + "@example.com"
        assert sub_email_pattern.match(address)

    def test_sub_email_exceeds_local_part_length(
        self,
        sub_email_pattern: re.Pattern[str],
    ) -> None:
        """A 65-character local part is rejected."""
        address = "a" * 65 + "@example.com"
        assert sub_email_pattern.match(address) is None
class TestNameEmailSimpleRegex:
    """Checks for NAME_EMAIL_SIMPLE_REGEX and the named groups it exposes.

    The cases below exercise four input shapes and the group names asserted
    for each: quoted name (``name1``/``email1``), bare name
    (``name2``/``email2``), bracketed address (``email3``) and plain
    address (``email4``).
    """

    @pytest.fixture
    def name_email_simple_pattern(self) -> re.Pattern[str]:
        """Compile NAME_EMAIL_SIMPLE_REGEX as-is."""
        compiled = compile_re(NAME_EMAIL_SIMPLE_REGEX)
        return compiled

    @pytest.mark.parametrize(
        "test_input,expected_groups",
        [
            ('"John Doe" <john@example.com>', {"name1": "John Doe", "email1": "john@example.com"}),
            ("John Doe <john@example.com>", {"name2": "John Doe", "email2": "john@example.com"}),
            ("<john@example.com>", {"email3": "john@example.com"}),
            ("john@example.com", {"email4": "john@example.com"}),
            (' "Jane Smith" <jane@test.com> ', {"name1": "Jane Smith", "email1": "jane@test.com"}),
            ("Bob <bob@test.org>", {"name2": "Bob", "email2": "bob@test.org"}),
        ],
    )
    def test_valid_name_email_combinations(
        self,
        name_email_simple_pattern: re.Pattern[str],
        test_input: str,
        expected_groups: dict[str, str],
    ) -> None:
        """Accepted inputs must populate the expected named groups."""
        found = name_email_simple_pattern.match(test_input)
        assert found is not None, f"Failed to match: {test_input}"
        # Compare every expected group against what the match captured.
        for group_name in expected_groups:
            expected_value = expected_groups[group_name]
            captured = found.group(group_name)
            assert captured == expected_value, (
                f"Group {group_name} expected '{expected_value}', got '{captured}'"
            )

    @pytest.mark.parametrize(
        "invalid_input",
        [
            "",
            "not an email",
            "<>",
            '"Name Only"',
            "Name <",
            "<email",
            "Name <<email@test.com>>",
            "Name <email@test.com",
            "Name email@test.com>",
        ],
    )
    def test_invalid_name_email_combinations(
        self,
        name_email_simple_pattern: re.Pattern[str],
        invalid_input: str,
    ) -> None:
        """Malformed inputs must not match NAME_EMAIL_SIMPLE_REGEX."""
        found = name_email_simple_pattern.match(invalid_input)
        assert found is None, f"Incorrectly matched invalid input: {invalid_input}"

    def test_extract_name_from_quoted(
        self,
        name_email_simple_pattern: re.Pattern[str],
    ) -> None:
        """A quoted display name lands in name1/email1."""
        found = name_email_simple_pattern.match('"Alice Wonder" <alice@example.com>')
        assert found is not None
        assert found.group("name1") == "Alice Wonder"
        assert found.group("email1") == "alice@example.com"

    def test_extract_name_from_unquoted(
        self,
        name_email_simple_pattern: re.Pattern[str],
    ) -> None:
        """An unquoted display name lands in name2/email2."""
        found = name_email_simple_pattern.match("Bob Builder <bob@example.com>")
        assert found is not None
        assert found.group("name2") == "Bob Builder"
        assert found.group("email2") == "bob@example.com"

    def test_email_only_in_brackets(
        self,
        name_email_simple_pattern: re.Pattern[str],
    ) -> None:
        """An address in angle brackets with no name lands in email3."""
        found = name_email_simple_pattern.match("<charlie@example.com>")
        assert found is not None
        assert found.group("email3") == "charlie@example.com"

    def test_email_only_plain(
        self,
        name_email_simple_pattern: re.Pattern[str],
    ) -> None:
        """A bare address without brackets lands in email4."""
        found = name_email_simple_pattern.match("dave@example.com")
        assert found is not None
        assert found.group("email4") == "dave@example.com"

    def test_whitespace_handling(
        self,
        name_email_simple_pattern: re.Pattern[str],
    ) -> None:
        """Surrounding whitespace does not leak into the captured groups."""
        found = name_email_simple_pattern.match(' "User Name" <user@example.com> ')
        assert found is not None
        assert found.group("name1") == "User Name"
        assert found.group("email1") == "user@example.com"
class TestNameEmailBasicRegex:
    """Checks for NAME_EMAIL_BASIC_REGEX: name+email parsing with strict address rules.

    The tests read the same named groups as the simple variant
    (``name1``/``email1``, ``name2``/``email2``, ``email3``, ``email4``) and
    additionally require malformed addresses to be rejected.
    """

    @pytest.fixture
    def name_email_basic_pattern(self) -> re.Pattern[str]:
        """Compile NAME_EMAIL_BASIC_REGEX as-is."""
        compiled = compile_re(NAME_EMAIL_BASIC_REGEX)
        return compiled

    @pytest.mark.parametrize(
        "test_input,expected_name,expected_email",
        [
            ('"John Doe" <john@example.com>', "John Doe", "john@example.com"),
            ("John Doe <john@example.com>", "John Doe", "john@example.com"),
            ("<john@example.com>", None, "john@example.com"),
            ("john@example.com", None, "john@example.com"),
            (' "Jane Smith" <jane.smith@test.co.uk> ', "Jane Smith", "jane.smith@test.co.uk"),
            ("Alice Wonder <alice+tag@example.com>", "Alice Wonder", "alice+tag@example.com"),
        ],
    )
    def test_valid_name_email_with_validation(
        self,
        name_email_basic_pattern: re.Pattern[str],
        test_input: str,
        expected_name: str | None,
        expected_email: str,
    ) -> None:
        """Accepted inputs yield the expected name (if any) and address."""
        found = name_email_basic_pattern.match(test_input)
        assert found is not None, f"Failed to match: {test_input}"

        def first_captured(*group_names: str) -> str | None:
            # Same result as chaining the groups with ``or``: stop at the
            # first truthy capture, otherwise hand back the last one looked at.
            value = None
            for group_name in group_names:
                value = found.group(group_name)
                if value:
                    break
            return value

        # Only one alternative matches, so take whichever group is filled.
        name = first_captured("name1", "name2")
        email = first_captured("email1", "email2", "email3", "email4")

        assert name == expected_name, f"Expected name '{expected_name}', got '{name}'"
        assert email == expected_email, f"Expected email '{expected_email}', got '{email}'"

    @pytest.mark.parametrize(
        "invalid_input",
        [
            '"John Doe" <invalid.email>',  # invalid email format
            "John Doe <@example.com>",  # missing local part
            "<user@>",  # missing domain
            "user@domain",  # no TLD
            '"Name" <user @example.com>',  # space in email
            "<.user@example.com>",  # starts with dot
            "user@-example.com",  # domain starts with hyphen
            "Name <user@example.c>",  # TLD too short
            "Name <user@example.toolongdomain>",  # TLD too long
        ],
    )
    def test_invalid_email_format_rejected(
        self,
        name_email_basic_pattern: re.Pattern[str],
        invalid_input: str,
    ) -> None:
        """Inputs whose address part is malformed must not match."""
        found = name_email_basic_pattern.match(invalid_input)
        assert found is None, f"Incorrectly matched invalid input: {invalid_input}"

    def test_quoted_name_with_valid_email(
        self,
        name_email_basic_pattern: re.Pattern[str],
    ) -> None:
        """A quoted display name with a valid address lands in name1/email1."""
        found = name_email_basic_pattern.match('"Alice Wonder" <alice@example.com>')
        assert found is not None
        assert found.group("name1") == "Alice Wonder"
        assert found.group("email1") == "alice@example.com"

    def test_unquoted_name_with_valid_email(
        self,
        name_email_basic_pattern: re.Pattern[str],
    ) -> None:
        """An unquoted display name with a valid address lands in name2/email2."""
        found = name_email_basic_pattern.match("Bob Builder <bob@example.com>")
        assert found is not None
        assert found.group("name2") == "Bob Builder"
        assert found.group("email2") == "bob@example.com"

    def test_email_only_formats(
        self,
        name_email_basic_pattern: re.Pattern[str],
    ) -> None:
        """Address-only inputs: bracketed goes to email3, bare goes to email4."""
        cases = (
            # With brackets
            ("<charlie@example.com>", "email3", "charlie@example.com"),
            # Without brackets
            ("dave@example.com", "email4", "dave@example.com"),
        )
        for text, group_name, expected in cases:
            found = name_email_basic_pattern.match(text)
            assert found is not None
            assert found.group(group_name) == expected

    def test_whitespace_handling(
        self,
        name_email_basic_pattern: re.Pattern[str],
    ) -> None:
        """Surrounding whitespace does not leak into the captured groups."""
        found = name_email_basic_pattern.match(' "User" <user@example.com> ')
        assert found is not None
        assert found.group("name1") == "User"
        assert found.group("email1") == "user@example.com"

    def test_special_characters_in_local_part(
        self,
        name_email_basic_pattern: re.Pattern[str],
    ) -> None:
        """Punctuation in the local part of the address is accepted."""
        found = name_email_basic_pattern.match(
            "Test User <test!#$%&'*+-/=?^_`{|}~@example.com>"
        )
        assert found is not None
        assert found.group("name2") == "Test User"
        assert found.group("email2") == "test!#$%&'*+-/=?^_`{|}~@example.com"
class TestDomainWithLocalhostRegex:
"""Test cases for DOMAIN_WITH_LOCALHOST_REGEX pattern."""