Add new regex constants for email handling and update related tests

This commit is contained in:
Clemens Schwaighofer
2026-01-08 14:58:14 +09:00
parent d3f61005cf
commit f91e0bb93a
4 changed files with 476 additions and 15 deletions

View File

@@ -19,9 +19,26 @@ def compile_re(reg: str) -> re.Pattern[str]:
# email regex # email regex
EMAIL_BASIC_REGEX: str = r""" SUB_EMAIL_BASIC_REGEX: str = r"""
^[A-Za-z0-9!#$%&'*+\-\/=?^_`{|}~][A-Za-z0-9!#$%:\(\)&'*+\-\/=?^_`{|}~\.]{0,63} [A-Za-z0-9!#$%&'*+\-\/=?^_`{|}~][A-Za-z0-9!#$%:\(\)&'*+\-\/=?^_`{|}~\.]{0,63}
@(?!-)[A-Za-z0-9-]{1,63}(?<!-)(?:\.[A-Za-z0-9-]{1,63}(?<!-))*\.[a-zA-Z]{2,6}$ @(?!-)[A-Za-z0-9-]{1,63}(?<!-)(?:\.[A-Za-z0-9-]{1,63}(?<!-))*\.[a-zA-Z]{2,6}
"""
EMAIL_BASIC_REGEX = rf"^{SUB_EMAIL_BASIC_REGEX}$"
# name + email regex for email sending type like "foo bar" <email@mail.com>
# Accepts four layouts, each captured in its own pair of named groups:
#   "quoted name" <email>  -> name1 / email1
#   unquoted name <email>  -> name2 / email2
#   <email>                -> email3
#   bare email token       -> email4 (no whitespace, "<" or ">" allowed)
# The email part is only delimited here, not validated: anything up to the
# closing ">" (or the whole bare token) is accepted.
# NOTE(review): the pattern is spread over several lines, so it presumably has
# to be compiled in verbose mode (e.g. through compile_re) -- confirm.
NAME_EMAIL_SIMPLE_REGEX = r"""
^\s*(?:"(?P<name1>[^"]+)"\s*<(?P<email1>[^>]+)>|
(?P<name2>.+?)\s*<(?P<email2>[^>]+)>|
<(?P<email3>[^>]+)>|
(?P<email4>[^\s<>]+))\s*$
"""
# name + email with the basic regex set
NAME_EMAIL_BASIC_REGEX = rf"""
^\s*(?:
"(?P<name1>[^"]+)"\s*<(?P<email1>{SUB_EMAIL_BASIC_REGEX})>|
(?P<name2>.+?)\s*<(?P<email2>{SUB_EMAIL_BASIC_REGEX})>|
<(?P<email3>{SUB_EMAIL_BASIC_REGEX})>|
(?P<email4>{SUB_EMAIL_BASIC_REGEX})
)\s*$
""" """
# Domain regex with localhost # Domain regex with localhost
DOMAIN_WITH_LOCALHOST_REGEX: str = r""" DOMAIN_WITH_LOCALHOST_REGEX: str = r"""

View File

@@ -2,14 +2,24 @@
Test check handling for regex checks Test check handling for regex checks
""" """
import re from corelibs_text_colors.text_colors import Colors
from corelibs.check_handling.regex_constants import DOMAIN_WITH_LOCALHOST_REGEX from corelibs.check_handling.regex_constants import (
compile_re, DOMAIN_WITH_LOCALHOST_REGEX, EMAIL_BASIC_REGEX, NAME_EMAIL_BASIC_REGEX, SUB_EMAIL_BASIC_REGEX
)
NAME_EMAIL_SIMPLE_REGEX = r"""
^\s*(?:"(?P<name1>[^"]+)"\s*<(?P<email1>[^>]+)>|
(?P<name2>.+?)\s*<(?P<email2>[^>]+)>|
<(?P<email3>[^>]+)>|
(?P<email4>[^\s<>]+))\s*$
"""
def main(): def domain_test():
""" """
Test regex checks domain regex test
""" """
print("=" * 30)
test_domains = [ test_domains = [
"example.com", "example.com",
"localhost", "localhost",
@@ -18,7 +28,7 @@ def main():
"some-domain.org" "some-domain.org"
] ]
regex_domain_check = re.compile(DOMAIN_WITH_LOCALHOST_REGEX) regex_domain_check = compile_re(DOMAIN_WITH_LOCALHOST_REGEX)
print(f"REGEX: {DOMAIN_WITH_LOCALHOST_REGEX}") print(f"REGEX: {DOMAIN_WITH_LOCALHOST_REGEX}")
print(f"Check regex: {regex_domain_check.search('localhost')}") print(f"Check regex: {regex_domain_check.search('localhost')}")
@@ -29,6 +39,61 @@ def main():
print(f"Did not match: {domain}") print(f"Did not match: {domain}")
def email_test():
    """
    email regex test

    Runs every non-empty sample line through the four email patterns
    (basic, sub, simple name+email, full name+email) and prints a
    colored match / no-match line for each pattern.
    """
    print("=" * 30)
    # one sample per line; blank lines are skipped and each entry is stripped before testing
    email_list = """
e@bar.com
<f@foobar.com>
"Master" <foobar@bar.com>
"not valid" not@valid.com
also not valid not@valid.com
some header <something@bar.com>
test master <master@master.com>
日本語 <japan@jp.net>
"ひほん カケ苦" <foo@bar.com>
single@entry.com
arsch@popsch.com
test open <open@open.com>
"""
    # (label shown inside the brackets, compiled pattern), in report order
    checks = (
        ("EMAIL ", compile_re(EMAIL_BASIC_REGEX)),
        ("SUB ", compile_re(SUB_EMAIL_BASIC_REGEX)),
        ("SIMPLE", compile_re(NAME_EMAIL_SIMPLE_REGEX)),
        ("FULL ", compile_re(NAME_EMAIL_BASIC_REGEX)),
    )
    for raw_line in email_list.splitlines():
        candidate = raw_line.strip()
        if not candidate:
            continue
        print(f">>> Testing: {candidate}")
        for tag, pattern in checks:
            if pattern.match(candidate):
                print(f"{Colors.green}[{tag}] Matched : {candidate}{Colors.reset}")
            else:
                print(f"{Colors.red}[{tag}] No match: {candidate}{Colors.reset}")
def main():
    """
    Test regex checks

    Runs the domain checks first, then the email checks.
    """
    for run_check in (domain_test, email_test):
        run_check()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -24,12 +24,19 @@ def main() -> None:
"lookup_value_c": "B02", "lookup_value_c": "B02",
"replace_value": "R02", "replace_value": "R02",
}, },
{
"lookup_value_p": "A03",
"lookup_value_c": "B03",
"replace_value": "R03",
},
] ]
test_foo = ArraySearchList( test_foo = ArraySearchList(
key = "lookup_value_p", key="lookup_value_p",
value = "A01" value="A01"
) )
print(test_foo) result = find_in_array_from_list(data, [test_foo])
print(f"Search A: {dump_data(test_foo)} -> {dump_data(result)}")
search: list[ArraySearchList] = [ search: list[ArraySearchList] = [
{ {
"key": "lookup_value_p", "key": "lookup_value_p",
@@ -38,12 +45,122 @@ def main() -> None:
{ {
"key": "lookup_value_c", "key": "lookup_value_c",
"value": "B01" "value": "B01"
},
]
result = find_in_array_from_list(data, search)
print(f"Search B: {dump_data(search)} -> {dump_data(result)}")
search: list[ArraySearchList] = [
{
"key": "lookup_value_p",
"value": "A01"
},
{
"key": "lookup_value_c",
"value": "B01"
},
{
"key": "lookup_value_c",
"value": "B02"
},
]
try:
result = find_in_array_from_list(data, search)
print(f"Search C: {dump_data(search)} -> {dump_data(result)}")
except KeyError as e:
print(f"Search C raised KeyError: {e}")
search: list[ArraySearchList] = [
{
"key": "lookup_value_p",
"value": "A01"
},
{
"key": "lookup_value_c",
"value": ["B01", "B02"]
},
]
try:
result = find_in_array_from_list(data, search)
print(f"Search D: {dump_data(search)} -> {dump_data(result)}")
except KeyError as e:
print(f"Search D raised KeyError: {e}")
search: list[ArraySearchList] = [
{
"key": "lookup_value_p",
"value": ["A01", "A03"]
},
{
"key": "lookup_value_c",
"value": ["B01", "B02"]
},
]
try:
result = find_in_array_from_list(data, search)
print(f"Search E: {dump_data(search)} -> {dump_data(result)}")
except KeyError as e:
print(f"Search E raised KeyError: {e}")
search: list[ArraySearchList] = [
{
"key": "lookup_value_p",
"value": "NOT FOUND"
},
]
try:
result = find_in_array_from_list(data, search)
print(f"Search F: {dump_data(search)} -> {dump_data(result)}")
except KeyError as e:
print(f"Search F raised KeyError: {e}")
data = [
{
"sd_user_id": "1593",
"email": "",
"employee_id": ""
},
{
"sd_user_id": "1592",
"email": "",
"employee_id": ""
},
{
"sd_user_id": "1596",
"email": "",
"employee_id": ""
},
{
"sd_user_id": "1594",
"email": "",
"employee_id": ""
},
{
"sd_user_id": "1595",
"email": "",
"employee_id": ""
},
{
"sd_user_id": "1861",
"email": "",
"employee_id": ""
},
{
"sd_user_id": "1862",
"email": "",
"employee_id": ""
},
{
"sd_user_id": "1860",
"email": "",
"employee_id": ""
} }
] ]
result = find_in_array_from_list(data, [ArraySearchList(
result = find_in_array_from_list(data, search) key="sd_user_id",
value="1593"
print(f"Search {dump_data(search)} -> {dump_data(result)}") )])
print(f"Search F: -> {dump_data(result)}")
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -8,7 +8,10 @@ import re
import pytest import pytest
from corelibs.check_handling.regex_constants import ( from corelibs.check_handling.regex_constants import (
compile_re, compile_re,
SUB_EMAIL_BASIC_REGEX,
EMAIL_BASIC_REGEX, EMAIL_BASIC_REGEX,
NAME_EMAIL_SIMPLE_REGEX,
NAME_EMAIL_BASIC_REGEX,
DOMAIN_WITH_LOCALHOST_REGEX, DOMAIN_WITH_LOCALHOST_REGEX,
DOMAIN_WITH_LOCALHOST_PORT_REGEX, DOMAIN_WITH_LOCALHOST_PORT_REGEX,
DOMAIN_REGEX, DOMAIN_REGEX,
@@ -123,6 +126,265 @@ class TestEmailBasicRegex:
assert not email_pattern.match(email) assert not email_pattern.match(email)
class TestSubEmailBasicRegex:
    """Test cases for SUB_EMAIL_BASIC_REGEX, the email pattern that carries no anchors itself.

    The fixture wraps the pattern in ``^...$`` so every case is judged against the whole string.
    """

    @pytest.fixture
    def sub_email_pattern(self) -> re.Pattern[str]:
        """Fixture that returns the sub email pattern compiled with explicit start/end anchors."""
        anchored = rf"^{SUB_EMAIL_BASIC_REGEX}$"
        return compile_re(anchored)

    @pytest.mark.parametrize("valid_email", [
        "user@example.com",
        "test.user@example.com",
        "user+tag@example.co.uk",
        "first.last@subdomain.example.com",
        "user123@test-domain.com",
        "a@example.com",
        "user_name@example.com",
        "user-name@example.com",
        "user@sub.domain.example.com",
        "test!#$%&'*+-/=?^_`{|}~@example.com",
        "1234567890@example.com",
    ])
    def test_valid_emails_match(self, sub_email_pattern: re.Pattern[str], valid_email: str) -> None:
        """Every well-formed address must be accepted."""
        found = sub_email_pattern.match(valid_email)
        assert found, f"Failed to match valid email: {valid_email}"

    @pytest.mark.parametrize("invalid_email", [
        "",
        "@example.com",
        "user@",
        "user",
        "user@.com",
        "user@domain",
        "user @example.com",
        ".user@example.com",
        "user@-example.com",
        "user@example-.com",
        "user@example.c",
        "user@example.toolong",
    ])
    def test_invalid_emails_no_match(self, sub_email_pattern: re.Pattern[str], invalid_email: str) -> None:
        """Every malformed address must be rejected."""
        found = sub_email_pattern.match(invalid_email)
        assert found is None, f"Incorrectly matched invalid email: {invalid_email}"

    def test_sub_email_max_local_part_length(self, sub_email_pattern: re.Pattern[str]) -> None:
        """A local part of exactly 64 characters is still accepted."""
        longest_allowed = "a" * 64 + "@example.com"
        assert sub_email_pattern.match(longest_allowed)

    def test_sub_email_exceeds_local_part_length(self, sub_email_pattern: re.Pattern[str]) -> None:
        """A local part of 65 characters is rejected."""
        too_long = "a" * 65 + "@example.com"
        assert sub_email_pattern.match(too_long) is None
class TestNameEmailSimpleRegex:
    """Test cases for NAME_EMAIL_SIMPLE_REGEX (name + email layouts, email part not validated)."""

    @pytest.fixture
    def name_email_simple_pattern(self) -> re.Pattern[str]:
        """Fixture that returns the compiled simple name+email pattern."""
        compiled = compile_re(NAME_EMAIL_SIMPLE_REGEX)
        return compiled

    @pytest.mark.parametrize("test_input,expected_groups", [
        ('"John Doe" <john@example.com>', {'name1': 'John Doe', 'email1': 'john@example.com'}),
        ('John Doe <john@example.com>', {'name2': 'John Doe', 'email2': 'john@example.com'}),
        ('<john@example.com>', {'email3': 'john@example.com'}),
        ('john@example.com', {'email4': 'john@example.com'}),
        (' "Jane Smith" <jane@test.com> ', {'name1': 'Jane Smith', 'email1': 'jane@test.com'}),
        ('Bob <bob@test.org>', {'name2': 'Bob', 'email2': 'bob@test.org'}),
    ])
    def test_valid_name_email_combinations(
        self, name_email_simple_pattern: re.Pattern[str], test_input: str, expected_groups: dict[str, str]
    ) -> None:
        """Each supported layout matches and fills the named groups listed for it."""
        found = name_email_simple_pattern.match(test_input)
        assert found is not None, f"Failed to match: {test_input}"
        # only the groups named in the expectation are checked; the others are left unverified
        for group_name in expected_groups:
            wanted = expected_groups[group_name]
            actual = found.group(group_name)
            assert actual == wanted, (
                f"Group {group_name} expected '{wanted}', got '{actual}'"
            )

    @pytest.mark.parametrize("invalid_input", [
        "",
        "not an email",
        "<>",
        '"Name Only"',
        'Name <',
        '<email',
        'Name <<email@test.com>>',
        'Name <email@test.com',
        'Name email@test.com>',
    ])
    def test_invalid_name_email_combinations(
        self, name_email_simple_pattern: re.Pattern[str], invalid_input: str
    ) -> None:
        """Broken or incomplete layouts must not match."""
        found = name_email_simple_pattern.match(invalid_input)
        assert found is None, f"Incorrectly matched invalid input: {invalid_input}"

    def test_extract_name_from_quoted(
        self, name_email_simple_pattern: re.Pattern[str]
    ) -> None:
        """Quoted name layout lands in name1/email1."""
        found = name_email_simple_pattern.match('"Alice Wonder" <alice@example.com>')
        assert found is not None
        assert found.group('name1', 'email1') == ('Alice Wonder', 'alice@example.com')

    def test_extract_name_from_unquoted(
        self, name_email_simple_pattern: re.Pattern[str]
    ) -> None:
        """Unquoted name layout lands in name2/email2."""
        found = name_email_simple_pattern.match('Bob Builder <bob@example.com>')
        assert found is not None
        assert found.group('name2', 'email2') == ('Bob Builder', 'bob@example.com')

    def test_email_only_in_brackets(
        self, name_email_simple_pattern: re.Pattern[str]
    ) -> None:
        """Bracketed address without a name lands in email3."""
        found = name_email_simple_pattern.match('<charlie@example.com>')
        assert found is not None
        assert found.group('email3') == 'charlie@example.com'

    def test_email_only_plain(
        self, name_email_simple_pattern: re.Pattern[str]
    ) -> None:
        """Bare address without brackets lands in email4."""
        found = name_email_simple_pattern.match('dave@example.com')
        assert found is not None
        assert found.group('email4') == 'dave@example.com'

    def test_whitespace_handling(
        self, name_email_simple_pattern: re.Pattern[str]
    ) -> None:
        """Surrounding whitespace is ignored and does not leak into the groups."""
        found = name_email_simple_pattern.match(' "User Name" <user@example.com> ')
        assert found is not None
        assert found.group('name1', 'email1') == ('User Name', 'user@example.com')
class TestNameEmailBasicRegex:
    """Test cases for NAME_EMAIL_BASIC_REGEX: name + email layouts with the strict email pattern."""

    @pytest.fixture
    def name_email_basic_pattern(self) -> re.Pattern[str]:
        """Fixture that returns the compiled strict name+email pattern."""
        compiled = compile_re(NAME_EMAIL_BASIC_REGEX)
        return compiled

    @pytest.mark.parametrize("test_input,expected_name,expected_email", [
        ('"John Doe" <john@example.com>', 'John Doe', 'john@example.com'),
        ('John Doe <john@example.com>', 'John Doe', 'john@example.com'),
        ('<john@example.com>', None, 'john@example.com'),
        ('john@example.com', None, 'john@example.com'),
        (' "Jane Smith" <jane.smith@test.co.uk> ', 'Jane Smith', 'jane.smith@test.co.uk'),
        ('Alice Wonder <alice+tag@example.com>', 'Alice Wonder', 'alice+tag@example.com'),
    ])
    def test_valid_name_email_with_validation(
        self,
        name_email_basic_pattern: re.Pattern[str],
        test_input: str,
        expected_name: str | None,
        expected_email: str,
    ) -> None:
        """Valid layouts match and yield the expected name and email."""
        found = name_email_basic_pattern.match(test_input)
        assert found is not None, f"Failed to match: {test_input}"
        # only one alternative of the pattern takes part in a match, so take the first filled group
        quoted_name, plain_name = found.group('name1', 'name2')
        name = quoted_name or plain_name
        emails = found.group('email1', 'email2', 'email3', 'email4')
        email = emails[0] or emails[1] or emails[2] or emails[3]
        assert name == expected_name, f"Expected name '{expected_name}', got '{name}'"
        assert email == expected_email, f"Expected email '{expected_email}', got '{email}'"

    @pytest.mark.parametrize("invalid_input", [
        '"John Doe" <invalid.email>',  # invalid email format
        'John Doe <@example.com>',  # missing local part
        '<user@>',  # missing domain
        'user@domain',  # no TLD
        '"Name" <user @example.com>',  # space in email
        '<.user@example.com>',  # starts with dot
        'user@-example.com',  # domain starts with hyphen
        'Name <user@example.c>',  # TLD too short
        'Name <user@example.toolongdomain>',  # TLD too long
    ])
    def test_invalid_email_format_rejected(
        self, name_email_basic_pattern: re.Pattern[str], invalid_input: str
    ) -> None:
        """Inputs whose email part fails the strict pattern must not match."""
        found = name_email_basic_pattern.match(invalid_input)
        assert found is None, f"Incorrectly matched invalid input: {invalid_input}"

    def test_quoted_name_with_valid_email(
        self, name_email_basic_pattern: re.Pattern[str]
    ) -> None:
        """Quoted name layout lands in name1/email1."""
        found = name_email_basic_pattern.match('"Alice Wonder" <alice@example.com>')
        assert found is not None
        assert found.group('name1', 'email1') == ('Alice Wonder', 'alice@example.com')

    def test_unquoted_name_with_valid_email(
        self, name_email_basic_pattern: re.Pattern[str]
    ) -> None:
        """Unquoted name layout lands in name2/email2."""
        found = name_email_basic_pattern.match('Bob Builder <bob@example.com>')
        assert found is not None
        assert found.group('name2', 'email2') == ('Bob Builder', 'bob@example.com')

    def test_email_only_formats(
        self, name_email_basic_pattern: re.Pattern[str]
    ) -> None:
        """Email-only input works both with and without angle brackets."""
        # With brackets
        bracketed = name_email_basic_pattern.match('<charlie@example.com>')
        assert bracketed is not None
        assert bracketed.group('email3') == 'charlie@example.com'
        # Without brackets
        bare = name_email_basic_pattern.match('dave@example.com')
        assert bare is not None
        assert bare.group('email4') == 'dave@example.com'

    def test_whitespace_handling(
        self, name_email_basic_pattern: re.Pattern[str]
    ) -> None:
        """Surrounding whitespace is ignored and does not leak into the groups."""
        found = name_email_basic_pattern.match(' "User" <user@example.com> ')
        assert found is not None
        assert found.group('name1', 'email1') == ('User', 'user@example.com')

    def test_special_characters_in_local_part(
        self, name_email_basic_pattern: re.Pattern[str]
    ) -> None:
        """Special characters allowed in the local part survive inside the bracketed layout."""
        address = "test!#$%&'*+-/=?^_`{|}~@example.com"
        found = name_email_basic_pattern.match(f'Test User <{address}>')
        assert found is not None
        assert found.group('name2') == 'Test User'
        assert found.group('email2') == address
class TestDomainWithLocalhostRegex: class TestDomainWithLocalhostRegex:
"""Test cases for DOMAIN_WITH_LOCALHOST_REGEX pattern.""" """Test cases for DOMAIN_WITH_LOCALHOST_REGEX pattern."""