From f91e0bb93aed288ea8e71e8b7cb45d6b6bee63bc Mon Sep 17 00:00:00 2001 From: Clemens Schwaighofer Date: Thu, 8 Jan 2026 14:58:14 +0900 Subject: [PATCH] Add new regex constants for email handling and update related tests --- .../check_handling/regex_constants.py | 23 +- test-run/check_handling/regex_checks.py | 75 ++++- test-run/iterator_handling/data_search.py | 131 ++++++++- .../check_handling/test_regex_constants.py | 262 ++++++++++++++++++ 4 files changed, 476 insertions(+), 15 deletions(-) diff --git a/src/corelibs/check_handling/regex_constants.py b/src/corelibs/check_handling/regex_constants.py index aa7fba9..1fe0ecd 100644 --- a/src/corelibs/check_handling/regex_constants.py +++ b/src/corelibs/check_handling/regex_constants.py @@ -19,9 +19,26 @@ def compile_re(reg: str) -> re.Pattern[str]: # email regex -EMAIL_BASIC_REGEX: str = r""" -^[A-Za-z0-9!#$%&'*+\-\/=?^_`{|}~][A-Za-z0-9!#$%:\(\)&'*+\-\/=?^_`{|}~\.]{0,63} -@(?!-)[A-Za-z0-9-]{1,63}(? +NAME_EMAIL_SIMPLE_REGEX = r""" +^\s*(?:"(?P[^"]+)"\s*<(?P[^>]+)>| +(?P.+?)\s*<(?P[^>]+)>| +<(?P[^>]+)>| +(?P[^\s<>]+))\s*$ +""" +# name + email with the basic regex set +NAME_EMAIL_BASIC_REGEX = rf""" +^\s*(?: +"(?P[^"]+)"\s*<(?P{SUB_EMAIL_BASIC_REGEX})>| +(?P.+?)\s*<(?P{SUB_EMAIL_BASIC_REGEX})>| +<(?P{SUB_EMAIL_BASIC_REGEX})>| +(?P{SUB_EMAIL_BASIC_REGEX}) +)\s*$ """ # Domain regex with localhost DOMAIN_WITH_LOCALHOST_REGEX: str = r""" diff --git a/test-run/check_handling/regex_checks.py b/test-run/check_handling/regex_checks.py index 2843474..66e6005 100644 --- a/test-run/check_handling/regex_checks.py +++ b/test-run/check_handling/regex_checks.py @@ -2,14 +2,24 @@ Test check andling for regex checks """ -import re -from corelibs.check_handling.regex_constants import DOMAIN_WITH_LOCALHOST_REGEX +from corelibs_text_colors.text_colors import Colors +from corelibs.check_handling.regex_constants import ( + compile_re, DOMAIN_WITH_LOCALHOST_REGEX, EMAIL_BASIC_REGEX, NAME_EMAIL_BASIC_REGEX, SUB_EMAIL_BASIC_REGEX +) + +NAME_EMAIL_SIMPLE_REGEX = r""" +^\s*(?:"(?P[^"]+)"\s*<(?P[^>]+)>| +(?P.+?)\s*<(?P[^>]+)>| +<(?P[^>]+)>| +(?P[^\s<>]+))\s*$ +""" -def main(): +def domain_test(): """ - Test regex checks + domain regex test """ + print("=" * 30) test_domains = [ "example.com", "localhost", @@ -18,7 +28,7 @@ def main(): "some-domain.org" ] - regex_domain_check = re.compile(DOMAIN_WITH_LOCALHOST_REGEX) + regex_domain_check = compile_re(DOMAIN_WITH_LOCALHOST_REGEX) print(f"REGEX: {DOMAIN_WITH_LOCALHOST_REGEX}") print(f"Check regex: {regex_domain_check.search('localhost')}") @@ -29,6 +39,61 @@ def main(): print(f"Did not match: {domain}") +def email_test(): + """ + email regex test + """ + print("=" * 30) + email_list = """ + e@bar.com + + "Master" + "not valid" not@valid.com + also not valid not@valid.com + some header + test master + 日本語 + "ひほん カケ苦" + single@entry.com + arsch@popsch.com + test open + """ + + basic_email = compile_re(EMAIL_BASIC_REGEX) + sub_basic_email = compile_re(SUB_EMAIL_BASIC_REGEX) + simple_name_email_regex = compile_re(NAME_EMAIL_SIMPLE_REGEX) + full_name_email_regex = compile_re(NAME_EMAIL_BASIC_REGEX) + for email in email_list.splitlines(): + email = email.strip() + if not email: + continue + print(f">>> Testing: {email}") + if not basic_email.match(email): + print(f"{Colors.red}[EMAIL ] No match: {email}{Colors.reset}") + else: + print(f"{Colors.green}[EMAIL ] Matched : {email}{Colors.reset}") + if not sub_basic_email.match(email): + print(f"{Colors.red}[SUB ] No match: {email}{Colors.reset}") + else: + print(f"{Colors.green}[SUB ] Matched : {email}{Colors.reset}") + if not simple_name_email_regex.match(email): + print(f"{Colors.red}[SIMPLE] No match: {email}{Colors.reset}") + else: + print(f"{Colors.green}[SIMPLE] Matched : {email}{Colors.reset}") + if not full_name_email_regex.match(email): + print(f"{Colors.red}[FULL ] No match: {email}{Colors.reset}") + else: + print(f"{Colors.green}[FULL ] Matched : {email}{Colors.reset}") + + +def main(): + """ + Test regex checks + """ + domain_test() + email_test() + + if __name__ == "__main__": main() diff --git a/test-run/iterator_handling/data_search.py b/test-run/iterator_handling/data_search.py index ff4e410..501c55d 100644 --- a/test-run/iterator_handling/data_search.py +++ b/test-run/iterator_handling/data_search.py @@ -24,12 +24,19 @@ def main() -> None: "lookup_value_c": "B02", "replace_value": "R02", }, + { + "lookup_value_p": "A03", + "lookup_value_c": "B03", + "replace_value": "R03", + }, ] test_foo = ArraySearchList( - key = "lookup_value_p", - value = "A01" + key="lookup_value_p", + value="A01" ) - print(test_foo) + result = find_in_array_from_list(data, [test_foo]) + print(f"Search A: {dump_data(test_foo)} -> {dump_data(result)}") + search: list[ArraySearchList] = [ { "key": "lookup_value_p", @@ -38,12 +45,122 @@ def main() -> None: { "key": "lookup_value_c", "value": "B01" + }, + ] + result = find_in_array_from_list(data, search) + print(f"Search B: {dump_data(search)} -> {dump_data(result)}") + + search: list[ArraySearchList] = [ + { + "key": "lookup_value_p", + "value": "A01" + }, + { + "key": "lookup_value_c", + "value": "B01" + }, + { + "key": "lookup_value_c", + "value": "B02" + }, + ] + try: + result = find_in_array_from_list(data, search) + print(f"Search C: {dump_data(search)} -> {dump_data(result)}") + except KeyError as e: + print(f"Search C raised KeyError: {e}") + + search: list[ArraySearchList] = [ + { + "key": "lookup_value_p", + "value": "A01" + }, + { + "key": "lookup_value_c", + "value": ["B01", "B02"] + }, + ] + try: + result = find_in_array_from_list(data, search) + print(f"Search D: {dump_data(search)} -> {dump_data(result)}") + except KeyError as e: + print(f"Search D raised KeyError: {e}") + + search: list[ArraySearchList] = [ + { + "key": "lookup_value_p", + "value": ["A01", "A03"] + }, + { + "key": "lookup_value_c", + "value": ["B01", "B02"] + }, + ] + try: + result = find_in_array_from_list(data, search) + print(f"Search E: {dump_data(search)} -> {dump_data(result)}") + except KeyError as e: + print(f"Search E raised KeyError: {e}") + + search: list[ArraySearchList] = [ + { + "key": "lookup_value_p", + "value": "NOT FOUND" + }, + ] + try: + result = find_in_array_from_list(data, search) + print(f"Search F: {dump_data(search)} -> {dump_data(result)}") + except KeyError as e: + print(f"Search F raised KeyError: {e}") + + data = [ + { + "sd_user_id": "1593", + "email": "", + "employee_id": "" + }, + { + "sd_user_id": "1592", + "email": "", + "employee_id": "" + }, + { + "sd_user_id": "1596", + "email": "", + "employee_id": "" + }, + { + "sd_user_id": "1594", + "email": "", + "employee_id": "" + }, + { + "sd_user_id": "1595", + "email": "", + "employee_id": "" + }, + { + "sd_user_id": "1861", + "email": "", + "employee_id": "" + }, + { + "sd_user_id": "1862", + "email": "", + "employee_id": "" + }, + { + "sd_user_id": "1860", + "email": "", + "employee_id": "" } ] - - result = find_in_array_from_list(data, search) - - print(f"Search {dump_data(search)} -> {dump_data(result)}") + result = find_in_array_from_list(data, [ArraySearchList( + key="sd_user_id", + value="1593" + )]) + print(f"Search F: -> {dump_data(result)}") if __name__ == "__main__": diff --git a/tests/unit/check_handling/test_regex_constants.py b/tests/unit/check_handling/test_regex_constants.py index 4a4bdbf..136a7fe 100644 --- a/tests/unit/check_handling/test_regex_constants.py +++ b/tests/unit/check_handling/test_regex_constants.py @@ -8,7 +8,10 @@ import re import pytest from corelibs.check_handling.regex_constants import ( compile_re, + SUB_EMAIL_BASIC_REGEX, EMAIL_BASIC_REGEX, + NAME_EMAIL_SIMPLE_REGEX, + NAME_EMAIL_BASIC_REGEX, DOMAIN_WITH_LOCALHOST_REGEX, DOMAIN_WITH_LOCALHOST_PORT_REGEX, DOMAIN_REGEX, @@ -123,6 +126,265 @@ class TestEmailBasicRegex: assert not email_pattern.match(email) +class TestSubEmailBasicRegex: + """Test cases for SUB_EMAIL_BASIC_REGEX pattern (without anchors).""" + + @pytest.fixture + def sub_email_pattern(self) -> re.Pattern[str]: + """Fixture that returns compiled sub email regex pattern.""" + return compile_re(rf"^{SUB_EMAIL_BASIC_REGEX}$") + + @pytest.mark.parametrize("valid_email", [ + "user@example.com", + "test.user@example.com", + "user+tag@example.co.uk", + "first.last@subdomain.example.com", + "user123@test-domain.com", + "a@example.com", + "user_name@example.com", + "user-name@example.com", + "user@sub.domain.example.com", + "test!#$%&'*+-/=?^_`{|}~@example.com", + "1234567890@example.com", + ]) + def test_valid_emails_match(self, sub_email_pattern: re.Pattern[str], valid_email: str) -> None: + """Test that valid email addresses match SUB_EMAIL_BASIC_REGEX.""" + assert sub_email_pattern.match(valid_email), ( + f"Failed to match valid email: {valid_email}" + ) + + @pytest.mark.parametrize("invalid_email", [ + "", + "@example.com", + "user@", + "user", + "user@.com", + "user@domain", + "user @example.com", + ".user@example.com", + "user@-example.com", + "user@example-.com", + "user@example.c", + "user@example.toolong", + ]) + def test_invalid_emails_no_match(self, sub_email_pattern: re.Pattern[str], invalid_email: str) -> None: + """Test that invalid emails don't match SUB_EMAIL_BASIC_REGEX.""" + assert not sub_email_pattern.match(invalid_email), ( + f"Incorrectly matched invalid email: {invalid_email}" + ) + + def test_sub_email_max_local_part_length(self, sub_email_pattern: re.Pattern[str]) -> None: + """Test email with maximum local part length (64 characters).""" + local_part = "a" * 64 + email = f"{local_part}@example.com" + assert sub_email_pattern.match(email) + + def test_sub_email_exceeds_local_part_length(self, sub_email_pattern: re.Pattern[str]) -> None: + """Test email exceeding maximum local part length.""" + local_part = "a" * 65 + email = f"{local_part}@example.com" + assert not sub_email_pattern.match(email) + + +class TestNameEmailSimpleRegex: + """Test cases for NAME_EMAIL_SIMPLE_REGEX pattern.""" + + @pytest.fixture + def name_email_simple_pattern(self) -> re.Pattern[str]: + """Fixture that returns compiled name+email simple regex pattern.""" + return compile_re(NAME_EMAIL_SIMPLE_REGEX) + + @pytest.mark.parametrize("test_input,expected_groups", [ + ('"John Doe" ', {'name1': 'John Doe', 'email1': 'john@example.com'}), + ('John Doe ', {'name2': 'John Doe', 'email2': 'john@example.com'}), + ('', {'email3': 'john@example.com'}), + ('john@example.com', {'email4': 'john@example.com'}), + (' "Jane Smith" ', {'name1': 'Jane Smith', 'email1': 'jane@test.com'}), + ('Bob ', {'name2': 'Bob', 'email2': 'bob@test.org'}), + ]) + def test_valid_name_email_combinations( + self, name_email_simple_pattern: re.Pattern[str], test_input: str, expected_groups: dict[str, str] + ) -> None: + """Test that valid name+email combinations match and extract correct groups.""" + match = name_email_simple_pattern.match(test_input) + assert match is not None, f"Failed to match: {test_input}" + + # Check that expected groups are present and match + for group_name, expected_value in expected_groups.items(): + assert match.group(group_name) == expected_value, ( + f"Group {group_name} expected '{expected_value}', got '{match.group(group_name)}'" + ) + + @pytest.mark.parametrize("invalid_input", [ + "", + "not an email", + "<>", + '"Name Only"', + 'Name <', + '>', + 'Name ', + ]) + def test_invalid_name_email_combinations( + self, name_email_simple_pattern: re.Pattern[str], invalid_input: str + ) -> None: + """Test that invalid inputs don't match NAME_EMAIL_SIMPLE_REGEX.""" + assert not name_email_simple_pattern.match(invalid_input), ( + f"Incorrectly matched invalid input: {invalid_input}" + ) + + def test_extract_name_from_quoted( + self, name_email_simple_pattern: re.Pattern[str] + ) -> None: + """Test extracting name from quoted format.""" + match = name_email_simple_pattern.match('"Alice Wonder" ') + assert match is not None + assert match.group('name1') == 'Alice Wonder' + assert match.group('email1') == 'alice@example.com' + + def test_extract_name_from_unquoted( + self, name_email_simple_pattern: re.Pattern[str] + ) -> None: + """Test extracting name from unquoted format.""" + match = name_email_simple_pattern.match('Bob Builder ') + assert match is not None + assert match.group('name2') == 'Bob Builder' + assert match.group('email2') == 'bob@example.com' + + def test_email_only_in_brackets( + self, name_email_simple_pattern: re.Pattern[str] + ) -> None: + """Test email-only format in angle brackets.""" + match = name_email_simple_pattern.match('') + assert match is not None + assert match.group('email3') == 'charlie@example.com' + + def test_email_only_plain( + self, name_email_simple_pattern: re.Pattern[str] + ) -> None: + """Test plain email format without brackets.""" + match = name_email_simple_pattern.match('dave@example.com') + assert match is not None + assert match.group('email4') == 'dave@example.com' + + def test_whitespace_handling( + self, name_email_simple_pattern: re.Pattern[str] + ) -> None: + """Test that leading/trailing whitespace is handled correctly.""" + match = name_email_simple_pattern.match(' "User Name" ') + assert match is not None + assert match.group('name1') == 'User Name' + assert match.group('email1') == 'user@example.com' + + +class TestNameEmailBasicRegex: + """Test cases for NAME_EMAIL_BASIC_REGEX pattern with strict email validation.""" + + @pytest.fixture + def name_email_basic_pattern(self) -> re.Pattern[str]: + """Fixture that returns compiled name+email basic regex pattern.""" + return compile_re(NAME_EMAIL_BASIC_REGEX) + + @pytest.mark.parametrize("test_input,expected_name,expected_email", [ + ('"John Doe" ', 'John Doe', 'john@example.com'), + ('John Doe ', 'John Doe', 'john@example.com'), + ('', None, 'john@example.com'), + ('john@example.com', None, 'john@example.com'), + (' "Jane Smith" ', 'Jane Smith', 'jane.smith@test.co.uk'), + ('Alice Wonder ', 'Alice Wonder', 'alice+tag@example.com'), + ]) + def test_valid_name_email_with_validation( + self, + name_email_basic_pattern: re.Pattern[str], + test_input: str, + expected_name: str | None, + expected_email: str, + ) -> None: + """Test valid name+email with strict email validation.""" + match = name_email_basic_pattern.match(test_input) + assert match is not None, f"Failed to match: {test_input}" + + # Extract name and email from whichever group matched + name = match.group('name1') or match.group('name2') + email = ( + match.group('email1') or match.group('email2') or + match.group('email3') or match.group('email4') + ) + + assert name == expected_name, f"Expected name '{expected_name}', got '{name}'" + assert email == expected_email, f"Expected email '{expected_email}', got '{email}'" + + @pytest.mark.parametrize("invalid_input", [ + '"John Doe" ', # invalid email format + 'John Doe <@example.com>', # missing local part + '', # missing domain + 'user@domain', # no TLD + '"Name" ', # space in email + '<.user@example.com>', # starts with dot + 'user@-example.com', # domain starts with hyphen + 'Name ', # TLD too short + 'Name ', # TLD too long + ]) + def test_invalid_email_format_rejected( + self, name_email_basic_pattern: re.Pattern[str], invalid_input: str + ) -> None: + """Test that inputs with invalid email formats are rejected.""" + assert not name_email_basic_pattern.match(invalid_input), ( + f"Incorrectly matched invalid input: {invalid_input}" + ) + + def test_quoted_name_with_valid_email( + self, name_email_basic_pattern: re.Pattern[str] + ) -> None: + """Test quoted name format with valid email.""" + match = name_email_basic_pattern.match('"Alice Wonder" ') + assert match is not None + assert match.group('name1') == 'Alice Wonder' + assert match.group('email1') == 'alice@example.com' + + def test_unquoted_name_with_valid_email( + self, name_email_basic_pattern: re.Pattern[str] + ) -> None: + """Test unquoted name format with valid email.""" + match = name_email_basic_pattern.match('Bob Builder ') + assert match is not None + assert match.group('name2') == 'Bob Builder' + assert match.group('email2') == 'bob@example.com' + + def test_email_only_formats( + self, name_email_basic_pattern: re.Pattern[str] + ) -> None: + """Test email-only formats (with and without brackets).""" + # With brackets + match1 = name_email_basic_pattern.match('') + assert match1 is not None + assert match1.group('email3') == 'charlie@example.com' + + # Without brackets + match2 = name_email_basic_pattern.match('dave@example.com') + assert match2 is not None + assert match2.group('email4') == 'dave@example.com' + + def test_whitespace_handling( + self, name_email_basic_pattern: re.Pattern[str] + ) -> None: + """Test that leading/trailing whitespace is handled correctly.""" + match = name_email_basic_pattern.match(' "User" ') + assert match is not None + assert match.group('name1') == 'User' + assert match.group('email1') == 'user@example.com' + + def test_special_characters_in_local_part( + self, name_email_basic_pattern: re.Pattern[str] + ) -> None: + """Test email with special characters in local part.""" + match = name_email_basic_pattern.match('Test User ') + assert match is not None + assert match.group('name2') == 'Test User' + assert match.group('email2') == 'test!#$%&\'*+-/=?^_`{|}~@example.com' + + class TestDomainWithLocalhostRegex: """Test cases for DOMAIN_WITH_LOCALHOST_REGEX pattern."""