From 280e5fa861ade247650890eecc7c821f5cde690f Mon Sep 17 00:00:00 2001 From: Clemens Schwaighofer Date: Thu, 17 Jul 2025 14:37:07 +0900 Subject: [PATCH] Update the mask dict helper It now goes recursive, is case insensitive for keys and mask keys requests Checks not for equal but for start/end or inside with edge character set pytests added --- .../iterator_handling/dict_helpers.py | 77 +++-- test-run/iterator_handling/dict_helpers.py | 63 ++++ .../iterator_handling/test_dict_helpers.py | 287 ++++++++++++++++++ uv.lock | 2 +- 4 files changed, 403 insertions(+), 26 deletions(-) create mode 100644 test-run/iterator_handling/dict_helpers.py create mode 100644 tests/unit/iterator_handling/test_dict_helpers.py diff --git a/src/corelibs/iterator_handling/dict_helpers.py b/src/corelibs/iterator_handling/dict_helpers.py index fe70d50..4740be7 100644 --- a/src/corelibs/iterator_handling/dict_helpers.py +++ b/src/corelibs/iterator_handling/dict_helpers.py @@ -3,26 +3,36 @@ Dict helpers """ -from typing import Any +from typing import TypeAlias, Union, Dict, List, Any, cast + +# definitions for the mask run below +MaskableValue: TypeAlias = Union[str, int, float, bool, None] +NestedDict: TypeAlias = Dict[str, Union[MaskableValue, List[Any], 'NestedDict']] +ProcessableValue: TypeAlias = Union[MaskableValue, List[Any], NestedDict] def mask( - data_set: dict[str, str], + data_set: dict[str, Any], mask_keys: list[str] | None = None, mask_str: str = "***", + mask_str_edges: str = '_', skip: bool = False -) -> dict[str, str]: +) -> dict[str, Any]: """ mask data for output Checks if mask_keys list exist in any key in the data set either from the start or at the end + Use the mask_str_edges to define how searches inside a string should work. 
Default it must start + and end with '_', remove to search string in string + Arguments: data_set {dict[str, str]} -- _description_ Keyword Arguments: mask_keys {list[str] | None} -- _description_ (default: {None}) mask_str {str} -- _description_ (default: {"***"}) - skip {bool} -- _description_ (default: {False}) + mask_str_edges {str} -- _description_ (default: {"_"}) + skip {bool} -- if set to true skip (default: {False}) Returns: dict[str, str] -- _description_ @@ -30,29 +40,46 @@ def mask( if skip is True: return data_set if mask_keys is None: - mask_keys = ["password", "secret"] + mask_keys = ["encryption", "password", "secret"] + else: + # make sure it is lower case + mask_keys = [mask_key.lower() for mask_key in mask_keys] + + def should_mask_key(key: str) -> bool: + """Check if a key should be masked""" + __key_lower = key.lower() + return any( + __key_lower.startswith(mask_key) or + __key_lower.endswith(mask_key) or + f"{mask_str_edges}{mask_key}{mask_str_edges}" in __key_lower + for mask_key in mask_keys + ) + + def mask_recursive(obj: ProcessableValue) -> ProcessableValue: + """Recursively mask values in nested structures""" + if isinstance(obj, dict): + return { + key: mask_value(value) if should_mask_key(key) else mask_recursive(value) + for key, value in obj.items() + } + if isinstance(obj, list): + return [mask_recursive(item) for item in obj] + return obj + + def mask_value(value: Any) -> Any: + """Handle masking based on value type""" + if isinstance(value, list): + # Mask each individual value in the list + return [mask_str for _ in cast('list[Any]', value)] + if isinstance(value, dict): + # Recursively process the dictionary instead of masking the whole thing + return mask_recursive(cast('ProcessableValue', value)) + # Mask primitive values + return mask_str + return { - key: mask_str - if any(key.startswith(mask_key) or key.endswith(mask_key) for mask_key in mask_keys) else value + key: mask_value(value) if should_mask_key(key) else 
mask_recursive(value) for key, value in data_set.items() } - -def set_entry(dict_set: dict[str, Any], key: str, value_set: Any) -> dict[str, Any]: - """ - set a new entry in the dict set - - Arguments: - key {str} -- _description_ - dict_set {dict[str, Any]} -- _description_ - value_set {Any} -- _description_ - - Returns: - dict[str, Any] -- _description_ - """ - if not dict_set.get(key): - dict_set[key] = {} - dict_set[key] = value_set - return dict_set - # __END__ diff --git a/test-run/iterator_handling/dict_helpers.py b/test-run/iterator_handling/dict_helpers.py new file mode 100644 index 0000000..d4c1557 --- /dev/null +++ b/test-run/iterator_handling/dict_helpers.py @@ -0,0 +1,63 @@ +""" +Iterator helper testing +""" + +from corelibs.debug_handling.dump_data import dump_data +from corelibs.iterator_handling.dict_helpers import mask + + +def __mask(): + data = { + # "user": "john", + # "encryption_key": "Secret key", + # "ENCRYPTION.TEST": "Secret key test", + # "inside_password_test": "Hide this", + "password": ["secret1", "secret2"], # List value gets masked + # "config": { + # "db_password": {"primary": "secret", "backup": "secret2"}, # Dict value gets masked + # "api_keys": ["key1", "key2", "key3"] # List value gets masked + # }, + # "items": [ # List value that doesn't get masked, but gets processed recursively + # {"name": "item1", "secret_key": "itemsecret"}, + # {"name": "item2", "passwords": ["pass1", "pass2"]} + # ], + # "normal_list": ["item1", "item2", "item3"] # Normal list, not masked + } + data = { + "config": { + # "password": ["secret1", "secret2"], + # "password_other": {"password": ["secret1", "secret2"]}, + # "database": { + # "host": "localhost", + # "password": "db_secret", + # "users": [ + # {"name": "admin", "password": "admin123"}, + # {"name": "user", "secret_key": "user456"} + # ] + # }, + # "api": { + # # "endpoints": ["api1", "api2"], + # "encryption_settings": { + # "enabled": True, + # "secret": "api_secret" + # } + # } + 
"secret_key": "normal_value", + "api_key": "normal_value", + "my_key_value": "normal_value", + } + } + result = mask(data, ['key']) + print(f"In: {dump_data(data)}") + print(f"Masked: {dump_data(result)}") + + +def main(): + """ + Test: corelibs.iterator_handling.dict_helpers + """ + __mask() + + +if __name__ == "__main__": + main() diff --git a/tests/unit/iterator_handling/test_dict_helpers.py b/tests/unit/iterator_handling/test_dict_helpers.py new file mode 100644 index 0000000..9edc942 --- /dev/null +++ b/tests/unit/iterator_handling/test_dict_helpers.py @@ -0,0 +1,287 @@ +import pytest +from typing import Any +from corelibs.iterator_handling.dict_helpers import mask + + +def test_mask_default_behavior(): + """Test masking with default mask_keys""" + data = { + "username": "john_doe", + "password": "secret123", + "email": "john@example.com", + "api_secret": "abc123", + "encryption_key": "xyz789" + } + + result = mask(data) + + assert result["username"] == "john_doe" + assert result["password"] == "***" + assert result["email"] == "john@example.com" + assert result["api_secret"] == "***" + assert result["encryption_key"] == "***" + + +def test_mask_custom_keys(): + """Test masking with custom mask_keys""" + data = { + "username": "john_doe", + "token": "abc123", + "api_key": "xyz789", + "password": "secret123" + } + + result = mask(data, mask_keys=["token", "api"]) + + assert result["username"] == "john_doe" + assert result["token"] == "***" + assert result["api_key"] == "***" + assert result["password"] == "secret123" # Not masked with custom keys + + +def test_mask_custom_mask_string(): + """Test masking with custom mask string""" + data = {"password": "secret123"} + + result = mask(data, mask_str="[HIDDEN]") + + assert result["password"] == "[HIDDEN]" + + +def test_mask_case_insensitive(): + """Test that masking is case insensitive""" + data = { + "PASSWORD": "secret123", + "Secret_Key": "abc123", + "ENCRYPTION_data": "xyz789" + } + + result = mask(data) + + 
assert result["PASSWORD"] == "***" + assert result["Secret_Key"] == "***" + assert result["ENCRYPTION_data"] == "***" + + +def test_mask_key_patterns(): + """Test different key matching patterns (start, end, contains)""" + data = { + "password_hash": "hash123", # starts with + "user_password": "secret123", # ends with + "my_secret_key": "abc123", # contains with edges + "secretvalue": "xyz789", # contains without edges + "startsecretvalue": "xyz123", # contains without edges + "normal_key": "normal_value" + } + + result = mask(data) + + assert result["password_hash"] == "***" + assert result["user_password"] == "***" + assert result["my_secret_key"] == "***" + assert result["secretvalue"] == "***" # will mask because starts with + assert result["startsecretvalue"] == "xyz123" # will not mask + assert result["normal_key"] == "normal_value" + + +def test_mask_custom_edges(): + """Test masking with custom edge characters""" + data = { + "my-secret-key": "abc123", + "my_secret_key": "xyz789" + } + + result = mask(data, mask_str_edges="-") + + assert result["my-secret-key"] == "***" + assert result["my_secret_key"] == "xyz789" # Underscore edges don't match + + +def test_mask_empty_edges(): + """Test masking with empty edge characters (substring matching)""" + data = { + "secretvalue": "abc123", + "mysecretkey": "xyz789", + "normal_key": "normal_value" + } + + result = mask(data, mask_str_edges="") + + assert result["secretvalue"] == "***" + assert result["mysecretkey"] == "***" + assert result["normal_key"] == "normal_value" + + +def test_mask_nested_dict(): + """Test masking nested dictionaries""" + data = { + "user": { + "name": "john", + "password": "secret123", + "profile": { + "email": "john@example.com", + "encryption_key": "abc123" + } + }, + "api_secret": "xyz789" + } + + result = mask(data) + + assert result["user"]["name"] == "john" + assert result["user"]["password"] == "***" + assert result["user"]["profile"]["email"] == "john@example.com" + assert 
result["user"]["profile"]["encryption_key"] == "***" + assert result["api_secret"] == "***" + + +def test_mask_lists(): + """Test masking lists and nested structures with lists""" + data = { + "users": [ + {"name": "john", "password": "secret1"}, + {"name": "jane", "password": "secret2"} + ], + "secrets": ["secret1", "secret2", "secret3"] + } + + result = mask(data) + print(f"R {result['secrets']}") + + assert result["users"][0]["name"] == "john" + assert result["users"][0]["password"] == "***" + assert result["users"][1]["name"] == "jane" + assert result["users"][1]["password"] == "***" + assert result["secrets"] == ["***", "***", "***"] + + +def test_mask_mixed_types(): + """Test masking with different value types""" + data = { + "password": "string_value", + "secret_number": 12345, + "encryption_flag": True, + "secret_float": 3.14, + "password_none": None, + "normal_key": "normal_value" + } + + result = mask(data) + + assert result["password"] == "***" + assert result["secret_number"] == "***" + assert result["encryption_flag"] == "***" + assert result["secret_float"] == "***" + assert result["password_none"] == "***" + assert result["normal_key"] == "normal_value" + + +def test_mask_skip_true(): + """Test that skip=True returns original data unchanged""" + data = { + "password": "secret123", + "encryption_key": "abc123", + "normal_key": "normal_value" + } + + result = mask(data, skip=True) + + assert result == data + assert result is data # Should return the same object + + +def test_mask_empty_dict(): + """Test masking empty dictionary""" + data: dict[str, Any] = {} + + result = mask(data) + + assert result == {} + + +def test_mask_none_mask_keys(): + """Test explicit None mask_keys uses defaults""" + data = {"password": "secret123", "token": "abc123"} + + result = mask(data, mask_keys=None) + + assert result["password"] == "***" + assert result["token"] == "abc123" # Not in default keys + + +def test_mask_empty_mask_keys(): + """Test empty mask_keys list""" + 
data = {"password": "secret123", "secret": "abc123"} + + result = mask(data, mask_keys=[]) + + assert result["password"] == "secret123" + assert result["secret"] == "abc123" + + +def test_mask_complex_nested_structure(): + """Test masking complex nested structure""" + data = { + "config": { + "database": { + "host": "localhost", + "password": "db_secret", + "users": [ + {"name": "admin", "password": "admin123"}, + {"name": "user", "secret_key": "user456"} + ] + }, + "api": { + "endpoints": ["api1", "api2"], + "encryption_settings": { + "enabled": True, + "secret": "api_secret" + } + } + } + } + + result = mask(data) + + assert result["config"]["database"]["host"] == "localhost" + assert result["config"]["database"]["password"] == "***" + assert result["config"]["database"]["users"][0]["name"] == "admin" + assert result["config"]["database"]["users"][0]["password"] == "***" + assert result["config"]["database"]["users"][1]["name"] == "user" + assert result["config"]["database"]["users"][1]["secret_key"] == "***" + assert result["config"]["api"]["endpoints"] == ["api1", "api2"] + assert result["config"]["api"]["encryption_settings"]["enabled"] is True + assert result["config"]["api"]["encryption_settings"]["secret"] == "***" + + +def test_mask_preserves_original_data(): + """Test that original data is not modified""" + original_data = { + "password": "secret123", + "username": "john_doe" + } + data_copy = original_data.copy() + + result = mask(original_data) + + assert original_data == data_copy # Original unchanged + assert result != original_data # Result is different + assert result["password"] == "***" + assert original_data["password"] == "secret123" + + +@pytest.mark.parametrize("mask_key,expected_keys", [ + (["pass"], ["password", "user_pass", "my_pass_key"]), + (["key"], ["api_key", "secret_key", "my_key_value"]), + (["token"], ["token", "auth_token", "my_token_here"]), +]) +def test_mask_parametrized_keys(mask_key: list[str], expected_keys: list[str]): + 
"""Parametrized test for different mask key patterns""" + data = {key: "value" for key in expected_keys} + data["normal_entry"] = "normal_value" + + result = mask(data, mask_keys=mask_key) + + for key in expected_keys: + assert result[key] == "***" + assert result["normal_entry"] == "normal_value" diff --git a/uv.lock b/uv.lock index 2cf2d68..db8da67 100644 --- a/uv.lock +++ b/uv.lock @@ -44,7 +44,7 @@ wheels = [ [[package]] name = "corelibs" -version = "0.13.2" +version = "0.14.0" source = { editable = "." } dependencies = [ { name = "jmespath" },