Compare commits

...

4 Commits

Author SHA1 Message Date
Clemens Schwaighofer
715ed1f9c2 Docblocks update in iterator handling fingerprint 2026-01-27 17:14:31 +09:00
Clemens Schwaighofer
82a759dd21 Fix fingerprint with mixed int and str keys
Create a fallback hash function to handle mixed key types in dictionaries
and lists, ensuring consistent hashing across different data structures.

The hash returned by the fallback is prefixed with "HO_" to indicate that the fallback was used.
2026-01-27 15:59:38 +09:00
Clemens Schwaighofer
fe913608c4 Fix iteration list helpers dict list type 2026-01-27 14:52:11 +09:00
Clemens Schwaighofer
79f9c5d1c6 iterator list helpers tests run cases updated 2026-01-27 14:51:25 +09:00
4 changed files with 283 additions and 13 deletions

View File

@@ -4,11 +4,38 @@ Various dictionary, object and list hashers
import json import json
import hashlib import hashlib
from typing import Any from typing import Any, cast, Sequence
def hash_object(obj: Any) -> str:
    """
    RECOMMENDED for new use
    Create a sha256 hash for any dict or list, including dicts with mixed key types

    The object is first reduced to a canonical nested tuple structure:
    - dict: tuple of (repr(key), canonical value) pairs, sorted, so the
      insertion order of the dict does not influence the hash
    - list/tuple: tuple of canonical items in their original order
      (a list and a tuple with the same items give the same hash)
    - anything else: its repr(), so the hash is only as stable as that repr

    Arguments:
        obj {Any} -- object to hash, typically a dict or a list

    Returns:
        str -- sha256 hex digest (64 characters) of the canonical form
    """
    def canonical(entry: Any) -> Any:
        if isinstance(entry, dict):
            mapping = cast(dict[Any, Any], entry)
            # keys are compared via their repr so that int and str keys can be sorted together
            pairs = [(repr(key), canonical(value)) for key, value in mapping.items()]
            pairs.sort()
            return tuple(pairs)
        if isinstance(entry, (list, tuple)):
            sequence = cast(Sequence[Any], entry)
            return tuple(map(canonical, sequence))
        return repr(entry)

    hasher = hashlib.sha256()
    hasher.update(str(canonical(obj)).encode())
    return hasher.hexdigest()
def dict_hash_frozen(data: dict[Any, Any]) -> int: def dict_hash_frozen(data: dict[Any, Any]) -> int:
""" """
NOT RECOMMENDED, use dict_hash_crc or hash_object instead
If used, DO NOT CHANGE
hash a dict via freeze hash a dict via freeze
Args: Args:
@@ -22,18 +49,25 @@ def dict_hash_frozen(data: dict[Any, Any]) -> int:
def dict_hash_crc(data: dict[Any, Any] | list[Any]) -> str: def dict_hash_crc(data: dict[Any, Any] | list[Any]) -> str:
""" """
Create a sha256 hash over dict LEGACY METHOD, must be kept for fallback, if used by other code, DO NOT CHANGE
Create a sha256 hash over dict or list
alternative for alternative for
dict_hash_frozen dict_hash_frozen
Args: Args:
data (dict | list): _description_ data (dict[Any, Any] | list[Any]): _description_
Returns: Returns:
str: _description_ str: sha256 hash, prefixed with HO_ if fallback used
""" """
try:
return hashlib.sha256( return hashlib.sha256(
json.dumps(data, sort_keys=True, ensure_ascii=True).encode('utf-8') # IT IS IMPORTANT THAT THE BELOW CALL STAYS THE SAME AND DOES NOT CHANGE OR WE WILL GET DIFFERENT HASHES
# separators=(',', ':') to get rid of spaces, but if this is used the hash will be different, DO NOT ADD
json.dumps(data, sort_keys=True, ensure_ascii=True, default=str).encode('utf-8')
).hexdigest() ).hexdigest()
except TypeError:
# Fallback to a different hasher; this returns a DIFFERENT hash than above, so it is only usable for int/str key mixes
return "HO_" + hash_object(data)
# __END__ # __END__

View File

@@ -58,7 +58,12 @@ def make_unique_list_of_dicts(dict_list: list[Any]) -> list[Any]:
""" """
try: try:
# try json dumps, can fail with int and str index types # try json dumps, can fail with int and str index types
return list({json.dumps(d, sort_keys=True, ensure_ascii=True): d for d in dict_list}.values()) return list(
{
json.dumps(d, sort_keys=True, ensure_ascii=True, separators=(',', ':')): d
for d in dict_list
}.values()
)
except TypeError: except TypeError:
# Fallback for non-serializable entries, slow but works # Fallback for non-serializable entries, slow but works
unique: list[Any] = [] unique: list[Any] = []

View File

@@ -2,9 +2,10 @@
test list helpers test list helpers
""" """
# from typing import Any from typing import Any
from corelibs.debug_handling.dump_data import dump_data from corelibs.debug_handling.dump_data import dump_data
from corelibs.iterator_handling.list_helpers import is_list_in_list, convert_to_list, make_unique_list_of_dicts from corelibs.iterator_handling.list_helpers import is_list_in_list, convert_to_list, make_unique_list_of_dicts
from corelibs.iterator_handling.fingerprint import dict_hash_crc
def __test_is_list_in_list_a(): def __test_is_list_in_list_a():
@@ -29,7 +30,8 @@ def __make_unique_list_of_dicts():
{"a": 3, "b": 4, "nested": {"x": 30, "y": 40}} {"a": 3, "b": 4, "nested": {"x": 30, "y": 40}}
] ]
unique_dicts = make_unique_list_of_dicts(dict_list) unique_dicts = make_unique_list_of_dicts(dict_list)
print(f"Unique dicts: {dump_data(unique_dicts)}") dhf = dict_hash_crc(unique_dicts)
print(f"Unique dicts: {dump_data(unique_dicts)} [{dhf}]")
dict_list = [ dict_list = [
{"a": 1, 1: "one"}, {"a": 1, 1: "one"},
@@ -37,7 +39,8 @@ def __make_unique_list_of_dicts():
{"a": 2, 1: "one"} {"a": 2, 1: "one"}
] ]
unique_dicts = make_unique_list_of_dicts(dict_list) unique_dicts = make_unique_list_of_dicts(dict_list)
print(f"Unique dicts: {dump_data(unique_dicts)}") dhf = dict_hash_crc(unique_dicts)
print(f"Unique dicts: {dump_data(unique_dicts)} [{dhf}]")
dict_list = [ dict_list = [
{"a": 1, "b": [1, 2, 3]}, {"a": 1, "b": [1, 2, 3]},
@@ -46,7 +49,31 @@ def __make_unique_list_of_dicts():
1, 2, "String", 1, "Foobar" 1, 2, "String", 1, "Foobar"
] ]
unique_dicts = make_unique_list_of_dicts(dict_list) unique_dicts = make_unique_list_of_dicts(dict_list)
print(f"Unique dicts: {dump_data(unique_dicts)}") dhf = dict_hash_crc(unique_dicts)
print(f"Unique dicts: {dump_data(unique_dicts)} [{dhf}]")
dict_list: list[Any] = [
[],
{},
[],
{},
{"a": []},
{"a": []},
{"a": {}},
{"a": {}},
]
unique_dicts = make_unique_list_of_dicts(dict_list)
dhf = dict_hash_crc(unique_dicts)
print(f"Unique dicts: {dump_data(unique_dicts)} [{dhf}]")
dict_list: list[Any] = [
(1, 2),
(1, 2),
(2, 3),
]
unique_dicts = make_unique_list_of_dicts(dict_list)
dhf = dict_hash_crc(unique_dicts)
print(f"Unique dicts: {dump_data(unique_dicts)} [{dhf}]")
def main(): def main():

View File

@@ -4,7 +4,101 @@ tests for corelibs.iterator_handling.fingerprint
from typing import Any from typing import Any
import pytest import pytest
from corelibs.iterator_handling.fingerprint import dict_hash_frozen, dict_hash_crc from corelibs.iterator_handling.fingerprint import dict_hash_frozen, dict_hash_crc, hash_object
class TestHashObject:
    """Tests for hash_object function"""

    @staticmethod
    def _assert_sha256_hex(digest: Any) -> None:
        """Assert that digest is a string with the length of a SHA256 hex digest"""
        assert isinstance(digest, str)
        assert len(digest) == 64  # SHA256 produces 64 hex characters

    def test_hash_object_simple_dict(self):
        """A plain string-keyed dict yields a 64 character string"""
        self._assert_sha256_hex(hash_object({"key1": "value1", "key2": "value2"}))

    def test_hash_object_mixed_keys(self):
        """A dict mixing int and str keys yields a 64 character string"""
        self._assert_sha256_hex(
            hash_object({"key1": "value1", 1: "value2", 2: "value3"})
        )

    def test_hash_object_consistency(self):
        """Hashing the same object twice gives the same hash"""
        sample = {"str_key": "value", 123: "number_key"}
        assert hash_object(sample) == hash_object(sample)

    def test_hash_object_order_independence(self):
        """Insertion order of dict keys does not change the hash"""
        forward = {"a": 1, 1: "one", "b": 2, 2: "two"}
        backward = {2: "two", "b": 2, 1: "one", "a": 1}
        assert hash_object(forward) == hash_object(backward)

    def test_hash_object_list_of_dicts_mixed_keys(self):
        """A list of mixed-key dicts yields a 64 character string"""
        entries = [
            {"name": "item1", 1: "value1"},
            {"name": "item2", 2: "value2"},
        ]
        self._assert_sha256_hex(hash_object(entries))

    def test_hash_object_nested_mixed_keys(self):
        """Mixed keys on several nesting levels yield a 64 character string"""
        nested = {
            "outer": {
                "inner": "value",
                1: "mixed_key",
            },
            2: "another_mixed",
        }
        self._assert_sha256_hex(hash_object(nested))

    def test_hash_object_different_data(self):
        """Two dicts that differ in one entry give different hashes"""
        left = {"key": "value", 1: "one"}
        right = {"key": "value", 2: "two"}
        assert hash_object(left) != hash_object(right)

    def test_hash_object_complex_nested(self):
        """Deeply nested dict/list structures yield a 64 character string"""
        deep = {
            "level1": {
                "level2": {
                    1: "value",
                    "key": [1, 2, {"nested": "deep", 3: "int_key"}],
                },
            },
        }
        self._assert_sha256_hex(hash_object(deep))

    def test_hash_object_list_with_tuples(self):
        """A list holding tuples and a mixed-key dict yields a 64 character string"""
        mixed = [("a", 1), ("b", 2), {1: "mixed", "key": "value"}]
        self._assert_sha256_hex(hash_object(mixed))
class TestDictHashFrozen: class TestDictHashFrozen:
@@ -279,6 +373,116 @@ class TestDictHashCrc:
assert isinstance(result, str) assert isinstance(result, str)
assert len(result) == 64 assert len(result) == 64
def test_dict_hash_crc_fallback_mixed_keys(self):
    """A dict with mixed int and str keys is hashed through the fallback"""
    mixed = {"key1": "value1", 1: "value2", 2: "value3"}
    digest = dict_hash_crc(mixed)
    assert isinstance(digest, str)
    # the fallback marks its result with the "HO_" prefix
    assert digest.startswith("HO_")
    # 3 prefix characters plus the 64 character hash
    assert len(digest) == 67
def test_dict_hash_crc_fallback_consistency(self):
    """Hashing the same mixed-key dict twice gives the same fallback hash"""
    sample = {"str_key": "value", 123: "number_key", 456: "another"}
    first, second = dict_hash_crc(sample), dict_hash_crc(sample)
    assert first == second
    assert first.startswith("HO_")
def test_dict_hash_crc_fallback_order_independence(self):
    """Key insertion order does not change the fallback hash of a mixed-key dict"""
    forward = {"a": 1, 1: "one", "b": 2, 2: "two"}
    backward = {2: "two", "b": 2, 1: "one", "a": 1}
    forward_hash = dict_hash_crc(forward)
    backward_hash = dict_hash_crc(backward)
    assert forward_hash == backward_hash
    assert forward_hash.startswith("HO_")
def test_dict_hash_crc_fallback_list_of_dicts_mixed_keys(self):
    """A list of mixed-key dicts is hashed through the fallback"""
    entries = [
        {"name": "item1", 1: "value1"},
        {"name": "item2", 2: "value2"},
        {3: "value3", "type": "mixed"},
    ]
    digest = dict_hash_crc(entries)
    assert isinstance(digest, str)
    assert digest.startswith("HO_")
    assert len(digest) == 67
def test_dict_hash_crc_fallback_nested_mixed_keys(self):
    """Mixed keys on nested levels are hashed through the fallback"""
    nested = {
        "outer": {
            "inner": "value",
            1: "mixed_key",
        },
        2: "another_mixed",
    }
    digest = dict_hash_crc(nested)
    assert isinstance(digest, str)
    assert digest.startswith("HO_")
    assert len(digest) == 67
def test_dict_hash_crc_fallback_different_data(self):
    """Two different mixed-key dicts give different fallback hashes"""
    left = {"key": "value", 1: "one"}
    right = {"key": "value", 2: "two"}
    left_hash = dict_hash_crc(left)
    right_hash = dict_hash_crc(right)
    assert left_hash != right_hash
    assert left_hash.startswith("HO_")
    assert right_hash.startswith("HO_")
def test_dict_hash_crc_fallback_complex_structure(self):
    """A nested list/dict structure with mixed keys is hashed through the fallback"""
    first_entry = {
        "id": 1,
        1: "first",
        "data": {
            "nested": "value",
            100: "nested_int_key",
        },
    }
    second_entry = {
        "id": 2,
        2: "second",
        "items": [1, 2, 3],
    }
    digest = dict_hash_crc([first_entry, second_entry])
    assert isinstance(digest, str)
    assert digest.startswith("HO_")
    assert len(digest) == 67
def test_dict_hash_crc_no_fallback_string_keys_only(self):
    """A dict with only string keys gets a plain 64 character hash without the HO_ prefix"""
    string_keyed = {"key1": "value1", "key2": "value2", "key3": "value3"}
    digest = dict_hash_crc(string_keyed)
    assert isinstance(digest, str)
    assert not digest.startswith("HO_")
    assert len(digest) == 64
def test_dict_hash_crc_no_fallback_int_keys_only(self):
    """A dict with only int keys gets a plain 64 character hash without the HO_ prefix"""
    int_keyed = {1: "one", 2: "two", 3: "three"}
    digest = dict_hash_crc(int_keyed)
    assert isinstance(digest, str)
    assert not digest.startswith("HO_")
    assert len(digest) == 64
class TestComparisonBetweenHashFunctions: class TestComparisonBetweenHashFunctions:
"""Tests comparing dict_hash_frozen and dict_hash_crc""" """Tests comparing dict_hash_frozen and dict_hash_crc"""