Fix fingerprint with mixed int and str keys

Create a fallback hash function to handle mixed key types in dictionaries
and lists, ensuring consistent hashing across different data structures.

Hashes produced by the fallback are prefixed with "HO_" to indicate that the fallback was used.
This commit is contained in:
Clemens Schwaighofer
2026-01-27 15:56:17 +09:00
parent fe913608c4
commit 82a759dd21
4 changed files with 269 additions and 13 deletions

View File

@@ -4,11 +4,37 @@ Various dictionary, object and list hashers
import json
import hashlib
from typing import Any
from typing import Any, cast, Sequence
def hash_object(obj: Any) -> str:
    """
    Create a sha256 hash for any dict or list, including dicts with mixed key types

    The input is first normalized: a dict becomes a tuple of
    (repr(key), normalized value) pairs sorted by the repr of the key,
    a list or tuple becomes a tuple of its normalized items, and every
    other value is replaced by its repr. The hash is the sha256 over the
    str() of that normalized structure.

    Args:
        obj (Any): dict, list, tuple or any other value to hash

    Returns:
        str: sha256 hex digest of the normalized structure
    """

    def normalize(o: Any) -> Any:
        if isinstance(o, dict):
            o = cast(dict[Any, Any], o)
            # Use repr of keys so mixed key types (str, int, etc.) can be ordered
            pairs = [(repr(k), normalize(v)) for k, v in o.items()]
            try:
                # plain tuple ordering, this keeps all existing hashes unchanged
                pairs.sort()
            except TypeError:
                # Two keys share the same repr and their normalized values are
                # not comparable (str vs tuple): break the tie on the string
                # form of the value so the order stays deterministic
                pairs.sort(key=lambda pair: (pair[0], str(pair[1])))
            return tuple(pairs)
        if isinstance(o, (list, tuple)):
            o = cast(Sequence[Any], o)
            return tuple(normalize(item) for item in o)
        return repr(o)

    normalized = normalize(obj)
    return hashlib.sha256(str(normalized).encode()).hexdigest()
def dict_hash_frozen(data: dict[Any, Any]) -> int:
"""
NOT RECOMMENDED, use dict_hash_crc or hash_object instead
If used, DO NOT CHANGE
hash a dict via freeze
Args:
@@ -22,18 +48,25 @@ def dict_hash_frozen(data: dict[Any, Any]) -> int:
def dict_hash_crc(data: dict[Any, Any] | list[Any]) -> str:
"""
Create a sha256 hash over dict
Legacy Method, must be kept for fallback
Create a sha256 hash over dict or list
alternative for
dict_hash_frozen
Args:
data (dict | list): _description_
data (dict[Any, Any] | list[Any]): _description_
Returns:
str: _description_
str: sha256 hash, prefiex with HO_ if fallback used
"""
return hashlib.sha256(
json.dumps(data, sort_keys=True, ensure_ascii=True).encode('utf-8')
).hexdigest()
try:
return hashlib.sha256(
# IT IS IMPORTANT THAT THE BELOW CALL STAYS THE SAME AND DOES NOT CHANGE OR WE WILL GET DIFFERENT HASHES
# separators=(',', ':') to get rid of spaces, but if this is used the hash will be different, DO NOT ADD
json.dumps(data, sort_keys=True, ensure_ascii=True, default=str).encode('utf-8')
).hexdigest()
except TypeError:
# Fallback tod different hasher, will return DIFFERENT hash than above, so only usable in int/str key mixes
return "HO_" + hash_object(data)
# __END__

View File

@@ -58,7 +58,12 @@ def make_unique_list_of_dicts(dict_list: list[Any]) -> list[Any]:
"""
try:
# try json dumps, can fail with int and str index types
return list({json.dumps(d, sort_keys=True, ensure_ascii=True): d for d in dict_list}.values())
return list(
{
json.dumps(d, sort_keys=True, ensure_ascii=True, separators=(',', ':')): d
for d in dict_list
}.values()
)
except TypeError:
# Fallback for non-serializable entries, slow but works
unique: list[Any] = []