Fix fingerprint with mixed int and str keys
Create a fallback hash function to handle mixed key types in dictionaries and lists, ensuring consistent hashing across different data structures. Fallback called is prefixed with "HO_" to indicate its usage.
This commit is contained in:
@@ -4,11 +4,37 @@ Various dictionary, object and list hashers
|
||||
|
||||
import json
|
||||
import hashlib
|
||||
from typing import Any
|
||||
from typing import Any, cast, Sequence
|
||||
|
||||
|
||||
def hash_object(obj: Any) -> str:
|
||||
"""
|
||||
Create a hash for any dict or list with mixed key types
|
||||
|
||||
Arguments:
|
||||
obj {Any} -- _description_
|
||||
|
||||
Returns:
|
||||
str -- _description_
|
||||
"""
|
||||
def normalize(o: Any) -> Any:
|
||||
if isinstance(o, dict):
|
||||
# Sort by repr of keys to handle mixed types (str, int, etc.)
|
||||
o = cast(dict[Any, Any], o)
|
||||
return tuple(sorted((repr(k), normalize(v)) for k, v in o.items()))
|
||||
if isinstance(o, (list, tuple)):
|
||||
o = cast(Sequence[Any], o)
|
||||
return tuple(normalize(item) for item in o)
|
||||
return repr(o)
|
||||
|
||||
normalized = normalize(obj)
|
||||
return hashlib.sha256(str(normalized).encode()).hexdigest()
|
||||
|
||||
|
||||
def dict_hash_frozen(data: dict[Any, Any]) -> int:
|
||||
"""
|
||||
NOT RECOMMENDED, use dict_hash_crc or hash_object instead
|
||||
If used, DO NOT CHANGE
|
||||
hash a dict via freeze
|
||||
|
||||
Args:
|
||||
@@ -22,18 +48,25 @@ def dict_hash_frozen(data: dict[Any, Any]) -> int:
|
||||
|
||||
def dict_hash_crc(data: dict[Any, Any] | list[Any]) -> str:
|
||||
"""
|
||||
Create a sha256 hash over dict
|
||||
Legacy Method, must be kept for fallback
|
||||
Create a sha256 hash over dict or list
|
||||
alternative for
|
||||
dict_hash_frozen
|
||||
|
||||
Args:
|
||||
data (dict | list): _description_
|
||||
data (dict[Any, Any] | list[Any]): _description_
|
||||
|
||||
Returns:
|
||||
str: _description_
|
||||
str: sha256 hash, prefiex with HO_ if fallback used
|
||||
"""
|
||||
return hashlib.sha256(
|
||||
json.dumps(data, sort_keys=True, ensure_ascii=True).encode('utf-8')
|
||||
).hexdigest()
|
||||
try:
|
||||
return hashlib.sha256(
|
||||
# IT IS IMPORTANT THAT THE BELOW CALL STAYS THE SAME AND DOES NOT CHANGE OR WE WILL GET DIFFERENT HASHES
|
||||
# separators=(',', ':') to get rid of spaces, but if this is used the hash will be different, DO NOT ADD
|
||||
json.dumps(data, sort_keys=True, ensure_ascii=True, default=str).encode('utf-8')
|
||||
).hexdigest()
|
||||
except TypeError:
|
||||
# Fallback tod different hasher, will return DIFFERENT hash than above, so only usable in int/str key mixes
|
||||
return "HO_" + hash_object(data)
|
||||
|
||||
# __END__
|
||||
|
||||
@@ -58,7 +58,12 @@ def make_unique_list_of_dicts(dict_list: list[Any]) -> list[Any]:
|
||||
"""
|
||||
try:
|
||||
# try json dumps, can fail with int and str index types
|
||||
return list({json.dumps(d, sort_keys=True, ensure_ascii=True): d for d in dict_list}.values())
|
||||
return list(
|
||||
{
|
||||
json.dumps(d, sort_keys=True, ensure_ascii=True, separators=(',', ':')): d
|
||||
for d in dict_list
|
||||
}.values()
|
||||
)
|
||||
except TypeError:
|
||||
# Fallback for non-serializable entries, slow but works
|
||||
unique: list[Any] = []
|
||||
|
||||
Reference in New Issue
Block a user