# Exact key types that json.dumps can encode; any other key type is routed to the
# equality based fallback in make_unique_list_of_dicts
_JSON_KEY_TYPES = (str, int, float, bool, type(None))


def _type_tagged(value: Any) -> Any:
    """
    Build a JSON serialisable mirror of value that keeps key and sequence types apart

    json.dumps turns every dictionary key into a string and encodes tuples as lists,
    so {1: "x"} / {"1": "x"} or (1, 2) / [1, 2] would end up with the same JSON text.
    Keys are prefixed with their type name and tuples are wrapped in a marker dict so
    such entries get different fingerprints. All generated keys are strings, which also
    makes dictionaries with mixed str/int keys sortable for sort_keys=True.

    Arguments:
        value {Any} -- any entry, or nested part of an entry

    Raises:
        TypeError: a dictionary key has a type json.dumps cannot encode

    Returns:
        Any -- structure with tagged keys; non container values are returned untouched
    """
    if isinstance(value, dict):
        tagged: dict[str, Any] = {}
        for key, item in value.items():  # pyright: ignore[reportUnknownVariableType]
            # exact type match on purpose: subclasses may carry their own equality rules
            if type(key) not in _JSON_KEY_TYPES:  # pyright: ignore[reportUnknownArgumentType]
                raise TypeError(f"Unsupported key type: {type(key).__name__}")  # pyright: ignore[reportUnknownArgumentType]
            tagged[f"{type(key).__name__}:{key!r}"] = _type_tagged(item)  # pyright: ignore[reportUnknownArgumentType]
        return tagged
    if isinstance(value, tuple):
        # cannot collide with a real dict, real keys always carry a "type:" prefix
        return {"__tuple__": [_type_tagged(item) for item in value]}  # pyright: ignore[reportUnknownVariableType]
    if isinstance(value, list):
        return [_type_tagged(item) for item in value]  # pyright: ignore[reportUnknownVariableType]
    return value


def make_unique_list_of_dicts(dict_list: list[Any]) -> list[Any]:
    """
    Create a list of unique dictionary entries

    Entries are fingerprinted with a key order independent, type preserving JSON dump.
    Entries with the same fingerprint collapse into one: the result keeps the position
    of the first occurrence and returns the last occurring instance.
    Dictionaries that only differ in key type ({1: "x"} vs {"1": "x"}) or in
    tuple vs list values are NOT treated as duplicates.

    If any entry cannot be fingerprinted (non JSON serialisable values such as sets,
    unsupported key types, self referencing structures) the whole list is de-duplicated
    by equality comparison instead. This is slow (quadratic) and keeps the first
    occurring instance.

    Arguments:
        dict_list {list[Any]} -- entries to de-duplicate, normally dictionaries,
                                 other values are accepted too

    Returns:
        list[Any] -- new list with duplicates removed, the input list is not changed
    """
    try:
        # same key -> same slot: first position is kept, value is the last duplicate
        return list({
            json.dumps(_type_tagged(d), sort_keys=True, ensure_ascii=True): d
            for d in dict_list
        }.values())
    except (TypeError, ValueError, RecursionError):
        # Fallback for entries that cannot be fingerprinted, slow but works
        unique: list[Any] = []
        for d in dict_list:
            if d not in unique:
                unique.append(d)
        return unique
class TestMakeUniqueListOfDicts:
    """Test cases for make_unique_list_of_dicts function"""

    def test_basic_duplicate_removal(self):
        """Test basic removal of duplicate dictionaries"""
        dict_list = [
            {"a": 1, "b": 2},
            {"a": 1, "b": 2},
            {"a": 3, "b": 4}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2
        assert {"a": 1, "b": 2} in result
        assert {"a": 3, "b": 4} in result

    def test_order_independent_duplicates(self):
        """Test that dictionaries with different key orders are treated as duplicates"""
        dict_list = [
            {"a": 1, "b": 2},
            {"b": 2, "a": 1},  # Same content, different order
            {"a": 3, "b": 4}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2
        assert {"a": 1, "b": 2} in result
        assert {"a": 3, "b": 4} in result

    def test_empty_list(self):
        """Test with empty list"""
        result = make_unique_list_of_dicts([])
        assert result == []
        assert isinstance(result, list)

    def test_single_dict(self):
        """Test with single dictionary"""
        dict_list = [{"a": 1, "b": 2}]
        result = make_unique_list_of_dicts(dict_list)
        assert result == [{"a": 1, "b": 2}]

    def test_all_unique(self):
        """Test when all dictionaries are unique"""
        dict_list = [
            {"a": 1},
            {"b": 2},
            {"c": 3},
            {"d": 4}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 4
        for d in dict_list:
            assert d in result

    def test_all_duplicates(self):
        """Test when all dictionaries are duplicates"""
        dict_list = [
            {"a": 1, "b": 2},
            {"a": 1, "b": 2},
            {"a": 1, "b": 2},
            {"b": 2, "a": 1}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 1
        assert result[0] == {"a": 1, "b": 2}

    def test_nested_values(self):
        """Test with nested structures as values"""
        dict_list = [
            {"a": [1, 2], "b": 3},
            {"a": [1, 2], "b": 3},
            {"a": [1, 3], "b": 3}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2
        assert {"a": [1, 2], "b": 3} in result
        assert {"a": [1, 3], "b": 3} in result

    def test_different_value_types(self):
        """Test with different value types"""
        dict_list = [
            {"str": "hello", "int": 42, "float": 3.14, "bool": True},
            {"str": "hello", "int": 42, "float": 3.14, "bool": True},
            {"str": "world", "int": 99, "float": 2.71, "bool": False}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2

    def test_empty_dicts(self):
        """Test with empty dictionaries"""
        dict_list: list[Any] = [
            {},
            {},
            {"a": 1}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2
        assert {} in result
        assert {"a": 1} in result

    def test_single_key_dicts(self):
        """Test with single key dictionaries"""
        dict_list = [
            {"a": 1},
            {"a": 1},
            {"a": 2},
            {"b": 1}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 3
        assert {"a": 1} in result
        assert {"a": 2} in result
        assert {"b": 1} in result

    def test_many_keys(self):
        """Test with dictionaries containing many keys"""
        dict1 = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
        dict2 = {"e": 5, "d": 4, "c": 3, "b": 2, "a": 1}  # Same, different order
        dict3 = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 6}  # Different value
        dict_list = [dict1, dict2, dict3]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2

    def test_numeric_keys(self):
        """Test with numeric keys"""
        dict_list = [
            {1: "one", 2: "two"},
            {2: "two", 1: "one"},
            {1: "one", 2: "three"}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2

    def test_none_values(self):
        """Test with None values"""
        dict_list = [
            {"a": None, "b": 2},
            {"a": None, "b": 2},
            {"a": 1, "b": None}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2
        assert {"a": None, "b": 2} in result
        assert {"a": 1, "b": None} in result

    def test_mixed_key_types(self):
        """Test with mixed key types (string and numeric)"""
        dict_list = [
            {"a": 1, 1: "one"},
            {1: "one", "a": 1},
            {"a": 2, 1: "one"}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2

    @pytest.mark.parametrize("dict_list,expected_length", [
        ([{"a": 1}, {"a": 1}, {"a": 1}], 1),
        ([{"a": 1}, {"a": 2}, {"a": 3}], 3),
        ([{"a": 1, "b": 2}, {"b": 2, "a": 1}], 1),
        ([{}, {}], 1),
        ([{"x": [1, 2]}, {"x": [1, 2]}], 1),
        ([{"a": 1}, {"b": 2}, {"c": 3}], 3),
    ])  # pyright: ignore[reportUnknownArgumentType]
    def test_parametrized_unique_dicts(self, dict_list: list[Any], expected_length: int):
        """Test make_unique_list_of_dicts with various input combinations"""
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == expected_length
        assert isinstance(result, list)

    def test_large_list(self):
        """Test with a large list of dictionaries"""
        dict_list = [{"id": i % 100, "value": f"val_{i % 100}"} for i in range(1000)]
        result = make_unique_list_of_dicts(dict_list)
        # Should have 100 unique dicts (0-99)
        assert len(result) == 100

    def test_preserves_last_occurrence(self):
        """Test that duplicates collapse into the last instance at the first position"""
        first = {"a": 1, "b": 2}
        other = {"a": 3, "b": 4}
        # equal to "first", but a different object with a different key order
        last = {"b": 2, "a": 1}
        result = make_unique_list_of_dicts([first, other, last])
        assert len(result) == 2
        # the slot of the first occurrence is kept, so the order is stable
        assert result == [first, other]
        # the surviving object is the last duplicate, not the first one
        assert result[0] is last
        assert result[1] is other

    def test_nested_dicts(self):
        """Test with nested dictionaries"""
        dict_list = [
            {"outer": {"inner": 1}},
            {"outer": {"inner": 1}},
            {"outer": {"inner": 2}}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2

    def test_string_values_case_sensitive(self):
        """Test that string values are case-sensitive"""
        dict_list = [
            {"name": "John"},
            {"name": "john"},
            {"name": "JOHN"},
            {"name": "John"}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 3

    def test_boolean_values(self):
        """Test with boolean values"""
        dict_list = [
            {"flag": True, "count": 1},
            {"count": 1, "flag": True},
            {"flag": False, "count": 1}
        ]
        result = make_unique_list_of_dicts(dict_list)
        assert len(result) == 2
        assert {"flag": True, "count": 1} in result
        assert {"flag": False, "count": 1} in result

# __END__