# File: tests/unit/json_handling/test_jmespath_helper.py
"""
tests for corelibs.json_handling.jmespath_helper

The suite is split into four groups:

* TestJmespathSearch: the JMESPath language features (lookups, slices,
  projections, filters, functions) passed through ``jmespath_search``
* TestJmespathSearchErrors: invalid queries and invalid ``search_params``
  types, which the tests expect to surface as ``ValueError``
* TestJmespathSearchEdgeCases: unusual keys, indexes and root values
* TestJmespathSearchIntegration: larger, realistic documents
"""

from typing import Any
import pytest
from corelibs.json_handling.jmespath_helper import jmespath_search


# MARK: jmespath_search tests
class TestJmespathSearch:
    """Test cases for jmespath_search function"""

    def test_simple_key_lookup(self) -> None:
        """Test simple key lookup in dictionary"""
        data = {"name": "John", "age": 30}
        result = jmespath_search(data, "name")
        assert result == "John"

    def test_nested_key_lookup(self) -> None:
        """Test nested key lookup"""
        data = {
            "user": {
                "profile": {
                    "name": "John",
                    "age": 30
                }
            }
        }
        result = jmespath_search(data, "user.profile.name")
        assert result == "John"

    def test_array_index_access(self) -> None:
        """Test accessing array element by index"""
        data = {
            "items": [
                {"id": 1, "name": "Item 1"},
                {"id": 2, "name": "Item 2"},
                {"id": 3, "name": "Item 3"}
            ]
        }
        result = jmespath_search(data, "items[1].name")
        assert result == "Item 2"

    def test_array_slice(self) -> None:
        """Test array slicing"""
        data = {"numbers": [1, 2, 3, 4, 5]}
        result = jmespath_search(data, "numbers[1:3]")
        assert result == [2, 3]

    def test_wildcard_projection(self) -> None:
        """Test wildcard projection on array"""
        data = {
            "users": [
                {"name": "Alice", "age": 25},
                {"name": "Bob", "age": 30},
                {"name": "Charlie", "age": 35}
            ]
        }
        result = jmespath_search(data, "users[*].name")
        assert result == ["Alice", "Bob", "Charlie"]

    def test_filter_expression(self) -> None:
        """Test filter expression"""
        data = {
            "products": [
                {"name": "Product 1", "price": 100, "stock": 5},
                {"name": "Product 2", "price": 200, "stock": 0},
                {"name": "Product 3", "price": 150, "stock": 10}
            ]
        }
        result = jmespath_search(data, "products[?stock > `0`].name")
        assert result == ["Product 1", "Product 3"]

    def test_pipe_expression(self) -> None:
        """Test pipe expression"""
        data = {
            "items": [
                {"name": "Item 1", "value": 10},
                {"name": "Item 2", "value": 20},
                {"name": "Item 3", "value": 30}
            ]
        }
        # the pipe stops the projection, so [0] indexes the projected list
        result = jmespath_search(data, "items[*].value | [0]")
        assert result == 10

    def test_multi_select_hash(self) -> None:
        """Test multi-select hash"""
        data = {"name": "John", "age": 30, "city": "New York", "country": "USA"}
        result = jmespath_search(data, "{name: name, age: age}")
        assert result == {"name": "John", "age": 30}

    def test_multi_select_list(self) -> None:
        """Test multi-select list"""
        data = {"first": "John", "last": "Doe", "age": 30}
        result = jmespath_search(data, "[first, last]")
        assert result == ["John", "Doe"]

    def test_flatten_projection(self) -> None:
        """Test flatten projection"""
        data = {
            "groups": [
                {"items": [1, 2, 3]},
                {"items": [4, 5, 6]}
            ]
        }
        result = jmespath_search(data, "groups[].items[]")
        assert result == [1, 2, 3, 4, 5, 6]

    def test_function_length(self) -> None:
        """Test length function"""
        data = {"items": [1, 2, 3, 4, 5]}
        result = jmespath_search(data, "length(items)")
        assert result == 5

    def test_function_max(self) -> None:
        """Test max function"""
        data = {"numbers": [10, 5, 20, 15]}
        result = jmespath_search(data, "max(numbers)")
        assert result == 20

    def test_function_min(self) -> None:
        """Test min function"""
        data = {"numbers": [10, 5, 20, 15]}
        result = jmespath_search(data, "min(numbers)")
        assert result == 5

    def test_function_sort(self) -> None:
        """Test sort function"""
        data = {"numbers": [3, 1, 4, 1, 5, 9, 2, 6]}
        result = jmespath_search(data, "sort(numbers)")
        assert result == [1, 1, 2, 3, 4, 5, 6, 9]

    def test_function_sort_by(self) -> None:
        """Test sort_by function"""
        data = {
            "people": [
                {"name": "Charlie", "age": 35},
                {"name": "Alice", "age": 25},
                {"name": "Bob", "age": 30}
            ]
        }
        result = jmespath_search(data, "sort_by(people, &age)[*].name")
        assert result == ["Alice", "Bob", "Charlie"]

    def test_function_join(self) -> None:
        """Test join function"""
        data = {"names": ["Alice", "Bob", "Charlie"]}
        result = jmespath_search(data, "join(', ', names)")
        assert result == "Alice, Bob, Charlie"

    def test_function_keys(self) -> None:
        """Test keys function"""
        data = {"name": "John", "age": 30, "city": "New York"}
        result = jmespath_search(data, "keys(@)")
        # sorted so the assertion does not depend on key order
        assert sorted(result) == ["age", "city", "name"]

    def test_function_values(self) -> None:
        """Test values function"""
        data = {"a": 1, "b": 2, "c": 3}
        result = jmespath_search(data, "values(@)")
        # sorted so the assertion does not depend on value order
        assert sorted(result) == [1, 2, 3]

    def test_function_type(self) -> None:
        """Test type function"""
        data = {"string": "test", "number": 42, "array": [1, 2, 3]}
        result = jmespath_search(data, "type(string)")
        assert result == "string"

    def test_function_contains(self) -> None:
        """Test contains function"""
        data = {"items": [1, 2, 3, 4, 5]}
        result = jmespath_search(data, "contains(items, `3`)")
        assert result is True

    def test_current_node_reference(self) -> None:
        """Test current node @ reference"""
        data = [1, 2, 3, 4, 5]
        result = jmespath_search(data, "@")
        assert result == [1, 2, 3, 4, 5]

    def test_not_null_expression(self) -> None:
        """Test dropping null entries with a `!= null` filter

        Despite the test name, this uses a filter expression and not the
        not_null() function.
        """
        data = {
            "items": [
                {"name": "Item 1", "description": "Desc 1"},
                {"name": "Item 2", "description": None},
                {"name": "Item 3"}
            ]
        }
        result = jmespath_search(data, "items[*].description | [?@ != null]")
        assert result == ["Desc 1"]

    def test_search_returns_none_for_missing_key(self) -> None:
        """Test that searching for non-existent key returns None"""
        data = {"name": "John", "age": 30}
        result = jmespath_search(data, "nonexistent")
        assert result is None

    def test_search_with_list_input(self) -> None:
        """Test search with list as input"""
        data = [
            {"name": "Alice", "score": 85},
            {"name": "Bob", "score": 92},
            {"name": "Charlie", "score": 78}
        ]
        result = jmespath_search(data, "[?score > `80`].name")
        assert result == ["Alice", "Bob"]

    def test_deeply_nested_structure(self) -> None:
        """Test searching deeply nested structure"""
        data = {
            "level1": {
                "level2": {
                    "level3": {
                        "level4": {
                            "level5": {
                                "value": "deep_value"
                            }
                        }
                    }
                }
            }
        }
        result = jmespath_search(data, "level1.level2.level3.level4.level5.value")
        assert result == "deep_value"

    def test_complex_filter_expression(self) -> None:
        """Test complex filter with multiple conditions"""
        data = {
            "products": [
                {"name": "Product 1", "price": 100, "stock": 5, "category": "A"},
                {"name": "Product 2", "price": 200, "stock": 0, "category": "B"},
                {"name": "Product 3", "price": 150, "stock": 10, "category": "A"},
                {"name": "Product 4", "price": 120, "stock": 3, "category": "A"}
            ]
        }
        result = jmespath_search(
            data,
            "products[?category == 'A' && stock > `0`].name"
        )
        assert result == ["Product 1", "Product 3", "Product 4"]

    def test_recursive_descent(self) -> None:
        """Test an explicit nested projection

        JMESPath has no recursive descent operator like JSONPath's '..',
        so the path to the nested values is spelled out in full.
        """
        data = {
            "store": {
                "book": [
                    {"title": "Book 1", "price": 10},
                    {"title": "Book 2", "price": 20}
                ],
                "bicycle": {
                    "price": 100
                }
            }
        }
        # only the book prices are addressed, bicycle.price is not matched
        result = jmespath_search(data, "store.book[*].price")
        assert result == [10, 20]

    def test_empty_dict_input(self) -> None:
        """Test search on empty dictionary"""
        data: dict[Any, Any] = {}
        result = jmespath_search(data, "key")
        assert result is None

    def test_empty_list_input(self) -> None:
        """Test search on empty list"""
        data: list[Any] = []
        result = jmespath_search(data, "[0]")
        assert result is None

    def test_unicode_keys_and_values(self) -> None:
        """Test search with unicode keys and values"""
        data = {
            "日本語": "テスト",
            "emoji_🎉": "🚀",
            "nested": {
                "中文": "测试"
            }
        }
        # JMESPath requires quoted identifiers for unicode keys
        result = jmespath_search(data, '"日本語"')
        assert result == "テスト"

        result2 = jmespath_search(data, 'nested."中文"')
        assert result2 == "测试"

    def test_numeric_values(self) -> None:
        """Test search with various numeric values"""
        data = {
            "int": 42,
            "float": 3.14,
            "negative": -10,
            "zero": 0,
            "scientific": 1e10
        }
        result = jmespath_search(data, "float")
        assert result == 3.14

    def test_boolean_values(self) -> None:
        """Test search with boolean values"""
        data = {
            "items": [
                {"name": "Item 1", "active": True},
                {"name": "Item 2", "active": False},
                {"name": "Item 3", "active": True}
            ]
        }
        result = jmespath_search(data, "items[?active].name")
        assert result == ["Item 1", "Item 3"]

    def test_null_values(self) -> None:
        """Test search with null/None values"""
        data = {
            "name": "John",
            "middle_name": None,
            "last_name": "Doe"
        }
        result = jmespath_search(data, "middle_name")
        assert result is None

    def test_mixed_types_in_array(self) -> None:
        """Test search on array with mixed types"""
        data = {"mixed": [1, "two", 3.0, True, None, {"key": "value"}]}
        result = jmespath_search(data, "mixed[5].key")
        assert result == "value"

    def test_expression_with_literals(self) -> None:
        """Test expression with literal values"""
        data = {
            "items": [
                {"name": "Item 1", "price": 100},
                {"name": "Item 2", "price": 200}
            ]
        }
        result = jmespath_search(data, "items[?price == `100`].name")
        assert result == ["Item 1"]

    def test_comparison_operators(self) -> None:
        """Test various comparison operators"""
        data = {
            "numbers": [
                {"value": 10},
                {"value": 20},
                {"value": 30},
                {"value": 40}
            ]
        }
        result = jmespath_search(data, "numbers[?value >= `20` && value <= `30`].value")
        assert result == [20, 30]

    def test_logical_operators(self) -> None:
        """Test the logical or operator inside a filter"""
        data = {
            "items": [
                {"name": "A", "active": True, "stock": 5},
                {"name": "B", "active": False, "stock": 0},
                {"name": "C", "active": True, "stock": 0},
                {"name": "D", "active": False, "stock": 10}
            ]
        }
        result = jmespath_search(data, "items[?active || stock > `0`].name")
        assert result == ["A", "C", "D"]


# MARK: Error handling tests
class TestJmespathSearchErrors:
    """Test error handling in jmespath_search function"""

    def test_lexer_error_invalid_syntax(self) -> None:
        """Test that an unterminated bracket is reported as ValueError

        Despite the test name, jmespath reports this input as a
        ParseError and not as a LexerError.
        """
        data = {"name": "John"}

        with pytest.raises(ValueError) as exc_info:
            jmespath_search(data, "name[")

        assert "Parse failed" in str(exc_info.value)

    def test_lexer_error_unclosed_bracket(self) -> None:
        """Test that an unclosed index bracket is reported as ValueError

        Despite the test name, jmespath reports this input as a
        ParseError and not as a LexerError.
        """
        data = {"items": [1, 2, 3]}

        with pytest.raises(ValueError) as exc_info:
            jmespath_search(data, "items[0")

        assert "Parse failed" in str(exc_info.value)

    def test_parse_error_invalid_expression(self) -> None:
        """Test ParseError is converted to ValueError"""
        data = {"name": "John"}

        with pytest.raises(ValueError) as exc_info:
            jmespath_search(data, "name..age")

        assert "Parse failed" in str(exc_info.value)

    def test_parse_error_invalid_filter(self) -> None:
        """Test ParseError for invalid filter syntax"""
        data = {"items": [1, 2, 3]}

        with pytest.raises(ValueError) as exc_info:
            jmespath_search(data, "items[?@")

        assert "Parse failed" in str(exc_info.value)

    def test_type_error_invalid_function_usage(self) -> None:
        """Test JMESPathTypeError for invalid function usage"""
        data = {"name": "John", "age": 30}

        # max() expects an array argument, here it is given a string value
        with pytest.raises(ValueError) as exc_info:
            jmespath_search(data, "max(name)")

        assert "Search failed with JMESPathTypeError" in str(exc_info.value)

    def test_type_error_with_none_search_params(self) -> None:
        """Test that None as search_params is rejected"""
        data = {"name": "John"}

        # jmespath raises EmptyExpressionError for None or an empty string,
        # and that class derives from ValueError.
        # TypeError is accepted too, in case the argument type is rejected
        # before the expression reaches the jmespath parser.
        with pytest.raises((ValueError, TypeError)) as exc_info:
            jmespath_search(data, None)  # type: ignore

        # The error message should indicate an empty expression issue
        assert "empty" in str(exc_info.value).lower() or "Type error" in str(exc_info.value)

    def test_type_error_with_invalid_search_params_type(self) -> None:
        """Test ValueError when search_params is not a string"""
        data = {"name": "John"}

        with pytest.raises(ValueError) as exc_info:
            jmespath_search(data, 123)  # type: ignore

        assert "Type error for search_params" in str(exc_info.value)

    def test_type_error_with_dict_search_params(self) -> None:
        """Test ValueError when search_params is a dict"""
        data = {"name": "John"}

        with pytest.raises(ValueError) as exc_info:
            jmespath_search(data, {"key": "value"})  # type: ignore

        assert "Type error for search_params" in str(exc_info.value)

    def test_error_message_includes_search_params(self) -> None:
        """Test that error messages include the search parameters"""
        data = {"name": "John"}
        invalid_query = "name["

        with pytest.raises(ValueError) as exc_info:
            jmespath_search(data, invalid_query)

        error_message = str(exc_info.value)
        assert invalid_query in error_message
        # This raises ParseError, not LexerError
        assert "Parse failed" in error_message

    def test_error_message_includes_exception_details(self) -> None:
        """Test that error messages include original exception details"""
        data = {"items": [1, 2, 3]}
        invalid_query = "items[?"

        with pytest.raises(ValueError) as exc_info:
            jmespath_search(data, invalid_query)

        error_message = str(exc_info.value)
        # Should contain both the query and some indication of what went wrong
        assert invalid_query in error_message
        # an unterminated filter is a parse failure, like "items[?@" above
        assert "Parse failed" in error_message


# MARK: Edge cases
class TestJmespathSearchEdgeCases:
    """Test edge cases for jmespath_search function"""

    def test_very_large_array(self) -> None:
        """Test searching large array"""
        data = {"items": [{"id": i, "value": i * 10} for i in range(1000)]}
        result = jmespath_search(data, "items[500].value")
        assert result == 5000

    def test_very_deep_nesting(self) -> None:
        """Test very deep nesting"""
        # Create 20-level deep nested structure (level0 .. level19)
        data: dict[str, Any] = {"level0": {}}
        current = data["level0"]
        for i in range(1, 20):
            current[f"level{i}"] = {}
            current = current[f"level{i}"]
        current["value"] = "deep"

        # Build the search path: level0.level1. ... .level19.value
        path = ".".join([f"level{i}" for i in range(20)]) + ".value"
        result = jmespath_search(data, path)
        assert result == "deep"

    def test_special_characters_in_keys(self) -> None:
        """Test keys with special characters (requires quoting)"""
        data = {"my-key": "value", "my.key": "value2"}

        # JMESPath requires quoting for keys with special characters
        result = jmespath_search(data, '"my-key"')
        assert result == "value"

        result2 = jmespath_search(data, '"my.key"')
        assert result2 == "value2"

    def test_numeric_string_keys(self) -> None:
        """Test keys that look like numbers"""
        data = {"123": "numeric_key", "456": "another"}
        result = jmespath_search(data, '"123"')
        assert result == "numeric_key"

    def test_empty_string_key(self) -> None:
        """Test empty string as key"""
        data = {"": "empty_key_value", "normal": "normal_value"}
        result = jmespath_search(data, '""')
        assert result == "empty_key_value"

    def test_whitespace_in_keys(self) -> None:
        """Test keys with whitespace"""
        data = {"my key": "value", " trimmed ": "value2"}
        result = jmespath_search(data, '"my key"')
        assert result == "value"

    def test_array_with_negative_index(self) -> None:
        """Test negative array indexing"""
        data = {"items": [1, 2, 3, 4, 5]}
        # JMESPath supports negative indexing, -1 is the last element
        result = jmespath_search(data, "items[-1]")
        assert result == 5

    def test_out_of_bounds_array_index(self) -> None:
        """Test out of bounds array access"""
        data = {"items": [1, 2, 3]}
        result = jmespath_search(data, "items[10]")
        assert result is None

    def test_chaining_multiple_operations(self) -> None:
        """Test chaining multiple JMESPath operations"""
        data: dict[str, Any] = {
            "users": [
                {"name": "Alice", "posts": [{"id": 1}, {"id": 2}]},
                {"name": "Bob", "posts": [{"id": 3}, {"id": 4}, {"id": 5}]},
                {"name": "Charlie", "posts": []}
            ]
        }
        result = jmespath_search(data, "users[*].posts[].id")
        assert result == [1, 2, 3, 4, 5]

    def test_projection_on_non_array(self) -> None:
        """Test projection on non-array (should handle gracefully)"""
        data = {"value": "not_an_array"}
        result = jmespath_search(data, "value[*]")
        assert result is None

    def test_filter_on_non_array(self) -> None:
        """Test filter on non-array"""
        data = {"value": "string"}
        result = jmespath_search(data, "value[?@ == 'x']")
        assert result is None

    def test_combining_filters_and_projections(self) -> None:
        """Test combining filters with projections"""
        data = {
            "products": [
                {
                    "name": "Product 1",
                    "variants": [
                        {"color": "red", "stock": 5},
                        {"color": "blue", "stock": 0}
                    ]
                },
                {
                    "name": "Product 2",
                    "variants": [
                        {"color": "green", "stock": 10},
                        {"color": "yellow", "stock": 3}
                    ]
                }
            ]
        }
        result = jmespath_search(
            data,
            "products[*].variants[?stock > `0`].color"
        )
        # one inner list per product, the result is not flattened
        assert result == [["red"], ["green", "yellow"]]

    def test_search_with_root_array(self) -> None:
        """Test search when root is an array"""
        data = [
            {"name": "Alice", "age": 25},
            {"name": "Bob", "age": 30}
        ]
        result = jmespath_search(data, "[0].name")
        assert result == "Alice"

    def test_search_with_primitive_root(self) -> None:
        """Test search when root is a primitive value"""
        # With a primitive root only the current node @ is exercised
        data_str = "simple_string"
        result = jmespath_search(data_str, "@")  # type: ignore
        assert result == "simple_string"

    def test_function_with_empty_array(self) -> None:
        """Test functions on empty arrays"""
        data: dict[str, list[Any]] = {"items": []}
        result = jmespath_search(data, "length(items)")
        assert result == 0

    def test_nested_multi_select(self) -> None:
        """Test nested multi-select operations"""
        data = {
            "person": {
                "name": "John",
                "age": 30,
                "address": {
                    "city": "New York",
                    "country": "USA"
                }
            }
        }
        result = jmespath_search(
            data,
            "person.{name: name, city: address.city}"
        )
        assert result == {"name": "John", "city": "New York"}


# MARK: Integration tests
class TestJmespathSearchIntegration:
    """Integration tests for complex real-world scenarios"""

    def test_api_response_parsing(self) -> None:
        """Test parsing typical API response structure"""
        api_response = {
            "status": "success",
            "data": {
                "users": [
                    {
                        "id": 1,
                        "name": "Alice",
                        "email": "alice@example.com",
                        "active": True,
                        "metadata": {
                            "created_at": "2025-01-01",
                            "last_login": "2025-10-23"
                        }
                    },
                    {
                        "id": 2,
                        "name": "Bob",
                        "email": "bob@example.com",
                        "active": False,
                        "metadata": {
                            "created_at": "2025-02-01",
                            "last_login": "2025-05-15"
                        }
                    },
                    {
                        "id": 3,
                        "name": "Charlie",
                        "email": "charlie@example.com",
                        "active": True,
                        "metadata": {
                            "created_at": "2025-03-01",
                            "last_login": "2025-10-20"
                        }
                    }
                ]
            },
            "metadata": {
                "total": 3,
                "page": 1
            }
        }

        # Get all active user emails
        result = jmespath_search(api_response, "data.users[?active].email")
        assert result == ["alice@example.com", "charlie@example.com"]

        # Get user names and creation dates
        result2 = jmespath_search(
            api_response,
            "data.users[*].{name: name, created: metadata.created_at}"
        )
        assert len(result2) == 3
        assert result2[0]["name"] == "Alice"
        assert result2[0]["created"] == "2025-01-01"

    def test_config_file_parsing(self) -> None:
        """Test parsing configuration-like structure"""
        config = {
            "version": "1.0",
            "environments": {
                "development": {
                    "database": {
                        "host": "localhost",
                        "port": 5432,
                        "name": "dev_db"
                    },
                    "cache": {
                        "enabled": True,
                        "ttl": 300
                    }
                },
                "production": {
                    "database": {
                        "host": "prod.example.com",
                        "port": 5432,
                        "name": "prod_db"
                    },
                    "cache": {
                        "enabled": True,
                        "ttl": 3600
                    }
                }
            }
        }

        # Get production database host
        result = jmespath_search(config, "environments.production.database.host")
        assert result == "prod.example.com"

        # Get all database names: values() turns the environments object into
        # a list so that the [*] projection can run over it.
        # The result is sorted so the assertion does not depend on key order.
        result2 = jmespath_search(config, "values(environments)[*].database.name")
        assert result2 is not None
        assert sorted(result2) == ["dev_db", "prod_db"]

    def test_nested_filtering_and_transformation(self) -> None:
        """Test complex nested filtering and transformation"""
        data = {
            "departments": [
                {
                    "name": "Engineering",
                    "employees": [
                        {"name": "Alice", "salary": 100000, "level": "Senior"},
                        {"name": "Bob", "salary": 80000, "level": "Mid"},
                        {"name": "Charlie", "salary": 120000, "level": "Senior"}
                    ]
                },
                {
                    "name": "Marketing",
                    "employees": [
                        {"name": "Dave", "salary": 70000, "level": "Junior"},
                        {"name": "Eve", "salary": 90000, "level": "Mid"}
                    ]
                }
            ]
        }

        # Get all senior employees with salary > 100k
        result = jmespath_search(
            data,
            "departments[*].employees[?level == 'Senior' && salary > `100000`].name"
        )
        # Note: 100000 is not > 100000, so Alice is excluded
        assert result == [["Charlie"], []]

        # Get flattened list (using >= instead and flatten operator)
        result2 = jmespath_search(
            data,
            "departments[].employees[?level == 'Senior' && salary >= `100000`].name | []"
        )
        assert sorted(result2) == ["Alice", "Charlie"]

    def test_working_with_timestamps(self) -> None:
        """Test searching and filtering timestamp-like data"""
        data = {
            "events": [
                {"name": "Event 1", "timestamp": "2025-10-20T10:00:00"},
                {"name": "Event 2", "timestamp": "2025-10-21T15:30:00"},
                {"name": "Event 3", "timestamp": "2025-10-23T08:45:00"},
                {"name": "Event 4", "timestamp": "2025-10-24T12:00:00"}
            ]
        }

        # Get events after a certain date (string comparison)
        result = jmespath_search(
            data,
            "events[?timestamp > '2025-10-22'].name"
        )
        assert result == ["Event 3", "Event 4"]

    def test_aggregation_operations(self) -> None:
        """Test aggregation-like operations"""
        data = {
            "sales": [
                {"product": "A", "quantity": 10, "price": 100},
                {"product": "B", "quantity": 5, "price": 200},
                {"product": "C", "quantity": 8, "price": 150}
            ]
        }

        # Get all quantities
        quantities = jmespath_search(data, "sales[*].quantity")
        assert quantities == [10, 5, 8]

        # Get max quantity
        max_quantity = jmespath_search(data, "max(sales[*].quantity)")
        assert max_quantity == 10

        # Get min price
        min_price = jmespath_search(data, "min(sales[*].price)")
        assert min_price == 100

        # Get sorted products by price
        sorted_products = jmespath_search(
            data,
            "sort_by(sales, &price)[*].product"
        )
        assert sorted_products == ["A", "C", "B"]

    def test_data_transformation_pipeline(self) -> None:
        """Test data transformation pipeline"""
        raw_data = {
            "response": {
                "items": [
                    {
                        "id": "item-1",
                        "attributes": {
                            "name": "Product A",
                            "specs": {"weight": 100, "color": "red"}
                        },
                        "available": True
                    },
                    {
                        "id": "item-2",
                        "attributes": {
                            "name": "Product B",
                            "specs": {"weight": 200, "color": "blue"}
                        },
                        "available": False
                    },
                    {
                        "id": "item-3",
                        "attributes": {
                            "name": "Product C",
                            "specs": {"weight": 150, "color": "red"}
                        },
                        "available": True
                    }
                ]
            }
        }

        # Get available red products
        result = jmespath_search(
            raw_data,
            "response.items[?available && attributes.specs.color == 'red'].attributes.name"
        )
        assert result == ["Product A", "Product C"]

        # Transform to simplified structure
        result2 = jmespath_search(
            raw_data,
            "response.items[*].{id: id, name: attributes.name, weight: attributes.specs.weight}"
        )
        assert len(result2) == 3
        assert result2[0] == {"id": "item-1", "name": "Product A", "weight": 100}


# __END__