diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/__init__.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py new file mode 100644 index 00000000..10c73c46 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py @@ -0,0 +1,746 @@ +"""Tests for $addToSet accumulator core behavior ($group).""" + +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# --------------------------------------------------------------------------- +# Property lists +# --------------------------------------------------------------------------- + +# Property [$$REMOVE Excluded]: $$REMOVE via $cond is treated as missing. 
+ADDTOSET_REMOVE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "remove_all",
+        docs=[{"v": -1}, {"v": -2}, {"v": -3}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$cond": [{"$gte": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": []}],
+        msg="$addToSet should treat $$REMOVE as missing and return empty array",
+    ),
+    AccumulatorTestCase(
+        "remove_some",
+        docs=[{"v": -1}, {"v": 5}, {"v": -2}, {"v": 10}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$cond": [{"$gte": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5, 10]}],
+        msg="$addToSet should exclude $$REMOVE values and collect the rest",
+    ),
+    AccumulatorTestCase(
+        "remove_and_null_value",
+        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$cond": [{"$gt": ["$v", 2]}, None, "$$REMOVE"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null produced by $cond while excluding $$REMOVE",
+    ),
+    AccumulatorTestCase(
+        "remove_with_duplicate_values",
+        docs=[{"v": 5}, {"v": 5}, {"v": -1}, {"v": -2}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$cond": [{"$gte": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5]}],
+        msg="$addToSet should deduplicate values and exclude $$REMOVE entries",
+    ),
+]
+
+# Property [$$REMOVE Interaction with Deduplication]: docs that yield $$REMOVE add nothing, and
+# the equal values produced for the remaining docs collapse into one element.
+ADDTOSET_REMOVE_DEDUP_INTERACTION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "remove_dedup_same_value_produced",
+        docs=[{"v": 1}, {"v": 2}, {"v": -1}, {"v": -2}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$cond": [{"$gte": ["$v", 0]}, "kept", "$$REMOVE"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["kept"]}],
+        msg="$addToSet should collect single value when $cond produces same value "
+        "for multiple docs and $$REMOVE for others",
+    ),
+]
+
+# Property [Unique Value Collection]: $addToSet returns each distinct input value exactly once.
+ADDTOSET_UNIQUE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "unique_distinct",
+        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20, 30]}],
+        msg="$addToSet should return all distinct values",
+    ),
+    AccumulatorTestCase(
+        "unique_with_duplicates",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20, 30]}],
+        msg="$addToSet should deduplicate repeated values",
+    ),
+    AccumulatorTestCase(
+        "unique_all_same",
+        docs=[{"v": 42}, {"v": 42}, {"v": 42}, {"v": 42}, {"v": 42}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [42]}],
+        msg="$addToSet should collapse identical values into one element",
+    ),
+    AccumulatorTestCase(
+        "unique_single_doc",
+        docs=[{"v": 7}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [7]}],
+        msg="$addToSet should return single-element array for one document",
+    ),
+]
+
+# Property [Array as Single Element]: an array value is added whole as one element, not unwound.
+ADDTOSET_ARRAY_ELEMENT_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "array_distinct",
+        docs=[{"v": [1, 2]}, {"v": [3, 4]}, {"v": [1, 2]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[1, 2], [3, 4]]}],
+        msg="$addToSet should treat arrays as single elements and deduplicate identical arrays",
+    ),
+    AccumulatorTestCase(
+        "array_empty",
+        docs=[{"v": []}, {"v": []}, {"v": [1]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[], [1]]}],
+        msg="$addToSet should treat empty arrays as single elements and deduplicate them",
+    ),
+    AccumulatorTestCase(
+        "array_nested",
+        docs=[{"v": [[1]]}, {"v": [[2]]}, {"v": [[1]]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[[1]], [[2]]]}],
+        msg="$addToSet should treat nested arrays as single elements and deduplicate them",
+    ),
+    AccumulatorTestCase(
+        "array_mixed_scalar",
+        docs=[{"v": 1}, {"v": [1]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, [1]]}],
+        msg="$addToSet should distinguish scalar 1 from array [1]",
+    ),
+    AccumulatorTestCase(
+        "array_single_doc",
+        docs=[{"v": [1, 2, 3]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[1, 2, 3]]}],
+        msg="$addToSet should wrap the array value as a single element in the result",
+    ),
+]
+
+# Property [Expression Arguments]: the operand may be a field path, a literal, or an operator.
+ADDTOSET_EXPRESSION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "expr_field_path",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20]}],
+        msg="$addToSet should collect values from a field path expression",
+    ),
+    AccumulatorTestCase(
+        "expr_nested_field",
+        docs=[{"a": {"b": 1}}, {"a": {"b": 2}}, {"a": {"b": 1}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$a.b"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, 2]}],
+        msg="$addToSet should collect values from a nested field path",
+    ),
+    AccumulatorTestCase(
+        "expr_literal",
+        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": 42}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [42]}],
+        msg="$addToSet should deduplicate a constant literal applied to all docs",
+    ),
+    AccumulatorTestCase(
+        "expr_computed",
+        docs=[{"price": 10, "qty": 2}, {"price": 5, "qty": 3}, {"price": 10, "qty": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": {"$multiply": ["$price", "$qty"]}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [20, 15]}],
+        msg="$addToSet should collect unique computed expression results",
+    ),
+    AccumulatorTestCase(
+        "expr_null_literal",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": None}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null literal and deduplicate across docs",
+    ),
+    AccumulatorTestCase(
+        "expr_composite_array_path",
+        docs=[{"a": [{"b": 1}, {"b": 2}]}, {"a": [{"b": 3}, {"b": 1}]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$a.b"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[3, 1], [1, 2]]}],
+        msg="$addToSet should collect array values from composite array path",
+    ),
+]
+
+# Property [Grouping by Key]: each group key accumulates its own independent set of values.
+ADDTOSET_GROUPING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "multi_group",
+        docs=[
+            {"g": "A", "v": 1},
+            {"g": "A", "v": 2},
+            {"g": "A", "v": 1},
+            {"g": "B", "v": 3},
+            {"g": "B", "v": 3},
+            {"g": "B", "v": 4},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$g", "result": {"$addToSet": "$v"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[
+            {"_id": "A", "result": [1, 2]},
+            {"_id": "B", "result": [3, 4]},
+        ],
+        msg="$addToSet should compute unique sets independently per group key",
+    ),
+]
+
+# Property [Empty Collection]: with no input documents, $group emits no output documents.
+ADDTOSET_EMPTY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "empty_collection",
+        docs=None,
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[],
+        msg="$addToSet should produce no output documents for an empty collection",
+    ),
+]
+
+# Property [Edge Cases]: large inputs, unexpanded arrays, and near-identical Binary/Regex values.
+ADDTOSET_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "edge_many_unique",
+        docs=[{"v": i} for i in range(100)],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": list(range(100))}],
+        msg="$addToSet should collect 100 unique values into a 100-element array",
+    ),
+    AccumulatorTestCase(
+        "edge_many_docs_few_unique",
+        docs=[{"v": i % 5} for i in range(100)],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [0, 1, 2, 3, 4]}],
+        msg="$addToSet should deduplicate 100 docs down to 5 unique values",
+    ),
+    AccumulatorTestCase(
+        "edge_array_not_unwound",
+        docs=[{"v": [5, 1, 8]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[5, 1, 8]]}],
+        msg="$addToSet should treat array field as a single element, not traverse it",
+    ),
+    AccumulatorTestCase(
+        "edge_binary_different_subtypes",
+        docs=[{"v": Binary(b"\x00", 0)}, {"v": Binary(b"\x00", 5)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [b"\x00", Binary(b"\x00", 5)]}],
+        msg="$addToSet should treat Binary values with different subtypes as distinct",
+    ),
+    AccumulatorTestCase(
+        "edge_regex_different_flags",
+        docs=[{"v": Regex("abc", "i")}, {"v": Regex("abc", "m")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Regex("abc", "i"), Regex("abc", "m")]}],
+        msg="$addToSet should treat Regex values with different flags as distinct",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Property [BSON Constant Arguments]: a literal BSON value is a valid operand.
+# Every document yields that same constant, so each expected set holds
+# exactly one element.
+# ---------------------------------------------------------------------------
+ADDTOSET_BSON_CONSTANT_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "const_true",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": True}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [True]}],
+        msg="$addToSet with boolean True constant should return [True]",
+    ),
+    AccumulatorTestCase(
+        "const_false",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": False}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [False]}],
+        msg="$addToSet with boolean False constant should return [False]",
+    ),
+    AccumulatorTestCase(
+        "const_int64",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": Int64(42)}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Int64(42)]}],
+        msg="$addToSet with Int64 constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_double",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": 3.14}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [3.14]}],
+        msg="$addToSet with double constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_decimal128",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": Decimal128("3.14")}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("3.14")]}],
+        msg="$addToSet with Decimal128 constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_string",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "hello"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["hello"]}],
+        msg="$addToSet with string constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_binary",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": Binary(b"\x01\x02")}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [b"\x01\x02"]}],
+        msg="$addToSet with Binary constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_objectid",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": ObjectId("000000000000000000000000")},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [ObjectId("000000000000000000000000")]}],
+        msg="$addToSet with ObjectId constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_datetime",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": datetime(2020, 1, 1, tzinfo=timezone.utc)},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [datetime(2020, 1, 1, tzinfo=timezone.utc)]}],
+        msg="$addToSet with datetime constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_timestamp",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": Timestamp(1, 1)}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Timestamp(1, 1)]}],
+        msg="$addToSet with Timestamp constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_regex",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": Regex("abc", "i")}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Regex("abc", "i")]}],
+        msg="$addToSet with Regex constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_null",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": None}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet with null constant should return [null]",
+    ),
+    AccumulatorTestCase(
+        "const_minkey",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": MinKey()}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"": MinKey()}]}],
+        msg="$addToSet with MinKey constant should return MinKey wrapped in document",
+    ),
+    AccumulatorTestCase(
+        "const_maxkey",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": MaxKey()}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"": MaxKey()}]}],
+        msg="$addToSet with MaxKey constant should return MaxKey wrapped in document",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Property [Expression Types]: operator, object, $let, and system-variable
+# operands are evaluated per document and their distinct results collected.
+# ---------------------------------------------------------------------------
+ADDTOSET_EXPRESSION_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "expr_type_operator_single",
+        docs=[{"v": -10}, {"v": 20}, {"v": -5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": {"$abs": "$v"}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20, 5]}],
+        msg="$addToSet should accept single-input expression operator",
+    ),
+    AccumulatorTestCase(
+        "expr_type_operator_multi_arg",
+        docs=[{"v": -10, "w": 3}, {"v": 20, "w": 7}, {"v": -5, "w": 1}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$add": ["$v", "$w"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-7, 27, -4]}],
+        msg="$addToSet should accept a multi-arg expression operator",
+    ),
+    AccumulatorTestCase(
+        "expr_type_nested",
+        docs=[{"v": -10}, {"v": 20}, {"v": -5}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$add": [1, {"$abs": "$v"}]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [11, 21, 6]}],
+        msg="$addToSet should accept nested expression operators",
+    ),
+    AccumulatorTestCase(
+        "expr_type_sysvar_remove",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$$REMOVE"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": []}],
+        msg="$addToSet with $$REMOVE should exclude all values and return empty array",
+    ),
+    AccumulatorTestCase(
+        "expr_type_object_expression",
+        docs=[{"v": 10}, {"v": 20}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": {"a": "$v"}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 10}, {"a": 20}, {"a": 5}]}],
+        msg="$addToSet should accept an object expression",
+    ),
+    AccumulatorTestCase(
+        "expr_type_object_with_operator",
+        docs=[{"v": -10}, {"v": 20}, {"v": -5}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"a": {"$abs": "$v"}}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 10}, {"a": 20}, {"a": 5}]}],
+        msg="$addToSet should accept an object expression containing an operator",
+    ),
+    AccumulatorTestCase(
+        "expr_type_let",
+        docs=[{"v": 10}, {"v": 20}, {"v": 5}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$let": {"vars": {"x": "$v"}, "in": "$$x"}}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20, 5]}],
+        msg="$addToSet should accept a $let expression as its operand",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Property [Order Independence]: sorting the input ascending or descending
+# before $group yields the same set of values.
+# ---------------------------------------------------------------------------
+ADDTOSET_ORDER_INDEPENDENCE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "order_independent_asc",
+        docs=[{"v": 3}, {"v": 1}, {"v": 5}, {"v": 2}, {"v": 4}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, 2, 3, 4, 5]}],
+        msg="$addToSet with ascending sort should produce same set",
+    ),
+    AccumulatorTestCase(
+        "order_independent_desc",
+        docs=[{"v": 3}, {"v": 1}, {"v": 5}, {"v": 2}, {"v": 4}],
+        pipeline=[
+            {"$sort": {"v": -1}},
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, 2, 3, 4, 5]}],
+        msg="$addToSet with descending sort should produce same set",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Aggregate: the property lists concatenated for the parametrized test below
+# ---------------------------------------------------------------------------
+
+ADDTOSET_SUCCESS_TESTS = (
+    ADDTOSET_REMOVE_TESTS
+    + ADDTOSET_REMOVE_DEDUP_INTERACTION_TESTS
+    + ADDTOSET_UNIQUE_TESTS
+    + ADDTOSET_ARRAY_ELEMENT_TESTS
+    + ADDTOSET_EXPRESSION_TESTS
+    + ADDTOSET_GROUPING_TESTS
+    + ADDTOSET_EMPTY_TESTS
+    + ADDTOSET_EDGE_CASE_TESTS
+    + ADDTOSET_BSON_CONSTANT_TESTS
+    + ADDTOSET_EXPRESSION_TYPE_TESTS
+    + ADDTOSET_ORDER_INDEPENDENCE_TESTS
+)
+
+# ---------------------------------------------------------------------------
+# Test function: runs one aggregate command per success case
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SUCCESS_TESTS))
+def test_accumulator_addToSet(collection, test_case: AccumulatorTestCase):
+    """Insert the case's docs (if any), run its pipeline via aggregate, and assert the result."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
+
+
+# ---------------------------------------------------------------------------
+# Property-specific tests
+# ---------------------------------------------------------------------------
+
+# Property [Return Type]: the accumulated value has BSON type "array", even when it is empty.
+ADDTOSET_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "return_type_numeric",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": [1, 2], "type": "array"}],
+        msg="$addToSet should return array type for numeric inputs",
+    ),
+    AccumulatorTestCase(
+        "return_type_string",
+        docs=[{"v": "a"}, {"v": "b"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": ["a", "b"], "type": "array"}],
+        msg="$addToSet should return array type for string inputs",
+    ),
+    AccumulatorTestCase(
+        "return_type_null_only",
+        docs=[{"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": [None], "type": "array"}],
+        msg="$addToSet should return array type for null-only inputs",
+    ),
+    AccumulatorTestCase(
+        "return_type_missing_only",
+        docs=[{"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": [], "type": "array"}],
+        msg="$addToSet should return array type for all-missing inputs",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_RETURN_TYPE_TESTS))
+def test_accumulator_addToSet_return_type(collection, test_case: AccumulatorTestCase):
+    """Assert that $type reports the $addToSet output as "array" for each return-type case."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["value"])
diff --git
a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py new file mode 100644 index 00000000..644c8f4f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py @@ -0,0 +1,239 @@ +"""Tests for $addToSet accumulator BSON type collection and deduplication.""" + +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# --------------------------------------------------------------------------- +# Property lists +# --------------------------------------------------------------------------- + +# Property [BSON Type Collection]: $addToSet collects and deduplicates values of every +# non-deprecated BSON type. 
+ADDTOSET_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bson_int32",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20]}],
+        msg="$addToSet should collect and deduplicate int32 values",
+    ),
+    AccumulatorTestCase(
+        "bson_int64",
+        docs=[{"v": Int64(10)}, {"v": Int64(20)}, {"v": Int64(10)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Int64(10), Int64(20)]}],
+        msg="$addToSet should collect and deduplicate Int64 values",
+    ),
+    AccumulatorTestCase(
+        "bson_double",
+        docs=[{"v": 1.5}, {"v": 2.5}, {"v": 1.5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1.5, 2.5]}],
+        msg="$addToSet should collect and deduplicate double values",
+    ),
+    AccumulatorTestCase(
+        "bson_decimal128",
+        docs=[
+            {"v": Decimal128("1.5")},
+            {"v": Decimal128("2.5")},
+            {"v": Decimal128("1.5")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("1.5"), Decimal128("2.5")]}],
+        msg="$addToSet should collect and deduplicate Decimal128 values",
+    ),
+    AccumulatorTestCase(
+        "bson_string",
+        docs=[{"v": "abc"}, {"v": "def"}, {"v": "abc"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["abc", "def"]}],
+        msg="$addToSet should collect and deduplicate string values",
+    ),
+    AccumulatorTestCase(
+        "bson_bool",
+        docs=[{"v": True}, {"v": False}, {"v": True}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [True, False]}],
+        msg="$addToSet should collect and deduplicate boolean values",
+    ),
+    AccumulatorTestCase(
+        "bson_datetime",
+        docs=[
+            {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)},
+            {"v": datetime(2021, 1, 1, tzinfo=timezone.utc)},
+            {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[
+            {
+                "result": [
+                    datetime(2020, 1, 1, tzinfo=timezone.utc),
+                    datetime(2021, 1, 1, tzinfo=timezone.utc),
+                ]
+            }
+        ],
+        msg="$addToSet should collect and deduplicate datetime values",
+    ),
+    AccumulatorTestCase(
+        "bson_objectid",
+        docs=[
+            {"v": ObjectId("000000000000000000000001")},
+            {"v": ObjectId("000000000000000000000002")},
+            {"v": ObjectId("000000000000000000000001")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[
+            {
+                "result": [
+                    ObjectId("000000000000000000000001"),
+                    ObjectId("000000000000000000000002"),
+                ]
+            }
+        ],
+        msg="$addToSet should collect and deduplicate ObjectId values",
+    ),
+    AccumulatorTestCase(
+        "bson_binary",
+        docs=[{"v": Binary(b"\x00")}, {"v": Binary(b"\x01")}, {"v": Binary(b"\x00")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [b"\x00", b"\x01"]}],
+        msg="$addToSet should collect and deduplicate Binary values",
+    ),
+    AccumulatorTestCase(
+        "bson_regex",
+        docs=[{"v": Regex("abc")}, {"v": Regex("def")}, {"v": Regex("abc")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Regex("abc"), Regex("def")]}],
+        msg="$addToSet should collect and deduplicate Regex values",
+    ),
+    AccumulatorTestCase(
+        "bson_timestamp",
+        docs=[
+            {"v": Timestamp(100, 1)},
+            {"v": Timestamp(200, 1)},
+            {"v": Timestamp(100, 1)},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Timestamp(100, 1), Timestamp(200, 1)]}],
+        msg="$addToSet should collect and deduplicate Timestamp values",
+    ),
+    AccumulatorTestCase(
+        "bson_minkey",
+        docs=[{"v": MinKey()}, {"v": MinKey()}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"": MinKey()}]}],
+        msg="$addToSet should deduplicate MinKey values",
+    ),
+    AccumulatorTestCase(
+        "bson_maxkey",
+        docs=[{"v": MaxKey()}, {"v": MaxKey()}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"": MaxKey()}]}],
+        msg="$addToSet should deduplicate MaxKey values",
+    ),
+    AccumulatorTestCase(
+        "bson_document",
+        docs=[{"v": {"x": 1}}, {"v": {"x": 2}}, {"v": {"x": 1}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"x": 1}, {"x": 2}]}],
+        msg="$addToSet should collect and deduplicate embedded document values",
+    ),
+    AccumulatorTestCase(
+        "bson_array",
+        docs=[{"v": [1, 2]}, {"v": [3, 4]}, {"v": [1, 2]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[1, 2], [3, 4]]}],
+        msg="$addToSet should collect and deduplicate array values as single elements",
+    ),
+    AccumulatorTestCase(
+        "bson_null",
+        docs=[{"v": None}, {"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should deduplicate null values",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Test function: runs one aggregate command per BSON type case
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BSON_TYPE_TESTS))
+def test_accumulator_addToSet_bson_types(collection, test_case: AccumulatorTestCase):
+    """Run one BSON type case via aggregate and assert the $addToSet output it expects."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
new file mode 100644
index 00000000..e7ccc496
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
@@ -0,0 +1,522 @@
+"""Tests for $addToSet accumulator deduplication behavior."""
+
+from __future__ import annotations
+
+import math
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [Document Duplicate Detection]: two documents are duplicates only when their
+# fields, values, and field order all match exactly.
+# Every case below uses the same two-stage pipeline: $group with _id None collects "$v"
+# through $addToSet, then $project keeps only "result". The order of elements inside
+# expected[...]["result"] is not significant, because the runner at the bottom of this
+# file passes ignore_order_in=["result"].
+# NOTE(review): plain Python dict equality ignores key order; whether assertSuccess
+# distinguishes field order when matching the *_different_order cases is not visible
+# in this file - confirm.
+ADDTOSET_DOC_DEDUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "doc_identical",
+        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 2}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 1, "b": 2}]}],
+        msg="$addToSet should deduplicate identical documents",
+    ),
+    AccumulatorTestCase(
+        "doc_different_field_order",
+        # Same keys and values; only the key order differs between the two documents.
+        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"b": 2, "a": 1}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"b": 2, "a": 1}, {"a": 1, "b": 2}]}],
+        msg="$addToSet should treat documents with different field order as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_different_values",
+        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 3}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 1, "b": 2}, {"a": 1, "b": 3}]}],
+        msg="$addToSet should treat documents with different values as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_nested_identical",
+        docs=[{"v": {"a": {"x": 1}}}, {"v": {"a": {"x": 1}}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": {"x": 1}}]}],
+        msg="$addToSet should deduplicate nested documents with identical structure",
+    ),
+    AccumulatorTestCase(
+        "doc_nested_different_order",
+        # The key-order difference is one level down, inside the "a" sub-document.
+        docs=[{"v": {"a": {"x": 1, "y": 2}}}, {"v": {"a": {"y": 2, "x": 1}}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": {"x": 1, "y": 2}}, {"a": {"y": 2, "x": 1}}]}],
+        msg="$addToSet should treat nested documents with different field order as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_empty",
+        docs=[{"v": {}}, {"v": {}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{}]}],
+        msg="$addToSet should deduplicate empty documents",
+    ),
+    AccumulatorTestCase(
+        "doc_subset",
+        # The first document's fields are a strict subset of the second's.
+        docs=[{"v": {"a": 1}}, {"v": {"a": 1, "b": 2}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 1, "b": 2}, {"a": 1}]}],
+        msg="$addToSet should treat a document subset and superset as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_with_array_value",
+        docs=[{"v": {"a": [1, 2]}}, {"v": {"a": [1, 2]}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": [1, 2]}]}],
+        msg="$addToSet should deduplicate documents containing identical array values",
+    ),
+    AccumulatorTestCase(
+        "doc_with_null_value",
+        docs=[{"v": {"a": None}}, {"v": {"a": None}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": None}]}],
+        msg="$addToSet should deduplicate documents with null field values",
+    ),
+    AccumulatorTestCase(
+        "doc_with_nested_null",
+        docs=[{"v": {"a": {"b": None}}}, {"v": {"a": {"b": None}}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": {"b": None}}]}],
+        msg="$addToSet should deduplicate documents with nested null values",
+    ),
+]
+
+# Property [String Deduplication]: strings are compared by byte value with no Unicode normalization.
+# Element order inside expected[...]["result"] is not significant: the runner at the
+# bottom of this file passes ignore_order_in=["result"].
+ADDTOSET_STRING_DEDUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "string_identical",
+        docs=[{"v": "abc"}, {"v": "abc"}, {"v": "def"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["abc", "def"]}],
+        msg="$addToSet should deduplicate identical strings",
+    ),
+    AccumulatorTestCase(
+        "string_empty",
+        docs=[{"v": ""}, {"v": ""}, {"v": "x"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["", "x"]}],
+        msg="$addToSet should deduplicate empty strings",
+    ),
+    AccumulatorTestCase(
+        "string_unicode_no_normalization",
+        docs=[
+            {"v": "\u00e9"},  # precomposed: U+00E9 (e with acute) as one code point
+            {"v": "\u0065\u0301"},  # decomposed: "e" (U+0065) + combining acute (U+0301)
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["\u00e9", "\u0065\u0301"]}],
+        msg="$addToSet should not normalize Unicode; precomposed and decomposed are distinct",
+    ),
+    AccumulatorTestCase(
+        "string_embedded_null_bytes",
+        # The two distinct strings differ only in the character after the embedded NUL.
+        docs=[{"v": "a\x00b"}, {"v": "a\x00b"}, {"v": "a\x00c"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["a\x00b", "a\x00c"]}],
+        msg="$addToSet should compare strings with embedded null bytes by byte value",
+    ),
+    AccumulatorTestCase(
+        "string_4byte_utf8_emoji",
+        # U+1F600 and U+1F601 lie outside the Basic Multilingual Plane.
+        docs=[{"v": "\U0001f600"}, {"v": "\U0001f600"}, {"v": "\U0001f601"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["\U0001f600", "\U0001f601"]}],
+        msg="$addToSet should compare 4-byte UTF-8 characters (emoji) by byte value",
+    ),
+]
+
+# Property [Mixed Type Collection]: $addToSet collects values of different
+# BSON types in the same group.
+ADDTOSET_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "mixed_types",
+        # One value each of int, string, bool, array and embedded document.
+        docs=[
+            {"v": 42},
+            {"v": "hello"},
+            {"v": True},
+            {"v": [1, 2]},
+            {"v": {"a": 1}},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [42, "hello", True, [1, 2], {"a": 1}]}],
+        msg="$addToSet should collect values of different BSON types in one group",
+    ),
+]
+
+# Property [Numeric Equivalence]: numerically equivalent values across types are deduplicated.
+# NOTE(review): each expected list names a single representative value. Which BSON numeric
+# type survives deduplication is asserted separately, via $type, in
+# test_accumulator_addToSet_type_preservation.py.
+ADDTOSET_NUMERIC_EQUIV_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "equiv_all_ones",
+        docs=[{"v": 1}, {"v": Int64(1)}, {"v": 1.0}, {"v": Decimal128("1")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1]}],
+        msg="$addToSet should deduplicate numerically equivalent values of all numeric types",
+    ),
+    AccumulatorTestCase(
+        "equiv_all_zeros",
+        docs=[{"v": 0}, {"v": Int64(0)}, {"v": 0.0}, {"v": Decimal128("0")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [0]}],
+        msg="$addToSet should deduplicate numerically equivalent zero values",
+    ),
+    AccumulatorTestCase(
+        "equiv_int32_int64",
+        docs=[{"v": 5}, {"v": Int64(5)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5]}],
+        msg="$addToSet should deduplicate int32 and Int64 with same numeric value",
+    ),
+    AccumulatorTestCase(
+        "equiv_double_int32",
+        docs=[{"v": 3.0}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [3.0]}],
+        msg="$addToSet should deduplicate double and int32 with same numeric value",
+    ),
+    AccumulatorTestCase(
+        "equiv_decimal128_int64",
+        docs=[{"v": Decimal128("100")}, {"v": Int64(100)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("100")]}],
+        msg="$addToSet should deduplicate Decimal128 and Int64 with same numeric value",
+    ),
+    AccumulatorTestCase(
+        "equiv_negative",
+        docs=[{"v": -1}, {"v": Int64(-1)}, {"v": -1.0}, {"v": Decimal128("-1")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-1]}],
+        msg="$addToSet should deduplicate negative numerically equivalent values",
+    ),
+]
+
+# Property [BSON Type Distinction]: values of different BSON types are distinct even when similar.
+# NOTE(review): in Python, False == 0 and True == 1. Whether assertSuccess tells bool from
+# int when matching expected values is not visible in this file - confirm.
+ADDTOSET_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "distinct_false_vs_zero",
+        docs=[{"v": False}, {"v": 0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [0, False]}],
+        msg="$addToSet should treat false and int32(0) as distinct BSON types",
+    ),
+    AccumulatorTestCase(
+        "distinct_true_vs_one",
+        docs=[{"v": True}, {"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, True]}],
+        msg="$addToSet should treat true and int32(1) as distinct BSON types",
+    ),
+    AccumulatorTestCase(
+        "distinct_null_vs_missing",
+        # The second document has no "v" field at all, only "x".
+        docs=[{"v": None}, {"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null but exclude missing field",
+    ),
+    AccumulatorTestCase(
+        "distinct_empty_string_vs_null",
+        docs=[{"v": ""}, {"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["", None]}],
+        msg="$addToSet should treat empty string and null as distinct",
+    ),
+    AccumulatorTestCase(
+        "distinct_string_vs_number",
+        docs=[{"v": "123"}, {"v": 123}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [123, "123"]}],
+        msg="$addToSet should treat string '123' and int 123 as distinct",
+    ),
+]
+
+# Property [NaN Deduplication]: NaN values are equal for deduplication purposes.
+# A float NaN never compares equal to itself in Python, so expected float NaN values are
+# wrapped in pytest.approx(..., nan_ok=True), which does treat NaN as matching NaN.
+ADDTOSET_NAN_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "nan_double_dedup",
+        docs=[{"v": float("nan")}, {"v": float("nan")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}],
+        msg="$addToSet should deduplicate double NaN values",
+    ),
+    AccumulatorTestCase(
+        "nan_decimal128_dedup",
+        docs=[{"v": Decimal128("NaN")}, {"v": Decimal128("NaN")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("NaN")]}],
+        msg="$addToSet should deduplicate Decimal128 NaN values",
+    ),
+    AccumulatorTestCase(
+        "nan_cross_type",
+        # The float NaN is inserted first; the expectation is written as a float NaN.
+        docs=[{"v": float("nan")}, {"v": Decimal128("NaN")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}],
+        msg="$addToSet should deduplicate float NaN and Decimal128 NaN as numerically equal",
+    ),
+    AccumulatorTestCase(
+        "nan_with_finite",
+        docs=[{"v": float("nan")}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [pytest.approx(math.nan, nan_ok=True), 5]}],
+        msg="$addToSet should treat NaN and finite values as distinct",
+    ),
+]
+
+# Property [Infinity Deduplication]: Infinity values are equal across numeric types.
+ADDTOSET_INFINITY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "inf_double_dedup",
+        docs=[{"v": float("inf")}, {"v": float("inf")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("inf")]}],
+        msg="$addToSet should deduplicate positive Infinity values",
+    ),
+    AccumulatorTestCase(
+        "neg_inf_double_dedup",
+        docs=[{"v": float("-inf")}, {"v": float("-inf")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("-inf")]}],
+        msg="$addToSet should deduplicate negative Infinity values",
+    ),
+    AccumulatorTestCase(
+        "inf_cross_type",
+        # The float Infinity is inserted first; the expectation is written as a float.
+        docs=[{"v": float("inf")}, {"v": Decimal128("Infinity")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("inf")]}],
+        msg="$addToSet should deduplicate float Infinity and Decimal128 Infinity",
+    ),
+    AccumulatorTestCase(
+        "inf_vs_neg_inf",
+        docs=[{"v": float("inf")}, {"v": float("-inf")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("-inf"), float("inf")]}],
+        msg="$addToSet should treat positive and negative Infinity as distinct",
+    ),
+]
+
+# Property [Negative Zero]: -0.0 and 0.0 are numerically equal and deduplicated.
+# In every case the negative zero is inserted first and is the value written in expected.
+# NOTE(review): -0.0 == 0.0 (and -0.0 == 0) in Python, so if expected values are matched
+# with ==, the float cases do not pin the sign of the surviving zero - confirm against
+# assertSuccess.
+ADDTOSET_NEG_ZERO_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "neg_zero_double",
+        docs=[{"v": -0.0}, {"v": 0.0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-0.0]}],
+        msg="$addToSet should deduplicate -0.0 and 0.0 as numerically equal",
+    ),
+    AccumulatorTestCase(
+        "neg_zero_decimal128",
+        docs=[{"v": Decimal128("-0")}, {"v": Decimal128("0")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("-0")]}],
+        msg="$addToSet should deduplicate Decimal128 -0 and 0 as numerically equal",
+    ),
+    AccumulatorTestCase(
+        "neg_zero_cross_type",
+        # Double negative zero against an int zero.
+        docs=[{"v": -0.0}, {"v": 0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-0.0]}],
+        msg="$addToSet should deduplicate -0.0 and int 0 as numerically equal",
+    ),
+]
+
+# Property [Decimal128 Precision]: Decimal128 values with same numeric value but different
+# representations are deduplicated.
+ADDTOSET_DECIMAL128_PRECISION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "decimal_trailing_zeros",
+        # "1.0" is inserted before "1.00" and is the representation written in expected.
+        docs=[{"v": Decimal128("1.0")}, {"v": Decimal128("1.00")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("1.0")]}],
+        msg="$addToSet should deduplicate Decimal128 values with different trailing zeros",
+    ),
+    AccumulatorTestCase(
+        "decimal_34_digit_precision",
+        # Two byte-identical 34-significant-digit values.
+        docs=[
+            {"v": Decimal128("1.234567890123456789012345678901234")},
+            {"v": Decimal128("1.234567890123456789012345678901234")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("1.234567890123456789012345678901234")]}],
+        msg="$addToSet should deduplicate and preserve full 34-digit Decimal128 precision",
+    ),
+    AccumulatorTestCase(
+        "decimal_max_min_distinct",
+        docs=[
+            {"v": Decimal128("9.999999999999999999999999999999999E+6144")},
+            {"v": Decimal128("1E-6176")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[
+            {
+                "result": [
+                    Decimal128("1E-6176"),
+                    Decimal128("9.999999999999999999999999999999999E+6144"),
+                ]
+            }
+        ],
+        msg="$addToSet should treat Decimal128 max and min as distinct values",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Aggregate
+# ---------------------------------------------------------------------------
+
+# Flat list of every property list above, in declaration order.
+ADDTOSET_DEDUP_TESTS = [
+    *ADDTOSET_DOC_DEDUP_TESTS,
+    *ADDTOSET_STRING_DEDUP_TESTS,
+    *ADDTOSET_MIXED_TYPE_TESTS,
+    *ADDTOSET_NUMERIC_EQUIV_TESTS,
+    *ADDTOSET_TYPE_DISTINCTION_TESTS,
+    *ADDTOSET_NAN_TESTS,
+    *ADDTOSET_INFINITY_TESTS,
+    *ADDTOSET_NEG_ZERO_TESTS,
+    *ADDTOSET_DECIMAL128_PRECISION_TESTS,
+]
+
+# ---------------------------------------------------------------------------
+# Test function
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_DEDUP_TESTS))
+def test_accumulator_addToSet_dedup(collection, test_case: AccumulatorTestCase):
+    """Run one deduplication case: seed docs, aggregate, compare ignoring order in "result"."""
+    seed_docs = test_case.docs
+    # Cases with no docs run the pipeline without inserting anything first.
+    if seed_docs:
+        collection.insert_many(seed_docs)
+    aggregate_command = {
+        "aggregate": collection.name,
+        "pipeline": test_case.pipeline,
+        "cursor": {},
+    }
+    response = execute_command(collection, aggregate_command)
+    assertSuccess(
+        response,
+        test_case.expected,
+        msg=test_case.msg,
+        ignore_order_in=["result"],
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py
new file mode 100644
index 00000000..67ba0730
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py
@@ -0,0 +1,127 @@
+"""Tests for $addToSet accumulator error cases."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertFailureCode
+from documentdb_tests.framework.error_codes import (
+    CONVERSION_FAILURE_ERROR,
+    DIVIDE_BY_ZERO_V2_ERROR,
+    EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+    MODULO_BY_ZERO_V2_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [Arity]: $addToSet in accumulator context is a unary operator and
+# rejects array syntax.
+# These cases set error_code instead of expected; the runner in this file checks the
+# command result with assertFailureCode.
+ADDTOSET_ARITY_ERROR_TESTS: list[AccumulatorTestCase] = [
+    # The first four cases pass a literal array as the $addToSet operand.
+    AccumulatorTestCase(
+        "arity_empty_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": []}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject empty array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_single_element_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": [1]}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject single-element array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_single_field_ref_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": ["$v"]}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject single field ref in array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_element_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": [1, 2, 3]}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject multi-element array in accumulator context",
+    ),
+    # The operand here is an object with two operator keys, and a different error code
+    # is expected than for the array operands above.
+    AccumulatorTestCase(
+        "arity_multi_key_expression_object",
+        docs=[{"v": 1}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$add": [1, 2], "$multiply": [3, 4]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+        msg="$addToSet should reject multi-key expression object",
+    ),
+]
+
+# Property [Expression Error Propagation]: errors from sub-expressions propagate.
+ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "error_toInt_invalid",
+        docs=[{"v": "not_a_number"}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$toInt": "$v"}}}}],
+        error_code=CONVERSION_FAILURE_ERROR,
+        msg="$addToSet should propagate $toInt conversion error",
+    ),
+    AccumulatorTestCase(
+        "error_divide_by_zero",
+        docs=[{"v": 10}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$divide": ["$v", 0]}}}}],
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$addToSet should propagate divide-by-zero error",
+    ),
+    AccumulatorTestCase(
+        "error_mod_by_zero",
+        docs=[{"v": 10}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$mod": ["$v", 0]}}}}],
+        error_code=MODULO_BY_ZERO_V2_ERROR,
+        msg="$addToSet should propagate mod-by-zero error",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Aggregate
+# ---------------------------------------------------------------------------
+
+ADDTOSET_ERROR_TESTS = ADDTOSET_ARITY_ERROR_TESTS + ADDTOSET_EXPRESSION_ERROR_TESTS
+
+# ---------------------------------------------------------------------------
+# Test function
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_ERROR_TESTS))
+def test_accumulator_addToSet_errors(collection, test_case: AccumulatorTestCase):
+    """Test $addToSet accumulator error cases with $group.
+
+    Inserts the case's docs (when it has any), runs the case's pipeline through the
+    aggregate command, and asserts that the command failed with the case's error_code.
+    """
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py
new file mode 100644
index 00000000..42e5627d
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py
@@ -0,0 +1,132 @@
+"""Tests for $addToSet accumulator null and missing field handling."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [Null Collected]: null values are collected as valid values and deduplicated.
+ADDTOSET_NULL_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "null_all",
+        docs=[{"v": None}, {"v": None}, {"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null and deduplicate to a single null",
+    ),
+    AccumulatorTestCase(
+        "null_single",
+        docs=[{"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect a single null value",
+    ),
+    AccumulatorTestCase(
+        "null_among_values",
+        docs=[{"v": None}, {"v": 5}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None, 5, 3]}],
+        msg="$addToSet should collect null alongside other values",
+    ),
+]
+
+# Property [Missing Excluded]: missing fields are excluded from the result.
+# Documents that carry only "x" have no "v" field, so "$v" refers to a missing field
+# for them.
+ADDTOSET_MISSING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "missing_all",
+        docs=[{"x": 1}, {"x": 2}, {"x": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        # A group is still produced; its "result" is an empty array.
+        expected=[{"result": []}],
+        msg="$addToSet should return empty array when all fields are missing",
+    ),
+    AccumulatorTestCase(
+        "missing_single",
+        docs=[{"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": []}],
+        msg="$addToSet should return empty array for a single doc with missing field",
+    ),
+    AccumulatorTestCase(
+        "missing_among_values",
+        docs=[{"x": 1}, {"v": 5}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5, 3]}],
+        msg="$addToSet should exclude missing fields and collect only present values",
+    ),
+]
+
+# Property [Null and Missing Combined]: null is collected while missing is excluded.
+ADDTOSET_NULL_MISSING_COMBINED_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "combined_null_and_missing",
+        # Explicit nulls interleaved with documents that have no "v" field.
+        docs=[{"v": None}, {"x": 1}, {"v": None}, {"x": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null but exclude missing fields",
+    ),
+    AccumulatorTestCase(
+        "combined_null_missing_and_values",
+        docs=[{"v": 10}, {"v": None}, {"x": 1}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, None, 5]}],
+        msg="$addToSet should collect null and values but exclude missing fields",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Aggregate
+# ---------------------------------------------------------------------------
+
+# Flat list of the three property lists above, in declaration order.
+ADDTOSET_NULL_MISSING_TESTS = [
+    *ADDTOSET_NULL_TESTS,
+    *ADDTOSET_MISSING_TESTS,
+    *ADDTOSET_NULL_MISSING_COMBINED_TESTS,
+]
+
+# ---------------------------------------------------------------------------
+# Test function
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_NULL_MISSING_TESTS))
+def test_accumulator_addToSet_null_missing(collection, test_case: AccumulatorTestCase):
+    """Run one null/missing case: seed docs, aggregate, compare ignoring order in "result"."""
+    seed_docs = test_case.docs
+    # Cases with no docs run the pipeline without inserting anything first.
+    if seed_docs:
+        collection.insert_many(seed_docs)
+    aggregate_command = {
+        "aggregate": collection.name,
+        "pipeline": test_case.pipeline,
+        "cursor": {},
+    }
+    response = execute_command(collection, aggregate_command)
+    assertSuccess(
+        response,
+        test_case.expected,
+        msg=test_case.msg,
+        ignore_order_in=["result"],
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py
new file mode 100644
index 00000000..51f70ea0
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py
@@ -0,0 +1,120 @@
+"""Tests for $addToSet accumulator numeric type preservation during deduplication.
+
+When numerically equivalent values of different BSON types are deduplicated,
+verify which type survives in the result via $type projection.
+"""
+
+from __future__ import annotations
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [Numeric Equivalence — Type Preservation]: when numerically equal values
+# are deduplicated, verify which type survives via $type.
+# Each pipeline unwinds "result" after the $group and projects both the surviving value
+# and its $type name, so every expectation is a single {"value", "type"} document.
+ADDTOSET_TYPE_PRESERVATION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "type_pres_int32_then_int64",
+        docs=[{"v": 5}, {"v": Int64(5)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": 5, "type": "int"}],
+        msg="$addToSet should keep int type when int32 is inserted before int64",
+    ),
+    AccumulatorTestCase(
+        "type_pres_int64_then_int32",
+        docs=[{"v": Int64(5)}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": Int64(5), "type": "long"}],
+        msg="$addToSet should keep long type when int64 is inserted before int32",
+    ),
+    AccumulatorTestCase(
+        "type_pres_double_then_int32",
+        docs=[{"v": 3.0}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": 3.0, "type": "double"}],
+        msg="$addToSet should keep double type when double is inserted before int32",
+    ),
+    AccumulatorTestCase(
+        "type_pres_int32_then_double",
+        docs=[{"v": 3}, {"v": 3.0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": 3, "type": "int"}],
+        msg="$addToSet should keep int type when int32 is inserted before double",
+    ),
+    AccumulatorTestCase(
+        "type_pres_all_four_types",
+        docs=[
+            {"v": 1},
+            {"v": Int64(1)},
+            {"v": 1.0},
+            {"v": Decimal128("1")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": 1, "type": "int"}],
+        msg="$addToSet should keep int type when int32 is inserted first "
+        "among all four numeric types",
+    ),
+    AccumulatorTestCase(
+        "type_pres_decimal128_first",
+        docs=[
+            {"v": Decimal128("1")},
+            {"v": 1},
+            {"v": Int64(1)},
+            {"v": 1.0},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": Decimal128("1"), "type": "decimal"}],
+        msg="$addToSet should keep decimal type when Decimal128 is inserted "
+        "first among all four numeric types",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Test function
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_TYPE_PRESERVATION_TESTS))
+def test_accumulator_addToSet_type_preservation(collection, test_case: AccumulatorTestCase):
+    """Test $addToSet numeric type preservation during deduplication."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py
new file mode 100644
index 00000000..510f2260
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py
@@ -0,0 +1,329 @@
+"""Tests for $addToSet accumulator composed with sibling accumulators in the same $group."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils.accumulator_test_case import (  # noqa: E501
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [AddToSet with Sum]: $addToSet collects unique values while $sum
+# computes the total independently in the same $group.
+ADDTOSET_WITH_SUM_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "addtoset_sum_basic",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        # total counts the duplicate too: 10 + 20 + 10 = 40.
+        expected=[{"_id": "a", "unique": [10, 20], "total": 40}],
+        msg="$addToSet should collect unique values while $sum totals all values "
+        "including duplicates",
+    ),
+    AccumulatorTestCase(
+        "addtoset_sum_multiple_groups",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 10},
+            {"cat": "b", "v": 5},
+            {"cat": "b", "v": 15},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        # Both groups total 20 (10 + 10 and 5 + 15) but have different unique sets.
+        expected=[
+            {"_id": "a", "unique": [10], "total": 20},
+            {"_id": "b", "unique": [5, 15], "total": 20},
+        ],
+        msg="$addToSet and $sum should compute independently across multiple groups",
+    ),
+]
+
+# Property [AddToSet with Count]: $addToSet collects unique values while
+# $sum(1) counts all documents including those with duplicate values.
ADDTOSET_WITH_COUNT_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "addtoset_count_dedup_vs_total",
        # Three documents, two distinct values.
        docs=[{"cat": "a", "v": v} for v in (10, 10, 20)],
        pipeline=[
            {
                "$group": {
                    "_id": "$cat",
                    "unique": {"$addToSet": "$v"},
                    "count": {"$sum": 1},
                }
            }
        ],
        expected=[{"_id": "a", "unique": [10, 20], "count": 3}],
        msg="$addToSet should have 2 unique values while $sum(1) counts all 3 documents",
    ),
]

# Property [AddToSet with Push]: $addToSet collects unique values while $push
# collects all values including duplicates.
ADDTOSET_WITH_PUSH_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "addtoset_push_dedup_vs_all",
        docs=[{"cat": "a", "v": v} for v in (10, 20, 10)],
        pipeline=[
            # Sort ascending by v first; the expected all_vals below is in that order.
            {"$sort": {"v": 1}},
            {
                "$group": {
                    "_id": "$cat",
                    "unique": {"$addToSet": "$v"},
                    "all_vals": {"$push": "$v"},
                }
            },
        ],
        expected=[{"_id": "a", "unique": [10, 20], "all_vals": [10, 10, 20]}],
        msg="$addToSet should deduplicate while $push preserves all values",
    ),
]

# Property [AddToSet with Min/Max]: $addToSet collects the full unique set
# while $min/$max extract extremes independently.
ADDTOSET_WITH_MIN_MAX_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "addtoset_min_max",
        # Unsorted input with the minimum value duplicated.
        docs=[{"cat": "a", "v": v} for v in (30, 10, 20, 10)],
        pipeline=[
            {
                "$group": {
                    "_id": "$cat",
                    "unique": {"$addToSet": "$v"},
                    "lo": {"$min": "$v"},
                    "hi": {"$max": "$v"},
                }
            }
        ],
        expected=[{"_id": "a", "unique": [10, 20, 30], "lo": 10, "hi": 30}],
        msg="$addToSet should collect all unique values while $min/$max extract extremes",
    ),
]

# Property [AddToSet with Avg]: $addToSet collects unique values while $avg
# computes the mean over all documents including duplicates.
ADDTOSET_WITH_AVG_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "addtoset_avg_includes_duplicates",
        # (10 + 10 + 40) / 3 == 20.0, so the duplicate 10 contributes to the mean.
        docs=[{"cat": "a", "v": v} for v in (10, 10, 40)],
        pipeline=[
            {
                "$group": {
                    "_id": "$cat",
                    "unique": {"$addToSet": "$v"},
                    "mean": {"$avg": "$v"},
                }
            }
        ],
        expected=[{"_id": "a", "unique": [10, 40], "mean": 20.0}],
        msg="$addToSet should have 2 unique values while $avg computes mean over all 3 docs (including duplicate)",
    ),
]

# Property [AddToSet Null Handling vs Sum]: $addToSet collects null as a value
# while $sum ignores null.
ADDTOSET_NULL_VS_SUM_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "addtoset_null_collected_sum_ignores",
        # Two explicit nulls around a single numeric value.
        docs=[{"cat": "a", "v": v} for v in (None, 10, None)],
        pipeline=[
            {
                "$group": {
                    "_id": "$cat",
                    "unique": {"$addToSet": "$v"},
                    "total": {"$sum": "$v"},
                }
            }
        ],
        expected=[{"_id": "a", "unique": [None, 10], "total": 10}],
        msg="$addToSet should collect null as a value while $sum ignores null and totals only numeric values",
    ),
]

# Property [AddToSet with First/Last]: $addToSet collects all unique values
# regardless of order while $first/$last pick positional values after $sort.
ADDTOSET_WITH_FIRST_LAST_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "addtoset_first_last",
        docs=[{"cat": "a", "v": v} for v in (30, 10, 20, 10)],
        pipeline=[
            # Ascending sort by v; the expected first_v/last_v are the smallest/largest v.
            {"$sort": {"v": 1}},
            {
                "$group": {
                    "_id": "$cat",
                    "unique": {"$addToSet": "$v"},
                    "first_v": {"$first": "$v"},
                    "last_v": {"$last": "$v"},
                }
            },
        ],
        expected=[{"_id": "a", "unique": [10, 20, 30], "first_v": 10, "last_v": 30}],
        msg="$addToSet should collect all unique values while $first/$last pick sorted positional extremes",
    ),
]

# Property [AddToSet with MergeObjects]: $addToSet collects unique values
# while $mergeObjects combines per-document metadata independently.
ADDTOSET_WITH_MERGEOBJECTS_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "addtoset_mergeobjects",
        # Every document carries a differently-keyed `meta`, so no merged key
        # overwrites another.
        docs=[
            {"cat": "a", "v": 10, "meta": {"src": "x"}},
            {"cat": "a", "v": 20, "meta": {"quality": "high"}},
            {"cat": "a", "v": 10, "meta": {"reviewed": True}},
        ],
        pipeline=[
            # NOTE(review): the two v == 10 documents tie in this sort, so the key
            # order inside `merged` may vary -- confirm the assertion compares
            # embedded documents without regard to key order.
            {"$sort": {"v": 1}},
            {
                "$group": {
                    "_id": "$cat",
                    "unique": {"$addToSet": "$v"},
                    "merged": {"$mergeObjects": "$meta"},
                }
            },
        ],
        expected=[
            {
                "_id": "a",
                "unique": [10, 20],
                "merged": {"src": "x", "quality": "high", "reviewed": True},
            }
        ],
        msg="$addToSet should deduplicate values while $mergeObjects merges metadata from all documents including duplicates",
    ),
]

# Property [Multiple AddToSet]: multiple $addToSet accumulators in the same
# $group independently collect unique values from different fields.
MULTIPLE_ADDTOSET_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "multiple_addtoset_different_fields",
        # The first and third documents are identical in both color and size.
        docs=[
            {"cat": "a", "color": color, "size": size}
            for color, size in (("red", "S"), ("blue", "M"), ("red", "S"))
        ],
        pipeline=[
            {
                "$group": {
                    "_id": "$cat",
                    "colors": {"$addToSet": "$color"},
                    "sizes": {"$addToSet": "$size"},
                }
            }
        ],
        expected=[{"_id": "a", "colors": ["red", "blue"], "sizes": ["S", "M"]}],
        msg="Multiple $addToSet accumulators should independently collect unique values from different fields",
    ),
]

# ---------------------------------------------------------------------------
# Aggregate
# ---------------------------------------------------------------------------

# Every property list above, flattened into the single list that drives the
# parametrized test below.
ADDTOSET_INTEGRATION_TESTS = [
    *ADDTOSET_WITH_SUM_TESTS,
    *ADDTOSET_WITH_COUNT_TESTS,
    *ADDTOSET_WITH_PUSH_TESTS,
    *ADDTOSET_WITH_MIN_MAX_TESTS,
    *ADDTOSET_WITH_AVG_TESTS,
    *ADDTOSET_NULL_VS_SUM_TESTS,
    *ADDTOSET_WITH_FIRST_LAST_TESTS,
    *ADDTOSET_WITH_MERGEOBJECTS_TESTS,
    *MULTIPLE_ADDTOSET_TESTS,
]

# ---------------------------------------------------------------------------
# Test function
# ---------------------------------------------------------------------------


@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_INTEGRATION_TESTS))
def test_accumulators_addToSet_integration(collection, test_case: AccumulatorTestCase):
    """Run one $addToSet-with-sibling-accumulator case and check its outcome.

    Inserts the case's documents (when it has any), issues an ``aggregate``
    command with the case's pipeline (an empty pipeline when the case has
    none), and compares the command result with the case's expected documents
    or error code.
    """
    seed_docs = test_case.docs
    if seed_docs:
        collection.insert_many(seed_docs)

    aggregate_cmd = {
        "aggregate": collection.name,
        "pipeline": test_case.pipeline or [],
        "cursor": {},
    }
    outcome = execute_command(collection, aggregate_cmd)

    # Passed to the assertion so that neither the order of result documents nor
    # the order inside the $addToSet output arrays is compared.
    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
        ignore_doc_order=True,
        ignore_order_in=["unique", "colors", "sizes"],
    )