From 3ebfb329e525dff37e02569abd527c58bcc9237b Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Wed, 20 May 2026 12:28:11 -0700 Subject: [PATCH 01/13] Initial generated addToSet Signed-off-by: Alina (Xi) Li --- .../addToSet/test_accumulator_addToSet.py | 1326 +++++++++++++++++ .../test_addToSet_bucketAuto_smoke.py | 123 ++ .../addToSet/test_addToSet_bucket_smoke.py | 123 ++ .../test_addToSet_setWindowFields_smoke.py | 165 ++ 4 files changed, 1737 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py new file mode 100644 index 00000000..bcd07809 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py @@ -0,0 +1,1326 @@ +"""Tests for $addToSet accumulator ($group).""" + +from __future__ import annotations + +import math +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess +from documentdb_tests.framework.error_codes import ( + CONVERSION_FAILURE_ERROR, + DIVIDE_BY_ZERO_V2_ERROR, + 
EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, + GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + MODULO_BY_ZERO_V2_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +_OID1 = ObjectId("000000000000000000000001") +_OID2 = ObjectId("000000000000000000000002") +_DT1 = datetime(2020, 1, 1, tzinfo=timezone.utc) +_DT2 = datetime(2021, 1, 1, tzinfo=timezone.utc) + +# --------------------------------------------------------------------------- +# Property lists +# --------------------------------------------------------------------------- + +# Property [Null Collected]: null values are collected as valid values and deduplicated. +ADDTOSET_NULL_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "null_all", + docs=[{"v": None}, {"v": None}, {"v": None}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None]}], + msg="$addToSet should collect null and deduplicate to a single null", + ), + AccumulatorTestCase( + "null_single", + docs=[{"v": None}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None]}], + msg="$addToSet should collect a single null value", + ), + AccumulatorTestCase( + "null_among_values", + docs=[{"v": None}, {"v": 5}, {"v": 3}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None, 5, 3]}], + msg="$addToSet should collect null alongside other values", + ), + AccumulatorTestCase( + "null_and_values_dedup", + docs=[{"v": 10}, {"v": None}, {"v": 5}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [10, None, 5]}], + msg="$addToSet should collect null and distinct values without duplication", + ), +] 
# Property [Missing Excluded]: missing fields are excluded from the result.
# Rows are (id, input docs, expected collected values, failure message); each row is
# expanded into a case with its own freshly built $group/$project pipeline.
ADDTOSET_MISSING_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "missing_all",
            [{"x": 1}, {"x": 2}, {"x": 3}],
            [],
            "$addToSet should return empty array when all fields are missing",
        ),
        (
            "missing_single",
            [{"x": 1}],
            [],
            "$addToSet should return empty array for a single doc with missing field",
        ),
        (
            "missing_among_values",
            [{"x": 1}, {"v": 5}, {"v": 3}],
            [5, 3],
            "$addToSet should exclude missing fields and collect only present values",
        ),
    )
]

# Property [Null and Missing Combined]: null is collected while missing is excluded.
# Same row layout as above: documents without "v" sit next to documents whose "v" is null.
ADDTOSET_NULL_MISSING_COMBINED_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "combined_null_and_missing",
            [{"v": None}, {"x": 1}, {"v": None}, {"x": 2}],
            [None],
            "$addToSet should collect null but exclude missing fields",
        ),
        (
            "combined_null_missing_and_values",
            [{"v": 10}, {"v": None}, {"x": 1}, {"v": 5}],
            [10, None, 5],
            "$addToSet should collect null and values but exclude missing fields",
        ),
    )
]

# Property [$$REMOVE Excluded]: $$REMOVE via $cond is treated as missing.
# Rows are (id, input docs, $addToSet argument, expected collected values, message).
# The argument is a $cond that yields either a value or $$REMOVE for each document.
ADDTOSET_REMOVE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {
                "$group": {
                    "_id": None,
                    "result": {"$addToSet": accumulator_arg},
                }
            },
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, accumulator_arg, collected, case_msg in (
        (
            "remove_all",
            [{"v": -1}, {"v": -2}, {"v": -3}],
            {"$cond": [{"$gte": ["$v", 0]}, "$v", "$$REMOVE"]},
            [],
            "$addToSet should treat $$REMOVE as missing and return empty array",
        ),
        (
            "remove_some",
            [{"v": -1}, {"v": 5}, {"v": -2}, {"v": 10}],
            {"$cond": [{"$gte": ["$v", 0]}, "$v", "$$REMOVE"]},
            [5, 10],
            "$addToSet should exclude $$REMOVE values and collect the rest",
        ),
        (
            # Here the kept branch is a literal null rather than the field value.
            "remove_and_null_value",
            [{"v": 1}, {"v": 2}, {"v": 3}],
            {"$cond": [{"$gt": ["$v", 2]}, None, "$$REMOVE"]},
            [None],
            "$addToSet should collect null produced by $cond while excluding $$REMOVE",
        ),
        (
            "remove_dedup",
            [{"v": 5}, {"v": 5}, {"v": -1}, {"v": -2}],
            {"$cond": [{"$gte": ["$v", 0]}, "$v", "$$REMOVE"]},
            [5],
            "$addToSet should deduplicate values and exclude $$REMOVE entries",
        ),
    )
]

# Property [Unique Value Collection]: $addToSet returns an array of all unique values.
# Rows are (id, input docs, expected unique values, failure message); every case
# accumulates "$v" over the whole collection.
ADDTOSET_UNIQUE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": unique_values}],
        msg=case_msg,
    )
    for case_id, case_docs, unique_values, case_msg in (
        (
            "unique_distinct",
            [{"v": 10}, {"v": 20}, {"v": 30}],
            [10, 20, 30],
            "$addToSet should return all distinct values",
        ),
        (
            "unique_with_duplicates",
            [{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
            [10, 20, 30],
            "$addToSet should deduplicate repeated values",
        ),
        (
            "unique_all_same",
            [{"v": 42}, {"v": 42}, {"v": 42}, {"v": 42}, {"v": 42}],
            [42],
            "$addToSet should collapse identical values into one element",
        ),
        (
            "unique_single_doc",
            [{"v": 7}],
            [7],
            "$addToSet should return single-element array for one document",
        ),
    )
]

# Property [Array as Single Element]: array values are appended as a single element, not unwound.
# Rows are (id, input docs, expected collected values, failure message). The expected
# values are themselves arrays wherever the input field holds an array.
ADDTOSET_ARRAY_ELEMENT_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "array_distinct",
            [{"v": [1, 2]}, {"v": [3, 4]}, {"v": [1, 2]}],
            [[1, 2], [3, 4]],
            "$addToSet should treat arrays as single elements and deduplicate identical arrays",
        ),
        (
            "array_empty",
            [{"v": []}, {"v": []}, {"v": [1]}],
            [[], [1]],
            "$addToSet should treat empty arrays as single elements and deduplicate them",
        ),
        (
            "array_nested",
            [{"v": [[1]]}, {"v": [[2]]}, {"v": [[1]]}],
            [[[1]], [[2]]],
            "$addToSet should treat nested arrays as single elements and deduplicate them",
        ),
        (
            "array_mixed_scalar",
            [{"v": 1}, {"v": [1]}],
            [1, [1]],
            "$addToSet should distinguish scalar 1 from array [1]",
        ),
        (
            "array_single_doc",
            [{"v": [1, 2, 3]}],
            [[1, 2, 3]],
            "$addToSet should wrap the array value as a single element in the result",
        ),
    )
]

# Property [Document Duplicate Detection]: documents are duplicates only if they have
# exact same fields, values, and field order.
# Rows are (id, input docs, expected collected documents, failure message).
# NOTE(review): plain Python dict equality ignores key order, so the
# "*_different_field_order" / "*_different_order" cases can only observe field order if
# the harness compares order-sensitively (e.g. via encoded BSON) - confirm.
ADDTOSET_DOC_DEDUP_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "doc_identical",
            [{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 2}}],
            [{"a": 1, "b": 2}],
            "$addToSet should deduplicate identical documents",
        ),
        (
            "doc_different_field_order",
            [{"v": {"a": 1, "b": 2}}, {"v": {"b": 2, "a": 1}}],
            [{"b": 2, "a": 1}, {"a": 1, "b": 2}],
            "$addToSet should treat documents with different field order as distinct",
        ),
        (
            "doc_different_values",
            [{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 3}}],
            [{"a": 1, "b": 2}, {"a": 1, "b": 3}],
            "$addToSet should treat documents with different values as distinct",
        ),
        (
            "doc_nested_identical",
            [{"v": {"a": {"x": 1}}}, {"v": {"a": {"x": 1}}}],
            [{"a": {"x": 1}}],
            "$addToSet should deduplicate nested documents with identical structure",
        ),
        (
            "doc_nested_different_order",
            [{"v": {"a": {"x": 1, "y": 2}}}, {"v": {"a": {"y": 2, "x": 1}}}],
            [{"a": {"x": 1, "y": 2}}, {"a": {"y": 2, "x": 1}}],
            "$addToSet should treat nested documents with different field order as distinct",
        ),
        (
            "doc_empty",
            [{"v": {}}, {"v": {}}],
            [{}],
            "$addToSet should deduplicate empty documents",
        ),
        (
            "doc_subset",
            [{"v": {"a": 1}}, {"v": {"a": 1, "b": 2}}],
            [{"a": 1, "b": 2}, {"a": 1}],
            "$addToSet should treat a document subset and superset as distinct",
        ),
        (
            "doc_with_array_value",
            [{"v": {"a": [1, 2]}}, {"v": {"a": [1, 2]}}],
            [{"a": [1, 2]}],
            "$addToSet should deduplicate documents containing identical array values",
        ),
        (
            "doc_with_null_value",
            [{"v": {"a": None}}, {"v": {"a": None}}],
            [{"a": None}],
            "$addToSet should deduplicate documents with null field values",
        ),
        (
            "doc_with_nested_null",
            [{"v": {"a": {"b": None}}}, {"v": {"a": {"b": None}}}],
            [{"a": {"b": None}}],
            "$addToSet should deduplicate documents with nested null values",
        ),
    )
]

# Property [String Deduplication]: strings are compared by byte value with no Unicode normalization.
# Rows are (id, input docs, expected collected strings, failure message).
ADDTOSET_STRING_DEDUP_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "string_identical",
            [{"v": "abc"}, {"v": "abc"}, {"v": "def"}],
            ["abc", "def"],
            "$addToSet should deduplicate identical strings",
        ),
        (
            "string_empty",
            [{"v": ""}, {"v": ""}, {"v": "x"}],
            ["", "x"],
            "$addToSet should deduplicate empty strings",
        ),
        (
            # U+00E9 (precomposed) versus U+0065 U+0301 (decomposed): same glyph,
            # different code point sequences.
            "string_unicode_no_normalization",
            [
                {"v": "\u00e9"},
                {"v": "\u0065\u0301"},
            ],
            ["\u00e9", "\u0065\u0301"],
            "$addToSet should not normalize Unicode; precomposed and decomposed are distinct",
        ),
    )
]

# Property [BSON Type Collection]: $addToSet collects and deduplicates values of every
# non-deprecated BSON type.
# Rows are (id, input docs, expected collected values, failure message); one row per
# BSON type, each accumulating "$v" over the whole collection.
ADDTOSET_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "bson_int32",
            [{"v": 10}, {"v": 20}, {"v": 10}],
            [10, 20],
            "$addToSet should collect and deduplicate int32 values",
        ),
        (
            "bson_int64",
            [{"v": Int64(10)}, {"v": Int64(20)}, {"v": Int64(10)}],
            [Int64(10), Int64(20)],
            "$addToSet should collect and deduplicate Int64 values",
        ),
        (
            "bson_double",
            [{"v": 1.5}, {"v": 2.5}, {"v": 1.5}],
            [1.5, 2.5],
            "$addToSet should collect and deduplicate double values",
        ),
        (
            "bson_decimal128",
            [
                {"v": Decimal128("1.5")},
                {"v": Decimal128("2.5")},
                {"v": Decimal128("1.5")},
            ],
            [Decimal128("1.5"), Decimal128("2.5")],
            "$addToSet should collect and deduplicate Decimal128 values",
        ),
        (
            "bson_string",
            [{"v": "abc"}, {"v": "def"}, {"v": "abc"}],
            ["abc", "def"],
            "$addToSet should collect and deduplicate string values",
        ),
        (
            "bson_bool",
            [{"v": True}, {"v": False}, {"v": True}],
            [True, False],
            "$addToSet should collect and deduplicate boolean values",
        ),
        (
            "bson_datetime",
            [{"v": _DT1}, {"v": _DT2}, {"v": _DT1}],
            [_DT1, _DT2],
            "$addToSet should collect and deduplicate datetime values",
        ),
        (
            "bson_objectid",
            [{"v": _OID1}, {"v": _OID2}, {"v": _OID1}],
            [_OID1, _OID2],
            "$addToSet should collect and deduplicate ObjectId values",
        ),
        (
            # Inputs are bson.Binary, expectations are plain bytes.
            "bson_binary",
            [{"v": Binary(b"\x00")}, {"v": Binary(b"\x01")}, {"v": Binary(b"\x00")}],
            [b"\x00", b"\x01"],
            "$addToSet should collect and deduplicate Binary values",
        ),
        (
            "bson_regex",
            [{"v": Regex("abc")}, {"v": Regex("def")}, {"v": Regex("abc")}],
            [Regex("abc"), Regex("def")],
            "$addToSet should collect and deduplicate Regex values",
        ),
        (
            # NOTE(review): inputs are bson.Code but expectations are plain str; this only
            # passes if the harness normalises Code to str before comparing - confirm.
            "bson_code",
            [
                {"v": Code("function(){}")},
                {"v": Code("function(){return 1}")},
                {"v": Code("function(){}")},
            ],
            ["function(){}", "function(){return 1}"],
            "$addToSet should collect and deduplicate Code values",
        ),
        (
            "bson_timestamp",
            [
                {"v": Timestamp(100, 1)},
                {"v": Timestamp(200, 1)},
                {"v": Timestamp(100, 1)},
            ],
            [Timestamp(100, 1), Timestamp(200, 1)],
            "$addToSet should collect and deduplicate Timestamp values",
        ),
        (
            # NOTE(review): the expectation wraps the key in a document with an empty field
            # name instead of listing a bare MinKey; presumably recorded from observed
            # server output - confirm it is intended.
            "bson_minkey",
            [{"v": MinKey()}, {"v": MinKey()}],
            [{"": MinKey()}],
            "$addToSet should deduplicate MinKey values",
        ),
        (
            # NOTE(review): same empty-field-name wrapping as bson_minkey - confirm.
            "bson_maxkey",
            [{"v": MaxKey()}, {"v": MaxKey()}],
            [{"": MaxKey()}],
            "$addToSet should deduplicate MaxKey values",
        ),
        (
            "bson_document",
            [{"v": {"x": 1}}, {"v": {"x": 2}}, {"v": {"x": 1}}],
            [{"x": 1}, {"x": 2}],
            "$addToSet should collect and deduplicate embedded document values",
        ),
        (
            "bson_array",
            [{"v": [1, 2]}, {"v": [3, 4]}, {"v": [1, 2]}],
            [[1, 2], [3, 4]],
            "$addToSet should collect and deduplicate array values as single elements",
        ),
        (
            "bson_null",
            [{"v": None}, {"v": None}],
            [None],
            "$addToSet should deduplicate null values",
        ),
    )
]

# Property [Mixed Type Collection]: $addToSet collects values of different
# BSON types in the same group.
# A single group holding an int, a string, a bool, an array and a document.
ADDTOSET_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "mixed_types",
        docs=[{"v": value} for value in (42, "hello", True, [1, 2], {"a": 1})],
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": [42, "hello", True, [1, 2], {"a": 1}]}],
        msg="$addToSet should collect values of different BSON types in one group",
    ),
]

# Property [Numeric Equivalence]: numerically equivalent values across types are deduplicated.
# Rows are (id, input docs, expected collected values, failure message). In every row the
# expected survivor is written in the same numeric type as the first input document.
ADDTOSET_NUMERIC_EQUIV_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "equiv_all_ones",
            [{"v": 1}, {"v": Int64(1)}, {"v": 1.0}, {"v": Decimal128("1")}],
            [1],
            "$addToSet should deduplicate numerically equivalent values of all numeric types",
        ),
        (
            "equiv_all_zeros",
            [{"v": 0}, {"v": Int64(0)}, {"v": 0.0}, {"v": Decimal128("0")}],
            [0],
            "$addToSet should deduplicate numerically equivalent zero values",
        ),
        (
            "equiv_int32_int64",
            [{"v": 5}, {"v": Int64(5)}],
            [5],
            "$addToSet should deduplicate int32 and Int64 with same numeric value",
        ),
        (
            "equiv_double_int32",
            [{"v": 3.0}, {"v": 3}],
            [3.0],
            "$addToSet should deduplicate double and int32 with same numeric value",
        ),
        (
            "equiv_decimal128_int64",
            [{"v": Decimal128("100")}, {"v": Int64(100)}],
            [Decimal128("100")],
            "$addToSet should deduplicate Decimal128 and Int64 with same numeric value",
        ),
        (
            "equiv_negative",
            [{"v": -1}, {"v": Int64(-1)}, {"v": -1.0}, {"v": Decimal128("-1")}],
            [-1],
            "$addToSet should deduplicate negative numerically equivalent values",
        ),
    )
]

# Property [BSON Type Distinction]: values of different BSON types are distinct even when similar.
# NOTE(review): in Python, False == 0 and True == 1, so the bool-vs-int rows only prove
# type distinction if the harness compares type-strictly - confirm.
ADDTOSET_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "distinct_false_vs_zero",
            [{"v": False}, {"v": 0}],
            [0, False],
            "$addToSet should treat false and int32(0) as distinct BSON types",
        ),
        (
            "distinct_true_vs_one",
            [{"v": True}, {"v": 1}],
            [1, True],
            "$addToSet should treat true and int32(1) as distinct BSON types",
        ),
        (
            "distinct_null_vs_missing",
            [{"v": None}, {"x": 1}],
            [None],
            "$addToSet should collect null but exclude missing field",
        ),
        (
            "distinct_empty_string_vs_null",
            [{"v": ""}, {"v": None}],
            ["", None],
            "$addToSet should treat empty string and null as distinct",
        ),
        (
            "distinct_string_vs_number",
            [{"v": "123"}, {"v": 123}],
            [123, "123"],
            "$addToSet should treat string '123' and int 123 as distinct",
        ),
    )
]

# Property [NaN Deduplication]: NaN values are equal for deduplication purposes.
# Double NaN expectations go through pytest.approx(..., nan_ok=True) because a float NaN
# never compares equal to itself with ==.
ADDTOSET_NAN_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "nan_double_dedup",
            [{"v": float("nan")}, {"v": float("nan")}],
            [pytest.approx(math.nan, nan_ok=True)],
            "$addToSet should deduplicate double NaN values",
        ),
        (
            "nan_decimal128_dedup",
            [{"v": Decimal128("NaN")}, {"v": Decimal128("NaN")}],
            [Decimal128("NaN")],
            "$addToSet should deduplicate Decimal128 NaN values",
        ),
        (
            "nan_cross_type",
            [{"v": float("nan")}, {"v": Decimal128("NaN")}],
            [pytest.approx(math.nan, nan_ok=True)],
            "$addToSet should deduplicate float NaN and Decimal128 NaN as numerically equal",
        ),
        (
            "nan_with_finite",
            [{"v": float("nan")}, {"v": 5}],
            [pytest.approx(math.nan, nan_ok=True), 5],
            "$addToSet should treat NaN and finite values as distinct",
        ),
    )
]

# Property [Infinity Deduplication]: Infinity values are equal across numeric types.
ADDTOSET_INFINITY_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "inf_double_dedup",
            [{"v": float("inf")}, {"v": float("inf")}],
            [float("inf")],
            "$addToSet should deduplicate positive Infinity values",
        ),
        (
            "neg_inf_double_dedup",
            [{"v": float("-inf")}, {"v": float("-inf")}],
            [float("-inf")],
            "$addToSet should deduplicate negative Infinity values",
        ),
        (
            "inf_cross_type",
            [{"v": float("inf")}, {"v": Decimal128("Infinity")}],
            [float("inf")],
            "$addToSet should deduplicate float Infinity and Decimal128 Infinity",
        ),
        (
            "inf_vs_neg_inf",
            [{"v": float("inf")}, {"v": float("-inf")}],
            [float("-inf"), float("inf")],
            "$addToSet should treat positive and negative Infinity as distinct",
        ),
    )
]

# Property [Negative Zero]: -0.0 and 0.0 are numerically equal and deduplicated.
# Rows are (id, input docs, expected collected values, failure message); each row pairs
# a negative zero with a positive or integer zero.
ADDTOSET_NEG_ZERO_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "neg_zero_double",
            [{"v": -0.0}, {"v": 0.0}],
            [-0.0],
            "$addToSet should deduplicate -0.0 and 0.0 as numerically equal",
        ),
        (
            "neg_zero_decimal128",
            [{"v": Decimal128("-0")}, {"v": Decimal128("0")}],
            [Decimal128("-0")],
            "$addToSet should deduplicate Decimal128 -0 and 0 as numerically equal",
        ),
        (
            "neg_zero_cross_type",
            [{"v": -0.0}, {"v": 0}],
            [-0.0],
            "$addToSet should deduplicate -0.0 and int 0 as numerically equal",
        ),
    )
]

# Property [Decimal128 Precision]: Decimal128 values with same numeric value but different
# representations are deduplicated.
# Rows are (id, input docs, expected collected values, failure message). Besides the
# trailing-zero deduplication row, the list also covers a 34-digit value passing through
# unchanged and two extreme-exponent values staying distinct.
ADDTOSET_DECIMAL128_PRECISION_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, collected, case_msg in (
        (
            "decimal_trailing_zeros",
            [{"v": Decimal128("1.0")}, {"v": Decimal128("1.00")}],
            [Decimal128("1.0")],
            "$addToSet should deduplicate Decimal128 values with different trailing zeros",
        ),
        (
            "decimal_34_digit_precision",
            [{"v": Decimal128("1.234567890123456789012345678901234")}],
            [Decimal128("1.234567890123456789012345678901234")],
            "$addToSet should preserve full 34-digit Decimal128 precision",
        ),
        (
            "decimal_max_min_distinct",
            [
                {"v": Decimal128("9.999999999999999999999999999999999E+6144")},
                {"v": Decimal128("1E-6176")},
            ],
            [
                Decimal128("1E-6176"),
                Decimal128("9.999999999999999999999999999999999E+6144"),
            ],
            "$addToSet should treat Decimal128 max and min as distinct values",
        ),
    )
]

# Property [Expression Arguments]: $addToSet accepts various expression forms.
# Rows are (id, input docs, $addToSet argument, expected collected values, message).
# The argument varies per row: field path, dotted path, literal, computed expression.
ADDTOSET_EXPRESSION_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        case_id,
        docs=case_docs,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": accumulator_arg}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[{"result": collected}],
        msg=case_msg,
    )
    for case_id, case_docs, accumulator_arg, collected, case_msg in (
        (
            "expr_field_path",
            [{"v": 10}, {"v": 20}, {"v": 10}],
            "$v",
            [10, 20],
            "$addToSet should collect values from a field path expression",
        ),
        (
            "expr_nested_field",
            [{"a": {"b": 1}}, {"a": {"b": 2}}, {"a": {"b": 1}}],
            "$a.b",
            [1, 2],
            "$addToSet should collect values from a nested field path",
        ),
        (
            "expr_literal",
            [{"v": 1}, {"v": 2}, {"v": 3}],
            42,
            [42],
            "$addToSet should deduplicate a constant literal applied to all docs",
        ),
        (
            "expr_computed",
            [{"price": 10, "qty": 2}, {"price": 5, "qty": 3}, {"price": 10, "qty": 2}],
            {"$multiply": ["$price", "$qty"]},
            [20, 15],
            "$addToSet should collect unique computed expression results",
        ),
        (
            "expr_null_literal",
            [{"v": 1}, {"v": 2}],
            None,
            [None],
            "$addToSet should collect null literal and deduplicate across docs",
        ),
        (
            # "$a.b" over an array of sub-documents yields one array per input document.
            "expr_composite_array_path",
            [{"a": [{"b": 1}, {"b": 2}]}, {"a": [{"b": 3}, {"b": 1}]}],
            "$a.b",
            [[3, 1], [1, 2]],
            "$addToSet should collect array values from composite array path",
        ),
    )
]

# Property [Grouping by Key]: groups compute independently.
# Unlike the other lists, this case groups by "$g", keeps _id in the output, and sorts
# the groups by _id.
ADDTOSET_GROUPING_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "multi_group",
        docs=[
            {"g": group_key, "v": value}
            for group_key, value in (
                ("A", 1),
                ("A", 2),
                ("A", 1),
                ("B", 3),
                ("B", 3),
                ("B", 4),
            )
        ],
        pipeline=[
            {"$group": {"_id": "$g", "result": {"$addToSet": "$v"}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[
            {"_id": "A", "result": [1, 2]},
            {"_id": "B", "result": [3, 4]},
        ],
        msg="$addToSet should compute unique sets independently per group key",
    ),
]

# Property [Empty Collection]: $group on empty collection produces no output.
# docs=None is passed through as-is; NOTE(review): presumably the harness treats it as
# "insert nothing" - confirm against AccumulatorTestCase.
ADDTOSET_EMPTY_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "empty_collection",
        docs=None,
        pipeline=[
            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
            {"$project": {"_id": 0, "result": 1}},
        ],
        expected=[],
        msg="$addToSet should produce no output documents for an empty collection",
    ),
]

# Property [Edge Cases]: accumulator-specific edge cases.
+ADDTOSET_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "edge_single_null_doc", + docs=[{"v": None}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None]}], + msg="$addToSet should return [null] for single null document", + ), + AccumulatorTestCase( + "edge_single_missing_doc", + docs=[{"x": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": []}], + msg="$addToSet should return empty array for single document with missing field", + ), + AccumulatorTestCase( + "edge_many_unique", + docs=[{"v": i} for i in range(100)], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": list(range(100))}], + msg="$addToSet should collect 100 unique values into a 100-element array", + ), + AccumulatorTestCase( + "edge_many_docs_few_unique", + docs=[{"v": i % 5} for i in range(100)], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [0, 1, 2, 3, 4]}], + msg="$addToSet should deduplicate 100 docs down to 5 unique values", + ), + AccumulatorTestCase( + "edge_array_field_not_traversed", + docs=[{"v": [5, 1, 8]}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [[5, 1, 8]]}], + msg="$addToSet should treat array field as a single element, not traverse it", + ), + AccumulatorTestCase( + "edge_mixed_array_scalar", + docs=[{"v": 5}, {"v": [5]}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [5, [5]]}], + msg="$addToSet should distinguish scalar 5 from array [5]", + ), + AccumulatorTestCase( + 
"edge_binary_different_subtypes", + docs=[{"v": Binary(b"\x00", 0)}, {"v": Binary(b"\x00", 5)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [b"\x00", Binary(b"\x00", 5)]}], + msg="$addToSet should treat Binary values with different subtypes as distinct", + ), + AccumulatorTestCase( + "edge_regex_different_flags", + docs=[{"v": Regex("abc", "i")}, {"v": Regex("abc", "m")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Regex("abc", "i"), Regex("abc", "m")]}], + msg="$addToSet should treat Regex values with different flags as distinct", + ), + AccumulatorTestCase( + "edge_expression_mixed_types", + docs=[{"v": 1}, {"v": "hello"}, {"v": True}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [1, "hello", True]}], + msg="$addToSet should collect mixed-type values from expression", + ), +] + +# Property [Arity Rejection]: $addToSet in accumulator context is unary and rejects array syntax. 
+ADDTOSET_ARITY_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "arity_empty_array", + docs=[{"v": 1}], + pipeline=[{"$group": {"_id": None, "result": {"$addToSet": []}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject empty array in accumulator context", + ), + AccumulatorTestCase( + "arity_single_element_literal", + docs=[{"v": 1}], + pipeline=[{"$group": {"_id": None, "result": {"$addToSet": [1]}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject single-element literal array in accumulator context", + ), + AccumulatorTestCase( + "arity_single_field_ref", + docs=[{"v": 1}], + pipeline=[{"$group": {"_id": None, "result": {"$addToSet": ["$v"]}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject single field ref in array in accumulator context", + ), + AccumulatorTestCase( + "arity_multi_element", + docs=[{"v": 1}], + pipeline=[{"$group": {"_id": None, "result": {"$addToSet": [1, 2, 3]}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject multi-element array in accumulator context", + ), + AccumulatorTestCase( + "arity_multi_key_expression", + docs=[{"v": 1}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$addToSet": {"$add": [1, 2], "$multiply": [3, 4]}}, + } + } + ], + error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, + msg="$addToSet should reject multi-key expression object", + ), +] + +# Property [Expression Error Propagation]: errors from sub-expressions propagate. 
+ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "error_toInt_invalid", + docs=[{"v": "not_a_number"}], + pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$toInt": "$v"}}}}], + error_code=CONVERSION_FAILURE_ERROR, + msg="$addToSet should propagate $toInt conversion error", + ), + AccumulatorTestCase( + "error_divide_by_zero", + docs=[{"v": 10}], + pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$divide": ["$v", 0]}}}}], + error_code=DIVIDE_BY_ZERO_V2_ERROR, + msg="$addToSet should propagate divide-by-zero error", + ), + AccumulatorTestCase( + "error_mod_by_zero", + docs=[{"v": 10}], + pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$mod": ["$v", 0]}}}}], + error_code=MODULO_BY_ZERO_V2_ERROR, + msg="$addToSet should propagate mod-by-zero error", + ), +] + +# --------------------------------------------------------------------------- +# Aggregates +# --------------------------------------------------------------------------- + +ADDTOSET_SUCCESS_TESTS = ( + ADDTOSET_NULL_TESTS + + ADDTOSET_MISSING_TESTS + + ADDTOSET_NULL_MISSING_COMBINED_TESTS + + ADDTOSET_REMOVE_TESTS + + ADDTOSET_UNIQUE_TESTS + + ADDTOSET_ARRAY_ELEMENT_TESTS + + ADDTOSET_DOC_DEDUP_TESTS + + ADDTOSET_STRING_DEDUP_TESTS + + ADDTOSET_BSON_TYPE_TESTS + + ADDTOSET_MIXED_TYPE_TESTS + + ADDTOSET_NUMERIC_EQUIV_TESTS + + ADDTOSET_TYPE_DISTINCTION_TESTS + + ADDTOSET_NAN_TESTS + + ADDTOSET_INFINITY_TESTS + + ADDTOSET_NEG_ZERO_TESTS + + ADDTOSET_DECIMAL128_PRECISION_TESTS + + ADDTOSET_EXPRESSION_TESTS + + ADDTOSET_GROUPING_TESTS + + ADDTOSET_EMPTY_TESTS + + ADDTOSET_EDGE_CASE_TESTS +) + +ADDTOSET_ERROR_TESTS = ADDTOSET_ARITY_ERROR_TESTS + ADDTOSET_EXPRESSION_ERROR_TESTS + +# --------------------------------------------------------------------------- +# Primary test functions +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SUCCESS_TESTS)) 
+def test_accumulator_addToSet(collection, test_case: AccumulatorTestCase): + """Test $addToSet accumulator success cases with $group.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_ERROR_TESTS)) +def test_accumulator_addToSet_errors(collection, test_case: AccumulatorTestCase): + """Test $addToSet accumulator error cases with $group.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) + + +# --------------------------------------------------------------------------- +# Property-specific tests +# --------------------------------------------------------------------------- + +# Property [Return Type]: $addToSet always returns an array type. 
+ADDTOSET_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "return_type_numeric", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": [1, 2], "type": "array"}], + msg="$addToSet should return array type for numeric inputs", + ), + AccumulatorTestCase( + "return_type_string", + docs=[{"v": "a"}, {"v": "b"}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": ["a", "b"], "type": "array"}], + msg="$addToSet should return array type for string inputs", + ), + AccumulatorTestCase( + "return_type_null_only", + docs=[{"v": None}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": [None], "type": "array"}], + msg="$addToSet should return array type for null-only inputs", + ), + AccumulatorTestCase( + "return_type_missing_only", + docs=[{"x": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": [], "type": "array"}], + msg="$addToSet should return array type for all-missing inputs", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_RETURN_TYPE_TESTS)) +def test_accumulator_addToSet_return_type(collection, test_case: AccumulatorTestCase): + """Test $addToSet return type verification.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["value"]) diff --git 
a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py new file mode 100644 index 00000000..b63dea44 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py @@ -0,0 +1,123 @@ +"""Smoke tests for $addToSet accumulator in $bucketAuto context.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [BucketAuto Smoke]: $addToSet works correctly in $bucketAuto context. 
+ADDTOSET_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bucketAuto_basic", + docs=[{"v": 10}, {"v": 20}, {"v": 30}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}], + msg="$addToSet should collect unique values in $bucketAuto context", + ), + AccumulatorTestCase( + "bucketAuto_duplicates", + docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}], + msg="$addToSet should deduplicate values in $bucketAuto context", + ), + AccumulatorTestCase( + "bucketAuto_null_among_values", + docs=[{"v": None}, {"v": 5}, {"v": 3}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": [None, 5, 3]}], + msg="$addToSet should collect null alongside values in $bucketAuto context", + ), +] + +# Property [BucketAuto Arity Rejection]: $addToSet rejects array syntax in $bucketAuto context. 
+ADDTOSET_BUCKET_AUTO_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bucketAuto_arity_empty_array", + docs=[{"v": 1}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$addToSet": []}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject empty array in $bucketAuto context", + ), + AccumulatorTestCase( + "bucketAuto_expression_error", + docs=[{"v": 10}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}}, + } + } + ], + error_code=BAD_VALUE_ERROR, + msg="$addToSet should propagate divide-by-zero error in $bucketAuto context", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_SMOKE_TESTS)) +def test_addToSet_bucketAuto_smoke(collection, test_case: AccumulatorTestCase): + """Test $addToSet accumulator in $bucketAuto context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_ERROR_TESTS)) +def test_addToSet_bucketAuto_smoke_errors(collection, test_case: AccumulatorTestCase): + """Test $addToSet error cases in $bucketAuto context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py new file mode 
100644 index 00000000..85fb5e7c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py @@ -0,0 +1,123 @@ +"""Smoke tests for $addToSet accumulator in $bucket context.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess +from documentdb_tests.framework.error_codes import ( + DIVIDE_BY_ZERO_V2_ERROR, + GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Bucket Smoke]: $addToSet works correctly in $bucket context. +ADDTOSET_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bucket_basic", + docs=[{"v": 10}, {"v": 20}, {"v": 30}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": [10, 20, 30]}], + msg="$addToSet should collect unique values in $bucket context", + ), + AccumulatorTestCase( + "bucket_duplicates", + docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": [10, 20, 30]}], + msg="$addToSet should deduplicate values in $bucket context", + ), + AccumulatorTestCase( + "bucket_null_among_values", + docs=[{"v": None}, {"v": 5}, {"v": 3}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": [None, 5, 3]}], + msg="$addToSet should collect null alongside values in $bucket context", + ), +] + +# Property 
[Bucket Arity Rejection]: $addToSet rejects array syntax in $bucket context. +ADDTOSET_BUCKET_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bucket_arity_empty_array", + docs=[{"v": 1}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$addToSet": []}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject empty array in $bucket context", + ), + AccumulatorTestCase( + "bucket_expression_error", + docs=[{"v": 10}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}}, + } + } + ], + error_code=DIVIDE_BY_ZERO_V2_ERROR, + msg="$addToSet should propagate divide-by-zero error in $bucket context", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_SMOKE_TESTS)) +def test_addToSet_bucket_smoke(collection, test_case: AccumulatorTestCase): + """Test $addToSet accumulator in $bucket context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_ERROR_TESTS)) +def test_addToSet_bucket_smoke_errors(collection, test_case: AccumulatorTestCase): + """Test $addToSet error cases in $bucket context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py 
b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py new file mode 100644 index 00000000..3d57b5c4 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py @@ -0,0 +1,165 @@ +"""Smoke tests for $addToSet accumulator in $setWindowFields context.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [SetWindowFields Smoke]: $addToSet works correctly in $setWindowFields context. +ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "swf_unbounded", + docs=[ + {"part": "A", "v": 10}, + {"part": "A", "v": 20}, + {"part": "A", "v": 10}, + ], + pipeline=[ + { + "$setWindowFields": { + "partitionBy": "$part", + "sortBy": {"v": 1}, + "output": { + "result": { + "$addToSet": "$v", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 0, "v": 1, "result": 1}}, + {"$sort": {"v": 1}}, + {"$limit": 1}, + ], + expected=[{"v": 10, "result": [10, 20]}], + msg="$addToSet should collect unique values across entire partition with unbounded window", + ), + AccumulatorTestCase( + "swf_cumulative", + docs=[ + {"part": "A", "v": 10}, + {"part": "A", "v": 20}, + {"part": "A", "v": 10}, + ], + pipeline=[ + { + "$setWindowFields": { + "partitionBy": "$part", + "sortBy": {"_id": 1}, + "output": { + "result": { + "$addToSet": "$v", + "window": {"documents": ["unbounded", "current"]}, + } + }, + } + }, + {"$project": {"_id": 0, "v": 1, "result": 1}}, + ], + expected=[ + {"v": 10, "result": [10]}, + {"v": 20, "result": [10, 20]}, + {"v": 10, "result": [10, 
20]}, + ], + msg="$addToSet should compute cumulative unique values with [unbounded, current] window", + ), + AccumulatorTestCase( + "swf_partition_by", + docs=[ + {"part": "A", "v": 1}, + {"part": "A", "v": 2}, + {"part": "B", "v": 3}, + {"part": "B", "v": 3}, + ], + pipeline=[ + { + "$setWindowFields": { + "partitionBy": "$part", + "sortBy": {"v": 1}, + "output": { + "result": { + "$addToSet": "$v", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 0, "part": 1, "result": 1}}, + {"$group": {"_id": "$part", "result": {"$first": "$result"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[{"_id": "A", "result": [1, 2]}, {"_id": "B", "result": [3]}], + msg="$addToSet should compute separate unique sets per partition", + ), + AccumulatorTestCase( + "swf_duplicates", + docs=[ + {"part": "A", "v": 5}, + {"part": "A", "v": 5}, + {"part": "A", "v": 10}, + {"part": "A", "v": 10}, + ], + pipeline=[ + { + "$setWindowFields": { + "partitionBy": "$part", + "sortBy": {"v": 1}, + "output": { + "result": { + "$addToSet": "$v", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 0, "v": 1, "result": 1}}, + {"$limit": 1}, + ], + expected=[{"v": 5, "result": [5, 10]}], + msg="$addToSet should deduplicate values within window", + ), + AccumulatorTestCase( + "swf_null_values", + docs=[ + {"part": "A", "v": None}, + {"part": "A", "v": 5}, + {"part": "A", "v": None}, + ], + pipeline=[ + { + "$setWindowFields": { + "partitionBy": "$part", + "sortBy": {"_id": 1}, + "output": { + "result": { + "$addToSet": "$v", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 0, "v": 1, "result": 1}}, + {"$limit": 1}, + ], + expected=[{"v": None, "result": [None, 5]}], + msg="$addToSet should collect null as a value in $setWindowFields window", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS)) +def 
test_addToSet_setWindowFields_smoke(collection, test_case: AccumulatorTestCase): + """Test $addToSet accumulator in $setWindowFields context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) From fbaa6e29aa06af8aec3d94717d96a6307a6da612 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Wed, 20 May 2026 12:33:21 -0700 Subject: [PATCH 02/13] merge files into 1 Signed-off-by: Alina (Xi) Li --- .../addToSet/test_accumulator_addToSet.py | 387 +++++++++++++++++- .../test_addToSet_bucketAuto_smoke.py | 123 ------ .../addToSet/test_addToSet_bucket_smoke.py | 123 ------ .../test_addToSet_setWindowFields_smoke.py | 165 -------- 4 files changed, 385 insertions(+), 413 deletions(-) delete mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py delete mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py delete mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py index bcd07809..6d65a6da 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py @@ -1,4 +1,4 @@ -"""Tests for $addToSet accumulator ($group).""" +"""Tests for $addToSet accumulator ($group, $bucket, $bucketAuto, $setWindowFields).""" from __future__ import annotations @@ -23,6 +23,7 @@ ) from 
documentdb_tests.framework.assertions import assertFailureCode, assertSuccess from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, CONVERSION_FAILURE_ERROR, DIVIDE_BY_ZERO_V2_ERROR, EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, @@ -1254,7 +1255,7 @@ def test_accumulator_addToSet(collection, test_case: AccumulatorTestCase): @pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_ERROR_TESTS)) -def test_accumulator_addToSet_errors(collection, test_case: AccumulatorTestCase): +def test_accumulator_addToSet_errors(collection, test_case): """Test $addToSet accumulator error cases with $group.""" if test_case.docs: collection.insert_many(test_case.docs) @@ -1324,3 +1325,385 @@ def test_accumulator_addToSet_return_type(collection, test_case: AccumulatorTest {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["value"]) + + +# --------------------------------------------------------------------------- +# $bucket smoke tests +# --------------------------------------------------------------------------- + +# Property [Bucket Smoke]: $addToSet works correctly in $bucket context. 
+ADDTOSET_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bucket_basic", + docs=[{"v": 10}, {"v": 20}, {"v": 30}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": [10, 20, 30]}], + msg="$addToSet should collect unique values in $bucket context", + ), + AccumulatorTestCase( + "bucket_duplicates", + docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": [10, 20, 30]}], + msg="$addToSet should deduplicate values in $bucket context", + ), + AccumulatorTestCase( + "bucket_null_among_values", + docs=[{"v": None}, {"v": 5}, {"v": 3}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": [None, 5, 3]}], + msg="$addToSet should collect null alongside values in $bucket context", + ), +] + +# Property [Bucket Arity Rejection]: $addToSet rejects array syntax in $bucket context. 
+ADDTOSET_BUCKET_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bucket_arity_empty_array", + docs=[{"v": 1}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$addToSet": []}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject empty array in $bucket context", + ), + AccumulatorTestCase( + "bucket_expression_error", + docs=[{"v": 10}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}}, + } + } + ], + error_code=DIVIDE_BY_ZERO_V2_ERROR, + msg="$addToSet should propagate divide-by-zero error in $bucket context", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_SMOKE_TESTS)) +def test_addToSet_bucket_smoke(collection, test_case: AccumulatorTestCase): + """Test $addToSet accumulator in $bucket context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_ERROR_TESTS)) +def test_addToSet_bucket_smoke_errors(collection, test_case): + """Test $addToSet error cases in $bucket context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) + + +# --------------------------------------------------------------------------- +# $bucketAuto smoke tests +# --------------------------------------------------------------------------- + +# Property [BucketAuto Smoke]: $addToSet works correctly in $bucketAuto context. 
+ADDTOSET_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bucketAuto_basic", + docs=[{"v": 10}, {"v": 20}, {"v": 30}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}], + msg="$addToSet should collect unique values in $bucketAuto context", + ), + AccumulatorTestCase( + "bucketAuto_duplicates", + docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}], + msg="$addToSet should deduplicate values in $bucketAuto context", + ), + AccumulatorTestCase( + "bucketAuto_null_among_values", + docs=[{"v": None}, {"v": 5}, {"v": 3}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$addToSet": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": [None, 5, 3]}], + msg="$addToSet should collect null alongside values in $bucketAuto context", + ), +] + +# Property [BucketAuto Arity Rejection]: $addToSet rejects array syntax in $bucketAuto context. 
+ADDTOSET_BUCKET_AUTO_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bucketAuto_arity_empty_array", + docs=[{"v": 1}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$addToSet": []}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject empty array in $bucketAuto context", + ), + AccumulatorTestCase( + "bucketAuto_expression_error", + docs=[{"v": 10}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}}, + } + } + ], + error_code=BAD_VALUE_ERROR, + msg="$addToSet should propagate divide-by-zero error in $bucketAuto context", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_SMOKE_TESTS)) +def test_addToSet_bucketAuto_smoke(collection, test_case: AccumulatorTestCase): + """Test $addToSet accumulator in $bucketAuto context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_ERROR_TESTS)) +def test_addToSet_bucketAuto_smoke_errors(collection, test_case): + """Test $addToSet error cases in $bucketAuto context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) + + +# --------------------------------------------------------------------------- +# $setWindowFields smoke tests +# --------------------------------------------------------------------------- + +# Property [SetWindowFields Smoke]: $addToSet works 
correctly in $setWindowFields context. +ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "swf_unbounded", + docs=[ + {"part": "A", "v": 10}, + {"part": "A", "v": 20}, + {"part": "A", "v": 10}, + ], + pipeline=[ + { + "$setWindowFields": { + "partitionBy": "$part", + "sortBy": {"v": 1}, + "output": { + "result": { + "$addToSet": "$v", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 0, "v": 1, "result": 1}}, + {"$sort": {"v": 1}}, + {"$limit": 1}, + ], + expected=[{"v": 10, "result": [10, 20]}], + msg="$addToSet should collect unique values across entire partition with unbounded window", + ), + AccumulatorTestCase( + "swf_cumulative", + docs=[ + {"part": "A", "v": 10}, + {"part": "A", "v": 20}, + {"part": "A", "v": 10}, + ], + pipeline=[ + { + "$setWindowFields": { + "partitionBy": "$part", + "sortBy": {"_id": 1}, + "output": { + "result": { + "$addToSet": "$v", + "window": {"documents": ["unbounded", "current"]}, + } + }, + } + }, + {"$project": {"_id": 0, "v": 1, "result": 1}}, + ], + expected=[ + {"v": 10, "result": [10]}, + {"v": 20, "result": [10, 20]}, + {"v": 10, "result": [10, 20]}, + ], + msg="$addToSet should compute cumulative unique values with [unbounded, current] window", + ), + AccumulatorTestCase( + "swf_partition_by", + docs=[ + {"part": "A", "v": 1}, + {"part": "A", "v": 2}, + {"part": "B", "v": 3}, + {"part": "B", "v": 3}, + ], + pipeline=[ + { + "$setWindowFields": { + "partitionBy": "$part", + "sortBy": {"v": 1}, + "output": { + "result": { + "$addToSet": "$v", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 0, "part": 1, "result": 1}}, + {"$group": {"_id": "$part", "result": {"$first": "$result"}}}, + {"$sort": {"_id": 1}}, + ], + expected=[{"_id": "A", "result": [1, 2]}, {"_id": "B", "result": [3]}], + msg="$addToSet should compute separate unique sets per partition", + ), + AccumulatorTestCase( + 
"swf_duplicates", + docs=[ + {"part": "A", "v": 5}, + {"part": "A", "v": 5}, + {"part": "A", "v": 10}, + {"part": "A", "v": 10}, + ], + pipeline=[ + { + "$setWindowFields": { + "partitionBy": "$part", + "sortBy": {"v": 1}, + "output": { + "result": { + "$addToSet": "$v", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 0, "v": 1, "result": 1}}, + {"$limit": 1}, + ], + expected=[{"v": 5, "result": [5, 10]}], + msg="$addToSet should deduplicate values within window", + ), + AccumulatorTestCase( + "swf_null_values", + docs=[ + {"part": "A", "v": None}, + {"part": "A", "v": 5}, + {"part": "A", "v": None}, + ], + pipeline=[ + { + "$setWindowFields": { + "partitionBy": "$part", + "sortBy": {"_id": 1}, + "output": { + "result": { + "$addToSet": "$v", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 0, "v": 1, "result": 1}}, + {"$limit": 1}, + ], + expected=[{"v": None, "result": [None, 5]}], + msg="$addToSet should collect null as a value in $setWindowFields window", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS)) +def test_addToSet_setWindowFields_smoke(collection, test_case: AccumulatorTestCase): + """Test $addToSet accumulator in $setWindowFields context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py deleted file mode 100644 index b63dea44..00000000 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py +++ 
/dev/null @@ -1,123 +0,0 @@ -"""Smoke tests for $addToSet accumulator in $bucketAuto context.""" - -from __future__ import annotations - -import pytest - -from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( - AccumulatorTestCase, -) -from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess -from documentdb_tests.framework.error_codes import ( - BAD_VALUE_ERROR, - GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, -) -from documentdb_tests.framework.executor import execute_command -from documentdb_tests.framework.parametrize import pytest_params - -# Property [BucketAuto Smoke]: $addToSet works correctly in $bucketAuto context. -ADDTOSET_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucketAuto_basic", - docs=[{"v": 10}, {"v": 20}, {"v": 30}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}], - msg="$addToSet should collect unique values in $bucketAuto context", - ), - AccumulatorTestCase( - "bucketAuto_duplicates", - docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}], - msg="$addToSet should deduplicate values in $bucketAuto context", - ), - AccumulatorTestCase( - "bucketAuto_null_among_values", - docs=[{"v": None}, {"v": 5}, {"v": 3}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": [None, 5, 3]}], - msg="$addToSet should collect null alongside values in $bucketAuto context", - ), -] - -# Property [BucketAuto Arity Rejection]: $addToSet rejects array syntax in $bucketAuto 
context. -ADDTOSET_BUCKET_AUTO_ERROR_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucketAuto_arity_empty_array", - docs=[{"v": 1}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$addToSet": []}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$addToSet should reject empty array in $bucketAuto context", - ), - AccumulatorTestCase( - "bucketAuto_expression_error", - docs=[{"v": 10}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}}, - } - } - ], - error_code=BAD_VALUE_ERROR, - msg="$addToSet should propagate divide-by-zero error in $bucketAuto context", - ), -] - - -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_SMOKE_TESTS)) -def test_addToSet_bucketAuto_smoke(collection, test_case: AccumulatorTestCase): - """Test $addToSet accumulator in $bucketAuto context.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) - - -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_ERROR_TESTS)) -def test_addToSet_bucketAuto_smoke_errors(collection, test_case: AccumulatorTestCase): - """Test $addToSet error cases in $bucketAuto context.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertFailureCode(result, test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py 
deleted file mode 100644 index 85fb5e7c..00000000 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py +++ /dev/null @@ -1,123 +0,0 @@ -"""Smoke tests for $addToSet accumulator in $bucket context.""" - -from __future__ import annotations - -import pytest - -from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( - AccumulatorTestCase, -) -from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess -from documentdb_tests.framework.error_codes import ( - DIVIDE_BY_ZERO_V2_ERROR, - GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, -) -from documentdb_tests.framework.executor import execute_command -from documentdb_tests.framework.parametrize import pytest_params - -# Property [Bucket Smoke]: $addToSet works correctly in $bucket context. -ADDTOSET_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucket_basic", - docs=[{"v": 10}, {"v": 20}, {"v": 30}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": [10, 20, 30]}], - msg="$addToSet should collect unique values in $bucket context", - ), - AccumulatorTestCase( - "bucket_duplicates", - docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": [10, 20, 30]}], - msg="$addToSet should deduplicate values in $bucket context", - ), - AccumulatorTestCase( - "bucket_null_among_values", - docs=[{"v": None}, {"v": 5}, {"v": 3}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": [None, 5, 3]}], - msg="$addToSet should collect null alongside values in $bucket context", - ), -] - 
-# Property [Bucket Arity Rejection]: $addToSet rejects array syntax in $bucket context. -ADDTOSET_BUCKET_ERROR_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucket_arity_empty_array", - docs=[{"v": 1}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$addToSet": []}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$addToSet should reject empty array in $bucket context", - ), - AccumulatorTestCase( - "bucket_expression_error", - docs=[{"v": 10}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}}, - } - } - ], - error_code=DIVIDE_BY_ZERO_V2_ERROR, - msg="$addToSet should propagate divide-by-zero error in $bucket context", - ), -] - - -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_SMOKE_TESTS)) -def test_addToSet_bucket_smoke(collection, test_case: AccumulatorTestCase): - """Test $addToSet accumulator in $bucket context.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) - - -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_ERROR_TESTS)) -def test_addToSet_bucket_smoke_errors(collection, test_case: AccumulatorTestCase): - """Test $addToSet error cases in $bucket context.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertFailureCode(result, test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py 
b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py deleted file mode 100644 index 3d57b5c4..00000000 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py +++ /dev/null @@ -1,165 +0,0 @@ -"""Smoke tests for $addToSet accumulator in $setWindowFields context.""" - -from __future__ import annotations - -import pytest - -from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( - AccumulatorTestCase, -) -from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.executor import execute_command -from documentdb_tests.framework.parametrize import pytest_params - -# Property [SetWindowFields Smoke]: $addToSet works correctly in $setWindowFields context. -ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "swf_unbounded", - docs=[ - {"part": "A", "v": 10}, - {"part": "A", "v": 20}, - {"part": "A", "v": 10}, - ], - pipeline=[ - { - "$setWindowFields": { - "partitionBy": "$part", - "sortBy": {"v": 1}, - "output": { - "result": { - "$addToSet": "$v", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 0, "v": 1, "result": 1}}, - {"$sort": {"v": 1}}, - {"$limit": 1}, - ], - expected=[{"v": 10, "result": [10, 20]}], - msg="$addToSet should collect unique values across entire partition with unbounded window", - ), - AccumulatorTestCase( - "swf_cumulative", - docs=[ - {"part": "A", "v": 10}, - {"part": "A", "v": 20}, - {"part": "A", "v": 10}, - ], - pipeline=[ - { - "$setWindowFields": { - "partitionBy": "$part", - "sortBy": {"_id": 1}, - "output": { - "result": { - "$addToSet": "$v", - "window": {"documents": ["unbounded", "current"]}, - } - }, - } - }, - {"$project": {"_id": 0, "v": 1, "result": 1}}, - ], - expected=[ - {"v": 10, "result": [10]}, - {"v": 20, "result": [10, 20]}, - {"v": 10, "result": 
[10, 20]}, - ], - msg="$addToSet should compute cumulative unique values with [unbounded, current] window", - ), - AccumulatorTestCase( - "swf_partition_by", - docs=[ - {"part": "A", "v": 1}, - {"part": "A", "v": 2}, - {"part": "B", "v": 3}, - {"part": "B", "v": 3}, - ], - pipeline=[ - { - "$setWindowFields": { - "partitionBy": "$part", - "sortBy": {"v": 1}, - "output": { - "result": { - "$addToSet": "$v", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 0, "part": 1, "result": 1}}, - {"$group": {"_id": "$part", "result": {"$first": "$result"}}}, - {"$sort": {"_id": 1}}, - ], - expected=[{"_id": "A", "result": [1, 2]}, {"_id": "B", "result": [3]}], - msg="$addToSet should compute separate unique sets per partition", - ), - AccumulatorTestCase( - "swf_duplicates", - docs=[ - {"part": "A", "v": 5}, - {"part": "A", "v": 5}, - {"part": "A", "v": 10}, - {"part": "A", "v": 10}, - ], - pipeline=[ - { - "$setWindowFields": { - "partitionBy": "$part", - "sortBy": {"v": 1}, - "output": { - "result": { - "$addToSet": "$v", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 0, "v": 1, "result": 1}}, - {"$limit": 1}, - ], - expected=[{"v": 5, "result": [5, 10]}], - msg="$addToSet should deduplicate values within window", - ), - AccumulatorTestCase( - "swf_null_values", - docs=[ - {"part": "A", "v": None}, - {"part": "A", "v": 5}, - {"part": "A", "v": None}, - ], - pipeline=[ - { - "$setWindowFields": { - "partitionBy": "$part", - "sortBy": {"_id": 1}, - "output": { - "result": { - "$addToSet": "$v", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 0, "v": 1, "result": 1}}, - {"$limit": 1}, - ], - expected=[{"v": None, "result": [None, 5]}], - msg="$addToSet should collect null as a value in $setWindowFields window", - ), -] - - -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS)) -def 
test_addToSet_setWindowFields_smoke(collection, test_case: AccumulatorTestCase): - """Test $addToSet accumulator in $setWindowFields context.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) From 804c522bb450eab6b6441c5b1eac58b2e13cccb8 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Mon, 25 May 2026 12:01:04 -0700 Subject: [PATCH 03/13] add init.py Signed-off-by: Alina (Xi) Li --- .../tests/core/operator/accumulators/addToSet/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/__init__.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/__init__.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/__init__.py new file mode 100644 index 00000000..e69de29b From ec3968e2c7e1ef0c7313cae0900dedb2d8e9aee0 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Mon, 25 May 2026 12:01:36 -0700 Subject: [PATCH 04/13] rename smoke tests Signed-off-by: Alina (Xi) Li --- ...accumulator_addToSet.py => test_accumulator_addToSet_smoke.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/{test_smoke_accumulator_addToSet.py => test_accumulator_addToSet_smoke.py} (100%) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_smoke_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_smoke.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_smoke_accumulator_addToSet.py rename to 
documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_smoke.py From c3bb55dc19a36a8eb1e1aa2311b0bc41402f1393 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Mon, 25 May 2026 12:04:32 -0700 Subject: [PATCH 05/13] remove stage tests Signed-off-by: Alina (Xi) Li --- .../addToSet/test_accumulator_addToSet.py | 435 +----------------- 1 file changed, 2 insertions(+), 433 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py index 6d65a6da..55b9eee8 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py @@ -1,4 +1,4 @@ -"""Tests for $addToSet accumulator ($group, $bucket, $bucketAuto, $setWindowFields).""" +"""Tests for $addToSet accumulator ($group).""" from __future__ import annotations @@ -23,11 +23,8 @@ ) from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess from documentdb_tests.framework.error_codes import ( - BAD_VALUE_ERROR, CONVERSION_FAILURE_ERROR, DIVIDE_BY_ZERO_V2_ERROR, - EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, - GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, MODULO_BY_ZERO_V2_ERROR, ) from documentdb_tests.framework.executor import execute_command @@ -1137,52 +1134,6 @@ ), ] -# Property [Arity Rejection]: $addToSet in accumulator context is unary and rejects array syntax. 
-ADDTOSET_ARITY_ERROR_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "arity_empty_array", - docs=[{"v": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$addToSet": []}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$addToSet should reject empty array in accumulator context", - ), - AccumulatorTestCase( - "arity_single_element_literal", - docs=[{"v": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$addToSet": [1]}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$addToSet should reject single-element literal array in accumulator context", - ), - AccumulatorTestCase( - "arity_single_field_ref", - docs=[{"v": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$addToSet": ["$v"]}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$addToSet should reject single field ref in array in accumulator context", - ), - AccumulatorTestCase( - "arity_multi_element", - docs=[{"v": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$addToSet": [1, 2, 3]}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$addToSet should reject multi-element array in accumulator context", - ), - AccumulatorTestCase( - "arity_multi_key_expression", - docs=[{"v": 1}], - pipeline=[ - { - "$group": { - "_id": None, - "result": {"$addToSet": {"$add": [1, 2], "$multiply": [3, 4]}}, - } - } - ], - error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, - msg="$addToSet should reject multi-key expression object", - ), -] - # Property [Expression Error Propagation]: errors from sub-expressions propagate. 
ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( @@ -1235,7 +1186,7 @@ + ADDTOSET_EDGE_CASE_TESTS ) -ADDTOSET_ERROR_TESTS = ADDTOSET_ARITY_ERROR_TESTS + ADDTOSET_EXPRESSION_ERROR_TESTS +ADDTOSET_ERROR_TESTS = ADDTOSET_EXPRESSION_ERROR_TESTS # --------------------------------------------------------------------------- # Primary test functions @@ -1325,385 +1276,3 @@ def test_accumulator_addToSet_return_type(collection, test_case: AccumulatorTest {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, ) assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["value"]) - - -# --------------------------------------------------------------------------- -# $bucket smoke tests -# --------------------------------------------------------------------------- - -# Property [Bucket Smoke]: $addToSet works correctly in $bucket context. -ADDTOSET_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucket_basic", - docs=[{"v": 10}, {"v": 20}, {"v": 30}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": [10, 20, 30]}], - msg="$addToSet should collect unique values in $bucket context", - ), - AccumulatorTestCase( - "bucket_duplicates", - docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": [10, 20, 30]}], - msg="$addToSet should deduplicate values in $bucket context", - ), - AccumulatorTestCase( - "bucket_null_among_values", - docs=[{"v": None}, {"v": 5}, {"v": 3}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": [None, 5, 3]}], - 
msg="$addToSet should collect null alongside values in $bucket context", - ), -] - -# Property [Bucket Arity Rejection]: $addToSet rejects array syntax in $bucket context. -ADDTOSET_BUCKET_ERROR_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucket_arity_empty_array", - docs=[{"v": 1}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$addToSet": []}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$addToSet should reject empty array in $bucket context", - ), - AccumulatorTestCase( - "bucket_expression_error", - docs=[{"v": 10}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}}, - } - } - ], - error_code=DIVIDE_BY_ZERO_V2_ERROR, - msg="$addToSet should propagate divide-by-zero error in $bucket context", - ), -] - - -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_SMOKE_TESTS)) -def test_addToSet_bucket_smoke(collection, test_case: AccumulatorTestCase): - """Test $addToSet accumulator in $bucket context.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) - - -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_ERROR_TESTS)) -def test_addToSet_bucket_smoke_errors(collection, test_case): - """Test $addToSet error cases in $bucket context.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertFailureCode(result, test_case.error_code, msg=test_case.msg) - - -# --------------------------------------------------------------------------- -# $bucketAuto smoke tests -# 
--------------------------------------------------------------------------- - -# Property [BucketAuto Smoke]: $addToSet works correctly in $bucketAuto context. -ADDTOSET_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucketAuto_basic", - docs=[{"v": 10}, {"v": 20}, {"v": 30}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}], - msg="$addToSet should collect unique values in $bucketAuto context", - ), - AccumulatorTestCase( - "bucketAuto_duplicates", - docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}], - msg="$addToSet should deduplicate values in $bucketAuto context", - ), - AccumulatorTestCase( - "bucketAuto_null_among_values", - docs=[{"v": None}, {"v": 5}, {"v": 3}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$addToSet": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": [None, 5, 3]}], - msg="$addToSet should collect null alongside values in $bucketAuto context", - ), -] - -# Property [BucketAuto Arity Rejection]: $addToSet rejects array syntax in $bucketAuto context. 
-ADDTOSET_BUCKET_AUTO_ERROR_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucketAuto_arity_empty_array", - docs=[{"v": 1}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$addToSet": []}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$addToSet should reject empty array in $bucketAuto context", - ), - AccumulatorTestCase( - "bucketAuto_expression_error", - docs=[{"v": 10}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}}, - } - } - ], - error_code=BAD_VALUE_ERROR, - msg="$addToSet should propagate divide-by-zero error in $bucketAuto context", - ), -] - - -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_SMOKE_TESTS)) -def test_addToSet_bucketAuto_smoke(collection, test_case: AccumulatorTestCase): - """Test $addToSet accumulator in $bucketAuto context.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) - - -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_ERROR_TESTS)) -def test_addToSet_bucketAuto_smoke_errors(collection, test_case): - """Test $addToSet error cases in $bucketAuto context.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertFailureCode(result, test_case.error_code, msg=test_case.msg) - - -# --------------------------------------------------------------------------- -# $setWindowFields smoke tests -# --------------------------------------------------------------------------- - -# Property [SetWindowFields Smoke]: $addToSet works 
correctly in $setWindowFields context. -ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "swf_unbounded", - docs=[ - {"part": "A", "v": 10}, - {"part": "A", "v": 20}, - {"part": "A", "v": 10}, - ], - pipeline=[ - { - "$setWindowFields": { - "partitionBy": "$part", - "sortBy": {"v": 1}, - "output": { - "result": { - "$addToSet": "$v", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 0, "v": 1, "result": 1}}, - {"$sort": {"v": 1}}, - {"$limit": 1}, - ], - expected=[{"v": 10, "result": [10, 20]}], - msg="$addToSet should collect unique values across entire partition with unbounded window", - ), - AccumulatorTestCase( - "swf_cumulative", - docs=[ - {"part": "A", "v": 10}, - {"part": "A", "v": 20}, - {"part": "A", "v": 10}, - ], - pipeline=[ - { - "$setWindowFields": { - "partitionBy": "$part", - "sortBy": {"_id": 1}, - "output": { - "result": { - "$addToSet": "$v", - "window": {"documents": ["unbounded", "current"]}, - } - }, - } - }, - {"$project": {"_id": 0, "v": 1, "result": 1}}, - ], - expected=[ - {"v": 10, "result": [10]}, - {"v": 20, "result": [10, 20]}, - {"v": 10, "result": [10, 20]}, - ], - msg="$addToSet should compute cumulative unique values with [unbounded, current] window", - ), - AccumulatorTestCase( - "swf_partition_by", - docs=[ - {"part": "A", "v": 1}, - {"part": "A", "v": 2}, - {"part": "B", "v": 3}, - {"part": "B", "v": 3}, - ], - pipeline=[ - { - "$setWindowFields": { - "partitionBy": "$part", - "sortBy": {"v": 1}, - "output": { - "result": { - "$addToSet": "$v", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 0, "part": 1, "result": 1}}, - {"$group": {"_id": "$part", "result": {"$first": "$result"}}}, - {"$sort": {"_id": 1}}, - ], - expected=[{"_id": "A", "result": [1, 2]}, {"_id": "B", "result": [3]}], - msg="$addToSet should compute separate unique sets per partition", - ), - AccumulatorTestCase( - 
"swf_duplicates", - docs=[ - {"part": "A", "v": 5}, - {"part": "A", "v": 5}, - {"part": "A", "v": 10}, - {"part": "A", "v": 10}, - ], - pipeline=[ - { - "$setWindowFields": { - "partitionBy": "$part", - "sortBy": {"v": 1}, - "output": { - "result": { - "$addToSet": "$v", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 0, "v": 1, "result": 1}}, - {"$limit": 1}, - ], - expected=[{"v": 5, "result": [5, 10]}], - msg="$addToSet should deduplicate values within window", - ), - AccumulatorTestCase( - "swf_null_values", - docs=[ - {"part": "A", "v": None}, - {"part": "A", "v": 5}, - {"part": "A", "v": None}, - ], - pipeline=[ - { - "$setWindowFields": { - "partitionBy": "$part", - "sortBy": {"_id": 1}, - "output": { - "result": { - "$addToSet": "$v", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 0, "v": 1, "result": 1}}, - {"$limit": 1}, - ], - expected=[{"v": None, "result": [None, 5]}], - msg="$addToSet should collect null as a value in $setWindowFields window", - ), -] - - -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS)) -def test_addToSet_setWindowFields_smoke(collection, test_case: AccumulatorTestCase): - """Test $addToSet accumulator in $setWindowFields context.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) From 01935684bfe07cfa968ee61041b4b511e24fdb8a Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Mon, 25 May 2026 12:21:42 -0700 Subject: [PATCH 06/13] inline test functions Signed-off-by: Alina (Xi) Li --- .../addToSet/test_accumulator_addToSet.py | 35 ++++++++++++++----- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git 
a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py index 55b9eee8..05208c0c 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py @@ -30,11 +30,6 @@ from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params -_OID1 = ObjectId("000000000000000000000001") -_OID2 = ObjectId("000000000000000000000002") -_DT1 = datetime(2020, 1, 1, tzinfo=timezone.utc) -_DT2 = datetime(2021, 1, 1, tzinfo=timezone.utc) - # --------------------------------------------------------------------------- # Property lists # --------------------------------------------------------------------------- @@ -514,22 +509,44 @@ ), AccumulatorTestCase( "bson_datetime", - docs=[{"v": _DT1}, {"v": _DT2}, {"v": _DT1}], + docs=[ + {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)}, + {"v": datetime(2021, 1, 1, tzinfo=timezone.utc)}, + {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)}, + ], pipeline=[ {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, {"$project": {"_id": 0, "result": 1}}, ], - expected=[{"result": [_DT1, _DT2]}], + expected=[ + { + "result": [ + datetime(2020, 1, 1, tzinfo=timezone.utc), + datetime(2021, 1, 1, tzinfo=timezone.utc), + ] + } + ], msg="$addToSet should collect and deduplicate datetime values", ), AccumulatorTestCase( "bson_objectid", - docs=[{"v": _OID1}, {"v": _OID2}, {"v": _OID1}], + docs=[ + {"v": ObjectId("000000000000000000000001")}, + {"v": ObjectId("000000000000000000000002")}, + {"v": ObjectId("000000000000000000000001")}, + ], pipeline=[ {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, {"$project": {"_id": 0, "result": 1}}, ], - expected=[{"result": [_OID1, _OID2]}], 
+ expected=[ + { + "result": [ + ObjectId("000000000000000000000001"), + ObjectId("000000000000000000000002"), + ] + } + ], msg="$addToSet should collect and deduplicate ObjectId values", ), AccumulatorTestCase( From 74cf984d73d3f4558a275da6f4d52aa66df0825c Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Mon, 25 May 2026 14:43:08 -0700 Subject: [PATCH 07/13] split into files Signed-off-by: Alina (Xi) Li --- .../addToSet/test_accumulator_addToSet.py | 841 +----------------- .../test_accumulator_addToSet_bson_types.py | 254 ++++++ .../test_accumulator_addToSet_dedup.py | 499 +++++++++++ .../test_accumulator_addToSet_errors.py | 62 ++ .../test_accumulator_addToSet_null_missing.py | 142 +++ 5 files changed, 963 insertions(+), 835 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py index 05208c0c..4d569dd5 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py @@ -1,32 +1,14 @@ -"""Tests for $addToSet accumulator ($group).""" +"""Tests for $addToSet accumulator core behavior ($group).""" from __future__ import annotations -import math -from datetime import datetime, timezone - import pytest -from bson import ( - 
Binary, - Code, - Decimal128, - Int64, - MaxKey, - MinKey, - ObjectId, - Regex, - Timestamp, -) +from bson import Binary, Regex from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( AccumulatorTestCase, ) -from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess -from documentdb_tests.framework.error_codes import ( - CONVERSION_FAILURE_ERROR, - DIVIDE_BY_ZERO_V2_ERROR, - MODULO_BY_ZERO_V2_ERROR, -) +from documentdb_tests.framework.assertions import assertSuccess from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params @@ -34,108 +16,6 @@ # Property lists # --------------------------------------------------------------------------- -# Property [Null Collected]: null values are collected as valid values and deduplicated. -ADDTOSET_NULL_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "null_all", - docs=[{"v": None}, {"v": None}, {"v": None}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [None]}], - msg="$addToSet should collect null and deduplicate to a single null", - ), - AccumulatorTestCase( - "null_single", - docs=[{"v": None}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [None]}], - msg="$addToSet should collect a single null value", - ), - AccumulatorTestCase( - "null_among_values", - docs=[{"v": None}, {"v": 5}, {"v": 3}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [None, 5, 3]}], - msg="$addToSet should collect null alongside other values", - ), - AccumulatorTestCase( - "null_and_values_dedup", - docs=[{"v": 10}, {"v": None}, {"v": 5}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 
0, "result": 1}}, - ], - expected=[{"result": [10, None, 5]}], - msg="$addToSet should collect null and distinct values without duplication", - ), -] - -# Property [Missing Excluded]: missing fields are excluded from the result. -ADDTOSET_MISSING_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "missing_all", - docs=[{"x": 1}, {"x": 2}, {"x": 3}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": []}], - msg="$addToSet should return empty array when all fields are missing", - ), - AccumulatorTestCase( - "missing_single", - docs=[{"x": 1}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": []}], - msg="$addToSet should return empty array for a single doc with missing field", - ), - AccumulatorTestCase( - "missing_among_values", - docs=[{"x": 1}, {"v": 5}, {"v": 3}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [5, 3]}], - msg="$addToSet should exclude missing fields and collect only present values", - ), -] - -# Property [Null and Missing Combined]: null is collected while missing is excluded. 
-ADDTOSET_NULL_MISSING_COMBINED_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "combined_null_and_missing", - docs=[{"v": None}, {"x": 1}, {"v": None}, {"x": 2}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [None]}], - msg="$addToSet should collect null but exclude missing fields", - ), - AccumulatorTestCase( - "combined_null_missing_and_values", - docs=[{"v": 10}, {"v": None}, {"x": 1}, {"v": 5}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [10, None, 5]}], - msg="$addToSet should collect null and values but exclude missing fields", - ), -] - # Property [$$REMOVE Excluded]: $$REMOVE via $cond is treated as missing. ADDTOSET_REMOVE_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( @@ -298,663 +178,6 @@ ), ] -# Property [Document Duplicate Detection]: documents are duplicates only if they have -# exact same fields, values, and field order. 
-ADDTOSET_DOC_DEDUP_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "doc_identical", - docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 2}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"a": 1, "b": 2}]}], - msg="$addToSet should deduplicate identical documents", - ), - AccumulatorTestCase( - "doc_different_field_order", - docs=[{"v": {"a": 1, "b": 2}}, {"v": {"b": 2, "a": 1}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"b": 2, "a": 1}, {"a": 1, "b": 2}]}], - msg="$addToSet should treat documents with different field order as distinct", - ), - AccumulatorTestCase( - "doc_different_values", - docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 3}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"a": 1, "b": 2}, {"a": 1, "b": 3}]}], - msg="$addToSet should treat documents with different values as distinct", - ), - AccumulatorTestCase( - "doc_nested_identical", - docs=[{"v": {"a": {"x": 1}}}, {"v": {"a": {"x": 1}}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"a": {"x": 1}}]}], - msg="$addToSet should deduplicate nested documents with identical structure", - ), - AccumulatorTestCase( - "doc_nested_different_order", - docs=[{"v": {"a": {"x": 1, "y": 2}}}, {"v": {"a": {"y": 2, "x": 1}}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"a": {"x": 1, "y": 2}}, {"a": {"y": 2, "x": 1}}]}], - msg="$addToSet should treat nested documents with different field order as distinct", - ), - AccumulatorTestCase( - "doc_empty", - docs=[{"v": {}}, {"v": {}}], - pipeline=[ - 
{"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{}]}], - msg="$addToSet should deduplicate empty documents", - ), - AccumulatorTestCase( - "doc_subset", - docs=[{"v": {"a": 1}}, {"v": {"a": 1, "b": 2}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"a": 1, "b": 2}, {"a": 1}]}], - msg="$addToSet should treat a document subset and superset as distinct", - ), - AccumulatorTestCase( - "doc_with_array_value", - docs=[{"v": {"a": [1, 2]}}, {"v": {"a": [1, 2]}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"a": [1, 2]}]}], - msg="$addToSet should deduplicate documents containing identical array values", - ), - AccumulatorTestCase( - "doc_with_null_value", - docs=[{"v": {"a": None}}, {"v": {"a": None}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"a": None}]}], - msg="$addToSet should deduplicate documents with null field values", - ), - AccumulatorTestCase( - "doc_with_nested_null", - docs=[{"v": {"a": {"b": None}}}, {"v": {"a": {"b": None}}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"a": {"b": None}}]}], - msg="$addToSet should deduplicate documents with nested null values", - ), -] - -# Property [String Deduplication]: strings are compared by byte value with no Unicode normalization. 
-ADDTOSET_STRING_DEDUP_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "string_identical", - docs=[{"v": "abc"}, {"v": "abc"}, {"v": "def"}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": ["abc", "def"]}], - msg="$addToSet should deduplicate identical strings", - ), - AccumulatorTestCase( - "string_empty", - docs=[{"v": ""}, {"v": ""}, {"v": "x"}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": ["", "x"]}], - msg="$addToSet should deduplicate empty strings", - ), - AccumulatorTestCase( - "string_unicode_no_normalization", - docs=[ - {"v": "\u00e9"}, - {"v": "\u0065\u0301"}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": ["\u00e9", "\u0065\u0301"]}], - msg="$addToSet should not normalize Unicode; precomposed and decomposed are distinct", - ), -] - -# Property [BSON Type Collection]: $addToSet collects and deduplicates values of every -# non-deprecated BSON type. 
-ADDTOSET_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bson_int32", - docs=[{"v": 10}, {"v": 20}, {"v": 10}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [10, 20]}], - msg="$addToSet should collect and deduplicate int32 values", - ), - AccumulatorTestCase( - "bson_int64", - docs=[{"v": Int64(10)}, {"v": Int64(20)}, {"v": Int64(10)}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [Int64(10), Int64(20)]}], - msg="$addToSet should collect and deduplicate Int64 values", - ), - AccumulatorTestCase( - "bson_double", - docs=[{"v": 1.5}, {"v": 2.5}, {"v": 1.5}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [1.5, 2.5]}], - msg="$addToSet should collect and deduplicate double values", - ), - AccumulatorTestCase( - "bson_decimal128", - docs=[ - {"v": Decimal128("1.5")}, - {"v": Decimal128("2.5")}, - {"v": Decimal128("1.5")}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [Decimal128("1.5"), Decimal128("2.5")]}], - msg="$addToSet should collect and deduplicate Decimal128 values", - ), - AccumulatorTestCase( - "bson_string", - docs=[{"v": "abc"}, {"v": "def"}, {"v": "abc"}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": ["abc", "def"]}], - msg="$addToSet should collect and deduplicate string values", - ), - AccumulatorTestCase( - "bson_bool", - docs=[{"v": True}, {"v": False}, {"v": True}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [True, False]}], - msg="$addToSet should 
collect and deduplicate boolean values", - ), - AccumulatorTestCase( - "bson_datetime", - docs=[ - {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)}, - {"v": datetime(2021, 1, 1, tzinfo=timezone.utc)}, - {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[ - { - "result": [ - datetime(2020, 1, 1, tzinfo=timezone.utc), - datetime(2021, 1, 1, tzinfo=timezone.utc), - ] - } - ], - msg="$addToSet should collect and deduplicate datetime values", - ), - AccumulatorTestCase( - "bson_objectid", - docs=[ - {"v": ObjectId("000000000000000000000001")}, - {"v": ObjectId("000000000000000000000002")}, - {"v": ObjectId("000000000000000000000001")}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[ - { - "result": [ - ObjectId("000000000000000000000001"), - ObjectId("000000000000000000000002"), - ] - } - ], - msg="$addToSet should collect and deduplicate ObjectId values", - ), - AccumulatorTestCase( - "bson_binary", - docs=[{"v": Binary(b"\x00")}, {"v": Binary(b"\x01")}, {"v": Binary(b"\x00")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [b"\x00", b"\x01"]}], - msg="$addToSet should collect and deduplicate Binary values", - ), - AccumulatorTestCase( - "bson_regex", - docs=[{"v": Regex("abc")}, {"v": Regex("def")}, {"v": Regex("abc")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [Regex("abc"), Regex("def")]}], - msg="$addToSet should collect and deduplicate Regex values", - ), - AccumulatorTestCase( - "bson_code", - docs=[ - {"v": Code("function(){}")}, - {"v": Code("function(){return 1}")}, - {"v": Code("function(){}")}, - ], - pipeline=[ - {"$group": {"_id": None, 
"result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": ["function(){}", "function(){return 1}"]}], - msg="$addToSet should collect and deduplicate Code values", - ), - AccumulatorTestCase( - "bson_timestamp", - docs=[ - {"v": Timestamp(100, 1)}, - {"v": Timestamp(200, 1)}, - {"v": Timestamp(100, 1)}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [Timestamp(100, 1), Timestamp(200, 1)]}], - msg="$addToSet should collect and deduplicate Timestamp values", - ), - AccumulatorTestCase( - "bson_minkey", - docs=[{"v": MinKey()}, {"v": MinKey()}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"": MinKey()}]}], - msg="$addToSet should deduplicate MinKey values", - ), - AccumulatorTestCase( - "bson_maxkey", - docs=[{"v": MaxKey()}, {"v": MaxKey()}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"": MaxKey()}]}], - msg="$addToSet should deduplicate MaxKey values", - ), - AccumulatorTestCase( - "bson_document", - docs=[{"v": {"x": 1}}, {"v": {"x": 2}}, {"v": {"x": 1}}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [{"x": 1}, {"x": 2}]}], - msg="$addToSet should collect and deduplicate embedded document values", - ), - AccumulatorTestCase( - "bson_array", - docs=[{"v": [1, 2]}, {"v": [3, 4]}, {"v": [1, 2]}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [[1, 2], [3, 4]]}], - msg="$addToSet should collect and deduplicate array values as single elements", - ), - AccumulatorTestCase( - "bson_null", - docs=[{"v": None}, {"v": None}], - pipeline=[ - 
{"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [None]}], - msg="$addToSet should deduplicate null values", - ), -] - -# Property [Mixed Type Collection]: $addToSet collects values of different -# BSON types in the same group. -ADDTOSET_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "mixed_types", - docs=[ - {"v": 42}, - {"v": "hello"}, - {"v": True}, - {"v": [1, 2]}, - {"v": {"a": 1}}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [42, "hello", True, [1, 2], {"a": 1}]}], - msg="$addToSet should collect values of different BSON types in one group", - ), -] - -# Property [Numeric Equivalence]: numerically equivalent values across types are deduplicated. -ADDTOSET_NUMERIC_EQUIV_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "equiv_all_ones", - docs=[{"v": 1}, {"v": Int64(1)}, {"v": 1.0}, {"v": Decimal128("1")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [1]}], - msg="$addToSet should deduplicate numerically equivalent values of all numeric types", - ), - AccumulatorTestCase( - "equiv_all_zeros", - docs=[{"v": 0}, {"v": Int64(0)}, {"v": 0.0}, {"v": Decimal128("0")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [0]}], - msg="$addToSet should deduplicate numerically equivalent zero values", - ), - AccumulatorTestCase( - "equiv_int32_int64", - docs=[{"v": 5}, {"v": Int64(5)}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [5]}], - msg="$addToSet should deduplicate int32 and Int64 with same numeric value", - ), - AccumulatorTestCase( - "equiv_double_int32", - 
docs=[{"v": 3.0}, {"v": 3}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [3.0]}], - msg="$addToSet should deduplicate double and int32 with same numeric value", - ), - AccumulatorTestCase( - "equiv_decimal128_int64", - docs=[{"v": Decimal128("100")}, {"v": Int64(100)}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [Decimal128("100")]}], - msg="$addToSet should deduplicate Decimal128 and Int64 with same numeric value", - ), - AccumulatorTestCase( - "equiv_negative", - docs=[{"v": -1}, {"v": Int64(-1)}, {"v": -1.0}, {"v": Decimal128("-1")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [-1]}], - msg="$addToSet should deduplicate negative numerically equivalent values", - ), -] - -# Property [BSON Type Distinction]: values of different BSON types are distinct even when similar. 
-ADDTOSET_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "distinct_false_vs_zero", - docs=[{"v": False}, {"v": 0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [0, False]}], - msg="$addToSet should treat false and int32(0) as distinct BSON types", - ), - AccumulatorTestCase( - "distinct_true_vs_one", - docs=[{"v": True}, {"v": 1}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [1, True]}], - msg="$addToSet should treat true and int32(1) as distinct BSON types", - ), - AccumulatorTestCase( - "distinct_null_vs_missing", - docs=[{"v": None}, {"x": 1}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [None]}], - msg="$addToSet should collect null but exclude missing field", - ), - AccumulatorTestCase( - "distinct_empty_string_vs_null", - docs=[{"v": ""}, {"v": None}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": ["", None]}], - msg="$addToSet should treat empty string and null as distinct", - ), - AccumulatorTestCase( - "distinct_string_vs_number", - docs=[{"v": "123"}, {"v": 123}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [123, "123"]}], - msg="$addToSet should treat string '123' and int 123 as distinct", - ), -] - -# Property [NaN Deduplication]: NaN values are equal for deduplication purposes. 
-ADDTOSET_NAN_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "nan_double_dedup", - docs=[{"v": float("nan")}, {"v": float("nan")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}], - msg="$addToSet should deduplicate double NaN values", - ), - AccumulatorTestCase( - "nan_decimal128_dedup", - docs=[{"v": Decimal128("NaN")}, {"v": Decimal128("NaN")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [Decimal128("NaN")]}], - msg="$addToSet should deduplicate Decimal128 NaN values", - ), - AccumulatorTestCase( - "nan_cross_type", - docs=[{"v": float("nan")}, {"v": Decimal128("NaN")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}], - msg="$addToSet should deduplicate float NaN and Decimal128 NaN as numerically equal", - ), - AccumulatorTestCase( - "nan_with_finite", - docs=[{"v": float("nan")}, {"v": 5}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [pytest.approx(math.nan, nan_ok=True), 5]}], - msg="$addToSet should treat NaN and finite values as distinct", - ), -] - -# Property [Infinity Deduplication]: Infinity values are equal across numeric types. 
-ADDTOSET_INFINITY_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "inf_double_dedup", - docs=[{"v": float("inf")}, {"v": float("inf")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [float("inf")]}], - msg="$addToSet should deduplicate positive Infinity values", - ), - AccumulatorTestCase( - "neg_inf_double_dedup", - docs=[{"v": float("-inf")}, {"v": float("-inf")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [float("-inf")]}], - msg="$addToSet should deduplicate negative Infinity values", - ), - AccumulatorTestCase( - "inf_cross_type", - docs=[{"v": float("inf")}, {"v": Decimal128("Infinity")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [float("inf")]}], - msg="$addToSet should deduplicate float Infinity and Decimal128 Infinity", - ), - AccumulatorTestCase( - "inf_vs_neg_inf", - docs=[{"v": float("inf")}, {"v": float("-inf")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [float("-inf"), float("inf")]}], - msg="$addToSet should treat positive and negative Infinity as distinct", - ), -] - -# Property [Negative Zero]: -0.0 and 0.0 are numerically equal and deduplicated. 
-ADDTOSET_NEG_ZERO_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "neg_zero_double", - docs=[{"v": -0.0}, {"v": 0.0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [-0.0]}], - msg="$addToSet should deduplicate -0.0 and 0.0 as numerically equal", - ), - AccumulatorTestCase( - "neg_zero_decimal128", - docs=[{"v": Decimal128("-0")}, {"v": Decimal128("0")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [Decimal128("-0")]}], - msg="$addToSet should deduplicate Decimal128 -0 and 0 as numerically equal", - ), - AccumulatorTestCase( - "neg_zero_cross_type", - docs=[{"v": -0.0}, {"v": 0}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [-0.0]}], - msg="$addToSet should deduplicate -0.0 and int 0 as numerically equal", - ), -] - -# Property [Decimal128 Precision]: Decimal128 values with same numeric value but different -# representations are deduplicated. 
-ADDTOSET_DECIMAL128_PRECISION_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "decimal_trailing_zeros", - docs=[{"v": Decimal128("1.0")}, {"v": Decimal128("1.00")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [Decimal128("1.0")]}], - msg="$addToSet should deduplicate Decimal128 values with different trailing zeros", - ), - AccumulatorTestCase( - "decimal_34_digit_precision", - docs=[{"v": Decimal128("1.234567890123456789012345678901234")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [Decimal128("1.234567890123456789012345678901234")]}], - msg="$addToSet should preserve full 34-digit Decimal128 precision", - ), - AccumulatorTestCase( - "decimal_max_min_distinct", - docs=[ - {"v": Decimal128("9.999999999999999999999999999999999E+6144")}, - {"v": Decimal128("1E-6176")}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[ - { - "result": [ - Decimal128("1E-6176"), - Decimal128("9.999999999999999999999999999999999E+6144"), - ] - } - ], - msg="$addToSet should treat Decimal128 max and min as distinct values", - ), -] - # Property [Expression Arguments]: $addToSet accepts various expression forms. ADDTOSET_EXPRESSION_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( @@ -1151,62 +374,22 @@ ), ] -# Property [Expression Error Propagation]: errors from sub-expressions propagate. 
-ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "error_toInt_invalid", - docs=[{"v": "not_a_number"}], - pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$toInt": "$v"}}}}], - error_code=CONVERSION_FAILURE_ERROR, - msg="$addToSet should propagate $toInt conversion error", - ), - AccumulatorTestCase( - "error_divide_by_zero", - docs=[{"v": 10}], - pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$divide": ["$v", 0]}}}}], - error_code=DIVIDE_BY_ZERO_V2_ERROR, - msg="$addToSet should propagate divide-by-zero error", - ), - AccumulatorTestCase( - "error_mod_by_zero", - docs=[{"v": 10}], - pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$mod": ["$v", 0]}}}}], - error_code=MODULO_BY_ZERO_V2_ERROR, - msg="$addToSet should propagate mod-by-zero error", - ), -] - # --------------------------------------------------------------------------- -# Aggregates +# Aggregate # --------------------------------------------------------------------------- ADDTOSET_SUCCESS_TESTS = ( - ADDTOSET_NULL_TESTS - + ADDTOSET_MISSING_TESTS - + ADDTOSET_NULL_MISSING_COMBINED_TESTS - + ADDTOSET_REMOVE_TESTS + ADDTOSET_REMOVE_TESTS + ADDTOSET_UNIQUE_TESTS + ADDTOSET_ARRAY_ELEMENT_TESTS - + ADDTOSET_DOC_DEDUP_TESTS - + ADDTOSET_STRING_DEDUP_TESTS - + ADDTOSET_BSON_TYPE_TESTS - + ADDTOSET_MIXED_TYPE_TESTS - + ADDTOSET_NUMERIC_EQUIV_TESTS - + ADDTOSET_TYPE_DISTINCTION_TESTS - + ADDTOSET_NAN_TESTS - + ADDTOSET_INFINITY_TESTS - + ADDTOSET_NEG_ZERO_TESTS - + ADDTOSET_DECIMAL128_PRECISION_TESTS + ADDTOSET_EXPRESSION_TESTS + ADDTOSET_GROUPING_TESTS + ADDTOSET_EMPTY_TESTS + ADDTOSET_EDGE_CASE_TESTS ) -ADDTOSET_ERROR_TESTS = ADDTOSET_EXPRESSION_ERROR_TESTS - # --------------------------------------------------------------------------- -# Primary test functions +# Test function # --------------------------------------------------------------------------- @@ -1222,18 +405,6 @@ def test_accumulator_addToSet(collection, test_case: 
AccumulatorTestCase): assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_ERROR_TESTS)) -def test_accumulator_addToSet_errors(collection, test_case): - """Test $addToSet accumulator error cases with $group.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertFailureCode(result, test_case.error_code, msg=test_case.msg) - - # --------------------------------------------------------------------------- # Property-specific tests # --------------------------------------------------------------------------- diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py new file mode 100644 index 00000000..e319caf0 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py @@ -0,0 +1,254 @@ +"""Tests for $addToSet accumulator BSON type collection and deduplication.""" + +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# --------------------------------------------------------------------------- +# Property lists +# --------------------------------------------------------------------------- + +# Property [BSON Type 
Collection]: $addToSet collects and deduplicates values of every +# non-deprecated BSON type. +ADDTOSET_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bson_int32", + docs=[{"v": 10}, {"v": 20}, {"v": 10}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [10, 20]}], + msg="$addToSet should collect and deduplicate int32 values", + ), + AccumulatorTestCase( + "bson_int64", + docs=[{"v": Int64(10)}, {"v": Int64(20)}, {"v": Int64(10)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Int64(10), Int64(20)]}], + msg="$addToSet should collect and deduplicate Int64 values", + ), + AccumulatorTestCase( + "bson_double", + docs=[{"v": 1.5}, {"v": 2.5}, {"v": 1.5}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [1.5, 2.5]}], + msg="$addToSet should collect and deduplicate double values", + ), + AccumulatorTestCase( + "bson_decimal128", + docs=[ + {"v": Decimal128("1.5")}, + {"v": Decimal128("2.5")}, + {"v": Decimal128("1.5")}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Decimal128("1.5"), Decimal128("2.5")]}], + msg="$addToSet should collect and deduplicate Decimal128 values", + ), + AccumulatorTestCase( + "bson_string", + docs=[{"v": "abc"}, {"v": "def"}, {"v": "abc"}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ["abc", "def"]}], + msg="$addToSet should collect and deduplicate string values", + ), + AccumulatorTestCase( + "bson_bool", + docs=[{"v": True}, {"v": False}, {"v": True}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": 
{"_id": 0, "result": 1}}, + ], + expected=[{"result": [True, False]}], + msg="$addToSet should collect and deduplicate boolean values", + ), + AccumulatorTestCase( + "bson_datetime", + docs=[ + {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)}, + {"v": datetime(2021, 1, 1, tzinfo=timezone.utc)}, + {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[ + { + "result": [ + datetime(2020, 1, 1, tzinfo=timezone.utc), + datetime(2021, 1, 1, tzinfo=timezone.utc), + ] + } + ], + msg="$addToSet should collect and deduplicate datetime values", + ), + AccumulatorTestCase( + "bson_objectid", + docs=[ + {"v": ObjectId("000000000000000000000001")}, + {"v": ObjectId("000000000000000000000002")}, + {"v": ObjectId("000000000000000000000001")}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[ + { + "result": [ + ObjectId("000000000000000000000001"), + ObjectId("000000000000000000000002"), + ] + } + ], + msg="$addToSet should collect and deduplicate ObjectId values", + ), + AccumulatorTestCase( + "bson_binary", + docs=[{"v": Binary(b"\x00")}, {"v": Binary(b"\x01")}, {"v": Binary(b"\x00")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [b"\x00", b"\x01"]}], + msg="$addToSet should collect and deduplicate Binary values", + ), + AccumulatorTestCase( + "bson_regex", + docs=[{"v": Regex("abc")}, {"v": Regex("def")}, {"v": Regex("abc")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Regex("abc"), Regex("def")]}], + msg="$addToSet should collect and deduplicate Regex values", + ), + AccumulatorTestCase( + "bson_code", + docs=[ + {"v": Code("function(){}")}, + {"v": 
Code("function(){return 1}")}, + {"v": Code("function(){}")}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ["function(){}", "function(){return 1}"]}], + msg="$addToSet should collect and deduplicate Code values", + ), + AccumulatorTestCase( + "bson_timestamp", + docs=[ + {"v": Timestamp(100, 1)}, + {"v": Timestamp(200, 1)}, + {"v": Timestamp(100, 1)}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Timestamp(100, 1), Timestamp(200, 1)]}], + msg="$addToSet should collect and deduplicate Timestamp values", + ), + AccumulatorTestCase( + "bson_minkey", + docs=[{"v": MinKey()}, {"v": MinKey()}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"": MinKey()}]}], + msg="$addToSet should deduplicate MinKey values", + ), + AccumulatorTestCase( + "bson_maxkey", + docs=[{"v": MaxKey()}, {"v": MaxKey()}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"": MaxKey()}]}], + msg="$addToSet should deduplicate MaxKey values", + ), + AccumulatorTestCase( + "bson_document", + docs=[{"v": {"x": 1}}, {"v": {"x": 2}}, {"v": {"x": 1}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"x": 1}, {"x": 2}]}], + msg="$addToSet should collect and deduplicate embedded document values", + ), + AccumulatorTestCase( + "bson_array", + docs=[{"v": [1, 2]}, {"v": [3, 4]}, {"v": [1, 2]}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [[1, 2], [3, 4]]}], + msg="$addToSet should collect and deduplicate array values as single 
elements", + ), + AccumulatorTestCase( + "bson_null", + docs=[{"v": None}, {"v": None}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None]}], + msg="$addToSet should deduplicate null values", + ), +] + +# --------------------------------------------------------------------------- +# Test function +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BSON_TYPE_TESTS)) +def test_accumulator_addToSet_bson_types(collection, test_case: AccumulatorTestCase): + """Test $addToSet accumulator BSON type collection and deduplication.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py new file mode 100644 index 00000000..626ce236 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py @@ -0,0 +1,499 @@ +"""Tests for $addToSet accumulator deduplication behavior.""" + +from __future__ import annotations + +import math + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# --------------------------------------------------------------------------- +# Property 
lists +# --------------------------------------------------------------------------- + +# Property [Document Duplicate Detection]: documents are duplicates only if they have +# exact same fields, values, and field order. +ADDTOSET_DOC_DEDUP_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "doc_identical", + docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 2}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"a": 1, "b": 2}]}], + msg="$addToSet should deduplicate identical documents", + ), + AccumulatorTestCase( + "doc_different_field_order", + docs=[{"v": {"a": 1, "b": 2}}, {"v": {"b": 2, "a": 1}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"b": 2, "a": 1}, {"a": 1, "b": 2}]}], + msg="$addToSet should treat documents with different field order as distinct", + ), + AccumulatorTestCase( + "doc_different_values", + docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 3}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"a": 1, "b": 2}, {"a": 1, "b": 3}]}], + msg="$addToSet should treat documents with different values as distinct", + ), + AccumulatorTestCase( + "doc_nested_identical", + docs=[{"v": {"a": {"x": 1}}}, {"v": {"a": {"x": 1}}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"a": {"x": 1}}]}], + msg="$addToSet should deduplicate nested documents with identical structure", + ), + AccumulatorTestCase( + "doc_nested_different_order", + docs=[{"v": {"a": {"x": 1, "y": 2}}}, {"v": {"a": {"y": 2, "x": 1}}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"a": {"x": 1, 
"y": 2}}, {"a": {"y": 2, "x": 1}}]}], + msg="$addToSet should treat nested documents with different field order as distinct", + ), + AccumulatorTestCase( + "doc_empty", + docs=[{"v": {}}, {"v": {}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{}]}], + msg="$addToSet should deduplicate empty documents", + ), + AccumulatorTestCase( + "doc_subset", + docs=[{"v": {"a": 1}}, {"v": {"a": 1, "b": 2}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"a": 1, "b": 2}, {"a": 1}]}], + msg="$addToSet should treat a document subset and superset as distinct", + ), + AccumulatorTestCase( + "doc_with_array_value", + docs=[{"v": {"a": [1, 2]}}, {"v": {"a": [1, 2]}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"a": [1, 2]}]}], + msg="$addToSet should deduplicate documents containing identical array values", + ), + AccumulatorTestCase( + "doc_with_null_value", + docs=[{"v": {"a": None}}, {"v": {"a": None}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"a": None}]}], + msg="$addToSet should deduplicate documents with null field values", + ), + AccumulatorTestCase( + "doc_with_nested_null", + docs=[{"v": {"a": {"b": None}}}, {"v": {"a": {"b": None}}}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"a": {"b": None}}]}], + msg="$addToSet should deduplicate documents with nested null values", + ), +] + +# Property [String Deduplication]: strings are compared by byte value with no Unicode normalization. 
+ADDTOSET_STRING_DEDUP_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "string_identical", + docs=[{"v": "abc"}, {"v": "abc"}, {"v": "def"}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ["abc", "def"]}], + msg="$addToSet should deduplicate identical strings", + ), + AccumulatorTestCase( + "string_empty", + docs=[{"v": ""}, {"v": ""}, {"v": "x"}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ["", "x"]}], + msg="$addToSet should deduplicate empty strings", + ), + AccumulatorTestCase( + "string_unicode_no_normalization", + docs=[ + {"v": "\u00e9"}, + {"v": "\u0065\u0301"}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ["\u00e9", "\u0065\u0301"]}], + msg="$addToSet should not normalize Unicode; precomposed and decomposed are distinct", + ), +] + +# Property [Mixed Type Collection]: $addToSet collects values of different +# BSON types in the same group. +ADDTOSET_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "mixed_types", + docs=[ + {"v": 42}, + {"v": "hello"}, + {"v": True}, + {"v": [1, 2]}, + {"v": {"a": 1}}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [42, "hello", True, [1, 2], {"a": 1}]}], + msg="$addToSet should collect values of different BSON types in one group", + ), +] + +# Property [Numeric Equivalence]: numerically equivalent values across types are deduplicated. 
+ADDTOSET_NUMERIC_EQUIV_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "equiv_all_ones", + docs=[{"v": 1}, {"v": Int64(1)}, {"v": 1.0}, {"v": Decimal128("1")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [1]}], + msg="$addToSet should deduplicate numerically equivalent values of all numeric types", + ), + AccumulatorTestCase( + "equiv_all_zeros", + docs=[{"v": 0}, {"v": Int64(0)}, {"v": 0.0}, {"v": Decimal128("0")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [0]}], + msg="$addToSet should deduplicate numerically equivalent zero values", + ), + AccumulatorTestCase( + "equiv_int32_int64", + docs=[{"v": 5}, {"v": Int64(5)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [5]}], + msg="$addToSet should deduplicate int32 and Int64 with same numeric value", + ), + AccumulatorTestCase( + "equiv_double_int32", + docs=[{"v": 3.0}, {"v": 3}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [3.0]}], + msg="$addToSet should deduplicate double and int32 with same numeric value", + ), + AccumulatorTestCase( + "equiv_decimal128_int64", + docs=[{"v": Decimal128("100")}, {"v": Int64(100)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Decimal128("100")]}], + msg="$addToSet should deduplicate Decimal128 and Int64 with same numeric value", + ), + AccumulatorTestCase( + "equiv_negative", + docs=[{"v": -1}, {"v": Int64(-1)}, {"v": -1.0}, {"v": Decimal128("-1")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + 
expected=[{"result": [-1]}], + msg="$addToSet should deduplicate negative numerically equivalent values", + ), +] + +# Property [BSON Type Distinction]: values of different BSON types are distinct even when similar. +ADDTOSET_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "distinct_false_vs_zero", + docs=[{"v": False}, {"v": 0}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [0, False]}], + msg="$addToSet should treat false and int32(0) as distinct BSON types", + ), + AccumulatorTestCase( + "distinct_true_vs_one", + docs=[{"v": True}, {"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [1, True]}], + msg="$addToSet should treat true and int32(1) as distinct BSON types", + ), + AccumulatorTestCase( + "distinct_null_vs_missing", + docs=[{"v": None}, {"x": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None]}], + msg="$addToSet should collect null but exclude missing field", + ), + AccumulatorTestCase( + "distinct_empty_string_vs_null", + docs=[{"v": ""}, {"v": None}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ["", None]}], + msg="$addToSet should treat empty string and null as distinct", + ), + AccumulatorTestCase( + "distinct_string_vs_number", + docs=[{"v": "123"}, {"v": 123}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [123, "123"]}], + msg="$addToSet should treat string '123' and int 123 as distinct", + ), +] + +# Property [NaN Deduplication]: NaN values are equal for deduplication purposes. 
+ADDTOSET_NAN_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "nan_double_dedup", + docs=[{"v": float("nan")}, {"v": float("nan")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}], + msg="$addToSet should deduplicate double NaN values", + ), + AccumulatorTestCase( + "nan_decimal128_dedup", + docs=[{"v": Decimal128("NaN")}, {"v": Decimal128("NaN")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Decimal128("NaN")]}], + msg="$addToSet should deduplicate Decimal128 NaN values", + ), + AccumulatorTestCase( + "nan_cross_type", + docs=[{"v": float("nan")}, {"v": Decimal128("NaN")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}], + msg="$addToSet should deduplicate float NaN and Decimal128 NaN as numerically equal", + ), + AccumulatorTestCase( + "nan_with_finite", + docs=[{"v": float("nan")}, {"v": 5}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [pytest.approx(math.nan, nan_ok=True), 5]}], + msg="$addToSet should treat NaN and finite values as distinct", + ), +] + +# Property [Infinity Deduplication]: Infinity values are equal across numeric types. 
+ADDTOSET_INFINITY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "inf_double_dedup", + docs=[{"v": float("inf")}, {"v": float("inf")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [float("inf")]}], + msg="$addToSet should deduplicate positive Infinity values", + ), + AccumulatorTestCase( + "neg_inf_double_dedup", + docs=[{"v": float("-inf")}, {"v": float("-inf")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [float("-inf")]}], + msg="$addToSet should deduplicate negative Infinity values", + ), + AccumulatorTestCase( + "inf_cross_type", + docs=[{"v": float("inf")}, {"v": Decimal128("Infinity")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [float("inf")]}], + msg="$addToSet should deduplicate float Infinity and Decimal128 Infinity", + ), + AccumulatorTestCase( + "inf_vs_neg_inf", + docs=[{"v": float("inf")}, {"v": float("-inf")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [float("-inf"), float("inf")]}], + msg="$addToSet should treat positive and negative Infinity as distinct", + ), +] + +# Property [Negative Zero]: -0.0 and 0.0 are numerically equal and deduplicated. 
+ADDTOSET_NEG_ZERO_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "neg_zero_double", + docs=[{"v": -0.0}, {"v": 0.0}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [-0.0]}], + msg="$addToSet should deduplicate -0.0 and 0.0 as numerically equal", + ), + AccumulatorTestCase( + "neg_zero_decimal128", + docs=[{"v": Decimal128("-0")}, {"v": Decimal128("0")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Decimal128("-0")]}], + msg="$addToSet should deduplicate Decimal128 -0 and 0 as numerically equal", + ), + AccumulatorTestCase( + "neg_zero_cross_type", + docs=[{"v": -0.0}, {"v": 0}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [-0.0]}], + msg="$addToSet should deduplicate -0.0 and int 0 as numerically equal", + ), +] + +# Property [Decimal128 Precision]: Decimal128 values with same numeric value but different +# representations are deduplicated. 
+ADDTOSET_DECIMAL128_PRECISION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "decimal_trailing_zeros", + docs=[{"v": Decimal128("1.0")}, {"v": Decimal128("1.00")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Decimal128("1.0")]}], + msg="$addToSet should deduplicate Decimal128 values with different trailing zeros", + ), + AccumulatorTestCase( + "decimal_34_digit_precision", + docs=[{"v": Decimal128("1.234567890123456789012345678901234")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Decimal128("1.234567890123456789012345678901234")]}], + msg="$addToSet should preserve full 34-digit Decimal128 precision", + ), + AccumulatorTestCase( + "decimal_max_min_distinct", + docs=[ + {"v": Decimal128("9.999999999999999999999999999999999E+6144")}, + {"v": Decimal128("1E-6176")}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[ + { + "result": [ + Decimal128("1E-6176"), + Decimal128("9.999999999999999999999999999999999E+6144"), + ] + } + ], + msg="$addToSet should treat Decimal128 max and min as distinct values", + ), +] + +# --------------------------------------------------------------------------- +# Aggregate +# --------------------------------------------------------------------------- + +ADDTOSET_DEDUP_TESTS = ( + ADDTOSET_DOC_DEDUP_TESTS + + ADDTOSET_STRING_DEDUP_TESTS + + ADDTOSET_MIXED_TYPE_TESTS + + ADDTOSET_NUMERIC_EQUIV_TESTS + + ADDTOSET_TYPE_DISTINCTION_TESTS + + ADDTOSET_NAN_TESTS + + ADDTOSET_INFINITY_TESTS + + ADDTOSET_NEG_ZERO_TESTS + + ADDTOSET_DECIMAL128_PRECISION_TESTS +) + +# --------------------------------------------------------------------------- +# Test function +# --------------------------------------------------------------------------- + + 
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_DEDUP_TESTS)) +def test_accumulator_addToSet_dedup(collection, test_case: AccumulatorTestCase): + """Test $addToSet accumulator deduplication behavior.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py new file mode 100644 index 00000000..22208c10 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py @@ -0,0 +1,62 @@ +"""Tests for $addToSet accumulator error cases.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertFailureCode +from documentdb_tests.framework.error_codes import ( + CONVERSION_FAILURE_ERROR, + DIVIDE_BY_ZERO_V2_ERROR, + MODULO_BY_ZERO_V2_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# --------------------------------------------------------------------------- +# Property lists +# --------------------------------------------------------------------------- + +# Property [Expression Error Propagation]: errors from sub-expressions propagate. 
+ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "error_toInt_invalid", + docs=[{"v": "not_a_number"}], + pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$toInt": "$v"}}}}], + error_code=CONVERSION_FAILURE_ERROR, + msg="$addToSet should propagate $toInt conversion error", + ), + AccumulatorTestCase( + "error_divide_by_zero", + docs=[{"v": 10}], + pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$divide": ["$v", 0]}}}}], + error_code=DIVIDE_BY_ZERO_V2_ERROR, + msg="$addToSet should propagate divide-by-zero error", + ), + AccumulatorTestCase( + "error_mod_by_zero", + docs=[{"v": 10}], + pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$mod": ["$v", 0]}}}}], + error_code=MODULO_BY_ZERO_V2_ERROR, + msg="$addToSet should propagate mod-by-zero error", + ), +] + +# --------------------------------------------------------------------------- +# Test function +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_EXPRESSION_ERROR_TESTS)) +def test_accumulator_addToSet_errors(collection, test_case): + """Test $addToSet accumulator error cases with $group.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py new file mode 100644 index 00000000..d63f9526 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py @@ -0,0 +1,142 @@ +"""Tests for $addToSet accumulator null and missing field 
handling.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# --------------------------------------------------------------------------- +# Property lists +# --------------------------------------------------------------------------- + +# Property [Null Collected]: null values are collected as valid values and deduplicated. +ADDTOSET_NULL_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "null_all", + docs=[{"v": None}, {"v": None}, {"v": None}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None]}], + msg="$addToSet should collect null and deduplicate to a single null", + ), + AccumulatorTestCase( + "null_single", + docs=[{"v": None}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None]}], + msg="$addToSet should collect a single null value", + ), + AccumulatorTestCase( + "null_among_values", + docs=[{"v": None}, {"v": 5}, {"v": 3}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None, 5, 3]}], + msg="$addToSet should collect null alongside other values", + ), + AccumulatorTestCase( + "null_and_values_dedup", + docs=[{"v": 10}, {"v": None}, {"v": 5}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [10, None, 5]}], + msg="$addToSet should collect null and distinct values without duplication", + ), +] + +# Property [Missing Excluded]: missing fields are excluded 
from the result. +ADDTOSET_MISSING_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "missing_all", + docs=[{"x": 1}, {"x": 2}, {"x": 3}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": []}], + msg="$addToSet should return empty array when all fields are missing", + ), + AccumulatorTestCase( + "missing_single", + docs=[{"x": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": []}], + msg="$addToSet should return empty array for a single doc with missing field", + ), + AccumulatorTestCase( + "missing_among_values", + docs=[{"x": 1}, {"v": 5}, {"v": 3}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [5, 3]}], + msg="$addToSet should exclude missing fields and collect only present values", + ), +] + +# Property [Null and Missing Combined]: null is collected while missing is excluded. 
+ADDTOSET_NULL_MISSING_COMBINED_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "combined_null_and_missing", + docs=[{"v": None}, {"x": 1}, {"v": None}, {"x": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None]}], + msg="$addToSet should collect null but exclude missing fields", + ), + AccumulatorTestCase( + "combined_null_missing_and_values", + docs=[{"v": 10}, {"v": None}, {"x": 1}, {"v": 5}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [10, None, 5]}], + msg="$addToSet should collect null and values but exclude missing fields", + ), +] + +# --------------------------------------------------------------------------- +# Aggregate +# --------------------------------------------------------------------------- + +ADDTOSET_NULL_MISSING_TESTS = ( + ADDTOSET_NULL_TESTS + ADDTOSET_MISSING_TESTS + ADDTOSET_NULL_MISSING_COMBINED_TESTS +) + +# --------------------------------------------------------------------------- +# Test function +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_NULL_MISSING_TESTS)) +def test_accumulator_addToSet_null_missing(collection, test_case: AccumulatorTestCase): + """Insert the case docs, run the pipeline, and compare ``result`` order-insensitively.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"]) From 026b260215131e6c061dbb84d0d3d5890f7188bc Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Mon, 25 May 2026 15:24:48 -0700 Subject: [PATCH 08/13] add missing tests Signed-off-by: Alina (Xi) Li ---
.../addToSet/test_accumulator_addToSet.py | 22 ++++ .../test_accumulator_addToSet_dedup.py | 20 +++ ..._accumulator_addToSet_type_preservation.py | 120 ++++++++++++++++++ 3 files changed, 162 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py index 4d569dd5..ea1f9123 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py @@ -80,6 +80,27 @@ ), ] +# Property [$$REMOVE Interaction with Deduplication]: $$REMOVE entries are excluded and +# remaining values are properly deduplicated. +ADDTOSET_REMOVE_DEDUP_INTERACTION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "remove_dedup_same_value_produced", + docs=[{"v": 1}, {"v": 2}, {"v": -1}, {"v": -2}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$addToSet": {"$cond": [{"$gte": ["$v", 0]}, "kept", "$$REMOVE"]}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ["kept"]}], + msg="$addToSet should collect single value when $cond produces same value " + "for multiple docs and $$REMOVE for others", + ), +] + # Property [Unique Value Collection]: $addToSet returns an array of all unique values. 
ADDTOSET_UNIQUE_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( @@ -380,6 +401,7 @@ ADDTOSET_SUCCESS_TESTS = ( ADDTOSET_REMOVE_TESTS + + ADDTOSET_REMOVE_DEDUP_INTERACTION_TESTS + ADDTOSET_UNIQUE_TESTS + ADDTOSET_ARRAY_ELEMENT_TESTS + ADDTOSET_EXPRESSION_TESTS diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py index 626ce236..2d75b818 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py @@ -158,6 +158,26 @@ expected=[{"result": ["\u00e9", "\u0065\u0301"]}], msg="$addToSet should not normalize Unicode; precomposed and decomposed are distinct", ), + AccumulatorTestCase( + "string_embedded_null_bytes", + docs=[{"v": "a\x00b"}, {"v": "a\x00b"}, {"v": "a\x00c"}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ["a\x00b", "a\x00c"]}], + msg="$addToSet should compare strings with embedded null bytes by byte value", + ), + AccumulatorTestCase( + "string_4byte_utf8_emoji", + docs=[{"v": "\U0001f600"}, {"v": "\U0001f600"}, {"v": "\U0001f601"}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ["\U0001f600", "\U0001f601"]}], + msg="$addToSet should compare 4-byte UTF-8 characters (emoji) by byte value", + ), ] # Property [Mixed Type Collection]: $addToSet collects values of different diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py 
b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py new file mode 100644 index 00000000..51f70ea0 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py @@ -0,0 +1,120 @@ +"""Tests for $addToSet accumulator numeric type preservation during deduplication. + +When numerically equivalent values of different BSON types are deduplicated, +verify which type survives in the result via $type projection. +""" + +from __future__ import annotations + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# --------------------------------------------------------------------------- +# Property lists +# --------------------------------------------------------------------------- + +# Property [Numeric Equivalence — Type Preservation]: when numerically equal values +# are deduplicated, the first-inserted type should survive (no $sort; TODO confirm order).
+ADDTOSET_TYPE_PRESERVATION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "type_pres_int32_then_int64", + docs=[{"v": 5}, {"v": Int64(5)}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$unwind": "$result"}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": 5, "type": "int"}], + msg="$addToSet should keep int type when int32 is inserted before int64", + ), + AccumulatorTestCase( + "type_pres_int64_then_int32", + docs=[{"v": Int64(5)}, {"v": 5}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$unwind": "$result"}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": Int64(5), "type": "long"}], + msg="$addToSet should keep long type when int64 is inserted before int32", + ), + AccumulatorTestCase( + "type_pres_double_then_int32", + docs=[{"v": 3.0}, {"v": 3}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$unwind": "$result"}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": 3.0, "type": "double"}], + msg="$addToSet should keep double type when double is inserted before int32", + ), + AccumulatorTestCase( + "type_pres_int32_then_double", + docs=[{"v": 3}, {"v": 3.0}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$unwind": "$result"}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": 3, "type": "int"}], + msg="$addToSet should keep int type when int32 is inserted before double", + ), + AccumulatorTestCase( + "type_pres_all_four_types", + docs=[ + {"v": 1}, + {"v": Int64(1)}, + {"v": 1.0}, + {"v": Decimal128("1")}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$unwind": "$result"}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + 
expected=[{"value": 1, "type": "int"}], + msg="$addToSet should keep int type when int32 is inserted first " + "among all four numeric types", + ), + AccumulatorTestCase( + "type_pres_decimal128_first", + docs=[ + {"v": Decimal128("1")}, + {"v": 1}, + {"v": Int64(1)}, + {"v": 1.0}, + ], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$unwind": "$result"}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": Decimal128("1"), "type": "decimal"}], + msg="$addToSet should keep decimal type when Decimal128 is inserted " + "first among all four numeric types", + ), +] + +# --------------------------------------------------------------------------- +# Test function +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_TYPE_PRESERVATION_TESTS)) +def test_accumulator_addToSet_type_preservation(collection, test_case: AccumulatorTestCase): + """Assert the value and $type of the element that survives numeric deduplication.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg) From b2709cf6e7b5c5c5a3f4bcdb8c49d848df5c5eb2 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Mon, 25 May 2026 15:36:58 -0700 Subject: [PATCH 09/13] Remove duplicates and rename tests Signed-off-by: Alina (Xi) Li --- .../addToSet/test_accumulator_addToSet.py | 44 +------------------ .../test_accumulator_addToSet_dedup.py | 7 ++- .../test_accumulator_addToSet_null_missing.py | 10 ----- 3 files changed, 7 insertions(+), 54 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
index ea1f9123..9adfe706 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py @@ -64,7 +64,7 @@ msg="$addToSet should collect null produced by $cond while excluding $$REMOVE", ), AccumulatorTestCase( - "remove_dedup", + "remove_with_duplicate_values", docs=[{"v": 5}, {"v": 5}, {"v": -1}, {"v": -2}], pipeline=[ { @@ -303,26 +303,6 @@ # Property [Edge Cases]: accumulator-specific edge cases. ADDTOSET_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "edge_single_null_doc", - docs=[{"v": None}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [None]}], - msg="$addToSet should return [null] for single null document", - ), - AccumulatorTestCase( - "edge_single_missing_doc", - docs=[{"x": 1}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": []}], - msg="$addToSet should return empty array for single document with missing field", - ), AccumulatorTestCase( "edge_many_unique", docs=[{"v": i} for i in range(100)], @@ -344,7 +324,7 @@ msg="$addToSet should deduplicate 100 docs down to 5 unique values", ), AccumulatorTestCase( - "edge_array_field_not_traversed", + "edge_array_not_unwound", docs=[{"v": [5, 1, 8]}], pipeline=[ {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, @@ -353,16 +333,6 @@ expected=[{"result": [[5, 1, 8]]}], msg="$addToSet should treat array field as a single element, not traverse it", ), - AccumulatorTestCase( - "edge_mixed_array_scalar", - docs=[{"v": 5}, {"v": [5]}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [5, [5]]}], - msg="$addToSet should distinguish scalar 5 from 
array [5]", - ), AccumulatorTestCase( "edge_binary_different_subtypes", docs=[{"v": Binary(b"\x00", 0)}, {"v": Binary(b"\x00", 5)}], @@ -383,16 +353,6 @@ expected=[{"result": [Regex("abc", "i"), Regex("abc", "m")]}], msg="$addToSet should treat Regex values with different flags as distinct", ), - AccumulatorTestCase( - "edge_expression_mixed_types", - docs=[{"v": 1}, {"v": "hello"}, {"v": True}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [1, "hello", True]}], - msg="$addToSet should collect mixed-type values from expression", - ), ] # --------------------------------------------------------------------------- diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py index 2d75b818..e7ccc496 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py @@ -456,13 +456,16 @@ ), AccumulatorTestCase( "decimal_34_digit_precision", - docs=[{"v": Decimal128("1.234567890123456789012345678901234")}], + docs=[ + {"v": Decimal128("1.234567890123456789012345678901234")}, + {"v": Decimal128("1.234567890123456789012345678901234")}, + ], pipeline=[ {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, {"$project": {"_id": 0, "result": 1}}, ], expected=[{"result": [Decimal128("1.234567890123456789012345678901234")]}], - msg="$addToSet should preserve full 34-digit Decimal128 precision", + msg="$addToSet should deduplicate and preserve full 34-digit Decimal128 precision", ), AccumulatorTestCase( "decimal_max_min_distinct", diff --git 
a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py index d63f9526..42e5627d 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py @@ -47,16 +47,6 @@ expected=[{"result": [None, 5, 3]}], msg="$addToSet should collect null alongside other values", ), - AccumulatorTestCase( - "null_and_values_dedup", - docs=[{"v": 10}, {"v": None}, {"v": 5}], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": [10, None, 5]}], - msg="$addToSet should collect null and distinct values without duplication", - ), ] # Property [Missing Excluded]: missing fields are excluded from the result. 
From 6a1981c69995f28fbac0378a378de3738b8dcfa3 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Mon, 25 May 2026 15:48:27 -0700 Subject: [PATCH 10/13] generate integration tests Signed-off-by: Alina (Xi) Li --- .../test_accumulators_addToSet_integration.py | 265 ++++++++++++++++++ 1 file changed, 265 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py new file mode 100644 index 00000000..075831f0 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py @@ -0,0 +1,265 @@ +"""Tests for $addToSet accumulator composed with sibling accumulators in the same $group.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils.accumulator_test_case import ( # noqa: E501 + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# --------------------------------------------------------------------------- +# Property lists +# --------------------------------------------------------------------------- + +# Property [AddToSet with Sum]: $addToSet collects unique values while $sum +# computes the total independently in the same $group. 
+ADDTOSET_WITH_SUM_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "addtoset_sum_basic", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "a", "v": 10}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "unique": {"$addToSet": "$v"}, + "total": {"$sum": "$v"}, + } + } + ], + expected=[{"_id": "a", "unique": [10, 20], "total": 40}], + msg="$addToSet should collect unique values while $sum totals all values " + "including duplicates", + ), + AccumulatorTestCase( + "addtoset_sum_multiple_groups", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 10}, + {"cat": "b", "v": 5}, + {"cat": "b", "v": 15}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "unique": {"$addToSet": "$v"}, + "total": {"$sum": "$v"}, + } + } + ], + expected=[ + {"_id": "a", "unique": [10], "total": 20}, + {"_id": "b", "unique": [5, 15], "total": 20}, + ], + msg="$addToSet and $sum should compute independently across multiple groups", + ), +] + +# Property [AddToSet with Count]: $addToSet collects unique values while +# $sum(1) counts all documents including those with duplicate values. +ADDTOSET_WITH_COUNT_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "addtoset_count_dedup_vs_total", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "unique": {"$addToSet": "$v"}, + "count": {"$sum": 1}, + } + } + ], + expected=[{"_id": "a", "unique": [10, 20], "count": 3}], + msg="$addToSet should have 2 unique values while $sum(1) counts all 3 documents", + ), +] + +# Property [AddToSet with Push]: $addToSet collects unique values while $push +# collects all values including duplicates.
+ADDTOSET_WITH_PUSH_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "addtoset_push_dedup_vs_all", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "a", "v": 10}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "unique": {"$addToSet": "$v"}, + "all_vals": {"$push": "$v"}, + } + }, + ], + expected=[ + {"_id": "a", "unique": [10, 20], "all_vals": [10, 10, 20]}, + ], + msg="$addToSet should deduplicate while $push preserves all values", + ), +] + +# Property [AddToSet with Min/Max]: $addToSet collects the full unique set +# while $min/$max extract extremes independently. +ADDTOSET_WITH_MIN_MAX_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "addtoset_min_max", + docs=[ + {"cat": "a", "v": 30}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "a", "v": 10}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "unique": {"$addToSet": "$v"}, + "lo": {"$min": "$v"}, + "hi": {"$max": "$v"}, + } + } + ], + expected=[ + {"_id": "a", "unique": [10, 20, 30], "lo": 10, "hi": 30}, + ], + msg="$addToSet should collect all unique values while $min/$max extract extremes", + ), +] + +# Property [AddToSet with Avg]: $addToSet collects unique values while $avg +# computes the mean over all documents including duplicates. +ADDTOSET_WITH_AVG_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "addtoset_avg_includes_duplicates", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 40}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "unique": {"$addToSet": "$v"}, + "mean": {"$avg": "$v"}, + } + } + ], + expected=[{"_id": "a", "unique": [10, 40], "mean": 20.0}], + msg="$addToSet should have 2 unique values while $avg computes " + "mean over all 3 docs (including duplicate)", + ), +] + +# Property [AddToSet Null Handling vs Sum]: $addToSet collects null as a value +# while $sum ignores null.
+ADDTOSET_NULL_VS_SUM_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "addtoset_null_collected_sum_ignores", + docs=[ + {"cat": "a", "v": None}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": None}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "unique": {"$addToSet": "$v"}, + "total": {"$sum": "$v"}, + } + } + ], + expected=[{"_id": "a", "unique": [None, 10], "total": 10}], + msg="$addToSet should collect null as a value while $sum ignores " + "null and totals only numeric values", + ), +] + +# Property [Multiple AddToSet]: multiple $addToSet accumulators in the same +# $group independently collect unique values from different fields. +MULTIPLE_ADDTOSET_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "multiple_addtoset_different_fields", + docs=[ + {"cat": "a", "color": "red", "size": "S"}, + {"cat": "a", "color": "blue", "size": "M"}, + {"cat": "a", "color": "red", "size": "S"}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "colors": {"$addToSet": "$color"}, + "sizes": {"$addToSet": "$size"}, + } + } + ], + expected=[ + { + "_id": "a", + "colors": ["red", "blue"], + "sizes": ["S", "M"], + }, + ], + msg="Multiple $addToSet accumulators should independently collect " + "unique values from different fields", + ), +] + +# --------------------------------------------------------------------------- +# Aggregate +# --------------------------------------------------------------------------- + +ADDTOSET_INTEGRATION_TESTS = ( + ADDTOSET_WITH_SUM_TESTS + + ADDTOSET_WITH_COUNT_TESTS + + ADDTOSET_WITH_PUSH_TESTS + + ADDTOSET_WITH_MIN_MAX_TESTS + + ADDTOSET_WITH_AVG_TESTS + + ADDTOSET_NULL_VS_SUM_TESTS + + MULTIPLE_ADDTOSET_TESTS +) + +# --------------------------------------------------------------------------- +# Test function +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_INTEGRATION_TESTS)) +def 
test_accumulators_addToSet_integration(collection, test_case: AccumulatorTestCase): + """Check $addToSet beside sibling accumulators, ignoring document and set-array order.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline or [], + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ignore_doc_order=True, + ignore_order_in=["unique", "colors", "sizes"], + ) From 264eb7f9b56008ffcaa102392cfb13c86a8dee7f Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Mon, 25 May 2026 15:55:16 -0700 Subject: [PATCH 11/13] add more integration tests Signed-off-by: Alina (Xi) Li --- .../test_accumulators_addToSet_integration.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py index 075831f0..510f2260 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py @@ -192,6 +192,68 @@ ), ] +# Property [AddToSet with First/Last]: $addToSet collects all unique values +# regardless of order while $first/$last pick positional values after $sort.
+ADDTOSET_WITH_FIRST_LAST_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "addtoset_first_last", + docs=[ + {"cat": "a", "v": 30}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "a", "v": 10}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "unique": {"$addToSet": "$v"}, + "first_v": {"$first": "$v"}, + "last_v": {"$last": "$v"}, + } + }, + ], + expected=[ + {"_id": "a", "unique": [10, 20, 30], "first_v": 10, "last_v": 30}, + ], + msg="$addToSet should collect all unique values while $first/$last " + "pick sorted positional extremes", + ), +] + +# Property [AddToSet with MergeObjects]: $addToSet collects unique values +# while $mergeObjects combines per-document metadata independently. +ADDTOSET_WITH_MERGEOBJECTS_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "addtoset_mergeobjects", + docs=[ + {"cat": "a", "v": 10, "meta": {"src": "x"}}, + {"cat": "a", "v": 20, "meta": {"quality": "high"}}, + {"cat": "a", "v": 10, "meta": {"reviewed": True}}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "unique": {"$addToSet": "$v"}, + "merged": {"$mergeObjects": "$meta"}, + } + }, + ], + expected=[ + { + "_id": "a", + "unique": [10, 20], + "merged": {"src": "x", "quality": "high", "reviewed": True}, + } + ], + msg="$addToSet should deduplicate values while $mergeObjects " + "merges metadata from all documents including duplicates", + ), +] + # Property [Multiple AddToSet]: multiple $addToSet accumulators in the same # $group independently collect unique values from different fields. 
MULTIPLE_ADDTOSET_TESTS: list[AccumulatorTestCase] = [ @@ -234,6 +296,8 @@ + ADDTOSET_WITH_MIN_MAX_TESTS + ADDTOSET_WITH_AVG_TESTS + ADDTOSET_NULL_VS_SUM_TESTS + + ADDTOSET_WITH_FIRST_LAST_TESTS + + ADDTOSET_WITH_MERGEOBJECTS_TESTS + MULTIPLE_ADDTOSET_TESTS ) From 6a923f880bd7fc9af5b06534b81071ec9bbb921e Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Tue, 26 May 2026 12:15:24 -0700 Subject: [PATCH 12/13] Rename smoke tests Signed-off-by: Alina (Xi) Li --- ...lator_addToSet_smoke.py => test_smoke_accumulator_addToSet.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/{test_accumulator_addToSet_smoke.py => test_smoke_accumulator_addToSet.py} (100%) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_smoke_accumulator_addToSet.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_smoke.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_smoke_accumulator_addToSet.py From d2ada0efde9f34ae1f1e0b4c52c042663422046d Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Wed, 27 May 2026 15:58:03 -0700 Subject: [PATCH 13/13] address comments Add tests: arity error tests, BSON constant tests, expression tests, order dependence tests. Removed tests. 
Signed-off-by: Alina (Xi) Li --- .../addToSet/test_accumulator_addToSet.py | 300 +++++++++++++++++- .../test_accumulator_addToSet_bson_types.py | 15 - .../test_accumulator_addToSet_errors.py | 67 +++- 3 files changed, 365 insertions(+), 17 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py index 9adfe706..10c73c46 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py @@ -2,8 +2,19 @@ from __future__ import annotations +from datetime import datetime, timezone + import pytest -from bson import Binary, Regex +from bson import ( + Binary, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( AccumulatorTestCase, @@ -355,6 +366,290 @@ ), ] +# --------------------------------------------------------------------------- +# Property [BSON Constant Arguments]: $addToSet accepts BSON constants as the +# accumulator argument. Since every doc yields the same constant, the result +# set contains exactly one element. 
+# --------------------------------------------------------------------------- +ADDTOSET_BSON_CONSTANT_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "const_true", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": True}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [True]}], + msg="$addToSet with boolean True constant should return [True]", + ), + AccumulatorTestCase( + "const_false", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": False}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [False]}], + msg="$addToSet with boolean False constant should return [False]", + ), + AccumulatorTestCase( + "const_int64", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": Int64(42)}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Int64(42)]}], + msg="$addToSet with Int64 constant should return single-element set", + ), + AccumulatorTestCase( + "const_double", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": 3.14}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [3.14]}], + msg="$addToSet with double constant should return single-element set", + ), + AccumulatorTestCase( + "const_decimal128", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": Decimal128("3.14")}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Decimal128("3.14")]}], + msg="$addToSet with Decimal128 constant should return single-element set", + ), + AccumulatorTestCase( + "const_string", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "hello"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ["hello"]}], + msg="$addToSet with string constant should return single-element set", + ), + 
AccumulatorTestCase( + "const_binary", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": Binary(b"\x01\x02")}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [b"\x01\x02"]}], + msg="$addToSet with Binary constant should return single-element set", + ), + AccumulatorTestCase( + "const_objectid", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$addToSet": ObjectId("000000000000000000000000")}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [ObjectId("000000000000000000000000")]}], + msg="$addToSet with ObjectId constant should return single-element set", + ), + AccumulatorTestCase( + "const_datetime", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$addToSet": datetime(2020, 1, 1, tzinfo=timezone.utc)}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [datetime(2020, 1, 1, tzinfo=timezone.utc)]}], + msg="$addToSet with datetime constant should return single-element set", + ), + AccumulatorTestCase( + "const_timestamp", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": Timestamp(1, 1)}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Timestamp(1, 1)]}], + msg="$addToSet with Timestamp constant should return single-element set", + ), + AccumulatorTestCase( + "const_regex", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": Regex("abc", "i")}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [Regex("abc", "i")]}], + msg="$addToSet with Regex constant should return single-element set", + ), + AccumulatorTestCase( + "const_null", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": None}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [None]}], + msg="$addToSet with null 
constant should return [null]", + ), + AccumulatorTestCase( + "const_minkey", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": MinKey()}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"": MinKey()}]}], + msg="$addToSet with MinKey constant should return MinKey wrapped in document", + ), + AccumulatorTestCase( + "const_maxkey", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": MaxKey()}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"": MaxKey()}]}], + msg="$addToSet with MaxKey constant should return MaxKey wrapped in document", + ), +] + +# --------------------------------------------------------------------------- +# Property [Expression Types]: $addToSet accepts various expression types as +# its operand and evaluates them per document before collecting unique values. +# --------------------------------------------------------------------------- +ADDTOSET_EXPRESSION_TYPE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "expr_type_operator_single", + docs=[{"v": -10}, {"v": 20}, {"v": -5}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": {"$abs": "$v"}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [10, 20, 5]}], + msg="$addToSet should accept single-input expression operator", + ), + AccumulatorTestCase( + "expr_type_operator_multi_arg", + docs=[{"v": -10, "w": 3}, {"v": 20, "w": 7}, {"v": -5, "w": 1}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$addToSet": {"$add": ["$v", "$w"]}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [-7, 27, -4]}], + msg="$addToSet should accept a multi-arg expression operator", + ), + AccumulatorTestCase( + "expr_type_nested", + docs=[{"v": -10}, {"v": 20}, {"v": -5}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$addToSet": {"$add": [1, {"$abs": "$v"}]}}, + } + }, + 
{"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [11, 21, 6]}], + msg="$addToSet should accept nested expression operators", + ), + AccumulatorTestCase( + "expr_type_sysvar_remove", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": "$$REMOVE"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": []}], + msg="$addToSet with $$REMOVE should exclude all values and return empty array", + ), + AccumulatorTestCase( + "expr_type_object_expression", + docs=[{"v": 10}, {"v": 20}, {"v": 5}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": {"a": "$v"}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"a": 10}, {"a": 20}, {"a": 5}]}], + msg="$addToSet should accept an object expression", + ), + AccumulatorTestCase( + "expr_type_object_with_operator", + docs=[{"v": -10}, {"v": 20}, {"v": -5}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$addToSet": {"a": {"$abs": "$v"}}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [{"a": 10}, {"a": 20}, {"a": 5}]}], + msg="$addToSet should accept an object expression containing an operator", + ), + AccumulatorTestCase( + "expr_type_let", + docs=[{"v": 10}, {"v": 20}, {"v": 5}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$addToSet": {"$let": {"vars": {"x": "$v"}, "in": "$$x"}}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [10, 20, 5]}], + msg="$addToSet should accept a $let expression as its operand", + ), +] + +# --------------------------------------------------------------------------- +# Property [Order Independence]: $addToSet produces the same set regardless +# of input order. 
+# --------------------------------------------------------------------------- +ADDTOSET_ORDER_INDEPENDENCE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "order_independent_asc", + docs=[{"v": 3}, {"v": 1}, {"v": 5}, {"v": 2}, {"v": 4}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [1, 2, 3, 4, 5]}], + msg="$addToSet with ascending sort should produce same set", + ), + AccumulatorTestCase( + "order_independent_desc", + docs=[{"v": 3}, {"v": 1}, {"v": 5}, {"v": 2}, {"v": 4}], + pipeline=[ + {"$sort": {"v": -1}}, + {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": [1, 2, 3, 4, 5]}], + msg="$addToSet with descending sort should produce same set", + ), +] + # --------------------------------------------------------------------------- # Aggregate # --------------------------------------------------------------------------- @@ -368,6 +663,9 @@ + ADDTOSET_GROUPING_TESTS + ADDTOSET_EMPTY_TESTS + ADDTOSET_EDGE_CASE_TESTS + + ADDTOSET_BSON_CONSTANT_TESTS + + ADDTOSET_EXPRESSION_TYPE_TESTS + + ADDTOSET_ORDER_INDEPENDENCE_TESTS ) # --------------------------------------------------------------------------- diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py index e319caf0..644c8f4f 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py @@ -7,7 +7,6 @@ import pytest from bson import ( Binary, - Code, Decimal128, Int64, MaxKey, @@ -157,20 +156,6 @@ expected=[{"result": [Regex("abc"), 
Regex("def")]}], msg="$addToSet should collect and deduplicate Regex values", ), - AccumulatorTestCase( - "bson_code", - docs=[ - {"v": Code("function(){}")}, - {"v": Code("function(){return 1}")}, - {"v": Code("function(){}")}, - ], - pipeline=[ - {"$group": {"_id": None, "result": {"$addToSet": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": ["function(){}", "function(){return 1}"]}], - msg="$addToSet should collect and deduplicate Code values", - ), AccumulatorTestCase( "bson_timestamp", docs=[ diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py index 22208c10..67ba0730 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py @@ -11,6 +11,8 @@ from documentdb_tests.framework.error_codes import ( CONVERSION_FAILURE_ERROR, DIVIDE_BY_ZERO_V2_ERROR, + EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, + GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, MODULO_BY_ZERO_V2_ERROR, ) from documentdb_tests.framework.executor import execute_command @@ -20,6 +22,66 @@ # Property lists # --------------------------------------------------------------------------- +# Property [Arity]: $addToSet in accumulator context is a unary operator and +# rejects array syntax. 
+ADDTOSET_ARITY_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "arity_empty_array", + docs=[{"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": []}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject empty array in accumulator context", + ), + AccumulatorTestCase( + "arity_single_element_array", + docs=[{"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": [1]}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject single-element array in accumulator context", + ), + AccumulatorTestCase( + "arity_single_field_ref_array", + docs=[{"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": ["$v"]}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject single field ref in array in accumulator context", + ), + AccumulatorTestCase( + "arity_multi_element_array", + docs=[{"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$addToSet": [1, 2, 3]}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$addToSet should reject multi-element array in accumulator context", + ), + AccumulatorTestCase( + "arity_multi_key_expression_object", + docs=[{"v": 1}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$addToSet": {"$add": [1, 2], "$multiply": [3, 4]}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, + msg="$addToSet should reject multi-key expression object", + ), +] + # Property [Expression Error Propagation]: errors from sub-expressions propagate. 
ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( @@ -50,7 +112,10 @@ # --------------------------------------------------------------------------- -@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_EXPRESSION_ERROR_TESTS)) +ADDTOSET_ERROR_TESTS = ADDTOSET_ARITY_ERROR_TESTS + ADDTOSET_EXPRESSION_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_ERROR_TESTS)) def test_accumulator_addToSet_errors(collection, test_case): """Test $addToSet accumulator error cases with $group.""" if test_case.docs: