From 3ebfb329e525dff37e02569abd527c58bcc9237b Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Wed, 20 May 2026 12:28:11 -0700
Subject: [PATCH 01/13] Initial generated addToSet

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../addToSet/test_accumulator_addToSet.py     | 1326 +++++++++++++++++
 .../test_addToSet_bucketAuto_smoke.py         |  123 ++
 .../addToSet/test_addToSet_bucket_smoke.py    |  123 ++
 .../test_addToSet_setWindowFields_smoke.py    |  165 ++
 4 files changed, 1737 insertions(+)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
new file mode 100644
index 00000000..bcd07809
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
@@ -0,0 +1,1326 @@
+"""Tests for $addToSet accumulator ($group)."""
+
+from __future__ import annotations
+
+import math
+from datetime import datetime, timezone
+
+import pytest
+from bson import (
+    Binary,
+    Code,
+    Decimal128,
+    Int64,
+    MaxKey,
+    MinKey,
+    ObjectId,
+    Regex,
+    Timestamp,
+)
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
+from documentdb_tests.framework.error_codes import (
+    CONVERSION_FAILURE_ERROR,
+    DIVIDE_BY_ZERO_V2_ERROR,
+    EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+    MODULO_BY_ZERO_V2_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+_OID1 = ObjectId("000000000000000000000001")  # fixed, distinct ObjectIds for test docs
+_OID2 = ObjectId("000000000000000000000002")
+_DT1 = datetime(2020, 1, 1, tzinfo=timezone.utc)  # fixed, distinct UTC datetimes for test docs
+_DT2 = datetime(2021, 1, 1, tzinfo=timezone.utc)
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [Null Collected]: null values are collected as valid values and deduplicated.
+ADDTOSET_NULL_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "null_all",
+        docs=[{"v": None}, {"v": None}, {"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null and deduplicate to a single null",
+    ),
+    AccumulatorTestCase(
+        "null_single",
+        docs=[{"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect a single null value",
+    ),
+    AccumulatorTestCase(
+        "null_among_values",
+        docs=[{"v": None}, {"v": 5}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None, 5, 3]}],
+        msg="$addToSet should collect null alongside other values",
+    ),
+    AccumulatorTestCase(
+        "null_and_values_dedup",
+        docs=[{"v": 10}, {"v": None}, {"v": 5}, {"v": None}, {"v": 10}],  # repeats exercise dedup
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, None, 5]}],
+        msg="$addToSet should collect null and distinct values without duplication",
+    ),
+]
+
+# Property [Missing Excluded]: docs lacking the field add nothing; an all-missing group yields [].
+ADDTOSET_MISSING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "missing_all",
+        docs=[{"x": 1}, {"x": 2}, {"x": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": []}],
+        msg="$addToSet should return empty array when all fields are missing",
+    ),
+    AccumulatorTestCase(
+        "missing_single",
+        docs=[{"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": []}],
+        msg="$addToSet should return empty array for a single doc with missing field",
+    ),
+    AccumulatorTestCase(
+        "missing_among_values",
+        docs=[{"x": 1}, {"v": 5}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5, 3]}],
+        msg="$addToSet should exclude missing fields and collect only present values",
+    ),
+]
+
+# Property [Null and Missing Combined]: in one group, null is collected while missing is skipped.
+ADDTOSET_NULL_MISSING_COMBINED_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "combined_null_and_missing",
+        docs=[{"v": None}, {"x": 1}, {"v": None}, {"x": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null but exclude missing fields",
+    ),
+    AccumulatorTestCase(
+        "combined_null_missing_and_values",
+        docs=[{"v": 10}, {"v": None}, {"x": 1}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, None, 5]}],
+        msg="$addToSet should collect null and values but exclude missing fields",
+    ),
+]
+
+# Property [$$REMOVE Excluded]: a $cond branch that yields $$REMOVE adds no element to the set.
+ADDTOSET_REMOVE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "remove_all",
+        docs=[{"v": -1}, {"v": -2}, {"v": -3}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$cond": [{"$gte": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": []}],
+        msg="$addToSet should treat $$REMOVE as missing and return empty array",
+    ),
+    AccumulatorTestCase(
+        "remove_some",
+        docs=[{"v": -1}, {"v": 5}, {"v": -2}, {"v": 10}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$cond": [{"$gte": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5, 10]}],
+        msg="$addToSet should exclude $$REMOVE values and collect the rest",
+    ),
+    AccumulatorTestCase(
+        "remove_and_null_value",
+        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$cond": [{"$gt": ["$v", 2]}, None, "$$REMOVE"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null produced by $cond while excluding $$REMOVE",
+    ),
+    AccumulatorTestCase(
+        "remove_dedup",
+        docs=[{"v": 5}, {"v": 5}, {"v": -1}, {"v": -2}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$cond": [{"$gte": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5]}],
+        msg="$addToSet should deduplicate values and exclude $$REMOVE entries",
+    ),
+]
+
+# Property [Unique Value Collection]: the result array holds each distinct value exactly once.
+ADDTOSET_UNIQUE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "unique_distinct",
+        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20, 30]}],
+        msg="$addToSet should return all distinct values",
+    ),
+    AccumulatorTestCase(
+        "unique_with_duplicates",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20, 30]}],
+        msg="$addToSet should deduplicate repeated values",
+    ),
+    AccumulatorTestCase(
+        "unique_all_same",
+        docs=[{"v": 42}, {"v": 42}, {"v": 42}, {"v": 42}, {"v": 42}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [42]}],
+        msg="$addToSet should collapse identical values into one element",
+    ),
+    AccumulatorTestCase(
+        "unique_single_doc",
+        docs=[{"v": 7}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [7]}],
+        msg="$addToSet should return single-element array for one document",
+    ),
+]
+
+# Property [Array as Single Element]: an array value is one set element; it is never flattened.
+ADDTOSET_ARRAY_ELEMENT_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "array_distinct",
+        docs=[{"v": [1, 2]}, {"v": [3, 4]}, {"v": [1, 2]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[1, 2], [3, 4]]}],
+        msg="$addToSet should treat arrays as single elements and deduplicate identical arrays",
+    ),
+    AccumulatorTestCase(
+        "array_empty",
+        docs=[{"v": []}, {"v": []}, {"v": [1]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[], [1]]}],
+        msg="$addToSet should treat empty arrays as single elements and deduplicate them",
+    ),
+    AccumulatorTestCase(
+        "array_nested",
+        docs=[{"v": [[1]]}, {"v": [[2]]}, {"v": [[1]]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[[1]], [[2]]]}],
+        msg="$addToSet should treat nested arrays as single elements and deduplicate them",
+    ),
+    AccumulatorTestCase(
+        "array_mixed_scalar",
+        docs=[{"v": 1}, {"v": [1]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, [1]]}],
+        msg="$addToSet should distinguish scalar 1 from array [1]",
+    ),
+    AccumulatorTestCase(
+        "array_single_doc",
+        docs=[{"v": [1, 2, 3]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[1, 2, 3]]}],
+        msg="$addToSet should wrap the array value as a single element in the result",
+    ),
+]
+
+# Property [Document Duplicate Detection]: documents are duplicates only if they have
+# exactly the same fields, values, and field order (nested documents included).
+ADDTOSET_DOC_DEDUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "doc_identical",
+        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 2}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 1, "b": 2}]}],
+        msg="$addToSet should deduplicate identical documents",
+    ),
+    AccumulatorTestCase(
+        "doc_different_field_order",
+        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"b": 2, "a": 1}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"b": 2, "a": 1}, {"a": 1, "b": 2}]}],
+        msg="$addToSet should treat documents with different field order as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_different_values",
+        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 3}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 1, "b": 2}, {"a": 1, "b": 3}]}],
+        msg="$addToSet should treat documents with different values as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_nested_identical",
+        docs=[{"v": {"a": {"x": 1}}}, {"v": {"a": {"x": 1}}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": {"x": 1}}]}],
+        msg="$addToSet should deduplicate nested documents with identical structure",
+    ),
+    AccumulatorTestCase(
+        "doc_nested_different_order",
+        docs=[{"v": {"a": {"x": 1, "y": 2}}}, {"v": {"a": {"y": 2, "x": 1}}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": {"x": 1, "y": 2}}, {"a": {"y": 2, "x": 1}}]}],
+        msg="$addToSet should treat nested documents with different field order as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_empty",
+        docs=[{"v": {}}, {"v": {}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{}]}],
+        msg="$addToSet should deduplicate empty documents",
+    ),
+    AccumulatorTestCase(
+        "doc_subset",
+        docs=[{"v": {"a": 1}}, {"v": {"a": 1, "b": 2}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 1, "b": 2}, {"a": 1}]}],
+        msg="$addToSet should treat a document subset and superset as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_with_array_value",
+        docs=[{"v": {"a": [1, 2]}}, {"v": {"a": [1, 2]}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": [1, 2]}]}],
+        msg="$addToSet should deduplicate documents containing identical array values",
+    ),
+    AccumulatorTestCase(
+        "doc_with_null_value",
+        docs=[{"v": {"a": None}}, {"v": {"a": None}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": None}]}],
+        msg="$addToSet should deduplicate documents with null field values",
+    ),
+    AccumulatorTestCase(
+        "doc_with_nested_null",
+        docs=[{"v": {"a": {"b": None}}}, {"v": {"a": {"b": None}}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": {"b": None}}]}],
+        msg="$addToSet should deduplicate documents with nested null values",
+    ),
+]
+
+# Property [String Deduplication]: equal strings collapse; no Unicode normalization is applied.
+ADDTOSET_STRING_DEDUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "string_identical",
+        docs=[{"v": "abc"}, {"v": "abc"}, {"v": "def"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["abc", "def"]}],
+        msg="$addToSet should deduplicate identical strings",
+    ),
+    AccumulatorTestCase(
+        "string_empty",
+        docs=[{"v": ""}, {"v": ""}, {"v": "x"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["", "x"]}],
+        msg="$addToSet should deduplicate empty strings",
+    ),
+    AccumulatorTestCase(
+        "string_unicode_no_normalization",
+        docs=[
+            {"v": "\u00e9"},
+            {"v": "\u0065\u0301"},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["\u00e9", "\u0065\u0301"]}],
+        msg="$addToSet should not normalize Unicode; precomposed and decomposed are distinct",
+    ),
+]
+
+# Property [BSON Type Collection]: $addToSet collects and deduplicates values of every
+# non-deprecated BSON type.
+ADDTOSET_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bson_int32",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20]}],
+        msg="$addToSet should collect and deduplicate int32 values",
+    ),
+    AccumulatorTestCase(
+        "bson_int64",
+        docs=[{"v": Int64(10)}, {"v": Int64(20)}, {"v": Int64(10)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Int64(10), Int64(20)]}],
+        msg="$addToSet should collect and deduplicate Int64 values",
+    ),
+    AccumulatorTestCase(
+        "bson_double",
+        docs=[{"v": 1.5}, {"v": 2.5}, {"v": 1.5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1.5, 2.5]}],
+        msg="$addToSet should collect and deduplicate double values",
+    ),
+    AccumulatorTestCase(
+        "bson_decimal128",
+        docs=[
+            {"v": Decimal128("1.5")},
+            {"v": Decimal128("2.5")},
+            {"v": Decimal128("1.5")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("1.5"), Decimal128("2.5")]}],
+        msg="$addToSet should collect and deduplicate Decimal128 values",
+    ),
+    AccumulatorTestCase(
+        "bson_string",
+        docs=[{"v": "abc"}, {"v": "def"}, {"v": "abc"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["abc", "def"]}],
+        msg="$addToSet should collect and deduplicate string values",
+    ),
+    AccumulatorTestCase(
+        "bson_bool",
+        docs=[{"v": True}, {"v": False}, {"v": True}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [True, False]}],
+        msg="$addToSet should collect and deduplicate boolean values",
+    ),
+    AccumulatorTestCase(
+        "bson_datetime",
+        docs=[{"v": _DT1}, {"v": _DT2}, {"v": _DT1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [_DT1, _DT2]}],
+        msg="$addToSet should collect and deduplicate datetime values",
+    ),
+    AccumulatorTestCase(
+        "bson_objectid",
+        docs=[{"v": _OID1}, {"v": _OID2}, {"v": _OID1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [_OID1, _OID2]}],
+        msg="$addToSet should collect and deduplicate ObjectId values",
+    ),
+    AccumulatorTestCase(
+        "bson_binary",
+        docs=[{"v": Binary(b"\x00")}, {"v": Binary(b"\x01")}, {"v": Binary(b"\x00")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [b"\x00", b"\x01"]}],
+        msg="$addToSet should collect and deduplicate Binary values",
+    ),
+    AccumulatorTestCase(
+        "bson_regex",
+        docs=[{"v": Regex("abc")}, {"v": Regex("def")}, {"v": Regex("abc")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Regex("abc"), Regex("def")]}],
+        msg="$addToSet should collect and deduplicate Regex values",
+    ),
+    AccumulatorTestCase(
+        "bson_code",
+        docs=[
+            {"v": Code("function(){}")},
+            {"v": Code("function(){return 1}")},
+            {"v": Code("function(){}")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["function(){}", "function(){return 1}"]}],
+        msg="$addToSet should collect and deduplicate Code values",
+    ),
+    AccumulatorTestCase(
+        "bson_timestamp",
+        docs=[
+            {"v": Timestamp(100, 1)},
+            {"v": Timestamp(200, 1)},
+            {"v": Timestamp(100, 1)},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Timestamp(100, 1), Timestamp(200, 1)]}],
+        msg="$addToSet should collect and deduplicate Timestamp values",
+    ),
+    AccumulatorTestCase(
+        "bson_minkey",
+        docs=[{"v": MinKey()}, {"v": MinKey()}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"": MinKey()}]}],  # NOTE(review): confirm the {"": MinKey} wrapper
+        msg="$addToSet should deduplicate MinKey values",
+    ),
+    AccumulatorTestCase(
+        "bson_maxkey",
+        docs=[{"v": MaxKey()}, {"v": MaxKey()}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"": MaxKey()}]}],  # NOTE(review): confirm the {"": MaxKey} wrapper
+        msg="$addToSet should deduplicate MaxKey values",
+    ),
+    AccumulatorTestCase(
+        "bson_document",
+        docs=[{"v": {"x": 1}}, {"v": {"x": 2}}, {"v": {"x": 1}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"x": 1}, {"x": 2}]}],
+        msg="$addToSet should collect and deduplicate embedded document values",
+    ),
+    AccumulatorTestCase(
+        "bson_array",
+        docs=[{"v": [1, 2]}, {"v": [3, 4]}, {"v": [1, 2]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[1, 2], [3, 4]]}],
+        msg="$addToSet should collect and deduplicate array values as single elements",
+    ),
+    AccumulatorTestCase(
+        "bson_null",
+        docs=[{"v": None}, {"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should deduplicate null values",
+    ),
+]
+
+# Property [Mixed Type Collection]: one group may hold values of different BSON types
+# (number, string, bool, array, document) side by side.
+ADDTOSET_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "mixed_types",
+        docs=[
+            {"v": 42},
+            {"v": "hello"},
+            {"v": True},
+            {"v": [1, 2]},
+            {"v": {"a": 1}},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [42, "hello", True, [1, 2], {"a": 1}]}],
+        msg="$addToSet should collect values of different BSON types in one group",
+    ),
+]
+
+# Property [Numeric Equivalence]: equal numbers dedup across types; expected is the first one seen.
+ADDTOSET_NUMERIC_EQUIV_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "equiv_all_ones",
+        docs=[{"v": 1}, {"v": Int64(1)}, {"v": 1.0}, {"v": Decimal128("1")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1]}],
+        msg="$addToSet should deduplicate numerically equivalent values of all numeric types",
+    ),
+    AccumulatorTestCase(
+        "equiv_all_zeros",
+        docs=[{"v": 0}, {"v": Int64(0)}, {"v": 0.0}, {"v": Decimal128("0")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [0]}],
+        msg="$addToSet should deduplicate numerically equivalent zero values",
+    ),
+    AccumulatorTestCase(
+        "equiv_int32_int64",
+        docs=[{"v": 5}, {"v": Int64(5)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5]}],
+        msg="$addToSet should deduplicate int32 and Int64 with same numeric value",
+    ),
+    AccumulatorTestCase(
+        "equiv_double_int32",
+        docs=[{"v": 3.0}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [3.0]}],
+        msg="$addToSet should deduplicate double and int32 with same numeric value",
+    ),
+    AccumulatorTestCase(
+        "equiv_decimal128_int64",
+        docs=[{"v": Decimal128("100")}, {"v": Int64(100)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("100")]}],
+        msg="$addToSet should deduplicate Decimal128 and Int64 with same numeric value",
+    ),
+    AccumulatorTestCase(
+        "equiv_negative",
+        docs=[{"v": -1}, {"v": Int64(-1)}, {"v": -1.0}, {"v": Decimal128("-1")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-1]}],
+        msg="$addToSet should deduplicate negative numerically equivalent values",
+    ),
+]
+
+# Property [BSON Type Distinction]: look-alikes of different types (false vs 0) stay distinct.
+ADDTOSET_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "distinct_false_vs_zero",
+        docs=[{"v": False}, {"v": 0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [0, False]}],
+        msg="$addToSet should treat false and int32(0) as distinct BSON types",
+    ),
+    AccumulatorTestCase(
+        "distinct_true_vs_one",
+        docs=[{"v": True}, {"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, True]}],
+        msg="$addToSet should treat true and int32(1) as distinct BSON types",
+    ),
+    AccumulatorTestCase(
+        "distinct_null_vs_missing",
+        docs=[{"v": None}, {"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null but exclude missing field",
+    ),
+    AccumulatorTestCase(
+        "distinct_empty_string_vs_null",
+        docs=[{"v": ""}, {"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["", None]}],
+        msg="$addToSet should treat empty string and null as distinct",
+    ),
+    AccumulatorTestCase(
+        "distinct_string_vs_number",
+        docs=[{"v": "123"}, {"v": 123}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [123, "123"]}],
+        msg="$addToSet should treat string '123' and int 123 as distinct",
+    ),
+]
+
+# Property [NaN Deduplication]: NaN equals NaN for dedup, including double NaN vs Decimal128 NaN.
+ADDTOSET_NAN_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "nan_double_dedup",
+        docs=[{"v": float("nan")}, {"v": float("nan")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}],
+        msg="$addToSet should deduplicate double NaN values",
+    ),
+    AccumulatorTestCase(
+        "nan_decimal128_dedup",
+        docs=[{"v": Decimal128("NaN")}, {"v": Decimal128("NaN")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("NaN")]}],
+        msg="$addToSet should deduplicate Decimal128 NaN values",
+    ),
+    AccumulatorTestCase(
+        "nan_cross_type",
+        docs=[{"v": float("nan")}, {"v": Decimal128("NaN")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}],
+        msg="$addToSet should deduplicate float NaN and Decimal128 NaN as numerically equal",
+    ),
+    AccumulatorTestCase(
+        "nan_with_finite",
+        docs=[{"v": float("nan")}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [pytest.approx(math.nan, nan_ok=True), 5]}],
+        msg="$addToSet should treat NaN and finite values as distinct",
+    ),
+]
+
+# Property [Infinity Deduplication]: equal-sign Infinity dedupes across types; +Inf != -Inf.
+ADDTOSET_INFINITY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "inf_double_dedup",
+        docs=[{"v": float("inf")}, {"v": float("inf")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("inf")]}],
+        msg="$addToSet should deduplicate positive Infinity values",
+    ),
+    AccumulatorTestCase(
+        "neg_inf_double_dedup",
+        docs=[{"v": float("-inf")}, {"v": float("-inf")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("-inf")]}],
+        msg="$addToSet should deduplicate negative Infinity values",
+    ),
+    AccumulatorTestCase(
+        "inf_cross_type",
+        docs=[{"v": float("inf")}, {"v": Decimal128("Infinity")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("inf")]}],
+        msg="$addToSet should deduplicate float Infinity and Decimal128 Infinity",
+    ),
+    AccumulatorTestCase(
+        "inf_vs_neg_inf",
+        docs=[{"v": float("inf")}, {"v": float("-inf")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("-inf"), float("inf")]}],
+        msg="$addToSet should treat positive and negative Infinity as distinct",
+    ),
+]
+
+# Property [Negative Zero]: negative and positive zero are deduplicated, including across types.
+ADDTOSET_NEG_ZERO_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "neg_zero_double",
+        docs=[{"v": -0.0}, {"v": 0.0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-0.0]}],
+        msg="$addToSet should deduplicate -0.0 and 0.0 as numerically equal",
+    ),
+    AccumulatorTestCase(
+        "neg_zero_decimal128",
+        docs=[{"v": Decimal128("-0")}, {"v": Decimal128("0")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("-0")]}],
+        msg="$addToSet should deduplicate Decimal128 -0 and 0 as numerically equal",
+    ),
+    AccumulatorTestCase(
+        "neg_zero_cross_type",
+        docs=[{"v": -0.0}, {"v": 0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-0.0]}],
+        msg="$addToSet should deduplicate -0.0 and int 0 as numerically equal",
+    ),
+]
+
+# Property [Decimal128 Precision]: numerically equal Decimal128 encodings (trailing zeros) are
+# deduplicated, while full 34-digit precision and the extreme magnitudes are preserved.
+ADDTOSET_DECIMAL128_PRECISION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "decimal_trailing_zeros",
+        docs=[{"v": Decimal128("1.0")}, {"v": Decimal128("1.00")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("1.0")]}],
+        msg="$addToSet should deduplicate Decimal128 values with different trailing zeros",
+    ),
+    AccumulatorTestCase(
+        "decimal_34_digit_precision",
+        docs=[{"v": Decimal128("1.234567890123456789012345678901234")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("1.234567890123456789012345678901234")]}],
+        msg="$addToSet should preserve full 34-digit Decimal128 precision",
+    ),
+    AccumulatorTestCase(
+        "decimal_max_min_distinct",
+        docs=[
+            {"v": Decimal128("9.999999999999999999999999999999999E+6144")},
+            {"v": Decimal128("1E-6176")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[
+            {
+                "result": [
+                    Decimal128("1E-6176"),
+                    Decimal128("9.999999999999999999999999999999999E+6144"),
+                ]
+            }
+        ],
+        msg="$addToSet should treat Decimal128 max and min as distinct values",
+    ),
+]
+
+# Property [Expression Arguments]: field paths, literals, and computed expressions are accepted.
+ADDTOSET_EXPRESSION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "expr_field_path",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20]}],
+        msg="$addToSet should collect values from a field path expression",
+    ),
+    AccumulatorTestCase(
+        "expr_nested_field",
+        docs=[{"a": {"b": 1}}, {"a": {"b": 2}}, {"a": {"b": 1}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$a.b"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, 2]}],
+        msg="$addToSet should collect values from a nested field path",
+    ),
+    AccumulatorTestCase(
+        "expr_literal",
+        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": 42}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [42]}],
+        msg="$addToSet should deduplicate a constant literal applied to all docs",
+    ),
+    AccumulatorTestCase(
+        "expr_computed",
+        docs=[{"price": 10, "qty": 2}, {"price": 5, "qty": 3}, {"price": 10, "qty": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": {"$multiply": ["$price", "$qty"]}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [20, 15]}],
+        msg="$addToSet should collect unique computed expression results",
+    ),
+    AccumulatorTestCase(
+        "expr_null_literal",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": None}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null literal and deduplicate across docs",
+    ),
+    AccumulatorTestCase(
+        "expr_composite_array_path",
+        docs=[{"a": [{"b": 1}, {"b": 2}]}, {"a": [{"b": 3}, {"b": 1}]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$a.b"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[3, 1], [1, 2]]}],
+        msg="$addToSet should collect array values from composite array path",
+    ),
+]
+
+# Property [Grouping by Key]: each group key accumulates its own independent set.
+ADDTOSET_GROUPING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "multi_group",
+        docs=[
+            {"g": "A", "v": 1},
+            {"g": "A", "v": 2},
+            {"g": "A", "v": 1},
+            {"g": "B", "v": 3},
+            {"g": "B", "v": 3},
+            {"g": "B", "v": 4},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$g", "result": {"$addToSet": "$v"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[
+            {"_id": "A", "result": [1, 2]},
+            {"_id": "B", "result": [3, 4]},
+        ],
+        msg="$addToSet should compute unique sets independently per group key",
+    ),
+]
+
+# Property [Empty Collection]: with no input documents, $group emits no output documents.
+ADDTOSET_EMPTY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "empty_collection",
+        docs=None,  # falsy, so the test function inserts nothing
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[],
+        msg="$addToSet should produce no output documents for an empty collection",
+    ),
+]
+
+# Property [Edge Cases]: lone null/missing docs, many docs, array values, Binary/Regex variants.
+ADDTOSET_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "edge_single_null_doc",
+        docs=[{"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should return [null] for single null document",
+    ),
+    AccumulatorTestCase(
+        "edge_single_missing_doc",
+        docs=[{"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": []}],
+        msg="$addToSet should return empty array for single document with missing field",
+    ),
+    AccumulatorTestCase(
+        "edge_many_unique",
+        docs=[{"v": i} for i in range(100)],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": list(range(100))}],
+        msg="$addToSet should collect 100 unique values into a 100-element array",
+    ),
+    AccumulatorTestCase(
+        "edge_many_docs_few_unique",
+        docs=[{"v": i % 5} for i in range(100)],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [0, 1, 2, 3, 4]}],
+        msg="$addToSet should deduplicate 100 docs down to 5 unique values",
+    ),
+    AccumulatorTestCase(
+        "edge_array_field_not_traversed",
+        docs=[{"v": [5, 1, 8]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[5, 1, 8]]}],
+        msg="$addToSet should treat array field as a single element, not traverse it",
+    ),
+    AccumulatorTestCase(
+        "edge_mixed_array_scalar",
+        docs=[{"v": 5}, {"v": [5]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5, [5]]}],
+        msg="$addToSet should distinguish scalar 5 from array [5]",
+    ),
+    AccumulatorTestCase(
+        "edge_binary_different_subtypes",
+        docs=[{"v": Binary(b"\x00", 0)}, {"v": Binary(b"\x00", 5)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [b"\x00", Binary(b"\x00", 5)]}],  # subtype 0 is expected as bytes
+        msg="$addToSet should treat Binary values with different subtypes as distinct",
+    ),
+    AccumulatorTestCase(
+        "edge_regex_different_flags",
+        docs=[{"v": Regex("abc", "i")}, {"v": Regex("abc", "m")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Regex("abc", "i"), Regex("abc", "m")]}],
+        msg="$addToSet should treat Regex values with different flags as distinct",
+    ),
+    AccumulatorTestCase(
+        "edge_expression_mixed_types",
+        docs=[{"v": 1}, {"v": "hello"}, {"v": True}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, "hello", True]}],
+        msg="$addToSet should collect mixed-type values from expression",
+    ),
+]
+
+# Property [Arity Rejection]: $group $addToSet rejects array arguments and multi-key objects.
+ADDTOSET_ARITY_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "arity_empty_array",
+        docs=[{"v": 1}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": []}}}],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject empty array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_single_element_literal",
+        docs=[{"v": 1}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": [1]}}}],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject single-element literal array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_single_field_ref",
+        docs=[{"v": 1}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": ["$v"]}}}],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject single field ref in array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_element",
+        docs=[{"v": 1}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": [1, 2, 3]}}}],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject multi-element array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_key_expression",
+        docs=[{"v": 1}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$add": [1, 2], "$multiply": [3, 4]}},
+                }
+            }
+        ],
+        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+        msg="$addToSet should reject multi-key expression object",
+    ),
+]
+
+# Property [Expression Error Propagation]: $toInt, $divide, and $mod failures surface as errors.
+ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "error_toInt_invalid",
+        docs=[{"v": "not_a_number"}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$toInt": "$v"}}}}],
+        error_code=CONVERSION_FAILURE_ERROR,
+        msg="$addToSet should propagate $toInt conversion error",
+    ),
+    AccumulatorTestCase(
+        "error_divide_by_zero",
+        docs=[{"v": 10}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$divide": ["$v", 0]}}}}],
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$addToSet should propagate divide-by-zero error",
+    ),
+    AccumulatorTestCase(
+        "error_mod_by_zero",
+        docs=[{"v": 10}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$mod": ["$v", 0]}}}}],
+        error_code=MODULO_BY_ZERO_V2_ERROR,
+        msg="$addToSet should propagate mod-by-zero error",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Aggregates
+# ---------------------------------------------------------------------------
+
+ADDTOSET_SUCCESS_TESTS = [  # flat list of every success-case group
+    *ADDTOSET_NULL_TESTS,
+    *ADDTOSET_MISSING_TESTS,
+    *ADDTOSET_NULL_MISSING_COMBINED_TESTS,
+    *ADDTOSET_REMOVE_TESTS,
+    *ADDTOSET_UNIQUE_TESTS,
+    *ADDTOSET_ARRAY_ELEMENT_TESTS,
+    *ADDTOSET_DOC_DEDUP_TESTS,
+    *ADDTOSET_STRING_DEDUP_TESTS,
+    *ADDTOSET_BSON_TYPE_TESTS,
+    *ADDTOSET_MIXED_TYPE_TESTS,
+    *ADDTOSET_NUMERIC_EQUIV_TESTS,
+    *ADDTOSET_TYPE_DISTINCTION_TESTS,
+    *ADDTOSET_NAN_TESTS,
+    *ADDTOSET_INFINITY_TESTS,
+    *ADDTOSET_NEG_ZERO_TESTS,
+    *ADDTOSET_DECIMAL128_PRECISION_TESTS,
+    *ADDTOSET_EXPRESSION_TESTS,
+    *ADDTOSET_GROUPING_TESTS,
+    *ADDTOSET_EMPTY_TESTS,
+    *ADDTOSET_EDGE_CASE_TESTS,
+]
+
+ADDTOSET_ERROR_TESTS = [*ADDTOSET_ARITY_ERROR_TESTS, *ADDTOSET_EXPRESSION_ERROR_TESTS]
+
+# ---------------------------------------------------------------------------
+# Primary test functions
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SUCCESS_TESTS))
+def test_accumulator_addToSet(collection, test_case: AccumulatorTestCase):
+    """Seed the collection, run the $group pipeline, and expect the listed set members."""
+    seed_docs = test_case.docs
+    if seed_docs:
+        collection.insert_many(seed_docs)
+    command = {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}
+    response = execute_command(collection, command)
+    # Ask assertSuccess to ignore element order inside "result".
+    assertSuccess(response, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_ERROR_TESTS))
+def test_accumulator_addToSet_errors(collection, test_case: AccumulatorTestCase):
+    """Seed the collection, run the $group pipeline, and expect the case's error code."""
+    seed_docs = test_case.docs
+    if seed_docs:
+        collection.insert_many(seed_docs)
+    command = {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}
+    response = execute_command(collection, command)
+    # Compare the outcome against the error code declared on the case.
+    assertFailureCode(response, test_case.error_code, msg=test_case.msg)
+
+
+# ---------------------------------------------------------------------------
+# Property-specific tests
+# ---------------------------------------------------------------------------
+
+# Property [Return Type]: $type of the result is "array", even for null-only or all-missing input.
+ADDTOSET_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "return_type_numeric",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": [1, 2], "type": "array"}],
+        msg="$addToSet should return array type for numeric inputs",
+    ),
+    AccumulatorTestCase(
+        "return_type_string",
+        docs=[{"v": "a"}, {"v": "b"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": ["a", "b"], "type": "array"}],
+        msg="$addToSet should return array type for string inputs",
+    ),
+    AccumulatorTestCase(
+        "return_type_null_only",
+        docs=[{"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": [None], "type": "array"}],
+        msg="$addToSet should return array type for null-only inputs",
+    ),
+    AccumulatorTestCase(
+        "return_type_missing_only",
+        docs=[{"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": [], "type": "array"}],
+        msg="$addToSet should return array type for all-missing inputs",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_RETURN_TYPE_TESTS))
+def test_accumulator_addToSet_return_type(collection, test_case: AccumulatorTestCase):
+    """Seed the collection, run the pipeline, and check the projected value and its $type."""
+    seed_docs = test_case.docs
+    if seed_docs:
+        collection.insert_many(seed_docs)
+    command = {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}
+    response = execute_command(collection, command)
+    # The set is projected as "value" here, so that is the order-insensitive field.
+    assertSuccess(response, test_case.expected, msg=test_case.msg, ignore_order_in=["value"])
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py
new file mode 100644
index 00000000..b63dea44
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py
@@ -0,0 +1,123 @@
+"""Smoke tests for $addToSet accumulator in $bucketAuto context."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
+from documentdb_tests.framework.error_codes import (
+    BAD_VALUE_ERROR,
+    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [BucketAuto Smoke]: $addToSet collects, dedupes, and keeps null in a single bucket.
+ADDTOSET_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bucketAuto_basic",
+        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}],
+        msg="$addToSet should collect unique values in $bucketAuto context",
+    ),
+    AccumulatorTestCase(
+        "bucketAuto_duplicates",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}],
+        msg="$addToSet should deduplicate values in $bucketAuto context",
+    ),
+    AccumulatorTestCase(
+        "bucketAuto_null_among_values",
+        docs=[{"v": None}, {"v": 5}, {"v": 3}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": [None, 5, 3]}],
+        msg="$addToSet should collect null alongside values in $bucketAuto context",
+    ),
+]
+
+# Property [BucketAuto Errors]: array arguments are rejected and expression errors propagate.
+ADDTOSET_BUCKET_AUTO_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bucketAuto_arity_empty_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$addToSet": []}},
+                }
+            }
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject empty array in $bucketAuto context",
+    ),
+    AccumulatorTestCase(
+        "bucketAuto_expression_error",
+        docs=[{"v": 10}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}},
+                }
+            }
+        ],
+        error_code=BAD_VALUE_ERROR,  # NOTE(review): $bucket test expects DIVIDE_BY_ZERO_V2_ERROR
+        msg="$addToSet should propagate divide-by-zero error in $bucketAuto context",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_SMOKE_TESTS))
+def test_addToSet_bucketAuto_smoke(collection, test_case: AccumulatorTestCase):
+    """Seed the collection, run the $bucketAuto pipeline, and expect the listed set members."""
+    seed_docs = test_case.docs
+    if seed_docs:
+        collection.insert_many(seed_docs)
+    command = {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}
+    response = execute_command(collection, command)
+    # Ask assertSuccess to ignore element order inside "result".
+    assertSuccess(response, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_ERROR_TESTS))
+def test_addToSet_bucketAuto_smoke_errors(collection, test_case: AccumulatorTestCase):
+    """Seed the collection, run the $bucketAuto pipeline, and expect the case's error code."""
+    seed_docs = test_case.docs
+    if seed_docs:
+        collection.insert_many(seed_docs)
+    command = {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}
+    response = execute_command(collection, command)
+    # Compare the outcome against the error code declared on the case.
+    assertFailureCode(response, test_case.error_code, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py
new file mode 100644
index 00000000..85fb5e7c
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py
@@ -0,0 +1,123 @@
+"""Smoke tests for $addToSet accumulator in $bucket context."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
+from documentdb_tests.framework.error_codes import (
+    DIVIDE_BY_ZERO_V2_ERROR,
+    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Bucket Smoke]: $addToSet collects, dedupes, and keeps null within one $bucket range.
+ADDTOSET_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bucket_basic",
+        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": [10, 20, 30]}],
+        msg="$addToSet should collect unique values in $bucket context",
+    ),
+    AccumulatorTestCase(
+        "bucket_duplicates",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": [10, 20, 30]}],
+        msg="$addToSet should deduplicate values in $bucket context",
+    ),
+    AccumulatorTestCase(
+        "bucket_null_among_values",
+        docs=[{"v": None}, {"v": 5}, {"v": 3}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": [None, 5, 3]}],
+        msg="$addToSet should collect null alongside values in $bucket context",
+    ),
+]
+
+# Property [Bucket Errors]: array arguments are rejected and expression errors propagate.
+ADDTOSET_BUCKET_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bucket_arity_empty_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$addToSet": []}},
+                }
+            }
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject empty array in $bucket context",
+    ),
+    AccumulatorTestCase(
+        "bucket_expression_error",
+        docs=[{"v": 10}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}},
+                }
+            }
+        ],
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$addToSet should propagate divide-by-zero error in $bucket context",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_SMOKE_TESTS))
+def test_addToSet_bucket_smoke(collection, test_case: AccumulatorTestCase):
+    """Seed the collection, run the $bucket pipeline, and expect the listed set members."""
+    seed_docs = test_case.docs
+    if seed_docs:
+        collection.insert_many(seed_docs)
+    command = {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}
+    response = execute_command(collection, command)
+    # Ask assertSuccess to ignore element order inside "result".
+    assertSuccess(response, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_ERROR_TESTS))
+def test_addToSet_bucket_smoke_errors(collection, test_case: AccumulatorTestCase):
+    """Seed the collection, run the $bucket pipeline, and expect the case's error code."""
+    seed_docs = test_case.docs
+    if seed_docs:
+        collection.insert_many(seed_docs)
+    command = {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}
+    response = execute_command(collection, command)
+    # Compare the outcome against the error code declared on the case.
+    assertFailureCode(response, test_case.error_code, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py
new file mode 100644
index 00000000..3d57b5c4
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py
@@ -0,0 +1,165 @@
+"""Smoke tests for $addToSet accumulator in $setWindowFields context."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [SetWindowFields Smoke]: $addToSet works correctly in $setWindowFields context.
+ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "swf_unbounded",
+        docs=[
+            {"part": "A", "v": 10},
+            {"part": "A", "v": 20},
+            {"part": "A", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$part",
+                    "sortBy": {"v": 1},
+                    "output": {
+                        "result": {
+                            "$addToSet": "$v",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "v": 1, "result": 1}},
+            {"$sort": {"v": 1}},
+            {"$limit": 1},
+        ],
+        expected=[{"v": 10, "result": [10, 20]}],
+        msg="$addToSet should collect unique values across entire partition with unbounded window",
+    ),
+    AccumulatorTestCase(
+        "swf_cumulative",
+        docs=[
+            {"part": "A", "v": 10},
+            {"part": "A", "v": 20},
+            {"part": "A", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$part",
+                    "sortBy": {"_id": 1},
+                    "output": {
+                        "result": {
+                            "$addToSet": "$v",
+                            "window": {"documents": ["unbounded", "current"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "v": 1, "result": 1}},
+        ],
+        expected=[
+            {"v": 10, "result": [10]},
+            {"v": 20, "result": [10, 20]},
+            {"v": 10, "result": [10, 20]},
+        ],
+        msg="$addToSet should compute cumulative unique values with [unbounded, current] window",
+    ),
+    AccumulatorTestCase(
+        "swf_partition_by",
+        docs=[
+            {"part": "A", "v": 1},
+            {"part": "A", "v": 2},
+            {"part": "B", "v": 3},
+            {"part": "B", "v": 3},
+        ],
+        pipeline=[
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$part",
+                    "sortBy": {"v": 1},
+                    "output": {
+                        "result": {
+                            "$addToSet": "$v",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "part": 1, "result": 1}},
+            {"$group": {"_id": "$part", "result": {"$first": "$result"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[{"_id": "A", "result": [1, 2]}, {"_id": "B", "result": [3]}],
+        msg="$addToSet should compute separate unique sets per partition",
+    ),
+    AccumulatorTestCase(
+        "swf_duplicates",
+        docs=[
+            {"part": "A", "v": 5},
+            {"part": "A", "v": 5},
+            {"part": "A", "v": 10},
+            {"part": "A", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$part",
+                    "sortBy": {"v": 1},
+                    "output": {
+                        "result": {
+                            "$addToSet": "$v",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "v": 1, "result": 1}},
+            {"$limit": 1},
+        ],
+        expected=[{"v": 5, "result": [5, 10]}],
+        msg="$addToSet should deduplicate values within window",
+    ),
+    AccumulatorTestCase(
+        "swf_null_values",
+        docs=[
+            {"part": "A", "v": None},
+            {"part": "A", "v": 5},
+            {"part": "A", "v": None},
+        ],
+        pipeline=[
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$part",
+                    "sortBy": {"_id": 1},
+                    "output": {
+                        "result": {
+                            "$addToSet": "$v",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "v": 1, "result": 1}},
+            {"$limit": 1},
+        ],
+        expected=[{"v": None, "result": [None, 5]}],
+        msg="$addToSet should collect null as a value in $setWindowFields window",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS))
+def test_addToSet_setWindowFields_smoke(collection, test_case: AccumulatorTestCase):
+    """Test $addToSet accumulator in $setWindowFields context."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])

From fbaa6e29aa06af8aec3d94717d96a6307a6da612 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Wed, 20 May 2026 12:33:21 -0700
Subject: [PATCH 02/13] merge files into 1

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../addToSet/test_accumulator_addToSet.py     | 387 +++++++++++++++++-
 .../test_addToSet_bucketAuto_smoke.py         | 123 ------
 .../addToSet/test_addToSet_bucket_smoke.py    | 123 ------
 .../test_addToSet_setWindowFields_smoke.py    | 165 --------
 4 files changed, 385 insertions(+), 413 deletions(-)
 delete mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py
 delete mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py
 delete mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
index bcd07809..6d65a6da 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
@@ -1,4 +1,4 @@
-"""Tests for $addToSet accumulator ($group)."""
+"""Tests for $addToSet accumulator ($group, $bucket, $bucketAuto, $setWindowFields)."""
 
 from __future__ import annotations
 
@@ -23,6 +23,7 @@
 )
 from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
 from documentdb_tests.framework.error_codes import (
+    BAD_VALUE_ERROR,
     CONVERSION_FAILURE_ERROR,
     DIVIDE_BY_ZERO_V2_ERROR,
     EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
@@ -1254,7 +1255,7 @@ def test_accumulator_addToSet(collection, test_case: AccumulatorTestCase):
 
 
 @pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_ERROR_TESTS))
-def test_accumulator_addToSet_errors(collection, test_case: AccumulatorTestCase):
+def test_accumulator_addToSet_errors(collection, test_case):
     """Test $addToSet accumulator error cases with $group."""
     if test_case.docs:
         collection.insert_many(test_case.docs)
@@ -1324,3 +1325,385 @@ def test_accumulator_addToSet_return_type(collection, test_case: AccumulatorTest
         {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
     )
     assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["value"])
+
+
+# ---------------------------------------------------------------------------
+# $bucket smoke tests
+# ---------------------------------------------------------------------------
+
+# Property [Bucket Smoke]: $addToSet works correctly in $bucket context.
+ADDTOSET_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bucket_basic",
+        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": [10, 20, 30]}],
+        msg="$addToSet should collect unique values in $bucket context",
+    ),
+    AccumulatorTestCase(
+        "bucket_duplicates",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": [10, 20, 30]}],
+        msg="$addToSet should deduplicate values in $bucket context",
+    ),
+    AccumulatorTestCase(
+        "bucket_null_among_values",
+        docs=[{"v": None}, {"v": 5}, {"v": 3}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": [None, 5, 3]}],
+        msg="$addToSet should collect null alongside values in $bucket context",
+    ),
+]
+
+# Property [Bucket Errors]: $addToSet rejects array syntax and propagates expression errors.
+ADDTOSET_BUCKET_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bucket_arity_empty_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$addToSet": []}},
+                }
+            }
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject empty array in $bucket context",
+    ),
+    AccumulatorTestCase(
+        "bucket_expression_error",
+        docs=[{"v": 10}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}},
+                }
+            }
+        ],
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$addToSet should propagate divide-by-zero error in $bucket context",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_SMOKE_TESTS))
+def test_addToSet_bucket_smoke(collection, test_case: AccumulatorTestCase):
+    """Test $addToSet accumulator in $bucket context."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_ERROR_TESTS))
+def test_addToSet_bucket_smoke_errors(collection, test_case):
+    """Test $addToSet error cases in $bucket context."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
+
+
+# ---------------------------------------------------------------------------
+# $bucketAuto smoke tests
+# ---------------------------------------------------------------------------
+
+# Property [BucketAuto Smoke]: $addToSet works correctly in $bucketAuto context.
+ADDTOSET_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bucketAuto_basic",
+        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}],
+        msg="$addToSet should collect unique values in $bucketAuto context",
+    ),
+    AccumulatorTestCase(
+        "bucketAuto_duplicates",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}],
+        msg="$addToSet should deduplicate values in $bucketAuto context",
+    ),
+    AccumulatorTestCase(
+        "bucketAuto_null_among_values",
+        docs=[{"v": None}, {"v": 5}, {"v": 3}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$addToSet": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": [None, 5, 3]}],
+        msg="$addToSet should collect null alongside values in $bucketAuto context",
+    ),
+]
+
+# Property [BucketAuto Errors]: $addToSet rejects array syntax and propagates expression errors.
+ADDTOSET_BUCKET_AUTO_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bucketAuto_arity_empty_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$addToSet": []}},
+                }
+            }
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject empty array in $bucketAuto context",
+    ),
+    AccumulatorTestCase(
+        "bucketAuto_expression_error",
+        docs=[{"v": 10}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}},
+                }
+            }
+        ],
+        error_code=BAD_VALUE_ERROR,
+        msg="$addToSet should propagate divide-by-zero error in $bucketAuto context",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_SMOKE_TESTS))
+def test_addToSet_bucketAuto_smoke(collection, test_case: AccumulatorTestCase):
+    """Test $addToSet accumulator in $bucketAuto context."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_ERROR_TESTS))
+def test_addToSet_bucketAuto_smoke_errors(collection, test_case):
+    """Test $addToSet error cases in $bucketAuto context."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
+
+
+# ---------------------------------------------------------------------------
+# $setWindowFields smoke tests
+# ---------------------------------------------------------------------------
+
+# Property [SetWindowFields Smoke]: $addToSet works correctly in $setWindowFields context.
+ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "swf_unbounded",
+        docs=[
+            {"part": "A", "v": 10},
+            {"part": "A", "v": 20},
+            {"part": "A", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$part",
+                    "sortBy": {"v": 1},
+                    "output": {
+                        "result": {
+                            "$addToSet": "$v",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "v": 1, "result": 1}},
+            {"$sort": {"v": 1}},
+            {"$limit": 1},
+        ],
+        expected=[{"v": 10, "result": [10, 20]}],
+        msg="$addToSet should collect unique values across entire partition with unbounded window",
+    ),
+    AccumulatorTestCase(
+        "swf_cumulative",
+        docs=[
+            {"part": "A", "v": 10},
+            {"part": "A", "v": 20},
+            {"part": "A", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$part",
+                    "sortBy": {"_id": 1},
+                    "output": {
+                        "result": {
+                            "$addToSet": "$v",
+                            "window": {"documents": ["unbounded", "current"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "v": 1, "result": 1}},
+        ],
+        expected=[
+            {"v": 10, "result": [10]},
+            {"v": 20, "result": [10, 20]},
+            {"v": 10, "result": [10, 20]},
+        ],
+        msg="$addToSet should compute cumulative unique values with [unbounded, current] window",
+    ),
+    AccumulatorTestCase(
+        "swf_partition_by",
+        docs=[
+            {"part": "A", "v": 1},
+            {"part": "A", "v": 2},
+            {"part": "B", "v": 3},
+            {"part": "B", "v": 3},
+        ],
+        pipeline=[
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$part",
+                    "sortBy": {"v": 1},
+                    "output": {
+                        "result": {
+                            "$addToSet": "$v",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "part": 1, "result": 1}},
+            {"$group": {"_id": "$part", "result": {"$first": "$result"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[{"_id": "A", "result": [1, 2]}, {"_id": "B", "result": [3]}],
+        msg="$addToSet should compute separate unique sets per partition",
+    ),
+    AccumulatorTestCase(
+        "swf_duplicates",
+        docs=[
+            {"part": "A", "v": 5},
+            {"part": "A", "v": 5},
+            {"part": "A", "v": 10},
+            {"part": "A", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$part",
+                    "sortBy": {"v": 1},
+                    "output": {
+                        "result": {
+                            "$addToSet": "$v",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},  # order-independent: no $sort before $limit
+            {"$limit": 1},
+        ],
+        expected=[{"result": [5, 10]}],
+        msg="$addToSet should deduplicate values within window",
+    ),
+    AccumulatorTestCase(
+        "swf_null_values",
+        docs=[
+            {"part": "A", "v": None},
+            {"part": "A", "v": 5},
+            {"part": "A", "v": None},
+        ],
+        pipeline=[
+            {
+                "$setWindowFields": {
+                    "partitionBy": "$part",
+                    "sortBy": {"_id": 1},
+                    "output": {
+                        "result": {
+                            "$addToSet": "$v",
+                            "window": {"documents": ["unbounded", "unbounded"]},
+                        }
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},  # order-independent: no $sort before $limit
+            {"$limit": 1},
+        ],
+        expected=[{"result": [None, 5]}],
+        msg="$addToSet should collect null as a value in $setWindowFields window",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS))
+def test_addToSet_setWindowFields_smoke(collection, test_case: AccumulatorTestCase):
+    """Test $addToSet accumulator in $setWindowFields context."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py
deleted file mode 100644
index b63dea44..00000000
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucketAuto_smoke.py
+++ /dev/null
@@ -1,123 +0,0 @@
-"""Smoke tests for $addToSet accumulator in $bucketAuto context."""
-
-from __future__ import annotations
-
-import pytest
-
-from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
-    AccumulatorTestCase,
-)
-from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
-from documentdb_tests.framework.error_codes import (
-    BAD_VALUE_ERROR,
-    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-)
-from documentdb_tests.framework.executor import execute_command
-from documentdb_tests.framework.parametrize import pytest_params
-
-# Property [BucketAuto Smoke]: $addToSet works correctly in $bucketAuto context.
-ADDTOSET_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bucketAuto_basic",
-        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}],
-        msg="$addToSet should collect unique values in $bucketAuto context",
-    ),
-    AccumulatorTestCase(
-        "bucketAuto_duplicates",
-        docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}],
-        msg="$addToSet should deduplicate values in $bucketAuto context",
-    ),
-    AccumulatorTestCase(
-        "bucketAuto_null_among_values",
-        docs=[{"v": None}, {"v": 5}, {"v": 3}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": [None, 5, 3]}],
-        msg="$addToSet should collect null alongside values in $bucketAuto context",
-    ),
-]
-
-# Property [BucketAuto Arity Rejection]: $addToSet rejects array syntax in $bucketAuto context.
-ADDTOSET_BUCKET_AUTO_ERROR_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bucketAuto_arity_empty_array",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$addToSet": []}},
-                }
-            }
-        ],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$addToSet should reject empty array in $bucketAuto context",
-    ),
-    AccumulatorTestCase(
-        "bucketAuto_expression_error",
-        docs=[{"v": 10}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}},
-                }
-            }
-        ],
-        error_code=BAD_VALUE_ERROR,
-        msg="$addToSet should propagate divide-by-zero error in $bucketAuto context",
-    ),
-]
-
-
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_SMOKE_TESTS))
-def test_addToSet_bucketAuto_smoke(collection, test_case: AccumulatorTestCase):
-    """Test $addToSet accumulator in $bucketAuto context."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
-
-
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_ERROR_TESTS))
-def test_addToSet_bucketAuto_smoke_errors(collection, test_case: AccumulatorTestCase):
-    """Test $addToSet error cases in $bucketAuto context."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py
deleted file mode 100644
index 85fb5e7c..00000000
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_bucket_smoke.py
+++ /dev/null
@@ -1,123 +0,0 @@
-"""Smoke tests for $addToSet accumulator in $bucket context."""
-
-from __future__ import annotations
-
-import pytest
-
-from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
-    AccumulatorTestCase,
-)
-from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
-from documentdb_tests.framework.error_codes import (
-    DIVIDE_BY_ZERO_V2_ERROR,
-    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-)
-from documentdb_tests.framework.executor import execute_command
-from documentdb_tests.framework.parametrize import pytest_params
-
-# Property [Bucket Smoke]: $addToSet works correctly in $bucket context.
-ADDTOSET_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bucket_basic",
-        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": [10, 20, 30]}],
-        msg="$addToSet should collect unique values in $bucket context",
-    ),
-    AccumulatorTestCase(
-        "bucket_duplicates",
-        docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": [10, 20, 30]}],
-        msg="$addToSet should deduplicate values in $bucket context",
-    ),
-    AccumulatorTestCase(
-        "bucket_null_among_values",
-        docs=[{"v": None}, {"v": 5}, {"v": 3}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": [None, 5, 3]}],
-        msg="$addToSet should collect null alongside values in $bucket context",
-    ),
-]
-
-# Property [Bucket Arity Rejection]: $addToSet rejects array syntax in $bucket context.
-ADDTOSET_BUCKET_ERROR_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bucket_arity_empty_array",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$addToSet": []}},
-                }
-            }
-        ],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$addToSet should reject empty array in $bucket context",
-    ),
-    AccumulatorTestCase(
-        "bucket_expression_error",
-        docs=[{"v": 10}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}},
-                }
-            }
-        ],
-        error_code=DIVIDE_BY_ZERO_V2_ERROR,
-        msg="$addToSet should propagate divide-by-zero error in $bucket context",
-    ),
-]
-
-
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_SMOKE_TESTS))
-def test_addToSet_bucket_smoke(collection, test_case: AccumulatorTestCase):
-    """Test $addToSet accumulator in $bucket context."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
-
-
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_ERROR_TESTS))
-def test_addToSet_bucket_smoke_errors(collection, test_case: AccumulatorTestCase):
-    """Test $addToSet error cases in $bucket context."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py
deleted file mode 100644
index 3d57b5c4..00000000
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_addToSet_setWindowFields_smoke.py
+++ /dev/null
@@ -1,165 +0,0 @@
-"""Smoke tests for $addToSet accumulator in $setWindowFields context."""
-
-from __future__ import annotations
-
-import pytest
-
-from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
-    AccumulatorTestCase,
-)
-from documentdb_tests.framework.assertions import assertSuccess
-from documentdb_tests.framework.executor import execute_command
-from documentdb_tests.framework.parametrize import pytest_params
-
-# Property [SetWindowFields Smoke]: $addToSet works correctly in $setWindowFields context.
-ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "swf_unbounded",
-        docs=[
-            {"part": "A", "v": 10},
-            {"part": "A", "v": 20},
-            {"part": "A", "v": 10},
-        ],
-        pipeline=[
-            {
-                "$setWindowFields": {
-                    "partitionBy": "$part",
-                    "sortBy": {"v": 1},
-                    "output": {
-                        "result": {
-                            "$addToSet": "$v",
-                            "window": {"documents": ["unbounded", "unbounded"]},
-                        }
-                    },
-                }
-            },
-            {"$project": {"_id": 0, "v": 1, "result": 1}},
-            {"$sort": {"v": 1}},
-            {"$limit": 1},
-        ],
-        expected=[{"v": 10, "result": [10, 20]}],
-        msg="$addToSet should collect unique values across entire partition with unbounded window",
-    ),
-    AccumulatorTestCase(
-        "swf_cumulative",
-        docs=[
-            {"part": "A", "v": 10},
-            {"part": "A", "v": 20},
-            {"part": "A", "v": 10},
-        ],
-        pipeline=[
-            {
-                "$setWindowFields": {
-                    "partitionBy": "$part",
-                    "sortBy": {"_id": 1},
-                    "output": {
-                        "result": {
-                            "$addToSet": "$v",
-                            "window": {"documents": ["unbounded", "current"]},
-                        }
-                    },
-                }
-            },
-            {"$project": {"_id": 0, "v": 1, "result": 1}},
-        ],
-        expected=[
-            {"v": 10, "result": [10]},
-            {"v": 20, "result": [10, 20]},
-            {"v": 10, "result": [10, 20]},
-        ],
-        msg="$addToSet should compute cumulative unique values with [unbounded, current] window",
-    ),
-    AccumulatorTestCase(
-        "swf_partition_by",
-        docs=[
-            {"part": "A", "v": 1},
-            {"part": "A", "v": 2},
-            {"part": "B", "v": 3},
-            {"part": "B", "v": 3},
-        ],
-        pipeline=[
-            {
-                "$setWindowFields": {
-                    "partitionBy": "$part",
-                    "sortBy": {"v": 1},
-                    "output": {
-                        "result": {
-                            "$addToSet": "$v",
-                            "window": {"documents": ["unbounded", "unbounded"]},
-                        }
-                    },
-                }
-            },
-            {"$project": {"_id": 0, "part": 1, "result": 1}},
-            {"$group": {"_id": "$part", "result": {"$first": "$result"}}},
-            {"$sort": {"_id": 1}},
-        ],
-        expected=[{"_id": "A", "result": [1, 2]}, {"_id": "B", "result": [3]}],
-        msg="$addToSet should compute separate unique sets per partition",
-    ),
-    AccumulatorTestCase(
-        "swf_duplicates",
-        docs=[
-            {"part": "A", "v": 5},
-            {"part": "A", "v": 5},
-            {"part": "A", "v": 10},
-            {"part": "A", "v": 10},
-        ],
-        pipeline=[
-            {
-                "$setWindowFields": {
-                    "partitionBy": "$part",
-                    "sortBy": {"v": 1},
-                    "output": {
-                        "result": {
-                            "$addToSet": "$v",
-                            "window": {"documents": ["unbounded", "unbounded"]},
-                        }
-                    },
-                }
-            },
-            {"$project": {"_id": 0, "v": 1, "result": 1}},
-            {"$limit": 1},
-        ],
-        expected=[{"v": 5, "result": [5, 10]}],
-        msg="$addToSet should deduplicate values within window",
-    ),
-    AccumulatorTestCase(
-        "swf_null_values",
-        docs=[
-            {"part": "A", "v": None},
-            {"part": "A", "v": 5},
-            {"part": "A", "v": None},
-        ],
-        pipeline=[
-            {
-                "$setWindowFields": {
-                    "partitionBy": "$part",
-                    "sortBy": {"_id": 1},
-                    "output": {
-                        "result": {
-                            "$addToSet": "$v",
-                            "window": {"documents": ["unbounded", "unbounded"]},
-                        }
-                    },
-                }
-            },
-            {"$project": {"_id": 0, "v": 1, "result": 1}},
-            {"$limit": 1},
-        ],
-        expected=[{"v": None, "result": [None, 5]}],
-        msg="$addToSet should collect null as a value in $setWindowFields window",
-    ),
-]
-
-
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS))
-def test_addToSet_setWindowFields_smoke(collection, test_case: AccumulatorTestCase):
-    """Test $addToSet accumulator in $setWindowFields context."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])

From 804c522bb450eab6b6441c5b1eac58b2e13cccb8 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Mon, 25 May 2026 12:01:04 -0700
Subject: [PATCH 03/13] add __init__.py

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../tests/core/operator/accumulators/addToSet/__init__.py         | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/__init__.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/__init__.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/__init__.py
new file mode 100644
index 00000000..e69de29b

From ec3968e2c7e1ef0c7313cae0900dedb2d8e9aee0 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Mon, 25 May 2026 12:01:36 -0700
Subject: [PATCH 04/13] rename smoke tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 ...accumulator_addToSet.py => test_accumulator_addToSet_smoke.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/{test_smoke_accumulator_addToSet.py => test_accumulator_addToSet_smoke.py} (100%)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_smoke_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_smoke.py
similarity index 100%
rename from documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_smoke_accumulator_addToSet.py
rename to documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_smoke.py

From c3bb55dc19a36a8eb1e1aa2311b0bc41402f1393 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Mon, 25 May 2026 12:04:32 -0700
Subject: [PATCH 05/13] remove stage tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../addToSet/test_accumulator_addToSet.py     | 435 +-----------------
 1 file changed, 2 insertions(+), 433 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
index 6d65a6da..55b9eee8 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
@@ -1,4 +1,4 @@
-"""Tests for $addToSet accumulator ($group, $bucket, $bucketAuto, $setWindowFields)."""
+"""Tests for $addToSet accumulator ($group)."""
 
 from __future__ import annotations
 
@@ -23,11 +23,8 @@
 )
 from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
 from documentdb_tests.framework.error_codes import (
-    BAD_VALUE_ERROR,
     CONVERSION_FAILURE_ERROR,
     DIVIDE_BY_ZERO_V2_ERROR,
-    EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
-    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
     MODULO_BY_ZERO_V2_ERROR,
 )
 from documentdb_tests.framework.executor import execute_command
@@ -1137,52 +1134,6 @@
     ),
 ]
 
-# Property [Arity Rejection]: $addToSet in accumulator context is unary and rejects array syntax.
-ADDTOSET_ARITY_ERROR_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "arity_empty_array",
-        docs=[{"v": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": []}}}],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$addToSet should reject empty array in accumulator context",
-    ),
-    AccumulatorTestCase(
-        "arity_single_element_literal",
-        docs=[{"v": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": [1]}}}],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$addToSet should reject single-element literal array in accumulator context",
-    ),
-    AccumulatorTestCase(
-        "arity_single_field_ref",
-        docs=[{"v": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": ["$v"]}}}],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$addToSet should reject single field ref in array in accumulator context",
-    ),
-    AccumulatorTestCase(
-        "arity_multi_element",
-        docs=[{"v": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": [1, 2, 3]}}}],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$addToSet should reject multi-element array in accumulator context",
-    ),
-    AccumulatorTestCase(
-        "arity_multi_key_expression",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$group": {
-                    "_id": None,
-                    "result": {"$addToSet": {"$add": [1, 2], "$multiply": [3, 4]}},
-                }
-            }
-        ],
-        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
-        msg="$addToSet should reject multi-key expression object",
-    ),
-]
-
 # Property [Expression Error Propagation]: errors from sub-expressions propagate.
 ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
@@ -1235,7 +1186,7 @@
     + ADDTOSET_EDGE_CASE_TESTS
 )
 
-ADDTOSET_ERROR_TESTS = ADDTOSET_ARITY_ERROR_TESTS + ADDTOSET_EXPRESSION_ERROR_TESTS
+ADDTOSET_ERROR_TESTS = ADDTOSET_EXPRESSION_ERROR_TESTS
 
 # ---------------------------------------------------------------------------
 # Primary test functions
@@ -1325,385 +1276,3 @@ def test_accumulator_addToSet_return_type(collection, test_case: AccumulatorTest
         {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
     )
     assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["value"])
-
-
-# ---------------------------------------------------------------------------
-# $bucket smoke tests
-# ---------------------------------------------------------------------------
-
-# Property [Bucket Smoke]: $addToSet works correctly in $bucket context.
-ADDTOSET_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bucket_basic",
-        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": [10, 20, 30]}],
-        msg="$addToSet should collect unique values in $bucket context",
-    ),
-    AccumulatorTestCase(
-        "bucket_duplicates",
-        docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": [10, 20, 30]}],
-        msg="$addToSet should deduplicate values in $bucket context",
-    ),
-    AccumulatorTestCase(
-        "bucket_null_among_values",
-        docs=[{"v": None}, {"v": 5}, {"v": 3}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": [None, 5, 3]}],
-        msg="$addToSet should collect null alongside values in $bucket context",
-    ),
-]
-
-# Property [Bucket Arity Rejection]: $addToSet rejects array syntax in $bucket context.
-ADDTOSET_BUCKET_ERROR_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bucket_arity_empty_array",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$addToSet": []}},
-                }
-            }
-        ],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$addToSet should reject empty array in $bucket context",
-    ),
-    AccumulatorTestCase(
-        "bucket_expression_error",
-        docs=[{"v": 10}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}},
-                }
-            }
-        ],
-        error_code=DIVIDE_BY_ZERO_V2_ERROR,
-        msg="$addToSet should propagate divide-by-zero error in $bucket context",
-    ),
-]
-
-
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_SMOKE_TESTS))
-def test_addToSet_bucket_smoke(collection, test_case: AccumulatorTestCase):
-    """Test $addToSet accumulator in $bucket context."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
-
-
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_ERROR_TESTS))
-def test_addToSet_bucket_smoke_errors(collection, test_case):
-    """Test $addToSet error cases in $bucket context."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
-
-
-# ---------------------------------------------------------------------------
-# $bucketAuto smoke tests
-# ---------------------------------------------------------------------------
-
-# Property [BucketAuto Smoke]: $addToSet works correctly in $bucketAuto context.
-ADDTOSET_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bucketAuto_basic",
-        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}],
-        msg="$addToSet should collect unique values in $bucketAuto context",
-    ),
-    AccumulatorTestCase(
-        "bucketAuto_duplicates",
-        docs=[{"v": 10}, {"v": 20}, {"v": 10}, {"v": 30}, {"v": 20}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": [10, 20, 30]}],
-        msg="$addToSet should deduplicate values in $bucketAuto context",
-    ),
-    AccumulatorTestCase(
-        "bucketAuto_null_among_values",
-        docs=[{"v": None}, {"v": 5}, {"v": 3}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$addToSet": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": [None, 5, 3]}],
-        msg="$addToSet should collect null alongside values in $bucketAuto context",
-    ),
-]
-
-# Property [BucketAuto Arity Rejection]: $addToSet rejects array syntax in $bucketAuto context.
-ADDTOSET_BUCKET_AUTO_ERROR_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bucketAuto_arity_empty_array",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$addToSet": []}},
-                }
-            }
-        ],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$addToSet should reject empty array in $bucketAuto context",
-    ),
-    AccumulatorTestCase(
-        "bucketAuto_expression_error",
-        docs=[{"v": 10}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$addToSet": {"$divide": ["$v", 0]}}},
-                }
-            }
-        ],
-        error_code=BAD_VALUE_ERROR,
-        msg="$addToSet should propagate divide-by-zero error in $bucketAuto context",
-    ),
-]
-
-
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_SMOKE_TESTS))
-def test_addToSet_bucketAuto_smoke(collection, test_case: AccumulatorTestCase):
-    """Test $addToSet accumulator in $bucketAuto context."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
-
-
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BUCKET_AUTO_ERROR_TESTS))
-def test_addToSet_bucketAuto_smoke_errors(collection, test_case):
-    """Test $addToSet error cases in $bucketAuto context."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
-
-
-# ---------------------------------------------------------------------------
-# $setWindowFields smoke tests
-# ---------------------------------------------------------------------------
-
-# Property [SetWindowFields Smoke]: $addToSet works correctly in $setWindowFields context.
-ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "swf_unbounded",
-        docs=[
-            {"part": "A", "v": 10},
-            {"part": "A", "v": 20},
-            {"part": "A", "v": 10},
-        ],
-        pipeline=[
-            {
-                "$setWindowFields": {
-                    "partitionBy": "$part",
-                    "sortBy": {"v": 1},
-                    "output": {
-                        "result": {
-                            "$addToSet": "$v",
-                            "window": {"documents": ["unbounded", "unbounded"]},
-                        }
-                    },
-                }
-            },
-            {"$project": {"_id": 0, "v": 1, "result": 1}},
-            {"$sort": {"v": 1}},
-            {"$limit": 1},
-        ],
-        expected=[{"v": 10, "result": [10, 20]}],
-        msg="$addToSet should collect unique values across entire partition with unbounded window",
-    ),
-    AccumulatorTestCase(
-        "swf_cumulative",
-        docs=[
-            {"part": "A", "v": 10},
-            {"part": "A", "v": 20},
-            {"part": "A", "v": 10},
-        ],
-        pipeline=[
-            {
-                "$setWindowFields": {
-                    "partitionBy": "$part",
-                    "sortBy": {"_id": 1},
-                    "output": {
-                        "result": {
-                            "$addToSet": "$v",
-                            "window": {"documents": ["unbounded", "current"]},
-                        }
-                    },
-                }
-            },
-            {"$project": {"_id": 0, "v": 1, "result": 1}},
-        ],
-        expected=[
-            {"v": 10, "result": [10]},
-            {"v": 20, "result": [10, 20]},
-            {"v": 10, "result": [10, 20]},
-        ],
-        msg="$addToSet should compute cumulative unique values with [unbounded, current] window",
-    ),
-    AccumulatorTestCase(
-        "swf_partition_by",
-        docs=[
-            {"part": "A", "v": 1},
-            {"part": "A", "v": 2},
-            {"part": "B", "v": 3},
-            {"part": "B", "v": 3},
-        ],
-        pipeline=[
-            {
-                "$setWindowFields": {
-                    "partitionBy": "$part",
-                    "sortBy": {"v": 1},
-                    "output": {
-                        "result": {
-                            "$addToSet": "$v",
-                            "window": {"documents": ["unbounded", "unbounded"]},
-                        }
-                    },
-                }
-            },
-            {"$project": {"_id": 0, "part": 1, "result": 1}},
-            {"$group": {"_id": "$part", "result": {"$first": "$result"}}},
-            {"$sort": {"_id": 1}},
-        ],
-        expected=[{"_id": "A", "result": [1, 2]}, {"_id": "B", "result": [3]}],
-        msg="$addToSet should compute separate unique sets per partition",
-    ),
-    AccumulatorTestCase(
-        "swf_duplicates",
-        docs=[
-            {"part": "A", "v": 5},
-            {"part": "A", "v": 5},
-            {"part": "A", "v": 10},
-            {"part": "A", "v": 10},
-        ],
-        pipeline=[
-            {
-                "$setWindowFields": {
-                    "partitionBy": "$part",
-                    "sortBy": {"v": 1},
-                    "output": {
-                        "result": {
-                            "$addToSet": "$v",
-                            "window": {"documents": ["unbounded", "unbounded"]},
-                        }
-                    },
-                }
-            },
-            {"$project": {"_id": 0, "v": 1, "result": 1}},
-            {"$limit": 1},
-        ],
-        expected=[{"v": 5, "result": [5, 10]}],
-        msg="$addToSet should deduplicate values within window",
-    ),
-    AccumulatorTestCase(
-        "swf_null_values",
-        docs=[
-            {"part": "A", "v": None},
-            {"part": "A", "v": 5},
-            {"part": "A", "v": None},
-        ],
-        pipeline=[
-            {
-                "$setWindowFields": {
-                    "partitionBy": "$part",
-                    "sortBy": {"_id": 1},
-                    "output": {
-                        "result": {
-                            "$addToSet": "$v",
-                            "window": {"documents": ["unbounded", "unbounded"]},
-                        }
-                    },
-                }
-            },
-            {"$project": {"_id": 0, "v": 1, "result": 1}},
-            {"$limit": 1},
-        ],
-        expected=[{"v": None, "result": [None, 5]}],
-        msg="$addToSet should collect null as a value in $setWindowFields window",
-    ),
-]
-
-
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_SET_WINDOW_FIELDS_SMOKE_TESTS))
-def test_addToSet_setWindowFields_smoke(collection, test_case: AccumulatorTestCase):
-    """Test $addToSet accumulator in $setWindowFields context."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])

From 01935684bfe07cfa968ee61041b4b511e24fdb8a Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Mon, 25 May 2026 12:21:42 -0700
Subject: [PATCH 06/13] inline test constants

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../addToSet/test_accumulator_addToSet.py     | 35 ++++++++++++++-----
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
index 55b9eee8..05208c0c 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
@@ -30,11 +30,6 @@
 from documentdb_tests.framework.executor import execute_command
 from documentdb_tests.framework.parametrize import pytest_params
 
-_OID1 = ObjectId("000000000000000000000001")
-_OID2 = ObjectId("000000000000000000000002")
-_DT1 = datetime(2020, 1, 1, tzinfo=timezone.utc)
-_DT2 = datetime(2021, 1, 1, tzinfo=timezone.utc)
-
 # ---------------------------------------------------------------------------
 # Property lists
 # ---------------------------------------------------------------------------
@@ -514,22 +509,44 @@
     ),
     AccumulatorTestCase(
         "bson_datetime",
-        docs=[{"v": _DT1}, {"v": _DT2}, {"v": _DT1}],
+        docs=[
+            {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)},
+            {"v": datetime(2021, 1, 1, tzinfo=timezone.utc)},
+            {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)},
+        ],
         pipeline=[
             {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
             {"$project": {"_id": 0, "result": 1}},
         ],
-        expected=[{"result": [_DT1, _DT2]}],
+        expected=[
+            {
+                "result": [
+                    datetime(2020, 1, 1, tzinfo=timezone.utc),
+                    datetime(2021, 1, 1, tzinfo=timezone.utc),
+                ]
+            }
+        ],
         msg="$addToSet should collect and deduplicate datetime values",
     ),
     AccumulatorTestCase(
         "bson_objectid",
-        docs=[{"v": _OID1}, {"v": _OID2}, {"v": _OID1}],
+        docs=[
+            {"v": ObjectId("000000000000000000000001")},
+            {"v": ObjectId("000000000000000000000002")},
+            {"v": ObjectId("000000000000000000000001")},
+        ],
         pipeline=[
             {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
             {"$project": {"_id": 0, "result": 1}},
         ],
-        expected=[{"result": [_OID1, _OID2]}],
+        expected=[
+            {
+                "result": [
+                    ObjectId("000000000000000000000001"),
+                    ObjectId("000000000000000000000002"),
+                ]
+            }
+        ],
         msg="$addToSet should collect and deduplicate ObjectId values",
     ),
     AccumulatorTestCase(

From 74cf984d73d3f4558a275da6f4d52aa66df0825c Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Mon, 25 May 2026 14:43:08 -0700
Subject: [PATCH 07/13] split into files

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../addToSet/test_accumulator_addToSet.py     | 841 +-----------------
 .../test_accumulator_addToSet_bson_types.py   | 254 ++++++
 .../test_accumulator_addToSet_dedup.py        | 499 +++++++++++
 .../test_accumulator_addToSet_errors.py       |  62 ++
 .../test_accumulator_addToSet_null_missing.py | 142 +++
 5 files changed, 963 insertions(+), 835 deletions(-)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
index 05208c0c..4d569dd5 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
@@ -1,32 +1,14 @@
-"""Tests for $addToSet accumulator ($group)."""
+"""Tests for $addToSet accumulator core behavior ($group)."""
 
 from __future__ import annotations
 
-import math
-from datetime import datetime, timezone
-
 import pytest
-from bson import (
-    Binary,
-    Code,
-    Decimal128,
-    Int64,
-    MaxKey,
-    MinKey,
-    ObjectId,
-    Regex,
-    Timestamp,
-)
+from bson import Binary, Regex
 
 from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
     AccumulatorTestCase,
 )
-from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
-from documentdb_tests.framework.error_codes import (
-    CONVERSION_FAILURE_ERROR,
-    DIVIDE_BY_ZERO_V2_ERROR,
-    MODULO_BY_ZERO_V2_ERROR,
-)
+from documentdb_tests.framework.assertions import assertSuccess
 from documentdb_tests.framework.executor import execute_command
 from documentdb_tests.framework.parametrize import pytest_params
 
@@ -34,108 +16,6 @@
 # Property lists
 # ---------------------------------------------------------------------------
 
-# Property [Null Collected]: null values are collected as valid values and deduplicated.
-ADDTOSET_NULL_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "null_all",
-        docs=[{"v": None}, {"v": None}, {"v": None}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [None]}],
-        msg="$addToSet should collect null and deduplicate to a single null",
-    ),
-    AccumulatorTestCase(
-        "null_single",
-        docs=[{"v": None}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [None]}],
-        msg="$addToSet should collect a single null value",
-    ),
-    AccumulatorTestCase(
-        "null_among_values",
-        docs=[{"v": None}, {"v": 5}, {"v": 3}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [None, 5, 3]}],
-        msg="$addToSet should collect null alongside other values",
-    ),
-    AccumulatorTestCase(
-        "null_and_values_dedup",
-        docs=[{"v": 10}, {"v": None}, {"v": 5}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [10, None, 5]}],
-        msg="$addToSet should collect null and distinct values without duplication",
-    ),
-]
-
-# Property [Missing Excluded]: missing fields are excluded from the result.
-ADDTOSET_MISSING_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "missing_all",
-        docs=[{"x": 1}, {"x": 2}, {"x": 3}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": []}],
-        msg="$addToSet should return empty array when all fields are missing",
-    ),
-    AccumulatorTestCase(
-        "missing_single",
-        docs=[{"x": 1}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": []}],
-        msg="$addToSet should return empty array for a single doc with missing field",
-    ),
-    AccumulatorTestCase(
-        "missing_among_values",
-        docs=[{"x": 1}, {"v": 5}, {"v": 3}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [5, 3]}],
-        msg="$addToSet should exclude missing fields and collect only present values",
-    ),
-]
-
-# Property [Null and Missing Combined]: null is collected while missing is excluded.
-ADDTOSET_NULL_MISSING_COMBINED_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "combined_null_and_missing",
-        docs=[{"v": None}, {"x": 1}, {"v": None}, {"x": 2}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [None]}],
-        msg="$addToSet should collect null but exclude missing fields",
-    ),
-    AccumulatorTestCase(
-        "combined_null_missing_and_values",
-        docs=[{"v": 10}, {"v": None}, {"x": 1}, {"v": 5}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [10, None, 5]}],
-        msg="$addToSet should collect null and values but exclude missing fields",
-    ),
-]
-
 # Property [$$REMOVE Excluded]: $$REMOVE via $cond is treated as missing.
 ADDTOSET_REMOVE_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
@@ -298,663 +178,6 @@
     ),
 ]
 
-# Property [Document Duplicate Detection]: documents are duplicates only if they have
-# exact same fields, values, and field order.
-ADDTOSET_DOC_DEDUP_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "doc_identical",
-        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 2}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"a": 1, "b": 2}]}],
-        msg="$addToSet should deduplicate identical documents",
-    ),
-    AccumulatorTestCase(
-        "doc_different_field_order",
-        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"b": 2, "a": 1}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"b": 2, "a": 1}, {"a": 1, "b": 2}]}],
-        msg="$addToSet should treat documents with different field order as distinct",
-    ),
-    AccumulatorTestCase(
-        "doc_different_values",
-        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 3}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"a": 1, "b": 2}, {"a": 1, "b": 3}]}],
-        msg="$addToSet should treat documents with different values as distinct",
-    ),
-    AccumulatorTestCase(
-        "doc_nested_identical",
-        docs=[{"v": {"a": {"x": 1}}}, {"v": {"a": {"x": 1}}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"a": {"x": 1}}]}],
-        msg="$addToSet should deduplicate nested documents with identical structure",
-    ),
-    AccumulatorTestCase(
-        "doc_nested_different_order",
-        docs=[{"v": {"a": {"x": 1, "y": 2}}}, {"v": {"a": {"y": 2, "x": 1}}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"a": {"x": 1, "y": 2}}, {"a": {"y": 2, "x": 1}}]}],
-        msg="$addToSet should treat nested documents with different field order as distinct",
-    ),
-    AccumulatorTestCase(
-        "doc_empty",
-        docs=[{"v": {}}, {"v": {}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{}]}],
-        msg="$addToSet should deduplicate empty documents",
-    ),
-    AccumulatorTestCase(
-        "doc_subset",
-        docs=[{"v": {"a": 1}}, {"v": {"a": 1, "b": 2}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"a": 1, "b": 2}, {"a": 1}]}],
-        msg="$addToSet should treat a document subset and superset as distinct",
-    ),
-    AccumulatorTestCase(
-        "doc_with_array_value",
-        docs=[{"v": {"a": [1, 2]}}, {"v": {"a": [1, 2]}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"a": [1, 2]}]}],
-        msg="$addToSet should deduplicate documents containing identical array values",
-    ),
-    AccumulatorTestCase(
-        "doc_with_null_value",
-        docs=[{"v": {"a": None}}, {"v": {"a": None}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"a": None}]}],
-        msg="$addToSet should deduplicate documents with null field values",
-    ),
-    AccumulatorTestCase(
-        "doc_with_nested_null",
-        docs=[{"v": {"a": {"b": None}}}, {"v": {"a": {"b": None}}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"a": {"b": None}}]}],
-        msg="$addToSet should deduplicate documents with nested null values",
-    ),
-]
-
-# Property [String Deduplication]: strings are compared by byte value with no Unicode normalization.
-ADDTOSET_STRING_DEDUP_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "string_identical",
-        docs=[{"v": "abc"}, {"v": "abc"}, {"v": "def"}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": ["abc", "def"]}],
-        msg="$addToSet should deduplicate identical strings",
-    ),
-    AccumulatorTestCase(
-        "string_empty",
-        docs=[{"v": ""}, {"v": ""}, {"v": "x"}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": ["", "x"]}],
-        msg="$addToSet should deduplicate empty strings",
-    ),
-    AccumulatorTestCase(
-        "string_unicode_no_normalization",
-        docs=[
-            {"v": "\u00e9"},
-            {"v": "\u0065\u0301"},
-        ],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": ["\u00e9", "\u0065\u0301"]}],
-        msg="$addToSet should not normalize Unicode; precomposed and decomposed are distinct",
-    ),
-]
-
-# Property [BSON Type Collection]: $addToSet collects and deduplicates values of every
-# non-deprecated BSON type.
-ADDTOSET_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bson_int32",
-        docs=[{"v": 10}, {"v": 20}, {"v": 10}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [10, 20]}],
-        msg="$addToSet should collect and deduplicate int32 values",
-    ),
-    AccumulatorTestCase(
-        "bson_int64",
-        docs=[{"v": Int64(10)}, {"v": Int64(20)}, {"v": Int64(10)}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [Int64(10), Int64(20)]}],
-        msg="$addToSet should collect and deduplicate Int64 values",
-    ),
-    AccumulatorTestCase(
-        "bson_double",
-        docs=[{"v": 1.5}, {"v": 2.5}, {"v": 1.5}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [1.5, 2.5]}],
-        msg="$addToSet should collect and deduplicate double values",
-    ),
-    AccumulatorTestCase(
-        "bson_decimal128",
-        docs=[
-            {"v": Decimal128("1.5")},
-            {"v": Decimal128("2.5")},
-            {"v": Decimal128("1.5")},
-        ],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [Decimal128("1.5"), Decimal128("2.5")]}],
-        msg="$addToSet should collect and deduplicate Decimal128 values",
-    ),
-    AccumulatorTestCase(
-        "bson_string",
-        docs=[{"v": "abc"}, {"v": "def"}, {"v": "abc"}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": ["abc", "def"]}],
-        msg="$addToSet should collect and deduplicate string values",
-    ),
-    AccumulatorTestCase(
-        "bson_bool",
-        docs=[{"v": True}, {"v": False}, {"v": True}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [True, False]}],
-        msg="$addToSet should collect and deduplicate boolean values",
-    ),
-    AccumulatorTestCase(
-        "bson_datetime",
-        docs=[
-            {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)},
-            {"v": datetime(2021, 1, 1, tzinfo=timezone.utc)},
-            {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)},
-        ],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[
-            {
-                "result": [
-                    datetime(2020, 1, 1, tzinfo=timezone.utc),
-                    datetime(2021, 1, 1, tzinfo=timezone.utc),
-                ]
-            }
-        ],
-        msg="$addToSet should collect and deduplicate datetime values",
-    ),
-    AccumulatorTestCase(
-        "bson_objectid",
-        docs=[
-            {"v": ObjectId("000000000000000000000001")},
-            {"v": ObjectId("000000000000000000000002")},
-            {"v": ObjectId("000000000000000000000001")},
-        ],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[
-            {
-                "result": [
-                    ObjectId("000000000000000000000001"),
-                    ObjectId("000000000000000000000002"),
-                ]
-            }
-        ],
-        msg="$addToSet should collect and deduplicate ObjectId values",
-    ),
-    AccumulatorTestCase(
-        "bson_binary",
-        docs=[{"v": Binary(b"\x00")}, {"v": Binary(b"\x01")}, {"v": Binary(b"\x00")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [b"\x00", b"\x01"]}],
-        msg="$addToSet should collect and deduplicate Binary values",
-    ),
-    AccumulatorTestCase(
-        "bson_regex",
-        docs=[{"v": Regex("abc")}, {"v": Regex("def")}, {"v": Regex("abc")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [Regex("abc"), Regex("def")]}],
-        msg="$addToSet should collect and deduplicate Regex values",
-    ),
-    AccumulatorTestCase(
-        "bson_code",
-        docs=[
-            {"v": Code("function(){}")},
-            {"v": Code("function(){return 1}")},
-            {"v": Code("function(){}")},
-        ],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": ["function(){}", "function(){return 1}"]}],
-        msg="$addToSet should collect and deduplicate Code values",
-    ),
-    AccumulatorTestCase(
-        "bson_timestamp",
-        docs=[
-            {"v": Timestamp(100, 1)},
-            {"v": Timestamp(200, 1)},
-            {"v": Timestamp(100, 1)},
-        ],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [Timestamp(100, 1), Timestamp(200, 1)]}],
-        msg="$addToSet should collect and deduplicate Timestamp values",
-    ),
-    AccumulatorTestCase(
-        "bson_minkey",
-        docs=[{"v": MinKey()}, {"v": MinKey()}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"": MinKey()}]}],
-        msg="$addToSet should deduplicate MinKey values",
-    ),
-    AccumulatorTestCase(
-        "bson_maxkey",
-        docs=[{"v": MaxKey()}, {"v": MaxKey()}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"": MaxKey()}]}],
-        msg="$addToSet should deduplicate MaxKey values",
-    ),
-    AccumulatorTestCase(
-        "bson_document",
-        docs=[{"v": {"x": 1}}, {"v": {"x": 2}}, {"v": {"x": 1}}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [{"x": 1}, {"x": 2}]}],
-        msg="$addToSet should collect and deduplicate embedded document values",
-    ),
-    AccumulatorTestCase(
-        "bson_array",
-        docs=[{"v": [1, 2]}, {"v": [3, 4]}, {"v": [1, 2]}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [[1, 2], [3, 4]]}],
-        msg="$addToSet should collect and deduplicate array values as single elements",
-    ),
-    AccumulatorTestCase(
-        "bson_null",
-        docs=[{"v": None}, {"v": None}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [None]}],
-        msg="$addToSet should deduplicate null values",
-    ),
-]
-
-# Property [Mixed Type Collection]: $addToSet collects values of different
-# BSON types in the same group.
-ADDTOSET_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "mixed_types",
-        docs=[
-            {"v": 42},
-            {"v": "hello"},
-            {"v": True},
-            {"v": [1, 2]},
-            {"v": {"a": 1}},
-        ],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [42, "hello", True, [1, 2], {"a": 1}]}],
-        msg="$addToSet should collect values of different BSON types in one group",
-    ),
-]
-
-# Property [Numeric Equivalence]: numerically equivalent values across types are deduplicated.
-ADDTOSET_NUMERIC_EQUIV_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "equiv_all_ones",
-        docs=[{"v": 1}, {"v": Int64(1)}, {"v": 1.0}, {"v": Decimal128("1")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [1]}],
-        msg="$addToSet should deduplicate numerically equivalent values of all numeric types",
-    ),
-    AccumulatorTestCase(
-        "equiv_all_zeros",
-        docs=[{"v": 0}, {"v": Int64(0)}, {"v": 0.0}, {"v": Decimal128("0")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [0]}],
-        msg="$addToSet should deduplicate numerically equivalent zero values",
-    ),
-    AccumulatorTestCase(
-        "equiv_int32_int64",
-        docs=[{"v": 5}, {"v": Int64(5)}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [5]}],
-        msg="$addToSet should deduplicate int32 and Int64 with same numeric value",
-    ),
-    AccumulatorTestCase(
-        "equiv_double_int32",
-        docs=[{"v": 3.0}, {"v": 3}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [3.0]}],
-        msg="$addToSet should deduplicate double and int32 with same numeric value",
-    ),
-    AccumulatorTestCase(
-        "equiv_decimal128_int64",
-        docs=[{"v": Decimal128("100")}, {"v": Int64(100)}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [Decimal128("100")]}],
-        msg="$addToSet should deduplicate Decimal128 and Int64 with same numeric value",
-    ),
-    AccumulatorTestCase(
-        "equiv_negative",
-        docs=[{"v": -1}, {"v": Int64(-1)}, {"v": -1.0}, {"v": Decimal128("-1")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [-1]}],
-        msg="$addToSet should deduplicate negative numerically equivalent values",
-    ),
-]
-
-# Property [BSON Type Distinction]: values of different BSON types are distinct even when similar.
-ADDTOSET_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "distinct_false_vs_zero",
-        docs=[{"v": False}, {"v": 0}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [0, False]}],
-        msg="$addToSet should treat false and int32(0) as distinct BSON types",
-    ),
-    AccumulatorTestCase(
-        "distinct_true_vs_one",
-        docs=[{"v": True}, {"v": 1}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [1, True]}],
-        msg="$addToSet should treat true and int32(1) as distinct BSON types",
-    ),
-    AccumulatorTestCase(
-        "distinct_null_vs_missing",
-        docs=[{"v": None}, {"x": 1}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [None]}],
-        msg="$addToSet should collect null but exclude missing field",
-    ),
-    AccumulatorTestCase(
-        "distinct_empty_string_vs_null",
-        docs=[{"v": ""}, {"v": None}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": ["", None]}],
-        msg="$addToSet should treat empty string and null as distinct",
-    ),
-    AccumulatorTestCase(
-        "distinct_string_vs_number",
-        docs=[{"v": "123"}, {"v": 123}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [123, "123"]}],
-        msg="$addToSet should treat string '123' and int 123 as distinct",
-    ),
-]
-
-# Property [NaN Deduplication]: NaN values are equal for deduplication purposes.
-ADDTOSET_NAN_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "nan_double_dedup",
-        docs=[{"v": float("nan")}, {"v": float("nan")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}],
-        msg="$addToSet should deduplicate double NaN values",
-    ),
-    AccumulatorTestCase(
-        "nan_decimal128_dedup",
-        docs=[{"v": Decimal128("NaN")}, {"v": Decimal128("NaN")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [Decimal128("NaN")]}],
-        msg="$addToSet should deduplicate Decimal128 NaN values",
-    ),
-    AccumulatorTestCase(
-        "nan_cross_type",
-        docs=[{"v": float("nan")}, {"v": Decimal128("NaN")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}],
-        msg="$addToSet should deduplicate float NaN and Decimal128 NaN as numerically equal",
-    ),
-    AccumulatorTestCase(
-        "nan_with_finite",
-        docs=[{"v": float("nan")}, {"v": 5}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [pytest.approx(math.nan, nan_ok=True), 5]}],
-        msg="$addToSet should treat NaN and finite values as distinct",
-    ),
-]
-
-# Property [Infinity Deduplication]: Infinity values are equal across numeric types.
-ADDTOSET_INFINITY_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "inf_double_dedup",
-        docs=[{"v": float("inf")}, {"v": float("inf")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [float("inf")]}],
-        msg="$addToSet should deduplicate positive Infinity values",
-    ),
-    AccumulatorTestCase(
-        "neg_inf_double_dedup",
-        docs=[{"v": float("-inf")}, {"v": float("-inf")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [float("-inf")]}],
-        msg="$addToSet should deduplicate negative Infinity values",
-    ),
-    AccumulatorTestCase(
-        "inf_cross_type",
-        docs=[{"v": float("inf")}, {"v": Decimal128("Infinity")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [float("inf")]}],
-        msg="$addToSet should deduplicate float Infinity and Decimal128 Infinity",
-    ),
-    AccumulatorTestCase(
-        "inf_vs_neg_inf",
-        docs=[{"v": float("inf")}, {"v": float("-inf")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [float("-inf"), float("inf")]}],
-        msg="$addToSet should treat positive and negative Infinity as distinct",
-    ),
-]
-
-# Property [Negative Zero]: -0.0 and 0.0 are numerically equal and deduplicated.
-ADDTOSET_NEG_ZERO_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "neg_zero_double",
-        docs=[{"v": -0.0}, {"v": 0.0}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [-0.0]}],
-        msg="$addToSet should deduplicate -0.0 and 0.0 as numerically equal",
-    ),
-    AccumulatorTestCase(
-        "neg_zero_decimal128",
-        docs=[{"v": Decimal128("-0")}, {"v": Decimal128("0")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [Decimal128("-0")]}],
-        msg="$addToSet should deduplicate Decimal128 -0 and 0 as numerically equal",
-    ),
-    AccumulatorTestCase(
-        "neg_zero_cross_type",
-        docs=[{"v": -0.0}, {"v": 0}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [-0.0]}],
-        msg="$addToSet should deduplicate -0.0 and int 0 as numerically equal",
-    ),
-]
-
-# Property [Decimal128 Precision]: Decimal128 values with same numeric value but different
-# representations are deduplicated.
-ADDTOSET_DECIMAL128_PRECISION_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "decimal_trailing_zeros",
-        docs=[{"v": Decimal128("1.0")}, {"v": Decimal128("1.00")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [Decimal128("1.0")]}],
-        msg="$addToSet should deduplicate Decimal128 values with different trailing zeros",
-    ),
-    AccumulatorTestCase(
-        "decimal_34_digit_precision",
-        docs=[{"v": Decimal128("1.234567890123456789012345678901234")}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [Decimal128("1.234567890123456789012345678901234")]}],
-        msg="$addToSet should preserve full 34-digit Decimal128 precision",
-    ),
-    AccumulatorTestCase(
-        "decimal_max_min_distinct",
-        docs=[
-            {"v": Decimal128("9.999999999999999999999999999999999E+6144")},
-            {"v": Decimal128("1E-6176")},
-        ],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[
-            {
-                "result": [
-                    Decimal128("1E-6176"),
-                    Decimal128("9.999999999999999999999999999999999E+6144"),
-                ]
-            }
-        ],
-        msg="$addToSet should treat Decimal128 max and min as distinct values",
-    ),
-]
-
 # Property [Expression Arguments]: $addToSet accepts various expression forms.
 ADDTOSET_EXPRESSION_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
@@ -1151,62 +374,22 @@
     ),
 ]
 
-# Property [Expression Error Propagation]: errors from sub-expressions propagate.
-ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "error_toInt_invalid",
-        docs=[{"v": "not_a_number"}],
-        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$toInt": "$v"}}}}],
-        error_code=CONVERSION_FAILURE_ERROR,
-        msg="$addToSet should propagate $toInt conversion error",
-    ),
-    AccumulatorTestCase(
-        "error_divide_by_zero",
-        docs=[{"v": 10}],
-        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$divide": ["$v", 0]}}}}],
-        error_code=DIVIDE_BY_ZERO_V2_ERROR,
-        msg="$addToSet should propagate divide-by-zero error",
-    ),
-    AccumulatorTestCase(
-        "error_mod_by_zero",
-        docs=[{"v": 10}],
-        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$mod": ["$v", 0]}}}}],
-        error_code=MODULO_BY_ZERO_V2_ERROR,
-        msg="$addToSet should propagate mod-by-zero error",
-    ),
-]
-
 # ---------------------------------------------------------------------------
-# Aggregates
+# Aggregate
 # ---------------------------------------------------------------------------
 
 ADDTOSET_SUCCESS_TESTS = (
-    ADDTOSET_NULL_TESTS
-    + ADDTOSET_MISSING_TESTS
-    + ADDTOSET_NULL_MISSING_COMBINED_TESTS
-    + ADDTOSET_REMOVE_TESTS
+    ADDTOSET_REMOVE_TESTS
     + ADDTOSET_UNIQUE_TESTS
     + ADDTOSET_ARRAY_ELEMENT_TESTS
-    + ADDTOSET_DOC_DEDUP_TESTS
-    + ADDTOSET_STRING_DEDUP_TESTS
-    + ADDTOSET_BSON_TYPE_TESTS
-    + ADDTOSET_MIXED_TYPE_TESTS
-    + ADDTOSET_NUMERIC_EQUIV_TESTS
-    + ADDTOSET_TYPE_DISTINCTION_TESTS
-    + ADDTOSET_NAN_TESTS
-    + ADDTOSET_INFINITY_TESTS
-    + ADDTOSET_NEG_ZERO_TESTS
-    + ADDTOSET_DECIMAL128_PRECISION_TESTS
     + ADDTOSET_EXPRESSION_TESTS
     + ADDTOSET_GROUPING_TESTS
     + ADDTOSET_EMPTY_TESTS
     + ADDTOSET_EDGE_CASE_TESTS
 )
 
-ADDTOSET_ERROR_TESTS = ADDTOSET_EXPRESSION_ERROR_TESTS
-
 # ---------------------------------------------------------------------------
-# Primary test functions
+# Test function
 # ---------------------------------------------------------------------------
 
 
@@ -1222,18 +405,6 @@ def test_accumulator_addToSet(collection, test_case: AccumulatorTestCase):
     assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
 
 
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_ERROR_TESTS))
-def test_accumulator_addToSet_errors(collection, test_case):
-    """Test $addToSet accumulator error cases with $group."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
-
-
 # ---------------------------------------------------------------------------
 # Property-specific tests
 # ---------------------------------------------------------------------------
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py
new file mode 100644
index 00000000..e319caf0
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py
@@ -0,0 +1,254 @@
+"""Tests for $addToSet accumulator BSON type collection and deduplication."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+
+import pytest
+from bson import (
+    Binary,
+    Code,
+    Decimal128,
+    Int64,
+    MaxKey,
+    MinKey,
+    ObjectId,
+    Regex,
+    Timestamp,
+)
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [BSON Type Collection]: $addToSet collects and deduplicates values of every
+# non-deprecated BSON type.
+ADDTOSET_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bson_int32",
+        docs=[{"v": 10}, {"v": 20}, {"v": 10}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20]}],
+        msg="$addToSet should collect and deduplicate int32 values",
+    ),
+    AccumulatorTestCase(
+        "bson_int64",
+        docs=[{"v": Int64(10)}, {"v": Int64(20)}, {"v": Int64(10)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Int64(10), Int64(20)]}],
+        msg="$addToSet should collect and deduplicate Int64 values",
+    ),
+    AccumulatorTestCase(
+        "bson_double",
+        docs=[{"v": 1.5}, {"v": 2.5}, {"v": 1.5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1.5, 2.5]}],
+        msg="$addToSet should collect and deduplicate double values",
+    ),
+    AccumulatorTestCase(
+        "bson_decimal128",
+        docs=[
+            {"v": Decimal128("1.5")},
+            {"v": Decimal128("2.5")},
+            {"v": Decimal128("1.5")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("1.5"), Decimal128("2.5")]}],
+        msg="$addToSet should collect and deduplicate Decimal128 values",
+    ),
+    AccumulatorTestCase(
+        "bson_string",
+        docs=[{"v": "abc"}, {"v": "def"}, {"v": "abc"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["abc", "def"]}],
+        msg="$addToSet should collect and deduplicate string values",
+    ),
+    AccumulatorTestCase(
+        "bson_bool",
+        docs=[{"v": True}, {"v": False}, {"v": True}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [True, False]}],
+        msg="$addToSet should collect and deduplicate boolean values",
+    ),
+    AccumulatorTestCase(
+        "bson_datetime",
+        docs=[
+            {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)},
+            {"v": datetime(2021, 1, 1, tzinfo=timezone.utc)},
+            {"v": datetime(2020, 1, 1, tzinfo=timezone.utc)},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[
+            {
+                "result": [
+                    datetime(2020, 1, 1, tzinfo=timezone.utc),
+                    datetime(2021, 1, 1, tzinfo=timezone.utc),
+                ]
+            }
+        ],
+        msg="$addToSet should collect and deduplicate datetime values",
+    ),
+    AccumulatorTestCase(
+        "bson_objectid",
+        docs=[
+            {"v": ObjectId("000000000000000000000001")},
+            {"v": ObjectId("000000000000000000000002")},
+            {"v": ObjectId("000000000000000000000001")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[
+            {
+                "result": [
+                    ObjectId("000000000000000000000001"),
+                    ObjectId("000000000000000000000002"),
+                ]
+            }
+        ],
+        msg="$addToSet should collect and deduplicate ObjectId values",
+    ),
+    AccumulatorTestCase(
+        "bson_binary",
+        docs=[{"v": Binary(b"\x00")}, {"v": Binary(b"\x01")}, {"v": Binary(b"\x00")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [b"\x00", b"\x01"]}],
+        msg="$addToSet should collect and deduplicate Binary values",
+    ),
+    AccumulatorTestCase(
+        "bson_regex",
+        docs=[{"v": Regex("abc")}, {"v": Regex("def")}, {"v": Regex("abc")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Regex("abc"), Regex("def")]}],
+        msg="$addToSet should collect and deduplicate Regex values",
+    ),
+    AccumulatorTestCase(
+        "bson_code",
+        docs=[
+            {"v": Code("function(){}")},
+            {"v": Code("function(){return 1}")},
+            {"v": Code("function(){}")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["function(){}", "function(){return 1}"]}],
+        msg="$addToSet should collect and deduplicate Code values",
+    ),
+    AccumulatorTestCase(
+        "bson_timestamp",
+        docs=[
+            {"v": Timestamp(100, 1)},
+            {"v": Timestamp(200, 1)},
+            {"v": Timestamp(100, 1)},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Timestamp(100, 1), Timestamp(200, 1)]}],
+        msg="$addToSet should collect and deduplicate Timestamp values",
+    ),
+    AccumulatorTestCase(
+        "bson_minkey",
+        docs=[{"v": MinKey()}, {"v": MinKey()}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"": MinKey()}]}],
+        msg="$addToSet should deduplicate MinKey values",
+    ),
+    AccumulatorTestCase(
+        "bson_maxkey",
+        docs=[{"v": MaxKey()}, {"v": MaxKey()}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"": MaxKey()}]}],
+        msg="$addToSet should deduplicate MaxKey values",
+    ),
+    AccumulatorTestCase(
+        "bson_document",
+        docs=[{"v": {"x": 1}}, {"v": {"x": 2}}, {"v": {"x": 1}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"x": 1}, {"x": 2}]}],
+        msg="$addToSet should collect and deduplicate embedded document values",
+    ),
+    AccumulatorTestCase(
+        "bson_array",
+        docs=[{"v": [1, 2]}, {"v": [3, 4]}, {"v": [1, 2]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [[1, 2], [3, 4]]}],
+        msg="$addToSet should collect and deduplicate array values as single elements",
+    ),
+    AccumulatorTestCase(
+        "bson_null",
+        docs=[{"v": None}, {"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should deduplicate null values",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Test function
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_BSON_TYPE_TESTS))
+def test_accumulator_addToSet_bson_types(collection, test_case: AccumulatorTestCase):
+    """Run one BSON-type $addToSet case via aggregate and assert its expected success result."""
+    if test_case.docs:  # cases without docs skip the insert entirely
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
new file mode 100644
index 00000000..626ce236
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
@@ -0,0 +1,499 @@
+"""Tests for $addToSet accumulator deduplication behavior."""
+
+from __future__ import annotations
+
+import math
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [Document Duplicate Detection]: documents are duplicates only if they have
+# exactly the same fields, values, and field order.
+ADDTOSET_DOC_DEDUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "doc_identical",
+        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 2}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 1, "b": 2}]}],
+        msg="$addToSet should deduplicate identical documents",
+    ),
+    AccumulatorTestCase(
+        "doc_different_field_order",
+        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"b": 2, "a": 1}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"b": 2, "a": 1}, {"a": 1, "b": 2}]}],
+        msg="$addToSet should treat documents with different field order as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_different_values",
+        docs=[{"v": {"a": 1, "b": 2}}, {"v": {"a": 1, "b": 3}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 1, "b": 2}, {"a": 1, "b": 3}]}],
+        msg="$addToSet should treat documents with different values as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_nested_identical",
+        docs=[{"v": {"a": {"x": 1}}}, {"v": {"a": {"x": 1}}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": {"x": 1}}]}],
+        msg="$addToSet should deduplicate nested documents with identical structure",
+    ),
+    AccumulatorTestCase(
+        "doc_nested_different_order",
+        docs=[{"v": {"a": {"x": 1, "y": 2}}}, {"v": {"a": {"y": 2, "x": 1}}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": {"x": 1, "y": 2}}, {"a": {"y": 2, "x": 1}}]}],
+        msg="$addToSet should treat nested documents with different field order as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_empty",
+        docs=[{"v": {}}, {"v": {}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{}]}],
+        msg="$addToSet should deduplicate empty documents",
+    ),
+    AccumulatorTestCase(
+        "doc_subset",
+        docs=[{"v": {"a": 1}}, {"v": {"a": 1, "b": 2}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 1, "b": 2}, {"a": 1}]}],
+        msg="$addToSet should treat a document subset and superset as distinct",
+    ),
+    AccumulatorTestCase(
+        "doc_with_array_value",
+        docs=[{"v": {"a": [1, 2]}}, {"v": {"a": [1, 2]}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": [1, 2]}]}],
+        msg="$addToSet should deduplicate documents containing identical array values",
+    ),
+    AccumulatorTestCase(
+        "doc_with_null_value",
+        docs=[{"v": {"a": None}}, {"v": {"a": None}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": None}]}],
+        msg="$addToSet should deduplicate documents with null field values",
+    ),
+    AccumulatorTestCase(
+        "doc_with_nested_null",
+        docs=[{"v": {"a": {"b": None}}}, {"v": {"a": {"b": None}}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": {"b": None}}]}],
+        msg="$addToSet should deduplicate documents with nested null values",
+    ),
+]
+
+# Property [String Deduplication]: strings are compared by byte value with no Unicode normalization.
+ADDTOSET_STRING_DEDUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "string_identical",
+        docs=[{"v": "abc"}, {"v": "abc"}, {"v": "def"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["abc", "def"]}],
+        msg="$addToSet should deduplicate identical strings",
+    ),
+    AccumulatorTestCase(
+        "string_empty",
+        docs=[{"v": ""}, {"v": ""}, {"v": "x"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["", "x"]}],
+        msg="$addToSet should deduplicate empty strings",
+    ),
+    AccumulatorTestCase(
+        "string_unicode_no_normalization",
+        docs=[
+            {"v": "\u00e9"},
+            {"v": "\u0065\u0301"},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["\u00e9", "\u0065\u0301"]}],
+        msg="$addToSet should not normalize Unicode; precomposed and decomposed are distinct",
+    ),
+]
+
+# Property [Mixed Type Collection]: $addToSet collects values of different
+# BSON types in the same group.
+ADDTOSET_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "mixed_types",
+        docs=[
+            {"v": 42},
+            {"v": "hello"},
+            {"v": True},
+            {"v": [1, 2]},
+            {"v": {"a": 1}},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [42, "hello", True, [1, 2], {"a": 1}]}],
+        msg="$addToSet should collect values of different BSON types in one group",
+    ),
+]
+
+# Property [Numeric Equivalence]: numerically equivalent values across types are deduplicated.
+ADDTOSET_NUMERIC_EQUIV_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "equiv_all_ones",
+        docs=[{"v": 1}, {"v": Int64(1)}, {"v": 1.0}, {"v": Decimal128("1")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1]}],
+        msg="$addToSet should deduplicate numerically equivalent values of all numeric types",
+    ),
+    AccumulatorTestCase(
+        "equiv_all_zeros",
+        docs=[{"v": 0}, {"v": Int64(0)}, {"v": 0.0}, {"v": Decimal128("0")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [0]}],
+        msg="$addToSet should deduplicate numerically equivalent zero values",
+    ),
+    AccumulatorTestCase(
+        "equiv_int32_int64",
+        docs=[{"v": 5}, {"v": Int64(5)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5]}],
+        msg="$addToSet should deduplicate int32 and Int64 with same numeric value",
+    ),
+    AccumulatorTestCase(
+        "equiv_double_int32",
+        docs=[{"v": 3.0}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [3.0]}],
+        msg="$addToSet should deduplicate double and int32 with same numeric value",
+    ),
+    AccumulatorTestCase(
+        "equiv_decimal128_int64",
+        docs=[{"v": Decimal128("100")}, {"v": Int64(100)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("100")]}],
+        msg="$addToSet should deduplicate Decimal128 and Int64 with same numeric value",
+    ),
+    AccumulatorTestCase(
+        "equiv_negative",
+        docs=[{"v": -1}, {"v": Int64(-1)}, {"v": -1.0}, {"v": Decimal128("-1")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-1]}],
+        msg="$addToSet should deduplicate negative numerically equivalent values",
+    ),
+]
+
+# Property [BSON Type Distinction]: values of different BSON types are distinct even when similar.
+ADDTOSET_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "distinct_false_vs_zero",
+        docs=[{"v": False}, {"v": 0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [0, False]}],
+        msg="$addToSet should treat false and int32(0) as distinct BSON types",
+    ),
+    AccumulatorTestCase(
+        "distinct_true_vs_one",
+        docs=[{"v": True}, {"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, True]}],
+        msg="$addToSet should treat true and int32(1) as distinct BSON types",
+    ),
+    AccumulatorTestCase(
+        "distinct_null_vs_missing",
+        docs=[{"v": None}, {"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null but exclude missing field",
+    ),
+    AccumulatorTestCase(
+        "distinct_empty_string_vs_null",
+        docs=[{"v": ""}, {"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["", None]}],
+        msg="$addToSet should treat empty string and null as distinct",
+    ),
+    AccumulatorTestCase(
+        "distinct_string_vs_number",
+        docs=[{"v": "123"}, {"v": 123}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [123, "123"]}],
+        msg="$addToSet should treat string '123' and int 123 as distinct",
+    ),
+]
+
+# Property [NaN Deduplication]: NaN values are equal for deduplication purposes.
+ADDTOSET_NAN_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "nan_double_dedup",
+        docs=[{"v": float("nan")}, {"v": float("nan")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}],
+        msg="$addToSet should deduplicate double NaN values",
+    ),
+    AccumulatorTestCase(
+        "nan_decimal128_dedup",
+        docs=[{"v": Decimal128("NaN")}, {"v": Decimal128("NaN")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("NaN")]}],
+        msg="$addToSet should deduplicate Decimal128 NaN values",
+    ),
+    AccumulatorTestCase(
+        "nan_cross_type",
+        docs=[{"v": float("nan")}, {"v": Decimal128("NaN")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [pytest.approx(math.nan, nan_ok=True)]}],
+        msg="$addToSet should deduplicate float NaN and Decimal128 NaN as numerically equal",
+    ),
+    AccumulatorTestCase(
+        "nan_with_finite",
+        docs=[{"v": float("nan")}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [pytest.approx(math.nan, nan_ok=True), 5]}],
+        msg="$addToSet should treat NaN and finite values as distinct",
+    ),
+]
+
+# Property [Infinity Deduplication]: same-signed Infinity values are equal across numeric types.
+ADDTOSET_INFINITY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "inf_double_dedup",
+        docs=[{"v": float("inf")}, {"v": float("inf")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("inf")]}],
+        msg="$addToSet should deduplicate positive Infinity values",
+    ),
+    AccumulatorTestCase(
+        "neg_inf_double_dedup",
+        docs=[{"v": float("-inf")}, {"v": float("-inf")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("-inf")]}],
+        msg="$addToSet should deduplicate negative Infinity values",
+    ),
+    AccumulatorTestCase(
+        "inf_cross_type",
+        docs=[{"v": float("inf")}, {"v": Decimal128("Infinity")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("inf")]}],
+        msg="$addToSet should deduplicate float Infinity and Decimal128 Infinity",
+    ),
+    AccumulatorTestCase(
+        "inf_vs_neg_inf",
+        docs=[{"v": float("inf")}, {"v": float("-inf")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [float("-inf"), float("inf")]}],
+        msg="$addToSet should treat positive and negative Infinity as distinct",
+    ),
+]
+
+# Property [Negative Zero]: -0.0 and 0.0 are numerically equal and deduplicated.
+ADDTOSET_NEG_ZERO_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "neg_zero_double",
+        docs=[{"v": -0.0}, {"v": 0.0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-0.0]}],
+        msg="$addToSet should deduplicate -0.0 and 0.0 as numerically equal",
+    ),
+    AccumulatorTestCase(
+        "neg_zero_decimal128",
+        docs=[{"v": Decimal128("-0")}, {"v": Decimal128("0")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("-0")]}],
+        msg="$addToSet should deduplicate Decimal128 -0 and 0 as numerically equal",
+    ),
+    AccumulatorTestCase(
+        "neg_zero_cross_type",
+        docs=[{"v": -0.0}, {"v": 0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-0.0]}],
+        msg="$addToSet should deduplicate -0.0 and int 0 as numerically equal",
+    ),
+]
+
+# Property [Decimal128 Precision]: Decimal128 values with the same numeric value but different
+# representations are deduplicated.
+ADDTOSET_DECIMAL128_PRECISION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "decimal_trailing_zeros",
+        docs=[{"v": Decimal128("1.0")}, {"v": Decimal128("1.00")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("1.0")]}],
+        msg="$addToSet should deduplicate Decimal128 values with different trailing zeros",
+    ),
+    AccumulatorTestCase(
+        "decimal_34_digit_precision",
+        docs=[{"v": Decimal128("1.234567890123456789012345678901234")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("1.234567890123456789012345678901234")]}],
+        msg="$addToSet should preserve full 34-digit Decimal128 precision",
+    ),
+    AccumulatorTestCase(
+        "decimal_max_min_distinct",
+        docs=[
+            {"v": Decimal128("9.999999999999999999999999999999999E+6144")},
+            {"v": Decimal128("1E-6176")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[
+            {
+                "result": [
+                    Decimal128("1E-6176"),
+                    Decimal128("9.999999999999999999999999999999999E+6144"),
+                ]
+            }
+        ],
+        msg="$addToSet should treat Decimal128 max and min as distinct values",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Aggregate
+# ---------------------------------------------------------------------------
+
+ADDTOSET_DEDUP_TESTS = (  # parametrize input for test_accumulator_addToSet_dedup
+    ADDTOSET_DOC_DEDUP_TESTS
+    + ADDTOSET_STRING_DEDUP_TESTS
+    + ADDTOSET_MIXED_TYPE_TESTS
+    + ADDTOSET_NUMERIC_EQUIV_TESTS
+    + ADDTOSET_TYPE_DISTINCTION_TESTS
+    + ADDTOSET_NAN_TESTS
+    + ADDTOSET_INFINITY_TESTS
+    + ADDTOSET_NEG_ZERO_TESTS
+    + ADDTOSET_DECIMAL128_PRECISION_TESTS
+)
+
+# ---------------------------------------------------------------------------
+# Test function
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_DEDUP_TESTS))
+def test_accumulator_addToSet_dedup(collection, test_case: AccumulatorTestCase):
+    """Run one dedup $addToSet case via aggregate and assert its expected success result."""
+    if test_case.docs:  # cases without docs skip the insert entirely
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py
new file mode 100644
index 00000000..22208c10
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py
@@ -0,0 +1,62 @@
+"""Tests for $addToSet accumulator error cases."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertFailureCode
+from documentdb_tests.framework.error_codes import (
+    CONVERSION_FAILURE_ERROR,
+    DIVIDE_BY_ZERO_V2_ERROR,
+    MODULO_BY_ZERO_V2_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [Expression Error Propagation]: errors from sub-expressions propagate.
+ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "error_toInt_invalid",
+        docs=[{"v": "not_a_number"}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$toInt": "$v"}}}}],
+        error_code=CONVERSION_FAILURE_ERROR,
+        msg="$addToSet should propagate $toInt conversion error",
+    ),
+    AccumulatorTestCase(
+        "error_divide_by_zero",
+        docs=[{"v": 10}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$divide": ["$v", 0]}}}}],
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$addToSet should propagate divide-by-zero error",
+    ),
+    AccumulatorTestCase(
+        "error_mod_by_zero",
+        docs=[{"v": 10}],
+        pipeline=[{"$group": {"_id": None, "result": {"$addToSet": {"$mod": ["$v", 0]}}}}],
+        error_code=MODULO_BY_ZERO_V2_ERROR,
+        msg="$addToSet should propagate mod-by-zero error",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Test function
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_EXPRESSION_ERROR_TESTS))
+def test_accumulator_addToSet_errors(collection, test_case: AccumulatorTestCase):
+    """Run each error case through aggregate and assert the expected error code is returned."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py
new file mode 100644
index 00000000..d63f9526
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py
@@ -0,0 +1,142 @@
+"""Tests for $addToSet accumulator null and missing field handling."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [Null Collected]: null values are collected as valid values and deduplicated.
+ADDTOSET_NULL_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "null_all",
+        docs=[{"v": None}, {"v": None}, {"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null and deduplicate to a single null",
+    ),
+    AccumulatorTestCase(
+        "null_single",
+        docs=[{"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect a single null value",
+    ),
+    AccumulatorTestCase(
+        "null_among_values",
+        docs=[{"v": None}, {"v": 5}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None, 5, 3]}],
+        msg="$addToSet should collect null alongside other values",
+    ),
+    AccumulatorTestCase(
+        "null_and_values_dedup",
+        docs=[{"v": 10}, {"v": None}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, None, 5]}],
+        msg="$addToSet should collect null and distinct values without duplication",
+    ),
+]
+
+# Property [Missing Excluded]: missing fields are excluded from the result.
+ADDTOSET_MISSING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "missing_all",
+        docs=[{"x": 1}, {"x": 2}, {"x": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": []}],
+        msg="$addToSet should return empty array when all fields are missing",
+    ),
+    AccumulatorTestCase(
+        "missing_single",
+        docs=[{"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": []}],
+        msg="$addToSet should return empty array for a single doc with missing field",
+    ),
+    AccumulatorTestCase(
+        "missing_among_values",
+        docs=[{"x": 1}, {"v": 5}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [5, 3]}],
+        msg="$addToSet should exclude missing fields and collect only present values",
+    ),
+]
+
+# Property [Null and Missing Combined]: null is collected while missing is excluded.
+ADDTOSET_NULL_MISSING_COMBINED_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "combined_null_and_missing",
+        docs=[{"v": None}, {"x": 1}, {"v": None}, {"x": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet should collect null but exclude missing fields",
+    ),
+    AccumulatorTestCase(
+        "combined_null_missing_and_values",
+        docs=[{"v": 10}, {"v": None}, {"x": 1}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, None, 5]}],
+        msg="$addToSet should collect null and values but exclude missing fields",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Aggregate
+# ---------------------------------------------------------------------------
+
+ADDTOSET_NULL_MISSING_TESTS = (
+    ADDTOSET_NULL_TESTS + ADDTOSET_MISSING_TESTS + ADDTOSET_NULL_MISSING_COMBINED_TESTS
+)
+
+# ---------------------------------------------------------------------------
+# Test function
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_NULL_MISSING_TESTS))
+def test_accumulator_addToSet_null_missing(collection, test_case: AccumulatorTestCase):
+    """Run each null/missing case through aggregate; "result" array order is not significant."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg, ignore_order_in=["result"])

From 026b260215131e6c061dbb84d0d3d5890f7188bc Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Mon, 25 May 2026 15:24:48 -0700
Subject: [PATCH 08/13] add missing tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../addToSet/test_accumulator_addToSet.py     |  22 ++++
 .../test_accumulator_addToSet_dedup.py        |  20 +++
 ..._accumulator_addToSet_type_preservation.py | 120 ++++++++++++++++++
 3 files changed, 162 insertions(+)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
index 4d569dd5..ea1f9123 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
@@ -80,6 +80,27 @@
     ),
 ]
 
+# Property [$$REMOVE Interaction with Deduplication]: $$REMOVE entries are excluded and
+# remaining values are properly deduplicated.
+ADDTOSET_REMOVE_DEDUP_INTERACTION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "remove_dedup_same_value_produced",
+        docs=[{"v": 1}, {"v": 2}, {"v": -1}, {"v": -2}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$cond": [{"$gte": ["$v", 0]}, "kept", "$$REMOVE"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["kept"]}],
+        msg="$addToSet should collect single value when $cond produces same value "
+        "for multiple docs and $$REMOVE for others",
+    ),
+]
+
 # Property [Unique Value Collection]: $addToSet returns an array of all unique values.
 ADDTOSET_UNIQUE_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
@@ -380,6 +401,7 @@
 
 ADDTOSET_SUCCESS_TESTS = (
     ADDTOSET_REMOVE_TESTS
+    + ADDTOSET_REMOVE_DEDUP_INTERACTION_TESTS
     + ADDTOSET_UNIQUE_TESTS
     + ADDTOSET_ARRAY_ELEMENT_TESTS
     + ADDTOSET_EXPRESSION_TESTS
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
index 626ce236..2d75b818 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
@@ -158,6 +158,26 @@
         expected=[{"result": ["\u00e9", "\u0065\u0301"]}],
         msg="$addToSet should not normalize Unicode; precomposed and decomposed are distinct",
     ),
+    AccumulatorTestCase(
+        "string_embedded_null_bytes",
+        docs=[{"v": "a\x00b"}, {"v": "a\x00b"}, {"v": "a\x00c"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["a\x00b", "a\x00c"]}],
+        msg="$addToSet should compare strings with embedded null bytes by byte value",
+    ),
+    AccumulatorTestCase(
+        "string_4byte_utf8_emoji",
+        docs=[{"v": "\U0001f600"}, {"v": "\U0001f600"}, {"v": "\U0001f601"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["\U0001f600", "\U0001f601"]}],
+        msg="$addToSet should compare 4-byte UTF-8 characters (emoji) by byte value",
+    ),
 ]
 
 # Property [Mixed Type Collection]: $addToSet collects values of different
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py
new file mode 100644
index 00000000..51f70ea0
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_type_preservation.py
@@ -0,0 +1,120 @@
+"""Tests for $addToSet accumulator numeric type preservation during deduplication.
+
+When numerically equivalent values of different BSON types are deduplicated,
+verify which type survives in the result via $type projection.
+"""
+
+from __future__ import annotations
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [Numeric Equivalence — Type Preservation]: among numerically equal values, the type
+# seen first survives ($type). NOTE(review): assumes insertion-order scan (no $sort) — confirm.
+ADDTOSET_TYPE_PRESERVATION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "type_pres_int32_then_int64",
+        docs=[{"v": 5}, {"v": Int64(5)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": 5, "type": "int"}],
+        msg="$addToSet should keep int type when int32 is inserted before int64",
+    ),
+    AccumulatorTestCase(
+        "type_pres_int64_then_int32",
+        docs=[{"v": Int64(5)}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": Int64(5), "type": "long"}],
+        msg="$addToSet should keep long type when int64 is inserted before int32",
+    ),
+    AccumulatorTestCase(
+        "type_pres_double_then_int32",
+        docs=[{"v": 3.0}, {"v": 3}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": 3.0, "type": "double"}],
+        msg="$addToSet should keep double type when double is inserted before int32",
+    ),
+    AccumulatorTestCase(
+        "type_pres_int32_then_double",
+        docs=[{"v": 3}, {"v": 3.0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": 3, "type": "int"}],
+        msg="$addToSet should keep int type when int32 is inserted before double",
+    ),
+    AccumulatorTestCase(
+        "type_pres_all_four_types",
+        docs=[
+            {"v": 1},
+            {"v": Int64(1)},
+            {"v": 1.0},
+            {"v": Decimal128("1")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": 1, "type": "int"}],
+        msg="$addToSet should keep int type when int32 is inserted first "
+        "among all four numeric types",
+    ),
+    AccumulatorTestCase(
+        "type_pres_decimal128_first",
+        docs=[
+            {"v": Decimal128("1")},
+            {"v": 1},
+            {"v": Int64(1)},
+            {"v": 1.0},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$unwind": "$result"},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": Decimal128("1"), "type": "decimal"}],
+        msg="$addToSet should keep decimal type when Decimal128 is inserted "
+        "first among all four numeric types",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Test function
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_TYPE_PRESERVATION_TESTS))
+def test_accumulator_addToSet_type_preservation(collection, test_case: AccumulatorTestCase):
+    """Run each type-preservation case through aggregate and compare value and $type output."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg)

From b2709cf6e7b5c5c5a3f4bcdb8c49d848df5c5eb2 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Mon, 25 May 2026 15:36:58 -0700
Subject: [PATCH 09/13] Remove duplicates and rename tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../addToSet/test_accumulator_addToSet.py     | 44 +------------------
 .../test_accumulator_addToSet_dedup.py        |  7 ++-
 .../test_accumulator_addToSet_null_missing.py | 10 -----
 3 files changed, 7 insertions(+), 54 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
index ea1f9123..9adfe706 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
@@ -64,7 +64,7 @@
         msg="$addToSet should collect null produced by $cond while excluding $$REMOVE",
     ),
     AccumulatorTestCase(
-        "remove_dedup",
+        "remove_with_duplicate_values",
         docs=[{"v": 5}, {"v": 5}, {"v": -1}, {"v": -2}],
         pipeline=[
             {
@@ -303,26 +303,6 @@
 
 # Property [Edge Cases]: accumulator-specific edge cases.
 ADDTOSET_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "edge_single_null_doc",
-        docs=[{"v": None}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [None]}],
-        msg="$addToSet should return [null] for single null document",
-    ),
-    AccumulatorTestCase(
-        "edge_single_missing_doc",
-        docs=[{"x": 1}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": []}],
-        msg="$addToSet should return empty array for single document with missing field",
-    ),
     AccumulatorTestCase(
         "edge_many_unique",
         docs=[{"v": i} for i in range(100)],
@@ -344,7 +324,7 @@
         msg="$addToSet should deduplicate 100 docs down to 5 unique values",
     ),
     AccumulatorTestCase(
-        "edge_array_field_not_traversed",
+        "edge_array_not_unwound",
         docs=[{"v": [5, 1, 8]}],
         pipeline=[
             {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
@@ -353,16 +333,6 @@
         expected=[{"result": [[5, 1, 8]]}],
         msg="$addToSet should treat array field as a single element, not traverse it",
     ),
-    AccumulatorTestCase(
-        "edge_mixed_array_scalar",
-        docs=[{"v": 5}, {"v": [5]}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [5, [5]]}],
-        msg="$addToSet should distinguish scalar 5 from array [5]",
-    ),
     AccumulatorTestCase(
         "edge_binary_different_subtypes",
         docs=[{"v": Binary(b"\x00", 0)}, {"v": Binary(b"\x00", 5)}],
@@ -383,16 +353,6 @@
         expected=[{"result": [Regex("abc", "i"), Regex("abc", "m")]}],
         msg="$addToSet should treat Regex values with different flags as distinct",
     ),
-    AccumulatorTestCase(
-        "edge_expression_mixed_types",
-        docs=[{"v": 1}, {"v": "hello"}, {"v": True}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [1, "hello", True]}],
-        msg="$addToSet should collect mixed-type values from expression",
-    ),
 ]
 
 # ---------------------------------------------------------------------------
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
index 2d75b818..e7ccc496 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_dedup.py
@@ -456,13 +456,16 @@
     ),
     AccumulatorTestCase(
         "decimal_34_digit_precision",
-        docs=[{"v": Decimal128("1.234567890123456789012345678901234")}],
+        docs=[
+            {"v": Decimal128("1.234567890123456789012345678901234")},
+            {"v": Decimal128("1.234567890123456789012345678901234")},
+        ],
         pipeline=[
             {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
             {"$project": {"_id": 0, "result": 1}},
         ],
         expected=[{"result": [Decimal128("1.234567890123456789012345678901234")]}],
-        msg="$addToSet should preserve full 34-digit Decimal128 precision",
+        msg="$addToSet should deduplicate and preserve full 34-digit Decimal128 precision",
     ),
     AccumulatorTestCase(
         "decimal_max_min_distinct",
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py
index d63f9526..42e5627d 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_null_missing.py
@@ -47,16 +47,6 @@
         expected=[{"result": [None, 5, 3]}],
         msg="$addToSet should collect null alongside other values",
     ),
-    AccumulatorTestCase(
-        "null_and_values_dedup",
-        docs=[{"v": 10}, {"v": None}, {"v": 5}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": [10, None, 5]}],
-        msg="$addToSet should collect null and distinct values without duplication",
-    ),
 ]
 
 # Property [Missing Excluded]: missing fields are excluded from the result.

From 6a1981c69995f28fbac0378a378de3738b8dcfa3 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Mon, 25 May 2026 15:48:27 -0700
Subject: [PATCH 10/13] generate integration tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../test_accumulators_addToSet_integration.py | 265 ++++++++++++++++++
 1 file changed, 265 insertions(+)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py
new file mode 100644
index 00000000..075831f0
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py
@@ -0,0 +1,265 @@
+"""Tests for $addToSet accumulator composed with sibling accumulators in the same $group."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils.accumulator_test_case import (  # noqa: E501
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# ---------------------------------------------------------------------------
+# Property lists
+# ---------------------------------------------------------------------------
+
+# Property [AddToSet with Sum]: $addToSet collects unique values while $sum
+# computes the total independently in the same $group.
+ADDTOSET_WITH_SUM_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "addtoset_sum_basic",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "unique": [10, 20], "total": 40}],
+        msg="$addToSet should collect unique values while $sum totals all values "
+        "including duplicates",
+    ),
+    AccumulatorTestCase(
+        "addtoset_sum_multiple_groups",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 10},
+            {"cat": "b", "v": 5},
+            {"cat": "b", "v": 15},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[
+            {"_id": "a", "unique": [10], "total": 20},
+            {"_id": "b", "unique": [5, 15], "total": 20},
+        ],
+        msg="$addToSet and $sum should compute independently across multiple groups",
+    ),
+]
+
+# Property [AddToSet with Count]: $addToSet collects unique values while
+# $sum(1) counts all documents including those with duplicate values.
+ADDTOSET_WITH_COUNT_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "addtoset_count_dedup_vs_total",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "count": {"$sum": 1},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "unique": [10, 20], "count": 3}],
+        msg="$addToSet should have 2 unique values while $sum(1) counts all 3 documents",
+    ),
+]
+
+# Property [AddToSet with Push]: $addToSet collects unique values while $push
+# collects all values including duplicates.
+ADDTOSET_WITH_PUSH_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "addtoset_push_dedup_vs_all",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "all_vals": {"$push": "$v"},
+                }
+            },
+        ],
+        expected=[
+            {"_id": "a", "unique": [10, 20], "all_vals": [10, 10, 20]},
+        ],
+        msg="$addToSet should deduplicate while $push preserves all values",
+    ),
+]
+
+# Property [AddToSet with Min/Max]: $addToSet collects the full unique set
+# while $min/$max extract extremes independently.
+ADDTOSET_WITH_MIN_MAX_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "addtoset_min_max",
+        docs=[
+            {"cat": "a", "v": 30},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "lo": {"$min": "$v"},
+                    "hi": {"$max": "$v"},
+                }
+            }
+        ],
+        expected=[
+            {"_id": "a", "unique": [10, 20, 30], "lo": 10, "hi": 30},
+        ],
+        msg="$addToSet should collect all unique values while $min/$max extract extremes",
+    ),
+]
+
+# Property [AddToSet with Avg]: $addToSet collects unique values while $avg
+# computes the mean over all documents including duplicates.
+ADDTOSET_WITH_AVG_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "addtoset_avg_includes_duplicates",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 40},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "mean": {"$avg": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "unique": [10, 40], "mean": 20.0}],
+        msg="$addToSet should have 2 unique values while $avg computes "
+        "mean over all 3 docs (including duplicate)",
+    ),
+]
+
+# Property [AddToSet Null Handling vs Sum]: $addToSet collects null as a value
+# while $sum ignores null.
+ADDTOSET_NULL_VS_SUM_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "addtoset_null_collected_sum_ignores",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": None},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "unique": [None, 10], "total": 10}],
+        msg="$addToSet should collect null as a value while $sum ignores "
+        "null and totals only numeric values",
+    ),
+]
+
+# Property [Multiple AddToSet]: multiple $addToSet accumulators in the same
+# $group independently collect unique values from different fields.
+MULTIPLE_ADDTOSET_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "multiple_addtoset_different_fields",
+        docs=[
+            {"cat": "a", "color": "red", "size": "S"},
+            {"cat": "a", "color": "blue", "size": "M"},
+            {"cat": "a", "color": "red", "size": "S"},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "colors": {"$addToSet": "$color"},
+                    "sizes": {"$addToSet": "$size"},
+                }
+            }
+        ],
+        expected=[
+            {
+                "_id": "a",
+                "colors": ["red", "blue"],
+                "sizes": ["S", "M"],
+            },
+        ],
+        msg="Multiple $addToSet accumulators should independently collect "
+        "unique values from different fields",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Aggregate
+# ---------------------------------------------------------------------------
+
+ADDTOSET_INTEGRATION_TESTS = (
+    ADDTOSET_WITH_SUM_TESTS
+    + ADDTOSET_WITH_COUNT_TESTS
+    + ADDTOSET_WITH_PUSH_TESTS
+    + ADDTOSET_WITH_MIN_MAX_TESTS
+    + ADDTOSET_WITH_AVG_TESTS
+    + ADDTOSET_NULL_VS_SUM_TESTS
+    + MULTIPLE_ADDTOSET_TESTS
+)
+
+# ---------------------------------------------------------------------------
+# Test function
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_INTEGRATION_TESTS))
+def test_accumulators_addToSet_integration(collection, test_case: AccumulatorTestCase):
+    """Run each multi-accumulator $group case; document order and set-array order are ignored."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline or [],
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+        ignore_doc_order=True,
+        ignore_order_in=["unique", "colors", "sizes"],
+    )

From 264eb7f9b56008ffcaa102392cfb13c86a8dee7f Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Mon, 25 May 2026 15:55:16 -0700
Subject: [PATCH 11/13] add more integration tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../test_accumulators_addToSet_integration.py | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py
index 075831f0..510f2260 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_addToSet_integration.py
@@ -192,6 +192,68 @@
     ),
 ]
 
+# Property [AddToSet with First/Last]: $addToSet collects all unique values
+# regardless of order while $first/$last pick positional values after $sort.
+ADDTOSET_WITH_FIRST_LAST_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "addtoset_first_last",
+        docs=[
+            {"cat": "a", "v": 30},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "first_v": {"$first": "$v"},
+                    "last_v": {"$last": "$v"},
+                }
+            },
+        ],
+        expected=[
+            {"_id": "a", "unique": [10, 20, 30], "first_v": 10, "last_v": 30},
+        ],
+        msg="$addToSet should collect all unique values while $first/$last "
+        "pick sorted positional extremes",
+    ),
+]
+
+# Property [AddToSet with MergeObjects]: $addToSet collects unique values
+# while $mergeObjects combines per-document metadata independently.
+ADDTOSET_WITH_MERGEOBJECTS_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "addtoset_mergeobjects",
+        docs=[
+            {"cat": "a", "v": 10, "meta": {"src": "x"}},
+            {"cat": "a", "v": 20, "meta": {"quality": "high"}},
+            {"cat": "a", "v": 10, "meta": {"reviewed": True}},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "unique": {"$addToSet": "$v"},
+                    "merged": {"$mergeObjects": "$meta"},
+                }
+            },
+        ],
+        expected=[
+            {
+                "_id": "a",
+                "unique": [10, 20],
+                "merged": {"src": "x", "quality": "high", "reviewed": True},
+            }
+        ],
+        msg="$addToSet should deduplicate values while $mergeObjects "
+        "merges metadata from all documents including duplicates",
+    ),
+]
+
 # Property [Multiple AddToSet]: multiple $addToSet accumulators in the same
 # $group independently collect unique values from different fields.
 MULTIPLE_ADDTOSET_TESTS: list[AccumulatorTestCase] = [
@@ -234,6 +296,8 @@
     + ADDTOSET_WITH_MIN_MAX_TESTS
     + ADDTOSET_WITH_AVG_TESTS
     + ADDTOSET_NULL_VS_SUM_TESTS
+    + ADDTOSET_WITH_FIRST_LAST_TESTS
+    + ADDTOSET_WITH_MERGEOBJECTS_TESTS
     + MULTIPLE_ADDTOSET_TESTS
 )
 

From 6a923f880bd7fc9af5b06534b81071ec9bbb921e Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Tue, 26 May 2026 12:15:24 -0700
Subject: [PATCH 12/13] Rename smoke tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 ...lator_addToSet_smoke.py => test_smoke_accumulator_addToSet.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/{test_accumulator_addToSet_smoke.py => test_smoke_accumulator_addToSet.py} (100%)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_smoke_accumulator_addToSet.py
similarity index 100%
rename from documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_smoke.py
rename to documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_smoke_accumulator_addToSet.py

From d2ada0efde9f34ae1f1e0b4c52c042663422046d Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Wed, 27 May 2026 15:58:03 -0700
Subject: [PATCH 13/13] address comments

Add tests: arity error tests, BSON constant tests, expression type tests, order independence tests. Removed the BSON Code test.

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../addToSet/test_accumulator_addToSet.py     | 300 +++++++++++++++++-
 .../test_accumulator_addToSet_bson_types.py   |  15 -
 .../test_accumulator_addToSet_errors.py       |  67 +++-
 3 files changed, 365 insertions(+), 17 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
index 9adfe706..10c73c46 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet.py
@@ -2,8 +2,19 @@
 
 from __future__ import annotations
 
+from datetime import datetime, timezone
+
 import pytest
-from bson import Binary, Regex
+from bson import (
+    Binary,
+    Decimal128,
+    Int64,
+    MaxKey,
+    MinKey,
+    ObjectId,
+    Regex,
+    Timestamp,
+)
 
 from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
     AccumulatorTestCase,
@@ -355,6 +366,290 @@
     ),
 ]
 
+# ---------------------------------------------------------------------------
+# Property [BSON Constant Arguments]: $addToSet accepts BSON constants as the
+# accumulator argument. Since every doc yields the same constant, the result
+# set contains exactly one element.
+# ---------------------------------------------------------------------------
+ADDTOSET_BSON_CONSTANT_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "const_true",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": True}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [True]}],
+        msg="$addToSet with boolean True constant should return [True]",
+    ),
+    AccumulatorTestCase(
+        "const_false",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": False}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [False]}],
+        msg="$addToSet with boolean False constant should return [False]",
+    ),
+    AccumulatorTestCase(
+        "const_int64",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": Int64(42)}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Int64(42)]}],
+        msg="$addToSet with Int64 constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_double",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": 3.14}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [3.14]}],
+        msg="$addToSet with double constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_decimal128",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": Decimal128("3.14")}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Decimal128("3.14")]}],
+        msg="$addToSet with Decimal128 constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_string",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "hello"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ["hello"]}],
+        msg="$addToSet with string constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_binary",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": Binary(b"\x01\x02")}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [b"\x01\x02"]}],
+        msg="$addToSet with Binary constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_objectid",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": ObjectId("000000000000000000000000")},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [ObjectId("000000000000000000000000")]}],
+        msg="$addToSet with ObjectId constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_datetime",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": datetime(2020, 1, 1, tzinfo=timezone.utc)},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [datetime(2020, 1, 1, tzinfo=timezone.utc)]}],
+        msg="$addToSet with datetime constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_timestamp",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": Timestamp(1, 1)}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Timestamp(1, 1)]}],
+        msg="$addToSet with Timestamp constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_regex",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": Regex("abc", "i")}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [Regex("abc", "i")]}],
+        msg="$addToSet with Regex constant should return single-element set",
+    ),
+    AccumulatorTestCase(
+        "const_null",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": None}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [None]}],
+        msg="$addToSet with null constant should return [null]",
+    ),
+    AccumulatorTestCase(
+        "const_minkey",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": MinKey()}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"": MinKey()}]}],
+        msg="$addToSet with MinKey constant should return MinKey wrapped in document",
+    ),
+    AccumulatorTestCase(
+        "const_maxkey",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": MaxKey()}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"": MaxKey()}]}],
+        msg="$addToSet with MaxKey constant should return MaxKey wrapped in document",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Property [Expression Types]: $addToSet accepts various expression types as
+# its operand and evaluates them per document before collecting unique values.
+# ---------------------------------------------------------------------------
+ADDTOSET_EXPRESSION_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "expr_type_operator_single",
+        docs=[{"v": -10}, {"v": 20}, {"v": -5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": {"$abs": "$v"}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20, 5]}],
+        msg="$addToSet should accept single-input expression operator",
+    ),
+    AccumulatorTestCase(
+        "expr_type_operator_multi_arg",
+        docs=[{"v": -10, "w": 3}, {"v": 20, "w": 7}, {"v": -5, "w": 1}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$add": ["$v", "$w"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [-7, 27, -4]}],
+        msg="$addToSet should accept a multi-arg expression operator",
+    ),
+    AccumulatorTestCase(
+        "expr_type_nested",
+        docs=[{"v": -10}, {"v": 20}, {"v": -5}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$add": [1, {"$abs": "$v"}]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [11, 21, 6]}],
+        msg="$addToSet should accept nested expression operators",
+    ),
+    AccumulatorTestCase(
+        "expr_type_sysvar_remove",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": "$$REMOVE"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": []}],
+        msg="$addToSet with $$REMOVE should exclude all values and return empty array",
+    ),
+    AccumulatorTestCase(
+        "expr_type_object_expression",
+        docs=[{"v": 10}, {"v": 20}, {"v": 5}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": {"a": "$v"}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 10}, {"a": 20}, {"a": 5}]}],
+        msg="$addToSet should accept an object expression",
+    ),
+    AccumulatorTestCase(
+        "expr_type_object_with_operator",
+        docs=[{"v": -10}, {"v": 20}, {"v": -5}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"a": {"$abs": "$v"}}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [{"a": 10}, {"a": 20}, {"a": 5}]}],
+        msg="$addToSet should accept an object expression containing an operator",
+    ),
+    AccumulatorTestCase(
+        "expr_type_let",
+        docs=[{"v": 10}, {"v": 20}, {"v": 5}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$let": {"vars": {"x": "$v"}, "in": "$$x"}}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [10, 20, 5]}],
+        msg="$addToSet should accept a $let expression as its operand",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Property [Order Independence]: $addToSet produces the same set regardless
+# of input order.
+# ---------------------------------------------------------------------------
+ADDTOSET_ORDER_INDEPENDENCE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "order_independent_asc",
+        docs=[{"v": 3}, {"v": 1}, {"v": 5}, {"v": 2}, {"v": 4}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, 2, 3, 4, 5]}],
+        msg="$addToSet with ascending sort should produce same set",
+    ),
+    AccumulatorTestCase(
+        "order_independent_desc",
+        docs=[{"v": 3}, {"v": 1}, {"v": 5}, {"v": 2}, {"v": 4}],
+        pipeline=[
+            {"$sort": {"v": -1}},
+            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": [1, 2, 3, 4, 5]}],
+        msg="$addToSet with descending sort should produce same set",
+    ),
+]
+
 # ---------------------------------------------------------------------------
 # Aggregate
 # ---------------------------------------------------------------------------
@@ -368,6 +663,9 @@
     + ADDTOSET_GROUPING_TESTS
     + ADDTOSET_EMPTY_TESTS
     + ADDTOSET_EDGE_CASE_TESTS
+    + ADDTOSET_BSON_CONSTANT_TESTS
+    + ADDTOSET_EXPRESSION_TYPE_TESTS
+    + ADDTOSET_ORDER_INDEPENDENCE_TESTS
 )
 
 # ---------------------------------------------------------------------------
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py
index e319caf0..644c8f4f 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_bson_types.py
@@ -7,7 +7,6 @@
 import pytest
 from bson import (
     Binary,
-    Code,
     Decimal128,
     Int64,
     MaxKey,
@@ -157,20 +156,6 @@
         expected=[{"result": [Regex("abc"), Regex("def")]}],
         msg="$addToSet should collect and deduplicate Regex values",
     ),
-    AccumulatorTestCase(
-        "bson_code",
-        docs=[
-            {"v": Code("function(){}")},
-            {"v": Code("function(){return 1}")},
-            {"v": Code("function(){}")},
-        ],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$addToSet": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": ["function(){}", "function(){return 1}"]}],
-        msg="$addToSet should collect and deduplicate Code values",
-    ),
     AccumulatorTestCase(
         "bson_timestamp",
         docs=[
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py
index 22208c10..67ba0730 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/addToSet/test_accumulator_addToSet_errors.py
@@ -11,6 +11,8 @@
 from documentdb_tests.framework.error_codes import (
     CONVERSION_FAILURE_ERROR,
     DIVIDE_BY_ZERO_V2_ERROR,
+    EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
     MODULO_BY_ZERO_V2_ERROR,
 )
 from documentdb_tests.framework.executor import execute_command
@@ -20,6 +22,66 @@
 # Property lists
 # ---------------------------------------------------------------------------
 
+# Property [Arity]: $addToSet in accumulator context is a unary operator and
+# rejects array syntax as well as multi-key expression objects.
+ADDTOSET_ARITY_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "arity_empty_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": []}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject empty array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_single_element_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": [1]}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject single-element array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_single_field_ref_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": ["$v"]}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject single field ref in array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_element_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$addToSet": [1, 2, 3]}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$addToSet should reject multi-element array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_key_expression_object",
+        docs=[{"v": 1}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$addToSet": {"$add": [1, 2], "$multiply": [3, 4]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+        msg="$addToSet should reject multi-key expression object",
+    ),
+]
+
 # Property [Expression Error Propagation]: errors from sub-expressions propagate.
 ADDTOSET_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
@@ -50,7 +112,10 @@
 # ---------------------------------------------------------------------------
 
 
-@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_EXPRESSION_ERROR_TESTS))
+ADDTOSET_ERROR_TESTS = ADDTOSET_ARITY_ERROR_TESTS + ADDTOSET_EXPRESSION_ERROR_TESTS
+
+
+@pytest.mark.parametrize("test_case", pytest_params(ADDTOSET_ERROR_TESTS))
 def test_accumulator_addToSet_errors(collection, test_case):
     """Test $addToSet accumulator error cases with $group."""
     if test_case.docs: