From ce426a8f7476b8e68544ce861e101620ff1e8203 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Fri, 15 May 2026 15:18:53 -0700
Subject: [PATCH 01/10] Add generated tests for the $first accumulator

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../first/test_accumulator_first.py           | 1205 +++++++++++++++++
 1 file changed, 1205 insertions(+)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
new file mode 100644
index 00000000..3da60355
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
@@ -0,0 +1,1205 @@
+"""Tests for $first accumulator in $group, $bucket, and $bucketAuto contexts."""
+
+from __future__ import annotations
+
+import math
+from datetime import datetime, timezone
+from typing import Any
+
+import pytest
+from bson import (
+    Binary,
+    Code,
+    Decimal128,
+    Int64,
+    MaxKey,
+    MinKey,
+    ObjectId,
+    Regex,
+    Timestamp,
+)
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
+from documentdb_tests.framework.error_codes import (
+    BAD_VALUE_ERROR,
+    CONVERSION_FAILURE_ERROR,
+    DIVIDE_BY_ZERO_V2_ERROR,
+    EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+    MODULO_BY_ZERO_V2_ERROR,
+    MODULO_ZERO_REMAINDER_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+from documentdb_tests.framework.test_constants import (
+    DECIMAL128_INFINITY,
+    DECIMAL128_LARGE_EXPONENT,
+    DECIMAL128_MIN_POSITIVE,
+    DECIMAL128_NAN,
+    DECIMAL128_NEGATIVE_INFINITY,
+    DECIMAL128_NEGATIVE_NAN,
+    DECIMAL128_NEGATIVE_ZERO,
+    DECIMAL128_ZERO,
+    DOUBLE_NEGATIVE_ZERO,
+    DOUBLE_ZERO,
+    FLOAT_INFINITY,
+    FLOAT_NAN,
+    FLOAT_NEGATIVE_INFINITY,
+    FLOAT_NEGATIVE_NAN,
+)
+
+# ===========================================================================
+# Pipeline Helpers
+# ===========================================================================
+
+
+def _group_first(accumulator: Any) -> list[dict[str, Any]]:
+    """Build a pipeline that applies $first in a single catch-all $group."""
+    # A constant _id of None makes $group fold every input document into one group.
+    group_stage = {"$group": {"_id": None, "result": {"$first": accumulator}}}
+    project_stage = {"$project": {"_id": 0, "result": 1}}
+    return [group_stage, project_stage]
+
+
+def _bucket_first(accumulator: Any) -> list[dict[str, Any]]:
+    """Build a pipeline that applies $first in the output of a single-bucket $bucket."""
+    # groupBy is the constant 0 and boundaries are [-1, 1], so all documents share one bucket.
+    bucket_spec = {
+        "groupBy": {"$literal": 0},
+        "boundaries": [-1, 1],
+        "output": {"result": {"$first": accumulator}},
+    }
+    return [
+        {"$bucket": bucket_spec},
+        {"$project": {"_id": 0, "result": 1}},
+    ]
+
+
+def _bucket_auto_first(accumulator: Any) -> list[dict[str, Any]]:
+    """Build a pipeline that applies $first in the output of a one-bucket $bucketAuto."""
+    # A constant groupBy together with buckets=1 asks for a single bucket for all documents.
+    bucket_auto_spec = {
+        "groupBy": {"$literal": 0},
+        "buckets": 1,
+        "output": {"result": {"$first": accumulator}},
+    }
+    return [
+        {"$bucketAuto": bucket_auto_spec},
+        {"$project": {"_id": 0, "result": 1}},
+    ]
+
+
+def _group_first_with_type(accumulator: Any) -> list[dict[str, Any]]:
+    """Build a $group/$first pipeline whose projection reports the result and its $type."""
+    # "type" carries the server-side BSON type alias alongside the decoded "value".
+    group_stage = {"$group": {"_id": None, "result": {"$first": accumulator}}}
+    project_stage = {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}
+    return [group_stage, project_stage]
+
+
+def _run(collection, test_case: AccumulatorTestCase):
+    """Seed the collection with the case's docs (if any) and execute its aggregate pipeline."""
+    seed_docs = test_case.docs
+    if seed_docs:  # None or an empty list leaves the collection untouched
+        collection.insert_many(seed_docs)
+    # The aggregate command is sent with an empty "cursor" document (no explicit options).
+    command = {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}
+    return execute_command(collection, command)
+
+
+# ===========================================================================
+# 1. Null and Missing Handling ($group primary)
+# ===========================================================================
+
+# Property [Null and Missing NOT Excluded]: $first is purely positional. Unlike
+# $min/$max it does not skip null, missing, or $$REMOVE operands; when the first
+# document yields one of those, the cases below expect the result to be null.
+FIRST_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "null_first_then_value",
+        docs=[{"v": None}, {"v": 5}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": None}],
+        msg="$first should return null when first doc has null (first wins)",
+    ),
+    AccumulatorTestCase(
+        "null_missing_first_then_value",
+        docs=[{"x": 1}, {"v": 5}],  # first doc has no "v" field at all
+        pipeline=_group_first("$v"),
+        expected=[{"result": None}],
+        msg="$first should return null when first doc has missing field",
+    ),
+    AccumulatorTestCase(
+        "null_value_first_then_null",
+        docs=[{"v": 5}, {"v": None}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": 5}],
+        msg="$first should return 5 when first doc has value, second is null",
+    ),
+    AccumulatorTestCase(
+        "null_value_first_then_missing",
+        docs=[{"v": 5}, {"x": 1}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": 5}],
+        msg="$first should return 5 when first doc has value, second is missing",
+    ),
+    AccumulatorTestCase(
+        "null_all",
+        docs=[{"v": None}, {"v": None}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": None}],
+        msg="$first should return null when all docs have null",
+    ),
+    AccumulatorTestCase(
+        "null_missing_all",
+        docs=[{"x": 1}, {"x": 2}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": None}],
+        msg="$first should return null when all docs have missing field",
+    ),
+    AccumulatorTestCase(
+        "null_and_missing_mixed",
+        docs=[{"v": None}, {"x": 1}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": None}],
+        msg="$first should return null when first is null and second is missing",
+    ),
+    AccumulatorTestCase(
+        "null_remove_first_then_value",
+        docs=[{"v": -1}, {"v": 5}],  # -1 fails the $gt test, so the operand is $$REMOVE
+        pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}),
+        expected=[{"result": None}],
+        msg="$first should return null when first doc produces $$REMOVE",
+    ),
+    AccumulatorTestCase(
+        "null_remove_all",
+        docs=[{"v": -1}, {"v": -2}],  # both docs take the $$REMOVE branch
+        pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}),
+        expected=[{"result": None}],
+        msg="$first should return null when all docs produce $$REMOVE",
+    ),
+    AccumulatorTestCase(
+        "null_remove_second_value_first",
+        docs=[{"v": 5}, {"v": -1}],  # only the second doc takes the $$REMOVE branch
+        pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}),
+        expected=[{"result": 5}],
+        msg="$first should return value when first doc has value, second $$REMOVE",
+    ),
+]
+
+
+# ===========================================================================
+# 2. BSON Type Preservation ($group primary)
+# ===========================================================================
+
+# Property [BSON Type Preservation]: $first returns the first document's value
+# with its BSON type intact -- no coercion, comparison, or promotion. Every case
+# pairs the typed value with an int 999 in the second doc, which must be ignored.
+FIRST_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "type_int32",
+        docs=[{"v": 42}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": 42}],
+        msg="$first should preserve int32 type",
+    ),
+    AccumulatorTestCase(
+        "type_int64",
+        docs=[{"v": Int64(42)}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": Int64(42)}],
+        msg="$first should preserve Int64 type",
+    ),
+    AccumulatorTestCase(
+        "type_double",
+        docs=[{"v": 3.14}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": 3.14}],
+        msg="$first should preserve double type",
+    ),
+    AccumulatorTestCase(
+        "type_decimal128",
+        docs=[{"v": Decimal128("3.14")}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": Decimal128("3.14")}],
+        msg="$first should preserve Decimal128 type",
+    ),
+    AccumulatorTestCase(
+        "type_string",
+        docs=[{"v": "hello"}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": "hello"}],
+        msg="$first should preserve string type",
+    ),
+    AccumulatorTestCase(
+        "type_bool_true",
+        docs=[{"v": True}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": True}],
+        msg="$first should preserve boolean True",
+    ),
+    AccumulatorTestCase(
+        "type_bool_false",
+        docs=[{"v": False}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": False}],
+        msg="$first should preserve boolean False",
+    ),
+    AccumulatorTestCase(
+        "type_null",
+        docs=[{"v": None}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": None}],
+        msg="$first should preserve null value",
+    ),
+    AccumulatorTestCase(
+        "type_embedded_doc",
+        docs=[{"v": {"a": 1, "b": 2}}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": {"a": 1, "b": 2}}],
+        msg="$first should preserve embedded document",
+    ),
+    AccumulatorTestCase(
+        "type_empty_doc",
+        docs=[{"v": {}}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": {}}],
+        msg="$first should preserve empty document",
+    ),
+    AccumulatorTestCase(
+        "type_array",
+        docs=[{"v": [1, 2, 3]}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": [1, 2, 3]}],
+        msg="$first should preserve array value",
+    ),
+    AccumulatorTestCase(
+        "type_empty_array",
+        docs=[{"v": []}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": []}],
+        msg="$first should preserve empty array",
+    ),
+    AccumulatorTestCase(
+        "type_binary",
+        docs=[{"v": Binary(b"\x01\x02")}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": b"\x01\x02"}],  # presumably bytes: PyMongo decodes subtype 0 so
+        msg="$first should preserve Binary value",
+    ),
+    AccumulatorTestCase(
+        "type_binary_custom_subtype",
+        docs=[{"v": Binary(b"\x01", 5)}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": Binary(b"\x01", 5)}],  # subtype 5 is expected back as Binary
+        msg="$first should preserve Binary with custom subtype",
+    ),
+    AccumulatorTestCase(
+        "type_objectid",
+        docs=[{"v": ObjectId("000000000000000000000001")}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": ObjectId("000000000000000000000001")}],
+        msg="$first should preserve ObjectId value",
+    ),
+    AccumulatorTestCase(
+        "type_datetime",
+        docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": datetime(2023, 6, 15, tzinfo=timezone.utc)}],
+        msg="$first should preserve datetime value",
+    ),
+    AccumulatorTestCase(
+        "type_timestamp",
+        docs=[{"v": Timestamp(100, 1)}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": Timestamp(100, 1)}],
+        msg="$first should preserve Timestamp value",
+    ),
+    AccumulatorTestCase(
+        "type_regex",
+        docs=[{"v": Regex("abc", "i")}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": Regex("abc", "i")}],
+        msg="$first should preserve Regex value",
+    ),
+]
+
+
+# ===========================================================================
+# 3. Special Numeric Value Preservation ($group primary)
+# ===========================================================================
+
+# Property [Special Numeric Preservation]: $first passes values through without
+# comparison or reduction, so NaN, signed zero, and +/-Infinity (double and
+# Decimal128) must come back exactly as stored in the first document.
+FIRST_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "special_float_nan",
+        docs=[{"v": FLOAT_NAN}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": pytest.approx(math.nan, nan_ok=True)}],  # NaN != NaN in Python
+        msg="$first should preserve float NaN",
+    ),
+    AccumulatorTestCase(
+        "special_float_neg_zero",
+        docs=[{"v": DOUBLE_NEGATIVE_ZERO}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": DOUBLE_NEGATIVE_ZERO}],  # NOTE(review): -0.0 == 0.0 in Python
+        msg="$first should preserve double -0.0",
+    ),
+    AccumulatorTestCase(
+        "special_float_inf",
+        docs=[{"v": FLOAT_INFINITY}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": FLOAT_INFINITY}],
+        msg="$first should preserve float Infinity",
+    ),
+    AccumulatorTestCase(
+        "special_float_neg_inf",
+        docs=[{"v": FLOAT_NEGATIVE_INFINITY}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": FLOAT_NEGATIVE_INFINITY}],
+        msg="$first should preserve float -Infinity",
+    ),
+    AccumulatorTestCase(
+        "special_decimal_nan",
+        docs=[{"v": DECIMAL128_NAN}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": DECIMAL128_NAN}],
+        msg="$first should preserve Decimal128 NaN",
+    ),
+    AccumulatorTestCase(
+        "special_decimal_neg_nan",
+        docs=[{"v": DECIMAL128_NEGATIVE_NAN}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": DECIMAL128_NEGATIVE_NAN}],
+        msg="$first should preserve Decimal128 -NaN",
+    ),
+    AccumulatorTestCase(
+        "special_decimal_neg_zero",
+        docs=[{"v": DECIMAL128_NEGATIVE_ZERO}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": DECIMAL128_NEGATIVE_ZERO}],
+        msg="$first should preserve Decimal128 -0",
+    ),
+    AccumulatorTestCase(
+        "special_decimal_inf",
+        docs=[{"v": DECIMAL128_INFINITY}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": DECIMAL128_INFINITY}],
+        msg="$first should preserve Decimal128 Infinity",
+    ),
+    AccumulatorTestCase(
+        "special_decimal_neg_inf",
+        docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": DECIMAL128_NEGATIVE_INFINITY}],
+        msg="$first should preserve Decimal128 -Infinity",
+    ),
+]
+
+
+# ===========================================================================
+# 4. Decimal128 Precision Preservation ($group primary)
+# ===========================================================================
+
+# Property [Decimal128 Precision]: $first must pass through Decimal128 values
+# without modifying precision, trailing zeros, sign, or exponent representation.
+FIRST_DECIMAL_PRECISION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "decimal_high_precision",
+        docs=[{"v": Decimal128("1.234567890123456789012345678901234")}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": Decimal128("1.234567890123456789012345678901234")}],
+        msg="$first should preserve 34-digit Decimal128 precision",
+    ),
+    AccumulatorTestCase(
+        "decimal_trailing_zeros",
+        docs=[{"v": Decimal128("1.00")}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": Decimal128("1.00")}],
+        msg="$first should preserve trailing zeros in Decimal128",
+    ),
+    AccumulatorTestCase(
+        "decimal_large_exponent",
+        docs=[{"v": DECIMAL128_LARGE_EXPONENT}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": DECIMAL128_LARGE_EXPONENT}],
+        msg="$first should preserve Decimal128 with large exponent",
+    ),
+    AccumulatorTestCase(
+        "decimal_small_positive",
+        docs=[{"v": DECIMAL128_MIN_POSITIVE}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": DECIMAL128_MIN_POSITIVE}],
+        msg="$first should preserve smallest positive Decimal128",
+    ),
+    AccumulatorTestCase(
+        "decimal_zero",
+        docs=[{"v": DECIMAL128_ZERO}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": DECIMAL128_ZERO}],
+        msg="$first should preserve Decimal128 zero",
+    ),
+    AccumulatorTestCase(
+        "decimal_negative_trailing_zeros",  # -0 itself is covered by special_decimal_neg_zero
+        docs=[{"v": Decimal128("-1.2300")}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": Decimal128("-1.2300")}],
+        msg="$first should preserve sign and trailing zeros of a negative Decimal128",
+    ),
+]
+
+
+# ===========================================================================
+# 5. BSON Type Distinction (No Coercion) ($group primary)
+# ===========================================================================
+
+# Property [No Coercion]: look-alike values of different BSON types stay distinct.
+# $type is projected too, because Python equality alone treats False == 0 and True == 1.
+FIRST_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "distinct_false_not_zero",
+        docs=[{"v": False}, {"v": 0}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": False, "type": "bool"}],
+        msg="$first should return False, not coerce to 0",
+    ),
+    AccumulatorTestCase(
+        "distinct_true_not_one",
+        docs=[{"v": True}, {"v": 1}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": True, "type": "bool"}],
+        msg="$first should return True, not coerce to 1",
+    ),
+    AccumulatorTestCase(
+        "distinct_zero_not_false",
+        docs=[{"v": 0}, {"v": False}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": 0, "type": "int"}],
+        msg="$first should return int32(0), not coerce to False",
+    ),
+    AccumulatorTestCase(
+        "distinct_empty_string",
+        docs=[{"v": ""}, {"v": None}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": "", "type": "string"}],
+        msg="$first should return empty string, not coerce to null",
+    ),
+    AccumulatorTestCase(
+        "distinct_string_number",
+        docs=[{"v": "123"}, {"v": 123}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": "123", "type": "string"}],
+        msg="$first should return string '123', not coerce to int",
+    ),
+]
+
+
+# ===========================================================================
+# 6. Mixed Type Documents ($group primary)
+# ===========================================================================
+
+# Property [Position-Based]: $first picks the first document's value regardless
+# of what later documents contain; unlike $min/$max there is no cross-type
+# comparison. Each type pair is exercised in both orders (null: see section 1).
+FIRST_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "mixed_int_then_string",
+        docs=[{"v": 42}, {"v": "hello"}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": 42}],
+        msg="$first should return int when first doc is int, second is string",
+    ),
+    AccumulatorTestCase(
+        "mixed_string_then_int",
+        docs=[{"v": "hello"}, {"v": 42}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": "hello"}],
+        msg="$first should return string when first doc is string, second is int",
+    ),
+    AccumulatorTestCase(
+        "mixed_bool_then_number",
+        docs=[{"v": True}, {"v": 42}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": True}],
+        msg="$first should return True when first doc is bool, second is int",
+    ),
+    AccumulatorTestCase(
+        "mixed_array_then_scalar",
+        docs=[{"v": [1, 2, 3]}, {"v": 42}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": [1, 2, 3]}],
+        msg="$first should return array when first doc is array, second is scalar",
+    ),
+    AccumulatorTestCase(
+        "mixed_scalar_then_array",
+        docs=[{"v": 42}, {"v": [1, 2, 3]}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": 42}],
+        msg="$first should return scalar when first doc is scalar, second is array",
+    ),
+    AccumulatorTestCase(
+        "mixed_number_then_bool",
+        docs=[{"v": 42}, {"v": True}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": 42}],
+        msg="$first should return int when first doc is int, second is bool",
+    ),
+]
+
+
+# ===========================================================================
+# 7. Return Type Verification ($group primary)
+# ===========================================================================
+
+# Property [Return Type]: $first preserves the BSON type of the returned value.
+# The type is checked server-side via the $type alias in the trailing $project.
+FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "return_type_int32",
+        docs=[{"v": 42}, {"v": 999}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": 42, "type": "int"}],
+        msg="$first of int32 should return type 'int'",
+    ),
+    AccumulatorTestCase(
+        "return_type_int64",
+        docs=[{"v": Int64(42)}, {"v": 999}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": Int64(42), "type": "long"}],
+        msg="$first of Int64 should return type 'long'",
+    ),
+    AccumulatorTestCase(
+        "return_type_double",
+        docs=[{"v": 3.14}, {"v": 999}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": 3.14, "type": "double"}],
+        msg="$first of double should return type 'double'",
+    ),
+    AccumulatorTestCase(
+        "return_type_decimal",
+        docs=[{"v": Decimal128("3.14")}, {"v": 999}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": Decimal128("3.14"), "type": "decimal"}],
+        msg="$first of Decimal128 should return type 'decimal'",
+    ),
+    AccumulatorTestCase(
+        "return_type_string",
+        docs=[{"v": "hello"}, {"v": 999}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": "hello", "type": "string"}],
+        msg="$first of string should return type 'string'",
+    ),
+    AccumulatorTestCase(
+        "return_type_boolean",
+        docs=[{"v": True}, {"v": 999}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": True, "type": "bool"}],
+        msg="$first of boolean should return type 'bool'",
+    ),
+    AccumulatorTestCase(
+        "return_type_date",
+        docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": datetime(2023, 6, 15, tzinfo=timezone.utc), "type": "date"}],
+        msg="$first of datetime should return type 'date'",
+    ),
+    AccumulatorTestCase(
+        "return_type_null",
+        docs=[{"v": None}, {"v": 999}],
+        pipeline=_group_first_with_type("$v"),
+        expected=[{"value": None, "type": "null"}],
+        msg="$first of null should return type 'null'",
+    ),
+]
+
+
+# ===========================================================================
+# 8. Expression Argument Tests ($group primary)
+# ===========================================================================
+
+# Property [Input Forms]: the $first operand may be a field path, a dotted path,
+# a literal (including null), a computed expression, or a $$REMOVE conditional.
+FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "input_field_path",
+        docs=[{"v": 10}, {"v": 20}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": 10}],
+        msg="$first should accept a basic field path reference",
+    ),
+    AccumulatorTestCase(
+        "input_nested_field",
+        docs=[{"a": {"b": 10}}, {"a": {"b": 20}}],
+        pipeline=_group_first("$a.b"),
+        expected=[{"result": 10}],
+        msg="$first should accept a nested document field path",
+    ),
+    AccumulatorTestCase(
+        "input_literal",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=_group_first(42),
+        expected=[{"result": 42}],
+        msg="$first with a literal constant should return that constant",
+    ),
+    AccumulatorTestCase(
+        "input_expression",
+        docs=[{"price": 10, "qty": 2}, {"price": 5, "qty": 10}],
+        pipeline=_group_first({"$multiply": ["$price", "$qty"]}),
+        expected=[{"result": 20}],  # 10 * 2 from the first doc, not 5 * 10
+        msg="$first should accept a computed expression as operand",
+    ),
+    AccumulatorTestCase(
+        "input_cond_remove",
+        docs=[{"v": -1}, {"v": 5}],  # first doc takes the $$REMOVE branch of the $cond
+        pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}),
+        expected=[{"result": None}],
+        msg="$first should accept conditional with $$REMOVE as operand",
+    ),
+    AccumulatorTestCase(
+        "input_null_literal",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=_group_first(None),
+        expected=[{"result": None}],
+        msg="$first with null literal should return null",
+    ),
+]
+
+
+# ===========================================================================
+# 9. Arity Rejection ($group primary)
+# ===========================================================================
+
+# Property [Arity]: as an accumulator $first takes exactly one operand; array
+# operands and multi-key expression objects are expected to fail with an error.
+FIRST_ARITY_GROUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "arity_empty_array_group",
+        docs=[{"v": 1}],
+        pipeline=_group_first([]),
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject empty array in accumulator context ($group)",
+    ),
+    AccumulatorTestCase(
+        "arity_single_element_group",
+        docs=[{"v": 1}],
+        pipeline=_group_first([1]),
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject single-element array in accumulator context ($group)",
+    ),
+    AccumulatorTestCase(
+        "arity_single_field_ref_group",
+        docs=[{"v": 1}],
+        pipeline=_group_first(["$v"]),
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject single field ref in array in accumulator context ($group)",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_element_group",
+        docs=[{"v": 1}],
+        pipeline=_group_first([1, 2, 3]),
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject multi-element array in accumulator context ($group)",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_key_expression_group",  # operand object names two operators at once
+        docs=[{"v": 1}],
+        pipeline=_group_first({"$add": [1, 2], "$multiply": [3, 4]}),
+        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+        msg="$first should reject multi-key expression object ($group)",
+    ),
+]
+
+
+# ===========================================================================
+# 10. Expression Error Propagation ($group primary)
+# ===========================================================================
+
+# Property [Expression Error Propagation]: an error raised while evaluating the
+# $first operand expression is expected to fail the whole aggregation.
+FIRST_EXPRESSION_ERROR_GROUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "error_toInt_invalid_group",
+        docs=[{"v": "not_a_number"}],  # a string that $toInt cannot parse
+        pipeline=_group_first({"$toInt": "$v"}),
+        error_code=CONVERSION_FAILURE_ERROR,
+        msg="$first should propagate conversion error from $toInt sub-expression in $group",
+    ),
+    AccumulatorTestCase(
+        "error_divide_by_zero_group",
+        docs=[{"v": 10}],
+        pipeline=_group_first({"$divide": ["$v", 0]}),
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$first should propagate divide-by-zero error in $group",
+    ),
+    AccumulatorTestCase(
+        "error_mod_by_zero_group",
+        docs=[{"v": 10}],
+        pipeline=_group_first({"$mod": ["$v", 0]}),
+        error_code=MODULO_BY_ZERO_V2_ERROR,
+        msg="$first should propagate mod-by-zero error in $group",
+    ),
+]
+
+
+# ===========================================================================
+# 11. Accumulator-Specific Edge Cases ($group primary)
+# ===========================================================================
+
+# Property [Edge Cases]: group-size and input-shape boundaries in the accumulator context.
+FIRST_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "edge_single_doc",
+        docs=[{"v": 42}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": 42}],
+        msg="$first of a single document should return that document's value",
+    ),
+    AccumulatorTestCase(
+        "edge_single_null_doc",
+        docs=[{"v": None}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": None}],
+        msg="$first of a single null document should return null",
+    ),
+    AccumulatorTestCase(
+        "edge_single_missing_doc",
+        docs=[{"x": 1}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": None}],
+        msg="$first of a single document with missing field should return null",
+    ),
+    AccumulatorTestCase(
+        "edge_many_docs",
+        docs=[{"v": i} for i in range(100)],  # v runs 0..99 in insertion order
+        pipeline=_group_first("$v"),
+        expected=[{"result": 0}],
+        msg="$first should return first document's value (v=0) across 100 documents",
+    ),
+    AccumulatorTestCase(
+        "edge_empty_collection",
+        docs=None,  # nothing is inserted: _run skips insert_many for falsy docs
+        pipeline=_group_first("$v"),
+        expected=[],
+        msg="$first on empty collection should return empty result",
+    ),
+    AccumulatorTestCase(
+        "edge_array_not_traversed",
+        docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": [5, 1, 8]}],
+        msg="$first should return array as whole value, not traverse it",
+    ),
+    AccumulatorTestCase(
+        "edge_literal_constant",
+        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
+        pipeline=_group_first(42),
+        expected=[{"result": 42}],
+        msg="$first with literal constant should always return that constant",
+    ),
+]
+
+
+# ===========================================================================
+# Combine all $group primary success tests
+# ===========================================================================
+
+FIRST_GROUP_SUCCESS_TESTS = [  # success cases only; arity/error lists are kept separate
+    *FIRST_NULL_MISSING_TESTS,
+    *FIRST_BSON_TYPE_TESTS,
+    *FIRST_SPECIAL_NUMERIC_TESTS,
+    *FIRST_DECIMAL_PRECISION_TESTS,
+    *FIRST_TYPE_DISTINCTION_TESTS,
+    *FIRST_MIXED_TYPE_TESTS,
+    *FIRST_RETURN_TYPE_TESTS,
+    *FIRST_INPUT_FORM_TESTS,
+    *FIRST_EDGE_CASE_TESTS,
+]
+
+
+# ===========================================================================
+# $group primary test function
+# ===========================================================================
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_SUCCESS_TESTS))
+def test_accumulator_first_group(collection, test_case: AccumulatorTestCase):
+    """Run each $group success case and compare the returned documents with `expected`."""
+    expected_docs, failure_msg = test_case.expected, test_case.msg
+    assertSuccess(_run(collection, test_case), expected_docs, msg=failure_msg)
+
+
+# ===========================================================================
+# 12a. $bucket Smoke Tests
+# ===========================================================================
+
+# Property [Bucket Stage Smoke]: $first produces correct results through
+# $bucket for representative cases.
+FIRST_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bucket_basic_numeric",
+        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
+        pipeline=_bucket_first("$v"),
+        expected=[{"result": 10}],
+        msg="$first via $bucket should return first numeric value",
+    ),
+    AccumulatorTestCase(
+        "bucket_null_first",
+        docs=[{"v": None}, {"v": 5}],
+        pipeline=_bucket_first("$v"),
+        expected=[{"result": None}],
+        msg="$first via $bucket should return null when first doc is null",
+    ),
+    AccumulatorTestCase(
+        "bucket_missing_first",
+        docs=[{"x": 1}, {"v": 5}],
+        pipeline=_bucket_first("$v"),
+        expected=[{"result": None}],
+        msg="$first via $bucket should return null when first doc has missing field",
+    ),
+    AccumulatorTestCase(
+        "bucket_string_first",
+        docs=[{"v": "hello"}, {"v": "world"}],
+        pipeline=_bucket_first("$v"),
+        expected=[{"result": "hello"}],
+        msg="$first via $bucket should return first string value",
+    ),
+    AccumulatorTestCase(
+        "bucket_array_first",
+        docs=[{"v": [1, 2]}, {"v": [3, 4]}],
+        pipeline=_bucket_first("$v"),
+        expected=[{"result": [1, 2]}],
+        msg="$first via $bucket should return first array value",
+    ),
+    AccumulatorTestCase(
+        "bucket_single_doc",
+        docs=[{"v": 42}],
+        pipeline=_bucket_first("$v"),
+        expected=[{"result": 42}],
+        msg="$first via $bucket should handle single document",
+    ),
+    AccumulatorTestCase(
+        "bucket_nan_preserved",
+        docs=[{"v": FLOAT_NAN}, {"v": 5}],
+        pipeline=_bucket_first("$v"),
+        expected=[{"result": pytest.approx(math.nan, nan_ok=True)}],
+        msg="$first via $bucket should preserve NaN as first value",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_SMOKE_TESTS))
+def test_accumulator_first_bucket(collection, test_case: AccumulatorTestCase):
+    """Test $first accumulator via $bucket for representative cases."""
+    result = _run(collection, test_case)
+    assertSuccess(result, test_case.expected, msg=test_case.msg)
+
+
+# ===========================================================================
+# 12b. $bucketAuto Smoke Tests
+# ===========================================================================
+
+# Property [BucketAuto Stage Smoke]: $first produces correct results through
+# $bucketAuto for representative cases.
+FIRST_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "bucket_auto_basic_numeric",
+        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
+        pipeline=_bucket_auto_first("$v"),
+        expected=[{"result": 10}],
+        msg="$first via $bucketAuto should return first numeric value",
+    ),
+    AccumulatorTestCase(
+        "bucket_auto_null_first",
+        docs=[{"v": None}, {"v": 5}],
+        pipeline=_bucket_auto_first("$v"),
+        expected=[{"result": None}],
+        msg="$first via $bucketAuto should return null when first doc is null",
+    ),
+    AccumulatorTestCase(
+        "bucket_auto_missing_first",
+        docs=[{"x": 1}, {"v": 5}],
+        pipeline=_bucket_auto_first("$v"),
+        expected=[{"result": None}],
+        msg="$first via $bucketAuto should return null when first doc has missing field",
+    ),
+    AccumulatorTestCase(
+        "bucket_auto_string_first",
+        docs=[{"v": "hello"}, {"v": "world"}],
+        pipeline=_bucket_auto_first("$v"),
+        expected=[{"result": "hello"}],
+        msg="$first via $bucketAuto should return first string value",
+    ),
+    AccumulatorTestCase(
+        "bucket_auto_array_first",
+        docs=[{"v": [1, 2]}, {"v": [3, 4]}],
+        pipeline=_bucket_auto_first("$v"),
+        expected=[{"result": [1, 2]}],
+        msg="$first via $bucketAuto should return first array value",
+    ),
+    AccumulatorTestCase(
+        "bucket_auto_single_doc",
+        docs=[{"v": 42}],
+        pipeline=_bucket_auto_first("$v"),
+        expected=[{"result": 42}],
+        msg="$first via $bucketAuto should handle single document",
+    ),
+    AccumulatorTestCase(
+        "bucket_auto_nan_preserved",
+        docs=[{"v": FLOAT_NAN}, {"v": 5}],
+        pipeline=_bucket_auto_first("$v"),
+        expected=[{"result": pytest.approx(math.nan, nan_ok=True)}],
+        msg="$first via $bucketAuto should preserve NaN as first value",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_AUTO_SMOKE_TESTS))
+def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCase):
+    """Test $first accumulator via $bucketAuto for representative cases."""
+    result = _run(collection, test_case)
+    assertSuccess(result, test_case.expected, msg=test_case.msg)
+
+
+# ===========================================================================
+# 12c. Stage-Specific Behavior Tests (divergence between stages)
+# ===========================================================================
+
+# ---------------------------------------------------------------------------
+# 12c-i. BSON Type Serialization Divergence
+# ---------------------------------------------------------------------------
+
+# Property [Code Serialization Divergence]: Code without scope is returned as
+# str in $group/$bucket but as Code object in $bucketAuto.
+FIRST_CODE_GROUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "code_without_scope_group",
+        docs=[{"v": Code("abc")}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": "abc"}],
+        msg="$first should return Code without scope as str in $group",
+    ),
+]
+
+FIRST_CODE_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "code_without_scope_bucket_auto",
+        docs=[{"v": Code("abc")}, {"v": 999}],
+        pipeline=_bucket_auto_first("$v"),
+        expected=[{"result": Code("abc", None)}],
+        msg="$first should return Code without scope as Code object in $bucketAuto",
+    ),
+]
+
+# Property [MinKey Serialization Divergence]: MinKey is wrapped in a document
+# in $group/$bucket but returned directly in $bucketAuto.
+FIRST_MINKEY_GROUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "minkey_group",
+        docs=[{"v": MinKey()}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": {"": MinKey()}}],
+        msg="$first should return MinKey wrapped in dict in $group",
+    ),
+]
+
+FIRST_MINKEY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "minkey_bucket_auto",
+        docs=[{"v": MinKey()}, {"v": 999}],
+        pipeline=_bucket_auto_first("$v"),
+        expected=[{"result": MinKey()}],
+        msg="$first should return MinKey directly in $bucketAuto",
+    ),
+]
+
+# Property [MaxKey Serialization Divergence]: MaxKey is wrapped in a document
+# in $group/$bucket but returned directly in $bucketAuto.
+FIRST_MAXKEY_GROUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "maxkey_group",
+        docs=[{"v": MaxKey()}, {"v": 999}],
+        pipeline=_group_first("$v"),
+        expected=[{"result": {"": MaxKey()}}],
+        msg="$first should return MaxKey wrapped in dict in $group",
+    ),
+]
+
+FIRST_MAXKEY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "maxkey_bucket_auto",
+        docs=[{"v": MaxKey()}, {"v": 999}],
+        pipeline=_bucket_auto_first("$v"),
+        expected=[{"result": MaxKey()}],
+        msg="$first should return MaxKey directly in $bucketAuto",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# 12c-ii. Expression Error Code Divergence
+# ---------------------------------------------------------------------------
+
+# Property [Error Code Divergence]: $group/$bucket and $bucketAuto use
+# different error codes for divide-by-zero and mod-by-zero.
+FIRST_ERROR_BUCKET_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "error_toInt_invalid_bucket",
+        docs=[{"v": "not_a_number"}],
+        pipeline=_bucket_first({"$toInt": "$v"}),
+        error_code=CONVERSION_FAILURE_ERROR,
+        msg="$first should propagate conversion error from $toInt in $bucket",
+    ),
+    AccumulatorTestCase(
+        "error_divide_by_zero_bucket",
+        docs=[{"v": 10}],
+        pipeline=_bucket_first({"$divide": ["$v", 0]}),
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$first should propagate divide-by-zero error in $bucket",
+    ),
+    AccumulatorTestCase(
+        "error_mod_by_zero_bucket",
+        docs=[{"v": 10}],
+        pipeline=_bucket_first({"$mod": ["$v", 0]}),
+        error_code=MODULO_BY_ZERO_V2_ERROR,
+        msg="$first should propagate mod-by-zero error in $bucket",
+    ),
+]
+
+FIRST_ERROR_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "error_toInt_invalid_bucket_auto",
+        docs=[{"v": "not_a_number"}],
+        pipeline=_bucket_auto_first({"$toInt": "$v"}),
+        error_code=CONVERSION_FAILURE_ERROR,
+        msg="$first should propagate conversion error from $toInt in $bucketAuto",
+    ),
+    AccumulatorTestCase(
+        "error_divide_by_zero_bucket_auto",
+        docs=[{"v": 10}],
+        pipeline=_bucket_auto_first({"$divide": ["$v", 0]}),
+        error_code=BAD_VALUE_ERROR,
+        msg="$first should propagate divide-by-zero in $bucketAuto (wrapped as BAD_VALUE)",
+    ),
+    AccumulatorTestCase(
+        "error_mod_by_zero_bucket_auto",
+        docs=[{"v": 10}],
+        pipeline=_bucket_auto_first({"$mod": ["$v", 0]}),
+        error_code=MODULO_ZERO_REMAINDER_ERROR,
+        msg="$first should propagate mod-by-zero in $bucketAuto (wrapped as 16610)",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# 12c-iii. Arity Rejection Across Stages
+# ---------------------------------------------------------------------------
+
+# Property [Arity Across Stages]: arity rejection is consistent across all
+# three stages.
+FIRST_ARITY_BUCKET_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "arity_empty_array_bucket",
+        docs=[{"v": 1}],
+        pipeline=_bucket_first([]),
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject empty array in accumulator context ($bucket)",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_element_bucket",
+        docs=[{"v": 1}],
+        pipeline=_bucket_first([1, 2, 3]),
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject multi-element array in accumulator context ($bucket)",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_key_expression_bucket",
+        docs=[{"v": 1}],
+        pipeline=_bucket_first({"$add": [1, 2], "$multiply": [3, 4]}),
+        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+        msg="$first should reject multi-key expression object ($bucket)",
+    ),
+]
+
+FIRST_ARITY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "arity_empty_array_bucket_auto",
+        docs=[{"v": 1}],
+        pipeline=_bucket_auto_first([]),
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject empty array in accumulator context ($bucketAuto)",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_element_bucket_auto",
+        docs=[{"v": 1}],
+        pipeline=_bucket_auto_first([1, 2, 3]),
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject multi-element array in accumulator context ($bucketAuto)",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_key_expression_bucket_auto",
+        docs=[{"v": 1}],
+        pipeline=_bucket_auto_first({"$add": [1, 2], "$multiply": [3, 4]}),
+        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+        msg="$first should reject multi-key expression object ($bucketAuto)",
+    ),
+]
+
+
+# ===========================================================================
+# Combine stage divergence success tests
+# ===========================================================================
+
+FIRST_STAGE_DIVERGENCE_TESTS = (
+    FIRST_CODE_GROUP_TESTS
+    + FIRST_CODE_BUCKET_AUTO_TESTS
+    + FIRST_MINKEY_GROUP_TESTS
+    + FIRST_MINKEY_BUCKET_AUTO_TESTS
+    + FIRST_MAXKEY_GROUP_TESTS
+    + FIRST_MAXKEY_BUCKET_AUTO_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_STAGE_DIVERGENCE_TESTS))
+def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTestCase):
+    """Test $first cases where behavior differs between stages."""
+    result = _run(collection, test_case)
+    assertSuccess(result, test_case.expected, msg=test_case.msg)
+
+
+# ===========================================================================
+# Combine all error tests
+# ===========================================================================
+
+FIRST_EXPRESSION_ERROR_TESTS = (
+    FIRST_EXPRESSION_ERROR_GROUP_TESTS
+    + FIRST_ERROR_BUCKET_TESTS
+    + FIRST_ERROR_BUCKET_AUTO_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_EXPRESSION_ERROR_TESTS))
+def test_accumulator_first_expression_errors(collection, test_case: AccumulatorTestCase):
+    """Test $first expression error propagation."""
+    result = _run(collection, test_case)
+    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
+
+
+# ===========================================================================
+# Combine all arity error tests
+# ===========================================================================
+
+FIRST_ARITY_ERROR_TESTS = (
+    FIRST_ARITY_GROUP_TESTS
+    + FIRST_ARITY_BUCKET_TESTS
+    + FIRST_ARITY_BUCKET_AUTO_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_ARITY_ERROR_TESTS))
+def test_accumulator_first_arity_errors(collection, test_case: AccumulatorTestCase):
+    """Test $first arity rejection across all three stages."""
+    result = _run(collection, test_case)
+    assertFailureCode(result, test_case.error_code, msg=test_case.msg)

From 26487d853405c28b1fcbaa814ce3a12d8dd1e251 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Wed, 20 May 2026 16:19:56 -0700
Subject: [PATCH 02/10] Initial changes

Update generated files with initial changes

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../first/test_accumulator_first.py           | 1218 ++++++++++-------
 1 file changed, 690 insertions(+), 528 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
index 3da60355..f4166f7d 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
@@ -4,7 +4,6 @@
 
 import math
 from datetime import datetime, timezone
-from typing import Any
 
 import pytest
 from bson import (
@@ -44,770 +43,594 @@
     DECIMAL128_NEGATIVE_ZERO,
     DECIMAL128_ZERO,
     DOUBLE_NEGATIVE_ZERO,
-    DOUBLE_ZERO,
     FLOAT_INFINITY,
     FLOAT_NAN,
     FLOAT_NEGATIVE_INFINITY,
-    FLOAT_NEGATIVE_NAN,
 )
 
-# ===========================================================================
-# Pipeline Helpers
-# ===========================================================================
-
-
-def _group_first(accumulator: Any) -> list[dict[str, Any]]:
-    """Build a $group pipeline that computes $first."""
-    return [
-        {"$group": {"_id": None, "result": {"$first": accumulator}}},
-        {"$project": {"_id": 0, "result": 1}},
-    ]
-
-
-def _bucket_first(accumulator: Any) -> list[dict[str, Any]]:
-    """Build a $bucket pipeline that computes $first."""
-    return [
-        {
-            "$bucket": {
-                "groupBy": {"$literal": 0},
-                "boundaries": [-1, 1],
-                "output": {"result": {"$first": accumulator}},
-            }
-        },
-        {"$project": {"_id": 0, "result": 1}},
-    ]
-
-
-def _bucket_auto_first(accumulator: Any) -> list[dict[str, Any]]:
-    """Build a $bucketAuto pipeline that computes $first."""
-    return [
-        {
-            "$bucketAuto": {
-                "groupBy": {"$literal": 0},
-                "buckets": 1,
-                "output": {"result": {"$first": accumulator}},
-            }
-        },
-        {"$project": {"_id": 0, "result": 1}},
-    ]
-
-
-def _group_first_with_type(accumulator: Any) -> list[dict[str, Any]]:
-    """Build a $group pipeline that computes $first with $type projection."""
-    return [
-        {"$group": {"_id": None, "result": {"$first": accumulator}}},
-        {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
-    ]
-
-
-def _run(collection, test_case: AccumulatorTestCase):
-    """Insert docs and run the test case pipeline."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    return execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-
-
-# ===========================================================================
-# 1. Null and Missing Handling ($group primary)
-# ===========================================================================
-
-# Property [Null and Missing NOT Excluded]: $first returns whatever the first
-# document has. Unlike $min/$max, null and missing are NOT excluded -- they
-# are returned as the result if they are the first value.
+# Property [Null and Missing NOT Excluded]: $first returns whatever the
+# first document has, including null and missing values.
 FIRST_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "null_first_then_value",
         docs=[{"v": None}, {"v": 5}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
         msg="$first should return null when first doc has null (first wins)",
     ),
     AccumulatorTestCase(
         "null_missing_first_then_value",
         docs=[{"x": 1}, {"v": 5}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
         msg="$first should return null when first doc has missing field",
     ),
     AccumulatorTestCase(
         "null_value_first_then_null",
         docs=[{"v": 5}, {"v": None}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": 5}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 5}],
         msg="$first should return 5 when first doc has value, second is null",
     ),
     AccumulatorTestCase(
         "null_value_first_then_missing",
         docs=[{"v": 5}, {"x": 1}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": 5}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 5}],
         msg="$first should return 5 when first doc has value, second is missing",
     ),
     AccumulatorTestCase(
         "null_all",
         docs=[{"v": None}, {"v": None}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
         msg="$first should return null when all docs have null",
     ),
     AccumulatorTestCase(
         "null_missing_all",
         docs=[{"x": 1}, {"x": 2}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
         msg="$first should return null when all docs have missing field",
     ),
     AccumulatorTestCase(
         "null_and_missing_mixed",
         docs=[{"v": None}, {"x": 1}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
         msg="$first should return null when first is null and second is missing",
     ),
     AccumulatorTestCase(
         "null_remove_first_then_value",
         docs=[{"v": -1}, {"v": 5}],
-        pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}),
-        expected=[{"result": None}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            }
+        ],
+        expected=[{"_id": None, "result": None}],
         msg="$first should return null when first doc produces $$REMOVE",
     ),
     AccumulatorTestCase(
         "null_remove_all",
         docs=[{"v": -1}, {"v": -2}],
-        pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}),
-        expected=[{"result": None}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            }
+        ],
+        expected=[{"_id": None, "result": None}],
         msg="$first should return null when all docs produce $$REMOVE",
     ),
     AccumulatorTestCase(
         "null_remove_second_value_first",
         docs=[{"v": 5}, {"v": -1}],
-        pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}),
-        expected=[{"result": 5}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            }
+        ],
+        expected=[{"_id": None, "result": 5}],
         msg="$first should return value when first doc has value, second $$REMOVE",
     ),
 ]
 
-
-# ===========================================================================
-# 2. BSON Type Preservation ($group primary)
-# ===========================================================================
-
-# Property [BSON Type Preservation]: $first returns the first document's value
-# with its BSON type preserved exactly. No coercion, no comparison, no type
-# promotion.
+# Property [BSON Type Preservation]: $first returns the first document's
+# value with its BSON type preserved exactly.
 FIRST_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "type_int32",
         docs=[{"v": 42}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": 42}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 42}],
         msg="$first should preserve int32 type",
     ),
     AccumulatorTestCase(
         "type_int64",
         docs=[{"v": Int64(42)}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": Int64(42)}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": Int64(42)}],
         msg="$first should preserve Int64 type",
     ),
     AccumulatorTestCase(
         "type_double",
         docs=[{"v": 3.14}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": 3.14}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 3.14}],
         msg="$first should preserve double type",
     ),
     AccumulatorTestCase(
         "type_decimal128",
         docs=[{"v": Decimal128("3.14")}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": Decimal128("3.14")}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": Decimal128("3.14")}],
         msg="$first should preserve Decimal128 type",
     ),
     AccumulatorTestCase(
         "type_string",
         docs=[{"v": "hello"}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": "hello"}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": "hello"}],
         msg="$first should preserve string type",
     ),
     AccumulatorTestCase(
         "type_bool_true",
         docs=[{"v": True}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": True}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": True}],
         msg="$first should preserve boolean True",
     ),
     AccumulatorTestCase(
         "type_bool_false",
         docs=[{"v": False}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": False}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": False}],
         msg="$first should preserve boolean False",
     ),
     AccumulatorTestCase(
         "type_null",
         docs=[{"v": None}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
         msg="$first should preserve null value",
     ),
     AccumulatorTestCase(
         "type_embedded_doc",
         docs=[{"v": {"a": 1, "b": 2}}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": {"a": 1, "b": 2}}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": {"a": 1, "b": 2}}],
         msg="$first should preserve embedded document",
     ),
     AccumulatorTestCase(
         "type_empty_doc",
         docs=[{"v": {}}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": {}}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": {}}],
         msg="$first should preserve empty document",
     ),
     AccumulatorTestCase(
         "type_array",
         docs=[{"v": [1, 2, 3]}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": [1, 2, 3]}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": [1, 2, 3]}],
         msg="$first should preserve array value",
     ),
     AccumulatorTestCase(
         "type_empty_array",
         docs=[{"v": []}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": []}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": []}],
         msg="$first should preserve empty array",
     ),
     AccumulatorTestCase(
         "type_binary",
         docs=[{"v": Binary(b"\x01\x02")}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": b"\x01\x02"}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": b"\x01\x02"}],
         msg="$first should preserve Binary value",
     ),
     AccumulatorTestCase(
         "type_binary_custom_subtype",
         docs=[{"v": Binary(b"\x01", 5)}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": Binary(b"\x01", 5)}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": Binary(b"\x01", 5)}],
         msg="$first should preserve Binary with custom subtype",
     ),
     AccumulatorTestCase(
         "type_objectid",
         docs=[{"v": ObjectId("000000000000000000000001")}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": ObjectId("000000000000000000000001")}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": ObjectId("000000000000000000000001")}],
         msg="$first should preserve ObjectId value",
     ),
     AccumulatorTestCase(
         "type_datetime",
         docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": datetime(2023, 6, 15, tzinfo=timezone.utc)}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": datetime(2023, 6, 15, tzinfo=timezone.utc)}],
         msg="$first should preserve datetime value",
     ),
     AccumulatorTestCase(
         "type_timestamp",
         docs=[{"v": Timestamp(100, 1)}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": Timestamp(100, 1)}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": Timestamp(100, 1)}],
         msg="$first should preserve Timestamp value",
     ),
     AccumulatorTestCase(
         "type_regex",
         docs=[{"v": Regex("abc", "i")}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": Regex("abc", "i")}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": Regex("abc", "i")}],
         msg="$first should preserve Regex value",
     ),
 ]
 
-
-# ===========================================================================
-# 3. Special Numeric Value Preservation ($group primary)
-# ===========================================================================
-
-# Property [Special Numeric Preservation]: $first passes through values
-# without comparison or reduction. Special numeric values must be preserved
-# exactly as stored in the first document.
+# Property [Special Numeric Preservation]: $first passes through special
+# numeric values exactly as stored in the first document.
 FIRST_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "special_float_nan",
         docs=[{"v": FLOAT_NAN}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": pytest.approx(math.nan, nan_ok=True)}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": pytest.approx(math.nan, nan_ok=True)}],
         msg="$first should preserve float NaN",
     ),
     AccumulatorTestCase(
         "special_float_neg_zero",
         docs=[{"v": DOUBLE_NEGATIVE_ZERO}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": DOUBLE_NEGATIVE_ZERO}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": DOUBLE_NEGATIVE_ZERO}],
         msg="$first should preserve double -0.0",
     ),
     AccumulatorTestCase(
         "special_float_inf",
         docs=[{"v": FLOAT_INFINITY}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": FLOAT_INFINITY}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": FLOAT_INFINITY}],
         msg="$first should preserve float Infinity",
     ),
     AccumulatorTestCase(
         "special_float_neg_inf",
         docs=[{"v": FLOAT_NEGATIVE_INFINITY}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": FLOAT_NEGATIVE_INFINITY}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": FLOAT_NEGATIVE_INFINITY}],
         msg="$first should preserve float -Infinity",
     ),
     AccumulatorTestCase(
         "special_decimal_nan",
         docs=[{"v": DECIMAL128_NAN}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": DECIMAL128_NAN}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": DECIMAL128_NAN}],
         msg="$first should preserve Decimal128 NaN",
     ),
     AccumulatorTestCase(
         "special_decimal_neg_nan",
         docs=[{"v": DECIMAL128_NEGATIVE_NAN}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": DECIMAL128_NEGATIVE_NAN}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": DECIMAL128_NEGATIVE_NAN}],
         msg="$first should preserve Decimal128 -NaN",
     ),
     AccumulatorTestCase(
         "special_decimal_neg_zero",
         docs=[{"v": DECIMAL128_NEGATIVE_ZERO}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": DECIMAL128_NEGATIVE_ZERO}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": DECIMAL128_NEGATIVE_ZERO}],
         msg="$first should preserve Decimal128 -0",
     ),
     AccumulatorTestCase(
         "special_decimal_inf",
         docs=[{"v": DECIMAL128_INFINITY}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": DECIMAL128_INFINITY}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": DECIMAL128_INFINITY}],
         msg="$first should preserve Decimal128 Infinity",
     ),
     AccumulatorTestCase(
         "special_decimal_neg_inf",
         docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": DECIMAL128_NEGATIVE_INFINITY}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": DECIMAL128_NEGATIVE_INFINITY}],
         msg="$first should preserve Decimal128 -Infinity",
     ),
 ]
 
-
-# ===========================================================================
-# 4. Decimal128 Precision Preservation ($group primary)
-# ===========================================================================
-
-# Property [Decimal128 Precision]: $first must pass through Decimal128 values
-# without modifying precision, trailing zeros, or exponent representation.
+# Property [Decimal128 Precision]: $first passes through Decimal128 values
+# without modifying precision, trailing zeros, or exponent.
 FIRST_DECIMAL_PRECISION_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "decimal_high_precision",
         docs=[{"v": Decimal128("1.234567890123456789012345678901234")}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": Decimal128("1.234567890123456789012345678901234")}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": Decimal128("1.234567890123456789012345678901234")}],
         msg="$first should preserve 34-digit Decimal128 precision",
     ),
     AccumulatorTestCase(
         "decimal_trailing_zeros",
         docs=[{"v": Decimal128("1.00")}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": Decimal128("1.00")}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": Decimal128("1.00")}],
         msg="$first should preserve trailing zeros in Decimal128",
     ),
     AccumulatorTestCase(
         "decimal_large_exponent",
         docs=[{"v": DECIMAL128_LARGE_EXPONENT}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": DECIMAL128_LARGE_EXPONENT}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": DECIMAL128_LARGE_EXPONENT}],
         msg="$first should preserve Decimal128 with large exponent",
     ),
     AccumulatorTestCase(
         "decimal_small_positive",
         docs=[{"v": DECIMAL128_MIN_POSITIVE}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": DECIMAL128_MIN_POSITIVE}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": DECIMAL128_MIN_POSITIVE}],
         msg="$first should preserve smallest positive Decimal128",
     ),
     AccumulatorTestCase(
         "decimal_zero",
         docs=[{"v": DECIMAL128_ZERO}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": DECIMAL128_ZERO}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": DECIMAL128_ZERO}],
         msg="$first should preserve Decimal128 zero",
     ),
-    AccumulatorTestCase(
-        "decimal_negative_zero",
-        docs=[{"v": DECIMAL128_NEGATIVE_ZERO}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": DECIMAL128_NEGATIVE_ZERO}],
-        msg="$first should preserve Decimal128 negative zero",
-    ),
 ]
 
-
-# ===========================================================================
-# 5. BSON Type Distinction (No Coercion) ($group primary)
-# ===========================================================================
-
-# Property [No Coercion]: $first preserves BSON type distinctions. Values
-# that look similar but are different BSON types are NOT coerced.
+# Property [No Coercion]: $first preserves BSON type distinctions without
+# coercing similar-looking values.
 FIRST_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "distinct_false_not_zero",
         docs=[{"v": False}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": False}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": False}],
         msg="$first should return False, not coerce to 0",
     ),
     AccumulatorTestCase(
         "distinct_true_not_one",
         docs=[{"v": True}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": True}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": True}],
         msg="$first should return True, not coerce to 1",
     ),
     AccumulatorTestCase(
         "distinct_zero_not_false",
         docs=[{"v": 0}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": 0}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 0}],
         msg="$first should return int32(0), not coerce to False",
     ),
     AccumulatorTestCase(
         "distinct_empty_string",
         docs=[{"v": ""}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": ""}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": ""}],
         msg="$first should return empty string, not coerce to null",
     ),
     AccumulatorTestCase(
         "distinct_string_number",
         docs=[{"v": "123"}, {"v": 999}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": "123"}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": "123"}],
         msg="$first should return string '123', not coerce to int",
     ),
 ]
 
-
-# ===========================================================================
-# 6. Mixed Type Documents ($group primary)
-# ===========================================================================
-
 # Property [Position-Based]: $first picks the first document's value
-# regardless of what other documents contain. Unlike $min/$max, there is no
-# type comparison across documents.
+# regardless of what other documents contain.
 FIRST_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "mixed_int_then_string",
         docs=[{"v": 42}, {"v": "hello"}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": 42}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 42}],
         msg="$first should return int when first doc is int, second is string",
     ),
     AccumulatorTestCase(
         "mixed_string_then_int",
         docs=[{"v": "hello"}, {"v": 42}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": "hello"}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": "hello"}],
         msg="$first should return string when first doc is string, second is int",
     ),
     AccumulatorTestCase(
         "mixed_bool_then_number",
         docs=[{"v": True}, {"v": 42}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": True}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": True}],
         msg="$first should return True when first doc is bool, second is int",
     ),
     AccumulatorTestCase(
         "mixed_array_then_scalar",
         docs=[{"v": [1, 2, 3]}, {"v": 42}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": [1, 2, 3]}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": [1, 2, 3]}],
         msg="$first should return array when first doc is array, second is scalar",
     ),
-    AccumulatorTestCase(
-        "mixed_null_then_value",
-        docs=[{"v": None}, {"v": 5}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": None}],
-        msg="$first should return null when first doc is null, second has value",
-    ),
-    AccumulatorTestCase(
-        "mixed_value_then_null",
-        docs=[{"v": 5}, {"v": None}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": 5}],
-        msg="$first should return value when first doc has value, second is null",
-    ),
 ]
 
-
-# ===========================================================================
-# 7. Return Type Verification ($group primary)
-# ===========================================================================
-
-# Property [Return Type]: $first preserves the BSON type of the returned
-# value. Verified using $type in a subsequent $project stage.
-FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "return_type_int32",
-        docs=[{"v": 42}, {"v": 999}],
-        pipeline=_group_first_with_type("$v"),
-        expected=[{"value": 42, "type": "int"}],
-        msg="$first of int32 should return type 'int'",
-    ),
-    AccumulatorTestCase(
-        "return_type_int64",
-        docs=[{"v": Int64(42)}, {"v": 999}],
-        pipeline=_group_first_with_type("$v"),
-        expected=[{"value": Int64(42), "type": "long"}],
-        msg="$first of Int64 should return type 'long'",
-    ),
-    AccumulatorTestCase(
-        "return_type_double",
-        docs=[{"v": 3.14}, {"v": 999}],
-        pipeline=_group_first_with_type("$v"),
-        expected=[{"value": 3.14, "type": "double"}],
-        msg="$first of double should return type 'double'",
-    ),
-    AccumulatorTestCase(
-        "return_type_decimal",
-        docs=[{"v": Decimal128("3.14")}, {"v": 999}],
-        pipeline=_group_first_with_type("$v"),
-        expected=[{"value": Decimal128("3.14"), "type": "decimal"}],
-        msg="$first of Decimal128 should return type 'decimal'",
-    ),
-    AccumulatorTestCase(
-        "return_type_string",
-        docs=[{"v": "hello"}, {"v": 999}],
-        pipeline=_group_first_with_type("$v"),
-        expected=[{"value": "hello", "type": "string"}],
-        msg="$first of string should return type 'string'",
-    ),
-    AccumulatorTestCase(
-        "return_type_boolean",
-        docs=[{"v": True}, {"v": 999}],
-        pipeline=_group_first_with_type("$v"),
-        expected=[{"value": True, "type": "bool"}],
-        msg="$first of boolean should return type 'bool'",
-    ),
-    AccumulatorTestCase(
-        "return_type_date",
-        docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}],
-        pipeline=_group_first_with_type("$v"),
-        expected=[{"value": datetime(2023, 6, 15, tzinfo=timezone.utc), "type": "date"}],
-        msg="$first of datetime should return type 'date'",
-    ),
-    AccumulatorTestCase(
-        "return_type_null",
-        docs=[{"v": None}, {"v": 999}],
-        pipeline=_group_first_with_type("$v"),
-        expected=[{"value": None, "type": "null"}],
-        msg="$first of null should return type 'null'",
-    ),
-]
-
-
-# ===========================================================================
-# 8. Expression Argument Tests ($group primary)
-# ===========================================================================
-
-# Property [Input Forms]: $first accumulator accepts various expression types
-# as its operand.
+# Property [Input Forms]: $first accumulator accepts various expression types as its operand.
 FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "input_field_path",
         docs=[{"v": 10}, {"v": 20}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": 10}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 10}],
         msg="$first should accept a basic field path reference",
     ),
     AccumulatorTestCase(
         "input_nested_field",
         docs=[{"a": {"b": 10}}, {"a": {"b": 20}}],
-        pipeline=_group_first("$a.b"),
-        expected=[{"result": 10}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$a.b"}}}],
+        expected=[{"_id": None, "result": 10}],
         msg="$first should accept a nested document field path",
     ),
     AccumulatorTestCase(
         "input_literal",
         docs=[{"v": 1}, {"v": 2}],
-        pipeline=_group_first(42),
-        expected=[{"result": 42}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}],
+        expected=[{"_id": None, "result": 42}],
         msg="$first with a literal constant should return that constant",
     ),
     AccumulatorTestCase(
         "input_expression",
         docs=[{"price": 10, "qty": 2}, {"price": 5, "qty": 10}],
-        pipeline=_group_first({"$multiply": ["$price", "$qty"]}),
-        expected=[{"result": 20}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": {"$multiply": ["$price", "$qty"]}}}}
+        ],
+        expected=[{"_id": None, "result": 20}],
         msg="$first should accept a computed expression as operand",
     ),
     AccumulatorTestCase(
         "input_cond_remove",
         docs=[{"v": -1}, {"v": 5}],
-        pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}),
-        expected=[{"result": None}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            }
+        ],
+        expected=[{"_id": None, "result": None}],
         msg="$first should accept conditional with $$REMOVE as operand",
     ),
     AccumulatorTestCase(
         "input_null_literal",
         docs=[{"v": 1}, {"v": 2}],
-        pipeline=_group_first(None),
-        expected=[{"result": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": None}}}],
+        expected=[{"_id": None, "result": None}],
         msg="$first with null literal should return null",
     ),
 ]
 
+# Property [Edge Cases]: edge cases unique to the accumulator context.
+FIRST_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "edge_single_doc",
+        docs=[{"v": 42}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 42}],
+        msg="$first of a single document should return that document's value",
+    ),
+    AccumulatorTestCase(
+        "edge_single_null_doc",
+        docs=[{"v": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
+        msg="$first of a single null document should return null",
+    ),
+    AccumulatorTestCase(
+        "edge_single_missing_doc",
+        docs=[{"x": 1}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
+        msg="$first of a single document with missing field should return null",
+    ),
+    AccumulatorTestCase(
+        "edge_many_docs",
+        docs=[{"v": i} for i in range(100)],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 0}],
+        msg="$first should return first document's value (v=0) across 100 documents",
+    ),
+    AccumulatorTestCase(
+        "edge_empty_collection",
+        docs=None,
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[],
+        msg="$first on empty collection should return empty result",
+    ),
+    AccumulatorTestCase(
+        "edge_array_not_traversed",
+        docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": [5, 1, 8]}],
+        msg="$first should return array as whole value, not traverse it",
+    ),
+    AccumulatorTestCase(
+        "edge_literal_constant",
+        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}],
+        expected=[{"_id": None, "result": 42}],
+        msg="$first with literal constant should always return that constant",
+    ),
+]
 
-# ===========================================================================
-# 9. Arity Rejection ($group primary)
-# ===========================================================================
-
-# Property [Arity]: $first in accumulator context is a unary operator and
-# rejects array syntax.
+# Property [Arity]: $first in accumulator context is a unary operator and rejects array syntax.
 FIRST_ARITY_GROUP_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "arity_empty_array_group",
         docs=[{"v": 1}],
-        pipeline=_group_first([]),
+        pipeline=[{"$group": {"_id": None, "result": {"$first": []}}}],
         error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
         msg="$first should reject empty array in accumulator context ($group)",
     ),
     AccumulatorTestCase(
         "arity_single_element_group",
         docs=[{"v": 1}],
-        pipeline=_group_first([1]),
+        pipeline=[{"$group": {"_id": None, "result": {"$first": [1]}}}],
         error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
         msg="$first should reject single-element array in accumulator context ($group)",
     ),
     AccumulatorTestCase(
         "arity_single_field_ref_group",
         docs=[{"v": 1}],
-        pipeline=_group_first(["$v"]),
+        pipeline=[{"$group": {"_id": None, "result": {"$first": ["$v"]}}}],
         error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
         msg="$first should reject single field ref in array in accumulator context ($group)",
     ),
     AccumulatorTestCase(
         "arity_multi_element_group",
         docs=[{"v": 1}],
-        pipeline=_group_first([1, 2, 3]),
+        pipeline=[{"$group": {"_id": None, "result": {"$first": [1, 2, 3]}}}],
         error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
         msg="$first should reject multi-element array in accumulator context ($group)",
     ),
     AccumulatorTestCase(
         "arity_multi_key_expression_group",
         docs=[{"v": 1}],
-        pipeline=_group_first({"$add": [1, 2], "$multiply": [3, 4]}),
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}}}
+        ],
         error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
         msg="$first should reject multi-key expression object ($group)",
     ),
 ]
 
-
-# ===========================================================================
-# 10. Expression Error Propagation ($group primary)
-# ===========================================================================
-
-# Property [Expression Error Propagation]: errors in sub-expressions used as
-# $first operand propagate as errors.
+# Property [Expression Error Propagation]: errors in sub-expressions used
+# as $first operand propagate as errors.
 FIRST_EXPRESSION_ERROR_GROUP_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "error_toInt_invalid_group",
         docs=[{"v": "not_a_number"}],
-        pipeline=_group_first({"$toInt": "$v"}),
+        pipeline=[{"$group": {"_id": None, "result": {"$first": {"$toInt": "$v"}}}}],
         error_code=CONVERSION_FAILURE_ERROR,
         msg="$first should propagate conversion error from $toInt sub-expression in $group",
     ),
     AccumulatorTestCase(
         "error_divide_by_zero_group",
         docs=[{"v": 10}],
-        pipeline=_group_first({"$divide": ["$v", 0]}),
+        pipeline=[{"$group": {"_id": None, "result": {"$first": {"$divide": ["$v", 0]}}}}],
         error_code=DIVIDE_BY_ZERO_V2_ERROR,
         msg="$first should propagate divide-by-zero error in $group",
     ),
     AccumulatorTestCase(
         "error_mod_by_zero_group",
         docs=[{"v": 10}],
-        pipeline=_group_first({"$mod": ["$v", 0]}),
+        pipeline=[{"$group": {"_id": None, "result": {"$first": {"$mod": ["$v", 0]}}}}],
         error_code=MODULO_BY_ZERO_V2_ERROR,
         msg="$first should propagate mod-by-zero error in $group",
     ),
 ]
 
-
-# ===========================================================================
-# 11. Accumulator-Specific Edge Cases ($group primary)
-# ===========================================================================
-
-# Property [Edge Cases]: edge cases unique to the accumulator context.
-FIRST_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "edge_single_doc",
-        docs=[{"v": 42}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": 42}],
-        msg="$first of a single document should return that document's value",
-    ),
-    AccumulatorTestCase(
-        "edge_single_null_doc",
-        docs=[{"v": None}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": None}],
-        msg="$first of a single null document should return null",
-    ),
-    AccumulatorTestCase(
-        "edge_single_missing_doc",
-        docs=[{"x": 1}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": None}],
-        msg="$first of a single document with missing field should return null",
-    ),
-    AccumulatorTestCase(
-        "edge_many_docs",
-        docs=[{"v": i} for i in range(100)],
-        pipeline=_group_first("$v"),
-        expected=[{"result": 0}],
-        msg="$first should return first document's value (v=0) across 100 documents",
-    ),
-    AccumulatorTestCase(
-        "edge_empty_collection",
-        docs=None,
-        pipeline=_group_first("$v"),
-        expected=[],
-        msg="$first on empty collection should return empty result",
-    ),
-    AccumulatorTestCase(
-        "edge_array_not_traversed",
-        docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}],
-        pipeline=_group_first("$v"),
-        expected=[{"result": [5, 1, 8]}],
-        msg="$first should return array as whole value, not traverse it",
-    ),
-    AccumulatorTestCase(
-        "edge_literal_constant",
-        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
-        pipeline=_group_first(42),
-        expected=[{"result": 42}],
-        msg="$first with literal constant should always return that constant",
-    ),
-]
-
-
-# ===========================================================================
-# Combine all $group primary success tests
-# ===========================================================================
-
 FIRST_GROUP_SUCCESS_TESTS = (
     FIRST_NULL_MISSING_TESTS
     + FIRST_BSON_TYPE_TESTS
@@ -815,27 +638,36 @@ def _run(collection, test_case: AccumulatorTestCase):
     + FIRST_DECIMAL_PRECISION_TESTS
     + FIRST_TYPE_DISTINCTION_TESTS
     + FIRST_MIXED_TYPE_TESTS
-    + FIRST_RETURN_TYPE_TESTS
     + FIRST_INPUT_FORM_TESTS
     + FIRST_EDGE_CASE_TESTS
 )
 
-
-# ===========================================================================
-# $group primary test function
-# ===========================================================================
+FIRST_GROUP_ERROR_TESTS = FIRST_ARITY_GROUP_TESTS + FIRST_EXPRESSION_ERROR_GROUP_TESTS
 
 
 @pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_SUCCESS_TESTS))
 def test_accumulator_first_group(collection, test_case: AccumulatorTestCase):
     """Test $first accumulator success cases via $group."""
-    result = _run(collection, test_case)
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
     assertSuccess(result, test_case.expected, msg=test_case.msg)
 
 
-# ===========================================================================
-# 12a. $bucket Smoke Tests
-# ===========================================================================
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_ERROR_TESTS))
+def test_accumulator_first_group_errors(collection, test_case: AccumulatorTestCase):
+    """Test $first accumulator error cases via $group."""
+    if test_case.docs:  # skip seeding when the case supplies no documents
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
+
 
 # Property [Bucket Stage Smoke]: $first produces correct results through
 # $bucket for representative cases.
@@ -843,50 +675,106 @@ def test_accumulator_first_group(collection, test_case: AccumulatorTestCase):
     AccumulatorTestCase(
         "bucket_basic_numeric",
         docs=[{"v": 10}, {"v": 20}, {"v": 30}],
-        pipeline=_bucket_first("$v"),
-        expected=[{"result": 10}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": 10}],
         msg="$first via $bucket should return first numeric value",
     ),
     AccumulatorTestCase(
         "bucket_null_first",
         docs=[{"v": None}, {"v": 5}],
-        pipeline=_bucket_first("$v"),
-        expected=[{"result": None}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": None}],
         msg="$first via $bucket should return null when first doc is null",
     ),
     AccumulatorTestCase(
         "bucket_missing_first",
         docs=[{"x": 1}, {"v": 5}],
-        pipeline=_bucket_first("$v"),
-        expected=[{"result": None}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": None}],
         msg="$first via $bucket should return null when first doc has missing field",
     ),
     AccumulatorTestCase(
         "bucket_string_first",
         docs=[{"v": "hello"}, {"v": "world"}],
-        pipeline=_bucket_first("$v"),
-        expected=[{"result": "hello"}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": "hello"}],
         msg="$first via $bucket should return first string value",
     ),
     AccumulatorTestCase(
         "bucket_array_first",
         docs=[{"v": [1, 2]}, {"v": [3, 4]}],
-        pipeline=_bucket_first("$v"),
-        expected=[{"result": [1, 2]}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": [1, 2]}],
         msg="$first via $bucket should return first array value",
     ),
     AccumulatorTestCase(
         "bucket_single_doc",
         docs=[{"v": 42}],
-        pipeline=_bucket_first("$v"),
-        expected=[{"result": 42}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": 42}],
         msg="$first via $bucket should handle single document",
     ),
     AccumulatorTestCase(
         "bucket_nan_preserved",
         docs=[{"v": FLOAT_NAN}, {"v": 5}],
-        pipeline=_bucket_first("$v"),
-        expected=[{"result": pytest.approx(math.nan, nan_ok=True)}],
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": -1, "result": pytest.approx(math.nan, nan_ok=True)}],
         msg="$first via $bucket should preserve NaN as first value",
     ),
 ]
@@ -895,64 +783,121 @@ def test_accumulator_first_group(collection, test_case: AccumulatorTestCase):
 @pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_SMOKE_TESTS))
 def test_accumulator_first_bucket(collection, test_case: AccumulatorTestCase):
     """Test $first accumulator via $bucket for representative cases."""
-    result = _run(collection, test_case)
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
     assertSuccess(result, test_case.expected, msg=test_case.msg)
 
 
-# ===========================================================================
-# 12b. $bucketAuto Smoke Tests
-# ===========================================================================
-
-# Property [BucketAuto Stage Smoke]: $first produces correct results through
-# $bucketAuto for representative cases.
+# Property [BucketAuto Stage Smoke]: $first produces correct results
+# through $bucketAuto for representative cases.
 FIRST_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "bucket_auto_basic_numeric",
         docs=[{"v": 10}, {"v": 20}, {"v": 30}],
-        pipeline=_bucket_auto_first("$v"),
-        expected=[{"result": 10}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": 10}],
         msg="$first via $bucketAuto should return first numeric value",
     ),
     AccumulatorTestCase(
         "bucket_auto_null_first",
         docs=[{"v": None}, {"v": 5}],
-        pipeline=_bucket_auto_first("$v"),
-        expected=[{"result": None}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": None}],
         msg="$first via $bucketAuto should return null when first doc is null",
     ),
     AccumulatorTestCase(
         "bucket_auto_missing_first",
         docs=[{"x": 1}, {"v": 5}],
-        pipeline=_bucket_auto_first("$v"),
-        expected=[{"result": None}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": None}],
         msg="$first via $bucketAuto should return null when first doc has missing field",
     ),
     AccumulatorTestCase(
         "bucket_auto_string_first",
         docs=[{"v": "hello"}, {"v": "world"}],
-        pipeline=_bucket_auto_first("$v"),
-        expected=[{"result": "hello"}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": "hello"}],
         msg="$first via $bucketAuto should return first string value",
     ),
     AccumulatorTestCase(
         "bucket_auto_array_first",
         docs=[{"v": [1, 2]}, {"v": [3, 4]}],
-        pipeline=_bucket_auto_first("$v"),
-        expected=[{"result": [1, 2]}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": [1, 2]}],
         msg="$first via $bucketAuto should return first array value",
     ),
     AccumulatorTestCase(
         "bucket_auto_single_doc",
         docs=[{"v": 42}],
-        pipeline=_bucket_auto_first("$v"),
-        expected=[{"result": 42}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": 42}],
         msg="$first via $bucketAuto should handle single document",
     ),
     AccumulatorTestCase(
         "bucket_auto_nan_preserved",
         docs=[{"v": FLOAT_NAN}, {"v": 5}],
-        pipeline=_bucket_auto_first("$v"),
-        expected=[{"result": pytest.approx(math.nan, nan_ok=True)}],
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": "$v"}},
+                }
+            }
+        ],
+        expected=[{"_id": {"min": 0, "max": 0}, "result": pytest.approx(math.nan, nan_ok=True)}],
         msg="$first via $bucketAuto should preserve NaN as first value",
     ),
 ]
@@ -961,25 +906,25 @@ def test_accumulator_first_bucket(collection, test_case: AccumulatorTestCase):
 @pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_AUTO_SMOKE_TESTS))
 def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCase):
     """Test $first accumulator via $bucketAuto for representative cases."""
-    result = _run(collection, test_case)
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
     assertSuccess(result, test_case.expected, msg=test_case.msg)
 
 
-# ===========================================================================
-# 12c. Stage-Specific Behavior Tests (divergence between stages)
-# ===========================================================================
-
-# ---------------------------------------------------------------------------
-# 12c-i. BSON Type Serialization Divergence
-# ---------------------------------------------------------------------------
-
-# Property [Code Serialization Divergence]: Code without scope is returned as
-# str in $group/$bucket but as Code object in $bucketAuto.
+# Property [Code Serialization Divergence]: Code without scope is returned
+# as str when projected in $group/$bucket but as Code object in $bucketAuto.
 FIRST_CODE_GROUP_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "code_without_scope_group",
         docs=[{"v": Code("abc")}, {"v": 999}],
-        pipeline=_group_first("$v"),
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
         expected=[{"result": "abc"}],
         msg="$first should return Code without scope as str in $group",
     ),
@@ -989,19 +934,32 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas
     AccumulatorTestCase(
         "code_without_scope_bucket_auto",
         docs=[{"v": Code("abc")}, {"v": 999}],
-        pipeline=_bucket_auto_first("$v"),
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": "$v"}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
         expected=[{"result": Code("abc", None)}],
-        msg="$first should return Code without scope as Code object in $bucketAuto",
+        msg="$first should return Code without scope as Code in $bucketAuto",
     ),
 ]
 
-# Property [MinKey Serialization Divergence]: MinKey is wrapped in a document
-# in $group/$bucket but returned directly in $bucketAuto.
+# Property [MinKey Serialization Divergence]: MinKey is wrapped in a
+# document when projected in $group/$bucket but returned directly in
+# $bucketAuto.
 FIRST_MINKEY_GROUP_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "minkey_group",
         docs=[{"v": MinKey()}, {"v": 999}],
-        pipeline=_group_first("$v"),
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
         expected=[{"result": {"": MinKey()}}],
         msg="$first should return MinKey wrapped in dict in $group",
     ),
@@ -1011,19 +969,32 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas
     AccumulatorTestCase(
         "minkey_bucket_auto",
         docs=[{"v": MinKey()}, {"v": 999}],
-        pipeline=_bucket_auto_first("$v"),
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": "$v"}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
         expected=[{"result": MinKey()}],
         msg="$first should return MinKey directly in $bucketAuto",
     ),
 ]
 
-# Property [MaxKey Serialization Divergence]: MaxKey is wrapped in a document
-# in $group/$bucket but returned directly in $bucketAuto.
+# Property [MaxKey Serialization Divergence]: MaxKey is wrapped in a
+# document when projected in $group/$bucket but returned directly in
+# $bucketAuto.
 FIRST_MAXKEY_GROUP_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "maxkey_group",
         docs=[{"v": MaxKey()}, {"v": 999}],
-        pipeline=_group_first("$v"),
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
         expected=[{"result": {"": MaxKey()}}],
         msg="$first should return MaxKey wrapped in dict in $group",
     ),
@@ -1033,15 +1004,42 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas
     AccumulatorTestCase(
         "maxkey_bucket_auto",
         docs=[{"v": MaxKey()}, {"v": 999}],
-        pipeline=_bucket_auto_first("$v"),
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": "$v"}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
         expected=[{"result": MaxKey()}],
         msg="$first should return MaxKey directly in $bucketAuto",
     ),
 ]
 
-# ---------------------------------------------------------------------------
-# 12c-ii. Expression Error Code Divergence
-# ---------------------------------------------------------------------------
+FIRST_STAGE_DIVERGENCE_SUCCESS_TESTS = (
+    FIRST_CODE_GROUP_TESTS
+    + FIRST_CODE_BUCKET_AUTO_TESTS
+    + FIRST_MINKEY_GROUP_TESTS
+    + FIRST_MINKEY_BUCKET_AUTO_TESTS
+    + FIRST_MAXKEY_GROUP_TESTS
+    + FIRST_MAXKEY_BUCKET_AUTO_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_STAGE_DIVERGENCE_SUCCESS_TESTS))
+def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTestCase):
+    """Test $first cases where behavior differs between stages."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg)
+
 
 # Property [Error Code Divergence]: $group/$bucket and $bucketAuto use
 # different error codes for divide-by-zero and mod-by-zero.
@@ -1049,21 +1047,45 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas
     AccumulatorTestCase(
         "error_toInt_invalid_bucket",
         docs=[{"v": "not_a_number"}],
-        pipeline=_bucket_first({"$toInt": "$v"}),
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": {"$toInt": "$v"}}},
+                }
+            }
+        ],
         error_code=CONVERSION_FAILURE_ERROR,
         msg="$first should propagate conversion error from $toInt in $bucket",
     ),
     AccumulatorTestCase(
         "error_divide_by_zero_bucket",
         docs=[{"v": 10}],
-        pipeline=_bucket_first({"$divide": ["$v", 0]}),
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": {"$divide": ["$v", 0]}}},
+                }
+            }
+        ],
         error_code=DIVIDE_BY_ZERO_V2_ERROR,
         msg="$first should propagate divide-by-zero error in $bucket",
     ),
     AccumulatorTestCase(
         "error_mod_by_zero_bucket",
         docs=[{"v": 10}],
-        pipeline=_bucket_first({"$mod": ["$v", 0]}),
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": {"$mod": ["$v", 0]}}},
+                }
+            }
+        ],
         error_code=MODULO_BY_ZERO_V2_ERROR,
         msg="$first should propagate mod-by-zero error in $bucket",
     ),
@@ -1073,51 +1095,94 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas
     AccumulatorTestCase(
         "error_toInt_invalid_bucket_auto",
         docs=[{"v": "not_a_number"}],
-        pipeline=_bucket_auto_first({"$toInt": "$v"}),
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": {"$toInt": "$v"}}},
+                }
+            }
+        ],
         error_code=CONVERSION_FAILURE_ERROR,
         msg="$first should propagate conversion error from $toInt in $bucketAuto",
     ),
     AccumulatorTestCase(
         "error_divide_by_zero_bucket_auto",
         docs=[{"v": 10}],
-        pipeline=_bucket_auto_first({"$divide": ["$v", 0]}),
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": {"$divide": ["$v", 0]}}},
+                }
+            }
+        ],
         error_code=BAD_VALUE_ERROR,
         msg="$first should propagate divide-by-zero in $bucketAuto (wrapped as BAD_VALUE)",
     ),
     AccumulatorTestCase(
         "error_mod_by_zero_bucket_auto",
         docs=[{"v": 10}],
-        pipeline=_bucket_auto_first({"$mod": ["$v", 0]}),
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": {"$mod": ["$v", 0]}}},
+                }
+            }
+        ],
         error_code=MODULO_ZERO_REMAINDER_ERROR,
         msg="$first should propagate mod-by-zero in $bucketAuto (wrapped as 16610)",
     ),
 ]
 
-# ---------------------------------------------------------------------------
-# 12c-iii. Arity Rejection Across Stages
-# ---------------------------------------------------------------------------
-
-# Property [Arity Across Stages]: arity rejection is consistent across all
-# three stages.
+# Property [Arity Across Stages]: arity rejection is consistent across $bucket and $bucketAuto.
 FIRST_ARITY_BUCKET_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
         "arity_empty_array_bucket",
         docs=[{"v": 1}],
-        pipeline=_bucket_first([]),
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": []}},
+                }
+            }
+        ],
         error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
         msg="$first should reject empty array in accumulator context ($bucket)",
     ),
     AccumulatorTestCase(
         "arity_multi_element_bucket",
         docs=[{"v": 1}],
-        pipeline=_bucket_first([1, 2, 3]),
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": [1, 2, 3]}},
+                }
+            }
+        ],
         error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
         msg="$first should reject multi-element array in accumulator context ($bucket)",
     ),
     AccumulatorTestCase(
         "arity_multi_key_expression_bucket",
         docs=[{"v": 1}],
-        pipeline=_bucket_first({"$add": [1, 2], "$multiply": [3, 4]}),
+        pipeline=[
+            {
+                "$bucket": {
+                    "groupBy": {"$literal": 0},
+                    "boundaries": [-1, 1],
+                    "output": {"result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}},
+                }
+            }
+        ],
         error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
         msg="$first should reject multi-key expression object ($bucket)",
     ),
@@ -1127,79 +1192,176 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas
     AccumulatorTestCase(
         "arity_empty_array_bucket_auto",
         docs=[{"v": 1}],
-        pipeline=_bucket_auto_first([]),
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": []}},
+                }
+            }
+        ],
         error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
         msg="$first should reject empty array in accumulator context ($bucketAuto)",
     ),
     AccumulatorTestCase(
         "arity_multi_element_bucket_auto",
         docs=[{"v": 1}],
-        pipeline=_bucket_auto_first([1, 2, 3]),
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": [1, 2, 3]}},
+                }
+            }
+        ],
         error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
         msg="$first should reject multi-element array in accumulator context ($bucketAuto)",
     ),
     AccumulatorTestCase(
         "arity_multi_key_expression_bucket_auto",
         docs=[{"v": 1}],
-        pipeline=_bucket_auto_first({"$add": [1, 2], "$multiply": [3, 4]}),
+        pipeline=[
+            {
+                "$bucketAuto": {
+                    "groupBy": {"$literal": 0},
+                    "buckets": 1,
+                    "output": {"result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}},
+                }
+            }
+        ],
         error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
         msg="$first should reject multi-key expression object ($bucketAuto)",
     ),
 ]
 
-
-# ===========================================================================
-# Combine stage divergence success tests
-# ===========================================================================
-
-FIRST_STAGE_DIVERGENCE_TESTS = (
-    FIRST_CODE_GROUP_TESTS
-    + FIRST_CODE_BUCKET_AUTO_TESTS
-    + FIRST_MINKEY_GROUP_TESTS
-    + FIRST_MINKEY_BUCKET_AUTO_TESTS
-    + FIRST_MAXKEY_GROUP_TESTS
-    + FIRST_MAXKEY_BUCKET_AUTO_TESTS
+FIRST_EXPRESSION_ERROR_TESTS = (
+    FIRST_EXPRESSION_ERROR_GROUP_TESTS + FIRST_ERROR_BUCKET_TESTS + FIRST_ERROR_BUCKET_AUTO_TESTS
 )
 
-
-@pytest.mark.parametrize("test_case", pytest_params(FIRST_STAGE_DIVERGENCE_TESTS))
-def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTestCase):
-    """Test $first cases where behavior differs between stages."""
-    result = _run(collection, test_case)
-    assertSuccess(result, test_case.expected, msg=test_case.msg)
-
-
-# ===========================================================================
-# Combine all error tests
-# ===========================================================================
-
-FIRST_EXPRESSION_ERROR_TESTS = (
-    FIRST_EXPRESSION_ERROR_GROUP_TESTS
-    + FIRST_ERROR_BUCKET_TESTS
-    + FIRST_ERROR_BUCKET_AUTO_TESTS
+FIRST_ARITY_ERROR_TESTS = (
+    FIRST_ARITY_GROUP_TESTS + FIRST_ARITY_BUCKET_TESTS + FIRST_ARITY_BUCKET_AUTO_TESTS
 )
 
 
 @pytest.mark.parametrize("test_case", pytest_params(FIRST_EXPRESSION_ERROR_TESTS))
-def test_accumulator_first_expression_errors(collection, test_case: AccumulatorTestCase):
+def test_accumulator_first_expression_errors(collection, test_case):
     """Test $first expression error propagation."""
-    result = _run(collection, test_case)
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
     assertFailureCode(result, test_case.error_code, msg=test_case.msg)
 
 
-# ===========================================================================
-# Combine all arity error tests
-# ===========================================================================
-
-FIRST_ARITY_ERROR_TESTS = (
-    FIRST_ARITY_GROUP_TESTS
-    + FIRST_ARITY_BUCKET_TESTS
-    + FIRST_ARITY_BUCKET_AUTO_TESTS
-)
-
-
 @pytest.mark.parametrize("test_case", pytest_params(FIRST_ARITY_ERROR_TESTS))
-def test_accumulator_first_arity_errors(collection, test_case: AccumulatorTestCase):
+def test_accumulator_first_arity_errors(collection, test_case):
     """Test $first arity rejection across all three stages."""
-    result = _run(collection, test_case)
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
     assertFailureCode(result, test_case.error_code, msg=test_case.msg)
+
+
+# Property [Return Type]: $first preserves the BSON type of the returned
+# value, verified using $type projection.
+FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "return_type_int32",
+        docs=[{"v": 42}, {"v": 999}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": 42, "type": "int"}],
+        msg="$first of int32 should return type 'int'",
+    ),
+    AccumulatorTestCase(
+        "return_type_int64",
+        docs=[{"v": Int64(42)}, {"v": 999}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": Int64(42), "type": "long"}],
+        msg="$first of Int64 should return type 'long'",
+    ),
+    AccumulatorTestCase(
+        "return_type_double",
+        docs=[{"v": 3.14}, {"v": 999}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": 3.14, "type": "double"}],
+        msg="$first of double should return type 'double'",
+    ),
+    AccumulatorTestCase(
+        "return_type_decimal",
+        docs=[{"v": Decimal128("3.14")}, {"v": 999}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": Decimal128("3.14"), "type": "decimal"}],
+        msg="$first of Decimal128 should return type 'decimal'",
+    ),
+    AccumulatorTestCase(
+        "return_type_string",
+        docs=[{"v": "hello"}, {"v": 999}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": "hello", "type": "string"}],
+        msg="$first of string should return type 'string'",
+    ),
+    AccumulatorTestCase(
+        "return_type_boolean",
+        docs=[{"v": True}, {"v": 999}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": True, "type": "bool"}],
+        msg="$first of boolean should return type 'bool'",
+    ),
+    AccumulatorTestCase(
+        "return_type_date",
+        docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": datetime(2023, 6, 15, tzinfo=timezone.utc), "type": "date"}],
+        msg="$first of datetime should return type 'date'",
+    ),
+    AccumulatorTestCase(
+        "return_type_null",
+        docs=[{"v": None}, {"v": 999}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
+        ],
+        expected=[{"value": None, "type": "null"}],
+        msg="$first of null should return type 'null'",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_RETURN_TYPE_TESTS))
+def test_accumulator_first_return_type(collection, test_case: AccumulatorTestCase):
+    """Test $first return type verification."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg)

From 786e9b15dbb18560827066c3116437f785ae26f7 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Wed, 20 May 2026 16:45:08 -0700
Subject: [PATCH 03/10] add __init__.py

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../tests/core/operator/accumulators/first/__init__.py            | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/first/__init__.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/__init__.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/__init__.py
new file mode 100644
index 00000000..e69de29b

From c86016377880d481118e4aa4c557f6e483e6966e Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Wed, 20 May 2026 16:54:08 -0700
Subject: [PATCH 04/10] Remove expression tests for accumulator

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../first/test_accumulator_first.py           | 221 +-----------------
 1 file changed, 1 insertion(+), 220 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
index f4166f7d..a0d0d185 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
@@ -8,7 +8,6 @@
 import pytest
 from bson import (
     Binary,
-    Code,
     Decimal128,
     Int64,
     MaxKey,
@@ -23,13 +22,8 @@
 )
 from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
 from documentdb_tests.framework.error_codes import (
-    BAD_VALUE_ERROR,
-    CONVERSION_FAILURE_ERROR,
-    DIVIDE_BY_ZERO_V2_ERROR,
     EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
     GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-    MODULO_BY_ZERO_V2_ERROR,
-    MODULO_ZERO_REMAINDER_ERROR,
 )
 from documentdb_tests.framework.executor import execute_command
 from documentdb_tests.framework.parametrize import pytest_params
@@ -479,29 +473,6 @@
         expected=[{"_id": None, "result": 42}],
         msg="$first with a literal constant should return that constant",
     ),
-    AccumulatorTestCase(
-        "input_expression",
-        docs=[{"price": 10, "qty": 2}, {"price": 5, "qty": 10}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": {"$multiply": ["$price", "$qty"]}}}}
-        ],
-        expected=[{"_id": None, "result": 20}],
-        msg="$first should accept a computed expression as operand",
-    ),
-    AccumulatorTestCase(
-        "input_cond_remove",
-        docs=[{"v": -1}, {"v": 5}],
-        pipeline=[
-            {
-                "$group": {
-                    "_id": None,
-                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
-                }
-            }
-        ],
-        expected=[{"_id": None, "result": None}],
-        msg="$first should accept conditional with $$REMOVE as operand",
-    ),
     AccumulatorTestCase(
         "input_null_literal",
         docs=[{"v": 1}, {"v": 2}],
@@ -605,32 +576,6 @@
     ),
 ]
 
-# Property [Expression Error Propagation]: errors in sub-expressions used
-# as $first operand propagate as errors.
-FIRST_EXPRESSION_ERROR_GROUP_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "error_toInt_invalid_group",
-        docs=[{"v": "not_a_number"}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": {"$toInt": "$v"}}}}],
-        error_code=CONVERSION_FAILURE_ERROR,
-        msg="$first should propagate conversion error from $toInt sub-expression in $group",
-    ),
-    AccumulatorTestCase(
-        "error_divide_by_zero_group",
-        docs=[{"v": 10}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": {"$divide": ["$v", 0]}}}}],
-        error_code=DIVIDE_BY_ZERO_V2_ERROR,
-        msg="$first should propagate divide-by-zero error in $group",
-    ),
-    AccumulatorTestCase(
-        "error_mod_by_zero_group",
-        docs=[{"v": 10}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": {"$mod": ["$v", 0]}}}}],
-        error_code=MODULO_BY_ZERO_V2_ERROR,
-        msg="$first should propagate mod-by-zero error in $group",
-    ),
-]
-
 FIRST_GROUP_SUCCESS_TESTS = (
     FIRST_NULL_MISSING_TESTS
     + FIRST_BSON_TYPE_TESTS
@@ -642,8 +587,6 @@
     + FIRST_EDGE_CASE_TESTS
 )
 
-FIRST_GROUP_ERROR_TESTS = FIRST_ARITY_GROUP_TESTS + FIRST_EXPRESSION_ERROR_GROUP_TESTS
-
 
 @pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_SUCCESS_TESTS))
 def test_accumulator_first_group(collection, test_case: AccumulatorTestCase):
@@ -657,18 +600,6 @@ def test_accumulator_first_group(collection, test_case: AccumulatorTestCase):
     assertSuccess(result, test_case.expected, msg=test_case.msg)
 
 
-@pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_ERROR_TESTS))
-def test_accumulator_first_group_errors(collection, test_case):
-    """Test $first accumulator error cases via $group."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
-
-
 # Property [Bucket Stage Smoke]: $first produces correct results through
 # $bucket for representative cases.
 FIRST_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [
@@ -915,40 +846,6 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas
     assertSuccess(result, test_case.expected, msg=test_case.msg)
 
 
-# Property [Code Serialization Divergence]: Code without scope is returned
-# as str when projected in $group/$bucket but as Code object in $bucketAuto.
-FIRST_CODE_GROUP_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "code_without_scope_group",
-        docs=[{"v": Code("abc")}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": "abc"}],
-        msg="$first should return Code without scope as str in $group",
-    ),
-]
-
-FIRST_CODE_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "code_without_scope_bucket_auto",
-        docs=[{"v": Code("abc")}, {"v": 999}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": "$v"}},
-                }
-            },
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": Code("abc", None)}],
-        msg="$first should return Code without scope as Code in $bucketAuto",
-    ),
-]
-
 # Property [MinKey Serialization Divergence]: MinKey is wrapped in a
 # document when projected in $group/$bucket but returned directly in
 # $bucketAuto.
@@ -1020,9 +917,7 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas
 ]
 
 FIRST_STAGE_DIVERGENCE_SUCCESS_TESTS = (
-    FIRST_CODE_GROUP_TESTS
-    + FIRST_CODE_BUCKET_AUTO_TESTS
-    + FIRST_MINKEY_GROUP_TESTS
+    FIRST_MINKEY_GROUP_TESTS
     + FIRST_MINKEY_BUCKET_AUTO_TESTS
     + FIRST_MAXKEY_GROUP_TESTS
     + FIRST_MAXKEY_BUCKET_AUTO_TESTS
@@ -1041,104 +936,6 @@ def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTe
     assertSuccess(result, test_case.expected, msg=test_case.msg)
 
 
-# Property [Error Code Divergence]: $group/$bucket and $bucketAuto use
-# different error codes for divide-by-zero and mod-by-zero.
-FIRST_ERROR_BUCKET_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "error_toInt_invalid_bucket",
-        docs=[{"v": "not_a_number"}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": {"$toInt": "$v"}}},
-                }
-            }
-        ],
-        error_code=CONVERSION_FAILURE_ERROR,
-        msg="$first should propagate conversion error from $toInt in $bucket",
-    ),
-    AccumulatorTestCase(
-        "error_divide_by_zero_bucket",
-        docs=[{"v": 10}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": {"$divide": ["$v", 0]}}},
-                }
-            }
-        ],
-        error_code=DIVIDE_BY_ZERO_V2_ERROR,
-        msg="$first should propagate divide-by-zero error in $bucket",
-    ),
-    AccumulatorTestCase(
-        "error_mod_by_zero_bucket",
-        docs=[{"v": 10}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": {"$mod": ["$v", 0]}}},
-                }
-            }
-        ],
-        error_code=MODULO_BY_ZERO_V2_ERROR,
-        msg="$first should propagate mod-by-zero error in $bucket",
-    ),
-]
-
-FIRST_ERROR_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "error_toInt_invalid_bucket_auto",
-        docs=[{"v": "not_a_number"}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": {"$toInt": "$v"}}},
-                }
-            }
-        ],
-        error_code=CONVERSION_FAILURE_ERROR,
-        msg="$first should propagate conversion error from $toInt in $bucketAuto",
-    ),
-    AccumulatorTestCase(
-        "error_divide_by_zero_bucket_auto",
-        docs=[{"v": 10}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": {"$divide": ["$v", 0]}}},
-                }
-            }
-        ],
-        error_code=BAD_VALUE_ERROR,
-        msg="$first should propagate divide-by-zero in $bucketAuto (wrapped as BAD_VALUE)",
-    ),
-    AccumulatorTestCase(
-        "error_mod_by_zero_bucket_auto",
-        docs=[{"v": 10}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": {"$mod": ["$v", 0]}}},
-                }
-            }
-        ],
-        error_code=MODULO_ZERO_REMAINDER_ERROR,
-        msg="$first should propagate mod-by-zero in $bucketAuto (wrapped as 16610)",
-    ),
-]
-
 # Property [Arity Across Stages]: arity rejection is consistent across $bucket and $bucketAuto.
 FIRST_ARITY_BUCKET_TESTS: list[AccumulatorTestCase] = [
     AccumulatorTestCase(
@@ -1236,27 +1033,11 @@ def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTe
     ),
 ]
 
-FIRST_EXPRESSION_ERROR_TESTS = (
-    FIRST_EXPRESSION_ERROR_GROUP_TESTS + FIRST_ERROR_BUCKET_TESTS + FIRST_ERROR_BUCKET_AUTO_TESTS
-)
-
 FIRST_ARITY_ERROR_TESTS = (
     FIRST_ARITY_GROUP_TESTS + FIRST_ARITY_BUCKET_TESTS + FIRST_ARITY_BUCKET_AUTO_TESTS
 )
 
 
-@pytest.mark.parametrize("test_case", pytest_params(FIRST_EXPRESSION_ERROR_TESTS))
-def test_accumulator_first_expression_errors(collection, test_case):
-    """Test $first expression error propagation."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
-
-
 @pytest.mark.parametrize("test_case", pytest_params(FIRST_ARITY_ERROR_TESTS))
 def test_accumulator_first_arity_errors(collection, test_case):
     """Test $first arity rejection across all three stages."""

From a548a0102f6970784580ad7debd61ec70f981772 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Thu, 21 May 2026 15:56:28 -0700
Subject: [PATCH 05/10] remove stage tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../first/test_accumulator_first.py           | 501 +-----------------
 1 file changed, 2 insertions(+), 499 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
index a0d0d185..714df0df 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
@@ -1,4 +1,4 @@
-"""Tests for $first accumulator in $group, $bucket, and $bucketAuto contexts."""
+"""Tests for $first accumulator in $group context."""
 
 from __future__ import annotations
 
@@ -10,8 +10,6 @@
     Binary,
     Decimal128,
     Int64,
-    MaxKey,
-    MinKey,
     ObjectId,
     Regex,
     Timestamp,
@@ -20,11 +18,7 @@
 from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
     AccumulatorTestCase,
 )
-from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess
-from documentdb_tests.framework.error_codes import (
-    EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
-    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-)
+from documentdb_tests.framework.assertions import assertSuccess
 from documentdb_tests.framework.executor import execute_command
 from documentdb_tests.framework.parametrize import pytest_params
 from documentdb_tests.framework.test_constants import (
@@ -535,47 +529,6 @@
     ),
 ]
 
-# Property [Arity]: $first in accumulator context is a unary operator and rejects array syntax.
-FIRST_ARITY_GROUP_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "arity_empty_array_group",
-        docs=[{"v": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": []}}}],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$first should reject empty array in accumulator context ($group)",
-    ),
-    AccumulatorTestCase(
-        "arity_single_element_group",
-        docs=[{"v": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": [1]}}}],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$first should reject single-element array in accumulator context ($group)",
-    ),
-    AccumulatorTestCase(
-        "arity_single_field_ref_group",
-        docs=[{"v": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": ["$v"]}}}],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$first should reject single field ref in array in accumulator context ($group)",
-    ),
-    AccumulatorTestCase(
-        "arity_multi_element_group",
-        docs=[{"v": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": [1, 2, 3]}}}],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$first should reject multi-element array in accumulator context ($group)",
-    ),
-    AccumulatorTestCase(
-        "arity_multi_key_expression_group",
-        docs=[{"v": 1}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}}}
-        ],
-        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
-        msg="$first should reject multi-key expression object ($group)",
-    ),
-]
-
 FIRST_GROUP_SUCCESS_TESTS = (
     FIRST_NULL_MISSING_TESTS
     + FIRST_BSON_TYPE_TESTS
@@ -600,456 +553,6 @@ def test_accumulator_first_group(collection, test_case: AccumulatorTestCase):
     assertSuccess(result, test_case.expected, msg=test_case.msg)
 
 
-# Property [Bucket Stage Smoke]: $first produces correct results through
-# $bucket for representative cases.
-FIRST_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bucket_basic_numeric",
-        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": 10}],
-        msg="$first via $bucket should return first numeric value",
-    ),
-    AccumulatorTestCase(
-        "bucket_null_first",
-        docs=[{"v": None}, {"v": 5}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": None}],
-        msg="$first via $bucket should return null when first doc is null",
-    ),
-    AccumulatorTestCase(
-        "bucket_missing_first",
-        docs=[{"x": 1}, {"v": 5}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": None}],
-        msg="$first via $bucket should return null when first doc has missing field",
-    ),
-    AccumulatorTestCase(
-        "bucket_string_first",
-        docs=[{"v": "hello"}, {"v": "world"}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": "hello"}],
-        msg="$first via $bucket should return first string value",
-    ),
-    AccumulatorTestCase(
-        "bucket_array_first",
-        docs=[{"v": [1, 2]}, {"v": [3, 4]}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": [1, 2]}],
-        msg="$first via $bucket should return first array value",
-    ),
-    AccumulatorTestCase(
-        "bucket_single_doc",
-        docs=[{"v": 42}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": 42}],
-        msg="$first via $bucket should handle single document",
-    ),
-    AccumulatorTestCase(
-        "bucket_nan_preserved",
-        docs=[{"v": FLOAT_NAN}, {"v": 5}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": -1, "result": pytest.approx(math.nan, nan_ok=True)}],
-        msg="$first via $bucket should preserve NaN as first value",
-    ),
-]
-
-
-@pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_SMOKE_TESTS))
-def test_accumulator_first_bucket(collection, test_case: AccumulatorTestCase):
-    """Test $first accumulator via $bucket for representative cases."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertSuccess(result, test_case.expected, msg=test_case.msg)
-
-
-# Property [BucketAuto Stage Smoke]: $first produces correct results
-# through $bucketAuto for representative cases.
-FIRST_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "bucket_auto_basic_numeric",
-        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": 10}],
-        msg="$first via $bucketAuto should return first numeric value",
-    ),
-    AccumulatorTestCase(
-        "bucket_auto_null_first",
-        docs=[{"v": None}, {"v": 5}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": None}],
-        msg="$first via $bucketAuto should return null when first doc is null",
-    ),
-    AccumulatorTestCase(
-        "bucket_auto_missing_first",
-        docs=[{"x": 1}, {"v": 5}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": None}],
-        msg="$first via $bucketAuto should return null when first doc has missing field",
-    ),
-    AccumulatorTestCase(
-        "bucket_auto_string_first",
-        docs=[{"v": "hello"}, {"v": "world"}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": "hello"}],
-        msg="$first via $bucketAuto should return first string value",
-    ),
-    AccumulatorTestCase(
-        "bucket_auto_array_first",
-        docs=[{"v": [1, 2]}, {"v": [3, 4]}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": [1, 2]}],
-        msg="$first via $bucketAuto should return first array value",
-    ),
-    AccumulatorTestCase(
-        "bucket_auto_single_doc",
-        docs=[{"v": 42}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": 42}],
-        msg="$first via $bucketAuto should handle single document",
-    ),
-    AccumulatorTestCase(
-        "bucket_auto_nan_preserved",
-        docs=[{"v": FLOAT_NAN}, {"v": 5}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": "$v"}},
-                }
-            }
-        ],
-        expected=[{"_id": {"min": 0, "max": 0}, "result": pytest.approx(math.nan, nan_ok=True)}],
-        msg="$first via $bucketAuto should preserve NaN as first value",
-    ),
-]
-
-
-@pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_AUTO_SMOKE_TESTS))
-def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCase):
-    """Test $first accumulator via $bucketAuto for representative cases."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertSuccess(result, test_case.expected, msg=test_case.msg)
-
-
-# Property [MinKey Serialization Divergence]: MinKey is wrapped in a
-# document when projected in $group/$bucket but returned directly in
-# $bucketAuto.
-FIRST_MINKEY_GROUP_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "minkey_group",
-        docs=[{"v": MinKey()}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": {"": MinKey()}}],
-        msg="$first should return MinKey wrapped in dict in $group",
-    ),
-]
-
-FIRST_MINKEY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "minkey_bucket_auto",
-        docs=[{"v": MinKey()}, {"v": 999}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": "$v"}},
-                }
-            },
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": MinKey()}],
-        msg="$first should return MinKey directly in $bucketAuto",
-    ),
-]
-
-# Property [MaxKey Serialization Divergence]: MaxKey is wrapped in a
-# document when projected in $group/$bucket but returned directly in
-# $bucketAuto.
-FIRST_MAXKEY_GROUP_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "maxkey_group",
-        docs=[{"v": MaxKey()}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": {"": MaxKey()}}],
-        msg="$first should return MaxKey wrapped in dict in $group",
-    ),
-]
-
-FIRST_MAXKEY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "maxkey_bucket_auto",
-        docs=[{"v": MaxKey()}, {"v": 999}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": "$v"}},
-                }
-            },
-            {"$project": {"_id": 0, "result": 1}},
-        ],
-        expected=[{"result": MaxKey()}],
-        msg="$first should return MaxKey directly in $bucketAuto",
-    ),
-]
-
-FIRST_STAGE_DIVERGENCE_SUCCESS_TESTS = (
-    FIRST_MINKEY_GROUP_TESTS
-    + FIRST_MINKEY_BUCKET_AUTO_TESTS
-    + FIRST_MAXKEY_GROUP_TESTS
-    + FIRST_MAXKEY_BUCKET_AUTO_TESTS
-)
-
-
-@pytest.mark.parametrize("test_case", pytest_params(FIRST_STAGE_DIVERGENCE_SUCCESS_TESTS))
-def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTestCase):
-    """Test $first cases where behavior differs between stages."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertSuccess(result, test_case.expected, msg=test_case.msg)
-
-
-# Property [Arity Across Stages]: arity rejection is consistent across $bucket and $bucketAuto.
-FIRST_ARITY_BUCKET_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "arity_empty_array_bucket",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": []}},
-                }
-            }
-        ],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$first should reject empty array in accumulator context ($bucket)",
-    ),
-    AccumulatorTestCase(
-        "arity_multi_element_bucket",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": [1, 2, 3]}},
-                }
-            }
-        ],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$first should reject multi-element array in accumulator context ($bucket)",
-    ),
-    AccumulatorTestCase(
-        "arity_multi_key_expression_bucket",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$bucket": {
-                    "groupBy": {"$literal": 0},
-                    "boundaries": [-1, 1],
-                    "output": {"result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}},
-                }
-            }
-        ],
-        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
-        msg="$first should reject multi-key expression object ($bucket)",
-    ),
-]
-
-FIRST_ARITY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "arity_empty_array_bucket_auto",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": []}},
-                }
-            }
-        ],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$first should reject empty array in accumulator context ($bucketAuto)",
-    ),
-    AccumulatorTestCase(
-        "arity_multi_element_bucket_auto",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": [1, 2, 3]}},
-                }
-            }
-        ],
-        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
-        msg="$first should reject multi-element array in accumulator context ($bucketAuto)",
-    ),
-    AccumulatorTestCase(
-        "arity_multi_key_expression_bucket_auto",
-        docs=[{"v": 1}],
-        pipeline=[
-            {
-                "$bucketAuto": {
-                    "groupBy": {"$literal": 0},
-                    "buckets": 1,
-                    "output": {"result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}},
-                }
-            }
-        ],
-        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
-        msg="$first should reject multi-key expression object ($bucketAuto)",
-    ),
-]
-
-FIRST_ARITY_ERROR_TESTS = (
-    FIRST_ARITY_GROUP_TESTS + FIRST_ARITY_BUCKET_TESTS + FIRST_ARITY_BUCKET_AUTO_TESTS
-)
-
-
-@pytest.mark.parametrize("test_case", pytest_params(FIRST_ARITY_ERROR_TESTS))
-def test_accumulator_first_arity_errors(collection, test_case):
-    """Test $first arity rejection across all three stages."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
-
-
 # Property [Return Type]: $first preserves the BSON type of the returned
 # value, verified using $type projection.
 FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [

From 4d1685d6e5dac71e1d25d6e38c73d1de6df101a7 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Thu, 21 May 2026 16:02:20 -0700
Subject: [PATCH 06/10] split into files

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../test_accumulator_first_null_missing.py    | 207 ++++++++++++++++
 ...rst.py => test_accumulator_first_types.py} | 222 +-----------------
 2 files changed, 220 insertions(+), 209 deletions(-)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
 rename documentdb_tests/compatibility/tests/core/operator/accumulators/first/{test_accumulator_first.py => test_accumulator_first_types.py} (69%)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
new file mode 100644
index 00000000..af62338b
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
@@ -0,0 +1,207 @@
+"""Tests for $first accumulator null, missing, input form, and edge case behavior."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Null and Missing NOT Excluded]: $first returns whatever the
+# first document has: null stays null, and a missing field yields null.
+FIRST_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "null_first_then_value",
+        docs=[{"v": None}, {"v": 5}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
+        msg="$first should return null when first doc has null (first wins)",
+    ),
+    AccumulatorTestCase(
+        "null_missing_first_then_value",
+        docs=[{"x": 1}, {"v": 5}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
+        msg="$first should return null when first doc has missing field",
+    ),
+    AccumulatorTestCase(
+        "null_value_first_then_null",
+        docs=[{"v": 5}, {"v": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 5}],
+        msg="$first should return 5 when first doc has value, second is null",
+    ),
+    AccumulatorTestCase(
+        "null_value_first_then_missing",
+        docs=[{"v": 5}, {"x": 1}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 5}],
+        msg="$first should return 5 when first doc has value, second is missing",
+    ),
+    AccumulatorTestCase(
+        "null_all",
+        docs=[{"v": None}, {"v": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
+        msg="$first should return null when all docs have null",
+    ),
+    AccumulatorTestCase(
+        "null_missing_all",
+        docs=[{"x": 1}, {"x": 2}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
+        msg="$first should return null when all docs have missing field",
+    ),
+    AccumulatorTestCase(
+        "null_and_missing_mixed",
+        docs=[{"v": None}, {"x": 1}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
+        msg="$first should return null when first is null and second is missing",
+    ),
+    AccumulatorTestCase(
+        "null_remove_first_then_value",
+        docs=[{"v": -1}, {"v": 5}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            }
+        ],
+        expected=[{"_id": None, "result": None}],
+        msg="$first should return null when first doc produces $$REMOVE",
+    ),
+    AccumulatorTestCase(
+        "null_remove_all",
+        docs=[{"v": -1}, {"v": -2}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            }
+        ],
+        expected=[{"_id": None, "result": None}],
+        msg="$first should return null when all docs produce $$REMOVE",
+    ),
+    AccumulatorTestCase(
+        "null_remove_second_value_first",
+        docs=[{"v": 5}, {"v": -1}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
+                }
+            }
+        ],
+        expected=[{"_id": None, "result": 5}],
+        msg="$first should return value when first doc has value, second $$REMOVE",
+    ),
+]
+
+# Property [Input Forms]: $first accumulator accepts various expression types as its operand.
+FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "input_field_path",
+        docs=[{"v": 10}, {"v": 20}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 10}],
+        msg="$first should accept a basic field path reference",
+    ),
+    AccumulatorTestCase(
+        "input_nested_field",
+        docs=[{"a": {"b": 10}}, {"a": {"b": 20}}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$a.b"}}}],
+        expected=[{"_id": None, "result": 10}],
+        msg="$first should accept a nested document field path",
+    ),
+    AccumulatorTestCase(
+        "input_literal",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}],
+        expected=[{"_id": None, "result": 42}],
+        msg="$first with a literal constant should return that constant",
+    ),
+    AccumulatorTestCase(
+        "input_null_literal",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": None}}}],
+        expected=[{"_id": None, "result": None}],
+        msg="$first with null literal should return null",
+    ),
+]
+
+# Property [Edge Cases]: edge cases unique to the accumulator context.
+FIRST_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "edge_single_doc",
+        docs=[{"v": 42}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 42}],
+        msg="$first of a single document should return that document's value",
+    ),
+    AccumulatorTestCase(
+        "edge_single_null_doc",
+        docs=[{"v": None}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
+        msg="$first of a single null document should return null",
+    ),
+    AccumulatorTestCase(
+        "edge_single_missing_doc",
+        docs=[{"x": 1}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": None}],
+        msg="$first of a single document with missing field should return null",
+    ),
+    AccumulatorTestCase(
+        "edge_many_docs",
+        docs=[{"v": i} for i in range(100)],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": 0}],
+        msg="$first should return first document's value (v=0) across 100 documents",
+    ),
+    AccumulatorTestCase(
+        "edge_empty_collection",
+        docs=None,
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[],
+        msg="$first on empty collection should return empty result",
+    ),
+    AccumulatorTestCase(
+        "edge_array_not_traversed",
+        docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[{"_id": None, "result": [5, 1, 8]}],
+        msg="$first should return array as whole value, not traverse it",
+    ),
+    AccumulatorTestCase(
+        "edge_literal_constant",
+        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}],
+        expected=[{"_id": None, "result": 42}],
+        msg="$first with literal constant should always return that constant",
+    ),
+]
+
+FIRST_SUCCESS_TESTS = FIRST_NULL_MISSING_TESTS + FIRST_INPUT_FORM_TESTS + FIRST_EDGE_CASE_TESTS
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_SUCCESS_TESTS))
+def test_accumulator_first_null_missing(collection, test_case: AccumulatorTestCase):
+    """Test $first accumulator null, missing, input form, and edge case behavior."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertSuccess(result, test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
similarity index 69%
rename from documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
rename to documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
index 714df0df..b7e086e8 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
@@ -1,4 +1,4 @@
-"""Tests for $first accumulator in $group context."""
+"""Tests for $first accumulator BSON type preservation and type fidelity."""
 
 from __future__ import annotations
 
@@ -36,102 +36,6 @@
     FLOAT_NEGATIVE_INFINITY,
 )
 
-# Property [Null and Missing NOT Excluded]: $first returns whatever the
-# first document has, including null and missing values.
-FIRST_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "null_first_then_value",
-        docs=[{"v": None}, {"v": 5}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": None}],
-        msg="$first should return null when first doc has null (first wins)",
-    ),
-    AccumulatorTestCase(
-        "null_missing_first_then_value",
-        docs=[{"x": 1}, {"v": 5}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": None}],
-        msg="$first should return null when first doc has missing field",
-    ),
-    AccumulatorTestCase(
-        "null_value_first_then_null",
-        docs=[{"v": 5}, {"v": None}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": 5}],
-        msg="$first should return 5 when first doc has value, second is null",
-    ),
-    AccumulatorTestCase(
-        "null_value_first_then_missing",
-        docs=[{"v": 5}, {"x": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": 5}],
-        msg="$first should return 5 when first doc has value, second is missing",
-    ),
-    AccumulatorTestCase(
-        "null_all",
-        docs=[{"v": None}, {"v": None}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": None}],
-        msg="$first should return null when all docs have null",
-    ),
-    AccumulatorTestCase(
-        "null_missing_all",
-        docs=[{"x": 1}, {"x": 2}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": None}],
-        msg="$first should return null when all docs have missing field",
-    ),
-    AccumulatorTestCase(
-        "null_and_missing_mixed",
-        docs=[{"v": None}, {"x": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": None}],
-        msg="$first should return null when first is null and second is missing",
-    ),
-    AccumulatorTestCase(
-        "null_remove_first_then_value",
-        docs=[{"v": -1}, {"v": 5}],
-        pipeline=[
-            {
-                "$group": {
-                    "_id": None,
-                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
-                }
-            }
-        ],
-        expected=[{"_id": None, "result": None}],
-        msg="$first should return null when first doc produces $$REMOVE",
-    ),
-    AccumulatorTestCase(
-        "null_remove_all",
-        docs=[{"v": -1}, {"v": -2}],
-        pipeline=[
-            {
-                "$group": {
-                    "_id": None,
-                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
-                }
-            }
-        ],
-        expected=[{"_id": None, "result": None}],
-        msg="$first should return null when all docs produce $$REMOVE",
-    ),
-    AccumulatorTestCase(
-        "null_remove_second_value_first",
-        docs=[{"v": 5}, {"v": -1}],
-        pipeline=[
-            {
-                "$group": {
-                    "_id": None,
-                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
-                }
-            }
-        ],
-        expected=[{"_id": None, "result": 5}],
-        msg="$first should return value when first doc has value, second $$REMOVE",
-    ),
-]
-
 # Property [BSON Type Preservation]: $first returns the first document's
 # value with its BSON type preserved exactly.
 FIRST_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [
@@ -444,115 +348,6 @@
     ),
 ]
 
-# Property [Input Forms]: $first accumulator accepts various expression types as its operand.
-FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "input_field_path",
-        docs=[{"v": 10}, {"v": 20}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": 10}],
-        msg="$first should accept a basic field path reference",
-    ),
-    AccumulatorTestCase(
-        "input_nested_field",
-        docs=[{"a": {"b": 10}}, {"a": {"b": 20}}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$a.b"}}}],
-        expected=[{"_id": None, "result": 10}],
-        msg="$first should accept a nested document field path",
-    ),
-    AccumulatorTestCase(
-        "input_literal",
-        docs=[{"v": 1}, {"v": 2}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}],
-        expected=[{"_id": None, "result": 42}],
-        msg="$first with a literal constant should return that constant",
-    ),
-    AccumulatorTestCase(
-        "input_null_literal",
-        docs=[{"v": 1}, {"v": 2}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": None}}}],
-        expected=[{"_id": None, "result": None}],
-        msg="$first with null literal should return null",
-    ),
-]
-
-# Property [Edge Cases]: edge cases unique to the accumulator context.
-FIRST_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "edge_single_doc",
-        docs=[{"v": 42}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": 42}],
-        msg="$first of a single document should return that document's value",
-    ),
-    AccumulatorTestCase(
-        "edge_single_null_doc",
-        docs=[{"v": None}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": None}],
-        msg="$first of a single null document should return null",
-    ),
-    AccumulatorTestCase(
-        "edge_single_missing_doc",
-        docs=[{"x": 1}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": None}],
-        msg="$first of a single document with missing field should return null",
-    ),
-    AccumulatorTestCase(
-        "edge_many_docs",
-        docs=[{"v": i} for i in range(100)],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": 0}],
-        msg="$first should return first document's value (v=0) across 100 documents",
-    ),
-    AccumulatorTestCase(
-        "edge_empty_collection",
-        docs=None,
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[],
-        msg="$first on empty collection should return empty result",
-    ),
-    AccumulatorTestCase(
-        "edge_array_not_traversed",
-        docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": [5, 1, 8]}],
-        msg="$first should return array as whole value, not traverse it",
-    ),
-    AccumulatorTestCase(
-        "edge_literal_constant",
-        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}],
-        expected=[{"_id": None, "result": 42}],
-        msg="$first with literal constant should always return that constant",
-    ),
-]
-
-FIRST_GROUP_SUCCESS_TESTS = (
-    FIRST_NULL_MISSING_TESTS
-    + FIRST_BSON_TYPE_TESTS
-    + FIRST_SPECIAL_NUMERIC_TESTS
-    + FIRST_DECIMAL_PRECISION_TESTS
-    + FIRST_TYPE_DISTINCTION_TESTS
-    + FIRST_MIXED_TYPE_TESTS
-    + FIRST_INPUT_FORM_TESTS
-    + FIRST_EDGE_CASE_TESTS
-)
-
-
-@pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_SUCCESS_TESTS))
-def test_accumulator_first_group(collection, test_case: AccumulatorTestCase):
-    """Test $first accumulator success cases via $group."""
-    if test_case.docs:
-        collection.insert_many(test_case.docs)
-    result = execute_command(
-        collection,
-        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
-    )
-    assertSuccess(result, test_case.expected, msg=test_case.msg)
-
-
 # Property [Return Type]: $first preserves the BSON type of the returned
 # value, verified using $type projection.
 FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [
@@ -638,10 +433,19 @@ def test_accumulator_first_group(collection, test_case: AccumulatorTestCase):
     ),
 ]
 
+FIRST_TYPE_SUCCESS_TESTS = (
+    FIRST_BSON_TYPE_TESTS
+    + FIRST_SPECIAL_NUMERIC_TESTS
+    + FIRST_DECIMAL_PRECISION_TESTS
+    + FIRST_TYPE_DISTINCTION_TESTS
+    + FIRST_MIXED_TYPE_TESTS
+    + FIRST_RETURN_TYPE_TESTS
+)
+
 
-@pytest.mark.parametrize("test_case", pytest_params(FIRST_RETURN_TYPE_TESTS))
-def test_accumulator_first_return_type(collection, test_case: AccumulatorTestCase):
-    """Test $first return type verification."""
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_TYPE_SUCCESS_TESTS))
+def test_accumulator_first_types(collection, test_case: AccumulatorTestCase):
+    """Test $first accumulator BSON type preservation and type fidelity."""
     if test_case.docs:
         collection.insert_many(test_case.docs)
     result = execute_command(

From b8cd95212b130bff8c45b0d8609419b3b8f149c9 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Thu, 21 May 2026 16:08:36 -0700
Subject: [PATCH 07/10] remove duplicate tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../test_accumulator_first_null_missing.py    | 14 -------
 .../first/test_accumulator_first_types.py     | 41 -------------------
 2 files changed, 55 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
index af62338b..5fe5086e 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
@@ -109,13 +109,6 @@
 
 # Property [Input Forms]: $first accumulator accepts various expression types as its operand.
 FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "input_field_path",
-        docs=[{"v": 10}, {"v": 20}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": 10}],
-        msg="$first should accept a basic field path reference",
-    ),
     AccumulatorTestCase(
         "input_nested_field",
         docs=[{"a": {"b": 10}}, {"a": {"b": 20}}],
@@ -183,13 +176,6 @@
         expected=[{"_id": None, "result": [5, 1, 8]}],
         msg="$first should return array as whole value, not traverse it",
     ),
-    AccumulatorTestCase(
-        "edge_literal_constant",
-        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}],
-        expected=[{"_id": None, "result": 42}],
-        msg="$first with literal constant should always return that constant",
-    ),
 ]
 
 FIRST_SUCCESS_TESTS = FIRST_NULL_MISSING_TESTS + FIRST_INPUT_FORM_TESTS + FIRST_EDGE_CASE_TESTS
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
index b7e086e8..b385e89e 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
@@ -275,46 +275,6 @@
     ),
 ]
 
-# Property [No Coercion]: $first preserves BSON type distinctions without
-# coercing similar-looking values.
-FIRST_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "distinct_false_not_zero",
-        docs=[{"v": False}, {"v": 999}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": False}],
-        msg="$first should return False, not coerce to 0",
-    ),
-    AccumulatorTestCase(
-        "distinct_true_not_one",
-        docs=[{"v": True}, {"v": 999}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": True}],
-        msg="$first should return True, not coerce to 1",
-    ),
-    AccumulatorTestCase(
-        "distinct_zero_not_false",
-        docs=[{"v": 0}, {"v": 999}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": 0}],
-        msg="$first should return int32(0), not coerce to False",
-    ),
-    AccumulatorTestCase(
-        "distinct_empty_string",
-        docs=[{"v": ""}, {"v": 999}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": ""}],
-        msg="$first should return empty string, not coerce to null",
-    ),
-    AccumulatorTestCase(
-        "distinct_string_number",
-        docs=[{"v": "123"}, {"v": 999}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": "123"}],
-        msg="$first should return string '123', not coerce to int",
-    ),
-]
-
 # Property [Position-Based]: $first picks the first document's value
 # regardless of what other documents contain.
 FIRST_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [
@@ -437,7 +397,6 @@
     FIRST_BSON_TYPE_TESTS
     + FIRST_SPECIAL_NUMERIC_TESTS
     + FIRST_DECIMAL_PRECISION_TESTS
-    + FIRST_TYPE_DISTINCTION_TESTS
     + FIRST_MIXED_TYPE_TESTS
     + FIRST_RETURN_TYPE_TESTS
 )

From c32a862b670da72580e92f6be92ce474558dd93b Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Thu, 21 May 2026 16:13:44 -0700
Subject: [PATCH 08/10] rename unclear tests and remove unrelated tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../test_accumulator_first_null_missing.py    | 87 +------------------
 .../first/test_accumulator_first_types.py     | 86 ------------------
 2 files changed, 3 insertions(+), 170 deletions(-)

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
index 5fe5086e..98de6e4b 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
@@ -1,4 +1,4 @@
-"""Tests for $first accumulator null, missing, input form, and edge case behavior."""
+"""Tests for $first accumulator null, missing, and edge case behavior."""
 
 from __future__ import annotations
 
@@ -63,73 +63,6 @@
         expected=[{"_id": None, "result": None}],
         msg="$first should return null when first is null and second is missing",
     ),
-    AccumulatorTestCase(
-        "null_remove_first_then_value",
-        docs=[{"v": -1}, {"v": 5}],
-        pipeline=[
-            {
-                "$group": {
-                    "_id": None,
-                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
-                }
-            }
-        ],
-        expected=[{"_id": None, "result": None}],
-        msg="$first should return null when first doc produces $$REMOVE",
-    ),
-    AccumulatorTestCase(
-        "null_remove_all",
-        docs=[{"v": -1}, {"v": -2}],
-        pipeline=[
-            {
-                "$group": {
-                    "_id": None,
-                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
-                }
-            }
-        ],
-        expected=[{"_id": None, "result": None}],
-        msg="$first should return null when all docs produce $$REMOVE",
-    ),
-    AccumulatorTestCase(
-        "null_remove_second_value_first",
-        docs=[{"v": 5}, {"v": -1}],
-        pipeline=[
-            {
-                "$group": {
-                    "_id": None,
-                    "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}},
-                }
-            }
-        ],
-        expected=[{"_id": None, "result": 5}],
-        msg="$first should return value when first doc has value, second $$REMOVE",
-    ),
-]
-
-# Property [Input Forms]: $first accumulator accepts various expression types as its operand.
-FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "input_nested_field",
-        docs=[{"a": {"b": 10}}, {"a": {"b": 20}}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$a.b"}}}],
-        expected=[{"_id": None, "result": 10}],
-        msg="$first should accept a nested document field path",
-    ),
-    AccumulatorTestCase(
-        "input_literal",
-        docs=[{"v": 1}, {"v": 2}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}],
-        expected=[{"_id": None, "result": 42}],
-        msg="$first with a literal constant should return that constant",
-    ),
-    AccumulatorTestCase(
-        "input_null_literal",
-        docs=[{"v": 1}, {"v": 2}],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": None}}}],
-        expected=[{"_id": None, "result": None}],
-        msg="$first with null literal should return null",
-    ),
 ]
 
 # Property [Edge Cases]: edge cases unique to the accumulator context.
@@ -155,20 +88,6 @@
         expected=[{"_id": None, "result": None}],
         msg="$first of a single document with missing field should return null",
     ),
-    AccumulatorTestCase(
-        "edge_many_docs",
-        docs=[{"v": i} for i in range(100)],
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[{"_id": None, "result": 0}],
-        msg="$first should return first document's value (v=0) across 100 documents",
-    ),
-    AccumulatorTestCase(
-        "edge_empty_collection",
-        docs=None,
-        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
-        expected=[],
-        msg="$first on empty collection should return empty result",
-    ),
     AccumulatorTestCase(
         "edge_array_not_traversed",
         docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}],
@@ -178,12 +97,12 @@
     ),
 ]
 
-FIRST_SUCCESS_TESTS = FIRST_NULL_MISSING_TESTS + FIRST_INPUT_FORM_TESTS + FIRST_EDGE_CASE_TESTS
+FIRST_SUCCESS_TESTS = FIRST_NULL_MISSING_TESTS + FIRST_EDGE_CASE_TESTS
 
 
 @pytest.mark.parametrize("test_case", pytest_params(FIRST_SUCCESS_TESTS))
 def test_accumulator_first_null_missing(collection, test_case: AccumulatorTestCase):
-    """Test $first accumulator null, missing, input form, and edge case behavior."""
+    """Test $first accumulator null, missing, and edge case behavior."""
     if test_case.docs:
         collection.insert_many(test_case.docs)
     result = execute_command(
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
index b385e89e..2682c5f6 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
@@ -308,97 +308,11 @@
     ),
 ]
 
-# Property [Return Type]: $first preserves the BSON type of the returned
-# value, verified using $type projection.
-FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [
-    AccumulatorTestCase(
-        "return_type_int32",
-        docs=[{"v": 42}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
-        ],
-        expected=[{"value": 42, "type": "int"}],
-        msg="$first of int32 should return type 'int'",
-    ),
-    AccumulatorTestCase(
-        "return_type_int64",
-        docs=[{"v": Int64(42)}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
-        ],
-        expected=[{"value": Int64(42), "type": "long"}],
-        msg="$first of Int64 should return type 'long'",
-    ),
-    AccumulatorTestCase(
-        "return_type_double",
-        docs=[{"v": 3.14}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
-        ],
-        expected=[{"value": 3.14, "type": "double"}],
-        msg="$first of double should return type 'double'",
-    ),
-    AccumulatorTestCase(
-        "return_type_decimal",
-        docs=[{"v": Decimal128("3.14")}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
-        ],
-        expected=[{"value": Decimal128("3.14"), "type": "decimal"}],
-        msg="$first of Decimal128 should return type 'decimal'",
-    ),
-    AccumulatorTestCase(
-        "return_type_string",
-        docs=[{"v": "hello"}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
-        ],
-        expected=[{"value": "hello", "type": "string"}],
-        msg="$first of string should return type 'string'",
-    ),
-    AccumulatorTestCase(
-        "return_type_boolean",
-        docs=[{"v": True}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
-        ],
-        expected=[{"value": True, "type": "bool"}],
-        msg="$first of boolean should return type 'bool'",
-    ),
-    AccumulatorTestCase(
-        "return_type_date",
-        docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
-        ],
-        expected=[{"value": datetime(2023, 6, 15, tzinfo=timezone.utc), "type": "date"}],
-        msg="$first of datetime should return type 'date'",
-    ),
-    AccumulatorTestCase(
-        "return_type_null",
-        docs=[{"v": None}, {"v": 999}],
-        pipeline=[
-            {"$group": {"_id": None, "result": {"$first": "$v"}}},
-            {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}},
-        ],
-        expected=[{"value": None, "type": "null"}],
-        msg="$first of null should return type 'null'",
-    ),
-]
-
 FIRST_TYPE_SUCCESS_TESTS = (
     FIRST_BSON_TYPE_TESTS
     + FIRST_SPECIAL_NUMERIC_TESTS
     + FIRST_DECIMAL_PRECISION_TESTS
     + FIRST_MIXED_TYPE_TESTS
-    + FIRST_RETURN_TYPE_TESTS
 )
 
 

From a4e29655293e411d71f5f3647dea732161c6feb1 Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Thu, 21 May 2026 16:46:46 -0700
Subject: [PATCH 09/10] add initial integration tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../test_accumulators_first_integration.py    | 483 ++++++++++++++++++
 1 file changed, 483 insertions(+)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_first_integration.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_first_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_first_integration.py
new file mode 100644
index 00000000..3ee006e5
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_first_integration.py
@@ -0,0 +1,483 @@
+"""Tests for $first accumulator composed with sibling accumulators in the same $group."""
+
+from __future__ import annotations
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils.accumulator_test_case import (  # noqa: E501
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [First with Last]: $first and $last coexist in the same $group,
+# picking the first and last values respectively.  A preceding $sort
+# establishes deterministic order.
+FIRST_WITH_LAST_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "first_last_sorted_asc",
+        docs=[
+            {"cat": "a", "v": 30},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "last_v": {"$last": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": 10, "last_v": 30}],
+        msg="$first should pick smallest and $last should pick largest after ascending sort",
+    ),
+    AccumulatorTestCase(
+        "first_last_sorted_desc",
+        docs=[
+            {"cat": "a", "v": 30},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {"$sort": {"v": -1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "last_v": {"$last": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": 30, "last_v": 10}],
+        msg="$first should pick largest and $last should pick smallest after descending sort",
+    ),
+    AccumulatorTestCase(
+        "first_last_multiple_groups",
+        docs=[
+            {"cat": "a", "v": 5},
+            {"cat": "a", "v": 15},
+            {"cat": "b", "v": 100},
+            {"cat": "b", "v": 200},
+            {"cat": "b", "v": 300},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "last_v": {"$last": "$v"},
+                }
+            },
+        ],
+        expected=[
+            {"_id": "a", "first_v": 5, "last_v": 15},
+            {"_id": "b", "first_v": 100, "last_v": 300},
+        ],
+        msg="$first and $last should work independently across multiple groups",
+    ),
+    AccumulatorTestCase(
+        "first_last_null_first_doc",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "last_v": {"$last": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": None, "last_v": 20}],
+        msg="$first should return null (null sorts first) while $last returns 20",
+    ),
+]
+
+# Property [First with Min/Max]: $first is position-based while $min/$max
+# are value-based.  The same data can produce different $first results
+# depending on sort order, but $min/$max are always the same.
+FIRST_WITH_MIN_MAX_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "first_min_max_sorted_asc",
+        docs=[
+            {"cat": "a", "v": 30},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "lo": {"$min": "$v"},
+                    "hi": {"$max": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": 10, "lo": 10, "hi": 30}],
+        msg="$first equals $min after ascending sort; $max is independent",
+    ),
+    AccumulatorTestCase(
+        "first_min_max_sorted_desc",
+        docs=[
+            {"cat": "a", "v": 30},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {"$sort": {"v": -1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "lo": {"$min": "$v"},
+                    "hi": {"$max": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": 30, "lo": 10, "hi": 30}],
+        msg="$first equals $max after descending sort; $min/$max unchanged",
+    ),
+    AccumulatorTestCase(
+        "first_min_max_null_divergence",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 5},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "lo": {"$min": "$v"},
+                    "hi": {"$max": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": None, "lo": 5, "hi": 10}],
+        msg="$first returns null (includes it) while $min/$max ignore null",
+    ),
+]
+
+# Property [First with Sum/Avg]: $first picks one value, $sum/$avg
+# aggregate all.  Null divergence: $first returns null when it's first;
+# $sum ignores null (0 if all null); $avg ignores null (null if all null).
+FIRST_WITH_SUM_AVG_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "first_sum_avg_basic",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 30},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "total": {"$sum": "$v"},
+                    "mean": {"$avg": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": 10, "total": 60, "mean": 20.0}],
+        msg="$first picks 10 while $sum and $avg compute over all values",
+    ),
+    AccumulatorTestCase(
+        "first_sum_avg_null_first_doc",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "total": {"$sum": "$v"},
+                    "mean": {"$avg": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": None, "total": 30, "mean": 15.0}],
+        msg="$first returns null; $sum ignores null (30); $avg ignores null (15.0)",
+    ),
+    AccumulatorTestCase(
+        "first_sum_avg_all_null",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": None},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "total": {"$sum": "$v"},
+                    "mean": {"$avg": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "first_v": None, "total": 0, "mean": None}],
+        msg="$first returns null; $sum returns 0; $avg returns null when all null",
+    ),
+]
+
+# Property [First with Count]: $first picks one value while a {"$sum": 1}
+# counter counts all documents in the group.
+FIRST_WITH_COUNT_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "first_count_basic",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "b", "v": 5},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "n": {"$sum": 1},
+                }
+            },
+        ],
+        expected=[
+            {"_id": "a", "first_v": 10, "n": 2},
+            {"_id": "b", "first_v": 5, "n": 1},
+        ],
+        msg="$first picks one value while $sum(1) counts all docs per group",
+    ),
+    AccumulatorTestCase(
+        "first_count_null_counted",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "n": {"$sum": 1},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": None, "n": 2}],
+        msg="$first returns null; $sum(1) still counts the null doc",
+    ),
+]
+
+# Property [First with Push/AddToSet]: $first picks one value while $push
+# collects all values and $addToSet collects unique values.
+FIRST_WITH_PUSH_ADDTOSET_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "first_push_addtoset",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "all_vals": {"$push": "$v"},
+                    "unique_vals": {"$addToSet": "$v"},
+                }
+            },
+        ],
+        expected=[
+            {"_id": "a", "first_v": 10, "all_vals": [10, 10, 20], "unique_vals": [10, 20]},
+        ],
+        msg="$first picks 10 while $push collects all and $addToSet collects unique",
+    ),
+    AccumulatorTestCase(
+        "first_push_null_handling",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "all_vals": {"$push": "$v"},
+                }
+            },
+        ],
+        expected=[
+            {"_id": "a", "first_v": None, "all_vals": [None, 10]},
+        ],
+        msg="$first returns null; $push includes null in the collected array",
+    ),
+]
+
+# Property [First with MergeObjects]: $first picks one scalar value while
+# $mergeObjects combines per-document subdocuments into one merged object.
+FIRST_WITH_MERGE_OBJECTS_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "first_merge_objects",
+        docs=[
+            {"cat": "a", "v": 10, "meta": {"src": "x"}},
+            {"cat": "a", "v": 20, "meta": {"quality": "high"}},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "merged": {"$mergeObjects": "$meta"},
+                }
+            },
+        ],
+        expected=[
+            {"_id": "a", "first_v": 10, "merged": {"src": "x", "quality": "high"}},
+        ],
+        msg="$first picks 10 while $mergeObjects combines all metadata objects",
+    ),
+]
+
+# Property [Multiple First]: multiple $first accumulators in the same $group
+# independently pick the first value from different fields.
+MULTIPLE_FIRST_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "multiple_first_different_fields",
+        docs=[
+            {"cat": "a", "name": "alice", "score": 85},
+            {"cat": "a", "name": "bob", "score": 92},
+            {"cat": "b", "name": "carol", "score": 78},
+        ],
+        pipeline=[
+            {"$sort": {"score": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_name": {"$first": "$name"},
+                    "first_score": {"$first": "$score"},
+                }
+            },
+        ],
+        expected=[
+            {"_id": "a", "first_name": "alice", "first_score": 85},
+            {"_id": "b", "first_name": "carol", "first_score": 78},
+        ],
+        msg="Multiple $first accumulators should independently pick first from each field",
+    ),
+    AccumulatorTestCase(
+        "multiple_first_one_missing",
+        docs=[
+            {"cat": "a", "score": 85},
+            {"cat": "a", "name": "bob", "score": 92},
+        ],
+        pipeline=[
+            {"$sort": {"score": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_name": {"$first": "$name"},
+                    "first_score": {"$first": "$score"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_name": None, "first_score": 85}],
+        msg="$first returns null for missing field while sibling $first returns value",
+    ),
+]
+
+# Property [First Type Preservation with Sibling]: $first preserves the BSON
+# type of the first document's value, even when sibling accumulators promote
+# types (e.g. $sum promoting int32+Decimal128 to Decimal128).
+FIRST_TYPE_PRESERVATION_WITH_SIBLING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "first_int32_with_sum_decimal128",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": Decimal128("20.5")},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": 10, "total": Decimal128("30.5")}],
+        msg="$first preserves int32 while $sum promotes to Decimal128",
+    ),
+    AccumulatorTestCase(
+        "first_int64_with_sum_double",
+        docs=[
+            {"cat": "a", "v": Int64(1)},  # sorts before 2.5, so $first sees the Int64
+            {"cat": "a", "v": 2.5},
+        ],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "first_v": {"$first": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            },
+        ],
+        expected=[{"_id": "a", "first_v": Int64(1), "total": 3.5}],
+        msg="$first preserves int64 (1 sorts first) while $sum promotes to double",
+    ),
+]
+
+FIRST_INTEGRATION_TESTS = (
+    FIRST_WITH_LAST_TESTS
+    + FIRST_WITH_MIN_MAX_TESTS
+    + FIRST_WITH_SUM_AVG_TESTS
+    + FIRST_WITH_COUNT_TESTS
+    + FIRST_WITH_PUSH_ADDTOSET_TESTS
+    + FIRST_WITH_MERGE_OBJECTS_TESTS
+    + MULTIPLE_FIRST_TESTS
+    + FIRST_TYPE_PRESERVATION_WITH_SIBLING_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_INTEGRATION_TESTS))
+def test_accumulators_first_integration(collection, test_case: AccumulatorTestCase):
+    """Test $first accumulator composed with sibling accumulators in the same $group."""
+    if test_case.docs:  # skip the insert when a case supplies no documents
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline or [], "cursor": {}},
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+        ignore_doc_order=True,  # $group output order is not guaranteed
+        ignore_order_in=["unique_vals"],  # $addToSet element order is unspecified
+    )

From 5cdbefe8fcd0ac1da7ec9a1e1d37b0ad8a49fc4e Mon Sep 17 00:00:00 2001
From: "Alina (Xi) Li" <Alina.Li@improving.com>
Date: Wed, 27 May 2026 15:15:04 -0700
Subject: [PATCH 10/10] Address comments

Add tests: arity tests, BSON constant tests, expression tests, expression error propagation, empty-group behavior, and order dependence tests

Signed-off-by: Alina (Xi) Li <Alina.Li@improving.com>
---
 .../first/test_accumulator_first_errors.py    | 117 ++++++++
 .../test_accumulator_first_null_missing.py    |  27 ++
 .../first/test_accumulator_first_types.py     | 280 ++++++++++++++++++
 3 files changed, 424 insertions(+)
 create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_errors.py

diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_errors.py
new file mode 100644
index 00000000..c102ad9b
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_errors.py
@@ -0,0 +1,117 @@
+"""Tests for $first accumulator error cases: arity rejection and expression error propagation."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertFailureCode
+from documentdb_tests.framework.error_codes import (
+    CONVERSION_FAILURE_ERROR,
+    DIVIDE_BY_ZERO_V2_ERROR,
+    EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+    GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Arity]: $first in accumulator context is a unary operator: it
+# rejects array syntax and expression objects with more than one operator key.
+FIRST_ARITY_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "arity_empty_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": []}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject empty array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_single_element_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": [1]}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject single-element literal array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_single_field_ref_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": ["$v"]}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject single field ref in array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_element_array",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": [1, 2, 3]}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
+        msg="$first should reject multi-element array in accumulator context",
+    ),
+    AccumulatorTestCase(
+        "arity_multi_key_expression_object",
+        docs=[{"v": 1}],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
+        msg="$first should reject multi-key expression object",
+    ),
+]
+
+# Property [Expression Error Propagation]: errors raised during sub-expression
+# evaluation propagate through the accumulator without being caught.
+FIRST_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "expr_error_divide_by_zero",
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": {"$divide": ["$v", 0]}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$first should propagate $divide by zero error",
+    ),
+    AccumulatorTestCase(
+        "expr_error_to_int_invalid_string",
+        docs=[{"v": "abc"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$first": {"$toInt": "$v"}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=CONVERSION_FAILURE_ERROR,
+        msg="$first should propagate $toInt conversion error from expression",
+    ),
+]
+
+FIRST_ERROR_TESTS = FIRST_ARITY_ERROR_TESTS + FIRST_EXPRESSION_ERROR_TESTS
+
+
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_ERROR_TESTS))
+def test_accumulator_first_errors(collection, test_case: AccumulatorTestCase):
+    """Run each $first error case through aggregate and assert its expected error code."""
+    if test_case.docs:  # skip the insert when a case supplies no documents
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}},
+    )
+    assertFailureCode(result, test_case.error_code, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
index 98de6e4b..d2e2b9e8 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py
@@ -95,6 +95,33 @@
         expected=[{"_id": None, "result": [5, 1, 8]}],
         msg="$first should return array as whole value, not traverse it",
     ),
+    AccumulatorTestCase(
+        "edge_empty_collection",
+        docs=[],
+        pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}],
+        expected=[],
+        msg="$first on empty collection should produce no groups (empty result)",
+    ),
+    AccumulatorTestCase(
+        "edge_order_dependent_asc",
+        docs=[{"v": 3}, {"v": 1}, {"v": 5}, {"v": 2}, {"v": 4}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+        ],
+        expected=[{"_id": None, "result": 1}],
+        msg="$first with ascending sort should return smallest value",
+    ),
+    AccumulatorTestCase(
+        "edge_order_dependent_desc",
+        docs=[{"v": 3}, {"v": 1}, {"v": 5}, {"v": 2}, {"v": 4}],
+        pipeline=[
+            {"$sort": {"v": -1}},
+            {"$group": {"_id": None, "result": {"$first": "$v"}}},
+        ],
+        expected=[{"_id": None, "result": 5}],
+        msg="$first with descending sort should return largest value",
+    ),
 ]
 
 FIRST_SUCCESS_TESTS = FIRST_NULL_MISSING_TESTS + FIRST_EDGE_CASE_TESTS
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
index 2682c5f6..df4fa9f9 100644
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py
@@ -10,6 +10,8 @@
     Binary,
     Decimal128,
     Int64,
+    MaxKey,
+    MinKey,
     ObjectId,
     Regex,
     Timestamp,
@@ -308,11 +310,289 @@
     ),
 ]
 
+# ---------------------------------------------------------------------------
+# Property [BSON Constant Arguments]: $first accepts BSON constants as the
+# accumulator argument (not field references). The constant is returned for
+# every document, so the "first" value is that constant.
+# ---------------------------------------------------------------------------
+FIRST_BSON_CONSTANT_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "const_true",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": True}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": True}],
+        msg="$first with boolean True constant should return True",
+    ),
+    AccumulatorTestCase(
+        "const_false",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": False}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": False}],
+        msg="$first with boolean False constant should return False",
+    ),
+    AccumulatorTestCase(
+        "const_int64",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": Int64(42)}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Int64(42)}],
+        msg="$first with Int64 constant should return that Int64 value",
+    ),
+    AccumulatorTestCase(
+        "const_double",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": 3.14}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": 3.14}],
+        msg="$first with double constant should return that double value",
+    ),
+    AccumulatorTestCase(
+        "const_decimal128",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": Decimal128("3.14")}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("3.14")}],
+        msg="$first with Decimal128 constant should return that Decimal128 value",
+    ),
+    AccumulatorTestCase(
+        "const_string",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": "hello"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": "hello"}],
+        msg="$first with string constant (no $) should return that string",
+    ),
+    AccumulatorTestCase(
+        "const_binary",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": Binary(b"\x01\x02")}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": b"\x01\x02"}],
+        msg="$first with Binary constant should return that Binary value",
+    ),
+    AccumulatorTestCase(
+        "const_objectid",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": ObjectId("000000000000000000000000")},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": ObjectId("000000000000000000000000")}],
+        msg="$first with ObjectId constant should return that ObjectId",
+    ),
+    AccumulatorTestCase(
+        "const_datetime",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": datetime(2020, 1, 1, tzinfo=timezone.utc)},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": datetime(2020, 1, 1, tzinfo=timezone.utc)}],
+        msg="$first with datetime constant should return that datetime",
+    ),
+    AccumulatorTestCase(
+        "const_timestamp",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": Timestamp(1, 1)}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Timestamp(1, 1)}],
+        msg="$first with Timestamp constant should return that Timestamp",
+    ),
+    AccumulatorTestCase(
+        "const_regex",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": Regex("abc", "i")}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Regex("abc", "i")}],
+        msg="$first with Regex constant should return that Regex",
+    ),
+    AccumulatorTestCase(
+        "const_null",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": None}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$first with null constant should return null",
+    ),
+    AccumulatorTestCase(
+        "const_minkey",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": MinKey()}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": {"": MinKey()}}],
+        msg="$first with MinKey constant should return MinKey wrapped in document",
+    ),
+    AccumulatorTestCase(
+        "const_maxkey",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": MaxKey()}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": {"": MaxKey()}}],
+        msg="$first with MaxKey constant should return MaxKey wrapped in document",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Property [Expression Types]: $first accepts various expression types as
+# its operand and evaluates them per document before picking the first.
+# ---------------------------------------------------------------------------
+FIRST_EXPRESSION_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "expr_operator_single",
+        docs=[{"v": -10}, {"v": 20}, {"v": -5}],
+        pipeline=[
+            {"$sort": {"v": 1}},  # v=-10 sorts first
+            {"$group": {"_id": None, "result": {"$first": {"$abs": "$v"}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": 10}],  # abs(-10)
+        msg="$first should accept single-input expression operator",
+    ),
+    AccumulatorTestCase(
+        "expr_operator_multi_arg",
+        docs=[{"v": -10, "w": 3}, {"v": 20, "w": 7}, {"v": -5, "w": 1}],
+        pipeline=[
+            {"$sort": {"v": 1}},  # v=-10 (w=3) sorts first
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$add": ["$v", "$w"]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": -7}],  # -10 + 3
+        msg="$first should accept a multi-arg expression operator",
+    ),
+    AccumulatorTestCase(
+        "expr_nested",
+        docs=[{"v": -10}, {"v": 20}, {"v": -5}],
+        pipeline=[
+            {"$sort": {"v": 1}},  # v=-10 sorts first
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$add": [1, {"$abs": "$v"}]}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": 11}],  # 1 + abs(-10)
+        msg="$first should accept nested expression operators",
+    ),
+    AccumulatorTestCase(
+        "expr_sysvar_remove",
+        docs=[{"v": 1}, {"v": 2}],
+        pipeline=[
+            {"$sort": {"v": 1}},
+            {"$group": {"_id": None, "result": {"$first": "$$REMOVE"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],  # "result" is present with null, not omitted
+        msg="$first with $$REMOVE should treat value as missing and return null",
+    ),
+    AccumulatorTestCase(
+        "expr_object_expression",
+        docs=[{"v": 10}, {"v": 20}, {"v": 5}],
+        pipeline=[
+            {"$sort": {"v": 1}},  # v=5 sorts first
+            {"$group": {"_id": None, "result": {"$first": {"a": "$v"}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": {"a": 5}}],  # object built from the first document
+        msg="$first should accept an object expression",
+    ),
+    AccumulatorTestCase(
+        "expr_object_with_operator",
+        docs=[{"v": -10}, {"v": 20}, {"v": -5}],
+        pipeline=[
+            {"$sort": {"v": 1}},  # v=-10 sorts first
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"a": {"$abs": "$v"}}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": {"a": 10}}],  # {"a": abs(-10)}
+        msg="$first should accept an object expression containing an operator",
+    ),
+    AccumulatorTestCase(
+        "expr_let",
+        docs=[{"v": 10}, {"v": 20}, {"v": 5}],
+        pipeline=[
+            {"$sort": {"v": 1}},  # v=5 sorts first
+            {
+                "$group": {
+                    "_id": None,
+                    "result": {"$first": {"$let": {"vars": {"x": "$v"}, "in": "$$x"}}},
+                }
+            },
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": 5}],  # $$x is bound to $v of the first document
+        msg="$first should accept a $let expression as its operand",
+    ),
+]
+
 FIRST_TYPE_SUCCESS_TESTS = (
     FIRST_BSON_TYPE_TESTS
     + FIRST_SPECIAL_NUMERIC_TESTS
     + FIRST_DECIMAL_PRECISION_TESTS
     + FIRST_MIXED_TYPE_TESTS
+    + FIRST_BSON_CONSTANT_TESTS  # constant (literal) operands, e.g. MinKey/MaxKey
+    + FIRST_EXPRESSION_TYPE_TESTS  # operator, object, $let and $$REMOVE operands
 )