From ce426a8f7476b8e68544ce861e101620ff1e8203 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 15 May 2026 15:18:53 -0700 Subject: [PATCH 01/10] generated tests Signed-off-by: Alina (Xi) Li --- .../first/test_accumulator_first.py | 1205 +++++++++++++++++ 1 file changed, 1205 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py new file mode 100644 index 00000000..3da60355 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py @@ -0,0 +1,1205 @@ +"""Tests for $first accumulator in $group, $bucket, and $bucketAuto contexts.""" + +from __future__ import annotations + +import math +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + CONVERSION_FAILURE_ERROR, + DIVIDE_BY_ZERO_V2_ERROR, + EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, + GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + MODULO_BY_ZERO_V2_ERROR, + MODULO_ZERO_REMAINDER_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_LARGE_EXPONENT, + DECIMAL128_MIN_POSITIVE, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_NAN, + DECIMAL128_NEGATIVE_ZERO, + DECIMAL128_ZERO, 
+ DOUBLE_NEGATIVE_ZERO, + DOUBLE_ZERO, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + FLOAT_NEGATIVE_NAN, +) + +# =========================================================================== +# Pipeline Helpers +# =========================================================================== + + +def _group_first(accumulator: Any) -> list[dict[str, Any]]: + """Build a $group pipeline that computes $first.""" + return [ + {"$group": {"_id": None, "result": {"$first": accumulator}}}, + {"$project": {"_id": 0, "result": 1}}, + ] + + +def _bucket_first(accumulator: Any) -> list[dict[str, Any]]: + """Build a $bucket pipeline that computes $first.""" + return [ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": accumulator}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ] + + +def _bucket_auto_first(accumulator: Any) -> list[dict[str, Any]]: + """Build a $bucketAuto pipeline that computes $first.""" + return [ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": accumulator}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ] + + +def _group_first_with_type(accumulator: Any) -> list[dict[str, Any]]: + """Build a $group pipeline that computes $first with $type projection.""" + return [ + {"$group": {"_id": None, "result": {"$first": accumulator}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ] + + +def _run(collection, test_case: AccumulatorTestCase): + """Insert docs and run the test case pipeline.""" + if test_case.docs: + collection.insert_many(test_case.docs) + return execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + + +# =========================================================================== +# 1. 
Null and Missing Handling ($group primary) +# =========================================================================== + +# Property [Null and Missing NOT Excluded]: $first returns whatever the first +# document has. Unlike $min/$max, null and missing are NOT excluded -- they +# are returned as the result if they are the first value. +FIRST_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "null_first_then_value", + docs=[{"v": None}, {"v": 5}], + pipeline=_group_first("$v"), + expected=[{"result": None}], + msg="$first should return null when first doc has null (first wins)", + ), + AccumulatorTestCase( + "null_missing_first_then_value", + docs=[{"x": 1}, {"v": 5}], + pipeline=_group_first("$v"), + expected=[{"result": None}], + msg="$first should return null when first doc has missing field", + ), + AccumulatorTestCase( + "null_value_first_then_null", + docs=[{"v": 5}, {"v": None}], + pipeline=_group_first("$v"), + expected=[{"result": 5}], + msg="$first should return 5 when first doc has value, second is null", + ), + AccumulatorTestCase( + "null_value_first_then_missing", + docs=[{"v": 5}, {"x": 1}], + pipeline=_group_first("$v"), + expected=[{"result": 5}], + msg="$first should return 5 when first doc has value, second is missing", + ), + AccumulatorTestCase( + "null_all", + docs=[{"v": None}, {"v": None}], + pipeline=_group_first("$v"), + expected=[{"result": None}], + msg="$first should return null when all docs have null", + ), + AccumulatorTestCase( + "null_missing_all", + docs=[{"x": 1}, {"x": 2}], + pipeline=_group_first("$v"), + expected=[{"result": None}], + msg="$first should return null when all docs have missing field", + ), + AccumulatorTestCase( + "null_and_missing_mixed", + docs=[{"v": None}, {"x": 1}], + pipeline=_group_first("$v"), + expected=[{"result": None}], + msg="$first should return null when first is null and second is missing", + ), + AccumulatorTestCase( + "null_remove_first_then_value", + docs=[{"v": -1}, 
{"v": 5}], + pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}), + expected=[{"result": None}], + msg="$first should return null when first doc produces $$REMOVE", + ), + AccumulatorTestCase( + "null_remove_all", + docs=[{"v": -1}, {"v": -2}], + pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}), + expected=[{"result": None}], + msg="$first should return null when all docs produce $$REMOVE", + ), + AccumulatorTestCase( + "null_remove_second_value_first", + docs=[{"v": 5}, {"v": -1}], + pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}), + expected=[{"result": 5}], + msg="$first should return value when first doc has value, second $$REMOVE", + ), +] + + +# =========================================================================== +# 2. BSON Type Preservation ($group primary) +# =========================================================================== + +# Property [BSON Type Preservation]: $first returns the first document's value +# with its BSON type preserved exactly. No coercion, no comparison, no type +# promotion. 
# Each case stores the value under test in the first document and the int 999
# in the second, so a wrong pick is visible in the result.
FIRST_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "type_int32",
        docs=[{"v": 42}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": 42}],
        msg="$first should preserve int32 type",
    ),
    AccumulatorTestCase(
        "type_int64",
        docs=[{"v": Int64(42)}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": Int64(42)}],
        msg="$first should preserve Int64 type",
    ),
    AccumulatorTestCase(
        "type_double",
        docs=[{"v": 3.14}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": 3.14}],
        msg="$first should preserve double type",
    ),
    AccumulatorTestCase(
        "type_decimal128",
        docs=[{"v": Decimal128("3.14")}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": Decimal128("3.14")}],
        msg="$first should preserve Decimal128 type",
    ),
    AccumulatorTestCase(
        "type_string",
        docs=[{"v": "hello"}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": "hello"}],
        msg="$first should preserve string type",
    ),
    AccumulatorTestCase(
        "type_bool_true",
        docs=[{"v": True}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": True}],
        msg="$first should preserve boolean True",
    ),
    AccumulatorTestCase(
        "type_bool_false",
        docs=[{"v": False}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": False}],
        msg="$first should preserve boolean False",
    ),
    AccumulatorTestCase(
        "type_null",
        docs=[{"v": None}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": None}],
        msg="$first should preserve null value",
    ),
    AccumulatorTestCase(
        "type_embedded_doc",
        docs=[{"v": {"a": 1, "b": 2}}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": {"a": 1, "b": 2}}],
        msg="$first should preserve embedded document",
    ),
    AccumulatorTestCase(
        "type_empty_doc",
        docs=[{"v": {}}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": {}}],
        msg="$first should preserve empty document",
    ),
    AccumulatorTestCase(
        "type_array",
        docs=[{"v": [1, 2, 3]}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": [1, 2, 3]}],
        msg="$first should preserve array value",
    ),
    AccumulatorTestCase(
        "type_empty_array",
        docs=[{"v": []}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": []}],
        msg="$first should preserve empty array",
    ),
    # The default-subtype Binary is expected back as plain bytes, whereas the
    # custom-subtype case below expects a Binary object.
    AccumulatorTestCase(
        "type_binary",
        docs=[{"v": Binary(b"\x01\x02")}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": b"\x01\x02"}],
        msg="$first should preserve Binary value",
    ),
    AccumulatorTestCase(
        "type_binary_custom_subtype",
        docs=[{"v": Binary(b"\x01", 5)}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": Binary(b"\x01", 5)}],
        msg="$first should preserve Binary with custom subtype",
    ),
    AccumulatorTestCase(
        "type_objectid",
        docs=[{"v": ObjectId("000000000000000000000001")}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": ObjectId("000000000000000000000001")}],
        msg="$first should preserve ObjectId value",
    ),
    AccumulatorTestCase(
        "type_datetime",
        docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": datetime(2023, 6, 15, tzinfo=timezone.utc)}],
        msg="$first should preserve datetime value",
    ),
    AccumulatorTestCase(
        "type_timestamp",
        docs=[{"v": Timestamp(100, 1)}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": Timestamp(100, 1)}],
        msg="$first should preserve Timestamp value",
    ),
    AccumulatorTestCase(
        "type_regex",
        docs=[{"v": Regex("abc", "i")}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": Regex("abc", "i")}],
        msg="$first should preserve Regex value",
    ),
]


# ===========================================================================
# 3. Special Numeric Value Preservation ($group primary)
# ===========================================================================

# Property [Special Numeric Preservation]: $first passes through values
# without comparison or reduction. Special numeric values must be preserved
# exactly as stored in the first document.
# NaN never compares equal to itself, hence pytest.approx(..., nan_ok=True).
# NOTE(review): in Python -0.0 == 0.0, so the negative-zero case only checks
# the sign if assertSuccess compares more strictly than ``==`` -- confirm.
FIRST_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "special_float_nan",
        docs=[{"v": FLOAT_NAN}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": pytest.approx(math.nan, nan_ok=True)}],
        msg="$first should preserve float NaN",
    ),
    AccumulatorTestCase(
        "special_float_neg_zero",
        docs=[{"v": DOUBLE_NEGATIVE_ZERO}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": DOUBLE_NEGATIVE_ZERO}],
        msg="$first should preserve double -0.0",
    ),
    AccumulatorTestCase(
        "special_float_inf",
        docs=[{"v": FLOAT_INFINITY}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": FLOAT_INFINITY}],
        msg="$first should preserve float Infinity",
    ),
    AccumulatorTestCase(
        "special_float_neg_inf",
        docs=[{"v": FLOAT_NEGATIVE_INFINITY}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": FLOAT_NEGATIVE_INFINITY}],
        msg="$first should preserve float -Infinity",
    ),
    AccumulatorTestCase(
        "special_decimal_nan",
        docs=[{"v": DECIMAL128_NAN}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": DECIMAL128_NAN}],
        msg="$first should preserve Decimal128 NaN",
    ),
    AccumulatorTestCase(
        "special_decimal_neg_nan",
        docs=[{"v": DECIMAL128_NEGATIVE_NAN}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": DECIMAL128_NEGATIVE_NAN}],
        msg="$first should preserve Decimal128 -NaN",
    ),
    AccumulatorTestCase(
        "special_decimal_neg_zero",
        docs=[{"v": DECIMAL128_NEGATIVE_ZERO}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": DECIMAL128_NEGATIVE_ZERO}],
        msg="$first should preserve Decimal128 -0",
    ),
    AccumulatorTestCase(
        "special_decimal_inf",
        docs=[{"v": DECIMAL128_INFINITY}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": DECIMAL128_INFINITY}],
        msg="$first should preserve Decimal128 Infinity",
    ),
    AccumulatorTestCase(
        "special_decimal_neg_inf",
        docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": DECIMAL128_NEGATIVE_INFINITY}],
        msg="$first should preserve Decimal128 -Infinity",
    ),
]


# ===========================================================================
# 4. Decimal128 Precision Preservation ($group primary)
# ===========================================================================

# Property [Decimal128 Precision]: $first must pass through Decimal128 values
# without modifying precision, trailing zeros, or exponent representation.
FIRST_DECIMAL_PRECISION_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "decimal_high_precision",
        docs=[{"v": Decimal128("1.234567890123456789012345678901234")}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": Decimal128("1.234567890123456789012345678901234")}],
        msg="$first should preserve 34-digit Decimal128 precision",
    ),
    AccumulatorTestCase(
        "decimal_trailing_zeros",
        docs=[{"v": Decimal128("1.00")}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": Decimal128("1.00")}],
        msg="$first should preserve trailing zeros in Decimal128",
    ),
    AccumulatorTestCase(
        "decimal_large_exponent",
        docs=[{"v": DECIMAL128_LARGE_EXPONENT}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": DECIMAL128_LARGE_EXPONENT}],
        msg="$first should preserve Decimal128 with large exponent",
    ),
    AccumulatorTestCase(
        "decimal_small_positive",
        docs=[{"v": DECIMAL128_MIN_POSITIVE}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": DECIMAL128_MIN_POSITIVE}],
        msg="$first should preserve smallest positive Decimal128",
    ),
    AccumulatorTestCase(
        "decimal_zero",
        docs=[{"v": DECIMAL128_ZERO}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": DECIMAL128_ZERO}],
        msg="$first should preserve Decimal128 zero",
    ),
    AccumulatorTestCase(
        "decimal_negative_zero",
        docs=[{"v": DECIMAL128_NEGATIVE_ZERO}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": DECIMAL128_NEGATIVE_ZERO}],
        msg="$first should preserve Decimal128 negative zero",
    ),
]


# ===========================================================================
# 5. BSON Type Distinction (No Coercion) ($group primary)
# ===========================================================================

# Property [No Coercion]: $first preserves BSON type distinctions. Values
# that look similar but are different BSON types are NOT coerced.
# NOTE(review): in Python False == 0 and True == 1, so the bool/int cases only
# discriminate if assertSuccess also compares types -- confirm.
FIRST_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "distinct_false_not_zero",
        docs=[{"v": False}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": False}],
        msg="$first should return False, not coerce to 0",
    ),
    AccumulatorTestCase(
        "distinct_true_not_one",
        docs=[{"v": True}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": True}],
        msg="$first should return True, not coerce to 1",
    ),
    AccumulatorTestCase(
        "distinct_zero_not_false",
        docs=[{"v": 0}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": 0}],
        msg="$first should return int32(0), not coerce to False",
    ),
    AccumulatorTestCase(
        "distinct_empty_string",
        docs=[{"v": ""}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": ""}],
        msg="$first should return empty string, not coerce to null",
    ),
    AccumulatorTestCase(
        "distinct_string_number",
        docs=[{"v": "123"}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": "123"}],
        msg="$first should return string '123', not coerce to int",
    ),
]


# ===========================================================================
# 6. Mixed Type Documents ($group primary)
# ===========================================================================

# Property [Position-Based]: $first picks the first document's value
# regardless of what other documents contain. Unlike $min/$max, there is no
# type comparison across documents.
FIRST_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "mixed_int_then_string",
        docs=[{"v": 42}, {"v": "hello"}],
        pipeline=_group_first("$v"),
        expected=[{"result": 42}],
        msg="$first should return int when first doc is int, second is string",
    ),
    AccumulatorTestCase(
        "mixed_string_then_int",
        docs=[{"v": "hello"}, {"v": 42}],
        pipeline=_group_first("$v"),
        expected=[{"result": "hello"}],
        msg="$first should return string when first doc is string, second is int",
    ),
    AccumulatorTestCase(
        "mixed_bool_then_number",
        docs=[{"v": True}, {"v": 42}],
        pipeline=_group_first("$v"),
        expected=[{"result": True}],
        msg="$first should return True when first doc is bool, second is int",
    ),
    AccumulatorTestCase(
        "mixed_array_then_scalar",
        docs=[{"v": [1, 2, 3]}, {"v": 42}],
        pipeline=_group_first("$v"),
        expected=[{"result": [1, 2, 3]}],
        msg="$first should return array when first doc is array, second is scalar",
    ),
    AccumulatorTestCase(
        "mixed_null_then_value",
        docs=[{"v": None}, {"v": 5}],
        pipeline=_group_first("$v"),
        expected=[{"result": None}],
        msg="$first should return null when first doc is null, second has value",
    ),
    AccumulatorTestCase(
        "mixed_value_then_null",
        docs=[{"v": 5}, {"v": None}],
        pipeline=_group_first("$v"),
        expected=[{"result": 5}],
        msg="$first should return value when first doc has value, second is null",
    ),
]


# ===========================================================================
# 7. Return Type Verification ($group primary)
# ===========================================================================

# Property [Return Type]: $first preserves the BSON type of the returned
# value. Verified using $type in a subsequent $project stage, so each
# expectation carries both the value and its reported type name.
FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "return_type_int32",
        docs=[{"v": 42}, {"v": 999}],
        pipeline=_group_first_with_type("$v"),
        expected=[{"value": 42, "type": "int"}],
        msg="$first of int32 should return type 'int'",
    ),
    AccumulatorTestCase(
        "return_type_int64",
        docs=[{"v": Int64(42)}, {"v": 999}],
        pipeline=_group_first_with_type("$v"),
        expected=[{"value": Int64(42), "type": "long"}],
        msg="$first of Int64 should return type 'long'",
    ),
    AccumulatorTestCase(
        "return_type_double",
        docs=[{"v": 3.14}, {"v": 999}],
        pipeline=_group_first_with_type("$v"),
        expected=[{"value": 3.14, "type": "double"}],
        msg="$first of double should return type 'double'",
    ),
    AccumulatorTestCase(
        "return_type_decimal",
        docs=[{"v": Decimal128("3.14")}, {"v": 999}],
        pipeline=_group_first_with_type("$v"),
        expected=[{"value": Decimal128("3.14"), "type": "decimal"}],
        msg="$first of Decimal128 should return type 'decimal'",
    ),
    AccumulatorTestCase(
        "return_type_string",
        docs=[{"v": "hello"}, {"v": 999}],
        pipeline=_group_first_with_type("$v"),
        expected=[{"value": "hello", "type": "string"}],
        msg="$first of string should return type 'string'",
    ),
    AccumulatorTestCase(
        "return_type_boolean",
        docs=[{"v": True}, {"v": 999}],
        pipeline=_group_first_with_type("$v"),
        expected=[{"value": True, "type": "bool"}],
        msg="$first of boolean should return type 'bool'",
    ),
    AccumulatorTestCase(
        "return_type_date",
        docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}],
        pipeline=_group_first_with_type("$v"),
        expected=[{"value": datetime(2023, 6, 15, tzinfo=timezone.utc), "type": "date"}],
        msg="$first of datetime should return type 'date'",
    ),
    AccumulatorTestCase(
        "return_type_null",
        docs=[{"v": None}, {"v": 999}],
        pipeline=_group_first_with_type("$v"),
        expected=[{"value": None, "type": "null"}],
        msg="$first of null should return type 'null'",
    ),
]


# ===========================================================================
# 8. Expression Argument Tests ($group primary)
# ===========================================================================

# Property [Input Forms]: $first accumulator accepts various expression types
# as its operand: field paths, literals, computed expressions and
# conditionals.
FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "input_field_path",
        docs=[{"v": 10}, {"v": 20}],
        pipeline=_group_first("$v"),
        expected=[{"result": 10}],
        msg="$first should accept a basic field path reference",
    ),
    AccumulatorTestCase(
        "input_nested_field",
        docs=[{"a": {"b": 10}}, {"a": {"b": 20}}],
        pipeline=_group_first("$a.b"),
        expected=[{"result": 10}],
        msg="$first should accept a nested document field path",
    ),
    AccumulatorTestCase(
        "input_literal",
        docs=[{"v": 1}, {"v": 2}],
        pipeline=_group_first(42),
        expected=[{"result": 42}],
        msg="$first with a literal constant should return that constant",
    ),
    AccumulatorTestCase(
        "input_expression",
        docs=[{"price": 10, "qty": 2}, {"price": 5, "qty": 10}],
        pipeline=_group_first({"$multiply": ["$price", "$qty"]}),
        expected=[{"result": 20}],
        msg="$first should accept a computed expression as operand",
    ),
    AccumulatorTestCase(
        "input_cond_remove",
        docs=[{"v": -1}, {"v": 5}],
        pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}),
        expected=[{"result": None}],
        msg="$first should accept conditional with $$REMOVE as operand",
    ),
    AccumulatorTestCase(
        "input_null_literal",
        docs=[{"v": 1}, {"v": 2}],
        pipeline=_group_first(None),
        expected=[{"result": None}],
        msg="$first with null literal should return null",
    ),
]


# ===========================================================================
# 9. Arity Rejection ($group primary)
# ===========================================================================

# Property [Arity]: $first in accumulator context is a unary operator and
# rejects array syntax. These cases carry an error_code instead of an
# expected result.
FIRST_ARITY_GROUP_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "arity_empty_array_group",
        docs=[{"v": 1}],
        pipeline=_group_first([]),
        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
        msg="$first should reject empty array in accumulator context ($group)",
    ),
    AccumulatorTestCase(
        "arity_single_element_group",
        docs=[{"v": 1}],
        pipeline=_group_first([1]),
        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
        msg="$first should reject single-element array in accumulator context ($group)",
    ),
    AccumulatorTestCase(
        "arity_single_field_ref_group",
        docs=[{"v": 1}],
        pipeline=_group_first(["$v"]),
        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
        msg="$first should reject single field ref in array in accumulator context ($group)",
    ),
    AccumulatorTestCase(
        "arity_multi_element_group",
        docs=[{"v": 1}],
        pipeline=_group_first([1, 2, 3]),
        error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR,
        msg="$first should reject multi-element array in accumulator context ($group)",
    ),
    AccumulatorTestCase(
        "arity_multi_key_expression_group",
        docs=[{"v": 1}],
        pipeline=_group_first({"$add": [1, 2], "$multiply": [3, 4]}),
        error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR,
        msg="$first should reject multi-key expression object ($group)",
    ),
]


# ===========================================================================
# 10. Expression Error Propagation ($group primary)
# ===========================================================================

# Property [Expression Error Propagation]: errors in sub-expressions used as
# $first operand propagate as errors.
# Each case feeds a single document through a sub-expression that fails and
# names the error code the $group pipeline is expected to surface.
FIRST_EXPRESSION_ERROR_GROUP_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "error_toInt_invalid_group",
        docs=[{"v": "not_a_number"}],
        pipeline=_group_first({"$toInt": "$v"}),
        error_code=CONVERSION_FAILURE_ERROR,
        msg="$first should propagate conversion error from $toInt sub-expression in $group",
    ),
    AccumulatorTestCase(
        "error_divide_by_zero_group",
        docs=[{"v": 10}],
        pipeline=_group_first({"$divide": ["$v", 0]}),
        error_code=DIVIDE_BY_ZERO_V2_ERROR,
        msg="$first should propagate divide-by-zero error in $group",
    ),
    AccumulatorTestCase(
        "error_mod_by_zero_group",
        docs=[{"v": 10}],
        pipeline=_group_first({"$mod": ["$v", 0]}),
        error_code=MODULO_BY_ZERO_V2_ERROR,
        msg="$first should propagate mod-by-zero error in $group",
    ),
]


# ===========================================================================
# 11. Accumulator-Specific Edge Cases ($group primary)
# ===========================================================================

# Property [Edge Cases]: edge cases unique to the accumulator context.
# Covers single-document groups, a 100-document group, an empty collection
# (docs=None, so nothing is inserted and no group is emitted), whole-array
# values and a literal operand.
FIRST_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "edge_single_doc",
        docs=[{"v": 42}],
        pipeline=_group_first("$v"),
        expected=[{"result": 42}],
        msg="$first of a single document should return that document's value",
    ),
    AccumulatorTestCase(
        "edge_single_null_doc",
        docs=[{"v": None}],
        pipeline=_group_first("$v"),
        expected=[{"result": None}],
        msg="$first of a single null document should return null",
    ),
    AccumulatorTestCase(
        "edge_single_missing_doc",
        docs=[{"x": 1}],
        pipeline=_group_first("$v"),
        expected=[{"result": None}],
        msg="$first of a single document with missing field should return null",
    ),
    AccumulatorTestCase(
        "edge_many_docs",
        docs=[{"v": position} for position in range(100)],
        pipeline=_group_first("$v"),
        expected=[{"result": 0}],
        msg="$first should return first document's value (v=0) across 100 documents",
    ),
    AccumulatorTestCase(
        "edge_empty_collection",
        docs=None,
        pipeline=_group_first("$v"),
        expected=[],
        msg="$first on empty collection should return empty result",
    ),
    AccumulatorTestCase(
        "edge_array_not_traversed",
        docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}],
        pipeline=_group_first("$v"),
        expected=[{"result": [5, 1, 8]}],
        msg="$first should return array as whole value, not traverse it",
    ),
    AccumulatorTestCase(
        "edge_literal_constant",
        docs=[{"v": 1}, {"v": 2}, {"v": 3}],
        pipeline=_group_first(42),
        expected=[{"result": 42}],
        msg="$first with literal constant should always return that constant",
    ),
]


# ===========================================================================
# Combine all $group primary success tests
# ===========================================================================

# Only tables whose cases carry an `expected` result are merged here; the
# arity and expression-error tables (which carry `error_code`) are left out.
FIRST_GROUP_SUCCESS_TESTS = [
    *FIRST_NULL_MISSING_TESTS,
    *FIRST_BSON_TYPE_TESTS,
    *FIRST_SPECIAL_NUMERIC_TESTS,
    *FIRST_DECIMAL_PRECISION_TESTS,
    *FIRST_TYPE_DISTINCTION_TESTS,
    *FIRST_MIXED_TYPE_TESTS,
    *FIRST_RETURN_TYPE_TESTS,
    *FIRST_INPUT_FORM_TESTS,
    *FIRST_EDGE_CASE_TESTS,
]


# ===========================================================================
# $group primary test function
# ===========================================================================


@pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_SUCCESS_TESTS))
def test_accumulator_first_group(collection, test_case: AccumulatorTestCase):
    """Run each $group success case and compare the result with its expectation."""
    assertSuccess(_run(collection, test_case), test_case.expected, msg=test_case.msg)


# ===========================================================================
# 12a. $bucket Smoke Tests
# ===========================================================================

# Property [Bucket Stage Smoke]: $first produces correct results through
# $bucket for representative cases.
FIRST_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "bucket_basic_numeric",
        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
        pipeline=_bucket_first("$v"),
        expected=[{"result": 10}],
        msg="$first via $bucket should return first numeric value",
    ),
    AccumulatorTestCase(
        "bucket_null_first",
        docs=[{"v": None}, {"v": 5}],
        pipeline=_bucket_first("$v"),
        expected=[{"result": None}],
        msg="$first via $bucket should return null when first doc is null",
    ),
    AccumulatorTestCase(
        "bucket_missing_first",
        docs=[{"x": 1}, {"v": 5}],
        pipeline=_bucket_first("$v"),
        expected=[{"result": None}],
        msg="$first via $bucket should return null when first doc has missing field",
    ),
    AccumulatorTestCase(
        "bucket_string_first",
        docs=[{"v": "hello"}, {"v": "world"}],
        pipeline=_bucket_first("$v"),
        expected=[{"result": "hello"}],
        msg="$first via $bucket should return first string value",
    ),
    AccumulatorTestCase(
        "bucket_array_first",
        docs=[{"v": [1, 2]}, {"v": [3, 4]}],
        pipeline=_bucket_first("$v"),
        expected=[{"result": [1, 2]}],
        msg="$first via $bucket should return first array value",
    ),
    AccumulatorTestCase(
        "bucket_single_doc",
        docs=[{"v": 42}],
        pipeline=_bucket_first("$v"),
        expected=[{"result": 42}],
        msg="$first via $bucket should handle single document",
    ),
    AccumulatorTestCase(
        "bucket_nan_preserved",
        docs=[{"v": FLOAT_NAN}, {"v": 5}],
        pipeline=_bucket_first("$v"),
        expected=[{"result": pytest.approx(math.nan, nan_ok=True)}],
        msg="$first via $bucket should preserve NaN as first value",
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_SMOKE_TESTS))
def test_accumulator_first_bucket(collection, test_case: AccumulatorTestCase):
    """Run each $bucket smoke case and compare the result with its expectation."""
    assertSuccess(_run(collection, test_case), test_case.expected, msg=test_case.msg)


# ===========================================================================
# 12b. $bucketAuto Smoke Tests
# ===========================================================================

# Property [BucketAuto Stage Smoke]: $first produces correct results through
# $bucketAuto for representative cases.
# Same inputs and expectations as the $bucket smoke table, routed through the
# $bucketAuto pipeline helper instead.
FIRST_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "bucket_auto_basic_numeric",
        docs=[{"v": 10}, {"v": 20}, {"v": 30}],
        pipeline=_bucket_auto_first("$v"),
        expected=[{"result": 10}],
        msg="$first via $bucketAuto should return first numeric value",
    ),
    AccumulatorTestCase(
        "bucket_auto_null_first",
        docs=[{"v": None}, {"v": 5}],
        pipeline=_bucket_auto_first("$v"),
        expected=[{"result": None}],
        msg="$first via $bucketAuto should return null when first doc is null",
    ),
    AccumulatorTestCase(
        "bucket_auto_missing_first",
        docs=[{"x": 1}, {"v": 5}],
        pipeline=_bucket_auto_first("$v"),
        expected=[{"result": None}],
        msg="$first via $bucketAuto should return null when first doc has missing field",
    ),
    AccumulatorTestCase(
        "bucket_auto_string_first",
        docs=[{"v": "hello"}, {"v": "world"}],
        pipeline=_bucket_auto_first("$v"),
        expected=[{"result": "hello"}],
        msg="$first via $bucketAuto should return first string value",
    ),
    AccumulatorTestCase(
        "bucket_auto_array_first",
        docs=[{"v": [1, 2]}, {"v": [3, 4]}],
        pipeline=_bucket_auto_first("$v"),
        expected=[{"result": [1, 2]}],
        msg="$first via $bucketAuto should return first array value",
    ),
    AccumulatorTestCase(
        "bucket_auto_single_doc",
        docs=[{"v": 42}],
        pipeline=_bucket_auto_first("$v"),
        expected=[{"result": 42}],
        msg="$first via $bucketAuto should handle single document",
    ),
    AccumulatorTestCase(
        "bucket_auto_nan_preserved",
        docs=[{"v": FLOAT_NAN}, {"v": 5}],
        pipeline=_bucket_auto_first("$v"),
        expected=[{"result": pytest.approx(math.nan, nan_ok=True)}],
        msg="$first via $bucketAuto should preserve NaN as first value",
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_AUTO_SMOKE_TESTS))
def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCase):
    """Run each $bucketAuto smoke case and compare the result with its expectation."""
    assertSuccess(_run(collection, test_case), test_case.expected, msg=test_case.msg)


# ===========================================================================
# 12c. Stage-Specific Behavior Tests (divergence between stages)
# ===========================================================================

# ---------------------------------------------------------------------------
# 12c-i. BSON Type Serialization Divergence
# ---------------------------------------------------------------------------

# Property [Code Serialization Divergence]: Code without scope is returned as
# str in $group/$bucket but as Code object in $bucketAuto.
# (The two lists below exercise $group and $bucketAuto only.)
FIRST_CODE_GROUP_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "code_without_scope_group",
        docs=[{"v": Code("abc")}, {"v": 999}],
        pipeline=_group_first("$v"),
        expected=[{"result": "abc"}],
        msg="$first should return Code without scope as str in $group",
    ),
]

FIRST_CODE_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [
    AccumulatorTestCase(
        "code_without_scope_bucket_auto",
        docs=[{"v": Code("abc")}, {"v": 999}],
        pipeline=_bucket_auto_first("$v"),
        expected=[{"result": Code("abc", None)}],
        msg="$first should return Code without scope as Code object in $bucketAuto",
    ),
]

# Property [MinKey Serialization Divergence]: MinKey is wrapped in a document
# in $group/$bucket but returned directly in $bucketAuto.
+FIRST_MINKEY_GROUP_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "minkey_group", + docs=[{"v": MinKey()}, {"v": 999}], + pipeline=_group_first("$v"), + expected=[{"result": {"": MinKey()}}], + msg="$first should return MinKey wrapped in dict in $group", + ), +] + +FIRST_MINKEY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "minkey_bucket_auto", + docs=[{"v": MinKey()}, {"v": 999}], + pipeline=_bucket_auto_first("$v"), + expected=[{"result": MinKey()}], + msg="$first should return MinKey directly in $bucketAuto", + ), +] + +# Property [MaxKey Serialization Divergence]: MaxKey is wrapped in a document +# in $group/$bucket but returned directly in $bucketAuto. +FIRST_MAXKEY_GROUP_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "maxkey_group", + docs=[{"v": MaxKey()}, {"v": 999}], + pipeline=_group_first("$v"), + expected=[{"result": {"": MaxKey()}}], + msg="$first should return MaxKey wrapped in dict in $group", + ), +] + +FIRST_MAXKEY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "maxkey_bucket_auto", + docs=[{"v": MaxKey()}, {"v": 999}], + pipeline=_bucket_auto_first("$v"), + expected=[{"result": MaxKey()}], + msg="$first should return MaxKey directly in $bucketAuto", + ), +] + +# --------------------------------------------------------------------------- +# 12c-ii. Expression Error Code Divergence +# --------------------------------------------------------------------------- + +# Property [Error Code Divergence]: $group/$bucket and $bucketAuto use +# different error codes for divide-by-zero and mod-by-zero. 
+FIRST_ERROR_BUCKET_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "error_toInt_invalid_bucket", + docs=[{"v": "not_a_number"}], + pipeline=_bucket_first({"$toInt": "$v"}), + error_code=CONVERSION_FAILURE_ERROR, + msg="$first should propagate conversion error from $toInt in $bucket", + ), + AccumulatorTestCase( + "error_divide_by_zero_bucket", + docs=[{"v": 10}], + pipeline=_bucket_first({"$divide": ["$v", 0]}), + error_code=DIVIDE_BY_ZERO_V2_ERROR, + msg="$first should propagate divide-by-zero error in $bucket", + ), + AccumulatorTestCase( + "error_mod_by_zero_bucket", + docs=[{"v": 10}], + pipeline=_bucket_first({"$mod": ["$v", 0]}), + error_code=MODULO_BY_ZERO_V2_ERROR, + msg="$first should propagate mod-by-zero error in $bucket", + ), +] + +FIRST_ERROR_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "error_toInt_invalid_bucket_auto", + docs=[{"v": "not_a_number"}], + pipeline=_bucket_auto_first({"$toInt": "$v"}), + error_code=CONVERSION_FAILURE_ERROR, + msg="$first should propagate conversion error from $toInt in $bucketAuto", + ), + AccumulatorTestCase( + "error_divide_by_zero_bucket_auto", + docs=[{"v": 10}], + pipeline=_bucket_auto_first({"$divide": ["$v", 0]}), + error_code=BAD_VALUE_ERROR, + msg="$first should propagate divide-by-zero in $bucketAuto (wrapped as BAD_VALUE)", + ), + AccumulatorTestCase( + "error_mod_by_zero_bucket_auto", + docs=[{"v": 10}], + pipeline=_bucket_auto_first({"$mod": ["$v", 0]}), + error_code=MODULO_ZERO_REMAINDER_ERROR, + msg="$first should propagate mod-by-zero in $bucketAuto (wrapped as 16610)", + ), +] + +# --------------------------------------------------------------------------- +# 12c-iii. Arity Rejection Across Stages +# --------------------------------------------------------------------------- + +# Property [Arity Across Stages]: arity rejection is consistent across all +# three stages. 
+FIRST_ARITY_BUCKET_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "arity_empty_array_bucket", + docs=[{"v": 1}], + pipeline=_bucket_first([]), + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$first should reject empty array in accumulator context ($bucket)", + ), + AccumulatorTestCase( + "arity_multi_element_bucket", + docs=[{"v": 1}], + pipeline=_bucket_first([1, 2, 3]), + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$first should reject multi-element array in accumulator context ($bucket)", + ), + AccumulatorTestCase( + "arity_multi_key_expression_bucket", + docs=[{"v": 1}], + pipeline=_bucket_first({"$add": [1, 2], "$multiply": [3, 4]}), + error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, + msg="$first should reject multi-key expression object ($bucket)", + ), +] + +FIRST_ARITY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "arity_empty_array_bucket_auto", + docs=[{"v": 1}], + pipeline=_bucket_auto_first([]), + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$first should reject empty array in accumulator context ($bucketAuto)", + ), + AccumulatorTestCase( + "arity_multi_element_bucket_auto", + docs=[{"v": 1}], + pipeline=_bucket_auto_first([1, 2, 3]), + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$first should reject multi-element array in accumulator context ($bucketAuto)", + ), + AccumulatorTestCase( + "arity_multi_key_expression_bucket_auto", + docs=[{"v": 1}], + pipeline=_bucket_auto_first({"$add": [1, 2], "$multiply": [3, 4]}), + error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, + msg="$first should reject multi-key expression object ($bucketAuto)", + ), +] + + +# =========================================================================== +# Combine stage divergence success tests +# =========================================================================== + +FIRST_STAGE_DIVERGENCE_TESTS = ( + FIRST_CODE_GROUP_TESTS + + FIRST_CODE_BUCKET_AUTO_TESTS + + 
FIRST_MINKEY_GROUP_TESTS + + FIRST_MINKEY_BUCKET_AUTO_TESTS + + FIRST_MAXKEY_GROUP_TESTS + + FIRST_MAXKEY_BUCKET_AUTO_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(FIRST_STAGE_DIVERGENCE_TESTS)) +def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTestCase): + """Test $first cases where behavior differs between stages.""" + result = _run(collection, test_case) + assertSuccess(result, test_case.expected, msg=test_case.msg) + + +# =========================================================================== +# Combine all error tests +# =========================================================================== + +FIRST_EXPRESSION_ERROR_TESTS = ( + FIRST_EXPRESSION_ERROR_GROUP_TESTS + + FIRST_ERROR_BUCKET_TESTS + + FIRST_ERROR_BUCKET_AUTO_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(FIRST_EXPRESSION_ERROR_TESTS)) +def test_accumulator_first_expression_errors(collection, test_case: AccumulatorTestCase): + """Test $first expression error propagation.""" + result = _run(collection, test_case) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) + + +# =========================================================================== +# Combine all arity error tests +# =========================================================================== + +FIRST_ARITY_ERROR_TESTS = ( + FIRST_ARITY_GROUP_TESTS + + FIRST_ARITY_BUCKET_TESTS + + FIRST_ARITY_BUCKET_AUTO_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(FIRST_ARITY_ERROR_TESTS)) +def test_accumulator_first_arity_errors(collection, test_case: AccumulatorTestCase): + """Test $first arity rejection across all three stages.""" + result = _run(collection, test_case) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) From 26487d853405c28b1fcbaa814ce3a12d8dd1e251 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Wed, 20 May 2026 16:19:56 -0700 Subject: [PATCH 02/10] Initial changes Update generated files with initial
change Signed-off-by: Alina (Xi) Li --- .../first/test_accumulator_first.py | 1218 ++++++++++------- 1 file changed, 690 insertions(+), 528 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py index 3da60355..f4166f7d 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py @@ -4,7 +4,6 @@ import math from datetime import datetime, timezone -from typing import Any import pytest from bson import ( @@ -44,770 +43,594 @@ DECIMAL128_NEGATIVE_ZERO, DECIMAL128_ZERO, DOUBLE_NEGATIVE_ZERO, - DOUBLE_ZERO, FLOAT_INFINITY, FLOAT_NAN, FLOAT_NEGATIVE_INFINITY, - FLOAT_NEGATIVE_NAN, ) -# =========================================================================== -# Pipeline Helpers -# =========================================================================== - - -def _group_first(accumulator: Any) -> list[dict[str, Any]]: - """Build a $group pipeline that computes $first.""" - return [ - {"$group": {"_id": None, "result": {"$first": accumulator}}}, - {"$project": {"_id": 0, "result": 1}}, - ] - - -def _bucket_first(accumulator: Any) -> list[dict[str, Any]]: - """Build a $bucket pipeline that computes $first.""" - return [ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": accumulator}}, - } - }, - {"$project": {"_id": 0, "result": 1}}, - ] - - -def _bucket_auto_first(accumulator: Any) -> list[dict[str, Any]]: - """Build a $bucketAuto pipeline that computes $first.""" - return [ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": accumulator}}, - } - }, - {"$project": {"_id": 0, "result": 1}}, - ] - - -def _group_first_with_type(accumulator: Any) -> 
list[dict[str, Any]]: - """Build a $group pipeline that computes $first with $type projection.""" - return [ - {"$group": {"_id": None, "result": {"$first": accumulator}}}, - {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, - ] - - -def _run(collection, test_case: AccumulatorTestCase): - """Insert docs and run the test case pipeline.""" - if test_case.docs: - collection.insert_many(test_case.docs) - return execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - - -# =========================================================================== -# 1. Null and Missing Handling ($group primary) -# =========================================================================== - -# Property [Null and Missing NOT Excluded]: $first returns whatever the first -# document has. Unlike $min/$max, null and missing are NOT excluded -- they -# are returned as the result if they are the first value. +# Property [Null and Missing NOT Excluded]: $first returns whatever the +# first document has, including null and missing values. 
FIRST_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "null_first_then_value", docs=[{"v": None}, {"v": 5}], - pipeline=_group_first("$v"), - expected=[{"result": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], msg="$first should return null when first doc has null (first wins)", ), AccumulatorTestCase( "null_missing_first_then_value", docs=[{"x": 1}, {"v": 5}], - pipeline=_group_first("$v"), - expected=[{"result": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], msg="$first should return null when first doc has missing field", ), AccumulatorTestCase( "null_value_first_then_null", docs=[{"v": 5}, {"v": None}], - pipeline=_group_first("$v"), - expected=[{"result": 5}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 5}], msg="$first should return 5 when first doc has value, second is null", ), AccumulatorTestCase( "null_value_first_then_missing", docs=[{"v": 5}, {"x": 1}], - pipeline=_group_first("$v"), - expected=[{"result": 5}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 5}], msg="$first should return 5 when first doc has value, second is missing", ), AccumulatorTestCase( "null_all", docs=[{"v": None}, {"v": None}], - pipeline=_group_first("$v"), - expected=[{"result": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], msg="$first should return null when all docs have null", ), AccumulatorTestCase( "null_missing_all", docs=[{"x": 1}, {"x": 2}], - pipeline=_group_first("$v"), - expected=[{"result": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], msg="$first should return null when all docs have missing field", ), AccumulatorTestCase( 
"null_and_missing_mixed", docs=[{"v": None}, {"x": 1}], - pipeline=_group_first("$v"), - expected=[{"result": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], msg="$first should return null when first is null and second is missing", ), AccumulatorTestCase( "null_remove_first_then_value", docs=[{"v": -1}, {"v": 5}], - pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}), - expected=[{"result": None}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, + } + } + ], + expected=[{"_id": None, "result": None}], msg="$first should return null when first doc produces $$REMOVE", ), AccumulatorTestCase( "null_remove_all", docs=[{"v": -1}, {"v": -2}], - pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}), - expected=[{"result": None}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, + } + } + ], + expected=[{"_id": None, "result": None}], msg="$first should return null when all docs produce $$REMOVE", ), AccumulatorTestCase( "null_remove_second_value_first", docs=[{"v": 5}, {"v": -1}], - pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}), - expected=[{"result": 5}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, + } + } + ], + expected=[{"_id": None, "result": 5}], msg="$first should return value when first doc has value, second $$REMOVE", ), ] - -# =========================================================================== -# 2. BSON Type Preservation ($group primary) -# =========================================================================== - -# Property [BSON Type Preservation]: $first returns the first document's value -# with its BSON type preserved exactly. No coercion, no comparison, no type -# promotion. 
+# Property [BSON Type Preservation]: $first returns the first document's +# value with its BSON type preserved exactly. FIRST_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "type_int32", docs=[{"v": 42}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": 42}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 42}], msg="$first should preserve int32 type", ), AccumulatorTestCase( "type_int64", docs=[{"v": Int64(42)}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": Int64(42)}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": Int64(42)}], msg="$first should preserve Int64 type", ), AccumulatorTestCase( "type_double", docs=[{"v": 3.14}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": 3.14}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 3.14}], msg="$first should preserve double type", ), AccumulatorTestCase( "type_decimal128", docs=[{"v": Decimal128("3.14")}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": Decimal128("3.14")}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": Decimal128("3.14")}], msg="$first should preserve Decimal128 type", ), AccumulatorTestCase( "type_string", docs=[{"v": "hello"}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": "hello"}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": "hello"}], msg="$first should preserve string type", ), AccumulatorTestCase( "type_bool_true", docs=[{"v": True}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": True}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": True}], msg="$first should preserve boolean True", ), AccumulatorTestCase( 
"type_bool_false", docs=[{"v": False}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": False}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": False}], msg="$first should preserve boolean False", ), AccumulatorTestCase( "type_null", docs=[{"v": None}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], msg="$first should preserve null value", ), AccumulatorTestCase( "type_embedded_doc", docs=[{"v": {"a": 1, "b": 2}}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": {"a": 1, "b": 2}}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": {"a": 1, "b": 2}}], msg="$first should preserve embedded document", ), AccumulatorTestCase( "type_empty_doc", docs=[{"v": {}}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": {}}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": {}}], msg="$first should preserve empty document", ), AccumulatorTestCase( "type_array", docs=[{"v": [1, 2, 3]}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": [1, 2, 3]}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": [1, 2, 3]}], msg="$first should preserve array value", ), AccumulatorTestCase( "type_empty_array", docs=[{"v": []}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": []}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": []}], msg="$first should preserve empty array", ), AccumulatorTestCase( "type_binary", docs=[{"v": Binary(b"\x01\x02")}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": b"\x01\x02"}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + 
expected=[{"_id": None, "result": b"\x01\x02"}], msg="$first should preserve Binary value", ), AccumulatorTestCase( "type_binary_custom_subtype", docs=[{"v": Binary(b"\x01", 5)}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": Binary(b"\x01", 5)}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": Binary(b"\x01", 5)}], msg="$first should preserve Binary with custom subtype", ), AccumulatorTestCase( "type_objectid", docs=[{"v": ObjectId("000000000000000000000001")}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": ObjectId("000000000000000000000001")}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": ObjectId("000000000000000000000001")}], msg="$first should preserve ObjectId value", ), AccumulatorTestCase( "type_datetime", docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": datetime(2023, 6, 15, tzinfo=timezone.utc)}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": datetime(2023, 6, 15, tzinfo=timezone.utc)}], msg="$first should preserve datetime value", ), AccumulatorTestCase( "type_timestamp", docs=[{"v": Timestamp(100, 1)}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": Timestamp(100, 1)}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": Timestamp(100, 1)}], msg="$first should preserve Timestamp value", ), AccumulatorTestCase( "type_regex", docs=[{"v": Regex("abc", "i")}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": Regex("abc", "i")}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": Regex("abc", "i")}], msg="$first should preserve Regex value", ), ] - -# =========================================================================== -# 3. 
Special Numeric Value Preservation ($group primary) -# =========================================================================== - -# Property [Special Numeric Preservation]: $first passes through values -# without comparison or reduction. Special numeric values must be preserved -# exactly as stored in the first document. +# Property [Special Numeric Preservation]: $first passes through special +# numeric values exactly as stored in the first document. FIRST_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "special_float_nan", docs=[{"v": FLOAT_NAN}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": pytest.approx(math.nan, nan_ok=True)}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": pytest.approx(math.nan, nan_ok=True)}], msg="$first should preserve float NaN", ), AccumulatorTestCase( "special_float_neg_zero", docs=[{"v": DOUBLE_NEGATIVE_ZERO}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": DOUBLE_NEGATIVE_ZERO}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": DOUBLE_NEGATIVE_ZERO}], msg="$first should preserve double -0.0", ), AccumulatorTestCase( "special_float_inf", docs=[{"v": FLOAT_INFINITY}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": FLOAT_INFINITY}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": FLOAT_INFINITY}], msg="$first should preserve float Infinity", ), AccumulatorTestCase( "special_float_neg_inf", docs=[{"v": FLOAT_NEGATIVE_INFINITY}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": FLOAT_NEGATIVE_INFINITY}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": FLOAT_NEGATIVE_INFINITY}], msg="$first should preserve float -Infinity", ), AccumulatorTestCase( "special_decimal_nan", docs=[{"v": DECIMAL128_NAN}, {"v": 999}], - 
pipeline=_group_first("$v"), - expected=[{"result": DECIMAL128_NAN}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": DECIMAL128_NAN}], msg="$first should preserve Decimal128 NaN", ), AccumulatorTestCase( "special_decimal_neg_nan", docs=[{"v": DECIMAL128_NEGATIVE_NAN}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": DECIMAL128_NEGATIVE_NAN}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": DECIMAL128_NEGATIVE_NAN}], msg="$first should preserve Decimal128 -NaN", ), AccumulatorTestCase( "special_decimal_neg_zero", docs=[{"v": DECIMAL128_NEGATIVE_ZERO}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": DECIMAL128_NEGATIVE_ZERO}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": DECIMAL128_NEGATIVE_ZERO}], msg="$first should preserve Decimal128 -0", ), AccumulatorTestCase( "special_decimal_inf", docs=[{"v": DECIMAL128_INFINITY}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": DECIMAL128_INFINITY}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": DECIMAL128_INFINITY}], msg="$first should preserve Decimal128 Infinity", ), AccumulatorTestCase( "special_decimal_neg_inf", docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": DECIMAL128_NEGATIVE_INFINITY}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": DECIMAL128_NEGATIVE_INFINITY}], msg="$first should preserve Decimal128 -Infinity", ), ] - -# =========================================================================== -# 4. 
Decimal128 Precision Preservation ($group primary) -# =========================================================================== - -# Property [Decimal128 Precision]: $first must pass through Decimal128 values -# without modifying precision, trailing zeros, or exponent representation. +# Property [Decimal128 Precision]: $first passes through Decimal128 values +# without modifying precision, trailing zeros, or exponent. FIRST_DECIMAL_PRECISION_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "decimal_high_precision", docs=[{"v": Decimal128("1.234567890123456789012345678901234")}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": Decimal128("1.234567890123456789012345678901234")}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": Decimal128("1.234567890123456789012345678901234")}], msg="$first should preserve 34-digit Decimal128 precision", ), AccumulatorTestCase( "decimal_trailing_zeros", docs=[{"v": Decimal128("1.00")}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": Decimal128("1.00")}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": Decimal128("1.00")}], msg="$first should preserve trailing zeros in Decimal128", ), AccumulatorTestCase( "decimal_large_exponent", docs=[{"v": DECIMAL128_LARGE_EXPONENT}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": DECIMAL128_LARGE_EXPONENT}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": DECIMAL128_LARGE_EXPONENT}], msg="$first should preserve Decimal128 with large exponent", ), AccumulatorTestCase( "decimal_small_positive", docs=[{"v": DECIMAL128_MIN_POSITIVE}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": DECIMAL128_MIN_POSITIVE}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": DECIMAL128_MIN_POSITIVE}], msg="$first 
should preserve smallest positive Decimal128", ), AccumulatorTestCase( "decimal_zero", docs=[{"v": DECIMAL128_ZERO}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": DECIMAL128_ZERO}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": DECIMAL128_ZERO}], msg="$first should preserve Decimal128 zero", ), - AccumulatorTestCase( - "decimal_negative_zero", - docs=[{"v": DECIMAL128_NEGATIVE_ZERO}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": DECIMAL128_NEGATIVE_ZERO}], - msg="$first should preserve Decimal128 negative zero", - ), ] - -# =========================================================================== -# 5. BSON Type Distinction (No Coercion) ($group primary) -# =========================================================================== - -# Property [No Coercion]: $first preserves BSON type distinctions. Values -# that look similar but are different BSON types are NOT coerced. +# Property [No Coercion]: $first preserves BSON type distinctions without +# coercing similar-looking values. 
FIRST_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "distinct_false_not_zero", docs=[{"v": False}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": False}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": False}], msg="$first should return False, not coerce to 0", ), AccumulatorTestCase( "distinct_true_not_one", docs=[{"v": True}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": True}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": True}], msg="$first should return True, not coerce to 1", ), AccumulatorTestCase( "distinct_zero_not_false", docs=[{"v": 0}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": 0}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 0}], msg="$first should return int32(0), not coerce to False", ), AccumulatorTestCase( "distinct_empty_string", docs=[{"v": ""}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": ""}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": ""}], msg="$first should return empty string, not coerce to null", ), AccumulatorTestCase( "distinct_string_number", docs=[{"v": "123"}, {"v": 999}], - pipeline=_group_first("$v"), - expected=[{"result": "123"}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": "123"}], msg="$first should return string '123', not coerce to int", ), ] - -# =========================================================================== -# 6. Mixed Type Documents ($group primary) -# =========================================================================== - # Property [Position-Based]: $first picks the first document's value -# regardless of what other documents contain. 
Unlike $min/$max, there is no -# type comparison across documents. +# regardless of what other documents contain. FIRST_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "mixed_int_then_string", docs=[{"v": 42}, {"v": "hello"}], - pipeline=_group_first("$v"), - expected=[{"result": 42}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 42}], msg="$first should return int when first doc is int, second is string", ), AccumulatorTestCase( "mixed_string_then_int", docs=[{"v": "hello"}, {"v": 42}], - pipeline=_group_first("$v"), - expected=[{"result": "hello"}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": "hello"}], msg="$first should return string when first doc is string, second is int", ), AccumulatorTestCase( "mixed_bool_then_number", docs=[{"v": True}, {"v": 42}], - pipeline=_group_first("$v"), - expected=[{"result": True}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": True}], msg="$first should return True when first doc is bool, second is int", ), AccumulatorTestCase( "mixed_array_then_scalar", docs=[{"v": [1, 2, 3]}, {"v": 42}], - pipeline=_group_first("$v"), - expected=[{"result": [1, 2, 3]}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": [1, 2, 3]}], msg="$first should return array when first doc is array, second is scalar", ), - AccumulatorTestCase( - "mixed_null_then_value", - docs=[{"v": None}, {"v": 5}], - pipeline=_group_first("$v"), - expected=[{"result": None}], - msg="$first should return null when first doc is null, second has value", - ), - AccumulatorTestCase( - "mixed_value_then_null", - docs=[{"v": 5}, {"v": None}], - pipeline=_group_first("$v"), - expected=[{"result": 5}], - msg="$first should return value when first doc has value, second is null", - ), ] - -# 
=========================================================================== -# 7. Return Type Verification ($group primary) -# =========================================================================== - -# Property [Return Type]: $first preserves the BSON type of the returned -# value. Verified using $type in a subsequent $project stage. -FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "return_type_int32", - docs=[{"v": 42}, {"v": 999}], - pipeline=_group_first_with_type("$v"), - expected=[{"value": 42, "type": "int"}], - msg="$first of int32 should return type 'int'", - ), - AccumulatorTestCase( - "return_type_int64", - docs=[{"v": Int64(42)}, {"v": 999}], - pipeline=_group_first_with_type("$v"), - expected=[{"value": Int64(42), "type": "long"}], - msg="$first of Int64 should return type 'long'", - ), - AccumulatorTestCase( - "return_type_double", - docs=[{"v": 3.14}, {"v": 999}], - pipeline=_group_first_with_type("$v"), - expected=[{"value": 3.14, "type": "double"}], - msg="$first of double should return type 'double'", - ), - AccumulatorTestCase( - "return_type_decimal", - docs=[{"v": Decimal128("3.14")}, {"v": 999}], - pipeline=_group_first_with_type("$v"), - expected=[{"value": Decimal128("3.14"), "type": "decimal"}], - msg="$first of Decimal128 should return type 'decimal'", - ), - AccumulatorTestCase( - "return_type_string", - docs=[{"v": "hello"}, {"v": 999}], - pipeline=_group_first_with_type("$v"), - expected=[{"value": "hello", "type": "string"}], - msg="$first of string should return type 'string'", - ), - AccumulatorTestCase( - "return_type_boolean", - docs=[{"v": True}, {"v": 999}], - pipeline=_group_first_with_type("$v"), - expected=[{"value": True, "type": "bool"}], - msg="$first of boolean should return type 'bool'", - ), - AccumulatorTestCase( - "return_type_date", - docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}], - pipeline=_group_first_with_type("$v"), - expected=[{"value": datetime(2023, 
6, 15, tzinfo=timezone.utc), "type": "date"}], - msg="$first of datetime should return type 'date'", - ), - AccumulatorTestCase( - "return_type_null", - docs=[{"v": None}, {"v": 999}], - pipeline=_group_first_with_type("$v"), - expected=[{"value": None, "type": "null"}], - msg="$first of null should return type 'null'", - ), -] - - -# =========================================================================== -# 8. Expression Argument Tests ($group primary) -# =========================================================================== - -# Property [Input Forms]: $first accumulator accepts various expression types -# as its operand. +# Property [Input Forms]: $first accumulator accepts various expression types as its operand. FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "input_field_path", docs=[{"v": 10}, {"v": 20}], - pipeline=_group_first("$v"), - expected=[{"result": 10}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 10}], msg="$first should accept a basic field path reference", ), AccumulatorTestCase( "input_nested_field", docs=[{"a": {"b": 10}}, {"a": {"b": 20}}], - pipeline=_group_first("$a.b"), - expected=[{"result": 10}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$a.b"}}}], + expected=[{"_id": None, "result": 10}], msg="$first should accept a nested document field path", ), AccumulatorTestCase( "input_literal", docs=[{"v": 1}, {"v": 2}], - pipeline=_group_first(42), - expected=[{"result": 42}], + pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}], + expected=[{"_id": None, "result": 42}], msg="$first with a literal constant should return that constant", ), AccumulatorTestCase( "input_expression", docs=[{"price": 10, "qty": 2}, {"price": 5, "qty": 10}], - pipeline=_group_first({"$multiply": ["$price", "$qty"]}), - expected=[{"result": 20}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": {"$multiply": ["$price", "$qty"]}}}} + ], 
+ expected=[{"_id": None, "result": 20}], msg="$first should accept a computed expression as operand", ), AccumulatorTestCase( "input_cond_remove", docs=[{"v": -1}, {"v": 5}], - pipeline=_group_first({"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}), - expected=[{"result": None}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, + } + } + ], + expected=[{"_id": None, "result": None}], msg="$first should accept conditional with $$REMOVE as operand", ), AccumulatorTestCase( "input_null_literal", docs=[{"v": 1}, {"v": 2}], - pipeline=_group_first(None), - expected=[{"result": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": None}}}], + expected=[{"_id": None, "result": None}], msg="$first with null literal should return null", ), ] +# Property [Edge Cases]: edge cases unique to the accumulator context. +FIRST_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "edge_single_doc", + docs=[{"v": 42}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 42}], + msg="$first of a single document should return that document's value", + ), + AccumulatorTestCase( + "edge_single_null_doc", + docs=[{"v": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], + msg="$first of a single null document should return null", + ), + AccumulatorTestCase( + "edge_single_missing_doc", + docs=[{"x": 1}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], + msg="$first of a single document with missing field should return null", + ), + AccumulatorTestCase( + "edge_many_docs", + docs=[{"v": i} for i in range(100)], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 0}], + msg="$first should return first document's value (v=0) across 100 documents", + ), + 
AccumulatorTestCase( + "edge_empty_collection", + docs=None, + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[], + msg="$first on empty collection should return empty result", + ), + AccumulatorTestCase( + "edge_array_not_traversed", + docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": [5, 1, 8]}], + msg="$first should return array as whole value, not traverse it", + ), + AccumulatorTestCase( + "edge_literal_constant", + docs=[{"v": 1}, {"v": 2}, {"v": 3}], + pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}], + expected=[{"_id": None, "result": 42}], + msg="$first with literal constant should always return that constant", + ), +] -# =========================================================================== -# 9. Arity Rejection ($group primary) -# =========================================================================== - -# Property [Arity]: $first in accumulator context is a unary operator and -# rejects array syntax. +# Property [Arity]: $first in accumulator context is a unary operator and rejects array syntax. 
FIRST_ARITY_GROUP_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "arity_empty_array_group", docs=[{"v": 1}], - pipeline=_group_first([]), + pipeline=[{"$group": {"_id": None, "result": {"$first": []}}}], error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$first should reject empty array in accumulator context ($group)", ), AccumulatorTestCase( "arity_single_element_group", docs=[{"v": 1}], - pipeline=_group_first([1]), + pipeline=[{"$group": {"_id": None, "result": {"$first": [1]}}}], error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$first should reject single-element array in accumulator context ($group)", ), AccumulatorTestCase( "arity_single_field_ref_group", docs=[{"v": 1}], - pipeline=_group_first(["$v"]), + pipeline=[{"$group": {"_id": None, "result": {"$first": ["$v"]}}}], error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$first should reject single field ref in array in accumulator context ($group)", ), AccumulatorTestCase( "arity_multi_element_group", docs=[{"v": 1}], - pipeline=_group_first([1, 2, 3]), + pipeline=[{"$group": {"_id": None, "result": {"$first": [1, 2, 3]}}}], error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$first should reject multi-element array in accumulator context ($group)", ), AccumulatorTestCase( "arity_multi_key_expression_group", docs=[{"v": 1}], - pipeline=_group_first({"$add": [1, 2], "$multiply": [3, 4]}), + pipeline=[ + {"$group": {"_id": None, "result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}}} + ], error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, msg="$first should reject multi-key expression object ($group)", ), ] - -# =========================================================================== -# 10. Expression Error Propagation ($group primary) -# =========================================================================== - -# Property [Expression Error Propagation]: errors in sub-expressions used as -# $first operand propagate as errors. 
+# Property [Expression Error Propagation]: errors in sub-expressions used +# as $first operand propagate as errors. FIRST_EXPRESSION_ERROR_GROUP_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "error_toInt_invalid_group", docs=[{"v": "not_a_number"}], - pipeline=_group_first({"$toInt": "$v"}), + pipeline=[{"$group": {"_id": None, "result": {"$first": {"$toInt": "$v"}}}}], error_code=CONVERSION_FAILURE_ERROR, msg="$first should propagate conversion error from $toInt sub-expression in $group", ), AccumulatorTestCase( "error_divide_by_zero_group", docs=[{"v": 10}], - pipeline=_group_first({"$divide": ["$v", 0]}), + pipeline=[{"$group": {"_id": None, "result": {"$first": {"$divide": ["$v", 0]}}}}], error_code=DIVIDE_BY_ZERO_V2_ERROR, msg="$first should propagate divide-by-zero error in $group", ), AccumulatorTestCase( "error_mod_by_zero_group", docs=[{"v": 10}], - pipeline=_group_first({"$mod": ["$v", 0]}), + pipeline=[{"$group": {"_id": None, "result": {"$first": {"$mod": ["$v", 0]}}}}], error_code=MODULO_BY_ZERO_V2_ERROR, msg="$first should propagate mod-by-zero error in $group", ), ] - -# =========================================================================== -# 11. Accumulator-Specific Edge Cases ($group primary) -# =========================================================================== - -# Property [Edge Cases]: edge cases unique to the accumulator context. 
-FIRST_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "edge_single_doc", - docs=[{"v": 42}], - pipeline=_group_first("$v"), - expected=[{"result": 42}], - msg="$first of a single document should return that document's value", - ), - AccumulatorTestCase( - "edge_single_null_doc", - docs=[{"v": None}], - pipeline=_group_first("$v"), - expected=[{"result": None}], - msg="$first of a single null document should return null", - ), - AccumulatorTestCase( - "edge_single_missing_doc", - docs=[{"x": 1}], - pipeline=_group_first("$v"), - expected=[{"result": None}], - msg="$first of a single document with missing field should return null", - ), - AccumulatorTestCase( - "edge_many_docs", - docs=[{"v": i} for i in range(100)], - pipeline=_group_first("$v"), - expected=[{"result": 0}], - msg="$first should return first document's value (v=0) across 100 documents", - ), - AccumulatorTestCase( - "edge_empty_collection", - docs=None, - pipeline=_group_first("$v"), - expected=[], - msg="$first on empty collection should return empty result", - ), - AccumulatorTestCase( - "edge_array_not_traversed", - docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}], - pipeline=_group_first("$v"), - expected=[{"result": [5, 1, 8]}], - msg="$first should return array as whole value, not traverse it", - ), - AccumulatorTestCase( - "edge_literal_constant", - docs=[{"v": 1}, {"v": 2}, {"v": 3}], - pipeline=_group_first(42), - expected=[{"result": 42}], - msg="$first with literal constant should always return that constant", - ), -] - - -# =========================================================================== -# Combine all $group primary success tests -# =========================================================================== - FIRST_GROUP_SUCCESS_TESTS = ( FIRST_NULL_MISSING_TESTS + FIRST_BSON_TYPE_TESTS @@ -815,27 +638,36 @@ def _run(collection, test_case: AccumulatorTestCase): + FIRST_DECIMAL_PRECISION_TESTS + FIRST_TYPE_DISTINCTION_TESTS + FIRST_MIXED_TYPE_TESTS - + 
FIRST_RETURN_TYPE_TESTS + FIRST_INPUT_FORM_TESTS + FIRST_EDGE_CASE_TESTS ) - -# =========================================================================== -# $group primary test function -# =========================================================================== +FIRST_GROUP_ERROR_TESTS = FIRST_ARITY_GROUP_TESTS + FIRST_EXPRESSION_ERROR_GROUP_TESTS @pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_SUCCESS_TESTS)) def test_accumulator_first_group(collection, test_case: AccumulatorTestCase): """Test $first accumulator success cases via $group.""" - result = _run(collection, test_case) + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) assertSuccess(result, test_case.expected, msg=test_case.msg) -# =========================================================================== -# 12a. $bucket Smoke Tests -# =========================================================================== +@pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_ERROR_TESTS)) +def test_accumulator_first_group_errors(collection, test_case): + """Test $first accumulator error cases via $group.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) + # Property [Bucket Stage Smoke]: $first produces correct results through # $bucket for representative cases. 
@@ -843,50 +675,106 @@ def test_accumulator_first_group(collection, test_case: AccumulatorTestCase): AccumulatorTestCase( "bucket_basic_numeric", docs=[{"v": 10}, {"v": 20}, {"v": 30}], - pipeline=_bucket_first("$v"), - expected=[{"result": 10}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": 10}], msg="$first via $bucket should return first numeric value", ), AccumulatorTestCase( "bucket_null_first", docs=[{"v": None}, {"v": 5}], - pipeline=_bucket_first("$v"), - expected=[{"result": None}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": None}], msg="$first via $bucket should return null when first doc is null", ), AccumulatorTestCase( "bucket_missing_first", docs=[{"x": 1}, {"v": 5}], - pipeline=_bucket_first("$v"), - expected=[{"result": None}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": None}], msg="$first via $bucket should return null when first doc has missing field", ), AccumulatorTestCase( "bucket_string_first", docs=[{"v": "hello"}, {"v": "world"}], - pipeline=_bucket_first("$v"), - expected=[{"result": "hello"}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": "hello"}], msg="$first via $bucket should return first string value", ), AccumulatorTestCase( "bucket_array_first", docs=[{"v": [1, 2]}, {"v": [3, 4]}], - pipeline=_bucket_first("$v"), - expected=[{"result": [1, 2]}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": -1, 
"result": [1, 2]}], msg="$first via $bucket should return first array value", ), AccumulatorTestCase( "bucket_single_doc", docs=[{"v": 42}], - pipeline=_bucket_first("$v"), - expected=[{"result": 42}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": 42}], msg="$first via $bucket should handle single document", ), AccumulatorTestCase( "bucket_nan_preserved", docs=[{"v": FLOAT_NAN}, {"v": 5}], - pipeline=_bucket_first("$v"), - expected=[{"result": pytest.approx(math.nan, nan_ok=True)}], + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": -1, "result": pytest.approx(math.nan, nan_ok=True)}], msg="$first via $bucket should preserve NaN as first value", ), ] @@ -895,64 +783,121 @@ def test_accumulator_first_group(collection, test_case: AccumulatorTestCase): @pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_SMOKE_TESTS)) def test_accumulator_first_bucket(collection, test_case: AccumulatorTestCase): """Test $first accumulator via $bucket for representative cases.""" - result = _run(collection, test_case) + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) assertSuccess(result, test_case.expected, msg=test_case.msg) -# =========================================================================== -# 12b. $bucketAuto Smoke Tests -# =========================================================================== - -# Property [BucketAuto Stage Smoke]: $first produces correct results through -# $bucketAuto for representative cases. +# Property [BucketAuto Stage Smoke]: $first produces correct results +# through $bucketAuto for representative cases. 
FIRST_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "bucket_auto_basic_numeric", docs=[{"v": 10}, {"v": 20}, {"v": 30}], - pipeline=_bucket_auto_first("$v"), - expected=[{"result": 10}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": 10}], msg="$first via $bucketAuto should return first numeric value", ), AccumulatorTestCase( "bucket_auto_null_first", docs=[{"v": None}, {"v": 5}], - pipeline=_bucket_auto_first("$v"), - expected=[{"result": None}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": None}], msg="$first via $bucketAuto should return null when first doc is null", ), AccumulatorTestCase( "bucket_auto_missing_first", docs=[{"x": 1}, {"v": 5}], - pipeline=_bucket_auto_first("$v"), - expected=[{"result": None}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": None}], msg="$first via $bucketAuto should return null when first doc has missing field", ), AccumulatorTestCase( "bucket_auto_string_first", docs=[{"v": "hello"}, {"v": "world"}], - pipeline=_bucket_auto_first("$v"), - expected=[{"result": "hello"}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": "hello"}], msg="$first via $bucketAuto should return first string value", ), AccumulatorTestCase( "bucket_auto_array_first", docs=[{"v": [1, 2]}, {"v": [3, 4]}], - pipeline=_bucket_auto_first("$v"), - expected=[{"result": [1, 2]}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": 
{"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": [1, 2]}], msg="$first via $bucketAuto should return first array value", ), AccumulatorTestCase( "bucket_auto_single_doc", docs=[{"v": 42}], - pipeline=_bucket_auto_first("$v"), - expected=[{"result": 42}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": 42}], msg="$first via $bucketAuto should handle single document", ), AccumulatorTestCase( "bucket_auto_nan_preserved", docs=[{"v": FLOAT_NAN}, {"v": 5}], - pipeline=_bucket_auto_first("$v"), - expected=[{"result": pytest.approx(math.nan, nan_ok=True)}], + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": "$v"}}, + } + } + ], + expected=[{"_id": {"min": 0, "max": 0}, "result": pytest.approx(math.nan, nan_ok=True)}], msg="$first via $bucketAuto should preserve NaN as first value", ), ] @@ -961,25 +906,25 @@ def test_accumulator_first_bucket(collection, test_case: AccumulatorTestCase): @pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_AUTO_SMOKE_TESTS)) def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCase): """Test $first accumulator via $bucketAuto for representative cases.""" - result = _run(collection, test_case) + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) assertSuccess(result, test_case.expected, msg=test_case.msg) -# =========================================================================== -# 12c. Stage-Specific Behavior Tests (divergence between stages) -# =========================================================================== - -# --------------------------------------------------------------------------- -# 12c-i. 
BSON Type Serialization Divergence -# --------------------------------------------------------------------------- - -# Property [Code Serialization Divergence]: Code without scope is returned as -# str in $group/$bucket but as Code object in $bucketAuto. +# Property [Code Serialization Divergence]: Code without scope is returned +# as str when projected in $group/$bucket but as Code object in $bucketAuto. FIRST_CODE_GROUP_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "code_without_scope_group", docs=[{"v": Code("abc")}, {"v": 999}], - pipeline=_group_first("$v"), + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], expected=[{"result": "abc"}], msg="$first should return Code without scope as str in $group", ), @@ -989,19 +934,32 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas AccumulatorTestCase( "code_without_scope_bucket_auto", docs=[{"v": Code("abc")}, {"v": 999}], - pipeline=_bucket_auto_first("$v"), + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": "$v"}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], expected=[{"result": Code("abc", None)}], - msg="$first should return Code without scope as Code object in $bucketAuto", + msg="$first should return Code without scope as Code in $bucketAuto", ), ] -# Property [MinKey Serialization Divergence]: MinKey is wrapped in a document -# in $group/$bucket but returned directly in $bucketAuto. +# Property [MinKey Serialization Divergence]: MinKey is wrapped in a +# document when projected in $group/$bucket but returned directly in +# $bucketAuto. 
FIRST_MINKEY_GROUP_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "minkey_group", docs=[{"v": MinKey()}, {"v": 999}], - pipeline=_group_first("$v"), + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], expected=[{"result": {"": MinKey()}}], msg="$first should return MinKey wrapped in dict in $group", ), @@ -1011,19 +969,32 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas AccumulatorTestCase( "minkey_bucket_auto", docs=[{"v": MinKey()}, {"v": 999}], - pipeline=_bucket_auto_first("$v"), + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": "$v"}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], expected=[{"result": MinKey()}], msg="$first should return MinKey directly in $bucketAuto", ), ] -# Property [MaxKey Serialization Divergence]: MaxKey is wrapped in a document -# in $group/$bucket but returned directly in $bucketAuto. +# Property [MaxKey Serialization Divergence]: MaxKey is wrapped in a +# document when projected in $group/$bucket but returned directly in +# $bucketAuto. 
FIRST_MAXKEY_GROUP_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "maxkey_group", docs=[{"v": MaxKey()}, {"v": 999}], - pipeline=_group_first("$v"), + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], expected=[{"result": {"": MaxKey()}}], msg="$first should return MaxKey wrapped in dict in $group", ), @@ -1033,15 +1004,42 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas AccumulatorTestCase( "maxkey_bucket_auto", docs=[{"v": MaxKey()}, {"v": 999}], - pipeline=_bucket_auto_first("$v"), + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": "$v"}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], expected=[{"result": MaxKey()}], msg="$first should return MaxKey directly in $bucketAuto", ), ] -# --------------------------------------------------------------------------- -# 12c-ii. Expression Error Code Divergence -# --------------------------------------------------------------------------- +FIRST_STAGE_DIVERGENCE_SUCCESS_TESTS = ( + FIRST_CODE_GROUP_TESTS + + FIRST_CODE_BUCKET_AUTO_TESTS + + FIRST_MINKEY_GROUP_TESTS + + FIRST_MINKEY_BUCKET_AUTO_TESTS + + FIRST_MAXKEY_GROUP_TESTS + + FIRST_MAXKEY_BUCKET_AUTO_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(FIRST_STAGE_DIVERGENCE_SUCCESS_TESTS)) +def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTestCase): + """Test $first cases where behavior differs between stages.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg) + # Property [Error Code Divergence]: $group/$bucket and $bucketAuto use # different error codes for divide-by-zero and mod-by-zero. 
@@ -1049,21 +1047,45 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas AccumulatorTestCase( "error_toInt_invalid_bucket", docs=[{"v": "not_a_number"}], - pipeline=_bucket_first({"$toInt": "$v"}), + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": {"$toInt": "$v"}}}, + } + } + ], error_code=CONVERSION_FAILURE_ERROR, msg="$first should propagate conversion error from $toInt in $bucket", ), AccumulatorTestCase( "error_divide_by_zero_bucket", docs=[{"v": 10}], - pipeline=_bucket_first({"$divide": ["$v", 0]}), + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": {"$divide": ["$v", 0]}}}, + } + } + ], error_code=DIVIDE_BY_ZERO_V2_ERROR, msg="$first should propagate divide-by-zero error in $bucket", ), AccumulatorTestCase( "error_mod_by_zero_bucket", docs=[{"v": 10}], - pipeline=_bucket_first({"$mod": ["$v", 0]}), + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": {"$mod": ["$v", 0]}}}, + } + } + ], error_code=MODULO_BY_ZERO_V2_ERROR, msg="$first should propagate mod-by-zero error in $bucket", ), @@ -1073,51 +1095,94 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas AccumulatorTestCase( "error_toInt_invalid_bucket_auto", docs=[{"v": "not_a_number"}], - pipeline=_bucket_auto_first({"$toInt": "$v"}), + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": {"$toInt": "$v"}}}, + } + } + ], error_code=CONVERSION_FAILURE_ERROR, msg="$first should propagate conversion error from $toInt in $bucketAuto", ), AccumulatorTestCase( "error_divide_by_zero_bucket_auto", docs=[{"v": 10}], - pipeline=_bucket_auto_first({"$divide": ["$v", 0]}), + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": 
{"$divide": ["$v", 0]}}}, + } + } + ], error_code=BAD_VALUE_ERROR, msg="$first should propagate divide-by-zero in $bucketAuto (wrapped as BAD_VALUE)", ), AccumulatorTestCase( "error_mod_by_zero_bucket_auto", docs=[{"v": 10}], - pipeline=_bucket_auto_first({"$mod": ["$v", 0]}), + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": {"$mod": ["$v", 0]}}}, + } + } + ], error_code=MODULO_ZERO_REMAINDER_ERROR, msg="$first should propagate mod-by-zero in $bucketAuto (wrapped as 16610)", ), ] -# --------------------------------------------------------------------------- -# 12c-iii. Arity Rejection Across Stages -# --------------------------------------------------------------------------- - -# Property [Arity Across Stages]: arity rejection is consistent across all -# three stages. +# Property [Arity Across Stages]: arity rejection is consistent across $bucket and $bucketAuto. FIRST_ARITY_BUCKET_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( "arity_empty_array_bucket", docs=[{"v": 1}], - pipeline=_bucket_first([]), + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": []}}, + } + } + ], error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$first should reject empty array in accumulator context ($bucket)", ), AccumulatorTestCase( "arity_multi_element_bucket", docs=[{"v": 1}], - pipeline=_bucket_first([1, 2, 3]), + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": {"$first": [1, 2, 3]}}, + } + } + ], error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$first should reject multi-element array in accumulator context ($bucket)", ), AccumulatorTestCase( "arity_multi_key_expression_bucket", docs=[{"v": 1}], - pipeline=_bucket_first({"$add": [1, 2], "$multiply": [3, 4]}), + pipeline=[ + { + "$bucket": { + "groupBy": {"$literal": 0}, + "boundaries": [-1, 1], + "output": {"result": 
{"$first": {"$add": [1, 2], "$multiply": [3, 4]}}}, + } + } + ], error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, msg="$first should reject multi-key expression object ($bucket)", ), @@ -1127,79 +1192,176 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas AccumulatorTestCase( "arity_empty_array_bucket_auto", docs=[{"v": 1}], - pipeline=_bucket_auto_first([]), + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": []}}, + } + } + ], error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$first should reject empty array in accumulator context ($bucketAuto)", ), AccumulatorTestCase( "arity_multi_element_bucket_auto", docs=[{"v": 1}], - pipeline=_bucket_auto_first([1, 2, 3]), + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": [1, 2, 3]}}, + } + } + ], error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, msg="$first should reject multi-element array in accumulator context ($bucketAuto)", ), AccumulatorTestCase( "arity_multi_key_expression_bucket_auto", docs=[{"v": 1}], - pipeline=_bucket_auto_first({"$add": [1, 2], "$multiply": [3, 4]}), + pipeline=[ + { + "$bucketAuto": { + "groupBy": {"$literal": 0}, + "buckets": 1, + "output": {"result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}}, + } + } + ], error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, msg="$first should reject multi-key expression object ($bucketAuto)", ), ] - -# =========================================================================== -# Combine stage divergence success tests -# =========================================================================== - -FIRST_STAGE_DIVERGENCE_TESTS = ( - FIRST_CODE_GROUP_TESTS - + FIRST_CODE_BUCKET_AUTO_TESTS - + FIRST_MINKEY_GROUP_TESTS - + FIRST_MINKEY_BUCKET_AUTO_TESTS - + FIRST_MAXKEY_GROUP_TESTS - + FIRST_MAXKEY_BUCKET_AUTO_TESTS +FIRST_EXPRESSION_ERROR_TESTS = ( + 
FIRST_EXPRESSION_ERROR_GROUP_TESTS + FIRST_ERROR_BUCKET_TESTS + FIRST_ERROR_BUCKET_AUTO_TESTS ) - -@pytest.mark.parametrize("test_case", pytest_params(FIRST_STAGE_DIVERGENCE_TESTS)) -def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTestCase): - """Test $first cases where behavior differs between stages.""" - result = _run(collection, test_case) - assertSuccess(result, test_case.expected, msg=test_case.msg) - - -# =========================================================================== -# Combine all error tests -# =========================================================================== - -FIRST_EXPRESSION_ERROR_TESTS = ( - FIRST_EXPRESSION_ERROR_GROUP_TESTS - + FIRST_ERROR_BUCKET_TESTS - + FIRST_ERROR_BUCKET_AUTO_TESTS +FIRST_ARITY_ERROR_TESTS = ( + FIRST_ARITY_GROUP_TESTS + FIRST_ARITY_BUCKET_TESTS + FIRST_ARITY_BUCKET_AUTO_TESTS ) @pytest.mark.parametrize("test_case", pytest_params(FIRST_EXPRESSION_ERROR_TESTS)) -def test_accumulator_first_expression_errors(collection, test_case: AccumulatorTestCase): +def test_accumulator_first_expression_errors(collection, test_case): """Test $first expression error propagation.""" - result = _run(collection, test_case) + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) assertFailureCode(result, test_case.error_code, msg=test_case.msg) -# =========================================================================== -# Combine all arity error tests -# =========================================================================== - -FIRST_ARITY_ERROR_TESTS = ( - FIRST_ARITY_GROUP_TESTS - + FIRST_ARITY_BUCKET_TESTS - + FIRST_ARITY_BUCKET_AUTO_TESTS -) - - @pytest.mark.parametrize("test_case", pytest_params(FIRST_ARITY_ERROR_TESTS)) -def test_accumulator_first_arity_errors(collection, test_case: AccumulatorTestCase): +def 
test_accumulator_first_arity_errors(collection, test_case): """Test $first arity rejection across all three stages.""" - result = _run(collection, test_case) + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) assertFailureCode(result, test_case.error_code, msg=test_case.msg) + + +# Property [Return Type]: $first preserves the BSON type of the returned +# value, verified using $type projection. +FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "return_type_int32", + docs=[{"v": 42}, {"v": 999}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": 42, "type": "int"}], + msg="$first of int32 should return type 'int'", + ), + AccumulatorTestCase( + "return_type_int64", + docs=[{"v": Int64(42)}, {"v": 999}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": Int64(42), "type": "long"}], + msg="$first of Int64 should return type 'long'", + ), + AccumulatorTestCase( + "return_type_double", + docs=[{"v": 3.14}, {"v": 999}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": 3.14, "type": "double"}], + msg="$first of double should return type 'double'", + ), + AccumulatorTestCase( + "return_type_decimal", + docs=[{"v": Decimal128("3.14")}, {"v": 999}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": Decimal128("3.14"), "type": "decimal"}], + msg="$first of Decimal128 should return type 'decimal'", + ), + 
AccumulatorTestCase( + "return_type_string", + docs=[{"v": "hello"}, {"v": 999}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": "hello", "type": "string"}], + msg="$first of string should return type 'string'", + ), + AccumulatorTestCase( + "return_type_boolean", + docs=[{"v": True}, {"v": 999}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": True, "type": "bool"}], + msg="$first of boolean should return type 'bool'", + ), + AccumulatorTestCase( + "return_type_date", + docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": datetime(2023, 6, 15, tzinfo=timezone.utc), "type": "date"}], + msg="$first of datetime should return type 'date'", + ), + AccumulatorTestCase( + "return_type_null", + docs=[{"v": None}, {"v": 999}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, + ], + expected=[{"value": None, "type": "null"}], + msg="$first of null should return type 'null'", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(FIRST_RETURN_TYPE_TESTS)) +def test_accumulator_first_return_type(collection, test_case: AccumulatorTestCase): + """Test $first return type verification.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg) From 786e9b15dbb18560827066c3116437f785ae26f7 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: 
Wed, 20 May 2026 16:45:08 -0700 Subject: [PATCH 03/10] add init.py Signed-off-by: Alina (Xi) Li --- .../tests/core/operator/accumulators/first/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/first/__init__.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/__init__.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/__init__.py new file mode 100644 index 00000000..e69de29b From c86016377880d481118e4aa4c557f6e483e6966e Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Wed, 20 May 2026 16:54:08 -0700 Subject: [PATCH 04/10] Remove expression tests for accumulator Signed-off-by: Alina (Xi) Li --- .../first/test_accumulator_first.py | 221 +----------------- 1 file changed, 1 insertion(+), 220 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py index f4166f7d..a0d0d185 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py @@ -8,7 +8,6 @@ import pytest from bson import ( Binary, - Code, Decimal128, Int64, MaxKey, @@ -23,13 +22,8 @@ ) from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess from documentdb_tests.framework.error_codes import ( - BAD_VALUE_ERROR, - CONVERSION_FAILURE_ERROR, - DIVIDE_BY_ZERO_V2_ERROR, EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - MODULO_BY_ZERO_V2_ERROR, - MODULO_ZERO_REMAINDER_ERROR, ) from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params @@ -479,29 +473,6 @@ expected=[{"_id": None, "result": 42}], msg="$first with a literal 
constant should return that constant", ), - AccumulatorTestCase( - "input_expression", - docs=[{"price": 10, "qty": 2}, {"price": 5, "qty": 10}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": {"$multiply": ["$price", "$qty"]}}}} - ], - expected=[{"_id": None, "result": 20}], - msg="$first should accept a computed expression as operand", - ), - AccumulatorTestCase( - "input_cond_remove", - docs=[{"v": -1}, {"v": 5}], - pipeline=[ - { - "$group": { - "_id": None, - "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, - } - } - ], - expected=[{"_id": None, "result": None}], - msg="$first should accept conditional with $$REMOVE as operand", - ), AccumulatorTestCase( "input_null_literal", docs=[{"v": 1}, {"v": 2}], @@ -605,32 +576,6 @@ ), ] -# Property [Expression Error Propagation]: errors in sub-expressions used -# as $first operand propagate as errors. -FIRST_EXPRESSION_ERROR_GROUP_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "error_toInt_invalid_group", - docs=[{"v": "not_a_number"}], - pipeline=[{"$group": {"_id": None, "result": {"$first": {"$toInt": "$v"}}}}], - error_code=CONVERSION_FAILURE_ERROR, - msg="$first should propagate conversion error from $toInt sub-expression in $group", - ), - AccumulatorTestCase( - "error_divide_by_zero_group", - docs=[{"v": 10}], - pipeline=[{"$group": {"_id": None, "result": {"$first": {"$divide": ["$v", 0]}}}}], - error_code=DIVIDE_BY_ZERO_V2_ERROR, - msg="$first should propagate divide-by-zero error in $group", - ), - AccumulatorTestCase( - "error_mod_by_zero_group", - docs=[{"v": 10}], - pipeline=[{"$group": {"_id": None, "result": {"$first": {"$mod": ["$v", 0]}}}}], - error_code=MODULO_BY_ZERO_V2_ERROR, - msg="$first should propagate mod-by-zero error in $group", - ), -] - FIRST_GROUP_SUCCESS_TESTS = ( FIRST_NULL_MISSING_TESTS + FIRST_BSON_TYPE_TESTS @@ -642,8 +587,6 @@ + FIRST_EDGE_CASE_TESTS ) -FIRST_GROUP_ERROR_TESTS = FIRST_ARITY_GROUP_TESTS + 
FIRST_EXPRESSION_ERROR_GROUP_TESTS - @pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_SUCCESS_TESTS)) def test_accumulator_first_group(collection, test_case: AccumulatorTestCase): @@ -657,18 +600,6 @@ def test_accumulator_first_group(collection, test_case: AccumulatorTestCase): assertSuccess(result, test_case.expected, msg=test_case.msg) -@pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_ERROR_TESTS)) -def test_accumulator_first_group_errors(collection, test_case): - """Test $first accumulator error cases via $group.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertFailureCode(result, test_case.error_code, msg=test_case.msg) - - # Property [Bucket Stage Smoke]: $first produces correct results through # $bucket for representative cases. FIRST_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [ @@ -915,40 +846,6 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas assertSuccess(result, test_case.expected, msg=test_case.msg) -# Property [Code Serialization Divergence]: Code without scope is returned -# as str when projected in $group/$bucket but as Code object in $bucketAuto. 
-FIRST_CODE_GROUP_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "code_without_scope_group", - docs=[{"v": Code("abc")}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": "abc"}], - msg="$first should return Code without scope as str in $group", - ), -] - -FIRST_CODE_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "code_without_scope_bucket_auto", - docs=[{"v": Code("abc")}, {"v": 999}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": "$v"}}, - } - }, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": Code("abc", None)}], - msg="$first should return Code without scope as Code in $bucketAuto", - ), -] - # Property [MinKey Serialization Divergence]: MinKey is wrapped in a # document when projected in $group/$bucket but returned directly in # $bucketAuto. @@ -1020,9 +917,7 @@ def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCas ] FIRST_STAGE_DIVERGENCE_SUCCESS_TESTS = ( - FIRST_CODE_GROUP_TESTS - + FIRST_CODE_BUCKET_AUTO_TESTS - + FIRST_MINKEY_GROUP_TESTS + FIRST_MINKEY_GROUP_TESTS + FIRST_MINKEY_BUCKET_AUTO_TESTS + FIRST_MAXKEY_GROUP_TESTS + FIRST_MAXKEY_BUCKET_AUTO_TESTS @@ -1041,104 +936,6 @@ def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTe assertSuccess(result, test_case.expected, msg=test_case.msg) -# Property [Error Code Divergence]: $group/$bucket and $bucketAuto use -# different error codes for divide-by-zero and mod-by-zero. 
-FIRST_ERROR_BUCKET_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "error_toInt_invalid_bucket", - docs=[{"v": "not_a_number"}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": {"$toInt": "$v"}}}, - } - } - ], - error_code=CONVERSION_FAILURE_ERROR, - msg="$first should propagate conversion error from $toInt in $bucket", - ), - AccumulatorTestCase( - "error_divide_by_zero_bucket", - docs=[{"v": 10}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": {"$divide": ["$v", 0]}}}, - } - } - ], - error_code=DIVIDE_BY_ZERO_V2_ERROR, - msg="$first should propagate divide-by-zero error in $bucket", - ), - AccumulatorTestCase( - "error_mod_by_zero_bucket", - docs=[{"v": 10}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": {"$mod": ["$v", 0]}}}, - } - } - ], - error_code=MODULO_BY_ZERO_V2_ERROR, - msg="$first should propagate mod-by-zero error in $bucket", - ), -] - -FIRST_ERROR_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "error_toInt_invalid_bucket_auto", - docs=[{"v": "not_a_number"}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": {"$toInt": "$v"}}}, - } - } - ], - error_code=CONVERSION_FAILURE_ERROR, - msg="$first should propagate conversion error from $toInt in $bucketAuto", - ), - AccumulatorTestCase( - "error_divide_by_zero_bucket_auto", - docs=[{"v": 10}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": {"$divide": ["$v", 0]}}}, - } - } - ], - error_code=BAD_VALUE_ERROR, - msg="$first should propagate divide-by-zero in $bucketAuto (wrapped as BAD_VALUE)", - ), - AccumulatorTestCase( - "error_mod_by_zero_bucket_auto", - docs=[{"v": 10}], - pipeline=[ - { - "$bucketAuto": { - 
"groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": {"$mod": ["$v", 0]}}}, - } - } - ], - error_code=MODULO_ZERO_REMAINDER_ERROR, - msg="$first should propagate mod-by-zero in $bucketAuto (wrapped as 16610)", - ), -] - # Property [Arity Across Stages]: arity rejection is consistent across $bucket and $bucketAuto. FIRST_ARITY_BUCKET_TESTS: list[AccumulatorTestCase] = [ AccumulatorTestCase( @@ -1236,27 +1033,11 @@ def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTe ), ] -FIRST_EXPRESSION_ERROR_TESTS = ( - FIRST_EXPRESSION_ERROR_GROUP_TESTS + FIRST_ERROR_BUCKET_TESTS + FIRST_ERROR_BUCKET_AUTO_TESTS -) - FIRST_ARITY_ERROR_TESTS = ( FIRST_ARITY_GROUP_TESTS + FIRST_ARITY_BUCKET_TESTS + FIRST_ARITY_BUCKET_AUTO_TESTS ) -@pytest.mark.parametrize("test_case", pytest_params(FIRST_EXPRESSION_ERROR_TESTS)) -def test_accumulator_first_expression_errors(collection, test_case): - """Test $first expression error propagation.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertFailureCode(result, test_case.error_code, msg=test_case.msg) - - @pytest.mark.parametrize("test_case", pytest_params(FIRST_ARITY_ERROR_TESTS)) def test_accumulator_first_arity_errors(collection, test_case): """Test $first arity rejection across all three stages.""" From a548a0102f6970784580ad7debd61ec70f981772 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 21 May 2026 15:56:28 -0700 Subject: [PATCH 05/10] remove stage tests Signed-off-by: Alina (Xi) Li --- .../first/test_accumulator_first.py | 501 +----------------- 1 file changed, 2 insertions(+), 499 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py index a0d0d185..714df0df 100644 
--- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py @@ -1,4 +1,4 @@ -"""Tests for $first accumulator in $group, $bucket, and $bucketAuto contexts.""" +"""Tests for $first accumulator in $group context.""" from __future__ import annotations @@ -10,8 +10,6 @@ Binary, Decimal128, Int64, - MaxKey, - MinKey, ObjectId, Regex, Timestamp, @@ -20,11 +18,7 @@ from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( AccumulatorTestCase, ) -from documentdb_tests.framework.assertions import assertFailureCode, assertSuccess -from documentdb_tests.framework.error_codes import ( - EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, - GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, -) +from documentdb_tests.framework.assertions import assertSuccess from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import ( @@ -535,47 +529,6 @@ ), ] -# Property [Arity]: $first in accumulator context is a unary operator and rejects array syntax. 
-FIRST_ARITY_GROUP_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "arity_empty_array_group", - docs=[{"v": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$first": []}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$first should reject empty array in accumulator context ($group)", - ), - AccumulatorTestCase( - "arity_single_element_group", - docs=[{"v": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$first": [1]}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$first should reject single-element array in accumulator context ($group)", - ), - AccumulatorTestCase( - "arity_single_field_ref_group", - docs=[{"v": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$first": ["$v"]}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$first should reject single field ref in array in accumulator context ($group)", - ), - AccumulatorTestCase( - "arity_multi_element_group", - docs=[{"v": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$first": [1, 2, 3]}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$first should reject multi-element array in accumulator context ($group)", - ), - AccumulatorTestCase( - "arity_multi_key_expression_group", - docs=[{"v": 1}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}}} - ], - error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, - msg="$first should reject multi-key expression object ($group)", - ), -] - FIRST_GROUP_SUCCESS_TESTS = ( FIRST_NULL_MISSING_TESTS + FIRST_BSON_TYPE_TESTS @@ -600,456 +553,6 @@ def test_accumulator_first_group(collection, test_case: AccumulatorTestCase): assertSuccess(result, test_case.expected, msg=test_case.msg) -# Property [Bucket Stage Smoke]: $first produces correct results through -# $bucket for representative cases. 
-FIRST_BUCKET_SMOKE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucket_basic_numeric", - docs=[{"v": 10}, {"v": 20}, {"v": 30}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": 10}], - msg="$first via $bucket should return first numeric value", - ), - AccumulatorTestCase( - "bucket_null_first", - docs=[{"v": None}, {"v": 5}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": None}], - msg="$first via $bucket should return null when first doc is null", - ), - AccumulatorTestCase( - "bucket_missing_first", - docs=[{"x": 1}, {"v": 5}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": None}], - msg="$first via $bucket should return null when first doc has missing field", - ), - AccumulatorTestCase( - "bucket_string_first", - docs=[{"v": "hello"}, {"v": "world"}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": "hello"}], - msg="$first via $bucket should return first string value", - ), - AccumulatorTestCase( - "bucket_array_first", - docs=[{"v": [1, 2]}, {"v": [3, 4]}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": [1, 2]}], - msg="$first via $bucket should return first array value", - ), - AccumulatorTestCase( - "bucket_single_doc", - docs=[{"v": 42}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": -1, 
"result": 42}], - msg="$first via $bucket should handle single document", - ), - AccumulatorTestCase( - "bucket_nan_preserved", - docs=[{"v": FLOAT_NAN}, {"v": 5}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": -1, "result": pytest.approx(math.nan, nan_ok=True)}], - msg="$first via $bucket should preserve NaN as first value", - ), -] - - -@pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_SMOKE_TESTS)) -def test_accumulator_first_bucket(collection, test_case: AccumulatorTestCase): - """Test $first accumulator via $bucket for representative cases.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg) - - -# Property [BucketAuto Stage Smoke]: $first produces correct results -# through $bucketAuto for representative cases. 
-FIRST_BUCKET_AUTO_SMOKE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucket_auto_basic_numeric", - docs=[{"v": 10}, {"v": 20}, {"v": 30}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": 10}], - msg="$first via $bucketAuto should return first numeric value", - ), - AccumulatorTestCase( - "bucket_auto_null_first", - docs=[{"v": None}, {"v": 5}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": None}], - msg="$first via $bucketAuto should return null when first doc is null", - ), - AccumulatorTestCase( - "bucket_auto_missing_first", - docs=[{"x": 1}, {"v": 5}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": None}], - msg="$first via $bucketAuto should return null when first doc has missing field", - ), - AccumulatorTestCase( - "bucket_auto_string_first", - docs=[{"v": "hello"}, {"v": "world"}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": "hello"}], - msg="$first via $bucketAuto should return first string value", - ), - AccumulatorTestCase( - "bucket_auto_array_first", - docs=[{"v": [1, 2]}, {"v": [3, 4]}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": [1, 2]}], - msg="$first via $bucketAuto should return first array value", - ), - AccumulatorTestCase( - "bucket_auto_single_doc", - docs=[{"v": 42}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": 
{"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": 42}], - msg="$first via $bucketAuto should handle single document", - ), - AccumulatorTestCase( - "bucket_auto_nan_preserved", - docs=[{"v": FLOAT_NAN}, {"v": 5}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": "$v"}}, - } - } - ], - expected=[{"_id": {"min": 0, "max": 0}, "result": pytest.approx(math.nan, nan_ok=True)}], - msg="$first via $bucketAuto should preserve NaN as first value", - ), -] - - -@pytest.mark.parametrize("test_case", pytest_params(FIRST_BUCKET_AUTO_SMOKE_TESTS)) -def test_accumulator_first_bucket_auto(collection, test_case: AccumulatorTestCase): - """Test $first accumulator via $bucketAuto for representative cases.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg) - - -# Property [MinKey Serialization Divergence]: MinKey is wrapped in a -# document when projected in $group/$bucket but returned directly in -# $bucketAuto. 
-FIRST_MINKEY_GROUP_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "minkey_group", - docs=[{"v": MinKey()}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": {"": MinKey()}}], - msg="$first should return MinKey wrapped in dict in $group", - ), -] - -FIRST_MINKEY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "minkey_bucket_auto", - docs=[{"v": MinKey()}, {"v": 999}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": "$v"}}, - } - }, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": MinKey()}], - msg="$first should return MinKey directly in $bucketAuto", - ), -] - -# Property [MaxKey Serialization Divergence]: MaxKey is wrapped in a -# document when projected in $group/$bucket but returned directly in -# $bucketAuto. -FIRST_MAXKEY_GROUP_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "maxkey_group", - docs=[{"v": MaxKey()}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": {"": MaxKey()}}], - msg="$first should return MaxKey wrapped in dict in $group", - ), -] - -FIRST_MAXKEY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "maxkey_bucket_auto", - docs=[{"v": MaxKey()}, {"v": 999}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": "$v"}}, - } - }, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": MaxKey()}], - msg="$first should return MaxKey directly in $bucketAuto", - ), -] - -FIRST_STAGE_DIVERGENCE_SUCCESS_TESTS = ( - FIRST_MINKEY_GROUP_TESTS - + FIRST_MINKEY_BUCKET_AUTO_TESTS - + FIRST_MAXKEY_GROUP_TESTS - + FIRST_MAXKEY_BUCKET_AUTO_TESTS -) - - -@pytest.mark.parametrize("test_case", 
pytest_params(FIRST_STAGE_DIVERGENCE_SUCCESS_TESTS)) -def test_accumulator_first_stage_divergence(collection, test_case: AccumulatorTestCase): - """Test $first cases where behavior differs between stages.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg) - - -# Property [Arity Across Stages]: arity rejection is consistent across $bucket and $bucketAuto. -FIRST_ARITY_BUCKET_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "arity_empty_array_bucket", - docs=[{"v": 1}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": []}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$first should reject empty array in accumulator context ($bucket)", - ), - AccumulatorTestCase( - "arity_multi_element_bucket", - docs=[{"v": 1}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": [1, 2, 3]}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$first should reject multi-element array in accumulator context ($bucket)", - ), - AccumulatorTestCase( - "arity_multi_key_expression_bucket", - docs=[{"v": 1}], - pipeline=[ - { - "$bucket": { - "groupBy": {"$literal": 0}, - "boundaries": [-1, 1], - "output": {"result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}}, - } - } - ], - error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, - msg="$first should reject multi-key expression object ($bucket)", - ), -] - -FIRST_ARITY_BUCKET_AUTO_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "arity_empty_array_bucket_auto", - docs=[{"v": 1}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": []}}, - } - } - ], - 
error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$first should reject empty array in accumulator context ($bucketAuto)", - ), - AccumulatorTestCase( - "arity_multi_element_bucket_auto", - docs=[{"v": 1}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": [1, 2, 3]}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$first should reject multi-element array in accumulator context ($bucketAuto)", - ), - AccumulatorTestCase( - "arity_multi_key_expression_bucket_auto", - docs=[{"v": 1}], - pipeline=[ - { - "$bucketAuto": { - "groupBy": {"$literal": 0}, - "buckets": 1, - "output": {"result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}}, - } - } - ], - error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, - msg="$first should reject multi-key expression object ($bucketAuto)", - ), -] - -FIRST_ARITY_ERROR_TESTS = ( - FIRST_ARITY_GROUP_TESTS + FIRST_ARITY_BUCKET_TESTS + FIRST_ARITY_BUCKET_AUTO_TESTS -) - - -@pytest.mark.parametrize("test_case", pytest_params(FIRST_ARITY_ERROR_TESTS)) -def test_accumulator_first_arity_errors(collection, test_case): - """Test $first arity rejection across all three stages.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertFailureCode(result, test_case.error_code, msg=test_case.msg) - - # Property [Return Type]: $first preserves the BSON type of the returned # value, verified using $type projection. 
FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [ From 4d1685d6e5dac71e1d25d6e38c73d1de6df101a7 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 21 May 2026 16:02:20 -0700 Subject: [PATCH 06/10] split into files Signed-off-by: Alina (Xi) Li --- .../test_accumulator_first_null_missing.py | 207 ++++++++++++++++ ...rst.py => test_accumulator_first_types.py} | 222 +----------------- 2 files changed, 220 insertions(+), 209 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py rename documentdb_tests/compatibility/tests/core/operator/accumulators/first/{test_accumulator_first.py => test_accumulator_first_types.py} (69%) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py new file mode 100644 index 00000000..af62338b --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py @@ -0,0 +1,207 @@ +"""Tests for $first accumulator null, missing, input form, and edge case behavior.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Null and Missing NOT Excluded]: $first returns whatever the +# first document has, including null and missing values. 
+FIRST_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "null_first_then_value", + docs=[{"v": None}, {"v": 5}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], + msg="$first should return null when first doc has null (first wins)", + ), + AccumulatorTestCase( + "null_missing_first_then_value", + docs=[{"x": 1}, {"v": 5}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], + msg="$first should return null when first doc has missing field", + ), + AccumulatorTestCase( + "null_value_first_then_null", + docs=[{"v": 5}, {"v": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 5}], + msg="$first should return 5 when first doc has value, second is null", + ), + AccumulatorTestCase( + "null_value_first_then_missing", + docs=[{"v": 5}, {"x": 1}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 5}], + msg="$first should return 5 when first doc has value, second is missing", + ), + AccumulatorTestCase( + "null_all", + docs=[{"v": None}, {"v": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], + msg="$first should return null when all docs have null", + ), + AccumulatorTestCase( + "null_missing_all", + docs=[{"x": 1}, {"x": 2}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], + msg="$first should return null when all docs have missing field", + ), + AccumulatorTestCase( + "null_and_missing_mixed", + docs=[{"v": None}, {"x": 1}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], + msg="$first should return null when first is null and second is missing", + ), + AccumulatorTestCase( + "null_remove_first_then_value", + 
docs=[{"v": -1}, {"v": 5}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, + } + } + ], + expected=[{"_id": None, "result": None}], + msg="$first should return null when first doc produces $$REMOVE", + ), + AccumulatorTestCase( + "null_remove_all", + docs=[{"v": -1}, {"v": -2}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, + } + } + ], + expected=[{"_id": None, "result": None}], + msg="$first should return null when all docs produce $$REMOVE", + ), + AccumulatorTestCase( + "null_remove_second_value_first", + docs=[{"v": 5}, {"v": -1}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, + } + } + ], + expected=[{"_id": None, "result": 5}], + msg="$first should return value when first doc has value, second $$REMOVE", + ), +] + +# Property [Input Forms]: $first accumulator accepts various expression types as its operand. 
+FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "input_field_path", + docs=[{"v": 10}, {"v": 20}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 10}], + msg="$first should accept a basic field path reference", + ), + AccumulatorTestCase( + "input_nested_field", + docs=[{"a": {"b": 10}}, {"a": {"b": 20}}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$a.b"}}}], + expected=[{"_id": None, "result": 10}], + msg="$first should accept a nested document field path", + ), + AccumulatorTestCase( + "input_literal", + docs=[{"v": 1}, {"v": 2}], + pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}], + expected=[{"_id": None, "result": 42}], + msg="$first with a literal constant should return that constant", + ), + AccumulatorTestCase( + "input_null_literal", + docs=[{"v": 1}, {"v": 2}], + pipeline=[{"$group": {"_id": None, "result": {"$first": None}}}], + expected=[{"_id": None, "result": None}], + msg="$first with null literal should return null", + ), +] + +# Property [Edge Cases]: edge cases unique to the accumulator context. 
+FIRST_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "edge_single_doc", + docs=[{"v": 42}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 42}], + msg="$first of a single document should return that document's value", + ), + AccumulatorTestCase( + "edge_single_null_doc", + docs=[{"v": None}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], + msg="$first of a single null document should return null", + ), + AccumulatorTestCase( + "edge_single_missing_doc", + docs=[{"x": 1}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": None}], + msg="$first of a single document with missing field should return null", + ), + AccumulatorTestCase( + "edge_many_docs", + docs=[{"v": i} for i in range(100)], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": 0}], + msg="$first should return first document's value (v=0) across 100 documents", + ), + AccumulatorTestCase( + "edge_empty_collection", + docs=None, + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[], + msg="$first on empty collection should return empty result", + ), + AccumulatorTestCase( + "edge_array_not_traversed", + docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[{"_id": None, "result": [5, 1, 8]}], + msg="$first should return array as whole value, not traverse it", + ), + AccumulatorTestCase( + "edge_literal_constant", + docs=[{"v": 1}, {"v": 2}, {"v": 3}], + pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}], + expected=[{"_id": None, "result": 42}], + msg="$first with literal constant should always return that constant", + ), +] + +FIRST_SUCCESS_TESTS = FIRST_NULL_MISSING_TESTS + FIRST_INPUT_FORM_TESTS + FIRST_EDGE_CASE_TESTS + + 
+@pytest.mark.parametrize("test_case", pytest_params(FIRST_SUCCESS_TESTS)) +def test_accumulator_first_null_missing(collection, test_case: AccumulatorTestCase): + """Test $first accumulator null, missing, input form, and edge case behavior.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertSuccess(result, test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py similarity index 69% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py index 714df0df..b7e086e8 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py @@ -1,4 +1,4 @@ -"""Tests for $first accumulator in $group context.""" +"""Tests for $first accumulator BSON type preservation and type fidelity.""" from __future__ import annotations @@ -36,102 +36,6 @@ FLOAT_NEGATIVE_INFINITY, ) -# Property [Null and Missing NOT Excluded]: $first returns whatever the -# first document has, including null and missing values. 
-FIRST_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "null_first_then_value", - docs=[{"v": None}, {"v": 5}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": None}], - msg="$first should return null when first doc has null (first wins)", - ), - AccumulatorTestCase( - "null_missing_first_then_value", - docs=[{"x": 1}, {"v": 5}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": None}], - msg="$first should return null when first doc has missing field", - ), - AccumulatorTestCase( - "null_value_first_then_null", - docs=[{"v": 5}, {"v": None}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": 5}], - msg="$first should return 5 when first doc has value, second is null", - ), - AccumulatorTestCase( - "null_value_first_then_missing", - docs=[{"v": 5}, {"x": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": 5}], - msg="$first should return 5 when first doc has value, second is missing", - ), - AccumulatorTestCase( - "null_all", - docs=[{"v": None}, {"v": None}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": None}], - msg="$first should return null when all docs have null", - ), - AccumulatorTestCase( - "null_missing_all", - docs=[{"x": 1}, {"x": 2}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": None}], - msg="$first should return null when all docs have missing field", - ), - AccumulatorTestCase( - "null_and_missing_mixed", - docs=[{"v": None}, {"x": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": None}], - msg="$first should return null when first is null and second is missing", - ), - AccumulatorTestCase( - "null_remove_first_then_value", - 
docs=[{"v": -1}, {"v": 5}], - pipeline=[ - { - "$group": { - "_id": None, - "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, - } - } - ], - expected=[{"_id": None, "result": None}], - msg="$first should return null when first doc produces $$REMOVE", - ), - AccumulatorTestCase( - "null_remove_all", - docs=[{"v": -1}, {"v": -2}], - pipeline=[ - { - "$group": { - "_id": None, - "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, - } - } - ], - expected=[{"_id": None, "result": None}], - msg="$first should return null when all docs produce $$REMOVE", - ), - AccumulatorTestCase( - "null_remove_second_value_first", - docs=[{"v": 5}, {"v": -1}], - pipeline=[ - { - "$group": { - "_id": None, - "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, - } - } - ], - expected=[{"_id": None, "result": 5}], - msg="$first should return value when first doc has value, second $$REMOVE", - ), -] - # Property [BSON Type Preservation]: $first returns the first document's # value with its BSON type preserved exactly. FIRST_BSON_TYPE_TESTS: list[AccumulatorTestCase] = [ @@ -444,115 +348,6 @@ ), ] -# Property [Input Forms]: $first accumulator accepts various expression types as its operand. 
-FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "input_field_path", - docs=[{"v": 10}, {"v": 20}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": 10}], - msg="$first should accept a basic field path reference", - ), - AccumulatorTestCase( - "input_nested_field", - docs=[{"a": {"b": 10}}, {"a": {"b": 20}}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$a.b"}}}], - expected=[{"_id": None, "result": 10}], - msg="$first should accept a nested document field path", - ), - AccumulatorTestCase( - "input_literal", - docs=[{"v": 1}, {"v": 2}], - pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}], - expected=[{"_id": None, "result": 42}], - msg="$first with a literal constant should return that constant", - ), - AccumulatorTestCase( - "input_null_literal", - docs=[{"v": 1}, {"v": 2}], - pipeline=[{"$group": {"_id": None, "result": {"$first": None}}}], - expected=[{"_id": None, "result": None}], - msg="$first with null literal should return null", - ), -] - -# Property [Edge Cases]: edge cases unique to the accumulator context. 
-FIRST_EDGE_CASE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "edge_single_doc", - docs=[{"v": 42}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": 42}], - msg="$first of a single document should return that document's value", - ), - AccumulatorTestCase( - "edge_single_null_doc", - docs=[{"v": None}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": None}], - msg="$first of a single null document should return null", - ), - AccumulatorTestCase( - "edge_single_missing_doc", - docs=[{"x": 1}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": None}], - msg="$first of a single document with missing field should return null", - ), - AccumulatorTestCase( - "edge_many_docs", - docs=[{"v": i} for i in range(100)], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": 0}], - msg="$first should return first document's value (v=0) across 100 documents", - ), - AccumulatorTestCase( - "edge_empty_collection", - docs=None, - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[], - msg="$first on empty collection should return empty result", - ), - AccumulatorTestCase( - "edge_array_not_traversed", - docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": [5, 1, 8]}], - msg="$first should return array as whole value, not traverse it", - ), - AccumulatorTestCase( - "edge_literal_constant", - docs=[{"v": 1}, {"v": 2}, {"v": 3}], - pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}], - expected=[{"_id": None, "result": 42}], - msg="$first with literal constant should always return that constant", - ), -] - -FIRST_GROUP_SUCCESS_TESTS = ( - FIRST_NULL_MISSING_TESTS - + FIRST_BSON_TYPE_TESTS - + FIRST_SPECIAL_NUMERIC_TESTS - + 
FIRST_DECIMAL_PRECISION_TESTS - + FIRST_TYPE_DISTINCTION_TESTS - + FIRST_MIXED_TYPE_TESTS - + FIRST_INPUT_FORM_TESTS - + FIRST_EDGE_CASE_TESTS -) - - -@pytest.mark.parametrize("test_case", pytest_params(FIRST_GROUP_SUCCESS_TESTS)) -def test_accumulator_first_group(collection, test_case: AccumulatorTestCase): - """Test $first accumulator success cases via $group.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, - ) - assertSuccess(result, test_case.expected, msg=test_case.msg) - - # Property [Return Type]: $first preserves the BSON type of the returned # value, verified using $type projection. FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [ @@ -638,10 +433,19 @@ def test_accumulator_first_group(collection, test_case: AccumulatorTestCase): ), ] +FIRST_TYPE_SUCCESS_TESTS = ( + FIRST_BSON_TYPE_TESTS + + FIRST_SPECIAL_NUMERIC_TESTS + + FIRST_DECIMAL_PRECISION_TESTS + + FIRST_TYPE_DISTINCTION_TESTS + + FIRST_MIXED_TYPE_TESTS + + FIRST_RETURN_TYPE_TESTS +) + -@pytest.mark.parametrize("test_case", pytest_params(FIRST_RETURN_TYPE_TESTS)) -def test_accumulator_first_return_type(collection, test_case: AccumulatorTestCase): - """Test $first return type verification.""" +@pytest.mark.parametrize("test_case", pytest_params(FIRST_TYPE_SUCCESS_TESTS)) +def test_accumulator_first_types(collection, test_case: AccumulatorTestCase): + """Test $first accumulator BSON type preservation and type fidelity.""" if test_case.docs: collection.insert_many(test_case.docs) result = execute_command( From b8cd95212b130bff8c45b0d8609419b3b8f149c9 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 21 May 2026 16:08:36 -0700 Subject: [PATCH 07/10] remove duplicate tests Signed-off-by: Alina (Xi) Li --- .../test_accumulator_first_null_missing.py | 14 ------- .../first/test_accumulator_first_types.py | 41 ------------------- 2 files changed, 55 
deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py index af62338b..5fe5086e 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py @@ -109,13 +109,6 @@ # Property [Input Forms]: $first accumulator accepts various expression types as its operand. FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "input_field_path", - docs=[{"v": 10}, {"v": 20}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": 10}], - msg="$first should accept a basic field path reference", - ), AccumulatorTestCase( "input_nested_field", docs=[{"a": {"b": 10}}, {"a": {"b": 20}}], @@ -183,13 +176,6 @@ expected=[{"_id": None, "result": [5, 1, 8]}], msg="$first should return array as whole value, not traverse it", ), - AccumulatorTestCase( - "edge_literal_constant", - docs=[{"v": 1}, {"v": 2}, {"v": 3}], - pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}], - expected=[{"_id": None, "result": 42}], - msg="$first with literal constant should always return that constant", - ), ] FIRST_SUCCESS_TESTS = FIRST_NULL_MISSING_TESTS + FIRST_INPUT_FORM_TESTS + FIRST_EDGE_CASE_TESTS diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py index b7e086e8..b385e89e 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py @@ 
-275,46 +275,6 @@ ), ] -# Property [No Coercion]: $first preserves BSON type distinctions without -# coercing similar-looking values. -FIRST_TYPE_DISTINCTION_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "distinct_false_not_zero", - docs=[{"v": False}, {"v": 999}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": False}], - msg="$first should return False, not coerce to 0", - ), - AccumulatorTestCase( - "distinct_true_not_one", - docs=[{"v": True}, {"v": 999}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": True}], - msg="$first should return True, not coerce to 1", - ), - AccumulatorTestCase( - "distinct_zero_not_false", - docs=[{"v": 0}, {"v": 999}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": 0}], - msg="$first should return int32(0), not coerce to False", - ), - AccumulatorTestCase( - "distinct_empty_string", - docs=[{"v": ""}, {"v": 999}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": ""}], - msg="$first should return empty string, not coerce to null", - ), - AccumulatorTestCase( - "distinct_string_number", - docs=[{"v": "123"}, {"v": 999}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": "123"}], - msg="$first should return string '123', not coerce to int", - ), -] - # Property [Position-Based]: $first picks the first document's value # regardless of what other documents contain. 
FIRST_MIXED_TYPE_TESTS: list[AccumulatorTestCase] = [ @@ -437,7 +397,6 @@ FIRST_BSON_TYPE_TESTS + FIRST_SPECIAL_NUMERIC_TESTS + FIRST_DECIMAL_PRECISION_TESTS - + FIRST_TYPE_DISTINCTION_TESTS + FIRST_MIXED_TYPE_TESTS + FIRST_RETURN_TYPE_TESTS ) From c32a862b670da72580e92f6be92ce474558dd93b Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 21 May 2026 16:13:44 -0700 Subject: [PATCH 08/10] rename unclear tests and remove unrelated tests Signed-off-by: Alina (Xi) Li --- .../test_accumulator_first_null_missing.py | 87 +------------------ .../first/test_accumulator_first_types.py | 86 ------------------ 2 files changed, 3 insertions(+), 170 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py index 5fe5086e..98de6e4b 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py @@ -1,4 +1,4 @@ -"""Tests for $first accumulator null, missing, input form, and edge case behavior.""" +"""Tests for $first accumulator null, missing, and edge case behavior.""" from __future__ import annotations @@ -63,73 +63,6 @@ expected=[{"_id": None, "result": None}], msg="$first should return null when first is null and second is missing", ), - AccumulatorTestCase( - "null_remove_first_then_value", - docs=[{"v": -1}, {"v": 5}], - pipeline=[ - { - "$group": { - "_id": None, - "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, - } - } - ], - expected=[{"_id": None, "result": None}], - msg="$first should return null when first doc produces $$REMOVE", - ), - AccumulatorTestCase( - "null_remove_all", - docs=[{"v": -1}, {"v": -2}], - pipeline=[ - { - "$group": { - "_id": None, - "result": {"$first": 
{"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, - } - } - ], - expected=[{"_id": None, "result": None}], - msg="$first should return null when all docs produce $$REMOVE", - ), - AccumulatorTestCase( - "null_remove_second_value_first", - docs=[{"v": 5}, {"v": -1}], - pipeline=[ - { - "$group": { - "_id": None, - "result": {"$first": {"$cond": [{"$gt": ["$v", 0]}, "$v", "$$REMOVE"]}}, - } - } - ], - expected=[{"_id": None, "result": 5}], - msg="$first should return value when first doc has value, second $$REMOVE", - ), -] - -# Property [Input Forms]: $first accumulator accepts various expression types as its operand. -FIRST_INPUT_FORM_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "input_nested_field", - docs=[{"a": {"b": 10}}, {"a": {"b": 20}}], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$a.b"}}}], - expected=[{"_id": None, "result": 10}], - msg="$first should accept a nested document field path", - ), - AccumulatorTestCase( - "input_literal", - docs=[{"v": 1}, {"v": 2}], - pipeline=[{"$group": {"_id": None, "result": {"$first": 42}}}], - expected=[{"_id": None, "result": 42}], - msg="$first with a literal constant should return that constant", - ), - AccumulatorTestCase( - "input_null_literal", - docs=[{"v": 1}, {"v": 2}], - pipeline=[{"$group": {"_id": None, "result": {"$first": None}}}], - expected=[{"_id": None, "result": None}], - msg="$first with null literal should return null", - ), ] # Property [Edge Cases]: edge cases unique to the accumulator context. 
@@ -155,20 +88,6 @@ expected=[{"_id": None, "result": None}], msg="$first of a single document with missing field should return null", ), - AccumulatorTestCase( - "edge_many_docs", - docs=[{"v": i} for i in range(100)], - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[{"_id": None, "result": 0}], - msg="$first should return first document's value (v=0) across 100 documents", - ), - AccumulatorTestCase( - "edge_empty_collection", - docs=None, - pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], - expected=[], - msg="$first on empty collection should return empty result", - ), AccumulatorTestCase( "edge_array_not_traversed", docs=[{"v": [5, 1, 8]}, {"v": [3, 9, 2]}], @@ -178,12 +97,12 @@ ), ] -FIRST_SUCCESS_TESTS = FIRST_NULL_MISSING_TESTS + FIRST_INPUT_FORM_TESTS + FIRST_EDGE_CASE_TESTS +FIRST_SUCCESS_TESTS = FIRST_NULL_MISSING_TESTS + FIRST_EDGE_CASE_TESTS @pytest.mark.parametrize("test_case", pytest_params(FIRST_SUCCESS_TESTS)) def test_accumulator_first_null_missing(collection, test_case: AccumulatorTestCase): - """Test $first accumulator null, missing, input form, and edge case behavior.""" + """Test $first accumulator null, missing, and edge case behavior.""" if test_case.docs: collection.insert_many(test_case.docs) result = execute_command( diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py index b385e89e..2682c5f6 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py @@ -308,97 +308,11 @@ ), ] -# Property [Return Type]: $first preserves the BSON type of the returned -# value, verified using $type projection. 
-FIRST_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "return_type_int32", - docs=[{"v": 42}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, - ], - expected=[{"value": 42, "type": "int"}], - msg="$first of int32 should return type 'int'", - ), - AccumulatorTestCase( - "return_type_int64", - docs=[{"v": Int64(42)}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, - ], - expected=[{"value": Int64(42), "type": "long"}], - msg="$first of Int64 should return type 'long'", - ), - AccumulatorTestCase( - "return_type_double", - docs=[{"v": 3.14}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, - ], - expected=[{"value": 3.14, "type": "double"}], - msg="$first of double should return type 'double'", - ), - AccumulatorTestCase( - "return_type_decimal", - docs=[{"v": Decimal128("3.14")}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, - ], - expected=[{"value": Decimal128("3.14"), "type": "decimal"}], - msg="$first of Decimal128 should return type 'decimal'", - ), - AccumulatorTestCase( - "return_type_string", - docs=[{"v": "hello"}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, - ], - expected=[{"value": "hello", "type": "string"}], - msg="$first of string should return type 'string'", - ), - AccumulatorTestCase( - "return_type_boolean", - docs=[{"v": True}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "value": "$result", "type": 
{"$type": "$result"}}}, - ], - expected=[{"value": True, "type": "bool"}], - msg="$first of boolean should return type 'bool'", - ), - AccumulatorTestCase( - "return_type_date", - docs=[{"v": datetime(2023, 6, 15, tzinfo=timezone.utc)}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, - ], - expected=[{"value": datetime(2023, 6, 15, tzinfo=timezone.utc), "type": "date"}], - msg="$first of datetime should return type 'date'", - ), - AccumulatorTestCase( - "return_type_null", - docs=[{"v": None}, {"v": 999}], - pipeline=[ - {"$group": {"_id": None, "result": {"$first": "$v"}}}, - {"$project": {"_id": 0, "value": "$result", "type": {"$type": "$result"}}}, - ], - expected=[{"value": None, "type": "null"}], - msg="$first of null should return type 'null'", - ), -] - FIRST_TYPE_SUCCESS_TESTS = ( FIRST_BSON_TYPE_TESTS + FIRST_SPECIAL_NUMERIC_TESTS + FIRST_DECIMAL_PRECISION_TESTS + FIRST_MIXED_TYPE_TESTS - + FIRST_RETURN_TYPE_TESTS ) From a4e29655293e411d71f5f3647dea732161c6feb1 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 21 May 2026 16:46:46 -0700 Subject: [PATCH 09/10] add initial integration tests Signed-off-by: Alina (Xi) Li --- .../test_accumulators_first_integration.py | 483 ++++++++++++++++++ 1 file changed, 483 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_first_integration.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_first_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_first_integration.py new file mode 100644 index 00000000..3ee006e5 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_first_integration.py @@ -0,0 +1,483 @@ +"""Tests for $first accumulator composed with sibling accumulators in the same $group.""" + +from 
__future__ import annotations + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils.accumulator_test_case import ( # noqa: E501 + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [First with Last]: $first and $last coexist in the same $group, +# picking the first and last values respectively. A preceding $sort +# establishes deterministic order. +FIRST_WITH_LAST_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "first_last_sorted_asc", + docs=[ + {"cat": "a", "v": 30}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "last_v": {"$last": "$v"}, + } + }, + ], + expected=[{"_id": "a", "first_v": 10, "last_v": 30}], + msg="$first should pick smallest and $last should pick largest after ascending sort", + ), + AccumulatorTestCase( + "first_last_sorted_desc", + docs=[ + {"cat": "a", "v": 30}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + ], + pipeline=[ + {"$sort": {"v": -1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "last_v": {"$last": "$v"}, + } + }, + ], + expected=[{"_id": "a", "first_v": 30, "last_v": 10}], + msg="$first should pick largest and $last should pick smallest after descending sort", + ), + AccumulatorTestCase( + "first_last_multiple_groups", + docs=[ + {"cat": "a", "v": 5}, + {"cat": "a", "v": 15}, + {"cat": "b", "v": 100}, + {"cat": "b", "v": 200}, + {"cat": "b", "v": 300}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "last_v": {"$last": "$v"}, + } + }, + ], + expected=[ + {"_id": "a", "first_v": 5, "last_v": 15}, + {"_id": "b", "first_v": 100, "last_v": 300}, + ], + msg="$first and 
$last should work independently across multiple groups", + ), + AccumulatorTestCase( + "first_last_null_first_doc", + docs=[ + {"cat": "a", "v": None}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "last_v": {"$last": "$v"}, + } + }, + ], + expected=[{"_id": "a", "first_v": None, "last_v": 20}], + msg="$first should return null (null sorts first) while $last returns 20", + ), +] + +# Property [First with Min/Max]: $first is position-based while $min/$max +# are value-based. The same data can produce different $first results +# depending on sort order, but $min/$max are always the same. +FIRST_WITH_MIN_MAX_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "first_min_max_sorted_asc", + docs=[ + {"cat": "a", "v": 30}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "lo": {"$min": "$v"}, + "hi": {"$max": "$v"}, + } + }, + ], + expected=[{"_id": "a", "first_v": 10, "lo": 10, "hi": 30}], + msg="$first equals $min after ascending sort; $max is independent", + ), + AccumulatorTestCase( + "first_min_max_sorted_desc", + docs=[ + {"cat": "a", "v": 30}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + ], + pipeline=[ + {"$sort": {"v": -1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "lo": {"$min": "$v"}, + "hi": {"$max": "$v"}, + } + }, + ], + expected=[{"_id": "a", "first_v": 30, "lo": 10, "hi": 30}], + msg="$first equals $max after descending sort; $min/$max unchanged", + ), + AccumulatorTestCase( + "first_min_max_null_divergence", + docs=[ + {"cat": "a", "v": None}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 5}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "lo": {"$min": "$v"}, + "hi": {"$max": "$v"}, + } + }, + ], + expected=[{"_id": "a", "first_v": 
None, "lo": 5, "hi": 10}], + msg="$first returns null (includes it) while $min/$max ignore null", + ), +] + +# Property [First with Sum/Avg]: $first picks one value, $sum/$avg +# aggregate all. Null divergence: $first returns null when it's in the +# first position; $sum treats null as 0; $avg excludes null from count. +FIRST_WITH_SUM_AVG_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "first_sum_avg_basic", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "a", "v": 30}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "total": {"$sum": "$v"}, + "mean": {"$avg": "$v"}, + } + }, + ], + expected=[{"_id": "a", "first_v": 10, "total": 60, "mean": 20.0}], + msg="$first picks 10 while $sum and $avg compute over all values", + ), + AccumulatorTestCase( + "first_sum_avg_null_first_doc", + docs=[ + {"cat": "a", "v": None}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "total": {"$sum": "$v"}, + "mean": {"$avg": "$v"}, + } + }, + ], + expected=[{"_id": "a", "first_v": None, "total": 30, "mean": 15.0}], + msg="$first returns null; $sum ignores null (30); $avg ignores null (15.0)", + ), + AccumulatorTestCase( + "first_sum_avg_all_null", + docs=[ + {"cat": "a", "v": None}, + {"cat": "a", "v": None}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "total": {"$sum": "$v"}, + "mean": {"$avg": "$v"}, + } + } + ], + expected=[{"_id": "a", "first_v": None, "total": 0, "mean": None}], + msg="$first returns null; $sum returns 0; $avg returns null when all null", + ), +] + +# Property [First with Count]: $first picks one value while {"$sum": 1} counts +# all documents in the group.
+FIRST_WITH_COUNT_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "first_count_basic", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "b", "v": 5}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "n": {"$sum": 1}, + } + }, + ], + expected=[ + {"_id": "a", "first_v": 10, "n": 2}, + {"_id": "b", "first_v": 5, "n": 1}, + ], + msg="$first picks one value while $sum(1) counts all docs per group", + ), + AccumulatorTestCase( + "first_count_null_counted", + docs=[ + {"cat": "a", "v": None}, + {"cat": "a", "v": 10}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "n": {"$sum": 1}, + } + }, + ], + expected=[{"_id": "a", "first_v": None, "n": 2}], + msg="$first returns null; $sum(1) still counts the null doc", + ), +] + +# Property [First with Push/AddToSet]: $first picks one value while $push +# collects all values and $addToSet collects unique values. 
+FIRST_WITH_PUSH_ADDTOSET_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "first_push_addtoset", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "a", "v": 10}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "all_vals": {"$push": "$v"}, + "unique_vals": {"$addToSet": "$v"}, + } + }, + ], + expected=[ + {"_id": "a", "first_v": 10, "all_vals": [10, 10, 20], "unique_vals": [10, 20]}, + ], + msg="$first picks 10 while $push collects all and $addToSet collects unique", + ), + AccumulatorTestCase( + "first_push_null_handling", + docs=[ + {"cat": "a", "v": None}, + {"cat": "a", "v": 10}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "all_vals": {"$push": "$v"}, + } + }, + ], + expected=[ + {"_id": "a", "first_v": None, "all_vals": [None, 10]}, + ], + msg="$first returns null; $push includes null in the collected array", + ), +] + +# Property [First with MergeObjects]: $first picks one scalar value while +# $mergeObjects combines per-document subdocuments into one merged object. +FIRST_WITH_MERGE_OBJECTS_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "first_merge_objects", + docs=[ + {"cat": "a", "v": 10, "meta": {"src": "x"}}, + {"cat": "a", "v": 20, "meta": {"quality": "high"}}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "merged": {"$mergeObjects": "$meta"}, + } + }, + ], + expected=[ + {"_id": "a", "first_v": 10, "merged": {"src": "x", "quality": "high"}}, + ], + msg="$first picks 10 while $mergeObjects combines all metadata objects", + ), +] + +# Property [Multiple First]: multiple $first accumulators in the same $group +# independently pick the first value from different fields. 
+MULTIPLE_FIRST_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "multiple_first_different_fields", + docs=[ + {"cat": "a", "name": "alice", "score": 85}, + {"cat": "a", "name": "bob", "score": 92}, + {"cat": "b", "name": "carol", "score": 78}, + ], + pipeline=[ + {"$sort": {"score": 1}}, + { + "$group": { + "_id": "$cat", + "first_name": {"$first": "$name"}, + "first_score": {"$first": "$score"}, + } + }, + ], + expected=[ + {"_id": "a", "first_name": "alice", "first_score": 85}, + {"_id": "b", "first_name": "carol", "first_score": 78}, + ], + msg="Multiple $first accumulators should independently pick first from each field", + ), + AccumulatorTestCase( + "multiple_first_one_missing", + docs=[ + {"cat": "a", "score": 85}, + {"cat": "a", "name": "bob", "score": 92}, + ], + pipeline=[ + {"$sort": {"score": 1}}, + { + "$group": { + "_id": "$cat", + "first_name": {"$first": "$name"}, + "first_score": {"$first": "$score"}, + } + }, + ], + expected=[{"_id": "a", "first_name": None, "first_score": 85}], + msg="$first returns null for missing field while sibling $first returns value", + ), +] + +# Property [First Type Preservation with Sibling]: $first preserves the BSON +# type of the first document's value, even when sibling accumulators promote +# types (e.g. $sum promoting int32+Decimal128 to Decimal128). 
+FIRST_TYPE_PRESERVATION_WITH_SIBLING_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "first_int32_with_sum_decimal128", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": Decimal128("20.5")}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "total": {"$sum": "$v"}, + } + }, + ], + expected=[{"_id": "a", "first_v": 10, "total": Decimal128("30.5")}], + msg="$first preserves int32 while $sum promotes to Decimal128", + ), + AccumulatorTestCase( + "first_int64_with_sum_double", + docs=[ + {"cat": "a", "v": Int64(100)}, + {"cat": "a", "v": 2.5}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "first_v": {"$first": "$v"}, + "total": {"$sum": "$v"}, + } + }, + ], + expected=[{"_id": "a", "first_v": 2.5, "total": 102.5}], + msg="$first preserves double (2.5 sorts first) while $sum promotes to double", + ), +] + +FIRST_INTEGRATION_TESTS = ( + FIRST_WITH_LAST_TESTS + + FIRST_WITH_MIN_MAX_TESTS + + FIRST_WITH_SUM_AVG_TESTS + + FIRST_WITH_COUNT_TESTS + + FIRST_WITH_PUSH_ADDTOSET_TESTS + + FIRST_WITH_MERGE_OBJECTS_TESTS + + MULTIPLE_FIRST_TESTS + + FIRST_TYPE_PRESERVATION_WITH_SIBLING_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(FIRST_INTEGRATION_TESTS)) +def test_accumulators_first_integration(collection, test_case: AccumulatorTestCase): + """Test $first accumulator composed with sibling accumulators in the same $group.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline or [], "cursor": {}}, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ignore_doc_order=True, + ignore_order_in=["unique_vals"], + ) From 5cdbefe8fcd0ac1da7ec9a1e1d37b0ad8a49fc4e Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Wed, 27 May 2026 15:15:04 -0700 Subject: [PATCH 10/10] Address 
comments Add tests: arity tests, BSON constant tests, expression tests, expression error propagation, empty-group behavior, and order dependence tests Signed-off-by: Alina (Xi) Li --- .../first/test_accumulator_first_errors.py | 117 ++++++++ .../test_accumulator_first_null_missing.py | 27 ++ .../first/test_accumulator_first_types.py | 280 ++++++++++++++++++ 3 files changed, 424 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_errors.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_errors.py new file mode 100644 index 00000000..c102ad9b --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_errors.py @@ -0,0 +1,117 @@ +"""Tests for $first accumulator error cases: arity rejection and expression error propagation.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertFailureCode +from documentdb_tests.framework.error_codes import ( + CONVERSION_FAILURE_ERROR, + DIVIDE_BY_ZERO_V2_ERROR, + EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, + GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Arity]: $first in accumulator context is a unary operator and +# rejects array syntax.
+FIRST_ARITY_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "arity_empty_array", + docs=[{"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": []}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$first should reject empty array in accumulator context", + ), + AccumulatorTestCase( + "arity_single_element_array", + docs=[{"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": [1]}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$first should reject single-element literal array in accumulator context", + ), + AccumulatorTestCase( + "arity_single_field_ref_array", + docs=[{"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": ["$v"]}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$first should reject single field ref in array in accumulator context", + ), + AccumulatorTestCase( + "arity_multi_element_array", + docs=[{"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": [1, 2, 3]}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$first should reject multi-element array in accumulator context", + ), + AccumulatorTestCase( + "arity_multi_key_expression_object", + docs=[{"v": 1}], + pipeline=[ + { + "$group": { + "_id": None, + "result": {"$first": {"$add": [1, 2], "$multiply": [3, 4]}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=EXPRESSION_OBJECT_MULTIPLE_FIELDS_ERROR, + msg="$first should reject multi-key expression object", + ), +] + +# Property [Expression Error Propagation]: errors raised during sub-expression +# evaluation propagate through the accumulator without being caught. 
+FIRST_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "expr_error_divide_by_zero", + docs=[{"v": 1}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": {"$divide": ["$v", 0]}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=DIVIDE_BY_ZERO_V2_ERROR, + msg="$first should propagate $divide by zero error", + ), + AccumulatorTestCase( + "expr_error_to_int_invalid_string", + docs=[{"v": "abc"}], + pipeline=[ + {"$group": {"_id": None, "result": {"$first": {"$toInt": "$v"}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + error_code=CONVERSION_FAILURE_ERROR, + msg="$first should propagate $toInt conversion error from expression", + ), +] + +FIRST_ERROR_TESTS = FIRST_ARITY_ERROR_TESTS + FIRST_EXPRESSION_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(FIRST_ERROR_TESTS)) +def test_accumulator_first_errors(collection, test_case): + """Test $first accumulator error cases.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, + ) + assertFailureCode(result, test_case.error_code, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py index 98de6e4b..d2e2b9e8 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_null_missing.py @@ -95,6 +95,33 @@ expected=[{"_id": None, "result": [5, 1, 8]}], msg="$first should return array as whole value, not traverse it", ), + AccumulatorTestCase( + "edge_empty_collection", + docs=[], + pipeline=[{"$group": {"_id": None, "result": {"$first": "$v"}}}], + expected=[], + 
msg="$first on empty collection should produce no groups (empty result)", + ), + AccumulatorTestCase( + "edge_order_dependent_asc", + docs=[{"v": 3}, {"v": 1}, {"v": 5}, {"v": 2}, {"v": 4}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + ], + expected=[{"_id": None, "result": 1}], + msg="$first with ascending sort should return smallest value", + ), + AccumulatorTestCase( + "edge_order_dependent_desc", + docs=[{"v": 3}, {"v": 1}, {"v": 5}, {"v": 2}, {"v": 4}], + pipeline=[ + {"$sort": {"v": -1}}, + {"$group": {"_id": None, "result": {"$first": "$v"}}}, + ], + expected=[{"_id": None, "result": 5}], + msg="$first with descending sort should return largest value", + ), ] FIRST_SUCCESS_TESTS = FIRST_NULL_MISSING_TESTS + FIRST_EDGE_CASE_TESTS diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py index 2682c5f6..df4fa9f9 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/first/test_accumulator_first_types.py @@ -10,6 +10,8 @@ Binary, Decimal128, Int64, + MaxKey, + MinKey, ObjectId, Regex, Timestamp, @@ -308,11 +310,289 @@ ), ] +# --------------------------------------------------------------------------- +# Property [BSON Constant Arguments]: $first accepts BSON constants as the +# accumulator argument (not field references). The constant is returned for +# every document, so the "first" value is that constant. 
+# --------------------------------------------------------------------------- +FIRST_BSON_CONSTANT_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "const_true", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": True}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": True}], + msg="$first with boolean True constant should return True", + ), + AccumulatorTestCase( + "const_false", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": False}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": False}], + msg="$first with boolean False constant should return False", + ), + AccumulatorTestCase( + "const_int64", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": Int64(42)}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Int64(42)}], + msg="$first with Int64 constant should return that Int64 value", + ), + AccumulatorTestCase( + "const_double", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": 3.14}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 3.14}], + msg="$first with double constant should return that double value", + ), + AccumulatorTestCase( + "const_decimal128", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": Decimal128("3.14")}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("3.14")}], + msg="$first with Decimal128 constant should return that Decimal128 value", + ), + AccumulatorTestCase( + "const_string", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": "hello"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": "hello"}], + 
msg="$first with string constant (no $) should return that string", + ), + AccumulatorTestCase( + "const_binary", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": Binary(b"\x01\x02")}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": b"\x01\x02"}], + msg="$first with Binary constant should return that Binary value", + ), + AccumulatorTestCase( + "const_objectid", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": None, + "result": {"$first": ObjectId("000000000000000000000000")}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": ObjectId("000000000000000000000000")}], + msg="$first with ObjectId constant should return that ObjectId", + ), + AccumulatorTestCase( + "const_datetime", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": None, + "result": {"$first": datetime(2020, 1, 1, tzinfo=timezone.utc)}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": datetime(2020, 1, 1, tzinfo=timezone.utc)}], + msg="$first with datetime constant should return that datetime", + ), + AccumulatorTestCase( + "const_timestamp", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": Timestamp(1, 1)}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Timestamp(1, 1)}], + msg="$first with Timestamp constant should return that Timestamp", + ), + AccumulatorTestCase( + "const_regex", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": Regex("abc", "i")}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Regex("abc", "i")}], + msg="$first with Regex constant should return that Regex", + ), + AccumulatorTestCase( + "const_null", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": 
None, "result": {"$first": None}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$first with null constant should return null", + ), + AccumulatorTestCase( + "const_minkey", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": MinKey()}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": {"": MinKey()}}], + msg="$first with MinKey constant should return MinKey wrapped in document", + ), + AccumulatorTestCase( + "const_maxkey", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": MaxKey()}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": {"": MaxKey()}}], + msg="$first with MaxKey constant should return MaxKey wrapped in document", + ), +] + +# --------------------------------------------------------------------------- +# Property [Expression Types]: $first accepts various expression types as +# its operand and evaluates them per document before picking the first. 
+# --------------------------------------------------------------------------- +FIRST_EXPRESSION_TYPE_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "expr_operator_single", + docs=[{"v": -10}, {"v": 20}, {"v": -5}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": {"$abs": "$v"}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 10}], + msg="$first should accept single-input expression operator", + ), + AccumulatorTestCase( + "expr_operator_multi_arg", + docs=[{"v": -10, "w": 3}, {"v": 20, "w": 7}, {"v": -5, "w": 1}], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": None, + "result": {"$first": {"$add": ["$v", "$w"]}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": -7}], + msg="$first should accept a multi-arg expression operator", + ), + AccumulatorTestCase( + "expr_nested", + docs=[{"v": -10}, {"v": 20}, {"v": -5}], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": None, + "result": {"$first": {"$add": [1, {"$abs": "$v"}]}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 11}], + msg="$first should accept nested expression operators", + ), + AccumulatorTestCase( + "expr_sysvar_remove", + docs=[{"v": 1}, {"v": 2}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": "$$REMOVE"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": None}], + msg="$first with $$REMOVE should treat value as missing and return null", + ), + AccumulatorTestCase( + "expr_object_expression", + docs=[{"v": 10}, {"v": 20}, {"v": 5}], + pipeline=[ + {"$sort": {"v": 1}}, + {"$group": {"_id": None, "result": {"$first": {"a": "$v"}}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": {"a": 5}}], + msg="$first should accept an object expression", + ), + AccumulatorTestCase( + "expr_object_with_operator", + docs=[{"v": -10}, {"v": 20}, {"v": -5}], + pipeline=[ + 
{"$sort": {"v": 1}}, + { + "$group": { + "_id": None, + "result": {"$first": {"a": {"$abs": "$v"}}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": {"a": 10}}], + msg="$first should accept an object expression containing an operator", + ), + AccumulatorTestCase( + "expr_let", + docs=[{"v": 10}, {"v": 20}, {"v": 5}], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": None, + "result": {"$first": {"$let": {"vars": {"x": "$v"}, "in": "$$x"}}}, + } + }, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": 5}], + msg="$first should accept a $let expression as its operand", + ), +] + FIRST_TYPE_SUCCESS_TESTS = ( FIRST_BSON_TYPE_TESTS + FIRST_SPECIAL_NUMERIC_TESTS + FIRST_DECIMAL_PRECISION_TESTS + FIRST_MIXED_TYPE_TESTS + + FIRST_BSON_CONSTANT_TESTS + + FIRST_EXPRESSION_TYPE_TESTS )