From 6f4d80d82f13617433d044b19cae90cee0a40612 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 14 May 2026 16:34:34 -0700 Subject: [PATCH 1/8] avg tests used tests from local generate and Daniel F Signed-off-by: Alina (Xi) Li copied AccumulatorTestCase from sum branch Signed-off-by: Alina (Xi) Li convert tests to use Signed-off-by: Alina (Xi) Li Add init.py Signed-off-by: Alina (Xi) Li split into smaller test files Signed-off-by: Alina (Xi) Li remove duplicate tests Signed-off-by: Alina (Xi) Li rename to make tests clearer Signed-off-by: Alina (Xi) Li Avg integration tests Signed-off-by: Alina (Xi) Li style changes Signed-off-by: Alina (Xi) Li --- .../operator/accumulators/avg/__init__.py | 0 .../accumulators/avg/test_avg_errors.py | 189 +++++++ .../accumulators/avg/test_avg_field_lookup.py | 217 ++++++++ .../avg/test_avg_group_boundaries.py | 519 ++++++++++++++++++ .../avg/test_avg_group_context.py | 327 +++++++++++ .../accumulators/avg/test_avg_group_types.py | 258 +++++++++ .../accumulators/avg/test_avg_non_numeric.py | 255 +++++++++ .../accumulators/avg/test_avg_null_missing.py | 118 ++++ .../avg/test_avg_pipeline_contexts.py | 486 ++++++++++++++++ .../avg/test_avg_special_numeric.py | 152 +++++ .../test_accumulators_avg_integration.py | 372 +++++++++++++ documentdb_tests/framework/error_codes.py | 1 + 12 files changed, 2894 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py create mode 100644 
documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/__init__.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py new file mode 100644 index 00000000..d163850f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py @@ -0,0 +1,189 @@ +""" +Tests for $avg accumulator error handling. + +Covers arity validation (rejects array syntax in $group, $bucket, $bucketAuto) +and expression error propagation ($toInt, $divide, $mod). 
+""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + CONVERSION_FAILURE_ERROR, + DIVIDE_BY_ZERO_V2_ERROR, + GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + MODULO_BY_ZERO_V2_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Arity]: $avg in accumulator context is a unary operator and +# rejects array syntax in $group, $bucket, and $bucketAuto. +AVG_ARITY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "arity_multi_element_group", + pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v", "$v"]}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject multi-element array syntax in $group", + ), + AccumulatorTestCase( + "arity_empty_array_group", + pipeline=[{"$group": {"_id": None, "result": {"$avg": []}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject empty array syntax in $group", + ), + AccumulatorTestCase( + "arity_single_element_group", + pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v"]}}}], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject single-element array syntax in $group", + ), + AccumulatorTestCase( + "arity_multi_element_bucket", + pipeline=[ + { + "$bucket": { + "groupBy": "$v", + "boundaries": [0, 10], + "output": {"result": {"$avg": ["$v", "$v"]}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject multi-element array syntax in $bucket", + ), + AccumulatorTestCase( + "arity_empty_array_bucket", + pipeline=[ + { + "$bucket": { + "groupBy": "$v", + "boundaries": [0, 10], + "output": {"result": {"$avg": []}}, + } + } + ], + 
error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject empty array syntax in $bucket", + ), + AccumulatorTestCase( + "arity_single_element_bucket", + pipeline=[ + { + "$bucket": { + "groupBy": "$v", + "boundaries": [0, 10], + "output": {"result": {"$avg": ["$v"]}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject single-element array syntax in $bucket", + ), + AccumulatorTestCase( + "arity_multi_element_bucket_auto", + pipeline=[ + { + "$bucketAuto": { + "groupBy": "$v", + "buckets": 1, + "output": {"result": {"$avg": ["$v", "$v"]}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject multi-element array syntax in $bucketAuto", + ), + AccumulatorTestCase( + "arity_empty_array_bucket_auto", + pipeline=[ + { + "$bucketAuto": { + "groupBy": "$v", + "buckets": 1, + "output": {"result": {"$avg": []}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject empty array syntax in $bucketAuto", + ), + AccumulatorTestCase( + "arity_single_element_bucket_auto", + pipeline=[ + { + "$bucketAuto": { + "groupBy": "$v", + "buckets": 1, + "output": {"result": {"$avg": ["$v"]}}, + } + } + ], + error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, + msg="$avg should reject single-element array syntax in $bucketAuto", + ), +] + +# Property [Expression Error Propagation]: errors from sub-expressions +# propagate through $avg without being caught or suppressed. 
+AVG_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "error_prop_toint_non_convertible",
+        docs=[{"v": "hello"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": {"$toInt": "$v"}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=CONVERSION_FAILURE_ERROR,
+        msg="$avg should propagate $toInt conversion error for non-convertible value",
+    ),
+    AccumulatorTestCase(
+        "error_prop_divide_by_zero",
+        docs=[{"v": 10}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": {"$divide": ["$v", 0]}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=DIVIDE_BY_ZERO_V2_ERROR,
+        msg="$avg should propagate $divide by zero error",
+    ),
+    AccumulatorTestCase(
+        "error_prop_mod_by_zero",
+        docs=[{"v": 10}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": {"$mod": ["$v", 0]}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        error_code=MODULO_BY_ZERO_V2_ERROR,
+        msg="$avg should propagate $mod by zero error",
+    ),
+]
+
+AVG_ERROR_TESTS: list[AccumulatorTestCase] = AVG_ARITY_TESTS + AVG_EXPRESSION_ERROR_TESTS
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_ERROR_TESTS))
+def test_avg_errors(collection, test_case: AccumulatorTestCase):
+    """Test $avg accumulator error handling."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    else:
+        collection.insert_one({"v": 1})  # fallback seed for cases that define no docs
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py
new file mode 100644
index 00000000..45173106
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py
@@ -0,0 +1,217 @@
+"""
+Tests for $avg accumulator expression types and field lookup in $group context.
+
+Covers expression types (literal, field path, computed expressions, conditional)
+and field path resolution (simple, nested, missing, array-valued fields).
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Expression Type]: $avg accepts field paths, computed expressions,
+# literals, and conditional expressions in $group context.
+
+AVG_EXPRESSION_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "field_path",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+            {"_id": 3, "value": 30},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}],
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg with field path should average field values",
+    ),
+    AccumulatorTestCase(
+        "computed_expression",
+        docs=[
+            {"_id": 1, "a": 2, "b": 3},
+            {"_id": 2, "a": 4, "b": 6},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": {"$multiply": ["$a", "$b"]}}}}],
+        # (2*3 + 4*6) / 2 = (6 + 24) / 2 = 15
+        expected=[{"_id": None, "avg": 15.0}],
+        msg="$avg with computed expression should average computed values",
+    ),
+    AccumulatorTestCase(
+        "literal_numeric",
+        docs=[
+            {"_id": 1},
+            {"_id": 2},
+            {"_id": 3},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": 5}}}],
+        expected=[{"_id": None, "avg": 5.0}],
+        msg="$avg with literal numeric should return that constant",
+    ),
+    AccumulatorTestCase(
+        "literal_null",
+        docs=[{"_id": 1}, {"_id": 2}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": None}}}],
+        expected=[{"_id": None, "avg": None}],
+        msg="$avg with null literal should return null",
+    ),
+    AccumulatorTestCase(
+        "cond_expression",
+        docs=[
+            {"_id": 1, "value": 10, "include": True},
+            {"_id": 2, "value": 20, "include": False},
+            {"_id": 3, "value": 30, "include": True},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "avg": {
+                        "$avg": {
+                            "$cond": [
+                                "$include",
+                                "$value",
+                                None,
+                            ]
+                        }
+                    },
+                }
+            },
+        ],
+        # Only values 10 and 30 contribute (null is ignored), avg = 20
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg with $cond should average only non-null conditional results",
+    ),
+    AccumulatorTestCase(
+        "ifnull_expression",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2},
+            {"_id": 3, "value": 30},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "avg": {"$avg": {"$ifNull": ["$value", 0]}},
+                }
+            },
+        ],
+        # (10 + 0 + 30) / 3 = 13.333...
+        expected=[{"_id": None, "avg": 13.333333333333334}],
+        msg="$avg with $ifNull should replace missing with 0",
+    ),
+]
+
+# Property [Field Resolution]: field path resolution behaviors with $avg in $group context.
+
+AVG_FIELD_RESOLUTION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "nested_field_path",
+        docs=[
+            {"_id": 1, "nested": {"value": 10}},
+            {"_id": 2, "nested": {"value": 20}},
+            {"_id": 3, "nested": {"value": 30}},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$nested.value"}}}],
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg with nested field path should resolve and average",
+    ),
+    AccumulatorTestCase(
+        "missing_field",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$nonexistent"}}}],
+        expected=[{"_id": None, "avg": None}],
+        msg="$avg with non-existent field should return null",
+    ),
+    AccumulatorTestCase(
+        "field_resolves_to_array",
+        docs=[
+            {"_id": 1, "value": [1, 2, 3]},
+            {"_id": 2, "value": [4, 5, 6]},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}],
+        expected=[{"_id": None, "avg": None}],
+        msg="$avg in $group should treat array values as non-numeric",
+    ),
+    AccumulatorTestCase(
+        "mixed_array_and_numeric",
+        docs=[
+            {"_id": 1, "value": [1, 2, 3]},
+            {"_id": 2, "value": 10},
+            {"_id": 3, "value": 20},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$value"}}}],
+        # Array is ignored: (10 + 20) / 2 = 15
+        expected=[{"_id": None, "avg": 15.0}],
+        msg="$avg in $group should ignore array values and average numerics",
+    ),
+    AccumulatorTestCase(
+        "deeply_nested_path",
+        docs=[
+            {"_id": 1, "a": {"b": {"c": {"d": 10}}}},
+            {"_id": 2, "a": {"b": {"c": {"d": 20}}}},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$a.b.c.d"}}}],
+        expected=[{"_id": None, "avg": 15.0}],
+        msg="$avg with deeply nested path should resolve correctly",
+    ),
+    AccumulatorTestCase(
+        "intermediate_null",
+        docs=[
+            {"_id": 1, "a": {"b": 10}},
+            {"_id": 2, "a": None},
+            {"_id": 3, "a": {"b": 30}},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$a.b"}}}],
+        # Doc 2 has null intermediate, treated as missing: (10 + 30) / 2 = 20
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg should treat null intermediate as missing",
+    ),
+    AccumulatorTestCase(
+        "multiple_accumulators",
+        docs=[
+            {"_id": 1, "a": 10, "b": 100},
+            {"_id": 2, "a": 20, "b": 200},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": None,
+                    "avg_a": {"$avg": "$a"},
+                    "avg_b": {"$avg": "$b"},
+                }
+            },
+        ],
+        expected=[{"_id": None, "avg_a": 15.0, "avg_b": 150.0}],
+        msg="Multiple $avg accumulators should work independently",
+    ),
+]
+
+AVG_FIELD_LOOKUP_TESTS: list[AccumulatorTestCase] = (
+    AVG_EXPRESSION_TYPE_TESTS + AVG_FIELD_RESOLUTION_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_FIELD_LOOKUP_TESTS))
+def test_avg_field_lookup(collection, test_case: AccumulatorTestCase):
+    """Test $avg field lookup and expression types in $group context."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py
new file mode 100644
index 00000000..a1ed3c86
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py
@@ -0,0 +1,519 @@
+"""
+Tests for $avg accumulator boundary values and overflow in $group context.
+
+Covers int32/int64 boundary values, double boundary values (subnormal, normal,
+near-max), Decimal128 precision and boundary values, and sum overflow behavior.
+"""
+
+from __future__ import annotations
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+from documentdb_tests.framework.test_constants import (
+    DECIMAL128_INFINITY,
+    DECIMAL128_INT64_OVERFLOW,
+    DECIMAL128_LARGE_EXPONENT,
+    DECIMAL128_MAX,
+    DECIMAL128_MIN,
+    DECIMAL128_MIN_POSITIVE,
+    DECIMAL128_SMALL_EXPONENT,
+    DECIMAL128_TRAILING_ZERO,
+    DOUBLE_FROM_INT64_MAX,
+    DOUBLE_MAX,
+    DOUBLE_MAX_SAFE_INTEGER,
+    DOUBLE_MIN_NEGATIVE_SUBNORMAL,
+    DOUBLE_MIN_NORMAL,
+    DOUBLE_MIN_SUBNORMAL,
+    DOUBLE_NEAR_MAX,
+    DOUBLE_NEAR_MIN,
+    DOUBLE_ZERO,
+    FLOAT_INFINITY,
+    INT32_MAX,
+    INT32_MAX_MINUS_1,
+    INT32_MIN,
+    INT64_MAX,
+    INT64_MAX_MINUS_1,
+    INT64_MIN,
+    INT64_MIN_PLUS_1,
+    INT64_ZERO,
+)
+
+# Property [Integer Boundaries]: $avg handles int32 and int64 boundary values
+# including MAX, MIN, adjacent values, and overflow combinations.
+AVG_INT_BOUNDARY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="int32_zeros",
+        docs=[{"v": 0}, {"v": 0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_ZERO}],
+        msg="$avg should return 0.0 for two int32 zeros",
+    ),
+    AccumulatorTestCase(
+        id="int32_one_neg_one",
+        docs=[{"v": 1}, {"v": -1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_ZERO}],
+        msg="$avg should return 0.0 for int32 1 and -1",
+    ),
+    AccumulatorTestCase(
+        id="int32_max_pair",
+        docs=[{"_id": 0, "v": INT32_MAX}, {"_id": 1, "v": INT32_MAX}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": float(INT32_MAX)}],
+        msg="avg of two INT32_MAX should return INT32_MAX as double",
+    ),
+    AccumulatorTestCase(
+        id="int32_min_pair",
+        docs=[{"_id": 0, "v": INT32_MIN}, {"_id": 1, "v": INT32_MIN}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": float(INT32_MIN)}],
+        msg="avg of two INT32_MIN should return INT32_MIN as double",
+    ),
+    AccumulatorTestCase(
+        id="int32_max_and_min",
+        docs=[{"_id": 0, "v": INT32_MAX}, {"_id": 1, "v": INT32_MIN}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        # (2147483647 + -2147483648) / 2 = -0.5
+        expected=[{"_id": None, "avg": -0.5}],
+        msg="avg of INT32_MAX and INT32_MIN should be -0.5",
+    ),
+    AccumulatorTestCase(
+        id="int32_adjacent_max",
+        docs=[{"v": INT32_MAX_MINUS_1}, {"v": INT32_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": 2_147_483_646.5}],  # (2147483646 + 2147483647) / 2
+        msg="$avg of adjacent int32 MAX values should produce exact double",
+    ),
+    AccumulatorTestCase(
+        id="int32_adjacent_min",
+        docs=[{"v": INT32_MIN}, {"v": INT32_MIN + 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": -2_147_483_647.5}],  # (-2147483648 + -2147483647) / 2
+        msg="$avg of adjacent int32 MIN values should produce exact double",
+    ),
+    AccumulatorTestCase(
+        id="int64_max_pair",
+        docs=[{"_id": 0, "v": INT64_MAX}, {"_id": 1, "v": INT64_MAX}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 9.223372036854776e18}],  # literal == 2.0**63
+        msg="avg of two INT64_MAX should handle overflow",
+    ),
+    AccumulatorTestCase(
+        id="int64_min_pair",
+        docs=[{"_id": 0, "v": INT64_MIN}, {"_id": 1, "v": INT64_MIN}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": -9.223372036854776e18}],  # literal == -(2.0**63)
+        msg="avg of two INT64_MIN should handle overflow",
+    ),
+    AccumulatorTestCase(
+        id="int64_max_and_zero",
+        docs=[{"v": INT64_MAX}, {"v": INT64_ZERO}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_FROM_INT64_MAX / 2}],
+        msg="$avg should handle int64 MAX with precision loss in double",
+    ),
+    AccumulatorTestCase(
+        id="int64_max_and_min",
+        docs=[{"v": INT64_MAX}, {"v": INT64_MIN}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": -0.5}],  # the pair sums to -1
+        msg="$avg should handle int64 MAX and MIN together",
+    ),
+    AccumulatorTestCase(
+        id="int64_max_and_one",
+        docs=[{"_id": 0, "v": INT64_MAX}, {"_id": 1, "v": Int64(1)}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 4.611686018427388e18}],  # literal == 2.0**62
+        msg="avg of INT64_MAX and 1",
+    ),
+    AccumulatorTestCase(
+        id="int64_adjacent_max",
+        docs=[{"v": INT64_MAX_MINUS_1}, {"v": INT64_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_FROM_INT64_MAX}],
+        msg="$avg of adjacent int64 MAX values should produce double with precision loss",
+    ),
+    AccumulatorTestCase(
+        id="int64_adjacent_min",
+        docs=[{"v": INT64_MIN_PLUS_1}, {"v": INT64_MIN}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": -DOUBLE_FROM_INT64_MAX}],
+        msg="$avg of adjacent int64 MIN values should produce double with precision loss",
+    ),
+]
+
+# Property [Double Boundaries]: $avg handles double boundary values
+# including subnormal, minimum normal, near-max, and max safe integer.
+AVG_DOUBLE_BOUNDARY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="double_whole_number",
+        docs=[{"v": 3.0}, {"v": 5.0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": 4.0}],
+        msg="$avg should produce correct average for whole-number floats",
+    ),
+    AccumulatorTestCase(
+        id="double_subnormal_positive",
+        docs=[{"v": DOUBLE_MIN_SUBNORMAL}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_MIN_SUBNORMAL}],
+        msg="$avg should handle positive subnormal value correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_subnormal_negative",
+        docs=[{"v": DOUBLE_MIN_NEGATIVE_SUBNORMAL}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_MIN_NEGATIVE_SUBNORMAL}],
+        msg="$avg should handle negative subnormal value correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_subnormal_pair",
+        docs=[
+            {"_id": 0, "v": DOUBLE_MIN_SUBNORMAL},
+            {"_id": 1, "v": DOUBLE_MIN_SUBNORMAL},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DOUBLE_MIN_SUBNORMAL}],
+        msg="avg of two subnormal doubles should return subnormal",
+    ),
+    AccumulatorTestCase(
+        id="double_min_normal",
+        docs=[{"v": DOUBLE_MIN_NORMAL}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_MIN_NORMAL}],
+        msg="$avg should handle smallest positive normal double correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_max_single",
+        docs=[{"v": DOUBLE_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_MAX}],
+        msg="$avg should handle DBL_MAX as a single value correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_max_safe_integer",
+        docs=[{"v": float(DOUBLE_MAX_SAFE_INTEGER)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": float(DOUBLE_MAX_SAFE_INTEGER)}],
+        msg="$avg should handle max safe integer value correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_max_safe_integer_pair",
+        docs=[
+            {"v": float(DOUBLE_MAX_SAFE_INTEGER)},
+            {"v": float(DOUBLE_MAX_SAFE_INTEGER)},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": float(DOUBLE_MAX_SAFE_INTEGER)}],
+        msg="$avg of two max safe integer values should return that value",
+    ),
+    AccumulatorTestCase(
+        id="double_near_min_pair",
+        docs=[{"v": DOUBLE_NEAR_MIN}, {"v": DOUBLE_NEAR_MIN}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_NEAR_MIN}],
+        msg="$avg should handle values near minimum normal correctly",
+    ),
+    AccumulatorTestCase(
+        id="double_near_max_single",
+        docs=[{"v": DOUBLE_NEAR_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_NEAR_MAX}],
+        msg="$avg should handle values near maximum finite correctly",
+    ),
+]
+
+# Property [Decimal128 Precision]: $avg preserves Decimal128 precision
+# across extreme exponent differences, trailing zeros, and boundary values.
+AVG_DECIMAL128_BOUNDARY_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="decimal128_full_precision",
+        docs=[
+            {"v": Decimal128("1.000000000000000000000000000000001")},
+            {"v": Decimal128("1.000000000000000000000000000000003")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("1.000000000000000000000000000000002")}],
+        msg="$avg should preserve full 34-digit Decimal128 precision",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_high_precision",
+        docs=[
+            {
+                "_id": 0,
+                "v": Decimal128("1.000000000000000000000000000000001"),
+            },
+            {
+                "_id": 1,
+                "v": Decimal128("2.999999999999999999999999999999999"),
+            },
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": Decimal128("2.000000000000000000000000000000000")}],
+        msg="decimal128 avg should preserve high precision",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_34_digit_integer",
+        docs=[
+            {"v": Decimal128("1234567890123456789012345678901234")},
+            {"v": Decimal128("1234567890123456789012345678901234")},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("1234567890123456789012345678901234")}],
+        msg="$avg should preserve 34-digit integer Decimal128 values",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_trailing_zeros",
+        docs=[{"v": Decimal128("2.00")}, {"v": Decimal128("4.00")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("3.00")}],
+        msg="$avg should preserve trailing zeros in Decimal128 results",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_trailing_zeros_single_digit",
+        docs=[{"v": DECIMAL128_TRAILING_ZERO}, {"v": Decimal128("3.0")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("2.0")}],
+        msg="$avg should preserve single trailing zero in Decimal128 results",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_subnormal_pair",
+        docs=[{"v": DECIMAL128_MIN_POSITIVE}, {"v": DECIMAL128_MIN_POSITIVE}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_MIN_POSITIVE}],
+        msg="$avg should handle Decimal128 subnormal values correctly",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_subnormal_single",
+        docs=[{"v": DECIMAL128_MIN_POSITIVE}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_MIN_POSITIVE}],
+        msg="$avg should handle a single Decimal128 subnormal value",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_near_max_single",
+        docs=[{"v": DECIMAL128_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_MAX}],
+        msg="$avg should handle a single near-maximum Decimal128 value",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_near_max_with_small",
+        docs=[{"v": DECIMAL128_MAX}, {"v": Decimal128("1")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": Decimal128("5.000000000000000000000000000000000E+6144")}],
+        msg="$avg should handle near-maximum Decimal128 averaged with a small value",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_max_and_min",
+        docs=[{"_id": 0, "v": DECIMAL128_MAX}, {"_id": 1, "v": DECIMAL128_MIN}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": Decimal128("0")}],
+        msg="avg of DECIMAL128_MAX and DECIMAL128_MIN",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_large_exponent",
+        docs=[
+            {"_id": 0, "v": DECIMAL128_LARGE_EXPONENT},
+            {"_id": 1, "v": DECIMAL128_LARGE_EXPONENT},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DECIMAL128_LARGE_EXPONENT}],
+        msg="avg of two identical large exponent values should return same value",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_small_exponent",
+        docs=[
+            {"_id": 0, "v": DECIMAL128_SMALL_EXPONENT},
+            {"_id": 1, "v": DECIMAL128_SMALL_EXPONENT},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DECIMAL128_SMALL_EXPONENT}],
+        msg="avg of two identical small exponent values should return same value",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_extreme_exponent_diff",
+        docs=[
+            {"_id": 0, "v": Decimal128("1E+6144")},
+            {"_id": 1, "v": Decimal128("1")},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[
+            {
+                "_id": None,
+                "avg": Decimal128("5.00000000000000000000000000000000E+6143"),
+            }
+        ],
+        msg="avg with extreme exponent difference",
+    ),
+    AccumulatorTestCase(
+        id="decimal128_exceeds_int64",
+        docs=[
+            {"v": DECIMAL128_INT64_OVERFLOW},
+            {"v": DECIMAL128_INT64_OVERFLOW},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_INT64_OVERFLOW}],
+        msg="$avg should produce Decimal128 for values exceeding int64 range",
+    ),
+]
+
+# Property [Overflow]: sum overflow during accumulation produces Infinity for
+# doubles and Decimal128, and int32/int64 overflow is handled via type
+# promotion without error.
+AVG_OVERFLOW_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="overflow_double_near_max_pair",
+        docs=[{"_id": 0, "v": DOUBLE_NEAR_MAX}, {"_id": 1, "v": DOUBLE_NEAR_MAX}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": FLOAT_INFINITY}],  # same constant as overflow_double_max
+        msg="avg of two DOUBLE_NEAR_MAX overflows sum to inf",
+    ),
+    AccumulatorTestCase(
+        id="overflow_double_max",
+        docs=[{"v": DOUBLE_MAX}, {"v": DOUBLE_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": FLOAT_INFINITY}],
+        msg="$avg should return Infinity when two DBL_MAX values overflow the sum",
+    ),
+    AccumulatorTestCase(
+        id="overflow_decimal128_max",
+        docs=[{"v": DECIMAL128_MAX}, {"v": DECIMAL128_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DECIMAL128_INFINITY}],
+        msg="$avg should return Decimal128 Infinity when two Decimal128 max values overflow",
+    ),
+    AccumulatorTestCase(
+        id="overflow_int32_sum",
+        docs=[{"v": INT32_MAX}, {"v": INT32_MAX}, {"v": INT32_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": float(INT32_MAX)}],
+        msg="$avg should handle int32 sum overflow via type promotion without error",
+    ),
+    AccumulatorTestCase(
+        id="overflow_int64_sum",
+        docs=[{"v": INT64_MAX}, {"v": INT64_MAX}, {"v": INT64_MAX}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": DOUBLE_FROM_INT64_MAX}],
+        msg="$avg should handle int64 sum overflow by converting to double",
+    ),
+]
+
+AVG_GROUP_BOUNDARY_TESTS: list[AccumulatorTestCase] = (
+    AVG_INT_BOUNDARY_TESTS
+    + AVG_DOUBLE_BOUNDARY_TESTS
+    + AVG_DECIMAL128_BOUNDARY_TESTS
+    + AVG_OVERFLOW_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_BOUNDARY_TESTS))
+def test_avg_group_boundaries(collection, test_case: AccumulatorTestCase):
+    """Test $avg accumulator boundary values in $group context."""
+    collection.insert_many(test_case.docs)  # no fallback seed: each case here defines docs
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py
new file mode 100644
index 00000000..b2a2ca4d
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py
@@ -0,0 +1,327 @@
+"""
+Tests for $avg accumulator in $group context.
+
+Covers numeric equivalence in grouping, single/empty groups,
+precision edge cases, multiple groups, and comparison with $sum.
+"""
+
+from __future__ import annotations
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Numeric Equivalence]: numerically equivalent group keys
+# (int32, int64, double, Decimal128) produce a single group.
+
+# Both cases expect exactly one output row: the four key representations
+# collapse into one group whose average is (10 + 20 + 30 + 40) / 4 = 25.
+# NOTE(review): the expected _id is written as a plain Python int; which of
+# the equivalent key values the server reports for the merged group is not
+# visible here - confirm that assertResult compares it numerically.
+NUMERIC_EQUIVALENCE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="numeric_equivalence_grouping",
+        docs=[
+            {"_id": 1, "key": 1, "value": 10},
+            {"_id": 2, "key": Int64(1), "value": 20},
+            {"_id": 3, "key": 1.0, "value": 30},
+            {"_id": 4, "key": Decimal128("1"), "value": 40},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$key", "avg": {"$avg": "$value"}}},
+        ],
+        expected=[{"_id": 1, "avg": 25.0}],
+        msg="Numerically equivalent group keys should produce a single group",
+    ),
+    AccumulatorTestCase(
+        id="zero_equivalence",
+        docs=[
+            {"_id": 1, "key": 0, "value": 10},
+            {"_id": 2, "key": Int64(0), "value": 20},
+            {"_id": 3, "key": 0.0, "value": 30},
+            {"_id": 4, "key": Decimal128("0"), "value": 40},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$key", "avg": {"$avg": "$value"}}},
+        ],
+        expected=[{"_id": 0, "avg": 25.0}],
+        msg="All zero representations should group together",
+    ),
+]
+
+# Property [Single and Empty Groups]: $avg returns correct results for
+# single-document groups, empty collections, and null group IDs.
+
+SINGLE_EMPTY_GROUP_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="single_document",
+        docs=[{"_id": 1, "category": "A", "value": 42}],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[{"_id": "A", "avg": 42.0}],
+        msg="$avg of single document should return that value as double",
+    ),
+    AccumulatorTestCase(
+        id="single_document_non_numeric",
+        docs=[{"_id": 1, "category": "A", "value": "hello"}],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[{"_id": "A", "avg": None}],
+        msg="$avg of single non-numeric document should return null",
+    ),
+    AccumulatorTestCase(
+        id="single_document_null",
+        docs=[{"_id": 1, "category": "A", "value": None}],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[{"_id": "A", "avg": None}],
+        msg="$avg of single null document should return null",
+    ),
+    AccumulatorTestCase(
+        id="single_document_missing_field",
+        docs=[{"_id": 1, "category": "A"}],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[{"_id": "A", "avg": None}],
+        msg="$avg of single document with missing field should return null",
+    ),
+    AccumulatorTestCase(
+        id="empty_collection",
+        # docs=None: the test function skips the insert, so the aggregate
+        # runs against a collection that has received no documents.
+        docs=None,
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+        ],
+        expected=[],
+        msg="$avg on empty collection should produce no output",
+    ),
+    AccumulatorTestCase(
+        id="all_filtered_out",
+        docs=[
+            {"_id": 1, "category": "A", "value": 10},
+            {"_id": 2, "category": "A", "value": 20},
+        ],
+        pipeline=[
+            # No document has category "Z", so $group receives no input.
+            {"$match": {"category": "Z"}},
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+        ],
+        expected=[],
+        msg="$avg after filtering all documents should produce no output",
+    ),
+    AccumulatorTestCase(
+        id="null_id",
+        docs=[
+            {"_id": 1, "value": 10},
+            {"_id": 2, "value": 20},
+            {"_id": 3, "value": 30},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "avg": {"$avg": "$value"}}},
+        ],
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg with _id: null should average entire collection",
+    ),
+    AccumulatorTestCase(
+        id="single_document_int64",
+        docs=[{"v": Int64(42)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": 42.0}],
+        msg="$avg should return the value as double for a single int64 document",
+    ),
+]
+
+# Property [Precision]: $avg produces correct fractional and repeating
+# decimal results and handles large document counts.
+
+PRECISION_EDGE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="odd_sum_two_int32",
+        docs=[
+            {"_id": 1, "category": "A", "value": 1},
+            {"_id": 2, "category": "A", "value": 2},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[{"_id": "A", "avg": 1.5}],
+        msg="$avg of 1 and 2 should return 1.5",
+    ),
+    AccumulatorTestCase(
+        id="repeating_decimal",
+        docs=[
+            {"_id": 1, "category": "A", "value": 1},
+            {"_id": 2, "category": "A", "value": 1},
+            {"_id": 3, "category": "A", "value": 2},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+        ],
+        # (1 + 1 + 2) / 3 = 4/3, written out to double precision.
+        expected=[{"_id": "A", "avg": 1.3333333333333333}],
+        msg="$avg of 1,1,2 should return 4/3",
+    ),
+    AccumulatorTestCase(
+        id="sequence_1_to_100",
+        docs=[{"_id": i, "category": "A", "value": i} for i in range(1, 101)],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        # Sum of 1..100 is 5050; 5050 / 100 = 50.5.
+        expected=[{"_id": "A", "avg": 50.5}],
+        msg="$avg of 1..100 should return 50.5",
+    ),
+    AccumulatorTestCase(
+        id="large_count_identical",
+        docs=[{"_id": i, "category": "A", "value": 7} for i in range(1000)],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[{"_id": "A", "avg": 7.0}],
+        msg="$avg of 1000 identical values should return that value",
+    ),
+]
+
+# Property [Multiple Groups]: $avg computes independent averages per group
+# with different counts, null groups, and mixed types.
+
+MULTIPLE_GROUPS_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="different_counts",
+        docs=[
+            {"_id": 1, "category": "A", "value": 10},
+            {"_id": 2, "category": "B", "value": 20},
+            {"_id": 3, "category": "B", "value": 40},
+            {"_id": 4, "category": "C", "value": 5},
+            {"_id": 5, "category": "C", "value": 10},
+            {"_id": 6, "category": "C", "value": 15},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+            # Sorting by _id makes the order of the expected rows deterministic.
+            {"$sort": {"_id": 1}},
+        ],
+        # A: 10 / 1; B: (20 + 40) / 2; C: (5 + 10 + 15) / 3.
+        expected=[
+            {"_id": "A", "avg": 10.0},
+            {"_id": "B", "avg": 30.0},
+            {"_id": "C", "avg": 10.0},
+        ],
+        msg="$avg should compute correct average per group with different counts",
+    ),
+    AccumulatorTestCase(
+        id="one_all_nulls_one_all_numeric",
+        docs=[
+            {"_id": 1, "category": "A", "value": None},
+            {"_id": 2, "category": "A", "value": None},
+            {"_id": 3, "category": "B", "value": 10},
+            {"_id": 4, "category": "B", "value": 20},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        expected=[
+            {"_id": "A", "avg": None},
+            {"_id": "B", "avg": 15.0},
+        ],
+        msg="Group with all nulls returns null, group with numerics returns average",
+    ),
+    AccumulatorTestCase(
+        id="mixed_types_per_group",
+        docs=[
+            {"_id": 1, "category": "int", "value": 10},
+            {"_id": 2, "category": "int", "value": 20},
+            {"_id": 3, "category": "dec", "value": Decimal128("10")},
+            {"_id": 4, "category": "dec", "value": Decimal128("20")},
+        ],
+        pipeline=[
+            {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}},
+            {"$sort": {"_id": 1}},
+        ],
+        # "dec" sorts before "int", hence the order of the expected rows.
+        expected=[
+            {"_id": "dec", "avg": Decimal128("15")},
+            {"_id": "int", "avg": 15.0},
+        ],
+        msg="Int group returns double, Decimal128 group returns Decimal128",
+    ),
+]
+
+# Property [Comparison with Related Operators]: $avg results are consistent
+# with $sum/$count, and non-numeric handling differs from $sum.
+
+COMPARISON_WITH_RELATED_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        id="equals_sum_divided_by_count",
+        docs=[
+            {"_id": 1, "category": "A", "value": 10},
+            {"_id": 2, "category": "A", "value": 20},
+            {"_id": 3, "category": "A", "value": 30},
+            {"_id": 4, "category": "A", "value": 40},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$category",
+                    "avg": {"$avg": "$value"},
+                    "sum": {"$sum": "$value"},
+                    # {"$sum": 1} adds 1 per document, i.e. the group size.
+                    "count": {"$sum": 1},
+                }
+            },
+        ],
+        # 100 / 4 = 25.0 ties the three accumulators together.
+        expected=[{"_id": "A", "avg": 25.0, "sum": 100, "count": 4}],
+        msg="$avg should equal $sum / count",
+    ),
+    AccumulatorTestCase(
+        id="vs_sum_non_numeric_handling",
+        docs=[
+            {"_id": 1, "category": "A", "value": "hello"},
+            {"_id": 2, "category": "A", "value": "world"},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$category",
+                    "avg": {"$avg": "$value"},
+                    "sum": {"$sum": "$value"},
+                }
+            },
+        ],
+        expected=[{"_id": "A", "avg": None, "sum": 0}],
+        msg="$avg returns null for non-numeric but $sum returns 0",
+    ),
+]
+
+# Full parametrization set for this module, consumed by
+# test_avg_group_context below.
+AVG_GROUP_CONTEXT_TESTS: list[AccumulatorTestCase] = (
+    NUMERIC_EQUIVALENCE_TESTS
+    + SINGLE_EMPTY_GROUP_TESTS
+    + PRECISION_EDGE_TESTS
+    + MULTIPLE_GROUPS_TESTS
+    + COMPARISON_WITH_RELATED_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_CONTEXT_TESTS))
+def test_avg_group_context(collection, test_case: AccumulatorTestCase):
+    """Test $avg in $group context with grouping behavior.
+
+    Inserts the case's documents when there are any, runs the case's
+    pipeline through the ``aggregate`` command, and compares the command
+    result with the case's expected output.
+    """
+    # docs is None for the empty-collection case, so the insert is skipped.
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py
new file mode 100644
index 00000000..5397303e
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py
@@ -0,0 +1,258 @@
+"""
+Tests for $avg accumulator type promotion and return type in $group context.
+
+Covers type promotion rules (int32, int64, double, Decimal128), return type
+verification via $type, and negative zero normalization.
+"""
+
+from __future__ import annotations
+
+import pytest
+from bson import Decimal128, Int64
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+from documentdb_tests.framework.test_constants import (
+    DECIMAL128_NEGATIVE_ZERO,
+    DECIMAL128_ZERO,
+    DOUBLE_NEGATIVE_ZERO,
+    DOUBLE_ZERO,
+)
+
+# Property [Type Promotion]: $avg returns double for integer and double inputs,
+# and Decimal128 when any input is Decimal128.
+# Every case groups the whole collection (_id: None), so `avg` is the mean of
+# all `v` values; the expected literal's Python type (float vs Decimal128)
+# encodes the promoted result type.
+# NOTE(review): the first argument is passed positionally here, while other
+# $avg modules pass id=...; presumably it is the case id - confirm against
+# AccumulatorTestCase.
+AVG_TYPE_PROMOTION_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "all_int32",
+        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": 20}, {"_id": 2, "v": 30}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="int32 avg should return double",
+    ),
+    AccumulatorTestCase(
+        "all_int64",
+        docs=[
+            {"_id": 0, "v": Int64(10)},
+            {"_id": 1, "v": Int64(20)},
+            {"_id": 2, "v": Int64(30)},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="int64 avg should return double",
+    ),
+    AccumulatorTestCase(
+        "all_double",
+        docs=[{"_id": 0, "v": 10.0}, {"_id": 1, "v": 20.0}, {"_id": 2, "v": 30.0}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="double avg should return double",
+    ),
+    AccumulatorTestCase(
+        "all_decimal128",
+        docs=[
+            {"_id": 0, "v": Decimal128("10")},
+            {"_id": 1, "v": Decimal128("20")},
+            {"_id": 2, "v": Decimal128("30")},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": Decimal128("20")}],
+        msg="decimal128 avg should return decimal128",
+    ),
+    AccumulatorTestCase(
+        "int32_and_int64",
+        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": Int64(20)}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 15.0}],
+        msg="int32+int64 avg should return double",
+    ),
+    AccumulatorTestCase(
+        "int32_and_double",
+        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": 20.0}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 15.0}],
+        msg="int32+double avg should return double",
+    ),
+    AccumulatorTestCase(
+        "int32_and_decimal128",
+        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": Decimal128("20")}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": Decimal128("15")}],
+        msg="int32+decimal128 avg should return decimal128",
+    ),
+    AccumulatorTestCase(
+        "int64_and_decimal128",
+        docs=[{"_id": 0, "v": Int64(10)}, {"_id": 1, "v": Decimal128("20")}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": Decimal128("15")}],
+        msg="int64+decimal128 avg should return decimal128",
+    ),
+    AccumulatorTestCase(
+        "double_and_decimal128",
+        docs=[{"_id": 0, "v": 10.0}, {"_id": 1, "v": Decimal128("20")}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": Decimal128("15")}],
+        msg="double+decimal128 avg should return decimal128",
+    ),
+    AccumulatorTestCase(
+        "all_four_types",
+        docs=[
+            {"_id": 0, "v": 10},
+            {"_id": 1, "v": Int64(20)},
+            {"_id": 2, "v": 30.0},
+            {"_id": 3, "v": Decimal128("40")},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        # (10 + 20 + 30 + 40) / 4 = 25; the single Decimal128 input decides
+        # the expected result type.
+        expected=[{"_id": None, "avg": Decimal128("25")}],
+        msg="all four numeric types avg should return decimal128",
+    ),
+    AccumulatorTestCase(
+        "fractional_result_from_int32",
+        docs=[{"_id": 0, "v": 1}, {"_id": 1, "v": 2}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 1.5}],
+        msg="int32 avg producing fraction should return double",
+    ),
+]
+
+# Property [Negative Zero]: $avg normalizes negative zero to positive zero
+# for both double and Decimal128.
+# NOTE(review): in Python 0.0 == -0.0 is True, so whether the double case
+# really distinguishes the sign depends on how assertResult compares doubles
+# - confirm.
+AVG_NEGATIVE_ZERO_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "negative_zero_double",
+        docs=[
+            {"_id": 0, "v": DOUBLE_NEGATIVE_ZERO},
+            {"_id": 1, "v": DOUBLE_NEGATIVE_ZERO},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DOUBLE_ZERO}],
+        msg="Double -0.0 avg should normalize to 0.0",
+    ),
+    AccumulatorTestCase(
+        "negative_zero_decimal128",
+        docs=[
+            {"_id": 0, "v": DECIMAL128_NEGATIVE_ZERO},
+            {"_id": 1, "v": DECIMAL128_NEGATIVE_ZERO},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": DECIMAL128_ZERO}],
+        msg="Decimal128 -0 avg should normalize to 0",
+    ),
+]
+
+# Property [Return Type]: the result is double by default, but Decimal128 if
+# any input value is Decimal128.
+# Each case projects {"$type": "$result"}, so the assertion is on the BSON
+# type name of the average rather than on its value.
+AVG_RETURN_TYPE_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "type_int32_only",
+        docs=[{"v": 2}, {"v": 4}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "type": {"$type": "$result"}}},
+        ],
+        expected=[{"type": "double"}],
+        msg="$avg should return double when all inputs are int32",
+    ),
+    AccumulatorTestCase(
+        "type_int64_only",
+        docs=[{"v": Int64(2)}, {"v": Int64(4)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "type": {"$type": "$result"}}},
+        ],
+        expected=[{"type": "double"}],
+        msg="$avg should return double when all inputs are int64",
+    ),
+    AccumulatorTestCase(
+        "type_int32_int64",
+        docs=[{"v": 2}, {"v": Int64(4)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "type": {"$type": "$result"}}},
+        ],
+        expected=[{"type": "double"}],
+        msg="$avg should return double for int32 and int64 mix",
+    ),
+    AccumulatorTestCase(
+        "type_int32_double",
+        docs=[{"v": 2}, {"v": 4.0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "type": {"$type": "$result"}}},
+        ],
+        expected=[{"type": "double"}],
+        msg="$avg should return double for int32 and double mix",
+    ),
+    AccumulatorTestCase(
+        "type_int64_double",
+        docs=[{"v": Int64(2)}, {"v": 4.0}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "type": {"$type": "$result"}}},
+        ],
+        expected=[{"type": "double"}],
+        msg="$avg should return double for int64 and double mix",
+    ),
+    AccumulatorTestCase(
+        "type_int32_decimal128",
+        docs=[{"v": 2}, {"v": Decimal128("4")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "type": {"$type": "$result"}}},
+        ],
+        expected=[{"type": "decimal"}],
+        msg="$avg should return Decimal128 when any input is Decimal128",
+    ),
+    AccumulatorTestCase(
+        "type_int64_decimal128",
+        docs=[{"v": Int64(2)}, {"v": Decimal128("4")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "type": {"$type": "$result"}}},
+        ],
+        expected=[{"type": "decimal"}],
+        msg="$avg should return Decimal128 for int64 and Decimal128 mix",
+    ),
+    AccumulatorTestCase(
+        "type_double_decimal128",
+        docs=[{"v": 2.0}, {"v": Decimal128("4")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "type": {"$type": "$result"}}},
+        ],
+        expected=[{"type": "decimal"}],
+        msg="$avg should return Decimal128 for double and Decimal128 mix",
+    ),
+    AccumulatorTestCase(
+        "type_decimal128_before_int32",
+        # Same inputs as type_int32_decimal128 with the document order swapped.
+        docs=[{"v": Decimal128("4")}, {"v": 2}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "type": {"$type": "$result"}}},
+        ],
+        expected=[{"type": "decimal"}],
+        msg="$avg should return Decimal128 regardless of document order",
+    ),
+]
+
+# Full parametrization set for this module, consumed by test_avg_group_types.
+AVG_GROUP_TYPE_TESTS: list[AccumulatorTestCase] = (
+    AVG_TYPE_PROMOTION_TESTS + AVG_NEGATIVE_ZERO_TESTS + AVG_RETURN_TYPE_TESTS
+)
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_TYPE_TESTS))
+def test_avg_group_types(collection, test_case: AccumulatorTestCase):
+    """Test $avg type promotion and return type in $group context.
+
+    Seeds ``collection`` with the case's documents (when it has any), runs
+    the case's pipeline through the ``aggregate`` command, and compares the
+    command result with the case's expected output.
+    """
+    # Guard the insert the same way the other $avg test modules do, so a case
+    # with docs=None or docs=[] runs against an empty collection instead of
+    # failing inside insert_many before the aggregate is issued.
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py
new file mode 100644
index 00000000..831216cb
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py
@@ -0,0 +1,255 @@
+"""
+Tests for $avg accumulator non-numeric type handling in $group context.
+
+Covers all non-numeric BSON types (string, boolean, object, ObjectId, datetime,
+Timestamp, Binary, Regex, Code, MinKey, MaxKey, arrays) and verifies they are
+silently ignored and excluded from both sum and count.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+
+import pytest
+from bson import Binary, Code, MaxKey, MinKey, ObjectId, Regex, Timestamp
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Non-Numeric Types Ignored]: all non-numeric BSON types are
+# silently ignored and excluded from both sum and count, producing null
+# when no numeric values remain.
+# Apart from mixed_with_numerics, every case contains no numeric input and
+# therefore expects a null (None) average.
+AVG_NON_NUMERIC_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "string",
+        docs=[{"v": "hello"}, {"v": "world"}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore string values and return null",
+    ),
+    AccumulatorTestCase(
+        "boolean_true",
+        docs=[{"v": True}, {"v": True}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore boolean true without coercing to numeric",
+    ),
+    AccumulatorTestCase(
+        "boolean_false",
+        docs=[{"v": False}, {"v": False}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore boolean false without coercing to numeric",
+    ),
+    AccumulatorTestCase(
+        "boolean_not_numeric",
+        docs=[{"_id": 0, "v": False}, {"_id": 1, "v": True}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        # A 0/1 coercion would give 0.5 here; null shows booleans are skipped.
+        expected=[{"_id": None, "avg": None}],
+        msg="Booleans should not be treated as 0/1 in avg",
+    ),
+    AccumulatorTestCase(
+        "object",
+        docs=[{"v": {"x": 1}}, {"v": {"y": 2}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore plain objects",
+    ),
+    AccumulatorTestCase(
+        "empty_object",
+        docs=[{"v": {}}, {"v": {}}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore empty objects",
+    ),
+    AccumulatorTestCase(
+        "objectid",
+        docs=[{"v": ObjectId()}, {"v": ObjectId()}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore ObjectId values",
+    ),
+    AccumulatorTestCase(
+        "datetime",
+        docs=[
+            {"v": datetime(2023, 1, 1, tzinfo=timezone.utc)},
+            {"v": datetime(2024, 1, 1, tzinfo=timezone.utc)},
+        ],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore datetime values",
+    ),
+    AccumulatorTestCase(
+        "timestamp",
+        docs=[{"v": Timestamp(1, 1)}, {"v": Timestamp(2, 1)}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore Timestamp values",
+    ),
+    AccumulatorTestCase(
+        "binary",
+        docs=[{"v": Binary(b"\x01")}, {"v": Binary(b"\x02")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore Binary values",
+    ),
+    AccumulatorTestCase(
+        "regex",
+        docs=[{"v": Regex("abc")}, {"v": Regex("def")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore Regex values",
+    ),
+    AccumulatorTestCase(
+        "code",
+        docs=[{"v": Code("x")}, {"v": Code("y")}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore Code values",
+    ),
+    AccumulatorTestCase(
+        "minkey",
+        docs=[{"v": MinKey()}, {"v": MinKey()}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore MinKey values",
+    ),
+    AccumulatorTestCase(
+        "maxkey",
+        docs=[{"v": MaxKey()}, {"v": MaxKey()}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore MaxKey values",
+    ),
+    AccumulatorTestCase(
+        "array",
+        docs=[{"v": [1, 2, 3]}, {"v": [4, 5]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore arrays without unwrapping",
+    ),
+    AccumulatorTestCase(
+        "single_element_array",
+        docs=[{"v": [42]}, {"v": [7]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should not unwrap single-element numeric arrays",
+    ),
+    AccumulatorTestCase(
+        "empty_array",
+        docs=[{"v": []}, {"v": []}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore empty arrays",
+    ),
+    AccumulatorTestCase(
+        "nested_array",
+        docs=[{"v": [[1, 2]]}, {"v": [[3]]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should ignore nested arrays",
+    ),
+    AccumulatorTestCase(
+        "array_from_expression",
+        # The document's own numeric field is not referenced; the accumulator
+        # argument is the $literal array.
+        docs=[{"v": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": {"$literal": [1, 2, 3]}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should treat array expressions as non-numeric",
+    ),
+    AccumulatorTestCase(
+        "mixed_with_numerics",
+        docs=[{"v": "hello"}, {"v": 10}, {"v": True}, {"v": 20}, {"v": [5]}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        # Only 10 and 20 count: (10 + 20) / 2 = 15.0. Dividing by all five
+        # documents would give 6.0 instead.
+        expected=[{"result": 15.0}],
+        msg="$avg should compute average only over numeric values, ignoring non-numerics",
+    ),
+    AccumulatorTestCase(
+        "all_non_numeric",
+        docs=[
+            {"_id": 0, "v": "a"},
+            {"_id": 1, "v": True},
+            {"_id": 2, "v": [1]},
+            {"_id": 3, "v": {"x": 1}},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": None}],
+        msg="All non-numeric values should return null",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_NON_NUMERIC_TESTS))
+def test_avg_non_numeric(collection, test_case: AccumulatorTestCase):
+    """Test $avg non-numeric type handling in $group context.
+
+    Inserts the case's documents when there are any, runs the case's
+    pipeline through the ``aggregate`` command, and compares the command
+    result with the case's expected output.
+    """
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py
new file mode 100644
index 00000000..e1a222aa
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py
@@ -0,0 +1,118 @@
+"""
+Tests for $avg accumulator null and missing value handling in $group context.
+
+Covers null values, missing fields, $$REMOVE, and combinations with numeric values.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Null and Missing Ignored]: null values, missing fields, and
+# $$REMOVE are treated as non-numeric and excluded from both the sum and
+# count, producing null when no numeric values remain.
+AVG_NULL_MISSING_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "all_null",
+        docs=[{"_id": 0, "v": None}, {"_id": 1, "v": None}, {"_id": 2, "v": None}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": None}],
+        msg="$avg should return null when all values in the group are null",
+    ),
+    AccumulatorTestCase(
+        "single_null",
+        docs=[{"v": None}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should return null when the only value is null",
+    ),
+    AccumulatorTestCase(
+        "some_null",
+        docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": None}, {"_id": 2, "v": 30}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        # (10 + 30) / 2 = 20.0; counting the null document would give 40 / 3.
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg should exclude null from both sum and count",
+    ),
+    AccumulatorTestCase(
+        "all_missing",
+        # The documents only carry "other"; the pipeline averages "$v".
+        docs=[{"_id": 0, "other": 0}, {"_id": 1, "other": 1}, {"_id": 2, "other": 2}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": None}],
+        msg="$avg should return null when all values reference missing fields",
+    ),
+    AccumulatorTestCase(
+        "single_missing",
+        docs=[{"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should return null when the only value is a missing field",
+    ),
+    AccumulatorTestCase(
+        "some_missing",
+        docs=[{"_id": 0, "v": 10}, {"_id": 1}, {"_id": 2, "v": 30}],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="$avg should exclude missing fields from both sum and count",
+    ),
+    AccumulatorTestCase(
+        "mixed_null_and_missing_no_numerics",
+        docs=[{"v": None}, {"x": 1}],
+        pipeline=[
+            {"$group": {"_id": None, "result": {"$avg": "$v"}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should return null when values are a mix of null and missing",
+    ),
+    AccumulatorTestCase(
+        "mix_null_missing_numeric",
+        docs=[
+            {"_id": 0, "v": 10},
+            {"_id": 1, "v": None},
+            {"_id": 2},
+            {"_id": 3, "v": 30},
+        ],
+        pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}],
+        # Two of the four documents are numeric: (10 + 30) / 2 = 20.0.
+        expected=[{"_id": None, "avg": 20.0}],
+        msg="Only numeric values should contribute to average",
+    ),
+    AccumulatorTestCase(
+        "remove_only",
+        docs=[{"v": 5}],
+        pipeline=[
+            # The $cond condition is the constant False, so the expression
+            # evaluates to $$REMOVE for every document and "v" is never read.
+            {"$group": {"_id": None, "result": {"$avg": {"$cond": [False, 1, "$$REMOVE"]}}}},
+            {"$project": {"_id": 0, "result": 1}},
+        ],
+        expected=[{"result": None}],
+        msg="$avg should treat $$REMOVE as missing and return null",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(AVG_NULL_MISSING_TESTS))
+def test_avg_null_missing(collection, test_case: AccumulatorTestCase):
+    """Test $avg null and missing value handling in $group context.
+
+    Inserts the case's documents when there are any, runs the case's
+    pipeline through the ``aggregate`` command, and compares the command
+    result with the case's expected output.
+    """
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)
diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py
new file mode 100644
index 00000000..3aa3025b
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py
@@ -0,0 +1,486 @@
+"""
+Tests for $avg in various pipeline contexts.
+
+Covers $group, $bucket, $setWindowFields, $project/$addFields,
+$match+$expr, and pipeline interaction patterns.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import (
+    AccumulatorTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult, assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# --- $group with computed _id ---
+
+# Property [Group Computed ID]: $avg with computed _id expression in $group.
+AVG_GROUP_COMPUTED_ID_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "group_computed_id",
+        docs=[
+            {"_id": 1, "value": 10, "score": 80},
+            {"_id": 2, "value": 20, "score": 90},
+            {"_id": 3, "value": 30, "score": 85},
+            {"_id": 4, "value": 40, "score": 95},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    # The group key is the boolean result of score > 85.
+                    "_id": {"$gt": ["$score", 85]},
+                    "avg": {"$avg": "$value"},
+                }
+            },
+            # Ascending sort puts the False group before the True group.
+            {"$sort": {"_id": 1}},
+        ],
+        # score <= 85: docs 1,3 -> avg(10,30) = 20
+        # score > 85: docs 2,4 -> avg(20,40) = 30
+        expected=[
+            {"_id": False, "avg": 20.0},
+            {"_id": True, "avg": 30.0},
+        ],
+        msg="$avg with computed _id should group and average correctly",
+    ),
+]
+
+# --- $bucket / $bucketAuto ---
+
+# Property [Bucket]: $avg in $bucket and $bucketAuto output specifications.
+AVG_BUCKET_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "bucket", + docs=[ + {"_id": 1, "score": 15, "value": 10}, + {"_id": 2, "score": 25, "value": 20}, + {"_id": 3, "score": 35, "value": 30}, + {"_id": 4, "score": 45, "value": 40}, + ], + pipeline=[ + { + "$bucket": { + "groupBy": "$score", + "boundaries": [0, 20, 40, 60], + "output": {"avg_value": {"$avg": "$value"}}, + } + }, + ], + expected=[ + {"_id": 0, "avg_value": 10.0}, + {"_id": 20, "avg_value": 25.0}, + {"_id": 40, "avg_value": 40.0}, + ], + msg="$avg in $bucket should compute average per bucket", + ), + AccumulatorTestCase( + "bucketauto", + docs=[ + {"_id": 1, "score": 10, "value": 100}, + {"_id": 2, "score": 20, "value": 200}, + {"_id": 3, "score": 30, "value": 300}, + {"_id": 4, "score": 40, "value": 400}, + ], + pipeline=[ + { + "$bucketAuto": { + "groupBy": "$score", + "buckets": 2, + "output": {"avg_value": {"$avg": "$value"}}, + } + }, + ], + expected=[ + {"_id": {"min": 10, "max": 30}, "avg_value": 150.0}, + {"_id": {"min": 30, "max": 40}, "avg_value": 350.0}, + ], + msg="$avg in $bucketAuto should compute average per auto-bucket", + ), +] + +# --- $setWindowFields --- + +# Property [Window]: $avg in $setWindowFields with various window types. 
+AVG_WINDOW_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "window_unbounded", + docs=[ + {"_id": 1, "value": 10}, + {"_id": 2, "value": 20}, + {"_id": 3, "value": 30}, + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + expected=[ + {"_id": 1, "value": 10, "avg": 20.0}, + {"_id": 2, "value": 20, "avg": 20.0}, + {"_id": 3, "value": 30, "avg": 20.0}, + ], + msg="$avg with unbounded window should return full partition average", + ), + AccumulatorTestCase( + "window_cumulative", + docs=[ + {"_id": 1, "value": 10}, + {"_id": 2, "value": 20}, + {"_id": 3, "value": 30}, + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "current"]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + expected=[ + {"_id": 1, "value": 10, "avg": 10.0}, + {"_id": 2, "value": 20, "avg": 15.0}, + {"_id": 3, "value": 30, "avg": 20.0}, + ], + msg="$avg with cumulative window should compute running average", + ), + AccumulatorTestCase( + "window_sliding", + docs=[ + {"_id": 1, "value": 10}, + {"_id": 2, "value": 20}, + {"_id": 3, "value": 30}, + {"_id": 4, "value": 40}, + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": [-1, 1]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + # avg(10,20), avg(10,20,30), avg(20,30,40), avg(30,40) + expected=[ + {"_id": 1, "value": 10, "avg": 15.0}, + {"_id": 2, "value": 20, "avg": 20.0}, + {"_id": 3, "value": 30, "avg": 30.0}, + {"_id": 4, "value": 40, "avg": 35.0}, + ], + msg="$avg with sliding window should compute local average", + ), + 
AccumulatorTestCase( + "window_current_only", + docs=[ + {"_id": 1, "value": 10}, + {"_id": 2, "value": 20}, + {"_id": 3, "value": 30}, + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": [0, 0]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + expected=[ + {"_id": 1, "value": 10, "avg": 10.0}, + {"_id": 2, "value": 20, "avg": 20.0}, + {"_id": 3, "value": 30, "avg": 30.0}, + ], + msg="$avg with [0,0] window should return current document value", + ), + AccumulatorTestCase( + "window_with_nulls", + docs=[ + {"_id": 1, "value": 10}, + {"_id": 2, "value": None}, + {"_id": 3, "value": 30}, + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 1, "value": 1, "avg": 1}}, + ], + expected=[ + {"_id": 1, "value": 10, "avg": 20.0}, + {"_id": 2, "value": None, "avg": 20.0}, + {"_id": 3, "value": 30, "avg": 20.0}, + ], + msg="$avg in window should ignore null values", + ), + AccumulatorTestCase( + "window_range_based", + docs=[ + {"_id": 1, "pos": 0, "value": 10}, + {"_id": 2, "pos": 5, "value": 20}, + {"_id": 3, "pos": 10, "value": 30}, + {"_id": 4, "pos": 15, "value": 40}, + ], + pipeline=[ + {"$sort": {"pos": 1}}, + { + "$setWindowFields": { + "sortBy": {"pos": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"range": [-5, 5]}, + } + }, + } + }, + {"$project": {"_id": 1, "pos": 1, "value": 1, "avg": 1}}, + ], + # pos=0: range [-5,5] includes pos 0,5 -> avg(10,20)=15 + # pos=5: range [0,10] includes pos 0,5,10 -> avg(10,20,30)=20 + # pos=10: range [5,15] includes pos 5,10,15 -> avg(20,30,40)=30 + # pos=15: range [10,20] includes pos 10,15 -> avg(30,40)=35 + expected=[ + {"_id": 1, "pos": 0, "value": 10, "avg": 15.0}, + {"_id": 2, "pos": 5, 
"value": 20, "avg": 20.0}, + {"_id": 3, "pos": 10, "value": 30, "avg": 30.0}, + {"_id": 4, "pos": 15, "value": 40, "avg": 35.0}, + ], + msg="$avg with range-based window should compute average within range", + ), + AccumulatorTestCase( + "window_multiple_partitions", + docs=[ + {"_id": 1, "group": "A", "value": 10}, + {"_id": 2, "group": "A", "value": 20}, + {"_id": 3, "group": "A", "value": 30}, + {"_id": 4, "group": "B", "value": 100}, + {"_id": 5, "group": "B", "value": 200}, + ], + pipeline=[ + {"$sort": {"_id": 1}}, + { + "$setWindowFields": { + "partitionBy": "$group", + "sortBy": {"_id": 1}, + "output": { + "avg": { + "$avg": "$value", + "window": {"documents": ["unbounded", "unbounded"]}, + } + }, + } + }, + {"$project": {"_id": 1, "group": 1, "avg": 1}}, + ], + expected=[ + {"_id": 1, "group": "A", "avg": 20.0}, + {"_id": 2, "group": "A", "avg": 20.0}, + {"_id": 3, "group": "A", "avg": 20.0}, + {"_id": 4, "group": "B", "avg": 150.0}, + {"_id": 5, "group": "B", "avg": 150.0}, + ], + msg="$avg should compute independent averages per partition", + ), +] + +# --- Expression contexts ($project, $addFields, $match+$expr) --- + +# Property [Expression Context]: $avg used in expression contexts. 
+AVG_EXPRESSION_CONTEXT_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "in_addfields", + docs=[ + {"_id": 1, "scores": [80, 90, 100]}, + ], + pipeline=[ + {"$addFields": {"avg_score": {"$avg": "$scores"}}}, + {"$project": {"_id": 0, "avg_score": 1}}, + ], + expected=[{"avg_score": 90.0}], + msg="$avg in $addFields should traverse array field and average", + ), + AccumulatorTestCase( + "in_match_expr", + docs=[ + {"_id": 1, "scores": [80, 90, 100]}, + {"_id": 2, "scores": [40, 50, 60]}, + {"_id": 3, "scores": [70, 80, 90]}, + ], + pipeline=[ + {"$match": {"$expr": {"$gt": [{"$avg": "$scores"}, 75]}}}, + {"$project": {"_id": 1}}, + {"$sort": {"_id": 1}}, + ], + # avg([80,90,100])=90 > 75, avg([40,50,60])=50 < 75, avg([70,80,90])=80 > 75 + expected=[{"_id": 1}, {"_id": 3}], + msg="$avg in $match $expr should filter based on computed average", + ), +] + +# --- Pipeline interaction patterns --- + +# Property [Pipeline Interaction]: $avg combined with other pipeline stages. +AVG_PIPELINE_INTERACTION_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "group_after_unwind", + docs=[ + {"_id": 1, "category": "A", "values": [10, 20]}, + {"_id": 2, "category": "A", "values": [30]}, + ], + pipeline=[ + {"$unwind": "$values"}, + {"$group": {"_id": "$category", "avg": {"$avg": "$values"}}}, + ], + # Unwound: 10, 20, 30 -> avg = 20 + expected=[{"_id": "A", "avg": 20.0}], + msg="$avg after $unwind should average all unwound values", + ), + AccumulatorTestCase( + "group_after_match", + docs=[ + {"_id": 1, "category": "A", "value": 10, "active": True}, + {"_id": 2, "category": "A", "value": 20, "active": False}, + {"_id": 3, "category": "A", "value": 30, "active": True}, + ], + pipeline=[ + {"$match": {"active": True}}, + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + # Only active docs: avg(10, 30) = 20 + expected=[{"_id": "A", "avg": 20.0}], + msg="$avg after $match should only average filtered documents", + ), + AccumulatorTestCase( + 
"project_after_group", + docs=[ + {"_id": 1, "category": "A", "value": 10}, + {"_id": 2, "category": "A", "value": 20}, + {"_id": 3, "category": "B", "value": 30}, + {"_id": 4, "category": "B", "value": 40}, + ], + pipeline=[ + { + "$group": { + "_id": "$category", + "sum": {"$sum": "$value"}, + "count": {"$sum": 1}, + } + }, + {"$sort": {"_id": 1}}, + { + "$project": { + "_id": 1, + "manual_avg": {"$divide": ["$sum", "$count"]}, + } + }, + ], + expected=[ + {"_id": "A", "manual_avg": 15.0}, + {"_id": "B", "manual_avg": 35.0}, + ], + msg="Manual average via $divide after $group should work", + ), + AccumulatorTestCase( + "group_after_project_rename", + docs=[ + {"_id": 1, "cat": "A", "val": 10}, + {"_id": 2, "cat": "A", "val": 20}, + ], + pipeline=[ + {"$project": {"category": "$cat", "value": "$val"}}, + {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, + ], + expected=[{"_id": "A", "avg": 15.0}], + msg="$avg should work on renamed fields from $project", + ), +] + +# --- Combined list --- + +AVG_PIPELINE_CONTEXT_TESTS: list[AccumulatorTestCase] = ( + AVG_GROUP_COMPUTED_ID_TESTS + + AVG_BUCKET_TESTS + + AVG_WINDOW_TESTS + + AVG_EXPRESSION_CONTEXT_TESTS + + AVG_PIPELINE_INTERACTION_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_PIPELINE_CONTEXT_TESTS)) +def test_avg_pipeline_contexts(collection, test_case: AccumulatorTestCase): + """Test $avg in various pipeline contexts.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) + + +def test_avg_in_project_array_literal(collection): + """Test $avg in $project with array of literal values. + + This test uses ``aggregate: 1`` with ``$documents`` instead of a + collection, so it is kept as a standalone test. 
+ """ + result = execute_command( + collection, + { + "aggregate": 1, + "pipeline": [ + {"$documents": [{}]}, + {"$project": {"_id": 0, "avg": {"$avg": [10, 20, 30]}}}, + ], + "cursor": {}, + }, + ) + assertSuccess( + result, + [{"avg": 20.0}], + msg="$avg in $project with literal array should average values", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py new file mode 100644 index 00000000..05325e38 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py @@ -0,0 +1,152 @@ +""" +Tests for $avg accumulator special numeric value handling in $group context. + +Covers NaN behavior, Infinity behavior, and cross-type interactions +for both double and Decimal128 types. +""" + +from __future__ import annotations + +import math + +import pytest +from bson import Decimal128 + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + FLOAT_INFINITY, + FLOAT_NEGATIVE_INFINITY, +) + +# Property [NaN]: NaN is numeric and produces NaN in the result; +# NaN with Infinity produces NaN; cross-type NaN promotes to Decimal128. 
+AVG_NAN_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "nan_with_finite", + docs=[{"_id": 0, "v": 10}, {"_id": 1, "v": float("nan")}, {"_id": 2, "v": 30}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], + msg="NaN among finite values should produce NaN result", + ), + AccumulatorTestCase( + "all_nan", + docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": float("nan")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], + msg="All NaN values should return NaN", + ), + AccumulatorTestCase( + "nan_with_infinity", + docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": FLOAT_INFINITY}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], + msg="NaN with Infinity should produce NaN", + ), + AccumulatorTestCase( + "decimal128_nan_with_finite", + docs=[ + {"_id": 0, "v": Decimal128("10")}, + {"_id": 1, "v": DECIMAL128_NAN}, + {"_id": 2, "v": Decimal128("30")}, + ], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_NAN}], + msg="Decimal128 NaN among finite values should produce Decimal128 NaN", + ), + AccumulatorTestCase( + "decimal128_nan_with_infinity", + docs=[{"v": DECIMAL128_NAN}, {"v": DECIMAL128_INFINITY}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": Decimal128("NaN")}], + msg="Decimal128 NaN with Decimal128 Infinity should produce Decimal128 NaN", + ), + AccumulatorTestCase( + "cross_type_nan", + docs=[{"_id": 0, "v": float("nan")}, {"_id": 1, "v": Decimal128("5")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_NAN}], + msg="double NaN with Decimal128 should return Decimal128 NaN", + ), 
+] + +# Property [Infinity]: Infinity with finite values produces Infinity; +# Infinity with -Infinity produces NaN. +AVG_INFINITY_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "infinity_with_finite", + docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": 10}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": FLOAT_INFINITY}], + msg="Infinity with finite value should produce Infinity", + ), + AccumulatorTestCase( + "negative_infinity_with_finite", + docs=[{"_id": 0, "v": FLOAT_NEGATIVE_INFINITY}, {"_id": 1, "v": 10}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": FLOAT_NEGATIVE_INFINITY}], + msg="-Infinity with finite value should produce -Infinity", + ), + AccumulatorTestCase( + "inf_and_neg_inf", + docs=[{"_id": 0, "v": FLOAT_INFINITY}, {"_id": 1, "v": FLOAT_NEGATIVE_INFINITY}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": pytest.approx(math.nan, nan_ok=True)}], + msg="Infinity with -Infinity should produce NaN", + ), + AccumulatorTestCase( + "decimal128_infinity_with_finite", + docs=[{"_id": 0, "v": DECIMAL128_INFINITY}, {"_id": 1, "v": Decimal128("10")}], + pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_INFINITY}], + msg="Decimal128 Infinity with finite value should produce Decimal128 Infinity", + ), + AccumulatorTestCase( + "decimal128_neg_infinity_with_finite", + docs=[{"v": DECIMAL128_NEGATIVE_INFINITY}, {"v": Decimal128("5")}], + pipeline=[ + {"$group": {"_id": None, "result": {"$avg": "$v"}}}, + {"$project": {"_id": 0, "result": 1}}, + ], + expected=[{"result": DECIMAL128_NEGATIVE_INFINITY}], + msg="Decimal128 -Infinity with finite value should produce Decimal128 -Infinity", + ), + AccumulatorTestCase( + "decimal128_inf_and_neg_inf", + docs=[ + {"_id": 0, "v": DECIMAL128_INFINITY}, + {"_id": 1, "v": DECIMAL128_NEGATIVE_INFINITY}, + ], + 
pipeline=[{"$group": {"_id": None, "avg": {"$avg": "$v"}}}], + expected=[{"_id": None, "avg": DECIMAL128_NAN}], + msg="Decimal128 Infinity with -Infinity should produce Decimal128 NaN", + ), +] + +AVG_SPECIAL_NUMERIC_TESTS: list[AccumulatorTestCase] = AVG_NAN_TESTS + AVG_INFINITY_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_SPECIAL_NUMERIC_TESTS)) +def test_avg_special_numeric(collection, test_case: AccumulatorTestCase): + """Test $avg special numeric value handling in $group context.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult(result, expected=test_case.expected, msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py new file mode 100644 index 00000000..5b4b9666 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py @@ -0,0 +1,372 @@ +"""Tests for $avg accumulator composed with sibling accumulators in the same $group.""" + +from __future__ import annotations + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.accumulators.utils.accumulator_test_case import ( # noqa: E501 + AccumulatorTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Avg with Sum]: $avg and $sum coexist in the same $group and +# independently compute the mean and the total. $avg always returns double +# for integer inputs; $sum returns int32 when all inputs are int32. 
+AVG_WITH_SUM_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "avg_sum_single_group",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "a", "v": 30},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": 20.0, "total": 60}],
+        msg="$avg and $sum should independently produce mean and total",
+    ),
+    AccumulatorTestCase(
+        "avg_sum_multiple_groups",
+        docs=[
+            {"cat": "a", "v": 10},
+            {"cat": "a", "v": 20},
+            {"cat": "b", "v": 5},
+            {"cat": "b", "v": 15},
+            {"cat": "b", "v": 25},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[
+            {"_id": "a", "mean": 15.0, "total": 30},
+            {"_id": "b", "mean": 15.0, "total": 45},
+        ],
+        msg="$avg and $sum should produce correct results across multiple groups",
+    ),
+    AccumulatorTestCase(
+        "avg_sum_both_ignore_null",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": 10},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": 10.0, "total": 10}],
+        msg="$avg and $sum should both ignore null (avg=10.0 from one value, sum=10)",
+    ),
+    AccumulatorTestCase(
+        "avg_sum_all_null_diverges",
+        docs=[
+            {"cat": "a", "v": None},
+            {"cat": "a", "v": None},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "mean": {"$avg": "$v"},
+                    "total": {"$sum": "$v"},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "mean": None, "total": 0}],
+        msg="$avg returns null but $sum returns 0 when all values are null",
+    ),
+]
+
+# Property [Avg with Count]: $avg of a field and $sum with constant 1 (count
+# pattern) coexist, independently computing a mean and a document count.
+AVG_WITH_COUNT_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "avg_count_basic", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "b", "v": 5}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "count": {"$sum": 1}, + } + } + ], + expected=[ + {"_id": "a", "mean": 15.0, "count": 2}, + {"_id": "b", "mean": 5.0, "count": 1}, + ], + msg="$avg of field and $sum(1) should independently compute mean and count", + ), + AccumulatorTestCase( + "avg_count_non_numeric_ignored_but_counted", + docs=[ + {"cat": "a", "v": "hello"}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": True}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "count": {"$sum": 1}, + } + } + ], + expected=[{"_id": "a", "mean": 10.0, "count": 3}], + msg="$avg ignores non-numeric values but $sum(1) counts all documents", + ), +] + +# Property [Avg with Min/Max]: $avg, $min, and $max coexist in the same +# $group, each independently computing the mean, minimum, and maximum. 
+AVG_WITH_MIN_MAX_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "avg_min_max_basic", + docs=[ + {"cat": "a", "v": 30}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "lo": {"$min": "$v"}, + "hi": {"$max": "$v"}, + } + } + ], + expected=[{"_id": "a", "mean": 20.0, "lo": 10, "hi": 30}], + msg="$avg, $min, and $max should independently compute mean, min, and max", + ), + AccumulatorTestCase( + "avg_min_max_mixed_types", + docs=[ + {"cat": "a", "v": 5}, + {"cat": "a", "v": Int64(100)}, + {"cat": "a", "v": 2.5}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "lo": {"$min": "$v"}, + "hi": {"$max": "$v"}, + } + } + ], + expected=[{"_id": "a", "mean": 35.833333333333336, "lo": 2.5, "hi": Int64(100)}], + msg="$avg should return double while $min/$max preserve original types", + ), +] + +# Property [Avg with First/Last]: $avg computes the mean while $first/$last +# pick positional values from the group. A preceding $sort establishes order +# for $first and $last; $avg is order-independent. +AVG_WITH_FIRST_LAST_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "avg_first_last_with_sort", + docs=[ + {"cat": "a", "v": 30}, + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "first_v": {"$first": "$v"}, + "last_v": {"$last": "$v"}, + } + }, + ], + expected=[{"_id": "a", "mean": 20.0, "first_v": 10, "last_v": 30}], + msg="$avg should compute mean while $first/$last pick sorted extremes", + ), +] + +# Property [Avg with Push/AddToSet]: $avg computes the mean while $push +# collects all values and $addToSet collects unique values. 
+AVG_WITH_PUSH_ADDTOSET_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "avg_push_addtoset", + docs=[ + {"cat": "a", "v": 10}, + {"cat": "a", "v": 20}, + {"cat": "a", "v": 10}, + ], + pipeline=[ + {"$sort": {"v": 1}}, + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "all_vals": {"$push": "$v"}, + "unique_vals": {"$addToSet": "$v"}, + } + }, + ], + expected=[ + { + "_id": "a", + "mean": 13.333333333333334, + "all_vals": [10, 10, 20], + "unique_vals": [10, 20], + }, + ], + msg="$avg computes mean while $push keeps all values and $addToSet keeps unique values", + ), +] + +# Property [Avg with MergeObjects]: $avg computes the mean while +# $mergeObjects combines per-document metadata into a single object. +AVG_WITH_MERGE_OBJECTS_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "avg_merge_objects", + docs=[ + {"cat": "a", "v": 10, "meta": {"src": "x"}}, + {"cat": "a", "v": 20, "meta": {"quality": "high"}}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "merged": {"$mergeObjects": "$meta"}, + } + } + ], + expected=[ + {"_id": "a", "mean": 15.0, "merged": {"src": "x", "quality": "high"}}, + ], + msg="$avg computes mean while $mergeObjects combines metadata objects", + ), +] + +# Property [Multiple Avg Expressions]: multiple $avg accumulators in the same +# $group independently average different fields or expressions. 
+MULTIPLE_AVG_TESTS: list[AccumulatorTestCase] = [
+    AccumulatorTestCase(
+        "multiple_avg_different_fields",
+        docs=[
+            {"cat": "a", "price": 100, "qty": 2},
+            {"cat": "a", "price": 200, "qty": 3},
+            {"cat": "b", "price": 50, "qty": 10},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "avg_price": {"$avg": "$price"},
+                    "avg_qty": {"$avg": "$qty"},
+                }
+            }
+        ],
+        expected=[
+            {"_id": "a", "avg_price": 150.0, "avg_qty": 2.5},
+            {"_id": "b", "avg_price": 50.0, "avg_qty": 10.0},
+        ],
+        msg="Multiple $avg accumulators should independently average different fields",
+    ),
+    AccumulatorTestCase(
+        "multiple_avg_different_expressions",
+        docs=[
+            {"cat": "a", "price": 100, "qty": 2},
+            {"cat": "a", "price": 200, "qty": 3},
+        ],
+        pipeline=[
+            {
+                "$group": {
+                    "_id": "$cat",
+                    "avg_price": {"$avg": "$price"},
+                    "avg_revenue": {"$avg": {"$multiply": ["$price", "$qty"]}},
+                }
+            }
+        ],
+        expected=[{"_id": "a", "avg_price": 150.0, "avg_revenue": 400.0}],
+        msg="Multiple $avg accumulators should independently average a field and an expression",
+    ),
+]
+
+# Property [Avg Type Promotion with Sibling]: $avg promoting to Decimal128
+# does not interfere with sibling accumulators that return simpler types.
+AVG_TYPE_PROMOTION_WITH_SIBLING_TESTS: list[AccumulatorTestCase] = [ + AccumulatorTestCase( + "avg_decimal128_with_int_count", + docs=[ + {"cat": "a", "v": Decimal128("1.5")}, + {"cat": "a", "v": Decimal128("2.5")}, + ], + pipeline=[ + { + "$group": { + "_id": "$cat", + "mean": {"$avg": "$v"}, + "count": {"$sum": 1}, + } + } + ], + expected=[{"_id": "a", "mean": Decimal128("2.0"), "count": 2}], + msg="$avg promoting to Decimal128 should not affect sibling $sum(1) returning int32", + ), +] + +AVG_INTEGRATION_TESTS = ( + AVG_WITH_SUM_TESTS + + AVG_WITH_COUNT_TESTS + + AVG_WITH_MIN_MAX_TESTS + + AVG_WITH_FIRST_LAST_TESTS + + AVG_WITH_PUSH_ADDTOSET_TESTS + + AVG_WITH_MERGE_OBJECTS_TESTS + + MULTIPLE_AVG_TESTS + + AVG_TYPE_PROMOTION_WITH_SIBLING_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(AVG_INTEGRATION_TESTS)) +def test_accumulators_avg_integration(collection, test_case: AccumulatorTestCase): + """Test $avg accumulator composed with sibling accumulators in the same $group.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + {"aggregate": collection.name, "pipeline": test_case.pipeline or [], "cursor": {}}, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ignore_doc_order=True, + ignore_order_in=["unique_vals"], + ) diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index adbc5c20..93b61c7e 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -368,6 +368,7 @@ ACCUMULATOR_NULL_FUNCTION_ERROR = 4544702 ACCUMULATOR_MISSING_ACCUMULATE_ARGS_ERROR = 4544710 DIVIDE_BY_ZERO_V2_ERROR = 4848401 +MODULO_BY_ZERO_V2_ERROR = 4848403 ARRAY_TO_OBJECT_NULL_BYTE_PAIR_KEY_ERROR = 4940400 ARRAY_TO_OBJECT_NULL_BYTE_KV_KEY_ERROR = 4940401 SKIP_INVALID_ARGUMENT_ERROR = 5107200 From c18348d4cbe3afc7d82dc527f0dada6e123a33e0 Mon Sep 17 00:00:00 
2001 From: "Alina (Xi) Li" Date: Tue, 19 May 2026 14:27:06 -0700 Subject: [PATCH 2/8] fix accumulator test Signed-off-by: Alina (Xi) Li --- .../core/operator/accumulators/utils/accumulator_test_case.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py index f21a9f62..b471bde0 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/utils/accumulator_test_case.py @@ -2,7 +2,7 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any from documentdb_tests.framework.test_case import BaseTestCase @@ -13,4 +13,4 @@ class AccumulatorTestCase(BaseTestCase): """Test case for accumulator tests.""" docs: list[dict] | None = None - pipeline: list[dict[str, Any]] | None = None + pipeline: list[dict[str, Any]] = field(default_factory=list) From 8a3f7abb438a5a07bc4b127d3f4c8968f7789ed9 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Tue, 19 May 2026 14:31:12 -0700 Subject: [PATCH 3/8] remove unneeded Signed-off-by: Alina (Xi) Li --- .../operator/accumulators/test_accumulators_avg_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py index 5b4b9666..b3d937d5 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/test_accumulators_avg_integration.py @@ -360,7 +360,7 @@ def test_accumulators_avg_integration(collection, 
test_case: AccumulatorTestCase collection.insert_many(test_case.docs) result = execute_command( collection, - {"aggregate": collection.name, "pipeline": test_case.pipeline or [], "cursor": {}}, + {"aggregate": collection.name, "pipeline": test_case.pipeline, "cursor": {}}, ) assertResult( result, From 4f958dcd486aa94401ad922b70f4af7d6fe8e52a Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Wed, 20 May 2026 15:59:11 -0700 Subject: [PATCH 4/8] remove avg expression tests Signed-off-by: Alina (Xi) Li --- .../avg/test_avg_pipeline_contexts.py | 64 +------------------ 1 file changed, 2 insertions(+), 62 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py index 3aa3025b..213111e6 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py @@ -1,8 +1,7 @@ """ Tests for $avg in various pipeline contexts. -Covers $group, $bucket, $setWindowFields, $project/$addFields, -$match+$expr, and pipeline interaction patterns. +Covers $group, $bucket, $setWindowFields, and pipeline interaction patterns. """ from __future__ import annotations @@ -12,7 +11,7 @@ from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( AccumulatorTestCase, ) -from documentdb_tests.framework.assertions import assertResult, assertSuccess +from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params @@ -323,40 +322,6 @@ ), ] -# --- Expression contexts ($project, $addFields, $match+$expr) --- - -# Property [Expression Context]: $avg used in expression contexts. 
-AVG_EXPRESSION_CONTEXT_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "in_addfields", - docs=[ - {"_id": 1, "scores": [80, 90, 100]}, - ], - pipeline=[ - {"$addFields": {"avg_score": {"$avg": "$scores"}}}, - {"$project": {"_id": 0, "avg_score": 1}}, - ], - expected=[{"avg_score": 90.0}], - msg="$avg in $addFields should traverse array field and average", - ), - AccumulatorTestCase( - "in_match_expr", - docs=[ - {"_id": 1, "scores": [80, 90, 100]}, - {"_id": 2, "scores": [40, 50, 60]}, - {"_id": 3, "scores": [70, 80, 90]}, - ], - pipeline=[ - {"$match": {"$expr": {"$gt": [{"$avg": "$scores"}, 75]}}}, - {"$project": {"_id": 1}}, - {"$sort": {"_id": 1}}, - ], - # avg([80,90,100])=90 > 75, avg([40,50,60])=50 < 75, avg([70,80,90])=80 > 75 - expected=[{"_id": 1}, {"_id": 3}], - msg="$avg in $match $expr should filter based on computed average", - ), -] - # --- Pipeline interaction patterns --- # Property [Pipeline Interaction]: $avg combined with other pipeline stages. @@ -441,7 +406,6 @@ AVG_GROUP_COMPUTED_ID_TESTS + AVG_BUCKET_TESTS + AVG_WINDOW_TESTS - + AVG_EXPRESSION_CONTEXT_TESTS + AVG_PIPELINE_INTERACTION_TESTS ) @@ -460,27 +424,3 @@ def test_avg_pipeline_contexts(collection, test_case: AccumulatorTestCase): }, ) assertResult(result, expected=test_case.expected, msg=test_case.msg) - - -def test_avg_in_project_array_literal(collection): - """Test $avg in $project with array of literal values. - - This test uses ``aggregate: 1`` with ``$documents`` instead of a - collection, so it is kept as a standalone test. 
- """ - result = execute_command( - collection, - { - "aggregate": 1, - "pipeline": [ - {"$documents": [{}]}, - {"$project": {"_id": 0, "avg": {"$avg": [10, 20, 30]}}}, - ], - "cursor": {}, - }, - ) - assertSuccess( - result, - [{"avg": 20.0}], - msg="$avg in $project with literal array should average values", - ) From 9718cd6a4c9d3ba8c02d9c7e998c6daf2e02b0c5 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 21 May 2026 14:29:34 -0700 Subject: [PATCH 5/8] remove stage tests in avg Signed-off-by: Alina (Xi) Li --- .../accumulators/avg/test_avg_errors.py | 113 +---- .../avg/test_avg_pipeline_contexts.py | 426 ------------------ 2 files changed, 2 insertions(+), 537 deletions(-) delete mode 100644 documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py index d163850f..8caca165 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py @@ -1,8 +1,7 @@ """ Tests for $avg accumulator error handling. -Covers arity validation (rejects array syntax in $group, $bucket, $bucketAuto) -and expression error propagation ($toInt, $divide, $mod). +Covers expression error propagation ($toInt, $divide, $mod). """ from __future__ import annotations @@ -16,119 +15,11 @@ from documentdb_tests.framework.error_codes import ( CONVERSION_FAILURE_ERROR, DIVIDE_BY_ZERO_V2_ERROR, - GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, MODULO_BY_ZERO_V2_ERROR, ) from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params -# Property [Arity]: $avg in accumulator context is a unary operator and -# rejects array syntax in $group, $bucket, and $bucketAuto. 
-AVG_ARITY_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "arity_multi_element_group", - pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v", "$v"]}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject multi-element array syntax in $group", - ), - AccumulatorTestCase( - "arity_empty_array_group", - pipeline=[{"$group": {"_id": None, "result": {"$avg": []}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject empty array syntax in $group", - ), - AccumulatorTestCase( - "arity_single_element_group", - pipeline=[{"$group": {"_id": None, "result": {"$avg": ["$v"]}}}], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject single-element array syntax in $group", - ), - AccumulatorTestCase( - "arity_multi_element_bucket", - pipeline=[ - { - "$bucket": { - "groupBy": "$v", - "boundaries": [0, 10], - "output": {"result": {"$avg": ["$v", "$v"]}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject multi-element array syntax in $bucket", - ), - AccumulatorTestCase( - "arity_empty_array_bucket", - pipeline=[ - { - "$bucket": { - "groupBy": "$v", - "boundaries": [0, 10], - "output": {"result": {"$avg": []}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject empty array syntax in $bucket", - ), - AccumulatorTestCase( - "arity_single_element_bucket", - pipeline=[ - { - "$bucket": { - "groupBy": "$v", - "boundaries": [0, 10], - "output": {"result": {"$avg": ["$v"]}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject single-element array syntax in $bucket", - ), - AccumulatorTestCase( - "arity_multi_element_bucket_auto", - pipeline=[ - { - "$bucketAuto": { - "groupBy": "$v", - "buckets": 1, - "output": {"result": {"$avg": ["$v", "$v"]}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject multi-element array syntax 
in $bucketAuto", - ), - AccumulatorTestCase( - "arity_empty_array_bucket_auto", - pipeline=[ - { - "$bucketAuto": { - "groupBy": "$v", - "buckets": 1, - "output": {"result": {"$avg": []}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject empty array syntax in $bucketAuto", - ), - AccumulatorTestCase( - "arity_single_element_bucket_auto", - pipeline=[ - { - "$bucketAuto": { - "groupBy": "$v", - "buckets": 1, - "output": {"result": {"$avg": ["$v"]}}, - } - } - ], - error_code=GROUP_ACCUMULATOR_ARRAY_ARGUMENT_ERROR, - msg="$avg should reject single-element array syntax in $bucketAuto", - ), -] - # Property [Expression Error Propagation]: errors from sub-expressions # propagate through $avg without being caught or suppressed. AVG_EXPRESSION_ERROR_TESTS: list[AccumulatorTestCase] = [ @@ -164,7 +55,7 @@ ), ] -AVG_ERROR_TESTS: list[AccumulatorTestCase] = AVG_ARITY_TESTS + AVG_EXPRESSION_ERROR_TESTS +AVG_ERROR_TESTS: list[AccumulatorTestCase] = AVG_EXPRESSION_ERROR_TESTS @pytest.mark.parametrize("test_case", pytest_params(AVG_ERROR_TESTS)) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py deleted file mode 100644 index 213111e6..00000000 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_pipeline_contexts.py +++ /dev/null @@ -1,426 +0,0 @@ -""" -Tests for $avg in various pipeline contexts. - -Covers $group, $bucket, $setWindowFields, and pipeline interaction patterns. 
-""" - -from __future__ import annotations - -import pytest - -from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( - AccumulatorTestCase, -) -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.executor import execute_command -from documentdb_tests.framework.parametrize import pytest_params - -# --- $group with computed _id --- - -# Property [Group Computed ID]: $avg with computed _id expression in $group. -AVG_GROUP_COMPUTED_ID_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "group_computed_id", - docs=[ - {"_id": 1, "value": 10, "score": 80}, - {"_id": 2, "value": 20, "score": 90}, - {"_id": 3, "value": 30, "score": 85}, - {"_id": 4, "value": 40, "score": 95}, - ], - pipeline=[ - { - "$group": { - "_id": {"$gt": ["$score", 85]}, - "avg": {"$avg": "$value"}, - } - }, - {"$sort": {"_id": 1}}, - ], - # score <= 85: docs 1,3 -> avg(10,30) = 20 - # score > 85: docs 2,4 -> avg(20,40) = 30 - expected=[ - {"_id": False, "avg": 20.0}, - {"_id": True, "avg": 30.0}, - ], - msg="$avg with computed _id should group and average correctly", - ), -] - -# --- $bucket / $bucketAuto --- - -# Property [Bucket]: $avg in $bucket and $bucketAuto output specifications. 
-AVG_BUCKET_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "bucket", - docs=[ - {"_id": 1, "score": 15, "value": 10}, - {"_id": 2, "score": 25, "value": 20}, - {"_id": 3, "score": 35, "value": 30}, - {"_id": 4, "score": 45, "value": 40}, - ], - pipeline=[ - { - "$bucket": { - "groupBy": "$score", - "boundaries": [0, 20, 40, 60], - "output": {"avg_value": {"$avg": "$value"}}, - } - }, - ], - expected=[ - {"_id": 0, "avg_value": 10.0}, - {"_id": 20, "avg_value": 25.0}, - {"_id": 40, "avg_value": 40.0}, - ], - msg="$avg in $bucket should compute average per bucket", - ), - AccumulatorTestCase( - "bucketauto", - docs=[ - {"_id": 1, "score": 10, "value": 100}, - {"_id": 2, "score": 20, "value": 200}, - {"_id": 3, "score": 30, "value": 300}, - {"_id": 4, "score": 40, "value": 400}, - ], - pipeline=[ - { - "$bucketAuto": { - "groupBy": "$score", - "buckets": 2, - "output": {"avg_value": {"$avg": "$value"}}, - } - }, - ], - expected=[ - {"_id": {"min": 10, "max": 30}, "avg_value": 150.0}, - {"_id": {"min": 30, "max": 40}, "avg_value": 350.0}, - ], - msg="$avg in $bucketAuto should compute average per auto-bucket", - ), -] - -# --- $setWindowFields --- - -# Property [Window]: $avg in $setWindowFields with various window types. 
-AVG_WINDOW_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "window_unbounded", - docs=[ - {"_id": 1, "value": 10}, - {"_id": 2, "value": 20}, - {"_id": 3, "value": 30}, - ], - pipeline=[ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 1, "value": 1, "avg": 1}}, - ], - expected=[ - {"_id": 1, "value": 10, "avg": 20.0}, - {"_id": 2, "value": 20, "avg": 20.0}, - {"_id": 3, "value": 30, "avg": 20.0}, - ], - msg="$avg with unbounded window should return full partition average", - ), - AccumulatorTestCase( - "window_cumulative", - docs=[ - {"_id": 1, "value": 10}, - {"_id": 2, "value": 20}, - {"_id": 3, "value": 30}, - ], - pipeline=[ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": ["unbounded", "current"]}, - } - }, - } - }, - {"$project": {"_id": 1, "value": 1, "avg": 1}}, - ], - expected=[ - {"_id": 1, "value": 10, "avg": 10.0}, - {"_id": 2, "value": 20, "avg": 15.0}, - {"_id": 3, "value": 30, "avg": 20.0}, - ], - msg="$avg with cumulative window should compute running average", - ), - AccumulatorTestCase( - "window_sliding", - docs=[ - {"_id": 1, "value": 10}, - {"_id": 2, "value": 20}, - {"_id": 3, "value": 30}, - {"_id": 4, "value": 40}, - ], - pipeline=[ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": [-1, 1]}, - } - }, - } - }, - {"$project": {"_id": 1, "value": 1, "avg": 1}}, - ], - # avg(10,20), avg(10,20,30), avg(20,30,40), avg(30,40) - expected=[ - {"_id": 1, "value": 10, "avg": 15.0}, - {"_id": 2, "value": 20, "avg": 20.0}, - {"_id": 3, "value": 30, "avg": 30.0}, - {"_id": 4, "value": 40, "avg": 35.0}, - ], - msg="$avg with sliding window should compute local average", - ), - 
AccumulatorTestCase( - "window_current_only", - docs=[ - {"_id": 1, "value": 10}, - {"_id": 2, "value": 20}, - {"_id": 3, "value": 30}, - ], - pipeline=[ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": [0, 0]}, - } - }, - } - }, - {"$project": {"_id": 1, "value": 1, "avg": 1}}, - ], - expected=[ - {"_id": 1, "value": 10, "avg": 10.0}, - {"_id": 2, "value": 20, "avg": 20.0}, - {"_id": 3, "value": 30, "avg": 30.0}, - ], - msg="$avg with [0,0] window should return current document value", - ), - AccumulatorTestCase( - "window_with_nulls", - docs=[ - {"_id": 1, "value": 10}, - {"_id": 2, "value": None}, - {"_id": 3, "value": 30}, - ], - pipeline=[ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 1, "value": 1, "avg": 1}}, - ], - expected=[ - {"_id": 1, "value": 10, "avg": 20.0}, - {"_id": 2, "value": None, "avg": 20.0}, - {"_id": 3, "value": 30, "avg": 20.0}, - ], - msg="$avg in window should ignore null values", - ), - AccumulatorTestCase( - "window_range_based", - docs=[ - {"_id": 1, "pos": 0, "value": 10}, - {"_id": 2, "pos": 5, "value": 20}, - {"_id": 3, "pos": 10, "value": 30}, - {"_id": 4, "pos": 15, "value": 40}, - ], - pipeline=[ - {"$sort": {"pos": 1}}, - { - "$setWindowFields": { - "sortBy": {"pos": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"range": [-5, 5]}, - } - }, - } - }, - {"$project": {"_id": 1, "pos": 1, "value": 1, "avg": 1}}, - ], - # pos=0: range [-5,5] includes pos 0,5 -> avg(10,20)=15 - # pos=5: range [0,10] includes pos 0,5,10 -> avg(10,20,30)=20 - # pos=10: range [5,15] includes pos 5,10,15 -> avg(20,30,40)=30 - # pos=15: range [10,20] includes pos 10,15 -> avg(30,40)=35 - expected=[ - {"_id": 1, "pos": 0, "value": 10, "avg": 15.0}, - {"_id": 2, "pos": 5, 
"value": 20, "avg": 20.0}, - {"_id": 3, "pos": 10, "value": 30, "avg": 30.0}, - {"_id": 4, "pos": 15, "value": 40, "avg": 35.0}, - ], - msg="$avg with range-based window should compute average within range", - ), - AccumulatorTestCase( - "window_multiple_partitions", - docs=[ - {"_id": 1, "group": "A", "value": 10}, - {"_id": 2, "group": "A", "value": 20}, - {"_id": 3, "group": "A", "value": 30}, - {"_id": 4, "group": "B", "value": 100}, - {"_id": 5, "group": "B", "value": 200}, - ], - pipeline=[ - {"$sort": {"_id": 1}}, - { - "$setWindowFields": { - "partitionBy": "$group", - "sortBy": {"_id": 1}, - "output": { - "avg": { - "$avg": "$value", - "window": {"documents": ["unbounded", "unbounded"]}, - } - }, - } - }, - {"$project": {"_id": 1, "group": 1, "avg": 1}}, - ], - expected=[ - {"_id": 1, "group": "A", "avg": 20.0}, - {"_id": 2, "group": "A", "avg": 20.0}, - {"_id": 3, "group": "A", "avg": 20.0}, - {"_id": 4, "group": "B", "avg": 150.0}, - {"_id": 5, "group": "B", "avg": 150.0}, - ], - msg="$avg should compute independent averages per partition", - ), -] - -# --- Pipeline interaction patterns --- - -# Property [Pipeline Interaction]: $avg combined with other pipeline stages. 
-AVG_PIPELINE_INTERACTION_TESTS: list[AccumulatorTestCase] = [ - AccumulatorTestCase( - "group_after_unwind", - docs=[ - {"_id": 1, "category": "A", "values": [10, 20]}, - {"_id": 2, "category": "A", "values": [30]}, - ], - pipeline=[ - {"$unwind": "$values"}, - {"$group": {"_id": "$category", "avg": {"$avg": "$values"}}}, - ], - # Unwound: 10, 20, 30 -> avg = 20 - expected=[{"_id": "A", "avg": 20.0}], - msg="$avg after $unwind should average all unwound values", - ), - AccumulatorTestCase( - "group_after_match", - docs=[ - {"_id": 1, "category": "A", "value": 10, "active": True}, - {"_id": 2, "category": "A", "value": 20, "active": False}, - {"_id": 3, "category": "A", "value": 30, "active": True}, - ], - pipeline=[ - {"$match": {"active": True}}, - {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, - ], - # Only active docs: avg(10, 30) = 20 - expected=[{"_id": "A", "avg": 20.0}], - msg="$avg after $match should only average filtered documents", - ), - AccumulatorTestCase( - "project_after_group", - docs=[ - {"_id": 1, "category": "A", "value": 10}, - {"_id": 2, "category": "A", "value": 20}, - {"_id": 3, "category": "B", "value": 30}, - {"_id": 4, "category": "B", "value": 40}, - ], - pipeline=[ - { - "$group": { - "_id": "$category", - "sum": {"$sum": "$value"}, - "count": {"$sum": 1}, - } - }, - {"$sort": {"_id": 1}}, - { - "$project": { - "_id": 1, - "manual_avg": {"$divide": ["$sum", "$count"]}, - } - }, - ], - expected=[ - {"_id": "A", "manual_avg": 15.0}, - {"_id": "B", "manual_avg": 35.0}, - ], - msg="Manual average via $divide after $group should work", - ), - AccumulatorTestCase( - "group_after_project_rename", - docs=[ - {"_id": 1, "cat": "A", "val": 10}, - {"_id": 2, "cat": "A", "val": 20}, - ], - pipeline=[ - {"$project": {"category": "$cat", "value": "$val"}}, - {"$group": {"_id": "$category", "avg": {"$avg": "$value"}}}, - ], - expected=[{"_id": "A", "avg": 15.0}], - msg="$avg should work on renamed fields from $project", - ), -] - -# --- 
Combined list --- - -AVG_PIPELINE_CONTEXT_TESTS: list[AccumulatorTestCase] = ( - AVG_GROUP_COMPUTED_ID_TESTS - + AVG_BUCKET_TESTS - + AVG_WINDOW_TESTS - + AVG_PIPELINE_INTERACTION_TESTS -) - - -@pytest.mark.parametrize("test_case", pytest_params(AVG_PIPELINE_CONTEXT_TESTS)) -def test_avg_pipeline_contexts(collection, test_case: AccumulatorTestCase): - """Test $avg in various pipeline contexts.""" - if test_case.docs: - collection.insert_many(test_case.docs) - result = execute_command( - collection, - { - "aggregate": collection.name, - "pipeline": test_case.pipeline, - "cursor": {}, - }, - ) - assertResult(result, expected=test_case.expected, msg=test_case.msg) From dd0b7cc356dfbf018f347697af717967137bee88 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Thu, 21 May 2026 15:12:28 -0700 Subject: [PATCH 6/8] rename to test_accumulator_*.py Signed-off-by: Alina (Xi) Li --- .../avg/{test_avg_errors.py => test_accumulator_avg_errors.py} | 2 +- ...avg_field_lookup.py => test_accumulator_avg_field_lookup.py} | 2 +- ...p_boundaries.py => test_accumulator_avg_group_boundaries.py} | 2 +- ...g_group_context.py => test_accumulator_avg_group_context.py} | 2 +- ...t_avg_group_types.py => test_accumulator_avg_group_types.py} | 2 +- ...t_avg_non_numeric.py => test_accumulator_avg_non_numeric.py} | 2 +- ...avg_null_missing.py => test_accumulator_avg_null_missing.py} | 2 +- ...t_smoke_accumulator_avg.py => test_accumulator_avg_smoke.py} | 2 +- ...ecial_numeric.py => test_accumulator_avg_special_numeric.py} | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) rename documentdb_tests/compatibility/tests/core/operator/accumulators/avg/{test_avg_errors.py => test_accumulator_avg_errors.py} (97%) rename documentdb_tests/compatibility/tests/core/operator/accumulators/avg/{test_avg_field_lookup.py => test_accumulator_avg_field_lookup.py} (98%) rename documentdb_tests/compatibility/tests/core/operator/accumulators/avg/{test_avg_group_boundaries.py => 
test_accumulator_avg_group_boundaries.py} (99%) rename documentdb_tests/compatibility/tests/core/operator/accumulators/avg/{test_avg_group_context.py => test_accumulator_avg_group_context.py} (99%) rename documentdb_tests/compatibility/tests/core/operator/accumulators/avg/{test_avg_group_types.py => test_accumulator_avg_group_types.py} (99%) rename documentdb_tests/compatibility/tests/core/operator/accumulators/avg/{test_avg_non_numeric.py => test_accumulator_avg_non_numeric.py} (99%) rename documentdb_tests/compatibility/tests/core/operator/accumulators/avg/{test_avg_null_missing.py => test_accumulator_avg_null_missing.py} (98%) rename documentdb_tests/compatibility/tests/core/operator/accumulators/avg/{test_smoke_accumulator_avg.py => test_accumulator_avg_smoke.py} (95%) rename documentdb_tests/compatibility/tests/core/operator/accumulators/avg/{test_avg_special_numeric.py => test_accumulator_avg_special_numeric.py} (98%) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_errors.py similarity index 97% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_errors.py index 8caca165..37626353 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_errors.py @@ -59,7 +59,7 @@ @pytest.mark.parametrize("test_case", pytest_params(AVG_ERROR_TESTS)) -def test_avg_errors(collection, test_case: AccumulatorTestCase): +def test_accumulator_avg_errors(collection, test_case: AccumulatorTestCase): """Test $avg accumulator error handling.""" if test_case.docs: collection.insert_many(test_case.docs) diff --git 
a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_field_lookup.py similarity index 98% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_field_lookup.py index 45173106..f96c46e3 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_field_lookup.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_field_lookup.py @@ -202,7 +202,7 @@ @pytest.mark.parametrize("test_case", pytest_params(AVG_FIELD_LOOKUP_TESTS)) -def test_avg_field_lookup(collection, test_case: AccumulatorTestCase): +def test_accumulator_avg_field_lookup(collection, test_case: AccumulatorTestCase): """Test $avg field lookup and expression types in $group context.""" if test_case.docs: collection.insert_many(test_case.docs) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_group_boundaries.py similarity index 99% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_group_boundaries.py index a1ed3c86..8c04f1fb 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_boundaries.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_group_boundaries.py @@ -505,7 +505,7 @@ @pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_BOUNDARY_TESTS)) -def test_avg_group_boundaries(collection, test_case: AccumulatorTestCase): +def 
test_accumulator_avg_group_boundaries(collection, test_case: AccumulatorTestCase): """Test $avg accumulator boundary values in $group context.""" collection.insert_many(test_case.docs) result = execute_command( diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_group_context.py similarity index 99% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_group_context.py index b2a2ca4d..45e0eb55 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_context.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_group_context.py @@ -312,7 +312,7 @@ @pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_CONTEXT_TESTS)) -def test_avg_group_context(collection, test_case: AccumulatorTestCase): +def test_accumulator_avg_group_context(collection, test_case: AccumulatorTestCase): """Test $avg in $group context with grouping behavior.""" if test_case.docs: collection.insert_many(test_case.docs) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_group_types.py similarity index 99% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_group_types.py index 5397303e..28c2e432 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_group_types.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_group_types.py @@ -244,7 +244,7 @@ 
@pytest.mark.parametrize("test_case", pytest_params(AVG_GROUP_TYPE_TESTS)) -def test_avg_group_types(collection, test_case: AccumulatorTestCase): +def test_accumulator_avg_group_types(collection, test_case: AccumulatorTestCase): """Test $avg type promotion and return type in $group context.""" collection.insert_many(test_case.docs) result = execute_command( diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_non_numeric.py similarity index 99% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_non_numeric.py index 831216cb..391929cd 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_non_numeric.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_non_numeric.py @@ -240,7 +240,7 @@ @pytest.mark.parametrize("test_case", pytest_params(AVG_NON_NUMERIC_TESTS)) -def test_avg_non_numeric(collection, test_case: AccumulatorTestCase): +def test_accumulator_avg_non_numeric(collection, test_case: AccumulatorTestCase): """Test $avg non-numeric type handling in $group context.""" if test_case.docs: collection.insert_many(test_case.docs) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_null_missing.py similarity index 98% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_null_missing.py index e1a222aa..ba96730d 100644 --- 
a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_null_missing.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_null_missing.py @@ -103,7 +103,7 @@ @pytest.mark.parametrize("test_case", pytest_params(AVG_NULL_MISSING_TESTS)) -def test_avg_null_missing(collection, test_case: AccumulatorTestCase): +def test_accumulator_avg_null_missing(collection, test_case: AccumulatorTestCase): """Test $avg null and missing value handling in $group context.""" if test_case.docs: collection.insert_many(test_case.docs) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_smoke_accumulator_avg.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_smoke.py similarity index 95% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_smoke_accumulator_avg.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_smoke.py index 2a3c6b08..8f77df55 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_smoke_accumulator_avg.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_smoke.py @@ -12,7 +12,7 @@ pytestmark = pytest.mark.smoke -def test_smoke_accumulator_avg(collection): +def test_accumulator_avg_smoke(collection): """Test basic $avg accumulator behavior.""" collection.insert_many( [ diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_special_numeric.py similarity index 98% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_special_numeric.py index 05325e38..3eada8ff 100644 --- 
a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_avg_special_numeric.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_special_numeric.py @@ -137,7 +137,7 @@ @pytest.mark.parametrize("test_case", pytest_params(AVG_SPECIAL_NUMERIC_TESTS)) -def test_avg_special_numeric(collection, test_case: AccumulatorTestCase): +def test_accumulator_avg_special_numeric(collection, test_case: AccumulatorTestCase): """Test $avg special numeric value handling in $group context.""" if test_case.docs: collection.insert_many(test_case.docs) From 1c4e9e3679a08acc9a873801437b3a4f6fff2aeb Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Fri, 22 May 2026 12:56:16 -0700 Subject: [PATCH 7/8] remove Code test Signed-off-by: Alina (Xi) Li --- .../avg/test_accumulator_avg_non_numeric.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_non_numeric.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_non_numeric.py index 391929cd..1338e79b 100644 --- a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_non_numeric.py +++ b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_non_numeric.py @@ -2,7 +2,7 @@ Tests for $avg accumulator non-numeric type handling in $group context. Covers all non-numeric BSON types (string, boolean, object, ObjectId, datetime, -Timestamp, Binary, Regex, Code, MinKey, MaxKey, arrays) and verifies they are +Timestamp, Binary, Regex, MinKey, MaxKey, arrays) and verifies they are silently ignored and excluded from both sum and count. 
""" @@ -11,7 +11,7 @@ from datetime import datetime, timezone import pytest -from bson import Binary, Code, MaxKey, MinKey, ObjectId, Regex, Timestamp +from bson import Binary, MaxKey, MinKey, ObjectId, Regex, Timestamp from documentdb_tests.compatibility.tests.core.operator.accumulators.utils import ( AccumulatorTestCase, @@ -134,16 +134,6 @@ expected=[{"result": None}], msg="$avg should ignore Regex values", ), - AccumulatorTestCase( - "code", - docs=[{"v": Code("x")}, {"v": Code("y")}], - pipeline=[ - {"$group": {"_id": None, "result": {"$avg": "$v"}}}, - {"$project": {"_id": 0, "result": 1}}, - ], - expected=[{"result": None}], - msg="$avg should ignore Code values", - ), AccumulatorTestCase( "minkey", docs=[{"v": MinKey()}, {"v": MinKey()}], From 69de0f5210a8c6c5e2e9a471c7757f48e8cc4822 Mon Sep 17 00:00:00 2001 From: "Alina (Xi) Li" Date: Tue, 26 May 2026 12:10:53 -0700 Subject: [PATCH 8/8] rename smoke tests Signed-off-by: Alina (Xi) Li --- ...est_accumulator_avg_smoke.py => test_smoke_accumulator_avg.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename documentdb_tests/compatibility/tests/core/operator/accumulators/avg/{test_accumulator_avg_smoke.py => test_smoke_accumulator_avg.py} (100%) diff --git a/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_smoke.py b/documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_smoke_accumulator_avg.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_accumulator_avg_smoke.py rename to documentdb_tests/compatibility/tests/core/operator/accumulators/avg/test_smoke_accumulator_avg.py