diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py
new file mode 100644
index 00000000..d47f8d21
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py
@@ -0,0 +1,99 @@
+"""Tests for distinct command collation field syntax validation."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Any
+
+import pytest
+from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp
+
+from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import (
+    CommandContext,
+    CommandTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.error_codes import TYPE_MISMATCH_ERROR
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Collation Acceptance]: the collation field accepts null and
+# a document type.
+DISTINCT_COLLATION_ACCEPTANCE_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "collation_null",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x", "collation": None},
+        expected={"values": ["a"], "ok": 1.0},
+        msg="distinct should accept null collation",
+    ),
+    CommandTestCase(
+        "collation_empty_doc",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x", "collation": {}},
+        expected={"values": ["a"], "ok": 1.0},
+        msg="distinct should accept empty document collation",
+    ),
+]
+
+# Property [Collation Type Rejection]: all non-document, non-null BSON types
+# for the collation field produce a type mismatch error.
+DISTINCT_COLLATION_TYPE_REJECTION_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        f"collation_type_{tid}",
+        docs=[{"_id": 1, "x": "a"}],
+        # v=val binds the current value; a plain closure would see only the last one.
+        command=lambda ctx, v=val: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "collation": v,
+        },
+        error_code=TYPE_MISMATCH_ERROR,
+        msg=f"distinct should reject {tid} as collation",
+    )
+    for tid, val in [
+        ("string", "en"),
+        ("int32", 42),
+        ("int64", Int64(1)),
+        ("double", 3.14),
+        ("decimal128", Decimal128("1")),
+        ("bool", True),
+        ("array", [1, 2]),
+        # Fixed id rather than ObjectId(), which generates a new value on every import.
+        ("objectid", ObjectId("000000000000000000000001")),
+        ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
+        ("timestamp", Timestamp(1, 1)),
+        ("binary", Binary(b"data")),
+        ("regex", Regex("abc")),
+        ("code", Code("function(){}")),
+        ("code_with_scope", Code("function(){}", {"x": 1})),
+        ("minkey", MinKey()),
+        ("maxkey", MaxKey()),
+    ]
+]
+
+# All collation cases, acceptance first, fed to the single parametrized test below.
+DISTINCT_COLLATION_TESTS: list[CommandTestCase] = (
+    DISTINCT_COLLATION_ACCEPTANCE_TESTS + DISTINCT_COLLATION_TYPE_REJECTION_TESTS
+)
+
+
+@pytest.mark.parametrize("test", pytest_params(DISTINCT_COLLATION_TESTS))
+def test_distinct_collation(database_client: Any, collection: Any, test: CommandTestCase) -> None:
+    """Run one collation case and check its result or error code.
+
+    The collection returned by ``test.prepare`` is used to build and execute the
+    command; the outcome is checked by ``assertResult`` with ``raw_res=True``.
+    ``ignore_order_in`` is forwarded like the sibling distinct test modules do.
+    """
+    collection = test.prepare(database_client, collection)
+    ctx = CommandContext.from_collection(collection)
+    result = execute_command(collection, test.build_command(ctx))
+    assertResult(
+        result,
+        expected=test.build_expected(ctx),
+        error_code=test.error_code,
+        msg=test.msg,
+        raw_res=True,
+        ignore_order_in=test.ignore_order_in,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collection_types.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collection_types.py
new file mode 100644
index 00000000..9f60e318
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collection_types.py
@@ -0,0 +1,92 @@
+"""Tests for distinct command collection type acceptance."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import ( + CappedCollection, + ClusteredCollection, + TimeseriesCollection, + ViewCollection, +) + +# Property [Collection Type Acceptance]: distinct produces correct results +# regardless of the underlying collection type. +DISTINCT_COLLECTION_TYPE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "regular", + docs=[{"_id": i, "x": i % 3} for i in range(5)], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [0, 1, 2], "ok": 1.0}, + ignore_order_in=["values"], + msg="distinct should work on a regular collection", + ), + CommandTestCase( + "view", + target_collection=ViewCollection( + options={"pipeline": [{"$match": {"x": {"$gte": 1}}}]}, + suffix="_view", + ), + docs=[{"_id": i, "x": i % 3} for i in range(5)], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [1, 2], "ok": 1}, + ignore_order_in=["values"], + msg="distinct on view should only see documents passing the view pipeline", + ), + CommandTestCase( + "capped", + target_collection=CappedCollection(size=100_000), + docs=[{"_id": i, "x": i % 3} for i in range(5)], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [0, 1, 2], "ok": 1.0}, + ignore_order_in=["values"], + msg="distinct should work on a capped collection", + ), + CommandTestCase( + "timeseries", + target_collection=TimeseriesCollection(), + docs=[ + {"ts": datetime(2024, 1, i, tzinfo=timezone.utc), "meta": "a", "x": i % 3} + for i in range(1, 6) + ], + command=lambda ctx: {"distinct": 
ctx.collection, "key": "x"}, + expected={"values": [0, 1, 2], "ok": 1}, + ignore_order_in=["values"], + msg="distinct should work on a timeseries collection", + ), + CommandTestCase( + "clustered", + target_collection=ClusteredCollection(), + docs=[{"_id": i, "x": i % 3} for i in range(5)], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [0, 1, 2], "ok": 1.0}, + ignore_order_in=["values"], + msg="distinct should work on a clustered collection", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_COLLECTION_TYPE_TESTS)) +def test_distinct_collection_types(database_client, collection, test): + """Test distinct command collection type acceptance.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py new file mode 100644 index 00000000..28048a3a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py @@ -0,0 +1,316 @@ +"""Tests for distinct command validation and structural errors.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex +from bson.timestamp import Timestamp + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from 
documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + DISTINCT_TOO_BIG_ERROR, + FAILED_TO_PARSE_ERROR, + INVALID_OPTIONS_ERROR, + KEY_FIELD_NULL_BYTE_ERROR, + TYPE_MISMATCH_ERROR, + UNRECOGNIZED_COMMAND_FIELD_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_NAN, + DECIMAL128_ONE_AND_HALF, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + FLOAT_NEGATIVE_NAN, + INT32_MAX, +) + +# Property [Query Validation]: query semantics are validated even when the +# collection does not exist; invalid operators produce BAD_VALUE_ERROR. +DISTINCT_QUERY_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "query_invalid_operator_nonexistent_collection", + docs=None, + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$invalid": 1}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject invalid query operators even on non-existent collections", + ), +] + +# Property [Key Field Null Byte Rejection]: a null byte anywhere in the key +# string produces an error. +DISTINCT_KEY_NULL_BYTE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + f"key_null_byte_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: {"distinct": ctx.collection, "key": v}, + error_code=KEY_FIELD_NULL_BYTE_ERROR, + msg=f"distinct should reject a key with a null byte {tid}", + ) + for tid, val in [ + ("middle", "x\x00y"), + ("start", "\x00x"), + ("end", "x\x00"), + ("only", "\x00"), + ] +] + +# Property [Unrecognized Fields]: unrecognized fields in the command document +# produce an IDLUnknownField error; field name matching is case-sensitive. 
+DISTINCT_UNRECOGNIZED_FIELDS_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "unrecognized_unknown_field",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "unknownField": 1,  # extra field this case expects to be rejected
+        },
+        error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR,
+        msg="distinct should reject unrecognized fields in the command document",
+    ),
+    CommandTestCase(
+        "unrecognized_case_variant_key",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "Key": "y",  # capitalized copy sent alongside the valid lowercase "key"
+        },
+        error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR,
+        msg="distinct should treat case variants of known fields as unrecognized",
+    ),
+    CommandTestCase(
+        "unrecognized_case_variant_query",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "Query": {},  # capitalized variant of "query"
+        },
+        error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR,
+        msg="distinct should treat 'Query' as unrecognized (case-sensitive matching)",
+    ),
+    CommandTestCase(
+        "unrecognized_case_variant_hint",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "Hint": {"x": 1},  # capitalized variant of "hint"
+        },
+        error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR,
+        msg="distinct should treat 'Hint' as unrecognized (case-sensitive matching)",
+    ),
+]
+
+# Property [WriteConcern Rejection]: writeConcern documents are rejected as invalid
+# options, null is treated as omitted, and other types are type mismatches.
+DISTINCT_WRITE_CONCERN_TESTS: list[CommandTestCase] = [
+    *[
+        CommandTestCase(
+            f"writeconcern_{tid}",
+            docs=[{"_id": 1, "x": "a"}],
+            command=lambda ctx, v=val: {  # v=val freezes the current loop value
+                "distinct": ctx.collection,
+                "key": "x",
+                "writeConcern": v,
+            },
+            error_code=INVALID_OPTIONS_ERROR,  # expected for every document below
+            msg=f"distinct should reject writeConcern {tid} as unsupported",
+        )
+        for tid, val in [
+            ("w_1", {"w": 1}),
+            ("w_majority", {"w": "majority"}),
+            ("w_0", {"w": 0}),
+            ("j_true", {"j": True}),
+            ("wtimeout", {"wtimeout": 1000}),
+            ("empty_doc", {}),  # no options set, still expected to be rejected
+        ]
+    ],
+    CommandTestCase(
+        "writeconcern_null_accepted",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "writeConcern": None,  # the only writeConcern value expected to succeed
+        },
+        expected={"values": ["a"], "ok": 1.0},
+        msg="distinct should treat writeConcern null as omitted",
+    ),
+    *[
+        CommandTestCase(
+            f"writeconcern_type_{tid}",
+            docs=[{"_id": 1, "x": "a"}],
+            command=lambda ctx, v=val: {  # v=val freezes the current loop value
+                "distinct": ctx.collection,
+                "key": "x",
+                "writeConcern": v,
+            },
+            error_code=TYPE_MISMATCH_ERROR,  # expected for every non-document value below
+            msg=f"distinct should reject {tid} as writeConcern",
+        )
+        for tid, val in [
+            ("string", "majority"),
+            ("int32", 1),
+            ("int64", Int64(1)),
+            ("double", 1.0),
+            ("decimal128", Decimal128("1")),
+            ("bool", True),
+            ("array", [1]),
+            ("objectid", ObjectId("000000000000000000000001")),
+            ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
+            ("timestamp", Timestamp(1, 1)),
+            ("binary", Binary(b"data", 0)),
+            ("regex", Regex("abc", "")),
+            ("code", Code("function(){}")),
+            ("code_with_scope", Code("function(){}", {"s": 1})),
+            ("minkey", MinKey()),
+            ("maxkey", MaxKey()),
+        ]
+    ],
+]
+
+# Property [maxTimeMS Validation Errors]: negative or above-int32 values are bad
+# values, fractional/NaN/Infinity numbers fail to parse, other types mismatch.
+DISTINCT_MAXTIMEMS_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "maxtimems_err_negative", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "maxTimeMS": -1}, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject negative maxTimeMS", + ), + CommandTestCase( + "maxtimems_err_exceeds_int32_max", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "maxTimeMS": INT32_MAX + 1, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject maxTimeMS exceeding the maximum int32 value", + ), + CommandTestCase( + "maxtimems_err_int64_exceeds_int32_max", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "maxTimeMS": Int64(INT32_MAX + 1), + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject Int64 maxTimeMS exceeding the maximum int32 value", + ), + *[ + CommandTestCase( + f"maxtimems_err_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "maxTimeMS": v, + }, + error_code=FAILED_TO_PARSE_ERROR, + msg=f"distinct should reject {tid} as maxTimeMS", + ) + for tid, val in [ + ("fractional", 1.5), + ("decimal128_fractional", DECIMAL128_ONE_AND_HALF), + ("nan", FLOAT_NAN), + ("neg_nan", FLOAT_NEGATIVE_NAN), + ("decimal128_nan", DECIMAL128_NAN), + ("decimal128_neg_nan", DECIMAL128_NEGATIVE_NAN), + ("infinity", FLOAT_INFINITY), + ("neg_infinity", FLOAT_NEGATIVE_INFINITY), + ("decimal128_infinity", DECIMAL128_INFINITY), + ("decimal128_neg_infinity", DECIMAL128_NEGATIVE_INFINITY), + ] + ], + *[ + CommandTestCase( + f"maxtimems_err_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "maxTimeMS": v, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} as maxTimeMS", + ) + for tid, val in [ + ("string", "hello"), + ("bool", True), + ("array", [1]), + ("object", 
{"a": 1}), + ("objectid", ObjectId("000000000000000000000001")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"data", 0)), + ("regex", Regex("abc", "")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"s": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], +] + +# Property [BSON Size Limit]: when the distinct values exceed the maximum BSON +# document size (16MB), the command produces an error. +DISTINCT_BSON_SIZE_LIMIT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "bson_size_limit_exceeded", + docs=[{"_id": i, "x": f"v{i}" + "x" * 17_000} for i in range(1100)], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + error_code=DISTINCT_TOO_BIG_ERROR, + msg="distinct should produce an error when results exceed the 16MB BSON size limit", + ), +] + +DISTINCT_COMMAND_ERROR_TESTS: list[CommandTestCase] = ( + DISTINCT_QUERY_ERROR_TESTS + + DISTINCT_KEY_NULL_BYTE_TESTS + + DISTINCT_UNRECOGNIZED_FIELDS_TESTS + + DISTINCT_WRITE_CONCERN_TESTS + + DISTINCT_MAXTIMEMS_ERROR_TESTS + + DISTINCT_BSON_SIZE_LIMIT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_COMMAND_ERROR_TESTS)) +def test_distinct_command_errors( + database_client: Any, collection: Any, test: CommandTestCase +) -> None: + """Test distinct command error cases.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_deduplication.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_deduplication.py new file mode 100644 index 00000000..c1c32192 --- 
/dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_deduplication.py @@ -0,0 +1,431 @@ +"""Tests for distinct command deduplication behavior.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import Binary, Code, Decimal128, Int64, Regex +from bson.timestamp import Timestamp + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import ViewCollection +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_ZERO, + DECIMAL128_ZERO, + DOUBLE_MAX_SAFE_INTEGER, + DOUBLE_NEGATIVE_ZERO, + DOUBLE_PRECISION_LOSS, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + FLOAT_NEGATIVE_NAN, +) + +# Property [Array Unwinding]: when the key field value is an array, each element +# is treated as a separate value for deduplication. 
+DISTINCT_ARRAY_UNWINDING_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "array_top_level_elements", + docs=[{"_id": 1, "x": [1, 2, 3]}, {"_id": 2, "x": [2, 4]}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [1, 2, 3, 4], "ok": 1.0}, + msg="distinct should treat each array element as a separate value", + ), + CommandTestCase( + "array_nested_preserved", + docs=[{"_id": 1, "x": [1, [1], 1]}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [1, [1]], "ok": 1.0}, + msg="distinct should preserve nested arrays as distinct values", + ), + CommandTestCase( + "array_empty_contributes_nothing", + docs=[{"_id": 1, "x": []}, {"_id": 2, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should extract zero elements from an empty array", + ), + CommandTestCase( + "array_single_level_only", + docs=[{"_id": 1, "x": [[["a"]]]}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [[["a"]]], "ok": 1.0}, + msg="distinct should only unwrap one level of array nesting", + ), + CommandTestCase( + "array_mixed_with_scalar", + docs=[{"_id": 1, "x": [1, 2]}, {"_id": 2, "x": 3}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [1, 2, 3], "ok": 1.0}, + msg="distinct should combine array elements and scalar values", + ), + CommandTestCase( + "array_null_elements_unwound", + docs=[{"_id": 1, "x": [1, None, 2]}, {"_id": 2, "x": [None, 3]}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [None, 1, 2, 3], "ok": 1.0}, + msg="distinct should unwrap null elements from arrays and deduplicate them", + ), + CommandTestCase( + "array_null_element_dedup_with_explicit_null", + docs=[ + {"_id": 1, "x": [1, None]}, + {"_id": 2, "x": None}, + {"_id": 3, "x": [2]}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + 
expected={"values": [None, 1, 2], "ok": 1.0}, + msg="distinct should deduplicate null from array with explicit null field value", + ), +] + +# Property [Value Deduplication]: numeric values with the same mathematical value +# are deduplicated across types, and the first-encountered representation is +# returned. +DISTINCT_VALUE_DEDUP_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "dedup_numeric_across_types", + docs=[ + {"_id": 1, "x": 1}, + {"_id": 2, "x": Int64(1)}, + {"_id": 3, "x": 1.0}, + {"_id": 4, "x": Decimal128("1")}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [1], "ok": 1.0}, + msg="distinct should deduplicate numerically equal values across types", + ), + CommandTestCase( + "dedup_all_zeros", + docs=[ + {"_id": 1, "x": 0}, + {"_id": 2, "x": DOUBLE_NEGATIVE_ZERO}, + {"_id": 3, "x": DECIMAL128_NEGATIVE_ZERO}, + {"_id": 4, "x": DECIMAL128_ZERO}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [0], "ok": 1.0}, + msg="distinct should deduplicate all zero representations to a single value", + ), + CommandTestCase( + "dedup_nan_across_types", + docs=[ + {"_id": 1, "x": FLOAT_NAN}, + {"_id": 2, "x": Decimal128("NaN")}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [pytest.approx(FLOAT_NAN, nan_ok=True)], + "ok": 1.0, + }, + msg="distinct should deduplicate NaN across float and Decimal128", + ), + CommandTestCase( + "dedup_pos_infinity_across_types", + docs=[ + {"_id": 1, "x": FLOAT_INFINITY}, + {"_id": 2, "x": DECIMAL128_INFINITY}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [FLOAT_INFINITY], "ok": 1.0}, + msg="distinct should deduplicate +Infinity across float and Decimal128", + ), + CommandTestCase( + "dedup_neg_infinity_across_types", + docs=[ + {"_id": 1, "x": FLOAT_NEGATIVE_INFINITY}, + {"_id": 2, "x": DECIMAL128_NEGATIVE_INFINITY}, + ], + command=lambda ctx: 
{"distinct": ctx.collection, "key": "x"}, + expected={"values": [FLOAT_NEGATIVE_INFINITY], "ok": 1.0}, + msg="distinct should deduplicate -Infinity across float and Decimal128", + ), + CommandTestCase( + "dedup_bool_not_numeric", + docs=[ + {"_id": 1, "x": 0}, + {"_id": 2, "x": 1}, + {"_id": 3, "x": False}, + {"_id": 4, "x": True}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [0, 1, False, True], "ok": 1.0}, + msg="distinct should not deduplicate booleans with their numeric equivalents", + ), + CommandTestCase( + "dedup_decimal128_trailing_zeros", + docs=[ + {"_id": 1, "x": Decimal128("0.1")}, + {"_id": 2, "x": Decimal128("0.10")}, + {"_id": 3, "x": Decimal128("0.100")}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [Decimal128("0.1")], "ok": 1.0}, + msg="distinct should deduplicate Decimal128 values with trailing zeros", + ), + CommandTestCase( + "dedup_decimal128_vs_double_distinct", + docs=[ + {"_id": 1, "x": Decimal128("0.1")}, + {"_id": 2, "x": 0.1}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [Decimal128("0.1"), 0.1], "ok": 1.0}, + msg=( + "distinct should treat Decimal128 and double as distinct" + " when they differ in exact representation" + ), + ), + CommandTestCase( + "dedup_int64_beyond_double_precision", + docs=[ + {"_id": 1, "x": Int64(DOUBLE_PRECISION_LOSS)}, + {"_id": 2, "x": float(DOUBLE_MAX_SAFE_INTEGER)}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [float(DOUBLE_MAX_SAFE_INTEGER), Int64(DOUBLE_PRECISION_LOSS)], + "ok": 1.0, + }, + msg="distinct should compare Int64 at full precision against double", + ), + CommandTestCase( + "dedup_object_key_order_matters", + docs=[ + {"_id": 1, "x": {"a": 1, "b": 2}}, + {"_id": 2, "x": {"b": 2, "a": 1}}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [{"a": 1, "b": 2}, {"b": 2, 
"a": 1}], + "ok": 1.0, + }, + msg="distinct should treat objects with different key order as distinct", + ), + CommandTestCase( + "dedup_binary_subtype_matters", + docs=[ + {"_id": 1, "x": Binary(b"hello", 0)}, + {"_id": 2, "x": Binary(b"hello", 5)}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [b"hello", Binary(b"hello", 5)], + "ok": 1.0, + }, + msg="distinct should treat same data with different binary subtypes as distinct", + ), + CommandTestCase( + "dedup_timestamp_by_pair", + docs=[ + {"_id": 1, "x": Timestamp(100, 1)}, + {"_id": 2, "x": Timestamp(100, 1)}, + {"_id": 3, "x": Timestamp(100, 2)}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [Timestamp(100, 1), Timestamp(100, 2)], + "ok": 1.0, + }, + msg="distinct should deduplicate Timestamp values by their (time, increment) pair", + ), + CommandTestCase( + "dedup_datetime_millisecond_precision", + docs=[ + {"_id": 1, "x": datetime(2024, 1, 1, 0, 0, 0, 0, tzinfo=timezone.utc)}, + {"_id": 2, "x": datetime(2024, 1, 1, 0, 0, 0, 1000, tzinfo=timezone.utc)}, + {"_id": 3, "x": datetime(2024, 1, 1, 0, 0, 0, 0, tzinfo=timezone.utc)}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [ + datetime(2024, 1, 1, 0, 0, 0, 0, tzinfo=timezone.utc), + datetime(2024, 1, 1, 0, 0, 0, 1000, tzinfo=timezone.utc), + ], + "ok": 1.0, + }, + msg="distinct should preserve millisecond precision for datetime deduplication", + ), + CommandTestCase( + "dedup_first_encountered_type_wins", + docs=[ + {"_id": 1, "x": 5.0}, + {"_id": 2, "x": 5}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [5.0], "ok": 1.0}, + msg="distinct should return the first-encountered type when duplicates exist", + ), + CommandTestCase( + "dedup_decimal128_scientific_notation", + docs=[ + {"_id": 1, "x": Decimal128("1E+3")}, + {"_id": 2, "x": Decimal128("1000")}, + {"_id": 3, "x": 1000}, 
+ {"_id": 4, "x": 1000.0}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [Decimal128("1E+3")], "ok": 1.0}, + msg=( + "distinct should deduplicate Decimal128 scientific notation" + " with equivalent integer and double values" + ), + ), + CommandTestCase( + "dedup_nan_all_variants", + docs=[ + {"_id": 1, "x": FLOAT_NAN}, + {"_id": 2, "x": FLOAT_NEGATIVE_NAN}, + {"_id": 3, "x": Decimal128("NaN")}, + {"_id": 4, "x": Decimal128("-NaN")}, + {"_id": 5, "x": Decimal128("sNaN")}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [pytest.approx(FLOAT_NAN, nan_ok=True)], + "ok": 1.0, + }, + msg="distinct should deduplicate all NaN variants (NaN, -NaN, sNaN) to one value", + ), + CommandTestCase( + "dedup_regex_flags_matter", + docs=[ + {"_id": 1, "x": Regex("abc", "i")}, + {"_id": 2, "x": Regex("abc", "")}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [Regex("abc", ""), Regex("abc", "i")], + "ok": 1.0, + }, + msg="distinct should treat regex values with different flags as distinct", + ), + CommandTestCase( + "dedup_regex_empty_flags_equals_no_flags", + docs=[ + {"_id": 1, "x": Regex("abc", "")}, + {"_id": 2, "x": Regex("abc")}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [Regex("abc", "")], "ok": 1.0}, + msg="distinct should deduplicate regex with empty flags and regex with no flags", + ), + CommandTestCase( + "dedup_code_vs_code_with_scope", + docs=[ + {"_id": 1, "x": Code("function()")}, + {"_id": 2, "x": Code("function()", {"s": 1})}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [Code("function()"), Code("function()", {"s": 1})], + "ok": 1.0, + }, + msg="distinct should treat Code and CodeWithScope as distinct types", + ), + CommandTestCase( + "dedup_code_with_scope_different_scopes", + docs=[ + {"_id": 1, "x": Code("function()", {"x": 1})}, + 
{"_id": 2, "x": Code("function()", {"x": 2})}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [ + Code("function()", {"x": 1}), + Code("function()", {"x": 2}), + ], + "ok": 1.0, + }, + msg="distinct should treat CodeWithScope values with different scopes as distinct", + ), +] + +# Property [Unicode Deduplication]: precomposed and combining Unicode characters +# are distinct under binary comparison but collapsed under ICU collation. +DISTINCT_UNICODE_DEDUP_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "dedup_unicode_binary_distinct", + # U+00E9 (precomposed) vs U+0065 U+0301 (combining). + docs=[{"_id": 1, "x": "\u00e9"}, {"_id": 2, "x": "e\u0301"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": ["e\u0301", "\u00e9"], "ok": 1.0}, + msg=( + "distinct should treat precomposed and combining characters" + " as distinct under binary comparison" + ), + ), + CommandTestCase( + "dedup_unicode_icu_collapsed", + # U+00E9 (precomposed) vs U+0065 U+0301 (combining). + docs=[{"_id": 1, "x": "\u00e9"}, {"_id": 2, "x": "e\u0301"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": ["\u00e9"], "ok": 1.0}, + msg="distinct should collapse precomposed and combining characters under ICU collation", + ), + CommandTestCase( + "dedup_unicode_simple_locale_distinct", + # U+00E9 (precomposed) vs U+0065 U+0301 (combining). + docs=[{"_id": 1, "x": "\u00e9"}, {"_id": 2, "x": "e\u0301"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "simple"}, + }, + expected={"values": ["e\u0301", "\u00e9"], "ok": 1.0}, + msg="distinct should preserve binary distinction with locale=simple", + ), +] + +# Property [Array Unwinding on Views]: array unwinding behavior is identical +# for collections and views. 
+DISTINCT_ARRAY_UNWINDING_VIEW_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "array_unwinding_view", + target_collection=ViewCollection(), + docs=[{"_id": 1, "x": [1, 2, 3]}, {"_id": 2, "x": [2, 4]}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [1, 2, 3, 4], "ok": 1}, + ignore_order_in=["values"], + msg="distinct should unwrap arrays identically on views", + ), +] + +DISTINCT_DEDUPLICATION_TESTS: list[CommandTestCase] = ( + DISTINCT_ARRAY_UNWINDING_TESTS + + DISTINCT_VALUE_DEDUP_TESTS + + DISTINCT_UNICODE_DEDUP_TESTS + + DISTINCT_ARRAY_UNWINDING_VIEW_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_DEDUPLICATION_TESTS)) +def test_distinct_deduplication( + database_client: Any, collection: Any, test: CommandTestCase +) -> None: + """Test distinct deduplication cases.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_hint.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_hint.py new file mode 100644 index 00000000..b70688f1 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_hint.py @@ -0,0 +1,292 @@ +"""Tests for distinct command hint parameter behavior.""" + +from __future__ import annotations + +from typing import Any + +import pytest +from bson import Decimal128, Int64 +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from 
documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import ViewCollection + +# Property [Hint Success]: valid hint values are accepted and influence index +# selection for the distinct command. +DISTINCT_HINT_SUCCESS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "hint_string_matches_index_name", + indexes=[IndexModel([("x", 1)], name="x_1")], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": "x_1"}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept a string hint that exactly matches an index name", + ), + CommandTestCase( + "hint_doc_matches_key_pattern", + indexes=[IndexModel([("x", 1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": {"x": 1}}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept a document hint matching the index key pattern", + ), + CommandTestCase( + "hint_doc_direction_int64", + indexes=[IndexModel([("x", 1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": Int64(1)}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept Int64(1) as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_double", + indexes=[IndexModel([("x", 1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": 1.0}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept double 1.0 as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_decimal128", + indexes=[IndexModel([("x", 1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + 
"distinct": ctx.collection, + "key": "x", + "hint": {"x": Decimal128("1")}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept Decimal128('1') as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_neg1_int64", + indexes=[IndexModel([("x", -1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": Int64(-1)}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept Int64(-1) as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_neg1_double", + indexes=[IndexModel([("x", -1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": -1.0}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept double -1.0 as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_neg1_decimal128", + indexes=[IndexModel([("x", -1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": Decimal128("-1")}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept Decimal128('-1') as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_neg1_int32", + indexes=[IndexModel([("x", -1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": -1}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept int32 -1 as a direction value in document hint", + ), + CommandTestCase( + "hint_natural_forward", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"$natural": 1}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should 
accept $natural: 1 for forward collection scan", + ), + CommandTestCase( + "hint_natural_backward", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"$natural": -1}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept $natural: -1 for backward collection scan", + ), + CommandTestCase( + "hint_empty_doc", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": {}}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should treat empty document hint as no hint", + ), + CommandTestCase( + "hint_nonexistent_collection_string", + docs=None, + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": "any_index_name", + }, + expected={"values": [], "ok": 1.0}, + msg="distinct should skip hint validation for non-existent collections (string hint)", + ), + CommandTestCase( + "hint_nonexistent_collection_doc", + docs=None, + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"any_field": 1}, + }, + expected={"values": [], "ok": 1.0}, + msg="distinct should skip hint validation for non-existent collections (doc hint)", + ), + CommandTestCase( + "hint_sparse_index", + indexes=[IndexModel([("y", 1)], sparse=True)], + docs=[ + {"_id": 1, "x": "a", "y": 1}, + {"_id": 2, "x": "b"}, + {"_id": 3, "x": "c", "y": 3}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": {"y": 1}}, + expected={"values": ["a", "c"], "ok": 1.0}, + msg=( + "distinct with sparse index hint should return only" + " documents that have the indexed field" + ), + ), + CommandTestCase( + "hint_partial_index", + indexes=[ + IndexModel( + [("x", 1)], + partialFilterExpression={"status": "active"}, + ) + ], + docs=[ + {"_id": 1, "x": "a", "status": "active"}, + {"_id": 2, "x": "b", "status": "inactive"}, + {"_id": 3, "x": "c", "status": "active"}, + ], + 
command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": {"x": 1}}, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct with partial index hint should return only documents matching the filter", + ), + CommandTestCase( + "hint_compound_index_by_name", + indexes=[IndexModel([("x", 1), ("y", 1)], name="x_1_y_1")], + docs=[{"_id": 1, "x": "a", "y": 1}, {"_id": 2, "x": "b", "y": 2}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": "x_1_y_1"}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept a compound index hint by name", + ), + CommandTestCase( + "hint_compound_index_by_pattern", + indexes=[IndexModel([("x", 1), ("y", 1)])], + docs=[{"_id": 1, "x": "a", "y": 1}, {"_id": 2, "x": "b", "y": 2}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": 1, "y": 1}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept a compound index hint by key pattern", + ), + CommandTestCase( + "hint_id_index_by_name", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": "_id_"}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept the default _id index hint by name", + ), + CommandTestCase( + "hint_id_index_by_pattern", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"_id": 1}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept the default _id index hint by key pattern", + ), + CommandTestCase( + "hint_non_collation_compatible_index", + indexes=[IndexModel([("x", 1)], collation={"locale": "fr"})], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "A"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": 1}, + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg=( + "distinct should 
accept hint referencing a non-collation-compatible" + " index when collation is specified" + ), + ), +] + +# Property [Hint Accepted on Views]: hint is accepted on views without error. +DISTINCT_HINT_VIEW_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "hint_accepted_on_view", + target_collection=ViewCollection(), + indexes=[IndexModel([("x", 1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": {"x": 1}}, + expected={"values": sorted(["a", "b"]), "ok": 1}, + ignore_order_in=["values"], + msg="distinct should accept hint on views without error", + ), +] + +DISTINCT_HINT_TESTS: list[CommandTestCase] = DISTINCT_HINT_SUCCESS_TESTS + DISTINCT_HINT_VIEW_TESTS + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_HINT_TESTS)) +def test_distinct_hint(database_client: Any, collection: Any, test: CommandTestCase) -> None: + """Test distinct hint cases.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_key_field.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_key_field.py new file mode 100644 index 00000000..2861fbd2 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_key_field.py @@ -0,0 +1,307 @@ +"""Tests for distinct command key field behavior.""" + +from __future__ import annotations + +from functools import reduce +from typing import Any + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) 
+from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Null Field Value]: when a document has an explicit null value for the +# key field, null appears in the distinct values; missing fields are silently skipped. +DISTINCT_NULL_FIELD_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "null_explicit_null_included", + docs=[{"_id": 1, "x": None}, {"_id": 2, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [None, "a"], "ok": 1.0}, + msg="distinct should include explicit null in results", + ), + CommandTestCase( + "null_missing_field_skipped", + docs=[{"_id": 1, "y": "a"}, {"_id": 2, "y": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty when all documents are missing the key field", + ), + CommandTestCase( + "null_missing_does_not_contribute_null", + docs=[{"_id": 1}, {"_id": 2, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should not add null for documents missing the key field", + ), + CommandTestCase( + "null_explicit_null_deduplicated", + docs=[{"_id": 1, "x": None}, {"_id": 2, "x": None}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [None], "ok": 1.0}, + msg="distinct should deduplicate multiple explicit null values", + ), + CommandTestCase( + "null_mixed_null_and_missing", + docs=[{"_id": 1, "x": None}, {"_id": 2}, {"_id": 3, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [None, "a"], "ok": 1.0}, + msg="distinct should include explicit null but skip missing fields", + ), +] + +# Property [Dot Notation and Field Path Traversal]: the key parameter supports +# dot notation to traverse nested 
document structures. +DISTINCT_DOT_NOTATION_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "dot_embedded_document", + docs=[ + {"_id": 1, "item": {"sku": "abc"}}, + {"_id": 2, "item": {"sku": "def"}}, + {"_id": 3, "item": {"sku": "abc"}}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "item.sku"}, + expected={"values": ["abc", "def"], "ok": 1.0}, + msg="distinct should access fields within embedded documents via dot notation", + ), + CommandTestCase( + "dot_numeric_array_index", + docs=[ + {"_id": 1, "temps": [{"value": 10}, {"value": 20}]}, + {"_id": 2, "temps": [{"value": 30}, {"value": 40}]}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "temps.1.value"}, + expected={"values": [20, 40], "ok": 1.0}, + msg="distinct should use numeric path components to address array positions", + ), + CommandTestCase( + "dot_descend_into_array_of_objects", + docs=[ + {"_id": 1, "items": [{"name": "a"}, {"name": "b"}]}, + {"_id": 2, "items": [{"name": "b"}, {"name": "c"}]}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "items.name"}, + expected={"values": ["a", "b", "c"], "ok": 1.0}, + msg=( + "distinct should descend into array elements to extract" + " nested fields from each object" + ), + ), + CommandTestCase( + "dot_multi_level_array_traversal", + docs=[ + {"_id": 1, "a": [{"b": [{"c": 1}, {"c": 2}]}, {"b": [{"c": 3}]}]}, + {"_id": 2, "a": [{"b": [{"c": 2}, {"c": 4}]}]}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "a.b.c"}, + expected={"values": [1, 2, 3, 4], "ok": 1.0}, + msg="distinct should traverse multiple levels of nested arrays", + ), + CommandTestCase( + "dot_leading_dot_empty", + docs=[{"_id": 1, "x": "hello"}], + command=lambda ctx: {"distinct": ctx.collection, "key": ".x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty results for a key with a leading dot", + ), + CommandTestCase( + "dot_trailing_dot_empty", + docs=[{"_id": 1, "x": "hello"}], + command=lambda ctx: 
{"distinct": ctx.collection, "key": "x."}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty results for a key with a trailing dot", + ), + CommandTestCase( + "dot_consecutive_dots_empty", + docs=[{"_id": 1, "x": {"y": "hello"}}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x..y"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty results for a key with consecutive dots", + ), + CommandTestCase( + "dot_negative_numeric_empty", + docs=[{"_id": 1, "arr": ["a", "b", "c"]}], + command=lambda ctx: {"distinct": ctx.collection, "key": "arr.-1"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty results for negative numeric path components", + ), + CommandTestCase( + "dot_out_of_bounds_empty", + docs=[{"_id": 1, "arr": ["a", "b", "c"]}], + command=lambda ctx: {"distinct": ctx.collection, "key": "arr.99"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty results for out-of-bounds numeric path components", + ), + CommandTestCase( + "dot_beyond_int32_literal_field_name", + docs=[{"_id": 1, "data": {"2147483648": "found"}}], + command=lambda ctx: {"distinct": ctx.collection, "key": "data.2147483648"}, + expected={"values": ["found"], "ok": 1.0}, + msg="distinct should treat numeric components beyond int32 range as literal field names", + ), + CommandTestCase( + "dot_deeply_nested_accepted", + docs=[{"_id": 1, **reduce(lambda inner, _: {"n": inner}, range(100), {"val": "deep"})}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": ".".join(["n"] * 100 + ["val"]), + }, + expected={"values": ["deep"], "ok": 1.0}, + msg="distinct should accept deeply nested paths with 100+ segments without error", + ), + CommandTestCase( + "dot_mixed_object_and_array_at_path", + docs=[ + {"_id": 1, "x": {"y": "from_obj"}}, + {"_id": 2, "x": [{"y": "from_arr1"}, {"y": "from_arr2"}]}, + {"_id": 3, "x": "scalar"}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": 
"x.y"}, + expected={"values": ["from_arr1", "from_arr2", "from_obj"], "ok": 1.0}, + msg=( + "distinct should traverse both objects and arrays at the same path" + " across different documents" + ), + ), + CommandTestCase( + "dot_numeric_on_mixed_object_and_array", + docs=[ + {"_id": 1, "data": ["arr_zero", "arr_one"]}, + {"_id": 2, "data": {"0": "obj_zero", "1": "obj_one"}}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "data.0"}, + expected={"values": ["arr_zero", "obj_zero"], "ok": 1.0}, + msg=( + "distinct should match numeric path component as both array index" + " and literal field name across documents" + ), + ), +] + +# Property [Key Field Special Characters]: dollar signs, whitespace, Unicode +# characters, and empty string are treated as literal field name characters in +# the key parameter. +DISTINCT_SPECIAL_CHARS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "special_dollar_in_key", + docs=[{"_id": 1, "$price": 9.99}], + command=lambda ctx: {"distinct": ctx.collection, "key": "$price"}, + expected={"values": [9.99], "ok": 1.0}, + msg="distinct should treat dollar sign in key as a literal field name character", + ), + CommandTestCase( + "special_dollar_only", + docs=[{"_id": 1, "x": "hello"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "$"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should succeed with $ as entire key (returns empty if no matching field)", + ), + CommandTestCase( + "special_double_dollar", + docs=[{"_id": 1, "x": "hello"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "$$"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should succeed with $$ as entire key (returns empty if no matching field)", + ), + CommandTestCase( + "special_space_in_key", + docs=[{"_id": 1, "my field": "space_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "my field"}, + expected={"values": ["space_value"], "ok": 1.0}, + msg="distinct should accept space characters in key 
field names", + ), + CommandTestCase( + "special_tab_in_key", + docs=[{"_id": 1, "tab\tfield": "tab_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "tab\tfield"}, + expected={"values": ["tab_value"], "ok": 1.0}, + msg="distinct should accept tab characters in key field names", + ), + CommandTestCase( + "special_newline_in_key", + docs=[{"_id": 1, "new\nline": "newline_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "new\nline"}, + expected={"values": ["newline_value"], "ok": 1.0}, + msg="distinct should accept newline characters in key field names", + ), + CommandTestCase( + "special_cjk_in_key", + # CJK Unified Ideographs. + docs=[{"_id": 1, "\u65e5\u672c\u8a9e": "cjk_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "\u65e5\u672c\u8a9e"}, + expected={"values": ["cjk_value"], "ok": 1.0}, + msg="distinct should accept CJK characters in key field names", + ), + CommandTestCase( + "special_emoji_in_key", + docs=[{"_id": 1, "\U0001f389": "emoji_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "\U0001f389"}, + expected={"values": ["emoji_value"], "ok": 1.0}, + msg="distinct should accept emoji characters in key field names", + ), + CommandTestCase( + "special_combining_mark_in_key", + # U+0065 U+0301 (e + combining acute accent). + docs=[{"_id": 1, "e\u0301": "combining_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "e\u0301"}, + expected={"values": ["combining_value"], "ok": 1.0}, + msg="distinct should accept combining mark characters in key field names", + ), + CommandTestCase( + "special_empty_string_key", + docs=[{"_id": 1, "": "empty_key_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": ""}, + expected={"values": ["empty_key_value"], "ok": 1.0}, + msg='distinct should match documents with a field literally named ""', + ), +] + +# Property [Distinct on _id Field]: the _id field can be used as the key +# parameter and returns the distinct _id values. 
+DISTINCT_ID_FIELD_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "id_field_as_key", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}, {"_id": 3, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "_id"}, + expected={"values": [1, 2, 3], "ok": 1.0}, + msg="distinct should return all _id values when key is '_id'", + ), + CommandTestCase( + "id_field_dot_notation", + docs=[ + {"_id": {"a": 1, "b": 2}, "x": "hello"}, + {"_id": {"a": 1, "b": 3}, "x": "world"}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "_id.a"}, + expected={"values": [1], "ok": 1.0}, + msg="distinct should support dot notation into compound _id fields", + ), +] + +DISTINCT_KEY_FIELD_TESTS: list[CommandTestCase] = ( + DISTINCT_NULL_FIELD_TESTS + + DISTINCT_DOT_NOTATION_TESTS + + DISTINCT_SPECIAL_CHARS_TESTS + + DISTINCT_ID_FIELD_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_KEY_FIELD_TESTS)) +def test_distinct_key_field(database_client: Any, collection: Any, test: CommandTestCase) -> None: + """Test distinct key field cases.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py new file mode 100644 index 00000000..65c69df5 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py @@ -0,0 +1,224 @@ +"""Tests for distinct command parameter acceptance behavior.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import 
pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex +from bson.timestamp import Timestamp + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.property_checks import Eq, Len, Ne +from documentdb_tests.framework.test_constants import ( + DOUBLE_NEGATIVE_ZERO, + INT32_MAX, +) + +# Property [Query Parameter Behavior]: the query parameter filters which documents +# contribute to distinct values; an empty document matches all. +DISTINCT_QUERY_SUCCESS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "query_filters_documents", + docs=[ + {"_id": 1, "x": "a", "status": "active"}, + {"_id": 2, "x": "b", "status": "inactive"}, + {"_id": 3, "x": "c", "status": "active"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"status": "active"}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should filter documents by the query parameter", + ), + CommandTestCase( + "query_empty_doc_matches_all", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "b"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should treat empty document query as matching all documents", + ), +] + +# Property [Query No Match]: when the query matches no documents on an existing +# collection, distinct returns an empty values array. 
+DISTINCT_QUERY_NO_MATCH_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "query_no_matching_documents",
+        # Neither stored document has y == 99, so the filter selects nothing.
+        docs=[{"_id": 1, "x": "a", "y": 1}, {"_id": 2, "x": "b", "y": 2}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x", "query": {"y": 99}},
+        expected={"values": [], "ok": 1.0},
+        msg="distinct should return empty values when query matches no documents",
+    ),
+]
+
+# Property [Comment Parameter Behavior]: a comment of any BSON type is accepted
+# without error and leaves the command result unchanged.
+# Each (type label, comment value) pair below yields one CommandTestCase.
+DISTINCT_COMMENT_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        f"comment_{type_label}",
+        docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}],
+        # The default argument binds the current value eagerly; a plain closure
+        # would only ever see the last value of the comprehension variable.
+        command=lambda ctx, v=comment_value: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "comment": v,
+        },
+        expected={"values": ["a", "b"], "ok": 1.0},
+        msg=f"distinct should accept {type_label} as comment without affecting results",
+    )
+    for type_label, comment_value in (
+        ("string", "a string comment"),
+        ("int32", 42),
+        ("int64", Int64(123456789)),
+        ("double", 3.14),
+        ("decimal128", Decimal128("9.99")),
+        ("bool", True),
+        ("array", [1, "two", 3]),
+        ("object", {"reason": "testing"}),
+        ("objectid", ObjectId("000000000000000000000001")),
+        ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
+        ("timestamp", Timestamp(100, 1)),
+        ("binary", Binary(b"data", 0)),
+        ("regex", Regex("pattern", "i")),
+        ("code", Code("function(){}")),
+        ("code_with_scope", Code("function(){}", {"s": 1})),
+        ("minkey", MinKey()),
+        ("maxkey", MaxKey()),
+    )
+]
+
+# Property [ReadConcern Success]: readConcern accepts "local", "available", and
+# "majority" levels, as well as an empty object or provenance-only without a level.
+DISTINCT_READCONCERN_SUCCESS_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        f"readconcern_{label}",
+        docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}],
+        # v is bound as a default argument so each case keeps its own document.
+        command=lambda ctx, v=read_concern: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "readConcern": v,
+        },
+        expected={"values": ["a", "b"], "ok": 1.0},
+        msg=f"distinct should accept readConcern {label}",
+    )
+    for label, read_concern in (
+        ("local", {"level": "local"}),
+        ("available", {"level": "available"}),
+        ("majority", {"level": "majority"}),
+        ("empty_object", {}),
+        ("provenance_only", {"provenance": "clientSupplied"}),
+    )
+]
+
+# Property [maxTimeMS Acceptance]: maxTimeMS accepts 0, positive integers up to
+# INT32_MAX (as int32 or Int64), whole-number doubles, integral Decimal128
+# values, and negative zero.
+DISTINCT_MAXTIMEMS_ACCEPTANCE_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        f"maxtimems_{label}",
+        # A single document is enough: only acceptance of the value is checked.
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx, v=max_time_ms: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "maxTimeMS": v,
+        },
+        expected={"values": ["a"], "ok": 1.0},
+        msg=f"distinct should accept {label} as maxTimeMS",
+    )
+    for label, max_time_ms in (
+        ("zero", 0),
+        ("positive_int", 1000),
+        ("int32_max", INT32_MAX),
+        ("int64_int32_max", Int64(INT32_MAX)),
+        ("whole_number_float", 500.0),
+        ("decimal128_integer", Decimal128("100")),
+        ("negative_zero", DOUBLE_NEGATIVE_ZERO),
+        ("decimal128_neg_zero_exponent", Decimal128("-0E+10")),
+    )
+]
+
+# Property [Timestamp Zero Replacement]: Timestamp(0, 0) is replaced by the server
+# on insert; the stored values participate in deduplication, not the literal (0, 0).
+DISTINCT_TIMESTAMP_ZERO_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "dedup_timestamp_zero_replaced", + docs=[{"_id": 1, "x": Timestamp(0, 0)}, {"_id": 2, "x": Timestamp(0, 0)}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": Len(2), + "values.0": Ne(Timestamp(0, 0)), + "values.1": Ne(Timestamp(0, 0)), + "ok": Eq(1.0), + }, + msg=( + "distinct should return server-assigned timestamps for Timestamp(0, 0)," + " not deduplicate them as identical" + ), + ), +] + +# Property [Null Optional Parameters]: when optional parameters (query, +# readConcern, comment, maxTimeMS) are null, they are treated as omitted. +DISTINCT_NULL_PARAMS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + f"null_{tid}_param", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx, p=param: {"distinct": ctx.collection, "key": "x", p: None}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg=f"distinct should treat {param}=null as omitted", + ) + for tid, param in [ + ("query", "query"), + ("read_concern", "readConcern"), + ("comment", "comment"), + ("max_time_ms", "maxTimeMS"), + ] +] + +DISTINCT_PARAMETER_TESTS: list[CommandTestCase] = ( + DISTINCT_NULL_PARAMS_TESTS + + DISTINCT_QUERY_SUCCESS_TESTS + + DISTINCT_QUERY_NO_MATCH_TESTS + + DISTINCT_COMMENT_TESTS + + DISTINCT_READCONCERN_SUCCESS_TESTS + + DISTINCT_MAXTIMEMS_ACCEPTANCE_TESTS + + DISTINCT_TIMESTAMP_ZERO_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_PARAMETER_TESTS)) +def test_distinct_parameters(database_client: Any, collection: Any, test: CommandTestCase) -> None: + """Test distinct parameter acceptance cases.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + 
ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_query_operators.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_query_operators.py new file mode 100644 index 00000000..170dd7b6 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_query_operators.py @@ -0,0 +1,539 @@ +"""Representative query operator wiring tests for the distinct command. + +One test per operator, grouped by category, confirms the distinct command's query +parameter is correctly wired to the query engine. Exhaustive operator behavior is +tested in core/operator/query/. +""" + +from __future__ import annotations + +from typing import Any + +import pytest +from bson import Int64 +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Query Operator Wiring]: the distinct command's query parameter supports +# comparison, logical, array, element, evaluation, and bitwise operators. +DISTINCT_QUERY_OPERATOR_TESTS: list[CommandTestCase] = [ + # Comparison operators.
+ CommandTestCase( + "query_eq", + docs=[{"_id": 1, "x": "a", "n": 1}, {"_id": 2, "x": "b", "n": 2}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$eq": 1}}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $eq in query", + ), + CommandTestCase( + "query_ne", + docs=[{"_id": 1, "x": "a", "n": 1}, {"_id": 2, "x": "b", "n": 2}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$ne": 1}}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $ne in query", + ), + CommandTestCase( + "query_gt", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$gt": 4}}, + }, + expected={"values": ["b", "c"], "ok": 1.0}, + msg="distinct should support $gt in query", + ), + CommandTestCase( + "query_gte", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$gte": 5}}, + }, + expected={"values": ["b", "c"], "ok": 1.0}, + msg="distinct should support $gte in query", + ), + CommandTestCase( + "query_lt", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$lt": 5}}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $lt in query", + ), + CommandTestCase( + "query_lte", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$lte": 5}}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should support $lte in query", + ), + CommandTestCase( + 
"query_in", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 2}, + {"_id": 3, "x": "c", "n": 3}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$in": [1, 3]}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $in in query", + ), + CommandTestCase( + "query_nin", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 2}, + {"_id": 3, "x": "c", "n": 3}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$nin": [1, 3]}}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $nin in query", + ), + # Logical operators. + CommandTestCase( + "query_and", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$and": [{"n": {"$gt": 1}}, {"n": {"$lt": 10}}]}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $and in query", + ), + CommandTestCase( + "query_or", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$or": [{"n": 1}, {"n": 10}]}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $or in query", + ), + CommandTestCase( + "query_nor", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$nor": [{"n": 1}, {"n": 10}]}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $nor in query", + ), + CommandTestCase( + "query_not", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + 
"query": {"n": {"$not": {"$gt": 5}}}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should support $not in query", + ), + # Element operators. + CommandTestCase( + "query_exists", + docs=[ + {"_id": 1, "x": "a", "opt": "yes"}, + {"_id": 2, "x": "b"}, + {"_id": 3, "x": "c", "opt": "no"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"opt": {"$exists": True}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $exists in query", + ), + CommandTestCase( + "query_type", + docs=[ + {"_id": 1, "x": "a", "v": 1}, + {"_id": 2, "x": "b", "v": "str"}, + {"_id": 3, "x": "c", "v": 3}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"v": {"$type": "string"}}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $type in query", + ), + # Array operators. + CommandTestCase( + "query_all", + docs=[ + {"_id": 1, "x": "a", "tags": ["red", "blue"]}, + {"_id": 2, "x": "b", "tags": ["green"]}, + {"_id": 3, "x": "c", "tags": ["red", "green"]}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"tags": {"$all": ["red"]}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $all in query", + ), + CommandTestCase( + "query_elemMatch", + docs=[ + {"_id": 1, "x": "a", "scores": [{"v": 80}, {"v": 90}]}, + {"_id": 2, "x": "b", "scores": [{"v": 60}, {"v": 70}]}, + {"_id": 3, "x": "c", "scores": [{"v": 95}]}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"scores": {"$elemMatch": {"v": {"$gte": 90}}}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $elemMatch in query", + ), + CommandTestCase( + "query_size", + docs=[ + {"_id": 1, "x": "a", "tags": ["red", "blue"]}, + {"_id": 2, "x": "b", "tags": ["green"]}, + {"_id": 3, "x": "c", "tags": ["red", "green"]}, + ], + command=lambda ctx: { + "distinct": 
ctx.collection, + "key": "x", + "query": {"tags": {"$size": 1}}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $size in query", + ), + # Evaluation operators. + CommandTestCase( + "query_regex", + docs=[ + {"_id": 1, "x": "a", "name": "apple"}, + {"_id": 2, "x": "b", "name": "banana"}, + {"_id": 3, "x": "c", "name": "apricot"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"name": {"$regex": "^ap"}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $regex in query", + ), + CommandTestCase( + "query_mod", + docs=[ + {"_id": 1, "x": "a", "n": 10}, + {"_id": 2, "x": "b", "n": 15}, + {"_id": 3, "x": "c", "n": 20}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$mod": [10, 0]}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $mod in query", + ), + CommandTestCase( + "query_expr", + docs=[ + {"_id": 1, "x": "a", "a": 5, "b": 3}, + {"_id": 2, "x": "b", "a": 2, "b": 7}, + {"_id": 3, "x": "c", "a": 10, "b": 1}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$expr": {"$gt": ["$a", "$b"]}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $expr in query", + ), + # Bitwise operators. 
+ CommandTestCase( + "query_bitsAllSet", + docs=[ + {"_id": 1, "x": "a", "flags": 7}, + {"_id": 2, "x": "b", "flags": 3}, + {"_id": 3, "x": "c", "flags": 5}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"flags": {"$bitsAllSet": 5}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $bitsAllSet in query", + ), + CommandTestCase( + "query_bitsAllClear", + docs=[ + {"_id": 1, "x": "a", "flags": 7}, + {"_id": 2, "x": "b", "flags": 3}, + {"_id": 3, "x": "c", "flags": 0}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"flags": {"$bitsAllClear": 4}}, + }, + expected={"values": ["b", "c"], "ok": 1.0}, + msg="distinct should support $bitsAllClear in query", + ), + CommandTestCase( + "query_bitsAnySet", + docs=[ + {"_id": 1, "x": "a", "flags": 7}, + {"_id": 2, "x": "b", "flags": 0}, + {"_id": 3, "x": "c", "flags": 4}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"flags": {"$bitsAnySet": 4}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $bitsAnySet in query", + ), + CommandTestCase( + "query_bitsAnyClear", + docs=[ + {"_id": 1, "x": "a", "flags": 7}, + {"_id": 2, "x": "b", "flags": 3}, + {"_id": 3, "x": "c", "flags": 5}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"flags": {"$bitsAnyClear": 6}}, + }, + expected={"values": ["b", "c"], "ok": 1.0}, + msg="distinct should support $bitsAnyClear in query", + ), + # Geospatial operators. 
+ CommandTestCase( + "query_geoWithin", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "x": "a", "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "x": "b", "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"loc": {"$geoWithin": {"$centerSphere": [[0, 0], 0.5]}}}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $geoWithin in query", + ), + CommandTestCase( + "query_geoIntersects", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "x": "a", "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "x": "b", "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": { + "loc": { + "$geoIntersects": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]], + } + } + } + }, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $geoIntersects in query", + ), + CommandTestCase( + "query_near", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "x": "a", "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "x": "b", "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": { + "loc": { + "$near": { + "$geometry": {"type": "Point", "coordinates": [0, 0]}, + "$maxDistance": 100000, + } + } + }, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $near in query", + ), + CommandTestCase( + "query_nearSphere", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "x": "a", "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "x": "b", "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": { + "loc": { + "$nearSphere": { + 
"$geometry": {"type": "Point", "coordinates": [0, 0]}, + "$maxDistance": 100000, + } + } + }, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $nearSphere in query", + ), + # Schema and scripting operators. + CommandTestCase( + "query_jsonSchema", + docs=[ + {"_id": 1, "x": "a", "name": "hello"}, + {"_id": 2, "x": "b", "name": Int64(123)}, + {"_id": 3, "x": "c"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": { + "$jsonSchema": { + "required": ["name"], + "properties": {"name": {"bsonType": "string"}}, + } + }, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $jsonSchema in query", + ), + CommandTestCase( + "query_where", + docs=[ + {"_id": 1, "x": "a", "n": 5}, + {"_id": 2, "x": "b", "n": 15}, + {"_id": 3, "x": "c", "n": 25}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$where": "this.n > 10"}, + }, + expected={"values": ["b", "c"], "ok": 1.0}, + msg="distinct should support $where in query", + ), + # Text search (requires text index). 
+ CommandTestCase( + "query_text", + indexes=[IndexModel([("content", "text")])], + docs=[ + {"_id": 1, "x": "a", "content": "hello world"}, + {"_id": 2, "x": "b", "content": "foo bar"}, + {"_id": 3, "x": "c", "content": "hello foo"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$text": {"$search": "hello"}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $text in query", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_QUERY_OPERATOR_TESTS)) +def test_distinct_query_operators( + database_client: Any, collection: Any, test: CommandTestCase +) -> None: + """Test distinct command query operator wiring.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_readconcern_subfields.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_readconcern_subfields.py new file mode 100644 index 00000000..17073259 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_readconcern_subfields.py @@ -0,0 +1,342 @@ +"""Tests for distinct command readConcern sub-field validation.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + 
BAD_VALUE_ERROR, + ILLEGAL_OPERATION_ERROR, + INVALID_OPTIONS_ERROR, + NOT_A_REPLICA_SET_ERROR, + TYPE_MISMATCH_ERROR, + UNRECOGNIZED_COMMAND_FIELD_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [ReadConcern Level Validation]: the readConcern level sub-field +# validates type and value; null is treated as omitted; invalid strings produce +# BadValue; non-string types produce TypeMismatch. +DISTINCT_READCONCERN_LEVEL_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "readconcern_level_null_accepted", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": None}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept null readConcern level (treated as omitted)", + ), + CommandTestCase( + "readconcern_level_empty_string", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": ""}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject empty string for readConcern level", + ), + CommandTestCase( + "readconcern_level_unknown", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": "unknown"}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject unknown readConcern level string", + ), + CommandTestCase( + "readconcern_level_wrong_case", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": "LOCAL"}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject wrong-case readConcern level string", + ), + CommandTestCase( + "readconcern_linearizable", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": "linearizable"}, + }, + error_code=NOT_A_REPLICA_SET_ERROR, + 
msg="distinct with linearizable readConcern should fail on non-replica-set", + ), + CommandTestCase( + "readconcern_snapshot", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": "snapshot"}, + }, + error_code=NOT_A_REPLICA_SET_ERROR, + msg="distinct with snapshot readConcern should fail on non-replica-set", + ), + *[ + CommandTestCase( + f"readconcern_level_type_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": v}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} for readConcern level sub-field", + ) + for tid, val in [ + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", ["local"]), + ("object", {"a": 1}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02")), + ("regex", Regex("^abc")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], +] + +# Property [ReadConcern Unknown Fields]: unknown fields in the readConcern +# document produce an UnrecognizedCommandField error. +DISTINCT_READCONCERN_UNKNOWN_FIELDS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "readconcern_unknown_field", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": "local", "unknownField": 1}, + }, + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + msg="distinct should reject unknown fields in readConcern document", + ), +] + +# Property [ReadConcern afterClusterTime]: afterClusterTime validates type +# and is rejected on standalone. 
DISTINCT_READCONCERN_AFTER_CLUSTER_TIME_TESTS: list[CommandTestCase] = [
    # A correctly typed Timestamp is the one value not expected to produce a
    # type mismatch; this case expects ILLEGAL_OPERATION_ERROR instead.
    CommandTestCase(
        "readconcern_after_cluster_time_timestamp",
        docs=[{"_id": 1, "x": "a"}],
        command=lambda ctx: {
            "distinct": ctx.collection,
            "key": "x",
            "readConcern": {"afterClusterTime": Timestamp(1, 1)},
        },
        error_code=ILLEGAL_OPERATION_ERROR,
        msg="distinct afterClusterTime should be rejected on standalone",
    ),
] + [
    # Every other value below, null included, is expected to fail with
    # TYPE_MISMATCH_ERROR. The value is bound as a lambda default so that each
    # case keeps its own value instead of the last one produced by the loop.
    CommandTestCase(
        f"readconcern_after_cluster_time_{type_name}",
        docs=[{"_id": 1, "x": "a"}],
        command=lambda ctx, value=bad_value: {
            "distinct": ctx.collection,
            "key": "x",
            "readConcern": {"afterClusterTime": value},
        },
        error_code=TYPE_MISMATCH_ERROR,
        msg=f"distinct afterClusterTime as {type_name} should produce TypeMismatch",
    )
    for type_name, bad_value in {
        "null": None,
        "int32": 42,
        "int64": Int64(1),
        "double": 3.14,
        "decimal128": Decimal128("1"),
        "string": "hello",
        "bool": True,
        "array": [1, 2],
        "object": {"a": 1},
        "objectid": ObjectId(),
        "datetime": datetime(2024, 1, 1, tzinfo=timezone.utc),
        "binary": Binary(b"\x01\x02"),
        "regex": Regex("^abc"),
        "code": Code("function(){}"),
        "code_with_scope": Code("function(){}", {"x": 1}),
        "minkey": MinKey(),
        "maxkey": MaxKey(),
    }.items()
]

# Property [ReadConcern atClusterTime]: atClusterTime validates type and
# requires snapshot read concern level.
DISTINCT_READCONCERN_AT_CLUSTER_TIME_TESTS: list[CommandTestCase] = [
    # A correctly typed Timestamp sent without a level is the one value not
    # expected to produce a type mismatch; this case expects
    # INVALID_OPTIONS_ERROR instead.
    CommandTestCase(
        "readconcern_at_cluster_time_timestamp",
        docs=[{"_id": 1, "x": "a"}],
        command=lambda ctx: {
            "distinct": ctx.collection,
            "key": "x",
            "readConcern": {"atClusterTime": Timestamp(1, 1)},
        },
        error_code=INVALID_OPTIONS_ERROR,
        msg="distinct atClusterTime without snapshot level should be rejected",
    ),
] + [
    # Every other value below, null included, is expected to fail with
    # TYPE_MISMATCH_ERROR. The value is bound as a lambda default so that each
    # case keeps its own value instead of the last one produced by the loop.
    CommandTestCase(
        f"readconcern_at_cluster_time_{type_name}",
        docs=[{"_id": 1, "x": "a"}],
        command=lambda ctx, value=bad_value: {
            "distinct": ctx.collection,
            "key": "x",
            "readConcern": {"atClusterTime": value},
        },
        error_code=TYPE_MISMATCH_ERROR,
        msg=f"distinct atClusterTime as {type_name} should produce TypeMismatch",
    )
    for type_name, bad_value in {
        "null": None,
        "int32": 42,
        "int64": Int64(1),
        "double": 3.14,
        "decimal128": Decimal128("1"),
        "string": "hello",
        "bool": True,
        "array": [1, 2],
        "object": {"a": 1},
        "objectid": ObjectId(),
        "datetime": datetime(2024, 1, 1, tzinfo=timezone.utc),
        "binary": Binary(b"\x01\x02"),
        "regex": Regex("^abc"),
        "code": Code("function(){}"),
        "code_with_scope": Code("function(){}", {"x": 1}),
        "minkey": MinKey(),
        "maxkey": MaxKey(),
    }.items()
]

# Property [ReadConcern provenance]: the provenance sub-field validates type
# and enum value.
+DISTINCT_READCONCERN_PROVENANCE_TESTS: list[CommandTestCase] = [ + *[ + CommandTestCase( + f"readconcern_provenance_{prov}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, p=prov: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"provenance": p}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg=f"distinct readConcern provenance '{prov}' should succeed", + ) + for prov in [ + "clientSupplied", + "implicitDefault", + "customDefault", + "getLastErrorDefaults", + ] + ], + CommandTestCase( + "readconcern_provenance_null", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"provenance": None}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct readConcern provenance null should succeed", + ), + CommandTestCase( + "readconcern_provenance_invalid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"provenance": "invalid"}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct readConcern provenance invalid string should be rejected", + ), + *[ + CommandTestCase( + f"readconcern_provenance_type_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"provenance": v}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct readConcern provenance as {tid} should produce TypeMismatch", + ) + for tid, val in [ + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", [1, 2]), + ("object", {"a": 1}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02")), + ("regex", Regex("^abc")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], +] + +DISTINCT_READCONCERN_SUBFIELD_TESTS: 
list[CommandTestCase] = ( + DISTINCT_READCONCERN_LEVEL_TESTS + + DISTINCT_READCONCERN_UNKNOWN_FIELDS_TESTS + + DISTINCT_READCONCERN_AFTER_CLUSTER_TIME_TESTS + + DISTINCT_READCONCERN_AT_CLUSTER_TIME_TESTS + + DISTINCT_READCONCERN_PROVENANCE_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_READCONCERN_SUBFIELD_TESTS)) +def test_distinct_readconcern_subfields( + database_client: Any, collection: Any, test: CommandTestCase +) -> None: + """Test distinct command readConcern sub-field validation.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_result_ordering.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_result_ordering.py new file mode 100644 index 00000000..4a93da91 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_result_ordering.py @@ -0,0 +1,172 @@ +"""Tests for distinct command result ordering and response format.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import Binary, Code, MaxKey, MinKey, ObjectId, Regex +from bson.timestamp import Timestamp + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import TargetDatabase + +# Property [Result Ordering]: distinct results are 
# returned in BSON type
# comparison order.
def _ordering_case(
    case_id: str, inserted: list[Any], ordered: list[Any], msg: str
) -> CommandTestCase:
    """Build a case that stores one document per value and expects ``ordered`` back.

    Documents get sequential ``_id`` values starting at 1 and carry the value
    under ``x``; the command is a plain ``distinct`` on ``x``.
    """
    return CommandTestCase(
        case_id,
        docs=[{"_id": doc_id, "x": value} for doc_id, value in enumerate(inserted, start=1)],
        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
        expected={"values": ordered, "ok": 1.0},
        msg=msg,
    )


DISTINCT_RESULT_ORDERING_TESTS: list[CommandTestCase] = [
    _ordering_case(
        "ordering_cross_type",
        # Inserted deliberately out of order so the expected sequence cannot be
        # produced by insertion order alone.
        inserted=[
            MaxKey(),
            "hello",
            None,
            42,
            {"a": 1},
            Binary(b"data", 0),
            ObjectId("000000000000000000000001"),
            True,
            datetime(2024, 1, 1, tzinfo=timezone.utc),
            Timestamp(100, 1),
            Regex("abc", ""),
            MinKey(),
            Code("function()", {"scope": 1}),
            Code("function()"),
        ],
        ordered=[
            MinKey(),
            None,
            42,
            "hello",
            {"a": 1},
            # The subtype-0 Binary inserted above is expected back as plain bytes.
            b"data",
            ObjectId("000000000000000000000001"),
            True,
            datetime(2024, 1, 1, tzinfo=timezone.utc),
            Timestamp(100, 1),
            Regex("abc", ""),
            Code("function()"),
            Code("function()", {"scope": 1}),
            MaxKey(),
        ],
        msg=(
            "distinct should return results in BSON type comparison order:"
            " MinKey < null < numbers < string < object < binary"
            " < ObjectId < bool < datetime < Timestamp < Regex"
            " < Code < CodeWithScope < MaxKey"
        ),
    ),
    _ordering_case(
        "ordering_within_numbers",
        inserted=[100, -5, 0, 42, -100],
        ordered=[-100, -5, 0, 42, 100],
        msg="distinct should order numbers by numeric value within the number type",
    ),
    _ordering_case(
        "ordering_within_strings",
        inserted=["banana", "apple", "cherry", "Apple", ""],
        ordered=["", "Apple", "apple", "banana", "cherry"],
        msg="distinct should order strings by binary comparison within the string type",
    ),
    _ordering_case(
        "ordering_within_booleans",
        inserted=[True, False],
        ordered=[False, True],
        msg="distinct should order booleans with False before True",
    ),
    _ordering_case(
        "ordering_within_datetimes",
        inserted=[
            datetime(2024, 6, 1, tzinfo=timezone.utc),
            datetime(2024, 1, 1, tzinfo=timezone.utc),
            datetime(2024, 12, 1, tzinfo=timezone.utc),
        ],
        ordered=[
            datetime(2024, 1, 1, tzinfo=timezone.utc),
            datetime(2024, 6, 1, tzinfo=timezone.utc),
            datetime(2024, 12, 1, tzinfo=timezone.utc),
        ],
        msg="distinct should order datetimes chronologically within the datetime type",
    ),
]

# Property [Return Type and Response Format]: the response document contains the
# distinct values, and the command succeeds with an empty values array for a
# non-existent database or an empty collection.
+DISTINCT_RESPONSE_FORMAT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "response_format_basic", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should return a response with values array and ok field", + ), + CommandTestCase( + "response_format_nonexistent_database", + target_collection=TargetDatabase(suffix="nonexistent"), + docs=None, + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty values array for a non-existent database", + ), + CommandTestCase( + "response_format_empty_collection", + docs=[], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty values array for an empty collection", + ), +] + +DISTINCT_RESULT_FORMAT_TESTS: list[CommandTestCase] = ( + DISTINCT_RESULT_ORDERING_TESTS + DISTINCT_RESPONSE_FORMAT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_RESULT_FORMAT_TESTS)) +def test_distinct_result_ordering( + database_client: Any, collection: Any, test: CommandTestCase +) -> None: + """Test distinct result ordering cases.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py new file mode 100644 index 00000000..9267ae10 --- /dev/null +++ 
b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py @@ -0,0 +1,411 @@ +"""Tests for distinct command parameter type errors.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex +from bson.timestamp import Timestamp +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + FAILED_TO_PARSE_ERROR, + INVALID_NAMESPACE_ERROR, + MISSING_FIELD_ERROR, + TYPE_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_NAN, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + FLOAT_NEGATIVE_NAN, +) + +# Property [Null Hint Error]: unlike other optional parameters, hint=null produces +# a parse error instead of being treated as omitted. +DISTINCT_NULL_HINT_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "null_hint_param_error", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": None}, + error_code=FAILED_TO_PARSE_ERROR, + msg="distinct should reject hint=null with a parse error", + ), +] + +# Property [Query Parameter Type Errors]: all non-object, non-null BSON types +# for query produce TypeMismatch error; invalid query operators produce +# BAD_VALUE_ERROR. 
DISTINCT_QUERY_TYPE_ERROR_TESTS: list[CommandTestCase] = [
    # Values that are not documents: each is expected to fail with
    # TYPE_MISMATCH_ERROR. The value is bound as a lambda default so that each
    # case keeps its own value instead of the last one produced by the loop.
    CommandTestCase(
        f"query_type_{type_name}",
        docs=[{"_id": 1, "x": "a"}],
        command=lambda ctx, query=bad_query: {
            "distinct": ctx.collection,
            "key": "x",
            "query": query,
        },
        error_code=TYPE_MISMATCH_ERROR,
        msg=f"distinct should reject {type_name} as query",
    )
    for type_name, bad_query in {
        "string": "hello",
        "int32": 42,
        "int64": Int64(1),
        "double": 3.14,
        "decimal128": Decimal128("1"),
        "bool": True,
        "array": [1, 2],
        "objectid": ObjectId("000000000000000000000001"),
        "datetime": datetime(2024, 1, 1, tzinfo=timezone.utc),
        "timestamp": Timestamp(1, 1),
        "binary": Binary(b"data", 0),
        "regex": Regex("abc", ""),
        "code": Code("function(){}"),
        "code_with_scope": Code("function(){}", {"s": 1}),
        "minkey": MinKey(),
        "maxkey": MaxKey(),
    }.items()
] + [
    # Documents whose top-level "$" key is not a query operator: each is
    # expected to fail with BAD_VALUE_ERROR rather than a type mismatch.
    CommandTestCase(
        f"query_invalid_{operator_kind}",
        docs=[{"_id": 1, "x": "a"}],
        command=lambda ctx, query=bad_query: {
            "distinct": ctx.collection,
            "key": "x",
            "query": query,
        },
        error_code=BAD_VALUE_ERROR,
        msg=f"distinct should reject {operator_kind} in query",
    )
    for operator_kind, bad_query in {
        "update_operator": {"$set": {"x": 1}},
        "aggregation_stage": {"$group": {"_id": None}},
        "unknown_operator": {"$foobar": 1},
    }.items()
]

# Property [ReadConcern Parameter Type Errors]: all non-object, non-null BSON
# types for readConcern produce TypeMismatch error.
def _readconcern_type_error_case(type_name: str, bad_value: object) -> CommandTestCase:
    """Build a case sending ``bad_value`` as readConcern and expecting TYPE_MISMATCH_ERROR."""
    return CommandTestCase(
        f"readconcern_type_{type_name}",
        docs=[{"_id": 1, "x": "a"}],
        # bad_value is a function argument, so every lambda closes over its own
        # value and no default-argument binding is needed.
        command=lambda ctx: {
            "distinct": ctx.collection,
            "key": "x",
            "readConcern": bad_value,
        },
        error_code=TYPE_MISMATCH_ERROR,
        msg=f"distinct should reject {type_name} as readConcern",
    )


DISTINCT_READCONCERN_TYPE_ERROR_TESTS: list[CommandTestCase] = [
    _readconcern_type_error_case(type_name, bad_value)
    for type_name, bad_value in (
        ("string", "local"),
        ("int32", 42),
        ("int64", Int64(1)),
        ("double", 3.14),
        ("decimal128", Decimal128("1")),
        ("bool", True),
        ("array", [1, 2]),
        ("objectid", ObjectId("000000000000000000000001")),
        ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
        ("timestamp", Timestamp(1, 1)),
        ("binary", Binary(b"data", 0)),
        ("regex", Regex("abc", "")),
        ("code", Code("function(){}")),
        ("code_with_scope", Code("function(){}", {"s": 1})),
        ("minkey", MinKey()),
        ("maxkey", MaxKey()),
    )
]

# Property [Key Parameter Type Errors]: all non-string BSON types for key produce
# TypeMismatch error; null or omitted key produces a missing field error.
DISTINCT_KEY_TYPE_ERROR_TESTS: list[CommandTestCase] = [
    # Non-string values for key: each is expected to fail with
    # TYPE_MISMATCH_ERROR.
    *[
        CommandTestCase(
            f"key_type_{tid}",
            docs=[{"_id": 1, "x": "a"}],
            # val is bound as a default so that each lambda keeps its own value
            # instead of the last one produced by the loop.
            command=lambda ctx, v=val: {"distinct": ctx.collection, "key": v},
            error_code=TYPE_MISMATCH_ERROR,
            msg=f"distinct should reject {tid} as key",
        )
        for tid, val in [
            ("int32", 123),
            ("int64", Int64(1)),
            ("double", 3.14),
            ("decimal128", Decimal128("1")),
            ("bool", True),
            ("array", ["x"]),
            ("object", {"a": 1}),
            # Fixed ObjectId rather than ObjectId(): the command payload is then
            # identical on every run, matching the other type-error lists in
            # this module.
            ("objectid", ObjectId("000000000000000000000001")),
            ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
            ("timestamp", Timestamp(1, 1)),
            ("binary", Binary(b"data", 0)),
            ("regex", Regex("abc", "")),
            ("code", Code("function(){}")),
            ("code_with_scope", Code("function(){}", {"s": 1})),
            ("minkey", MinKey()),
            ("maxkey", MaxKey()),
        ]
    ],
    # A null key and an omitted key are both expected to fail with
    # MISSING_FIELD_ERROR rather than a type mismatch.
    CommandTestCase(
        "key_type_null",
        docs=[{"_id": 1, "x": "a"}],
        command=lambda ctx: {"distinct": ctx.collection, "key": None},
        error_code=MISSING_FIELD_ERROR,
        msg="distinct should reject null key as a missing required field",
    ),
    CommandTestCase(
        "key_type_omitted",
        docs=[{"_id": 1, "x": "a"}],
        command=lambda ctx: {"distinct": ctx.collection},
        error_code=MISSING_FIELD_ERROR,
        msg="distinct should reject omitted key field as a missing required field",
    ),
]

# Property [Hint Parameter Type Errors]: invalid BSON types and values for the
# hint parameter produce appropriate errors.
+DISTINCT_HINT_TYPE_ERROR_TESTS: list[CommandTestCase] = [
+    # Values of the BSON types below are sent directly as ``hint`` and are
+    # expected to fail with FAILED_TO_PARSE_ERROR.
+    *[
+        CommandTestCase(
+            f"hint_type_{tid}",
+            docs=[{"_id": 1, "x": "a"}],
+            command=lambda ctx, v=val: {
+                "distinct": ctx.collection,
+                "key": "x",
+                "hint": v,
+            },
+            error_code=FAILED_TO_PARSE_ERROR,
+            msg=f"distinct should reject {tid} as hint",
+        )
+        for tid, val in [
+            ("int32", 42),
+            ("int64", Int64(1)),
+            ("double", 3.14),
+            ("decimal128", Decimal128("1")),
+            ("bool", True),
+            ("array", [1, 2]),
+            ("objectid", ObjectId("000000000000000000000001")),
+            ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
+            ("timestamp", Timestamp(1, 1)),
+            ("binary", Binary(b"data", 0)),
+            ("regex", Regex("abc", "")),
+            ("code", Code("function(){}")),
+            ("code_with_scope", Code("function(){}", {"s": 1})),
+            ("minkey", MinKey()),
+            ("maxkey", MaxKey()),
+        ]
+    ],
+    # The string and document hints from here on are all expected to fail with
+    # BAD_VALUE_ERROR.
+    CommandTestCase(
+        "hint_nonexistent_index_name",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "hint": "nonexistent_index",
+        },
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct should reject a non-existent index name on an existing collection",
+    ),
+    CommandTestCase(
+        "hint_empty_string_existing_collection",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": ""},
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct should reject empty string as hint on an existing collection",
+    ),
+    CommandTestCase(
+        "hint_doc_wrong_field_order",
+        # The index is declared as (x, y); the hint lists the fields as (y, x).
+        indexes=[IndexModel([("x", 1), ("y", 1)])],
+        docs=[{"_id": 1, "x": "a", "y": "b"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "hint": {"y": 1, "x": 1},
+        },
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct should reject document hint with incorrect field order",
+    ),
+    CommandTestCase(
+        "hint_string_case_sensitive",
+        # The index is named "x_1"; the hint differs only in letter case.
+        indexes=[IndexModel([("x", 1)], name="x_1")],
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "hint": "X_1",
+        },
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct string hint should be case-sensitive",
+    ),
+    CommandTestCase(
+        "hint_string_no_trimming",
+        # The index is named "x_1"; the hint adds surrounding whitespace.
+        indexes=[IndexModel([("x", 1)], name="x_1")],
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "hint": " x_1 ",
+        },
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct string hint should not trim whitespace",
+    ),
+    CommandTestCase(
+        "hint_nonexistent_index_empty_collection",
+        docs=[],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "hint": "nonexistent_idx",
+        },
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct should reject a non-existent index name on an empty collection",
+    ),
+    # Document hints {"x": <value>} with each value below as the direction.
+    *[
+        CommandTestCase(
+            f"hint_direction_{tid}",
+            docs=[{"_id": 1, "x": "a"}],
+            command=lambda ctx, v=val: {
+                "distinct": ctx.collection,
+                "key": "x",
+                "hint": {"x": v},
+            },
+            error_code=BAD_VALUE_ERROR,
+            msg=f"distinct should reject {tid} as direction value in document hint",
+        )
+        for tid, val in [
+            ("zero", 0),
+            ("two", 2),
+            ("fractional", 0.5),
+            ("nan", FLOAT_NAN),
+            ("neg_nan", FLOAT_NEGATIVE_NAN),
+            ("decimal128_nan", DECIMAL128_NAN),
+            ("decimal128_neg_nan", DECIMAL128_NEGATIVE_NAN),
+            ("infinity", FLOAT_INFINITY),
+            ("neg_infinity", FLOAT_NEGATIVE_INFINITY),
+            ("decimal128_infinity", DECIMAL128_INFINITY),
+            ("decimal128_neg_infinity", DECIMAL128_NEGATIVE_INFINITY),
+            ("bool", True),
+            ("null", None),
+            ("string", "asc"),
+            ("string_text", "text"),
+            ("string_hashed", "hashed"),
+            ("string_2dsphere", "2dsphere"),
+            ("string_2d", "2d"),
+        ]
+    ],
+    # Document hints {"$natural": <value>} with each value below as the direction.
+    *[
+        CommandTestCase(
+            f"hint_natural_direction_{tid}",
+            docs=[{"_id": 1, "x": "a"}],
+            command=lambda ctx, v=val: {
+                "distinct": ctx.collection,
+                "key": "x",
+                "hint": {"$natural": v},
+            },
+            error_code=BAD_VALUE_ERROR,
+            msg=f"distinct should reject $natural {tid} direction value",
+        )
+        for tid, val in [
+            ("zero", 0),
+            ("two", 2),
+            ("neg_two", -2),
+            ("fractional", 0.5),
+            ("nan", FLOAT_NAN),
+            ("neg_nan", FLOAT_NEGATIVE_NAN),
+            ("decimal128_nan", DECIMAL128_NAN),
+            ("decimal128_neg_nan", DECIMAL128_NEGATIVE_NAN),
+            ("infinity", FLOAT_INFINITY),
+            ("neg_infinity", FLOAT_NEGATIVE_INFINITY),
+            ("decimal128_infinity", DECIMAL128_INFINITY),
+            ("decimal128_neg_infinity", DECIMAL128_NEGATIVE_INFINITY),
+            ("bool", True),
+            ("string", "forward"),
+            ("null", None),
+            ("array", [1]),
+            ("object", {"a": 1}),
+        ]
+    ],
+    CommandTestCase(
+        "hint_natural_combined_with_other_fields",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "hint": {"$natural": 1, "x": 1},
+        },
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct should reject $natural combined with other fields in hint",
+    ),
+]
+
+# Property [Collection Name Type Errors]: non-string types (except Binary subtype
+# 4) and null as collection name produce InvalidNamespace error.
+DISTINCT_COLLNAME_TYPE_ERROR_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        f"collname_type_{tid}",
+        docs=None,
+        # The value replaces the collection name itself, so ctx is unused here.
+        command=lambda ctx, v=val: {"distinct": v, "key": "x"},
+        error_code=INVALID_NAMESPACE_ERROR,
+        msg=f"distinct should reject {tid} as collection name",
+    )
+    for tid, val in [
+        ("null", None),
+        ("int32", 123),
+        ("int64", Int64(1)),
+        ("double", 3.14),
+        ("decimal128", Decimal128("1")),
+        ("bool", True),
+        ("array", [1, 2]),
+        ("object", {"a": 1}),
+        # Fixed id rather than ObjectId(): the input is then identical on every
+        # run, as in the hint table above.
+        ("objectid", ObjectId("000000000000000000000001")),
+        ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
+        ("timestamp", Timestamp(1, 1)),
+        ("binary_subtype0", Binary(b"hello", 0)),
+        ("binary_subtype5", Binary(b"hello", 5)),
+        ("regex", Regex("abc", "")),
+        ("code", Code("function(){}")),
+        ("code_scope", Code("function(){}", {"s": 1})),
+        ("minkey", MinKey()),
+        ("maxkey", MaxKey()),
+    ]
+]
+
+# Every type-error table of this module, concatenated for one parametrized test.
+DISTINCT_TYPE_ERROR_TESTS: list[CommandTestCase] = (
+    DISTINCT_NULL_HINT_ERROR_TESTS
+    + DISTINCT_QUERY_TYPE_ERROR_TESTS
+    + DISTINCT_READCONCERN_TYPE_ERROR_TESTS
+    + DISTINCT_KEY_TYPE_ERROR_TESTS
+    + DISTINCT_HINT_TYPE_ERROR_TESTS
+    + DISTINCT_COLLNAME_TYPE_ERROR_TESTS
+)
+
+
+@pytest.mark.parametrize("test", pytest_params(DISTINCT_TYPE_ERROR_TESTS))
+def test_distinct_type_errors(database_client: Any, collection: Any, test: CommandTestCase) -> None:
+    """Run one distinct type-error case and check the raw command result.
+
+    The case prepares its own collection, the command is built from the
+    resulting context, and the outcome is compared with the case's expected
+    value or error code.
+    """
+    collection = test.prepare(database_client, collection)
+    ctx = CommandContext.from_collection(collection)
+    result = execute_command(collection, test.build_command(ctx))
+    assertResult(
+        result,
+        expected=test.build_expected(ctx),
+        error_code=test.error_code,
+        msg=test.msg,
+        raw_res=True,
+        ignore_order_in=test.ignore_order_in,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_with_expr.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_with_expr.py
deleted file mode 100644
index dd4d7c9f..00000000
--- a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_with_expr.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""
-Tests for $expr in distinct command contexts.
-""" - -from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.executor import execute_command - - -def test_expr_in_distinct(collection): - """Test $expr in distinct command.""" - collection.insert_many( - [ - {"_id": 1, "cat": "A", "val": 10}, - {"_id": 2, "cat": "B", "val": 5}, - {"_id": 3, "cat": "A", "val": 3}, - ] - ) - result = execute_command( - collection, - { - "distinct": collection.name, - "key": "cat", - "query": {"$expr": {"$gt": ["$val", 4]}}, - }, - ) - assertSuccess( - result, sorted(["A", "B"]), raw_res=True, transform=lambda r: sorted(r.get("values", [])) - ) diff --git a/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py b/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py index 4d990382..26ff3717 100644 --- a/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py +++ b/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py @@ -72,6 +72,7 @@ class CommandTestCase(BaseTestCase): docs: list[dict[str, Any]] | None = None command: dict[str, Any] | Callable[..., dict[str, Any]] | None = None expected: dict[str, Any] | list[dict[str, Any]] | Callable[..., dict[str, Any]] | None = None + ignore_order_in: list[str] | None = None def prepare(self, db: Database, collection: Collection) -> Collection: """Resolve the target collection and apply indexes/docs. 
diff --git a/documentdb_tests/framework/assertions.py b/documentdb_tests/framework/assertions.py index eb911629..184ae2ad 100644 --- a/documentdb_tests/framework/assertions.py +++ b/documentdb_tests/framework/assertions.py @@ -73,12 +73,18 @@ def _sort_if_list(value): def _sort_fields(docs, fields): """Sort list values for the named fields in each document.""" + if isinstance(docs, dict): + docs = dict(docs) + for f in fields: + if f in docs: + docs[f] = _sort_if_list(docs[f]) + return docs sorted_docs = [] for doc in docs: doc = dict(doc) - for field in fields: - if field in doc: - doc[field] = _sort_if_list(doc[field]) + for f in fields: + if f in doc: + doc[f] = _sort_if_list(doc[f]) sorted_docs.append(doc) return sorted_docs diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index 0dd8d0e6..fb18c906 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -110,6 +110,7 @@ COND_EXTRA_FIELD_ERROR = 17083 SIZE_NOT_ARRAY_ERROR = 17124 OUT_CAPPED_COLLECTION_ERROR = 17152 +DISTINCT_TOO_BIG_ERROR = 17217 LET_UNDEFINED_VARIABLE_ERROR = 17276 META_NON_STRING_ERROR = 17307 UNSUPPORTED_META_FIELD_ERROR = 17308 @@ -169,6 +170,7 @@ REGEX_MISSING_INPUT_ERROR = 31022 REGEX_MISSING_REGEX_ERROR = 31023 REGEX_UNKNOWN_FIELD_ERROR = 31024 +KEY_FIELD_NULL_BYTE_ERROR = 31032 OUT_OF_RANGE_CONVERSION_ERROR = 31109 UNSET_EMPTY_ARRAY_ERROR = 31119 UNSET_ARRAY_ELEMENT_TYPE_ERROR = 31120 diff --git a/documentdb_tests/framework/target_collection.py b/documentdb_tests/framework/target_collection.py index 6020ee0c..efd9895d 100644 --- a/documentdb_tests/framework/target_collection.py +++ b/documentdb_tests/framework/target_collection.py @@ -73,6 +73,23 @@ def resolve(self, db: Database, collection: Collection) -> Collection: return db[name] +@dataclass(frozen=True) +class ViewOnCustomCollection(TargetCollection): + """A view on a custom collection created with arbitrary options.""" + + 
source_options: dict[str, Any] = field(default_factory=dict) + + def resolve(self, db: Database, collection: Collection) -> Collection: + src_name = f"{collection.name}_custom_src" + db.command("create", src_name, **self.source_options) + view_name = f"{collection.name}_custom_view" + db.command("create", view_name, viewOn=src_name, pipeline=[]) + return db[view_name] + + def writable(self, source: Collection, resolved: Collection) -> Collection: + return source.database[f"{source.name}_custom_src"] + + @dataclass(frozen=True) class CappedCollection(TargetCollection): """A capped collection."""