From c77d1552b06c2d53ebe07e9210366578baaae154 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Thu, 14 May 2026 08:12:00 -0700 Subject: [PATCH 1/4] Add $distinct aggregation command tests Signed-off-by: Daniel Frankcom --- .../distinct/test_distinct_collation.py | 303 ++++++++ .../test_distinct_collation_subfields.py | 707 ++++++++++++++++++ .../distinct/test_distinct_command_errors.py | 340 +++++++++ .../distinct/test_distinct_deduplication.py | 431 +++++++++++ .../commands/distinct/test_distinct_hint.py | 292 ++++++++ .../distinct/test_distinct_key_field.py | 307 ++++++++ .../distinct/test_distinct_parameters.py | 365 +++++++++ .../distinct/test_distinct_query_operators.py | 539 +++++++++++++ .../test_distinct_readconcern_subfields.py | 342 +++++++++ .../distinct/test_distinct_result_ordering.py | 172 +++++ .../distinct/test_distinct_type_errors.py | 480 ++++++++++++ .../distinct/test_distinct_with_expr.py | 28 - .../commands/utils/command_test_case.py | 19 +- documentdb_tests/framework/assertions.py | 12 +- documentdb_tests/framework/error_codes.py | 3 + .../framework/target_collection.py | 30 + 16 files changed, 4333 insertions(+), 37 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation_subfields.py create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_deduplication.py create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_hint.py create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_key_field.py create mode 100644 
documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_query_operators.py create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_readconcern_subfields.py create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_result_ordering.py create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py delete mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_with_expr.py diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py new file mode 100644 index 00000000..216e6ce5 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py @@ -0,0 +1,303 @@ +"""Tests for distinct command collation behavior.""" + +from __future__ import annotations + +from typing import Any + +import pytest +from bson import Regex + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import ( + CustomCollection, + ViewOnCustomCollection, +) + +# Property [Collation Effects on Deduplication]: collation affects which values +# are considered duplicates during distinct. 
+DISTINCT_COLLATION_DEDUP_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collation_basic_string_dedup", + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "APPLE"}, {"_id": 3, "x": "banana"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": ["apple", "banana"], "ok": 1.0}, + msg="distinct should collapse case-equivalent strings under case-insensitive collation", + ), + CommandTestCase( + "collation_nested_array_dedup", + docs=[{"_id": 1, "x": [["hello"]]}, {"_id": 2, "x": [["HELLO"]]}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": [["hello"]], "ok": 1.0}, + msg=( + "distinct should collapse nested arrays containing" + " case-equivalent strings under collation" + ), + ), + CommandTestCase( + "collation_nested_object_dedup", + docs=[ + {"_id": 1, "x": {"name": "hello"}}, + {"_id": 2, "x": {"name": "HELLO"}}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": [{"name": "hello"}], "ok": 1.0}, + msg="distinct should collapse objects with case-equivalent string values under collation", + ), + CommandTestCase( + "collation_after_array_unwinding", + docs=[ + {"_id": 1, "x": ["hello", "world"]}, + {"_id": 2, "x": ["HELLO", "WORLD"]}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": ["hello", "world"], "ok": 1.0}, + msg="distinct should apply collation dedup to individual array elements after unwinding", + ), + CommandTestCase( + "collation_non_string_unaffected", + docs=[ + {"_id": 1, "x": 1}, + {"_id": 2, "x": "a"}, + {"_id": 3, "x": "A"}, + {"_id": 4, "x": None}, + {"_id": 5, "x": True}, + {"_id": 6, "x": False}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": 
"x", + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": [None, 1, "a", False, True], "ok": 1.0}, + msg=( + "distinct should not collapse non-string elements" + " (numbers, null, booleans) under collation" + ), + ), + CommandTestCase( + "collation_regex_unaffected", + docs=[ + {"_id": 1, "x": Regex("hello", "")}, + {"_id": 2, "x": Regex("HELLO", "")}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 1}, + }, + expected={ + "values": [Regex("HELLO", ""), Regex("hello", "")], + "ok": 1.0, + }, + msg="distinct should not collapse regex values under collation", + ), + CommandTestCase( + "collation_first_encountered_wins", + docs=[ + {"_id": 1, "x": "Hello"}, + {"_id": 2, "x": "HELLO"}, + {"_id": 3, "x": "hello"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": ["Hello"], "ok": 1.0}, + msg=( + "distinct should return the first-encountered value" + " when collation collapses duplicates" + ), + ), +] + +# Property [Collation Inheritance]: the collection's default collation is used +# when no explicit collation is specified. 
+DISTINCT_COLLATION_INHERITANCE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "inherit_collation_omitted", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "APPLE"}, {"_id": 3, "x": "banana"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": ["apple", "banana"], "ok": 1.0}, + msg="distinct should use collection's default collation when collation is omitted", + ), + CommandTestCase( + "inherit_collation_null", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "APPLE"}, {"_id": 3, "x": "banana"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "collation": None}, + expected={"values": ["apple", "banana"], "ok": 1.0}, + msg="distinct should use collection's default collation when collation is null", + ), + CommandTestCase( + "inherit_collation_empty_doc", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "APPLE"}, {"_id": 3, "x": "banana"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "collation": {}}, + expected={"values": ["apple", "banana"], "ok": 1.0}, + msg="distinct should use collection's default collation when collation is empty doc", + ), + CommandTestCase( + "inherit_key_always_case_sensitive", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[{"_id": 1, "Name": "alice"}, {"_id": 2, "name": "bob"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "Name"}, + expected={"values": ["alice"], "ok": 1.0}, + msg="distinct key field path matching should be case-sensitive regardless of collation", + ), + CommandTestCase( + "inherit_explicit_overrides_default", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 
1}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "APPLE"}, {"_id": 3, "x": "banana"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 3}, + }, + expected={"values": ["apple", "APPLE", "banana"], "ok": 1.0}, + msg="distinct should use explicit collation over collection default when specified", + ), +] + +# Property [Collation Effects on Ordering]: collation changes the sort order of +# results from binary comparison to locale-aware ordering. +DISTINCT_COLLATION_ORDERING_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collation_ordering_locale_aware", + docs=[ + {"_id": 1, "x": "Banana"}, + {"_id": 2, "x": "apple"}, + {"_id": 3, "x": "Cherry"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en"}, + }, + expected={"values": ["apple", "Banana", "Cherry"], "ok": 1.0}, + msg=( + "distinct with collation should order results by locale-aware comparison" + " instead of binary comparison" + ), + ), + CommandTestCase( + "collation_ordering_binary_default", + docs=[ + {"_id": 1, "x": "Banana"}, + {"_id": 2, "x": "apple"}, + {"_id": 3, "x": "Cherry"}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": ["Banana", "Cherry", "apple"], "ok": 1.0}, + msg="distinct without collation should order results by binary comparison", + ), +] + +# Property [Collation Affects Query Matching]: the collation parameter applies +# to the query filter, not just deduplication. 
+DISTINCT_COLLATION_QUERY_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collation_query_case_insensitive_match", + docs=[ + {"_id": 1, "x": "val1", "status": "Active"}, + {"_id": 2, "x": "val2", "status": "active"}, + {"_id": 3, "x": "val3", "status": "INACTIVE"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"status": "active"}, + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": ["val1", "val2"], "ok": 1.0}, + msg=( + "distinct should apply collation to query filter matching," + " allowing case-insensitive comparison" + ), + ), + CommandTestCase( + "collation_query_without_collation_exact_match", + docs=[ + {"_id": 1, "x": "val1", "status": "Active"}, + {"_id": 2, "x": "val2", "status": "active"}, + {"_id": 3, "x": "val3", "status": "INACTIVE"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"status": "active"}, + }, + expected={"values": ["val2"], "ok": 1.0}, + msg="distinct without collation should match query filter exactly", + ), +] + +# Property [Collation Inheritance on Views]: views without an explicit collation +# use simple binary comparison, not the source collection's collation. 
+DISTINCT_COLLATION_INHERITANCE_VIEW_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "inherit_view_no_inherit", + target_collection=ViewOnCustomCollection( + source_options={"collation": {"locale": "en", "strength": 1}} + ), + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "APPLE"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": sorted(["APPLE", "apple", "banana"]), "ok": 1}, + ignore_order_in=["values"], + msg=( + "distinct on a view should use binary comparison," + " not the source collection's collation" + ), + ), +] + +DISTINCT_COLLATION_TESTS: list[CommandTestCase] = ( + DISTINCT_COLLATION_DEDUP_TESTS + + DISTINCT_COLLATION_INHERITANCE_TESTS + + DISTINCT_COLLATION_ORDERING_TESTS + + DISTINCT_COLLATION_QUERY_TESTS + + DISTINCT_COLLATION_INHERITANCE_VIEW_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_COLLATION_TESTS)) +def test_distinct_collation(database_client: Any, collection: Any, test: CommandTestCase) -> None: + """Test distinct collation cases.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation_subfields.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation_subfields.py new file mode 100644 index 00000000..4fecda76 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation_subfields.py @@ -0,0 +1,707 @@ +"""Tests for distinct command collation sub-field validation and behavior.""" + +from __future__ import annotations + +from datetime import datetime, 
timezone +from typing import Any + +import pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + MISSING_FIELD_ERROR, + TYPE_MISMATCH_ERROR, + UNRECOGNIZED_COMMAND_FIELD_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Type Strictness: collation (locale)]: the locale sub-field is +# required and validates type and value. +DISTINCT_TYPE_STRICTNESS_COLLATION_LOCALE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "type_collation_locale_null", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": None}, + }, + error_code=MISSING_FIELD_ERROR, + msg="distinct should reject collation with null locale", + ), + *[ + CommandTestCase( + f"type_collation_locale_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": v}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} for collation locale", + ) + for tid, val in [ + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", ["en"]), + ("object", {"name": "en"}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02")), + ("regex", Regex("^en")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], + *[ + CommandTestCase( + f"type_collation_locale_{tid}", + 
docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": v}, + }, + error_code=BAD_VALUE_ERROR, + msg=f"distinct should reject {tid} for collation locale", + ) + for tid, val in [ + ("invalid", "invalid_locale_xyz"), + ("wrong_case", "EN"), + ] + ], +] + +# Property [Type Strictness: collation (strength)]: the strength sub-field +# validates type and range. +DISTINCT_TYPE_STRICTNESS_COLLATION_STRENGTH_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "type_collation_strength_one_valid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept strength value 1 (lower boundary)", + ), + CommandTestCase( + "type_collation_strength_five_valid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 5}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept strength value 5 (upper boundary)", + ), + CommandTestCase( + "type_collation_strength_int32_valid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 3}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept int32 strength value 3", + ), + CommandTestCase( + "type_collation_strength_int64_valid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": Int64(3)}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept Int64 strength value 3", + ), + CommandTestCase( + "type_collation_strength_double_valid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 3.0}, 
+ }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept double strength value 3.0", + ), + CommandTestCase( + "type_collation_strength_decimal128_valid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": Decimal128("3")}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept Decimal128 strength value 3", + ), + CommandTestCase( + "type_collation_strength_null_valid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": None}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept null strength (treated as omitted)", + ), + CommandTestCase( + "type_collation_strength_zero_invalid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 0}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject strength value 0", + ), + CommandTestCase( + "type_collation_strength_six_invalid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 6}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject strength value 6", + ), + *[ + CommandTestCase( + f"type_collation_strength_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": v}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} for collation strength", + ) + for tid, val in [ + ("string", "one"), + ("bool", True), + ("array", [1]), + ("object", {"a": 1}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02")), + ("regex", Regex("^abc")), + ("code", 
Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], +] + +# Property [Type Strictness: collation (boolean sub-fields)]: the boolean +# sub-fields validate type strictly and have field-specific null handling. +DISTINCT_TYPE_STRICTNESS_COLLATION_BOOL_FIELDS_TESTS: list[CommandTestCase] = [ + *[ + CommandTestCase( + f"type_collation_{field}_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, f=field, v=val: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", f: v}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} for collation {field}", + ) + for field in ["caseLevel", "numericOrdering", "backwards", "normalization"] + for tid, val in [ + ("int32", 1), + ("int64", Int64(1)), + ("double", 1.0), + ("decimal128", Decimal128("1")), + ("string", "true"), + ("array", [True]), + ("object", {"a": True}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02")), + ("regex", Regex("^abc")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], + # Null handling: caseLevel, numericOrdering, normalization accept null; + # backwards rejects null. 
+ *[ + CommandTestCase( + f"type_collation_{field}_null_accepted", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, f=field: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", f: None}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg=f"distinct should accept null for collation {field} (treated as omitted)", + ) + for field in ["caseLevel", "numericOrdering", "normalization"] + ], + CommandTestCase( + "type_collation_backwards_null_rejected", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "backwards": None}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg="distinct should reject null for collation backwards", + ), +] + +# Property [Type Strictness: collation (enum sub-fields)]: the string enum +# sub-fields validate type, value, and field-specific constraints. +DISTINCT_TYPE_STRICTNESS_COLLATION_ENUM_FIELDS_TESTS: list[CommandTestCase] = [ + # caseFirst valid values and constraints + CommandTestCase( + "type_collation_casefirst_lower_accepted", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "caseFirst": "lower", "strength": 3}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg='distinct should accept caseFirst "lower" with strength > 2', + ), + CommandTestCase( + "type_collation_casefirst_with_strength_3", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "caseFirst": "upper", "strength": 3}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept caseFirst with strength > 2", + ), + CommandTestCase( + "type_collation_casefirst_with_caselevel", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": { + "locale": "en", + "caseFirst": "upper", + "caseLevel": True, + }, + }, + expected={"values": ["a"], "ok": 
1.0}, + msg="distinct should accept caseFirst with caseLevel=true", + ), + CommandTestCase( + "type_collation_casefirst_off_always_valid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "caseFirst": "off", "strength": 1}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg='distinct should accept caseFirst "off" regardless of strength or caseLevel', + ), + CommandTestCase( + "type_collation_casefirst_requires_caselevel_or_strength", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "caseFirst": "upper", "strength": 1}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject caseFirst without caseLevel=true or strength > 2", + ), + # alternate valid values + CommandTestCase( + "type_collation_alternate_non_ignorable_accepted", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "alternate": "non-ignorable"}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg='distinct should accept alternate "non-ignorable"', + ), + CommandTestCase( + "type_collation_alternate_shifted_accepted", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "alternate": "shifted"}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg='distinct should accept alternate "shifted"', + ), + # maxVariable valid values + CommandTestCase( + "type_collation_maxvariable_punct_accepted", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "maxVariable": "punct"}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg='distinct should accept maxVariable "punct"', + ), + CommandTestCase( + "type_collation_maxvariable_space_accepted", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": 
ctx.collection, + "key": "x", + "collation": {"locale": "en", "maxVariable": "space"}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg='distinct should accept maxVariable "space"', + ), + # Null acceptance for all enum sub-fields + *[ + CommandTestCase( + f"type_collation_{field}_null_accepted", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, f=field: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", f: None}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg=f"distinct should accept null for collation {field} (treated as omitted)", + ) + for field in ["caseFirst", "alternate", "maxVariable"] + ], + # Invalid string values (BadValue) + *[ + CommandTestCase( + f"type_collation_{field}_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, f=field, v=val: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", f: v}, + }, + error_code=BAD_VALUE_ERROR, + msg=f"distinct should reject {tid} for collation {field}", + ) + for field, tid, val in [ + ("caseFirst", "invalid", "invalid"), + ("caseFirst", "empty", ""), + ("caseFirst", "wrong_case", "Upper"), + ("alternate", "invalid", "invalid"), + ("alternate", "empty", ""), + ("alternate", "wrong_case", "Shifted"), + ("maxVariable", "invalid", "invalid"), + ("maxVariable", "empty", ""), + ("maxVariable", "wrong_case", "Punct"), + ] + ], + # Non-string type rejection (TypeMismatch) for all enum sub-fields + *[ + CommandTestCase( + f"type_collation_{field}_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, f=field, v=val: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", f: v}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} for collation {field}", + ) + for field in ["caseFirst", "alternate", "maxVariable"] + for tid, val in [ + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", [1, 2]), + ("object", {"a": 1}), + 
("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02")), + ("regex", Regex("^abc")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], +] + +# Property [Type Strictness: collation (unknown fields)]: unknown fields in the +# collation document produce an UnrecognizedCommandField error. +DISTINCT_TYPE_STRICTNESS_COLLATION_UNKNOWN_FIELDS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "type_collation_unknown_field", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "unknownField": 1}, + }, + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + msg="distinct should reject unknown fields in collation document", + ), +] + +# Property [Collation Behavior: numericOrdering]: numericOrdering=true causes +# numeric strings to be ordered by their numeric value rather than +# lexicographically, affecting both deduplication and result ordering. 
+DISTINCT_COLLATION_NUMERIC_ORDERING_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collation_numericordering_true_ordering", + docs=[ + {"_id": 1, "x": "10"}, + {"_id": 2, "x": "2"}, + {"_id": 3, "x": "1"}, + {"_id": 4, "x": "20"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected={"values": ["1", "2", "10", "20"], "ok": 1.0}, + msg="distinct with numericOrdering=true should order numeric strings numerically", + ), + CommandTestCase( + "collation_numericordering_false_ordering", + docs=[ + {"_id": 1, "x": "10"}, + {"_id": 2, "x": "2"}, + {"_id": 3, "x": "1"}, + {"_id": 4, "x": "20"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "numericOrdering": False}, + }, + expected={"values": ["1", "10", "2", "20"], "ok": 1.0}, + msg="distinct with numericOrdering=false should order strings lexicographically", + ), +] + +# Property [Collation Behavior: alternate]: alternate="shifted" makes +# punctuation and whitespace ignorable at strength levels 1-3, so strings that +# differ only in those characters collapse during deduplication. 
+DISTINCT_COLLATION_ALTERNATE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collation_alternate_shifted_dedup", + docs=[ + {"_id": 1, "x": "abc"}, + {"_id": 2, "x": "a-b-c"}, + {"_id": 3, "x": "a b c"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "alternate": "shifted", "strength": 1}, + }, + expected={"values": ["abc"], "ok": 1.0}, + msg="distinct with alternate=shifted should collapse punctuation/whitespace variants", + ), + CommandTestCase( + "collation_alternate_non_ignorable_preserves", + docs=[ + {"_id": 1, "x": "abc"}, + {"_id": 2, "x": "a-b-c"}, + {"_id": 3, "x": "a b c"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "alternate": "non-ignorable", "strength": 1}, + }, + expected={"values": ["a b c", "a-b-c", "abc"], "ok": 1.0}, + msg="distinct with alternate=non-ignorable should preserve punctuation distinctions", + ), +] + +# Property [Collation Behavior: maxVariable]: maxVariable controls which +# characters are ignored when alternate="shifted"; "space" ignores only +# whitespace, "punct" ignores both whitespace and punctuation. 
+DISTINCT_COLLATION_MAX_VARIABLE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collation_maxvariable_space", + docs=[ + {"_id": 1, "x": "abc"}, + {"_id": 2, "x": "a bc"}, + {"_id": 3, "x": "a.bc"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": { + "locale": "en", + "alternate": "shifted", + "maxVariable": "space", + "strength": 1, + }, + }, + expected={"values": ["a.bc", "abc"], "ok": 1.0}, + msg="distinct with maxVariable=space should ignore only whitespace", + ), + CommandTestCase( + "collation_maxvariable_punct", + docs=[ + {"_id": 1, "x": "abc"}, + {"_id": 2, "x": "a bc"}, + {"_id": 3, "x": "a.bc"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": { + "locale": "en", + "alternate": "shifted", + "maxVariable": "punct", + "strength": 1, + }, + }, + expected={"values": ["abc"], "ok": 1.0}, + msg="distinct with maxVariable=punct should ignore whitespace and punctuation", + ), +] + +# Property [Collation Behavior: backwards]: backwards=true reverses the +# secondary (accent) comparison direction, affecting result ordering. 
+DISTINCT_COLLATION_BACKWARDS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collation_backwards_true", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "cot\u00e9"}, + {"_id": 3, "x": "c\u00f4te"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 2, "backwards": True}, + }, + expected={"values": ["cote", "c\u00f4te", "cot\u00e9", "c\u00f4t\u00e9"], "ok": 1.0}, + msg="distinct with backwards=true should reverse accent comparison direction", + ), + CommandTestCase( + "collation_backwards_false", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "cot\u00e9"}, + {"_id": 3, "x": "c\u00f4te"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 2, "backwards": False}, + }, + expected={"values": ["cote", "cot\u00e9", "c\u00f4te", "c\u00f4t\u00e9"], "ok": 1.0}, + msg="distinct with backwards=false should use normal accent comparison direction", + ), +] + +# Property [Collation Behavior: caseFirst]: caseFirst controls whether +# uppercase or lowercase sorts first at the tertiary level. 
+DISTINCT_COLLATION_CASEFIRST_BEHAVIOR_TESTS: list[CommandTestCase] = [
+    # Both cases use the same four documents at strength 3; only caseFirst and
+    # the expected order of each upper/lower pair differ.
+    CommandTestCase(
+        "collation_casefirst_upper",
+        docs=[
+            {"_id": 1, "x": "a"},
+            {"_id": 2, "x": "A"},
+            {"_id": 3, "x": "b"},
+            {"_id": 4, "x": "B"},
+        ],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "collation": {"locale": "en", "caseFirst": "upper", "strength": 3},
+        },
+        expected={"values": ["A", "a", "B", "b"], "ok": 1.0},
+        msg="distinct with caseFirst=upper should sort uppercase before lowercase",
+    ),
+    CommandTestCase(
+        "collation_casefirst_lower",
+        docs=[
+            {"_id": 1, "x": "a"},
+            {"_id": 2, "x": "A"},
+            {"_id": 3, "x": "b"},
+            {"_id": 4, "x": "B"},
+        ],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "collation": {"locale": "en", "caseFirst": "lower", "strength": 3},
+        },
+        expected={"values": ["a", "A", "b", "B"], "ok": 1.0},
+        msg="distinct with caseFirst=lower should sort lowercase before uppercase",
+    ),
+]
+
+# Every collation sub-field case list of this module, concatenated in
+# declaration order for a single parametrized runner.
+DISTINCT_COLLATION_SUBFIELD_TESTS: list[CommandTestCase] = (
+    DISTINCT_TYPE_STRICTNESS_COLLATION_LOCALE_TESTS
+    + DISTINCT_TYPE_STRICTNESS_COLLATION_STRENGTH_TESTS
+    + DISTINCT_TYPE_STRICTNESS_COLLATION_BOOL_FIELDS_TESTS
+    + DISTINCT_TYPE_STRICTNESS_COLLATION_ENUM_FIELDS_TESTS
+    + DISTINCT_TYPE_STRICTNESS_COLLATION_UNKNOWN_FIELDS_TESTS
+    + DISTINCT_COLLATION_NUMERIC_ORDERING_TESTS
+    + DISTINCT_COLLATION_ALTERNATE_TESTS
+    + DISTINCT_COLLATION_MAX_VARIABLE_TESTS
+    + DISTINCT_COLLATION_BACKWARDS_TESTS
+    + DISTINCT_COLLATION_CASEFIRST_BEHAVIOR_TESTS
+)
+
+
+@pytest.mark.parametrize("test", pytest_params(DISTINCT_COLLATION_SUBFIELD_TESTS))
+def test_distinct_collation_subfields(
+    database_client: Any, collection: Any, test: CommandTestCase
+) -> None:
+    """Run one collation sub-field case against the distinct command.
+
+    The case prepares its target collection, builds the command and the
+    expected payload from the resulting context, executes the command and
+    hands the raw result to ``assertResult``.
+
+    Args:
+        database_client: Client handle passed through to ``test.prepare``.
+        collection: Collection handle; replaced by whatever ``test.prepare``
+            returns before the command is built.
+        test: The parametrized case supplying command, expectation and message.
+    """
+    collection = test.prepare(database_client, collection)
+    ctx = CommandContext.from_collection(collection)
+    result = execute_command(collection, test.build_command(ctx))
+    assertResult(
+        result,
+        expected=test.build_expected(ctx),
+        error_code=test.error_code,
+        msg=test.msg,
+        raw_res=True,
+        # Forward the per-case ordering relaxation, as the other distinct
+        # runners in this patch do, so it is never silently ignored here.
+        ignore_order_in=test.ignore_order_in,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py
new file mode 100644
index 00000000..19fc68df
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py
@@ -0,0 +1,340 @@
+"""Tests for distinct command validation and structural errors."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Any
+
+import pytest
+from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex
+from bson.timestamp import Timestamp
+
+from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import (
+    CommandContext,
+    CommandTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.error_codes import (
+    BAD_VALUE_ERROR,
+    DISTINCT_TOO_BIG_ERROR,
+    FAILED_TO_PARSE_ERROR,
+    INVALID_NAMESPACE_ERROR,
+    INVALID_OPTIONS_ERROR,
+    KEY_FIELD_NULL_BYTE_ERROR,
+    TYPE_MISMATCH_ERROR,
+    UNRECOGNIZED_COMMAND_FIELD_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+from documentdb_tests.framework.test_constants import (
+    DECIMAL128_INFINITY,
+    DECIMAL128_NAN,
+    DECIMAL128_NEGATIVE_INFINITY,
+    DECIMAL128_NEGATIVE_NAN,
+    DECIMAL128_ONE_AND_HALF,
+    FLOAT_INFINITY,
+    FLOAT_NAN,
+    FLOAT_NEGATIVE_INFINITY,
+    FLOAT_NEGATIVE_NAN,
+    INT32_MAX,
+)
+
+# Property [Query Validation]: query semantics are validated even when the
+# collection does not exist; invalid operators produce BAD_VALUE_ERROR.
+DISTINCT_QUERY_ERROR_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "query_invalid_operator_nonexistent_collection",
+        # No documents are seeded for this case.
+        docs=None,
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x", "query": {"$invalid": 1}},
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct should reject invalid query operators even on non-existent collections",
+    ),
+]
+
+# Property [Key Field Null Byte Rejection]: a null byte anywhere in the key
+# string produces an error.
+DISTINCT_KEY_NULL_BYTE_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        f"key_null_byte_{position}",
+        docs=[{"_id": 1, "x": "a"}],
+        # The key is bound as a default argument so each lambda keeps its own value.
+        command=lambda ctx, key=bad_key: {"distinct": ctx.collection, "key": key},
+        error_code=KEY_FIELD_NULL_BYTE_ERROR,
+        msg=f"distinct should reject a key with a null byte {position}",
+    )
+    # Keyed by where the null byte sits; dict order fixes the case order.
+    for position, bad_key in {
+        "middle": "x\x00y",
+        "start": "\x00x",
+        "end": "x\x00",
+        "only": "\x00",
+    }.items()
+]
+
+# Property [Collection Name String Validation]: empty string, null bytes, leading
+# dots, and dollar signs in the collection name produce InvalidNamespace error.
+DISTINCT_COLLNAME_STRING_ERROR_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        f"collname_{case_id}",
+        docs=None,
+        # The context is ignored on purpose: the literal bad name is sent as
+        # the collection name instead of ctx.collection.
+        command=lambda ctx, bad_name=name: {"distinct": bad_name, "key": "x"},
+        error_code=INVALID_NAMESPACE_ERROR,
+        msg=f"distinct should reject {summary}",
+    )
+    for case_id, name, summary in (
+        # Empty name.
+        ("empty_string", "", "empty string as collection name"),
+        # Null byte at each position.
+        ("null_byte_start", "\x00test", "collection name with null byte at start"),
+        ("null_byte_middle", "te\x00st", "collection name with null byte in middle"),
+        ("null_byte_end", "test\x00", "collection name with null byte at end"),
+        # Leading dot.
+        ("leading_dot", ".test", "collection name starting with a dot"),
+        # Dollar sign at each position.
+        ("dollar_start", "$test", "collection name with dollar sign at start"),
+        ("dollar_middle", "te$st", "collection name with dollar sign in middle"),
+        ("dollar_end", "test$", "collection name with dollar sign at end"),
+    )
+]
+
+# Property [Unrecognized Fields]: unrecognized fields in the command document
+# produce an IDLUnknownField error; field name matching is case-sensitive.
+DISTINCT_UNRECOGNIZED_FIELDS_TESTS: list[CommandTestCase] = [
+    # Each case appends exactly one stray field after the valid "distinct"
+    # and "key" fields; everything else is shared.
+    CommandTestCase(
+        f"unrecognized_{suffix}",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx, extra=stray: {
+            "distinct": ctx.collection,
+            "key": "x",
+            **extra,
+        },
+        error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR,
+        msg=f"distinct should {expectation}",
+    )
+    for suffix, stray, expectation in [
+        (
+            "unknown_field",
+            {"unknownField": 1},
+            "reject unrecognized fields in the command document",
+        ),
+        (
+            "case_variant_key",
+            {"Key": "y"},
+            "treat case variants of known fields as unrecognized",
+        ),
+        (
+            "case_variant_query",
+            {"Query": {}},
+            "treat 'Query' as unrecognized (case-sensitive matching)",
+        ),
+        (
+            "case_variant_hint",
+            {"Hint": {"x": 1}},
+            "treat 'Hint' as unrecognized (case-sensitive matching)",
+        ),
+    ]
+]
+
+# Property [WriteConcern Rejection]: writeConcern is not accepted by the distinct
+# command.
+DISTINCT_WRITE_CONCERN_TESTS: list[CommandTestCase] = (
+    # Group 1: writeConcern given as a document (including the empty one)
+    # is expected to fail with INVALID_OPTIONS_ERROR.
+    [
+        CommandTestCase(
+            f"writeconcern_{label}",
+            docs=[{"_id": 1, "x": "a"}],
+            command=lambda ctx, concern=write_concern: {
+                "distinct": ctx.collection,
+                "key": "x",
+                "writeConcern": concern,
+            },
+            error_code=INVALID_OPTIONS_ERROR,
+            msg=f"distinct should reject writeConcern {label} as unsupported",
+        )
+        for label, write_concern in (
+            ("w_1", {"w": 1}),
+            ("w_majority", {"w": "majority"}),
+            ("w_0", {"w": 0}),
+            ("j_true", {"j": True}),
+            ("wtimeout", {"wtimeout": 1000}),
+            ("empty_doc", {}),
+        )
+    ]
+    # Group 2: an explicit null is the one form expected to succeed.
+    + [
+        CommandTestCase(
+            "writeconcern_null_accepted",
+            docs=[{"_id": 1, "x": "a"}],
+            command=lambda ctx: {
+                "distinct": ctx.collection,
+                "key": "x",
+                "writeConcern": None,
+            },
+            expected={"values": ["a"], "ok": 1.0},
+            msg="distinct should treat writeConcern null as omitted",
+        ),
+    ]
+    # Group 3: every non-document, non-null value is expected to fail with
+    # TYPE_MISMATCH_ERROR.
+    + [
+        CommandTestCase(
+            f"writeconcern_type_{type_name}",
+            docs=[{"_id": 1, "x": "a"}],
+            command=lambda ctx, concern=sample: {
+                "distinct": ctx.collection,
+                "key": "x",
+                "writeConcern": concern,
+            },
+            error_code=TYPE_MISMATCH_ERROR,
+            msg=f"distinct should reject {type_name} as writeConcern",
+        )
+        for type_name, sample in (
+            ("string", "majority"),
+            ("int32", 1),
+            ("int64", Int64(1)),
+            ("double", 1.0),
+            ("decimal128", Decimal128("1")),
+            ("bool", True),
+            ("array", [1]),
+            ("objectid", ObjectId("000000000000000000000001")),
+            ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
+            ("timestamp", Timestamp(1, 1)),
+            ("binary", Binary(b"data", 0)),
+            ("regex", Regex("abc", "")),
+            ("code", Code("function(){}")),
+            ("code_with_scope", Code("function(){}", {"s": 1})),
+            ("minkey", MinKey()),
+            ("maxkey", MaxKey()),
+        )
+    ]
+)
+
+# Property [maxTimeMS Validation Errors]: invalid maxTimeMS values produce
+# appropriate errors based on the type of invalidity.
+# Three error classes are covered below: out-of-range integers
+# (BAD_VALUE_ERROR), non-integral or non-finite numerics
+# (FAILED_TO_PARSE_ERROR) and non-numeric types (TYPE_MISMATCH_ERROR).
+DISTINCT_MAXTIMEMS_ERROR_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "maxtimems_err_negative",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x", "maxTimeMS": -1},
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct should reject negative maxTimeMS",
+    ),
+    # NOTE(review): a plain Python int above the int32 range is presumably
+    # encoded by the driver as BSON int64, which would make this case and the
+    # explicit Int64 case below send the same wire value - confirm.
+    CommandTestCase(
+        "maxtimems_err_exceeds_int32_max",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "maxTimeMS": INT32_MAX + 1,
+        },
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct should reject maxTimeMS exceeding the maximum int32 value",
+    ),
+    CommandTestCase(
+        "maxtimems_err_int64_exceeds_int32_max",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "maxTimeMS": Int64(INT32_MAX + 1),
+        },
+        error_code=BAD_VALUE_ERROR,
+        msg="distinct should reject Int64 maxTimeMS exceeding the maximum int32 value",
+    ),
+    # Numeric values that are fractional, NaN or infinite.
+    *[
+        CommandTestCase(
+            f"maxtimems_err_{tid}",
+            docs=[{"_id": 1, "x": "a"}],
+            # v=val binds the loop value per lambda (avoids late binding).
+            command=lambda ctx, v=val: {
+                "distinct": ctx.collection,
+                "key": "x",
+                "maxTimeMS": v,
+            },
+            error_code=FAILED_TO_PARSE_ERROR,
+            msg=f"distinct should reject {tid} as maxTimeMS",
+        )
+        for tid, val in [
+            ("fractional", 1.5),
+            ("decimal128_fractional", DECIMAL128_ONE_AND_HALF),
+            ("nan", FLOAT_NAN),
+            ("neg_nan", FLOAT_NEGATIVE_NAN),
+            ("decimal128_nan", DECIMAL128_NAN),
+            ("decimal128_neg_nan", DECIMAL128_NEGATIVE_NAN),
+            ("infinity", FLOAT_INFINITY),
+            ("neg_infinity", FLOAT_NEGATIVE_INFINITY),
+            ("decimal128_infinity", DECIMAL128_INFINITY),
+            ("decimal128_neg_infinity", DECIMAL128_NEGATIVE_INFINITY),
+        ]
+    ],
+    # Non-numeric values. The ids share the "maxtimems_err_" prefix with the
+    # group above, but the suffixes do not overlap.
+    *[
+        CommandTestCase(
+            f"maxtimems_err_{tid}",
+            docs=[{"_id": 1, "x": "a"}],
+            command=lambda ctx, v=val: {
+                "distinct": ctx.collection,
+                "key": "x",
+                "maxTimeMS": v,
+            },
+            error_code=TYPE_MISMATCH_ERROR,
+            msg=f"distinct should reject {tid} as maxTimeMS",
+        )
+        for tid, val in [
+            ("string", "hello"),
+            ("bool", True),
+            ("array", [1]),
+            ("object", {"a": 1}),
+            ("objectid", ObjectId("000000000000000000000001")),
+            ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
+            ("timestamp", Timestamp(1, 1)),
+            ("binary", Binary(b"data", 0)),
+            ("regex", Regex("abc", "")),
+            ("code", Code("function(){}")),
+            ("code_with_scope", Code("function(){}", {"s": 1})),
+            ("minkey", MinKey()),
+            ("maxkey", MaxKey()),
+        ]
+    ],
+]
+
+# Property [BSON Size Limit]: when the distinct values exceed the maximum BSON
+# document size (16MB), the command produces an error.
+DISTINCT_BSON_SIZE_LIMIT_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "bson_size_limit_exceeded",
+        # 1100 unique strings of just over 17,000 characters each: at least
+        # 18,700,000 bytes of distinct values, above 16 MiB (16,777,216 bytes).
+        # The f"v{i}" prefix keeps every value unique so none are deduplicated.
+        docs=[{"_id": i, "x": f"v{i}" + "x" * 17_000} for i in range(1100)],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        error_code=DISTINCT_TOO_BIG_ERROR,
+        msg="distinct should produce an error when results exceed the 16MB BSON size limit",
+    ),
+]
+
+# All error-case lists of this module, concatenated in declaration order for
+# the single parametrized runner below.
+DISTINCT_COMMAND_ERROR_TESTS: list[CommandTestCase] = (
+    DISTINCT_QUERY_ERROR_TESTS
+    + DISTINCT_KEY_NULL_BYTE_TESTS
+    + DISTINCT_COLLNAME_STRING_ERROR_TESTS
+    + DISTINCT_UNRECOGNIZED_FIELDS_TESTS
+    + DISTINCT_WRITE_CONCERN_TESTS
+    + DISTINCT_MAXTIMEMS_ERROR_TESTS
+    + DISTINCT_BSON_SIZE_LIMIT_TESTS
+)
+
+
+@pytest.mark.parametrize("test", pytest_params(DISTINCT_COMMAND_ERROR_TESTS))
+def test_distinct_command_errors(
+    database_client: Any, collection: Any, test: CommandTestCase
+) -> None:
+    """Test distinct command error cases.
+
+    Prepares the case's target collection, builds the command and expected
+    payload from the resulting context, executes the command and passes the
+    raw result to ``assertResult`` together with the case's error code,
+    message and ordering relaxation.
+    """
+    # prepare() may hand back a different collection handle than the fixture.
+    collection = test.prepare(database_client, collection)
+    ctx = CommandContext.from_collection(collection)
+    result = execute_command(collection, test.build_command(ctx))
+    assertResult(
+        result,
+        expected=test.build_expected(ctx),
+        error_code=test.error_code,
+        msg=test.msg,
+        raw_res=True,
+        ignore_order_in=test.ignore_order_in,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_deduplication.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_deduplication.py
new file
mode 100644 index 00000000..c1c32192 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_deduplication.py @@ -0,0 +1,431 @@ +"""Tests for distinct command deduplication behavior.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import Binary, Code, Decimal128, Int64, Regex +from bson.timestamp import Timestamp + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import ViewCollection +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_ZERO, + DECIMAL128_ZERO, + DOUBLE_MAX_SAFE_INTEGER, + DOUBLE_NEGATIVE_ZERO, + DOUBLE_PRECISION_LOSS, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + FLOAT_NEGATIVE_NAN, +) + +# Property [Array Unwinding]: when the key field value is an array, each element +# is treated as a separate value for deduplication. 
+# Every case here sends the bare command {"distinct": <collection>, "key": "x"}
+# with no query or collation; only the shape of the "x" values varies.
+DISTINCT_ARRAY_UNWINDING_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "array_top_level_elements",
+        docs=[{"_id": 1, "x": [1, 2, 3]}, {"_id": 2, "x": [2, 4]}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [1, 2, 3, 4], "ok": 1.0},
+        msg="distinct should treat each array element as a separate value",
+    ),
+    CommandTestCase(
+        "array_nested_preserved",
+        docs=[{"_id": 1, "x": [1, [1], 1]}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [1, [1]], "ok": 1.0},
+        msg="distinct should preserve nested arrays as distinct values",
+    ),
+    CommandTestCase(
+        "array_empty_contributes_nothing",
+        docs=[{"_id": 1, "x": []}, {"_id": 2, "x": "a"}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": ["a"], "ok": 1.0},
+        msg="distinct should extract zero elements from an empty array",
+    ),
+    CommandTestCase(
+        "array_single_level_only",
+        # The stored value nests three levels; the single expected value keeps
+        # two of them, i.e. exactly one level is removed.
+        docs=[{"_id": 1, "x": [[["a"]]]}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [[["a"]]], "ok": 1.0},
+        msg="distinct should only unwrap one level of array nesting",
+    ),
+    CommandTestCase(
+        "array_mixed_with_scalar",
+        docs=[{"_id": 1, "x": [1, 2]}, {"_id": 2, "x": 3}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [1, 2, 3], "ok": 1.0},
+        msg="distinct should combine array elements and scalar values",
+    ),
+    CommandTestCase(
+        "array_null_elements_unwound",
+        docs=[{"_id": 1, "x": [1, None, 2]}, {"_id": 2, "x": [None, 3]}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [None, 1, 2, 3], "ok": 1.0},
+        msg="distinct should unwrap null elements from arrays and deduplicate them",
+    ),
+    CommandTestCase(
+        "array_null_element_dedup_with_explicit_null",
+        docs=[
+            {"_id": 1, "x": [1, None]},
+            {"_id": 2, "x": None},
+            {"_id": 3, "x": [2]},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [None, 1, 2], "ok": 1.0},
+        msg="distinct should deduplicate null from array with explicit null field value",
+    ),
+]
+
+# Property [Value Deduplication]: numeric values with the same mathematical value
+# are deduplicated across types, and the first-encountered representation is
+# returned.
+# NOTE(review): no case in this list sets ignore_order_in, so the expected
+# "values" lists are written in a specific order; several differ from document
+# insertion order - presumably the server's sort order, confirm.
+DISTINCT_VALUE_DEDUP_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "dedup_numeric_across_types",
+        docs=[
+            {"_id": 1, "x": 1},
+            {"_id": 2, "x": Int64(1)},
+            {"_id": 3, "x": 1.0},
+            {"_id": 4, "x": Decimal128("1")},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [1], "ok": 1.0},
+        msg="distinct should deduplicate numerically equal values across types",
+    ),
+    CommandTestCase(
+        "dedup_all_zeros",
+        docs=[
+            {"_id": 1, "x": 0},
+            {"_id": 2, "x": DOUBLE_NEGATIVE_ZERO},
+            {"_id": 3, "x": DECIMAL128_NEGATIVE_ZERO},
+            {"_id": 4, "x": DECIMAL128_ZERO},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [0], "ok": 1.0},
+        msg="distinct should deduplicate all zero representations to a single value",
+    ),
+    CommandTestCase(
+        "dedup_nan_across_types",
+        docs=[
+            {"_id": 1, "x": FLOAT_NAN},
+            {"_id": 2, "x": Decimal128("NaN")},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={
+            # NaN never compares equal to itself, so the expectation is wrapped
+            # in pytest.approx with nan_ok=True to allow a NaN-to-NaN match.
+            "values": [pytest.approx(FLOAT_NAN, nan_ok=True)],
+            "ok": 1.0,
+        },
+        msg="distinct should deduplicate NaN across float and Decimal128",
+    ),
+    CommandTestCase(
+        "dedup_pos_infinity_across_types",
+        docs=[
+            {"_id": 1, "x": FLOAT_INFINITY},
+            {"_id": 2, "x": DECIMAL128_INFINITY},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [FLOAT_INFINITY], "ok": 1.0},
+        msg="distinct should deduplicate +Infinity across float and Decimal128",
+    ),
+    CommandTestCase(
+        "dedup_neg_infinity_across_types",
+        docs=[
+            {"_id": 1, "x": FLOAT_NEGATIVE_INFINITY},
+            {"_id": 2, "x": DECIMAL128_NEGATIVE_INFINITY},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [FLOAT_NEGATIVE_INFINITY], "ok": 1.0},
+        msg="distinct should deduplicate -Infinity across float and Decimal128",
+    ),
+    CommandTestCase(
+        "dedup_bool_not_numeric",
+        docs=[
+            {"_id": 1, "x": 0},
+            {"_id": 2, "x": 1},
+            {"_id": 3, "x": False},
+            {"_id": 4, "x": True},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        # NOTE(review): in Python 0 == False and 1 == True, so a plain ==
+        # comparison of this list cannot tell ints from bools - confirm that
+        # assertResult compares types as well as values.
+        expected={"values": [0, 1, False, True], "ok": 1.0},
+        msg="distinct should not deduplicate booleans with their numeric equivalents",
+    ),
+    CommandTestCase(
+        "dedup_decimal128_trailing_zeros",
+        docs=[
+            {"_id": 1, "x": Decimal128("0.1")},
+            {"_id": 2, "x": Decimal128("0.10")},
+            {"_id": 3, "x": Decimal128("0.100")},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [Decimal128("0.1")], "ok": 1.0},
+        msg="distinct should deduplicate Decimal128 values with trailing zeros",
+    ),
+    CommandTestCase(
+        "dedup_decimal128_vs_double_distinct",
+        docs=[
+            {"_id": 1, "x": Decimal128("0.1")},
+            {"_id": 2, "x": 0.1},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [Decimal128("0.1"), 0.1], "ok": 1.0},
+        msg=(
+            "distinct should treat Decimal128 and double as distinct"
+            " when they differ in exact representation"
+        ),
+    ),
+    CommandTestCase(
+        "dedup_int64_beyond_double_precision",
+        docs=[
+            {"_id": 1, "x": Int64(DOUBLE_PRECISION_LOSS)},
+            {"_id": 2, "x": float(DOUBLE_MAX_SAFE_INTEGER)},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        # The expected list puts the double first, the reverse of insertion order.
+        # NOTE(review): assumes DOUBLE_MAX_SAFE_INTEGER < DOUBLE_PRECISION_LOSS
+        # in test_constants - confirm.
+        expected={
+            "values": [float(DOUBLE_MAX_SAFE_INTEGER), Int64(DOUBLE_PRECISION_LOSS)],
+            "ok": 1.0,
+        },
+        msg="distinct should compare Int64 at full precision against double",
+    ),
+    CommandTestCase(
+        "dedup_object_key_order_matters",
+        docs=[
+            {"_id": 1, "x": {"a": 1, "b": 2}},
+            {"_id": 2, "x": {"b": 2, "a": 1}},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        # NOTE(review): two Python dicts with the same items compare equal
+        # regardless of key order, so this expectation only distinguishes the
+        # two documents if the comparison is order-aware - confirm.
+        expected={
+            "values": [{"a": 1, "b": 2}, {"b": 2, "a": 1}],
+            "ok": 1.0,
+        },
+        msg="distinct should treat objects with different key order as distinct",
+    ),
+    CommandTestCase(
+        "dedup_binary_subtype_matters",
+        docs=[
+            {"_id": 1, "x": Binary(b"hello", 0)},
+            {"_id": 2, "x": Binary(b"hello", 5)},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        # NOTE(review): the subtype-0 value is expected back as plain bytes
+        # rather than Binary - presumably the driver's decoding; confirm.
+        expected={
+            "values": [b"hello", Binary(b"hello", 5)],
+            "ok": 1.0,
+        },
+        msg="distinct should treat same data with different binary subtypes as distinct",
+    ),
+    CommandTestCase(
+        "dedup_timestamp_by_pair",
+        docs=[
+            {"_id": 1, "x": Timestamp(100, 1)},
+            {"_id": 2, "x": Timestamp(100, 1)},
+            {"_id": 3, "x": Timestamp(100, 2)},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={
+            "values": [Timestamp(100, 1), Timestamp(100, 2)],
+            "ok": 1.0,
+        },
+        msg="distinct should deduplicate Timestamp values by their (time, increment) pair",
+    ),
+    CommandTestCase(
+        "dedup_datetime_millisecond_precision",
+        # The seventh positional datetime argument is microseconds, so 1000 is
+        # exactly one millisecond after the other two (identical) timestamps.
+        docs=[
+            {"_id": 1, "x": datetime(2024, 1, 1, 0, 0, 0, 0, tzinfo=timezone.utc)},
+            {"_id": 2, "x": datetime(2024, 1, 1, 0, 0, 0, 1000, tzinfo=timezone.utc)},
+            {"_id": 3, "x": datetime(2024, 1, 1, 0, 0, 0, 0, tzinfo=timezone.utc)},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={
+            "values": [
+                datetime(2024, 1, 1, 0, 0, 0, 0, tzinfo=timezone.utc),
+                datetime(2024, 1, 1, 0, 0, 0, 1000, tzinfo=timezone.utc),
+            ],
+            "ok": 1.0,
+        },
+        msg="distinct should preserve millisecond precision for datetime deduplication",
+    ),
+    CommandTestCase(
+        "dedup_first_encountered_type_wins",
+        docs=[
+            {"_id": 1, "x": 5.0},
+            {"_id": 2, "x": 5},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [5.0], "ok": 1.0},
+        msg="distinct should return the first-encountered type when duplicates exist",
+    ),
+    CommandTestCase(
+        "dedup_decimal128_scientific_notation",
+        docs=[
+            {"_id": 1, "x": Decimal128("1E+3")},
+            {"_id": 2, "x": Decimal128("1000")},
+            {"_id": 3, "x": 1000},
+            {"_id": 4, "x": 1000.0},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [Decimal128("1E+3")], "ok": 1.0},
+        msg=(
+            "distinct should deduplicate Decimal128 scientific notation"
+            " with equivalent integer and double values"
+        ),
+    ),
+    CommandTestCase(
+        "dedup_nan_all_variants",
+        docs=[
+            {"_id": 1, "x": FLOAT_NAN},
+            {"_id": 2, "x": FLOAT_NEGATIVE_NAN},
+            {"_id": 3, "x": Decimal128("NaN")},
+            {"_id": 4, "x": Decimal128("-NaN")},
+            {"_id": 5, "x": Decimal128("sNaN")},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={
+            # Same NaN-tolerant wrapper as in dedup_nan_across_types.
+            "values": [pytest.approx(FLOAT_NAN, nan_ok=True)],
+            "ok": 1.0,
+        },
+        msg="distinct should deduplicate all NaN variants (NaN, -NaN, sNaN) to one value",
+    ),
+    CommandTestCase(
+        "dedup_regex_flags_matter",
+        docs=[
+            {"_id": 1, "x": Regex("abc", "i")},
+            {"_id": 2, "x": Regex("abc", "")},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        # The empty-flag regex is expected first, the reverse of insertion order.
+        expected={
+            "values": [Regex("abc", ""), Regex("abc", "i")],
+            "ok": 1.0,
+        },
+        msg="distinct should treat regex values with different flags as distinct",
+    ),
+    CommandTestCase(
+        "dedup_regex_empty_flags_equals_no_flags",
+        docs=[
+            {"_id": 1, "x": Regex("abc", "")},
+            {"_id": 2, "x": Regex("abc")},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={"values": [Regex("abc", "")], "ok": 1.0},
+        msg="distinct should deduplicate regex with empty flags and regex with no flags",
+    ),
+    CommandTestCase(
+        "dedup_code_vs_code_with_scope",
+        docs=[
+            {"_id": 1, "x": Code("function()")},
+            {"_id": 2, "x": Code("function()", {"s": 1})},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={
+            "values": [Code("function()"), Code("function()", {"s": 1})],
+            "ok": 1.0,
+        },
+        msg="distinct should treat Code and CodeWithScope as distinct types",
+    ),
+    CommandTestCase(
+        "dedup_code_with_scope_different_scopes",
+        docs=[
+            {"_id": 1, "x": Code("function()", {"x": 1})},
+            {"_id": 2, "x": Code("function()", {"x": 2})},
+        ],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        expected={
+            "values": [
+                Code("function()", {"x": 1}),
+                Code("function()", {"x": 2}),
+            ],
+            "ok": 1.0,
+        },
+        msg="distinct should treat CodeWithScope values with different scopes as distinct",
+    ),
+]
+
+# Property [Unicode Deduplication]: precomposed and combining Unicode characters
+# are distinct under binary comparison but collapsed under ICU collation.
+# All three cases share the same two documents; only the collation differs
+# (none, {"locale": "en", "strength": 1}, {"locale": "simple"}).
+DISTINCT_UNICODE_DEDUP_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "dedup_unicode_binary_distinct",
+        # U+00E9 (precomposed) vs U+0065 U+0301 (combining).
+        docs=[{"_id": 1, "x": "\u00e9"}, {"_id": 2, "x": "e\u0301"}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        # The combining form is expected first, the reverse of insertion order.
+        expected={"values": ["e\u0301", "\u00e9"], "ok": 1.0},
+        msg=(
+            "distinct should treat precomposed and combining characters"
+            " as distinct under binary comparison"
+        ),
+    ),
+    CommandTestCase(
+        "dedup_unicode_icu_collapsed",
+        # U+00E9 (precomposed) vs U+0065 U+0301 (combining).
+        docs=[{"_id": 1, "x": "\u00e9"}, {"_id": 2, "x": "e\u0301"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "collation": {"locale": "en", "strength": 1},
+        },
+        # The surviving value is the precomposed form stored in document 1.
+        expected={"values": ["\u00e9"], "ok": 1.0},
+        msg="distinct should collapse precomposed and combining characters under ICU collation",
+    ),
+    CommandTestCase(
+        "dedup_unicode_simple_locale_distinct",
+        # U+00E9 (precomposed) vs U+0065 U+0301 (combining).
+        docs=[{"_id": 1, "x": "\u00e9"}, {"_id": 2, "x": "e\u0301"}],
+        command=lambda ctx: {
+            "distinct": ctx.collection,
+            "key": "x",
+            "collation": {"locale": "simple"},
+        },
+        # Same expectation as the no-collation case above.
+        expected={"values": ["e\u0301", "\u00e9"], "ok": 1.0},
+        msg="distinct should preserve binary distinction with locale=simple",
+    ),
+]
+
+# Property [Array Unwinding on Views]: array unwinding behavior is identical
+# for collections and views.
+DISTINCT_ARRAY_UNWINDING_VIEW_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "array_unwinding_view",
+        # Same documents and command as "array_top_level_elements", but run
+        # against a ViewCollection target instead of a plain collection.
+        target_collection=ViewCollection(),
+        docs=[{"_id": 1, "x": [1, 2, 3]}, {"_id": 2, "x": [2, 4]}],
+        command=lambda ctx: {"distinct": ctx.collection, "key": "x"},
+        # "ok" is written as 1.0 to match every other expectation in this
+        # module (it was the lone integer 1).
+        expected={"values": [1, 2, 3, 4], "ok": 1.0},
+        # This is the only case in the module that relaxes value ordering.
+        ignore_order_in=["values"],
+        msg="distinct should unwrap arrays identically on views",
+    ),
+]
+
+# All deduplication case lists, concatenated in declaration order for the
+# single parametrized runner below.
+DISTINCT_DEDUPLICATION_TESTS: list[CommandTestCase] = (
+    DISTINCT_ARRAY_UNWINDING_TESTS
+    + DISTINCT_VALUE_DEDUP_TESTS
+    + DISTINCT_UNICODE_DEDUP_TESTS
+    + DISTINCT_ARRAY_UNWINDING_VIEW_TESTS
+)
+
+
+@pytest.mark.parametrize("test", pytest_params(DISTINCT_DEDUPLICATION_TESTS))
+def test_distinct_deduplication(
+    database_client: Any, collection: Any, test: CommandTestCase
+) -> None:
+    """Run one deduplication case against the distinct command.
+
+    Prepares the case's target collection, builds the command and expected
+    payload from the resulting context, executes the command and passes the
+    raw result to ``assertResult`` together with the case's error code,
+    message and ordering relaxation.
+
+    Args:
+        database_client: Client handle passed through to ``test.prepare``.
+        collection: Collection handle; replaced by whatever ``test.prepare``
+            returns before the command is built.
+        test: The parametrized case supplying command, expectation and message.
+    """
+    collection = test.prepare(database_client, collection)
+    ctx = CommandContext.from_collection(collection)
+    result = execute_command(collection, test.build_command(ctx))
+    assertResult(
+        result,
+        expected=test.build_expected(ctx),
+        error_code=test.error_code,
+        msg=test.msg,
+        raw_res=True,
+        ignore_order_in=test.ignore_order_in,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_hint.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_hint.py
new file mode 100644
index 00000000..b70688f1
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_hint.py
@@ -0,0 +1,292 @@
+"""Tests for distinct command hint parameter behavior."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from bson import Decimal128, Int64
+from pymongo import IndexModel
+
+from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import (
+    CommandContext,
+    CommandTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from
documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import ViewCollection + +# Property [Hint Success]: valid hint values are accepted and influence index +# selection for the distinct command. +DISTINCT_HINT_SUCCESS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "hint_string_matches_index_name", + indexes=[IndexModel([("x", 1)], name="x_1")], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": "x_1"}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept a string hint that exactly matches an index name", + ), + CommandTestCase( + "hint_doc_matches_key_pattern", + indexes=[IndexModel([("x", 1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": {"x": 1}}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept a document hint matching the index key pattern", + ), + CommandTestCase( + "hint_doc_direction_int64", + indexes=[IndexModel([("x", 1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": Int64(1)}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept Int64(1) as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_double", + indexes=[IndexModel([("x", 1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": 1.0}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept double 1.0 as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_decimal128", + indexes=[IndexModel([("x", 1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + 
"distinct": ctx.collection, + "key": "x", + "hint": {"x": Decimal128("1")}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept Decimal128('1') as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_neg1_int64", + indexes=[IndexModel([("x", -1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": Int64(-1)}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept Int64(-1) as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_neg1_double", + indexes=[IndexModel([("x", -1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": -1.0}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept double -1.0 as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_neg1_decimal128", + indexes=[IndexModel([("x", -1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": Decimal128("-1")}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept Decimal128('-1') as a direction value in document hint", + ), + CommandTestCase( + "hint_doc_direction_neg1_int32", + indexes=[IndexModel([("x", -1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": -1}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept int32 -1 as a direction value in document hint", + ), + CommandTestCase( + "hint_natural_forward", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"$natural": 1}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should 
accept $natural: 1 for forward collection scan", + ), + CommandTestCase( + "hint_natural_backward", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"$natural": -1}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept $natural: -1 for backward collection scan", + ), + CommandTestCase( + "hint_empty_doc", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": {}}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should treat empty document hint as no hint", + ), + CommandTestCase( + "hint_nonexistent_collection_string", + docs=None, + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": "any_index_name", + }, + expected={"values": [], "ok": 1.0}, + msg="distinct should skip hint validation for non-existent collections (string hint)", + ), + CommandTestCase( + "hint_nonexistent_collection_doc", + docs=None, + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"any_field": 1}, + }, + expected={"values": [], "ok": 1.0}, + msg="distinct should skip hint validation for non-existent collections (doc hint)", + ), + CommandTestCase( + "hint_sparse_index", + indexes=[IndexModel([("y", 1)], sparse=True)], + docs=[ + {"_id": 1, "x": "a", "y": 1}, + {"_id": 2, "x": "b"}, + {"_id": 3, "x": "c", "y": 3}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": {"y": 1}}, + expected={"values": ["a", "c"], "ok": 1.0}, + msg=( + "distinct with sparse index hint should return only" + " documents that have the indexed field" + ), + ), + CommandTestCase( + "hint_partial_index", + indexes=[ + IndexModel( + [("x", 1)], + partialFilterExpression={"status": "active"}, + ) + ], + docs=[ + {"_id": 1, "x": "a", "status": "active"}, + {"_id": 2, "x": "b", "status": "inactive"}, + {"_id": 3, "x": "c", "status": "active"}, + ], + 
command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": {"x": 1}}, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct with partial index hint should return only documents matching the filter", + ), + CommandTestCase( + "hint_compound_index_by_name", + indexes=[IndexModel([("x", 1), ("y", 1)], name="x_1_y_1")], + docs=[{"_id": 1, "x": "a", "y": 1}, {"_id": 2, "x": "b", "y": 2}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": "x_1_y_1"}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept a compound index hint by name", + ), + CommandTestCase( + "hint_compound_index_by_pattern", + indexes=[IndexModel([("x", 1), ("y", 1)])], + docs=[{"_id": 1, "x": "a", "y": 1}, {"_id": 2, "x": "b", "y": 2}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": 1, "y": 1}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept a compound index hint by key pattern", + ), + CommandTestCase( + "hint_id_index_by_name", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": "_id_"}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept the default _id index hint by name", + ), + CommandTestCase( + "hint_id_index_by_pattern", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"_id": 1}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should accept the default _id index hint by key pattern", + ), + CommandTestCase( + "hint_non_collation_compatible_index", + indexes=[IndexModel([("x", 1)], collation={"locale": "fr"})], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "A"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": 1}, + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg=( + "distinct should 
accept hint referencing a non-collation-compatible" + " index when collation is specified" + ), + ), +] + +# Property [Hint Accepted on Views]: hint is accepted on views without error. +DISTINCT_HINT_VIEW_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "hint_accepted_on_view", + target_collection=ViewCollection(), + indexes=[IndexModel([("x", 1)])], + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": {"x": 1}}, + expected={"values": ["a", "b"], "ok": 1.0}, + ignore_order_in=["values"], + msg="distinct should accept hint on views without error", + ), +] + +DISTINCT_HINT_TESTS: list[CommandTestCase] = DISTINCT_HINT_SUCCESS_TESTS + DISTINCT_HINT_VIEW_TESTS + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_HINT_TESTS)) +def test_distinct_hint(database_client: Any, collection: Any, test: CommandTestCase) -> None: + """Prepare the target collection, run one distinct hint case, and assert its result.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_key_field.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_key_field.py new file mode 100644 index 00000000..2861fbd2 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_key_field.py @@ -0,0 +1,307 @@ +"""Tests for distinct command key field behavior.""" + +from __future__ import annotations + +from functools import reduce +from typing import Any + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +)
+from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Null Field Value]: when a document has an explicit null value for the +# key field, null appears in the distinct values; missing fields are silently skipped. +DISTINCT_NULL_FIELD_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "null_explicit_null_included", + docs=[{"_id": 1, "x": None}, {"_id": 2, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [None, "a"], "ok": 1.0}, + msg="distinct should include explicit null in results", + ), + CommandTestCase( + "null_missing_field_skipped", + docs=[{"_id": 1, "y": "a"}, {"_id": 2, "y": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty when all documents are missing the key field", + ), + CommandTestCase( + "null_missing_does_not_contribute_null", + docs=[{"_id": 1}, {"_id": 2, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should not add null for documents missing the key field", + ), + CommandTestCase( + "null_explicit_null_deduplicated", + docs=[{"_id": 1, "x": None}, {"_id": 2, "x": None}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [None], "ok": 1.0}, + msg="distinct should deduplicate multiple explicit null values", + ), + CommandTestCase( + "null_mixed_null_and_missing", + docs=[{"_id": 1, "x": None}, {"_id": 2}, {"_id": 3, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [None, "a"], "ok": 1.0}, + msg="distinct should include explicit null but skip missing fields", + ), +] + +# Property [Dot Notation and Field Path Traversal]: the key parameter supports +# dot notation to traverse nested 
document structures. +DISTINCT_DOT_NOTATION_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "dot_embedded_document", + docs=[ + {"_id": 1, "item": {"sku": "abc"}}, + {"_id": 2, "item": {"sku": "def"}}, + {"_id": 3, "item": {"sku": "abc"}}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "item.sku"}, + expected={"values": ["abc", "def"], "ok": 1.0}, + msg="distinct should access fields within embedded documents via dot notation", + ), + CommandTestCase( + "dot_numeric_array_index", + docs=[ + {"_id": 1, "temps": [{"value": 10}, {"value": 20}]}, + {"_id": 2, "temps": [{"value": 30}, {"value": 40}]}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "temps.1.value"}, + expected={"values": [20, 40], "ok": 1.0}, + msg="distinct should use numeric path components to address array positions", + ), + CommandTestCase( + "dot_descend_into_array_of_objects", + docs=[ + {"_id": 1, "items": [{"name": "a"}, {"name": "b"}]}, + {"_id": 2, "items": [{"name": "b"}, {"name": "c"}]}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "items.name"}, + expected={"values": ["a", "b", "c"], "ok": 1.0}, + msg=( + "distinct should descend into array elements to extract" + " nested fields from each object" + ), + ), + CommandTestCase( + "dot_multi_level_array_traversal", + docs=[ + {"_id": 1, "a": [{"b": [{"c": 1}, {"c": 2}]}, {"b": [{"c": 3}]}]}, + {"_id": 2, "a": [{"b": [{"c": 2}, {"c": 4}]}]}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "a.b.c"}, + expected={"values": [1, 2, 3, 4], "ok": 1.0}, + msg="distinct should traverse multiple levels of nested arrays", + ), + CommandTestCase( + "dot_leading_dot_empty", + docs=[{"_id": 1, "x": "hello"}], + command=lambda ctx: {"distinct": ctx.collection, "key": ".x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty results for a key with a leading dot", + ), + CommandTestCase( + "dot_trailing_dot_empty", + docs=[{"_id": 1, "x": "hello"}], + command=lambda ctx: 
{"distinct": ctx.collection, "key": "x."}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty results for a key with a trailing dot", + ), + CommandTestCase( + "dot_consecutive_dots_empty", + docs=[{"_id": 1, "x": {"y": "hello"}}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x..y"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty results for a key with consecutive dots", + ), + CommandTestCase( + "dot_negative_numeric_empty", + docs=[{"_id": 1, "arr": ["a", "b", "c"]}], + command=lambda ctx: {"distinct": ctx.collection, "key": "arr.-1"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty results for negative numeric path components", + ), + CommandTestCase( + "dot_out_of_bounds_empty", + docs=[{"_id": 1, "arr": ["a", "b", "c"]}], + command=lambda ctx: {"distinct": ctx.collection, "key": "arr.99"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty results for out-of-bounds numeric path components", + ), + CommandTestCase( + "dot_beyond_int32_literal_field_name", + docs=[{"_id": 1, "data": {"2147483648": "found"}}], + command=lambda ctx: {"distinct": ctx.collection, "key": "data.2147483648"}, + expected={"values": ["found"], "ok": 1.0}, + msg="distinct should treat numeric components beyond int32 range as literal field names", + ), + CommandTestCase( + "dot_deeply_nested_accepted", + docs=[{"_id": 1, **reduce(lambda inner, _: {"n": inner}, range(100), {"val": "deep"})}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": ".".join(["n"] * 100 + ["val"]), + }, + expected={"values": ["deep"], "ok": 1.0}, + msg="distinct should accept deeply nested paths with 100+ segments without error", + ), + CommandTestCase( + "dot_mixed_object_and_array_at_path", + docs=[ + {"_id": 1, "x": {"y": "from_obj"}}, + {"_id": 2, "x": [{"y": "from_arr1"}, {"y": "from_arr2"}]}, + {"_id": 3, "x": "scalar"}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": 
"x.y"}, + expected={"values": ["from_arr1", "from_arr2", "from_obj"], "ok": 1.0}, + msg=( + "distinct should traverse both objects and arrays at the same path" + " across different documents" + ), + ), + CommandTestCase( + "dot_numeric_on_mixed_object_and_array", + docs=[ + {"_id": 1, "data": ["arr_zero", "arr_one"]}, + {"_id": 2, "data": {"0": "obj_zero", "1": "obj_one"}}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "data.0"}, + expected={"values": ["arr_zero", "obj_zero"], "ok": 1.0}, + msg=( + "distinct should match numeric path component as both array index" + " and literal field name across documents" + ), + ), +] + +# Property [Key Field Special Characters]: dollar signs, whitespace, Unicode +# characters, and empty string are treated as literal field name characters in +# the key parameter. +DISTINCT_SPECIAL_CHARS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "special_dollar_in_key", + docs=[{"_id": 1, "$price": 9.99}], + command=lambda ctx: {"distinct": ctx.collection, "key": "$price"}, + expected={"values": [9.99], "ok": 1.0}, + msg="distinct should treat dollar sign in key as a literal field name character", + ), + CommandTestCase( + "special_dollar_only", + docs=[{"_id": 1, "x": "hello"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "$"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should succeed with $ as entire key (returns empty if no matching field)", + ), + CommandTestCase( + "special_double_dollar", + docs=[{"_id": 1, "x": "hello"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "$$"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should succeed with $$ as entire key (returns empty if no matching field)", + ), + CommandTestCase( + "special_space_in_key", + docs=[{"_id": 1, "my field": "space_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "my field"}, + expected={"values": ["space_value"], "ok": 1.0}, + msg="distinct should accept space characters in key 
field names", + ), + CommandTestCase( + "special_tab_in_key", + docs=[{"_id": 1, "tab\tfield": "tab_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "tab\tfield"}, + expected={"values": ["tab_value"], "ok": 1.0}, + msg="distinct should accept tab characters in key field names", + ), + CommandTestCase( + "special_newline_in_key", + docs=[{"_id": 1, "new\nline": "newline_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "new\nline"}, + expected={"values": ["newline_value"], "ok": 1.0}, + msg="distinct should accept newline characters in key field names", + ), + CommandTestCase( + "special_cjk_in_key", + # CJK Unified Ideographs. + docs=[{"_id": 1, "\u65e5\u672c\u8a9e": "cjk_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "\u65e5\u672c\u8a9e"}, + expected={"values": ["cjk_value"], "ok": 1.0}, + msg="distinct should accept CJK characters in key field names", + ), + CommandTestCase( + "special_emoji_in_key", + docs=[{"_id": 1, "\U0001f389": "emoji_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "\U0001f389"}, + expected={"values": ["emoji_value"], "ok": 1.0}, + msg="distinct should accept emoji characters in key field names", + ), + CommandTestCase( + "special_combining_mark_in_key", + # U+0065 U+0301 (e + combining acute accent). + docs=[{"_id": 1, "e\u0301": "combining_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "e\u0301"}, + expected={"values": ["combining_value"], "ok": 1.0}, + msg="distinct should accept combining mark characters in key field names", + ), + CommandTestCase( + "special_empty_string_key", + docs=[{"_id": 1, "": "empty_key_value"}], + command=lambda ctx: {"distinct": ctx.collection, "key": ""}, + expected={"values": ["empty_key_value"], "ok": 1.0}, + msg='distinct should match documents with a field literally named ""', + ), +] + +# Property [Distinct on _id Field]: the _id field can be used as the key +# parameter and returns the distinct _id values. 
+DISTINCT_ID_FIELD_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "id_field_as_key", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}, {"_id": 3, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "_id"}, + expected={"values": [1, 2, 3], "ok": 1.0}, + msg="distinct should return all _id values when key is '_id'", + ), + CommandTestCase( + "id_field_dot_notation", + docs=[ + {"_id": {"a": 1, "b": 2}, "x": "hello"}, + {"_id": {"a": 1, "b": 3}, "x": "world"}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "_id.a"}, + expected={"values": [1], "ok": 1.0}, + msg="distinct should support dot notation into compound _id fields", + ), +] + +DISTINCT_KEY_FIELD_TESTS: list[CommandTestCase] = ( + DISTINCT_NULL_FIELD_TESTS + + DISTINCT_DOT_NOTATION_TESTS + + DISTINCT_SPECIAL_CHARS_TESTS + + DISTINCT_ID_FIELD_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_KEY_FIELD_TESTS)) +def test_distinct_key_field(database_client: Any, collection: Any, test: CommandTestCase) -> None: + """Prepare the target collection, run one distinct key-field case, and assert its result.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py new file mode 100644 index 00000000..61428da6 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py @@ -0,0 +1,365 @@ +"""Tests for distinct command parameter acceptance behavior.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import
pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex +from bson.timestamp import Timestamp + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import NAMESPACE_NOT_FOUND_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.property_checks import Eq, Len, Ne +from documentdb_tests.framework.test_constants import ( + DOUBLE_NEGATIVE_ZERO, + INT32_MAX, +) + +# Property [Query Parameter Behavior]: the query parameter filters which documents +# contribute to distinct values; an empty document matches all. +DISTINCT_QUERY_SUCCESS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "query_filters_documents", + docs=[ + {"_id": 1, "x": "a", "status": "active"}, + {"_id": 2, "x": "b", "status": "inactive"}, + {"_id": 3, "x": "c", "status": "active"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"status": "active"}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should filter documents by the query parameter", + ), + CommandTestCase( + "query_empty_doc_matches_all", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "b"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should treat empty document query as matching all documents", + ), +] + +# Property [Query No Match]: when the query matches no documents on an existing +# collection, distinct returns an empty values array. 
+DISTINCT_QUERY_NO_MATCH_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "query_no_matching_documents", + docs=[ + {"_id": 1, "x": "a", "y": 1}, + {"_id": 2, "x": "b", "y": 2}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"y": 99}, + }, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty values when query matches no documents", + ), +] + +# Property [Comment Parameter Behavior]: all BSON types are accepted as the +# comment value without error, and the comment does not affect command results. +DISTINCT_COMMENT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + f"comment_{tid}", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "comment": v, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg=f"distinct should accept {tid} as comment without affecting results", + ) + for tid, val in [ + ("string", "a string comment"), + ("int32", 42), + ("int64", Int64(123456789)), + ("double", 3.14), + ("decimal128", Decimal128("9.99")), + ("bool", True), + ("array", [1, "two", 3]), + ("object", {"reason": "testing"}), + ("objectid", ObjectId("000000000000000000000001")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(100, 1)), + ("binary", Binary(b"data", 0)), + ("regex", Regex("pattern", "i")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"s": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + +# Property [ReadConcern Success]: readConcern accepts "local", "available", and +# "majority" levels, as well as an empty object or provenance-only without a level. 
+DISTINCT_READCONCERN_SUCCESS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + f"readconcern_{tid}", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "readConcern": v, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg=f"distinct should accept readConcern {tid}", + ) + for tid, val in [ + ("local", {"level": "local"}), + ("available", {"level": "available"}), + ("majority", {"level": "majority"}), + ("empty_object", {}), + ("provenance_only", {"provenance": "clientSupplied"}), + ] +] + +# Property [maxTimeMS Acceptance]: maxTimeMS accepts 0, int32/Int64 values up to INT32_MAX, +# whole-number doubles, integral Decimal128 values, and negative zero (double or Decimal128). +DISTINCT_MAXTIMEMS_ACCEPTANCE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + f"maxtimems_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "maxTimeMS": v, + }, + expected={"values": ["a"], "ok": 1.0}, + msg=f"distinct should accept {tid} as maxTimeMS", + ) + for tid, val in [ + ("zero", 0), + ("positive_int", 1000), + ("int32_max", INT32_MAX), + ("int64_int32_max", Int64(INT32_MAX)), + ("whole_number_float", 500.0), + ("decimal128_integer", Decimal128("100")), + ("negative_zero", DOUBLE_NEGATIVE_ZERO), + ("decimal128_neg_zero_exponent", Decimal128("-0E+10")), + ] +] + +# Property [Timestamp Zero Replacement]: Timestamp(0, 0) is replaced by the server +# on insert; the stored values participate in deduplication, not the literal (0, 0).
+DISTINCT_TIMESTAMP_ZERO_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "dedup_timestamp_zero_replaced", + docs=[{"_id": 1, "x": Timestamp(0, 0)}, {"_id": 2, "x": Timestamp(0, 0)}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": Len(2), + "values.0": Ne(Timestamp(0, 0)), + "values.1": Ne(Timestamp(0, 0)), + "ok": Eq(1.0), + }, + msg=( + "distinct should return server-assigned timestamps for Timestamp(0, 0)," + " not deduplicate them as identical" + ), + ), +] + +# Property [Collection Name Acceptance]: non-existent collection names with special +# characters, Unicode, number- or keyword-like strings, and long names succeed with empty results. +DISTINCT_COLLECTION_NAME_ACCEPTANCE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collname_nonexistent", + docs=None, + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should succeed with empty results for a non-existent collection", + ), + CommandTestCase( + "collname_space", + docs=None, + command=lambda ctx: {"distinct": f"{ctx.collection} space", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should accept space characters in collection names", + ), + CommandTestCase( + "collname_punctuation", + docs=None, + command=lambda ctx: {"distinct": f"{ctx.collection}!@#%^&*()", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should accept punctuation characters in collection names", + ), + CommandTestCase( + "collname_control_chars", + docs=None, + command=lambda ctx: {"distinct": f"{ctx.collection}\x01\x02\x03", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should accept control characters in collection names", + ), + CommandTestCase( + "collname_zero_width_space", + # U+200B zero-width space.
+ docs=None, + command=lambda ctx: {"distinct": f"{ctx.collection}\u200b", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should accept zero-width space in collection names", + ), + CommandTestCase( + "collname_emoji", + docs=None, + command=lambda ctx: {"distinct": f"{ctx.collection}\U0001f389", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should accept emoji characters in collection names", + ), + CommandTestCase( + "collname_tab", + docs=None, + command=lambda ctx: {"distinct": f"{ctx.collection}\t", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should accept tab characters in collection names", + ), + CommandTestCase( + "collname_newline", + docs=None, + command=lambda ctx: {"distinct": f"{ctx.collection}\n", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should accept newline characters in collection names", + ), + CommandTestCase( + "collname_number_zero", + docs=None, + command=lambda ctx: {"distinct": "0", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg='distinct should accept "0" as collection name without coercion', + ), + CommandTestCase( + "collname_number_nan", + docs=None, + command=lambda ctx: {"distinct": "NaN", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg='distinct should accept "NaN" as collection name without coercion', + ), + CommandTestCase( + "collname_number_infinity", + docs=None, + command=lambda ctx: {"distinct": "Infinity", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg='distinct should accept "Infinity" as collection name without coercion', + ), + CommandTestCase( + "collname_number_true", + docs=None, + command=lambda ctx: {"distinct": "true", "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg='distinct should accept "true" as collection name without coercion', + ), + CommandTestCase( + "collname_number_null", + docs=None, + command=lambda ctx: {"distinct": "null", "key": "x"}, + expected={"values": [], "ok": 1.0}, + 
msg='distinct should accept "null" as collection name without coercion', + ), + CommandTestCase( + "collname_long_name", + docs=None, + command=lambda ctx: {"distinct": "a" * 10_000, "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should accept very long collection names without error", + ), +] + +# Property [Collection Name UUID Resolution]: Binary subtype 4 (UUID) as the +# distinct field triggers UUID-based collection resolution, producing a namespace +# not found error when the UUID does not match any collection. +DISTINCT_COLLECTION_NAME_UUID_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collname_uuid_binary_error", + docs=None, + command=lambda ctx: { + "distinct": Binary(b"\x00" * 16, 4), + "key": "x", + }, + error_code=NAMESPACE_NOT_FOUND_ERROR, + msg=( + "distinct should trigger UUID-based resolution for Binary subtype 4," + " producing a namespace not found error when UUID does not match" + ), + ), +] + +# Property [Collection Name UUID Success]: Binary subtype 4 (UUID) as the distinct +# field triggers UUID-based collection resolution; when the UUID matches an existing +# collection, the command succeeds. +DISTINCT_COLLECTION_NAME_UUID_SUCCESS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collname_uuid_success", + docs=[{"_id": 1, "x": "found"}], + command=lambda ctx: {"distinct": ctx.uuids[ctx.collection], "key": "x"}, + expected={"values": ["found"], "ok": 1.0}, + msg="distinct should succeed when Binary subtype 4 (UUID) matches an existing collection", + ), +] + +# Property [Null Optional Parameters]: when optional parameters (query, collation, +# readConcern, comment, maxTimeMS) are null, they are treated as omitted. 
+DISTINCT_NULL_PARAMS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + f"null_{tid}_param", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx, p=param: {"distinct": ctx.collection, "key": "x", p: None}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg=f"distinct should treat {param}=null as omitted", + ) + for tid, param in [ + ("query", "query"), + ("collation", "collation"), + ("read_concern", "readConcern"), + ("comment", "comment"), + ("max_time_ms", "maxTimeMS"), + ] +] + +DISTINCT_PARAMETER_TESTS: list[CommandTestCase] = ( + DISTINCT_NULL_PARAMS_TESTS + + DISTINCT_QUERY_SUCCESS_TESTS + + DISTINCT_QUERY_NO_MATCH_TESTS + + DISTINCT_COMMENT_TESTS + + DISTINCT_READCONCERN_SUCCESS_TESTS + + DISTINCT_MAXTIMEMS_ACCEPTANCE_TESTS + + DISTINCT_TIMESTAMP_ZERO_TESTS + + DISTINCT_COLLECTION_NAME_ACCEPTANCE_TESTS + + DISTINCT_COLLECTION_NAME_UUID_TESTS + + DISTINCT_COLLECTION_NAME_UUID_SUCCESS_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_PARAMETER_TESTS)) +def test_distinct_parameters(database_client: Any, collection: Any, test: CommandTestCase) -> None: + """Prepare the target collection, run one distinct parameter case, and assert its result.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_query_operators.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_query_operators.py new file mode 100644 index 00000000..170dd7b6 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_query_operators.py @@ -0,0 +1,539 @@ +"""Representative query operator wiring tests for the distinct command.
+ +One test per operator, grouped by category, confirms that the distinct command's +query parameter is correctly wired to the query engine. Exhaustive operator +behavior is tested in core/operator/query/. +""" + +from __future__ import annotations + +from typing import Any + +import pytest +from bson import Int64 +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Query Operator Wiring]: the distinct command's query parameter supports +# comparison, logical, array, element, evaluation, and bitwise operators. +DISTINCT_QUERY_OPERATOR_TESTS: list[CommandTestCase] = [ + # Comparison operators. + CommandTestCase( + "query_eq", + docs=[{"_id": 1, "x": "a", "n": 1}, {"_id": 2, "x": "b", "n": 2}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$eq": 1}}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $eq in query", + ), + CommandTestCase( + "query_ne", + docs=[{"_id": 1, "x": "a", "n": 1}, {"_id": 2, "x": "b", "n": 2}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$ne": 1}}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $ne in query", + ), + CommandTestCase( + "query_gt", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$gt": 4}}, + }, + expected={"values": ["b", "c"], "ok": 1.0}, + msg="distinct should support $gt in query", + ), + CommandTestCase( + "query_gte", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ],
command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$gte": 5}}, + }, + expected={"values": ["b", "c"], "ok": 1.0}, + msg="distinct should support $gte in query", + ), + CommandTestCase( + "query_lt", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$lt": 5}}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $lt in query", + ), + CommandTestCase( + "query_lte", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$lte": 5}}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should support $lte in query", + ), + CommandTestCase( + "query_in", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 2}, + {"_id": 3, "x": "c", "n": 3}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$in": [1, 3]}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $in in query", + ), + CommandTestCase( + "query_nin", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 2}, + {"_id": 3, "x": "c", "n": 3}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$nin": [1, 3]}}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $nin in query", + ), + # Logical operators. 
+ CommandTestCase( + "query_and", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$and": [{"n": {"$gt": 1}}, {"n": {"$lt": 10}}]}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $and in query", + ), + CommandTestCase( + "query_or", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$or": [{"n": 1}, {"n": 10}]}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $or in query", + ), + CommandTestCase( + "query_nor", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$nor": [{"n": 1}, {"n": 10}]}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $nor in query", + ), + CommandTestCase( + "query_not", + docs=[ + {"_id": 1, "x": "a", "n": 1}, + {"_id": 2, "x": "b", "n": 5}, + {"_id": 3, "x": "c", "n": 10}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$not": {"$gt": 5}}}, + }, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should support $not in query", + ), + # Element operators. 
+ CommandTestCase( + "query_exists", + docs=[ + {"_id": 1, "x": "a", "opt": "yes"}, + {"_id": 2, "x": "b"}, + {"_id": 3, "x": "c", "opt": "no"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"opt": {"$exists": True}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $exists in query", + ), + CommandTestCase( + "query_type", + docs=[ + {"_id": 1, "x": "a", "v": 1}, + {"_id": 2, "x": "b", "v": "str"}, + {"_id": 3, "x": "c", "v": 3}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"v": {"$type": "string"}}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $type in query", + ), + # Array operators. + CommandTestCase( + "query_all", + docs=[ + {"_id": 1, "x": "a", "tags": ["red", "blue"]}, + {"_id": 2, "x": "b", "tags": ["green"]}, + {"_id": 3, "x": "c", "tags": ["red", "green"]}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"tags": {"$all": ["red"]}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $all in query", + ), + CommandTestCase( + "query_elemMatch", + docs=[ + {"_id": 1, "x": "a", "scores": [{"v": 80}, {"v": 90}]}, + {"_id": 2, "x": "b", "scores": [{"v": 60}, {"v": 70}]}, + {"_id": 3, "x": "c", "scores": [{"v": 95}]}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"scores": {"$elemMatch": {"v": {"$gte": 90}}}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $elemMatch in query", + ), + CommandTestCase( + "query_size", + docs=[ + {"_id": 1, "x": "a", "tags": ["red", "blue"]}, + {"_id": 2, "x": "b", "tags": ["green"]}, + {"_id": 3, "x": "c", "tags": ["red", "green"]}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"tags": {"$size": 1}}, + }, + expected={"values": ["b"], "ok": 1.0}, + msg="distinct should support $size in query", + ), + # 
Evaluation operators. + CommandTestCase( + "query_regex", + docs=[ + {"_id": 1, "x": "a", "name": "apple"}, + {"_id": 2, "x": "b", "name": "banana"}, + {"_id": 3, "x": "c", "name": "apricot"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"name": {"$regex": "^ap"}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $regex in query", + ), + CommandTestCase( + "query_mod", + docs=[ + {"_id": 1, "x": "a", "n": 10}, + {"_id": 2, "x": "b", "n": 15}, + {"_id": 3, "x": "c", "n": 20}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$mod": [10, 0]}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $mod in query", + ), + CommandTestCase( + "query_expr", + docs=[ + {"_id": 1, "x": "a", "a": 5, "b": 3}, + {"_id": 2, "x": "b", "a": 2, "b": 7}, + {"_id": 3, "x": "c", "a": 10, "b": 1}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$expr": {"$gt": ["$a", "$b"]}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $expr in query", + ), + # Bitwise operators. 
+ CommandTestCase( + "query_bitsAllSet", + docs=[ + {"_id": 1, "x": "a", "flags": 7}, + {"_id": 2, "x": "b", "flags": 3}, + {"_id": 3, "x": "c", "flags": 5}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"flags": {"$bitsAllSet": 5}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $bitsAllSet in query", + ), + CommandTestCase( + "query_bitsAllClear", + docs=[ + {"_id": 1, "x": "a", "flags": 7}, + {"_id": 2, "x": "b", "flags": 3}, + {"_id": 3, "x": "c", "flags": 0}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"flags": {"$bitsAllClear": 4}}, + }, + expected={"values": ["b", "c"], "ok": 1.0}, + msg="distinct should support $bitsAllClear in query", + ), + CommandTestCase( + "query_bitsAnySet", + docs=[ + {"_id": 1, "x": "a", "flags": 7}, + {"_id": 2, "x": "b", "flags": 0}, + {"_id": 3, "x": "c", "flags": 4}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"flags": {"$bitsAnySet": 4}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $bitsAnySet in query", + ), + CommandTestCase( + "query_bitsAnyClear", + docs=[ + {"_id": 1, "x": "a", "flags": 7}, + {"_id": 2, "x": "b", "flags": 3}, + {"_id": 3, "x": "c", "flags": 5}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"flags": {"$bitsAnyClear": 6}}, + }, + expected={"values": ["b", "c"], "ok": 1.0}, + msg="distinct should support $bitsAnyClear in query", + ), + # Geospatial operators. 
+ CommandTestCase( + "query_geoWithin", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "x": "a", "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "x": "b", "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"loc": {"$geoWithin": {"$centerSphere": [[0, 0], 0.5]}}}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $geoWithin in query", + ), + CommandTestCase( + "query_geoIntersects", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "x": "a", "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "x": "b", "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": { + "loc": { + "$geoIntersects": { + "$geometry": { + "type": "Polygon", + "coordinates": [[[-1, -1], [1, -1], [1, 1], [-1, 1], [-1, -1]]], + } + } + } + }, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $geoIntersects in query", + ), + CommandTestCase( + "query_near", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "x": "a", "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "x": "b", "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": { + "loc": { + "$near": { + "$geometry": {"type": "Point", "coordinates": [0, 0]}, + "$maxDistance": 100000, + } + } + }, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $near in query", + ), + CommandTestCase( + "query_nearSphere", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "x": "a", "loc": {"type": "Point", "coordinates": [0, 0]}}, + {"_id": 2, "x": "b", "loc": {"type": "Point", "coordinates": [50, 50]}}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": { + "loc": { + "$nearSphere": { + 
"$geometry": {"type": "Point", "coordinates": [0, 0]}, + "$maxDistance": 100000, + } + } + }, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $nearSphere in query", + ), + # Schema and scripting operators. + CommandTestCase( + "query_jsonSchema", + docs=[ + {"_id": 1, "x": "a", "name": "hello"}, + {"_id": 2, "x": "b", "name": Int64(123)}, + {"_id": 3, "x": "c"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": { + "$jsonSchema": { + "required": ["name"], + "properties": {"name": {"bsonType": "string"}}, + } + }, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should support $jsonSchema in query", + ), + CommandTestCase( + "query_where", + docs=[ + {"_id": 1, "x": "a", "n": 5}, + {"_id": 2, "x": "b", "n": 15}, + {"_id": 3, "x": "c", "n": 25}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$where": "this.n > 10"}, + }, + expected={"values": ["b", "c"], "ok": 1.0}, + msg="distinct should support $where in query", + ), + # Text search (requires text index). 
+ CommandTestCase( + "query_text", + indexes=[IndexModel([("content", "text")])], + docs=[ + {"_id": 1, "x": "a", "content": "hello world"}, + {"_id": 2, "x": "b", "content": "foo bar"}, + {"_id": 3, "x": "c", "content": "hello foo"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"$text": {"$search": "hello"}}, + }, + expected={"values": ["a", "c"], "ok": 1.0}, + msg="distinct should support $text in query", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_QUERY_OPERATOR_TESTS)) +def test_distinct_query_operators( + database_client: Any, collection: Any, test: CommandTestCase +) -> None: + """Test distinct command query operator wiring.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_readconcern_subfields.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_readconcern_subfields.py new file mode 100644 index 00000000..17073259 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_readconcern_subfields.py @@ -0,0 +1,342 @@ +"""Tests for distinct command readConcern sub-field validation.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + 
BAD_VALUE_ERROR, + ILLEGAL_OPERATION_ERROR, + INVALID_OPTIONS_ERROR, + NOT_A_REPLICA_SET_ERROR, + TYPE_MISMATCH_ERROR, + UNRECOGNIZED_COMMAND_FIELD_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [ReadConcern Level Validation]: the readConcern level sub-field +# validates type and value; null is treated as omitted; invalid strings produce +# BadValue; non-string types produce TypeMismatch. +DISTINCT_READCONCERN_LEVEL_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "readconcern_level_null_accepted", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": None}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept null readConcern level (treated as omitted)", + ), + CommandTestCase( + "readconcern_level_empty_string", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": ""}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject empty string for readConcern level", + ), + CommandTestCase( + "readconcern_level_unknown", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": "unknown"}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject unknown readConcern level string", + ), + CommandTestCase( + "readconcern_level_wrong_case", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": "LOCAL"}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject wrong-case readConcern level string", + ), + CommandTestCase( + "readconcern_linearizable", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": "linearizable"}, + }, + error_code=NOT_A_REPLICA_SET_ERROR, + 
msg="distinct with linearizable readConcern should fail on non-replica-set", + ), + CommandTestCase( + "readconcern_snapshot", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": "snapshot"}, + }, + error_code=NOT_A_REPLICA_SET_ERROR, + msg="distinct with snapshot readConcern should fail on non-replica-set", + ), + *[ + CommandTestCase( + f"readconcern_level_type_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": v}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} for readConcern level sub-field", + ) + for tid, val in [ + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", ["local"]), + ("object", {"a": 1}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02")), + ("regex", Regex("^abc")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], +] + +# Property [ReadConcern Unknown Fields]: unknown fields in the readConcern +# document produce an UnrecognizedCommandField error. +DISTINCT_READCONCERN_UNKNOWN_FIELDS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "readconcern_unknown_field", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"level": "local", "unknownField": 1}, + }, + error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, + msg="distinct should reject unknown fields in readConcern document", + ), +] + +# Property [ReadConcern afterClusterTime]: afterClusterTime validates type +# and is rejected on standalone. 
+DISTINCT_READCONCERN_AFTER_CLUSTER_TIME_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "readconcern_after_cluster_time_timestamp", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"afterClusterTime": Timestamp(1, 1)}, + }, + error_code=ILLEGAL_OPERATION_ERROR, + msg="distinct afterClusterTime should be rejected on standalone", + ), + *[ + CommandTestCase( + f"readconcern_after_cluster_time_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"afterClusterTime": v}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct afterClusterTime as {tid} should produce TypeMismatch", + ) + for tid, val in [ + ("null", None), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("string", "hello"), + ("bool", True), + ("array", [1, 2]), + ("object", {"a": 1}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("binary", Binary(b"\x01\x02")), + ("regex", Regex("^abc")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], +] + +# Property [ReadConcern atClusterTime]: atClusterTime validates type and +# requires snapshot read concern level. 
+DISTINCT_READCONCERN_AT_CLUSTER_TIME_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "readconcern_at_cluster_time_timestamp", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"atClusterTime": Timestamp(1, 1)}, + }, + error_code=INVALID_OPTIONS_ERROR, + msg="distinct atClusterTime without snapshot level should be rejected", + ), + *[ + CommandTestCase( + f"readconcern_at_cluster_time_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"atClusterTime": v}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct atClusterTime as {tid} should produce TypeMismatch", + ) + for tid, val in [ + ("null", None), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("string", "hello"), + ("bool", True), + ("array", [1, 2]), + ("object", {"a": 1}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("binary", Binary(b"\x01\x02")), + ("regex", Regex("^abc")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], +] + +# Property [ReadConcern provenance]: the provenance sub-field validates type +# and enum value. 
+DISTINCT_READCONCERN_PROVENANCE_TESTS: list[CommandTestCase] = [ + *[ + CommandTestCase( + f"readconcern_provenance_{prov}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, p=prov: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"provenance": p}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg=f"distinct readConcern provenance '{prov}' should succeed", + ) + for prov in [ + "clientSupplied", + "implicitDefault", + "customDefault", + "getLastErrorDefaults", + ] + ], + CommandTestCase( + "readconcern_provenance_null", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"provenance": None}, + }, + expected={"values": ["a"], "ok": 1.0}, + msg="distinct readConcern provenance null should succeed", + ), + CommandTestCase( + "readconcern_provenance_invalid", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"provenance": "invalid"}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct readConcern provenance invalid string should be rejected", + ), + *[ + CommandTestCase( + f"readconcern_provenance_type_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "readConcern": {"provenance": v}, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct readConcern provenance as {tid} should produce TypeMismatch", + ) + for tid, val in [ + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", [1, 2]), + ("object", {"a": 1}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02")), + ("regex", Regex("^abc")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], +] + +DISTINCT_READCONCERN_SUBFIELD_TESTS: 
list[CommandTestCase] = ( + DISTINCT_READCONCERN_LEVEL_TESTS + + DISTINCT_READCONCERN_UNKNOWN_FIELDS_TESTS + + DISTINCT_READCONCERN_AFTER_CLUSTER_TIME_TESTS + + DISTINCT_READCONCERN_AT_CLUSTER_TIME_TESTS + + DISTINCT_READCONCERN_PROVENANCE_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_READCONCERN_SUBFIELD_TESTS)) +def test_distinct_readconcern_subfields( + database_client: Any, collection: Any, test: CommandTestCase +) -> None: + """Test distinct command readConcern sub-field validation.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_result_ordering.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_result_ordering.py new file mode 100644 index 00000000..4a93da91 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_result_ordering.py @@ -0,0 +1,172 @@ +"""Tests for distinct command result ordering and response format.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import Binary, Code, MaxKey, MinKey, ObjectId, Regex +from bson.timestamp import Timestamp + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import TargetDatabase + +# Property [Result Ordering]: distinct results are 
returned in BSON type +# comparison order. +DISTINCT_RESULT_ORDERING_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "ordering_cross_type", + docs=[ + {"_id": 1, "x": MaxKey()}, + {"_id": 2, "x": "hello"}, + {"_id": 3, "x": None}, + {"_id": 4, "x": 42}, + {"_id": 5, "x": {"a": 1}}, + {"_id": 6, "x": Binary(b"data", 0)}, + {"_id": 7, "x": ObjectId("000000000000000000000001")}, + {"_id": 8, "x": True}, + {"_id": 9, "x": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + {"_id": 10, "x": Timestamp(100, 1)}, + {"_id": 11, "x": Regex("abc", "")}, + {"_id": 12, "x": MinKey()}, + {"_id": 13, "x": Code("function()", {"scope": 1})}, + {"_id": 14, "x": Code("function()")}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [ + MinKey(), + None, + 42, + "hello", + {"a": 1}, + b"data", + ObjectId("000000000000000000000001"), + True, + datetime(2024, 1, 1, tzinfo=timezone.utc), + Timestamp(100, 1), + Regex("abc", ""), + Code("function()"), + Code("function()", {"scope": 1}), + MaxKey(), + ], + "ok": 1.0, + }, + msg=( + "distinct should return results in BSON type comparison order:" + " MinKey < null < numbers < string < object < binary" + " < ObjectId < bool < datetime < Timestamp < Regex" + " < Code < CodeWithScope < MaxKey" + ), + ), + CommandTestCase( + "ordering_within_numbers", + docs=[ + {"_id": 1, "x": 100}, + {"_id": 2, "x": -5}, + {"_id": 3, "x": 0}, + {"_id": 4, "x": 42}, + {"_id": 5, "x": -100}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [-100, -5, 0, 42, 100], "ok": 1.0}, + msg="distinct should order numbers by numeric value within the number type", + ), + CommandTestCase( + "ordering_within_strings", + docs=[ + {"_id": 1, "x": "banana"}, + {"_id": 2, "x": "apple"}, + {"_id": 3, "x": "cherry"}, + {"_id": 4, "x": "Apple"}, + {"_id": 5, "x": ""}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": ["", "Apple", "apple", "banana", 
"cherry"], + "ok": 1.0, + }, + msg="distinct should order strings by binary comparison within the string type", + ), + CommandTestCase( + "ordering_within_booleans", + docs=[{"_id": 1, "x": True}, {"_id": 2, "x": False}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [False, True], "ok": 1.0}, + msg="distinct should order booleans with False before True", + ), + CommandTestCase( + "ordering_within_datetimes", + docs=[ + {"_id": 1, "x": datetime(2024, 6, 1, tzinfo=timezone.utc)}, + {"_id": 2, "x": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + {"_id": 3, "x": datetime(2024, 12, 1, tzinfo=timezone.utc)}, + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={ + "values": [ + datetime(2024, 1, 1, tzinfo=timezone.utc), + datetime(2024, 6, 1, tzinfo=timezone.utc), + datetime(2024, 12, 1, tzinfo=timezone.utc), + ], + "ok": 1.0, + }, + msg="distinct should order datetimes chronologically within the datetime type", + ), +] + +# Property [Return Type and Response Format]: the response document contains the +# distinct values and succeeds even for non-existent collections. 
+DISTINCT_RESPONSE_FORMAT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "response_format_basic", + docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "b"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": ["a", "b"], "ok": 1.0}, + msg="distinct should return a response with values array and ok field", + ), + CommandTestCase( + "response_format_nonexistent_database", + target_collection=TargetDatabase(suffix="nonexistent"), + docs=None, + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty values array for a non-existent database", + ), + CommandTestCase( + "response_format_empty_collection", + docs=[], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [], "ok": 1.0}, + msg="distinct should return empty values array for an empty collection", + ), +] + +DISTINCT_RESULT_FORMAT_TESTS: list[CommandTestCase] = ( + DISTINCT_RESULT_ORDERING_TESTS + DISTINCT_RESPONSE_FORMAT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_RESULT_FORMAT_TESTS)) +def test_distinct_result_ordering( + database_client: Any, collection: Any, test: CommandTestCase +) -> None: + """Test distinct result ordering cases.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py new file mode 100644 index 00000000..15175012 --- /dev/null +++ 
b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py @@ -0,0 +1,480 @@ +"""Tests for distinct command parameter type errors.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +import pytest +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex +from bson.timestamp import Timestamp +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + FAILED_TO_PARSE_ERROR, + INVALID_NAMESPACE_ERROR, + MISSING_FIELD_ERROR, + TYPE_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + DECIMAL128_INFINITY, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_NEGATIVE_NAN, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + FLOAT_NEGATIVE_NAN, +) + +# Property [Null Hint Error]: unlike other optional parameters, hint=null produces +# a parse error instead of being treated as omitted. +DISTINCT_NULL_HINT_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "null_hint_param_error", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": None}, + error_code=FAILED_TO_PARSE_ERROR, + msg="distinct should reject hint=null with a parse error", + ), +] + +# Property [Query Parameter Type Errors]: all non-object, non-null BSON types +# for query produce TypeMismatch error; invalid query operators produce +# BAD_VALUE_ERROR. 
+DISTINCT_QUERY_TYPE_ERROR_TESTS: list[CommandTestCase] = [ + *[ + CommandTestCase( + f"query_type_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: {"distinct": ctx.collection, "key": "x", "query": v}, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} as query", + ) + for tid, val in [ + ("string", "hello"), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", [1, 2]), + ("objectid", ObjectId("000000000000000000000001")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"data", 0)), + ("regex", Regex("abc", "")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"s": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], + *[ + CommandTestCase( + f"query_invalid_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "query": v, + }, + error_code=BAD_VALUE_ERROR, + msg=f"distinct should reject {tid} in query", + ) + for tid, val in [ + ("update_operator", {"$set": {"x": 1}}), + ("aggregation_stage", {"$group": {"_id": None}}), + ("unknown_operator", {"$foobar": 1}), + ] + ], +] + +# Property [ReadConcern Parameter Type Errors]: all non-object, non-null BSON +# types for readConcern produce TypeMismatch error. 
+DISTINCT_READCONCERN_TYPE_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + f"readconcern_type_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "readConcern": v, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} as readConcern", + ) + for tid, val in [ + ("string", "local"), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", [1, 2]), + ("objectid", ObjectId("000000000000000000000001")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"data", 0)), + ("regex", Regex("abc", "")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"s": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + +# Property [Collation Parameter Type Errors]: non-object, non-null collation types produce +# TypeMismatch; a missing locale produces a missing field error; an empty locale produces BadValue.
+DISTINCT_COLLATION_TYPE_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + f"collation_type_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "collation": v, + }, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} as collation", + ) + for tid, val in [ + ("string", "en"), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", [1, 2]), + ("objectid", ObjectId("000000000000000000000001")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"data", 0)), + ("regex", Regex("abc", "")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"s": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + [ + CommandTestCase( + "collation_missing_locale", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"strength": 1}, + }, + error_code=MISSING_FIELD_ERROR, + msg="distinct should reject collation with missing locale", + ), + CommandTestCase( + "collation_invalid_locale", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": ""}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject collation with invalid (empty) locale", + ), + CommandTestCase( + "collation_validated_nonexistent_collection", + docs=None, + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"strength": 1}, + }, + error_code=MISSING_FIELD_ERROR, + msg="distinct should validate collation even when the collection does not exist", + ), +] + +# Property [Key Parameter Type Errors]: all non-string BSON types for key produce +# TypeMismatch error; null or omitted key produces a missing field error. 
+DISTINCT_KEY_TYPE_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + f"key_type_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: {"distinct": ctx.collection, "key": v}, + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} as key", + ) + for tid, val in [ + ("int32", 123), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", ["x"]), + ("object", {"a": 1}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"data", 0)), + ("regex", Regex("abc", "")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"s": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + [ + CommandTestCase( + "key_type_null", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": None}, + error_code=MISSING_FIELD_ERROR, + msg="distinct should reject null key as a missing required field", + ), + CommandTestCase( + "key_type_omitted", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection}, + error_code=MISSING_FIELD_ERROR, + msg="distinct should reject omitted key field as a missing required field", + ), +] + +# Property [Hint Parameter Type Errors]: invalid BSON types and values for the +# hint parameter produce appropriate errors. 
+DISTINCT_HINT_TYPE_ERROR_TESTS: list[CommandTestCase] = [ + *[ + CommandTestCase( + f"hint_type_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "hint": v, + }, + error_code=FAILED_TO_PARSE_ERROR, + msg=f"distinct should reject {tid} as hint", + ) + for tid, val in [ + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", [1, 2]), + ("objectid", ObjectId("000000000000000000000001")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"data", 0)), + ("regex", Regex("abc", "")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"s": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] + ], + CommandTestCase( + "hint_nonexistent_index_name", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": "nonexistent_index", + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject a non-existent index name on an existing collection", + ), + CommandTestCase( + "hint_empty_string_existing_collection", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: {"distinct": ctx.collection, "key": "x", "hint": ""}, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject empty string as hint on an existing collection", + ), + CommandTestCase( + "hint_doc_wrong_field_order", + indexes=[IndexModel([("x", 1), ("y", 1)])], + docs=[{"_id": 1, "x": "a", "y": "b"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"y": 1, "x": 1}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject document hint with incorrect field order", + ), + CommandTestCase( + "hint_string_case_sensitive", + indexes=[IndexModel([("x", 1)], name="x_1")], + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": "X_1", + }, + 
error_code=BAD_VALUE_ERROR, + msg="distinct string hint should be case-sensitive", + ), + CommandTestCase( + "hint_string_no_trimming", + indexes=[IndexModel([("x", 1)], name="x_1")], + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": " x_1 ", + }, + error_code=BAD_VALUE_ERROR, + msg="distinct string hint should not trim whitespace", + ), + CommandTestCase( + "hint_nonexistent_index_empty_collection", + docs=[], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": "nonexistent_idx", + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject a non-existent index name on an empty collection", + ), + *[ + CommandTestCase( + f"hint_direction_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "hint": {"x": v}, + }, + error_code=BAD_VALUE_ERROR, + msg=f"distinct should reject {tid} as direction value in document hint", + ) + for tid, val in [ + ("zero", 0), + ("two", 2), + ("fractional", 0.5), + ("nan", FLOAT_NAN), + ("neg_nan", FLOAT_NEGATIVE_NAN), + ("decimal128_nan", DECIMAL128_NAN), + ("decimal128_neg_nan", DECIMAL128_NEGATIVE_NAN), + ("infinity", FLOAT_INFINITY), + ("neg_infinity", FLOAT_NEGATIVE_INFINITY), + ("decimal128_infinity", DECIMAL128_INFINITY), + ("decimal128_neg_infinity", DECIMAL128_NEGATIVE_INFINITY), + ("bool", True), + ("null", None), + ("string", "asc"), + ("string_text", "text"), + ("string_hashed", "hashed"), + ("string_2dsphere", "2dsphere"), + ("string_2d", "2d"), + ] + ], + *[ + CommandTestCase( + f"hint_natural_direction_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { + "distinct": ctx.collection, + "key": "x", + "hint": {"$natural": v}, + }, + error_code=BAD_VALUE_ERROR, + msg=f"distinct should reject $natural {tid} direction value", + ) + for tid, val in [ + ("zero", 0), + ("two", 2), + ("neg_two", -2), + ("fractional", 0.5), + ("nan", FLOAT_NAN), + ("neg_nan", 
FLOAT_NEGATIVE_NAN), + ("decimal128_nan", DECIMAL128_NAN), + ("decimal128_neg_nan", DECIMAL128_NEGATIVE_NAN), + ("infinity", FLOAT_INFINITY), + ("neg_infinity", FLOAT_NEGATIVE_INFINITY), + ("decimal128_infinity", DECIMAL128_INFINITY), + ("decimal128_neg_infinity", DECIMAL128_NEGATIVE_INFINITY), + ("bool", True), + ("string", "forward"), + ("null", None), + ("array", [1]), + ("object", {"a": 1}), + ] + ], + CommandTestCase( + "hint_natural_combined_with_other_fields", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "hint": {"$natural": 1, "x": 1}, + }, + error_code=BAD_VALUE_ERROR, + msg="distinct should reject $natural combined with other fields in hint", + ), +] + +# Property [Collection Name Type Errors]: non-string types (except Binary subtype +# 4) and null as collection name produce InvalidNamespace error. +DISTINCT_COLLNAME_TYPE_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + f"collname_type_{tid}", + docs=None, + command=lambda ctx, v=val: {"distinct": v, "key": "x"}, + error_code=INVALID_NAMESPACE_ERROR, + msg=f"distinct should reject {tid} as collection name", + ) + for tid, val in [ + ("null", None), + ("int32", 123), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", [1, 2]), + ("object", {"a": 1}), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary_subtype0", Binary(b"hello", 0)), + ("binary_subtype5", Binary(b"hello", 5)), + ("regex", Regex("abc", "")), + ("code", Code("function(){}")), + ("code_scope", Code("function(){}", {"s": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + +DISTINCT_TYPE_ERROR_TESTS: list[CommandTestCase] = ( + DISTINCT_NULL_HINT_ERROR_TESTS + + DISTINCT_QUERY_TYPE_ERROR_TESTS + + DISTINCT_READCONCERN_TYPE_ERROR_TESTS + + DISTINCT_COLLATION_TYPE_ERROR_TESTS + + DISTINCT_KEY_TYPE_ERROR_TESTS + + 
DISTINCT_HINT_TYPE_ERROR_TESTS + + DISTINCT_COLLNAME_TYPE_ERROR_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_TYPE_ERROR_TESTS)) +def test_distinct_type_errors(database_client: Any, collection: Any, test: CommandTestCase) -> None: + """Test distinct type error cases.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_with_expr.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_with_expr.py deleted file mode 100644 index dd4d7c9f..00000000 --- a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_with_expr.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Tests for $expr in distinct command contexts. 
-""" - -from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.executor import execute_command - - -def test_expr_in_distinct(collection): - """Test $expr in distinct command.""" - collection.insert_many( - [ - {"_id": 1, "cat": "A", "val": 10}, - {"_id": 2, "cat": "B", "val": 5}, - {"_id": 3, "cat": "A", "val": 3}, - ] - ) - result = execute_command( - collection, - { - "distinct": collection.name, - "key": "cat", - "query": {"$expr": {"$gt": ["$val", 4]}}, - }, - ) - assertSuccess( - result, sorted(["A", "B"]), raw_res=True, transform=lambda r: sorted(r.get("values", [])) - ) diff --git a/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py b/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py index 7d6072b2..26ff3717 100644 --- a/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py +++ b/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py @@ -72,26 +72,33 @@ class CommandTestCase(BaseTestCase): docs: list[dict[str, Any]] | None = None command: dict[str, Any] | Callable[..., dict[str, Any]] | None = None expected: dict[str, Any] | list[dict[str, Any]] | Callable[..., dict[str, Any]] | None = None + ignore_order_in: list[str] | None = None def prepare(self, db: Database, collection: Collection) -> Collection: """Resolve the target collection and apply indexes/docs. + Documents and indexes are inserted into the collection returned + by ``target_collection.writable(source, resolved)``. For views + this is the source; for regular collections it is the resolved + collection itself. + - If ``docs=None``, the collection is not created and will not exist. - If ``docs=[]``, the collection is explicitly created but left empty. - If ``docs=[...]``, the collection is created and documents are inserted. 
""" - collection = self.target_collection.resolve(db, collection) + resolved = self.target_collection.resolve(db, collection) + target = self.target_collection.writable(collection, resolved) if self.indexes: - collection.create_indexes(self.indexes) + target.create_indexes(self.indexes) if self.docs is not None: - if collection.name not in collection.database.list_collection_names(): - collection.database.create_collection(collection.name) + if target.name not in target.database.list_collection_names(): + target.database.create_collection(target.name) if self.docs: - collection.insert_many(self.docs) + target.insert_many(self.docs) if self.siblings: for sibling in self.siblings: sibling.create(db, collection) - return collection + return resolved def build_command(self, ctx: CommandContext) -> dict[str, Any]: """Resolve the command dict from a callable or plain dict.""" diff --git a/documentdb_tests/framework/assertions.py b/documentdb_tests/framework/assertions.py index 9a3a76ae..2a4ff8aa 100644 --- a/documentdb_tests/framework/assertions.py +++ b/documentdb_tests/framework/assertions.py @@ -73,12 +73,18 @@ def _sort_if_list(value): def _sort_fields(docs, fields): """Sort list values for the named fields in each document.""" + if isinstance(docs, dict): + docs = dict(docs) + for f in fields: + if f in docs: + docs[f] = _sort_if_list(docs[f]) + return docs sorted_docs = [] for doc in docs: doc = dict(doc) - for field in fields: - if field in doc: - doc[field] = _sort_if_list(doc[field]) + for f in fields: + if f in doc: + doc[f] = _sort_if_list(doc[f]) sorted_docs.append(doc) return sorted_docs diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index 5fad1cc9..c78bf657 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -20,6 +20,7 @@ UNKNOWN_REPL_WRITE_CONCERN_ERROR = 79 INDEX_OPTIONS_CONFLICT_ERROR = 85 INDEX_KEY_SPECS_CONFLICT_ERROR = 86 +NOT_A_REPLICA_SET_ERROR 
= 123 INCOMPATIBLE_COLLATION_VERSION_ERROR = 161 VIEW_DEPTH_LIMIT_ERROR = 165 COMMAND_NOT_SUPPORTED_ON_VIEW_ERROR = 166 @@ -105,6 +106,7 @@ COND_MISSING_ELSE_ERROR = 17082 COND_EXTRA_FIELD_ERROR = 17083 SIZE_NOT_ARRAY_ERROR = 17124 +DISTINCT_TOO_BIG_ERROR = 17217 LET_UNDEFINED_VARIABLE_ERROR = 17276 META_NON_STRING_ERROR = 17307 UNSUPPORTED_META_FIELD_ERROR = 17308 @@ -163,6 +165,7 @@ REGEX_MISSING_INPUT_ERROR = 31022 REGEX_MISSING_REGEX_ERROR = 31023 REGEX_UNKNOWN_FIELD_ERROR = 31024 +KEY_FIELD_NULL_BYTE_ERROR = 31032 OUT_OF_RANGE_CONVERSION_ERROR = 31109 UNSET_EMPTY_ARRAY_ERROR = 31119 UNSET_ARRAY_ELEMENT_TYPE_ERROR = 31120 diff --git a/documentdb_tests/framework/target_collection.py b/documentdb_tests/framework/target_collection.py index f4d31f75..7cc27c07 100644 --- a/documentdb_tests/framework/target_collection.py +++ b/documentdb_tests/framework/target_collection.py @@ -22,6 +22,10 @@ class TargetCollection: def resolve(self, db: Database, collection: Collection) -> Collection: return collection + def writable(self, source: Collection, resolved: Collection) -> Collection: + """Return the collection where docs and indexes should be inserted.""" + return resolved + @dataclass(frozen=True) class ViewCollection(TargetCollection): @@ -32,6 +36,9 @@ def resolve(self, db: Database, collection: Collection) -> Collection: db.command("create", view_name, viewOn=collection.name, pipeline=[]) return db[view_name] + def writable(self, source: Collection, resolved: Collection) -> Collection: + return source + @dataclass(frozen=True) class SystemViewsCollection(ViewCollection): @@ -59,6 +66,23 @@ def resolve(self, db: Database, collection: Collection) -> Collection: return db[name] +@dataclass(frozen=True) +class ViewOnCustomCollection(TargetCollection): + """A view on a custom collection created with arbitrary options.""" + + source_options: dict[str, Any] = field(default_factory=dict) + + def resolve(self, db: Database, collection: Collection) -> Collection: + src_name = 
f"{collection.name}_custom_src" + db.command("create", src_name, **self.source_options) + view_name = f"{collection.name}_custom_view" + db.command("create", view_name, viewOn=src_name, pipeline=[]) + return db[view_name] + + def writable(self, source: Collection, resolved: Collection) -> Collection: + return source.database[f"{source.name}_custom_src"] + + @dataclass(frozen=True) class CappedCollection(TargetCollection): """A capped collection.""" @@ -132,6 +156,9 @@ def resolve(self, db: Database, collection: Collection) -> Collection: source = name return db[source] + def writable(self, source: Collection, resolved: Collection) -> Collection: + return source + @dataclass(frozen=True) class ExistingCollection(TargetCollection): @@ -196,6 +223,9 @@ def resolve(self, db: Database, collection: Collection) -> Collection: ) return db[view_name] + def writable(self, source: Collection, resolved: Collection) -> Collection: + return source + @dataclass(frozen=True) class ValidatedCollection(TargetCollection): From 63f65e26181dffe2650b2120ea086c0aea4e7f1f Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Mon, 25 May 2026 15:16:04 -0700 Subject: [PATCH 2/4] Add view tests Signed-off-by: Daniel Frankcom --- .../distinct/test_distinct_parameters.py | 40 +++++++++++++++++++ .../framework/target_collection.py | 4 +- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py index 61428da6..a810242c 100644 --- a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py @@ -18,6 +18,7 @@ from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params from 
documentdb_tests.framework.property_checks import Eq, Len, Ne +from documentdb_tests.framework.target_collection import ViewCollection from documentdb_tests.framework.test_constants import ( DOUBLE_NEGATIVE_ZERO, INT32_MAX, @@ -335,6 +336,44 @@ ] ] +# Property [Query Composition on Views]: the query parameter composes with +# the view's pipeline filter to further restrict visible documents. +DISTINCT_QUERY_VIEW_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "query_on_filtered_view", + target_collection=ViewCollection(pipeline=[{"$match": {"status": "active"}}]), + docs=[ + {"_id": 1, "status": "active", "cat": "a", "x": 10}, + {"_id": 2, "status": "active", "cat": "b", "x": 20}, + {"_id": 3, "status": "inactive", "cat": "a", "x": 30}, + {"_id": 4, "status": "active", "cat": "a", "x": 40}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "cat", + "query": {"x": {"$gte": 20}}, + }, + expected={"values": ["a", "b"], "ok": 1}, + ignore_order_in=["values"], + msg="distinct query should compose with view pipeline filter", + ), + CommandTestCase( + "query_excludes_all_on_filtered_view", + target_collection=ViewCollection(pipeline=[{"$match": {"status": "active"}}]), + docs=[ + {"_id": 1, "status": "active", "cat": "a", "x": 10}, + {"_id": 2, "status": "inactive", "cat": "b", "x": 50}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "cat", + "query": {"x": {"$gte": 50}}, + }, + expected={"values": [], "ok": 1}, + msg="distinct query + view filter should return empty when no docs match both", + ), +] + DISTINCT_PARAMETER_TESTS: list[CommandTestCase] = ( DISTINCT_NULL_PARAMS_TESTS + DISTINCT_QUERY_SUCCESS_TESTS @@ -346,6 +385,7 @@ + DISTINCT_COLLECTION_NAME_ACCEPTANCE_TESTS + DISTINCT_COLLECTION_NAME_UUID_TESTS + DISTINCT_COLLECTION_NAME_UUID_SUCCESS_TESTS + + DISTINCT_QUERY_VIEW_TESTS ) diff --git a/documentdb_tests/framework/target_collection.py b/documentdb_tests/framework/target_collection.py index 7cc27c07..f5be4347 100644 
--- a/documentdb_tests/framework/target_collection.py +++ b/documentdb_tests/framework/target_collection.py @@ -31,9 +31,11 @@ def writable(self, source: Collection, resolved: Collection) -> Collection: class ViewCollection(TargetCollection): """A view on the fixture collection.""" + pipeline: list[dict[str, Any]] = field(default_factory=list) + def resolve(self, db: Database, collection: Collection) -> Collection: view_name = f"{collection.name}_view" - db.command("create", view_name, viewOn=collection.name, pipeline=[]) + db.command("create", view_name, viewOn=collection.name, pipeline=self.pipeline) return db[view_name] def writable(self, source: Collection, resolved: Collection) -> Collection: From 21b95ac1e07311b94635abc40f11f573db757f57 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 27 May 2026 12:20:27 -0700 Subject: [PATCH 3/4] Refine distinct command test scope Remove collation semantic/subfield tests (covered in collation/), namespace naming rules and UUID resolution (common spec), and view tests (merged into collection type tests). Add collection type acceptance tests. 
Signed-off-by: Daniel Frankcom --- .../distinct/test_distinct_collation.py | 306 ++------ .../test_distinct_collation_subfields.py | 707 ------------------ .../test_distinct_collection_types.py | 92 +++ .../distinct/test_distinct_command_errors.py | 24 - .../distinct/test_distinct_parameters.py | 183 +---- .../distinct/test_distinct_type_errors.py | 69 -- 6 files changed, 139 insertions(+), 1242 deletions(-) delete mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation_subfields.py create mode 100644 documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collection_types.py diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py index 216e6ce5..d47f8d21 100644 --- a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation.py @@ -1,295 +1,82 @@ -"""Tests for distinct command collation behavior.""" +"""Tests for distinct command collation field syntax validation.""" from __future__ import annotations -from typing import Any +from datetime import datetime, timezone import pytest -from bson import Regex +from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( CommandContext, CommandTestCase, ) from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import TYPE_MISMATCH_ERROR from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params -from documentdb_tests.framework.target_collection import ( - CustomCollection, - ViewOnCustomCollection, 
-) - -# Property [Collation Effects on Deduplication]: collation affects which values -# are considered duplicates during distinct. -DISTINCT_COLLATION_DEDUP_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "collation_basic_string_dedup", - docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "APPLE"}, {"_id": 3, "x": "banana"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 1}, - }, - expected={"values": ["apple", "banana"], "ok": 1.0}, - msg="distinct should collapse case-equivalent strings under case-insensitive collation", - ), - CommandTestCase( - "collation_nested_array_dedup", - docs=[{"_id": 1, "x": [["hello"]]}, {"_id": 2, "x": [["HELLO"]]}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 1}, - }, - expected={"values": [["hello"]], "ok": 1.0}, - msg=( - "distinct should collapse nested arrays containing" - " case-equivalent strings under collation" - ), - ), - CommandTestCase( - "collation_nested_object_dedup", - docs=[ - {"_id": 1, "x": {"name": "hello"}}, - {"_id": 2, "x": {"name": "HELLO"}}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 1}, - }, - expected={"values": [{"name": "hello"}], "ok": 1.0}, - msg="distinct should collapse objects with case-equivalent string values under collation", - ), - CommandTestCase( - "collation_after_array_unwinding", - docs=[ - {"_id": 1, "x": ["hello", "world"]}, - {"_id": 2, "x": ["HELLO", "WORLD"]}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 1}, - }, - expected={"values": ["hello", "world"], "ok": 1.0}, - msg="distinct should apply collation dedup to individual array elements after unwinding", - ), - CommandTestCase( - "collation_non_string_unaffected", - docs=[ - {"_id": 1, "x": 1}, - {"_id": 2, "x": "a"}, - {"_id": 3, "x": "A"}, - {"_id": 4, 
"x": None}, - {"_id": 5, "x": True}, - {"_id": 6, "x": False}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 1}, - }, - expected={"values": [None, 1, "a", False, True], "ok": 1.0}, - msg=( - "distinct should not collapse non-string elements" - " (numbers, null, booleans) under collation" - ), - ), - CommandTestCase( - "collation_regex_unaffected", - docs=[ - {"_id": 1, "x": Regex("hello", "")}, - {"_id": 2, "x": Regex("HELLO", "")}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 1}, - }, - expected={ - "values": [Regex("HELLO", ""), Regex("hello", "")], - "ok": 1.0, - }, - msg="distinct should not collapse regex values under collation", - ), - CommandTestCase( - "collation_first_encountered_wins", - docs=[ - {"_id": 1, "x": "Hello"}, - {"_id": 2, "x": "HELLO"}, - {"_id": 3, "x": "hello"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 1}, - }, - expected={"values": ["Hello"], "ok": 1.0}, - msg=( - "distinct should return the first-encountered value" - " when collation collapses duplicates" - ), - ), -] -# Property [Collation Inheritance]: the collection's default collation is used -# when no explicit collation is specified. -DISTINCT_COLLATION_INHERITANCE_TESTS: list[CommandTestCase] = [ +# Property [Collation Acceptance]: the collation field accepts null and +# a document type. 
+DISTINCT_COLLATION_ACCEPTANCE_TESTS: list[CommandTestCase] = [ CommandTestCase( - "inherit_collation_omitted", - target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), - docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "APPLE"}, {"_id": 3, "x": "banana"}], - command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, - expected={"values": ["apple", "banana"], "ok": 1.0}, - msg="distinct should use collection's default collation when collation is omitted", - ), - CommandTestCase( - "inherit_collation_null", - target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), - docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "APPLE"}, {"_id": 3, "x": "banana"}], + "collation_null", + docs=[{"_id": 1, "x": "a"}], command=lambda ctx: {"distinct": ctx.collection, "key": "x", "collation": None}, - expected={"values": ["apple", "banana"], "ok": 1.0}, - msg="distinct should use collection's default collation when collation is null", + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept null collation", ), CommandTestCase( - "inherit_collation_empty_doc", - target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), - docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "APPLE"}, {"_id": 3, "x": "banana"}], + "collation_empty_doc", + docs=[{"_id": 1, "x": "a"}], command=lambda ctx: {"distinct": ctx.collection, "key": "x", "collation": {}}, - expected={"values": ["apple", "banana"], "ok": 1.0}, - msg="distinct should use collection's default collation when collation is empty doc", - ), - CommandTestCase( - "inherit_key_always_case_sensitive", - target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), - docs=[{"_id": 1, "Name": "alice"}, {"_id": 2, "name": "bob"}], - command=lambda ctx: {"distinct": ctx.collection, "key": "Name"}, - expected={"values": ["alice"], "ok": 1.0}, - msg="distinct key field path matching should be 
case-sensitive regardless of collation", - ), - CommandTestCase( - "inherit_explicit_overrides_default", - target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), - docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "APPLE"}, {"_id": 3, "x": "banana"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 3}, - }, - expected={"values": ["apple", "APPLE", "banana"], "ok": 1.0}, - msg="distinct should use explicit collation over collection default when specified", - ), -] - -# Property [Collation Effects on Ordering]: collation changes the sort order of -# results from binary comparison to locale-aware ordering. -DISTINCT_COLLATION_ORDERING_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "collation_ordering_locale_aware", - docs=[ - {"_id": 1, "x": "Banana"}, - {"_id": 2, "x": "apple"}, - {"_id": 3, "x": "Cherry"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en"}, - }, - expected={"values": ["apple", "Banana", "Cherry"], "ok": 1.0}, - msg=( - "distinct with collation should order results by locale-aware comparison" - " instead of binary comparison" - ), - ), - CommandTestCase( - "collation_ordering_binary_default", - docs=[ - {"_id": 1, "x": "Banana"}, - {"_id": 2, "x": "apple"}, - {"_id": 3, "x": "Cherry"}, - ], - command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, - expected={"values": ["Banana", "Cherry", "apple"], "ok": 1.0}, - msg="distinct without collation should order results by binary comparison", + expected={"values": ["a"], "ok": 1.0}, + msg="distinct should accept empty document collation", ), ] -# Property [Collation Affects Query Matching]: the collation parameter applies -# to the query filter, not just deduplication. 
-DISTINCT_COLLATION_QUERY_TESTS: list[CommandTestCase] = [ +# Property [Collation Type Rejection]: all non-document, non-null BSON types +# for the collation field produce a type mismatch error. +DISTINCT_COLLATION_TYPE_REJECTION_TESTS: list[CommandTestCase] = [ CommandTestCase( - "collation_query_case_insensitive_match", - docs=[ - {"_id": 1, "x": "val1", "status": "Active"}, - {"_id": 2, "x": "val2", "status": "active"}, - {"_id": 3, "x": "val3", "status": "INACTIVE"}, - ], - command=lambda ctx: { + f"collation_type_{tid}", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx, v=val: { "distinct": ctx.collection, "key": "x", - "query": {"status": "active"}, - "collation": {"locale": "en", "strength": 1}, + "collation": v, }, - expected={"values": ["val1", "val2"], "ok": 1.0}, - msg=( - "distinct should apply collation to query filter matching," - " allowing case-insensitive comparison" - ), - ), - CommandTestCase( - "collation_query_without_collation_exact_match", - docs=[ - {"_id": 1, "x": "val1", "status": "Active"}, - {"_id": 2, "x": "val2", "status": "active"}, - {"_id": 3, "x": "val3", "status": "INACTIVE"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "query": {"status": "active"}, - }, - expected={"values": ["val2"], "ok": 1.0}, - msg="distinct without collation should match query filter exactly", - ), -] - -# Property [Collation Inheritance on Views]: views without an explicit collation -# use simple binary comparison, not the source collection's collation. 
-DISTINCT_COLLATION_INHERITANCE_VIEW_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "inherit_view_no_inherit", - target_collection=ViewOnCustomCollection( - source_options={"collation": {"locale": "en", "strength": 1}} - ), - docs=[ - {"_id": 1, "x": "apple"}, - {"_id": 2, "x": "APPLE"}, - {"_id": 3, "x": "banana"}, - ], - command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, - expected={"values": sorted(["APPLE", "apple", "banana"]), "ok": 1}, - ignore_order_in=["values"], - msg=( - "distinct on a view should use binary comparison," - " not the source collection's collation" - ), - ), + error_code=TYPE_MISMATCH_ERROR, + msg=f"distinct should reject {tid} as collation", + ) + for tid, val in [ + ("string", "en"), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", Decimal128("1")), + ("bool", True), + ("array", [1, 2]), + ("objectid", ObjectId()), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"data")), + ("regex", Regex("abc")), + ("code", Code("function(){}")), + ("code_with_scope", Code("function(){}", {"x": 1})), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] ] DISTINCT_COLLATION_TESTS: list[CommandTestCase] = ( - DISTINCT_COLLATION_DEDUP_TESTS - + DISTINCT_COLLATION_INHERITANCE_TESTS - + DISTINCT_COLLATION_ORDERING_TESTS - + DISTINCT_COLLATION_QUERY_TESTS - + DISTINCT_COLLATION_INHERITANCE_VIEW_TESTS + DISTINCT_COLLATION_ACCEPTANCE_TESTS + DISTINCT_COLLATION_TYPE_REJECTION_TESTS ) @pytest.mark.parametrize("test", pytest_params(DISTINCT_COLLATION_TESTS)) -def test_distinct_collation(database_client: Any, collection: Any, test: CommandTestCase) -> None: - """Test distinct collation cases.""" +def test_distinct_collation(database_client, collection, test): + """Test distinct command collation field syntax validation.""" collection = test.prepare(database_client, collection) ctx = CommandContext.from_collection(collection) result = 
execute_command(collection, test.build_command(ctx)) @@ -299,5 +86,4 @@ def test_distinct_collation(database_client: Any, collection: Any, test: Command error_code=test.error_code, msg=test.msg, raw_res=True, - ignore_order_in=test.ignore_order_in, ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation_subfields.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation_subfields.py deleted file mode 100644 index 4fecda76..00000000 --- a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collation_subfields.py +++ /dev/null @@ -1,707 +0,0 @@ -"""Tests for distinct command collation sub-field validation and behavior.""" - -from __future__ import annotations - -from datetime import datetime, timezone -from typing import Any - -import pytest -from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp - -from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( - CommandContext, - CommandTestCase, -) -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.error_codes import ( - BAD_VALUE_ERROR, - MISSING_FIELD_ERROR, - TYPE_MISMATCH_ERROR, - UNRECOGNIZED_COMMAND_FIELD_ERROR, -) -from documentdb_tests.framework.executor import execute_command -from documentdb_tests.framework.parametrize import pytest_params - -# Property [Type Strictness: collation (locale)]: the locale sub-field is -# required and validates type and value. 
-DISTINCT_TYPE_STRICTNESS_COLLATION_LOCALE_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "type_collation_locale_null", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": None}, - }, - error_code=MISSING_FIELD_ERROR, - msg="distinct should reject collation with null locale", - ), - *[ - CommandTestCase( - f"type_collation_locale_{tid}", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx, v=val: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": v}, - }, - error_code=TYPE_MISMATCH_ERROR, - msg=f"distinct should reject {tid} for collation locale", - ) - for tid, val in [ - ("int32", 42), - ("int64", Int64(1)), - ("double", 3.14), - ("decimal128", Decimal128("1")), - ("bool", True), - ("array", ["en"]), - ("object", {"name": "en"}), - ("objectid", ObjectId()), - ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), - ("timestamp", Timestamp(1, 1)), - ("binary", Binary(b"\x01\x02")), - ("regex", Regex("^en")), - ("code", Code("function(){}")), - ("code_with_scope", Code("function(){}", {"x": 1})), - ("minkey", MinKey()), - ("maxkey", MaxKey()), - ] - ], - *[ - CommandTestCase( - f"type_collation_locale_{tid}", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx, v=val: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": v}, - }, - error_code=BAD_VALUE_ERROR, - msg=f"distinct should reject {tid} for collation locale", - ) - for tid, val in [ - ("invalid", "invalid_locale_xyz"), - ("wrong_case", "EN"), - ] - ], -] - -# Property [Type Strictness: collation (strength)]: the strength sub-field -# validates type and range. 
-DISTINCT_TYPE_STRICTNESS_COLLATION_STRENGTH_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "type_collation_strength_one_valid", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 1}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg="distinct should accept strength value 1 (lower boundary)", - ), - CommandTestCase( - "type_collation_strength_five_valid", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 5}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg="distinct should accept strength value 5 (upper boundary)", - ), - CommandTestCase( - "type_collation_strength_int32_valid", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 3}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg="distinct should accept int32 strength value 3", - ), - CommandTestCase( - "type_collation_strength_int64_valid", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": Int64(3)}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg="distinct should accept Int64 strength value 3", - ), - CommandTestCase( - "type_collation_strength_double_valid", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 3.0}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg="distinct should accept double strength value 3.0", - ), - CommandTestCase( - "type_collation_strength_decimal128_valid", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": Decimal128("3")}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg="distinct should accept Decimal128 strength 
value 3", - ), - CommandTestCase( - "type_collation_strength_null_valid", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": None}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg="distinct should accept null strength (treated as omitted)", - ), - CommandTestCase( - "type_collation_strength_zero_invalid", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 0}, - }, - error_code=BAD_VALUE_ERROR, - msg="distinct should reject strength value 0", - ), - CommandTestCase( - "type_collation_strength_six_invalid", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 6}, - }, - error_code=BAD_VALUE_ERROR, - msg="distinct should reject strength value 6", - ), - *[ - CommandTestCase( - f"type_collation_strength_{tid}", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx, v=val: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": v}, - }, - error_code=TYPE_MISMATCH_ERROR, - msg=f"distinct should reject {tid} for collation strength", - ) - for tid, val in [ - ("string", "one"), - ("bool", True), - ("array", [1]), - ("object", {"a": 1}), - ("objectid", ObjectId()), - ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), - ("timestamp", Timestamp(1, 1)), - ("binary", Binary(b"\x01\x02")), - ("regex", Regex("^abc")), - ("code", Code("function(){}")), - ("code_with_scope", Code("function(){}", {"x": 1})), - ("minkey", MinKey()), - ("maxkey", MaxKey()), - ] - ], -] - -# Property [Type Strictness: collation (boolean sub-fields)]: the boolean -# sub-fields validate type strictly and have field-specific null handling. 
-DISTINCT_TYPE_STRICTNESS_COLLATION_BOOL_FIELDS_TESTS: list[CommandTestCase] = [ - *[ - CommandTestCase( - f"type_collation_{field}_{tid}", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx, f=field, v=val: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", f: v}, - }, - error_code=TYPE_MISMATCH_ERROR, - msg=f"distinct should reject {tid} for collation {field}", - ) - for field in ["caseLevel", "numericOrdering", "backwards", "normalization"] - for tid, val in [ - ("int32", 1), - ("int64", Int64(1)), - ("double", 1.0), - ("decimal128", Decimal128("1")), - ("string", "true"), - ("array", [True]), - ("object", {"a": True}), - ("objectid", ObjectId()), - ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), - ("timestamp", Timestamp(1, 1)), - ("binary", Binary(b"\x01\x02")), - ("regex", Regex("^abc")), - ("code", Code("function(){}")), - ("code_with_scope", Code("function(){}", {"x": 1})), - ("minkey", MinKey()), - ("maxkey", MaxKey()), - ] - ], - # Null handling: caseLevel, numericOrdering, normalization accept null; - # backwards rejects null. - *[ - CommandTestCase( - f"type_collation_{field}_null_accepted", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx, f=field: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", f: None}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg=f"distinct should accept null for collation {field} (treated as omitted)", - ) - for field in ["caseLevel", "numericOrdering", "normalization"] - ], - CommandTestCase( - "type_collation_backwards_null_rejected", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "backwards": None}, - }, - error_code=TYPE_MISMATCH_ERROR, - msg="distinct should reject null for collation backwards", - ), -] - -# Property [Type Strictness: collation (enum sub-fields)]: the string enum -# sub-fields validate type, value, and field-specific constraints. 
-DISTINCT_TYPE_STRICTNESS_COLLATION_ENUM_FIELDS_TESTS: list[CommandTestCase] = [ - # caseFirst valid values and constraints - CommandTestCase( - "type_collation_casefirst_lower_accepted", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "caseFirst": "lower", "strength": 3}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg='distinct should accept caseFirst "lower" with strength > 2', - ), - CommandTestCase( - "type_collation_casefirst_with_strength_3", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "caseFirst": "upper", "strength": 3}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg="distinct should accept caseFirst with strength > 2", - ), - CommandTestCase( - "type_collation_casefirst_with_caselevel", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": { - "locale": "en", - "caseFirst": "upper", - "caseLevel": True, - }, - }, - expected={"values": ["a"], "ok": 1.0}, - msg="distinct should accept caseFirst with caseLevel=true", - ), - CommandTestCase( - "type_collation_casefirst_off_always_valid", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "caseFirst": "off", "strength": 1}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg='distinct should accept caseFirst "off" regardless of strength or caseLevel', - ), - CommandTestCase( - "type_collation_casefirst_requires_caselevel_or_strength", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "caseFirst": "upper", "strength": 1}, - }, - error_code=BAD_VALUE_ERROR, - msg="distinct should reject caseFirst without caseLevel=true or strength > 2", - ), - # alternate valid values - CommandTestCase( - 
"type_collation_alternate_non_ignorable_accepted", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "alternate": "non-ignorable"}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg='distinct should accept alternate "non-ignorable"', - ), - CommandTestCase( - "type_collation_alternate_shifted_accepted", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "alternate": "shifted"}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg='distinct should accept alternate "shifted"', - ), - # maxVariable valid values - CommandTestCase( - "type_collation_maxvariable_punct_accepted", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "maxVariable": "punct"}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg='distinct should accept maxVariable "punct"', - ), - CommandTestCase( - "type_collation_maxvariable_space_accepted", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "maxVariable": "space"}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg='distinct should accept maxVariable "space"', - ), - # Null acceptance for all enum sub-fields - *[ - CommandTestCase( - f"type_collation_{field}_null_accepted", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx, f=field: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", f: None}, - }, - expected={"values": ["a"], "ok": 1.0}, - msg=f"distinct should accept null for collation {field} (treated as omitted)", - ) - for field in ["caseFirst", "alternate", "maxVariable"] - ], - # Invalid string values (BadValue) - *[ - CommandTestCase( - f"type_collation_{field}_{tid}", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx, f=field, v=val: { - "distinct": ctx.collection, - "key": "x", - 
"collation": {"locale": "en", f: v}, - }, - error_code=BAD_VALUE_ERROR, - msg=f"distinct should reject {tid} for collation {field}", - ) - for field, tid, val in [ - ("caseFirst", "invalid", "invalid"), - ("caseFirst", "empty", ""), - ("caseFirst", "wrong_case", "Upper"), - ("alternate", "invalid", "invalid"), - ("alternate", "empty", ""), - ("alternate", "wrong_case", "Shifted"), - ("maxVariable", "invalid", "invalid"), - ("maxVariable", "empty", ""), - ("maxVariable", "wrong_case", "Punct"), - ] - ], - # Non-string type rejection (TypeMismatch) for all enum sub-fields - *[ - CommandTestCase( - f"type_collation_{field}_{tid}", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx, f=field, v=val: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", f: v}, - }, - error_code=TYPE_MISMATCH_ERROR, - msg=f"distinct should reject {tid} for collation {field}", - ) - for field in ["caseFirst", "alternate", "maxVariable"] - for tid, val in [ - ("int32", 42), - ("int64", Int64(1)), - ("double", 3.14), - ("decimal128", Decimal128("1")), - ("bool", True), - ("array", [1, 2]), - ("object", {"a": 1}), - ("objectid", ObjectId()), - ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), - ("timestamp", Timestamp(1, 1)), - ("binary", Binary(b"\x01\x02")), - ("regex", Regex("^abc")), - ("code", Code("function(){}")), - ("code_with_scope", Code("function(){}", {"x": 1})), - ("minkey", MinKey()), - ("maxkey", MaxKey()), - ] - ], -] - -# Property [Type Strictness: collation (unknown fields)]: unknown fields in the -# collation document produce an UnrecognizedCommandField error. 
-DISTINCT_TYPE_STRICTNESS_COLLATION_UNKNOWN_FIELDS_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "type_collation_unknown_field", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "unknownField": 1}, - }, - error_code=UNRECOGNIZED_COMMAND_FIELD_ERROR, - msg="distinct should reject unknown fields in collation document", - ), -] - -# Property [Collation Behavior: numericOrdering]: numericOrdering=true causes -# numeric strings to be ordered by their numeric value rather than -# lexicographically, affecting both deduplication and result ordering. -DISTINCT_COLLATION_NUMERIC_ORDERING_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "collation_numericordering_true_ordering", - docs=[ - {"_id": 1, "x": "10"}, - {"_id": 2, "x": "2"}, - {"_id": 3, "x": "1"}, - {"_id": 4, "x": "20"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "numericOrdering": True}, - }, - expected={"values": ["1", "2", "10", "20"], "ok": 1.0}, - msg="distinct with numericOrdering=true should order numeric strings numerically", - ), - CommandTestCase( - "collation_numericordering_false_ordering", - docs=[ - {"_id": 1, "x": "10"}, - {"_id": 2, "x": "2"}, - {"_id": 3, "x": "1"}, - {"_id": 4, "x": "20"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "numericOrdering": False}, - }, - expected={"values": ["1", "10", "2", "20"], "ok": 1.0}, - msg="distinct with numericOrdering=false should order strings lexicographically", - ), -] - -# Property [Collation Behavior: alternate]: alternate="shifted" causes -# punctuation and whitespace to be treated as equivalent at primary/secondary -# strength levels, collapsing them during deduplication. 
-DISTINCT_COLLATION_ALTERNATE_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "collation_alternate_shifted_dedup", - docs=[ - {"_id": 1, "x": "abc"}, - {"_id": 2, "x": "a-b-c"}, - {"_id": 3, "x": "a b c"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "alternate": "shifted", "strength": 1}, - }, - expected={"values": ["abc"], "ok": 1.0}, - msg="distinct with alternate=shifted should collapse punctuation/whitespace variants", - ), - CommandTestCase( - "collation_alternate_non_ignorable_preserves", - docs=[ - {"_id": 1, "x": "abc"}, - {"_id": 2, "x": "a-b-c"}, - {"_id": 3, "x": "a b c"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "alternate": "non-ignorable", "strength": 1}, - }, - expected={"values": ["a b c", "a-b-c", "abc"], "ok": 1.0}, - msg="distinct with alternate=non-ignorable should preserve punctuation distinctions", - ), -] - -# Property [Collation Behavior: maxVariable]: maxVariable controls which -# characters are ignored when alternate="shifted"; "space" ignores only -# whitespace, "punct" ignores both whitespace and punctuation. 
-DISTINCT_COLLATION_MAX_VARIABLE_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "collation_maxvariable_space", - docs=[ - {"_id": 1, "x": "abc"}, - {"_id": 2, "x": "a bc"}, - {"_id": 3, "x": "a.bc"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": { - "locale": "en", - "alternate": "shifted", - "maxVariable": "space", - "strength": 1, - }, - }, - expected={"values": ["a.bc", "abc"], "ok": 1.0}, - msg="distinct with maxVariable=space should ignore only whitespace", - ), - CommandTestCase( - "collation_maxvariable_punct", - docs=[ - {"_id": 1, "x": "abc"}, - {"_id": 2, "x": "a bc"}, - {"_id": 3, "x": "a.bc"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": { - "locale": "en", - "alternate": "shifted", - "maxVariable": "punct", - "strength": 1, - }, - }, - expected={"values": ["abc"], "ok": 1.0}, - msg="distinct with maxVariable=punct should ignore whitespace and punctuation", - ), -] - -# Property [Collation Behavior: backwards]: backwards=true reverses the -# secondary (accent) comparison direction, affecting result ordering. 
-DISTINCT_COLLATION_BACKWARDS_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "collation_backwards_true", - docs=[ - {"_id": 1, "x": "cote"}, - {"_id": 2, "x": "cot\u00e9"}, - {"_id": 3, "x": "c\u00f4te"}, - {"_id": 4, "x": "c\u00f4t\u00e9"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 2, "backwards": True}, - }, - expected={"values": ["cote", "c\u00f4te", "cot\u00e9", "c\u00f4t\u00e9"], "ok": 1.0}, - msg="distinct with backwards=true should reverse accent comparison direction", - ), - CommandTestCase( - "collation_backwards_false", - docs=[ - {"_id": 1, "x": "cote"}, - {"_id": 2, "x": "cot\u00e9"}, - {"_id": 3, "x": "c\u00f4te"}, - {"_id": 4, "x": "c\u00f4t\u00e9"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "strength": 2, "backwards": False}, - }, - expected={"values": ["cote", "cot\u00e9", "c\u00f4te", "c\u00f4t\u00e9"], "ok": 1.0}, - msg="distinct with backwards=false should use normal accent comparison direction", - ), -] - -# Property [Collation Behavior: caseFirst]: caseFirst controls whether -# uppercase or lowercase sorts first at the tertiary level. 
-DISTINCT_COLLATION_CASEFIRST_BEHAVIOR_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "collation_casefirst_upper", - docs=[ - {"_id": 1, "x": "a"}, - {"_id": 2, "x": "A"}, - {"_id": 3, "x": "b"}, - {"_id": 4, "x": "B"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "caseFirst": "upper", "strength": 3}, - }, - expected={"values": ["A", "a", "B", "b"], "ok": 1.0}, - msg="distinct with caseFirst=upper should sort uppercase before lowercase", - ), - CommandTestCase( - "collation_casefirst_lower", - docs=[ - {"_id": 1, "x": "a"}, - {"_id": 2, "x": "A"}, - {"_id": 3, "x": "b"}, - {"_id": 4, "x": "B"}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": "en", "caseFirst": "lower", "strength": 3}, - }, - expected={"values": ["a", "A", "b", "B"], "ok": 1.0}, - msg="distinct with caseFirst=lower should sort lowercase before uppercase", - ), -] - -DISTINCT_COLLATION_SUBFIELD_TESTS: list[CommandTestCase] = ( - DISTINCT_TYPE_STRICTNESS_COLLATION_LOCALE_TESTS - + DISTINCT_TYPE_STRICTNESS_COLLATION_STRENGTH_TESTS - + DISTINCT_TYPE_STRICTNESS_COLLATION_BOOL_FIELDS_TESTS - + DISTINCT_TYPE_STRICTNESS_COLLATION_ENUM_FIELDS_TESTS - + DISTINCT_TYPE_STRICTNESS_COLLATION_UNKNOWN_FIELDS_TESTS - + DISTINCT_COLLATION_NUMERIC_ORDERING_TESTS - + DISTINCT_COLLATION_ALTERNATE_TESTS - + DISTINCT_COLLATION_MAX_VARIABLE_TESTS - + DISTINCT_COLLATION_BACKWARDS_TESTS - + DISTINCT_COLLATION_CASEFIRST_BEHAVIOR_TESTS -) - - -@pytest.mark.parametrize("test", pytest_params(DISTINCT_COLLATION_SUBFIELD_TESTS)) -def test_distinct_collation_subfields( - database_client: Any, collection: Any, test: CommandTestCase -) -> None: - """Test distinct command collation sub-field validation and behavior.""" - collection = test.prepare(database_client, collection) - ctx = CommandContext.from_collection(collection) - result = execute_command(collection, test.build_command(ctx)) - assertResult( - result, - 
expected=test.build_expected(ctx), - error_code=test.error_code, - msg=test.msg, - raw_res=True, - ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collection_types.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collection_types.py new file mode 100644 index 00000000..9f60e318 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_collection_types.py @@ -0,0 +1,92 @@ +"""Tests for distinct command collection type acceptance.""" + +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import ( + CappedCollection, + ClusteredCollection, + TimeseriesCollection, + ViewCollection, +) + +# Property [Collection Type Acceptance]: distinct produces correct results +# regardless of the underlying collection type. 
+DISTINCT_COLLECTION_TYPE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "regular", + docs=[{"_id": i, "x": i % 3} for i in range(5)], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [0, 1, 2], "ok": 1.0}, + ignore_order_in=["values"], + msg="distinct should work on a regular collection", + ), + CommandTestCase( + "view", + target_collection=ViewCollection( + options={"pipeline": [{"$match": {"x": {"$gte": 1}}}]}, + suffix="_view", + ), + docs=[{"_id": i, "x": i % 3} for i in range(5)], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [1, 2], "ok": 1}, + ignore_order_in=["values"], + msg="distinct on view should only see documents passing the view pipeline", + ), + CommandTestCase( + "capped", + target_collection=CappedCollection(size=100_000), + docs=[{"_id": i, "x": i % 3} for i in range(5)], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [0, 1, 2], "ok": 1.0}, + ignore_order_in=["values"], + msg="distinct should work on a capped collection", + ), + CommandTestCase( + "timeseries", + target_collection=TimeseriesCollection(), + docs=[ + {"ts": datetime(2024, 1, i, tzinfo=timezone.utc), "meta": "a", "x": i % 3} + for i in range(1, 6) + ], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [0, 1, 2], "ok": 1}, + ignore_order_in=["values"], + msg="distinct should work on a timeseries collection", + ), + CommandTestCase( + "clustered", + target_collection=ClusteredCollection(), + docs=[{"_id": i, "x": i % 3} for i in range(5)], + command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, + expected={"values": [0, 1, 2], "ok": 1.0}, + ignore_order_in=["values"], + msg="distinct should work on a clustered collection", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(DISTINCT_COLLECTION_TYPE_TESTS)) +def test_distinct_collection_types(database_client, collection, test): + """Test distinct command 
collection type acceptance.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py index 19fc68df..28048a3a 100644 --- a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_command_errors.py @@ -18,7 +18,6 @@ BAD_VALUE_ERROR, DISTINCT_TOO_BIG_ERROR, FAILED_TO_PARSE_ERROR, - INVALID_NAMESPACE_ERROR, INVALID_OPTIONS_ERROR, KEY_FIELD_NULL_BYTE_ERROR, TYPE_MISMATCH_ERROR, @@ -73,28 +72,6 @@ ] ] -# Property [Collection Name String Validation]: empty string, null bytes, leading -# dots, and dollar signs in the collection name produce InvalidNamespace error. 
-DISTINCT_COLLNAME_STRING_ERROR_TESTS: list[CommandTestCase] = [ - CommandTestCase( - f"collname_{tid}", - docs=None, - command=lambda ctx, v=val: {"distinct": v, "key": "x"}, - error_code=INVALID_NAMESPACE_ERROR, - msg=f"distinct should reject {desc}", - ) - for tid, val, desc in [ - ("empty_string", "", "empty string as collection name"), - ("null_byte_start", "\x00test", "collection name with null byte at start"), - ("null_byte_middle", "te\x00st", "collection name with null byte in middle"), - ("null_byte_end", "test\x00", "collection name with null byte at end"), - ("leading_dot", ".test", "collection name starting with a dot"), - ("dollar_start", "$test", "collection name with dollar sign at start"), - ("dollar_middle", "te$st", "collection name with dollar sign in middle"), - ("dollar_end", "test$", "collection name with dollar sign at end"), - ] -] - # Property [Unrecognized Fields]: unrecognized fields in the command document # produce an IDLUnknownField error; field name matching is case-sensitive. 
DISTINCT_UNRECOGNIZED_FIELDS_TESTS: list[CommandTestCase] = [ @@ -314,7 +291,6 @@ DISTINCT_COMMAND_ERROR_TESTS: list[CommandTestCase] = ( DISTINCT_QUERY_ERROR_TESTS + DISTINCT_KEY_NULL_BYTE_TESTS - + DISTINCT_COLLNAME_STRING_ERROR_TESTS + DISTINCT_UNRECOGNIZED_FIELDS_TESTS + DISTINCT_WRITE_CONCERN_TESTS + DISTINCT_MAXTIMEMS_ERROR_TESTS diff --git a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py index a810242c..65c69df5 100644 --- a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_parameters.py @@ -14,11 +14,9 @@ CommandTestCase, ) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.error_codes import NAMESPACE_NOT_FOUND_ERROR from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.property_checks import Eq, Len, Ne -from documentdb_tests.framework.target_collection import ViewCollection from documentdb_tests.framework.test_constants import ( DOUBLE_NEGATIVE_ZERO, INT32_MAX, @@ -181,143 +179,7 @@ ), ] -# Property [Collection Name Acceptance]: non-existent collection names with special -# characters, Unicode, number-like strings, and long names succeed with empty results. 
-DISTINCT_COLLECTION_NAME_ACCEPTANCE_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "collname_nonexistent", - docs=None, - command=lambda ctx: {"distinct": ctx.collection, "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg="distinct should succeed with empty results for a non-existent collection", - ), - CommandTestCase( - "collname_space", - docs=None, - command=lambda ctx: {"distinct": f"{ctx.collection} space", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg="distinct should accept space characters in collection names", - ), - CommandTestCase( - "collname_punctuation", - docs=None, - command=lambda ctx: {"distinct": f"{ctx.collection}!@#%^&*()", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg="distinct should accept punctuation characters in collection names", - ), - CommandTestCase( - "collname_control_chars", - docs=None, - command=lambda ctx: {"distinct": f"{ctx.collection}\x01\x02\x03", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg="distinct should accept control characters in collection names", - ), - CommandTestCase( - "collname_zero_width_space", - # U+200B zero-width space. 
- docs=None, - command=lambda ctx: {"distinct": f"{ctx.collection}\u200b", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg="distinct should accept zero-width space in collection names", - ), - CommandTestCase( - "collname_emoji", - docs=None, - command=lambda ctx: {"distinct": f"{ctx.collection}\U0001f389", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg="distinct should accept emoji characters in collection names", - ), - CommandTestCase( - "collname_tab", - docs=None, - command=lambda ctx: {"distinct": f"{ctx.collection}\t", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg="distinct should accept tab characters in collection names", - ), - CommandTestCase( - "collname_newline", - docs=None, - command=lambda ctx: {"distinct": f"{ctx.collection}\n", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg="distinct should accept newline characters in collection names", - ), - CommandTestCase( - "collname_number_zero", - docs=None, - command=lambda ctx: {"distinct": "0", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg='distinct should accept "0" as collection name without coercion', - ), - CommandTestCase( - "collname_number_nan", - docs=None, - command=lambda ctx: {"distinct": "NaN", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg='distinct should accept "NaN" as collection name without coercion', - ), - CommandTestCase( - "collname_number_infinity", - docs=None, - command=lambda ctx: {"distinct": "Infinity", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg='distinct should accept "Infinity" as collection name without coercion', - ), - CommandTestCase( - "collname_number_true", - docs=None, - command=lambda ctx: {"distinct": "true", "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg='distinct should accept "true" as collection name without coercion', - ), - CommandTestCase( - "collname_number_null", - docs=None, - command=lambda ctx: {"distinct": "null", "key": "x"}, - expected={"values": [], "ok": 1.0}, - 
msg='distinct should accept "null" as collection name without coercion', - ), - CommandTestCase( - "collname_long_name", - docs=None, - command=lambda ctx: {"distinct": "a" * 10_000, "key": "x"}, - expected={"values": [], "ok": 1.0}, - msg="distinct should accept very long collection names without error", - ), -] - -# Property [Collection Name UUID Resolution]: Binary subtype 4 (UUID) as the -# distinct field triggers UUID-based collection resolution, producing a namespace -# not found error when the UUID does not match any collection. -DISTINCT_COLLECTION_NAME_UUID_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "collname_uuid_binary_error", - docs=None, - command=lambda ctx: { - "distinct": Binary(b"\x00" * 16, 4), - "key": "x", - }, - error_code=NAMESPACE_NOT_FOUND_ERROR, - msg=( - "distinct should trigger UUID-based resolution for Binary subtype 4," - " producing a namespace not found error when UUID does not match" - ), - ), -] - -# Property [Collection Name UUID Success]: Binary subtype 4 (UUID) as the distinct -# field triggers UUID-based collection resolution; when the UUID matches an existing -# collection, the command succeeds. -DISTINCT_COLLECTION_NAME_UUID_SUCCESS_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "collname_uuid_success", - docs=[{"_id": 1, "x": "found"}], - command=lambda ctx: {"distinct": ctx.uuids[ctx.collection], "key": "x"}, - expected={"values": ["found"], "ok": 1.0}, - msg="distinct should succeed when Binary subtype 4 (UUID) matches an existing collection", - ), -] - -# Property [Null Optional Parameters]: when optional parameters (query, collation, +# Property [Null Optional Parameters]: when optional parameters (query, # readConcern, comment, maxTimeMS) are null, they are treated as omitted. 
DISTINCT_NULL_PARAMS_TESTS: list[CommandTestCase] = [ CommandTestCase( @@ -329,51 +191,12 @@ ) for tid, param in [ ("query", "query"), - ("collation", "collation"), ("read_concern", "readConcern"), ("comment", "comment"), ("max_time_ms", "maxTimeMS"), ] ] -# Property [Query Composition on Views]: the query parameter composes with -# the view's pipeline filter to further restrict visible documents. -DISTINCT_QUERY_VIEW_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "query_on_filtered_view", - target_collection=ViewCollection(pipeline=[{"$match": {"status": "active"}}]), - docs=[ - {"_id": 1, "status": "active", "cat": "a", "x": 10}, - {"_id": 2, "status": "active", "cat": "b", "x": 20}, - {"_id": 3, "status": "inactive", "cat": "a", "x": 30}, - {"_id": 4, "status": "active", "cat": "a", "x": 40}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "cat", - "query": {"x": {"$gte": 20}}, - }, - expected={"values": ["a", "b"], "ok": 1}, - ignore_order_in=["values"], - msg="distinct query should compose with view pipeline filter", - ), - CommandTestCase( - "query_excludes_all_on_filtered_view", - target_collection=ViewCollection(pipeline=[{"$match": {"status": "active"}}]), - docs=[ - {"_id": 1, "status": "active", "cat": "a", "x": 10}, - {"_id": 2, "status": "inactive", "cat": "b", "x": 50}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "cat", - "query": {"x": {"$gte": 50}}, - }, - expected={"values": [], "ok": 1}, - msg="distinct query + view filter should return empty when no docs match both", - ), -] - DISTINCT_PARAMETER_TESTS: list[CommandTestCase] = ( DISTINCT_NULL_PARAMS_TESTS + DISTINCT_QUERY_SUCCESS_TESTS @@ -382,10 +205,6 @@ + DISTINCT_READCONCERN_SUCCESS_TESTS + DISTINCT_MAXTIMEMS_ACCEPTANCE_TESTS + DISTINCT_TIMESTAMP_ZERO_TESTS - + DISTINCT_COLLECTION_NAME_ACCEPTANCE_TESTS - + DISTINCT_COLLECTION_NAME_UUID_TESTS - + DISTINCT_COLLECTION_NAME_UUID_SUCCESS_TESTS - + DISTINCT_QUERY_VIEW_TESTS ) diff --git 
a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py index 15175012..9267ae10 100644 --- a/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py +++ b/documentdb_tests/compatibility/tests/core/aggregation/commands/distinct/test_distinct_type_errors.py @@ -132,74 +132,6 @@ ] ] -# Property [Collation Parameter Type Errors]: invalid BSON types and values for -# the collation parameter produce appropriate errors. -DISTINCT_COLLATION_TYPE_ERROR_TESTS: list[CommandTestCase] = [ - CommandTestCase( - f"collation_type_{tid}", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx, v=val: { - "distinct": ctx.collection, - "key": "x", - "collation": v, - }, - error_code=TYPE_MISMATCH_ERROR, - msg=f"distinct should reject {tid} as collation", - ) - for tid, val in [ - ("string", "en"), - ("int32", 42), - ("int64", Int64(1)), - ("double", 3.14), - ("decimal128", Decimal128("1")), - ("bool", True), - ("array", [1, 2]), - ("objectid", ObjectId("000000000000000000000001")), - ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), - ("timestamp", Timestamp(1, 1)), - ("binary", Binary(b"data", 0)), - ("regex", Regex("abc", "")), - ("code", Code("function(){}")), - ("code_with_scope", Code("function(){}", {"s": 1})), - ("minkey", MinKey()), - ("maxkey", MaxKey()), - ] -] + [ - CommandTestCase( - "collation_missing_locale", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"strength": 1}, - }, - error_code=MISSING_FIELD_ERROR, - msg="distinct should reject collation with missing locale", - ), - CommandTestCase( - "collation_invalid_locale", - docs=[{"_id": 1, "x": "a"}], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"locale": ""}, - }, - error_code=BAD_VALUE_ERROR, - msg="distinct should 
reject collation with invalid (empty) locale", - ), - CommandTestCase( - "collation_validated_nonexistent_collection", - docs=None, - command=lambda ctx: { - "distinct": ctx.collection, - "key": "x", - "collation": {"strength": 1}, - }, - error_code=MISSING_FIELD_ERROR, - msg="distinct should validate collation even when the collection does not exist", - ), -] - # Property [Key Parameter Type Errors]: all non-string BSON types for key produce # TypeMismatch error; null or omitted key produces a missing field error. DISTINCT_KEY_TYPE_ERROR_TESTS: list[CommandTestCase] = [ @@ -457,7 +389,6 @@ DISTINCT_NULL_HINT_ERROR_TESTS + DISTINCT_QUERY_TYPE_ERROR_TESTS + DISTINCT_READCONCERN_TYPE_ERROR_TESTS - + DISTINCT_COLLATION_TYPE_ERROR_TESTS + DISTINCT_KEY_TYPE_ERROR_TESTS + DISTINCT_HINT_TYPE_ERROR_TESTS + DISTINCT_COLLNAME_TYPE_ERROR_TESTS From cbbbf922c6ec75d7ef17ca453a4eb6d207518995 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 27 May 2026 14:27:14 -0700 Subject: [PATCH 4/4] Add ViewCollection options from parallel review Signed-off-by: Daniel Frankcom --- documentdb_tests/framework/target_collection.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/documentdb_tests/framework/target_collection.py b/documentdb_tests/framework/target_collection.py index f5be4347..a40de34c 100644 --- a/documentdb_tests/framework/target_collection.py +++ b/documentdb_tests/framework/target_collection.py @@ -29,13 +29,18 @@ def writable(self, source: Collection, resolved: Collection) -> Collection: @dataclass(frozen=True) class ViewCollection(TargetCollection): - """A view on the fixture collection.""" + """A view on the fixture collection. - pipeline: list[dict[str, Any]] = field(default_factory=list) + Pass any extra keyword arguments accepted by the ``create`` command + (e.g. ``pipeline``, ``collation``) via the ``options`` dict. 
+ """ + + options: dict[str, Any] = field(default_factory=dict) + suffix: str = "_view" def resolve(self, db: Database, collection: Collection) -> Collection: - view_name = f"{collection.name}_view" - db.command("create", view_name, viewOn=collection.name, pipeline=self.pipeline) + view_name = f"{collection.name}{self.suffix}" + db.command("create", view_name, viewOn=collection.name, **self.options) return db[view_name] def writable(self, source: Collection, resolved: Collection) -> Collection: