From 1b14e862704bb5dc742662cdf77f1884513b2fab Mon Sep 17 00:00:00 2001 From: Daria Pardue Date: Fri, 13 Mar 2026 16:55:35 -0400 Subject: [PATCH 1/3] test: backpressure retries are non-blocking --- .../backpressure-connection-checkin.json | 129 ++++++++++++++++++ .../backpressure-connection-checkin.yml | 67 +++++++++ 2 files changed, 196 insertions(+) create mode 100644 test/spec/client-backpressure/backpressure-connection-checkin.json create mode 100644 test/spec/client-backpressure/backpressure-connection-checkin.yml diff --git a/test/spec/client-backpressure/backpressure-connection-checkin.json b/test/spec/client-backpressure/backpressure-connection-checkin.json new file mode 100644 index 00000000000..340c87c0142 --- /dev/null +++ b/test/spec/client-backpressure/backpressure-connection-checkin.json @@ -0,0 +1,129 @@ +{ + "description": "tests that connections are returned to the pool on retry attempts for overload errors", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "connectionCheckedOutEvent", + "connectionCheckedInEvent" + ] + } + }, + { + "client": { + "id": "fail_point_client", + "useMultipleMongoses": false + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "backpressure-connection-checkin" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ], + "tests": [ + { + "description": "overload error retry attempts return connections to the pool", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "find", + "object": "collection", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + } + ] +} diff --git a/test/spec/client-backpressure/backpressure-connection-checkin.yml b/test/spec/client-backpressure/backpressure-connection-checkin.yml new file mode 100644 index 00000000000..7c4359335c3 --- /dev/null +++ b/test/spec/client-backpressure/backpressure-connection-checkin.yml @@ -0,0 +1,67 @@ +description: tests that connections are returned to the pool on retry attempts for overload errors +schemaVersion: "1.3" +runOnRequirements: + - minServerVersion: "4.4" + topologies: + - replicaset + - sharded + - load-balanced +createEntities: + - client: + id: client + useMultipleMongoses: false + observeEvents: + - connectionCheckedOutEvent + - connectionCheckedInEvent + - client: + id: fail_point_client + useMultipleMongoses: false + - database: + id: database + client: client + databaseName: backpressure-connection-checkin + - collection: + id: collection + database: database + collectionName: coll +tests: + - description: overload error retry attempts return connections to the pool + operations: + - name: failPoint + object: testRunner + arguments: + client: fail_point_client + failPoint: + configureFailPoint: failCommand + mode: alwaysOn + data: + failCommands: + - find + errorLabels: + - RetryableError + - SystemOverloadedError + errorCode: 2 + - name: find + object: collection + arguments: + filter: {} + expectError: + isError: true + isClientError: false + expectEvents: + - client: client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} + From 6a0af2b54ceb9bfd75e16574af87ff70dbbcccf9 Mon Sep 17 00:00:00 2001 From: Sergey Zelenov Date: Tue, 17 Mar 2026 14:55:48 +0100 Subject: [PATCH 2/3] unpin the session when a SystemOverloadedError occurs --- src/operations/execute_operation.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/operations/execute_operation.ts b/src/operations/execute_operation.ts index 6b74920aa69..4c6b0baccdc 100644 --- a/src/operations/execute_operation.ts +++ b/src/operations/execute_operation.ts @@ -349,6 +349,15 @@ async function executeOperationWithRetries< session.unpin({ force: true, forceClear: true }); } + if ( + operationError.hasErrorLabel(MongoErrorLabel.SystemOverloadedError) && + session != null && + session.isPinned && + !session.inTransaction() + ) { + session.unpin({ force: true }); + } + if ( topology.description.type === TopologyType.Sharded || operationError.hasErrorLabel(MongoErrorLabel.SystemOverloadedError) From a6e7f88814789c429b49ae54cdf24c566f773eba Mon Sep 17 00:00:00 2001 From: Sergey Zelenov Date: Wed, 18 Mar 2026 14:20:12 +0100 Subject: [PATCH 3/3] check the aspect Aspect.CURSOR_CREATING to prevent breaking cursor pinning --- src/operations/execute_operation.ts | 31 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/operations/execute_operation.ts b/src/operations/execute_operation.ts index 4c6b0baccdc..0715a5f0704 100644 --- a/src/operations/execute_operation.ts +++ b/src/operations/execute_operation.ts @@ -324,21 +324,6 @@ async function executeOperationWithRetries< throw error; } - if (operationError.hasErrorLabel(MongoErrorLabel.SystemOverloadedError)) { - const backoffMS = Math.random() * Math.min(MAX_BACKOFF_MS, BASE_BACKOFF_MS * 2 ** attempt); - - // if the backoff would exhaust the CSOT timeout, short-circuit. - if (timeoutContext.csotEnabled() && backoffMS > timeoutContext.remainingTimeMS) { - throw error; - } - - if (topology.s.options.adaptiveRetries && !topology.tokenBucket.consume(RETRY_COST)) { - throw error; - } - - await setTimeout(backoffMS); - } - if ( operationError instanceof MongoNetworkError && operation.hasAspect(Aspect.CURSOR_CREATING) && @@ -351,6 +336,7 @@ async function executeOperationWithRetries< if ( operationError.hasErrorLabel(MongoErrorLabel.SystemOverloadedError) && + operation.hasAspect(Aspect.CURSOR_CREATING) && session != null && session.isPinned && !session.inTransaction() @@ -358,6 +344,21 @@ async function executeOperationWithRetries< session.unpin({ force: true }); } + if (operationError.hasErrorLabel(MongoErrorLabel.SystemOverloadedError)) { + const backoffMS = Math.random() * Math.min(MAX_BACKOFF_MS, BASE_BACKOFF_MS * 2 ** attempt); + + // if the backoff would exhaust the CSOT timeout, short-circuit. + if (timeoutContext.csotEnabled() && backoffMS > timeoutContext.remainingTimeMS) { + throw error; + } + + if (topology.s.options.adaptiveRetries && !topology.tokenBucket.consume(RETRY_COST)) { + throw error; + } + + await setTimeout(backoffMS); + } + if ( topology.description.type === TopologyType.Sharded || operationError.hasErrorLabel(MongoErrorLabel.SystemOverloadedError)