
Commit bcf6a51

deposit retry tokens only on retries, update maxAttempts property conditionally
1 parent f2b9750 commit bcf6a51

8 files changed

Lines changed: 26 additions & 92 deletions


package-lock.json

Lines changed: 1 addition & 16 deletions
Some generated files are not rendered by default.

src/cmap/connection.ts

Lines changed: 0 additions & 6 deletions
@@ -586,12 +586,6 @@ export class Connection extends TypedEventEmitter<ConnectionEvents> {
         this.throwIfAborted();
       }
     } catch (error) {
-      // Note for Sergey: when retrying a command with `startTransaction`, the spec says drivers must only mark the transaction
-      // as in progress _after_ the server responds, regardless of the command's outcome.
-      // Server errors are thrown from the try-block above _after_ updateSessionFromResponse is called. So, server errors already correctly update the
-      // session's state.
-      // Network errors, however, are thrown from the try-block from `sendWire()`, and as such bypass the call to `updateSessionFromResponse` above. So, we have to
-      // handle updating sessions for non-server errors here.
       if (options.session != null && !(error instanceof MongoServerError)) {
         updateSessionFromResponse(options.session, MongoDBResponse.empty);
       }

src/operations/execute_operation.ts

Lines changed: 12 additions & 38 deletions
@@ -12,7 +12,6 @@ import {
   MongoInvalidArgumentError,
   MongoNetworkError,
   MongoNotConnectedError,
-  MongoOperationTimeoutError,
   MongoRuntimeError,
   MongoServerError,
   MongoTransactionError,
@@ -29,13 +28,8 @@ import {
 import type { Topology } from '../sdam/topology';
 import type { ClientSession } from '../sessions';
 import { TimeoutContext } from '../timeout';
-import { RETRY_COST, TOKEN_REFRESH_RATE } from '../token_bucket';
-import {
-  abortable,
-  ExponentialBackoffProvider,
-  maxWireVersion,
-  supportsRetryableWrites
-} from '../utils';
+import { RETRY_COST, RETRY_TOKEN_RETURN_RATE } from '../token_bucket';
+import { abortable, maxWireVersion, supportsRetryableWrites } from '../utils';
 import { AggregateOperation } from './aggregate';
 import { AbstractOperation, Aspect } from './operation';
 import { RunCommandOperation } from './run_command';
@@ -194,7 +188,6 @@ type RetryOptions = {
  *
  * @param operation - The operation to execute
  */
-// Note for Sergey: The rename here could be reverted - I just thought this was more descriptive.
 async function executeOperationWithRetries<
   T extends AbstractOperation,
   TResult = ResultTypeFromOperation<T>
@@ -248,11 +241,6 @@ async function executeOperationWithRetries<
   }
 
   const deprioritizedServers = new DeprioritizedServers();
-  const backoffDelayProvider = new ExponentialBackoffProvider(
-    10_000, // MAX_BACKOFF
-    100, // base backoff
-    2 // backoff rate
-  );
 
   let maxAttempts =
     typeof operation.maxAttempts === 'number'
@@ -271,6 +259,7 @@ async function executeOperationWithRetries<
   let error: MongoError | null = null;
 
   for (let attempt = 0; attempt < maxAttempts; attempt++) {
+    operation.attemptsMade = attempt + 1;
     operation.server = server;
 
     try {
@@ -279,28 +268,20 @@ async function executeOperationWithRetries<
         topology.tokenBucket.deposit(
          attempt > 0
            ? // on successful retry, deposit the retry cost + the refresh rate.
-             TOKEN_REFRESH_RATE + RETRY_COST
+             RETRY_TOKEN_RETURN_RATE + RETRY_COST
            : // otherwise, just deposit the refresh rate.
-             TOKEN_REFRESH_RATE
+             RETRY_TOKEN_RETURN_RATE
        );
        return operation.handleOk(result);
      } catch (error) {
        return operation.handleError(error);
      }
-
-      // Note for Sergey: This ended up being a larger refactor than I anticipated. But it made more sense to me to put the error handling
-      // that previously lived in the try-block into the catch-block.
-      // The primary motivator for this change was to make error tracking simpler. The post failure but pre-retry logic needs access to both the
-      // most recently encountered operation error _and_ the error we're storing to potentially throw, if we need to (because in some circumstances
-      // the error raised from here is _not_ the most recently encountered operation error). If we chose to keep the previously existing structure,
-      // this would force us to keep track of two errors outside of the for loop. By moving the pre-retry logic into the catch block, it gains
-      // access to `operationError`, and still only requires keeping track of one error outside the loop.
     } catch (operationError) {
       // Should never happen but if it does - propagate the error.
       if (!(operationError instanceof MongoError)) throw operationError;
 
-      if (!operationError.hasErrorLabel(MongoErrorLabel.SystemOverloadedError)) {
-        // if an operation fails with an error that does not contain the SystemOverloadError, deposit 1 token.
+      if (attempt > 0 && !operationError.hasErrorLabel(MongoErrorLabel.SystemOverloadedError)) {
+        // if a retry attempt fails with a non-overload error, deposit 1 token.
         topology.tokenBucket.deposit(RETRY_COST);
       }
 
@@ -327,10 +308,9 @@ async function executeOperationWithRetries<
         throw error;
       }
 
-      maxAttempts =
-        shouldRetry && operationError.hasErrorLabel(MongoErrorLabel.SystemOverloadedError)
-          ? 6
-          : maxAttempts;
+      if (operationError.hasErrorLabel(MongoErrorLabel.SystemOverloadedError)) {
+        maxAttempts = Math.min(6, operation.maxAttempts ?? 6);
+      }
 
       if (attempt + 1 >= maxAttempts) {
         throw error;
@@ -341,17 +321,11 @@ async function executeOperationWithRetries<
         throw error;
       }
 
-      const delayMS = backoffDelayProvider.getNextBackoffDuration();
+      const delayMS = Math.random() * Math.min(10_000, 100 * 2 ** attempt);
 
       // if the delay would exhaust the CSOT timeout, short-circuit.
       if (timeoutContext.csotEnabled() && delayMS > timeoutContext.remainingTimeMS) {
-        // TODO: is this the right error to throw?
-        throw new MongoOperationTimeoutError(
-          `MongoDB SystemOverload exponential backoff would exceed timeoutMS deadline: remaining CSOT deadline=${timeoutContext.remainingTimeMS}, backoff delayMS=${delayMS}`,
-          {
-            cause: error
-          }
-        );
+        throw error;
       }
 
       await setTimeout(delayMS);
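
For reference, the inline replacement above implements full-jitter exponential backoff: each delay is drawn uniformly from [0, min(10_000, 100 * 2 ** attempt)) milliseconds. A standalone sketch using the same constants as the diff (the helper name is illustrative, not part of the driver):

    // Full jitter: uniform over [0, capMS), where the cap doubles per attempt
    // from 100 ms and is clamped at 10 seconds.
    function backoffDelayMS(attempt: number): number {
      const capMS = Math.min(10_000, 100 * 2 ** attempt);
      return Math.random() * capMS;
    }

    // Caps per attempt: 100, 200, 400, 800, 1600, 3200, ... up to 10_000 ms.
    // Under the six-attempt overload budget, attempts 0-4 can back off, so the
    // worst-case total sleep is 100 + 200 + 400 + 800 + 1600 = 3100 ms.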

src/operations/operation.ts

Lines changed: 4 additions & 6 deletions
@@ -66,14 +66,12 @@ export abstract class AbstractOperation<TResult = any> {
   /** Specifies the time an operation will run until it throws a timeout error. */
   timeoutMS?: number;
 
-  // Note for Sergey: sort of a hack, one that I planned to revisit before officially opening the PR for review.
-  // For every operation _except_ transactions, all backpressure retries occur inside one `executeOperation` call. `commitTransaction`, however,
-  // currently hard-codes two retry attempts, each with a `RunCommandOperation`. Neither retry attempt is retryable by itself.
-  // Because we use two separate RunCommand operations, it's possible to retry up to 2 * MAX_BACKPRESSURE_RETRIES times, if the server is overloaded.
-  // I hacked around it here by storing the max attempts for an operation on the operation, and sharing the value between both operations.
-  // I don't have a solid idea of a better way to do this right now though.
+  /** @internal Used by commitTransaction to share the retry budget across two executeOperation calls. */
   maxAttempts?: number;
 
+  /** @internal Tracks how many attempts were made in the last executeOperation call. */
+  attemptsMade?: number;
+
   private _session: ClientSession | undefined;
 
   static aspects?: Set<symbol>;

src/sdam/topology.ts

Lines changed: 0 additions & 1 deletion
@@ -213,7 +213,6 @@ export class Topology extends TypedEventEmitter<TopologyEvents> {
   hello?: Document;
   _type?: string;
 
-  // Note for Sergey: expect this to change, and instead be stored on the server class.
   tokenBucket = new TokenBucket(1000);
 
   client!: MongoClient;

src/sessions.ts

Lines changed: 8 additions & 4 deletions
@@ -497,7 +497,7 @@ export class ClientSession
       readPreference: ReadPreference.primary,
       bypassPinningCheck: true
     });
-    operation.maxAttempts = 5;
+    operation.maxAttempts = 6;
 
     const timeoutContext =
       this.timeoutContext ??
@@ -515,8 +515,12 @@ export class ClientSession
       return;
     } catch (firstCommitError) {
       this.commitAttempted = true;
-      // Backpressure exhausted all retries on the initial retry loop.
-      if (operation.maxAttempts === 5) throw firstCommitError;
+
+      const remainingAttempts = 6 - (operation.attemptsMade ?? 1);
+      if (remainingAttempts <= 0) {
+        throw firstCommitError;
+      }
+
       if (firstCommitError instanceof MongoError && isRetryableWriteError(firstCommitError)) {
         // SPEC-1185: apply majority write concern when retrying commitTransaction
         WriteConcern.apply(command, { wtimeoutMS: 10000, ...wc, w: 'majority' });
@@ -529,7 +533,7 @@ export class ClientSession
         readPreference: ReadPreference.primary,
         bypassPinningCheck: true
       });
-      op.maxAttempts = operation.maxAttempts;
+      op.maxAttempts = remainingAttempts;
       await executeOperation(this.client, op, timeoutContext);
       return;
     } catch (retryCommitError) {
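
With maxAttempts raised to 6 and attemptsMade recorded by the retry loop, commitTransaction's two executeOperation calls now draw from one shared six-attempt budget instead of each hard-coding its own. A minimal sketch of the accounting, with firstOp and retryOp as illustrative names for the two RunCommandOperation instances:

    // Shared backpressure budget across both commit operations.
    const TOTAL_ATTEMPTS = 6;

    // The first call may consume up to the whole budget internally.
    firstOp.maxAttempts = TOTAL_ATTEMPTS;
    // Suppose executeOperation(firstOp) throws after recording attemptsMade = 4.

    // The retry gets only what is left, so the two calls together can never
    // exceed six attempts against an overloaded server.
    const remainingAttempts = TOTAL_ATTEMPTS - (firstOp.attemptsMade ?? 1); // 6 - 4 = 2
    if (remainingAttempts <= 0) throw firstCommitError;
    retryOp.maxAttempts = remainingAttempts;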

src/token_bucket.ts

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ export class TokenBucket {
  * @internal
  * The amount to deposit on successful operations, as defined in the backpressure specification.
  */
-export const TOKEN_REFRESH_RATE = 0.1;
+export const RETRY_TOKEN_RETURN_RATE = 0.1;
 /**
  * @internal
  * The initial size of the token bucket, as defined in the backpressure specification.
src/utils.ts

Lines changed: 0 additions & 20 deletions
@@ -1419,23 +1419,3 @@ export async function abortable<T>(
     abortListener?.[kDispose]();
   }
 }
-
-// Note for Sergey: overkill, I know - but sometimes you gotta over-engineer a bit for fun and revert before opening for review ¯\_(ツ)_/¯
-// feel free to remove this class. Or leave it. If you keep it, we should use this for the convenient transactions' API as well.
-export class ExponentialBackoffProvider {
-  constructor(
-    public readonly maxBackoff: number,
-    public readonly baseBackoff: number,
-    public readonly backoffIncreaseRate: number,
-    public iteration = 0
-  ) {}
-
-  getNextBackoffDuration(): number {
-    const iteration = this.iteration;
-    this.iteration += 1;
-    return (
-      Math.random() *
-      Math.min(this.maxBackoff, this.baseBackoff * this.backoffIncreaseRate ** iteration)
-    );
-  }
-}
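
For the record, the deleted provider and the inline expression that replaced it in execute_operation.ts compute the same distribution. Instantiated with (10_000, 100, 2), as it was there, getNextBackoffDuration() evaluates to Math.random() * Math.min(10_000, 100 * 2 ** iteration), with the retry loop's attempt counter now standing in for the provider's internal iteration state:

    // Before: stateful provider, counter advances on each call.
    const provider = new ExponentialBackoffProvider(10_000, 100, 2);
    const before = provider.getNextBackoffDuration(); // iteration 0, then 1, ...

    // After: same full-jitter formula, driven directly by the loop counter.
    const after = Math.random() * Math.min(10_000, 100 * 2 ** attempt);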
