Skip to content

Commit cf0a12e

Browse files
Upper bound individual retries, move configurations in web config. (#6975)
Upper bound individual retries, move configurations in web config.
1 parent 0e467a3 commit cf0a12e

7 files changed

Lines changed: 107 additions & 28 deletions

File tree

src/NuGetGallery/App_Start/DefaultDependenciesModule.cs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -744,9 +744,14 @@ private static void ConfigureResilientSearch(ILoggerFactory loggerFactory, IGall
744744
.AddHttpMessageHandler<TracingHttpHandler>()
745745
.AddHttpMessageHandler<CorrelatingHttpClientHandler>()
746746
.AddPolicyHandler(SearchClientPolicies.SearchClientFallBackCircuitBreakerPolicy(logger, searchClient.name, telemetryService))
747-
.AddPolicyHandler(SearchClientPolicies.SearchClientWaitAndRetryForeverPolicy(logger, searchClient.name, telemetryService))
747+
.AddPolicyHandler(SearchClientPolicies.SearchClientWaitAndRetryPolicy(
748+
configuration.Current.SearchCircuitBreakerWaitAndRetryCount,
749+
configuration.Current.SearchCircuitBreakerWaitAndRetryIntervalInMilliseconds,
750+
logger,
751+
searchClient.name,
752+
telemetryService))
748753
.AddPolicyHandler(SearchClientPolicies.SearchClientCircuitBreakerPolicy(
749-
SearchClientConfiguration.SearchRetryCount,
754+
configuration.Current.SearchCircuitBreakerBreakAfterCount,
750755
TimeSpan.FromSeconds(configuration.Current.SearchCircuitBreakerDelayInSeconds),
751756
logger,
752757
searchClient.name,

src/NuGetGallery/Configuration/AppConfiguration.cs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,20 @@ public string ExternalBrandingMessage
378378
/// </summary>
379379
public Uri SearchServiceUriSecondary { get; set; }
380380

381+
[DefaultValue(600)]
381382
public int SearchCircuitBreakerDelayInSeconds { get; set; }
383+
384+
// The default value was chosen to have searchRetryCount*retryInterval to be close to 1 second in order to keep the user still engaged.
385+
// https://www.nngroup.com/articles/website-response-times/
386+
[DefaultValue(500)]
387+
public int SearchCircuitBreakerWaitAndRetryIntervalInMilliseconds { get; set; }
388+
389+
[DefaultValue(3)]
390+
public int SearchCircuitBreakerWaitAndRetryCount { get; set; }
391+
392+
// Default value was chosen using the AI data.
393+
// It is the average of the search request count per second during the last 90 days.
394+
[DefaultValue(200)]
395+
public int SearchCircuitBreakerBreakAfterCount { get; set; }
382396
}
383397
}

src/NuGetGallery/Configuration/IAppConfiguration.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,5 +405,20 @@ public interface IAppConfiguration : IMessageServiceConfiguration
405405
/// The time in seconds for the circuit breaker delay. (The time the circuit breaker will stay in open state)
406406
/// </summary>
407407
int SearchCircuitBreakerDelayInSeconds { get; set; }
408+
409+
/// <summary>
410+
/// The wait time in milliseconds for the WaitAndRetry policy.
411+
/// </summary>
412+
int SearchCircuitBreakerWaitAndRetryIntervalInMilliseconds { get; set; }
413+
414+
/// <summary>
415+
/// A request will fail after this number of retries. In total a request will fail after this number of retries + 1.
416+
/// </summary>
417+
int SearchCircuitBreakerWaitAndRetryCount { get; set; }
418+
419+
/// <summary>
420+
/// CircuitBreaker will open after this number of consecutive failed requests.
421+
/// </summary>
422+
int SearchCircuitBreakerBreakAfterCount { get; set; }
408423
}
409424
}

src/NuGetGallery/Infrastructure/Lucene/SearchClientConfiguration.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,5 @@ public class SearchClientConfiguration
77
{
88
public static string SearchPrimaryInstance = "SearchPrimary";
99
public static string SearchSecondaryInstance = "SearchSecondary";
10-
public static int SearchRetryCount = 3;
11-
// Try to have searchRetryCount*retryInterval to be close to 1 second in order to keep the user still engaged. https://www.nngroup.com/articles/website-response-times/
12-
public static int WaitAndRetryDefaultIntervalInMilliseconds = 500;
1310
}
1411
}

src/NuGetGallery/Infrastructure/Lucene/SearchClientPolicies.cs

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ public class SearchClientPolicies
2727
/// When the timeout expires trial requsts are allowed. The state will be changed to a Close state if they succeed.
2828
/// If they do not succeed the circuit will be moved back in the Open state for another <paramref name="breakDuration"/>.
2929
/// </summary>
30-
/// <param name="breakAfterCount">The number of retries until circuit will be open.</param>
30+
/// <param name="breakAfterCount">The number of allowed failures until circuit will be open.</param>
3131
/// <param name="breakDuration">The <see cref="TimeSpan"/> for the circuit to stay open.</param>
3232
/// <param name="logger">An <see cref="ILogger"/> instance.</param>
3333
/// <returns>The policy.</returns>
@@ -58,11 +58,15 @@ public static IAsyncPolicy<HttpResponseMessage> SearchClientCircuitBreakerPolicy
5858

5959
/// <summary>
6060
/// A WaitAndRetryForever policy to be used with the SearchClientCircuitBreakerPolicy.
61-
/// The policy will retry on any transient error and will not retry on <see cref="BrokenCircuitException"/> to avoid non-necessary retries due to circuit breaker exceptions.
61+
/// The policy will retry for <paramref name="retryCount"/> on any transient error and will not retry on <see cref="BrokenCircuitException"/> to avoid non-necessary retries due to circuit breaker exceptions.
6262
/// </summary>
63-
/// <param name="logger">An <see cref="ILogger"/> instance.</param>
63+
/// <param name="retryCount">The allowed number of retries.</param>
64+
/// <param name="waitInMilliseconds">The time to wait between retries.</param>
65+
/// <param name="logger">The logger</param>
66+
/// <param name="searchName">The search name.</param>
67+
/// <param name="telemetryService">Telemetry service</param>
6468
/// <returns>The policy.</returns>
65-
public static IAsyncPolicy<HttpResponseMessage> SearchClientWaitAndRetryForeverPolicy(ILogger logger, string searchName, ITelemetryService telemetryService)
69+
public static IAsyncPolicy<HttpResponseMessage> SearchClientWaitAndRetryPolicy(int retryCount, int waitInMilliseconds, ILogger logger, string searchName, ITelemetryService telemetryService)
6670
{
6771
return HttpPolicyExtensions.
6872
HandleTransientHttpError()
@@ -73,17 +77,18 @@ public static IAsyncPolicy<HttpResponseMessage> SearchClientWaitAndRetryForeverP
7377
// There should not be any retry in this case
7478
return false;
7579
}).
76-
WaitAndRetryForeverAsync(
77-
sleepDurationProvider: (retryAttempt, context) => TimeSpan.FromMilliseconds(SearchClientConfiguration.WaitAndRetryDefaultIntervalInMilliseconds),
80+
WaitAndRetryAsync(
81+
retryCount: retryCount,
82+
sleepDurationProvider: (retryAttempt, context) => TimeSpan.FromMilliseconds(waitInMilliseconds),
7883
onRetry: (delegateResult, waitDuration, context) =>
79-
{
80-
telemetryService.TrackMetricForSearchOnRetry(searchName,
81-
delegateResult.Exception,
82-
context.CorrelationId.ToString(),
83-
GetValueFromContext(ContextKey_RequestUri, context),
84-
GetValueFromContext(ContextKey_CircuitBreakerStatus, context));
85-
logger.LogInformation("Policy retry - it will retry after {RetryMilliseconds} milliseconds. {Exception} {SearchName}", waitDuration.TotalMilliseconds, delegateResult.Exception, searchName);
86-
});
84+
{
85+
telemetryService.TrackMetricForSearchOnRetry(searchName,
86+
delegateResult.Exception,
87+
context.CorrelationId.ToString(),
88+
GetValueFromContext(ContextKey_RequestUri, context),
89+
GetValueFromContext(ContextKey_CircuitBreakerStatus, context));
90+
logger.LogInformation("Policy retry - it will retry after {RetryMilliseconds} milliseconds. {Exception} {SearchName}", waitDuration.TotalMilliseconds, delegateResult.Exception, searchName);
91+
});
8792
}
8893

8994
/// <summary>

src/NuGetGallery/Web.config

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,9 @@
138138
<add key="Gallery.SearchServiceUriPrimary" value=""/>
139139
<add key="Gallery.SearchServiceUriSecondary" value=""/>
140140
<add key="Gallery.SearchCircuitBreakerDelayInSeconds" value="600"/>
141+
<add key="Gallery.SearchCircuitBreakerWaitAndRetryIntervalInMilliseconds" value="500"/>
142+
<add key="Gallery.SearchCircuitBreakerWaitAndRetryCount" value="3"/>
143+
<add key="Gallery.SearchCircuitBreakerBreakAfterCount" value="200"/>
141144
<add key="Gallery.Brand" value="NuGet Gallery"/>
142145
<add key="Gallery.GalleryOwner" value="NuGet Gallery &lt;[email protected]&gt;"/>
143146
<add key="Gallery.GalleryNoReplyAddress" value="NuGet Gallery &lt;[email protected]&gt;"/>

tests/NuGetGallery.Facts/Infrastructure/Lucene/SearchPolicyFacts.cs

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,12 @@ namespace NuGetGallery.Infrastructure.Search
1717
{
1818
public class SearchPolicyFacts
1919
{
20-
private static int _retryCount = 2;
20+
private static int _waitBetweenRetriesInMilliseconds = 500;
21+
private static int _circuitBreakerFailAfter = 2;
22+
private static int _retryCount = 10;
23+
// set the circuit breker to be larger than the retryCount
24+
private static int _circuitBreakerFailAfter_2 = 10;
25+
private static int _retryCount_2 = 2;
2126
private static int _circuitBreakerLongDelaySeconds = 600;
2227
private static int _circuitBreakerShortDelaySeconds = 1;
2328
private static ServiceCollection _services = null;
@@ -26,11 +31,14 @@ public class SearchPolicyFacts
2631
private static LoggerFor_TestSearchHttpClient _loggerFor_InvalidTestSearchHttpClientWithShortCircuitBreakDelay;
2732
private static LoggerFor_TestSearchHttpClient _loggerFor_ValidTestSearchHttpClient;
2833
private static string _nameFor_InvalidTestSearchHttpClientWithLongCircuitBreakDelay = "InvalidTestSearchHttpClientWithLongCircuitBreakDelay";
29-
private static string _nameFor_InvalidTestSearchHttpClientWithShortCircuitBreakDelay = "InvalidTestSearchHttpClientWithSWhortCircuitBreakDelay";
34+
private static string _nameFor_InvalidTestSearchHttpClientWithShortCircuitBreakDelay = "InvalidTestSearchHttpClientWithShortCircuitBreakDelay";
3035
private static string _nameFor_ValidTestSearchHttpClient = "ValidTestSearchHttpClient";
3136
private static readonly string _longInvalidAddress = "https://api-v2v3search-long.nuget.org";
3237
private static readonly string _shortInvalidAddress = "https://api-v2v3search-short.nuget.org";
3338
private static readonly string _validAddress = "https://api-v2v3search-0.nuget.org";
39+
private static readonly string _shortInvalidAddress_2 = "https://api-v2v3search-short-2.nuget.org";
40+
private static string _nameFor_InvalidTestSearchHttpClientRetryCountExpires = "InvalidTestSearchHttpClientRetryCountExpires";
41+
private static LoggerFor_TestSearchHttpClient _loggerFor_InvalidTestSearchHttpClientRetryCountExpires;
3442

3543
public SearchPolicyFacts()
3644
{
@@ -163,6 +171,28 @@ public async Task TestCircuitBreakerForValidRequests()
163171
Assert.Equal(HttpStatusCode.OK, r.StatusCode);
164172
}
165173

174+
[Fact]
175+
public async Task TestWaitAndRetryForInvalidRequests()
176+
{
177+
var invalidClient = _services.BuildServiceProvider().GetServices<TestSearchHttpClient>().Where(s => s.BaseAddress == new Uri(_shortInvalidAddress_2)).ElementAt(0);
178+
var uri = new Uri($"{invalidClient.BaseAddress}query?q=packageid:Newtonsoft.Json version:12.0.1");
179+
180+
var r = await invalidClient.GetAsync(uri);
181+
182+
var retryInfo = _loggerFor_InvalidTestSearchHttpClientRetryCountExpires.Informations.Where(s => s.StartsWith("Policy retry - it will retry after")).Count();
183+
var onCircuitBreakerfallBackInfo = _loggerFor_InvalidTestSearchHttpClientRetryCountExpires.Informations.Where(s => s.StartsWith("On circuit breaker fallback.")).Count();
184+
var circuitBreakerWarning = _loggerFor_InvalidTestSearchHttpClientRetryCountExpires.Warnings.Where(s => s.StartsWith("SearchCircuitBreaker logging: Breaking the circuit for")).Count();
185+
var onCircuitBreakerReset = _loggerFor_InvalidTestSearchHttpClientRetryCountExpires.Informations.Where(s => s.StartsWith("SearchCircuitBreaker logging: Call ok! Closed the circuit again!")).Count();
186+
var onCircuitBreakerHalfOpen = _loggerFor_InvalidTestSearchHttpClientRetryCountExpires.Informations.Where(s => s.StartsWith("SearchCircuitBreaker logging: Half-open: Next call is a trial!")).Count();
187+
188+
Assert.Equal(_retryCount_2, retryInfo);
189+
Assert.Equal(1, onCircuitBreakerfallBackInfo);
190+
Assert.Equal(0, circuitBreakerWarning);
191+
Assert.Equal(0, onCircuitBreakerReset);
192+
Assert.Equal(0, onCircuitBreakerHalfOpen);
193+
Assert.Equal(HttpStatusCode.ServiceUnavailable, r.StatusCode);
194+
}
195+
166196
private static ILogger<ResilientSearchHttpClient> GetLogger()
167197
{
168198
var mockConfiguration = new Mock<ILogger<ResilientSearchHttpClient>>();
@@ -178,34 +208,44 @@ private static ServiceCollection ConfigureServices()
178208
_loggerFor_InvalidTestSearchHttpClientWithLongCircuitBreakDelay = new LoggerFor_TestSearchHttpClient();
179209
_loggerFor_InvalidTestSearchHttpClientWithShortCircuitBreakDelay = new LoggerFor_TestSearchHttpClient();
180210
_loggerFor_ValidTestSearchHttpClient = new LoggerFor_TestSearchHttpClient();
211+
_loggerFor_InvalidTestSearchHttpClientRetryCountExpires = new LoggerFor_TestSearchHttpClient();
181212

182213
services.AddHttpClient<TestSearchHttpClient>(_nameFor_InvalidTestSearchHttpClientWithLongCircuitBreakDelay, c => c.BaseAddress = new Uri(_longInvalidAddress))
183214
.AddPolicyHandler(SearchClientPolicies.SearchClientFallBackCircuitBreakerPolicy(_loggerFor_InvalidTestSearchHttpClientWithLongCircuitBreakDelay, "InvalidTestSearchHttpClientWithLongCircuitBreakDelay", telemetryServiceResolver))
184-
.AddPolicyHandler(SearchClientPolicies.SearchClientWaitAndRetryForeverPolicy(_loggerFor_InvalidTestSearchHttpClientWithLongCircuitBreakDelay, "InvalidTestSearchHttpClientWithLongCircuitBreakDelay", telemetryServiceResolver))
215+
.AddPolicyHandler(SearchClientPolicies.SearchClientWaitAndRetryPolicy(_retryCount, _waitBetweenRetriesInMilliseconds, _loggerFor_InvalidTestSearchHttpClientWithLongCircuitBreakDelay, "InvalidTestSearchHttpClientWithLongCircuitBreakDelay", telemetryServiceResolver))
185216
.AddPolicyHandler(SearchClientPolicies.SearchClientCircuitBreakerPolicy(
186-
_retryCount,
217+
_circuitBreakerFailAfter,
187218
TimeSpan.FromSeconds(_circuitBreakerLongDelaySeconds),
188219
_loggerFor_InvalidTestSearchHttpClientWithLongCircuitBreakDelay,
189220
"InvalidTestSearchHttpClientWithLongCircuitBreakDelay", telemetryServiceResolver));
190221

191222
services.AddHttpClient<TestSearchHttpClient>(_nameFor_InvalidTestSearchHttpClientWithShortCircuitBreakDelay, c => c.BaseAddress = new Uri(_shortInvalidAddress))
192223
.AddPolicyHandler(SearchClientPolicies.SearchClientFallBackCircuitBreakerPolicy(_loggerFor_InvalidTestSearchHttpClientWithShortCircuitBreakDelay, "InvalidTestSearchHttpClientWithShortCircuitBreakDelay", telemetryServiceResolver))
193-
.AddPolicyHandler(SearchClientPolicies.SearchClientWaitAndRetryForeverPolicy(_loggerFor_InvalidTestSearchHttpClientWithShortCircuitBreakDelay, "InvalidTestSearchHttpClientWithShortCircuitBreakDelay", telemetryServiceResolver))
224+
.AddPolicyHandler(SearchClientPolicies.SearchClientWaitAndRetryPolicy(_retryCount, _waitBetweenRetriesInMilliseconds, _loggerFor_InvalidTestSearchHttpClientWithShortCircuitBreakDelay, "InvalidTestSearchHttpClientWithShortCircuitBreakDelay", telemetryServiceResolver))
194225
.AddPolicyHandler(SearchClientPolicies.SearchClientCircuitBreakerPolicy(
195-
_retryCount,
226+
_circuitBreakerFailAfter,
196227
TimeSpan.FromSeconds(_circuitBreakerShortDelaySeconds),
197228
_loggerFor_InvalidTestSearchHttpClientWithShortCircuitBreakDelay,
198229
"InvalidTestSearchHttpClientWithShortCircuitBreakDelay", telemetryServiceResolver));
199230

200231
services.AddHttpClient<TestSearchHttpClient>(_nameFor_ValidTestSearchHttpClient, c => c.BaseAddress = new Uri(_validAddress))
201-
.AddPolicyHandler(SearchClientPolicies.SearchClientFallBackCircuitBreakerPolicy(_loggerFor_ValidTestSearchHttpClient, "ValidTestSearchHttpClient", telemetryServiceResolver))
202-
.AddPolicyHandler(SearchClientPolicies.SearchClientWaitAndRetryForeverPolicy(_loggerFor_ValidTestSearchHttpClient, "InvalidTestSearchHttpClientWithShortCircuitBreakDelay", telemetryServiceResolver))
232+
.AddPolicyHandler(SearchClientPolicies.SearchClientFallBackCircuitBreakerPolicy(_loggerFor_ValidTestSearchHttpClient, "InvalidTestSearchHttpClientWithShortCircuitBreakDelay", telemetryServiceResolver))
233+
.AddPolicyHandler(SearchClientPolicies.SearchClientWaitAndRetryPolicy(_retryCount, _waitBetweenRetriesInMilliseconds, _loggerFor_ValidTestSearchHttpClient, "InvalidTestSearchHttpClientWithShortCircuitBreakDelay", telemetryServiceResolver))
203234
.AddPolicyHandler(SearchClientPolicies.SearchClientCircuitBreakerPolicy(
204-
_retryCount,
235+
_circuitBreakerFailAfter,
205236
TimeSpan.FromSeconds(_circuitBreakerShortDelaySeconds),
206237
_loggerFor_ValidTestSearchHttpClient,
207238
"InvalidTestSearchHttpClientWithShortCircuitBreakDelay", telemetryServiceResolver));
208239

240+
services.AddHttpClient<TestSearchHttpClient>(_nameFor_InvalidTestSearchHttpClientRetryCountExpires, c => c.BaseAddress = new Uri(_shortInvalidAddress_2))
241+
.AddPolicyHandler(SearchClientPolicies.SearchClientFallBackCircuitBreakerPolicy(_loggerFor_InvalidTestSearchHttpClientRetryCountExpires, "InvalidTestSearchHttpClientRetryCountExpires", telemetryServiceResolver))
242+
.AddPolicyHandler(SearchClientPolicies.SearchClientWaitAndRetryPolicy(_retryCount_2, _waitBetweenRetriesInMilliseconds, _loggerFor_InvalidTestSearchHttpClientRetryCountExpires, "InvalidTestSearchHttpClientRetryCountExpires", telemetryServiceResolver))
243+
.AddPolicyHandler(SearchClientPolicies.SearchClientCircuitBreakerPolicy(
244+
_circuitBreakerFailAfter_2,
245+
TimeSpan.FromSeconds(_circuitBreakerShortDelaySeconds),
246+
_loggerFor_InvalidTestSearchHttpClientRetryCountExpires,
247+
"InvalidTestSearchHttpClientRetryCountExpires", telemetryServiceResolver));
248+
209249
return services;
210250
}
211251

0 commit comments

Comments
 (0)