Skip to content
This repository was archived by the owner on Jul 30, 2024. It is now read-only.

Commit b3aa1e7

Browse files
author
Scott Bommarito
authored
Allow configuration of degree of parallelism of report generation for Stats.CreateAzureCdnWarehouseReports (#286)
1 parent 25c3ea3 commit b3aa1e7

4 files changed

Lines changed: 39 additions & 21 deletions

File tree

src/NuGet.Jobs.Common/Configuration/JobArgumentNames.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ public static class JobArgumentNames
4545

4646
// Arguments specific to CreateWarehouseReports job
4747
public const string WarehouseReportName = "WarehouseReportName";
48+
public const string PerPackageReportDegreeOfParallelism = "PerPackageReportDegreeOfParallelism";
4849

4950
// Arguments specific to Search* jobs
5051
public const string DataStorageAccount = "DataStorageAccount";

src/Stats.CreateAzureCdnWarehouseReports/Job.cs

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ namespace Stats.CreateAzureCdnWarehouseReports
1717
public class Job
1818
: JobBase
1919
{
20-
private const int DefaultSqlCommandTimeout = 1800; // 30 minute SQL command timeout by default
20+
private const int DefaultPerPackageReportDegreeOfParallelism = 8; // Generate up to 8 per-package reports in parallel by default
21+
private const int DefaultSqlCommandTimeoutSeconds = 1800; // 30 minute SQL command timeout by default
2122
private const string _recentPopularityDetailByPackageReportBaseName = "recentpopularitydetail_";
2223
private CloudStorageAccount _cloudStorageAccount;
2324
private CloudStorageAccount _dataStorageAccount;
@@ -26,7 +27,8 @@ public class Job
2627
private SqlConnectionStringBuilder _galleryDatabase;
2728
private string _reportName;
2829
private string[] _dataContainerNames;
29-
private int _sqlCommandTimeout = DefaultSqlCommandTimeout;
30+
private int _sqlCommandTimeoutSeconds = DefaultSqlCommandTimeoutSeconds;
31+
private int _perPackageReportDegreeOfParallelism = DefaultPerPackageReportDegreeOfParallelism;
3032

3133
private static readonly IDictionary<string, string> _storedProcedures = new Dictionary<string, string>
3234
{
@@ -50,7 +52,8 @@ public override void Init(IDictionary<string, string> jobArgsDictionary)
5052
var statisticsDatabaseConnectionString = JobConfigurationManager.GetArgument(jobArgsDictionary, JobArgumentNames.StatisticsDatabase);
5153
var galleryDatabaseConnectionString = JobConfigurationManager.GetArgument(jobArgsDictionary, JobArgumentNames.SourceDatabase);
5254
var dataStorageAccountConnectionString = JobConfigurationManager.GetArgument(jobArgsDictionary, JobArgumentNames.DataStorageAccount);
53-
_sqlCommandTimeout = JobConfigurationManager.TryGetIntArgument(jobArgsDictionary, JobArgumentNames.CommandTimeOut) ?? DefaultSqlCommandTimeout;
55+
_sqlCommandTimeoutSeconds = JobConfigurationManager.TryGetIntArgument(jobArgsDictionary, JobArgumentNames.CommandTimeOut) ?? DefaultSqlCommandTimeoutSeconds;
56+
_perPackageReportDegreeOfParallelism = JobConfigurationManager.TryGetIntArgument(jobArgsDictionary, JobArgumentNames.PerPackageReportDegreeOfParallelism) ?? DefaultPerPackageReportDegreeOfParallelism;
5457

5558
_cloudStorageAccount = ValidateAzureCloudStorageAccount(cloudStorageAccountConnectionString, JobArgumentNames.AzureCdnCloudStorageAccount);
5659
_statisticsContainerName = ValidateAzureContainerName(JobConfigurationManager.GetArgument(jobArgsDictionary, JobArgumentNames.AzureCdnCloudStorageContainerName), JobArgumentNames.AzureCdnCloudStorageContainerName);
@@ -84,12 +87,12 @@ public override async Task Run()
8487
// generate all reports
8588
var reportGenerators = new Dictionary<ReportBuilder, ReportDataCollector>
8689
{
87-
{ new ReportBuilder(reportBuilderLogger, ReportNames.NuGetClientVersion), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.NuGetClientVersion], _statisticsDatabase, _sqlCommandTimeout) },
88-
{ new ReportBuilder(reportBuilderLogger, ReportNames.Last6Weeks), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.Last6Weeks], _statisticsDatabase, _sqlCommandTimeout) },
89-
{ new ReportBuilder(reportBuilderLogger, ReportNames.RecentCommunityPopularity), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.RecentCommunityPopularity], _statisticsDatabase, _sqlCommandTimeout) },
90-
{ new ReportBuilder(reportBuilderLogger, ReportNames.RecentCommunityPopularityDetail), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.RecentCommunityPopularityDetail], _statisticsDatabase, _sqlCommandTimeout) },
91-
{ new ReportBuilder(reportBuilderLogger, ReportNames.RecentPopularity), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.RecentPopularity], _statisticsDatabase, _sqlCommandTimeout) },
92-
{ new ReportBuilder(reportBuilderLogger, ReportNames.RecentPopularityDetail), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.RecentPopularityDetail], _statisticsDatabase, _sqlCommandTimeout) }
90+
{ new ReportBuilder(reportBuilderLogger, ReportNames.NuGetClientVersion), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.NuGetClientVersion], _statisticsDatabase, _sqlCommandTimeoutSeconds) },
91+
{ new ReportBuilder(reportBuilderLogger, ReportNames.Last6Weeks), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.Last6Weeks], _statisticsDatabase, _sqlCommandTimeoutSeconds) },
92+
{ new ReportBuilder(reportBuilderLogger, ReportNames.RecentCommunityPopularity), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.RecentCommunityPopularity], _statisticsDatabase, _sqlCommandTimeoutSeconds) },
93+
{ new ReportBuilder(reportBuilderLogger, ReportNames.RecentCommunityPopularityDetail), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.RecentCommunityPopularityDetail], _statisticsDatabase, _sqlCommandTimeoutSeconds) },
94+
{ new ReportBuilder(reportBuilderLogger, ReportNames.RecentPopularity), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.RecentPopularity], _statisticsDatabase, _sqlCommandTimeoutSeconds) },
95+
{ new ReportBuilder(reportBuilderLogger, ReportNames.RecentPopularityDetail), new ReportDataCollector(reportCollectorLogger, _storedProcedures[ReportNames.RecentPopularityDetail], _statisticsDatabase, _sqlCommandTimeoutSeconds) }
9396
};
9497

9598
foreach (var reportGenerator in reportGenerators)
@@ -105,7 +108,7 @@ public override async Task Run()
105108
{
106109
// generate only the specific report
107110
var reportBuilder = new ReportBuilder(reportBuilderLogger, _reportName);
108-
var reportDataCollector = new ReportDataCollector(reportCollectorLogger, _storedProcedures[_reportName], _statisticsDatabase, _sqlCommandTimeout);
111+
var reportDataCollector = new ReportDataCollector(reportCollectorLogger, _storedProcedures[_reportName], _statisticsDatabase, _sqlCommandTimeoutSeconds);
109112

110113
await ProcessReport(LoggerFactory, destinationContainer, reportBuilder, reportDataCollector, reportGenerationTime);
111114
}
@@ -165,15 +168,15 @@ private static async Task ProcessReport(ILoggerFactory loggerFactory, CloudBlobC
165168

166169
private async Task RebuildPackageReports(CloudBlobContainer destinationContainer, DateTime reportGenerationTime)
167170
{
168-
var dirtyPackageIds = await ReportDataCollector.GetDirtyPackageIds(LoggerFactory.CreateLogger<ReportDataCollector>(), _statisticsDatabase, reportGenerationTime, _sqlCommandTimeout);
171+
var dirtyPackageIds = await ReportDataCollector.GetDirtyPackageIds(LoggerFactory.CreateLogger<ReportDataCollector>(), _statisticsDatabase, reportGenerationTime, _sqlCommandTimeoutSeconds);
169172

170173
if (!dirtyPackageIds.Any())
171174
return;
172175

173176
// first process the top 100 packages
174177
var top100 = dirtyPackageIds.Take(100);
175-
var reportDataCollector = new ReportDataCollector(LoggerFactory.CreateLogger<ReportDataCollector>(), _storedProceduresPerPackageId[ReportNames.RecentPopularityDetailByPackageId], _statisticsDatabase, _sqlCommandTimeout);
176-
var top100Task = Parallel.ForEach(top100, new ParallelOptions { MaxDegreeOfParallelism = 4 }, dirtyPackageId =>
178+
var reportDataCollector = new ReportDataCollector(LoggerFactory.CreateLogger<ReportDataCollector>(), _storedProceduresPerPackageId[ReportNames.RecentPopularityDetailByPackageId], _statisticsDatabase, _sqlCommandTimeoutSeconds);
179+
var top100Task = Parallel.ForEach(top100, new ParallelOptions { MaxDegreeOfParallelism = _perPackageReportDegreeOfParallelism }, dirtyPackageId =>
177180
{
178181
var packageId = dirtyPackageId.PackageId.ToLowerInvariant();
179182
var reportBuilder = new RecentPopularityDetailByPackageReportBuilder(LoggerFactory.CreateLogger<RecentPopularityDetailByPackageReportBuilder>(), ReportNames.RecentPopularityDetailByPackageId, "recentpopularity/" + _recentPopularityDetailByPackageReportBaseName + packageId);
@@ -187,7 +190,7 @@ private async Task RebuildPackageReports(CloudBlobContainer destinationContainer
187190
{
188191
var excludingTop100 = dirtyPackageIds.Skip(100);
189192

190-
top100Task = Parallel.ForEach(excludingTop100, new ParallelOptions { MaxDegreeOfParallelism = 8 },
193+
top100Task = Parallel.ForEach(excludingTop100, new ParallelOptions { MaxDegreeOfParallelism = _perPackageReportDegreeOfParallelism },
191194
dirtyPackageId =>
192195
{
193196
// generate all reports
@@ -203,7 +206,7 @@ private async Task RebuildPackageReports(CloudBlobContainer destinationContainer
203206
LoggerFactory.CreateLogger<ReportDataCollector>(),
204207
_storedProceduresPerPackageId[ReportNames.RecentPopularityDetailByPackageId],
205208
_statisticsDatabase,
206-
_sqlCommandTimeout)
209+
_sqlCommandTimeoutSeconds)
207210
}
208211
};
209212

@@ -219,15 +222,15 @@ private async Task RebuildPackageReports(CloudBlobContainer destinationContainer
219222
if (top100Task.IsCompleted)
220223
{
221224
var runToCursor = dirtyPackageIds.First().RunToCuror;
222-
await ReportDataCollector.UpdateDirtyPackageIdCursor(_statisticsDatabase, runToCursor, _sqlCommandTimeout);
225+
await ReportDataCollector.UpdateDirtyPackageIdCursor(_statisticsDatabase, runToCursor, _sqlCommandTimeoutSeconds);
223226
}
224227
}
225228
}
226229

227230
private async Task CleanInactiveRecentPopularityDetailByPackageReports(CloudBlobContainer destinationContainer, DateTime reportGenerationTime)
228231
{
229232
Logger.LogDebug("Getting list of inactive packages.");
230-
var packageIds = await ReportDataCollector.ListInactivePackageIdReports(_statisticsDatabase, reportGenerationTime, _sqlCommandTimeout);
233+
var packageIds = await ReportDataCollector.ListInactivePackageIdReports(_statisticsDatabase, reportGenerationTime, _sqlCommandTimeoutSeconds);
231234
Logger.LogInformation("Found {InactivePackageCount} inactive packages.", packageIds.Count);
232235

233236
// Collect the list of reports

src/Stats.CreateAzureCdnWarehouseReports/ReportDataCollector.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ namespace Stats.CreateAzureCdnWarehouseReports
1313
{
1414
internal class ReportDataCollector
1515
{
16-
private int _commandTimeout;
16+
private int _commandTimeoutSeconds;
1717
private readonly string _procedureName;
1818
private readonly SqlConnectionStringBuilder _sourceDatabase;
1919

@@ -24,7 +24,7 @@ public ReportDataCollector(ILogger<ReportDataCollector> logger, string procedure
2424
_logger = logger;
2525
_procedureName = procedureName;
2626
_sourceDatabase = sourceDatabase;
27-
_commandTimeout = timeout;
27+
_commandTimeoutSeconds = timeout;
2828
}
2929

3030
public async Task<DataTable> CollectAsync(DateTime reportGenerationTime, params Tuple<string, int, string>[] parameters)
@@ -114,7 +114,7 @@ private async Task<DataTable> ExecuteSql(DateTime reportGenerationTime, params T
114114
{
115115
var command = new SqlCommand(_procedureName, connection);
116116
command.CommandType = CommandType.StoredProcedure;
117-
command.CommandTimeout = _commandTimeout;
117+
command.CommandTimeout = _commandTimeoutSeconds;
118118

119119
command.Parameters.Add("ReportGenerationTime", SqlDbType.DateTime).Value = reportGenerationTime;
120120

src/Stats.CreateAzureCdnWarehouseReports/Scripts/Stats.CreateAzureCdnWarehouseReports.cmd

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,21 @@ cd bin
77

88
title #{Jobs.stats.createazurecdnwarehousereports.Title}
99

10-
start /w stats.createazurecdnwarehousereports.exe -VaultName "#{Deployment.Azure.KeyVault.VaultName}" -ClientId "#{Deployment.Azure.KeyVault.ClientId}" -CertificateThumbprint "#{Deployment.Azure.KeyVault.CertificateThumbprint}" -AzureCdnCloudStorageAccount "#{Jobs.stats.createazurecdnwarehousereports.AzureCdn.CloudStorageAccount}" -AzureCdnCloudStorageContainerName "#{Jobs.stats.createazurecdnwarehousereports.AzureCdn.CloudStorageContainerName}" -StatisticsDatabase "#{Jobs.stats.createazurecdnwarehousereports.StatisticsDatabase}" -SourceDatabase "#{Jobs.stats.createazurecdnwarehousereports.SourceDatabase}" -DataStorageAccount "#{Jobs.stats.createazurecdnwarehousereports.DataStorageAccount}" -InstrumentationKey "#{Jobs.stats.createazurecdnwarehousereports.InstrumentationKey}" -DataContainerName "#{Jobs.stats.createazurecdnwarehousereports.DataContainerName}" -CommandTimeOut "#{Jobs.stats.createazurecdnwarehousereports.CommandTimeOut}" -verbose true -Interval #{Jobs.stats.createazurecdnwarehousereports.Interval}
10+
start /w stats.createazurecdnwarehousereports.exe ^
11+
-VaultName "#{Deployment.Azure.KeyVault.VaultName}" ^
12+
-ClientId "#{Deployment.Azure.KeyVault.ClientId}" ^
13+
-CertificateThumbprint "#{Deployment.Azure.KeyVault.CertificateThumbprint}" ^
14+
-AzureCdnCloudStorageAccount "#{Jobs.stats.createazurecdnwarehousereports.AzureCdn.CloudStorageAccount}" ^
15+
-AzureCdnCloudStorageContainerName "#{Jobs.stats.createazurecdnwarehousereports.AzureCdn.CloudStorageContainerName}" ^
16+
-StatisticsDatabase "#{Jobs.stats.createazurecdnwarehousereports.StatisticsDatabase}" ^
17+
-SourceDatabase "#{Jobs.stats.createazurecdnwarehousereports.SourceDatabase}" ^
18+
-DataStorageAccount "#{Jobs.stats.createazurecdnwarehousereports.DataStorageAccount}" ^
19+
-InstrumentationKey "#{Jobs.stats.createazurecdnwarehousereports.InstrumentationKey}" ^
20+
-DataContainerName "#{Jobs.stats.createazurecdnwarehousereports.DataContainerName}" ^
21+
-CommandTimeOut "#{Jobs.stats.createazurecdnwarehousereports.CommandTimeOut}" ^
22+
-PerPackageReportDegreeOfParallelism "#{Jobs.stats.createazurecdnwarehousereports.PerPackageReportDegreeOfParallelism}" ^
23+
-verbose true ^
24+
-Interval #{Jobs.stats.createazurecdnwarehousereports.Interval}
1125

1226
echo "Finished #{Jobs.stats.createazurecdnwarehousereports.Title}"
1327

0 commit comments

Comments
 (0)