Skip to content
This repository was archived by the owner on Jul 30, 2024. It is now read-only.

Commit fe2af29

Browse files
authored
Change parsing algorithm from download URL to package id+version (#343)
* Change parsing algorithm from download URL to package id+version * PR comments * bug fix * check failing cases
1 parent 7fe5850 commit fe2af29

14 files changed

Lines changed: 240 additions & 116 deletions

src/Stats.AzureCdnLogs.Common/LogEvents.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ public class LogEvents
2222
public static EventId FailedToGetFtpResponse = new EventId(512, "Failed to get FTP response");
2323
public static EventId FailedToCheckAlreadyProcessedLogFilePackageStatistics = new EventId(513, "Failed to check already processed package statistics for log file");
2424
public static EventId FailedToCheckAlreadyProcessedLogFileToolStatistics = new EventId(514, "Failed to check already processed tool statistics for log file");
25+
public static EventId MultiplePackageIDVersionParseOptions = new EventId(515, "Multiple package id/version parse options");
26+
public static EventId TranslatedPackageIdVersion = new EventId(516, "Translated package id and version");
2527
public static EventId JobRunFailed = new EventId(550, "Job run failed");
2628
public static EventId JobInitFailed = new EventId(551, "Job initialization failed");
2729
}

src/Stats.AzureCdnLogs.Common/PackageDefinition.cs

Lines changed: 0 additions & 85 deletions
This file was deleted.

src/Stats.AzureCdnLogs.Common/Stats.AzureCdnLogs.Common.csproj

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@
9292
<Compile Include="CdnLogCustomFieldParser.cs" />
9393
<Compile Include="CdnLogEntryParser.cs" />
9494
<Compile Include="NuGetCustomHeaders.cs" />
95-
<Compile Include="PackageDefinition.cs" />
9695
<Compile Include="PackageStatistics.cs" />
9796
<Compile Include="Properties\AssemblyInfo.cs" />
9897
<Compile Include="ToolStatistics.cs" />

src/Stats.ImportAzureCdnStatistics/Job.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ public override async Task Run()
8686
foreach (var leasedLogFile in leasedLogFiles)
8787
{
8888
var packageTranslator = new PackageTranslator("packagetranslations.json");
89-
var packageStatisticsParser = new PackageStatisticsParser(packageTranslator);
89+
var packageStatisticsParser = new PackageStatisticsParser(packageTranslator, LoggerFactory);
9090
await logProcessor.ProcessLogFileAsync(leasedLogFile, packageStatisticsParser, _aggregatesOnly);
9191

9292
if (_aggregatesOnly)
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Linq;
7+
using System.Web;
8+
using NuGet.Versioning;
9+
10+
namespace Stats.ImportAzureCdnStatistics
11+
{
12+
/// <summary>
/// A package id / package version pair, typically derived from the request URL of a
/// <c>.nupkg</c> download via <see cref="FromRequestUrl"/>.
/// </summary>
public class PackageDefinition
{
    private const string _nupkgExtension = ".nupkg";

    /// <summary>The package id.</summary>
    public string PackageId { get; set; }

    /// <summary>The package version, kept as a string.</summary>
    public string PackageVersion { get; set; }

    /// <summary>Creates an empty definition; both properties are left unset.</summary>
    public PackageDefinition()
    {
    }

    // Used by FromRequestUrl only; both parts are stored with surrounding whitespace trimmed.
    private PackageDefinition(string packageId, string packageVersion)
    {
        PackageId = packageId.Trim();
        PackageVersion = packageVersion.Trim();
    }

    /// <summary>
    /// Extracts every plausible package id / version pair from a request URL that points at a
    /// <c>.nupkg</c> file.
    /// </summary>
    /// <param name="requestUrl">The (possibly URL-encoded) request URL.</param>
    /// <returns>
    /// <c>null</c> when <paramref name="requestUrl"/> is null, empty, whitespace, or does not end
    /// with <c>.nupkg</c> (case-insensitive). Otherwise a list of candidates, which may be empty
    /// when no id/version split could be found and may hold more than one entry when the file
    /// name is ambiguous. Candidates appear in left-to-right order of the separating dot.
    /// </returns>
    public static IList<PackageDefinition> FromRequestUrl(string requestUrl)
    {
        // The suffix check must be ordinal: the extension is stripped below by character count,
        // and a culture-sensitive match does not guarantee that the last
        // _nupkgExtension.Length characters really are the extension.
        if (string.IsNullOrWhiteSpace(requestUrl)
            || !requestUrl.EndsWith(_nupkgExtension, StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        var resolutionOptions = new List<PackageDefinition>();

        requestUrl = HttpUtility.UrlDecode(requestUrl);

        var urlSegments = requestUrl.Split(new[] { '/' }, StringSplitOptions.RemoveEmptyEntries);

        // File name without the ".nupkg" extension.
        var fileName = urlSegments.Last();
        fileName = fileName.Substring(0, fileName.Length - _nupkgExtension.Length);

        // Special handling for flat container: when the two path segments in front of the file
        // name spell out "{id}" and "{version}" and the file name is exactly "{id}.{version}",
        // the split is unambiguous and no guessing is needed.
        if (urlSegments.Length > 3)
        {
            var packageIdContainer = urlSegments[urlSegments.Length - 3];
            var packageVersionContainer = urlSegments[urlSegments.Length - 2];

            if (string.Equals(fileName, $"{packageIdContainer}.{packageVersionContainer}", StringComparison.OrdinalIgnoreCase))
            {
                resolutionOptions.Add(new PackageDefinition(packageIdContainer, packageVersionContainer));
            }
        }

        if (!resolutionOptions.Any())
        {
            // Fallback: try every dot in the file name as the id/version boundary. A split is
            // kept only when the right-hand side parses as a NuGet version AND is already in
            // normalized form; several splits can qualify, so all of them are returned.
            var nextDotIndex = fileName.IndexOf('.');

            while (nextDotIndex != -1)
            {
                string packagePart = fileName.Substring(0, nextDotIndex);
                string versionPart = fileName.Substring(nextDotIndex + 1);

                if (NuGetVersion.TryParse(versionPart, out var parsedVersion))
                {
                    var normalizedVersion = parsedVersion.ToNormalizedString();

                    if (string.Equals(normalizedVersion, versionPart, StringComparison.OrdinalIgnoreCase))
                    {
                        resolutionOptions.Add(new PackageDefinition(packagePart, versionPart));
                    }
                }

                nextDotIndex = fileName.IndexOf('.', nextDotIndex + 1);
            }
        }

        return resolutionOptions;
    }

    /// <summary>Formats the definition as <c>[PackageId, PackageVersion]</c>.</summary>
    public override string ToString()
    {
        return $"[{PackageId}, {PackageVersion}]";
    }
}
90+
}

src/Stats.ImportAzureCdnStatistics/PackageStatisticsParser.cs

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// Copyright (c) .NET Foundation. All rights reserved.
22
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
33

4-
using NuGet;
4+
using System;
5+
using System.Linq;
6+
using Microsoft.Extensions.Logging;
57
using NuGet.Versioning;
68
using Stats.AzureCdnLogs.Common;
79

@@ -10,25 +12,50 @@ namespace Stats.ImportAzureCdnStatistics
1012
public class PackageStatisticsParser
1113
: StatisticsParser, IPackageStatisticsParser
1214
{
15+
private readonly ILogger<PackageStatisticsParser> _logger;
1316
private readonly PackageTranslator _packageTranslator;
1417

15-
public PackageStatisticsParser(PackageTranslator packageTranslator)
18+
/// <summary>
/// Creates a parser that uses <paramref name="packageTranslator"/> for package translation
/// and obtains its logger from <paramref name="loggerFactory"/>.
/// </summary>
/// <param name="packageTranslator">Translator stored for later use; it is not validated here.</param>
/// <param name="loggerFactory">Factory used to create this class's logger. Must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="loggerFactory"/> is null.</exception>
public PackageStatisticsParser(PackageTranslator packageTranslator, ILoggerFactory loggerFactory)
1619
{
20+
if (loggerFactory == null)
21+
{
22+
throw new ArgumentNullException(nameof(loggerFactory));
23+
}
24+
1725
// Stored as-is, without a null check.
_packageTranslator = packageTranslator;
26+
_logger = loggerFactory.CreateLogger<PackageStatisticsParser>();
1827
}
1928

2029
public PackageStatistics FromCdnLogEntry(CdnLogEntry cdnLogEntry)
2130
{
22-
var packageDefinition = PackageDefinition.FromRequestUrl(cdnLogEntry.RequestUrl);
31+
var packageDefinitions = PackageDefinition.FromRequestUrl(cdnLogEntry.RequestUrl);
2332

24-
if (packageDefinition == null)
33+
if (packageDefinitions == null || !packageDefinitions.Any())
2534
{
2635
return null;
2736
}
2837

38+
if (packageDefinitions.Count > 1)
39+
{
40+
_logger.LogWarning(LogEvents.MultiplePackageIDVersionParseOptions,
41+
"Found multiple parse options for URL {RequestUrl}: {PackageDefinitions}",
42+
cdnLogEntry.RequestUrl,
43+
string.Join(", ", packageDefinitions));
44+
}
45+
46+
var packageDefinition = packageDefinitions.First();
47+
2948
if (_packageTranslator != null)
3049
{
31-
packageDefinition = _packageTranslator.TranslatePackageDefinition(packageDefinition);
50+
bool translateOccured = _packageTranslator.TryTranslatePackageDefinition(packageDefinition);
51+
52+
if (translateOccured)
53+
{
54+
_logger.LogInformation(LogEvents.TranslatedPackageIdVersion,
55+
"Translated package. Url: {RequestUrl}, New definition: {PackageDefinition}",
56+
cdnLogEntry.RequestUrl,
57+
packageDefinition);
58+
}
3259
}
3360

3461
var statistic = new PackageStatistics();
@@ -42,7 +69,7 @@ public PackageStatistics FromCdnLogEntry(CdnLogEntry cdnLogEntry)
4269
statistic.UserAgent = GetUserAgentValue(cdnLogEntry);
4370
statistic.EdgeServerIpAddress = cdnLogEntry.EdgeServerIpAddress;
4471

45-
// ignore blacklisted user agents
72+
// Ignore blacklisted user agents
4673
if (!IsBlackListed(statistic.UserAgent))
4774
{
4875
return statistic;

src/Stats.ImportAzureCdnStatistics/PackageTranslator.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
using System.IO;
77
using System.Text.RegularExpressions;
88
using Newtonsoft.Json.Linq;
9-
using Stats.AzureCdnLogs.Common;
109

1110
namespace Stats.ImportAzureCdnStatistics
1211
{
@@ -45,8 +44,10 @@ public PackageTranslator(string packageTranslationsJsonPath)
4544
}
4645
}
4746

48-
public PackageDefinition TranslatePackageDefinition(PackageDefinition packageDefinition)
47+
public bool TryTranslatePackageDefinition(PackageDefinition packageDefinition)
4948
{
49+
bool translateOccurred = false;
50+
5051
if (packageDefinition != null
5152
&& !string.IsNullOrEmpty(packageDefinition.PackageId)
5253
&& !string.IsNullOrEmpty(packageDefinition.PackageVersion)
@@ -63,12 +64,13 @@ public PackageDefinition TranslatePackageDefinition(PackageDefinition packageDef
6364
{
6465
packageDefinition.PackageId = potentialTranslation.CorrectedPackageId;
6566
packageDefinition.PackageVersion = correctedPackageVersion;
67+
translateOccurred = true;
6668
break;
6769
}
6870
}
6971
}
7072

71-
return packageDefinition;
73+
return translateOccurred;
7274
}
7375
}
7476
}

src/Stats.ImportAzureCdnStatistics/Stats.ImportAzureCdnStatistics.csproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@
124124
<Reference Include="System.Spatial, Version=5.7.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
125125
<HintPath>..\..\packages\System.Spatial.5.7.0\lib\net40\System.Spatial.dll</HintPath>
126126
</Reference>
127+
<Reference Include="System.Web" />
127128
<Reference Include="System.Xml.Linq" />
128129
<Reference Include="System.Data.DataSetExtensions" />
129130
<Reference Include="Microsoft.CSharp" />
@@ -138,6 +139,7 @@
138139
<Compile Include="ApplicationInsightsHelper.cs" />
139140
<Compile Include="IPackageStatisticsParser.cs" />
140141
<Compile Include="IStatisticsBlobContainerUtility.cs" />
142+
<Compile Include="PackageDefinition.cs" />
141143
<Compile Include="StatisticsBlobContainerUtility.cs" />
142144
<Compile Include="Dimensions\IpAddressFact.cs" />
143145
<Compile Include="IStatisticsWarehouse.cs" />

tests/Tests.Stats.ImportAzureCdnStatistics/LogFileProcessorFacts.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public class LogFileProcessorFacts
2424
new PackageTranslator("packageTranslations.json");
2525

2626
private static readonly IPackageStatisticsParser _packageStatisticsParser =
27-
new PackageStatisticsParser(_packageTranslator);
27+
new PackageStatisticsParser(_packageTranslator, new LoggerFactory());
2828

2929
public class WhenOnlyPackageStatisticsInLogFile
3030
{

0 commit comments

Comments
 (0)