Skip to content
This repository was archived by the owner on Jan 12, 2024. It is now read-only.

Commit 4b5a8e3

Browse files
authored
Merge pull request #151 from thorgeir/remove-duplicates
Remove duplicate files instead of throwing an exception.
2 parents 76e8482 + 9557076 commit 4b5a8e3

8 files changed

Lines changed: 174 additions & 32 deletions

File tree

src/Build/NuGet.Build.Packaging.Tasks/CreatePackage.cs

Lines changed: 77 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
using NuGet.Packaging;
1111
using System.Collections.Generic;
1212
using static NuGet.Build.Packaging.Properties.Strings;
13+
using System.Security.Cryptography;
1314

1415
namespace NuGet.Build.Packaging.Tasks
1516
{
@@ -40,7 +41,7 @@ public override bool Execute()
4041

4142
OutputPackage = new TaskItem(TargetPath);
4243
Manifest.CopyMetadataTo(OutputPackage);
43-
44+
4445
return !Log.HasLoggedErrors;
4546
}
4647
catch (Exception ex)
@@ -106,7 +107,7 @@ public Manifest CreateManifest()
106107
void AddDependencies(Manifest manifest)
107108
{
108109
var dependencies = from item in Contents
109-
where item.GetMetadata(MetadataName.Kind) == PackageItemKind.Dependency &&
110+
where item.GetMetadata(MetadataName.Kind) == PackageItemKind.Dependency &&
110111
!"all".Equals(item.GetMetadata(MetadataName.PrivateAssets), StringComparison.OrdinalIgnoreCase)
111112
select new Dependency
112113
{
@@ -145,18 +146,75 @@ select item.GetNuGetTargetFramework()))
145146

146147
void AddFiles(Manifest manifest)
147148
{
148-
var contents = Contents.Where(item =>
149-
!string.IsNullOrEmpty(item.GetMetadata(MetadataName.PackagePath)));
150-
151-
var duplicates = contents.GroupBy(item => item.GetMetadata(MetadataName.PackagePath))
152-
.Where(x => x.Count() > 1)
153-
.Select(x => x.Key);
154-
155-
foreach (var duplicate in duplicates)
149+
var contents = new List<ITaskItem>();
150+
151+
var groupedByPackagePath = Contents
152+
.Where(item => !string.IsNullOrEmpty(item.GetMetadata(MetadataName.PackagePath)))
153+
.GroupBy(item => item.GetMetadata(MetadataName.PackagePath))
154+
// Iterate only once for this grouping.
155+
.ToDictionary(item => item.Key, item => item.ToArray());
156+
157+
// Add the ones we already determined as unique by package path.
158+
contents.AddRange(groupedByPackagePath
159+
.Where(group => group.Value.Length == 1)
160+
.Select(group => group.Value.First()));
161+
162+
var groupedByLastWriteAndLength = groupedByPackagePath
163+
.Where(group => group.Value.Length > 1)
164+
.SelectMany(group => group.Value)
165+
// Tuple provides structural comparison and hashing already, so leverage that.
166+
.GroupBy(item => Tuple.Create(
167+
item.GetMetadata(MetadataName.PackagePath),
168+
item.GetMetadata("Filename"),
169+
item.GetMetadata("Extension"),
170+
File.GetLastWriteTime(item.GetMetadata("FullPath")),
171+
new FileInfo(item.GetMetadata("FullPath")).Length))
172+
.ToDictionary(item => item.Key, item => item.ToArray());
173+
174+
// Add the ones we already determined to be duplicates that can safely be
175+
// unified by package path, file name, last write time and file length.
176+
contents.AddRange(groupedByLastWriteAndLength
177+
.Where(group => group.Value.Length > 1)
178+
.Select(group => group.Value.First()));
179+
180+
var md5 = new Lazy<HashAlgorithm>(() => MD5.Create());
181+
string hash(ITaskItem item)
156182
{
157-
Log.LogErrorCode(nameof(ErrorCode.NG0012), ErrorCode.NG0012(duplicate));
183+
using (var file = File.OpenRead(item.GetMetadata("FullPath")))
184+
{
185+
return string.Concat(md5.Value.ComputeHash(file).Select(x => x.ToString("x2")));
186+
}
158187
}
159188

189+
// Last remaining attempt at de-duplication is costly, but by now, we should
190+
// have successfully removed all obvious cases.
191+
// This deals with case where the files are modified at different times
192+
// (maybe a generated file?) but their actual contents are the same.
193+
var groupedByContentHash = groupedByLastWriteAndLength
194+
.Where(group => group.Value.Length == 1)
195+
.SelectMany(group => group.Value)
196+
.GroupBy(item => Tuple.Create(
197+
item.GetMetadata(MetadataName.PackagePath),
198+
hash(item)))
199+
.ToDictionary(item => item.Key, item => item.ToArray());
200+
201+
// Add the ones we determined to be duplicates that can safely be
202+
// unified by package path and MD5 hash
203+
contents.AddRange(groupedByContentHash
204+
.Where(group => group.Value.Length > 1)
205+
.Select(group => group.Value.First()));
206+
207+
// At this point, we're 100% certain these are duplicate package path
208+
// files that have distinct sources and would result in one overwriting
209+
// the other or an invalid package.
210+
var duplicates = string.Join(Environment.NewLine, groupedByContentHash
211+
.Where(group => group.Value.Length == 1)
212+
.SelectMany(group => group.Value)
213+
.Select(item => $"'{item.GetMetadata("FullPath")}' > '{item.GetMetadata(MetadataName.PackagePath)}'"));
214+
215+
if (duplicates.Length > 0)
216+
Log.LogErrorCode(nameof(ErrorCode.NG0012), ErrorCode.NG0012(duplicates));
217+
160218
// All files need to be added so they are included in the nupkg
161219
manifest.Files.AddRange(contents
162220
.Select(item => new ManifestFile
@@ -180,12 +238,12 @@ void AddFiles(Manifest manifest)
180238
void AddFrameworkAssemblies(Manifest manifest)
181239
{
182240
var frameworkReferences = (from item in Contents
183-
where item.GetMetadata(MetadataName.Kind) == PackageItemKind.FrameworkReference
184-
select new FrameworkAssemblyReference
185-
(
186-
item.ItemSpec,
187-
new[] { NuGetFramework.Parse(item.GetTargetFrameworkMoniker().FullName) }
188-
)).Distinct(FrameworkAssemblyReferenceComparer.Default);
241+
where item.GetMetadata(MetadataName.Kind) == PackageItemKind.FrameworkReference
242+
select new FrameworkAssemblyReference
243+
(
244+
item.ItemSpec,
245+
new[] { NuGetFramework.Parse(item.GetTargetFrameworkMoniker().FullName) }
246+
)).Distinct(FrameworkAssemblyReferenceComparer.Default);
189247

190248
manifest.Metadata.FrameworkReferences = frameworkReferences;
191249
}
@@ -199,9 +257,9 @@ void BuildPackage(Stream output)
199257
// We don't use PopulateFiles because that performs search expansion, base path
200258
// extraction and the like, which messes with our determined files to include.
201259
// TBD: do we support wilcard-based include/exclude?
202-
builder.Files.AddRange(manifest.Files.Select(file =>
260+
builder.Files.AddRange(manifest.Files.Select(file =>
203261
new PhysicalPackageFile { SourcePath = file.Source, TargetPath = file.Target }));
204-
262+
205263
builder.Save(output);
206264

207265
if (!string.IsNullOrEmpty(NuspecFile))

src/Build/NuGet.Build.Packaging.Tasks/Properties/Resources.Designer.cs

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/Build/NuGet.Build.Packaging.Tasks/Properties/Resources.resx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@
124124
<value>Project references to include in package must be nugetized.</value>
125125
</data>
126126
<data name="ErrorCode_NG0012" xml:space="preserve">
127-
<value>Duplicate package source files detected for target package file path '{0}'.</value>
127+
<value>Duplicate package source files with distinct content detected. Duplicates are not allowed in the package. Please remove the conflict between these files:
128+
{0}</value>
128129
</data>
129130
<data name="ErrorCode_NG0013" xml:space="preserve">
130131
<value>Content files cannot specify the reserved 'contentFiles' relative directory. Please use the 'CodeLanguage' and 'TargetFramework' item metadata for '{0}' to control its relative path within 'contentFiles'.</value>

src/Build/NuGet.Build.Packaging.Tests/Scenarios/given_a_library_with_non_nugetized_reference/b/project.lock.json

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"version": 2,
2+
"version": 3,
33
"targets": {
44
".NETFramework,Version=v4.6": {
55
"Microsoft.CodeAnalysis/1.3.2": {
@@ -3662,10 +3662,18 @@
36623662
"projectName": "b",
36633663
"projectPath": "C:\\Code\\Xamarin\\NuGet.Build.Packaging\\src\\Build\\NuGet.Build.Packaging.Tests\\Scenarios\\given_a_library_with_non_nugetized_reference\\b\\b.csproj",
36643664
"projectJsonPath": "C:\\Code\\Xamarin\\NuGet.Build.Packaging\\src\\Build\\NuGet.Build.Packaging.Tests\\Scenarios\\given_a_library_with_non_nugetized_reference\\b\\project.json",
3665+
"packagesPath": "C:\\Users\\kzu\\.nuget\\packages\\",
3666+
"outputPath": "C:\\Code\\Xamarin\\NuGet.Build.Packaging\\src\\Build\\NuGet.Build.Packaging.Tests\\Scenarios\\given_a_library_with_non_nugetized_reference\\b\\obj\\",
36653667
"projectStyle": "ProjectJson",
36663668
"validateRuntimeAssets": true,
3667-
"files": {
3668-
"lib/net46/b.dll": "C:\\Code\\Xamarin\\NuGet.Build.Packaging\\src\\Build\\NuGet.Build.Packaging.Tests\\Scenarios\\given_a_library_with_non_nugetized_reference\\b\\bin\\placeholder\\net46\\b.dll"
3669+
"configFilePaths": [
3670+
"C:\\Code\\Xamarin\\NuGet.Build.Packaging\\src\\NuGet.Config",
3671+
"C:\\Users\\kzu\\AppData\\Roaming\\NuGet\\NuGet.Config",
3672+
"C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config"
3673+
],
3674+
"sources": {
3675+
"https://api.nuget.org/v3/index.json": {},
3676+
"https://dotnet.myget.org/F/nuget-volatile/api/v3/index.json": {}
36693677
},
36703678
"frameworks": {
36713679
"net46": {

src/Build/NuGet.Build.Packaging.Tests/Scenarios/given_a_packaging_project_with_netstandard/b/b.csproj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,6 @@
1919
</PackageReference>
2020
</ItemGroup>
2121

22-
<Import Project="$(NuGetTargetsPath)\NuGet.Build.Packaging.targets" Condition="Exists('$(NuGetTargetsPath)\NuGet.Build.Packaging.targets')" />
22+
<Import Project="$(NuGetTargetsPath)\buildMultiTargeting\NuGet.Build.Packaging.targets" Condition="Exists('$(NuGetTargetsPath)\buildMultiTargeting\NuGet.Build.Packaging.targets')" />
23+
<Import Project="$(NuGetTargetsPath)\NuGet.Build.Packaging.targets" Condition="Exists('$(NuGetTargetsPath)\NuGet.Build.Packaging.targets')" />
2324
</Project>

src/Build/NuGet.Build.Packaging.Tests/Scenarios/given_duplicate_package_files/a.csproj

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
<PropertyGroup>
55
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
66
<PackageId>A</PackageId>
7+
<ReadmeA Condition="'$(ReadmeA)' == ''">This is a readme</ReadmeA>
78
</PropertyGroup>
89
<ItemGroup>
910
<ProjectReference Include="b.csproj">
@@ -18,6 +19,20 @@
1819
<PackageFile Include="content\content.txt">
1920
<PackagePath>content\content.txt</PackagePath>
2021
</PackageFile>
21-
</ItemGroup>
22+
<PackageFile Include="content\content.txt">
23+
<PackagePath>content\a\1\content.txt</PackagePath>
24+
</PackageFile>
25+
<PackageFile Include="content\content.txt">
26+
<PackagePath>content\a\2\content.txt</PackagePath>
27+
</PackageFile>
28+
</ItemGroup>
29+
<Target Name="GenerateReadme" BeforeTargets="GetPackageContents">
30+
<WriteLinesToFile File="$(OutputPath)\Readme.txt" Lines="$(ReadmeA)" Overwrite="true" />
31+
<ItemGroup>
32+
<PackageFile Include="$(OutputPath)\Readme.txt">
33+
<PackagePath>Readme.txt</PackagePath>
34+
</PackageFile>
35+
</ItemGroup>
36+
</Target>
2237
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), Scenario.targets))\Scenario.targets" />
2338
</Project>

src/Build/NuGet.Build.Packaging.Tests/Scenarios/given_duplicate_package_files/b.csproj

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
<PropertyGroup>
55
<ProjectGuid>$(GuidB)</ProjectGuid>
66
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
7+
<ReadmeB Condition="'$(ReadmeB)' == ''">This is a readme</ReadmeB>
78
</PropertyGroup>
89
<ItemGroup>
910
<Content Include="content\web\**\*.*">
@@ -12,6 +13,20 @@
1213
<PackageFile Include="content\content.txt">
1314
<PackagePath>content\content.txt</PackagePath>
1415
</PackageFile>
15-
</ItemGroup>
16-
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), Scenario.targets))\Scenario.targets" />
16+
<PackageFile Include="content\content.txt">
17+
<PackagePath>content\b\1\content.txt</PackagePath>
18+
</PackageFile>
19+
<PackageFile Include="content\content.txt">
20+
<PackagePath>content\b\2\content.txt</PackagePath>
21+
</PackageFile>
22+
</ItemGroup>
23+
<Target Name="GenerateReadme" BeforeTargets="GetPackageContents">
24+
<WriteLinesToFile File="$(OutputPath)\Readme.txt" Lines="$(ReadmeB)" Overwrite="true" />
25+
<ItemGroup>
26+
<PackageFile Include="$(OutputPath)\Readme.txt">
27+
<PackagePath>Readme.txt</PackagePath>
28+
</PackageFile>
29+
</ItemGroup>
30+
</Target>
31+
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), Scenario.targets))\Scenario.targets" />
1732
</Project>

src/Build/NuGet.Build.Packaging.Tests/given_duplicate_package_files.cs

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,57 @@ public given_duplicate_package_files(ITestOutputHelper output)
1515
}
1616

1717
[Fact]
18-
public void build_fails()
18+
public void exact_duplicates_are_removed()
1919
{
2020
var result = Builder.BuildScenario(nameof(given_duplicate_package_files), target: "Pack");
2121

22+
Assert.Equal(TargetResultCode.Success, result.ResultCode);
23+
}
24+
25+
[Fact]
26+
public void same_source_different_target_are_preserved()
27+
{
28+
var result = Builder.BuildScenario(nameof(given_duplicate_package_files), target: "GetPackageContents");
29+
Assert.Equal(TargetResultCode.Success, result.ResultCode);
30+
31+
Assert.Contains(result.Items, item => item.Matches(new
32+
{
33+
Kind = PackageItemKind.None,
34+
PackagePath = @"content\a\1\content.txt"
35+
}));
36+
Assert.Contains(result.Items, item => item.Matches(new
37+
{
38+
Kind = PackageItemKind.None,
39+
PackagePath = @"content\a\2\content.txt"
40+
}));
41+
Assert.Contains(result.Items, item => item.Matches(new
42+
{
43+
Kind = PackageItemKind.None,
44+
PackagePath = @"content\b\1\content.txt"
45+
}));
46+
Assert.Contains(result.Items, item => item.Matches(new
47+
{
48+
Kind = PackageItemKind.None,
49+
PackagePath = @"content\b\2\content.txt"
50+
}));
51+
52+
result = Builder.BuildScenario(nameof(given_duplicate_package_files), target: "Pack");
53+
Assert.Equal(TargetResultCode.Success, result.ResultCode);
54+
}
55+
56+
57+
[Fact]
58+
public void real_duplicates_fail()
59+
{
60+
var result = Builder.BuildScenario(nameof(given_duplicate_package_files),
61+
new { ReadmeA = "First Readme", ReadmeB = "Second Readme" },
62+
target: "Pack");
63+
2264
Assert.Equal(TargetResultCode.Failure, result.ResultCode);
2365

24-
Assert.True(result.Logger.Errors.Any(e => e.Message.Contains("content.txt")));
25-
Assert.True(result.Logger.Errors.Any(e => e.Message.Contains("nuget.js")));
66+
// A distinct Readme.txt is generated by a.csproj and b.csproj but both have the
67+
// same PackagePath so we can't disambiguate after hashing the contents.
68+
Assert.True(result.Logger.Errors.Any(e => e.Message.Contains("Readme.txt")));
2669
}
2770
}
28-
}
71+
}

0 commit comments

Comments
 (0)