1010using NuGet . Packaging ;
1111using System . Collections . Generic ;
1212using static NuGet . Build . Packaging . Properties . Strings ;
13+ using System . Security . Cryptography ;
1314
1415namespace NuGet . Build . Packaging . Tasks
1516{
@@ -40,7 +41,7 @@ public override bool Execute()
4041
4142 OutputPackage = new TaskItem ( TargetPath ) ;
4243 Manifest . CopyMetadataTo ( OutputPackage ) ;
43-
44+
4445 return ! Log . HasLoggedErrors ;
4546 }
4647 catch ( Exception ex )
@@ -106,7 +107,7 @@ public Manifest CreateManifest()
106107 void AddDependencies ( Manifest manifest )
107108 {
108109 var dependencies = from item in Contents
109- where item . GetMetadata ( MetadataName . Kind ) == PackageItemKind . Dependency &&
110+ where item . GetMetadata ( MetadataName . Kind ) == PackageItemKind . Dependency &&
110111 ! "all" . Equals ( item . GetMetadata ( MetadataName . PrivateAssets ) , StringComparison . OrdinalIgnoreCase )
111112 select new Dependency
112113 {
@@ -145,18 +146,75 @@ select item.GetNuGetTargetFramework()))
145146
146147 void AddFiles ( Manifest manifest )
147148 {
148- var contents = Contents . Where ( item =>
149- ! string . IsNullOrEmpty ( item . GetMetadata ( MetadataName . PackagePath ) ) ) ;
150-
151- var duplicates = contents . GroupBy ( item => item . GetMetadata ( MetadataName . PackagePath ) )
152- . Where ( x => x . Count ( ) > 1 )
153- . Select ( x => x . Key ) ;
154-
155- foreach ( var duplicate in duplicates )
149+ var contents = new List < ITaskItem > ( ) ;
150+
151+ var groupedByPackagePath = Contents
152+ . Where ( item => ! string . IsNullOrEmpty ( item . GetMetadata ( MetadataName . PackagePath ) ) )
153+ . GroupBy ( item => item . GetMetadata ( MetadataName . PackagePath ) )
154+ // Iterate only once for this grouping.
155+ . ToDictionary ( item => item . Key , item => item . ToArray ( ) ) ;
156+
157+ // Add the ones we already determined as unique by package path.
158+ contents . AddRange ( groupedByPackagePath
159+ . Where ( group => group . Value . Length == 1 )
160+ . Select ( group => group . Value . First ( ) ) ) ;
161+
162+ var groupedByLastWriteAndLength = groupedByPackagePath
163+ . Where ( group => group . Value . Length > 1 )
164+ . SelectMany ( group => group . Value )
165+ // Tuple provides structural comparison and hashing already, so leverage that.
166+ . GroupBy ( item => Tuple . Create (
167+ item . GetMetadata ( MetadataName . PackagePath ) ,
168+ item . GetMetadata ( "Filename" ) ,
169+ item . GetMetadata ( "Extension" ) ,
170+ File . GetLastWriteTime ( item . GetMetadata ( "FullPath" ) ) ,
171+ new FileInfo ( item . GetMetadata ( "FullPath" ) ) . Length ) )
172+ . ToDictionary ( item => item . Key , item => item . ToArray ( ) ) ;
173+
174+ // Add the ones we already determined to be duplicates that can safely be
175+ // unified by package path, file name, last write time and file length.
176+ contents . AddRange ( groupedByLastWriteAndLength
177+ . Where ( group => group . Value . Length > 1 )
178+ . Select ( group => group . Value . First ( ) ) ) ;
179+
// Returns the MD5 digest of the file named by the item's "FullPath" metadata,
// formatted as a lowercase hexadecimal string. The result is used as part of a
// grouping key when comparing files that target the same package path.
string hash(ITaskItem item)
{
    // HashAlgorithm is IDisposable: scope one instance to each call so it is
    // always released, instead of caching an instance that is never disposed.
    using (var md5 = MD5.Create())
    using (var file = File.OpenRead(item.GetMetadata("FullPath")))
    {
        return string.Concat(md5.ComputeHash(file).Select(x => x.ToString("x2")));
    }
}
159188
189+ // Last remaining attempt at de-duplication is costly, but by now, we should
190+ // have successfully removed all obvious cases.
191+ // This deals with the case where the files are modified at different times
192+ // (maybe a generated file?) but their actual contents are the same.
193+ var groupedByContentHash = groupedByLastWriteAndLength
194+ . Where ( group => group . Value . Length == 1 )
195+ . SelectMany ( group => group . Value )
196+ . GroupBy ( item => Tuple . Create (
197+ item . GetMetadata ( MetadataName . PackagePath ) ,
198+ hash ( item ) ) )
199+ . ToDictionary ( item => item . Key , item => item . ToArray ( ) ) ;
200+
201+ // Add the ones we determined to be duplicates that can safely be
202+ // unified by package path and MD5 hash
203+ contents . AddRange ( groupedByContentHash
204+ . Where ( group => group . Value . Length > 1 )
205+ . Select ( group => group . Value . First ( ) ) ) ;
206+
207+ // At this point, we're 100% certain these are duplicate package path
208+ // files that have distinct sources and would result in one overwriting
209+ // the other or an invalid package.
210+ var duplicates = string . Join ( Environment . NewLine , groupedByContentHash
211+ . Where ( group => group . Value . Length == 1 )
212+ . SelectMany ( group => group . Value )
213+ . Select ( item => $ "'{ item . GetMetadata ( "FullPath" ) } ' > '{ item . GetMetadata ( MetadataName . PackagePath ) } '") ) ;
214+
215+ if ( duplicates . Length > 0 )
216+ Log . LogErrorCode ( nameof ( ErrorCode . NG0012 ) , ErrorCode . NG0012 ( duplicates ) ) ;
217+
160218 // All files need to be added so they are included in the nupkg
161219 manifest . Files . AddRange ( contents
162220 . Select ( item => new ManifestFile
@@ -180,12 +238,12 @@ void AddFiles(Manifest manifest)
// Sets the manifest's framework references from the Contents items whose Kind
// metadata equals PackageItemKind.FrameworkReference, de-duplicated with
// FrameworkAssemblyReferenceComparer.Default.
void AddFrameworkAssemblies(Manifest manifest)
{
    // The query is assigned as-is (still lazy); it is not materialized here.
    manifest.Metadata.FrameworkReferences = Contents
        .Where(item => item.GetMetadata(MetadataName.Kind) == PackageItemKind.FrameworkReference)
        .Select(item => new FrameworkAssemblyReference(
            item.ItemSpec,
            new[] { NuGetFramework.Parse(item.GetTargetFrameworkMoniker().FullName) }))
        .Distinct(FrameworkAssemblyReferenceComparer.Default);
}
@@ -199,9 +257,9 @@ void BuildPackage(Stream output)
199257 // We don't use PopulateFiles because that performs search expansion, base path
200258 // extraction and the like, which messes with our determined files to include.
201259 // TBD: do we support wildcard-based include/exclude?
202- builder . Files . AddRange ( manifest . Files . Select ( file =>
260+ builder . Files . AddRange ( manifest . Files . Select ( file =>
203261 new PhysicalPackageFile { SourcePath = file . Source , TargetPath = file . Target } ) ) ;
204-
262+
205263 builder . Save ( output ) ;
206264
207265 if ( ! string . IsNullOrEmpty ( NuspecFile ) )
0 commit comments