Skip to content

Commit d05d2f6

Browse files
authored
Typosquatting: add typosquatting check service codes (#6315)
Add the code for the typosquatting algorithms and retrieve the latest owner list info. 1. Typosquatting check service with distance calculation and comparison; 2. Normalize the string before checking; 3. Call `public bool IsDistanceLessThanThreshold(string str1, string str2, int threshold)` to compare two strings; (changed to private) 4. Call `public bool IsUploadedPackageIdTyposquatting(string uploadedPackageId)` to check for typosquatting against the checklist. Fixes: https://github.com/NuGet/Engineering/issues/1593
1 parent 6c3041c commit d05d2f6

8 files changed

Lines changed: 745 additions & 0 deletions
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
3+
4+
namespace NuGetGallery
5+
{
6+
/// <summary>
7+
/// Checks whether the package ID uploaded by a given owner is a typo-squatting attempt.
8+
/// </summary>
9+
public interface ITyposquattingCheckService
10+
{
11+
/// <summary>
12+
/// Checks whether the uploaded package is a typo-squatting package.
13+
/// </summary>
14+
/// <param name="uploadedPackageId">The package ID of the uploaded package. It is compared with the IDs of packages in the gallery to detect typo-squatting.</param>
15+
/// <param name="uploadedPackageOwner">The package owner of the uploaded package.</param>
/// <returns><c>true</c> when the uploaded package ID is considered typo-squatting; otherwise <c>false</c>.</returns>
16+
bool IsUploadedPackageIdTyposquatting(string uploadedPackageId, User uploadedPackageOwner);
17+
}
18+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
3+
4+
namespace NuGetGallery
5+
{
6+
/// <summary>
7+
/// Looks up the latest information (such as the owners list) in the DB for the typo-squatting check.
8+
/// </summary>
9+
public interface ITyposquattingUserService
10+
{
11+
/// <summary>
12+
/// Checks the latest owner information in the DB to confirm whether the uploaded package and the conflicting package are shared by the same user.
13+
/// </summary>
14+
/// <param name="packageId">The package ID of the potentially conflicting package in the gallery.
15+
/// It is used to double check that the conflicting package and the uploaded package do not share the same user.</param>
16+
/// <param name="userName">The user name of the owner of the uploaded package.</param>
/// <returns>
/// NOTE(review): presumably <c>true</c> when <paramref name="userName"/> is already an owner of
/// <paramref name="packageId"/>, in which case the caller does not treat the similarity as typo-squatting.
/// Confirm against the implementation.
/// </returns>
17+
bool CanUserTyposquat(string packageId, string userName);
18+
}
19+
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Threading;
7+
using System.Threading.Tasks;
8+
9+
namespace NuGetGallery
10+
{
11+
/// <summary>
/// Checks whether an uploaded package ID is within a small edit distance of the ID of a package in
/// <see cref="PackagesCheckList"/> that the uploading user does not own.
/// </summary>
public class TyposquattingCheckService : ITyposquattingCheckService
{
    // TODO: Threshold parameters will be saved in the configuration file.
    // https://github.com/NuGet/Engineering/issues/1645
    // Each entry maps a package ID length range [LowerBound, UpperBound) to the allowed edit distance.
    private static readonly List<ThresholdInfo> _thresholdsList = new List<ThresholdInfo>
    {
        new ThresholdInfo { LowerBound = 0, UpperBound = 30, Threshold = 0 },
        new ThresholdInfo { LowerBound = 30, UpperBound = 50, Threshold = 1 },
        new ThresholdInfo { LowerBound = 50, UpperBound = 120, Threshold = 2 }
    };

    // TODO: popular packages checklist will be implemented
    // https://github.com/NuGet/Engineering/issues/1624
    /// <summary>
    /// The packages every uploaded ID is compared against. May be null until it has been populated.
    /// </summary>
    public static List<PackageInfo> PackagesCheckList { get; set; }

    private readonly ITyposquattingUserService _userTyposquattingService;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="typosquattingUserService">Service used to re-check package ownership when a collision is found.</param>
    /// <exception cref="ArgumentNullException"><paramref name="typosquattingUserService"/> is null.</exception>
    public TyposquattingCheckService(ITyposquattingUserService typosquattingUserService)
    {
        _userTyposquattingService = typosquattingUserService ?? throw new ArgumentNullException(nameof(typosquattingUserService));
    }

    /// <summary>
    /// Checks whether the uploaded package ID collides with a package in <see cref="PackagesCheckList"/>
    /// that is not owned by the uploading user.
    /// </summary>
    /// <param name="uploadedPackageId">The package ID of the uploaded package.</param>
    /// <param name="uploadedPackageOwner">The owner of the uploaded package.</param>
    /// <returns>
    /// <c>true</c> when at least one colliding package is found; <c>false</c> otherwise, including when
    /// <see cref="PackagesCheckList"/> has not been populated.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    /// <exception cref="ArgumentException">No threshold is defined for the length of <paramref name="uploadedPackageId"/>.</exception>
    public bool IsUploadedPackageIdTyposquatting(string uploadedPackageId, User uploadedPackageOwner)
    {
        if (uploadedPackageId == null)
        {
            throw new ArgumentNullException(nameof(uploadedPackageId));
        }

        if (uploadedPackageOwner == null)
        {
            throw new ArgumentNullException(nameof(uploadedPackageOwner));
        }

        // The threshold is chosen from the length of the ID as uploaded, before normalization.
        var threshold = GetThreshold(uploadedPackageId);
        uploadedPackageId = TyposquattingStringNormalization.NormalizeString(uploadedPackageId);

        // Read the static property once so the whole check runs against a single list instance,
        // and treat an unpopulated check list as "nothing to collide with" instead of letting
        // Parallel.ForEach throw an ArgumentNullException for its 'source' parameter.
        var packagesCheckList = PackagesCheckList;
        if (packagesCheckList == null)
        {
            return false;
        }

        var countCollision = 0;
        Parallel.ForEach(packagesCheckList, (package, loopState) =>
        {
            // TODO: handle the package which is owned by an organization.
            // https://github.com/NuGet/Engineering/issues/1656
            if (package.Owners.Contains(uploadedPackageOwner.Username))
            {
                return;
            }

            if (TyposquattingDistanceCalculation.IsDistanceLessThanThreshold(uploadedPackageId, package.Id, threshold))
            {
                // Double check the owners list in the latest DB.
                if (_userTyposquattingService.CanUserTyposquat(package.Id, uploadedPackageOwner.Username))
                {
                    return;
                }

                // One collision is enough to answer the question; ask the loop to stop scheduling more work.
                Interlocked.Increment(ref countCollision);
                loopState.Stop();
            }
        });

        return countCollision != 0;
    }

    /// <summary>
    /// Returns the edit-distance threshold configured for the length of <paramref name="packageId"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The length falls outside every configured range.</exception>
    private static int GetThreshold(string packageId)
    {
        foreach (var thresholdInfo in _thresholdsList)
        {
            if (packageId.Length >= thresholdInfo.LowerBound && packageId.Length < thresholdInfo.UpperBound)
            {
                return thresholdInfo.Threshold;
            }
        }

        throw new ArgumentException("There is no predefined typo-squatting threshold for this package Id: " + packageId);
    }
}
87+
88+
/// <summary>
/// An entry of the typo-squatting check list: a package ID together with its owners.
/// </summary>
public class PackageInfo
89+
{
90+
/// <summary>
/// The package ID that uploaded package IDs are compared against.
/// </summary>
public string Id { get; set; }
91+
/// <summary>
/// The owners of the package; the uploader's user name is looked up in this set.
/// </summary>
public HashSet<string> Owners { get; set; }
92+
}
93+
94+
/// <summary>
/// Maps a range of package ID lengths to the edit-distance threshold used for the typo-squatting check.
/// </summary>
public class ThresholdInfo
95+
{
96+
/// <summary>
/// Inclusive lower bound of the package ID length range.
/// </summary>
public int LowerBound { get; set; }
97+
/// <summary>
/// Exclusive upper bound of the package ID length range.
/// </summary>
public int UpperBound { get; set; }
98+
/// <summary>
/// The edit-distance threshold applied to package IDs whose length falls in the range.
/// </summary>
public int Threshold { get; set; }
99+
}
100+
}
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Linq;
7+
using System.Text;
8+
using System.Text.RegularExpressions;
9+
10+
namespace NuGetGallery
11+
{
12+
/// <summary>
/// Edit-distance calculation used by the typo-squatting check. The distance is the classical
/// insert/delete/substitute edit distance, except that inserting or deleting one of the special
/// characters ('.', '_', '-') along the computed alignment does not count.
/// </summary>
public static class TyposquattingDistanceCalculation
{
    private const char PlaceholderForAlignment = '*'; // This const place holder variable is used for strings alignment

    private static readonly HashSet<char> SpecialCharacters = new HashSet<char> { '.', '_', '-' };

    /// <summary>
    /// Result of the dynamic-programming pass: the classical edit distance and the operation chosen for each cell.
    /// </summary>
    private class BasicEditDistanceInfo
    {
        public int Distance { get; set; }
        public PathInfo[,] Path { get; set; }
    }

    /// <summary>
    /// The edit operation recorded for a cell of the dynamic-programming table.
    /// </summary>
    private enum PathInfo
    {
        Match,
        Delete,
        Substitute,
        Insert,
    }

    /// <summary>
    /// Checks whether the distance between two strings is within the threshold.
    /// Note that the comparison is inclusive: a distance equal to <paramref name="threshold"/> returns true.
    /// </summary>
    /// <param name="str1">The first string.</param>
    /// <param name="str2">The second string.</param>
    /// <param name="threshold">The largest distance that is still reported as a match.</param>
    /// <returns><c>true</c> when the distance is less than or equal to <paramref name="threshold"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="str1"/> or <paramref name="str2"/> is null.</exception>
    public static bool IsDistanceLessThanThreshold(string str1, string str2, int threshold)
    {
        if (str1 == null)
        {
            throw new ArgumentNullException(nameof(str1));
        }

        if (str2 == null)
        {
            throw new ArgumentNullException(nameof(str2));
        }

        // Cheap rejection before the O(n*m) table is built: once the special characters are ignored,
        // a length difference larger than the threshold is reported as "not within threshold".
        // Only the lengths are needed, so the characters are counted instead of building stripped
        // copies of both strings with a regular expression.
        var lengthWithoutSpecials1 = CountNonSpecialCharacters(str1);
        var lengthWithoutSpecials2 = CountNonSpecialCharacters(str2);
        if (Math.Abs(lengthWithoutSpecials1 - lengthWithoutSpecials2) > threshold)
        {
            return false;
        }

        return GetDistance(str1, str2, threshold) <= threshold;
    }

    /// <summary>
    /// Counts the characters of <paramref name="value"/> that are not special characters.
    /// </summary>
    private static int CountNonSpecialCharacters(string value)
    {
        return value.Count(character => !SpecialCharacters.Contains(character));
    }

    /// <summary>
    /// Returns the classical edit distance when it is already within the threshold; otherwise returns
    /// the distance refreshed so that inserted/deleted special characters are not counted.
    /// </summary>
    private static int GetDistance(string str1, string str2, int threshold)
    {
        var basicEditDistanceInfo = GetBasicEditDistanceWithPath(str1, str2);
        if (basicEditDistanceInfo.Distance <= threshold)
        {
            // The refreshed distance can only be smaller, so the more expensive trace back is not needed.
            return basicEditDistanceInfo.Distance;
        }

        var alignedStrings = TraceBackAndAlignStrings(basicEditDistanceInfo.Path, str1, str2);

        return RefreshEditDistance(alignedStrings[0], alignedStrings[1], basicEditDistanceInfo.Distance);
    }

    /// <summary>
    /// The following function is used to calculate the classical edit distance and construct the path in dynamic programming way.
    /// </summary>
    private static BasicEditDistanceInfo GetBasicEditDistanceWithPath(string str1, string str2)
    {
        var distances = new int[str1.Length + 1, str2.Length + 1];
        var path = new PathInfo[str1.Length + 1, str2.Length + 1];
        distances[0, 0] = 0;
        path[0, 0] = PathInfo.Match;

        // First column: turning a prefix of str1 into the empty string takes only deletions.
        for (var i = 1; i <= str1.Length; i++)
        {
            distances[i, 0] = i;
            path[i, 0] = PathInfo.Delete;
        }

        // First row: turning the empty string into a prefix of str2 takes only insertions.
        for (var j = 1; j <= str2.Length; j++)
        {
            distances[0, j] = j;
            path[0, j] = PathInfo.Insert;
        }

        for (var i = 1; i <= str1.Length; i++)
        {
            for (var j = 1; j <= str2.Length; j++)
            {
                if (str1[i - 1] == str2[j - 1])
                {
                    distances[i, j] = distances[i - 1, j - 1];
                    path[i, j] = PathInfo.Match;
                }
                else
                {
                    // On ties the order of preference is substitute, then delete, then insert.
                    distances[i, j] = distances[i - 1, j - 1] + 1;
                    path[i, j] = PathInfo.Substitute;

                    if (distances[i - 1, j] + 1 < distances[i, j])
                    {
                        distances[i, j] = distances[i - 1, j] + 1;
                        path[i, j] = PathInfo.Delete;
                    }

                    if (distances[i, j - 1] + 1 < distances[i, j])
                    {
                        distances[i, j] = distances[i, j - 1] + 1;
                        path[i, j] = PathInfo.Insert;
                    }
                }
            }
        }

        return new BasicEditDistanceInfo
        {
            Distance = distances[str1.Length, str2.Length],
            Path = path
        };
    }

    /// <summary>
    /// The following function is used to traceback based on the construction path and align two strings.
    /// Example: For two strings: "asp.net" "aspnet". After traceback and alignment, we will have aligned strings as "asp.net" "asp*net" ('*' is the placeholder).
    /// The returned strings contain the two inputted strings after alignment.
    /// </summary>
    private static string[] TraceBackAndAlignStrings(PathInfo[,] path, string str1, string str2)
    {
        var newStr1 = new StringBuilder(str1);
        var newStr2 = new StringBuilder(str2);

        // Walk from the end of both strings towards the start. Placeholders are inserted at the
        // current position, which only shifts characters that have already been processed.
        var i = str1.Length;
        var j = str2.Length;
        while (i > 0 && j > 0)
        {
            switch (path[i, j])
            {
                case PathInfo.Match:
                case PathInfo.Substitute:
                    i--;
                    j--;
                    break;
                case PathInfo.Delete:
                    newStr2.Insert(j, PlaceholderForAlignment);
                    i--;
                    break;
                case PathInfo.Insert:
                    newStr1.Insert(i, PlaceholderForAlignment);
                    j--;
                    break;
                default:
                    throw new ArgumentException("Invalidate operation for edit distance trace back: " + path[i, j]);
            }
        }

        // Whatever is left at the front of one string is aligned with placeholders in the other one.
        for (var k = 0; k < i; k++)
        {
            newStr2.Insert(k, PlaceholderForAlignment);
        }

        for (var k = 0; k < j; k++)
        {
            newStr1.Insert(k, PlaceholderForAlignment);
        }

        return new[] { newStr1.ToString(), newStr2.ToString() };
    }

    /// <summary>
    /// The following function is used to refresh the edit distance based on predefined rules. (Insert/Delete special characters will not account for distance)
    /// Example: For two aligned strings: "asp.net" "asp*net" ('*' is the placeholder), we will scan the two strings again and the mapping from '.' to '*' will not account for the distance.
    /// So the final distance will be 0 for these two strings "asp.net" "aspnet".
    /// </summary>
    /// <exception cref="ArgumentException">The two aligned strings do not have the same length.</exception>
    private static int RefreshEditDistance(string alignedStr1, string alignedStr2, int basicEditDistance)
    {
        if (alignedStr1.Length != alignedStr2.Length)
        {
            throw new ArgumentException("The lengths of two aligned strings are not same!");
        }

        var sameSubstitution = 0;
        for (var i = 0; i < alignedStr2.Length; i++)
        {
            var first = alignedStr1[i];
            var second = alignedStr2[i];

            // A placeholder facing a special character is an inserted/deleted special character,
            // which was counted by the basic distance but must not be.
            var specialCharacterInserted = first == PlaceholderForAlignment && SpecialCharacters.Contains(second);
            var specialCharacterDeleted = second == PlaceholderForAlignment && SpecialCharacters.Contains(first);
            if (specialCharacterInserted || specialCharacterDeleted)
            {
                sameSubstitution += 1;
            }
        }

        return basicEditDistance - sameSubstitution;
    }
}
212+
}

0 commit comments

Comments
 (0)