11// Copyright (c) .NET Foundation. All rights reserved.
22// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
33
4- using System . Collections . Generic ;
54using System . Text ;
5+ using System . Globalization ;
6+ using System . Collections . Generic ;
67
78namespace NuGetGallery
89{
@@ -12,63 +13,70 @@ public static class TyposquattingStringNormalization
1213 /// The following dictionary was built by manually picking visually similar characters from the Wikipedia list of Unicode characters.
1314 /// https://en.wikipedia.org/wiki/List_of_Unicode_characters
1415 /// </summary>
15- private static readonly IReadOnlyDictionary < char , string > SimilarCharacterDictionary = new Dictionary < char , string > ( )
16+ private static readonly IReadOnlyDictionary < string , string > SimilarCharacterDictionary = new Dictionary < string , string > ( )
1617 {
17- { 'a' , "AÀÁÂÃÄÅàáâãäåĀāĂ㥹ǍǎǞǟǠǡǺǻȀȁȂȃȦȧȺΆΑάαἀἁἂἃἄἅἆἇἈἉἊἋἌἍἎἏӐӑӒӓὰάᾀᾁᾂᾃᾄᾅᾆᾇᾈᾊᾋᾌᾍᾎᾏᾰᾱᾲᾳᾴᾶᾷᾸᾹᾺΆᾼАДад " } ,
18- { 'b' , "BƀƁƂƃƄƅɃḂḃΒϦЂБВЪЬвъьѢѣҌҍႦႪხҔҕӃӄ" } ,
19- { 'c' , "CÇçĆćĈĉĊċČčƇƈȻȼϲϹСсҪҫ𐒨 " } ,
20- { 'd' , "DÐĎďĐđƉƊƋƌǷḊḋԀԁԂԃ " } ,
21- { 'e' , "EÈÉÊËèéêëĒēĔĕĖėĘęĚěȄȅȆȇȨȩɆɇΈΕЀЁЄѐёҼҽҾҿӖӗἘἙἚἛἜἝῈΈЕе " } ,
22- { 'f' , "FƑƒḞḟϜϝҒғӺӻ " } ,
23- { 'g' , "GĜĝĞğĠġĢģƓǤǥǦǧǴǵԌԍ " } ,
24- { 'h' , "HĤĥħǶȞȟΉΗἨἩἪἫἬἭἮἯᾘᾙᾚᾛᾜᾝᾞᾟῊΉῌЋНнћҢңҤҥҺһӇӈӉӊԊԋԦԧԨԩհႬႹ𐒅𐒌𐒎𐒣 " } ,
25- { 'i' , "I¡ìíîïǐȉȋΐίιϊіїὶίῐῑῒΐῖῗΊΙΪȊȈἰἱἲἳἴἵἶἷἸἹἺἻἼἽἾἿῘῙῚΊІЇӀӏÌÍÎÏĨĩĪīĬĭĮįİǏ " } ,
26- { 'j' , "JĴĵǰȷͿϳЈ " } ,
27- { 'k' , "KĶķĸƘƙǨǩΚκϏЌКкќҚқҜҝҞҟҠҡԞԟ " } ,
28- { 'l' , "LĹĺĻļĽľĿŀŁłſƖƪȴẛ" } ,
29- { 'm' , "MṀṁΜϺϻМмӍӎ𐒄 " } ,
30- { 'n' , "NÑñŃńŅņŇňʼnƝǸǹΝᾐᾑᾒᾓᾔᾕᾖᾗῂῃῄῆῇпԤԥԦԧԮԯ𐒐 " } ,
31- { 'o' , "OÒÓÔÕÖðòóôõöøŌōŎŏŐőƠơǑǒǪǫǬǭȌȍȎȏȪȫȬȭȮȯȰȱΌΟδοόϘϙὀὁὂὃὄὅὈὉὊὋὌὍὸόῸΌОоӦӧՕჿჾ𐒆𐒠0 " } ,
32- { 'p' , "PÞþƤƥƿṖṗΡρϷϸῤῥῬРрҎҏႲႼ " } ,
33- { 'q' , "QȡɊɋԚԛգႭႳ " } ,
34- { 'r' , "RŔŕŖŗŘřƦȐȑȒȓɌɼгѓ " } ,
35- { 's' , "SŚśŜŝŞşŠšȘșȿṠṡЅѕՏႽჽ𐒖𐒡 " } ,
36- { 't' , "TŢţŤťŦŧƬƭƮȚțȾṪṫͲͳΤτТтҬҭէ " } ,
37- { 'u' , "UÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜȔȕȖȗμυϋύὐὑὒὓὔὕὖὗὺύῠῡῢΰῦῧՍႮ𐒩 " } ,
38- { 'v' , "VƔƲνѴѵѶѷ " } ,
39- { 'w' , "WŴŵƜẀẁẂẃẄẅωώШЩшщѡѿὠὡὢὣὤὥὦὧὼώᾠᾡᾢᾣᾤᾥᾦᾧῲῳῴῶῷԜԝ " } ,
40- { 'x' , "X×ΧχХхҲҳӼӽӾӿჯ " } ,
41- { 'y' , "YÝýÿŶŷŸƳƴȲȳɎɏỲỳΎΥΫγϒϓϔЎУЧуўҮүҶҷҸҹӋӌӮӯӰӱӲӳӴӵὙὛὝὟῨῩῪΎႯႸ𐒋𐒦 " } ,
42- { 'z' , "ZŹźŻżŽžƵƶȤȥΖჍ " } ,
43- { '3' , "ƷǮǯȜȝʒЗзэӞӟӠӡჳ " } ,
44- { '8' , "Ȣȣ" } ,
45- { '_' , ".-" }
18+ { "a" , "AΑАαаÀÁÂÃÄÅàáâãäåĀāĂ㥹ǍǎǞǟǠǡǺǻȀȁȂȃȦȧȺΆάἀἁἂἃἄἅἆἇἈἉἊἋἌΆἍἎἏӐӑӒӓὰάᾀᾁᾂᾃᾄᾅᾆᾇᾈᾊᾋᾌᾍᾎᾏᾰᾱᾲᾳᾴᾶᾷᾸᾹᾺᾼДд " } ,
19+ { "b" , "BΒВЪЬƀƁƂƃƄƅɃḂḃϦЂБвъьѢѣҌҍႦႪხҔҕӃӄ" } ,
20+ { "c" , "CСсϹϲÇçĆćĈĉĊċČčƇƈȻȼҪҫ𐒨 " } ,
21+ { "d" , "DƊԁÐĎďĐđƉƋƌǷḊḋԀԂԃ " } ,
22+ { "e" , "EΕЕеÈÉÊËèéêëĒēĔĕĖėĘęĚěȄȅȆȇȨȩɆɇΈЀЁЄѐёҼҽҾҿӖӗἘἙἚἛἜἝῈΈ " } ,
23+ { "f" , "FϜƑƒḞḟϝҒғӺӻ " } ,
24+ { "g" , "GǤԌĜĝĞğĠġĢģƓǥǦǧǴǵԍ " } ,
25+ { "h" , "HΗНһհҺĤĥħǶȞȟΉἨἩἪἫἬἭἮἯᾘᾙᾚᾛᾜᾝᾞᾟῊΉῌЋнћҢңҤҥӇӈӉӊԊԋԦԧԨԩႬႹ𐒅𐒌𐒎𐒣 " } ,
26+ { "i" , "IΙІӀ¡ìíîïǐȉȋΐίιϊіїὶίῐῑῒΐῖῗΊΪȊȈἰἱἲἳἴἵἶἷἸἹἺἻἼἽἾἿῘῙῚΊЇӏÌÍÎÏĨĩĪīĬĭĮįİǏ " } ,
27+ { "j" , "JЈͿϳĴĵǰȷ " } ,
28+ { "k" , "KΚКKĶķĸƘƙǨǩκϏЌкќҚқҜҝҞҟҠҡԞԟ " } ,
29+ { "l" , "LĹĺĻļĽľĿŀŁłſƖƪȴẛ" } ,
30+ { "m" , "MΜМṀṁϺϻмӍӎ𐒄 " } ,
31+ { "n" , "NΝпÑñŃńŅņŇňʼnƝǸǹᾐᾑᾒᾓᾔᾕᾖᾗῂῃῄῆῇԤԥԮԯ𐒐 " } ,
32+ { "o" , "OΟОՕჿоοÒÓÔÕÖðòóôõöøŌōŎŏŐőƠơǑǒǪǫǬǭȌȍȎȏȪȫȬȭȮȯȰȱΌδόϘϙὀὁὂὃὄὅὈὉὊὋὌὍὸόῸΌӦӧჾ𐒆𐒠0 " } ,
33+ { "p" , "PΡРрρÞþƤƥƿṖṗϷϸῤῥῬҎҏႲႼ " } ,
34+ { "q" , "QգԛȡɊɋԚႭႳ " } ,
35+ { "r" , "RгŔŕŖŗŘřƦȐȑȒȓɌɼѓ " } ,
36+ { "s" , "SЅѕՏႽჽŚśŜŝŞşŠšȘșȿṠṡ𐒖𐒡 " } ,
37+ { "t" , "TΤТͲͳŢţŤťŦŧƬƭƮȚțȾṪṫτтҬҭէ " } ,
38+ { "u" , "UՍႮÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜȔȕȖȗμυϋύὐὑὒὓὔὕὖὗὺύῠῡῢΰῦῧ𐒩 " } ,
39+ { "v" , "VνѴѵƔƲѶѷ " } ,
40+ { "w" , "WωшԜԝŴŵƜẀẁẂẃẄẅώШЩщѡѿὠὡὢὣὤὥὦὧὼώᾠᾡᾢᾣᾤᾥᾦᾧῲῳῴῶῷ " } ,
41+ { "x" , "XХΧх×χҲҳӼӽӾӿჯ " } ,
42+ { "y" , "YΥҮƳуУÝýÿŶŷŸƴȲȳɎɏỲỳΎΫγϒϓϔЎЧўүҶҷҸҹӋӌӮӯӰӱӲӳӴӵὙὛὝὟῨῩῪΎႯႸ𐒋𐒦 " } ,
43+ { "z" , "ZΖჍŹźŻżŽžƵƶȤȥ " } ,
44+ { "3" , "ƷЗʒӡჳǮǯȜȝзэӞӟӠ " } ,
45+ { "8" , "Ȣȣ" } ,
46+ { "_" , ".-" }
4647 } ;
47-
48- private static readonly IReadOnlyDictionary < char , char > NormalizedMappingDictionary = GetNormalizedMappingDictionary ( SimilarCharacterDictionary ) ;
48+
/// <summary>
/// Lookup table consulted by NormalizeString: keyed by a single text element, yielding the
/// replacement text appended to the normalized output. Built once, when the type is initialized,
/// by passing SimilarCharacterDictionary to GetNormalizedMappingDictionary.
/// </summary>
// NOTE: static field initializers run in textual order, so this field must stay declared
// below SimilarCharacterDictionary; otherwise the argument would still be null here.
49+ private static readonly IReadOnlyDictionary < string , string > NormalizedMappingDictionary = GetNormalizedMappingDictionary ( SimilarCharacterDictionary ) ;
4950
/// <summary>
/// Normalizes a string for typosquatting comparison. Every text element that has an entry in
/// <see cref="NormalizedMappingDictionary"/> is replaced by its mapped value; every other text
/// element is copied to the result unchanged.
/// </summary>
/// <param name="str">The string to normalize. May be null or empty.</param>
/// <returns>
/// The normalized string, or an empty string when <paramref name="str"/> is null or empty.
/// </returns>
public static string NormalizeString(string str)
{
    // StringInfo.GetTextElementEnumerator throws ArgumentNullException for null input, whereas the
    // earlier StringBuilder(str)-based implementation produced an empty string. Keep that contract.
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    // Presize with the input length so typical inputs need no buffer growth.
    var normalizedString = new StringBuilder(str.Length);

    // Walk the input by text element instead of by char, so that characters encoded as
    // surrogate pairs are looked up as one key rather than as two unrelated halves.
    var textElementEnumerator = StringInfo.GetTextElementEnumerator(str);
    while (textElementEnumerator.MoveNext())
    {
        var textElement = textElementEnumerator.GetTextElement();
        normalizedString.Append(
            NormalizedMappingDictionary.TryGetValue(textElement, out var normalizedTextElement)
                ? normalizedTextElement
                : textElement);
    }

    return normalizedString.ToString();
}
6370
64- private static Dictionary < char , char > GetNormalizedMappingDictionary ( IReadOnlyDictionary < char , string > similarCharacterDictionary )
71+ private static Dictionary < string , string > GetNormalizedMappingDictionary ( IReadOnlyDictionary < string , string > similarCharacterDictionary )
6572 {
66- var normalizedMappingDictionary = new Dictionary < char , char > ( ) ;
73+ var normalizedMappingDictionary = new Dictionary < string , string > ( ) ;
6774 foreach ( var item in similarCharacterDictionary )
6875 {
69- foreach ( var c in item . Value )
76+ var textElementEnumerator = StringInfo . GetTextElementEnumerator ( item . Value ) ;
77+ while ( textElementEnumerator . MoveNext ( ) )
7078 {
71- normalizedMappingDictionary [ c ] = item . Key ;
79+ normalizedMappingDictionary [ textElementEnumerator . GetTextElement ( ) ] = item . Key ;
7280 }
7381 }
7482
0 commit comments