44using System ;
55using System . IO ;
66using System . Reflection ;
7+ using System . Text . RegularExpressions ;
78using UAParser ;
89
910namespace Stats . ImportAzureCdnStatistics
1011{
1112 public class UserAgentParser
1213 {
1314 private static readonly Parser _defaultParser ;
14- private static readonly Parser _customParser ;
15+ private static readonly Parser _knownClientsParser ;
16+ private static readonly Parser _knownClientsInChinaParser ;
1517
/// <summary>
/// Initializes the three static parsers: the UAParser default, one built from the
/// YAML returned by <c>ReadKnownClientsYaml</c>, and one built from that same YAML
/// after it has been passed through <c>AddSupportForChinaCdn</c>.
/// </summary>
static UserAgentParser()
{
    _defaultParser = Parser.GetDefault();

    // The same YAML text feeds both custom parsers; the China variant only
    // differs by the patch applied to it before parsing.
    var knownClientsYaml = ReadKnownClientsYaml();
    _knownClientsParser = Parser.FromYaml(knownClientsYaml);
    _knownClientsInChinaParser = Parser.FromYaml(AddSupportForChinaCdn(knownClientsYaml));
}
28+
/// <summary>
/// Produces a patched copy of the given YAML in which every <c>: '(user agent)</c>
/// entry has been rewritten by <c>ReplaceWhitespaceWithPlusSign</c>.
/// </summary>
/// <param name="yaml">The YAML parser definitions to patch.</param>
/// <returns>The YAML text with the matching entries replaced.</returns>
private static string AddSupportForChinaCdn(string yaml)
{
    // User agent headers on requests hitting the China CDN endpoints appear to
    // carry '+' characters where whitespace would normally be.

    // Matches entries shaped like  : '(user agent)  and captures the user agent text.
    const string userAgentEntryPattern = @"(?:[:]\s'\()+([\w-.\s]+)(?:\))+";

    // Each match is rebuilt with the ' ' characters of the captured user agent
    // swapped for '+' characters.
    return Regex.Replace(
        yaml,
        userAgentEntryPattern,
        ReplaceWhitespaceWithPlusSign,
        RegexOptions.Compiled);
}
42+
/// <summary>
/// Match evaluator that rebuilds a <c>: '(user agent)</c> entry, replacing every
/// space in the first capture group with <c>\+</c>.
/// </summary>
/// <param name="match">A match whose first capture group holds the user agent text.</param>
/// <returns>The replacement text for the match.</returns>
private static string ReplaceWhitespaceWithPlusSign(Match match)
{
    // The result ends up inside another regex in the YAML, so the '+' must be
    // written as \+ to be taken literally there.
    var userAgent = match.Groups[1].Value;
    var escapedUserAgent = userAgent.Replace(" ", @"\+");

    return string.Concat(": '(", escapedUserAgent, ")");
}
2349
2450 private static string ReadKnownClientsYaml ( )
@@ -35,7 +61,14 @@ private static string ReadKnownClientsYaml()
3561 public UserAgent ParseUserAgent ( string userAgent )
3662 {
3763 // try custom parser first
38- var parsedResult = _customParser . ParseUserAgent ( userAgent ) ;
64+ var parsedResult = _knownClientsParser . ParseUserAgent ( userAgent ) ;
65+
66+ if ( string . Equals ( parsedResult . Family , "other" , StringComparison . InvariantCultureIgnoreCase ) )
67+ {
68+ // fallback to China parser
69+ parsedResult = _knownClientsInChinaParser . ParseUserAgent ( userAgent ) ;
70+ }
71+
3972 if ( string . Equals ( parsedResult . Family , "other" , StringComparison . InvariantCultureIgnoreCase ) )
4073 {
4174 // fallback to default parser
@@ -47,7 +80,14 @@ public UserAgent ParseUserAgent(string userAgent)
4780 public OS ParseOS ( string userAgent )
4881 {
4982 // try custom parser first
50- var parsedResult = _customParser . ParseOS ( userAgent ) ;
83+ var parsedResult = _knownClientsParser . ParseOS ( userAgent ) ;
84+
85+ if ( string . Equals ( parsedResult . Family , "other" , StringComparison . InvariantCultureIgnoreCase ) )
86+ {
87+ // fallback to China parser
88+ parsedResult = _knownClientsInChinaParser . ParseOS ( userAgent ) ;
89+ }
90+
5191 if ( string . Equals ( parsedResult . Family , "other" , StringComparison . InvariantCultureIgnoreCase ) )
5292 {
5393 // fallback to default parser
@@ -60,7 +100,14 @@ public OS ParseOS(string userAgent)
60100 public Device ParseDevice ( string userAgent )
61101 {
62102 // try custom parser first
63- var parsedResult = _customParser . ParseDevice ( userAgent ) ;
103+ var parsedResult = _knownClientsParser . ParseDevice ( userAgent ) ;
104+
105+ if ( string . Equals ( parsedResult . Family , "other" , StringComparison . InvariantCultureIgnoreCase ) )
106+ {
107+ // fallback to China parser
108+ parsedResult = _knownClientsInChinaParser . ParseDevice ( userAgent ) ;
109+ }
110+
64111 if ( string . Equals ( parsedResult . Family , "other" , StringComparison . InvariantCultureIgnoreCase ) )
65112 {
66113 // fallback to default parser
0 commit comments