Skip to content

Commit 666b91d

Browse files
Add client name translation (#10134)
1 parent 34f60ee commit 666b91d

3 files changed

Lines changed: 178 additions & 1 deletion

File tree

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
from dataclasses import dataclass, field
2+
from typing import List
3+
4+
# Client Categories using dataclass
5+
@dataclass(frozen=True)
class ClientCategories:
    """Display labels for the categories a client name can be sorted into.

    Every field carries a default, so ``ClientCategories()`` needs no
    arguments. The dataclass is frozen: attributes of an instance cannot be
    reassigned after construction.
    """

    nuget: str = "NuGet"
    webmatrix: str = "WebMatrix"
    nuget_package_explorer: str = "NuGet Package Explorer"
    script: str = "Script"
    crawler: str = "Crawler"
    mobile: str = "Mobile"
    browser: str = "Browser"
    unknown: str = "Unknown"
15+
16+
# Client Names using dataclass
17+
@dataclass(frozen=True)
class ClientNames:
    """Known client-name fragments, grouped by the category they indicate.

    Each field is a list of strings built by a ``default_factory``, so every
    ``ClientNames()`` instance receives its own freshly created lists.
    ``frozen=True`` only prevents rebinding the attributes; the lists
    themselves remain ordinary mutable lists.
    """

    nuget: List[str] = field(default_factory=lambda: [
        "NuGet Cross-Platform Command Line",
        "NuGet Client V3",
        "NuGet VS VSIX",
        "NuGet VS PowerShell Console",
        "NuGet VS Packages Dialog - Solution",
        "NuGet VS Packages Dialog",
        "NuGet Shim",
        "NuGet Add Package Dialog",
        "NuGet Command Line",
        "NuGet Package Manager Console",
        "NuGet Visual Studio Extension",
        "Package-Installer",
        "NuGet MSBuild Task",
        "NuGet .NET Core MSBuild Task",
        "NuGet Desktop MSBuild Task",
    ])

    webmatrix: List[str] = field(default_factory=lambda: ["WebMatrix"])

    nuget_package_explorer: List[str] = field(default_factory=lambda: [
        "NuGet Package Explorer Metro",
        "NuGet Package Explorer",
    ])

    script: List[str] = field(default_factory=lambda: [
        "Powershell",
        "curl",
        "Wget",
        "Java",
    ])

    # "Bot" and "bot" differ only by case; the second entry only matters
    # when a caller matches with a case-sensitive comparison.
    crawler: List[str] = field(default_factory=lambda: [
        "Bot",
        "bot",
        "Slurp",
        "BingPreview",
        "crawler",
        "sniffer",
        "spider",
    ])

    mobile: List[str] = field(default_factory=lambda: [
        "Mobile",
        "Android",
        "Kindle",
        "BlackBerry",
        "Openwave",
        "NetFront",
        "CFNetwork",
        "iLunascape",
    ])

    browser: List[str] = field(default_factory=lambda: [
        "Mozilla",
        "Firefox",
        "Opera",
        "Chrome",
        "Chromium",
        "Internet Explorer",
        "Browser",
        "Safari",
        "Sogou Explorer",
        "Maxthon",
        "SeaMonkey",
        "Iceweasel",
        "Sleipnir",
        "Konqueror",
        "Lynx",
        "Galeon",
        "Epiphany",
        "Lunascape",
    ])

    # Browser names that ClientNameTranslation.get_client_category compares
    # against the whole (stripped) client name instead of as a substring.
    absolute_browser_names: List[str] = field(default_factory=lambda: [
        "IE",
        "Iron",
    ])

    # NOTE(review): "Java" is also listed under ``script``, which
    # ClientNameTranslation.get_client_category checks first, so this entry
    # is never the one that decides the category there.
    unknown: List[str] = field(default_factory=lambda: [
        "PhantomJS",
        "WebKit Nightly",
        "Python Requests",
        "Jasmine",
        "Java",
        "AppleMail",
        "NuGet Test Client",
    ])
107+
108+
# Client Name Translation Logic using static methods
109+
class ClientNameTranslation:
    """Translate a raw client name into one of the ClientCategories labels.

    All members are static; the class only serves as a namespace.
    """

    @staticmethod
    def get_client_category(client_name: str) -> str:
        """Return the category label for ``client_name``.

        Matching is a case-insensitive substring search against the name
        lists in ``ClientNames``, tried in a fixed order; the first list
        that matches decides the result.

        Args:
            client_name: The client name to classify. ``None``, empty and
                whitespace-only values are accepted.

        Returns:
            One of the ``ClientCategories`` labels, or ``""`` when the name
            is blank or matches none of the known lists.
        """
        if not client_name or not client_name.strip():
            return ""

        # Build the lookup data once per call instead of once per check.
        names = ClientNames()
        categories = ClientCategories()
        contains_any = ClientNameTranslation.contains_any_client_name

        # Order matters: the first matching group wins.
        ordered_groups = (
            (names.nuget, categories.nuget),
            (names.webmatrix, categories.webmatrix),
            (names.nuget_package_explorer, categories.nuget_package_explorer),
            (names.script, categories.script),
            (names.crawler, categories.crawler),
            (names.mobile, categories.mobile),
        )
        for candidates, category in ordered_groups:
            if contains_any(client_name, candidates):
                return category

        # Check these late in the process, because other User Agents tend to
        # also send browser strings.
        # Absolute browser names must equal the whole stripped name; casefold
        # keeps this check consistent with the substring matching above.
        normalized_name = client_name.strip().casefold()
        if contains_any(client_name, names.browser) or any(
            normalized_name == exact.casefold()
            for exact in names.absolute_browser_names
        ):
            return categories.browser

        # Explicitly categorize unknowns, test frameworks, or others that
        # should be filtered out in the reports.
        if contains_any(client_name, names.unknown):
            return categories.unknown

        # Return empty for all others to allow ecosystem user agents to be
        # picked up in the reports.
        return ""

    @staticmethod
    def contains(source: str, target: str, comparison=str.casefold) -> bool:
        """Return True when ``target`` occurs inside ``source``.

        Args:
            source: The string to search in.
            target: The string to search for.
            comparison: Callable applied to both strings before the
                substring test; defaults to ``str.casefold``, which makes
                the test case-insensitive.
        """
        return comparison(target) in comparison(source)

    @staticmethod
    def contains_any_client_name(source: str, target_list: List[str], comparison=str.casefold) -> bool:
        """Return True when any entry of ``target_list`` occurs in ``source``.

        Args:
            source: The string to search in. ``None``, empty and
                whitespace-only values yield ``False``.
            target_list: Candidate substrings.
            comparison: Callable applied to ``source`` and to every target
                before the substring test; defaults to ``str.casefold``.
        """
        if not source or not source.strip():
            return False
        # Normalize the source once rather than once per candidate.
        normalized_source = comparison(source)
        return any(comparison(target) in normalized_source for target in target_list)

python/StatsLogParser/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "loginterpretation"
3-
version = "0.1.0"
3+
version = "0.2.0"
44
description = "Helper functions for parsing CDN logs to generate statistics"
55
authors = ["NuGet Server Team Engineering <[email protected]>"]
66
readme = "README.md"
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import sys
2+
sys.path.append('..') # This is to add the parent directory to the path so that the module can be imported
3+
from loginterpretation.clientnametranslation import ClientNameTranslation
4+
import pytest
5+
6+
@pytest.mark.parametrize("expected_category,client_name", [
    ("NuGet", "NuGet MSBuild Task"),
    ("NuGet", "NuGet Command Line"),
    ("NuGet", "NuGet Client V3"),
    ("Browser", "safari"),
    ("Browser", "chrome"),
    ("Crawler", "bot"),
    ("Crawler", "spider"),
    ("Crawler", "slurp"),
    ("Script", "Powershell"),
    ("Script", "PowerShell"),
    ("NuGet Package Explorer", "NuGet Package Explorer"),
])
def test_clientname_returns_correct_category(expected_category, client_name):
    """Check that each sample client name maps to the expected category.

    Several samples differ in case from the configured names (for example
    "safari" and "PowerShell"), so the cases also exercise case-insensitive
    matching.
    """
    found = ClientNameTranslation.get_client_category(client_name)
    # Every expected category above is non-empty, so plain equality also
    # rules out the empty "no category" result.
    assert found == expected_category
21+
22+
# To invoke the pytest framework and run all tests
23+
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly
    # reports failure to the calling shell or CI step when tests fail.
    raise SystemExit(pytest.main())

0 commit comments

Comments
 (0)