Skip to content

Commit e520913

Browse files
committed
refactor: add three new unit tests and fix the fatal error
1 parent b2971b1 commit e520913

3 files changed

Lines changed: 107 additions & 28 deletions

File tree

builder/source/utils/discard-resolved-dup-wildcard.ts

Lines changed: 84 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,97 @@
11
import * as TLDTS from 'tldts'
2+
import { PublicSuffixList } from './wildcard-suffix-converter.js'
23

3-
export function DiscardResolvedDupWildcard(OriginSet: Set<string>): Set<string> {
4-
// Step 1: Remove subdomains whose registered domain already exists in the set
5-
const WithoutCoveredSubdomains = new Set<string>()
6-
for (const Entry of OriginSet) {
7-
const Parsed = TLDTS.parse(Entry)
8-
if (Parsed.subdomain && Parsed.domain && OriginSet.has(Parsed.domain)) {
9-
continue
4+
type ParsedEntry = {
5+
Entry: string
6+
RootLabel: string
7+
Stem: string
8+
PublicSuffix: string | null
9+
WildcardSuffix: boolean
10+
}
11+
12+
function ParseEntry(Entry: string): ParsedEntry {
13+
if (Entry.endsWith('.*')) {
14+
const Stem = Entry.slice(0, -2)
15+
const RootLabel = Stem.split('.').at(-1) ?? Stem
16+
return {
17+
Entry,
18+
RootLabel,
19+
Stem,
20+
PublicSuffix: null,
21+
WildcardSuffix: true
1022
}
11-
WithoutCoveredSubdomains.add(Entry)
1223
}
1324

14-
// Step 2: Group by domainWithoutSuffix
15-
const Groups = new Map<string, string[]>()
16-
for (const Entry of WithoutCoveredSubdomains) {
17-
const Parsed = TLDTS.parse(Entry)
18-
const Key = Parsed.domainWithoutSuffix ?? Entry
19-
const Group = Groups.get(Key)
20-
if (typeof Group === 'undefined') {
21-
Groups.set(Key, [Entry])
22-
} else {
23-
Group.push(Entry)
25+
const Parsed = TLDTS.parse(Entry)
26+
if (Parsed.publicSuffix) {
27+
PublicSuffixList.add(Parsed.publicSuffix)
28+
}
29+
30+
if (Parsed.hostname && Parsed.publicSuffix && Parsed.hostname.endsWith(`.${Parsed.publicSuffix}`)) {
31+
return {
32+
Entry,
33+
RootLabel: Parsed.domainWithoutSuffix ?? Parsed.hostname,
34+
Stem: Parsed.hostname.slice(0, -(Parsed.publicSuffix.length + 1)),
35+
PublicSuffix: Parsed.publicSuffix,
36+
WildcardSuffix: false
2437
}
2538
}
2639

27-
// Step 3: Consolidate groups with 2+ top-level entries into wildcards
40+
const Stem = Parsed.hostname ?? Entry
41+
return {
42+
Entry,
43+
RootLabel: Stem.split('.').at(-1) ?? Stem,
44+
Stem,
45+
PublicSuffix: Parsed.publicSuffix,
46+
WildcardSuffix: false
47+
}
48+
}
49+
50+
function IsCoveredByParent(Child: ParsedEntry, Parent: ParsedEntry): boolean {
51+
if (Child.Stem === Parent.Stem || !Child.Stem.endsWith(`.${Parent.Stem}`)) {
52+
return false
53+
}
54+
55+
if (Parent.WildcardSuffix) {
56+
return true
57+
}
58+
59+
return !Child.WildcardSuffix && Child.PublicSuffix === Parent.PublicSuffix
60+
}
61+
62+
export function DiscardResolvedDupWildcard(OriginSet: Set<string>): Set<string> {
63+
const ParsedEntries = [...OriginSet].map(ParseEntry)
64+
const RootsWithWildcard = new Set(
65+
ParsedEntries
66+
.filter(Entry => Entry.WildcardSuffix)
67+
.map(Entry => Entry.RootLabel)
68+
)
69+
70+
const ConcreteStemCounts = new Map<string, number>()
71+
ParsedEntries
72+
.filter(Entry => !Entry.WildcardSuffix)
73+
.forEach(Entry => ConcreteStemCounts.set(Entry.Stem, (ConcreteStemCounts.get(Entry.Stem) ?? 0) + 1))
74+
75+
const NormalizedEntries = ParsedEntries.map(Entry => {
76+
if (Entry.WildcardSuffix) {
77+
return Entry
78+
}
79+
80+
if (RootsWithWildcard.has(Entry.RootLabel) || (ConcreteStemCounts.get(Entry.Stem) ?? 0) >= 2) {
81+
return ParseEntry(`${Entry.Stem}.*`)
82+
}
83+
84+
return Entry
85+
})
86+
2887
const Result = new Set<string>()
29-
for (const [Key, Entries] of Groups) {
30-
if (Entries.length >= 2 && Entries.every(E => !TLDTS.parse(E).subdomain)) {
31-
Result.add(`${Key}.*`)
32-
} else {
33-
for (const Entry of Entries) {
34-
Result.add(Entry)
35-
}
88+
const UniqueNormalizedEntries = [...new Map(NormalizedEntries.map(Entry => [Entry.Entry, Entry])).values()]
89+
for (const Entry of UniqueNormalizedEntries) {
90+
if (UniqueNormalizedEntries.some(Parent => Parent.Entry !== Entry.Entry && IsCoveredByParent(Entry, Parent))) {
91+
continue
3692
}
93+
94+
Result.add(Entry.Entry)
3795
}
3896

3997
return Result

builder/source/utils/wildcard-suffix-converter.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
const PublicSuffixList = [
1+
export const PublicSuffixList = new Set([
22
'com', 'org', 'co', 'de', 'ru', 'fr', 'me', 'it', 'nl', 'io', 'cc', 'in', 'pl', 'xyz', 'es', 'se', 'uk', 'tv', 'info',
33
'site', 'us', 'online', 'ch', 'at', 'eu', 'top', 'be', 'cz', 'app', 'ca', 'to', 'jp', 'dev', 'kr'
4-
]
4+
])
55

66
export function ConvertWildcardSuffixToRegexPattern(Domain: string): string[] {
77
const Result: string[] = []

builder/test/utils/discard-resolved-dup-wildcard.test.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,25 @@ Test('DiscardResolvedDupWildcard does not remove non-duplicate wildcards with mu
1919
const Input = new Set(['access.chatgpt.com', 'info.chatgpt.com', 'access.claude.ai', 'info.claude.ai', 'access.huggingface.co', 'info.huggingface.co'])
2020

2121
return T.deepEqual(DiscardResolvedDupWildcard(Input), Input)
22+
})
23+
24+
Test('DiscardResolvedDupWildcard removes resolved duplicate wildcards with multiple subdomains', T => {
25+
const Input = new Set(['google.*', 'access.google.*', 'google.com', 'google.co.kr'])
26+
const Expected = new Set(['google.*'])
27+
28+
return T.deepEqual(DiscardResolvedDupWildcard(Input), Expected)
29+
})
30+
31+
Test('DiscardResolvedDupWildcard handles nested wildcard scenarios', T => {
32+
const Input = new Set(['token.google.*', 'access.google.*', 'tools.google.com', 'google.google.co.kr'])
33+
const Expected = new Set(['token.google.*', 'access.google.*', 'tools.google.*', 'google.google.*'])
34+
35+
return T.deepEqual(DiscardResolvedDupWildcard(Input), Expected)
36+
})
37+
38+
Test('DiscardResolvedDupWildcard handles complex wildcard scenarios', T => {
39+
const Input = new Set(['token.google.*', 'access.google.*', 'tools.google.com', 'google.google.co.kr', 'example.*', 'example.com', 'rust-lang.org'])
40+
const Expected = new Set(['token.google.*', 'access.google.*', 'tools.google.*', 'google.google.*','example.*', 'rust-lang.org'])
41+
42+
return T.deepEqual(DiscardResolvedDupWildcard(Input), Expected)
2243
})

0 commit comments

Comments
 (0)