Skip to content

Commit 63374d0

Browse files
committed
chore: add RegroupDomainTldLevel func
1 parent e520913 commit 63374d0

2 files changed

Lines changed: 114 additions & 0 deletions

File tree

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import * as TLDTS from 'tldts'
2+
import { PublicSuffixList } from './wildcard-suffix-converter.js'
3+
4+
/**
 * Normalised view of a single domain entry, as produced by `ParseEntry`.
 */
type ParsedEntry = {
  /** The entry exactly as it was supplied by the caller. */
  Entry: string
  /**
   * Label used as the grouping key. For `.*` entries this is the last
   * dot-separated label of `Stem`; otherwise it is tldts'
   * `domainWithoutSuffix` (or a hostname-derived fallback).
   */
  RootLabel: string
  /**
   * The entry with its suffix removed: the trailing `.*` for wildcard
   * entries, or the public suffix for regular hostnames. When no suffix could
   * be stripped this is the whole hostname (or the raw entry).
   */
  Stem: string
  /** Public suffix reported by tldts; always `null` for `.*` entries. */
  PublicSuffix: string | null
  /** `true` when the entry ends with `.*`. */
  WildcardSuffix: boolean
}
11+
12+
/**
 * Splits a domain entry into the pieces the regrouping logic works with.
 *
 * Entries ending in `.*` are handled purely textually and never reach tldts.
 * Every other entry is parsed with `TLDTS.parse`; as a side effect, any public
 * suffix tldts reports is added to the shared `PublicSuffixList`.
 *
 * @param Entry - A domain entry such as `www.example.com` or `example.*`.
 * @returns The parsed representation of `Entry`.
 */
function ParseEntry(Entry: string): ParsedEntry {
  if (Entry.endsWith('.*')) {
    // Drop the trailing ".*" (two characters) to obtain the stem.
    const Stem = Entry.slice(0, -2)
    // `split` always yields at least one element; `?? Stem` only satisfies the type checker.
    const RootLabel = Stem.split('.').at(-1) ?? Stem
    return {
      Entry,
      RootLabel,
      Stem,
      PublicSuffix: null,
      WildcardSuffix: true
    }
  }

  const Parsed = TLDTS.parse(Entry)
  if (Parsed.publicSuffix) {
    PublicSuffixList.add(Parsed.publicSuffix)
  }

  // Regular case: the hostname has at least one label in front of the public suffix.
  if (Parsed.hostname && Parsed.publicSuffix && Parsed.hostname.endsWith(`.${Parsed.publicSuffix}`)) {
    return {
      Entry,
      RootLabel: Parsed.domainWithoutSuffix ?? Parsed.hostname,
      // Strip ".<publicSuffix>" (suffix length plus the separating dot).
      Stem: Parsed.hostname.slice(0, -(Parsed.publicSuffix.length + 1)),
      PublicSuffix: Parsed.publicSuffix,
      WildcardSuffix: false
    }
  }

  // Fallback: no strippable suffix, so the whole hostname (or the raw entry) is the stem.
  const Stem = Parsed.hostname ?? Entry
  return {
    Entry,
    RootLabel: Stem.split('.').at(-1) ?? Stem,
    Stem,
    PublicSuffix: Parsed.publicSuffix,
    WildcardSuffix: false
  }
}
49+
50+
/**
 * Decides whether `Child` is a subdomain entry already covered by `Parent`.
 *
 * `Child` is only a candidate when its stem is strictly longer than the
 * parent's and ends with `.<Parent.Stem>`. Given that:
 * - a wildcard (`.*`) parent covers every such child;
 * - a non-wildcard parent covers only non-wildcard children that share the
 *   same public suffix.
 *
 * @param Child - Entry that might be redundant.
 * @param Parent - Entry that might cover `Child`.
 * @returns `true` when `Parent` covers `Child`.
 */
function IsCoveredByParent(Child: ParsedEntry, Parent: ParsedEntry): boolean {
  // Identical stems are siblings, not parent and child; unrelated stems never match.
  if (Child.Stem === Parent.Stem || !Child.Stem.endsWith(`.${Parent.Stem}`)) {
    return false
  }

  if (Parent.WildcardSuffix) {
    return true
  }

  return !Child.WildcardSuffix && Child.PublicSuffix === Parent.PublicSuffix
}
61+
62+
/**
 * Groups domain entries by their root label after dropping subdomain entries
 * that are already covered by a parent entry present in the same set.
 *
 * @param OriginSet - Domain entries, e.g. `example.com`, `www.example.com`, `example.*`.
 * @returns One inner set per root label, holding the surviving entries for that label.
 * @throws Error when two different root-level entries (entries whose stem is
 * just the root label) share the same root label.
 */
export function RegroupDomainTldLevel(OriginSet: Set<string>): Set<Set<string>> {
  const ParsedEntries = [...OriginSet].map(ParseEntry)
  // Keep an entry only if no *other* entry in the set covers it.
  const WithoutCoveredSubdomains = ParsedEntries.filter(
    Entry => !ParsedEntries.some(Parent => Parent.Entry !== Entry.Entry && IsCoveredByParent(Entry, Parent))
  )

  // Root label -> the root-level entry already seen for that label.
  const RootLevelDomains = new Map<string, string>()
  // Root label -> every surviving entry sharing that label.
  const Groups = new Map<string, Set<string>>()

  for (const Entry of WithoutCoveredSubdomains) {
    // A stem equal to the root label means the entry has no subdomain part.
    if (Entry.Stem === Entry.RootLabel) {
      const Existing = RootLevelDomains.get(Entry.RootLabel)
      if (Existing && Existing !== Entry.Entry) {
        throw new Error('RegroupDomainTldLevel: Found multiple domains with the same TLD level. Use DiscardResolvedDupWildcard func first before using RegroupDomainTldLevel.')
      }

      RootLevelDomains.set(Entry.RootLabel, Entry.Entry)
    }

    const Group = Groups.get(Entry.RootLabel)
    if (typeof Group === 'undefined') {
      Groups.set(Entry.RootLabel, new Set([Entry.Entry]))
      continue
    }

    Group.add(Entry.Entry)
  }

  return new Set(Groups.values())
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import Test from 'ava'
2+
import { RegroupDomainTldLevel } from '@builder/utils/regroup-domain-tldlevel.js'
3+
4+
// Subdomain entries are dropped when their parent domain is in the same set.
Test('RegroupDomainTldLevel discard subdomain elements only if their parent domain exists', T => {
  const Origin = new Set(['duckduckgo.com', 'access.duckduckgo.com', 'google.com', 'www.google.com'])
  const Expected = new Set([new Set(['duckduckgo.com']), new Set(['google.com'])])
  const Actual = RegroupDomainTldLevel(Origin)
  return T.deepEqual(Actual, Expected)
})
10+
11+
// Without a parent entry, sibling subdomains are kept and grouped under their shared root label.
Test('RegroupDomainTldLevel keep subdomain elements if their parent domain does not exist', T => {
  const Origin = new Set(['access.duckduckgo.com', 'token.duckduckgo.com', 'www.google.com', 'accounts.google.com'])
  const Expected = new Set([new Set(['access.duckduckgo.com', 'token.duckduckgo.com']), new Set(['www.google.com', 'accounts.google.com'])])
  const Actual = RegroupDomainTldLevel(Origin)
  return T.deepEqual(Actual, Expected)
})
17+
18+
// Several root-level entries that differ only in public suffix must be rejected.
Test('RegroupDomainTldLevel throw error if multiple domains with the same TLD level exist', T => {
  const Origin = new Set(['duckduckgo.com', 'duckduckgo.co.kr', 'duckduckgo.co.jp'])
  const ErrorInstance = T.throws(() => RegroupDomainTldLevel(Origin))
  const Message = 'RegroupDomainTldLevel: Found multiple domains with the same TLD level. Use DiscardResolvedDupWildcard func first before using RegroupDomainTldLevel.'
  return T.is(ErrorInstance?.message, Message)
})

0 commit comments

Comments
 (0)