Skip to content

Commit 4cd9943

Browse files
IhateTrains, code-factor, and Copilot
authored
Optimize the calculation of CK3 localization key hashes (#2935) #patch
- faster
- reduced memory usage

Co-authored-by: codefactor-io <[email protected]>
Co-authored-by: Copilot <[email protected]>
1 parent d83e8cc commit 4cd9943

3 files changed

Lines changed: 220 additions & 177 deletions

File tree

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
using ImperatorToCK3.UnitTests.TestHelpers;
2+
using Xunit;
3+
4+
namespace ImperatorToCK3.UnitTests.CK3;
5+
6+
public class CK3LocDBTests {
7+
/// <summary>
/// Every pair holds two different keys for which the test expects a hash conflict:
/// after <paramref name="key1"/> is added, <paramref name="key2"/> must be reported as conflicting.
/// </summary>
[Theory]
[InlineData("Mallobald", "laamp_base_contract_schemes.2541.e.tt.employer_has_trait.paranoid")]
[InlineData("dynn_Hkeng", "debug_min_popular_opinion_modifier")]
[InlineData("b_hinggan_adj", "grand_wedding_completed_guest")]
[InlineData("c_biak_adj", "b_celtzene")]
[InlineData("b_molungr_adj", "c_somkhiti")]
[InlineData("BrewPositiveAdjectiveSpectacular", "duchy_theo_cath_andalusian")]
[InlineData("childhood.2200.desc", "b_dezful_adj")]
[InlineData("khabzism_devoteeplural", "caballero_flavor")]
[InlineData("building_nishapur_mines_02", "k_IRTOCK3_ATV_adj")]
public void HashCollisionsAreDetected(string key1, string key2) {
    // Arrange: register the first key so that its hash is recorded.
    var db = new TestCK3LocDB();
    db.AddLocForLanguage(key1, language: "english", string.Empty);

    // Act
    bool hasConflict = db.KeyHasConflictingHash(key2);

    // Assert
    Assert.True(hasConflict);
}
22+
23+
/// <summary>
/// Every pair holds two keys for which the test expects no hash conflict:
/// after <paramref name="key1"/> is added, <paramref name="key2"/> must not be reported as conflicting.
/// </summary>
[Theory]
[InlineData("a", "b")]
[InlineData("key1", "key2")]
[InlineData("Mallobald", "laamp_base_contract_schemes.2541")]
[InlineData("dynn_Hkeng", "dynn_Heng")]
[InlineData("b_hinggan_adj", "b_hinggan_adj2")]
[InlineData("c_biak_adj", "c_biak_adj2")]
[InlineData("b_molungr_adj", "b_molungr_adj2")]
[InlineData("BrewPositiveAdjectiveSpectacular", "BrewPositiveAdjectiveSpectacular2")]
[InlineData("childhood.2200.desc", "childhood.2200.desc2")]
[InlineData("khabzism_devoteeplural", "khabzism_devoteeplural2")]
public void FalseHashCollisionsAreNotDetected(string key1, string key2) {
    // Arrange: register the first key so that its hash is recorded.
    var db = new TestCK3LocDB();
    db.AddLocForLanguage(key1, language: "english", string.Empty);

    // Act
    bool hasConflict = db.KeyHasConflictingHash(key2);

    // Assert
    Assert.False(hasConflict);
}
39+
}

ImperatorToCK3/CK3/CK3LocDB.cs

Lines changed: 180 additions & 176 deletions
Original file line numberDiff line numberDiff line change
@@ -1,177 +1,181 @@
1-
using commonItems;
2-
using commonItems.Collections;
3-
using commonItems.Localization;
4-
using commonItems.Mods;
5-
using ImperatorToCK3.CK3.Localization;
6-
using Murmur;
7-
using System.Collections.Generic;
8-
using System.IO;
9-
using ZLinq;
10-
11-
namespace ImperatorToCK3.CK3;
12-
13-
internal class CK3LocDB : ConcurrentIdObjectCollection<string, CK3LocBlock> {
14-
// Creates an empty database; this constructor loads no localization.
public CK3LocDB() { }
15-
16-
/// <summary>
/// Creates the database and immediately fills it by calling <see cref="LoadLocFromModFS"/>
/// with the given mod filesystem and active mod flags.
/// </summary>
public CK3LocDB(ModFilesystem ck3ModFS, IEnumerable<string> activeModFlags)
    => LoadLocFromModFS(ck3ModFS, activeModFlags);
19-
20-
/// <summary>
/// Fills this database from two sources, in order: localization scraped from
/// <paramref name="ck3ModFS"/>, then the optional localization from the converter's configurables.
/// </summary>
public void LoadLocFromModFS(ModFilesystem ck3ModFS, IEnumerable<string> activeModFlags) {
    // Step 1: read loc from CK3 and selected CK3 mods.
    var scrapedLocDB = new LocDB(ConverterGlobals.PrimaryLanguage, ConverterGlobals.SecondaryLanguages);
    scrapedLocDB.ScrapeLocalizations(ck3ModFS);
    ImportLocFromLocDB(scrapedLocDB);

    // Step 2: read loc from ImperatorToCK3 configurables.
    // It will only be outputted for keys localized in neither ModFSLocDB nor ConverterGeneratedLocDB.
    LoadOptionalLoc(activeModFlags);
}
30-
31-
/// <summary>
/// Copies every non-null loc of <paramref name="locDB"/> into this database as mod-filesystem loc,
/// creating the target loc block for each source block's ID when it doesn't exist yet.
/// </summary>
private void ImportLocFromLocDB(LocDB locDB) {
    foreach (var sourceBlock in locDB) {
        // The target block is created even if all of the source block's locs turn out to be null.
        var targetBlock = GetOrCreateLocBlock(sourceBlock.Id);
        foreach (var (language, loc) in sourceBlock) {
            if (loc is not null) {
                targetBlock.AddModFSLoc(language, loc);
            }
        }
    }
}
42-
43-
/// <summary>
/// Loads the optional localization found under <c>configurables/localization</c>:
/// the <c>base</c> subdirectory plus one subdirectory per entry of <paramref name="activeModFlags"/>.
/// An optional loc is added only for languages the matching loc block has no loc for yet.
/// </summary>
/// <param name="activeModFlags">Names of the subdirectories to read in addition to <c>base</c>; missing ones are skipped.</param>
private void LoadOptionalLoc(IEnumerable<string> activeModFlags) {
    const string optionalLocDir = "configurables/localization";
    if (!Directory.Exists(optionalLocDir)) {
        Logger.Warn("Optional loc directory not found, skipping optional loc loading.");
        return;
    }

    // Collect the file paths in one growable list instead of re-allocating an array per mod flag.
    // Order is preserved: "base" files first, then each existing mod flag directory in enumeration order.
    var optionalLocFilePaths = new List<string>();
    string baseLocDir = Path.Combine(optionalLocDir, "base");
    if (Directory.Exists(baseLocDir)) {
        optionalLocFilePaths.AddRange(Directory.GetFiles(baseLocDir, "*.yml", SearchOption.AllDirectories));
    } else {
        // Directory.GetFiles throws for a missing directory; skip it like a missing mod flag directory.
        Logger.Warn("Optional base loc directory not found, skipping base optional loc.");
    }
    foreach (var modFlag in activeModFlags) {
        string modLocDir = Path.Combine(optionalLocDir, modFlag);
        if (Directory.Exists(modLocDir)) {
            optionalLocFilePaths.AddRange(Directory.GetFiles(modLocDir, "*.yml", SearchOption.AllDirectories));
        }
    }

    var optionalConverterLocDB = new LocDB(ConverterGlobals.PrimaryLanguage, ConverterGlobals.SecondaryLanguages);
    foreach (var optionalLocFilePath in optionalLocFilePaths) {
        optionalConverterLocDB.ScrapeFile(optionalLocFilePath);
    }

    foreach (var locBlock in optionalConverterLocDB) {
        // Only add loc for the languages that are not already in the CK3LocDB.
        var ck3LocBlock = GetOrCreateLocBlock(locBlock.Id);
        foreach (var (language, loc) in locBlock) {
            if (loc is not null && !ck3LocBlock.HasLocForLanguage(language)) {
                ck3LocBlock.AddOptionalLoc(language, loc);
            }
        }
    }
}
79-
80-
private readonly System.Threading.Lock insertionLock = new();
81-
82-
/// <summary>
/// Returns the loc block stored under <paramref name="id"/>, creating and adding a new one when none exists.
/// On creation the key's hash string is recorded; if that hash is already recorded for another key,
/// a warning is logged and the earlier record is kept.
/// </summary>
public CK3LocBlock GetOrCreateLocBlock(string id) {
    lock (insertionLock) {
        if (TryGetValue(id, out var existingBlock)) {
            return existingBlock;
        }

        // Record the hash of the new key, or warn when the hash is already taken.
        string keyHash = GetHashStrForKey(id);
        if (hashToKeyDict.TryGetValue(keyHash, out var ownerKey)) {
            Logger.Warn($"Hash collision detected for loc key: {id}. Existing key: {ownerKey}");
        } else {
            hashToKeyDict.Add(keyHash, id);
        }

        // The block is created and added even when a collision was reported.
        var createdBlock = new CK3LocBlock(id, ConverterGlobals.PrimaryLanguage);
        Add(createdBlock);
        return createdBlock;
    }
}
102-
103-
// TODO: add unit test for combining loc from all the sources into one locblock
104-
105-
106-
/// <summary>
/// Looks up the loc block stored under <paramref name="key"/>.
/// </summary>
/// <returns>The stored block, or <c>null</c> when the key is not present.</returns>
public CK3LocBlock? GetLocBlockForKey(string key) {
    return TryGetValue(key, out var foundBlock) ? foundBlock : null;
}
113-
114-
/// <summary>
/// Tells whether a loc block exists for <paramref name="key"/> and has loc for <paramref name="language"/>.
/// </summary>
/// <returns><c>false</c> when the key is unknown; otherwise the block's own answer for the language.</returns>
public bool HasKeyLocForLanguage(string key, string language) {
    return TryGetValue(key, out var foundBlock) && foundBlock.HasLocForLanguage(language);
}
121-
122-
/// <summary>
/// Sets the loc of <paramref name="key"/> for <paramref name="language"/>,
/// creating the key's loc block first when it doesn't exist.
/// </summary>
public void AddLocForLanguage(string key, string language, string loc) {
    // insertionLock is taken here and again inside GetOrCreateLocBlock on the same thread.
    lock (insertionLock) {
        GetOrCreateLocBlock(key)[language] = loc;
    }
}
128-
129-
/// <summary>
/// Returns the YML loc line of <paramref name="key"/> for <paramref name="language"/>.
/// </summary>
/// <returns>The line produced by the loc block, or <c>null</c> when the key is unknown or has no loc for the language.</returns>
public string? GetYmlLocLineForLanguage(string key, string language) {
    bool hasLoc = TryGetValue(key, out var foundBlock) && foundBlock.HasLocForLanguage(language);
    return hasLoc ? foundBlock!.GetYmlLocLineForLanguage(language) : null;
}
136-
137-
/// <summary>
/// Builds the list of YML loc lines to output for <paramref name="language"/>.
/// Blocks whose loc type for the language is null or <c>CK3LocType.CK3ModFS</c>, and blocks whose loc is null, are left out.
/// </summary>
public List<string> GetLocLinesToOutputForLanguage(string language) {
    var outputLines = new List<string>();

    foreach (var block in this) {
        // If there's no loc for the language, the returned loc type is null.
        // CK3ModFS locs are already present in the CK3/mod/blankMod files, we don't need to output them.
        bool isOutputCandidate = block.GetLocTypeForLanguage(language) is not (null or CK3LocType.CK3ModFS);

        // The indexer is only consulted for candidates, as in a skip-early loop.
        if (isOutputCandidate && block[language] is not null) {
            outputLines.Add(block.GetYmlLocLineForLanguage(language));
        }
    }

    return outputLines;
}
157-
158-
/// <summary>
/// Tells whether the hash of <paramref name="key"/> is already recorded in the hash-to-key dictionary.
/// Only the hash is compared, so a key that was itself recorded earlier also yields <c>true</c>.
/// </summary>
public bool KeyHasConflictingHash(string key) {
    string keyHash = GetHashStrForKey(key);

    // GetOrCreateLocBlock mutates hashToKeyDict under insertionLock; reading it without the same lock
    // would race with those writes, and Dictionary does not support concurrent read/write access.
    lock (insertionLock) {
        return hashToKeyDict.ContainsKey(keyHash);
    }
}
161-
162-
/// <summary>
/// Computes the hash of the UTF-8 encoded <paramref name="key"/> with the shared <c>murmur3A</c> hasher
/// and returns it as an uppercase hexadecimal string (two characters per hash byte).
/// </summary>
private static string GetHashStrForKey(string key) {
    var keyBytes = System.Text.Encoding.UTF8.GetBytes(key);

    // The hasher is a single static instance shared by all callers; serialize access to it,
    // because concurrent ComputeHash calls on one instance are not guaranteed to be safe.
    byte[] hash;
    lock (murmur3A) {
        hash = murmur3A.ComputeHash(keyBytes);
    }

    // Same output as appending each byte formatted with "X2".
    return System.Convert.ToHexString(hash);
}
173-
174-
private readonly Dictionary<string, string> hashToKeyDict = []; // stores MurmurHash3A hash to key mapping
175-
176-
private static readonly Murmur32 murmur3A = MurmurHash.Create32();
1+
using commonItems;
2+
using commonItems.Collections;
3+
using commonItems.Localization;
4+
using commonItems.Mods;
5+
using ImperatorToCK3.CK3.Localization;
6+
using MurmurHash.Net;
7+
using System;
8+
using System.Buffers;
9+
using System.Collections.Generic;
10+
using System.IO;
11+
using ZLinq;
12+
13+
namespace ImperatorToCK3.CK3;
14+
15+
internal class CK3LocDB : ConcurrentIdObjectCollection<string, CK3LocBlock> {
16+
// Creates an empty database; this constructor loads no localization.
public CK3LocDB() { }
17+
18+
/// <summary>
/// Creates the database and immediately fills it by calling <see cref="LoadLocFromModFS"/>
/// with the given mod filesystem and active mod flags.
/// </summary>
public CK3LocDB(ModFilesystem ck3ModFS, IEnumerable<string> activeModFlags)
    => LoadLocFromModFS(ck3ModFS, activeModFlags);
21+
22+
/// <summary>
/// Fills this database from two sources, in order: localization scraped from
/// <paramref name="ck3ModFS"/>, then the optional localization from the converter's configurables.
/// </summary>
public void LoadLocFromModFS(ModFilesystem ck3ModFS, IEnumerable<string> activeModFlags) {
    // Step 1: read loc from CK3 and selected CK3 mods.
    var scrapedLocDB = new LocDB(ConverterGlobals.PrimaryLanguage, ConverterGlobals.SecondaryLanguages);
    scrapedLocDB.ScrapeLocalizations(ck3ModFS);
    ImportLocFromLocDB(scrapedLocDB);

    // Step 2: read loc from ImperatorToCK3 configurables.
    // It will only be outputted for keys localized in neither ModFSLocDB nor ConverterGeneratedLocDB.
    LoadOptionalLoc(activeModFlags);
}
32+
33+
/// <summary>
/// Copies every non-null loc of <paramref name="locDB"/> into this database as mod-filesystem loc,
/// creating the target loc block for each source block's ID when it doesn't exist yet.
/// </summary>
private void ImportLocFromLocDB(LocDB locDB) {
    foreach (var sourceBlock in locDB) {
        // The target block is created even if all of the source block's locs turn out to be null.
        var targetBlock = GetOrCreateLocBlock(sourceBlock.Id);
        foreach (var (language, loc) in sourceBlock) {
            if (loc is not null) {
                targetBlock.AddModFSLoc(language, loc);
            }
        }
    }
}
44+
45+
/// <summary>
/// Loads the optional localization found under <c>configurables/localization</c>:
/// the <c>base</c> subdirectory plus one subdirectory per entry of <paramref name="activeModFlags"/>.
/// An optional loc is added only for languages the matching loc block has no loc for yet.
/// </summary>
/// <param name="activeModFlags">Names of the subdirectories to read in addition to <c>base</c>; missing ones are skipped.</param>
private void LoadOptionalLoc(IEnumerable<string> activeModFlags) {
    const string optionalLocDir = "configurables/localization";
    if (!Directory.Exists(optionalLocDir)) {
        Logger.Warn("Optional loc directory not found, skipping optional loc loading.");
        return;
    }

    // Collect the file paths in one growable list instead of re-allocating an array per mod flag.
    // Order is preserved: "base" files first, then each existing mod flag directory in enumeration order.
    var optionalLocFilePaths = new List<string>();
    string baseLocDir = Path.Combine(optionalLocDir, "base");
    if (Directory.Exists(baseLocDir)) {
        optionalLocFilePaths.AddRange(Directory.GetFiles(baseLocDir, "*.yml", SearchOption.AllDirectories));
    } else {
        // Directory.GetFiles throws for a missing directory; skip it like a missing mod flag directory.
        Logger.Warn("Optional base loc directory not found, skipping base optional loc.");
    }
    foreach (var modFlag in activeModFlags) {
        string modLocDir = Path.Combine(optionalLocDir, modFlag);
        if (Directory.Exists(modLocDir)) {
            optionalLocFilePaths.AddRange(Directory.GetFiles(modLocDir, "*.yml", SearchOption.AllDirectories));
        }
    }

    var optionalConverterLocDB = new LocDB(ConverterGlobals.PrimaryLanguage, ConverterGlobals.SecondaryLanguages);
    foreach (var optionalLocFilePath in optionalLocFilePaths) {
        optionalConverterLocDB.ScrapeFile(optionalLocFilePath);
    }

    foreach (var locBlock in optionalConverterLocDB) {
        // Only add loc for the languages that are not already in the CK3LocDB.
        var ck3LocBlock = GetOrCreateLocBlock(locBlock.Id);
        foreach (var (language, loc) in locBlock) {
            if (loc is not null && !ck3LocBlock.HasLocForLanguage(language)) {
                ck3LocBlock.AddOptionalLoc(language, loc);
            }
        }
    }
}
81+
82+
private readonly System.Threading.Lock insertionLock = new();
83+
84+
/// <summary>
/// Returns the loc block stored under <paramref name="id"/>, creating and adding a new one when none exists.
/// On creation the key's 32-bit hash is recorded; if that hash is already recorded for another key,
/// a warning is logged and the earlier record is kept.
/// </summary>
public CK3LocBlock GetOrCreateLocBlock(string id) {
    lock (insertionLock) {
        if (TryGetValue(id, out var existingBlock)) {
            return existingBlock;
        }

        // Record the hash of the new key, or warn when the hash is already taken.
        uint keyHash = GetHashForKey(id);
        if (hashToKeyDict.TryGetValue(keyHash, out var ownerKey)) {
            Logger.Warn($"Hash collision detected for loc key: {id}. Existing key: {ownerKey}");
        } else {
            hashToKeyDict.Add(keyHash, id);
        }

        // The block is created and added even when a collision was reported.
        var createdBlock = new CK3LocBlock(id, ConverterGlobals.PrimaryLanguage);
        Add(createdBlock);
        return createdBlock;
    }
}
104+
105+
// TODO: add unit test for combining loc from all the sources into one locblock
106+
107+
/// <summary>
/// Looks up the loc block stored under <paramref name="key"/>.
/// </summary>
/// <returns>The stored block, or <c>null</c> when the key is not present.</returns>
public CK3LocBlock? GetLocBlockForKey(string key) {
    return TryGetValue(key, out var foundBlock) ? foundBlock : null;
}
114+
115+
/// <summary>
/// Tells whether a loc block exists for <paramref name="key"/> and has loc for <paramref name="language"/>.
/// </summary>
/// <returns><c>false</c> when the key is unknown; otherwise the block's own answer for the language.</returns>
public bool HasKeyLocForLanguage(string key, string language) {
    return TryGetValue(key, out var foundBlock) && foundBlock.HasLocForLanguage(language);
}
122+
123+
/// <summary>
/// Sets the loc of <paramref name="key"/> for <paramref name="language"/>,
/// creating the key's loc block first when it doesn't exist.
/// </summary>
public void AddLocForLanguage(string key, string language, string loc) {
    // insertionLock is taken here and again inside GetOrCreateLocBlock on the same thread.
    lock (insertionLock) {
        GetOrCreateLocBlock(key)[language] = loc;
    }
}
129+
130+
/// <summary>
/// Returns the YML loc line of <paramref name="key"/> for <paramref name="language"/>.
/// </summary>
/// <returns>The line produced by the loc block, or <c>null</c> when the key is unknown or has no loc for the language.</returns>
public string? GetYmlLocLineForLanguage(string key, string language) {
    bool hasLoc = TryGetValue(key, out var foundBlock) && foundBlock.HasLocForLanguage(language);
    return hasLoc ? foundBlock!.GetYmlLocLineForLanguage(language) : null;
}
137+
138+
/// <summary>
/// Builds the list of YML loc lines to output for <paramref name="language"/>.
/// Blocks whose loc type for the language is null or <c>CK3LocType.CK3ModFS</c>, and blocks whose loc is null, are left out.
/// </summary>
public List<string> GetLocLinesToOutputForLanguage(string language) {
    var outputLines = new List<string>();

    foreach (var block in this) {
        // If there's no loc for the language, the returned loc type is null.
        // CK3ModFS locs are already present in the CK3/mod/blankMod files, we don't need to output them.
        bool isOutputCandidate = block.GetLocTypeForLanguage(language) is not (null or CK3LocType.CK3ModFS);

        // The indexer is only consulted for candidates, as in a skip-early loop.
        if (isOutputCandidate && block[language] is not null) {
            outputLines.Add(block.GetYmlLocLineForLanguage(language));
        }
    }

    return outputLines;
}
158+
159+
/// <summary>
/// Tells whether the hash of <paramref name="key"/> is already recorded in the hash-to-key dictionary.
/// Only the hash is compared, so a key that was itself recorded earlier also yields <c>true</c>.
/// </summary>
public bool KeyHasConflictingHash(string key) {
    // Hashing is a static computation on the key alone; the lock guards only the dictionary lookup.
    uint keyHash = GetHashForKey(key);
    lock (insertionLock) {
        return hashToKeyDict.ContainsKey(keyHash);
    }
}
164+
165+
/// <summary>
/// Computes the 32-bit MurmurHash3 (seed 0) of the UTF-8 encoding of <paramref name="key"/>.
/// </summary>
/// <param name="key">Localization key to hash.</param>
/// <returns>The hash of the key's UTF-8 bytes.</returns>
private static uint GetHashForKey(string key) {
    // Keys whose worst-case UTF-8 size fits this limit are encoded on the stack;
    // only longer keys rent a buffer from the shared pool.
    const int stackBufferSize = 256;

    var utf8 = System.Text.Encoding.UTF8;
    int maxBytes = utf8.GetMaxByteCount(key.Length);

    // Stays null on the stack path, so the finally block returns only what was actually rented.
    byte[]? rented = null;
    Span<byte> buffer = maxBytes <= stackBufferSize
        ? stackalloc byte[stackBufferSize]
        : (rented = ArrayPool<byte>.Shared.Rent(maxBytes));
    try {
        int bytesWritten = utf8.GetBytes(key.AsSpan(), buffer);

        // Hash only the bytes that were written, not the whole (possibly larger) buffer.
        ReadOnlySpan<byte> bytes = buffer[..bytesWritten];
        return MurmurHash3.Hash32(bytes, seed: 0);
    } finally {
        if (rented is not null) {
            ArrayPool<byte>.Shared.Return(rented);
        }
    }
}
179+
180+
private readonly Dictionary<uint, string> hashToKeyDict = new(); // stores Murmur32 hash to key mapping
177181
}

0 commit comments

Comments
 (0)