Skip to content

Commit 79629ae

Browse files
committed
fix(safari): latin-1 origin decoding, NULL key skip, count fast-path
- Decode originEncASCII via decodeLatin1 so high-byte records preserve their ISO-8859-1 meaning instead of being interpreted as UTF-8. Matches the pattern in chromium/extract_storage.go. - Skip ItemTable rows where key is NULL — SQLite's UNIQUE constraint permits multiple NULLs, and silently lowering them to empty strings would collide with legitimate empty-string keys. - countLocalStorage now walks origin dirs and runs SELECT COUNT(key) per localstorage.sqlite3 instead of fully decoding every value. COUNT(key) naturally excludes NULLs, keeping count and extract symmetric. Addresses Copilot review feedback on #582.
1 parent 594a8ea commit 79629ae

3 files changed

Lines changed: 145 additions & 4 deletions

File tree

browser/safari/extract_storage.go

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,43 @@ func extractLocalStorage(root string) ([]types.StorageEntry, error) {
8282
return entries, nil
8383
}
8484

85+
// countLocalStorage sums ItemTable row counts across every origin DB under root without
86+
// parsing origin files or decoding values — CountEntries callers only need the total, not the
87+
// URLs or plaintext. COUNT(key) naturally excludes NULL keys, matching the same skip rule
88+
// applied by readLocalStorageFile, so count and extract stay in sync.
8589
func countLocalStorage(root string) (int, error) {
86-
entries, err := extractLocalStorage(root)
90+
dirs, err := findOriginDataDirs(root)
8791
if err != nil {
8892
return 0, err
8993
}
90-
return len(entries), nil
94+
total := 0
95+
for _, od := range dirs {
96+
dbPath := filepath.Join(od, webkitLocalStorageSubdir, webkitLocalStorageDB)
97+
n, err := countLocalStorageFile(dbPath)
98+
if err != nil {
99+
log.Debugf("safari localstorage: count %s: %v", dbPath, err)
100+
continue
101+
}
102+
total += n
103+
}
104+
return total, nil
105+
}
106+
107+
func countLocalStorageFile(path string) (int, error) {
108+
dsn := "file:" + path + "?mode=ro&immutable=1"
109+
db, err := sql.Open("sqlite", dsn)
110+
if err != nil {
111+
return 0, fmt.Errorf("open %s: %w", path, err)
112+
}
113+
defer db.Close()
114+
if err := db.Ping(); err != nil {
115+
return 0, fmt.Errorf("ping %s: %w", path, err)
116+
}
117+
var count int
118+
if err := db.QueryRow(`SELECT COUNT(key) FROM ItemTable`).Scan(&count); err != nil {
119+
return 0, fmt.Errorf("count ItemTable: %w", err)
120+
}
121+
return count, nil
91122
}
92123

93124
// findOriginDataDirs returns <root>/<h1>/<h2>/ paths that contain both an "origin" file and
@@ -206,14 +237,24 @@ func readOriginString(data []byte, pos int) (string, int, error) {
206237
pos += length
207238
switch enc {
208239
case originEncASCII:
209-
return string(chunk), pos, nil
240+
return decodeLatin1(chunk), pos, nil
210241
case originEncUTF16:
211242
return decodeUTF16LE(chunk), pos, nil
212243
default:
213-
return string(chunk), pos, nil
244+
return decodeLatin1(chunk), pos, nil
214245
}
215246
}
216247

248+
// decodeLatin1 converts ISO-8859-1 bytes to a valid UTF-8 Go string. Latin-1 byte values map
249+
// 1:1 to Unicode code points U+0000–U+00FF. Mirrors the helper in chromium/extract_storage.go.
250+
func decodeLatin1(b []byte) string {
251+
runes := make([]rune, len(b))
252+
for i, c := range b {
253+
runes[i] = rune(c)
254+
}
255+
return string(runes)
256+
}
257+
217258
func formatOriginURL(ep originEndpoint) string {
218259
url := ep.scheme + "://" + ep.host
219260
if ep.port != 0 {
@@ -253,6 +294,13 @@ func readLocalStorageFile(path string) ([]localStorageItem, error) {
253294
log.Debugf("safari localstorage: scan row in %s: %v", path, err)
254295
continue
255296
}
297+
if !key.Valid {
298+
// NULL keys would collide with legitimate empty-string keys in the output and are
299+
// not meaningful localStorage entries. The UNIQUE constraint in ItemTable still
300+
// permits multiple NULL rows in SQLite, so we filter them here.
301+
log.Debugf("safari localstorage: skip row with NULL key in %s", path)
302+
continue
303+
}
256304
items = append(items, localStorageItem{
257305
key: key.String,
258306
value: decodeLocalStorageValue(value),

browser/safari/extract_storage_test.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
package safari
22

33
import (
4+
"database/sql"
45
"os"
56
"path/filepath"
67
"strings"
78
"testing"
89

910
"github.com/stretchr/testify/assert"
1011
"github.com/stretchr/testify/require"
12+
_ "modernc.org/sqlite"
1113
)
1214

1315
// ---------------------------------------------------------------------------
@@ -35,6 +37,26 @@ func TestReadOriginBlock_NonDefaultPort(t *testing.T) {
3537
assert.Equal(t, "https://example.com:8443", formatOriginURL(top))
3638
}
3739

40+
func TestReadOriginBlock_Latin1HighByte(t *testing.T) {
41+
// WebKit stores scheme/host records with encoding byte 0x01 = Latin-1. Verify high-byte
42+
// bytes decode as Latin-1 (é = 0xE9) rather than being passed through as invalid UTF-8.
43+
data := []byte{
44+
0x04, 0x00, 0x00, 0x00, 0x01, 'h', 't', 't', 'p', // scheme "http"
45+
0x04, 0x00, 0x00, 0x00, 0x01, 'c', 'a', 'f', 0xe9, // host "café" (Latin-1)
46+
0x00, // port default
47+
}
48+
ep, _, err := readOriginBlock(data, 0)
49+
require.NoError(t, err)
50+
assert.Equal(t, "http", ep.scheme)
51+
assert.Equal(t, "café", ep.host)
52+
}
53+
54+
func TestDecodeLatin1(t *testing.T) {
55+
assert.Equal(t, "café", decodeLatin1([]byte{'c', 'a', 'f', 0xe9}))
56+
assert.Equal(t, "hello", decodeLatin1([]byte("hello")))
57+
assert.Empty(t, decodeLatin1(nil))
58+
}
59+
3860
func TestReadOriginFile_FramePreferred(t *testing.T) {
3961
dir := t.TempDir()
4062
originPath := filepath.Join(dir, "origin")
@@ -250,3 +272,46 @@ func TestCountLocalStorage_DirMissing(t *testing.T) {
250272
require.Error(t, err)
251273
assert.Equal(t, 0, count)
252274
}
275+
276+
// ---------------------------------------------------------------------------
277+
// NULL-key handling — readLocalStorageFile / countLocalStorageFile both skip NULL keys,
278+
// keeping count and extract in sync.
279+
// ---------------------------------------------------------------------------
280+
281+
func TestReadLocalStorageFile_SkipsNullKey(t *testing.T) {
282+
dbPath := filepath.Join(t.TempDir(), "ls.sqlite3")
283+
writeLocalStorageDB(t, dbPath, []testLocalStorageItem{
284+
{Key: "real", Value: "keeper"},
285+
}, true /*addNullKey*/)
286+
287+
items, err := readLocalStorageFile(dbPath)
288+
require.NoError(t, err)
289+
require.Len(t, items, 1)
290+
assert.Equal(t, "real", items[0].key)
291+
assert.Equal(t, "keeper", items[0].value)
292+
}
293+
294+
func TestCountLocalStorageFile_SkipsNullKey(t *testing.T) {
295+
dbPath := filepath.Join(t.TempDir(), "ls.sqlite3")
296+
writeLocalStorageDB(t, dbPath, []testLocalStorageItem{
297+
{Key: "k1", Value: "v1"},
298+
{Key: "k2", Value: "v2"},
299+
}, true /*addNullKey*/)
300+
301+
count, err := countLocalStorageFile(dbPath)
302+
require.NoError(t, err)
303+
assert.Equal(t, 2, count, "NULL keys are excluded from count to match extract's skip rule")
304+
}
305+
306+
func TestCountLocalStorageFile_MissingTable(t *testing.T) {
307+
// Real Safari has origin dirs with LocalStorage/localstorage.sqlite3 but no ItemTable yet
308+
// (seen during live verification). countLocalStorageFile must surface the error so the
309+
// caller can log-and-skip rather than counting 0 silently.
310+
dbPath := filepath.Join(t.TempDir(), "empty.sqlite3")
311+
db, err := sql.Open("sqlite", dbPath)
312+
require.NoError(t, err)
313+
require.NoError(t, db.Close())
314+
315+
_, err = countLocalStorageFile(dbPath)
316+
require.Error(t, err)
317+
}

browser/safari/testutil_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,34 @@ func splitTestOriginURL(u string) (scheme, host string, port uint16) {
230230
return scheme, rest, 0
231231
}
232232

233+
// writeLocalStorageDB creates a minimal localstorage.sqlite3 at path with ItemTable populated
234+
// from items. When addNullKey is true, a NULL-key row is inserted first to exercise the
235+
// skip-NULL-key logic in readLocalStorageFile / countLocalStorageFile. This is a direct-DB
236+
// variant of buildTestLocalStorageDir — use it when the test targets one DB, not the full
237+
// Origins nesting.
238+
func writeLocalStorageDB(t *testing.T, path string, items []testLocalStorageItem, addNullKey bool) {
239+
t.Helper()
240+
db, err := sql.Open("sqlite", path)
241+
require.NoError(t, err)
242+
_, err = db.Exec(`CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB NOT NULL ON CONFLICT FAIL)`)
243+
require.NoError(t, err)
244+
if addNullKey {
245+
_, err = db.Exec(
246+
`INSERT INTO ItemTable (key, value) VALUES (NULL, ?)`,
247+
encodeUTF16LE("null-key-sentinel"),
248+
)
249+
require.NoError(t, err)
250+
}
251+
for _, item := range items {
252+
_, err = db.Exec(
253+
`INSERT INTO ItemTable (key, value) VALUES (?, ?)`,
254+
item.Key, encodeUTF16LE(item.Value),
255+
)
256+
require.NoError(t, err)
257+
}
258+
require.NoError(t, db.Close())
259+
}
260+
233261
// encodeUTF16LE is the inverse of extract_storage.go's decodeUTF16LE — it mirrors
234262
// the WebKit encoding so test fixtures round-trip through the extractor.
235263
func encodeUTF16LE(s string) []byte {

0 commit comments

Comments
 (0)