Skip to content

Commit 3c0ffe0

Browse files
committed
fix(safari): round-2 review — WAL replay, stable ordering, error context
- Drop immutable=1 on temp-copy SQLite opens in readLocalStorageFile / countLocalStorageFile. Session.Acquire copies the -wal / -shm sidecars, so mode=ro alone lets SQLite replay WAL on the ephemeral copy and surface entries Safari committed to WAL but hasn't checkpointed yet. Live-file reads in profiles.go keep immutable=1 as before. - Order ItemTable query by (key, rowid) for deterministic exports across runs and SQLite versions. - Wrap os.ReadFile / os.ReadDir errors with the offending path so multi-origin debug logs stay scannable. - RFC-011 §7 rewritten to explain the live-vs-temp split. - New regression test asserts ORDER BY surfaces rows in key order. Addresses round-2 Copilot review on #582.
1 parent 79629ae commit 3c0ffe0

3 files changed

Lines changed: 33 additions & 7 deletions

File tree

browser/safari/extract_storage.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,11 @@ func countLocalStorage(root string) (int, error) {
105105
}
106106

107107
func countLocalStorageFile(path string) (int, error) {
108-
dsn := "file:" + path + "?mode=ro&immutable=1"
108+
// mode=ro (no immutable) so SQLite replays the copied -wal sidecar — this surfaces entries
109+
// Safari has committed to WAL but not yet checkpointed to the main DB. Writes SQLite might
110+
// make to the temp-copy's -shm during replay are harmless; the Session cleanup removes
111+
// everything. Live-file reads (profiles.go) still use immutable=1 to stay off the real WAL.
112+
dsn := "file:" + path + "?mode=ro"
109113
db, err := sql.Open("sqlite", dsn)
110114
if err != nil {
111115
return 0, fmt.Errorf("open %s: %w", path, err)
@@ -127,7 +131,7 @@ func countLocalStorageFile(path string) (int, error) {
127131
func findOriginDataDirs(root string) ([]string, error) {
128132
topEntries, err := os.ReadDir(root)
129133
if err != nil {
130-
return nil, fmt.Errorf("read origins root: %w", err)
134+
return nil, fmt.Errorf("read origins root %s: %w", root, err)
131135
}
132136
var out []string
133137
for _, top := range topEntries {
@@ -172,7 +176,7 @@ type originEndpoint struct {
172176
func readOriginFile(path string) (string, error) {
173177
data, err := os.ReadFile(path)
174178
if err != nil {
175-
return "", err
179+
return "", fmt.Errorf("read origin file %s: %w", path, err)
176180
}
177181
top, pos, terr := readOriginBlock(data, 0)
178182
if terr != nil {
@@ -269,8 +273,10 @@ type localStorageItem struct {
269273
}
270274

271275
func readLocalStorageFile(path string) ([]localStorageItem, error) {
272-
// Read-only + immutable so we don't disturb a live WAL (same pattern as profiles.go).
273-
dsn := "file:" + path + "?mode=ro&immutable=1"
276+
// mode=ro (no immutable) — see countLocalStorageFile for the WAL-replay rationale; the same
277+
// live-vs-temp split applies here. ORDER BY key, rowid makes exports byte-for-byte stable
278+
// across runs and SQLite versions.
279+
dsn := "file:" + path + "?mode=ro"
274280
db, err := sql.Open("sqlite", dsn)
275281
if err != nil {
276282
return nil, fmt.Errorf("open %s: %w", path, err)
@@ -280,7 +286,7 @@ func readLocalStorageFile(path string) ([]localStorageItem, error) {
280286
return nil, fmt.Errorf("ping %s: %w", path, err)
281287
}
282288

283-
rows, err := db.Query(`SELECT key, value FROM ItemTable`)
289+
rows, err := db.Query(`SELECT key, value FROM ItemTable ORDER BY key, rowid`)
284290
if err != nil {
285291
return nil, fmt.Errorf("query ItemTable: %w", err)
286292
}

browser/safari/extract_storage_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,24 @@ func TestCountLocalStorageFile_SkipsNullKey(t *testing.T) {
303303
assert.Equal(t, 2, count, "NULL keys are excluded from count to match extract's skip rule")
304304
}
305305

306+
func TestReadLocalStorageFile_ReturnsRowsInKeyOrder(t *testing.T) {
307+
// Rows are inserted in reverse alphabetical order; ORDER BY key, rowid in the extractor
308+
// query must surface them ascending so exports are deterministic across runs.
309+
dbPath := filepath.Join(t.TempDir(), "ls.sqlite3")
310+
writeLocalStorageDB(t, dbPath, []testLocalStorageItem{
311+
{Key: "zebra", Value: "z"},
312+
{Key: "mango", Value: "m"},
313+
{Key: "apple", Value: "a"},
314+
}, false /*addNullKey*/)
315+
316+
items, err := readLocalStorageFile(dbPath)
317+
require.NoError(t, err)
318+
require.Len(t, items, 3)
319+
assert.Equal(t, "apple", items[0].key)
320+
assert.Equal(t, "mango", items[1].key)
321+
assert.Equal(t, "zebra", items[2].key)
322+
}
323+
306324
func TestCountLocalStorageFile_MissingTable(t *testing.T) {
307325
// Real Safari has origin dirs with LocalStorage/localstorage.sqlite3 but no ItemTable yet
308326
// (seen during live verification). countLocalStorageFile must surface the error so the

rfcs/011-safari-data-storage.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,9 @@ The only encrypted category is passwords. Because they are not stored in Safari'
239239

240240
- **macOS-only**. There is no Safari on Windows or Linux.
241241
- **Full Disk Access (TCC)** is required to read the sandboxed container. Without it, cookies / history / downloads / localStorage reads fail silently with permission errors at stat or open time. Legacy paths under `~/Library/Safari/` sometimes remain readable without FDA, but are mostly empty on modern systems.
242-
- **Live-file safety**: `SafariTabs.db`, `History.db`, and `localstorage.sqlite3` can be written to by a running Safari instance. All live SQL reads use `?mode=ro&immutable=1`, which disables WAL replay and locking — the extractor sees a consistent snapshot of the main DB as of read time. Uncommitted WAL content is intentionally not replayed to avoid race-induced corruption.
242+
- **Live-file safety** follows a live-vs-temp split:
243+
- **Live reads** (`SafariTabs.db` during profile discovery in `profiles.go`) use `?mode=ro&immutable=1`, which disables WAL replay and locking so the extractor cannot disturb a running Safari — it sees a consistent snapshot of the main DB as of read time, at the cost of missing any pending WAL content.
244+
- **Temp-copy reads** (`History.db`, `localstorage.sqlite3`, etc. via `filemanager.Session.Acquire`) use `?mode=ro` only. `Session.Acquire` copies the `-wal` / `-shm` sidecars alongside the main DB, so SQLite can replay uncommitted transactions on the copy — surfacing entries Safari has written to WAL but not yet checkpointed. Any `-shm` writes SQLite performs during replay land on the ephemeral copy and are deleted with the session.
243245
- **Multi-profile availability**: requires Safari 17 (macOS 14 Sonoma) or newer. Older Safari versions have only the default profile; discovery degrades cleanly via the ReadDir fallback described in §2.1.
244246
- **File acquisition**: all per-profile files are copied into a `filemanager.Session` temp directory before extraction, except the discovery-time `SafariTabs.db` read which opens the live file directly. See [RFC-008](008-file-acquisition-and-platform-quirks.md) for the general pattern.
245247

0 commit comments

Comments
 (0)