-
Notifications
You must be signed in to change notification settings - Fork 1.8k
feat(safari): localstorage extraction #582
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
1561898
594a8ea
79629ae
3c0ffe0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,290 @@ | ||||||||||||||
| package safari | ||||||||||||||
|
|
||||||||||||||
| import ( | ||||||||||||||
| "database/sql" | ||||||||||||||
| "encoding/binary" | ||||||||||||||
| "fmt" | ||||||||||||||
| "os" | ||||||||||||||
| "path/filepath" | ||||||||||||||
| "unicode/utf16" | ||||||||||||||
|
|
||||||||||||||
| _ "modernc.org/sqlite" | ||||||||||||||
|
|
||||||||||||||
| "github.com/moond4rk/hackbrowserdata/log" | ||||||||||||||
| "github.com/moond4rk/hackbrowserdata/types" | ||||||||||||||
| ) | ||||||||||||||
|
|
||||||||||||||
| // Modern WebKit (Safari 17+) stores localStorage under a nested, partitioned layout rooted at | ||||||||||||||
| // either WebsiteDataStore/<uuid>/Origins (per named profile) or WebsiteData/Default | ||||||||||||||
| // (the pre-profile default store). Within that root: | ||||||||||||||
| // | ||||||||||||||
| // <root>/<top-frame-hash>/<frame-hash>/origin — binary; encodes top+frame origins | ||||||||||||||
| // <root>/<top-frame-hash>/<frame-hash>/LocalStorage/localstorage.sqlite3 | ||||||||||||||
| // | ||||||||||||||
| // top-hash == frame-hash ⇒ first-party; they differ for third-party partitioned storage. | ||||||||||||||
| // We report the frame origin because that's what window.localStorage exposes to JS. | ||||||||||||||
| // ItemTable: (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB NOT NULL ON CONFLICT FAIL); | ||||||||||||||
| // value BLOBs are UTF-16 LE strings. | ||||||||||||||
| // | ||||||||||||||
| // The flat "LocalStorage/<scheme>_<host>_<port>.localstorage" directory that older builds used | ||||||||||||||
| // is empty on current Safari and is no longer a supported source. | ||||||||||||||
|
|
||||||||||||||
// Names of the files and directories that make up one WebKit origin data directory.
const (
	webkitOriginFile         = "origin"
	webkitLocalStorageSubdir = "LocalStorage"
	webkitLocalStorageDB     = "localstorage.sqlite3"
	webkitOriginSaltName     = "salt" // HMAC salt sibling of the <hash> dirs; not a data dir

	// NOTE(review): presumably an upper bound applied to decoded localStorage values;
	// the code that uses it is not visible in this part of the file — confirm.
	maxLocalStorageValueLength = 2048
)
|
|
||||||||||||||
// Encoding-byte values found in the origin file (WebCore SecurityOrigin serialization).
// The byte selects how the string that follows it is encoded.
const (
	originEncASCII = 0x01 // Latin-1 / ASCII
	originEncUTF16 = 0x00 // UTF-16 LE
)
|
|
||||||||||||||
// Port marker byte that follows the (scheme, host) pair in an origin block:
//
//	0x00 → the port is the scheme default (stored as 0)
//	0x01 → the next two bytes hold the port as a little-endian uint16
const (
	originPortDefaultMarker = 0x00
	originPortExplicitFlag  = 0x01
)
|
|
||||||||||||||
| func extractLocalStorage(root string) ([]types.StorageEntry, error) { | ||||||||||||||
| dirs, err := findOriginDataDirs(root) | ||||||||||||||
| if err != nil { | ||||||||||||||
| return nil, err | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| var entries []types.StorageEntry | ||||||||||||||
| for _, od := range dirs { | ||||||||||||||
| origin, err := readOriginFile(filepath.Join(od, webkitOriginFile)) | ||||||||||||||
| if err != nil { | ||||||||||||||
| log.Debugf("safari localstorage: origin %s: %v", od, err) | ||||||||||||||
| continue | ||||||||||||||
| } | ||||||||||||||
| dbPath := filepath.Join(od, webkitLocalStorageSubdir, webkitLocalStorageDB) | ||||||||||||||
| items, err := readLocalStorageFile(dbPath) | ||||||||||||||
| if err != nil { | ||||||||||||||
| log.Debugf("safari localstorage: db %s: %v", dbPath, err) | ||||||||||||||
| continue | ||||||||||||||
| } | ||||||||||||||
| for _, it := range items { | ||||||||||||||
| entries = append(entries, types.StorageEntry{ | ||||||||||||||
| URL: origin, | ||||||||||||||
| Key: it.key, | ||||||||||||||
| Value: it.value, | ||||||||||||||
| }) | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| return entries, nil | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| func countLocalStorage(root string) (int, error) { | ||||||||||||||
| entries, err := extractLocalStorage(root) | ||||||||||||||
| if err != nil { | ||||||||||||||
| return 0, err | ||||||||||||||
| } | ||||||||||||||
| return len(entries), nil | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // findOriginDataDirs returns <root>/<h1>/<h2>/ paths that contain both an "origin" file and | ||||||||||||||
| // a "LocalStorage/localstorage.sqlite3" database. Non-directory entries, the "salt" sibling, | ||||||||||||||
| // and partition dirs without localStorage data are silently skipped. | ||||||||||||||
| func findOriginDataDirs(root string) ([]string, error) { | ||||||||||||||
| topEntries, err := os.ReadDir(root) | ||||||||||||||
| if err != nil { | ||||||||||||||
| return nil, fmt.Errorf("read origins root: %w", err) | ||||||||||||||
|
||||||||||||||
| return nil, fmt.Errorf("read origins root: %w", err) | |
| return nil, fmt.Errorf("read origins root %s: %w", root, err) |
Copilot
AI
Apr 21, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
readOriginFile returns the raw os.ReadFile error without path context, which makes debug logs harder to interpret. Wrap the error with the filename (e.g. fmt.Errorf("read origin file %s: %w", path, err)) for consistent diagnostics with the rest of this package.
| return "", err | |
| return "", fmt.Errorf("read origin file %s: %w", path, err) |
Copilot
AI
Apr 21, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
originEncASCII is documented as Latin-1/ASCII, but this branch converts bytes with string(chunk) (UTF-8). If WebKit stores Latin-1 here, non-ASCII bytes will be mis-decoded. Consider decoding ISO-8859-1 (similar to browser/chromium/extract_storage.go’s decodeLatin1) to match the documented encoding.
Copilot
AI
Apr 21, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The default branch silently accepts unknown origin string encodings by returning string(chunk). This can propagate corrupted scheme/host into output URLs. Consider returning an error on unknown encodings (similar to Chromium’s decodeChromiumString behavior) so the caller can skip the partition cleanly.
Copilot
AI
Apr 21, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
immutable=1 disables WAL replay, but the extractor is reading from the temp copy of the Origins directory (and the repo’s filemanager.Session copies WAL/SHM). Dropping immutable=1 and keeping read-only access will make localStorage extraction include committed data that hasn’t been checkpointed yet.
Copilot
AI
Apr 21, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Querying ItemTable without an ORDER BY can yield nondeterministic output ordering across runs/SQLite versions. Consider ordering by key (and/or rowid) to keep exports stable and tests/consumers deterministic.
| rows, err := db.Query(`SELECT key, value FROM ItemTable`) | |
| rows, err := db.Query(`SELECT key, value FROM ItemTable ORDER BY key, rowid`) |
Copilot
AI
Apr 21, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ItemTable key is scanned into sql.NullString but key.Valid is not checked. If a row has a NULL key, it will be emitted as an empty string, potentially causing collisions and misleading output. Consider skipping rows where key.Valid is false (or logging and continuing).
| } | |
| } | |
| if !key.Valid { | |
| log.Debugf("safari localstorage: skip row with NULL key in %s", path) | |
| continue | |
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
countLocalStorage currently calls extractLocalStorage and decodes all values just to compute a count. For large Origins trees this can be significantly slower than necessary during CountEntries. Consider counting without value decoding (e.g., traverse origin dirs and run SELECT COUNT(*) per localstorage.sqlite3, summing results).