Skip to content

Commit 1561898

Browse files
committed
feat(safari): localstorage extraction
Extracts Safari 17+ localStorage from WebKit's nested layout — WebsiteDataStore/<uuid>/Origins/<top-hash>/<frame-hash>/LocalStorage/localstorage.sqlite3 for named profiles, WebsiteData/Default for the default profile. Parses the binary SecurityOrigin serialization (length-prefixed scheme+host plus 0x00 default-port or 0x01 <uint16_le> explicit-port section) and decodes UTF-16 LE ItemTable value BLOBs, capping oversized values at 2048 bytes to match the Chromium extractor. Reports the frame origin URL so partitioned third-party storage is attributed to the iframe origin JavaScript actually sees. Closes the remaining LocalStorage checkbox in #565.
1 parent d75738b commit 1561898

6 files changed

Lines changed: 719 additions & 11 deletions

File tree

browser/safari/extract_storage.go

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
package safari
2+
3+
import (
	"database/sql"
	"encoding/binary"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"unicode/utf16"

	_ "modernc.org/sqlite"

	"github.com/moond4rk/hackbrowserdata/log"
	"github.com/moond4rk/hackbrowserdata/types"
)
16+
17+
// Modern WebKit (Safari 17+) stores localStorage under a nested, partitioned layout rooted at
18+
// either WebsiteDataStore/<uuid>/Origins (per named profile) or WebsiteData/Default
19+
// (the pre-profile default store). Within that root:
20+
//
21+
// <root>/<top-frame-hash>/<frame-hash>/origin — binary; encodes top+frame origins
22+
// <root>/<top-frame-hash>/<frame-hash>/LocalStorage/localstorage.sqlite3
23+
//
24+
// top-hash == frame-hash ⇒ first-party; they differ for third-party partitioned storage.
25+
// We report the frame origin because that's what window.localStorage exposes to JS.
26+
// ItemTable: (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB NOT NULL ON CONFLICT FAIL);
27+
// value BLOBs are UTF-16 LE strings.
28+
//
29+
// The flat "LocalStorage/<scheme>_<host>_<port>.localstorage" directory that older builds used
30+
// is empty on current Safari and is no longer a supported source.
31+
32+
// File and directory names used inside a WebKit origin partition, plus the size cap applied
// to decoded localStorage values.
const (
	// maxLocalStorageValueLength is the BLOB size in bytes at or above which a value is
	// replaced by a size marker instead of being decoded.
	maxLocalStorageValueLength = 2048

	webkitOriginSaltName     = "salt" // HMAC salt sibling of the <hash> dirs; not a data dir
	webkitLocalStorageDB     = "localstorage.sqlite3"
	webkitLocalStorageSubdir = "LocalStorage"
	webkitOriginFile         = "origin"
)
40+
41+
// Encoding byte that follows the length prefix of every string record in an origin file
// (WebCore SecurityOrigin serialization).
const (
	originEncUTF16 = 0x00 // UTF-16 LE
	originEncASCII = 0x01 // Latin-1 / ASCII
)
46+
47+
// Marker byte that follows the (scheme, host) pair of an origin block and says how the
// port is stored.
const (
	// originPortDefaultMarker: the port is the scheme default and is recorded as 0.
	originPortDefaultMarker = 0x00
	// originPortExplicitFlag: the next two bytes hold the port as a uint16_le.
	originPortExplicitFlag = 0x01
)
54+
55+
func extractLocalStorage(root string) ([]types.StorageEntry, error) {
56+
dirs, err := findOriginDataDirs(root)
57+
if err != nil {
58+
return nil, err
59+
}
60+
61+
var entries []types.StorageEntry
62+
for _, od := range dirs {
63+
origin, err := readOriginFile(filepath.Join(od, webkitOriginFile))
64+
if err != nil {
65+
log.Debugf("safari localstorage: origin %s: %v", od, err)
66+
continue
67+
}
68+
dbPath := filepath.Join(od, webkitLocalStorageSubdir, webkitLocalStorageDB)
69+
items, err := readLocalStorageFile(dbPath)
70+
if err != nil {
71+
log.Debugf("safari localstorage: db %s: %v", dbPath, err)
72+
continue
73+
}
74+
for _, it := range items {
75+
entries = append(entries, types.StorageEntry{
76+
URL: origin,
77+
Key: it.key,
78+
Value: it.value,
79+
})
80+
}
81+
}
82+
return entries, nil
83+
}
84+
85+
func countLocalStorage(root string) (int, error) {
86+
entries, err := extractLocalStorage(root)
87+
if err != nil {
88+
return 0, err
89+
}
90+
return len(entries), nil
91+
}
92+
93+
// findOriginDataDirs returns <root>/<h1>/<h2>/ paths that contain both an "origin" file and
94+
// a "LocalStorage/localstorage.sqlite3" database. Non-directory entries, the "salt" sibling,
95+
// and partition dirs without localStorage data are silently skipped.
96+
func findOriginDataDirs(root string) ([]string, error) {
97+
topEntries, err := os.ReadDir(root)
98+
if err != nil {
99+
return nil, fmt.Errorf("read origins root: %w", err)
100+
}
101+
var out []string
102+
for _, top := range topEntries {
103+
if !top.IsDir() || top.Name() == webkitOriginSaltName {
104+
continue
105+
}
106+
topPath := filepath.Join(root, top.Name())
107+
frameEntries, err := os.ReadDir(topPath)
108+
if err != nil {
109+
continue
110+
}
111+
for _, frame := range frameEntries {
112+
if !frame.IsDir() {
113+
continue
114+
}
115+
framePath := filepath.Join(topPath, frame.Name())
116+
if _, err := os.Stat(filepath.Join(framePath, webkitOriginFile)); err != nil {
117+
continue
118+
}
119+
dbPath := filepath.Join(framePath, webkitLocalStorageSubdir, webkitLocalStorageDB)
120+
if _, err := os.Stat(dbPath); err != nil {
121+
continue
122+
}
123+
out = append(out, framePath)
124+
}
125+
}
126+
return out, nil
127+
}
128+
129+
// originEndpoint holds one of the two origins stored in an origin file (top-frame or frame).
// A port of 0 stands for the scheme default (443 for https, 80 for http) and is left out
// when the endpoint is rendered as a URL.
type originEndpoint struct {
	scheme, host string
	port         uint16
}
136+
137+
// readOriginFile parses WebKit's SecurityOrigin binary serialization and returns the frame
138+
// origin URL (scheme://host[:port]). The file holds two origin blocks back-to-back: top-frame
139+
// then frame. When the frame block is missing/unreadable we fall back to the top-frame so we
140+
// can still attribute the data to *something* meaningful.
141+
func readOriginFile(path string) (string, error) {
142+
data, err := os.ReadFile(path)
143+
if err != nil {
144+
return "", err
145+
}
146+
top, pos, terr := readOriginBlock(data, 0)
147+
if terr != nil {
148+
return "", fmt.Errorf("parse top-frame origin: %w", terr)
149+
}
150+
frame, _, ferr := readOriginBlock(data, pos)
151+
if ferr != nil {
152+
// Partitioned info unavailable — attribute to the top-frame origin.
153+
frame = top
154+
}
155+
if frame.scheme == "" || frame.host == "" {
156+
return "", fmt.Errorf("origin file missing scheme/host")
157+
}
158+
return formatOriginURL(frame), nil
159+
}
160+
161+
// readOriginBlock reads one origin block: scheme record, host record, port marker.
162+
// Returns the parsed endpoint and the byte offset immediately after the block.
163+
func readOriginBlock(data []byte, pos int) (originEndpoint, int, error) {
164+
var ep originEndpoint
165+
var err error
166+
ep.scheme, pos, err = readOriginString(data, pos)
167+
if err != nil {
168+
return ep, pos, err
169+
}
170+
ep.host, pos, err = readOriginString(data, pos)
171+
if err != nil {
172+
return ep, pos, err
173+
}
174+
if pos >= len(data) {
175+
return ep, pos, fmt.Errorf("unexpected EOF before port marker")
176+
}
177+
marker := data[pos]
178+
pos++
179+
switch marker {
180+
case originPortDefaultMarker:
181+
ep.port = 0
182+
case originPortExplicitFlag:
183+
if pos+2 > len(data) {
184+
return ep, pos, fmt.Errorf("truncated port value at offset %d", pos)
185+
}
186+
ep.port = binary.LittleEndian.Uint16(data[pos : pos+2])
187+
pos += 2
188+
default:
189+
return ep, pos, fmt.Errorf("unexpected port marker 0x%02x at offset %d", marker, pos-1)
190+
}
191+
return ep, pos, nil
192+
}
193+
194+
// readOriginString consumes one length-prefixed record (uint32_le length + encoding byte + data).
195+
func readOriginString(data []byte, pos int) (string, int, error) {
196+
if pos+5 > len(data) {
197+
return "", pos, fmt.Errorf("truncated string record at offset %d", pos)
198+
}
199+
length := int(binary.LittleEndian.Uint32(data[pos : pos+4]))
200+
enc := data[pos+4]
201+
pos += 5
202+
if length < 0 || pos+length > len(data) {
203+
return "", pos, fmt.Errorf("string record overruns buffer: length %d at offset %d", length, pos-5)
204+
}
205+
chunk := data[pos : pos+length]
206+
pos += length
207+
switch enc {
208+
case originEncASCII:
209+
return string(chunk), pos, nil
210+
case originEncUTF16:
211+
return decodeUTF16LE(chunk), pos, nil
212+
default:
213+
return string(chunk), pos, nil
214+
}
215+
}
216+
217+
func formatOriginURL(ep originEndpoint) string {
218+
url := ep.scheme + "://" + ep.host
219+
if ep.port != 0 {
220+
url += fmt.Sprintf(":%d", ep.port)
221+
}
222+
return url
223+
}
224+
225+
// localStorageItem is one decoded ItemTable row: the key as stored and the value after
// decodeLocalStorageValue has been applied.
type localStorageItem struct {
	key, value string
}
229+
230+
func readLocalStorageFile(path string) ([]localStorageItem, error) {
231+
// Read-only + immutable so we don't disturb a live WAL (same pattern as profiles.go).
232+
dsn := "file:" + path + "?mode=ro&immutable=1"
233+
db, err := sql.Open("sqlite", dsn)
234+
if err != nil {
235+
return nil, fmt.Errorf("open %s: %w", path, err)
236+
}
237+
defer db.Close()
238+
if err := db.Ping(); err != nil {
239+
return nil, fmt.Errorf("ping %s: %w", path, err)
240+
}
241+
242+
rows, err := db.Query(`SELECT key, value FROM ItemTable`)
243+
if err != nil {
244+
return nil, fmt.Errorf("query ItemTable: %w", err)
245+
}
246+
defer rows.Close()
247+
248+
var items []localStorageItem
249+
for rows.Next() {
250+
var key sql.NullString
251+
var value []byte
252+
if err := rows.Scan(&key, &value); err != nil {
253+
log.Debugf("safari localstorage: scan row in %s: %v", path, err)
254+
continue
255+
}
256+
items = append(items, localStorageItem{
257+
key: key.String,
258+
value: decodeLocalStorageValue(value),
259+
})
260+
}
261+
return items, rows.Err()
262+
}
263+
264+
// decodeLocalStorageValue treats the BLOB as UTF-16 LE. Values at or above the cap are replaced
265+
// with a size marker to keep JSON/CSV output bounded, matching chromium/extract_storage.go.
266+
func decodeLocalStorageValue(b []byte) string {
267+
if len(b) >= maxLocalStorageValueLength {
268+
return fmt.Sprintf(
269+
"value is too long, length is %d, supported max length is %d",
270+
len(b), maxLocalStorageValueLength,
271+
)
272+
}
273+
return decodeUTF16LE(b)
274+
}
275+
276+
// decodeUTF16LE converts little-endian UTF-16 bytes into a Go string. Empty input gives the
// empty string. Odd-length (malformed) input is returned as-is, byte for byte, so that a
// stray trailing byte does not cause the caller to lose the whole row; WebKit values are
// even-length in practice.
func decodeUTF16LE(b []byte) string {
	switch {
	case len(b) == 0:
		return ""
	case len(b)&1 == 1:
		return string(b)
	}

	units := make([]uint16, 0, len(b)/2)
	for off := 0; off+1 < len(b); off += 2 {
		units = append(units, binary.LittleEndian.Uint16(b[off:off+2]))
	}
	return string(utf16.Decode(units))
}

0 commit comments

Comments
 (0)