Skip to content

Commit 068b821

Browse files
authored
fix: improve extract parsing with proper decoding and error handling (#543)
* fix: implement proper Chromium localStorage LevelDB parsing * feat: add IsMeta field to StorageEntry and keep META entries * fix: add error logging for decryption and missing data fields * fix: address PR review for localStorage parsing * fix: use naïve instead of café in Latin-1 test to avoid typos false positive * fix: extension enabled detection and sessionStorage decoding * fix: session storage origin resolution and extension enabled detection * fix: address PR review comments for storage parsing
1 parent a58d432 commit 068b821

10 files changed

Lines changed: 495 additions & 55 deletions

browser/chromium/extract_bookmark.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,12 @@ func extractBookmarks(path string) ([]types.BookmarkEntry, error) {
3131

3232
// walkBookmarks recursively traverses the bookmark tree, collecting URL entries.
3333
func walkBookmarks(node gjson.Result, folder string, out *[]types.BookmarkEntry) {
34-
if node.Get("type").String() == "url" {
34+
nodeType := node.Get("type").String()
35+
if nodeType == "url" {
3536
*out = append(*out, types.BookmarkEntry{
37+
ID: node.Get("id").Int(),
3638
Name: node.Get("name").String(),
39+
Type: nodeType,
3740
URL: node.Get("url").String(),
3841
Folder: folder,
3942
CreatedAt: typeutil.TimeEpoch(node.Get("date_added").Int()),

browser/chromium/extract_cookie.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"database/sql"
77
"sort"
88

9+
"github.com/moond4rk/hackbrowserdata/log"
910
"github.com/moond4rk/hackbrowserdata/types"
1011
"github.com/moond4rk/hackbrowserdata/utils/sqliteutil"
1112
"github.com/moond4rk/hackbrowserdata/utils/typeutil"
@@ -31,7 +32,10 @@ func extractCookies(masterKey []byte, path string) ([]types.CookieEntry, error)
3132
return types.CookieEntry{}, err
3233
}
3334

34-
value, _ := decryptValue(masterKey, encryptedValue)
35+
value, err := decryptValue(masterKey, encryptedValue)
36+
if err != nil {
37+
log.Debugf("decrypt cookie %s on %s: %v", name, host, err)
38+
}
3539
value = stripCookieHash(value, host)
3640
return types.CookieEntry{
3741
Name: name,

browser/chromium/extract_creditcard.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,28 @@ package chromium
33
import (
44
"database/sql"
55

6+
"github.com/moond4rk/hackbrowserdata/log"
67
"github.com/moond4rk/hackbrowserdata/types"
78
"github.com/moond4rk/hackbrowserdata/utils/sqliteutil"
89
)
910

10-
const defaultCreditCardQuery = `SELECT name_on_card, expiration_month, expiration_year,
11+
const defaultCreditCardQuery = `SELECT COALESCE(guid, ''), name_on_card, expiration_month, expiration_year,
1112
card_number_encrypted, COALESCE(nickname, ''), COALESCE(billing_address_id, '') FROM credit_cards`
1213

1314
func extractCreditCards(masterKey []byte, path string) ([]types.CreditCardEntry, error) {
1415
return sqliteutil.QueryRows(path, false, defaultCreditCardQuery,
1516
func(rows *sql.Rows) (types.CreditCardEntry, error) {
16-
var name, month, year, nickName, address string
17+
var guid, name, month, year, nickName, address string
1718
var encNumber []byte
18-
if err := rows.Scan(&name, &month, &year, &encNumber, &nickName, &address); err != nil {
19+
if err := rows.Scan(&guid, &name, &month, &year, &encNumber, &nickName, &address); err != nil {
1920
return types.CreditCardEntry{}, err
2021
}
21-
number, _ := decryptValue(masterKey, encNumber)
22+
number, err := decryptValue(masterKey, encNumber)
23+
if err != nil {
24+
log.Debugf("decrypt credit card for %s: %v", name, err)
25+
}
2226
return types.CreditCardEntry{
27+
GUID: guid,
2328
Name: name,
2429
Number: string(number),
2530
ExpMonth: month,

browser/chromium/extract_extension.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,25 @@ func extractExtensionsWithKeys(path string, keys []string) ([]types.ExtensionEnt
6060
Description: manifest.Get("description").String(),
6161
Version: manifest.Get("version").String(),
6262
HomepageURL: manifest.Get("homepage_url").String(),
63-
Enabled: ext.Get("state").Int() == 1,
63+
Enabled: isExtensionEnabled(ext),
6464
})
6565
return true
6666
})
6767

6868
return extensions, nil
6969
}
7070

71+
// isExtensionEnabled checks whether an extension is enabled.
72+
// Modern Chrome uses disable_reasons (array): empty [] = enabled, non-empty [1] = disabled.
73+
// Older Chrome uses state (int): 1 = enabled.
74+
func isExtensionEnabled(ext gjson.Result) bool {
75+
reasons := ext.Get("disable_reasons")
76+
if reasons.Exists() {
77+
return reasons.IsArray() && len(reasons.Array()) == 0
78+
}
79+
return ext.Get("state").Int() == 1
80+
}
81+
7182
// extractOperaExtensions extracts extensions from Opera's Secure Preferences,
7283
// which stores extension data under "extensions.opsettings" instead of the
7384
// standard "extensions.settings".

browser/chromium/extract_password.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"database/sql"
55
"sort"
66

7+
"github.com/moond4rk/hackbrowserdata/log"
78
"github.com/moond4rk/hackbrowserdata/types"
89
"github.com/moond4rk/hackbrowserdata/utils/sqliteutil"
910
"github.com/moond4rk/hackbrowserdata/utils/typeutil"
@@ -24,7 +25,10 @@ func extractPasswordsWithQuery(masterKey []byte, path, query string) ([]types.Lo
2425
if err := rows.Scan(&url, &username, &pwd, &created); err != nil {
2526
return types.LoginEntry{}, err
2627
}
27-
password, _ := decryptValue(masterKey, pwd)
28+
password, err := decryptValue(masterKey, pwd)
29+
if err != nil {
30+
log.Debugf("decrypt password for %s: %v", url, err)
31+
}
2832
return types.LoginEntry{
2933
URL: url,
3034
Username: username,

browser/chromium/extract_storage.go

Lines changed: 219 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,33 @@ package chromium
22

33
import (
44
"bytes"
5+
"encoding/binary"
56
"fmt"
67
"os"
8+
"strings"
9+
"unicode/utf16"
710

811
"github.com/syndtr/goleveldb/leveldb"
912

1013
"github.com/moond4rk/hackbrowserdata/types"
1114
)
1215

13-
func extractLocalStorage(path string) ([]types.StorageEntry, error) {
14-
return extractLevelDB(path, []byte("\x00"))
15-
}
16+
// Chromium localStorage LevelDB key prefixes and string format bytes.
17+
// Reference: https://chromium.googlesource.com/chromium/src/+/main/components/services/storage/dom_storage/local_storage_impl.cc
18+
const (
19+
localStorageVersionKey = "VERSION"
20+
localStorageMetaPrefix = "META:"
21+
localStorageMetaAccessKey = "METAACCESS:"
22+
localStorageDataPrefix = '_'
23+
chromiumStringUTF16Format = 0
24+
chromiumStringLatin1Format = 1
25+
)
1626

17-
func extractSessionStorage(path string) ([]types.StorageEntry, error) {
18-
return extractLevelDB(path, []byte("-"))
19-
}
27+
const maxLocalStorageValueLength = 2048
2028

21-
// extractLevelDB iterates over all entries in a LevelDB directory,
22-
// splitting each key by the separator into (url, name).
23-
func extractLevelDB(path string, separator []byte) ([]types.StorageEntry, error) {
24-
if _, err := os.Stat(path); os.IsNotExist(err) {
25-
return nil, fmt.Errorf("leveldb path not found: %s", path)
29+
func extractLocalStorage(path string) ([]types.StorageEntry, error) {
30+
if _, err := os.Stat(path); err != nil {
31+
return nil, fmt.Errorf("leveldb path %q: %w", path, err)
2632
}
2733
db, err := leveldb.OpenFile(path, nil)
2834
if err != nil {
@@ -35,24 +41,214 @@ func extractLevelDB(path string, separator []byte) ([]types.StorageEntry, error)
3541
defer iter.Release()
3642

3743
for iter.Next() {
38-
url, name := parseStorageKey(iter.Key(), separator)
39-
if url == "" {
44+
entry, ok := parseLocalStorageEntry(iter.Key(), iter.Value())
45+
if !ok {
4046
continue
4147
}
48+
entries = append(entries, entry)
49+
}
50+
return entries, iter.Error()
51+
}
52+
53+
// parseLocalStorageEntry classifies a LevelDB key/value pair and decodes it.
54+
// Returns false for VERSION entries and any unrecognized keys. META entries are kept with IsMeta=true.
55+
func parseLocalStorageEntry(key, value []byte) (types.StorageEntry, bool) {
56+
switch {
57+
case bytes.Equal(key, []byte(localStorageVersionKey)):
58+
return types.StorageEntry{}, false
59+
case bytes.HasPrefix(key, []byte(localStorageMetaAccessKey)):
60+
return types.StorageEntry{
61+
IsMeta: true,
62+
URL: string(bytes.TrimPrefix(key, []byte(localStorageMetaAccessKey))),
63+
Value: fmt.Sprintf("meta data, value bytes is %v", value),
64+
}, true
65+
case bytes.HasPrefix(key, []byte(localStorageMetaPrefix)):
66+
return types.StorageEntry{
67+
IsMeta: true,
68+
URL: string(bytes.TrimPrefix(key, []byte(localStorageMetaPrefix))),
69+
Value: fmt.Sprintf("meta data, value bytes is %v", value),
70+
}, true
71+
case len(key) > 0 && key[0] == localStorageDataPrefix:
72+
return parseLocalStorageDataEntry(key[1:], value), true
73+
default:
74+
return types.StorageEntry{}, false
75+
}
76+
}
77+
78+
// parseLocalStorageDataEntry decodes a data entry with format: origin\x00<encoded-key>.
79+
func parseLocalStorageDataEntry(key, value []byte) types.StorageEntry {
80+
entry := types.StorageEntry{
81+
Value: decodeLocalStorageValue(value),
82+
}
83+
84+
separator := bytes.IndexByte(key, 0)
85+
if separator < 0 {
86+
return entry
87+
}
88+
89+
entry.URL = string(key[:separator])
90+
scriptKey, err := decodeChromiumString(key[separator+1:])
91+
if err != nil {
92+
return entry
93+
}
94+
entry.Key = scriptKey
95+
return entry
96+
}
97+
98+
// decodeChromiumString decodes a Chromium-encoded string.
99+
// Format byte 0x01 = Latin-1, 0x00 = UTF-16 LE.
100+
func decodeChromiumString(b []byte) (string, error) {
101+
if len(b) == 0 {
102+
return "", fmt.Errorf("empty chromium string")
103+
}
104+
switch b[0] {
105+
case chromiumStringLatin1Format:
106+
return decodeLatin1(b[1:]), nil
107+
case chromiumStringUTF16Format:
108+
return decodeUTF16LE(b[1:])
109+
default:
110+
return "", fmt.Errorf("unknown chromium string format 0x%02x", b[0])
111+
}
112+
}
113+
114+
// decodeLatin1 converts ISO-8859-1 bytes into a UTF-8 Go string.
// Each input byte is treated as the Unicode code point with the same value
// (U+0000 through U+00FF) and encoded as UTF-8.
func decodeLatin1(b []byte) string {
	var sb strings.Builder
	sb.Grow(len(b))
	for _, c := range b {
		sb.WriteRune(rune(c))
	}
	return sb.String()
}
123+
124+
// decodeUTF16LE decodes UTF-16 Little-Endian bytes into a Go string.
// Empty input yields an empty string. Input with an odd number of bytes
// cannot be split into 16-bit units and is reported as an error.
func decodeUTF16LE(b []byte) (string, error) {
	switch {
	case len(b) == 0:
		return "", nil
	case len(b)%2 != 0:
		return "", fmt.Errorf("invalid UTF-16 byte length %d", len(b))
	}

	units := make([]uint16, 0, len(b)/2)
	for off := 0; off < len(b); off += 2 {
		units = append(units, binary.LittleEndian.Uint16(b[off:off+2]))
	}
	return string(utf16.Decode(units)), nil
}
138+
139+
func decodeLocalStorageValue(value []byte) string {
140+
if len(value) >= maxLocalStorageValueLength {
141+
return fmt.Sprintf(
142+
"value is too long, length is %d, supported max length is %d",
143+
len(value), maxLocalStorageValueLength,
144+
)
145+
}
146+
decoded, err := decodeChromiumString(value)
147+
if err != nil {
148+
return fmt.Sprintf("unsupported value encoding: %v", err)
149+
}
150+
return decoded
151+
}
152+
153+
// extractSessionStorage reads Chromium session storage LevelDB.
154+
//
155+
// LevelDB key format:
156+
//
157+
// namespace-<guid>-<origin> → <map_id> (origin mapping)
158+
// map-<map_id>-<key_name> → <value> (actual data, UTF-16 LE)
159+
// next-map-id / version (metadata, skipped)
160+
func extractSessionStorage(path string) ([]types.StorageEntry, error) {
161+
if _, err := os.Stat(path); err != nil {
162+
return nil, fmt.Errorf("leveldb path %q: %w", path, err)
163+
}
164+
db, err := leveldb.OpenFile(path, nil)
165+
if err != nil {
166+
return nil, err
167+
}
168+
defer db.Close()
169+
170+
// Pass 1: build map_id → origin lookup from namespace entries.
171+
// Key: "namespace-<guid>-<origin>", Value: "<map_id>" (ASCII digits).
172+
originByMapID := make(map[string]string)
173+
iter := db.NewIterator(nil, nil)
174+
for iter.Next() {
175+
key := string(iter.Key())
176+
if !strings.HasPrefix(key, "namespace-") {
177+
continue
178+
}
179+
// Extract origin by finding "-https://", "-http://", or "-chrome://" in the key.
180+
// Namespace GUIDs use underscores (e.g., "03b2df3a_0d95_4d55_ae57_...") so
181+
// there is no ambiguity with the origin separator.
182+
origin := extractNamespaceOrigin(key)
183+
if origin == "" {
184+
continue
185+
}
186+
mapID := string(iter.Value())
187+
originByMapID[mapID] = origin
188+
}
189+
iter.Release()
190+
if err := iter.Error(); err != nil {
191+
return nil, fmt.Errorf("read namespace entries: %w", err)
192+
}
193+
194+
// Pass 2: read map entries and resolve origins.
195+
var entries []types.StorageEntry
196+
iter2 := db.NewIterator(nil, nil)
197+
defer iter2.Release()
198+
199+
mapPrefix := []byte("map-")
200+
for iter2.Next() {
201+
key := iter2.Key()
202+
if !bytes.HasPrefix(key, mapPrefix) {
203+
continue
204+
}
205+
rest := key[len(mapPrefix):] // "<map_id>-<key_name>"
206+
sep := bytes.IndexByte(rest, '-')
207+
if sep < 0 {
208+
continue
209+
}
210+
mapID := string(rest[:sep])
211+
keyName := string(rest[sep+1:])
212+
213+
origin := originByMapID[mapID]
214+
if origin == "" {
215+
origin = mapID // fallback to map_id if namespace not found
216+
}
217+
218+
value := decodeSessionStorageValue(iter2.Value())
42219
entries = append(entries, types.StorageEntry{
43-
URL: url,
44-
Key: name,
45-
Value: string(iter.Value()),
220+
URL: origin,
221+
Key: keyName,
222+
Value: value,
46223
})
47224
}
48-
return entries, iter.Error()
225+
return entries, iter2.Error()
226+
}
227+
228+
// extractNamespaceOrigin extracts the origin from a session storage namespace key.
//
// Key format: "namespace-<guid_with_underscores>-<origin>"
//
// The well-known "-https://", "-http://" and "-chrome://" markers are searched
// for first, in that order, and everything after the dash of the first marker
// found is returned. For any other scheme (for example "chrome-extension://"
// or "file://") the key is parsed structurally: because the GUID uses
// underscores rather than dashes, the first dash after the "namespace-" prefix
// ends the GUID and the remainder is the origin. The structural result is only
// accepted when it contains "://", so keys without an origin still yield "".
//
// It returns "" when no origin can be determined.
func extractNamespaceOrigin(key string) string {
	for _, prefix := range []string{"-https://", "-http://", "-chrome://"} {
		if idx := strings.Index(key, prefix); idx >= 0 {
			return key[idx+1:]
		}
	}

	// Fallback for schemes not listed above.
	const namespacePrefix = "namespace-"
	if !strings.HasPrefix(key, namespacePrefix) {
		return ""
	}
	rest := key[len(namespacePrefix):]
	dash := strings.IndexByte(rest, '-')
	if dash < 0 {
		return ""
	}
	origin := rest[dash+1:]
	if !strings.Contains(origin, "://") {
		return ""
	}
	return origin
}
50240

51-
// parseStorageKey splits a LevelDB key into (url, name) by the given separator.
52-
func parseStorageKey(key, separator []byte) (url, name string) {
53-
parts := bytes.SplitN(key, separator, 2)
54-
if len(parts) != 2 {
55-
return "", ""
241+
// decodeSessionStorageValue decodes a session storage value.
242+
// Values are raw UTF-16 LE (no format byte prefix, unlike localStorage).
243+
func decodeSessionStorageValue(value []byte) string {
244+
if len(value) == 0 {
245+
return ""
246+
}
247+
if len(value)%2 == 0 {
248+
decoded, err := decodeUTF16LE(value)
249+
if err == nil {
250+
return decoded
251+
}
56252
}
57-
return string(parts[0]), string(parts[1])
253+
return string(value)
58254
}

0 commit comments

Comments
 (0)