@@ -2,27 +2,33 @@ package chromium
22
33import (
44 "bytes"
5+ "encoding/binary"
56 "fmt"
67 "os"
8+ "strings"
9+ "unicode/utf16"
710
811 "github.com/syndtr/goleveldb/leveldb"
912
1013 "github.com/moond4rk/hackbrowserdata/types"
1114)
1215
13- func extractLocalStorage (path string ) ([]types.StorageEntry , error ) {
14- return extractLevelDB (path , []byte ("\x00 " ))
15- }
16+ // Chromium localStorage LevelDB key prefixes and string format bytes.
17+ // Reference: https://chromium.googlesource.com/chromium/src/+/main/components/services/storage/dom_storage/local_storage_impl.cc
18+ const (
19+ localStorageVersionKey = "VERSION"
20+ localStorageMetaPrefix = "META:"
21+ localStorageMetaAccessKey = "METAACCESS:"
22+ localStorageDataPrefix = '_'
23+ chromiumStringUTF16Format = 0
24+ chromiumStringLatin1Format = 1
25+ )
1626
17- func extractSessionStorage (path string ) ([]types.StorageEntry , error ) {
18- return extractLevelDB (path , []byte ("-" ))
19- }
27+ const maxLocalStorageValueLength = 2048
2028
21- // extractLevelDB iterates over all entries in a LevelDB directory,
22- // splitting each key by the separator into (url, name).
23- func extractLevelDB (path string , separator []byte ) ([]types.StorageEntry , error ) {
24- if _ , err := os .Stat (path ); os .IsNotExist (err ) {
25- return nil , fmt .Errorf ("leveldb path not found: %s" , path )
29+ func extractLocalStorage (path string ) ([]types.StorageEntry , error ) {
30+ if _ , err := os .Stat (path ); err != nil {
31+ return nil , fmt .Errorf ("leveldb path %q: %w" , path , err )
2632 }
2733 db , err := leveldb .OpenFile (path , nil )
2834 if err != nil {
@@ -35,24 +41,214 @@ func extractLevelDB(path string, separator []byte) ([]types.StorageEntry, error)
3541 defer iter .Release ()
3642
3743 for iter .Next () {
38- url , name := parseStorageKey (iter .Key (), separator )
39- if url == "" {
44+ entry , ok := parseLocalStorageEntry (iter .Key (), iter . Value () )
45+ if ! ok {
4046 continue
4147 }
48+ entries = append (entries , entry )
49+ }
50+ return entries , iter .Error ()
51+ }
52+
53+ // parseLocalStorageEntry classifies a LevelDB key/value pair and decodes it.
54+ // Returns false for VERSION entries and any unrecognized keys. META entries are kept with IsMeta=true.
55+ func parseLocalStorageEntry (key , value []byte ) (types.StorageEntry , bool ) {
56+ switch {
57+ case bytes .Equal (key , []byte (localStorageVersionKey )):
58+ return types.StorageEntry {}, false
59+ case bytes .HasPrefix (key , []byte (localStorageMetaAccessKey )):
60+ return types.StorageEntry {
61+ IsMeta : true ,
62+ URL : string (bytes .TrimPrefix (key , []byte (localStorageMetaAccessKey ))),
63+ Value : fmt .Sprintf ("meta data, value bytes is %v" , value ),
64+ }, true
65+ case bytes .HasPrefix (key , []byte (localStorageMetaPrefix )):
66+ return types.StorageEntry {
67+ IsMeta : true ,
68+ URL : string (bytes .TrimPrefix (key , []byte (localStorageMetaPrefix ))),
69+ Value : fmt .Sprintf ("meta data, value bytes is %v" , value ),
70+ }, true
71+ case len (key ) > 0 && key [0 ] == localStorageDataPrefix :
72+ return parseLocalStorageDataEntry (key [1 :], value ), true
73+ default :
74+ return types.StorageEntry {}, false
75+ }
76+ }
77+
78+ // parseLocalStorageDataEntry decodes a data entry with format: origin\x00<encoded-key>.
79+ func parseLocalStorageDataEntry (key , value []byte ) types.StorageEntry {
80+ entry := types.StorageEntry {
81+ Value : decodeLocalStorageValue (value ),
82+ }
83+
84+ separator := bytes .IndexByte (key , 0 )
85+ if separator < 0 {
86+ return entry
87+ }
88+
89+ entry .URL = string (key [:separator ])
90+ scriptKey , err := decodeChromiumString (key [separator + 1 :])
91+ if err != nil {
92+ return entry
93+ }
94+ entry .Key = scriptKey
95+ return entry
96+ }
97+
98+ // decodeChromiumString decodes a Chromium-encoded string.
99+ // Format byte 0x01 = Latin-1, 0x00 = UTF-16 LE.
100+ func decodeChromiumString (b []byte ) (string , error ) {
101+ if len (b ) == 0 {
102+ return "" , fmt .Errorf ("empty chromium string" )
103+ }
104+ switch b [0 ] {
105+ case chromiumStringLatin1Format :
106+ return decodeLatin1 (b [1 :]), nil
107+ case chromiumStringUTF16Format :
108+ return decodeUTF16LE (b [1 :])
109+ default :
110+ return "" , fmt .Errorf ("unknown chromium string format 0x%02x" , b [0 ])
111+ }
112+ }
113+
// decodeLatin1 turns ISO-8859-1 bytes into a UTF-8 Go string.
// Each input byte is widened to the rune with the same numeric value, so
// bytes 0x80–0xFF become the two-byte UTF-8 encodings of U+0080–U+00FF.
func decodeLatin1(b []byte) string {
	out := make([]rune, 0, len(b))
	for idx := 0; idx < len(b); idx++ {
		out = append(out, rune(b[idx]))
	}
	return string(out)
}
123+
// decodeUTF16LE converts UTF-16 Little-Endian bytes into a Go string.
// Empty input yields "" with no error; an odd byte count is rejected with an
// error because it cannot form whole 16-bit code units. Surrogate handling is
// delegated to utf16.Decode.
func decodeUTF16LE(b []byte) (string, error) {
	switch {
	case len(b) == 0:
		return "", nil
	case len(b)%2 != 0:
		return "", fmt.Errorf("invalid UTF-16 byte length %d", len(b))
	}

	units := make([]uint16, 0, len(b)/2)
	for off := 0; off < len(b); off += 2 {
		units = append(units, binary.LittleEndian.Uint16(b[off:off+2]))
	}
	return string(utf16.Decode(units)), nil
}
138+
139+ func decodeLocalStorageValue (value []byte ) string {
140+ if len (value ) >= maxLocalStorageValueLength {
141+ return fmt .Sprintf (
142+ "value is too long, length is %d, supported max length is %d" ,
143+ len (value ), maxLocalStorageValueLength ,
144+ )
145+ }
146+ decoded , err := decodeChromiumString (value )
147+ if err != nil {
148+ return fmt .Sprintf ("unsupported value encoding: %v" , err )
149+ }
150+ return decoded
151+ }
152+
153+ // extractSessionStorage reads Chromium session storage LevelDB.
154+ //
155+ // LevelDB key format:
156+ //
157+ // namespace-<guid>-<origin> → <map_id> (origin mapping)
158+ // map-<map_id>-<key_name> → <value> (actual data, UTF-16 LE)
159+ // next-map-id / version (metadata, skipped)
160+ func extractSessionStorage (path string ) ([]types.StorageEntry , error ) {
161+ if _ , err := os .Stat (path ); err != nil {
162+ return nil , fmt .Errorf ("leveldb path %q: %w" , path , err )
163+ }
164+ db , err := leveldb .OpenFile (path , nil )
165+ if err != nil {
166+ return nil , err
167+ }
168+ defer db .Close ()
169+
170+ // Pass 1: build map_id → origin lookup from namespace entries.
171+ // Key: "namespace-<guid>-<origin>", Value: "<map_id>" (ASCII digits).
172+ originByMapID := make (map [string ]string )
173+ iter := db .NewIterator (nil , nil )
174+ for iter .Next () {
175+ key := string (iter .Key ())
176+ if ! strings .HasPrefix (key , "namespace-" ) {
177+ continue
178+ }
179+ // Extract origin by finding "-https://", "-http://", or "-chrome://" in the key.
180+ // Namespace GUIDs use underscores (e.g., "03b2df3a_0d95_4d55_ae57_...") so
181+ // there is no ambiguity with the origin separator.
182+ origin := extractNamespaceOrigin (key )
183+ if origin == "" {
184+ continue
185+ }
186+ mapID := string (iter .Value ())
187+ originByMapID [mapID ] = origin
188+ }
189+ iter .Release ()
190+ if err := iter .Error (); err != nil {
191+ return nil , fmt .Errorf ("read namespace entries: %w" , err )
192+ }
193+
194+ // Pass 2: read map entries and resolve origins.
195+ var entries []types.StorageEntry
196+ iter2 := db .NewIterator (nil , nil )
197+ defer iter2 .Release ()
198+
199+ mapPrefix := []byte ("map-" )
200+ for iter2 .Next () {
201+ key := iter2 .Key ()
202+ if ! bytes .HasPrefix (key , mapPrefix ) {
203+ continue
204+ }
205+ rest := key [len (mapPrefix ):] // "<map_id>-<key_name>"
206+ sep := bytes .IndexByte (rest , '-' )
207+ if sep < 0 {
208+ continue
209+ }
210+ mapID := string (rest [:sep ])
211+ keyName := string (rest [sep + 1 :])
212+
213+ origin := originByMapID [mapID ]
214+ if origin == "" {
215+ origin = mapID // fallback to map_id if namespace not found
216+ }
217+
218+ value := decodeSessionStorageValue (iter2 .Value ())
42219 entries = append (entries , types.StorageEntry {
43- URL : url ,
44- Key : name ,
45- Value : string ( iter . Value ()) ,
220+ URL : origin ,
221+ Key : keyName ,
222+ Value : value ,
46223 })
47224 }
48- return entries , iter .Error ()
225+ return entries , iter2 .Error ()
226+ }
227+
// extractNamespaceOrigin extracts the origin from a session storage namespace
// key of the form "namespace-<guid_with_underscores>-<origin>".
//
// The well-known schemes (https, http, chrome) are located first by searching
// for "-<scheme>://" anywhere in the key. If none is found, the key is parsed
// structurally: because the GUID is written with underscores rather than
// dashes, the first '-' after the "namespace-" prefix ends the GUID and
// everything after it is the origin. This fallback covers origins with other
// schemes (for example chrome-extension:// or file://), which would otherwise
// be dropped. The fallback result is accepted only when it contains "://".
//
// It returns "" when no origin can be identified.
func extractNamespaceOrigin(key string) string {
	for _, prefix := range []string{"-https://", "-http://", "-chrome://"} {
		if idx := strings.Index(key, prefix); idx >= 0 {
			return key[idx+1:]
		}
	}

	const namespacePrefix = "namespace-"
	if !strings.HasPrefix(key, namespacePrefix) {
		return ""
	}
	rest := key[len(namespacePrefix):] // "<guid>-<origin>"
	sep := strings.IndexByte(rest, '-')
	if sep < 0 {
		return ""
	}
	origin := rest[sep+1:]
	// Require a scheme separator so malformed keys are not reported as origins.
	if !strings.Contains(origin, "://") {
		return ""
	}
	return origin
}
50240
51- // parseStorageKey splits a LevelDB key into (url, name) by the given separator.
52- func parseStorageKey (key , separator []byte ) (url , name string ) {
53- parts := bytes .SplitN (key , separator , 2 )
54- if len (parts ) != 2 {
55- return "" , ""
241+ // decodeSessionStorageValue decodes a session storage value.
242+ // Values are raw UTF-16 LE (no format byte prefix, unlike localStorage).
243+ func decodeSessionStorageValue (value []byte ) string {
244+ if len (value ) == 0 {
245+ return ""
246+ }
247+ if len (value )% 2 == 0 {
248+ decoded , err := decodeUTF16LE (value )
249+ if err == nil {
250+ return decoded
251+ }
56252 }
57- return string (parts [ 0 ]), string ( parts [ 1 ] )
253+ return string (value )
58254}
0 commit comments