1515
1616/*
1717 * Compression parameters.
18+ *
19+ * LZ77_MATCH_MAX_DIST: Farthest back a match can be from current position (can be 1 - 8K).
20+ * LZ77_HASH_LOG:
21+ * LZ77_HASH_SIZE: ilog2 hash size (recommended to be 13 - 18, default 15 (hash size
22+ * 32k)).
23+ * LZ77_RSTEP_SIZE: Number of bytes to read from input buffer for hashing and initial
24+ * match check (default 4 bytes, this effectively makes this the min
25+ * match len).
26+ * LZ77_MSTEP_SIZE: Number of bytes to extend-compare a found match (default 8 bytes).
27+ * LZ77_SKIP_TRIGGER: ilog2 value for adaptive skipping, i.e. to progressively skip input
28+ * bytes when we can't find matches. Default is 4.
29+ * Higher values make the step grow more slowly (more matches found),
30+ * giving better compression ratio, but will increase time. Lower values
31+ * will decrease compression time, but will result in worse compression ratio.
1832 */
1933#define LZ77_MATCH_MAX_DIST SZ_8K
2034#define LZ77_HASH_LOG 15
@@ -86,6 +100,19 @@ static __always_inline u32 lz77_match_len(const void *match, const void *cur, co
86100 return (cur - start );
87101}
88102
103+ /**
104+ * lz77_encode_match() - Match encoding.
105+ * @dst: compressed buffer
106+ * @nib: pointer to an address in @dst
107+ * @dist: match distance
108+ * @len: match length
109+ *
110+ * Assumes all args were previously checked.
111+ *
112+ * Return: @dst advanced to new position
113+ *
114+ * Ref: MS-XCA 2.3.4 "Plain LZ77 Compression Algorithm Details" - "Processing"
115+ */
89116static __always_inline void * lz77_encode_match (void * dst , void * * nib , u16 dist , u32 len )
90117{
91118 len -= 3 ;
@@ -95,12 +122,12 @@ static __always_inline void *lz77_encode_match(void *dst, void **nib, u16 dist,
95122 if (len < 7 ) {
96123 lz77_write16 (dst , dist + len );
97124
98- return dst + 2 ;
125+ return dst + sizeof ( u16 ) ;
99126 }
100127
101128 dist |= 7 ;
102129 lz77_write16 (dst , dist );
103- dst += 2 ;
130+ dst += sizeof ( u16 ) ;
104131 len -= 7 ;
105132
106133 if (!* nib ) {
@@ -130,16 +157,32 @@ static __always_inline void *lz77_encode_match(void *dst, void **nib, u16 dist,
130157 if (len <= 0xffff ) {
131158 lz77_write16 (dst , len );
132159
133- return dst + 2 ;
160+ return dst + sizeof ( u16 ) ;
134161 }
135162
136163 lz77_write16 (dst , 0 );
137- dst += 2 ;
164+ dst += sizeof ( u16 ) ;
138165 lz77_write32 (dst , len );
139166
140- return dst + 4 ;
167+ return dst + sizeof ( u32 ) ;
141168}
142169
170+ /**
171+ * lz77_encode_literals() - Literals encoding.
172+ * @start: where to start copying literals (uncompressed buffer)
173+ * @end: when to stop copying (uncompressed buffer)
174+ * @dst: compressed buffer
175+ * @f: pointer to current flag value
176+ * @fc: pointer to current flag count
177+ * @fp: pointer to current flag address
178+ *
179+ * Batch copy literals from @start to @dst, updating flag values accordingly.
180+ * Assumes all args were previously checked.
181+ *
182+ * Return: @dst advanced to new position
183+ *
184+ * Ref: MS-XCA 2.3.4 "Plain LZ77 Compression Algorithm Details" - "Processing"
185+ */
143186static __always_inline void * lz77_encode_literals (const void * start , const void * end , void * dst ,
144187 long * f , u32 * fc , void * * fp )
145188{
@@ -160,7 +203,7 @@ static __always_inline void *lz77_encode_literals(const void *start, const void
160203 lz77_write32 (* fp , * f );
161204 * fc = 0 ;
162205 * fp = dst ;
163- dst += 4 ;
206+ dst += sizeof ( u32 ) ;
164207 }
165208 } while (start < end );
166209
@@ -188,7 +231,7 @@ noinline int lz77_compress(const void *src, const u32 slen, void *dst, u32 *dlen
188231 rlim = end - LZ77_MSTEP_SIZE ; /* read limit (for lz77_match_len()) */
189232 dstp = dst ;
190233 flag_pos = dstp ;
191- dstp += 4 ;
234+ dstp += sizeof ( u32 ) ;
192235 nib = NULL ;
193236
194237 htable = kvcalloc (LZ77_HASH_SIZE , sizeof (* htable ), GFP_KERNEL );
@@ -197,6 +240,10 @@ noinline int lz77_compress(const void *src, const u32 slen, void *dst, u32 *dlen
197240
198241 LZ77_PREFETCH (srcp + LZ77_RSTEP_SIZE );
199242
243+ /*
244+ * Adjust @srcp so we don't get a false positive match on first iteration.
245+ * Then prepare hash for first loop iteration (don't advance @srcp again).
246+ */
200247 hash = lz77_hash (lz77_read32 (srcp ++ ));
201248 htable [hash ] = 0 ;
202249 hash = lz77_hash (lz77_read32 (srcp ));
@@ -219,6 +266,14 @@ noinline int lz77_compress(const void *src, const u32 slen, void *dst, u32 *dlen
219266
220267 srcp = next ;
221268 next += step ;
269+
270+ /*
271+ * Adaptive skipping.
272+ *
273+ * Increment @step every (1 << LZ77_SKIP_TRIGGER, 16 in our case) bytes
274+ * without a match.
275+ * Reset to 1 when a match is found.
276+ */
222277 step = (skip ++ >> LZ77_SKIP_TRIGGER );
223278 if (unlikely (next > rlim ))
224279 goto out ;
@@ -229,6 +284,16 @@ noinline int lz77_compress(const void *src, const u32 slen, void *dst, u32 *dlen
229284 } while (likely (match + LZ77_MATCH_MAX_DIST < srcp ) ||
230285 lz77_read32 (match ) != lz77_read32 (srcp ));
231286
287+ /*
288+ * Match found. Warm/cold path; begin parsing @srcp and writing to @dstp:
289+ * - flush literals
290+ * - compute match length (*)
291+ * - encode match
292+ *
293+ * (*) Current minimum match length is defined by the memory read size above, so
294+ * here we already know that we have 4 matching bytes, but it's just faster to
295+ * redundantly compute it again in lz77_match_len() than to adjust pointers/len.
296+ */
232297 dstp = lz77_encode_literals (anchor , srcp , dstp , & flag , & flag_count , & flag_pos );
233298 len = lz77_match_len (match , srcp , end );
234299 dstp = lz77_encode_match (dstp , & nib , srcp - match , len );
@@ -243,7 +308,7 @@ noinline int lz77_compress(const void *src, const u32 slen, void *dst, u32 *dlen
243308 lz77_write32 (flag_pos , flag );
244309 flag_count = 0 ;
245310 flag_pos = dstp ;
246- dstp += 4 ;
311+ dstp += sizeof ( u32 ) ;
247312 }
248313
249314 if (unlikely (srcp > rlim ))
0 commit comments