Skip to content

Commit dd95dd3

Browse files
Optimize literal writing by unrolling loops
Unrolled literal writing loops in `compress_greedy_block` and `write_dynamic_block_with_sequences` to process 4 literals per iteration instead of 2. This reduces loop overhead and improves performance for incompressible data. - Modified `src/compress/mod.rs` to add `while lit_remain >= 4` loops. - Uses `write_literals_2` twice within the unrolled loop. - Relies on existing buffer space checks (which cover the worst case expansion). Performance: - Throughput for "Compress Parallel Incompressible" improved by ~0.6% (234.2 MiB/s -> 235.7 MiB/s). - Verified correctness with `cargo test`. Co-authored-by: 404Setup <[email protected]>
1 parent bbe8091 commit dd95dd3

1 file changed

Lines changed: 20 additions & 0 deletions

File tree

src/compress/mod.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,6 +1145,18 @@ impl Compressor {
11451145
// We add 16 bytes margin for the last flush (8 bytes) and safety.
11461146
if bs.out_idx + 16 + (seq.litrunlen as usize * 2) < bs.output.len() {
11471147
let mut lit_remain = seq.litrunlen as usize;
1148+
while lit_remain >= 4 {
1149+
// SAFETY: We verified sufficient buffer space above.
1150+
// `write_literals_2` writes at most 30 bits and may flush 4 bytes.
1151+
// We do this twice, so max 60 bits + flush overhead.
1152+
// The loop precondition checks for space.
1153+
unsafe {
1154+
self.write_literals_2(bs, input[in_pos], input[in_pos + 1]);
1155+
self.write_literals_2(bs, input[in_pos + 2], input[in_pos + 3]);
1156+
}
1157+
in_pos += 4;
1158+
lit_remain -= 4;
1159+
}
11481160
while lit_remain >= 2 {
11491161
// SAFETY: We verified sufficient buffer space above.
11501162
// `write_literals_2` writes at most 30 bits and may flush 4 bytes.
@@ -1355,6 +1367,14 @@ impl Compressor {
13551367
if seq.litrunlen > 0 {
13561368
if bs.out_idx + 16 + (seq.litrunlen as usize * 2) < bs.output.len() {
13571369
let mut lit_remain = seq.litrunlen as usize;
1370+
while lit_remain >= 4 {
1371+
unsafe {
1372+
self.write_literals_2(bs, input[in_pos], input[in_pos + 1]);
1373+
self.write_literals_2(bs, input[in_pos + 2], input[in_pos + 3]);
1374+
}
1375+
in_pos += 4;
1376+
lit_remain -= 4;
1377+
}
13581378
while lit_remain >= 2 {
13591379
unsafe { self.write_literals_2(bs, input[in_pos], input[in_pos + 1]) };
13601380
in_pos += 2;

0 commit comments

Comments
 (0)