Skip to content

Commit 6502d7b

Browse files
authored
Merge pull request #235 from 404Setup/bolt-adler32-tail-863113544921461937
⚡ Bolt: Optimize adler32 tail processing
2 parents bb9bcc7 + d120e2c commit 6502d7b

2 files changed

Lines changed: 45 additions & 18 deletions

File tree

benches/bench_main.rs

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -419,6 +419,30 @@ fn bench_adler32_micro(c: &mut Criterion) {
419419
group.finish();
420420
}
421421

422+
fn bench_adler32_tail(c: &mut Criterion) {
423+
// Sizes to test tail optimization: 1, 2, 3 (small), 7 (4+3), 15 (8+4+3), 31 (16+8+4+3)
424+
let sizes = [1, 2, 3, 7, 15, 31];
425+
let mut group = c.benchmark_group("Adler32 Tail");
426+
427+
for size in sizes {
428+
let data = vec![0u8; size];
429+
group.throughput(Throughput::Bytes(size as u64));
430+
431+
group.bench_with_input(
432+
BenchmarkId::new("libdeflate-rs", size),
433+
&size,
434+
|b, &_size| {
435+
b.iter(|| adler32(1, &data));
436+
},
437+
);
438+
439+
group.bench_with_input(BenchmarkId::new("libdeflater", size), &size, |b, &_size| {
440+
b.iter(|| libdeflater::adler32(&data));
441+
});
442+
}
443+
group.finish();
444+
}
445+
422446
fn bench_checksums(c: &mut Criterion) {
423447
let files = [
424448
("XXS", "bench_data/data_XXS.bin"),
@@ -1412,6 +1436,7 @@ criterion_group!(
14121436
bench_parallel_alloc,
14131437
bench_adler32_nano,
14141438
bench_adler32_micro,
1439+
bench_adler32_tail,
14151440
bench_crc32_micro,
14161441
bench_decompress_offset8,
14171442
bench_decompress_offset3,

src/adler32/x86.rs

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -61,24 +61,26 @@ macro_rules! adler32_tail {
6161
}
6262

6363
// Remaining 0-3 bytes.
64-
if $len > 0 {
65-
let b = *$ptr as u32;
66-
$s1 += b;
67-
$s2 += $s1;
68-
$ptr = $ptr.add(1);
69-
$len -= 1;
70-
}
71-
if $len > 0 {
72-
let b = *$ptr as u32;
73-
$s1 += b;
74-
$s2 += $s1;
75-
$ptr = $ptr.add(1);
76-
$len -= 1;
77-
}
78-
if $len > 0 {
79-
let b = *$ptr as u32;
80-
$s1 += b;
81-
$s2 += $s1;
64+
match $len {
65+
3 => {
66+
let b0 = *$ptr as u32;
67+
let b1 = *$ptr.add(1) as u32;
68+
let b2 = *$ptr.add(2) as u32;
69+
$s2 += ($s1 << 1) + $s1 + (b0 * 3) + (b1 * 2) + b2;
70+
$s1 += b0 + b1 + b2;
71+
}
72+
2 => {
73+
let b0 = *$ptr as u32;
74+
let b1 = *$ptr.add(1) as u32;
75+
$s2 += ($s1 << 1) + (b0 * 2) + b1;
76+
$s1 += b0 + b1;
77+
}
78+
1 => {
79+
let b0 = *$ptr as u32;
80+
$s2 += $s1 + b0;
81+
$s1 += b0;
82+
}
83+
_ => {}
8284
}
8385
}
8486
};

0 commit comments

Comments
 (0)