diff --git a/benches/bench_main.rs b/benches/bench_main.rs index 438212a..45eb2dd 100644 --- a/benches/bench_main.rs +++ b/benches/bench_main.rs @@ -419,6 +419,30 @@ fn bench_adler32_micro(c: &mut Criterion) { group.finish(); } +fn bench_adler32_tail(c: &mut Criterion) { + // Sizes to test tail optimization: 1, 2, 3 (small), 7 (4+3), 15 (8+4+3), 31 (16+8+4+3) + let sizes = [1, 2, 3, 7, 15, 31]; + let mut group = c.benchmark_group("Adler32 Tail"); + + for size in sizes { + let data = vec![0u8; size]; + group.throughput(Throughput::Bytes(size as u64)); + + group.bench_with_input( + BenchmarkId::new("libdeflate-rs", size), + &size, + |b, &_size| { + b.iter(|| adler32(1, &data)); + }, + ); + + group.bench_with_input(BenchmarkId::new("libdeflater", size), &size, |b, &_size| { + b.iter(|| libdeflater::adler32(&data)); + }); + } + group.finish(); +} + fn bench_checksums(c: &mut Criterion) { let files = [ ("XXS", "bench_data/data_XXS.bin"), @@ -1412,6 +1436,7 @@ criterion_group!( bench_parallel_alloc, bench_adler32_nano, bench_adler32_micro, + bench_adler32_tail, bench_crc32_micro, bench_decompress_offset8, bench_decompress_offset3, diff --git a/src/adler32/x86.rs b/src/adler32/x86.rs index 0813ccc..63ba486 100644 --- a/src/adler32/x86.rs +++ b/src/adler32/x86.rs @@ -61,24 +61,26 @@ macro_rules! adler32_tail { } // Remaining 0-3 bytes. - if $len > 0 { - let b = *$ptr as u32; - $s1 += b; - $s2 += $s1; - $ptr = $ptr.add(1); - $len -= 1; - } - if $len > 0 { - let b = *$ptr as u32; - $s1 += b; - $s2 += $s1; - $ptr = $ptr.add(1); - $len -= 1; - } - if $len > 0 { - let b = *$ptr as u32; - $s1 += b; - $s2 += $s1; + match $len { + 3 => { + let b0 = *$ptr as u32; + let b1 = *$ptr.add(1) as u32; + let b2 = *$ptr.add(2) as u32; + $s2 += ($s1 << 1) + $s1 + (b0 * 3) + (b1 * 2) + b2; + $s1 += b0 + b1 + b2; + } + 2 => { + let b0 = *$ptr as u32; + let b1 = *$ptr.add(1) as u32; + $s2 += ($s1 << 1) + (b0 * 2) + b1; + $s1 += b0 + b1; + } + 1 => { + let b0 = *$ptr as u32; + $s2 += $s1 + b0; + $s1 += b0; + } + _ => {} } } };