diff --git a/Cargo.toml b/Cargo.toml
index 14525a8..566a520 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -57,3 +57,7 @@ libdeflater = "1.25.0"
 [[bench]]
 name = "bench_main"
 harness = false
+
+[[bench]]
+name = "encoder_perf"
+harness = false
diff --git a/benches/encoder_perf.rs b/benches/encoder_perf.rs
new file mode 100644
index 0000000..121e97b
--- /dev/null
+++ b/benches/encoder_perf.rs
@@ -0,0 +1,28 @@
+use criterion::{Criterion, Throughput, criterion_group, criterion_main};
+use libdeflate::stream::DeflateEncoder;
+use std::io::Write;
+
+/// Benchmarks `DeflateEncoder::new(sink, 6)` compressing a 10 MiB buffer.
+/// The whole buffer is handed over in a single `write_all`, followed by `finish`.
+fn bench_encoder_parallel(c: &mut Criterion) {
+    const SIZE: usize = 10 * 1024 * 1024; // 10MB
+    // Byte ramp 0, 1, ..., 255 repeated until SIZE bytes have been produced.
+    let data: Vec<u8> = (0..SIZE).map(|i| (i % 256) as u8).collect();
+
+    let mut group = c.benchmark_group("DeflateEncoder Parallel");
+    group.throughput(Throughput::Bytes(SIZE as u64));
+
+    group.bench_function("write_all 10MB", |b| {
+        b.iter(|| {
+            // Output goes to `io::sink()`, so the compressed bytes are discarded.
+            let mut encoder = DeflateEncoder::new(std::io::sink(), 6); // Default 1MB buffer
+            encoder.write_all(&data).unwrap();
+            encoder.finish().unwrap();
+        });
+    });
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_encoder_parallel);
+criterion_main!(benches);
diff --git a/src/adler32/mod.rs b/src/adler32/mod.rs
index 0734c62..b7e13bc 100644
--- a/src/adler32/mod.rs
+++ b/src/adler32/mod.rs
@@ -46,7 +46,7 @@ fn adler32_chunk(s1: &mut u32, s2: &mut u32, p: &[u8]) {
             + (b12 * 4)
             + (b13 * 3)
             + (b14 * 2)
-            + (b15 * 1);
+            + b15;
 
         s1_local +=
             b0 + b1 + b2 + b3 + b4 + b5 + b6 + b7 + b8 + b9 + b10 + b11 + b12 + b13 + b14 + b15;
@@ -63,7 +63,7 @@ fn adler32_chunk(s1: &mut u32, s2: &mut u32, p: &[u8]) {
         let b2 = unsafe { *ptr.add(2) as u32 };
         let b3 = unsafe { *ptr.add(3) as u32 };
 
-        s2_local += (s1_local << 2) + (b0 * 4) + (b1 * 3) + (b2 * 2) + (b3 * 1);
+        s2_local += (s1_local << 2) + (b0 * 4) + (b1 * 3) + (b2 * 2) + b3;
         s1_local += b0 + b1 + b2 + b3;
 
         unsafe {
diff --git a/src/api.rs b/src/api.rs
index 4da83ae..7579769 100644
--- a/src/api.rs
+++ b/src/api.rs
@@ -8,7 +8,7 @@ pub struct Compressor {
 
 impl Compressor {
     pub fn new(level: i32) -> io::Result<Self> {
-        if level < 0 || level > 12 {
+        if !(0..=12).contains(&level) {
             return Err(io::Error::new(
                 io::ErrorKind::InvalidInput,
                 "Compression level must be between 0 and 12",
@@ -79,7 +79,7 @@ impl Compressor {
         let mut output = Vec::new();
         output
             .try_reserve_exact(bound)
-            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
+            .map_err(io::Error::other)?;
 
         // Use spare_capacity_mut to avoid zero-initialization.
         // Since len is 0, this returns the entire capacity as MaybeUninit.
@@ -96,7 +96,7 @@ impl Compressor {
                 Ok(output)
             }
             CompressResult::InsufficientSpace => {
-                Err(io::Error::new(io::ErrorKind::Other, "Insufficient space"))
+                Err(io::Error::other("Insufficient space"))
             }
         }
     }
@@ -131,7 +131,7 @@ impl Compressor {
         if res == CompressResult::Success {
             Ok(size)
         } else {
-            Err(io::Error::new(io::ErrorKind::Other, error_msg))
+            Err(io::Error::other(error_msg))
         }
     }
 }
@@ -142,6 +142,12 @@ pub struct Decompressor {
     limit_ratio: usize,
 }
 
+impl Default for Decompressor {
+    fn default() -> Self {
+        Self::new() // Delegates to `new`, so `default()` and `new()` build the same value.
+    }
+}
+
 impl Decompressor {
     pub fn new() -> Self {
         Self {
@@ -239,7 +245,7 @@ impl Decompressor {
         let mut output = Vec::new();
         output
             .try_reserve_exact(expected_size)
-            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
+            .map_err(io::Error::other)?;
 
         // Use spare_capacity_mut to avoid zero-initialization.
         let out_uninit = output.spare_capacity_mut();
@@ -304,8 +310,8 @@ fn is_overlapping(s1: &[u8], s2: &[u8]) -> bool {
     let p2 = s2.as_ptr() as usize;
     let len2 = s2.len();
 
-    let end1 = p1.checked_add(len1).unwrap_or(usize::MAX);
-    let end2 = p2.checked_add(len2).unwrap_or(usize::MAX);
+    let end1 = p1.saturating_add(len1);
+    let end2 = p2.saturating_add(len2);
 
     use std::cmp::{max, min};
     max(p1, p2) < min(end1, end2)
diff --git a/src/batch.rs b/src/batch.rs
index 0d51c6d..52090d4 100644
--- a/src/batch.rs
+++ b/src/batch.rs
@@ -60,6 +60,12 @@ impl BatchCompressor {
 
 pub struct BatchDecompressor;
 
+impl Default for BatchDecompressor {
+    fn default() -> Self {
+        Self::new() // Delegates to `new`, so `default()` and `new()` build the same value.
+    }
+}
+
 impl BatchDecompressor {
     pub fn new() -> Self {
         Self
diff --git a/src/compress/huffman_comp.rs b/src/compress/huffman_comp.rs
index 9b9ca20..947c4e4 100644
--- a/src/compress/huffman_comp.rs
+++ b/src/compress/huffman_comp.rs
@@ -42,7 +42,7 @@ fn build_tree(a: &mut [u32], sym_count: usize) {
     let mut e = 0;
     while e < last_idx {
         let new_freq;
-        if i + 1 <= last_idx && (b == e || (a[i + 1] & FREQ_MASK) <= (a[b] & FREQ_MASK)) {
+        if i < last_idx && (b == e || (a[i + 1] & FREQ_MASK) <= (a[b] & FREQ_MASK)) {
             new_freq = (a[i] & FREQ_MASK) + (a[i + 1] & FREQ_MASK);
             i += 2;
         } else if b + 2 <= e && (i > last_idx || (a[b + 1] & FREQ_MASK) < (a[i] & FREQ_MASK)) {
diff --git a/src/compress/matchfinder.rs b/src/compress/matchfinder.rs
index 9eb34a8..096c44c 100644
--- a/src/compress/matchfinder.rs
+++ b/src/compress/matchfinder.rs
@@ -441,16 +441,14 @@ unsafe fn match_len_avx2(a: *const u8, b: *const u8, max_len: usize) -> usize {
                 let mask = _mm256_movemask_epi8(cmp) as u32;
                 return len + 32 + (!mask).trailing_zeros() as usize;
             }
+        } else if _mm256_testz_si256(xor3, xor3) == 0 {
+            let cmp = _mm256_cmpeq_epi8(xor3, v_zero);
+            let mask = _mm256_movemask_epi8(cmp) as u32;
+            return len + 64 + (!mask).trailing_zeros() as usize;
         } else {
-            if _mm256_testz_si256(xor3, xor3) == 0 {
-                let cmp = _mm256_cmpeq_epi8(xor3, v_zero);
-                let mask = _mm256_movemask_epi8(cmp) as u32;
-                return len + 64 + (!mask).trailing_zeros() as usize;
-            } else {
-                let cmp = _mm256_cmpeq_epi8(xor4, v_zero);
-                let mask = _mm256_movemask_epi8(cmp) as u32;
-                return len + 96 + (!mask).trailing_zeros() as usize;
-            }
+            let cmp = _mm256_cmpeq_epi8(xor4, v_zero);
+            let mask = _mm256_movemask_epi8(cmp) as u32;
+            return len + 96 + (!mask).trailing_zeros() as usize;
         }
     }
 
@@ -691,7 +689,7 @@ impl MatchFinder {
     where
         F: FnMut(usize, usize),
     {
-        if pos.checked_add(3).map_or(true, |end| end > data.len()) {
+        if pos.checked_add(3).is_none_or(|end| end > data.len()) {
             return (0, 0);
         }
 
@@ -705,7 +703,7 @@ impl MatchFinder {
             src_val_4 = (src as *const u32).read_unaligned();
             src_val = src_val_4 & 0xFFFFFF;
         } else {
-            src_val = ((src.read() as u32) << 0)
+            src_val = (src.read() as u32)
                 | ((src.add(1).read() as u32) << 8)
                 | ((src.add(2).read() as u32) << 16);
         }
@@ -756,11 +754,10 @@ impl MatchFinder {
             }
 
             let mut match_ok = true;
-            if best_len >= 3 {
-                if *match_ptr.add(best_len) != *src.add(best_len) {
-                    match_ok = false;
-                }
+            if best_len >= 3 && *match_ptr.add(best_len) != *src.add(best_len) {
+                // The bytes at offset `best_len` differ: mark this candidate as not ok.
+                match_ok = false;
             }
 
             if match_ok {
                 if safe_to_read_u32 {
@@ -787,7 +784,7 @@ impl MatchFinder {
                     if p_rel + 4 <= data.len() {
                         match_val = (match_ptr as *const u32).read_unaligned() & 0xFFFFFF;
                     } else {
-                        match_val = ((match_ptr.read() as u32) << 0)
+                        match_val = (match_ptr.read() as u32)
                             | ((match_ptr.add(1).read() as u32) << 8)
                             | ((match_ptr.add(2).read() as u32) << 16);
                     }
@@ -949,7 +946,7 @@ impl MatchFinder {
         }
     }
     pub fn skip_match(&mut self, data: &[u8], pos: usize) {
-        if pos.checked_add(3).map_or(true, |end| end > data.len()) {
+        if pos.checked_add(3).is_none_or(|end| end > data.len()) {
             return;
         }
         unsafe {
@@ -958,7 +955,7 @@ impl MatchFinder {
             if pos + 4 <= data.len() {
                 src_val = (src as *const u32).read_unaligned() & 0xFFFFFF;
             } else {
-                src_val = ((src.read() as u32) << 0)
+                src_val = (src.read() as u32)
                     | ((src.add(1).read() as u32) << 8)
                     | ((src.add(2).read() as u32) << 16);
             }
@@ -993,7 +990,7 @@ impl MatchFinder {
         }
         if pos
             .checked_add(count + 3)
-            .map_or(true, |end| end > data.len())
+            .is_none_or(|end| end > data.len())
         {
             for i in 0..count {
                 self.skip_match(data, pos + i);
@@ -1068,7 +1065,7 @@ impl HtMatchFinder {
     }
 
     pub fn find_match(&mut self, data: &[u8], pos: usize) -> (usize, usize) {
-        if pos.checked_add(3).map_or(true, |end| end > data.len()) {
+        if pos.checked_add(3).is_none_or(|end| end > data.len()) {
             return (0, 0);
         }
 
@@ -1081,7 +1078,7 @@ impl HtMatchFinder {
             if safe_to_read_u32 {
                 src_val = (src as *const u32).read_unaligned() & 0xFFFFFF;
             } else {
-                src_val = ((src.read() as u32) << 0)
+                src_val = (src.read() as u32)
                     | ((src.add(1).read() as u32) << 8)
                     | ((src.add(2).read() as u32) << 16);
             }
@@ -1112,7 +1109,7 @@ impl HtMatchFinder {
             } else if p_rel + 4 <= data.len() {
                 match_val = (match_ptr as *const u32).read_unaligned() & 0xFFFFFF;
             } else {
-                match_val = ((match_ptr.read() as u32) << 0)
+                match_val = (match_ptr.read() as u32)
                     | ((match_ptr.add(1).read() as u32) << 8)
                     | ((match_ptr.add(2).read() as u32) << 16);
             }
@@ -1139,7 +1136,7 @@ impl HtMatchFinder {
     }
 
     pub fn skip_match(&mut self, data: &[u8], pos: usize) {
-        if pos.checked_add(3).map_or(true, |end| end > data.len()) {
+        if pos.checked_add(3).is_none_or(|end| end > data.len()) {
             return;
         }
         unsafe {
@@ -1148,7 +1145,7 @@ impl HtMatchFinder {
             if pos + 4 <= data.len() {
                 src_val = (src as *const u32).read_unaligned() & 0xFFFFFF;
             } else {
-                src_val = ((src.read() as u32) << 0)
+                src_val = (src.read() as u32)
                     | ((src.add(1).read() as u32) << 8)
                     | ((src.add(2).read() as u32) << 16);
             }
@@ -1281,7 +1278,7 @@ impl BtMatchFinder {
         max_depth: usize,
         mut visitor: V,
     ) -> V {
-        if pos.checked_add(4).map_or(true, |end| end > data.len()) {
+        if pos.checked_add(4).is_none_or(|end| end > data.len()) {
             return visitor;
         }
 
diff --git a/src/compress/mod.rs b/src/compress/mod.rs
index 814a1dc..61dd8c2 100644
--- a/src/compress/mod.rs
+++ b/src/compress/mod.rs
@@ -738,7 +738,7 @@ impl Compressor {
                                 ))
                             }
                         } else {
-                            Err(io::Error::new(io::ErrorKind::Other, "Compression failed"))
+                            Err(io::Error::other("Compression failed"))
                         }
                     },
                 )
@@ -808,7 +808,7 @@ impl Compressor {
         }
 
         mf.advance(input.len());
-        (total_bits + 7) / 8
+        total_bits.div_ceil(8)
     }
 
     fn calculate_block_size_fast<T: MatchFinderTrait>(
@@ -972,7 +972,7 @@ impl Compressor {
             let lit_cost = self.litlen_lens[block_input[pos] as usize] as u32;
             if cur_cost + lit_cost < self.dp_costs[pos + 1] {
                 self.dp_costs[pos + 1] = cur_cost + lit_cost;
-                self.dp_path[pos + 1] = (1 as u32) | (0 as u32) << 16;
+                self.dp_path[pos + 1] = 1_u32;
             }
 
             mf.find_matches(
@@ -1069,7 +1069,7 @@ impl Compressor {
     pub fn compress_to_size(&mut self, input: &[u8], final_block: bool) -> usize {
         if self.compression_level == 0 {
             let num_blocks = input.len() / 65535
-                + if input.len() % 65535 != 0 || (input.len() == 0 && final_block) {
+                + if !input.len().is_multiple_of(65535) || (input.is_empty() && final_block) {
                     1
                 } else {
                     0
@@ -1094,7 +1094,7 @@ impl Compressor {
         mf: &mut T,
         input: &[u8],
         start_pos: usize,
-        lazy_depth: u32,
+        _lazy_depth: u32,
     ) -> usize {
         self.split_stats.reset();
         let mut in_idx = start_pos;
@@ -1201,14 +1201,12 @@ impl Compressor {
                     precode_freqs[17] += 1;
                     run -= min(run, 10);
                 }
-            } else {
-                if run >= 4 {
-                    precode_freqs[len as usize] += 1;
-                    run -= 1;
-                    while run >= 3 {
-                        precode_freqs[16] += 1;
-                        run -= min(run, 6);
-                    }
+            } else if run >= 4 {
+                precode_freqs[len as usize] += 1;
+                run -= 1;
+                while run >= 3 {
+                    precode_freqs[16] += 1;
+                    run -= min(run, 6);
                 }
             }
             while run > 0 {
@@ -1769,7 +1767,7 @@ impl Compressor {
             let lit_cost = self.litlen_lens[block_input[pos] as usize] as u32;
             if cur_cost + lit_cost < self.dp_costs[pos + 1] {
                 self.dp_costs[pos + 1] = cur_cost + lit_cost;
-                self.dp_path[pos + 1] = (1 as u32) | (0 as u32) << 16;
+                self.dp_path[pos + 1] = 1_u32;
             }
 
             mf.find_matches(
@@ -1910,19 +1908,17 @@ impl Compressor {
                     precode_freqs[17] += 1;
                     run -= c;
                 }
-            } else {
-                if run >= 4 {
-                    precode_items[num_precode_items] = (len as u16) << 8;
+            } else if run >= 4 {
+                precode_items[num_precode_items] = (len as u16) << 8;
+                num_precode_items += 1;
+                precode_freqs[len as usize] += 1;
+                run -= 1;
+                while run >= 3 {
+                    let c = min(run, 6);
+                    precode_items[num_precode_items] = (16 << 8) | ((c - 3) as u16);
                     num_precode_items += 1;
-                    precode_freqs[len as usize] += 1;
-                    run -= 1;
-                    while run >= 3 {
-                        let c = min(run, 6);
-                        precode_items[num_precode_items] = (16 << 8) | ((c - 3) as u16);
-                        num_precode_items += 1;
-                        precode_freqs[16] += 1;
-                        run -= c;
-                    }
+                    precode_freqs[16] += 1;
+                    run -= c;
                 }
             }
             while run > 0 {
@@ -1971,11 +1967,10 @@ impl Compressor {
                 if !bs.write_bits(extra, 3) {
                     return false;
                 }
-            } else if sym == 18 {
-                if !bs.write_bits(extra, 7) {
-                    return false;
-                }
+            } else if sym == 18 && !bs.write_bits(extra, 7) {
+                // `write_bits` returned false for symbol 18's 7-bit extra field: propagate it.
+                return false;
             }
         }
         true
     }
@@ -1988,7 +1983,8 @@ impl Compressor {
         self.offset_lens.copy_from_slice(&tables.offset_lens);
         self.litlen_table.copy_from_slice(&tables.litlen_table);
         self.offset_table.copy_from_slice(&tables.offset_table);
-        self.match_len_table.copy_from_slice(&tables.match_len_table);
+        self.match_len_table
+            .copy_from_slice(&tables.match_len_table);
     }
 
     #[inline(always)]
@@ -2107,7 +2103,7 @@ impl Compressor {
         for len in 3..=DEFLATE_MAX_MATCH_LEN {
             let len_info = unsafe { *LENGTH_WRITE_TABLE.get_unchecked(len) };
             let len_slot = (len_info >> 24) as usize;
-            let len_extra_bits = ((len_info >> 16) & 0xFF) as u32;
+            let len_extra_bits = (len_info >> 16) & 0xFF;
 
             let len_cost =
                 unsafe { *self.litlen_lens.get_unchecked(257 + len_slot) } as u32 + len_extra_bits;
diff --git a/src/crc32/mod.rs b/src/crc32/mod.rs
index 5c5bf26..704ff47 100644
--- a/src/crc32/mod.rs
+++ b/src/crc32/mod.rs
@@ -57,22 +57,22 @@ pub fn crc32_slice8(mut crc: u32, p: &[u8]) -> u32 {
         let t4 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x300 + idx4) };
         let t5 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x200 + idx5) };
         let t6 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x100 + idx6) };
-        let t7 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x000 + idx7) };
+        let t7 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(idx7) };
 
         let t12 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x300 + idx12) };
         let t13 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x200 + idx13) };
         let t14 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x100 + idx14) };
-        let t15 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x000 + idx15) };
+        let t15 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(idx15) };
 
         let t20 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x300 + idx20) };
         let t21 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x200 + idx21) };
         let t22 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x100 + idx22) };
-        let t23 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x000 + idx23) };
+        let t23 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(idx23) };
 
         let t28 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x300 + idx28) };
         let t29 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x200 + idx29) };
         let t30 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x100 + idx30) };
-        let t31 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x000 + idx31) };
+        let t31 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(idx31) };
 
         // Dependent chain
         // Chunk A
@@ -156,13 +156,13 @@ pub fn crc32_slice8(mut crc: u32, p: &[u8]) -> u32 {
         let t4 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x300 + idx4) };
         let t5 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x200 + idx5) };
         let t6 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x100 + idx6) };
-        let t7 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x000 + idx7) };
+        let t7 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(idx7) };
 
         // Start independent lookups for the second chunk early
         let t12 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x300 + idx12) };
         let t13 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x200 + idx13) };
         let t14 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x100 + idx14) };
-        let t15 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x000 + idx15) };
+        let t15 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(idx15) };
 
         crc = ((t0 ^ t1) ^ (t2 ^ t3)) ^ ((t4 ^ t5) ^ (t6 ^ t7));
 
@@ -205,7 +205,7 @@ pub fn crc32_slice8(mut crc: u32, p: &[u8]) -> u32 {
         let t4 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x300 + idx4) };
         let t5 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x200 + idx5) };
         let t6 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x100 + idx6) };
-        let t7 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(0x000 + idx7) };
+        let t7 = unsafe { *CRC32_SLICE8_TABLE.get_unchecked(idx7) };
 
         // Optimization: Use tree-based XOR reduction to break dependency chains and increase ILP.
         crc = ((t0 ^ t1) ^ (t2 ^ t3)) ^ ((t4 ^ t5) ^ (t6 ^ t7));
@@ -222,7 +222,7 @@ pub fn crc32_slice8(mut crc: u32, p: &[u8]) -> u32 {
             *CRC32_SLICE8_TABLE.get_unchecked(0x300 + (crc as u8) as usize)
                 ^ *CRC32_SLICE8_TABLE.get_unchecked(0x200 + ((crc >> 8) as u8) as usize)
                 ^ *CRC32_SLICE8_TABLE.get_unchecked(0x100 + ((crc >> 16) as u8) as usize)
-                ^ *CRC32_SLICE8_TABLE.get_unchecked(0x000 + ((crc >> 24) as u8) as usize)
+                ^ *CRC32_SLICE8_TABLE.get_unchecked(((crc >> 24) as u8) as usize)
         };
         unsafe {
             ptr = ptr.add(4);
@@ -245,7 +245,7 @@ pub fn crc32_slice8(mut crc: u32, p: &[u8]) -> u32 {
                     (crc >> 24)
                         ^ *CRC32_SLICE8_TABLE.get_unchecked(0x200 + idx0 as usize)
                         ^ *CRC32_SLICE8_TABLE.get_unchecked(0x100 + idx1 as usize)
-                        ^ *CRC32_SLICE8_TABLE.get_unchecked(0x000 + idx2 as usize)
+                        ^ *CRC32_SLICE8_TABLE.get_unchecked(idx2 as usize)
                 };
             }
             2 => {
@@ -259,7 +259,7 @@ pub fn crc32_slice8(mut crc: u32, p: &[u8]) -> u32 {
                 crc = unsafe {
                     (crc >> 16)
                         ^ *CRC32_SLICE8_TABLE.get_unchecked(0x100 + idx0 as usize)
-                        ^ *CRC32_SLICE8_TABLE.get_unchecked(0x000 + idx1 as usize)
+                        ^ *CRC32_SLICE8_TABLE.get_unchecked(idx1 as usize)
                 };
             }
             1 => {
diff --git a/src/crc32/x86.rs b/src/crc32/x86.rs
index 0fe07b4..498b803 100644
--- a/src/crc32/x86.rs
+++ b/src/crc32/x86.rs
@@ -148,24 +148,22 @@ pub unsafe fn crc32_x86_pclmulqdq(mut crc: u32, p: &[u8]) -> u32 {
         let x0_new = fold_vec128(x0, x2, mults_256b);
         let x1_new = fold_vec128(x1, x3, mults_256b);
         x0 = fold_vec128(x0_new, x1_new, mults_128b);
-    } else {
-        if len >= 48 {
-            let v0 = _mm_loadu_si128(data.as_ptr() as *const __m128i);
-            let v1 = _mm_loadu_si128(data.as_ptr().add(16) as *const __m128i);
-            let v2 = _mm_loadu_si128(data.as_ptr().add(32) as *const __m128i);
-            x0 = _mm_xor_si128(x0, v0);
+    } else if len >= 48 {
+        let v0 = _mm_loadu_si128(data.as_ptr() as *const __m128i);
+        let v1 = _mm_loadu_si128(data.as_ptr().add(16) as *const __m128i);
+        let v2 = _mm_loadu_si128(data.as_ptr().add(32) as *const __m128i);
+        x0 = _mm_xor_si128(x0, v0);
 
-            let t1 = fold_vec128(x0, v2, mults_256b);
-            x0 = fold_vec128(v1, t1, mults_128b);
+        let t1 = fold_vec128(x0, v2, mults_256b);
+        x0 = fold_vec128(v1, t1, mults_128b);
 
-            data = &data[48..];
-            len -= 48;
-        } else {
-            let v0 = _mm_loadu_si128(data.as_ptr() as *const __m128i);
-            x0 = _mm_xor_si128(x0, v0);
-            data = &data[16..];
-            len -= 16;
-        }
+        data = &data[48..];
+        len -= 48;
+    } else {
+        let v0 = _mm_loadu_si128(data.as_ptr() as *const __m128i);
+        x0 = _mm_xor_si128(x0, v0);
+        data = &data[16..];
+        len -= 16;
     }
 
     if len >= 32 {
diff --git a/src/decompress/mod.rs b/src/decompress/mod.rs
index 7dc2d25..fe75bb2 100644
--- a/src/decompress/mod.rs
+++ b/src/decompress/mod.rs
@@ -84,6 +84,12 @@ pub enum DecompressResult {
     ShortInput,
 }
 
+impl Default for Decompressor {
+    fn default() -> Self {
+        Self::new() // Delegates to `new`, so `default()` and `new()` build the same value.
+    }
+}
+
 impl Decompressor {
     pub fn new() -> Self {
         Self {
@@ -738,26 +744,24 @@ impl Decompressor {
                                 i += 1;
                             }
                         }
+                    } else if offset >= length {
+                        std::ptr::copy_nonoverlapping(src, out_next, length);
                     } else {
-                        if offset >= length {
-                            std::ptr::copy_nonoverlapping(src, out_next, length);
-                        } else {
-                            // Optimization: Use u64 copy loop for overlapping case with offset >= 8.
-                            // This avoids function call overhead of copy_nonoverlapping for small chunks.
-                            // Since offset >= 8, we can read 8 bytes and write 8 bytes safely
-                            // (the read source is at least 8 bytes behind the write destination).
-                            let src_ptr = src;
-                            let dest_ptr = out_next;
-                            let mut i = 0;
-                            while i + 8 <= length {
-                                let val = (src_ptr.add(i) as *const u64).read_unaligned();
-                                (dest_ptr.add(i) as *mut u64).write_unaligned(val);
-                                i += 8;
-                            }
-                            while i < length {
-                                *dest_ptr.add(i) = *src_ptr.add(i);
-                                i += 1;
-                            }
+                        // Optimization: Use u64 copy loop for overlapping case with offset >= 8.
+                        // This avoids function call overhead of copy_nonoverlapping for small chunks.
+                        // Since offset >= 8, we can read 8 bytes and write 8 bytes safely
+                        // (the read source is at least 8 bytes behind the write destination).
+                        let src_ptr = src;
+                        let dest_ptr = out_next;
+                        let mut i = 0;
+                        while i + 8 <= length {
+                            let val = (src_ptr.add(i) as *const u64).read_unaligned();
+                            (dest_ptr.add(i) as *mut u64).write_unaligned(val);
+                            i += 8;
+                        }
+                        while i < length {
+                            *dest_ptr.add(i) = *src_ptr.add(i);
+                            i += 1;
                         }
                     }
                     out_next = out_next.add(length);
@@ -1077,7 +1081,7 @@ impl Decompressor {
         }
 
         let hdr = u16::from_be_bytes([input[0], input[1]]);
-        if hdr % 31 != 0 {
+        if !hdr.is_multiple_of(31) {
             return (DecompressResult::BadData, 0, 0);
         }
         if ((hdr >> 8) & 0xF) as u8 != ZLIB_CM_DEFLATE {
diff --git a/src/decompress/x86.rs b/src/decompress/x86.rs
index 648be7d..6e81552 100644
--- a/src/decompress/x86.rs
+++ b/src/decompress/x86.rs
@@ -2266,9 +2266,9 @@ pub unsafe fn decompress_bmi2_ptr(
                                                 41 => decompress_offset_cycle3::<7>(
                                                     out_next, src, v, length,
                                                 ),
-                                                42 => decompress_offset_42(
-                                                    out_next, src, v, length,
-                                                ),
+                                                42 => {
+                                                    decompress_offset_42(out_next, src, v, length)
+                                                }
                                                 43 => decompress_offset_cycle3::<5>(
                                                     out_next, src, v, length,
                                                 ),
diff --git a/src/stream.rs b/src/stream.rs
index 47c07f2..e1d2fb4 100644
--- a/src/stream.rs
+++ b/src/stream.rs
@@ -66,7 +66,7 @@ impl<W: Write + Send> DeflateEncoder<W> {
                 if output.len() < bound {
                     output
                         .try_reserve(bound - output.len())
-                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
+                        .map_err(io::Error::other)?;
                     // SAFETY: We just reserved sufficient capacity. The compressor writes to
                     // the buffer using `MaybeUninit` pointers, so uninitialized memory is fine.
                     unsafe {
@@ -91,7 +91,7 @@ impl<W: Write + Send> DeflateEncoder<W> {
                         writer.write_all(&output[..size])?;
                     }
                 } else {
-                    return Err(io::Error::new(io::ErrorKind::Other, "Compression failed"));
+                    return Err(io::Error::other("Compression failed"));
                 }
             } else {
                 let compressed_chunks: Vec<io::Result<usize>> = chunks
@@ -104,7 +104,7 @@ impl<W: Write + Send> DeflateEncoder<W> {
                         if output.len() < bound {
                             output
                                 .try_reserve(bound - output.len())
-                                .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
+                                .map_err(io::Error::other)?;
                             // SAFETY: We just reserved sufficient capacity. The compressor writes to
                             // the buffer using `MaybeUninit` pointers, so uninitialized memory is fine.
                             unsafe {
@@ -127,7 +127,7 @@ impl<W: Write + Send> DeflateEncoder<W> {
                         if res == CompressResult::Success {
                             Ok(size)
                         } else {
-                            Err(io::Error::new(io::ErrorKind::Other, "Compression failed"))
+                            Err(io::Error::other("Compression failed"))
                         }
                     })
                     .collect();
@@ -153,7 +153,7 @@ impl<W: Write + Send> DeflateEncoder<W> {
             if output.len() < bound {
                 output
                     .try_reserve(bound - output.len())
-                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
+                    .map_err(io::Error::other)?;
                 // SAFETY: We just reserved sufficient capacity. The compressor writes to
                 // the buffer using `MaybeUninit` pointers, so uninitialized memory is fine.
                 unsafe {
@@ -178,7 +178,7 @@ impl<W: Write + Send> DeflateEncoder<W> {
                     writer.write_all(&output[..size])?;
                 }
             } else {
-                return Err(io::Error::new(io::ErrorKind::Other, "Compression failed"));
+                return Err(io::Error::other("Compression failed"));
             }
         }
 
@@ -265,8 +265,8 @@ impl<R: Read> Read for DeflateDecoder<R> {
         }
 
         loop {
-            if self.write_pos >= 64 * 1024 {
-                if self.read_pos >= 32 * 1024 {
+            if self.write_pos >= 64 * 1024
+                && self.read_pos >= 32 * 1024 {
                     self.window.copy_within(
                         self.read_pos - 32 * 1024..self.write_pos,
                         32 * 1024 - (self.read_pos - 32 * 1024),
@@ -278,7 +278,6 @@ impl<R: Read> Read for DeflateDecoder<R> {
                     self.write_pos = amount_to_keep;
                     self.read_pos -= shift;
                 }
-            }
 
             let mut output_full = false;
             if self.input_pos < self.input_cap {
@@ -340,7 +339,7 @@ impl<R: Read> Read for DeflateDecoder<R> {
                     if self.input_buffer.len() < 1024 * 1024 {
                         self.input_buffer.resize(self.input_buffer.len() * 2, 0);
                     } else {
-                        return Err(io::Error::new(io::ErrorKind::Other, "input buffer full"));
+                        return Err(io::Error::other("input buffer full"));
                     }
                 }