Skip to content

Commit dfc6f9e

Browse files
Optimize static Huffman table generation (#370)
Precompute the static Huffman tables (litlen, offset, match_len) once using `std::sync::OnceLock`. This eliminates redundant table generation for every block in Level 0/1 compression, improving throughput for small files. Also refactor `gen_codewords_from_lens` to use stack arrays instead of `Vec` to avoid heap allocation.

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
1 parent 97d1cce commit dfc6f9e

1 file changed

Lines changed: 110 additions & 44 deletions

File tree

src/compress/mod.rs

Lines changed: 110 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use rayon::prelude::*;
1111
use std::cmp::min;
1212
use std::io;
1313
use std::mem::MaybeUninit;
14+
use std::sync::OnceLock;
1415

1516
const LENGTH_WRITE_TABLE: [u32; 260] = [
1617
3, 3, 3, 3, 16777220, 33554437, 50331654, 67108871, 83886088, 100663305, 117440522, 134283275,
@@ -127,6 +128,107 @@ pub const MAX_LITLEN_CODEWORD_LEN: usize = 14;
127128
pub const MAX_OFFSET_CODEWORD_LEN: usize = 15;
128129
pub const MAX_PRE_CODEWORD_LEN: usize = 7;
129130

131+
/// Assigns canonical Huffman codewords to every symbol that has a non-zero
/// code length.
///
/// * `lens` - code length in bits for each symbol; `0` marks an unused symbol.
/// * `codewords` - output slice, indexed like `lens`. Entries for unused
///   symbols are left untouched.
/// * `max_len` - longest code length for which a starting codeword is
///   computed. Must be below 16 because the scratch arrays have 16 slots.
///
/// Codewords are stored bit-reversed within their length, so the first bit of
/// the code ends up in the least significant bit of the stored value.
///
/// Panics if a length or `max_len` is 16 or more, or if `codewords` is too
/// short to hold an entry for a used symbol.
fn gen_codewords_from_lens(lens: &[u8], codewords: &mut [u32], max_len: usize) {
    // Histogram of code lengths. Slot 0 stays zero because unused symbols are skipped.
    let mut len_counts = [0u32; 16];
    for &bits in lens.iter().filter(|&&bits| bits > 0) {
        len_counts[usize::from(bits)] += 1;
    }

    // First codeword of each length: the previous length's first codeword plus
    // the number of codes of that length, shifted left by one bit.
    let mut next_code = [0u32; 16];
    let mut running = 0u32;
    for bits in 1..=max_len {
        running = (running + len_counts[bits - 1]) << 1;
        next_code[bits] = running;
    }

    // Hand out consecutive codewords per length in symbol order.
    for (sym, &bits) in lens.iter().enumerate() {
        if bits == 0 {
            continue;
        }
        let slot = &mut next_code[usize::from(bits)];
        let msb_first = *slot;
        *slot += 1;
        // Reverse within 16 bits, then drop the unused low bits so only the
        // `bits`-wide reversed codeword remains.
        codewords[sym] = u32::from((msb_first as u16).reverse_bits()) >> (16 - bits);
    }
}
152+
153+
struct StaticTables {
154+
litlen_lens: [u8; DEFLATE_NUM_LITLEN_SYMS],
155+
offset_lens: [u8; DEFLATE_NUM_OFFSET_SYMS],
156+
litlen_table: [u64; DEFLATE_NUM_LITLEN_SYMS],
157+
offset_table: [u64; DEFLATE_NUM_OFFSET_SYMS],
158+
match_len_table: [u64; DEFLATE_MAX_MATCH_LEN + 1],
159+
}
160+
161+
fn compute_static_tables() -> StaticTables {
162+
let mut litlen_lens = [0u8; DEFLATE_NUM_LITLEN_SYMS];
163+
let mut offset_lens = [0u8; DEFLATE_NUM_OFFSET_SYMS];
164+
let mut litlen_codewords = [0u32; DEFLATE_NUM_LITLEN_SYMS];
165+
let mut offset_codewords = [0u32; DEFLATE_NUM_OFFSET_SYMS];
166+
let mut litlen_table = [0u64; DEFLATE_NUM_LITLEN_SYMS];
167+
let mut offset_table = [0u64; DEFLATE_NUM_OFFSET_SYMS];
168+
let mut match_len_table = [0u64; DEFLATE_MAX_MATCH_LEN + 1];
169+
170+
let mut i = 0;
171+
while i < 144 {
172+
litlen_lens[i] = 8;
173+
i += 1;
174+
}
175+
while i < 256 {
176+
litlen_lens[i] = 9;
177+
i += 1;
178+
}
179+
while i < 280 {
180+
litlen_lens[i] = 7;
181+
i += 1;
182+
}
183+
while i < 288 {
184+
litlen_lens[i] = 8;
185+
i += 1;
186+
}
187+
for i in 0..32 {
188+
offset_lens[i] = 5;
189+
}
190+
191+
gen_codewords_from_lens(&litlen_lens, &mut litlen_codewords, 9);
192+
gen_codewords_from_lens(&offset_lens, &mut offset_codewords, 5);
193+
194+
for i in 0..DEFLATE_NUM_LITLEN_SYMS {
195+
litlen_table[i] = (litlen_codewords[i] as u64) | ((litlen_lens[i] as u64) << 32);
196+
}
197+
for i in 0..DEFLATE_NUM_OFFSET_SYMS {
198+
let mut entry = (offset_codewords[i] as u64) | ((offset_lens[i] as u64) << 32);
199+
if i < 30 {
200+
// SAFETY: Arrays are static consts of size 30.
201+
entry |= (unsafe { *OFFSET_EXTRA_BITS_TABLE.get_unchecked(i) } as u64) << 40;
202+
entry |= (unsafe { *OFFSET_BASE_TABLE.get_unchecked(i) } as u64) << 48;
203+
}
204+
offset_table[i] = entry;
205+
}
206+
207+
for len in 3..=DEFLATE_MAX_MATCH_LEN {
208+
let len_info = unsafe { *LENGTH_WRITE_TABLE.get_unchecked(len) };
209+
let slot = (len_info >> 24) as usize;
210+
let extra = (len_info >> 16) as u8;
211+
let base = len_info as u16;
212+
213+
let huff_entry = unsafe { *litlen_table.get_unchecked(257 + slot) };
214+
let code = huff_entry as u16;
215+
let huff_len = (huff_entry >> 32) as u8;
216+
217+
match_len_table[len] = (code as u64)
218+
| ((huff_len as u64) << 16)
219+
| ((extra as u64) << 24)
220+
| ((base as u64) << 32);
221+
}
222+
223+
StaticTables {
224+
litlen_lens,
225+
offset_lens,
226+
litlen_table,
227+
offset_table,
228+
match_len_table,
229+
}
230+
}
231+
130232
#[derive(Debug, PartialEq, Eq)]
131233
#[must_use = "Compression result must be checked to ensure data integrity"]
132234
pub enum CompressResult {
@@ -1879,50 +1981,14 @@ impl Compressor {
18791981
}
18801982

18811983
fn load_static_huffman_codes(&mut self) {
1882-
let mut i = 0;
1883-
while i < 144 {
1884-
self.litlen_lens[i] = 8;
1885-
i += 1;
1886-
}
1887-
while i < 256 {
1888-
self.litlen_lens[i] = 9;
1889-
i += 1;
1890-
}
1891-
while i < 280 {
1892-
self.litlen_lens[i] = 7;
1893-
i += 1;
1894-
}
1895-
while i < 288 {
1896-
self.litlen_lens[i] = 8;
1897-
i += 1;
1898-
}
1899-
for i in 0..32 {
1900-
self.offset_lens[i] = 5;
1901-
}
1902-
fn gen_codewords_from_lens(lens: &[u8], codewords: &mut [u32], max_len: usize) {
1903-
let mut len_counts = vec![0u32; max_len + 1];
1904-
for &l in lens {
1905-
if l > 0 {
1906-
len_counts[l as usize] += 1;
1907-
}
1908-
}
1909-
let mut next_code = vec![0u32; max_len + 1];
1910-
let mut code = 0u32;
1911-
for len in 1..=max_len {
1912-
code = (code + len_counts[len - 1]) << 1;
1913-
next_code[len] = code;
1914-
}
1915-
for i in 0..lens.len() {
1916-
if lens[i] > 0 {
1917-
let c = next_code[lens[i] as usize];
1918-
next_code[lens[i] as usize] += 1;
1919-
codewords[i] = (c as u16).reverse_bits() as u32 >> (16 - lens[i]);
1920-
}
1921-
}
1922-
}
1923-
gen_codewords_from_lens(&self.litlen_lens, &mut self.litlen_codewords, 9);
1924-
gen_codewords_from_lens(&self.offset_lens, &mut self.offset_codewords, 5);
1925-
self.update_huffman_tables();
1984+
static STATIC_TABLES: OnceLock<StaticTables> = OnceLock::new();
1985+
let tables = STATIC_TABLES.get_or_init(compute_static_tables);
1986+
1987+
self.litlen_lens.copy_from_slice(&tables.litlen_lens);
1988+
self.offset_lens.copy_from_slice(&tables.offset_lens);
1989+
self.litlen_table.copy_from_slice(&tables.litlen_table);
1990+
self.offset_table.copy_from_slice(&tables.offset_table);
1991+
self.match_len_table.copy_from_slice(&tables.match_len_table);
19261992
}
19271993

19281994
#[inline(always)]

0 commit comments

Comments
 (0)