From 0d8345310f9b755b7dc8b7b2ebb7281d5f47c432 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 11 Aug 2025 14:50:01 +0200 Subject: [PATCH 01/49] core implementation --- crates/consistent-hashing/Cargo.toml | 17 ++ crates/consistent-hashing/README.md | 60 ++++++ crates/consistent-hashing/src/lib.rs | 292 +++++++++++++++++++++++++++ 3 files changed, 369 insertions(+) create mode 100644 crates/consistent-hashing/Cargo.toml create mode 100644 crates/consistent-hashing/README.md create mode 100644 crates/consistent-hashing/src/lib.rs diff --git a/crates/consistent-hashing/Cargo.toml b/crates/consistent-hashing/Cargo.toml new file mode 100644 index 0000000..53f4e02 --- /dev/null +++ b/crates/consistent-hashing/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "consistent-hashing" +version = "0.1.0" +edition = "2021" +description = "Constant time consistent hashing algorithms." +repository = "https://github.com/github/rust-gems" +license = "MIT" +keywords = ["probabilistic", "algorithm", "consistent hashing", "jump hashing", "rendezvous hashing"] +categories = ["algorithms", "data-structures", "mathematics", "science"] + +[lib] +crate-type = ["lib", "staticlib"] +bench = false + +[dependencies] + +[dev-dependencies] diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md new file mode 100644 index 0000000..0db52d9 --- /dev/null +++ b/crates/consistent-hashing/README.md @@ -0,0 +1,60 @@ +# Consistent Hashing + +Consistent hashing maps keys to a changing set of nodes (shards, servers) so that when nodes join or leave, only a small fraction of keys move. It is used in distributed caches, databases, object stores, and load balancers to achieve scalability and high availability with minimal data reshuffling. 
+ +Common algorithms +- [Consistent hashing](https://en.wikipedia.org/wiki/Consistent_hashing) (hash ring with virtual nodes) +- [Rendezvous hashing](https://en.wikipedia.org/wiki/Rendezvous_hashing) +- [Jump consistent hash](https://en.wikipedia.org/wiki/Jump_consistent_hash) +- [Maglev hashing](https://research.google/pubs/pub44824) +- [AnchorHash: A Scalable Consistent Hash](https://arxiv.org/abs/1812.09674) +- [DXHash](https://arxiv.org/abs/2107.07930) +- [JumpBackHash](https://arxiv.org/abs/2403.18682) + +## Complexity summary + +where `N` is the number of nodes and `R` is the number of replicas. + +| Algorithm | Lookup per key | Node add/remove | Memory | Replication support | +|-------------------------|----------------------|----------------------------------------|---------------------------|--------------------------------------------------| +| Hash ring (with vnodes) | O(log N) binary search over N points; O(1) with specialized structures | O(log N) to insert/remove points | O(N) points | Yes: take next R distinct successors; O(log N + R) | +| Rendezvous | O(N) score per node; top-1 | O(1) (no state to rebalance) | O(N) node list | Yes: pick top R scores; O(N log R) | +| Jump consistent hash | O(log(N)) | O(1) | O(1) | Not native | +| AnchorHash | O(1) expected | O(1) expected/amortized | O(N) | Not native | +| DXHash | O(1) expected | O(1) expected | O(N) | Not native | +| JumpBackHash | O(1) | O(1) expected | O(1) | Not native | + +Replication of keys +- Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas evenly and avoid hotspots. +- Rendezvous hashing: replicate by selecting the top R nodes by score for the key. This naturally yields R distinct owners and supports weights. +- Jump consistent hash: the base function returns one bucket. Replication can be achieved by hashing (key, replica_index) and collecting R distinct buckets; this is simple but lacks the single-pass global ranking HRW provides. 
+ +Why replication matters +- Tolerates node failures and maintenance without data unavailability. +- Distributes read/write load across multiple owners, reducing hotspots. +- Enables fast recovery and higher tail-latency resilience. + +## N-Choose-R replication + +We define the consistent `n-choose-rk` replication as follows: + +1. for a given number `n` of nodes, choose `k` distinct nodes `S`. +2. for a given `key` the chosen set of nodes must be uniformly chosen from all possible sets of size `k`. +3. when `n` increases by one, exactly one node in the chosen set will be changed with probability `k/(n+1)`. + +For simplicity, nodes are represented by integers `0..n`. +Given `k` independent consistent hash functions `h_i(n)` for a given key, the following algorithm will have the desired properties: + +``` +fn consistent_choose_k(key: Key, k: usize, n: usize) -> Vec { + (0..k).rev().scan(n, |n, k| Some(consistent_choose_next(key, k, n))).collect() +} + +fn consistent_choose_next(key: Key, k: usize, n: usize) -> usize { + (0..k).map(|k| consistent_hash(key, k, n - k) + k).max() +} + +fn consistent_hash(key: Key, k: usize, n: usize) -> usize { + // compute the k-th independent consistent hash for `key` and `n` nodes. +} +``` diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs new file mode 100644 index 0000000..c2f9e7a --- /dev/null +++ b/crates/consistent-hashing/src/lib.rs @@ -0,0 +1,292 @@ +use std::hash::{DefaultHasher, Hash, Hasher}; + +/// One building block for the consistent hashing algorithm is a consistent +/// hash iterator which enumerates all the hashes for a given for a specific bucket. 
+/// A bucket covers the range `(1< Self { + let mut hasher = DefaultHasher::new(); + key.hash(&mut hasher); + bit.hash(&mut hasher); + Self { + hasher, + n, + is_first: true, + bit, + } + } +} + +impl Iterator for BucketIterator { + type Item = usize; + + fn next(&mut self) -> Option { + if self.bit == 0 { + return None; + } + if self.is_first { + let res = self.hasher.finish() % self.bit + self.bit; + if res < self.n as u64 { + self.n = res as usize; + return Some(self.n); + } + self.is_first = false; + } + loop { + 478392.hash(&mut self.hasher); + let res = self.hasher.finish() % (self.bit * 2); + if res & self.bit == 0 { + return None; + } + if res < self.n as u64 { + self.n = res as usize; + return Some(self.n); + } + } + } +} + +/// An iterator which enumerates all the consistent hashes for a given key +/// from largest to smallest in the range `0..n`. +pub struct ConsistentHashRevIterator { + bits: u64, + key: u64, + n: usize, + inner: BucketIterator, +} + +impl ConsistentHashRevIterator { + pub fn new(key: u64, n: usize) -> Self { + let mut hasher = DefaultHasher::new(); + key.hash(&mut hasher); + let bits = hasher.finish() % n.next_power_of_two() as u64; + let inner = BucketIterator::default(); + Self { + bits, + key, + n, + inner, + } + } +} + +impl Iterator for ConsistentHashRevIterator { + type Item = usize; + + fn next(&mut self) -> Option { + if self.n == 0 { + return None; + } + if let Some(res) = self.inner.next() { + return Some(res); + } + while self.bits > 0 { + let bit = 1 << self.bits.ilog2(); + self.bits ^= bit; + self.inner = BucketIterator::new(self.key, self.n, bit); + if let Some(res) = self.inner.next() { + return Some(res); + } + } + self.n = 0; + Some(self.n) + } +} + +/// Same as `ConsistentHashRevIterator`, but iterates from smallest to largest +/// for the range `n..`. 
+pub struct ConsistentHashIterator { + bits: u64, + key: u64, + n: usize, + stack: Vec, +} + +impl ConsistentHashIterator { + pub fn new(key: u64, n: usize) -> Self { + let mut hasher = DefaultHasher::new(); + key.hash(&mut hasher); + let mut bits = hasher.finish() as u64; + bits &= !((n + 2).next_power_of_two() as u64 / 2 - 1); + let stack = if n == 0 { vec![0] } else { vec![] }; + Self { + bits, + key, + n, + stack, + } + } +} + +impl Iterator for ConsistentHashIterator { + type Item = usize; + + fn next(&mut self) -> Option { + if let Some(res) = self.stack.pop() { + return Some(res); + } + while self.bits > 0 { + let bit = self.bits & !(self.bits - 1); + self.bits &= self.bits - 1; + let inner = BucketIterator::new(self.key, bit as usize * 2, bit); + self.stack = inner.take_while(|x| *x >= self.n).collect(); + if let Some(res) = self.stack.pop() { + return Some(res); + } + } + None + } +} + +/// Wrapper around `ConsistentHashIterator` and `ConsistentHashRevIterator` to compute +/// the next or previous consistent hash for a given key for a given number of nodes `n`. +pub struct ConsistentHasher { + key: u64, +} + +impl ConsistentHasher { + pub fn new(key: u64) -> Self { + Self { key } + } + + pub fn prev(&self, n: usize) -> usize { + let mut sampler = ConsistentHashRevIterator::new(self.key, n); + sampler.next().expect("n must be > 0!") + } + + pub fn next(&self, n: usize) -> usize { + let mut sampler = ConsistentHashIterator::new(self.key, n); + sampler.next().expect("Exceeded iterator bounds :(") + } +} + +/// Implementation of a consistent choose k hashing algorithm. +/// It returns k distinct consistent hashes in the range `0..n`. +/// The hashes are consistent when `n` changes and when `k` changes! +/// I.e. on average exactly `1/(n+1)` (resp. `1/(k+1)`) many hashes will change +/// when `n` (resp. `k`) increases by one. Additionally, the returned `k` tuple +/// is guaranteed to be uniformely chosen from all possible `n-choose-k` tuples. 
+pub struct ConsistentChooseKHasher { + key: u64, + k: usize, +} + +impl ConsistentChooseKHasher { + pub fn new(key: u64, k: usize) -> Self { + Self { key, k } + } + + // TODO: Implement this as an iterator! + pub fn prev(&self, mut n: usize) -> Vec { + let mut samples = Vec::with_capacity(self.k); + let mut samplers: Vec<_> = (0..self.k) + .map(|i| ConsistentHashRevIterator::new(self.key + 43987492 * i as u64, n - i).peekable()) + .collect(); + for i in (0..self.k).rev() { + let mut max = 0; + for k in 0..=i { + while samplers[k].peek() >= Some(&(n - k)) && n - k > 0 { + samplers[k].next(); + } + max = max.max(samplers[k].peek().unwrap() + k); + } + samples.push(max); + n = max; + } + samples.sort(); + samples + } +} + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_uniform_1() { + for k in 0..100 { + let sampler = ConsistentHasher::new(k); + for n in 0..1000 { + assert!(sampler.prev(n + 1) <= sampler.prev(n + 2)); + let next = sampler.next(n); + assert_eq!(next, sampler.prev(next + 1)); + } + let mut iter_rev: Vec<_> = ConsistentHashIterator::new(k, 0) + .take_while(|x| *x < 1000) + .collect(); + iter_rev.reverse(); + let iter: Vec<_> = ConsistentHashRevIterator::new(k, 1000).collect(); + assert_eq!(iter, iter_rev); + } + let mut stats = vec![0; 13]; + for i in 0..100000 { + let sampler = ConsistentHasher::new(i); + let x = sampler.prev(stats.len()); + stats[x] += 1; + } + println!("{stats:?}"); + } + + #[test] + fn test_uniform_k() { + const K: usize = 3; + for k in 0..100 { + let sampler = ConsistentChooseKHasher::new(k, K); + for n in K..1000 { + let samples = sampler.prev(n + 1); + assert!(samples.len() == K); + for i in 0..K - 1 { + assert!(samples[i] < samples[i + 1]); + } + let next = sampler.prev(n + 2); + for i in 0..K { + assert!(samples[i] <= next[i]); + } + let mut merged = samples.clone(); + merged.extend(next.clone()); + merged.sort(); + merged.dedup(); + assert!( + merged.len() == K || merged.len() == K + 1, + "Unexpected 
{samples:?} vs. {next:?}" + ); + } + } + let mut stats = vec![0; 8]; + for i in 0..32 { + let sampler = ConsistentChooseKHasher::new(i + 32783, 2); + let samples = sampler.prev(stats.len()); + for s in samples { + stats[s] += 1; + } + } + println!("{stats:?}"); + // Test consistency when increasing k! + for k in 1..10 { + for n in k + 1..20 { + for key in 0..1000 { + let sampler1 = ConsistentChooseKHasher::new(key, k); + let sampler2 = ConsistentChooseKHasher::new(key, k + 1); + let set1 = sampler1.prev(n); + let set2 = sampler2.prev(n); + assert_eq!(set1.len(), k); + assert_eq!(set2.len(), k + 1); + let mut merged = set1.clone(); + merged.extend(set2); + merged.sort(); + merged.dedup(); + assert_eq!(merged.len(), k + 1); + } + } + } + } +} From 89f8ad42a11a3b3a7ffa3aa5d2cd9b5e093ed9c7 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Tue, 12 Aug 2025 09:42:37 +0200 Subject: [PATCH 02/49] Update README.md --- crates/consistent-hashing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index 0db52d9..27b6d00 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -43,7 +43,7 @@ We define the consistent `n-choose-rk` replication as follows: 3. when `n` increases by one, exactly one node in the chosen set will be changed with probability `k/(n+1)`. For simplicity, nodes are represented by integers `0..n`. 
-Given `k` independent consistent hash functions `h_i(n)` for a given key, the following algorithm will have the desired properties: +Given `k` independent consistent hash functions `consistent_hash(key, k, n)` for a given `key`, the following algorithm will have the desired properties: ``` fn consistent_choose_k(key: Key, k: usize, n: usize) -> Vec { From b03f0b70ffe37912c2ca23bff5f56b57e42d9c94 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Tue, 12 Aug 2025 10:23:09 +0200 Subject: [PATCH 03/49] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- crates/consistent-hashing/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs index c2f9e7a..d3bf878 100644 --- a/crates/consistent-hashing/src/lib.rs +++ b/crates/consistent-hashing/src/lib.rs @@ -1,7 +1,7 @@ use std::hash::{DefaultHasher, Hash, Hasher}; /// One building block for the consistent hashing algorithm is a consistent -/// hash iterator which enumerates all the hashes for a given for a specific bucket. +/// hash iterator which enumerates all the hashes for a specific bucket. /// A bucket covers the range `(1< Date: Wed, 13 Aug 2025 10:47:04 +0200 Subject: [PATCH 04/49] finish proof --- crates/consistent-hashing/README.md | 36 +++++++++++++++++++++------- crates/consistent-hashing/src/lib.rs | 14 +++++------ 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index 27b6d00..ac238a0 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -36,25 +36,45 @@ Why replication matters ## N-Choose-R replication -We define the consistent `n-choose-rk` replication as follows: +We define the consistent `n-choose-k` replication as follows: -1. for a given number `n` of nodes, choose `k` distinct nodes `S`. -2. 
for a given `key` the chosen set of nodes must be uniformly chosen from all possible sets of size `k`. -3. when `n` increases by one, exactly one node in the chosen set will be changed with probability `k/(n+1)`. +1. For a given number `n` of nodes, choose `k` distinct nodes `S`. +2. For a given `key` the chosen set of nodes must be uniformly chosen from all possible sets of size `k`. +3. When `n` increases by one, exactly one node in the chosen set will be changed. +4. and the node will be changed with probability `k/(n+1)`. For simplicity, nodes are represented by integers `0..n`. Given `k` independent consistent hash functions `consistent_hash(key, k, n)` for a given `key`, the following algorithm will have the desired properties: ``` fn consistent_choose_k(key: Key, k: usize, n: usize) -> Vec { - (0..k).rev().scan(n, |n, k| Some(consistent_choose_next(key, k, n))).collect() + (0..k).rev().scan(n, |n, k| Some(consistent_choose_max(key, k + 1, n))).collect() } -fn consistent_choose_next(key: Key, k: usize, n: usize) -> usize { +fn consistent_choose_max(key: Key, k: usize, n: usize) -> usize { (0..k).map(|k| consistent_hash(key, k, n - k) + k).max() } -fn consistent_hash(key: Key, k: usize, n: usize) -> usize { - // compute the k-th independent consistent hash for `key` and `n` nodes. +fn consistent_hash(key: Key, i: usize, n: usize) -> usize { + // compute the i-th independent consistent hash for `key` and `n` nodes. } ``` + +Let's define `M(k,n) = consistent_choose_max(_, k, n)` and `S(k, n) := consistent_choose_k(_, k, n)` as short-cuts for some arbitrary fixed `key`. + +Since `M(k, n) < n` and `S(k, n) = {M(k, n)} ∪ S(k - 1, M(k, n))` for `k > 1`, `S(k, n)` constructs a strictly monotonically decreasing sequence. The sequence outputs exactly `k` elements which therefore must all be distinct which proves property 1 for `k <= n`. + +Properties 2, 3, and 4 can be proven via induction as follows. 
+ +`k = 1`: We expect that `consistent_hash` returns a single uniformly distributed node index which is consistent in `n`, i.e. changes the hash value with probability `1/(n+1)`, when `n` increments by one. In our implementation, we use an `O(1)` implementation of the jump-hash algorithm. For `k=1`, `consistent_choose_k(key, 1, n)` becomes a single function call to `consistent_choose_max(key, 1, n)` which in turn calls `consistent_hash(key, 0, n)`. I.e. `consistent_choose_k` inherits the all the desired properties from `consistent_hash` for `k=1` and all `n>=1`. + +`k -> k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for this is `(n+1-k)/(n+1)` for the former by induction and `(n-k)/(n+1-k)` by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4. + +Property 3 is trivially satisfied if `S(k+1, n+1) = S(k+1, n)`. So, we focus on the case where `S(k+1, n+1) != S(k+1, n)`, which implies that `n ∈ S(k+1, n+1)` as largest element. +We know that `S(k+1, n) = {m} ∪ S(k, m)` for some `m` by definition and `S(k, n) = S(k, u) ∖ {v} ∪ {w}` by induction for some `u`, `v`, and `w`. Thus far we have `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, u) ∖ {v} ∪ {w}`. + +If `u == m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exaclty in the elemetns `m` and `n` proving property 3. + +If `u != m`, then `consistent_hash(_, k, n) = m`, since that's the only way how the largest values in `S(k+1, n)` and `S(k, n)` can differ. In this case, `m ∉ S(k+1, n+1)`, since `n` (and not `m`) is the largest element of `S(k+1, n+1)`. 
Furthermore, `S(k, n) = S(k, m)`, since `consistent_hash(_, i, n) < m` for all `i < k` (otherwise there is a contradiction). +Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof. + diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs index c2f9e7a..8ae98ec 100644 --- a/crates/consistent-hashing/src/lib.rs +++ b/crates/consistent-hashing/src/lib.rs @@ -157,14 +157,14 @@ impl ConsistentHasher { Self { key } } - pub fn prev(&self, n: usize) -> usize { + pub fn prev(&self, n: usize) -> Option { let mut sampler = ConsistentHashRevIterator::new(self.key, n); - sampler.next().expect("n must be > 0!") + sampler.next() } - pub fn next(&self, n: usize) -> usize { + pub fn next(&self, n: usize) -> Option { let mut sampler = ConsistentHashIterator::new(self.key, n); - sampler.next().expect("Exceeded iterator bounds :(") + sampler.next() } } @@ -217,8 +217,8 @@ mod tests { let sampler = ConsistentHasher::new(k); for n in 0..1000 { assert!(sampler.prev(n + 1) <= sampler.prev(n + 2)); - let next = sampler.next(n); - assert_eq!(next, sampler.prev(next + 1)); + let next = sampler.next(n).unwrap(); + assert_eq!(next, sampler.prev(next + 1).unwrap()); } let mut iter_rev: Vec<_> = ConsistentHashIterator::new(k, 0) .take_while(|x| *x < 1000) @@ -230,7 +230,7 @@ mod tests { let mut stats = vec![0; 13]; for i in 0..100000 { let sampler = ConsistentHasher::new(i); - let x = sampler.prev(stats.len()); + let x = sampler.prev(stats.len()).unwrap(); stats[x] += 1; } println!("{stats:?}"); From a5eb91eeb68300a15a945a90d1499a7ffbe1a7e6 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 13 Aug 2025 11:24:22 +0200 Subject: [PATCH 05/49] Update README.md --- crates/consistent-hashing/README.md | 58 +++++++++++++++++++---------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/crates/consistent-hashing/README.md 
b/crates/consistent-hashing/README.md index ac238a0..b0bacf3 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -15,36 +15,33 @@ Common algorithms where `N` is the number of nodes and `R` is the number of replicas. -| Algorithm | Lookup per key | Node add/remove | Memory | Replication support | -|-------------------------|----------------------|----------------------------------------|---------------------------|--------------------------------------------------| -| Hash ring (with vnodes) | O(log N) binary search over N points; O(1) with specialized structures | O(log N) to insert/remove points | O(N) points | Yes: take next R distinct successors; O(log N + R) | -| Rendezvous | O(N) score per node; top-1 | O(1) (no state to rebalance) | O(N) node list | Yes: pick top R scores; O(N log R) | -| Jump consistent hash | O(log(N)) | O(1) | O(1) | Not native | -| AnchorHash | O(1) expected | O(1) expected/amortized | O(N) | Not native | -| DXHash | O(1) expected | O(1) expected | O(N) | Not native | -| JumpBackHash | O(1) | O(1) expected | O(1) | Not native | +| Algorithm | Lookup per key | Node add/remove | Memory | Lookup with replication | +| | (no replication) | | | | +|-------------------------|---------------------|----------------------------------------|---------------------------|-------------------------------------| +| Hash ring (with vnodes) | O(log N): binary search over N points; O(1): with specialized structures | O(log N) | O(N) | O(log N + R): Take next R distinct successors | +| Rendezvous | O(N): max score | O(1) | O(N) node list | O(N log R): pick top R scores | +| Jump consistent hash | O(log(N)) expected | 0 | O(1) | Not native | +| AnchorHash | O(1) expected | O(1)? | O(N)? | Not native | +| DXHash | O(1) expected | O(1)? | O(N)? 
| Not native | +| JumpBackHash | O(1) expected | 0 | O(1) | Not native | +| $ConsistentChooseK$ | $O(1) expected$ | $0$ | $O(1)$ | $O(R^2)$; $O(R log(R))$: using heap | Replication of keys -- Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas evenly and avoid hotspots. +- Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas more evenly. Replicas are not independently distributed. - Rendezvous hashing: replicate by selecting the top R nodes by score for the key. This naturally yields R distinct owners and supports weights. -- Jump consistent hash: the base function returns one bucket. Replication can be achieved by hashing (key, replica_index) and collecting R distinct buckets; this is simple but lacks the single-pass global ranking HRW provides. +- Jump consistent hash and variatns: the base function returns one bucket. Replication can be achieved by hashing (key, replica_index) and collecting R distinct buckets; this is simple but loses the consistency property! +- ConsistentChooseK: Faster and more memory efficient than all other solutions. Why replication matters - Tolerates node failures and maintenance without data unavailability. - Distributes read/write load across multiple owners, reducing hotspots. - Enables fast recovery and higher tail-latency resilience. -## N-Choose-R replication - -We define the consistent `n-choose-k` replication as follows: - -1. For a given number `n` of nodes, choose `k` distinct nodes `S`. -2. For a given `key` the chosen set of nodes must be uniformly chosen from all possible sets of size `k`. -3. When `n` increases by one, exactly one node in the chosen set will be changed. -4. and the node will be changed with probability `k/(n+1)`. +## ConsistentChooseK algorithm -For simplicity, nodes are represented by integers `0..n`. 
-Given `k` independent consistent hash functions `consistent_hash(key, k, n)` for a given `key`, the following algorithm will have the desired properties: +The following functions summarize the core algorithmic innovation as a minimal Rust excerpt. +`n` is the number of nodes and `k` is the number of desired replica. +The chosen nodes are returned as distinct integers in the range `0..n`. ``` fn consistent_choose_k(key: Key, k: usize, n: usize) -> Vec { @@ -60,7 +57,28 @@ fn consistent_hash(key: Key, i: usize, n: usize) -> usize { } ``` +`consistent_choose_k` makes `k` calls to `consistent_choose_max` which calls `consistent_hash` another `k` times. +In total, `consistent_hash` is called `k * (k+1) / 2` Utilizing a `O(1)` solution for `consistent_hash` leads to a `O(k^2)` runtime. +This runtime can be further improved by replacing the max operation with a heap where popped elements are updated according to the new arguments `n` and `k`. +With this optimization, the complexity reduces to `O(k log k)`. +With some probabilistic bucketing strategy, it should be possible to reduce the expected runtime to `O(k)`. +For small `k` neither optimization is probably improving the actual performance though. + +The next section proves why this simple code works. + +## N-Choose-R replication + +We define the consistent `n-choose-k` replication as follows: + +1. For a given number `n` of nodes, choose `k` distinct nodes `S`. +2. For a given `key` the chosen set of nodes must be uniformly chosen from all possible sets of size `k`. +3. When `n` increases by one, exactly one node in the chosen set will be changed. +4. and the node will be changed with probability `k/(n+1)`. + +In the remainder of this section we prove that the `consistent_choose_k` algorithm satisfies those properties. + Let's define `M(k,n) = consistent_choose_max(_, k, n)` and `S(k, n) := consistent_choose_k(_, k, n)` as short-cuts for some arbitrary fixed `key`. 
+We assume that `consistent_hash(key, k, n)` computes `k` independent consistent hash functions. Since `M(k, n) < n` and `S(k, n) = {M(k, n)} ∪ S(k - 1, M(k, n))` for `k > 1`, `S(k, n)` constructs a strictly monotonically decreasing sequence. The sequence outputs exactly `k` elements which therefore must all be distinct which proves property 1 for `k <= n`. From 220624d716d15e06ba451eaa9f7178e08537fef6 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 13 Aug 2025 11:26:35 +0200 Subject: [PATCH 06/49] Update README.md --- crates/consistent-hashing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index b0bacf3..c9b7484 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -24,7 +24,7 @@ where `N` is the number of nodes and `R` is the number of replicas. | AnchorHash | O(1) expected | O(1)? | O(N)? | Not native | | DXHash | O(1) expected | O(1)? | O(N)? | Not native | | JumpBackHash | O(1) expected | 0 | O(1) | Not native | -| $ConsistentChooseK$ | $O(1) expected$ | $0$ | $O(1)$ | $O(R^2)$; $O(R log(R))$: using heap | +| $$ConsistentChooseK$$ | $$O(1) expected$$ | $$0$$ | $$O(1)$$ | $$O(R^2)$$; $$O(R log(R))$$: using heap | Replication of keys - Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas more evenly. Replicas are not independently distributed. 
From fc69a9eaab94cb9f5cb20fb9bb63dfec73857498 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 13 Aug 2025 11:33:25 +0200 Subject: [PATCH 07/49] Update README.md --- crates/consistent-hashing/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index c9b7484..27e5c2f 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -5,7 +5,7 @@ Consistent hashing maps keys to a changing set of nodes (shards, servers) so tha Common algorithms - [Consistent hashing](https://en.wikipedia.org/wiki/Consistent_hashing) (hash ring with virtual nodes) - [Rendezvous hashing](https://en.wikipedia.org/wiki/Rendezvous_hashing) -- [Jump consistent hash](https://en.wikipedia.org/wiki/Jump_consistent_hash) +- [Jump consistent hash](https://arxiv.org/pdf/1406.2294) - [Maglev hashing](https://research.google/pubs/pub44824) - [AnchorHash: A Scalable Consistent Hash](https://arxiv.org/abs/1812.09674) - [DXHash](https://arxiv.org/abs/2107.07930) @@ -24,7 +24,7 @@ where `N` is the number of nodes and `R` is the number of replicas. | AnchorHash | O(1) expected | O(1)? | O(N)? | Not native | | DXHash | O(1) expected | O(1)? | O(N)? | Not native | | JumpBackHash | O(1) expected | 0 | O(1) | Not native | -| $$ConsistentChooseK$$ | $$O(1) expected$$ | $$0$$ | $$O(1)$$ | $$O(R^2)$$; $$O(R log(R))$$: using heap | +| **ConsistentChooseK** | **O(1) expected** | **0** | **O(1)** | **O(R^2)**; **O(R log(R))**: using heap | Replication of keys - Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas more evenly. Replicas are not independently distributed. @@ -64,7 +64,7 @@ With this optimization, the complexity reduces to `O(k log k)`. With some probabilistic bucketing strategy, it should be possible to reduce the expected runtime to `O(k)`. 
For small `k` neither optimization is probably improving the actual performance though. -The next section proves why this simple code works. +The next section proves the correctness of this algorithm. ## N-Choose-R replication @@ -91,8 +91,8 @@ Properties 2, 3, and 4 can be proven via induction as follows. Property 3 is trivially satisfied if `S(k+1, n+1) = S(k+1, n)`. So, we focus on the case where `S(k+1, n+1) != S(k+1, n)`, which implies that `n ∈ S(k+1, n+1)` as largest element. We know that `S(k+1, n) = {m} ∪ S(k, m)` for some `m` by definition and `S(k, n) = S(k, u) ∖ {v} ∪ {w}` by induction for some `u`, `v`, and `w`. Thus far we have `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, u) ∖ {v} ∪ {w}`. -If `u == m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exaclty in the elemetns `m` and `n` proving property 3. +If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exaclty in the elemetns `m` and `n` proving property 3. -If `u != m`, then `consistent_hash(_, k, n) = m`, since that's the only way how the largest values in `S(k+1, n)` and `S(k, n)` can differ. In this case, `m ∉ S(k+1, n+1)`, since `n` (and not `m`) is the largest element of `S(k+1, n+1)`. Furthermore, `S(k, n) = S(k, m)`, since `consistent_hash(_, i, n) < m` for all `i < k` (otherwise there is a contradiction). +If `u ≠ m`, then `consistent_hash(_, k, n) = m`, since that's the only way how the largest values in `S(k+1, n)` and `S(k, n)` can differ. In this case, `m ∉ S(k+1, n+1)`, since `n` (and not `m`) is the largest element of `S(k+1, n+1)`. Furthermore, `S(k, n) = S(k, m)`, since `consistent_hash(_, i, n) < m` for all `i < k` (otherwise there is a contradiction). Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof. 
From 90259e92e127907a670cb36bcaafe541cf7e42cf Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 13 Aug 2025 11:45:18 +0200 Subject: [PATCH 08/49] Update README.md --- crates/consistent-hashing/README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index 27e5c2f..bcd137a 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -20,16 +20,17 @@ where `N` is the number of nodes and `R` is the number of replicas. |-------------------------|---------------------|----------------------------------------|---------------------------|-------------------------------------| | Hash ring (with vnodes) | O(log N): binary search over N points; O(1): with specialized structures | O(log N) | O(N) | O(log N + R): Take next R distinct successors | | Rendezvous | O(N): max score | O(1) | O(N) node list | O(N log R): pick top R scores | -| Jump consistent hash | O(log(N)) expected | 0 | O(1) | Not native | -| AnchorHash | O(1) expected | O(1)? | O(N)? | Not native | -| DXHash | O(1) expected | O(1)? | O(N)? | Not native | +| Jump consistent hash | O(log(N)) expected | 0 | O(1) | O(R log N) | +| AnchorHash | O(1) expected | O(1) | O(N) | Not native | +| DXHash | O(1) expected | O(1) | O(N) | Not native | | JumpBackHash | O(1) expected | 0 | O(1) | Not native | | **ConsistentChooseK** | **O(1) expected** | **0** | **O(1)** | **O(R^2)**; **O(R log(R))**: using heap | Replication of keys - Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas more evenly. Replicas are not independently distributed. - Rendezvous hashing: replicate by selecting the top R nodes by score for the key. This naturally yields R distinct owners and supports weights. -- Jump consistent hash and variatns: the base function returns one bucket. 
Replication can be achieved by hashing (key, replica_index) and collecting R distinct buckets; this is simple but loses the consistency property! +- Jump consistent hash: the base function doesn't support replication. But the math can be easily modified to support consistent replication. +- JumpBackHash and variants: The trick of Jump consistent hash to support replication won't work here due to the additional state introduced. - ConsistentChooseK: Faster and more memory efficient than all other solutions. Why replication matters From 8480ea3ed645c6944be006cf91fb6d8c50530cc0 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 13 Aug 2025 15:55:20 +0200 Subject: [PATCH 09/49] Replace key with hasher traits --- crates/consistent-hashing/README.md | 7 +- crates/consistent-hashing/src/lib.rs | 190 +++++++++++++++++---------- 2 files changed, 127 insertions(+), 70 deletions(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index bcd137a..4f29c56 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -67,7 +67,7 @@ For small `k` neither optimization is probably improving the actual performance The next section proves the correctness of this algorithm. -## N-Choose-R replication +## N-Choose-K replication We define the consistent `n-choose-k` replication as follows: @@ -87,7 +87,7 @@ Properties 2, 3, and 4 can be proven via induction as follows. `k = 1`: We expect that `consistent_hash` returns a single uniformly distributed node index which is consistent in `n`, i.e. changes the hash value with probability `1/(n+1)`, when `n` increments by one. In our implementation, we use an `O(1)` implementation of the jump-hash algorithm. For `k=1`, `consistent_choose_k(key, 1, n)` becomes a single function call to `consistent_choose_max(key, 1, n)` which in turn calls `consistent_hash(key, 0, n)`. I.e. 
`consistent_choose_k` inherits the all the desired properties from `consistent_hash` for `k=1` and all `n>=1`. -`k -> k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for this is `(n+1-k)/(n+1)` for the former by induction and `(n-k)/(n+1-k)` by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4. +`k → k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for this is `(n+1-k)/(n+1)` for the former by induction and `(n-k)/(n+1-k)` by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4. Property 3 is trivially satisfied if `S(k+1, n+1) = S(k+1, n)`. So, we focus on the case where `S(k+1, n+1) != S(k+1, n)`, which implies that `n ∈ S(k+1, n+1)` as largest element. We know that `S(k+1, n) = {m} ∪ S(k, m)` for some `m` by definition and `S(k, n) = S(k, u) ∖ {v} ∪ {w}` by induction for some `u`, `v`, and `w`. Thus far we have `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, u) ∖ {v} ∪ {w}`. @@ -95,5 +95,4 @@ We know that `S(k+1, n) = {m} ∪ S(k, m)` for some `m` by definition and `S(k, If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exaclty in the elemetns `m` and `n` proving property 3. If `u ≠ m`, then `consistent_hash(_, k, n) = m`, since that's the only way how the largest values in `S(k+1, n)` and `S(k, n)` can differ. In this case, `m ∉ S(k+1, n+1)`, since `n` (and not `m`) is the largest element of `S(k+1, n+1)`. 
Furthermore, `S(k, n) = S(k, m)`, since `consistent_hash(_, i, n) < m` for all `i < k` (otherwise there is a contradiction). -Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof. - +Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof. \ No newline at end of file diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs index 647215c..dd7675c 100644 --- a/crates/consistent-hashing/src/lib.rs +++ b/crates/consistent-hashing/src/lib.rs @@ -1,21 +1,76 @@ -use std::hash::{DefaultHasher, Hash, Hasher}; +use std::hash::{Hash, Hasher}; + +/// A trait which behaves like a pseudo-random number generator. +/// It is used to generate consistent hashes within one bucket. +/// Note: the hasher must have been seeded with the key during construction. +pub trait HashSequence { + fn next(&mut self) -> u64; +} + +/// A trait for building a special bit mask and sequences of hashes for different bit positions. +/// Note: the hasher must have been seeded with the key during construction. +pub trait HashSeqBuilder { + type Seq: HashSequence; + + fn bit_mask(&self) -> u64; + /// Return a HashSequence instance which is seeded with the given bit position + /// and the seed of this builder. + fn hash_seq(&self, bit: u64) -> Self::Seq; +} + +/// A trait for building multiple independent hash builders +/// Note: the hasher must have been seeded with the key during construction. +pub trait ManySeqBuilder { + type Builder: HashSeqBuilder; + + /// Returns the i-th independent hash builder. 
+ fn seq_builder(&self, i: usize) -> Self::Builder; +} + +impl HashSequence for H { + fn next(&mut self) -> u64 { + 54387634019u64.hash(self); + self.finish() + } +} + +impl HashSeqBuilder for H { + type Seq = H; + + fn bit_mask(&self) -> u64 { + self.finish() + } + + fn hash_seq(&self, bit: u64) -> Self::Seq { + let mut hasher = self.clone(); + bit.hash(&mut hasher); + hasher + } +} + +impl ManySeqBuilder for H { + type Builder = H; + + fn seq_builder(&self, i: usize) -> Self::Builder { + let mut hasher = self.clone(); + i.hash(&mut hasher); + hasher + } +} /// One building block for the consistent hashing algorithm is a consistent /// hash iterator which enumerates all the hashes for a specific bucket. /// A bucket covers the range `(1< { + hasher: H, n: usize, is_first: bool, - bit: u64, + bit: u64, // A bitmask with a single bit set. } -impl BucketIterator { - fn new(key: u64, n: usize, bit: u64) -> Self { - let mut hasher = DefaultHasher::new(); - key.hash(&mut hasher); - bit.hash(&mut hasher); +impl BucketIterator { + fn new(n: usize, bit: u64, hasher: H) -> Self { Self { hasher, n, @@ -25,7 +80,7 @@ impl BucketIterator { } } -impl Iterator for BucketIterator { +impl Iterator for BucketIterator { type Item = usize; fn next(&mut self) -> Option { @@ -33,16 +88,15 @@ impl Iterator for BucketIterator { return None; } if self.is_first { - let res = self.hasher.finish() % self.bit + self.bit; + let res = (self.hasher.next() & (self.bit - 1)) + self.bit; + self.is_first = false; if res < self.n as u64 { self.n = res as usize; return Some(self.n); } - self.is_first = false; } loop { - 478392.hash(&mut self.hasher); - let res = self.hasher.finish() % (self.bit * 2); + let res = self.hasher.next() & (self.bit * 2 - 1); if res & self.bit == 0 { return None; } @@ -56,77 +110,70 @@ impl Iterator for BucketIterator { /// An iterator which enumerates all the consistent hashes for a given key /// from largest to smallest in the range `0..n`. 
-pub struct ConsistentHashRevIterator { +pub struct ConsistentHashRevIterator { + builder: H, bits: u64, - key: u64, n: usize, - inner: BucketIterator, + inner: Option>, } -impl ConsistentHashRevIterator { - pub fn new(key: u64, n: usize) -> Self { - let mut hasher = DefaultHasher::new(); - key.hash(&mut hasher); - let bits = hasher.finish() % n.next_power_of_two() as u64; - let inner = BucketIterator::default(); +impl ConsistentHashRevIterator { + pub fn new(n: usize, builder: H) -> Self { Self { - bits, - key, + bits: builder.bit_mask() & (n.next_power_of_two() as u64 - 1), + builder, n, - inner, + inner: None, } } } -impl Iterator for ConsistentHashRevIterator { +impl Iterator for ConsistentHashRevIterator { type Item = usize; fn next(&mut self) -> Option { if self.n == 0 { return None; } - if let Some(res) = self.inner.next() { + if let Some(res) = self.inner.as_mut().and_then(|inner| inner.next()) { return Some(res); } while self.bits > 0 { let bit = 1 << self.bits.ilog2(); self.bits ^= bit; - self.inner = BucketIterator::new(self.key, self.n, bit); - if let Some(res) = self.inner.next() { + let seq = self.builder.hash_seq(bit); + let mut iter = BucketIterator::new(self.n, bit, seq); + if let Some(res) = iter.next() { + self.inner = Some(iter); return Some(res); } } self.n = 0; - Some(self.n) + Some(0) } } /// Same as `ConsistentHashRevIterator`, but iterates from smallest to largest /// for the range `n..`. 
-pub struct ConsistentHashIterator { +pub struct ConsistentHashIterator { bits: u64, - key: u64, n: usize, + builder: H, stack: Vec, } -impl ConsistentHashIterator { - pub fn new(key: u64, n: usize) -> Self { - let mut hasher = DefaultHasher::new(); - key.hash(&mut hasher); - let mut bits = hasher.finish() as u64; - bits &= !((n + 2).next_power_of_two() as u64 / 2 - 1); - let stack = if n == 0 { vec![0] } else { vec![] }; +impl ConsistentHashIterator { + pub fn new(n: usize, builder: H) -> Self { Self { - bits, - key, + bits: builder.bit_mask() & !((n + 2).next_power_of_two() as u64 / 2 - 1), + stack: if n == 0 { vec![0] } else { vec![] }, + builder, n, - stack, } } } -impl Iterator for ConsistentHashIterator { +impl Iterator for ConsistentHashIterator { type Item = usize; fn next(&mut self) -> Option { @@ -136,7 +183,7 @@ impl Iterator for ConsistentHashIterator { while self.bits > 0 { let bit = self.bits & !(self.bits - 1); self.bits &= self.bits - 1; - let inner = BucketIterator::new(self.key, bit as usize * 2, bit); + let inner = BucketIterator::new(bit as usize * 2, bit, self.builder.hash_seq(bit)); self.stack = inner.take_while(|x| *x >= self.n).collect(); if let Some(res) = self.stack.pop() { return Some(res); @@ -148,22 +195,22 @@ impl Iterator for ConsistentHashIterator { /// Wrapper around `ConsistentHashIterator` and `ConsistentHashRevIterator` to compute /// the next or previous consistent hash for a given key for a given number of nodes `n`. 
-pub struct ConsistentHasher { - key: u64, +pub struct ConsistentHasher { + builder: H, } -impl ConsistentHasher { - pub fn new(key: u64) -> Self { - Self { key } +impl ConsistentHasher { + pub fn new(builder: H) -> Self { + Self { builder } } pub fn prev(&self, n: usize) -> Option { - let mut sampler = ConsistentHashRevIterator::new(self.key, n); + let mut sampler = ConsistentHashRevIterator::new(n, self.builder.clone()); sampler.next() } pub fn next(&self, n: usize) -> Option { - let mut sampler = ConsistentHashIterator::new(self.key, n); + let mut sampler = ConsistentHashIterator::new(n, self.builder.clone()); sampler.next() } } @@ -174,21 +221,21 @@ impl ConsistentHasher { /// I.e. on average exactly `1/(n+1)` (resp. `1/(k+1)`) many hashes will change /// when `n` (resp. `k`) increases by one. Additionally, the returned `k` tuple /// is guaranteed to be uniformely chosen from all possible `n-choose-k` tuples. -pub struct ConsistentChooseKHasher { - key: u64, +pub struct ConsistentChooseKHasher { + builder: H, k: usize, } -impl ConsistentChooseKHasher { - pub fn new(key: u64, k: usize) -> Self { - Self { key, k } +impl ConsistentChooseKHasher { + pub fn new(builder: H, k: usize) -> Self { + Self { builder, k } } // TODO: Implement this as an iterator! 
pub fn prev(&self, mut n: usize) -> Vec { let mut samples = Vec::with_capacity(self.k); let mut samplers: Vec<_> = (0..self.k) - .map(|i| ConsistentHashRevIterator::new(self.key + 43987492 * i as u64, n - i).peekable()) + .map(|i| ConsistentHashRevIterator::new(n - i, self.builder.seq_builder(i)).peekable()) .collect(); for i in (0..self.k).rev() { let mut max = 0; @@ -211,25 +258,33 @@ impl ConsistentChooseKHasher { mod tests { use super::*; + fn hasher_for_key(key: u64) -> DefaultHasher { + let mut hasher = DefaultHasher::default(); + key.hash(&mut hasher); + hasher + } + #[test] fn test_uniform_1() { for k in 0..100 { - let sampler = ConsistentHasher::new(k); + let hasher = hasher_for_key(k); + let sampler = ConsistentHasher::new(hasher.clone()); for n in 0..1000 { assert!(sampler.prev(n + 1) <= sampler.prev(n + 2)); let next = sampler.next(n).unwrap(); assert_eq!(next, sampler.prev(next + 1).unwrap()); } - let mut iter_rev: Vec<_> = ConsistentHashIterator::new(k, 0) + let mut iter_rev: Vec<_> = ConsistentHashIterator::new(0, hasher.clone()) .take_while(|x| *x < 1000) .collect(); iter_rev.reverse(); - let iter: Vec<_> = ConsistentHashRevIterator::new(k, 1000).collect(); + let iter: Vec<_> = ConsistentHashRevIterator::new(1000, hasher).collect(); assert_eq!(iter, iter_rev); } let mut stats = vec![0; 13]; for i in 0..100000 { - let sampler = ConsistentHasher::new(i); + let hasher = hasher_for_key(i); + let sampler = ConsistentHasher::new(hasher); let x = sampler.prev(stats.len()).unwrap(); stats[x] += 1; } @@ -240,7 +295,8 @@ mod tests { fn test_uniform_k() { const K: usize = 3; for k in 0..100 { - let sampler = ConsistentChooseKHasher::new(k, K); + let hasher = hasher_for_key(k); + let sampler = ConsistentChooseKHasher::new(hasher, K); for n in K..1000 { let samples = sampler.prev(n + 1); assert!(samples.len() == K); @@ -263,7 +319,8 @@ mod tests { } let mut stats = vec![0; 8]; for i in 0..32 { - let sampler = ConsistentChooseKHasher::new(i + 32783, 2); + let 
hasher = hasher_for_key(i + 32783); + let sampler = ConsistentChooseKHasher::new(hasher, 2); let samples = sampler.prev(stats.len()); for s in samples { stats[s] += 1; @@ -274,8 +331,9 @@ mod tests { for k in 1..10 { for n in k + 1..20 { for key in 0..1000 { - let sampler1 = ConsistentChooseKHasher::new(key, k); - let sampler2 = ConsistentChooseKHasher::new(key, k + 1); + let hasher = hasher_for_key(key); + let sampler1 = ConsistentChooseKHasher::new(hasher.clone(), k); + let sampler2 = ConsistentChooseKHasher::new(hasher, k + 1); let set1 = sampler1.prev(n); let set2 = sampler2.prev(n); assert_eq!(set1.len(), k); From 0baaafc83b2b58f5023aea9e814170f01c5cc091 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 13 Aug 2025 15:55:34 +0200 Subject: [PATCH 10/49] Update lib.rs --- crates/consistent-hashing/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs index dd7675c..b7dc434 100644 --- a/crates/consistent-hashing/src/lib.rs +++ b/crates/consistent-hashing/src/lib.rs @@ -256,6 +256,8 @@ impl ConsistentChooseKHasher { #[cfg(test)] mod tests { + use std::hash::DefaultHasher; + use super::*; fn hasher_for_key(key: u64) -> DefaultHasher { From 0dcb137a20015a825ca4f5d833fb3af737c3a7aa Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 13 Aug 2025 17:51:45 +0200 Subject: [PATCH 11/49] Update README.md --- crates/consistent-hashing/README.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index 4f29c56..b15224d 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -81,18 +81,37 @@ In the remainder of this section we prove that the `consistent_choose_k` algorit Let's define `M(k,n) = consistent_choose_max(_, k, n)` and `S(k, n) := consistent_choose_k(_, k, n)` as short-cuts for some arbitrary fixed `key`. 
We assume that `consistent_hash(key, k, n)` computes `k` independent consistent hash functions. +### Property 1 + Since `M(k, n) < n` and `S(k, n) = {M(k, n)} ∪ S(k - 1, M(k, n))` for `k > 1`, `S(k, n)` constructs a strictly monotonically decreasing sequence. The sequence outputs exactly `k` elements which therefore must all be distinct which proves property 1 for `k <= n`. Properties 2, 3, and 4 can be proven via induction as follows. +### Property 4 + `k = 1`: We expect that `consistent_hash` returns a single uniformly distributed node index which is consistent in `n`, i.e. changes the hash value with probability `1/(n+1)`, when `n` increments by one. In our implementation, we use an `O(1)` implementation of the jump-hash algorithm. For `k=1`, `consistent_choose_k(key, 1, n)` becomes a single function call to `consistent_choose_max(key, 1, n)` which in turn calls `consistent_hash(key, 0, n)`. I.e. `consistent_choose_k` inherits the all the desired properties from `consistent_hash` for `k=1` and all `n>=1`. `k → k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for this is `(n+1-k)/(n+1)` for the former by induction and `(n-k)/(n+1-k)` by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4. +### Property 3 + Property 3 is trivially satisfied if `S(k+1, n+1) = S(k+1, n)`. So, we focus on the case where `S(k+1, n+1) != S(k+1, n)`, which implies that `n ∈ S(k+1, n+1)` as largest element. We know that `S(k+1, n) = {m} ∪ S(k, m)` for some `m` by definition and `S(k, n) = S(k, u) ∖ {v} ∪ {w}` by induction for some `u`, `v`, and `w`. Thus far we have `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, u) ∖ {v} ∪ {w}`. 
If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exaclty in the elemetns `m` and `n` proving property 3. If `u ≠ m`, then `consistent_hash(_, k, n) = m`, since that's the only way how the largest values in `S(k+1, n)` and `S(k, n)` can differ. In this case, `m ∉ S(k+1, n+1)`, since `n` (and not `m`) is the largest element of `S(k+1, n+1)`. Furthermore, `S(k, n) = S(k, m)`, since `consistent_hash(_, i, n) < m` for all `i < k` (otherwise there is a contradiction). -Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof. \ No newline at end of file +Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof. + +### Property 2 + +The final part is to prove property 2. This time we have an inducation over `k` and `n`. +As before, induction start for `k=1` and for all `n>0` is inherited from the `consistency_hash` implementation. The case `n=k` is also trivially covered, since the only valid set are the numbers `{0, ..., k-1}` which the algorithm correctly outputs. So, we only need to care about the induction step where `k>1` and `n>k`. + +We need to prove that `P(i ∈ S(k+1, n+1)) = (k+1)/(n+1)` for all `0 <= i <= n`. Property 3 already proves the case `i = n`. Furthermore we know that `P(n ∈ S(k+1, n+1)) = (k+1)/(n+1)` and vice versa `P(n ∉ S(k+1, n+1)) = 1 - (k+1)/(n+1)`. Let's consider those two cases separately. + +`n ∈ S(k+1, n+1)`: By the definition of `S`, we know that `S(k+1, n+1) = {n} ∪ S(k, n)`. `P(i ∈ S(k+1, n+1)) = P(i ∈ S(k, n)) P(n ∈ S(k+1, n+1)) = k/n * (k+1)/(n+1)` for all `0 <= i < n`. + +`n ∉ S(k+1, n+1)`: Once more by definition, `S(k+1, n+1) = S(k+1, n)` in this case. 
`P(i ∈ S(k+1, n+1)) = P(i ∈ S(k+1, n)) P(n ∉ S(k+1, n+1)) = (k+1)/n * (1 - (k+1)/(n+1))` for all `0 <= i < n`. + +Summing both cases together leads to `P(i ∈ S(k+1, n+1)) = k/n * (k+1)/(n+1) + (k+1)/n * (1 - (k+1)/(n+1)) = k/n * (k+1)/(n+1) + k/n * (1 - (k+1)/(n+1)) + 1/n * (1 - (k+1)/(n+1)) = k/n * (k+1)/(n+1) + k/n - k/n * (k+1)/(n+1) + 1/n - 1/n * (k+1)/(n+1) = k/n + 1/n - 1/n * (k+1)/(n+1) = (k+1)/n - (k+1)/(n+1)/n = (k+1)/n * (1 - 1/(n+1)) = (k+1)/(n+1)` for all `0 <= i < n` which concludes the proof. From d4b841024aa2ee72aec331033974b7519d71df9d Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 15 Aug 2025 10:13:25 +0200 Subject: [PATCH 12/49] Update crates/consistent-hashing/README.md Co-authored-by: Luke Francl --- crates/consistent-hashing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index b15224d..7de40f2 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -59,7 +59,7 @@ fn consistent_hash(key: Key, i: usize, n: usize) -> usize { ``` `consistent_choose_k` makes `k` calls to `consistent_choose_max` which calls `consistent_hash` another `k` times. -In total, `consistent_hash` is called `k * (k+1) / 2` Utilizing a `O(1)` solution for `consistent_hash` leads to a `O(k^2)` runtime. +In total, `consistent_hash` is called `k * (k+1) / 2` many times. Utilizing a `O(1)` solution for `consistent_hash` leads to a `O(k^2)` runtime. This runtime can be further improved by replacing the max operation with a heap where popped elements are updated according to the new arguments `n` and `k`. With this optimization, the complexity reduces to `O(k log k)`. With some probabilistic bucketing strategy, it should be possible to reduce the expected runtime to `O(k)`. 
From 0935ea061da1bda16d942c29b39cd7904800312b Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 15 Aug 2025 10:13:35 +0200 Subject: [PATCH 13/49] Update crates/consistent-hashing/README.md Co-authored-by: Luke Francl --- crates/consistent-hashing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index 7de40f2..8dfa1c3 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -98,7 +98,7 @@ Properties 2, 3, and 4 can be proven via induction as follows. Property 3 is trivially satisfied if `S(k+1, n+1) = S(k+1, n)`. So, we focus on the case where `S(k+1, n+1) != S(k+1, n)`, which implies that `n ∈ S(k+1, n+1)` as largest element. We know that `S(k+1, n) = {m} ∪ S(k, m)` for some `m` by definition and `S(k, n) = S(k, u) ∖ {v} ∪ {w}` by induction for some `u`, `v`, and `w`. Thus far we have `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, u) ∖ {v} ∪ {w}`. -If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exaclty in the elemetns `m` and `n` proving property 3. +If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exactly in the elemetns `m` and `n` proving property 3. If `u ≠ m`, then `consistent_hash(_, k, n) = m`, since that's the only way how the largest values in `S(k+1, n)` and `S(k, n)` can differ. In this case, `m ∉ S(k+1, n+1)`, since `n` (and not `m`) is the largest element of `S(k+1, n+1)`. Furthermore, `S(k, n) = S(k, m)`, since `consistent_hash(_, i, n) < m` for all `i < k` (otherwise there is a contradiction). Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof. 
From 99c69f3a059e09668f27b2c51a49bcb706c735f5 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 15 Aug 2025 10:15:12 +0200 Subject: [PATCH 14/49] Update crates/consistent-hashing/README.md Co-authored-by: Luke Francl --- crates/consistent-hashing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index 8dfa1c3..fb3fda8 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -106,7 +106,7 @@ Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m ### Property 2 The final part is to prove property 2. This time we have an inducation over `k` and `n`. -As before, induction start for `k=1` and for all `n>0` is inherited from the `consistency_hash` implementation. The case `n=k` is also trivially covered, since the only valid set are the numbers `{0, ..., k-1}` which the algorithm correctly outputs. So, we only need to care about the induction step where `k>1` and `n>k`. +As before, the base case of the induction for `k=1` and all `n>0` is inherited from the `consistency_hash` implementation. The case `n=k` is also trivially covered, since the only valid set are the numbers `{0, ..., k-1}` which the algorithm correctly outputs. So, we only need to care about the induction step where `k>1` and `n>k`. We need to prove that `P(i ∈ S(k+1, n+1)) = (k+1)/(n+1)` for all `0 <= i <= n`. Property 3 already proves the case `i = n`. Furthermore we know that `P(n ∈ S(k+1, n+1)) = (k+1)/(n+1)` and vice versa `P(n ∉ S(k+1, n+1)) = 1 - (k+1)/(n+1)`. Let's consider those two cases separately. 
From 496f5394f0f96c455357cff2839e1d1554f69748 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 15 Aug 2025 10:15:39 +0200 Subject: [PATCH 15/49] Update crates/consistent-hashing/README.md Co-authored-by: Luke Francl --- crates/consistent-hashing/README.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index fb3fda8..fb95783 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -15,16 +15,15 @@ Common algorithms where `N` is the number of nodes and `R` is the number of replicas. -| Algorithm | Lookup per key | Node add/remove | Memory | Lookup with replication | -| | (no replication) | | | | -|-------------------------|---------------------|----------------------------------------|---------------------------|-------------------------------------| -| Hash ring (with vnodes) | O(log N): binary search over N points; O(1): with specialized structures | O(log N) | O(N) | O(log N + R): Take next R distinct successors | -| Rendezvous | O(N): max score | O(1) | O(N) node list | O(N log R): pick top R scores | -| Jump consistent hash | O(log(N)) expected | 0 | O(1) | O(R log N) | -| AnchorHash | O(1) expected | O(1) | O(N) | Not native | -| DXHash | O(1) expected | O(1) | O(N) | Not native | -| JumpBackHash | O(1) expected | 0 | O(1) | Not native | -| **ConsistentChooseK** | **O(1) expected** | **0** | **O(1)** | **O(R^2)**; **O(R log(R))**: using heap | +| Algorithm | Lookup per key
(no replication) | Node add/remove | Memory | Lookup with replication | +|-------------------------|--------------------------------------------------------------------------|-----------------|----------------|-----------------------------------------------| +| Hash ring (with vnodes) | O(log N): binary search over N points; O(1): with specialized structures | O(log N) | O(N) | O(log N + R): Take next R distinct successors | +| Rendezvous | O(N): max score | O(1) | O(N) node list | O(N log R): pick top R scores | +| Jump consistent hash | O(log(N)) expected | 0 | O(1) | O(R log N) | +| AnchorHash | O(1) expected | O(1) | O(N) | Not native | +| DXHash | O(1) expected | O(1) | O(N) | Not native | +| JumpBackHash | O(1) expected | 0 | O(1) | Not native | +| **ConsistentChooseK** | **O(1) expected** | **0** | **O(1)** | **O(R^2)**; **O(R log(R))**: using heap | Replication of keys - Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas more evenly. Replicas are not independently distributed. 
From 23f308089d61b95e3535d3f133a015ad4f91d82a Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 15 Aug 2025 14:05:38 +0200 Subject: [PATCH 16/49] add benchmark --- Cargo.toml | 1 + .../consistent-hashing/benchmarks/Cargo.toml | 15 +++++ .../benchmarks/criterion.toml | 18 ++++++ .../benchmarks/performance.rs | 63 +++++++++++++++++++ crates/consistent-hashing/src/lib.rs | 6 +- 5 files changed, 99 insertions(+), 4 deletions(-) create mode 100644 crates/consistent-hashing/benchmarks/Cargo.toml create mode 100644 crates/consistent-hashing/benchmarks/criterion.toml create mode 100644 crates/consistent-hashing/benchmarks/performance.rs diff --git a/Cargo.toml b/Cargo.toml index 312f46d..0b09dcb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "crates/*", "crates/bpe/benchmarks", "crates/bpe/tests", + "crates/consistent-hashing/benchmarks", ] resolver = "2" diff --git a/crates/consistent-hashing/benchmarks/Cargo.toml b/crates/consistent-hashing/benchmarks/Cargo.toml new file mode 100644 index 0000000..580e5ab --- /dev/null +++ b/crates/consistent-hashing/benchmarks/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "consistent-hashing-benchmarks" +edition = "2021" + +[[bench]] +name = "performance" +path = "performance.rs" +harness = false +test = false + +[dependencies] +consistent-hashing = { path = "../" } + +criterion = { version = "0.7", features = ["csv_output"] } +rand = "0.9" diff --git a/crates/consistent-hashing/benchmarks/criterion.toml b/crates/consistent-hashing/benchmarks/criterion.toml new file mode 100644 index 0000000..0e43927 --- /dev/null +++ b/crates/consistent-hashing/benchmarks/criterion.toml @@ -0,0 +1,18 @@ +# save report in this directory, even if a custom target directory is set +criterion_home = "./target/criterion" + +# The colors table allows users to configure the colors used by the charts +# cargo-criterion generates. +[colors] +# Color-blind friendly color scheme from https://personal.sron.nl/~pault/. 
+comparison_colors = [ + {r = 51, g = 34, b = 136 }, # indigo + {r = 136, g = 204, b = 238 }, # cyan + {r = 68, g = 170, b = 153 }, # teal + {r = 17, g = 119, b = 51 }, # green + {r = 153, g = 153, b = 51 }, # olive + {r = 221, g = 204, b = 119 }, # sand + {r = 204, g = 102, b = 119 }, # rose + {r = 136, g = 34, b = 85 }, # wine + {r = 170, g = 68, b = 153 }, # purple +] diff --git a/crates/consistent-hashing/benchmarks/performance.rs b/crates/consistent-hashing/benchmarks/performance.rs new file mode 100644 index 0000000..4bdc91b --- /dev/null +++ b/crates/consistent-hashing/benchmarks/performance.rs @@ -0,0 +1,63 @@ +use std::{ + hash::{DefaultHasher, Hash}, + hint::black_box, + time::Duration, +}; + +use consistent_hashing::{ConsistentChooseKHasher, ConsistentHasher}; +use criterion::{ + criterion_group, criterion_main, AxisScale, Bencher, BenchmarkId, Criterion, PlotConfiguration, + Throughput, +}; +use rand::{rng, Rng}; + +fn throughput_benchmark(c: &mut Criterion) { + let keys: Vec = rng().random_iter().take(1000).collect(); + + let mut group = c.benchmark_group(format!("choose")); + group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + for n in [1usize, 10, 100, 1000, 10000] { + group.throughput(Throughput::Elements(keys.len() as u64)); + group.bench_with_input(BenchmarkId::new(format!("1"), n), &n, |b, n| { + b.iter_batched( + || &keys, + |keys| { + for key in keys { + let mut h = DefaultHasher::new(); + key.hash(&mut h); + black_box(ConsistentHasher::new(h).prev(*n + 1)); + } + }, + criterion::BatchSize::SmallInput, + ) + }); + for k in [1, 2, 3, 10, 100] { + group.bench_with_input(BenchmarkId::new(format!("k_{k}"), n), &n, |b, n| { + b.iter_batched( + || &keys, + |keys| { + let mut res = Vec::with_capacity(k); + for key in keys { + let mut h = DefaultHasher::new(); + key.hash(&mut h); + black_box(ConsistentChooseKHasher::new(h, k).prev(*n + k, &mut res)); + } + }, + criterion::BatchSize::SmallInput, + ) + }); + } + } + 
group.finish(); +} + +criterion_group!( + name = benches; + config = Criterion::default() + .warm_up_time(Duration::from_millis(500)) + .measurement_time(Duration::from_millis(4000)) + .nresamples(1000); + + targets = throughput_benchmark, +); +criterion_main!(benches); diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs index b7dc434..7a19eb8 100644 --- a/crates/consistent-hashing/src/lib.rs +++ b/crates/consistent-hashing/src/lib.rs @@ -232,11 +232,11 @@ impl ConsistentChooseKHasher { } // TODO: Implement this as an iterator! - pub fn prev(&self, mut n: usize) -> Vec { - let mut samples = Vec::with_capacity(self.k); + pub fn prev(&self, mut n: usize, samples: &mut Vec) { let mut samplers: Vec<_> = (0..self.k) .map(|i| ConsistentHashRevIterator::new(n - i, self.builder.seq_builder(i)).peekable()) .collect(); + samples.clear(); for i in (0..self.k).rev() { let mut max = 0; for k in 0..=i { @@ -248,8 +248,6 @@ impl ConsistentChooseKHasher { samples.push(max); n = max; } - samples.sort(); - samples } } From 5d522374057df648bba7e3aee1b3276415788255 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 15 Aug 2025 14:38:23 +0200 Subject: [PATCH 17/49] remove second vector --- .../benchmarks/performance.rs | 10 ++-- crates/consistent-hashing/src/lib.rs | 57 +++++++++++++------ 2 files changed, 45 insertions(+), 22 deletions(-) diff --git a/crates/consistent-hashing/benchmarks/performance.rs b/crates/consistent-hashing/benchmarks/performance.rs index 4bdc91b..8dc8e62 100644 --- a/crates/consistent-hashing/benchmarks/performance.rs +++ b/crates/consistent-hashing/benchmarks/performance.rs @@ -1,12 +1,12 @@ use std::{ - hash::{DefaultHasher, Hash}, + hash::{DefaultHasher, Hash, Hasher}, hint::black_box, time::Duration, }; use consistent_hashing::{ConsistentChooseKHasher, ConsistentHasher}; use criterion::{ - criterion_group, criterion_main, AxisScale, Bencher, BenchmarkId, Criterion, PlotConfiguration, + criterion_group, 
criterion_main, AxisScale, BenchmarkId, Criterion, PlotConfiguration, Throughput, }; use rand::{rng, Rng}; @@ -23,7 +23,7 @@ fn throughput_benchmark(c: &mut Criterion) { || &keys, |keys| { for key in keys { - let mut h = DefaultHasher::new(); + let mut h = DefaultHasher::default(); key.hash(&mut h); black_box(ConsistentHasher::new(h).prev(*n + 1)); } @@ -38,9 +38,9 @@ fn throughput_benchmark(c: &mut Criterion) { |keys| { let mut res = Vec::with_capacity(k); for key in keys { - let mut h = DefaultHasher::new(); + let mut h = DefaultHasher::default(); key.hash(&mut h); - black_box(ConsistentChooseKHasher::new(h, k).prev(*n + k, &mut res)); + black_box(ConsistentChooseKHasher::new(h, k).prev_with_vec(*n + k, &mut res)); } }, criterion::BatchSize::SmallInput, diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs index 7a19eb8..fd6391f 100644 --- a/crates/consistent-hashing/src/lib.rs +++ b/crates/consistent-hashing/src/lib.rs @@ -12,6 +12,7 @@ pub trait HashSequence { pub trait HashSeqBuilder { type Seq: HashSequence; + /// Returns a bit mask indicating which buckets have at least one hash. fn bit_mask(&self) -> u64; /// Return a HashSequence instance which is seeded with the given bit position /// and the seed of this builder. @@ -66,7 +67,7 @@ struct BucketIterator { hasher: H, n: usize, is_first: bool, - bit: u64, // A bitmask with a single bit set. + bit: u64, // A bitmask with a single bit set. 
} impl BucketIterator { @@ -199,20 +200,30 @@ pub struct ConsistentHasher { builder: H, } -impl ConsistentHasher { +impl ConsistentHasher { pub fn new(builder: H) -> Self { Self { builder } } - pub fn prev(&self, n: usize) -> Option { + pub fn prev(&self, n: usize) -> Option + where + H: Clone, + { let mut sampler = ConsistentHashRevIterator::new(n, self.builder.clone()); sampler.next() } - pub fn next(&self, n: usize) -> Option { + pub fn next(&self, n: usize) -> Option + where + H: Clone, + { let mut sampler = ConsistentHashIterator::new(n, self.builder.clone()); sampler.next() } + + pub fn into_prev(self, n: usize) -> Option { + ConsistentHashRevIterator::new(n, self.builder).next() + } } /// Implementation of a consistent choose k hashing algorithm. @@ -231,27 +242,38 @@ impl ConsistentChooseKHasher { Self { builder, k } } - // TODO: Implement this as an iterator! - pub fn prev(&self, mut n: usize, samples: &mut Vec) { - let mut samplers: Vec<_> = (0..self.k) - .map(|i| ConsistentHashRevIterator::new(n - i, self.builder.seq_builder(i)).peekable()) - .collect(); + pub fn prev(&self, n: usize) -> Vec { + let mut res = Vec::with_capacity(self.k); + self.prev_with_vec(n, &mut res); + res + } + + pub fn prev_with_vec(&self, mut n: usize, samples: &mut Vec) { + assert!(n >= self.k, "n must be at least k"); samples.clear(); + for i in 0..self.k { + samples.push( + ConsistentHasher::new(self.builder.seq_builder(i)) + .into_prev(n - i) + .expect("must not fail") + + i, + ); + } for i in (0..self.k).rev() { - let mut max = 0; - for k in 0..=i { - while samplers[k].peek() >= Some(&(n - k)) && n - k > 0 { - samplers[k].next(); + n = samples[0..=i].iter().copied().max().expect(""); + samples[i] = n; + for j in 0..i { + if samples[j] == n { + samples[j] = ConsistentHasher::new(self.builder.seq_builder(j)) + .into_prev(n - j) + .expect("must not fail") + + j; } - max = max.max(samplers[k].peek().unwrap() + k); } - samples.push(max); - n = max; } } } - #[cfg(test)] mod tests { 
use std::hash::DefaultHasher; @@ -327,6 +349,7 @@ mod tests { } } println!("{stats:?}"); + assert_eq!(stats, vec![10, 12, 6, 6, 6, 5, 9, 10]); // Test consistency when increasing k! for k in 1..10 { for n in k + 1..20 { From f6e29f7faa428345548868c66135d904fdf33c82 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 15 Aug 2025 15:16:51 +0200 Subject: [PATCH 18/49] Update README.md --- crates/consistent-hashing/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index fb95783..0830c66 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -28,8 +28,8 @@ where `N` is the number of nodes and `R` is the number of replicas. Replication of keys - Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas more evenly. Replicas are not independently distributed. - Rendezvous hashing: replicate by selecting the top R nodes by score for the key. This naturally yields R distinct owners and supports weights. -- Jump consistent hash: the base function doesn't support replication. But the math can be easily modified to support consistent replication. -- JumpBackHash and variants: The trick of Jump consistent hash to support replication won't work here due to the additional state introduced. +- Jump consistent hash: the base function doesn't support replication. While the math can be modified to support consistent replication, it cannot be efficiently solved for large k and even for small k (=2 or =3), a quadratic or cubic equation has to be solved. +- JumpBackHash and variants: The trick of Jump consistent hash to support replication won't work here due to the introduction of additional state. - ConsistentChooseK: Faster and more memory efficient than all other solutions. 
Why replication matters From d20f9b600a8704245ef4e65154d6d51767cbf832 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 15 Aug 2025 15:17:59 +0200 Subject: [PATCH 19/49] Update performance.rs --- crates/consistent-hashing/benchmarks/performance.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/consistent-hashing/benchmarks/performance.rs b/crates/consistent-hashing/benchmarks/performance.rs index 8dc8e62..05dd929 100644 --- a/crates/consistent-hashing/benchmarks/performance.rs +++ b/crates/consistent-hashing/benchmarks/performance.rs @@ -40,7 +40,9 @@ fn throughput_benchmark(c: &mut Criterion) { for key in keys { let mut h = DefaultHasher::default(); key.hash(&mut h); - black_box(ConsistentChooseKHasher::new(h, k).prev_with_vec(*n + k, &mut res)); + black_box( + ConsistentChooseKHasher::new(h, k).prev_with_vec(*n + k, &mut res), + ); } }, criterion::BatchSize::SmallInput, From 1dde97c913c18af5e992fd633e2d9f94d82e2838 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 15 Aug 2025 15:22:38 +0200 Subject: [PATCH 20/49] make linter happy --- crates/consistent-hashing/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs index fd6391f..dcfbb0c 100644 --- a/crates/consistent-hashing/src/lib.rs +++ b/crates/consistent-hashing/src/lib.rs @@ -262,9 +262,9 @@ impl ConsistentChooseKHasher { for i in (0..self.k).rev() { n = samples[0..=i].iter().copied().max().expect(""); samples[i] = n; - for j in 0..i { - if samples[j] == n { - samples[j] = ConsistentHasher::new(self.builder.seq_builder(j)) + for (j, sample) in samples[0..i].iter_mut().enumerate() { + if *sample == n { + *sample = ConsistentHasher::new(self.builder.seq_builder(j)) .into_prev(n - j) .expect("must not fail") + j; From 91714448adfa7a2023565fb842506876065c39f4 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 18 May 2026 17:24:34 +0200 Subject: [PATCH 
21/49] some more docu + better choose_k implementation --- crates/consistent-hashing/README.md | 24 +- .../benchmarks/performance.rs | 35 +- .../examples/bounded_load.rs | 140 +++++++ crates/consistent-hashing/src/choose_k.rs | 358 ++++++++++++++++++ crates/consistent-hashing/src/lib.rs | 109 +----- 5 files changed, 555 insertions(+), 111 deletions(-) create mode 100644 crates/consistent-hashing/examples/bounded_load.rs create mode 100644 crates/consistent-hashing/src/choose_k.rs diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index 0830c66..46a2a32 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -17,7 +17,7 @@ where `N` is the number of nodes and `R` is the number of replicas. | Algorithm | Lookup per key
(no replication) | Node add/remove | Memory | Lookup with replication | |-------------------------|--------------------------------------------------------------------------|-----------------|----------------|-----------------------------------------------| -| Hash ring (with vnodes) | O(log N): binary search over N points; O(1): with specialized structures | O(log N) | O(N) | O(log N + R): Take next R distinct successors | +| Hash ring (with vnodes) | O(log(V·N)): binary search; V = 100–200 virtual nodes per physical node | O(V·log(V·N)) | O(V·N) | O(log(V·N) + R): walk to next R distinct nodes | | Rendezvous | O(N): max score | O(1) | O(N) node list | O(N log R): pick top R scores | | Jump consistent hash | O(log(N)) expected | 0 | O(1) | O(R log N) | | AnchorHash | O(1) expected | O(1) | O(N) | Not native | @@ -37,6 +37,28 @@ Why replication matters - Distributes read/write load across multiple owners, reducing hotspots. - Enables fast recovery and higher tail-latency resilience. +## Applications beyond replication + +The `ConsistentChooseK` iterator produces a per-key ranking of all `n` nodes in priority order — consistently and with zero memory overhead. This ranking is a strict superset of simple replication and enables drop-in replacements for several well-known algorithms that traditionally require maintaining expensive data structures such as hash rings. + +### Bounded-load consistent hashing + +[Consistent Hashing with Bounded Loads](https://research.google/pubs/pub46580/) (Mirrokni et al., 2018) caps the maximum load any single node may receive. When a key's preferred node is full, it overflows to the next candidate. Classic implementations walk a hash ring to find successors, requiring O(V·N) memory for the ring where V is the number of virtual nodes per physical node (typically V > 100–200 for acceptable load variance). Lookups cost O(log(V·N)) via binary search. 
+With `ConsistentChooseK`, the ranking iterator directly yields each key's preference list on the fly — no ring required. Assignment becomes: iterate tokens round by round, and for each token advance its ranking iterator until a node with remaining capacity is found. This achieves the same bounded-load guarantees with O(k) memory for k keys and O(k) time to extract the k-th key. + +See [`examples/bounded_load.rs`](examples/bounded_load.rs) for a working implementation. + +### Power of two choices + +The [power of two choices](https://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.pdf) paradigm (Mitzenmacher, 2001; Azar et al., 1999) assigns each key to the least-loaded of two (or d) randomly chosen nodes. This reduces maximum load from O(log n / log log n) to O(log log n / log d) with high probability. + +Traditionally this requires drawing d independent random nodes per key. However, the original algorithm ignores the corner case where multiple independent hash functions collide on the same node, effectively reducing the number of distinct choices below d. With `ConsistentChooseK`, the first d elements from the ranking iterator are guaranteed to be distinct nodes. The choices are also consistent across time — the same key always considers the same d candidates — so reassignment only happens when a node actually joins or leaves. + +### Priority-based failover + +In active-passive or tiered architectures, each key needs a deterministic failover order. The ranking iterator provides exactly this: the first node is the primary, the second is the hot standby, and so on. When a node fails, the next node in the ranking takes over — consistently for all keys that had the failed node at the same rank position, and without any coordination or ring rebalancing. + ## ConsistentChooseK algorithm The following functions summarize the core algorithmic innovation as a minimal Rust excerpt. 
diff --git a/crates/consistent-hashing/benchmarks/performance.rs b/crates/consistent-hashing/benchmarks/performance.rs index 05dd929..3717e5c 100644 --- a/crates/consistent-hashing/benchmarks/performance.rs +++ b/crates/consistent-hashing/benchmarks/performance.rs @@ -1,5 +1,5 @@ use std::{ - hash::{DefaultHasher, Hash, Hasher}, + hash::{DefaultHasher, Hash}, hint::black_box, time::Duration, }; @@ -36,12 +36,11 @@ fn throughput_benchmark(c: &mut Criterion) { b.iter_batched( || &keys, |keys| { - let mut res = Vec::with_capacity(k); for key in keys { let mut h = DefaultHasher::default(); key.hash(&mut h); black_box( - ConsistentChooseKHasher::new(h, k).prev_with_vec(*n + k, &mut res), + ConsistentChooseKHasher::new_with_k(h, *n + k, k), ); } }, @@ -53,6 +52,34 @@ fn throughput_benchmark(c: &mut Criterion) { group.finish(); } +fn append_vs_new_with_k(c: &mut Criterion) { + let mut group = c.benchmark_group("append_vs_new_with_k"); + group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + for n in [10usize, 100, 1000, 10000] { + for k in [2, 3, 10, 100] { + group.bench_function( + BenchmarkId::new(format!("new_with_k/k_{k}"), n), + |b| { + b.iter(|| { + let h = DefaultHasher::default(); + black_box(ConsistentChooseKHasher::new_with_k(h, n + k, k)); + }) + }, + ); + group.bench_function(BenchmarkId::new(format!("append/k_{k}"), n), |b| { + b.iter(|| { + let h = DefaultHasher::default(); + let mut iter = ConsistentChooseKHasher::new(h, n + k); + black_box(for _ in 0..k { + iter.grow_k(); + }) + }) + }); + } + } + group.finish(); +} + criterion_group!( name = benches; config = Criterion::default() @@ -60,6 +87,6 @@ criterion_group!( .measurement_time(Duration::from_millis(4000)) .nresamples(1000); - targets = throughput_benchmark, + targets = throughput_benchmark, append_vs_new_with_k, ); criterion_main!(benches); diff --git a/crates/consistent-hashing/examples/bounded_load.rs b/crates/consistent-hashing/examples/bounded_load.rs new file 
mode 100644 index 0000000..29fcaf4 --- /dev/null +++ b/crates/consistent-hashing/examples/bounded_load.rs @@ -0,0 +1,140 @@ +//! Bounded-load consistent hashing example. +//! +//! Pure consistent hashing selects each node with equal probability, but for +//! small workloads (e.g. 64 tokens across 24 machines) random variance causes +//! highly skewed assignments. This example layers a capacity cap on top of +//! ConsistentChooseK to enforce near-perfect balance. +//! +//! Assignment uses round-robin over replicas: first assign every token's +//! most-preferred machine, then every token's second-preferred, etc. This +//! ensures all tokens compete fairly for each replica round. +//! +//! Run with: cargo run --example bounded_load + +use std::hash::{DefaultHasher, Hash}; + +use consistent_hashing::ConsistentChooseKHasher; + +/// Round-robin bounded-load assignment. +/// +/// For each replica round r = 0..k, iterate over all tokens and assign each +/// to its next most-preferred node that still has capacity. This gives every +/// token equal priority within each round. 
+fn bounded_load_assign( + rankings: &[Vec], + k: usize, + n: usize, + max_load: usize, +) -> (Vec>, Vec) { + let mut load = vec![0usize; n]; + let num_tokens = rankings.len(); + let mut assignments = vec![Vec::with_capacity(k); num_tokens]; + let mut cursors = vec![0usize; num_tokens]; + + for _round in 0..k { + for (token, ranking) in rankings.iter().enumerate() { + while cursors[token] < ranking.len() { + let node = ranking[cursors[token]]; + cursors[token] += 1; + if load[node] < max_load { + load[node] += 1; + assignments[token].push(node); + break; + } + } + } + } + (assignments, load) +} + +fn main() { + let num_tokens: usize = 64; + let k: usize = 2; // replicas per token + let n: usize = 24; // machines + let total = num_tokens * k; + let cap = total.div_ceil(n); // ceil(128/24) = 6 + + println!("Parameters: {num_tokens} tokens, k={k} replicas, {n} machines"); + println!("Total assignments: {total}, capacity cap per machine: {cap}"); + println!( + "Perfect balance: {}×{} + {}×{}\n", + n - total % n, + total / n, + total % n, + total / n + 1 + ); + + // ── Unbounded ──────────────────────────────────────────────────────── + let unbounded: Vec> = (0..num_tokens as u64) + .map(|key| { + let mut h = DefaultHasher::default(); + key.hash(&mut h); + ConsistentChooseKHasher::new(h, n).take(k).collect() + }) + .collect(); + let mut unbounded_load = vec![0usize; n]; + for a in &unbounded { + for &node in a { + unbounded_load[node] += 1; + } + } + + // ── Bounded (round-robin) ──────────────────────────────────────────── + let rankings: Vec> = (0..num_tokens as u64) + .map(|key| { + let mut h = DefaultHasher::default(); + key.hash(&mut h); + ConsistentChooseKHasher::new(h, n).collect() + }) + .collect(); + let (bounded, bounded_load) = bounded_load_assign(&rankings, k, n, cap); + + // ── Display ────────────────────────────────────────────────────────── + println!("{:<12} {:>10} {:>10}", "Machine", "Unbounded", "Bounded"); + println!("{:-<12} {:->10} {:->10}", "", 
"", ""); + for i in 0..n { + println!( + "{:<12} {:>10} {:>10}", + i, unbounded_load[i], bounded_load[i] + ); + } + + let ub_min = *unbounded_load.iter().min().unwrap(); + let ub_max = *unbounded_load.iter().max().unwrap(); + let b_min = *bounded_load.iter().min().unwrap(); + let b_max = *bounded_load.iter().max().unwrap(); + println!("{:-<12} {:->10} {:->10}", "", "", ""); + println!( + "{:<12} {:>10} {:>10}", + "spread", + ub_max - ub_min, + b_max - b_min + ); + + // ── Consistency check: what happens when we add one machine? ───────── + let n2 = n + 1; + let cap2 = (num_tokens * k).div_ceil(n2); + let rankings2: Vec> = (0..num_tokens as u64) + .map(|key| { + let mut h = DefaultHasher::default(); + key.hash(&mut h); + ConsistentChooseKHasher::new(h, n2).collect() + }) + .collect(); + let (bounded2, _) = bounded_load_assign(&rankings2, k, n2, cap2); + + let mut changes = 0; + for (before, after) in bounded.iter().zip(bounded2.iter()) { + for node in before { + if !after.contains(node) { + changes += 1; + } + } + } + println!("\nConsistency: adding machine {n} → {n2}"); + println!( + " {changes}/{total} assignments changed ({:.1}%), ideal ≈ {:.1}%", + changes as f64 / total as f64 * 100.0, + k as f64 / n2 as f64 * 100.0 + ); +} diff --git a/crates/consistent-hashing/src/choose_k.rs b/crates/consistent-hashing/src/choose_k.rs new file mode 100644 index 0000000..f51b16f --- /dev/null +++ b/crates/consistent-hashing/src/choose_k.rs @@ -0,0 +1,358 @@ +use crate::{ConsistentHasher, ManySeqBuilder}; + +/// A sample from the consistent choose-k algorithm, pairing a hash value +/// with the index of the hash sequence that produced it. +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Sample { + pos: usize, + seq: usize, +} + +impl Sample { + fn new(pos: usize, seq: usize) -> Self { + Self { pos, seq } + } + + pub fn pos(&self) -> usize { + self.pos + } +} + +/// Implementation of a consistent choose k hashing algorithm. 
+/// It returns k distinct consistent hashes in the range `0..n`. +/// The hashes are consistent when `n` changes and when `k` changes! +/// I.e. on average exactly `1/(n+1)` (resp. `1/(k+1)`) many hashes will change +/// when `n` (resp. `k`) increases by one. Additionally, the returned `k` tuple +/// is guaranteed to be uniformly chosen from all possible `n-choose-k` tuples. +/// +/// Also implements `Iterator` to yield the next sample when k is increased. +/// Note: since this hashing algorithm implements choose k semantics, all the returned samples are distinct. +/// Note: they won't be sorted by their position, since the order is changing when k is changing. +/// +/// # Example +/// ``` +/// use std::hash::{DefaultHasher, Hash}; +/// use consistent_hashing::ConsistentChooseKHasher; +/// +/// let mut h = DefaultHasher::default(); +/// 42u64.hash(&mut h); +/// let top3: Vec = ConsistentChooseKHasher::new(h, 100).take(3).collect(); +/// assert_eq!(top3.len(), 3); +/// ``` +pub struct ConsistentChooseKHasher { + builder: H, + n: usize, + pub(crate) samples: Vec, +} + +impl ConsistentChooseKHasher { + /// Create a new iterator for `n` nodes starting with k=0. + /// + /// Time: O(1) + pub fn new(builder: H, n: usize) -> Self { + Self { + builder, + n, + samples: Vec::new(), + } + } + + /// Create with the choose-k set for `k` out of `n` nodes pre-built. + /// + /// Average time: O(k^2) + pub fn new_with_k(builder: H, n: usize, k: usize) -> Self { + assert!(n >= k, "n must be at least k"); + let mut iter = Self::new(builder, n); + for i in 0..k { + iter.samples.push(Sample::new(iter.get_sample(i, n), i)); + } + for i in (0..k).rev() { + let s = iter.samples[0..=i].iter().copied().max().expect(""); + iter.samples[i] = s; + for j in 0..i { + if iter.samples[j].pos == s.pos { + iter.samples[j] = Sample::new(iter.get_sample(j, s.pos), j); + } + } + } + iter + } + + /// Returns an iterator over the sampled positions in increasing order. 
+ /// + /// Time: O(1) + pub fn positions(&self) -> impl Iterator + '_ { + self.samples.iter().map(|s| s.pos) + } + + /// Returns the underlying samples. + pub fn samples(&self) -> &[Sample] { + &self.samples + } + + /// Returns the current universe size. + pub fn n(&self) -> usize { + self.n + } + + /// Returns the current sample size. + pub fn k(&self) -> usize { + self.samples.len() + } + + /// (Average) time: O(1) + fn get_sample(&self, k: usize, n: usize) -> usize { + ConsistentHasher::new(self.builder.seq_builder(k)) + .into_prev(n - k) + .expect("must not fail") + + k + } + + /// Decrements n to the largest sample and computes the new sample it is + /// being replaced with. Returns the index of the new largest sample. + /// + /// Time: O(k) + pub fn shrink_n(&mut self) -> usize { + let mut n = self.samples.last().expect("samples must not be empty").pos; + self.n = n; + for i in (0..self.samples.len()).rev() { + if self.samples[i].pos < n { + // We are done! + return i + 1; + } + // Here the maximum could be k, k-1, or i! + let k = self.samples[i].seq; + let si = Sample::new(self.get_sample(i, n), i); + let sk = Sample::new(self.get_sample(k, n), k); + let new_sample = si.max(sk); + if i > 0 && self.samples[i - 1] > new_sample { + self.samples[i] = self.samples[i - 1]; + } else { + self.samples[i] = new_sample; + } + n = self.samples[i].pos; + } + 0 + } + + /// Grow the sample set by one element. Returns the index at which the new + /// element was inserted (i.e. its rank position). 
+ /// + /// Time: O(k) + pub fn grow_k(&mut self) -> usize { + let k = self.samples.len(); + let sk = Sample::new(self.get_sample(k, self.n), k); + if let Some(last) = self.samples.last().copied() { + if last.pos < sk.pos { + self.samples.push(sk); + } else if last.pos == sk.pos { + self.shrink_n(); + self.samples.push(sk); + } else { + let i = self.shrink_n(); + self.samples.push(last); + return i; + } + } else { + self.samples.push(sk); + } + k + } +} + +impl Iterator for ConsistentChooseKHasher { + type Item = usize; + + fn next(&mut self) -> Option { + if self.samples.len() >= self.n { + return None; + } + let idx = self.grow_k(); + Some(self.samples[idx].pos) + } +} + +#[cfg(test)] +mod tests { + use std::hash::{DefaultHasher, Hash}; + + use super::*; + + fn hasher_for_key(key: u64) -> DefaultHasher { + let mut hasher = DefaultHasher::default(); + key.hash(&mut hasher); + hasher + } + + #[test] + fn test_ranking_matches_prev() { + // Every prefix of the ranking must equal the sorted prev(n) set. + for key in 0..200 { + for n in 2..25 { + let hasher = hasher_for_key(key); + let full: Vec = ConsistentChooseKHasher::new(hasher.clone(), n).collect(); + assert_eq!(full.len(), n); + for k in 1..=n { + let expected: Vec = + ConsistentChooseKHasher::new_with_k(hasher.clone(), n, k) + .positions() + .collect(); + let mut prefix = full[..k].to_vec(); + prefix.sort(); + assert_eq!( + prefix, expected, + "key={key} n={n} k={k}: ranking prefix mismatch" + ); + } + } + } + } + + #[test] + fn test_ranking_k_equals_1() { + for key in 0..500 { + let hasher = hasher_for_key(key); + for n in 1..50 { + let first = ConsistentChooseKHasher::new(hasher.clone(), n) + .next() + .unwrap(); + let prev: Vec = + ConsistentChooseKHasher::new_with_k(hasher.clone(), n, 1) + .positions() + .collect(); + assert_eq!(first, prev[0]); + } + } + } + + #[test] + fn test_ranking_k_equals_n() { + // When exhausted, the ranking contains all nodes 0..n. 
+ for key in 0..200 { + for n in 1..20 { + let hasher = hasher_for_key(key); + let mut ranking: Vec = + ConsistentChooseKHasher::new(hasher, n).collect(); + ranking.sort(); + let expected: Vec = (0..n).collect(); + assert_eq!(ranking, expected, "key={key} n={n}"); + } + } + } + + #[test] + fn test_partial_iteration() { + // Taking fewer than n elements must still be correct. + for key in 0..100 { + let hasher = hasher_for_key(key); + let n = 20; + let full: Vec = ConsistentChooseKHasher::new(hasher.clone(), n).collect(); + for take in 1..=n { + let partial: Vec = + ConsistentChooseKHasher::new(hasher.clone(), n).take(take).collect(); + assert_eq!(&partial[..], &full[..take]); + } + } + } + + #[test] + fn test_uniform_k() { + const K: usize = 3; + for k in 0..100 { + let hasher = hasher_for_key(k); + for n in K..1000 { + let samples: Vec = + ConsistentChooseKHasher::new_with_k(hasher.clone(), n + 1, K) + .positions() + .collect(); + assert!(samples.len() == K); + for i in 0..K - 1 { + assert!(samples[i] < samples[i + 1]); + } + let next: Vec = + ConsistentChooseKHasher::new_with_k(hasher.clone(), n + 2, K) + .positions() + .collect(); + for i in 0..K { + assert!(samples[i] <= next[i]); + } + let mut merged = samples.clone(); + merged.extend(next.clone()); + merged.sort(); + merged.dedup(); + assert!( + merged.len() == K || merged.len() == K + 1, + "Unexpected {samples:?} vs. {next:?}" + ); + } + } + let mut stats = vec![0; 8]; + for i in 0..32 { + let hasher = hasher_for_key(i + 32783); + let samples: Vec = + ConsistentChooseKHasher::new_with_k(hasher, stats.len(), 2) + .positions() + .collect(); + for s in samples { + stats[s] += 1; + } + } + println!("{stats:?}"); + assert_eq!(stats, vec![10, 12, 6, 6, 6, 5, 9, 10]); + // Test consistency when increasing k! 
+ for k in 1..10 { + for n in k + 1..20 { + for key in 0..1000 { + let hasher = hasher_for_key(key); + let set1: Vec = + ConsistentChooseKHasher::new_with_k(hasher.clone(), n, k) + .positions() + .collect(); + let set2: Vec = + ConsistentChooseKHasher::new_with_k(hasher, n, k + 1) + .positions() + .collect(); + assert_eq!(set1.len(), k); + assert_eq!(set2.len(), k + 1); + let mut merged = set1.clone(); + merged.extend(set2); + merged.sort(); + merged.dedup(); + assert_eq!(merged.len(), k + 1); + } + } + } + } + + #[test] + fn test_shrink_n() { + for k in 1..10 { + for n in k + 1..30 { + let mut iter = ConsistentChooseKHasher::new_with_k(DefaultHasher::new(), n, k); + while iter.samples.last().unwrap().pos > k { + let expected = + ConsistentChooseKHasher::new_with_k(DefaultHasher::new(), iter.samples.last().unwrap().pos, k); + + println!("n: {n}, k: {k}"); + println!("before: {:?}", iter.samples); + println!("expected {:?}", expected.samples); + + iter.shrink_n(); + assert_eq!(iter.samples, expected.samples); + } + } + } + } + + #[test] + fn test_grow_k() { + for n in 1..30 { + let mut iter = ConsistentChooseKHasher::new(DefaultHasher::new(), n); + for k in 1..10.min(n) { + let expected = ConsistentChooseKHasher::new_with_k(DefaultHasher::new(), n, k); + iter.grow_k(); + assert_eq!(iter.samples, expected.samples); + } + } + } +} diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs index dcfbb0c..f1b6fe0 100644 --- a/crates/consistent-hashing/src/lib.rs +++ b/crates/consistent-hashing/src/lib.rs @@ -1,5 +1,8 @@ use std::hash::{Hash, Hasher}; +mod choose_k; +pub use choose_k::ConsistentChooseKHasher; + /// A trait which behaves like a pseudo-random number generator. /// It is used to generate consistent hashes within one bucket. /// Note: the hasher must have been seeded with the key during construction. @@ -226,54 +229,6 @@ impl ConsistentHasher { } } -/// Implementation of a consistent choose k hashing algorithm. 
-/// It returns k distinct consistent hashes in the range `0..n`. -/// The hashes are consistent when `n` changes and when `k` changes! -/// I.e. on average exactly `1/(n+1)` (resp. `1/(k+1)`) many hashes will change -/// when `n` (resp. `k`) increases by one. Additionally, the returned `k` tuple -/// is guaranteed to be uniformely chosen from all possible `n-choose-k` tuples. -pub struct ConsistentChooseKHasher { - builder: H, - k: usize, -} - -impl ConsistentChooseKHasher { - pub fn new(builder: H, k: usize) -> Self { - Self { builder, k } - } - - pub fn prev(&self, n: usize) -> Vec { - let mut res = Vec::with_capacity(self.k); - self.prev_with_vec(n, &mut res); - res - } - - pub fn prev_with_vec(&self, mut n: usize, samples: &mut Vec) { - assert!(n >= self.k, "n must be at least k"); - samples.clear(); - for i in 0..self.k { - samples.push( - ConsistentHasher::new(self.builder.seq_builder(i)) - .into_prev(n - i) - .expect("must not fail") - + i, - ); - } - for i in (0..self.k).rev() { - n = samples[0..=i].iter().copied().max().expect(""); - samples[i] = n; - for (j, sample) in samples[0..i].iter_mut().enumerate() { - if *sample == n { - *sample = ConsistentHasher::new(self.builder.seq_builder(j)) - .into_prev(n - j) - .expect("must not fail") - + j; - } - } - } - } -} - #[cfg(test)] mod tests { use std::hash::DefaultHasher; @@ -312,62 +267,4 @@ mod tests { } println!("{stats:?}"); } - - #[test] - fn test_uniform_k() { - const K: usize = 3; - for k in 0..100 { - let hasher = hasher_for_key(k); - let sampler = ConsistentChooseKHasher::new(hasher, K); - for n in K..1000 { - let samples = sampler.prev(n + 1); - assert!(samples.len() == K); - for i in 0..K - 1 { - assert!(samples[i] < samples[i + 1]); - } - let next = sampler.prev(n + 2); - for i in 0..K { - assert!(samples[i] <= next[i]); - } - let mut merged = samples.clone(); - merged.extend(next.clone()); - merged.sort(); - merged.dedup(); - assert!( - merged.len() == K || merged.len() == K + 1, - "Unexpected 
{samples:?} vs. {next:?}" - ); - } - } - let mut stats = vec![0; 8]; - for i in 0..32 { - let hasher = hasher_for_key(i + 32783); - let sampler = ConsistentChooseKHasher::new(hasher, 2); - let samples = sampler.prev(stats.len()); - for s in samples { - stats[s] += 1; - } - } - println!("{stats:?}"); - assert_eq!(stats, vec![10, 12, 6, 6, 6, 5, 9, 10]); - // Test consistency when increasing k! - for k in 1..10 { - for n in k + 1..20 { - for key in 0..1000 { - let hasher = hasher_for_key(key); - let sampler1 = ConsistentChooseKHasher::new(hasher.clone(), k); - let sampler2 = ConsistentChooseKHasher::new(hasher, k + 1); - let set1 = sampler1.prev(n); - let set2 = sampler2.prev(n); - assert_eq!(set1.len(), k); - assert_eq!(set2.len(), k + 1); - let mut merged = set1.clone(); - merged.extend(set2); - merged.sort(); - merged.dedup(); - assert_eq!(merged.len(), k + 1); - } - } - } - } } From 20048d86e7d2c58f77ea47e01f4ace3e09a130f5 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 18 May 2026 19:48:51 +0200 Subject: [PATCH 22/49] fix some tests --- crates/consistent-hashing/src/choose_k.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/crates/consistent-hashing/src/choose_k.rs b/crates/consistent-hashing/src/choose_k.rs index f51b16f..2594f33 100644 --- a/crates/consistent-hashing/src/choose_k.rs +++ b/crates/consistent-hashing/src/choose_k.rs @@ -113,14 +113,18 @@ impl ConsistentChooseKHasher { /// /// Time: O(k) pub fn shrink_n(&mut self) -> usize { - let mut n = self.samples.last().expect("samples must not be empty").pos; + let n = self.samples.last().expect("samples must not be empty").pos; self.n = n; + self.shrink_n_inner(n) + } + + fn shrink_n_inner(&mut self, mut n: usize) -> usize { for i in (0..self.samples.len()).rev() { if self.samples[i].pos < n { // We are done! return i + 1; } - // Here the maximum could be k, k-1, or i! + // Here the maximum could be k, k-1, or i! 
let k = self.samples[i].seq; let si = Sample::new(self.get_sample(i, n), i); let sk = Sample::new(self.get_sample(k, n), k); @@ -140,16 +144,16 @@ impl ConsistentChooseKHasher { /// /// Time: O(k) pub fn grow_k(&mut self) -> usize { - let k = self.samples.len(); + let k = self.samples.len(); let sk = Sample::new(self.get_sample(k, self.n), k); if let Some(last) = self.samples.last().copied() { if last.pos < sk.pos { self.samples.push(sk); } else if last.pos == sk.pos { - self.shrink_n(); + self.shrink_n_inner(last.pos); self.samples.push(sk); } else { - let i = self.shrink_n(); + let i = self.shrink_n_inner(last.pos); self.samples.push(last); return i; } @@ -332,11 +336,6 @@ mod tests { while iter.samples.last().unwrap().pos > k { let expected = ConsistentChooseKHasher::new_with_k(DefaultHasher::new(), iter.samples.last().unwrap().pos, k); - - println!("n: {n}, k: {k}"); - println!("before: {:?}", iter.samples); - println!("expected {:?}", expected.samples); - iter.shrink_n(); assert_eq!(iter.samples, expected.samples); } From e9fe3bd20918c8387e5ab634e1cde73136f24190 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 18 May 2026 19:54:20 +0200 Subject: [PATCH 23/49] fix remaining tests --- crates/consistent-hashing/src/choose_k.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/consistent-hashing/src/choose_k.rs b/crates/consistent-hashing/src/choose_k.rs index 2594f33..808347e 100644 --- a/crates/consistent-hashing/src/choose_k.rs +++ b/crates/consistent-hashing/src/choose_k.rs @@ -150,8 +150,9 @@ impl ConsistentChooseKHasher { if last.pos < sk.pos { self.samples.push(sk); } else if last.pos == sk.pos { - self.shrink_n_inner(last.pos); + let i = self.shrink_n_inner(last.pos); self.samples.push(sk); + return i; } else { let i = self.shrink_n_inner(last.pos); self.samples.push(last); From bec3c8f03ca5a3234cbba41bd201c040dcdfe892 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 18 May 2026 19:55:34 +0200 Subject: [PATCH 
24/49] clippy + fmt --- .../benchmarks/performance.rs | 19 ++++----- crates/consistent-hashing/src/choose_k.rs | 40 +++++++++---------- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/crates/consistent-hashing/benchmarks/performance.rs b/crates/consistent-hashing/benchmarks/performance.rs index 3717e5c..b8cbdbe 100644 --- a/crates/consistent-hashing/benchmarks/performance.rs +++ b/crates/consistent-hashing/benchmarks/performance.rs @@ -39,9 +39,7 @@ fn throughput_benchmark(c: &mut Criterion) { for key in keys { let mut h = DefaultHasher::default(); key.hash(&mut h); - black_box( - ConsistentChooseKHasher::new_with_k(h, *n + k, k), - ); + black_box(ConsistentChooseKHasher::new_with_k(h, *n + k, k)); } }, criterion::BatchSize::SmallInput, @@ -57,15 +55,12 @@ fn append_vs_new_with_k(c: &mut Criterion) { group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); for n in [10usize, 100, 1000, 10000] { for k in [2, 3, 10, 100] { - group.bench_function( - BenchmarkId::new(format!("new_with_k/k_{k}"), n), - |b| { - b.iter(|| { - let h = DefaultHasher::default(); - black_box(ConsistentChooseKHasher::new_with_k(h, n + k, k)); - }) - }, - ); + group.bench_function(BenchmarkId::new(format!("new_with_k/k_{k}"), n), |b| { + b.iter(|| { + let h = DefaultHasher::default(); + black_box(ConsistentChooseKHasher::new_with_k(h, n + k, k)); + }) + }); group.bench_function(BenchmarkId::new(format!("append/k_{k}"), n), |b| { b.iter(|| { let h = DefaultHasher::default(); diff --git a/crates/consistent-hashing/src/choose_k.rs b/crates/consistent-hashing/src/choose_k.rs index 808347e..f26ec2d 100644 --- a/crates/consistent-hashing/src/choose_k.rs +++ b/crates/consistent-hashing/src/choose_k.rs @@ -124,7 +124,7 @@ impl ConsistentChooseKHasher { // We are done! return i + 1; } - // Here the maximum could be k, k-1, or i! + // Here the maximum could be k, k-1, or i! 
let k = self.samples[i].seq; let si = Sample::new(self.get_sample(i, n), i); let sk = Sample::new(self.get_sample(k, n), k); @@ -144,7 +144,7 @@ impl ConsistentChooseKHasher { /// /// Time: O(k) pub fn grow_k(&mut self) -> usize { - let k = self.samples.len(); + let k = self.samples.len(); let sk = Sample::new(self.get_sample(k, self.n), k); if let Some(last) = self.samples.last().copied() { if last.pos < sk.pos { @@ -221,10 +221,9 @@ mod tests { let first = ConsistentChooseKHasher::new(hasher.clone(), n) .next() .unwrap(); - let prev: Vec = - ConsistentChooseKHasher::new_with_k(hasher.clone(), n, 1) - .positions() - .collect(); + let prev: Vec = ConsistentChooseKHasher::new_with_k(hasher.clone(), n, 1) + .positions() + .collect(); assert_eq!(first, prev[0]); } } @@ -236,8 +235,7 @@ mod tests { for key in 0..200 { for n in 1..20 { let hasher = hasher_for_key(key); - let mut ranking: Vec = - ConsistentChooseKHasher::new(hasher, n).collect(); + let mut ranking: Vec = ConsistentChooseKHasher::new(hasher, n).collect(); ranking.sort(); let expected: Vec = (0..n).collect(); assert_eq!(ranking, expected, "key={key} n={n}"); @@ -253,8 +251,9 @@ mod tests { let n = 20; let full: Vec = ConsistentChooseKHasher::new(hasher.clone(), n).collect(); for take in 1..=n { - let partial: Vec = - ConsistentChooseKHasher::new(hasher.clone(), n).take(take).collect(); + let partial: Vec = ConsistentChooseKHasher::new(hasher.clone(), n) + .take(take) + .collect(); assert_eq!(&partial[..], &full[..take]); } } @@ -294,10 +293,9 @@ mod tests { let mut stats = vec![0; 8]; for i in 0..32 { let hasher = hasher_for_key(i + 32783); - let samples: Vec = - ConsistentChooseKHasher::new_with_k(hasher, stats.len(), 2) - .positions() - .collect(); + let samples: Vec = ConsistentChooseKHasher::new_with_k(hasher, stats.len(), 2) + .positions() + .collect(); for s in samples { stats[s] += 1; } @@ -313,10 +311,9 @@ mod tests { ConsistentChooseKHasher::new_with_k(hasher.clone(), n, k) .positions() 
.collect(); - let set2: Vec = - ConsistentChooseKHasher::new_with_k(hasher, n, k + 1) - .positions() - .collect(); + let set2: Vec = ConsistentChooseKHasher::new_with_k(hasher, n, k + 1) + .positions() + .collect(); assert_eq!(set1.len(), k); assert_eq!(set2.len(), k + 1); let mut merged = set1.clone(); @@ -335,8 +332,11 @@ mod tests { for n in k + 1..30 { let mut iter = ConsistentChooseKHasher::new_with_k(DefaultHasher::new(), n, k); while iter.samples.last().unwrap().pos > k { - let expected = - ConsistentChooseKHasher::new_with_k(DefaultHasher::new(), iter.samples.last().unwrap().pos, k); + let expected = ConsistentChooseKHasher::new_with_k( + DefaultHasher::new(), + iter.samples.last().unwrap().pos, + k, + ); iter.shrink_n(); assert_eq!(iter.samples, expected.samples); } From da37dd7f10137ae775a19ac32882c25b04db0da3 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Tue, 19 May 2026 07:52:14 +0200 Subject: [PATCH 25/49] Update bounded_load.rs --- .../examples/bounded_load.rs | 250 +++++++++++------- 1 file changed, 154 insertions(+), 96 deletions(-) diff --git a/crates/consistent-hashing/examples/bounded_load.rs b/crates/consistent-hashing/examples/bounded_load.rs index 29fcaf4..99e1c5c 100644 --- a/crates/consistent-hashing/examples/bounded_load.rs +++ b/crates/consistent-hashing/examples/bounded_load.rs @@ -1,140 +1,198 @@ //! Bounded-load consistent hashing example. //! -//! Pure consistent hashing selects each node with equal probability, but for -//! small workloads (e.g. 64 tokens across 24 machines) random variance causes -//! highly skewed assignments. This example layers a capacity cap on top of -//! ConsistentChooseK to enforce near-perfect balance. +//! Compares unbounded vs bounded-load assignment across many random seeds, +//! reporting average and standard deviation of load spread and consistency +//! (fraction of assignments that change when a node is added). //! -//! Assignment uses round-robin over replicas: first assign every token's -//! 
most-preferred machine, then every token's second-preferred, etc. This -//! ensures all tokens compete fairly for each replica round. +//! Bounded assignment iterates over tokens sequentially, greedily assigning +//! each token its k most-preferred nodes that still have capacity. Using +//! round-robin (all tokens claim one replica per round) yields nearly +//! identical churn numbers with marginally better load spread. //! //! Run with: cargo run --example bounded_load -use std::hash::{DefaultHasher, Hash}; +use std::hash::{DefaultHasher, Hash, Hasher}; use consistent_hashing::ConsistentChooseKHasher; -/// Round-robin bounded-load assignment. +/// Bounded-load assignment. /// -/// For each replica round r = 0..k, iterate over all tokens and assign each -/// to its next most-preferred node that still has capacity. This gives every -/// token equal priority within each round. +/// Each token claims all k replicas before moving to the next token, +/// skipping any node that has reached `max_load`. 
fn bounded_load_assign( - rankings: &[Vec], + iters: impl IntoIterator>, k: usize, n: usize, max_load: usize, ) -> (Vec>, Vec) { let mut load = vec![0usize; n]; - let num_tokens = rankings.len(); - let mut assignments = vec![Vec::with_capacity(k); num_tokens]; - let mut cursors = vec![0usize; num_tokens]; - - for _round in 0..k { - for (token, ranking) in rankings.iter().enumerate() { - while cursors[token] < ranking.len() { - let node = ranking[cursors[token]]; - cursors[token] += 1; - if load[node] < max_load { - load[node] += 1; - assignments[token].push(node); + let mut assignments = Vec::new(); + + for mut iter in iters { + let mut assigned = Vec::with_capacity(k); + for node in iter.by_ref() { + if load[node] < max_load { + load[node] += 1; + assigned.push(node); + if assigned.len() == k { break; } } } + assignments.push(assigned); } (assignments, load) } -fn main() { - let num_tokens: usize = 64; - let k: usize = 2; // replicas per token - let n: usize = 24; // machines +fn hasher_for_seed_and_key(seed: u64, key: u64) -> DefaultHasher { + let mut h = DefaultHasher::default(); + seed.hash(&mut h); + let seed_state = h.finish(); + let mut h2 = DefaultHasher::default(); + seed_state.hash(&mut h2); + key.hash(&mut h2); + h2 +} + +struct Stats { + sum: f64, + sum_sq: f64, + count: f64, +} + +impl Stats { + fn new() -> Self { + Self { + sum: 0.0, + sum_sq: 0.0, + count: 0.0, + } + } + + fn push(&mut self, x: f64) { + self.sum += x; + self.sum_sq += x * x; + self.count += 1.0; + } + + fn mean(&self) -> f64 { + self.sum / self.count + } + + fn stddev(&self) -> f64 { + (self.sum_sq / self.count - self.mean().powi(2)) + .max(0.0) + .sqrt() + } +} + +fn run(num_tokens: usize, k: usize, n: usize, num_seeds: u64) { let total = num_tokens * k; - let cap = total.div_ceil(n); // ceil(128/24) = 6 + let cap = total.div_ceil(n); - println!("Parameters: {num_tokens} tokens, k={k} replicas, {n} machines"); + println!("Parameters: {num_tokens} tokens, k={k} replicas, {n} 
machines, {num_seeds} seeds"); println!("Total assignments: {total}, capacity cap per machine: {cap}"); println!( - "Perfect balance: {}×{} + {}×{}\n", + "Perfect balance: {}×{} + {}×{}", n - total % n, total / n, total % n, total / n + 1 ); + println!(); + + let mut ub_spread = Stats::new(); + let mut b_spread = Stats::new(); + let mut ub_changes = Stats::new(); + let mut b_changes = Stats::new(); + + for seed in 0..num_seeds { + // ── Unbounded ──────────────────────────────────────────────────── + let make_iters = |n| { + (0..num_tokens as u64) + .map(move |key| ConsistentChooseKHasher::new(hasher_for_seed_and_key(seed, key), n)) + }; + let (unbounded, ub_load) = bounded_load_assign(make_iters(n), k, n, usize::MAX); + let ub_min = *ub_load.iter().min().unwrap(); + let ub_max = *ub_load.iter().max().unwrap(); + ub_spread.push((ub_max - ub_min) as f64); + + // ── Bounded ────────────────────────────────────────────────────── + let (bounded, b_load) = bounded_load_assign(make_iters(n), k, n, cap); + let b_min = *b_load.iter().min().unwrap(); + let b_max = *b_load.iter().max().unwrap(); + b_spread.push((b_max - b_min) as f64); - // ── Unbounded ──────────────────────────────────────────────────────── - let unbounded: Vec> = (0..num_tokens as u64) - .map(|key| { - let mut h = DefaultHasher::default(); - key.hash(&mut h); - ConsistentChooseKHasher::new(h, n).take(k).collect() - }) - .collect(); - let mut unbounded_load = vec![0usize; n]; - for a in &unbounded { - for &node in a { - unbounded_load[node] += 1; + // ── Consistency: add one machine ───────────────────────────────── + let n2 = n + 1; + let cap2 = total.div_ceil(n2); + + let (unbounded2, _) = bounded_load_assign(make_iters(n2), k, n2, usize::MAX); + let mut ub_chg = 0usize; + for (before, after) in unbounded.iter().zip(unbounded2.iter()) { + for node in before { + if !after.contains(node) { + ub_chg += 1; + } + } } - } + ub_changes.push(ub_chg as f64 / total as f64 * 100.0); - // ── Bounded (round-robin) 
──────────────────────────────────────────── - let rankings: Vec> = (0..num_tokens as u64) - .map(|key| { - let mut h = DefaultHasher::default(); - key.hash(&mut h); - ConsistentChooseKHasher::new(h, n).collect() - }) - .collect(); - let (bounded, bounded_load) = bounded_load_assign(&rankings, k, n, cap); - - // ── Display ────────────────────────────────────────────────────────── - println!("{:<12} {:>10} {:>10}", "Machine", "Unbounded", "Bounded"); - println!("{:-<12} {:->10} {:->10}", "", "", ""); - for i in 0..n { - println!( - "{:<12} {:>10} {:>10}", - i, unbounded_load[i], bounded_load[i] - ); + let (bounded2, _) = bounded_load_assign(make_iters(n2), k, n2, cap2); + let mut b_chg = 0usize; + for (before, after) in bounded.iter().zip(bounded2.iter()) { + for node in before { + if !after.contains(node) { + b_chg += 1; + } + } + } + b_changes.push(b_chg as f64 / total as f64 * 100.0); } - let ub_min = *unbounded_load.iter().min().unwrap(); - let ub_max = *unbounded_load.iter().max().unwrap(); - let b_min = *bounded_load.iter().min().unwrap(); - let b_max = *bounded_load.iter().max().unwrap(); - println!("{:-<12} {:->10} {:->10}", "", "", ""); println!( - "{:<12} {:>10} {:>10}", - "spread", - ub_max - ub_min, - b_max - b_min + "{:<24} {:>16} {:>16}", + "", "Unbounded", "Bounded" ); + println!("{:-<24} {:->16} {:->16}", "", "", ""); + println!( + "{:<24} {:>11.2} ± {:<5.2} {:>10.2} ± {:<5.2}", + "Load spread (max-min)", + ub_spread.mean(), + ub_spread.stddev(), + b_spread.mean(), + b_spread.stddev(), + ); + println!( + "{:<24} {:>10.2}% ± {:<5.2} {:>9.2}% ± {:<5.2}", + "Churn on n→n+1", + ub_changes.mean(), + ub_changes.stddev(), + b_changes.mean(), + b_changes.stddev(), + ); + println!( + "\n ideal churn: {:.2}%", + 1.0 / (n + 1) as f64 * 100.0 + ); +} - // ── Consistency check: what happens when we add one machine? 
───────── - let n2 = n + 1; - let cap2 = (num_tokens * k).div_ceil(n2); - let rankings2: Vec> = (0..num_tokens as u64) - .map(|key| { - let mut h = DefaultHasher::default(); - key.hash(&mut h); - ConsistentChooseKHasher::new(h, n2).collect() - }) - .collect(); - let (bounded2, _) = bounded_load_assign(&rankings2, k, n2, cap2); - - let mut changes = 0; - for (before, after) in bounded.iter().zip(bounded2.iter()) { - for node in before { - if !after.contains(node) { - changes += 1; - } +fn main() { + let configs: &[(usize, usize, usize)] = &[ + // (num_tokens, k, n) + (64, 3, 24), // original + (256, 3, 24), // more tokens, same k and n + (64, 1, 24), // k=1 (no replication) + (64, 5, 24), // higher replication + (64, 3, 8), // fewer machines + (64, 3, 60), // many machines (sparse) + ]; + let num_seeds = 1000; + + for (i, &(num_tokens, k, n)) in configs.iter().enumerate() { + if i > 0 { + println!("\n{}\n", "=".repeat(76)); } + run(num_tokens, k, n, num_seeds); } - println!("\nConsistency: adding machine {n} → {n2}"); - println!( - " {changes}/{total} assignments changed ({:.1}%), ideal ≈ {:.1}%", - changes as f64 / total as f64 * 100.0, - k as f64 / n2 as f64 * 100.0 - ); } From 48e3b0b54d76507d2993746839a88b09ce951964 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Tue, 19 May 2026 08:07:26 +0200 Subject: [PATCH 26/49] Update bounded_load.rs --- .../examples/bounded_load.rs | 149 ++++++++++++++---- 1 file changed, 116 insertions(+), 33 deletions(-) diff --git a/crates/consistent-hashing/examples/bounded_load.rs b/crates/consistent-hashing/examples/bounded_load.rs index 99e1c5c..bd76799 100644 --- a/crates/consistent-hashing/examples/bounded_load.rs +++ b/crates/consistent-hashing/examples/bounded_load.rs @@ -12,6 +12,7 @@ //! 
Run with: cargo run --example bounded_load use std::hash::{DefaultHasher, Hash, Hasher}; +use std::rc::Rc; use consistent_hashing::ConsistentChooseKHasher; @@ -19,8 +20,8 @@ use consistent_hashing::ConsistentChooseKHasher; /// /// Each token claims all k replicas before moving to the next token, /// skipping any node that has reached `max_load`. -fn bounded_load_assign( - iters: impl IntoIterator>, +fn bounded_load_assign>( + iters: impl IntoIterator, k: usize, n: usize, max_load: usize, @@ -44,6 +45,79 @@ fn bounded_load_assign( (assignments, load) } +/// Count the number of assignments that changed between two runs. +fn count_churn(before: &[Vec], after: &[Vec]) -> usize { + before + .iter() + .zip(after.iter()) + .map(|(b, a)| b.iter().filter(|node| !a.contains(node)).count()) + .sum() +} + +/// Load spread: difference between max and min loaded nodes. +fn load_spread(load: &[usize]) -> usize { + load.iter().max().unwrap() - load.iter().min().unwrap() +} + +/// A hash ring with `v` virtual nodes per physical node. +struct HashRing { + ring: Rc>, +} + +impl HashRing { + fn new(seed: u64, n: usize, v: usize) -> Self { + let mut ring: Vec<(u64, usize)> = (0..n) + .flat_map(|node| { + (0..v).map(move |vi| { + let mut h = DefaultHasher::default(); + seed.hash(&mut h); + node.hash(&mut h); + vi.hash(&mut h); + (h.finish(), node) + }) + }) + .collect(); + ring.sort_unstable_by_key(|&(pos, _)| pos); + Self { ring: Rc::new(ring) } + } + + /// Return an iterator over distinct physical nodes for the given token hash, + /// walking clockwise from the token's position on the ring. + fn iter(&self, token_hash: u64) -> HashRingIter { + let start = self.ring.partition_point(|&(pos, _)| pos < token_hash); + HashRingIter { + ring: Rc::clone(&self.ring), + start, + offset: 0, + seen: Vec::new(), + } + } +} + +/// Iterator that walks a hash ring clockwise, yielding distinct physical nodes. 
+struct HashRingIter { + ring: Rc>, + start: usize, + offset: usize, + seen: Vec, +} + +impl Iterator for HashRingIter { + type Item = usize; + + fn next(&mut self) -> Option { + while self.offset < self.ring.len() { + let (_, node) = self.ring[(self.start + self.offset) % self.ring.len()]; + self.offset += 1; + if !self.seen.contains(&node) { + self.seen.push(node); + return Some(node); + } + } + None + } +} + fn hasher_for_seed_and_key(seed: u64, key: u64) -> DefaultHasher { let mut h = DefaultHasher::default(); seed.hash(&mut h); @@ -86,6 +160,8 @@ impl Stats { } } +const VIRTUAL_NODES: usize = 200; + fn run(num_tokens: usize, k: usize, n: usize, num_seeds: u64) { let total = num_tokens * k; let cap = total.div_ceil(n); @@ -103,73 +179,80 @@ fn run(num_tokens: usize, k: usize, n: usize, num_seeds: u64) { let mut ub_spread = Stats::new(); let mut b_spread = Stats::new(); + let mut ring_spread = Stats::new(); let mut ub_changes = Stats::new(); let mut b_changes = Stats::new(); + let mut ring_changes = Stats::new(); for seed in 0..num_seeds { - // ── Unbounded ──────────────────────────────────────────────────── + // ── Choose-k (unbounded) ───────────────────────────────────────── let make_iters = |n| { (0..num_tokens as u64) .map(move |key| ConsistentChooseKHasher::new(hasher_for_seed_and_key(seed, key), n)) }; let (unbounded, ub_load) = bounded_load_assign(make_iters(n), k, n, usize::MAX); - let ub_min = *ub_load.iter().min().unwrap(); - let ub_max = *ub_load.iter().max().unwrap(); - ub_spread.push((ub_max - ub_min) as f64); + ub_spread.push(load_spread(&ub_load) as f64); - // ── Bounded ────────────────────────────────────────────────────── + // ── Choose-k (bounded) ─────────────────────────────────────────── let (bounded, b_load) = bounded_load_assign(make_iters(n), k, n, cap); - let b_min = *b_load.iter().min().unwrap(); - let b_max = *b_load.iter().max().unwrap(); - b_spread.push((b_max - b_min) as f64); + b_spread.push(load_spread(&b_load) as f64); + + // 
── Hash ring (bounded) ────────────────────────────────────────── + let ring = HashRing::new(seed, n, VIRTUAL_NODES); + let (ring_assign, r_load) = bounded_load_assign( + (0..num_tokens as u64) + .map(|key| ring.iter(hasher_for_seed_and_key(seed, key).finish())), + k, + n, + cap, + ); + ring_spread.push(load_spread(&r_load) as f64); // ── Consistency: add one machine ───────────────────────────────── let n2 = n + 1; let cap2 = total.div_ceil(n2); let (unbounded2, _) = bounded_load_assign(make_iters(n2), k, n2, usize::MAX); - let mut ub_chg = 0usize; - for (before, after) in unbounded.iter().zip(unbounded2.iter()) { - for node in before { - if !after.contains(node) { - ub_chg += 1; - } - } - } - ub_changes.push(ub_chg as f64 / total as f64 * 100.0); + ub_changes.push(count_churn(&unbounded, &unbounded2) as f64 / total as f64 * 100.0); let (bounded2, _) = bounded_load_assign(make_iters(n2), k, n2, cap2); - let mut b_chg = 0usize; - for (before, after) in bounded.iter().zip(bounded2.iter()) { - for node in before { - if !after.contains(node) { - b_chg += 1; - } - } - } - b_changes.push(b_chg as f64 / total as f64 * 100.0); + b_changes.push(count_churn(&bounded, &bounded2) as f64 / total as f64 * 100.0); + + let ring2 = HashRing::new(seed, n2, VIRTUAL_NODES); + let (ring_assign2, _) = bounded_load_assign( + (0..num_tokens as u64) + .map(|key| ring2.iter(hasher_for_seed_and_key(seed, key).finish())), + k, + n2, + cap2, + ); + ring_changes.push(count_churn(&ring_assign, &ring_assign2) as f64 / total as f64 * 100.0); } println!( - "{:<24} {:>16} {:>16}", - "", "Unbounded", "Bounded" + "{:<24} {:>16} {:>16} {:>16}", + "", "Choose-k", "Bounded", "Ring Bounded" ); - println!("{:-<24} {:->16} {:->16}", "", "", ""); + println!("{:-<24} {:->16} {:->16} {:->16}", "", "", "", ""); println!( - "{:<24} {:>11.2} ± {:<5.2} {:>10.2} ± {:<5.2}", + "{:<24} {:>11.2} ± {:<5.2} {:>10.2} ± {:<5.2} {:>10.2} ± {:<5.2}", "Load spread (max-min)", ub_spread.mean(), ub_spread.stddev(), 
b_spread.mean(), b_spread.stddev(), + ring_spread.mean(), + ring_spread.stddev(), ); println!( - "{:<24} {:>10.2}% ± {:<5.2} {:>9.2}% ± {:<5.2}", + "{:<24} {:>10.2}% ± {:<5.2} {:>9.2}% ± {:<5.2} {:>9.2}% ± {:<5.2}", "Churn on n→n+1", ub_changes.mean(), ub_changes.stddev(), b_changes.mean(), b_changes.stddev(), + ring_changes.mean(), + ring_changes.stddev(), ); println!( "\n ideal churn: {:.2}%", From 25a42f88237464454fb0605e468b6003335d691c Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Tue, 19 May 2026 08:38:49 +0200 Subject: [PATCH 27/49] Update bounded_load.rs --- .../examples/bounded_load.rs | 81 +++++++++++-------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/crates/consistent-hashing/examples/bounded_load.rs b/crates/consistent-hashing/examples/bounded_load.rs index bd76799..d9ce34a 100644 --- a/crates/consistent-hashing/examples/bounded_load.rs +++ b/crates/consistent-hashing/examples/bounded_load.rs @@ -11,8 +11,9 @@ //! //! Run with: cargo run --example bounded_load +use std::collections::HashSet; use std::hash::{DefaultHasher, Hash, Hasher}; -use std::rc::Rc; +use std::time::Instant; use consistent_hashing::ConsistentChooseKHasher; @@ -54,14 +55,16 @@ fn count_churn(before: &[Vec], after: &[Vec]) -> usize { .sum() } -/// Load spread: difference between max and min loaded nodes. -fn load_spread(load: &[usize]) -> usize { - load.iter().max().unwrap() - load.iter().min().unwrap() +/// Standard deviation of load across machines. +fn load_stddev(load: &[usize]) -> f64 { + let mean = load.iter().sum::() as f64 / load.len() as f64; + let var = load.iter().map(|&x| (x as f64 - mean).powi(2)).sum::() / load.len() as f64; + var.sqrt() } /// A hash ring with `v` virtual nodes per physical node. 
struct HashRing { - ring: Rc>, + ring: Vec<(u64, usize)>, } impl HashRing { @@ -78,39 +81,38 @@ impl HashRing { }) .collect(); ring.sort_unstable_by_key(|&(pos, _)| pos); - Self { ring: Rc::new(ring) } + Self { ring } } /// Return an iterator over distinct physical nodes for the given token hash, /// walking clockwise from the token's position on the ring. - fn iter(&self, token_hash: u64) -> HashRingIter { + fn iter(&self, token_hash: u64) -> HashRingIter<'_> { let start = self.ring.partition_point(|&(pos, _)| pos < token_hash); HashRingIter { - ring: Rc::clone(&self.ring), + ring: &self.ring, start, offset: 0, - seen: Vec::new(), + seen: HashSet::new(), } } } /// Iterator that walks a hash ring clockwise, yielding distinct physical nodes. -struct HashRingIter { - ring: Rc>, +struct HashRingIter<'a> { + ring: &'a [(u64, usize)], start: usize, offset: usize, - seen: Vec, + seen: HashSet, } -impl Iterator for HashRingIter { +impl Iterator for HashRingIter<'_> { type Item = usize; fn next(&mut self) -> Option { while self.offset < self.ring.len() { let (_, node) = self.ring[(self.start + self.offset) % self.ring.len()]; self.offset += 1; - if !self.seen.contains(&node) { - self.seen.push(node); + if self.seen.insert(node) { return Some(node); } } @@ -164,7 +166,7 @@ const VIRTUAL_NODES: usize = 200; fn run(num_tokens: usize, k: usize, n: usize, num_seeds: u64) { let total = num_tokens * k; - let cap = total.div_ceil(n); + let cap = total.div_ceil(n) + 1; println!("Parameters: {num_tokens} tokens, k={k} replicas, {n} machines, {num_seeds} seeds"); println!("Total assignments: {total}, capacity cap per machine: {cap}"); @@ -183,6 +185,9 @@ fn run(num_tokens: usize, k: usize, n: usize, num_seeds: u64) { let mut ub_changes = Stats::new(); let mut b_changes = Stats::new(); let mut ring_changes = Stats::new(); + let mut ub_time_us = 0u128; + let mut b_time_us = 0u128; + let mut ring_time_us = 0u128; for seed in 0..num_seeds { // ── Choose-k (unbounded) 
───────────────────────────────────────── @@ -190,15 +195,20 @@ fn run(num_tokens: usize, k: usize, n: usize, num_seeds: u64) { (0..num_tokens as u64) .map(move |key| ConsistentChooseKHasher::new(hasher_for_seed_and_key(seed, key), n)) }; + let t = Instant::now(); let (unbounded, ub_load) = bounded_load_assign(make_iters(n), k, n, usize::MAX); - ub_spread.push(load_spread(&ub_load) as f64); + ub_time_us += t.elapsed().as_micros(); + ub_spread.push(load_stddev(&ub_load)); // ── Choose-k (bounded) ─────────────────────────────────────────── + let t = Instant::now(); let (bounded, b_load) = bounded_load_assign(make_iters(n), k, n, cap); - b_spread.push(load_spread(&b_load) as f64); + b_time_us += t.elapsed().as_micros(); + b_spread.push(load_stddev(&b_load)); // ── Hash ring (bounded) ────────────────────────────────────────── let ring = HashRing::new(seed, n, VIRTUAL_NODES); + let t = Instant::now(); let (ring_assign, r_load) = bounded_load_assign( (0..num_tokens as u64) .map(|key| ring.iter(hasher_for_seed_and_key(seed, key).finish())), @@ -206,11 +216,12 @@ fn run(num_tokens: usize, k: usize, n: usize, num_seeds: u64) { n, cap, ); - ring_spread.push(load_spread(&r_load) as f64); + ring_time_us += t.elapsed().as_micros(); + ring_spread.push(load_stddev(&r_load)); // ── Consistency: add one machine ───────────────────────────────── let n2 = n + 1; - let cap2 = total.div_ceil(n2); + let cap2 = total.div_ceil(n2) + 1; let (unbounded2, _) = bounded_load_assign(make_iters(n2), k, n2, usize::MAX); ub_changes.push(count_churn(&unbounded, &unbounded2) as f64 / total as f64 * 100.0); @@ -236,7 +247,7 @@ fn run(num_tokens: usize, k: usize, n: usize, num_seeds: u64) { println!("{:-<24} {:->16} {:->16} {:->16}", "", "", "", ""); println!( "{:<24} {:>11.2} ± {:<5.2} {:>10.2} ± {:<5.2} {:>10.2} ± {:<5.2}", - "Load spread (max-min)", + "Load stddev", ub_spread.mean(), ub_spread.stddev(), b_spread.mean(), @@ -255,24 +266,28 @@ fn run(num_tokens: usize, k: usize, n: usize, 
num_seeds: u64) { ring_changes.stddev(), ); println!( - "\n ideal churn: {:.2}%", - 1.0 / (n + 1) as f64 * 100.0 + "{:<24} {:>13.1} ms {:>13.1} ms {:>13.1} ms", + "Total time", + ub_time_us as f64 / 1000.0, + b_time_us as f64 / 1000.0, + ring_time_us as f64 / 1000.0, ); + println!("\n ideal churn: {:.2}%", 1.0 / (n + 1) as f64 * 100.0); } fn main() { - let configs: &[(usize, usize, usize)] = &[ - // (num_tokens, k, n) - (64, 3, 24), // original - (256, 3, 24), // more tokens, same k and n - (64, 1, 24), // k=1 (no replication) - (64, 5, 24), // higher replication - (64, 3, 8), // fewer machines - (64, 3, 60), // many machines (sparse) + // (num_tokens, k, n, num_seeds) + let configs: &[(usize, usize, usize, u64)] = &[ + (64, 3, 24, 1000), // original + (256, 3, 24, 1000), // more tokens, same k and n + (64, 1, 24, 1000), // k=1 (no replication) + (64, 5, 24, 1000), // higher replication + (64, 3, 8, 1000), // fewer machines + (64, 3, 60, 1000), // many machines (sparse) + (1_000_000, 3, 100_000, 1), // 1M tokens, 100k machines ]; - let num_seeds = 1000; - for (i, &(num_tokens, k, n)) in configs.iter().enumerate() { + for (i, &(num_tokens, k, n, num_seeds)) in configs.iter().enumerate() { if i > 0 { println!("\n{}\n", "=".repeat(76)); } From 04da223439bbbb2cd1fa3e448dad41f4977eb9d3 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Tue, 19 May 2026 10:51:38 +0200 Subject: [PATCH 28/49] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ae3acce..4a72f06 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ A collection of useful algorithms written in Rust. Currently contains: - [`geo_filters`](crates/geo_filters): probabilistic data structures that solve the [Distinct Count Problem](https://en.wikipedia.org/wiki/Count-distinct_problem) using geometric filters. 
- [`bpe`](crates/bpe): fast, correct, and novel algorithms for the [Byte Pair Encoding Algorithm](https://en.wikipedia.org/wiki/Large_language_model#BPE) which are particularly useful for chunking of documents. - [`bpe-openai`](crates/bpe-openai): Fast tokenizers for OpenAI token sets based on the `bpe` crate. +- [`consistent-hashing`](crates/consistent-hashing): constant time consistent hashing algorithms with support for replication and bounded load. - [`string-offsets`](crates/string-offsets): converts string positions between bytes, chars, UTF-16 code units, and line numbers. Useful when sending string indices across language boundaries. ## Background From f3bbcd9c385817d3271973873e39e2f72713c647 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 20 May 2026 09:37:29 +0200 Subject: [PATCH 29/49] add a proper consistentnodemap implementation + documentation --- crates/consistent-hashing/README.md | 44 ++- crates/consistent-hashing/src/lib.rs | 2 + crates/consistent-hashing/src/node_map.rs | 355 ++++++++++++++++++++++ 3 files changed, 388 insertions(+), 13 deletions(-) create mode 100644 crates/consistent-hashing/src/node_map.rs diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index 46a2a32..0a59852 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -11,26 +11,26 @@ Common algorithms - [DXHash](https://arxiv.org/abs/2107.07930) - [JumpBackHash](https://arxiv.org/abs/2403.18682) -## Complexity summary +## Core idea -where `N` is the number of nodes and `R` is the number of replicas. +Many consistent-hashing algorithms are best understood as specialized solutions +to one higher-level problem: primary placement, replication, bounded load, +failover, or arbitrary deletions. A single flat complexity table is often +misleading because those algorithms do not all expose the same operations. -| Algorithm | Lookup per key
(no replication) | Node add/remove | Memory | Lookup with replication | -|-------------------------|--------------------------------------------------------------------------|-----------------|----------------|-----------------------------------------------| -| Hash ring (with vnodes) | O(log(V·N)): binary search; V = 100–200 virtual nodes per physical node | O(V·log(V·N)) | O(V·N) | O(log(V·N) + R): walk to next R distinct nodes | -| Rendezvous | O(N): max score | O(1) | O(N) node list | O(N log R): pick top R scores | -| Jump consistent hash | O(log(N)) expected | 0 | O(1) | O(R log N) | -| AnchorHash | O(1) expected | O(1) | O(N) | Not native | -| DXHash | O(1) expected | O(1) | O(N) | Not native | -| JumpBackHash | O(1) expected | 0 | O(1) | Not native | -| **ConsistentChooseK** | **O(1) expected** | **0** | **O(1)** | **O(R^2)**; **O(R log(R))**: using heap | +This crate instead centers on `ConsistentChooseK`: a stateless per-key ranking +of all nodes. The first item is the primary owner, the first `R` items are +replicas, the next item after a failed node is its failover target, and the same +ranking can drive bounded-load and deletion-tolerant assignment. The current +implementation extracts the first `R` distinct candidates in `O(R^2)` time +(`O(R log R)` with a heap optimization) and uses no persistent memory. Replication of keys -- Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas more evenly. Replicas are not independently distributed. +- Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas more evenly. Replicas are not independently distributed. - Rendezvous hashing: replicate by selecting the top R nodes by score for the key. This naturally yields R distinct owners and supports weights. - Jump consistent hash: the base function doesn't support replication. 
While the math can be modified to support consistent replication, it cannot be efficiently solved for large k and even for small k (=2 or =3), a quadratic or cubic equation has to be solved. - JumpBackHash and variants: The trick of Jump consistent hash to support replication won't work here due to the introduction of additional state. -- ConsistentChooseK: Faster and more memory efficient than all other solutions. +- ConsistentChooseK: produces an ordered list of distinct, consistent candidates directly, making replication and related higher-level policies simple compositions over the same primitive. Why replication matters - Tolerates node failures and maintenance without data unavailability. @@ -59,6 +59,24 @@ Traditionally this requires drawing d independent random nodes per key. However, In active-passive or tiered architectures, each key needs a deterministic failover order. The ranking iterator provides exactly this: the first node is the primary, the second is the hot standby, and so on. When a node fails, the next node in the ranking takes over — consistently for all keys that had the failed node at the same rank position, and without any coordination or ring rebalancing. +### Deletion-tolerant node maps + +`ConsistentNodeMap` uses the `ConsistentChooseK` ranking to support arbitrary node deletions with very small state. It stores only the total slot count and the set of deleted slots. Lookup generates the per-key choose-k ranking and returns the first slot that is not deleted. + +This solves the same deletion problem targeted by AnchorHash, MementoHash, and DxHash: when a node is removed, only keys assigned to that node move, and they are redistributed uniformly over the remaining nodes. The difference is that those algorithms keep replacement or redirect metadata that encodes enough of the removal history to repair hits on deleted nodes. `ConsistentNodeMap` is history-independent: it only needs the current deleted set. 
+ +For many practical deployments, this also makes `ConsistentNodeMap` a compelling replacement for traditional hash-ring implementations with virtual nodes. Rings typically need hundreds of virtual nodes per physical node to obtain good balance, which makes their memory footprint orders of magnitude larger than the actual node set. Here the ranking is generated directly from the key, so deletion support only adds state proportional to the number of deleted slots rather than to a large virtual-node ring. + +The tradeoff is lookup work. If `h` deleted slots are encountered before the first live slot, the current iterator costs `O((h + 1)^2)` because producing the i-th choose-k candidate costs O(i). The expected number of deleted-node hits has the same harmonic/log behavior analyzed for history-based approaches, approximately `ln(total / active)` when `total` slots contain `active` live nodes. Thus the total expected lookup cost is `O((1 + ln(total / active))^2)`. + +| Algorithm | Total lookup time | Add node | Remove node | State | Predefined capacity? | History-dependent? 
| +|-----------|-------------------|----------|-------------|-------|----------------------|--------------------| +| Hash ring with `V` virtual nodes | `O(log(V·active))` | `O(V log(V·active))` | `O(V log(V·active))` | `O(V·active)` ring entries | No | No | +| `ConsistentNodeMap` | `O((h + 1)^2)`, expected `O((1 + ln(total / active))^2)` | `O(1)` expected | `O(1)` expected | `O(deleted)` deleted-slot set | No | No | +| AnchorHash | `O((h + 1)^2)`, expected `O((1 + ln(total / active))^2)` | `O(1)` expected | `O(1)` expected | `O(capacity)` anchor/removal state | Yes | Yes | +| MementoHash | `O((h + 1)^2)`, expected `O((1 + ln(total / active))^2)` | `O(1)` expected | `O(1)` expected | `O(deleted)` replacement tuples | No | Yes | +| DxHash | `O((h + 1)^2)`, expected `O((1 + ln(total / active))^2)` | `O(1)` expected | `O(1)` expected | `O(capacity)` redirect/displacement state with smaller constants than AnchorHash | Yes | Yes | + ## ConsistentChooseK algorithm The following functions summarize the core algorithmic innovation as a minimal Rust excerpt. diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs index f1b6fe0..67116e8 100644 --- a/crates/consistent-hashing/src/lib.rs +++ b/crates/consistent-hashing/src/lib.rs @@ -1,7 +1,9 @@ use std::hash::{Hash, Hasher}; mod choose_k; +mod node_map; pub use choose_k::ConsistentChooseKHasher; +pub use node_map::ConsistentNodeMap; /// A trait which behaves like a pseudo-random number generator. /// It is used to generate consistent hashes within one bucket. diff --git a/crates/consistent-hashing/src/node_map.rs b/crates/consistent-hashing/src/node_map.rs new file mode 100644 index 0000000..de6722f --- /dev/null +++ b/crates/consistent-hashing/src/node_map.rs @@ -0,0 +1,355 @@ +use std::collections::HashSet; + +use crate::{ConsistentChooseKHasher, ManySeqBuilder}; + +/// A consistent node map that supports dynamic addition and deletion of nodes. 
+/// +/// Slots are tracked by storing the total number of slots and a set of deleted +/// slots. To find the slot associated with a key, the consistent choose-k +/// iterator yields positions in a consistent order; the first non-deleted slot +/// is returned. +/// +/// # Comparison with AnchorHash, MementoHash, and DxHash +/// +/// This solves the same problem as [AnchorHash], [MementoHash], and [DxHash]: +/// consistently mapping keys to a dynamic set of nodes where nodes can be +/// added and removed, with minimal key reassignment. All of these algorithms +/// guarantee that when a node is removed, only keys assigned to that node are +/// redistributed — and they are redistributed uniformly among the remaining +/// nodes. +/// +/// The key difference is history. AnchorHash, MementoHash, and DxHash keep +/// redirect/replacement state so that when lookup hits a deleted node, it can +/// replay enough of the prior removal process to find the correct replacement. +/// MementoHash, for example, defines its state as `<n, R, l>`, where `R` is a +/// set of replacement tuples and `l` is the last removed bucket. This +/// implementation is history-independent: it only needs to know which slots are +/// currently deleted. Lookup simply iterates the consistent choose-k sequence +/// until it hits an active slot. +/// +/// This implementation takes a much simpler approach: it leverages the +/// consistent choose-k algorithm, which already provides both n-consistency +/// and k-consistency by construction. No auxiliary redirect structures are +/// needed beyond the current set of deleted slots. +/// +/// Let `total` be the number of slots, `active` the number of active slots, and +/// `h` the number of deleted slots hit during a lookup before the first active +/// slot is found. For AnchorHash and DxHash, `total` is the predefined capacity; +/// for MementoHash and this implementation, it is the current slot count.
+/// MementoHash bounds the expected number of deleted-node hits by harmonic +/// sums, e.g. `1 + H_total - H_active`, which is at most +/// `1 + ln(total / active)`. +/// +/// In this implementation, the choose-k iterator never returns the same slot +/// twice, so a deleted slot can be hit at most once during a lookup. Thus the +/// scan has the same deleted-hit behavior as the history-based algorithms, but +/// without storing the deletion history. +/// +/// The current choose-k iterator costs O(k) to produce the k-th candidate, so a +/// lookup that skips `h` deleted slots costs O((h + 1)^2), and the corresponding +/// expected total lookup cost is O((1 + ln(total / active))^2). This is in the +/// same practical complexity regime as history-based redirection schemes: the +/// cost grows roughly quadratically with the number of deleted-node hits, while +/// the expected number of such hits stays small unless many slots are deleted. +/// +/// | Algorithm | Total lookup time | State | Predefined capacity? | History-dependent? | +/// | --- | --- | --- | --- | --- | +/// | `ConsistentNodeMap` | `O((h + 1)^2)`, expected `O((1 + ln(total / active))^2)` | `O(deleted)` deleted-slot set | No | No | +/// | AnchorHash | `O((h + 1)^2)`, expected `O((1 + ln(total / active))^2)` | `O(capacity)` anchor/removal state | Yes | Yes | +/// | MementoHash | `O((h + 1)^2)`, expected `O((1 + ln(total / active))^2)` | `O(deleted)` replacement tuples | No | Yes | +/// | DxHash | `O((h + 1)^2)`, expected `O((1 + ln(total / active))^2)` | `O(capacity)` redirect/displacement state with smaller constants than AnchorHash | Yes | Yes | +/// +/// The MementoHash paper explicitly notes that AnchorHash and DxHash keep an +/// internal data structure for all cluster nodes, both working and not working, +/// and require the overall capacity to be fixed during initialization. 
+/// MementoHash reduces memory by storing only replacement information for +/// removed buckets, but that replacement information still encodes the removal +/// history. This implementation has the same O(deleted) storage shape as that +/// idea, but stores only the deleted set. +/// +/// [AnchorHash]: https://arxiv.org/abs/1812.09674 +/// [MementoHash]: https://arxiv.org/abs/2306.09783 +/// [DxHash]: https://doi.org/10.1145/3631708 +/// +/// # Example +/// ``` +/// use std::hash::{DefaultHasher, Hash}; +/// use consistent_hashing::ConsistentNodeMap; +/// +/// let mut map = ConsistentNodeMap::new(); +/// let a = map.add(); +/// let b = map.add(); +/// let c = map.add(); +/// +/// let mut h = DefaultHasher::default(); +/// 42u64.hash(&mut h); +/// let slot = map.get(h).unwrap(); +/// assert!(slot == a || slot == b || slot == c); +/// ``` +pub struct ConsistentNodeMap { + total: usize, + deleted: HashSet<usize>, +} + +impl Default for ConsistentNodeMap { + fn default() -> Self { + Self::new() + } +} + +impl ConsistentNodeMap { + /// Create an empty node map. + pub fn new() -> Self { + Self { + total: 0, + deleted: HashSet::new(), + } + } + + /// Add a slot and return its index. + /// + /// If there is a previously deleted slot, it will be reused. + pub fn add(&mut self) -> usize { + if let Some(i) = self.deleted.iter().next().copied() { + self.deleted.remove(&i); + i + } else { + let i = self.total; + self.total += 1; + i + } + } + + /// Remove the slot at the given index. Returns true if it was active. + pub fn remove(&mut self, index: usize) -> bool { + if index >= self.total || self.deleted.contains(&index) { + return false; + } + if index == self.total - 1 { + self.total -= 1; + } else { + self.deleted.insert(index); + } + true + } + + /// Returns the number of active slots. + pub fn len(&self) -> usize { + self.total - self.deleted.len() + } + + /// Returns true if there are no active slots.
+ pub fn is_empty(&self) -> bool { + self.total == self.deleted.len() + } + + /// Returns the total number of slots (including deleted ones). + pub fn slot_count(&self) -> usize { + self.total + } + + /// Returns whether the slot at the given index is active. + pub fn is_active(&self, index: usize) -> bool { + index < self.total && !self.deleted.contains(&index) + } + + /// Look up which slot a key maps to using consistent hashing. + /// + /// The `builder` should be a hasher seeded with the key. The consistent + /// choose-k iterator yields positions in a consistent order; the first + /// active slot is returned. + pub fn get<H: ManySeqBuilder>(&self, builder: H) -> Option<usize> { + if self.is_empty() { + return None; + } + let iter = ConsistentChooseKHasher::new(builder, self.total); + for pos in iter { + if !self.deleted.contains(&pos) { + return Some(pos); + } + } + None + } +} + +#[cfg(test)] +mod tests { + use std::hash::{DefaultHasher, Hash}; + + use super::*; + + fn hasher_for_key(key: u64) -> DefaultHasher { + let mut hasher = DefaultHasher::default(); + key.hash(&mut hasher); + hasher + } + + #[test] + fn test_add_remove() { + let mut map = ConsistentNodeMap::new(); + let a = map.add(); + let b = map.add(); + assert_eq!(map.len(), 2); + + assert!(map.remove(a)); + assert_eq!(map.len(), 1); + for key in 0..100 { + assert_eq!(map.get(hasher_for_key(key)), Some(b)); + } + + assert!(map.remove(b)); + assert!(map.is_empty()); + assert_eq!(map.len(), 0); + assert!(map.get(hasher_for_key(0)).is_none()); + } + + #[test] + fn test_remove_returns_false_for_inactive() { + let mut map = ConsistentNodeMap::new(); + let a = map.add(); + assert!(map.remove(a)); + assert!(!map.remove(a)); + assert!(!map.remove(999)); + } + + #[test] + fn test_slot_reuse() { + let mut map = ConsistentNodeMap::new(); + map.add(); + let b = map.add(); + map.add(); + assert_eq!(map.slot_count(), 3); + + map.remove(b); + let d = map.add(); + assert_eq!(d, b); + assert_eq!(map.slot_count(), 3); +
assert!(map.is_active(d)); + } + + #[test] + fn test_trailing_pop() { + let mut map = ConsistentNodeMap::new(); + let a = map.add(); // 0 + let b = map.add(); // 1 + let c = map.add(); // 2 + assert_eq!(map.slot_count(), 3); + + // Removing last slot pops it. + map.remove(c); + assert_eq!(map.slot_count(), 2); + + // Removing last again pops it. + map.remove(b); + assert_eq!(map.slot_count(), 1); + + // Middle removal is tracked as deleted, not popped. + let b2 = map.add(); // appends as 1 + let c2 = map.add(); // appends as 2 + assert_eq!(b2, 1); + assert_eq!(c2, 2); + map.remove(b2); // middle -> deleted set + assert_eq!(map.slot_count(), 3); + map.remove(c2); // trailing → only pops c2 + assert_eq!(map.slot_count(), 2); // b2 slot stays as inactive + assert_eq!(map.len(), 1); + assert!(map.is_active(a)); + } + + #[test] + fn test_consistency_after_add() { + let mut map = ConsistentNodeMap::new(); + for _ in 0..10 { + map.add(); + } + let before: Vec<_> = (0..10000) + .map(|k| map.get(hasher_for_key(k)).unwrap()) + .collect(); + map.add(); + let after: Vec<_> = (0..10000) + .map(|k| map.get(hasher_for_key(k)).unwrap()) + .collect(); + let changed = before.iter().zip(&after).filter(|(a, b)| a != b).count(); + assert!( + changed < 2000, + "too many keys changed after add: {changed}/10000" + ); + } + + #[test] + fn test_remove_10_percent_consistency() { + let n = 100; + let num_keys = 100_000u64; + let to_remove: Vec<usize> = (0..n).step_by(10).collect(); // 10% of nodes + + let mut map = ConsistentNodeMap::new(); + for _ in 0..n { + map.add(); + } + + let before: Vec<usize> = (0..num_keys) + .map(|k| map.get(hasher_for_key(k)).unwrap()) + .collect(); + + for &slot in &to_remove { + map.remove(slot); + } + let remaining = map.len(); + + let after: Vec<usize> = (0..num_keys) + .map(|k| map.get(hasher_for_key(k)).unwrap()) + .collect(); + + // 1. Keys not on removed nodes must stay on the same node.
+ let mut displaced = 0u64; + for (k, (b, a)) in before.iter().zip(&after).enumerate() { + if !to_remove.contains(b) { + assert_eq!( + b, a, + "key {k}: slot changed from {b} to {a} but was not on a removed slot" + ); + } else { + displaced += 1; + assert!( + !to_remove.contains(a), + "key {k}: reassigned to removed slot {a}" + ); + } + } + + // 2. Displaced fraction should be very close to the theoretical value. + let displaced_pct = displaced as f64 / num_keys as f64; + let theoretical_pct = to_remove.len() as f64 / n as f64; + assert!( + (displaced_pct - theoretical_pct).abs() < 0.01, + "displaced fraction {displaced_pct:.4} not close to theoretical {theoretical_pct:.4}" + ); + + // 3. After removal, distribution among remaining nodes should be + // roughly uniform: each node gets ~1/remaining of all keys. + let mut counts = vec![0u64; n]; + for &a in &after { + counts[a] += 1; + } + let expected = num_keys as f64 / remaining as f64; + let chi2: f64 = counts + .iter() + .enumerate() + .filter(|(i, _)| !to_remove.contains(i)) + .map(|(_, &c)| { + let diff = c as f64 - expected; + diff * diff / expected + }) + .sum(); + // Chi-squared critical value for 89 df at p=0.001 is ~122.9. + assert!( + chi2 < 200.0, + "distribution not uniform enough: chi2={chi2:.1} (expected < 200)" + ); + + // 4. Removed slots must have zero keys. 
+ for &slot in &to_remove { + assert_eq!(counts[slot], 0, "removed slot {slot} still has keys"); + } + } +} From 7deb0271c1717ef1c95befcbd344234623b0f002 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 20 May 2026 09:41:55 +0200 Subject: [PATCH 30/49] Update README.md --- crates/consistent-hashing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index 0a59852..70107da 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -112,7 +112,7 @@ We define the consistent `n-choose-k` replication as follows: 1. For a given number `n` of nodes, choose `k` distinct nodes `S`. 2. For a given `key` the chosen set of nodes must be uniformly chosen from all possible sets of size `k`. -3. When `n` increases by one, exactly one node in the chosen set will be changed. +3. When `n` increases by one, at most one node in the chosen set will be changed. 4. and the node will be changed with probability `k/(n+1)`. In the remainder of this section we prove that the `consistent_choose_k` algorithm satisfies those properties. From e54eaa222b04878f7923f10dce6dc6068107c51e Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 20 May 2026 10:08:43 +0200 Subject: [PATCH 31/49] Update README.md --- crates/consistent-hashing/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index 70107da..dfb9a19 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -128,16 +128,16 @@ Properties 2, 3, and 4 can be proven via induction as follows. ### Property 4 -`k = 1`: We expect that `consistent_hash` returns a single uniformly distributed node index which is consistent in `n`, i.e. changes the hash value with probability `1/(n+1)`, when `n` increments by one. 
In our implementation, we use an `O(1)` implementation of the jump-hash algorithm. For `k=1`, `consistent_choose_k(key, 1, n)` becomes a single function call to `consistent_choose_max(key, 1, n)` which in turn calls `consistent_hash(key, 0, n)`. I.e. `consistent_choose_k` inherits the all the desired properties from `consistent_hash` for `k=1` and all `n>=1`. +`k = 1`: We expect that `consistent_hash` returns a single uniformly distributed node index which is consistent in `n`, i.e. changes the hash value with probability `1/(n+1)`, when `n` increments by one. In our implementation, we use an `O(1)` implementation of the jump-hash algorithm. For `k=1`, `consistent_choose_k(key, 1, n)` becomes a single function call to `consistent_choose_max(key, 1, n)` which in turn calls `consistent_hash(key, 0, n)`. I.e. `consistent_choose_k` inherits all the desired properties from `consistent_hash` for `k=1` and all `n>=1`. -`k → k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for this is `(n+1-k)/(n+1)` for the former by induction and `(n-k)/(n+1-k)` by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4. +`k → k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for the former is `(n+1-k)/(n+1)` by induction and `(n-k)/(n+1-k)` for the latter by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4. ### Property 3 Property 3 is trivially satisfied if `S(k+1, n+1) = S(k+1, n)`. 
So, we focus on the case where `S(k+1, n+1) != S(k+1, n)`, which implies that `n ∈ S(k+1, n+1)` as largest element. We know that `S(k+1, n) = {m} ∪ S(k, m)` for some `m` by definition and `S(k, n) = S(k, u) ∖ {v} ∪ {w}` by induction for some `u`, `v`, and `w`. Thus far we have `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, u) ∖ {v} ∪ {w}`. -If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exactly in the elemetns `m` and `n` proving property 3. +If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exactly in the elements `m` and `n` proving property 3. If `u ≠ m`, then `consistent_hash(_, k, n) = m`, since that's the only way how the largest values in `S(k+1, n)` and `S(k, n)` can differ. In this case, `m ∉ S(k+1, n+1)`, since `n` (and not `m`) is the largest element of `S(k+1, n+1)`. Furthermore, `S(k, n) = S(k, m)`, since `consistent_hash(_, i, n) < m` for all `i < k` (otherwise there is a contradiction). Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof. From cad91159f94ccead9d4f0f7ce7a0b506f10cae26 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 20 May 2026 12:04:52 +0200 Subject: [PATCH 32/49] Update README.md --- crates/consistent-hashing/README.md | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index dfb9a19..f0d3528 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -106,6 +106,43 @@ For small `k` neither optimization is probably improving the actual performance The next section proves the correctness of this algorithm. 
+## Relation to reservoir sampling + +`consistent_choose_k` solves the same distributional problem as +**reservoir sampling without replacement** — drawing a uniform `k`-subset from +`{0, …, n−1}` such that incrementing `n` evicts at most one element with +probability `k/(n+1)`. This is exactly the invariant maintained by Vitter's +classical streaming algorithms (Algorithm R, 1985; Algorithm L, Li 1994). + +The two approaches differ in *where the randomness lives* and *what queries +are cheap*: + +| | Algorithm L (streaming) | `consistent_choose_k` | +|--------------------------------|-----------------------------------|------------------------------------| +| Randomness source | fresh PRNG draws | deterministic `consistent_hash(key, …)` | +| State | `O(k)` reservoir + threshold `W` | none | +| Build sample for `n` | `O(k · (1 + log(n/k)))` (replay) | `O(k²)` or `O(k log k)`, no replay | +| Advance to next `n` | `O(1)` amortized (geometric skip) | `O(k)` via a `grow_n` step | + +In other words, `consistent_choose_k` is a **history-independent** analogue +of Algorithm L: + +- An `O(k)` `grow_n(key, k, n) → (Option, new_member?)` step would + mirror Algorithm L's geometric skip — advancing the active set from `n` to + `n+1` (or directly to the next `n` that actually changes the sample). The + ingredients are already there: the recursion `S(k, n+1)` differs from + `S(k, n)` in at most one element (Property 3), and that element is determined + by which level of the `consistent_choose_max` recursion the new `n` enters. +- Unlike Algorithm L, the active set for **any** `n` can also be recomputed + from scratch in `O(k²)` (or `O(k log k)`) without walking the prefix `1..n` — because the + randomness for level `i` is materialized by `consistent_hash(key, i, n−i)` + rather than threaded through a running PRNG state. 
This is what makes + `consistent_choose_k` usable as a *consistent hashing* primitive: any node + can compute the assignment for the current cluster size in isolation. + +So the same construction simultaneously gives a stateless consistent-hashing +ranking and a fully reproducible, addressable reservoir sample. + ## N-Choose-K replication We define the consistent `n-choose-k` replication as follows: From 4e3dc6599f9ef52d92441d5b6cbc19876d2724d6 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 20 May 2026 15:16:27 +0200 Subject: [PATCH 33/49] clippy --- crates/consistent-hashing/benchmarks/performance.rs | 10 +++++----- crates/consistent-hashing/src/node_map.rs | 9 ++------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/consistent-hashing/benchmarks/performance.rs b/crates/consistent-hashing/benchmarks/performance.rs index b8cbdbe..365c4dc 100644 --- a/crates/consistent-hashing/benchmarks/performance.rs +++ b/crates/consistent-hashing/benchmarks/performance.rs @@ -14,11 +14,11 @@ use rand::{rng, Rng}; fn throughput_benchmark(c: &mut Criterion) { let keys: Vec = rng().random_iter().take(1000).collect(); - let mut group = c.benchmark_group(format!("choose")); + let mut group = c.benchmark_group("choose"); group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); for n in [1usize, 10, 100, 1000, 10000] { group.throughput(Throughput::Elements(keys.len() as u64)); - group.bench_with_input(BenchmarkId::new(format!("1"), n), &n, |b, n| { + group.bench_with_input(BenchmarkId::new("1", n), &n, |b, n| { b.iter_batched( || &keys, |keys| { @@ -65,9 +65,9 @@ fn append_vs_new_with_k(c: &mut Criterion) { b.iter(|| { let h = DefaultHasher::default(); let mut iter = ConsistentChooseKHasher::new(h, n + k); - black_box(for _ in 0..k { - iter.grow_k(); - }) + for _ in 0..k { + black_box(iter.grow_k()); + } }) }); } diff --git a/crates/consistent-hashing/src/node_map.rs b/crates/consistent-hashing/src/node_map.rs index 
de6722f..82e3e54 100644 --- a/crates/consistent-hashing/src/node_map.rs +++ b/crates/consistent-hashing/src/node_map.rs @@ -162,13 +162,8 @@ impl ConsistentNodeMap { if self.is_empty() { return None; } - let iter = ConsistentChooseKHasher::new(builder, self.total); - for pos in iter { - if !self.deleted.contains(&pos) { - return Some(pos); - } - } - None + let mut iter = ConsistentChooseKHasher::new(builder, self.total); + iter.find(|pos| !self.deleted.contains(pos)) } } From 2dda8bdc8b4bb209c3f24fbb0888b0d4430eac1c Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 20 May 2026 15:24:44 +0200 Subject: [PATCH 34/49] Update README.md --- crates/consistent-hashing/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md index f0d3528..4270d68 100644 --- a/crates/consistent-hashing/README.md +++ b/crates/consistent-hashing/README.md @@ -141,7 +141,11 @@ of Algorithm L: can compute the assignment for the current cluster size in isolation. So the same construction simultaneously gives a stateless consistent-hashing -ranking and a fully reproducible, addressable reservoir sample. +ranking and a fully reproducible, addressable reservoir sample. Conversely, +reservoir sampling provides another way to view consistent hashing with +replication: it is the unique `k`-out-of-`n` growth process where each new node +joins the active set with probability `k/(n+1)`, evicts at most one old node, +and preserves a uniform active set after every growth step. 
## N-Choose-K replication From 83174e24d883f91afe55d7d00961ed310107fc44 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 20 May 2026 17:06:53 +0200 Subject: [PATCH 35/49] rename crate --- Cargo.toml | 2 +- README.md | 2 +- crates/consistent-choose-k/Cargo.toml | 20 +++++++++++++++++++ .../README.md | 0 .../benchmarks/Cargo.toml | 4 ++-- .../benchmarks/criterion.toml | 0 .../benchmarks/performance.rs | 2 +- .../examples/bounded_load.rs | 2 +- .../src/choose_k.rs | 2 +- .../src/lib.rs | 0 .../src/node_map.rs | 2 +- crates/consistent-hashing/Cargo.toml | 17 ---------------- 12 files changed, 28 insertions(+), 25 deletions(-) create mode 100644 crates/consistent-choose-k/Cargo.toml rename crates/{consistent-hashing => consistent-choose-k}/README.md (100%) rename crates/{consistent-hashing => consistent-choose-k}/benchmarks/Cargo.toml (71%) rename crates/{consistent-hashing => consistent-choose-k}/benchmarks/criterion.toml (100%) rename crates/{consistent-hashing => consistent-choose-k}/benchmarks/performance.rs (97%) rename crates/{consistent-hashing => consistent-choose-k}/examples/bounded_load.rs (99%) rename crates/{consistent-hashing => consistent-choose-k}/src/choose_k.rs (99%) rename crates/{consistent-hashing => consistent-choose-k}/src/lib.rs (100%) rename crates/{consistent-hashing => consistent-choose-k}/src/node_map.rs (99%) delete mode 100644 crates/consistent-hashing/Cargo.toml diff --git a/Cargo.toml b/Cargo.toml index 0b09dcb..058b161 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ members = [ "crates/*", "crates/bpe/benchmarks", "crates/bpe/tests", - "crates/consistent-hashing/benchmarks", + "crates/consistent-choose-k/benchmarks", ] resolver = "2" diff --git a/README.md b/README.md index 46bfb40..1a61103 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ A collection of useful algorithms written in Rust. 
Currently contains: - [`geo_filters`](crates/geo_filters): probabilistic data structures that solve the [Distinct Count Problem](https://en.wikipedia.org/wiki/Count-distinct_problem) using geometric filters. - [`bpe`](crates/bpe): fast, correct, and novel algorithms for the [Byte Pair Encoding Algorithm](https://en.wikipedia.org/wiki/Large_language_model#BPE) which are particularly useful for chunking of documents. - [`bpe-openai`](crates/bpe-openai): Fast tokenizers for OpenAI token sets based on the `bpe` crate. -- [`consistent-hashing`](crates/consistent-hashing): constant time consistent hashing algorithms with support for replication and bounded load. +- [`consistent-choose-k`](crates/consistent-choose-k): constant time consistent hashing algorithms with support for replication and bounded load. - [`sparse-ngrams`](crates/sparse-ngrams): fast sparse n-gram extraction from byte slices. Selects variable-length n-grams (2–8 bytes) deterministically using bigram frequency priorities, suitable for substring search indexes. - [`string-offsets`](crates/string-offsets): converts string positions between bytes, chars, UTF-16 code units, and line numbers. Useful when sending string indices across language boundaries. diff --git a/crates/consistent-choose-k/Cargo.toml b/crates/consistent-choose-k/Cargo.toml new file mode 100644 index 0000000..d8341a8 --- /dev/null +++ b/crates/consistent-choose-k/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "consistent-choose-k" +version = "0.1.0" +edition = "2021" +description = "Stateless consistent choose-k hashing for replication, failover, and bounded-load placement." 
+repository = "https://github.com/github/rust-gems" +homepage = "https://github.com/github/rust-gems/tree/main/crates/consistent-choose-k" +documentation = "https://docs.rs/consistent-choose-k" +readme = "README.md" +license = "MIT" +keywords = ["consistent", "hashing", "replication", "choose-k", "sampling"] +categories = ["algorithms", "data-structures", "mathematics", "science"] + +[lib] +crate-type = ["lib", "staticlib"] +bench = false + +[dependencies] + +[dev-dependencies] diff --git a/crates/consistent-hashing/README.md b/crates/consistent-choose-k/README.md similarity index 100% rename from crates/consistent-hashing/README.md rename to crates/consistent-choose-k/README.md diff --git a/crates/consistent-hashing/benchmarks/Cargo.toml b/crates/consistent-choose-k/benchmarks/Cargo.toml similarity index 71% rename from crates/consistent-hashing/benchmarks/Cargo.toml rename to crates/consistent-choose-k/benchmarks/Cargo.toml index 580e5ab..ce2c881 100644 --- a/crates/consistent-hashing/benchmarks/Cargo.toml +++ b/crates/consistent-choose-k/benchmarks/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "consistent-hashing-benchmarks" +name = "consistent-choose-k-benchmarks" edition = "2021" [[bench]] @@ -9,7 +9,7 @@ harness = false test = false [dependencies] -consistent-hashing = { path = "../" } +consistent-choose-k = { path = "../" } criterion = { version = "0.7", features = ["csv_output"] } rand = "0.9" diff --git a/crates/consistent-hashing/benchmarks/criterion.toml b/crates/consistent-choose-k/benchmarks/criterion.toml similarity index 100% rename from crates/consistent-hashing/benchmarks/criterion.toml rename to crates/consistent-choose-k/benchmarks/criterion.toml diff --git a/crates/consistent-hashing/benchmarks/performance.rs b/crates/consistent-choose-k/benchmarks/performance.rs similarity index 97% rename from crates/consistent-hashing/benchmarks/performance.rs rename to crates/consistent-choose-k/benchmarks/performance.rs index 365c4dc..5465381 100644 --- 
a/crates/consistent-hashing/benchmarks/performance.rs +++ b/crates/consistent-choose-k/benchmarks/performance.rs @@ -4,7 +4,7 @@ use std::{ time::Duration, }; -use consistent_hashing::{ConsistentChooseKHasher, ConsistentHasher}; +use consistent_choose_k::{ConsistentChooseKHasher, ConsistentHasher}; use criterion::{ criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion, PlotConfiguration, Throughput, diff --git a/crates/consistent-hashing/examples/bounded_load.rs b/crates/consistent-choose-k/examples/bounded_load.rs similarity index 99% rename from crates/consistent-hashing/examples/bounded_load.rs rename to crates/consistent-choose-k/examples/bounded_load.rs index d9ce34a..8ed6645 100644 --- a/crates/consistent-hashing/examples/bounded_load.rs +++ b/crates/consistent-choose-k/examples/bounded_load.rs @@ -15,7 +15,7 @@ use std::collections::HashSet; use std::hash::{DefaultHasher, Hash, Hasher}; use std::time::Instant; -use consistent_hashing::ConsistentChooseKHasher; +use consistent_choose_k::ConsistentChooseKHasher; /// Bounded-load assignment. 
/// diff --git a/crates/consistent-hashing/src/choose_k.rs b/crates/consistent-choose-k/src/choose_k.rs similarity index 99% rename from crates/consistent-hashing/src/choose_k.rs rename to crates/consistent-choose-k/src/choose_k.rs index f26ec2d..5f6e27e 100644 --- a/crates/consistent-hashing/src/choose_k.rs +++ b/crates/consistent-choose-k/src/choose_k.rs @@ -32,7 +32,7 @@ impl Sample { /// # Example /// ``` /// use std::hash::{DefaultHasher, Hash}; -/// use consistent_hashing::ConsistentChooseKHasher; +/// use consistent_choose_k::ConsistentChooseKHasher; /// /// let mut h = DefaultHasher::default(); /// 42u64.hash(&mut h); diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-choose-k/src/lib.rs similarity index 100% rename from crates/consistent-hashing/src/lib.rs rename to crates/consistent-choose-k/src/lib.rs diff --git a/crates/consistent-hashing/src/node_map.rs b/crates/consistent-choose-k/src/node_map.rs similarity index 99% rename from crates/consistent-hashing/src/node_map.rs rename to crates/consistent-choose-k/src/node_map.rs index 82e3e54..9bcb491 100644 --- a/crates/consistent-hashing/src/node_map.rs +++ b/crates/consistent-choose-k/src/node_map.rs @@ -74,7 +74,7 @@ use crate::{ConsistentChooseKHasher, ManySeqBuilder}; /// # Example /// ``` /// use std::hash::{DefaultHasher, Hash}; -/// use consistent_hashing::ConsistentNodeMap; +/// use consistent_choose_k::ConsistentNodeMap; /// /// let mut map = ConsistentNodeMap::new(); /// let a = map.add(); diff --git a/crates/consistent-hashing/Cargo.toml b/crates/consistent-hashing/Cargo.toml deleted file mode 100644 index 53f4e02..0000000 --- a/crates/consistent-hashing/Cargo.toml +++ /dev/null @@ -1,17 +0,0 @@ -[package] -name = "consistent-hashing" -version = "0.1.0" -edition = "2021" -description = "Constant time consistent hashing algorithms." 
-repository = "https://github.com/github/rust-gems" -license = "MIT" -keywords = ["probabilistic", "algorithm", "consistent hashing", "jump hashing", "rendezvous hashing"] -categories = ["algorithms", "data-structures", "mathematics", "science"] - -[lib] -crate-type = ["lib", "staticlib"] -bench = false - -[dependencies] - -[dev-dependencies] From bd4083f9a2ac8e83875782c2c2aca3a1aec17fb2 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 20 May 2026 17:18:59 +0200 Subject: [PATCH 36/49] upgrade dependencies --- crates/bpe-openai/Cargo.toml | 2 +- crates/bpe/Cargo.toml | 2 +- crates/bpe/benchmarks/Cargo.toml | 4 ++-- crates/bpe/tests/Cargo.toml | 2 +- crates/consistent-choose-k/benchmarks/Cargo.toml | 4 ++-- crates/consistent-choose-k/benchmarks/performance.rs | 2 +- crates/sparse-ngrams/Cargo.toml | 2 +- crates/sparse-ngrams/benchmarks/performance.rs | 4 +++- 8 files changed, 12 insertions(+), 10 deletions(-) diff --git a/crates/bpe-openai/Cargo.toml b/crates/bpe-openai/Cargo.toml index 9bc20e7..8c5a227 100644 --- a/crates/bpe-openai/Cargo.toml +++ b/crates/bpe-openai/Cargo.toml @@ -21,7 +21,7 @@ unicode-normalization = "0.1" [dev-dependencies] bpe = { version = "0.2", path = "../bpe", features = ["rand"] } -tiktoken-rs = "0.9" +tiktoken-rs = "0.11" [build-dependencies] base64 = "0.22" diff --git a/crates/bpe/Cargo.toml b/crates/bpe/Cargo.toml index beaef6c..ff1de4c 100644 --- a/crates/bpe/Cargo.toml +++ b/crates/bpe/Cargo.toml @@ -26,7 +26,7 @@ serde = { version = "1", features = ["derive"] } [dev-dependencies] bpe = { path = "." 
} -tiktoken-rs = "0.9" +tiktoken-rs = "0.11" [package.metadata.docs.rs] all-features = true diff --git a/crates/bpe/benchmarks/Cargo.toml b/crates/bpe/benchmarks/Cargo.toml index 6fb1a6a..368fb35 100644 --- a/crates/bpe/benchmarks/Cargo.toml +++ b/crates/bpe/benchmarks/Cargo.toml @@ -22,5 +22,5 @@ bpe = { path = "../../bpe", features = ["rand", "tiktoken"] } bpe-openai = { path = "../../bpe-openai" } criterion = "0.8" rand = "0.10" -tiktoken-rs = "0.9" -tokenizers = { version = "0.22", features = ["http"] } +tiktoken-rs = "0.11" +tokenizers = { version = "0.23", features = ["http"] } diff --git a/crates/bpe/tests/Cargo.toml b/crates/bpe/tests/Cargo.toml index 75ba2ea..640eb52 100644 --- a/crates/bpe/tests/Cargo.toml +++ b/crates/bpe/tests/Cargo.toml @@ -7,4 +7,4 @@ bpe = { path = "../../bpe", features = ["rand"] } bpe-openai = { path = "../../bpe-openai" } itertools = "0.14" rand = "0.10" -tiktoken-rs = "0.9" +tiktoken-rs = "0.11" diff --git a/crates/consistent-choose-k/benchmarks/Cargo.toml b/crates/consistent-choose-k/benchmarks/Cargo.toml index ce2c881..7b17102 100644 --- a/crates/consistent-choose-k/benchmarks/Cargo.toml +++ b/crates/consistent-choose-k/benchmarks/Cargo.toml @@ -11,5 +11,5 @@ test = false [dependencies] consistent-choose-k = { path = "../" } -criterion = { version = "0.7", features = ["csv_output"] } -rand = "0.9" +criterion = { version = "0.8", features = ["csv_output"] } +rand = "0.10" diff --git a/crates/consistent-choose-k/benchmarks/performance.rs b/crates/consistent-choose-k/benchmarks/performance.rs index 5465381..f7cf049 100644 --- a/crates/consistent-choose-k/benchmarks/performance.rs +++ b/crates/consistent-choose-k/benchmarks/performance.rs @@ -9,7 +9,7 @@ use criterion::{ criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion, PlotConfiguration, Throughput, }; -use rand::{rng, Rng}; +use rand::{rng, RngExt}; fn throughput_benchmark(c: &mut Criterion) { let keys: Vec = rng().random_iter().take(1000).collect(); diff --git 
a/crates/sparse-ngrams/Cargo.toml b/crates/sparse-ngrams/Cargo.toml index 80ca0f7..9b7d9ea 100644 --- a/crates/sparse-ngrams/Cargo.toml +++ b/crates/sparse-ngrams/Cargo.toml @@ -17,4 +17,4 @@ path = "benchmarks/performance.rs" harness = false [dev-dependencies] -criterion = "0.7" +criterion = "0.8" diff --git a/crates/sparse-ngrams/benchmarks/performance.rs b/crates/sparse-ngrams/benchmarks/performance.rs index ac2b2c3..123588a 100644 --- a/crates/sparse-ngrams/benchmarks/performance.rs +++ b/crates/sparse-ngrams/benchmarks/performance.rs @@ -1,4 +1,6 @@ -use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use std::hint::black_box; + +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use sparse_ngrams::{ collect_sparse_grams_deque, collect_sparse_grams_scan, max_sparse_grams, NGram, }; From 2ee80da2b889a94902a86d7d14cfd4d1483c6734 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 21 May 2026 08:16:45 +0200 Subject: [PATCH 37/49] correct some comments, split lib.rs --- crates/consistent-choose-k/src/choose_k.rs | 49 +--- .../src/consistent_hash.rs | 274 ++++++++++++++++++ crates/consistent-choose-k/src/lib.rs | 273 +---------------- 3 files changed, 288 insertions(+), 308 deletions(-) create mode 100644 crates/consistent-choose-k/src/consistent_hash.rs diff --git a/crates/consistent-choose-k/src/choose_k.rs b/crates/consistent-choose-k/src/choose_k.rs index 5f6e27e..91b6c3a 100644 --- a/crates/consistent-choose-k/src/choose_k.rs +++ b/crates/consistent-choose-k/src/choose_k.rs @@ -21,13 +21,13 @@ impl Sample { /// Implementation of a consistent choose k hashing algorithm. /// It returns k distinct consistent hashes in the range `0..n`. /// The hashes are consistent when `n` changes and when `k` changes! -/// I.e. on average exactly `1/(n+1)` (resp. `1/(k+1)`) many hashes will change -/// when `n` (resp. `k`) increases by one. Additionally, the returned `k` tuple +/// I.e. 
one hash changes with probability `k/(n+1)` when `n` increases by one, +/// resp. one hash gets added when `k` is increased. Additionally, the returned `k` tuple /// is guaranteed to be uniformely chosen from all possible `n-choose-k` tuples. /// /// Also implements `Iterator` to yield the next sample when k is increased. /// Note: since this hashing algorithm implements choose k semantics, all the returned samples are distinct. -/// Note: they won't be sorted by their position, since the order is changing when k is changing. +/// Note: The `Iterator` won't output the samples ordered by position. /// /// # Example /// ``` @@ -78,14 +78,14 @@ impl ConsistentChooseKHasher { iter } - /// Returns an iterator over the sampled positions in increasing order. + /// Returns an iterator over the `k` sampled positions in increasing order. /// /// Time: O(1) pub fn positions(&self) -> impl Iterator + '_ { self.samples.iter().map(|s| s.pos) } - /// Returns the underlying samples. + /// Returns the `k` underlying samples. pub fn samples(&self) -> &[Sample] { &self.samples } @@ -149,19 +149,20 @@ impl ConsistentChooseKHasher { if let Some(last) = self.samples.last().copied() { if last.pos < sk.pos { self.samples.push(sk); + k } else if last.pos == sk.pos { let i = self.shrink_n_inner(last.pos); self.samples.push(sk); - return i; + i } else { let i = self.shrink_n_inner(last.pos); self.samples.push(last); - return i; + i } } else { self.samples.push(sk); + k } - k } } @@ -213,22 +214,6 @@ mod tests { } } - #[test] - fn test_ranking_k_equals_1() { - for key in 0..500 { - let hasher = hasher_for_key(key); - for n in 1..50 { - let first = ConsistentChooseKHasher::new(hasher.clone(), n) - .next() - .unwrap(); - let prev: Vec = ConsistentChooseKHasher::new_with_k(hasher.clone(), n, 1) - .positions() - .collect(); - assert_eq!(first, prev[0]); - } - } - } - #[test] fn test_ranking_k_equals_n() { // When exhausted, the ranking contains all nodes 0..n. 
@@ -243,22 +228,6 @@ mod tests { } } - #[test] - fn test_partial_iteration() { - // Taking fewer than n elements must still be correct. - for key in 0..100 { - let hasher = hasher_for_key(key); - let n = 20; - let full: Vec = ConsistentChooseKHasher::new(hasher.clone(), n).collect(); - for take in 1..=n { - let partial: Vec = ConsistentChooseKHasher::new(hasher.clone(), n) - .take(take) - .collect(); - assert_eq!(&partial[..], &full[..take]); - } - } - } - #[test] fn test_uniform_k() { const K: usize = 3; diff --git a/crates/consistent-choose-k/src/consistent_hash.rs b/crates/consistent-choose-k/src/consistent_hash.rs new file mode 100644 index 0000000..0a44cde --- /dev/null +++ b/crates/consistent-choose-k/src/consistent_hash.rs @@ -0,0 +1,274 @@ +use std::hash::{Hash, Hasher}; + +/// A trait which behaves like a pseudo-random number generator. +/// It is used to generate consistent hashes within one bucket. +/// Note: the hasher must have been seeded with the key during construction. +pub trait HashSequence { + fn next(&mut self) -> u64; +} + +/// A trait for building a special bit mask and sequences of hashes for different bit positions. +/// Note: the hasher must have been seeded with the key during construction. +pub trait HashSeqBuilder { + type Seq: HashSequence; + + /// Returns a bit mask indicating which buckets have at least one hash. + fn bit_mask(&self) -> u64; + /// Return a HashSequence instance which is seeded with the given bit position + /// and the seed of this builder. + fn hash_seq(&self, bit: u64) -> Self::Seq; +} + +/// A trait for building multiple independent hash builders +/// Note: the hasher must have been seeded with the key during construction. +pub trait ManySeqBuilder { + type Builder: HashSeqBuilder; + + /// Returns the i-th independent hash builder. 
+ fn seq_builder(&self, i: usize) -> Self::Builder; +} + +impl HashSequence for H { + fn next(&mut self) -> u64 { + 54387634019u64.hash(self); + self.finish() + } +} + +impl HashSeqBuilder for H { + type Seq = H; + + fn bit_mask(&self) -> u64 { + self.finish() + } + + fn hash_seq(&self, bit: u64) -> Self::Seq { + let mut hasher = self.clone(); + bit.hash(&mut hasher); + hasher + } +} + +impl ManySeqBuilder for H { + type Builder = H; + + fn seq_builder(&self, i: usize) -> Self::Builder { + let mut hasher = self.clone(); + i.hash(&mut hasher); + hasher + } +} + +/// One building block for the consistent hashing algorithm is a consistent +/// hash iterator which enumerates all the hashes for a specific bucket. +/// A bucket covers the range `(1< { + hasher: H, + n: usize, // Upper bound for the hash values within the bucket. + is_first: bool, + bit: u64, // A bitmask with a single bit set. +} + +impl BucketIterator { + fn new(n: usize, bit: u64, hasher: H) -> Self { + Self { + hasher, + n, + is_first: true, + bit, + } + } +} + +impl Iterator for BucketIterator { + type Item = usize; + + fn next(&mut self) -> Option { + if self.bit == 0 { + return None; + } + if self.is_first { + let res = (self.hasher.next() & (self.bit - 1)) + self.bit; + self.is_first = false; + if res < self.n as u64 { + self.n = res as usize; + return Some(self.n); + } + } + loop { + let res = self.hasher.next() & (self.bit * 2 - 1); + if res & self.bit == 0 { + return None; + } + if res < self.n as u64 { + self.n = res as usize; + return Some(self.n); + } + } + } +} + +/// An iterator which enumerates all the consistent hashes for a given key +/// from largest to smallest in the range `0..n`. +pub struct ConsistentHashRevIterator { + builder: H, + bits: u64, // Bitmask of unvisited buckets. + n: usize, // Exclusive upper bound for the hash values. + inner: Option>, // Iterator for the current bucket. 
+} + +impl ConsistentHashRevIterator { + pub fn new(n: usize, builder: H) -> Self { + Self { + bits: builder.bit_mask() & (n.next_power_of_two() as u64 - 1), + builder, + n, + inner: None, + } + } +} + +impl Iterator for ConsistentHashRevIterator { + type Item = usize; + + fn next(&mut self) -> Option { + if self.n == 0 { + return None; + } + if let Some(res) = self.inner.as_mut().and_then(|inner| inner.next()) { + return Some(res); + } + while self.bits > 0 { + let bit = 1 << self.bits.ilog2(); + self.bits ^= bit; + let seq = self.builder.hash_seq(bit); + let mut iter = BucketIterator::new(self.n, bit, seq); + if let Some(res) = iter.next() { + self.inner = Some(iter); + return Some(res); + } + } + self.n = 0; + Some(0) + } +} + +/// Same as `ConsistentHashRevIterator`, but iterates from smallest to largest +/// for the range `n..`. +pub struct ConsistentHashIterator { + bits: u64, // Bitmasks of unvisited buckets. + n: usize, // Inclusive lower bound for the hash values. + builder: H, + stack: Vec, // Stack of hashes in the current bucket. +} + +impl ConsistentHashIterator { + pub fn new(n: usize, builder: H) -> Self { + Self { + bits: builder.bit_mask() & !((n + 2).next_power_of_two() as u64 / 2 - 1), + stack: if n == 0 { vec![0] } else { vec![] }, + builder, + n, + } + } +} + +impl Iterator for ConsistentHashIterator { + type Item = usize; + + fn next(&mut self) -> Option { + if let Some(res) = self.stack.pop() { + return Some(res); + } + while self.bits > 0 { + let bit = self.bits & !(self.bits - 1); + self.bits &= self.bits - 1; + let inner = BucketIterator::new(bit as usize * 2, bit, self.builder.hash_seq(bit)); + self.stack = inner.take_while(|x| *x >= self.n).collect(); + if let Some(res) = self.stack.pop() { + return Some(res); + } + } + None + } +} + +/// Wrapper around `ConsistentHashIterator` and `ConsistentHashRevIterator` to compute +/// the next or previous consistent hash for a given key for a given number of nodes `n`. 
+pub struct ConsistentHasher { + builder: H, +} + +impl ConsistentHasher { + /// Construct a new ConsistentHasher with the given builder for a specific key. + pub fn new(builder: H) -> Self { + Self { builder } + } + + /// Return the largest consistent hash smaller than `n`. + pub fn prev(&self, n: usize) -> Option + where + H: Clone, + { + let mut sampler = ConsistentHashRevIterator::new(n, self.builder.clone()); + sampler.next() + } + + /// Return the smallest consistent hash greater than or equal to `n`. + pub fn next(&self, n: usize) -> Option + where + H: Clone, + { + let mut sampler = ConsistentHashIterator::new(n, self.builder.clone()); + sampler.next() + } + + /// Return the largest consistent hash smaller than `n`, consuming the hasher. + pub fn into_prev(self, n: usize) -> Option { + ConsistentHashRevIterator::new(n, self.builder).next() + } +} + +#[cfg(test)] +mod tests { + use std::hash::DefaultHasher; + + use super::*; + + fn hasher_for_key(key: u64) -> DefaultHasher { + let mut hasher = DefaultHasher::default(); + key.hash(&mut hasher); + hasher + } + + #[test] + fn test_uniform_1() { + for k in 0..100 { + let hasher = hasher_for_key(k); + let sampler = ConsistentHasher::new(hasher.clone()); + for n in 0..1000 { + assert!(sampler.prev(n + 1) <= sampler.prev(n + 2)); + let next = sampler.next(n).unwrap(); + assert_eq!(next, sampler.prev(next + 1).unwrap()); + } + let mut iter_rev: Vec<_> = ConsistentHashIterator::new(0, hasher.clone()) + .take_while(|x| *x < 1000) + .collect(); + iter_rev.reverse(); + let iter: Vec<_> = ConsistentHashRevIterator::new(1000, hasher).collect(); + assert_eq!(iter, iter_rev); + } + let mut stats = vec![0; 13]; + for i in 0..100000 { + let hasher = hasher_for_key(i); + let sampler = ConsistentHasher::new(hasher); + let x = sampler.prev(stats.len()).unwrap(); + stats[x] += 1; + } + assert_eq!( + stats, + vec![7577, 7541, 7538, 7822, 7763, 7687, 7718, 7723, 7846, 7723, 7688, 7716, 7658] + ); + } +} diff --git 
a/crates/consistent-choose-k/src/lib.rs b/crates/consistent-choose-k/src/lib.rs index 67116e8..8a2c392 100644 --- a/crates/consistent-choose-k/src/lib.rs +++ b/crates/consistent-choose-k/src/lib.rs @@ -1,272 +1,9 @@ -use std::hash::{Hash, Hasher}; - mod choose_k; +mod consistent_hash; mod node_map; pub use choose_k::ConsistentChooseKHasher; +pub use consistent_hash::{ + ConsistentHashIterator, ConsistentHashRevIterator, ConsistentHasher, HashSeqBuilder, + HashSequence, ManySeqBuilder, +}; pub use node_map::ConsistentNodeMap; - -/// A trait which behaves like a pseudo-random number generator. -/// It is used to generate consistent hashes within one bucket. -/// Note: the hasher must have been seeded with the key during construction. -pub trait HashSequence { - fn next(&mut self) -> u64; -} - -/// A trait for building a special bit mask and sequences of hashes for different bit positions. -/// Note: the hasher must have been seeded with the key during construction. -pub trait HashSeqBuilder { - type Seq: HashSequence; - - /// Returns a bit mask indicating which buckets have at least one hash. - fn bit_mask(&self) -> u64; - /// Return a HashSequence instance which is seeded with the given bit position - /// and the seed of this builder. - fn hash_seq(&self, bit: u64) -> Self::Seq; -} - -/// A trait for building multiple independent hash builders -/// Note: the hasher must have been seeded with the key during construction. -pub trait ManySeqBuilder { - type Builder: HashSeqBuilder; - - /// Returns the i-th independent hash builder. 
- fn seq_builder(&self, i: usize) -> Self::Builder; -} - -impl HashSequence for H { - fn next(&mut self) -> u64 { - 54387634019u64.hash(self); - self.finish() - } -} - -impl HashSeqBuilder for H { - type Seq = H; - - fn bit_mask(&self) -> u64 { - self.finish() - } - - fn hash_seq(&self, bit: u64) -> Self::Seq { - let mut hasher = self.clone(); - bit.hash(&mut hasher); - hasher - } -} - -impl ManySeqBuilder for H { - type Builder = H; - - fn seq_builder(&self, i: usize) -> Self::Builder { - let mut hasher = self.clone(); - i.hash(&mut hasher); - hasher - } -} - -/// One building block for the consistent hashing algorithm is a consistent -/// hash iterator which enumerates all the hashes for a specific bucket. -/// A bucket covers the range `(1< { - hasher: H, - n: usize, - is_first: bool, - bit: u64, // A bitmask with a single bit set. -} - -impl BucketIterator { - fn new(n: usize, bit: u64, hasher: H) -> Self { - Self { - hasher, - n, - is_first: true, - bit, - } - } -} - -impl Iterator for BucketIterator { - type Item = usize; - - fn next(&mut self) -> Option { - if self.bit == 0 { - return None; - } - if self.is_first { - let res = (self.hasher.next() & (self.bit - 1)) + self.bit; - self.is_first = false; - if res < self.n as u64 { - self.n = res as usize; - return Some(self.n); - } - } - loop { - let res = self.hasher.next() & (self.bit * 2 - 1); - if res & self.bit == 0 { - return None; - } - if res < self.n as u64 { - self.n = res as usize; - return Some(self.n); - } - } - } -} - -/// An iterator which enumerates all the consistent hashes for a given key -/// from largest to smallest in the range `0..n`. 
-pub struct ConsistentHashRevIterator { - builder: H, - bits: u64, - n: usize, - inner: Option>, -} - -impl ConsistentHashRevIterator { - pub fn new(n: usize, builder: H) -> Self { - Self { - bits: builder.bit_mask() & (n.next_power_of_two() as u64 - 1), - builder, - n, - inner: None, - } - } -} - -impl Iterator for ConsistentHashRevIterator { - type Item = usize; - - fn next(&mut self) -> Option { - if self.n == 0 { - return None; - } - if let Some(res) = self.inner.as_mut().and_then(|inner| inner.next()) { - return Some(res); - } - while self.bits > 0 { - let bit = 1 << self.bits.ilog2(); - self.bits ^= bit; - let seq = self.builder.hash_seq(bit); - let mut iter = BucketIterator::new(self.n, bit, seq); - if let Some(res) = iter.next() { - self.inner = Some(iter); - return Some(res); - } - } - self.n = 0; - Some(0) - } -} - -/// Same as `ConsistentHashRevIterator`, but iterates from smallest to largest -/// for the range `n..`. -pub struct ConsistentHashIterator { - bits: u64, - n: usize, - builder: H, - stack: Vec, -} - -impl ConsistentHashIterator { - pub fn new(n: usize, builder: H) -> Self { - Self { - bits: builder.bit_mask() & !((n + 2).next_power_of_two() as u64 / 2 - 1), - stack: if n == 0 { vec![0] } else { vec![] }, - builder, - n, - } - } -} - -impl Iterator for ConsistentHashIterator { - type Item = usize; - - fn next(&mut self) -> Option { - if let Some(res) = self.stack.pop() { - return Some(res); - } - while self.bits > 0 { - let bit = self.bits & !(self.bits - 1); - self.bits &= self.bits - 1; - let inner = BucketIterator::new(bit as usize * 2, bit, self.builder.hash_seq(bit)); - self.stack = inner.take_while(|x| *x >= self.n).collect(); - if let Some(res) = self.stack.pop() { - return Some(res); - } - } - None - } -} - -/// Wrapper around `ConsistentHashIterator` and `ConsistentHashRevIterator` to compute -/// the next or previous consistent hash for a given key for a given number of nodes `n`. 
-pub struct ConsistentHasher { - builder: H, -} - -impl ConsistentHasher { - pub fn new(builder: H) -> Self { - Self { builder } - } - - pub fn prev(&self, n: usize) -> Option - where - H: Clone, - { - let mut sampler = ConsistentHashRevIterator::new(n, self.builder.clone()); - sampler.next() - } - - pub fn next(&self, n: usize) -> Option - where - H: Clone, - { - let mut sampler = ConsistentHashIterator::new(n, self.builder.clone()); - sampler.next() - } - - pub fn into_prev(self, n: usize) -> Option { - ConsistentHashRevIterator::new(n, self.builder).next() - } -} - -#[cfg(test)] -mod tests { - use std::hash::DefaultHasher; - - use super::*; - - fn hasher_for_key(key: u64) -> DefaultHasher { - let mut hasher = DefaultHasher::default(); - key.hash(&mut hasher); - hasher - } - - #[test] - fn test_uniform_1() { - for k in 0..100 { - let hasher = hasher_for_key(k); - let sampler = ConsistentHasher::new(hasher.clone()); - for n in 0..1000 { - assert!(sampler.prev(n + 1) <= sampler.prev(n + 2)); - let next = sampler.next(n).unwrap(); - assert_eq!(next, sampler.prev(next + 1).unwrap()); - } - let mut iter_rev: Vec<_> = ConsistentHashIterator::new(0, hasher.clone()) - .take_while(|x| *x < 1000) - .collect(); - iter_rev.reverse(); - let iter: Vec<_> = ConsistentHashRevIterator::new(1000, hasher).collect(); - assert_eq!(iter, iter_rev); - } - let mut stats = vec![0; 13]; - for i in 0..100000 { - let hasher = hasher_for_key(i); - let sampler = ConsistentHasher::new(hasher); - let x = sampler.prev(stats.len()).unwrap(); - stats[x] += 1; - } - println!("{stats:?}"); - } -} From c74e307340ddcf32fd50f463a22ed42b654c9e11 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 21 May 2026 08:43:09 +0200 Subject: [PATCH 38/49] Update choose_k.rs --- crates/consistent-choose-k/src/choose_k.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/consistent-choose-k/src/choose_k.rs b/crates/consistent-choose-k/src/choose_k.rs index 91b6c3a..ebb78fe 100644 --- 
a/crates/consistent-choose-k/src/choose_k.rs +++ b/crates/consistent-choose-k/src/choose_k.rs @@ -112,7 +112,10 @@ impl ConsistentChooseKHasher { /// being replaced with. Returns the index of the new largest sample. /// /// Time: O(k) + /// + /// Panics: if `n` is already at most `k`. pub fn shrink_n(&mut self) -> usize { + assert!(self.n > self.k()); let n = self.samples.last().expect("samples must not be empty").pos; self.n = n; self.shrink_n_inner(n) @@ -143,7 +146,10 @@ impl ConsistentChooseKHasher { /// element was inserted (i.e. its rank position). /// /// Time: O(k) + /// + /// Panics: if `k` equals `n`. pub fn grow_k(&mut self) -> usize { + assert!(self.k() < self.n); let k = self.samples.len(); let sk = Sample::new(self.get_sample(k, self.n), k); if let Some(last) = self.samples.last().copied() { From b2d1b31866a9d57fe0b0de6711b3a2481b71b82c Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 21 May 2026 08:49:13 +0200 Subject: [PATCH 39/49] Update choose_k.rs --- crates/consistent-choose-k/src/choose_k.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/consistent-choose-k/src/choose_k.rs b/crates/consistent-choose-k/src/choose_k.rs index ebb78fe..0f7e829 100644 --- a/crates/consistent-choose-k/src/choose_k.rs +++ b/crates/consistent-choose-k/src/choose_k.rs @@ -112,7 +112,7 @@ impl ConsistentChooseKHasher { /// being replaced with. Returns the index of the new largest sample. /// /// Time: O(k) - /// + /// /// Panics: if `n` is already at most `k`. pub fn shrink_n(&mut self) -> usize { assert!(self.n > self.k()); @@ -146,7 +146,7 @@ impl ConsistentChooseKHasher { /// element was inserted (i.e. its rank position). /// /// Time: O(k) - /// + /// /// Panics: if `k` equals `n`. 
pub fn grow_k(&mut self) -> usize { assert!(self.k() < self.n); From bef0d26140d769b4ce2183da9421e64a1a169179 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 21 May 2026 10:33:11 +0200 Subject: [PATCH 40/49] simplify algorithm further :) --- .../benchmarks/performance.rs | 37 +++--- crates/consistent-choose-k/src/choose_k.rs | 110 ++++++------------ 2 files changed, 52 insertions(+), 95 deletions(-) diff --git a/crates/consistent-choose-k/benchmarks/performance.rs b/crates/consistent-choose-k/benchmarks/performance.rs index f7cf049..bf4cce1 100644 --- a/crates/consistent-choose-k/benchmarks/performance.rs +++ b/crates/consistent-choose-k/benchmarks/performance.rs @@ -19,31 +19,24 @@ fn throughput_benchmark(c: &mut Criterion) { for n in [1usize, 10, 100, 1000, 10000] { group.throughput(Throughput::Elements(keys.len() as u64)); group.bench_with_input(BenchmarkId::new("1", n), &n, |b, n| { - b.iter_batched( - || &keys, - |keys| { - for key in keys { - let mut h = DefaultHasher::default(); - key.hash(&mut h); - black_box(ConsistentHasher::new(h).prev(*n + 1)); - } - }, - criterion::BatchSize::SmallInput, - ) + b.iter(|| { + for key in &keys { + let mut h = DefaultHasher::default(); + key.hash(&mut h); + black_box(ConsistentHasher::new(h).prev(*n + 1)); + } + }) }); for k in [1, 2, 3, 10, 100] { + group.throughput(Throughput::Elements((keys.len() * k) as u64)); group.bench_with_input(BenchmarkId::new(format!("k_{k}"), n), &n, |b, n| { - b.iter_batched( - || &keys, - |keys| { - for key in keys { - let mut h = DefaultHasher::default(); - key.hash(&mut h); - black_box(ConsistentChooseKHasher::new_with_k(h, *n + k, k)); - } - }, - criterion::BatchSize::SmallInput, - ) + b.iter(|| { + for key in &keys { + let mut h = DefaultHasher::default(); + key.hash(&mut h); + black_box(ConsistentChooseKHasher::new_with_k(h, *n + k, k)); + } + }) }); } } diff --git a/crates/consistent-choose-k/src/choose_k.rs b/crates/consistent-choose-k/src/choose_k.rs index 0f7e829..ce777ee 
100644 --- a/crates/consistent-choose-k/src/choose_k.rs +++ b/crates/consistent-choose-k/src/choose_k.rs @@ -1,23 +1,5 @@ use crate::{ConsistentHasher, ManySeqBuilder}; -/// A sample from the consistent choose-k algorithm, pairing a hash value -/// with the index of the hash sequence that produced it. -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct Sample { - pos: usize, - seq: usize, -} - -impl Sample { - fn new(pos: usize, seq: usize) -> Self { - Self { pos, seq } - } - - pub fn pos(&self) -> usize { - self.pos - } -} - /// Implementation of a consistent choose k hashing algorithm. /// It returns k distinct consistent hashes in the range `0..n`. /// The hashes are consistent when `n` changes and when `k` changes! @@ -42,7 +24,7 @@ impl Sample { pub struct ConsistentChooseKHasher { builder: H, n: usize, - pub(crate) samples: Vec, + samples: Vec, } impl ConsistentChooseKHasher { @@ -64,32 +46,30 @@ impl ConsistentChooseKHasher { assert!(n >= k, "n must be at least k"); let mut iter = Self::new(builder, n); for i in 0..k { - iter.samples.push(Sample::new(iter.get_sample(i, n), i)); + iter.samples.push(iter.get_sample(i, n)); } for i in (0..k).rev() { let s = iter.samples[0..=i].iter().copied().max().expect(""); iter.samples[i] = s; for j in 0..i { - if iter.samples[j].pos == s.pos { - iter.samples[j] = Sample::new(iter.get_sample(j, s.pos), j); + if iter.samples[j] == s { + iter.samples[j] = iter.get_sample(j, s); } } } iter } - /// Returns an iterator over the `k` sampled positions in increasing order. - /// - /// Time: O(1) - pub fn positions(&self) -> impl Iterator + '_ { - self.samples.iter().map(|s| s.pos) - } - /// Returns the `k` underlying samples. - pub fn samples(&self) -> &[Sample] { + pub fn samples(&self) -> &[usize] { &self.samples } + /// Converts self into the `k` underlying samples vector. + pub fn into_samples(self) -> Vec { + self.samples + } + /// Returns the current universe size. 
pub fn n(&self) -> usize { self.n @@ -116,28 +96,27 @@ impl ConsistentChooseKHasher { /// Panics: if `n` is already at most `k`. pub fn shrink_n(&mut self) -> usize { assert!(self.n > self.k()); - let n = self.samples.last().expect("samples must not be empty").pos; + let n = *self.samples.last().expect("samples must not be empty"); self.n = n; self.shrink_n_inner(n) } fn shrink_n_inner(&mut self, mut n: usize) -> usize { for i in (0..self.samples.len()).rev() { - if self.samples[i].pos < n { + if self.samples[i] < n { // We are done! return i + 1; } - // Here the maximum could be k, k-1, or i! - let k = self.samples[i].seq; - let si = Sample::new(self.get_sample(i, n), i); - let sk = Sample::new(self.get_sample(k, n), k); - let new_sample = si.max(sk); - if i > 0 && self.samples[i - 1] > new_sample { + // The new maximum over all sequences at position i is either + // the sample of the sequence i or the maximum over all other sequences. + // The latter is already known via self.samples[i-1]. 
+ let si = self.get_sample(i, n); + if i > 0 && self.samples[i - 1] > si { self.samples[i] = self.samples[i - 1]; } else { - self.samples[i] = new_sample; + self.samples[i] = si; } - n = self.samples[i].pos; + n = self.samples[i]; } 0 } @@ -151,17 +130,13 @@ impl ConsistentChooseKHasher { pub fn grow_k(&mut self) -> usize { assert!(self.k() < self.n); let k = self.samples.len(); - let sk = Sample::new(self.get_sample(k, self.n), k); + let sk = self.get_sample(k, self.n); if let Some(last) = self.samples.last().copied() { - if last.pos < sk.pos { + if last < sk { self.samples.push(sk); k - } else if last.pos == sk.pos { - let i = self.shrink_n_inner(last.pos); - self.samples.push(sk); - i } else { - let i = self.shrink_n_inner(last.pos); + let i = self.shrink_n_inner(last); self.samples.push(last); i } @@ -180,7 +155,7 @@ impl Iterator for ConsistentChooseKHasher { return None; } let idx = self.grow_k(); - Some(self.samples[idx].pos) + Some(self.samples[idx]) } } @@ -200,15 +175,13 @@ mod tests { fn test_ranking_matches_prev() { // Every prefix of the ranking must equal the sorted prev(n) set. 
for key in 0..200 { - for n in 2..25 { + for n in 2..30 { let hasher = hasher_for_key(key); let full: Vec = ConsistentChooseKHasher::new(hasher.clone(), n).collect(); assert_eq!(full.len(), n); for k in 1..=n { - let expected: Vec = - ConsistentChooseKHasher::new_with_k(hasher.clone(), n, k) - .positions() - .collect(); + let expected = + ConsistentChooseKHasher::new_with_k(hasher.clone(), n, k).into_samples(); let mut prefix = full[..k].to_vec(); prefix.sort(); assert_eq!( @@ -240,18 +213,14 @@ mod tests { for k in 0..100 { let hasher = hasher_for_key(k); for n in K..1000 { - let samples: Vec = - ConsistentChooseKHasher::new_with_k(hasher.clone(), n + 1, K) - .positions() - .collect(); + let samples = + ConsistentChooseKHasher::new_with_k(hasher.clone(), n + 1, K).into_samples(); assert!(samples.len() == K); for i in 0..K - 1 { assert!(samples[i] < samples[i + 1]); } - let next: Vec = - ConsistentChooseKHasher::new_with_k(hasher.clone(), n + 2, K) - .positions() - .collect(); + let next = + ConsistentChooseKHasher::new_with_k(hasher.clone(), n + 2, K).into_samples(); for i in 0..K { assert!(samples[i] <= next[i]); } @@ -268,9 +237,8 @@ mod tests { let mut stats = vec![0; 8]; for i in 0..32 { let hasher = hasher_for_key(i + 32783); - let samples: Vec = ConsistentChooseKHasher::new_with_k(hasher, stats.len(), 2) - .positions() - .collect(); + let samples = + ConsistentChooseKHasher::new_with_k(hasher, stats.len(), 2).into_samples(); for s in samples { stats[s] += 1; } @@ -282,13 +250,9 @@ mod tests { for n in k + 1..20 { for key in 0..1000 { let hasher = hasher_for_key(key); - let set1: Vec = - ConsistentChooseKHasher::new_with_k(hasher.clone(), n, k) - .positions() - .collect(); - let set2: Vec = ConsistentChooseKHasher::new_with_k(hasher, n, k + 1) - .positions() - .collect(); + let set1 = + ConsistentChooseKHasher::new_with_k(hasher.clone(), n, k).into_samples(); + let set2 = ConsistentChooseKHasher::new_with_k(hasher, n, k + 1).into_samples(); 
assert_eq!(set1.len(), k); assert_eq!(set2.len(), k + 1); let mut merged = set1.clone(); @@ -306,10 +270,10 @@ mod tests { for k in 1..10 { for n in k + 1..30 { let mut iter = ConsistentChooseKHasher::new_with_k(DefaultHasher::new(), n, k); - while iter.samples.last().unwrap().pos > k { + while *iter.samples.last().unwrap() > k { let expected = ConsistentChooseKHasher::new_with_k( DefaultHasher::new(), - iter.samples.last().unwrap().pos, + *iter.samples.last().unwrap(), k, ); iter.shrink_n(); From 0f9db1f21c70d4e846b6446b31e31d7177c51eab Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 21 May 2026 15:36:47 +0200 Subject: [PATCH 41/49] working version --- crates/consistent-choose-k/Cargo.toml | 5 + .../consistent-choose-k/benchmarks/Cargo.toml | 2 +- .../benchmarks/performance.rs | 123 ++++- crates/consistent-choose-k/src/choose_k.rs | 34 +- .../src/compact_min_seg_tree.rs | 480 ++++++++++++++++++ .../consistent-choose-k/src/fast_choose_k.rs | 443 ++++++++++++++++ crates/consistent-choose-k/src/lib.rs | 11 + .../consistent-choose-k/src/min_seg_tree.rs | 278 ++++++++++ 8 files changed, 1368 insertions(+), 8 deletions(-) create mode 100644 crates/consistent-choose-k/src/compact_min_seg_tree.rs create mode 100644 crates/consistent-choose-k/src/fast_choose_k.rs create mode 100644 crates/consistent-choose-k/src/min_seg_tree.rs diff --git a/crates/consistent-choose-k/Cargo.toml b/crates/consistent-choose-k/Cargo.toml index d8341a8..fdb1493 100644 --- a/crates/consistent-choose-k/Cargo.toml +++ b/crates/consistent-choose-k/Cargo.toml @@ -15,6 +15,11 @@ categories = ["algorithms", "data-structures", "mathematics", "science"] crate-type = ["lib", "staticlib"] bench = false +[features] +# Exposes internal data structures (segment trees) for benchmarking. +# Not part of the public API. 
+__bench_internals = [] + [dependencies] [dev-dependencies] diff --git a/crates/consistent-choose-k/benchmarks/Cargo.toml b/crates/consistent-choose-k/benchmarks/Cargo.toml index 7b17102..a6f7c64 100644 --- a/crates/consistent-choose-k/benchmarks/Cargo.toml +++ b/crates/consistent-choose-k/benchmarks/Cargo.toml @@ -9,7 +9,7 @@ harness = false test = false [dependencies] -consistent-choose-k = { path = "../" } +consistent-choose-k = { path = "../", features = ["__bench_internals"] } criterion = { version = "0.8", features = ["csv_output"] } rand = "0.10" diff --git a/crates/consistent-choose-k/benchmarks/performance.rs b/crates/consistent-choose-k/benchmarks/performance.rs index bf4cce1..e287d07 100644 --- a/crates/consistent-choose-k/benchmarks/performance.rs +++ b/crates/consistent-choose-k/benchmarks/performance.rs @@ -4,10 +4,13 @@ use std::{ time::Duration, }; -use consistent_choose_k::{ConsistentChooseKHasher, ConsistentHasher}; +use consistent_choose_k::{ + ConsistentChooseKFastHasher, ConsistentChooseKHasher, ConsistentHasher, + __bench_internals::{CompactMinSegTree, MinSegTree}, +}; use criterion::{ - criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion, PlotConfiguration, - Throughput, + criterion_group, criterion_main, AxisScale, BatchSize, BenchmarkId, Criterion, + PlotConfiguration, Throughput, }; use rand::{rng, RngExt}; @@ -57,7 +60,7 @@ fn append_vs_new_with_k(c: &mut Criterion) { group.bench_function(BenchmarkId::new(format!("append/k_{k}"), n), |b| { b.iter(|| { let h = DefaultHasher::default(); - let mut iter = ConsistentChooseKHasher::new(h, n + k); + let mut iter = ConsistentChooseKHasher::new_with_capacity(h, n + k, k); for _ in 0..k { black_box(iter.grow_k()); } @@ -68,6 +71,116 @@ fn append_vs_new_with_k(c: &mut Criterion) { group.finish(); } +fn shrink_n(c: &mut Criterion) { + let mut group = c.benchmark_group("shrink_n"); + group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + for n in 
[100usize, 1000, 10000, 100000] { + for k in [2, 3, 10, 100] { + group.throughput(Throughput::Elements((n * k) as u64)); + group.bench_function(BenchmarkId::new(format!("standard/k_{k}"), n), |b| { + b.iter_batched( + || { + let h = DefaultHasher::default(); + ConsistentChooseKHasher::new_with_k(h, n + k, k) + }, + |mut iter| { + while iter.samples().last().copied().expect("k must be nonzero") > k { + black_box(iter.shrink_n()); + } + black_box(iter); + }, + BatchSize::SmallInput, + ) + }); + group.bench_function(BenchmarkId::new(format!("fast/k_{k}"), n), |b| { + b.iter_batched( + || { + let h = DefaultHasher::default(); + ConsistentChooseKFastHasher::new_with_k(h, n + k, k) + }, + |mut iter| { + while iter.samples().last().copied().expect("k must be nonzero") > k { + black_box(iter.shrink_n()); + } + black_box(iter); + }, + BatchSize::SmallInput, + ) + }); + } + } + group.finish(); +} + +/// Workload that mimics the segment-tree usage pattern inside `shrink_n`: +/// repeatedly find the right-most non-positive leaf, set it to a value, +/// and shift a suffix. +/// +/// The op sequence is deterministic so the two trees process identical work. 
+fn seg_tree_compare(c: &mut Criterion) { + let mut group = c.benchmark_group("seg_tree"); + group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + + for &size in &[64usize, 256, 1024, 4096, 16384] { + let init: Vec = (0..size as i64) + .map(|i| ((i.wrapping_mul(2654435761)) & 0xff) - 64) + .collect(); + let ops: Vec<(usize, i64, usize, i64)> = (0..size) + .map(|i| { + let set_idx = (i * 5 + 3) % size; + let set_val = ((i as i64) % 31) - 15; + let suffix_lo = (i * 7) % size; + let suffix_delta = if i & 1 == 0 { 1 } else { -1 }; + (set_idx, set_val, suffix_lo, suffix_delta) + }) + .collect(); + + group.throughput(Throughput::Elements(ops.len() as u64)); + + group.bench_with_input(BenchmarkId::new("full", size), &size, |b, _| { + b.iter_batched( + || MinSegTree::new(&init, i64::MAX / 4), + |mut t| { + for &(i, v, lo, d) in &ops { + black_box(t.rightmost_le_zero()); + t.set(i, v); + t.suffix_add(lo, d); + } + black_box(t); + }, + BatchSize::SmallInput, + ) + }); + + group.bench_with_input(BenchmarkId::new("compact", size), &size, |b, _| { + b.iter_batched( + || CompactMinSegTree::new(&init, i64::MAX / 4), + |mut t| { + for &(i, v, lo, d) in &ops { + black_box(t.rightmost_le_zero()); + t.set(i, v); + t.suffix_add(lo, d); + } + black_box(t); + }, + BatchSize::SmallInput, + ) + }); + + group.bench_with_input(BenchmarkId::new("full_new", size), &size, |b, _| { + b.iter(|| { + black_box(MinSegTree::new(&init, i64::MAX / 4)); + }) + }); + group.bench_with_input(BenchmarkId::new("compact_new", size), &size, |b, _| { + b.iter(|| { + black_box(CompactMinSegTree::new(&init, i64::MAX / 4)); + }) + }); + } + group.finish(); +} + criterion_group!( name = benches; config = Criterion::default() @@ -75,6 +188,6 @@ criterion_group!( .measurement_time(Duration::from_millis(4000)) .nresamples(1000); - targets = throughput_benchmark, append_vs_new_with_k, + targets = throughput_benchmark, append_vs_new_with_k, shrink_n, seg_tree_compare, ); 
criterion_main!(benches); diff --git a/crates/consistent-choose-k/src/choose_k.rs b/crates/consistent-choose-k/src/choose_k.rs index ce777ee..b6cbd5c 100644 --- a/crates/consistent-choose-k/src/choose_k.rs +++ b/crates/consistent-choose-k/src/choose_k.rs @@ -32,10 +32,18 @@ impl ConsistentChooseKHasher { /// /// Time: O(1) pub fn new(builder: H, n: usize) -> Self { + Self::new_with_capacity(builder, n, 0) + } + + /// Create a new iterator for `n` nodes starting with k=0, preallocating + /// enough space to grow to `capacity` samples without reallocating. + /// + /// Time: O(1) + pub fn new_with_capacity(builder: H, n: usize, capacity: usize) -> Self { Self { builder, n, - samples: Vec::new(), + samples: Vec::with_capacity(capacity), } } @@ -44,7 +52,7 @@ impl ConsistentChooseKHasher { /// Average time: O(k^2) pub fn new_with_k(builder: H, n: usize, k: usize) -> Self { assert!(n >= k, "n must be at least k"); - let mut iter = Self::new(builder, n); + let mut iter = Self::new_with_capacity(builder, n, k); for i in 0..k { iter.samples.push(iter.get_sample(i, n)); } @@ -283,6 +291,28 @@ mod tests { } } + #[test] + fn test_with_capacity_matches_new() { + for key in 0..200 { + for n in 1..40 { + let hasher = hasher_for_key(key); + let mut standard = ConsistentChooseKHasher::new(hasher.clone(), n); + let mut standard_with_capacity = + ConsistentChooseKHasher::new_with_capacity(hasher, n, n); + + assert!(standard_with_capacity.samples.capacity() >= n); + + for k in 1..=n { + assert_eq!( + standard_with_capacity.next(), + standard.next(), + "key={key} n={n} k={k}" + ); + } + } + } + } + #[test] fn test_grow_k() { for n in 1..30 { diff --git a/crates/consistent-choose-k/src/compact_min_seg_tree.rs b/crates/consistent-choose-k/src/compact_min_seg_tree.rs new file mode 100644 index 0000000..9ade994 --- /dev/null +++ b/crates/consistent-choose-k/src/compact_min_seg_tree.rs @@ -0,0 +1,480 @@ +//! 
A compact min segment tree with the same API as [`crate::min_seg_tree::MinSegTree`] +//! but storing only `n` entries instead of `2n - 1`. +//! +//! # Idea +//! +//! In the relative-offset encoding used by [`crate::min_seg_tree::MinSegTree`], +//! every internal node `v` satisfies `min(seg[left], seg[right]) == 0`, so one +//! of the two children carries no information beyond "I am the zero side". +//! We drop the zero sibling entirely and store, per sibling pair, just +//! `(heavy_offset, side_bit)`: +//! - `heavy_offset: i64` (always `>= 0`): the offset of the heavier sibling +//! (the lighter one is implicitly `0`). +//! - `side: bool`: `false` = left child is heavy, `true` = right child is heavy. +//! +//! Plus one extra entry for the root (which has no sibling and hence no side): +//! it holds the actual min over all leaves. +//! +//! For a tree with `n` leaves (a power of two), this gives `1 + (n - 1) = n` +//! compact entries. +//! +//! # Layout +//! +//! * `val[0]`: the root's offset (= the true min over all leaves; the only +//! entry that can be negative). +//! * For `i in 1..n`: `val[i]` packs the sibling pair's `(heavy_offset, side)` +//! into a single `i64`: the sign bit (bit 63) is the `side` (`0` = left +//! heavy, `1` = right heavy), and the low 63 bits are `heavy_offset` +//! (`>= 0`, well within `i64::MAX / 2` for any realistic workload). +//! That pair lives under original heap-layout node `i - 1`, consisting of +//! original heap indices `2 * (i - 1) + 1` and `2 * (i - 1) + 2`. +//! +//! Compact pair indices form their own implicit-heap binary tree: the +//! children pairs of pair `i` (covering the two original subtrees) sit at +//! `2 * i` and `2 * i + 1`. Pairs in `[n/2, n)` are *leaf pairs* — their two +//! original children are leaves of the original tree. Pairs in `[1, n/2)` are +//! *internal pairs*. +//! +//! # Decoding a leaf +//! +//! To recover `actual_min(leaf i)`, start with `val[0]` and walk down, +//! consulting one pair per level. 
At each level, descending to the +//! `bit`-side child adds `heavy_offset(pair)` to the running sum iff +//! `bit == side(pair)`; otherwise nothing is added (that child is on the +//! zero side). + +/// See module docs. +#[allow(dead_code)] +pub struct CompactMinSegTree { + /// `val[0]` is the root's offset (true min over all leaves; may be + /// negative). `val[i]` for `i in 1..size` packs `(side, heavy_offset)`: + /// the sign bit is the side flag (negative = right heavy, non-negative = + /// left heavy), the low 63 bits are the non-negative heavy offset. + val: Vec, + /// Number of leaves (a power of two, or 0). + size: usize, +} + +/// Sign-bit mask used to encode the side flag inside a packed pair entry. +const SIDE_BIT: i64 = i64::MIN; +/// Mask of the low 63 bits — extracts the `heavy_offset` from a packed entry. +const OFFSET_MASK: i64 = i64::MAX; + +impl CompactMinSegTree { + /// Builds a compact tree whose leaves are `leaves`, padded up to the + /// next power of two with `padding`. + /// + /// Time: O(leaves.len().next_power_of_two()). + #[allow(dead_code)] + pub fn new(leaves: &[i64], padding: i64) -> Self { + if leaves.is_empty() { + return Self { + val: Vec::new(), + size: 0, + }; + } + let size = leaves.len().next_power_of_two(); + let mut val = vec![0i64; size]; + if size == 1 { + val[0] = leaves[0]; + return Self { val, size }; + } + // Build the full relative-encoded segment tree, then collapse it. + let mut seg = vec![padding; 2 * size - 1]; + let leaf_offset = size - 1; + for (i, &v) in leaves.iter().enumerate() { + seg[leaf_offset + i] = v; + } + for v in (0..leaf_offset).rev() { + let l = 2 * v + 1; + let r = 2 * v + 2; + let m = seg[l].min(seg[r]); + seg[l] -= m; + seg[r] -= m; + seg[v] = m; + } + val[0] = seg[0]; + for p in 0..size - 1 { + let l = 2 * p + 1; + let r = 2 * p + 2; + val[p + 1] = if seg[l] >= seg[r] { + seg[l] + } else { + seg[r] | SIDE_BIT + }; + } + Self { val, size } + } + + /// Number of leaves (including padding). 
Always a power of two, or `0`. + #[allow(dead_code)] + pub fn size(&self) -> usize { + self.size + } + + /// Sets leaf `i` to `val`. + /// + /// Time: O(log size). + #[allow(dead_code)] + pub fn set(&mut self, i: usize, val: i64) { + debug_assert!(i < self.size, "leaf index out of range"); + if self.size == 1 { + self.val[0] = val; + return; + } + // Walk down computing the path-sum of offsets from the root. At each + // internal pair, we add the pair's heavy offset to `acc` iff the + // path bit matches the heavy side; otherwise the path lies on the + // implicit-zero sibling and contributes nothing. + let depth = self.size.trailing_zeros(); + let mut acc = self.val[0]; + let mut p = 1; + for d in (1..depth).rev() { + let bit = (i >> d) & 1; + let packed = self.val[p]; + // `(packed < 0) == (bit == 1)`: heavy side matches the path bit. + if (packed < 0) == (bit == 1) { + acc += packed & OFFSET_MASK; + } + p = 2 * p + bit; + } + // Leaf pair. The "other leaf" keeps its current offset, which is 0 + // unless that other leaf was the heavy side. + let leaf_is_right = i & 1 == 1; + let packed = self.val[p]; + let other_off = if (packed < 0) == leaf_is_right { + // Old heavy was on the leaf side; other leaf is implicit zero. + 0 + } else { + packed & OFFSET_MASK + }; + let leaf_off = val - acc; + let (pushed, new_packed) = pack_with_side(leaf_off, other_off, leaf_is_right); + self.val[p] = new_packed; + // For `set`, every ancestor update only feeds `pushed` to the + // ascending side; the moment it hits zero, every further step is a + // no-op, so we can stop. + self.bubble_up_set(p, pushed); + } + + /// Adds `delta` to every leaf in `[lo, size)`. + /// + /// Time: O(log size). + #[allow(dead_code)] + pub fn suffix_add(&mut self, lo: usize, delta: i64) { + if lo >= self.size || delta == 0 { + return; + } + if self.size == 1 { + self.val[0] += delta; + return; + } + // Leaf pair index in the implicit pair-heap. 
+ let p = self.size / 2 + lo / 2; + let leaf_is_right = lo & 1 == 1; + let packed = self.val[p]; + let (mut l_off, mut r_off) = unpack(packed); + // `lo` even → both leaves of the pair are in the suffix; `lo` odd → only the right one. + if !leaf_is_right { + l_off += delta; + } + r_off += delta; + let (pushed, new_packed) = pack(l_off, r_off); + self.val[p] = new_packed; + // On the way up, every left-descent step has its right sibling fully + // covered by the suffix, so we also bump the parent's right offset. + self.bubble_up_suffix(p, pushed, delta); + } + + /// Set-style bubble-up: at each ancestor, add `pushed` to the side we + /// ascended from, then rebalance. Stops early once `pushed` hits 0, + /// since every further update degenerates to a no-op. + fn bubble_up_set(&mut self, mut p: usize, mut pushed: i64) { + while p > 1 && pushed != 0 { + let parent = p / 2; + let from_right = p & 1 == 1; + let packed = self.val[parent]; + let old_off = packed & OFFSET_MASK; + // The side we came from currently has offset `old_off` if it was + // the heavy side, else `0` (since the other side is implicit zero). + let from_side_is_heavy = (packed < 0) == from_right; + let from_off = if from_side_is_heavy { old_off } else { 0 } + pushed; + let other_off = if from_side_is_heavy { 0 } else { old_off }; + let (m, new_packed) = pack_with_side(from_off, other_off, from_right); + self.val[parent] = new_packed; + pushed = m; + p = parent; + } + self.val[0] += pushed; + } + + /// Suffix-style bubble-up: like `bubble_up_set`, but on every step where + /// we ascend from a left child we also add `right_bump` to the right + /// sibling (since `suffix_add` fully covers that subtree). 
+ fn bubble_up_suffix(&mut self, mut p: usize, mut pushed: i64, right_bump: i64) { + while p > 1 { + let parent = p / 2; + let from_right = p & 1 == 1; + let packed = self.val[parent]; + let (mut l_off, mut r_off) = unpack(packed); + if from_right { + r_off += pushed; + } else { + l_off += pushed; + r_off += right_bump; + } + let (m, new_packed) = pack(l_off, r_off); + self.val[parent] = new_packed; + pushed = m; + p = parent; + } + self.val[0] += pushed; + } + + /// Right-most leaf index `i` whose current value is `<= 0`, or `None`. + /// + /// Time: O(log size). + #[allow(dead_code)] + pub fn rightmost_le_zero(&self) -> Option { + if self.size == 0 || self.val[0] > 0 { + return None; + } + if self.size == 1 { + // Single leaf, stored at the root; we already know val[0] <= 0. + return Some(0); + } + let mut acc = self.val[0]; + let mut pair_idx = 1; + let half = self.size / 2; + // Descend through internal pairs; the pair tree is an implicit heap, + // so the leaf range covered by a pair is fully determined by its + // index. No need to track p_lo/p_hi/mid alongside. + while pair_idx < half { + let (l_off, r_off) = unpack(self.val[pair_idx]); + let r_min = acc + r_off; + if r_min <= 0 { + acc = r_min; + pair_idx = 2 * pair_idx + 1; + } else { + acc += l_off; + pair_idx *= 2; + } + } + // Leaf pair: covers leaves [p_lo, p_lo + 2) where + // `p_lo = 2 * pair_idx - size`. Prefer the right leaf. + let (_, r_off) = unpack(self.val[pair_idx]); + let p_lo = 2 * pair_idx - self.size; + if acc + r_off <= 0 { + Some(p_lo + 1) + } else { + Some(p_lo) + } + } +} + +/// Unpacks a stored entry into `(left_offset, right_offset)`. The +/// non-heavy side is always `0`. +#[inline(always)] +fn unpack(packed: i64) -> (i64, i64) { + let off = packed & OFFSET_MASK; + if packed < 0 { (0, off) } else { (off, 0) } +} + +/// Packs `(l, r)` by pulling out the common min `m = min(l, r)` and +/// returning `(m, packed)`. 
The packed word stores `|l - r|` with the +/// sign bit set iff the right side is the heavier (post-subtraction). +/// +/// `l` and `r` may be transiently negative (a suffix_add with `delta < 0` +/// can push one side below zero before its parent pulls the min back up); +/// what matters is that `m` is the common min and `|l - r|` is the residual. +#[inline(always)] +fn pack(l: i64, r: i64) -> (i64, i64) { + let m = l.min(r); + // `diff = l - r`: positive iff left is heavier, negative iff right is. + let diff = l - r; + let off = diff.unsigned_abs() as i64; + // Sign bit of `diff` shifted into the MSB: `SIDE_BIT` iff right heavy. + let side = ((diff as u64) >> 63 << 63) as i64; + (m, off | side) +} + +/// Like [`pack`] but tagged: `from_side_is_right` says which input side is +/// `from`; result side is computed correspondingly. Used by the leaf-pair +/// and bubble-up paths where we logically have a "this side / other side" +/// view rather than "left / right". +#[inline(always)] +fn pack_with_side(from_off: i64, other_off: i64, from_side_is_right: bool) -> (i64, i64) { + if from_side_is_right { + pack(other_off, from_off) + } else { + pack(from_off, other_off) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::min_seg_tree::MinSegTree; + + /// Brute-force model for cross-checking against the tree. + #[derive(Clone)] + struct Naive { + values: Vec, + } + + impl Naive { + fn new(values: Vec) -> Self { + Self { values } + } + fn set(&mut self, i: usize, val: i64) { + self.values[i] = val; + } + fn suffix_add(&mut self, lo: usize, delta: i64) { + for v in &mut self.values[lo..] 
{ + *v += delta; + } + } + fn rightmost_le_zero(&self) -> Option { + self.values.iter().rposition(|&v| v <= 0) + } + } + + #[test] + fn empty() { + let t = CompactMinSegTree::new(&[], 0); + assert_eq!(t.size(), 0); + assert_eq!(t.rightmost_le_zero(), None); + } + + #[test] + fn single_leaf() { + let mut t = CompactMinSegTree::new(&[5], 1_000_000); + assert_eq!(t.size(), 1); + assert_eq!(t.rightmost_le_zero(), None); + t.set(0, -1); + assert_eq!(t.rightmost_le_zero(), Some(0)); + t.set(0, 7); + assert_eq!(t.rightmost_le_zero(), None); + t.suffix_add(0, -10); + assert_eq!(t.rightmost_le_zero(), Some(0)); + } + + #[test] + fn worked_example_from_design_doc() { + // n = 4, leaves [3, 1, 5, 2]. Verify decoding round-trips. + let mut t = CompactMinSegTree::new(&[3, 1, 5, 2], 1_000_000); + // No leaf <= 0 yet. + assert_eq!(t.rightmost_le_zero(), None); + // Bring each leaf to zero in turn and check rightmost_le_zero. + t.set(2, 0); + assert_eq!(t.rightmost_le_zero(), Some(2)); + t.set(2, 5); + t.set(0, 0); + assert_eq!(t.rightmost_le_zero(), Some(0)); + t.set(0, 3); + // Push every leaf below zero via a suffix_add. + t.suffix_add(0, -100); + // Right-most should be index 3. + assert_eq!(t.rightmost_le_zero(), Some(3)); + // Restore leaf 3 to a positive value. 
+ t.set(3, 1); + assert_eq!(t.rightmost_le_zero(), Some(2)); + } + + fn lcg_rng() -> impl FnMut() -> u64 { + let mut state: u64 = 0x9e37_79b9_7f4a_7c15; + move || { + state = state + .wrapping_mul(6364136223846793005) + .wrapping_add(1442695040888963407); + state + } + } + + #[test] + fn matches_naive_under_random_ops() { + for &n in &[1usize, 2, 3, 4, 5, 7, 8, 13, 16, 32] { + let init: Vec = (0..n as i64).map(|i| (i * 13) % 17 - 5).collect(); + let mut t = CompactMinSegTree::new(&init, 1_000_000_000); + let mut naive = Naive::new(init); + let mut next = lcg_rng(); + + for _ in 0..3_000 { + match next() % 3 { + 0 => { + let i = (next() as usize) % n; + let v = (next() as i64) % 21 - 10; + t.set(i, v); + naive.set(i, v); + } + 1 => { + let lo = (next() as usize) % (n + 1); + let d = (next() as i64) % 9 - 4; + t.suffix_add(lo, d); + naive.suffix_add(lo, d); + } + _ => { + assert_eq!( + t.rightmost_le_zero(), + naive.rightmost_le_zero(), + "mismatch at n={n}" + ); + } + } + } + assert_eq!(t.rightmost_le_zero(), naive.rightmost_le_zero()); + } + } + + /// Cross-check: every operation on the compact tree must produce the same + /// `rightmost_le_zero` result as the same op on the non-compact tree. 
+ #[test] + fn matches_min_seg_tree_under_random_ops() { + for &n in &[1usize, 2, 4, 7, 8, 13, 16, 32] { + let init: Vec = (0..n as i64).map(|i| (i * 11) % 19 - 7).collect(); + let mut compact = CompactMinSegTree::new(&init, 1_000_000_000); + let mut full = MinSegTree::new(&init, 1_000_000_000); + let mut next = lcg_rng(); + + for _ in 0..3_000 { + match next() % 3 { + 0 => { + let i = (next() as usize) % n; + let v = (next() as i64) % 25 - 12; + compact.set(i, v); + full.set(i, v); + } + 1 => { + let lo = (next() as usize) % (n + 1); + let d = (next() as i64) % 11 - 5; + compact.suffix_add(lo, d); + full.suffix_add(lo, d); + } + _ => { + assert_eq!( + compact.rightmost_le_zero(), + full.rightmost_le_zero(), + "compact vs full mismatch at n={n}" + ); + } + } + } + assert_eq!(compact.rightmost_le_zero(), full.rightmost_le_zero()); + } + } + + #[test] + fn padding_is_not_selected() { + // n = 3 → padded internally to size 4. + let mut t = CompactMinSegTree::new(&[10, 20, 30], 1_000_000_000); + assert_eq!(t.size(), 4); + t.suffix_add(0, -100); + assert_eq!(t.rightmost_le_zero(), Some(2)); + t.set(2, 1); + assert_eq!(t.rightmost_le_zero(), Some(1)); + t.set(1, 1); + assert_eq!(t.rightmost_le_zero(), Some(0)); + t.set(0, 1); + assert_eq!(t.rightmost_le_zero(), None); + } +} diff --git a/crates/consistent-choose-k/src/fast_choose_k.rs b/crates/consistent-choose-k/src/fast_choose_k.rs new file mode 100644 index 0000000..968d659 --- /dev/null +++ b/crates/consistent-choose-k/src/fast_choose_k.rs @@ -0,0 +1,443 @@ +use crate::min_seg_tree::MinSegTree; +use crate::{ConsistentHasher, ManySeqBuilder}; + +/// "Block count" sentinel for slots that can never be selected (e.g. their +/// sequence is exhausted). Chosen well above any realistic true count so that +/// the lazy `-1` updates applied by `shrink_n` cannot drive it down to zero +/// in any reasonable workload, and so that padding leaves in [`MinSegTree`] +/// are never selected. 
+const C_INF: i64 = 1_000_000_000_000; + +/// Fast variant of [`crate::ConsistentChooseKHasher`] specialized for +/// repeated `shrink_n` calls at fixed `k`. +/// +/// # Invariants +/// +/// For each position `i` in `0..k`: +/// * `samples[i]` is the `i`-th smallest of the `k` currently chosen samples; +/// `samples` is kept sorted ascending. +/// * `next[i]` is a candidate sample from hash sequence `i` (sequence id is +/// bound to **position**, not to a slot's contents — `next` does not shift +/// when `samples` does). Initially set to `get_sample(seq=i, n=samples[i])`, +/// it may become stale relative to `samples[i]` after subsequent shifts. +/// * `c[i]` (stored in the segment-tree leaf for position `i`) tracks the +/// number of left neighbours that block position `i` as a replacement +/// candidate. The true value is `#{ j < i : samples[j] >= next[i] }`; +/// `c[i] == 0` means `samples[i - 1] < next[i]`, i.e. `next[i]` could be +/// inserted right now between `samples[i - 1]` and `samples[i]`. +/// +/// # Lazy maintenance +/// +/// On `shrink_n` we insert a new sample at position `chosen_i`, shifting +/// `samples[chosen_i..k - 1]` right (dropping the old max). `next` does not +/// shift; only `next[chosen_i]` is rewritten to `get_sample(chosen_i, +/// new_sample)`. The true delta to `c[j]` (for `j > chosen_i`, with `next[j]` +/// unchanged) is `0` or `-1`: it is `-1` exactly when +/// `new_sample < next[j] <= samples_old[j - 1]`. We do not check this +/// per-slot; instead we apply a blanket `-1` to `c[chosen_i + 1..k]`. This +/// can under-count the true `c[j]` by up to `1` per shrink, but never +/// over-counts, so a segment-tree descent always yields a candidate whose tracked +/// `c` is `<= 0`. 
+/// +/// On descent we verify the candidate against two staleness cases: +/// * **Upward stale** (`next[i] >= samples[i]`): `samples[i]` has decreased +/// via shifts at lower positions since `next[i]` was last set, so the +/// stored candidate is no longer below `samples[i]`. We refresh +/// `next[i] = get_sample(i, samples[i])` and recompute `c[i]`. +/// * **Downward stale** (`next[i] <= samples[i - 1]`): the lazy `-1` made +/// `c[i]` look unblocked but `next[i]` would actually fit at some position +/// `< i`. We correct `c[i]` upward via a binary search over `samples[..i]`. +/// +/// The standard `shrink_n` performs roughly `k / 2` `get_sample` calls on +/// average; this variant performs `O(1)` `get_sample` calls per `shrink_n` +/// plus `O(log k)` segment-tree work (amortized, plus the binary-search +/// corrections). +pub struct ConsistentChooseKFastHasher { + builder: H, + n: usize, + /// Samples sorted ascending; `samples[i]` is the `i`-th smallest sample. + samples: Vec, + /// `next[i]` is sequence `i`'s candidate for position `i`. The index `i` + /// is bound to the position (= sequence id); `next` does not shift when + /// `samples` does. + next: Vec>, + /// Per-slot block counts `c[i]`. See struct-level docs for definition. + /// Empty when `k == 0`. + tree: MinSegTree, +} + +impl ConsistentChooseKFastHasher { + /// Create a new instance for `n` nodes with `k = 0` samples. + /// + /// Time: O(1) + pub fn new(builder: H, n: usize) -> Self { + Self { + builder, + n, + samples: Vec::new(), + next: Vec::new(), + tree: MinSegTree::new(&[], C_INF), + } + } + + /// Create with the choose-k set for `k` out of `n` nodes pre-built. + /// + /// Uses the same bubble construction as + /// [`crate::ConsistentChooseKHasher::new_with_k`] to populate `samples`, + /// then initializes the `next` values and per-slot block counts and + /// builds the segment tree. 
+ pub fn new_with_k(builder: H, n: usize, k: usize) -> Self { + assert!(n >= k, "n must be at least k"); + let mut this = Self::new(builder, n); + // Bubble construction (identical to the standard hasher). + let mut samples: Vec = Vec::with_capacity(k); + for i in 0..k { + samples.push(this.get_sample(i, n).expect("must not fail")); + } + for i in (0..k).rev() { + let s = samples[0..=i].iter().copied().max().expect("non-empty"); + samples[i] = s; + #[allow(clippy::needless_range_loop)] + for j in 0..i { + if samples[j] == s { + samples[j] = this.get_sample(j, s).expect("must not fail"); + } + } + } + this.samples = samples; + this.rebuild_from_samples(); + this + } + + /// Returns the `k` underlying samples in increasing order. + pub fn samples(&self) -> &[usize] { + &self.samples + } + + /// Returns the current universe size. + pub fn n(&self) -> usize { + self.n + } + + /// Returns the current sample count. + pub fn k(&self) -> usize { + self.samples.len() + } + + /// Grow the sample set by one element. Returns the index at which the + /// new element was inserted in the sorted samples list. + /// + /// Time: O(k). + /// + /// Panics if `k == n`. + pub fn grow_k(&mut self) -> usize { + assert!(self.samples.len() < self.n, "cannot grow: k must be less than n"); + let k = self.samples.len(); + let sk = self + .get_sample(k, self.n) + .expect("sample sequence must not be exhausted"); + let idx = if let Some(last) = self.samples.last().copied() { + if last < sk { + self.samples.push(sk); + k + } else { + let i = self.grow_k_cascade(last); + self.samples.push(last); + i + } + } else { + self.samples.push(sk); + 0 + }; + // The cascade may have touched samples in `[idx, k]`; rebuilding from + // scratch is O(new_k) = O(k + 1), the same asymptotic cost as the + // standard hasher's `grow_k`, so we don't bother updating in place. 
+ self.rebuild_from_samples(); + idx + } + + /// Mirrors the standard hasher's `shrink_n_inner`: walks `samples` from the + /// top down, replacing each entry `>= n` with a fresh candidate from the + /// current sequence (chained against the smaller neighbour). Returns the + /// index at which the new (lower) sample lands. + fn grow_k_cascade(&mut self, mut n: usize) -> usize { + for i in (0..self.samples.len()).rev() { + if self.samples[i] < n { + return i + 1; + } + let si = self + .get_sample(i, n) + .expect("sample sequence must not be exhausted"); + if i > 0 && self.samples[i - 1] > si { + self.samples[i] = self.samples[i - 1]; + } else { + self.samples[i] = si; + } + n = self.samples[i]; + } + 0 + } + + /// Recomputes `next` and the segment tree from the current `samples`. + /// `samples` must already be sorted ascending. + fn rebuild_from_samples(&mut self) { + let k = self.samples.len(); + let mut next: Vec> = Vec::with_capacity(k); + let mut c: Vec = Vec::with_capacity(k); + for i in 0..k { + let nv = self.get_sample(i, self.samples[i]); + next.push(nv); + let ci = match nv { + Some(v) => (i - self.samples[..i].partition_point(|&s| s < v)) as i64, + None => C_INF, + }; + c.push(ci); + } + self.next = next; + self.tree = MinSegTree::new(&c, C_INF); + } + + /// Decrements `n` to the current largest sample and replaces it with the + /// next valid sample. Returns the index at which the new sample was + /// inserted in the sorted samples list. + /// + /// Panics if `n <= k` or if no replacement can be found (i.e. every + /// sequence whose slot would be a candidate is exhausted). 
+ pub fn shrink_n(&mut self) -> usize { + let k = self.samples.len(); + assert!(self.n > k, "cannot shrink: n must be greater than k"); + assert!(k > 0, "cannot shrink: samples must not be empty"); + self.n = *self.samples.last().expect("k > 0"); + + // Find the right-most slot whose tracked `c[i]` is `<= 0`, verifying + // each candidate against the true definition and correcting on miss. + let chosen_i = loop { + let i = self + .tree + .rightmost_le_zero() + .expect("at least one slot must be selectable"); + let next_i = match self.next[i] { + None => { + // Sequence `i` is exhausted; never a valid candidate. + self.tree.set(i, C_INF); + continue; + } + Some(v) => v, + }; + // Stale upward: `next[i]` may be >= `samples[i]` if `samples[i]` + // has decreased via shifts at lower positions since `next[i]` was + // last set. Refresh slot `i` against its current `samples[i]`. + if next_i >= self.samples[i] { + self.refresh_slot(i); + continue; + } + if i == 0 || self.samples[i - 1] < next_i { + break i; + } + // Stale (under-counted) `c[i]`: recompute the true value via a + // binary search on `samples[..i]`. + let lb = self.lower_bound(next_i, i); + self.tree.set(i, (i - lb) as i64); + }; + let new_sample = self.next[chosen_i].expect("verified Some above"); + // Lazy bulk `-1` over the suffix `(chosen_i, k)`: every slot in this + // range gains the freshly-inserted sample as a new left neighbour. The + // true delta is `-1` exactly when `new_sample < next[j] <= samples_old[j - 1]` + // and `0` otherwise; applying `-1` always under-counts and is corrected + // lazily on the next descent. + self.tree.suffix_add(chosen_i + 1, -1); + // Insert at `chosen_i`, then drop the old largest at position `k`. + // Samples shift; `next` does NOT shift — `next[i]` is bound to sequence + // id `i`, i.e. to position, not to the slot's contents. 
After the + // shift, `refresh_slot(chosen_i)` re-derives `next[chosen_i]` from the + // freshly inserted `samples[chosen_i] == new_sample` and writes the + // matching `c[chosen_i]` into the tree. + self.samples.pop(); + self.samples.insert(chosen_i, new_sample); + self.refresh_slot(chosen_i); + chosen_i + } + + /// Re-derives `next[i]` from `samples[i]` and writes the matching `c[i]` + /// into the segment tree. + fn refresh_slot(&mut self, i: usize) { + let refreshed = self.get_sample(i, self.samples[i]); + self.next[i] = refreshed; + let new_c = match refreshed { + Some(v) => (i - self.lower_bound(v, i)) as i64, + None => C_INF, + }; + self.tree.set(i, new_c); + } + + fn get_sample(&self, seq: usize, n: usize) -> Option { + if n <= seq { + return None; + } + ConsistentHasher::new(self.builder.seq_builder(seq)) + .into_prev(n - seq) + .map(|pos| pos + seq) + } + + /// First index `j` in `[0, upto)` with `samples[j] >= value`. Returns + /// `upto` if no such index exists. + fn lower_bound(&self, value: usize, upto: usize) -> usize { + self.samples[..upto].partition_point(|&s| s < value) + } +} + +impl Iterator for ConsistentChooseKFastHasher { + type Item = usize; + + fn next(&mut self) -> Option { + if self.samples.len() >= self.n { + return None; + } + let idx = self.grow_k(); + Some(self.samples[idx]) + } +} + +#[cfg(test)] +mod tests { + use std::hash::{DefaultHasher, Hash}; + + use super::*; + use crate::ConsistentChooseKHasher; + + fn hasher_for_key(key: u64) -> DefaultHasher { + let mut hasher = DefaultHasher::default(); + key.hash(&mut hasher); + hasher + } + + #[test] + fn test_fast_new_with_k_matches_standard() { + for key in 0..50 { + for n in 1..30 { + for k in 0..=n { + let h = hasher_for_key(key); + let expected = + ConsistentChooseKHasher::new_with_k(h.clone(), n, k).into_samples(); + let actual = ConsistentChooseKFastHasher::new_with_k(h, n, k) + .samples() + .to_vec(); + assert_eq!(actual, expected, "key={key} n={n} k={k}"); + } + } + } + } + + 
#[test] + fn test_fast_shrink_n_matches_standard() { + for key in 0..50 { + for k in 1..10 { + for n in k + 1..30 { + let h = hasher_for_key(key); + let mut fast = ConsistentChooseKFastHasher::new_with_k(h.clone(), n, k); + let mut standard = ConsistentChooseKHasher::new_with_k(h, n, k); + while *standard.samples().last().unwrap() > k { + let standard_idx = standard.shrink_n(); + let fast_idx = fast.shrink_n(); + assert_eq!( + fast_idx, standard_idx, + "key={key} n={n} k={k}: returned index mismatch" + ); + assert_eq!( + fast.samples(), + standard.samples(), + "key={key} n={n} k={k}: samples mismatch after shrink" + ); + } + } + } + } + } + + #[test] + fn test_fast_shrink_n_preserves_invariants() { + // Even if the fast variant diverges from the standard one in the + // future, the choose-k invariants must hold. + for key in 0..50 { + for k in 1..10 { + for n in k + 1..30 { + let h = hasher_for_key(key); + let mut fast = ConsistentChooseKFastHasher::new_with_k(h, n, k); + while *fast.samples().last().unwrap() > k { + fast.shrink_n(); + let s = fast.samples(); + assert_eq!(s.len(), k, "k must be preserved"); + for w in s.windows(2) { + assert!(w[0] < w[1], "samples must be strictly sorted: {s:?}"); + } + assert!(*s.last().unwrap() < fast.n, "all samples must be < n"); + } + } + } + } + } + + #[test] + fn test_fast_grow_k_matches_standard() { + for key in 0..50 { + for n in 1..30 { + let mut fast = ConsistentChooseKFastHasher::new(hasher_for_key(key), n); + let mut standard = ConsistentChooseKHasher::new(hasher_for_key(key), n); + for _ in 0..n { + let fast_idx = fast.grow_k(); + let standard_idx = standard.grow_k(); + assert_eq!( + fast_idx, standard_idx, + "key={key} n={n}: grow_k returned index mismatch" + ); + assert_eq!( + fast.samples(), + standard.samples(), + "key={key} n={n}: samples mismatch after grow_k" + ); + } + } + } + } + + #[test] + fn test_fast_iterator_matches_standard() { + for key in 0..50 { + for n in 1..30 { + let fast: Vec = 
ConsistentChooseKFastHasher::new(hasher_for_key(key), n) + .collect(); + let standard: Vec = + ConsistentChooseKHasher::new(hasher_for_key(key), n).collect(); + assert_eq!(fast, standard, "key={key} n={n}: iterator order mismatch"); + assert_eq!(fast.len(), n, "key={key} n={n}: iterator length"); + } + } + } + + #[test] + fn test_fast_grow_then_shrink_roundtrip() { + // After growing all the way and then shrinking back, the surviving + // samples must still match the choose-k semantics of the standard + // hasher. + for key in 0..30 { + for n in 2..15 { + for k in 1..n { + let mut fast = ConsistentChooseKFastHasher::new(hasher_for_key(key), n); + for _ in 0..k { + fast.grow_k(); + } + let standard = ConsistentChooseKHasher::new_with_k( + hasher_for_key(key), + n, + k, + ); + assert_eq!( + fast.samples(), + standard.samples(), + "key={key} n={n} k={k}: grow_k built unexpected samples" + ); + } + } + } + } +} diff --git a/crates/consistent-choose-k/src/lib.rs b/crates/consistent-choose-k/src/lib.rs index 8a2c392..c93a151 100644 --- a/crates/consistent-choose-k/src/lib.rs +++ b/crates/consistent-choose-k/src/lib.rs @@ -1,9 +1,20 @@ mod choose_k; +mod compact_min_seg_tree; mod consistent_hash; +mod fast_choose_k; +mod min_seg_tree; mod node_map; pub use choose_k::ConsistentChooseKHasher; pub use consistent_hash::{ ConsistentHashIterator, ConsistentHashRevIterator, ConsistentHasher, HashSeqBuilder, HashSequence, ManySeqBuilder, }; +pub use fast_choose_k::ConsistentChooseKFastHasher; pub use node_map::ConsistentNodeMap; + +#[doc(hidden)] +#[cfg(feature = "__bench_internals")] +pub mod __bench_internals { + pub use crate::compact_min_seg_tree::CompactMinSegTree; + pub use crate::min_seg_tree::MinSegTree; +} diff --git a/crates/consistent-choose-k/src/min_seg_tree.rs b/crates/consistent-choose-k/src/min_seg_tree.rs new file mode 100644 index 0000000..dc26637 --- /dev/null +++ b/crates/consistent-choose-k/src/min_seg_tree.rs @@ -0,0 +1,278 @@ +//! 
A min segment tree with point-set, range-add, and "rightmost-leaf with +//! value `<= 0`" queries, all in O(log n). +//! +//! Uses a *relative offset* encoding so range-add does not need a separate +//! lazy-propagation array. +//! +//! # Encoding +//! +//! Implicit binary heap layout: root at `0`, children of node `v` at +//! `2 * v + 1` and `2 * v + 2`. Leaves live at indices +//! `[size - 1, 2 * size - 1)`; leaf `i` is at `size - 1 + i`. `size` is a +//! power of two (or zero, for an empty tree). +//! +//! Each `seg[v]` is an *offset*, not an absolute value. The true min of the +//! subtree rooted at `v` is the sum of `seg[u]` over all `u` on the path +//! from the root down to `v` (inclusive). +//! +//! # Invariant +//! +//! At every internal node `v`: `min(seg[2 * v + 1], seg[2 * v + 2]) == 0`. +//! The heavier child's offset is its excess over its sibling, and the +//! lighter sibling contributes zero. +//! +//! Because the offset at a node already accounts for every leaf beneath it, +//! a range-add `delta` over a node fully covered by the update is a single +//! `seg[v] += delta`. Partial-cover recursion calls [`MinSegTree::rebalance`] +//! on the way back up to restore the invariant. + +/// See module docs. +pub struct MinSegTree { + /// Offset values in implicit-heap layout. + seg: Vec, + /// Number of leaves (a power of two, or 0 for an empty tree). + size: usize, +} + +impl MinSegTree { + /// Builds a tree whose leaves are `leaves`, padded up to the next power + /// of two with `padding`. + /// + /// `padding` should be large enough that [`MinSegTree::rightmost_le_zero`] + /// never selects a padding leaf for any sequence of operations the caller + /// applies. + /// + /// Time: O(leaves.len().next_power_of_two()). 
+ pub fn new(leaves: &[i64], padding: i64) -> Self { + if leaves.is_empty() { + return Self { + seg: Vec::new(), + size: 0, + }; + } + let size = leaves.len().next_power_of_two(); + let mut seg = vec![padding; 2 * size - 1]; + let leaf_offset = size - 1; + for (i, &v) in leaves.iter().enumerate() { + seg[leaf_offset + i] = v; + } + // Bulk-build: pull common min from each pair of children up into + // their parent, restoring `min(seg[l], seg[r]) == 0` bottom-up. + for v in (0..leaf_offset).rev() { + let l = 2 * v + 1; + let r = 2 * v + 2; + let m = seg[l].min(seg[r]); + seg[l] -= m; + seg[r] -= m; + seg[v] = m; + } + Self { seg, size } + } + + /// Number of leaves (including padding). Always a power of two, or `0`. + #[allow(dead_code)] + pub fn size(&self) -> usize { + self.size + } + + /// Sets leaf `i` to `val`. + /// + /// Time: O(log size). + pub fn set(&mut self, i: usize, val: i64) { + debug_assert!(i < self.size, "leaf index out of range"); + // Pre-shift so that the bit selecting the child at each level is + // always the top bit of `lo`; the recursion then peels bits off by + // left-shifting once per level. + let depth = self.size.trailing_zeros(); + let lo = i.checked_shl(usize::BITS - depth).unwrap_or(0); + self.set_rec(0, lo, val); + } + + /// Descends root -> leaf, reading the top bit of `lo` to pick a child and + /// shifting `lo` left by one per level. `val` is the target value minus + /// the path-sum of offsets at ancestors strictly above `v`, so at the + /// leaf we can write it directly. + fn set_rec(&mut self, v: usize, lo: usize, val: i64) { + if v >= self.size - 1 { + self.seg[v] = val; + return; + } + let bit = lo >> (usize::BITS - 1); + self.set_rec(2 * v + 1 + bit, lo << 1, val - self.seg[v]); + self.rebalance(v); + } + + /// Adds `delta` to every leaf in `[lo, size)`. + /// + /// Time: O(log size). 
+ pub fn suffix_add(&mut self, lo: usize, delta: i64) { + if lo >= self.size || delta == 0 { + return; + } + let depth = self.size.trailing_zeros(); + let lo = lo.checked_shl(usize::BITS - depth).unwrap_or(0); + self.suffix_add_rec(0, lo, delta); + } + + /// Descends root -> leaf, reading the top bit of `lo` to pick a child and + /// shifting `lo` left by one per level; bumps the right sibling whole + /// whenever `lo` lies in the left subtree, and rebalances on unwind. + fn suffix_add_rec(&mut self, v: usize, lo: usize, delta: i64) { + if v >= self.size - 1 { + // The leaf at `lo` itself is in the suffix. + self.seg[v] += delta; + return; + } + let bit = lo >> (usize::BITS - 1); + if bit == 0 { + // Right sibling fully covered by the suffix. + self.seg[2 * v + 2] += delta; + self.suffix_add_rec(2 * v + 1, lo << 1, delta); + } else { + // Left sibling outside the suffix. + self.suffix_add_rec(2 * v + 2, lo << 1, delta); + } + self.rebalance(v); + } + + /// Returns the right-most leaf index `i` with current value `<= 0`, or + /// `None` if no leaf is `<= 0`. + /// + /// Time: O(log size). + pub fn rightmost_le_zero(&self) -> Option { + if self.size == 0 { + return None; + } + let mut acc = self.seg[0]; + if acc > 0 { + return None; + } + let leaf_offset = self.size - 1; + let mut v = 0; + while v < leaf_offset { + let r = 2 * v + 2; + let r_min = acc + self.seg[r]; + if r_min <= 0 { + acc = r_min; + v = r; + } else { + let l = 2 * v + 1; + acc += self.seg[l]; + v = l; + } + } + Some(v - leaf_offset) + } + + /// Restores `min(seg[2 * v + 1], seg[2 * v + 2]) == 0` at internal node + /// `v` by pulling the children's common min into `v`. Caller must have + /// finished updating `v`'s subtree first. 
+ fn rebalance(&mut self, v: usize) { + let l = 2 * v + 1; + let r = 2 * v + 2; + let m = self.seg[l].min(self.seg[r]); + self.seg[l] -= m; + self.seg[r] -= m; + self.seg[v] += m; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Brute-force model for cross-checking against the tree. + #[derive(Clone)] + struct Naive { + values: Vec, + } + + impl Naive { + fn new(values: Vec) -> Self { + Self { values } + } + fn set(&mut self, i: usize, val: i64) { + self.values[i] = val; + } + fn suffix_add(&mut self, lo: usize, delta: i64) { + for v in &mut self.values[lo..] { + *v += delta; + } + } + fn rightmost_le_zero(&self) -> Option { + self.values.iter().rposition(|&v| v <= 0) + } + } + + #[test] + fn empty() { + let t = MinSegTree::new(&[], 0); + assert_eq!(t.size(), 0); + assert_eq!(t.rightmost_le_zero(), None); + } + + #[test] + fn single_leaf() { + let mut t = MinSegTree::new(&[5], 1_000_000); + assert_eq!(t.size(), 1); + assert_eq!(t.rightmost_le_zero(), None); + t.set(0, -1); + assert_eq!(t.rightmost_le_zero(), Some(0)); + t.set(0, 7); + assert_eq!(t.rightmost_le_zero(), None); + } + + #[test] + fn matches_naive_under_random_ops() { + // Deterministic pseudo-random sequence of ops. + let n = 13usize; + let init: Vec = (0..n as i64).map(|i| (i * 7) % 11 - 3).collect(); + let mut t = MinSegTree::new(&init, 1_000_000_000); + let mut naive = Naive::new(init); + + // A linear-congruential prng so the test is hermetic. 
+ let mut state: u64 = 0xdead_beef_cafe_f00d; + let mut next = || { + state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407); + state + }; + + for _ in 0..2_000 { + match next() % 3 { + 0 => { + let i = (next() as usize) % n; + let v = (next() as i64) % 21 - 10; + t.set(i, v); + naive.set(i, v); + } + 1 => { + let lo = (next() as usize) % (n + 1); + let d = (next() as i64) % 9 - 4; + t.suffix_add(lo, d); + naive.suffix_add(lo, d); + } + _ => { + assert_eq!(t.rightmost_le_zero(), naive.rightmost_le_zero()); + } + } + } + assert_eq!(t.rightmost_le_zero(), naive.rightmost_le_zero()); + } + + #[test] + fn padding_is_not_selected() { + // k = 3, padded internally to size 4 with C_INF-like padding. + let mut t = MinSegTree::new(&[10, 20, 30], 1_000_000_000); + assert_eq!(t.size(), 4); + // Drive every real leaf to a non-positive value; the rightmost should + // still always be a real index in `[0, 3)`, never the padding leaf 3. + t.suffix_add(0, -100); + assert_eq!(t.rightmost_le_zero(), Some(2)); + t.set(2, 1); + assert_eq!(t.rightmost_le_zero(), Some(1)); + t.set(1, 1); + assert_eq!(t.rightmost_le_zero(), Some(0)); + t.set(0, 1); + assert_eq!(t.rightmost_le_zero(), None); + } +} From 109ab4efca088014b8a2a1cc88bc499cec01277e Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 21 May 2026 16:12:27 +0200 Subject: [PATCH 42/49] improve grow_k performance to log k --- .../benchmarks/performance.rs | 15 ++ .../src/compact_min_seg_tree.rs | 155 +++++++++++++- .../consistent-choose-k/src/fast_choose_k.rs | 133 +++++++----- .../consistent-choose-k/src/min_seg_tree.rs | 196 +++++++++++++++++- 4 files changed, 438 insertions(+), 61 deletions(-) diff --git a/crates/consistent-choose-k/benchmarks/performance.rs b/crates/consistent-choose-k/benchmarks/performance.rs index e287d07..0388099 100644 --- a/crates/consistent-choose-k/benchmarks/performance.rs +++ b/crates/consistent-choose-k/benchmarks/performance.rs @@ -66,6 +66,21 @@ fn 
append_vs_new_with_k(c: &mut Criterion) { } }) }); + group.bench_function(BenchmarkId::new(format!("fast_new_with_k/k_{k}"), n), |b| { + b.iter(|| { + let h = DefaultHasher::default(); + black_box(ConsistentChooseKFastHasher::new_with_k(h, n + k, k)); + }) + }); + group.bench_function(BenchmarkId::new(format!("fast_append/k_{k}"), n), |b| { + b.iter(|| { + let h = DefaultHasher::default(); + let mut iter = ConsistentChooseKFastHasher::new(h, n + k); + for _ in 0..k { + black_box(iter.grow_k()); + } + }) + }); } } group.finish(); diff --git a/crates/consistent-choose-k/src/compact_min_seg_tree.rs b/crates/consistent-choose-k/src/compact_min_seg_tree.rs index 9ade994..801e2e7 100644 --- a/crates/consistent-choose-k/src/compact_min_seg_tree.rs +++ b/crates/consistent-choose-k/src/compact_min_seg_tree.rs @@ -53,6 +53,14 @@ pub struct CompactMinSegTree { val: Vec, /// Number of leaves (a power of two, or 0). size: usize, + /// Number of *real* leaves (those at positions `0..len`). Slots in + /// `[len, size)` hold the [`Self::padding`] sentinel; `len` only changes + /// via [`CompactMinSegTree::push`]. + len: usize, + /// Sentinel for as-yet-unset leaves. New slots created by + /// [`CompactMinSegTree::push`] are initialized with this value before + /// the actual write, and growing the tree fills the new half with it. + padding: i64, } /// Sign-bit mask used to encode the side flag inside a packed pair entry. @@ -67,17 +75,25 @@ impl CompactMinSegTree { /// Time: O(leaves.len().next_power_of_two()). #[allow(dead_code)] pub fn new(leaves: &[i64], padding: i64) -> Self { + let len = leaves.len(); if leaves.is_empty() { return Self { val: Vec::new(), size: 0, + len, + padding, }; } let size = leaves.len().next_power_of_two(); let mut val = vec![0i64; size]; if size == 1 { val[0] = leaves[0]; - return Self { val, size }; + return Self { + val, + size, + len, + padding, + }; } // Build the full relative-encoded segment tree, then collapse it. 
let mut seg = vec![padding; 2 * size - 1]; @@ -103,7 +119,77 @@ impl CompactMinSegTree { seg[r] | SIDE_BIT }; } - Self { val, size } + Self { + val, + size, + len, + padding, + } + } + + /// Number of real leaves appended so far (excluding padding slots). + #[allow(dead_code)] + pub fn len(&self) -> usize { + self.len + } + + /// True iff no real leaves are stored. + #[allow(dead_code)] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Appends `value` as a new leaf. Doubles the underlying tree if there + /// is no padding slot left to overwrite. + /// + /// Time: amortized O(log size); worst-case O(size) on the doubling step. + #[allow(dead_code)] + pub fn push(&mut self, value: i64) { + if self.len == self.size { + self.grow(); + } + let i = self.len; + self.len = i + 1; + self.set(i, value); + } + + /// Doubles `size`, preserving the existing tree as the left subtree of a + /// fresh root and filling the right subtree with all-padding leaves. + fn grow(&mut self) { + if self.size == 0 { + // Empty tree: allocate a single-leaf tree. The caller is + // expected to write the value via `set`/`push`, but pre-fill + // with `padding` so the slot is in a valid state regardless. + self.val.push(self.padding); + self.size = 1; + return; + } + let old_size = self.size; + let new_size = old_size * 2; + // Old `val[0]` is the actual min over the old real leaves. Padding + // is `>= ` every real value, so the new tree's actual min is the + // same value and lives at the same slot. + let old_root_min = self.val[0]; + self.val.resize(new_size, 0); + // Push every level of the pair-heap one step deeper. The whole old + // level `[L, 2L)` (where `L` is a power of two) becomes the LEFT + // half of the new level one deeper, i.e. moves to `[2L, 3L)`. We + // process from the deepest level upward; each step is a + // contiguous slice copy followed by zeroing the now-vacated source. 
+ let mut level = old_size / 2; + while level > 0 { + self.val.copy_within(level..2 * level, 2 * level); + self.val[level..2 * level].fill(0); + level /= 2; + } + // The right subtree consists entirely of padding leaves (already 0 + // throughout). The new root pair's right child reaches that padding + // subtree (min = `padding`); its left child reaches the relocated + // old root (min = `old_root_min`). + let r_off = self.padding - old_root_min; + debug_assert!(r_off >= 0, "padding must be >= every real leaf"); + self.val[1] = SIDE_BIT | r_off; + self.size = new_size; } /// Number of leaves (including padding). Always a power of two, or `0`. @@ -477,4 +563,69 @@ mod tests { t.set(0, 1); assert_eq!(t.rightmost_le_zero(), None); } + + /// `push` from empty must reproduce the result of `new` on the same + /// sequence of leaves, for every interesting size including the + /// power-of-two boundaries that trigger doubling. + #[test] + fn push_matches_new() { + for &n in &[0usize, 1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17, 32, 33] { + let leaves: Vec = (0..n as i64).map(|i| (i * 13) % 17 - 5).collect(); + let padding = 1_000_000_000; + let mut pushed = CompactMinSegTree::new(&[], padding); + for &v in &leaves { + pushed.push(v); + } + let direct = CompactMinSegTree::new(&leaves, padding); + assert_eq!(pushed.size(), direct.size(), "size mismatch at n={n}"); + assert_eq!(pushed.len(), direct.len(), "len mismatch at n={n}"); + assert_eq!( + pushed.rightmost_le_zero(), + direct.rightmost_le_zero(), + "rightmost mismatch at n={n}", + ); + } + } + + /// Mix `push` with `set` / `suffix_add` / `rightmost_le_zero` and + /// cross-check against the brute-force model. 
+ #[test] + fn push_mixed_with_other_ops() { + let padding = 1_000_000_000; + let mut t = CompactMinSegTree::new(&[], padding); + let mut naive: Vec = Vec::new(); + let mut next = lcg_rng(); + + for _ in 0..2_000 { + match next() % 4 { + 0 => { + // push + let v = (next() as i64) % 21 - 10; + t.push(v); + naive.push(v); + } + 1 if !naive.is_empty() => { + // set + let i = (next() as usize) % naive.len(); + let v = (next() as i64) % 21 - 10; + t.set(i, v); + naive[i] = v; + } + 2 if !naive.is_empty() => { + // suffix_add only over the real prefix; padding slots + // would otherwise be unfair game. + let lo = (next() as usize) % (naive.len() + 1); + let d = (next() as i64) % 9 - 4; + t.suffix_add(lo, d); + for v in &mut naive[lo..] { + *v += d; + } + } + _ => { + let want = naive[..t.len()].iter().rposition(|&v| v <= 0); + assert_eq!(t.rightmost_le_zero(), want); + } + } + } + } } diff --git a/crates/consistent-choose-k/src/fast_choose_k.rs b/crates/consistent-choose-k/src/fast_choose_k.rs index 968d659..39f6d24 100644 --- a/crates/consistent-choose-k/src/fast_choose_k.rs +++ b/crates/consistent-choose-k/src/fast_choose_k.rs @@ -105,7 +105,19 @@ impl ConsistentChooseKFastHasher { } } this.samples = samples; - this.rebuild_from_samples(); + // Initialize `next[i]` and the segment tree from `samples`. + let mut c: Vec = Vec::with_capacity(k); + this.next.reserve(k); + for i in 0..k { + let nv = this.get_sample(i, this.samples[i]); + this.next.push(nv); + let ci = match nv { + Some(v) => (i - this.lower_bound(v, i)) as i64, + None => C_INF, + }; + c.push(ci); + } + this.tree = MinSegTree::new(&c, C_INF); this } @@ -127,7 +139,7 @@ impl ConsistentChooseKFastHasher { /// Grow the sample set by one element. Returns the index at which the /// new element was inserted in the sorted samples list. /// - /// Time: O(k). + /// Amortized time: O(log k). /// /// Panics if `k == n`. 
pub fn grow_k(&mut self) -> usize { @@ -136,65 +148,47 @@ impl ConsistentChooseKFastHasher { let sk = self .get_sample(k, self.n) .expect("sample sequence must not be exhausted"); - let idx = if let Some(last) = self.samples.last().copied() { - if last < sk { - self.samples.push(sk); - k - } else { - let i = self.grow_k_cascade(last); - self.samples.push(last); - i + match self.samples.last().copied() { + Some(last) if last >= sk => { + // Hard case: `sk` collides with existing samples. The standard + // algorithm is "cascade-replace `samples[..k]` as if universe + // shrank to `last`, then push `last` at position k". Our fast + // `shrink_n` already implements the cascade in O(log k) + // amortized, so we reuse it directly: it pops the old last, + // inserts a new sample at some `chosen_i`, and leaves + // `samples` with the same length `k`. We then append the + // saved `last` at the new top, restoring `n`. + let original_n = self.n; + let chosen_i = self.shrink_n(); + self.n = original_n; + self.append_top(last); + chosen_i } - } else { - self.samples.push(sk); - 0 - }; - // The cascade may have touched samples in `[idx, k]`; rebuilding from - // scratch is O(new_k) = O(k + 1), the same asymptotic cost as the - // standard hasher's `grow_k`, so we don't bother updating in place. - self.rebuild_from_samples(); - idx - } - - /// Mirrors the standard hasher's `shrink_n_inner`: walks `samples` from the - /// top down, replacing each entry `>= n` with a fresh candidate from the - /// current sequence (chained against the smaller neighbour). Returns the - /// index at which the new (lower) sample lands. 
- fn grow_k_cascade(&mut self, mut n: usize) -> usize { - for i in (0..self.samples.len()).rev() { - if self.samples[i] < n { - return i + 1; - } - let si = self - .get_sample(i, n) - .expect("sample sequence must not be exhausted"); - if i > 0 && self.samples[i - 1] > si { - self.samples[i] = self.samples[i - 1]; - } else { - self.samples[i] = si; + _ => { + // Easy case: `sk` is strictly larger than every current + // sample (or `samples` is empty). Append it at the top. + self.append_top(sk); + k } - n = self.samples[i]; } - 0 } - /// Recomputes `next` and the segment tree from the current `samples`. - /// `samples` must already be sorted ascending. - fn rebuild_from_samples(&mut self) { + /// Append `new_sample` at position `k = self.samples.len()`, assuming it + /// is strictly greater than every current sample. Maintains `next` and + /// the segment tree. + /// + /// Amortized time: O(log k) (the segment tree doubles its capacity on + /// overflow; the per-call cost amortizes to O(log k)). 
+ fn append_top(&mut self, new_sample: usize) { let k = self.samples.len(); - let mut next: Vec> = Vec::with_capacity(k); - let mut c: Vec = Vec::with_capacity(k); - for i in 0..k { - let nv = self.get_sample(i, self.samples[i]); - next.push(nv); - let ci = match nv { - Some(v) => (i - self.samples[..i].partition_point(|&s| s < v)) as i64, - None => C_INF, - }; - c.push(ci); - } - self.next = next; - self.tree = MinSegTree::new(&c, C_INF); + self.samples.push(new_sample); + let nv = self.get_sample(k, new_sample); + self.next.push(nv); + let c_k = match nv { + Some(v) => (k - self.samples[..k].partition_point(|&s| s < v)) as i64, + None => C_INF, + }; + self.tree.append(c_k); } /// Decrements `n` to the current largest sample and replaces it with the @@ -440,4 +434,33 @@ mod tests { } } } + + #[test] + fn test_fast_grow_then_shrink_interleaved() { + // Interleave grow_k and shrink_n to stress the tree-doubling boundary + // and the lazy `c[i]` updates left behind by shrink_n on slots that + // are subsequently overwritten by grow_k. + for key in 0..50 { + for n_start in 5..15 { + let mut fast = ConsistentChooseKFastHasher::new(hasher_for_key(key), n_start); + let mut standard = ConsistentChooseKHasher::new(hasher_for_key(key), n_start); + // Grow to k=n/2. + let k_target = n_start / 2; + for _ in 0..k_target { + assert_eq!(fast.grow_k(), standard.grow_k()); + assert_eq!(fast.samples(), standard.samples()); + } + // Shrink n down until forced to stop. + while *standard.samples().last().unwrap() > standard.k() { + assert_eq!(fast.shrink_n(), standard.shrink_n()); + assert_eq!(fast.samples(), standard.samples()); + } + // Grow some more, if possible. 
+ while fast.k() < fast.n() { + assert_eq!(fast.grow_k(), standard.grow_k()); + assert_eq!(fast.samples(), standard.samples()); + } + } + } + } } diff --git a/crates/consistent-choose-k/src/min_seg_tree.rs b/crates/consistent-choose-k/src/min_seg_tree.rs index dc26637..2ea6a22 100644 --- a/crates/consistent-choose-k/src/min_seg_tree.rs +++ b/crates/consistent-choose-k/src/min_seg_tree.rs @@ -30,8 +30,15 @@ pub struct MinSegTree { /// Offset values in implicit-heap layout. seg: Vec, - /// Number of leaves (a power of two, or 0 for an empty tree). + /// Physical capacity = `size` leaves. A power of two, or `0`. size: usize, + /// Logical number of real leaves currently in use. `len <= size`. + /// Leaves in `[len, size)` hold the padding value (modulo any drift + /// accumulated from `suffix_add`). + len: usize, + /// Padding value used to fill leaves beyond `len`, including when the + /// tree grows via [`MinSegTree::append`]. + padding: i64, } impl MinSegTree { @@ -48,6 +55,8 @@ impl MinSegTree { return Self { seg: Vec::new(), size: 0, + len: 0, + padding, }; } let size = leaves.len().next_power_of_two(); @@ -66,15 +75,94 @@ impl MinSegTree { seg[r] -= m; seg[v] = m; } - Self { seg, size } + Self { + seg, + size, + len: leaves.len(), + padding, + } } - /// Number of leaves (including padding). Always a power of two, or `0`. - #[allow(dead_code)] + /// Physical number of leaves (including padding). Always a power of two, + /// or `0`. pub fn size(&self) -> usize { self.size } + /// Logical number of leaves (excluding padding). + #[allow(dead_code)] + pub fn len(&self) -> usize { + self.len + } + + /// True iff no real leaves are stored. + #[allow(dead_code)] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Appends a new leaf with value `val` at logical position `self.len()`. + /// Doubles the physical capacity when full. + /// + /// Amortized time: O(log size). 
+ pub fn append(&mut self, val: i64) { + if self.len == self.size { + self.double_capacity(); + } + self.set(self.len, val); + self.len += 1; + } + + /// Doubles `self.size`, moving every existing tree node into its + /// corresponding slot in the larger heap layout. The new right subtree + /// is filled with `self.padding`-valued leaves. + /// + /// At level `L` of the old tree (root = level 0), each node sits at + /// index `2^L - 1 + j` for some `j` in `[0, 2^L)`. After doubling the + /// physical size, that same logical position is now at level `L + 1` in + /// the left subtree, i.e. index `(2^(L+1) - 1) + j = old_idx + 2^L`. We + /// just walk levels top-down and copy each node up to its new slot. + fn double_capacity(&mut self) { + if self.size == 0 { + // Empty tree: grow to a single-leaf tree holding the padding + // value. The caller (`append`) immediately overwrites it via + // `set(0, val)`. + self.seg = vec![self.padding]; + self.size = 1; + return; + } + let new_size = self.size * 2; + let mut new_seg = vec![0i64; 2 * new_size - 1]; + + // Level-by-level shift. At level `L`, indices `[start, end)` in the + // old tree map to `[start + shift, end + shift)` in the new tree, + // where `shift = 2^L = end - start`. + let mut start = 0usize; + let mut end = 1usize; + let mut shift = 1usize; + while start < self.seg.len() { + new_seg[(start + shift)..(end + shift)].copy_from_slice(&self.seg[start..end]); + start = end; + end = 2 * end + 1; + shift *= 2; + } + + // After the level-by-level copy, `new_seg[1]` holds the old root's + // value (= the global min of the old tree). The new root must be + // `min(old_min, padding)`; we lift that common min out and store + // the residual offsets at the two children of the new root. + // Everything below the right child is already zero, which is the + // correct offset for a uniform-padding subtree. 
+ let old_root = new_seg[1]; + let new_min = old_root.min(self.padding); + new_seg[0] = new_min; + new_seg[1] = old_root - new_min; + new_seg[2] = self.padding - new_min; + + self.seg = new_seg; + self.size = new_size; + } + /// Sets leaf `i` to `val`. /// /// Time: O(log size). @@ -275,4 +363,104 @@ mod tests { t.set(0, 1); assert_eq!(t.rightmost_le_zero(), None); } + + #[test] + fn append_from_empty() { + let mut t = MinSegTree::new(&[], 1_000_000_000); + assert!(t.is_empty()); + assert_eq!(t.size(), 0); + t.append(5); + assert_eq!(t.len(), 1); + assert_eq!(t.size(), 1); + assert_eq!(t.rightmost_le_zero(), None); + t.append(-1); + assert_eq!(t.len(), 2); + assert_eq!(t.size(), 2); + assert_eq!(t.rightmost_le_zero(), Some(1)); + t.append(3); + assert_eq!(t.len(), 3); + assert_eq!(t.size(), 4); + assert_eq!(t.rightmost_le_zero(), Some(1)); + t.append(-2); + assert_eq!(t.len(), 4); + assert_eq!(t.size(), 4); + assert_eq!(t.rightmost_le_zero(), Some(3)); + t.append(0); + assert_eq!(t.len(), 5); + assert_eq!(t.size(), 8); + // Values now [5, -1, 3, -2, 0, pad, pad, pad]. + assert_eq!(t.rightmost_le_zero(), Some(4)); + } + + #[test] + fn append_preserves_existing_leaves_through_doubling() { + // After each doubling step the existing leaves must keep their values + // (and prior `suffix_add` drift) intact. + let mut t = MinSegTree::new(&[10, 20], 1_000_000_000); + assert_eq!(t.size(), 2); + t.suffix_add(0, -5); + // leaves now [5, 15]. No leaf <= 0 yet. + assert_eq!(t.rightmost_le_zero(), None); + t.append(30); + assert_eq!(t.len(), 3); + assert_eq!(t.size(), 4); + // Drive leaf 1 negative; leaves 0 and 2 stay positive. + t.set(1, -1); + assert_eq!(t.rightmost_le_zero(), Some(1)); + t.append(7); + assert_eq!(t.len(), 4); + assert_eq!(t.size(), 4); + assert_eq!(t.rightmost_le_zero(), Some(1)); + t.append(0); + // First leaf in the new (size-8) tree's right subtree. + assert_eq!(t.len(), 5); + assert_eq!(t.size(), 8); + // values: [5, -1, 30, 7, 0, pad, pad, pad]. 
+ assert_eq!(t.rightmost_le_zero(), Some(4)); + } + + #[test] + fn append_matches_naive_under_random_ops() { + // Start empty and grow via `append`, mixed with `set`, `suffix_add`, + // and `rightmost_le_zero` queries. Cross-check against `Naive`. + let mut t = MinSegTree::new(&[], 1_000_000_000); + let mut naive = Naive::new(Vec::new()); + + let mut state: u64 = 0xcafe_f00d_dead_beef; + let mut next = || { + state = state + .wrapping_mul(6364136223846793005) + .wrapping_add(1442695040888963407); + state + }; + + for _ in 0..3_000 { + // Bias slightly toward `append` so the tree actually grows. + let op = next() % 4; + if op == 0 || t.is_empty() { + let v = (next() as i64) % 21 - 10; + t.append(v); + naive.values.push(v); + } else { + match op % 3 { + 0 => { + let i = (next() as usize) % t.len(); + let v = (next() as i64) % 21 - 10; + t.set(i, v); + naive.set(i, v); + } + 1 => { + let lo = (next() as usize) % (t.len() + 1); + let d = (next() as i64) % 9 - 4; + t.suffix_add(lo, d); + naive.suffix_add(lo, d); + } + _ => { + assert_eq!(t.rightmost_le_zero(), naive.rightmost_le_zero()); + } + } + } + } + assert_eq!(t.rightmost_le_zero(), naive.rightmost_le_zero()); + } } From 85f1d1aee89fd850d7b3af3a58158bd3a24ea181 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 21 May 2026 16:21:22 +0200 Subject: [PATCH 43/49] rename and use compact tree --- .../src/compact_min_seg_tree.rs | 1 - .../consistent-choose-k/src/fast_choose_k.rs | 12 +++---- .../consistent-choose-k/src/min_seg_tree.rs | 34 +++++++++---------- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/crates/consistent-choose-k/src/compact_min_seg_tree.rs b/crates/consistent-choose-k/src/compact_min_seg_tree.rs index 801e2e7..d5076a3 100644 --- a/crates/consistent-choose-k/src/compact_min_seg_tree.rs +++ b/crates/consistent-choose-k/src/compact_min_seg_tree.rs @@ -143,7 +143,6 @@ impl CompactMinSegTree { /// is no padding slot left to overwrite. 
/// /// Time: amortized O(log size); worst-case O(size) on the doubling step. - #[allow(dead_code)] pub fn push(&mut self, value: i64) { if self.len == self.size { self.grow(); diff --git a/crates/consistent-choose-k/src/fast_choose_k.rs b/crates/consistent-choose-k/src/fast_choose_k.rs index 39f6d24..d4ab87e 100644 --- a/crates/consistent-choose-k/src/fast_choose_k.rs +++ b/crates/consistent-choose-k/src/fast_choose_k.rs @@ -1,10 +1,10 @@ -use crate::min_seg_tree::MinSegTree; +use crate::compact_min_seg_tree::CompactMinSegTree; use crate::{ConsistentHasher, ManySeqBuilder}; /// "Block count" sentinel for slots that can never be selected (e.g. their /// sequence is exhausted). Chosen well above any realistic true count so that /// the lazy `-1` updates applied by `shrink_n` cannot drive it down to zero -/// in any reasonable workload, and so that padding leaves in [`MinSegTree`] +/// in any reasonable workload, and so that padding leaves in [`CompactMinSegTree`] /// are never selected. const C_INF: i64 = 1_000_000_000_000; @@ -63,7 +63,7 @@ pub struct ConsistentChooseKFastHasher { next: Vec>, /// Per-slot block counts `c[i]`. See struct-level docs for definition. /// Empty when `k == 0`. 
- tree: MinSegTree, + tree: CompactMinSegTree, } impl ConsistentChooseKFastHasher { @@ -76,7 +76,7 @@ impl ConsistentChooseKFastHasher { n, samples: Vec::new(), next: Vec::new(), - tree: MinSegTree::new(&[], C_INF), + tree: CompactMinSegTree::new(&[], C_INF), } } @@ -117,7 +117,7 @@ impl ConsistentChooseKFastHasher { }; c.push(ci); } - this.tree = MinSegTree::new(&c, C_INF); + this.tree = CompactMinSegTree::new(&c, C_INF); this } @@ -188,7 +188,7 @@ impl ConsistentChooseKFastHasher { Some(v) => (k - self.samples[..k].partition_point(|&s| s < v)) as i64, None => C_INF, }; - self.tree.append(c_k); + self.tree.push(c_k); } /// Decrements `n` to the current largest sample and replaces it with the diff --git a/crates/consistent-choose-k/src/min_seg_tree.rs b/crates/consistent-choose-k/src/min_seg_tree.rs index 2ea6a22..5fd1365 100644 --- a/crates/consistent-choose-k/src/min_seg_tree.rs +++ b/crates/consistent-choose-k/src/min_seg_tree.rs @@ -37,7 +37,7 @@ pub struct MinSegTree { /// accumulated from `suffix_add`). len: usize, /// Padding value used to fill leaves beyond `len`, including when the - /// tree grows via [`MinSegTree::append`]. + /// tree grows via [`MinSegTree::push`]. padding: i64, } @@ -105,7 +105,7 @@ impl MinSegTree { /// Doubles the physical capacity when full. /// /// Amortized time: O(log size). - pub fn append(&mut self, val: i64) { + pub fn push(&mut self, val: i64) { if self.len == self.size { self.double_capacity(); } @@ -125,7 +125,7 @@ impl MinSegTree { fn double_capacity(&mut self) { if self.size == 0 { // Empty tree: grow to a single-leaf tree holding the padding - // value. The caller (`append`) immediately overwrites it via + // value. The caller (`push`) immediately overwrites it via // `set(0, val)`. 
self.seg = vec![self.padding]; self.size = 1; @@ -365,27 +365,27 @@ mod tests { } #[test] - fn append_from_empty() { + fn push_from_empty() { let mut t = MinSegTree::new(&[], 1_000_000_000); assert!(t.is_empty()); assert_eq!(t.size(), 0); - t.append(5); + t.push(5); assert_eq!(t.len(), 1); assert_eq!(t.size(), 1); assert_eq!(t.rightmost_le_zero(), None); - t.append(-1); + t.push(-1); assert_eq!(t.len(), 2); assert_eq!(t.size(), 2); assert_eq!(t.rightmost_le_zero(), Some(1)); - t.append(3); + t.push(3); assert_eq!(t.len(), 3); assert_eq!(t.size(), 4); assert_eq!(t.rightmost_le_zero(), Some(1)); - t.append(-2); + t.push(-2); assert_eq!(t.len(), 4); assert_eq!(t.size(), 4); assert_eq!(t.rightmost_le_zero(), Some(3)); - t.append(0); + t.push(0); assert_eq!(t.len(), 5); assert_eq!(t.size(), 8); // Values now [5, -1, 3, -2, 0, pad, pad, pad]. @@ -393,7 +393,7 @@ mod tests { } #[test] - fn append_preserves_existing_leaves_through_doubling() { + fn push_preserves_existing_leaves_through_doubling() { // After each doubling step the existing leaves must keep their values // (and prior `suffix_add` drift) intact. let mut t = MinSegTree::new(&[10, 20], 1_000_000_000); @@ -401,17 +401,17 @@ mod tests { t.suffix_add(0, -5); // leaves now [5, 15]. No leaf <= 0 yet. assert_eq!(t.rightmost_le_zero(), None); - t.append(30); + t.push(30); assert_eq!(t.len(), 3); assert_eq!(t.size(), 4); // Drive leaf 1 negative; leaves 0 and 2 stay positive. t.set(1, -1); assert_eq!(t.rightmost_le_zero(), Some(1)); - t.append(7); + t.push(7); assert_eq!(t.len(), 4); assert_eq!(t.size(), 4); assert_eq!(t.rightmost_le_zero(), Some(1)); - t.append(0); + t.push(0); // First leaf in the new (size-8) tree's right subtree. 
assert_eq!(t.len(), 5); assert_eq!(t.size(), 8); @@ -420,8 +420,8 @@ mod tests { } #[test] - fn append_matches_naive_under_random_ops() { - // Start empty and grow via `append`, mixed with `set`, `suffix_add`, + fn push_matches_naive_under_random_ops() { + // Start empty and grow via `push`, mixed with `set`, `suffix_add`, // and `rightmost_le_zero` queries. Cross-check against `Naive`. let mut t = MinSegTree::new(&[], 1_000_000_000); let mut naive = Naive::new(Vec::new()); @@ -435,11 +435,11 @@ mod tests { }; for _ in 0..3_000 { - // Bias slightly toward `append` so the tree actually grows. + // Bias slightly toward `push` so the tree actually grows. let op = next() % 4; if op == 0 || t.is_empty() { let v = (next() as i64) % 21 - 10; - t.append(v); + t.push(v); naive.values.push(v); } else { match op % 3 { From 21524aafe188764285d4e40061cbe0a29042ebf6 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 21 May 2026 17:16:21 +0200 Subject: [PATCH 44/49] add benchmark results --- .../benchmarks/performance.rs | 2 +- .../src/compact_min_seg_tree.rs | 18 +++++ .../consistent-choose-k/src/fast_choose_k.rs | 65 ++++++++++++++++--- 3 files changed, 76 insertions(+), 9 deletions(-) diff --git a/crates/consistent-choose-k/benchmarks/performance.rs b/crates/consistent-choose-k/benchmarks/performance.rs index 0388099..5fff1f2 100644 --- a/crates/consistent-choose-k/benchmarks/performance.rs +++ b/crates/consistent-choose-k/benchmarks/performance.rs @@ -75,7 +75,7 @@ fn append_vs_new_with_k(c: &mut Criterion) { group.bench_function(BenchmarkId::new(format!("fast_append/k_{k}"), n), |b| { b.iter(|| { let h = DefaultHasher::default(); - let mut iter = ConsistentChooseKFastHasher::new(h, n + k); + let mut iter = ConsistentChooseKFastHasher::new_with_capacity(h, n + k, k); for _ in 0..k { black_box(iter.grow_k()); } diff --git a/crates/consistent-choose-k/src/compact_min_seg_tree.rs b/crates/consistent-choose-k/src/compact_min_seg_tree.rs index d5076a3..6617266 100644 --- 
a/crates/consistent-choose-k/src/compact_min_seg_tree.rs +++ b/crates/consistent-choose-k/src/compact_min_seg_tree.rs @@ -127,6 +127,24 @@ impl CompactMinSegTree { } } + /// Builds an empty tree, preallocating the underlying `val` buffer to + /// fit up to `capacity` leaves without reallocation. + /// + /// Time: O(1). + pub fn with_capacity(capacity: usize, padding: i64) -> Self { + let cap = if capacity == 0 { + 0 + } else { + capacity.next_power_of_two() + }; + Self { + val: Vec::with_capacity(cap), + size: 0, + len: 0, + padding, + } + } + /// Number of real leaves appended so far (excluding padding slots). #[allow(dead_code)] pub fn len(&self) -> usize { diff --git a/crates/consistent-choose-k/src/fast_choose_k.rs b/crates/consistent-choose-k/src/fast_choose_k.rs index d4ab87e..c29f15c 100644 --- a/crates/consistent-choose-k/src/fast_choose_k.rs +++ b/crates/consistent-choose-k/src/fast_choose_k.rs @@ -11,6 +11,51 @@ const C_INF: i64 = 1_000_000_000_000; /// Fast variant of [`crate::ConsistentChooseKHasher`] specialized for /// repeated `shrink_n` calls at fixed `k`. /// +/// # Benchmarks +/// +/// Measured on the `performance` benchmark suite (release, M-series macOS). +/// "std" = [`crate::ConsistentChooseKHasher`], "fast" = this type. Lower is +/// faster; the `speedup` column is the std/fast wall-clock ratio. 
+/// +/// **Replay `shrink_n` from a fresh `new_with_k` down to `n == k`** (the +/// headline use case — total time over `n - k` shrinks): +/// +/// | n | k | std | fast | speedup | +/// |--------:|----:|----------:|---------:|--------:| +/// | 100 | 2 | 218 ns | 237 ns | 0.9× | +/// | 100 | 3 | 428 ns | 371 ns | 1.2× | +/// | 100 | 10 | 3.27 µs | 1.61 µs | 2.0× | +/// | 100 | 100 | 100 µs | 25.0 µs | 4.0× | +/// | 1 000 | 100 | 357 µs | 54.8 µs | 6.5× | +/// | 10 000 | 100 | 688 µs | 89.9 µs | 7.7× | +/// | 100 000 | 100 | 1 074 µs | 131 µs | 8.2× | +/// +/// **`grow_k` cascade** (append `k` items into a freshly preallocated +/// hasher, `n = 10 000`): +/// +/// | k | std | fast | speedup | +/// |----:|----------:|---------:|--------:| +/// | 2 | 48.5 ns | 153 ns | 0.3× | +/// | 3 | 71.1 ns | 224 ns | 0.3× | +/// | 10 | 1.10 µs | 1.02 µs | 1.1× | +/// | 100 | 79.6 µs | 15.6 µs | 5.1× | +/// +/// **`new_with_k`** (bulk construction, `n = 10 000`): +/// +/// | k | std | fast | speedup | +/// |----:|----------:|---------:|--------:| +/// | 2 | 54.7 ns | 170 ns | 0.3× | +/// | 3 | 77.4 ns | 244 ns | 0.3× | +/// | 10 | 477 ns | 835 ns | 0.6× | +/// | 100 | 8.47 µs | 12.0 µs | 0.7× | +/// +/// The fast hasher pays a constant per-call overhead (segment tree + `next` +/// table) which dominates at small `k`. The asymptotic improvement is in +/// repeated mutation (`shrink_n`, `grow_k` cascades) at large `k`: the +/// standard hasher does `O(k)` `get_sample` calls and a scan per shrink, +/// while this variant does `O(1)` `get_sample` calls plus `O(log k)` +/// segment-tree work (amortized, plus rare binary-search corrections). +/// /// # Invariants /// /// For each position `i` in `0..k`: @@ -47,11 +92,6 @@ const C_INF: i64 = 1_000_000_000_000; /// * **Downward stale** (`next[i] <= samples[i - 1]`): the lazy `-1` made /// `c[i]` look unblocked but `next[i]` would actually fit at some position /// `< i`. We correct `c[i]` upward via a binary search over `samples[..i]`. 
-/// -/// The standard `shrink_n` performs roughly `k / 2` `get_sample` calls on -/// average; this variant performs `O(1)` `get_sample` calls per `shrink_n` -/// plus `O(log k)` segment-tree work (amortized, plus the binary-search -/// corrections). pub struct ConsistentChooseKFastHasher { builder: H, n: usize, @@ -71,12 +111,21 @@ impl ConsistentChooseKFastHasher { /// /// Time: O(1) pub fn new(builder: H, n: usize) -> Self { + Self::new_with_capacity(builder, n, 0) + } + + /// Create a new instance for `n` nodes with `k = 0` samples, preallocating + /// enough space in `samples` and `next` to grow to `capacity` samples + /// without reallocating. The segment tree still grows lazily via doubling. + /// + /// Time: O(1) + pub fn new_with_capacity(builder: H, n: usize, capacity: usize) -> Self { Self { builder, n, - samples: Vec::new(), - next: Vec::new(), - tree: CompactMinSegTree::new(&[], C_INF), + samples: Vec::with_capacity(capacity), + next: Vec::with_capacity(capacity), + tree: CompactMinSegTree::with_capacity(capacity, C_INF), } } From c4c37fb35760ca78fe754516be58892b2e5c5775 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 22 May 2026 11:25:19 +0200 Subject: [PATCH 45/49] reservoir sampling version! 
--- .../benchmarks/performance.rs | 121 ++- .../src/consistent_hash.rs | 25 +- crates/consistent-choose-k/src/fast_grow_n.rs | 180 +++++ crates/consistent-choose-k/src/lib.rs | 6 + .../src/live_min_seg_tree.rs | 592 ++++++++++++++ .../consistent-choose-k/src/sample_treap.rs | 737 ++++++++++++++++++ 6 files changed, 1658 insertions(+), 3 deletions(-) create mode 100644 crates/consistent-choose-k/src/fast_grow_n.rs create mode 100644 crates/consistent-choose-k/src/live_min_seg_tree.rs create mode 100644 crates/consistent-choose-k/src/sample_treap.rs diff --git a/crates/consistent-choose-k/benchmarks/performance.rs b/crates/consistent-choose-k/benchmarks/performance.rs index 5fff1f2..59739e8 100644 --- a/crates/consistent-choose-k/benchmarks/performance.rs +++ b/crates/consistent-choose-k/benchmarks/performance.rs @@ -5,7 +5,8 @@ use std::{ }; use consistent_choose_k::{ - ConsistentChooseKFastHasher, ConsistentChooseKHasher, ConsistentHasher, + ConsistentChooseKFastGrowHasher, ConsistentChooseKFastHasher, ConsistentChooseKHasher, + ConsistentHasher, __bench_internals::{CompactMinSegTree, MinSegTree}, }; use criterion::{ @@ -127,6 +128,122 @@ fn shrink_n(c: &mut Criterion) { group.finish(); } +/// Reservoir-sampling comparison for `ConsistentChooseKFastGrowHasher`. +/// +/// All variants ingest a stream of `n` items and maintain a `k`-sized +/// sample throughout. The point is to see whether fast-grow's `O(log k)` +/// per-displacement work is competitive against the standard reservoir +/// sampling algorithms: +/// +/// * `fast_grow` — call `grow_n()` until `n` events have been processed. +/// * `reservoir_r` — Algorithm R (Vitter, 1985). Visits every item, does +/// one PRNG step + one comparison per item. `O(n)` total. +/// * `reservoir_l` — Algorithm L (Li, 1994). Skip-optimized: only +/// `O(k * log(n/k))` items are inspected, so it should be the closest +/// competitor to incremental hash-based grow. +/// +/// All three produce a `k`-sample of `0..n`. 
Output distributions differ +/// (deterministic hash vs. uniform random) but the throughput question is +/// fair: per-item work cost as the stream grows. +fn grow_n(c: &mut Criterion) { + let mut group = c.benchmark_group("grow_n"); + group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + for k in [100usize, 1000] { + for n in [10_000usize, 100_000, 1_000_000] { + if n <= k { + continue; + } + group.throughput(Throughput::Elements(n as u64)); + + group.bench_function(BenchmarkId::new(format!("fast_grow/k_{k}"), n), |b| { + b.iter(|| { + let h = DefaultHasher::default(); + let mut iter = ConsistentChooseKFastGrowHasher::new(h, k); + while iter.n() < n { + black_box(iter.grow_n()); + } + black_box(iter); + }) + }); + + group.bench_function(BenchmarkId::new(format!("reservoir_r/k_{k}"), n), |b| { + b.iter(|| { + black_box(reservoir_r(n, k, 0x9E37_79B9_7F4A_7C15)); + }) + }); + + group.bench_function(BenchmarkId::new(format!("reservoir_l/k_{k}"), n), |b| { + b.iter(|| { + black_box(reservoir_l(n, k, 0x9E37_79B9_7F4A_7C15)); + }) + }); + } + } + group.finish(); +} + +/// SplitMix64 step. Cheap PRNG suitable for benchmark-grade randomness. +#[inline(always)] +fn splitmix64(state: &mut u64) -> u64 { + *state = state.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = *state; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^ (z >> 31) +} + +#[inline(always)] +fn next_f64(state: &mut u64) -> f64 { + // Top 53 bits → uniform in `[0, 1)`. + (splitmix64(state) >> 11) as f64 * (1.0 / (1u64 << 53) as f64) +} + +/// Algorithm R (Vitter 1985): linear-scan reservoir sampling. +fn reservoir_r(n: usize, k: usize, seed: u64) -> Vec { + let mut samples: Vec = (0..k).collect(); + let mut state = seed; + for i in k..n { + // Uniform integer in `0..=i` via the 64×64→128 multiply trick. 
+ let r = splitmix64(&mut state); + let j = ((r as u128 * (i as u128 + 1)) >> 64) as usize; + if j < k { + samples[j] = i; + } + } + samples +} + +/// Algorithm L (Li 1994): skip-optimized reservoir sampling. Total work +/// is `O(k + k * log(n / k))` regardless of `n`. +fn reservoir_l(n: usize, k: usize, seed: u64) -> Vec { + let mut samples: Vec = (0..k).collect(); + let mut state = seed; + let inv_k = 1.0 / k as f64; + // `w` shrinks geometrically; each step's skip distance is drawn from a + // geometric distribution parameterised by `w`. + let mut w = (next_f64(&mut state).ln() * inv_k).exp(); + let mut i = k - 1; + loop { + // Skip `floor(ln(U) / ln(1 - w))` items, then visit the next one. + let log_u = next_f64(&mut state).ln(); + let log_1mw = (1.0 - w).ln(); + let skip = (log_u / log_1mw) as usize + 1; + i = match i.checked_add(skip) { + Some(v) => v, + None => break, + }; + if i >= n { + break; + } + // Replace a uniformly chosen slot with the freshly visited item. + let r = splitmix64(&mut state); + let j = ((r as u128 * k as u128) >> 64) as usize; + samples[j] = i; + w *= (next_f64(&mut state).ln() * inv_k).exp(); + } + samples +} + /// Workload that mimics the segment-tree usage pattern inside `shrink_n`: /// repeatedly find the right-most non-positive leaf, set it to a value, /// and shift a suffix. 
@@ -203,6 +320,6 @@ criterion_group!( .measurement_time(Duration::from_millis(4000)) .nresamples(1000); - targets = throughput_benchmark, append_vs_new_with_k, shrink_n, seg_tree_compare, + targets = throughput_benchmark, append_vs_new_with_k, shrink_n, grow_n, seg_tree_compare, ); criterion_main!(benches); diff --git a/crates/consistent-choose-k/src/consistent_hash.rs b/crates/consistent-choose-k/src/consistent_hash.rs index 0a44cde..6edd585 100644 --- a/crates/consistent-choose-k/src/consistent_hash.rs +++ b/crates/consistent-choose-k/src/consistent_hash.rs @@ -165,8 +165,13 @@ pub struct ConsistentHashIterator { impl ConsistentHashIterator { pub fn new(n: usize, builder: H) -> Self { + // Buckets are indexed by the bit value `b` (a power of two) and + // cover the range `[b, 2*b)`. We want every bucket whose upper + // bound exceeds `n`, i.e. `2*b > n`, i.e. + // `b >= next_power_of_two(n/2 + 1)`. + let bit_min = (n / 2 + 1).next_power_of_two() as u64; Self { - bits: builder.bit_mask() & !((n + 2).next_power_of_two() as u64 / 2 - 1), + bits: builder.bit_mask() & !(bit_min - 1), stack: if n == 0 { vec![0] } else { vec![] }, builder, n, @@ -271,4 +276,22 @@ mod tests { vec![7577, 7541, 7538, 7822, 7763, 7687, 7718, 7723, 7846, 7723, 7688, 7716, 7658] ); } + + /// Regression test for an off-by-one in `ConsistentHashIterator::new`'s + /// bucket-mask construction: starting the iterator at `n` must not skip + /// over a value `>= n` that the full iteration from 0 would visit. 
+ #[test] + fn test_iterator_start_includes_n() { + let builder = hasher_for_key(1).seq_builder(3); + let from_zero: Vec = ConsistentHashIterator::new(0, builder.clone()) + .take(10) + .collect(); + for &v in &from_zero { + assert_eq!( + ConsistentHashIterator::new(v, builder.clone()).next(), + Some(v), + "iterator starting at {v} must yield {v} as the first value (full sequence: {from_zero:?})", + ); + } + } } diff --git a/crates/consistent-choose-k/src/fast_grow_n.rs b/crates/consistent-choose-k/src/fast_grow_n.rs new file mode 100644 index 0000000..8e374e5 --- /dev/null +++ b/crates/consistent-choose-k/src/fast_grow_n.rs @@ -0,0 +1,180 @@ +//! Fast variant of [`crate::ConsistentChooseKHasher`] specialized for +//! repeated `grow_n` calls at fixed `k`. +//! +//! Companion to [`crate::ConsistentChooseKFastHasher`] (which is fast at +//! `shrink_n` for fixed `k`). The two specializations cannot easily share +//! a single representation: `shrink_n` keeps `samples` sorted by value and +//! tracks per-position "block counts" for each *position-bound* sequence +//! id; `grow_n` keeps `samples` in insertion order and tracks per-sample +//! "life" = `seq_id - position`. +//! +//! # Algorithm sketch +//! +//! State: +//! * `next_heap`: min-heap of `(next_candidate_sample, seq_id)` — one +//! entry per active sequence, keyed by the sequence's smallest sample +//! strictly greater than its currently-selected sample (or its first +//! sample, if it has none yet). +//! * `samples`: a [`SampleTreap`] holding `(sample, life)` pairs in +//! insertion order, where `life = seq_id - position`. Once `k` samples +//! are present, an entry whose `life <= 0` is the *displaced* sample +//! that must be evicted on the next firing. +//! +//! `grow_n` (semantics: `n += 1`): +//! +//! 1. While the heap's smallest `next_candidate < n_new` (in practice +//! zero or one iteration, since `n` grows by one): +//! 1. Pop `(s, seq_id)`. +//! 2. 
Push the next candidate for `seq_id` (smallest sample > `s`) +//! back into the heap. +//! 3. Append `(s, life = seq_id - new_position)` at the end of the +//! treap via `push_back`. +//! 4. If the treap now has more than `k` entries (i.e. we displaced +//! a sample), find the rightmost position with `life <= 0` via +//! `find_rightmost_le_zero`, `remove_at` it, and apply +//! `add_life_suffix(p_dead, +1)` so the remaining entries (which +//! shifted left by one) see their `life` increase by one. +//! 2. Set `n = n_new`. +//! +//! Per-call cost: O(log k) expected (heap pop/push + treap ops). + +use std::cmp::Reverse; +use std::collections::BinaryHeap; + +use crate::consistent_hash::ConsistentHashIterator; +use crate::sample_treap::SampleTreap; +use crate::{ConsistentHasher, ManySeqBuilder}; + +/// Fast variant of [`crate::ConsistentChooseKHasher`] specialized for +/// repeated `grow_n` calls at fixed `k`. See module-level documentation +/// for the algorithm. +pub struct ConsistentChooseKFastGrowHasher { + builder: H, + /// Current universe size. + n: usize, + /// Fixed sample count (number of sequences tracked in `next_heap`). + k: usize, + /// Min-heap of `(next_candidate_sample, seq_id)` keyed by the first + /// component. `grow_n` re-pushes the following candidate of every popped + /// sequence and panics if there is none, so it never shrinks the heap. + next_heap: BinaryHeap>, + /// Currently-selected samples in insertion order. Each entry's `life` + /// is `seq_id - position`; an entry with `life <= 0` is displaced and + /// will be evicted on the next firing. + samples: SampleTreap, +} + +impl ConsistentChooseKFastGrowHasher +where + H::Builder: Clone, +{ + /// Create a new instance for `k` sequences with `n = k`. The sample + /// treap starts out holding the samples `0..k`; the heap is seeded with + /// each sequence's first candidate sample that is `>= k`. + /// + /// Time: O(k). 
+ pub fn new(builder: H, k: usize) -> Self { + let mut next_heap = BinaryHeap::with_capacity(k); + let mut life = vec![0; k]; + for seq in 0..k { + for l in ConsistentHashIterator::new(0, builder.seq_builder(seq)) { + let sample = l + seq; + if sample >= k { + next_heap.push(Reverse((sample, seq))); + break; + } + life[sample] = l.max(life[sample]); + } + } + let mut samples = SampleTreap::with_capacity(k); + for (sample, life) in life.into_iter().enumerate() { + samples.push_back(sample, life as i32); + } + + Self { + builder, + n: k, + k, + next_heap, + samples, + } + } + + /// Current universe size. + pub fn n(&self) -> usize { + self.n + } + + /// Target sample count (fixed at construction). + pub fn k(&self) -> usize { + self.k + } + + /// Returns the currently-selected samples sorted by value. + pub fn samples(&self) -> Vec { + self.samples.samples() + } + + /// Advance `n` to one past the next sample that enters the choose-k + /// set (so `n` may grow by more than one) and update the set. Always + /// returns `Some(new_sample)`; never `None` (failures panic instead). + /// + /// Time: O(log k) expected. 
+ pub fn grow_n(&mut self) -> Option { + loop { + let Reverse((next, seq)) = self + .next_heap + .pop() + .expect("there are always k elements in the heap!"); + let after = ConsistentHasher::new(self.builder.seq_builder(seq)) + .next(self.n.max(next + 1) - seq) + .expect("") + + seq; + self.next_heap.push(Reverse((after, seq))); + if next >= self.n { + self.n = next + 1; + let pos = self + .samples + .find_rightmost_le_zero() + .expect("there must be a displaced sample to evict"); + self.samples.remove_at_decrementing_suffix(pos); + self.samples.push_back(next, self.k as i32 - seq as i32 - 1); + break Some(next); + } + } + } +} + +#[cfg(test)] +mod tests { + use std::hash::{DefaultHasher, Hash}; + + use super::*; + use crate::ConsistentChooseKHasher; + + fn hasher_for_key(key: u64) -> DefaultHasher { + let mut h = DefaultHasher::default(); + key.hash(&mut h); + h + } + + #[test] + fn grow_n_matches_new_with_k() { + for key in 0..200 { + for k in 1..10 { + let mut fast = ConsistentChooseKFastGrowHasher::new(hasher_for_key(key), k); + while fast.n() < 10000 { + let n = fast.n(); + let std = ConsistentChooseKHasher::new_with_k(hasher_for_key(key), n, k); + let mut expected: Vec = std.samples().to_vec(); + expected.sort(); + let mut got = fast.samples(); + got.sort(); + assert_eq!(got, expected, "key={key}, k={k}, n={n}"); + fast.grow_n(); + } + } + } + } + +} diff --git a/crates/consistent-choose-k/src/lib.rs b/crates/consistent-choose-k/src/lib.rs index c93a151..839def6 100644 --- a/crates/consistent-choose-k/src/lib.rs +++ b/crates/consistent-choose-k/src/lib.rs @@ -2,19 +2,25 @@ mod choose_k; mod compact_min_seg_tree; mod consistent_hash; mod fast_choose_k; +mod fast_grow_n; +mod live_min_seg_tree; mod min_seg_tree; mod node_map; +mod sample_treap; pub use choose_k::ConsistentChooseKHasher; pub use consistent_hash::{ ConsistentHashIterator, ConsistentHashRevIterator, ConsistentHasher, HashSeqBuilder, HashSequence, ManySeqBuilder, }; pub use 
fast_choose_k::ConsistentChooseKFastHasher; +pub use fast_grow_n::ConsistentChooseKFastGrowHasher; pub use node_map::ConsistentNodeMap; #[doc(hidden)] #[cfg(feature = "__bench_internals")] pub mod __bench_internals { pub use crate::compact_min_seg_tree::CompactMinSegTree; + pub use crate::live_min_seg_tree::LiveMinSegTree; pub use crate::min_seg_tree::MinSegTree; + pub use crate::sample_treap::SampleTreap; } diff --git a/crates/consistent-choose-k/src/live_min_seg_tree.rs b/crates/consistent-choose-k/src/live_min_seg_tree.rs new file mode 100644 index 0000000..a20af85 --- /dev/null +++ b/crates/consistent-choose-k/src/live_min_seg_tree.rs @@ -0,0 +1,592 @@ +//! A compact min segment tree with order-statistics support over LIVE +//! leaves only, plus O(log size) tombstoning of arbitrary live leaves. +//! +//! Same compact (pair-heap) packing as +//! [`crate::compact_min_seg_tree::CompactMinSegTree`] for the life-min tree, +//! and a parallel per-pair "alive count" array tracking the number of live +//! leaves in each pair's LEFT subtree. Memory: 12 bytes per leaf +//! (`i64` for the life offset, `u32` for the alive count). +//! +//! All public leaf indices are LOGICAL (= rank among live leaves). The +//! structure internally maps logical -> physical leaf using `alive` for +//! rank-select during descent. +//! +//! Tombstoned leaves are simply set to `padding` (which must be `>` every +//! real life value, so they are never selected by [`Self::find_dead`]) and +//! their entry in the `alive` array is decremented along the path to the +//! root. The caller is responsible for compacting (rebuilding) the tree +//! when the dead-leaf fraction grows too large. + +const SIDE_BIT: i64 = i64::MIN; +const OFFSET_MASK: i64 = i64::MAX; + +/// See module docs. +pub struct LiveMinSegTree { + /// Pair-heap of life offsets. Same layout as `CompactMinSegTree::val`. + val: Vec, + /// Pair-heap of alive counts. 
+ /// + /// * `alive[0]` is the total number of live leaves (mirrors `val[0]` + /// being the global min). + /// * For `p` in `1..size`, `alive[p]` is the number of live leaves in + /// the LEFT subtree of the pair whose offset is stored in `val[p]`. + /// The right subtree's alive count is `parent_total - alive[p]`. + alive: Vec, + /// Power-of-two physical capacity (number of leaves, real + tombstone), + /// or `0` when the tree is empty. + size: usize, + /// Number of leaves ever pushed (i.e. number of physical slots in use, + /// counting both live leaves and tombstones). + physical_len: usize, + /// Number of currently live (non-tombstoned) leaves. + n_live: usize, + /// Padding value used for unused slots and tombstones. Must be `>` every + /// real life value the caller will ever push. + padding: i64, +} + +impl LiveMinSegTree { + /// Builds an empty tree, preallocating the underlying buffers to fit up + /// to `capacity` leaves without reallocation. + /// + /// Time: O(1). + pub fn with_capacity(capacity: usize, padding: i64) -> Self { + let cap = if capacity == 0 { + 0 + } else { + capacity.next_power_of_two() + }; + Self { + val: Vec::with_capacity(cap), + alive: Vec::with_capacity(cap), + size: 0, + physical_len: 0, + n_live: 0, + padding, + } + } + + /// Number of live leaves currently in the tree. + pub fn len(&self) -> usize { + self.n_live + } + + /// True iff no live leaves are stored. + #[allow(dead_code)] + pub fn is_empty(&self) -> bool { + self.n_live == 0 + } + + /// Number of physical leaves (live + tombstoned). Useful for caller-side + /// compaction heuristics. + #[allow(dead_code)] + pub fn physical_len(&self) -> usize { + self.physical_len + } + + /// Number of tombstoned leaves. + #[allow(dead_code)] + pub fn n_dead(&self) -> usize { + self.physical_len - self.n_live + } + + /// Physical capacity (power of two, or `0` when empty). 
+ #[allow(dead_code)] + pub fn size(&self) -> usize { + self.size + } + + /// Appends `life` as a new live leaf at the rightmost physical slot. + /// The new leaf's logical index is `len() - 1` after this call. + /// + /// Time: amortized O(log size); worst-case O(size) on the doubling step. + pub fn push(&mut self, life: i64) { + if self.physical_len == self.size { + self.grow(); + } + let p_phys = self.physical_len; + self.physical_len = p_phys + 1; + self.n_live += 1; + self.set_physical(p_phys, life); + self.bump_alive_path(p_phys, 1); + } + + /// Overwrites the life of the live leaf at logical position `p`. + /// + /// Time: O(log size). + #[allow(dead_code)] + pub fn set(&mut self, p: usize, life: i64) { + debug_assert!(p < self.n_live, "logical index out of range"); + let p_phys = self.physical_at_rank(p); + self.set_physical(p_phys, life); + } + + /// Returns the logical index of the rightmost live leaf whose life is + /// `<= 0`, or `None` if no such leaf exists. + /// + /// Time: O(log size). + pub fn find_dead(&self) -> Option { + if self.size == 0 || self.n_live == 0 || self.val[0] > 0 { + return None; + } + if self.size == 1 { + // Single leaf, which is live (n_live > 0) and val[0] <= 0. + return Some(0); + } + let mut acc = self.val[0]; + let mut pair_idx = 1; + let mut logical = 0u32; + let half = self.size / 2; + while pair_idx < half { + let (l_off, r_off) = unpack(self.val[pair_idx]); + let r_min = acc + r_off; + if r_min <= 0 { + // Skip the entire left subtree → it contributes alive[pair_idx] + // live leaves to the left of our target. + logical += self.alive[pair_idx]; + acc = r_min; + pair_idx = 2 * pair_idx + 1; + } else { + acc += l_off; + pair_idx *= 2; + } + } + // Leaf pair. Tombstones sit at `>= padding > 0`, so any leaf with + // value `<= 0` is necessarily live; no need to consult `alive`. + let (_, r_off) = unpack(self.val[pair_idx]); + if acc + r_off <= 0 { + // Right leaf is dead; skip the left leaf's alive bit. 
+ logical += self.alive[pair_idx]; + Some(logical as usize) + } else { + Some(logical as usize) + } + } + + /// Tombstones the live leaf at logical position `p`. + /// + /// After this call: + /// * `len()` decreases by 1, `n_dead()` increases by 1. + /// * Every live leaf strictly to the right of the killed slot has its + /// life incremented by 1 (matching the "shift-left" semantics where + /// each surviving leaf inherits the rank of its right neighbour). + /// + /// Time: O(log size). + pub fn kill(&mut self, p: usize) { + debug_assert!(p < self.n_live, "logical index out of range"); + let p_phys = self.physical_at_rank(p); + // Tombstone the leaf and bump all physically-later leaves (real or + // tombstone) by +1. Tombstones sit at `>= padding`, so they remain + // unselectable. + self.set_physical(p_phys, self.padding); + self.suffix_add(p_phys + 1, 1); + self.bump_alive_path(p_phys, -1); + self.n_live -= 1; + } + + // ---- Internal helpers (all in physical-leaf space) ------------------ + + /// Doubles `size`, preserving the existing tree as the left subtree of a + /// fresh root and filling the right subtree with all-padding leaves. + /// The `alive` array is grown in lockstep with all-zero right subtree. + fn grow(&mut self) { + if self.size == 0 { + self.val.push(self.padding); + self.alive.push(0); + self.size = 1; + return; + } + let old_size = self.size; + let new_size = old_size * 2; + let old_root_min = self.val[0]; + let old_root_alive = self.alive[0]; + self.val.resize(new_size, 0); + self.alive.resize(new_size, 0); + // Shift each level of the pair-heap one step deeper. Process from + // the deepest level upward; each step is a contiguous slice copy + // followed by zeroing the now-vacated source. 
+ let mut level = old_size / 2; + while level > 0 { + self.val.copy_within(level..2 * level, 2 * level); + self.val[level..2 * level].fill(0); + self.alive.copy_within(level..2 * level, 2 * level); + self.alive[level..2 * level].fill(0); + level /= 2; + } + // Right subtree is all-padding (life) and all-dead (alive=0). The + // new root pair's right child reaches that padding subtree (min = + // padding, alive = 0); its left child reaches the relocated old + // root (min = old_root_min, alive = old_root_alive). + let r_off = self.padding - old_root_min; + debug_assert!(r_off >= 0, "padding must be >= every real leaf"); + self.val[1] = SIDE_BIT | r_off; + self.alive[1] = old_root_alive; + self.size = new_size; + } + + /// Translates a logical rank to a physical leaf index. + fn physical_at_rank(&self, mut rank: usize) -> usize { + debug_assert!(rank < self.n_live); + if self.size == 1 { + return 0; + } + let mut pair_idx = 1; + let half = self.size / 2; + while pair_idx < half { + let left_alive = self.alive[pair_idx] as usize; + if rank < left_alive { + pair_idx *= 2; + } else { + rank -= left_alive; + pair_idx = 2 * pair_idx + 1; + } + } + // Leaf pair: `alive[pair_idx]` is 0 or 1 (alive bit of the left leaf). + let left_alive = self.alive[pair_idx] as usize; + let p_lo = 2 * pair_idx - self.size; + if rank < left_alive { p_lo } else { p_lo + 1 } + } + + /// Sets physical leaf `i` to `val` (mirrors `CompactMinSegTree::set`). 
+ fn set_physical(&mut self, i: usize, val: i64) { + debug_assert!(i < self.size, "leaf index out of range"); + if self.size == 1 { + self.val[0] = val; + return; + } + let depth = self.size.trailing_zeros(); + let mut acc = self.val[0]; + let mut p = 1; + for d in (1..depth).rev() { + let bit = (i >> d) & 1; + let packed = self.val[p]; + if (packed < 0) == (bit == 1) { + acc += packed & OFFSET_MASK; + } + p = 2 * p + bit; + } + let leaf_is_right = i & 1 == 1; + let packed = self.val[p]; + let other_off = if (packed < 0) == leaf_is_right { + 0 + } else { + packed & OFFSET_MASK + }; + let leaf_off = val - acc; + let (pushed, new_packed) = pack_with_side(leaf_off, other_off, leaf_is_right); + self.val[p] = new_packed; + self.bubble_up_set(p, pushed); + } + + /// Adds `delta` to every physical leaf in `[lo, size)` (mirrors + /// `CompactMinSegTree::suffix_add`). + fn suffix_add(&mut self, lo: usize, delta: i64) { + if lo >= self.size || delta == 0 { + return; + } + if self.size == 1 { + self.val[0] += delta; + return; + } + let p = self.size / 2 + lo / 2; + let leaf_is_right = lo & 1 == 1; + let packed = self.val[p]; + let (mut l_off, mut r_off) = unpack(packed); + if !leaf_is_right { + l_off += delta; + } + r_off += delta; + let (pushed, new_packed) = pack(l_off, r_off); + self.val[p] = new_packed; + self.bubble_up_suffix(p, pushed, delta); + } + + /// Adjusts the `alive` array along the path from physical leaf `p_phys` + /// to the root by `delta` (typically `+1` for `push`, `-1` for `kill`). + /// `alive[parent]` is bumped iff we ascend from the LEFT child. + fn bump_alive_path(&mut self, p_phys: usize, delta: i32) { + if self.size == 1 { + apply_delta(&mut self.alive[0], delta); + return; + } + let mut pair_idx = self.size / 2 + p_phys / 2; + // Leaf pair: bump alive[pair_idx] iff the leaf is on the left side + // (i.e. p_phys is even). The leaf-pair entry stores the left leaf's + // alive bit only. 
+ if p_phys & 1 == 0 { + apply_delta(&mut self.alive[pair_idx], delta); + } + // Walk up the pair-heap. + while pair_idx > 1 { + let parent = pair_idx / 2; + let from_left = pair_idx & 1 == 0; + if from_left { + apply_delta(&mut self.alive[parent], delta); + } + pair_idx = parent; + } + // Root total. + apply_delta(&mut self.alive[0], delta); + } + + fn bubble_up_set(&mut self, mut p: usize, mut pushed: i64) { + while p > 1 && pushed != 0 { + let parent = p / 2; + let from_right = p & 1 == 1; + let packed = self.val[parent]; + let old_off = packed & OFFSET_MASK; + let from_side_is_heavy = (packed < 0) == from_right; + let from_off = if from_side_is_heavy { old_off } else { 0 } + pushed; + let other_off = if from_side_is_heavy { 0 } else { old_off }; + let (m, new_packed) = pack_with_side(from_off, other_off, from_right); + self.val[parent] = new_packed; + pushed = m; + p = parent; + } + self.val[0] += pushed; + } + + fn bubble_up_suffix(&mut self, mut p: usize, mut pushed: i64, right_bump: i64) { + while p > 1 { + let parent = p / 2; + let from_right = p & 1 == 1; + let packed = self.val[parent]; + let (mut l_off, mut r_off) = unpack(packed); + if from_right { + r_off += pushed; + } else { + l_off += pushed; + r_off += right_bump; + } + let (m, new_packed) = pack(l_off, r_off); + self.val[parent] = new_packed; + pushed = m; + p = parent; + } + self.val[0] += pushed; + } +} + +#[inline(always)] +fn apply_delta(slot: &mut u32, delta: i32) { + if delta >= 0 { + *slot += delta as u32; + } else { + *slot -= (-delta) as u32; + } +} + +#[inline(always)] +fn unpack(packed: i64) -> (i64, i64) { + let off = packed & OFFSET_MASK; + if packed < 0 { (0, off) } else { (off, 0) } +} + +#[inline(always)] +fn pack(l: i64, r: i64) -> (i64, i64) { + let m = l.min(r); + let diff = l - r; + let off = diff.unsigned_abs() as i64; + let side = ((diff as u64) >> 63 << 63) as i64; + (m, off | side) +} + +#[inline(always)] +fn pack_with_side(from_off: i64, other_off: i64, from_side_is_right: 
bool) -> (i64, i64) { + if from_side_is_right { + pack(other_off, from_off) + } else { + pack(from_off, other_off) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const PAD: i64 = 1_000_000_000; + + fn lcg_rng() -> impl FnMut() -> u64 { + let mut state: u64 = 0xC0FFEE_DEADBEEF; + move || { + state = state + .wrapping_mul(6364136223846793005) + .wrapping_add(1442695040888963407); + state + } + } + + /// Naive reference: stores the live lives in their logical order. + struct Naive { + lives: Vec, + } + impl Naive { + fn new() -> Self { + Self { lives: Vec::new() } + } + fn push(&mut self, life: i64) { + self.lives.push(life); + } + fn find_dead(&self) -> Option { + self.lives.iter().rposition(|&v| v <= 0) + } + fn kill(&mut self, p: usize) { + self.lives.remove(p); + for v in &mut self.lives[p..] { + *v += 1; + } + } + fn set(&mut self, p: usize, life: i64) { + self.lives[p] = life; + } + fn len(&self) -> usize { + self.lives.len() + } + } + + #[test] + fn empty() { + let t = LiveMinSegTree::with_capacity(0, PAD); + assert_eq!(t.len(), 0); + assert!(t.is_empty()); + assert_eq!(t.find_dead(), None); + assert_eq!(t.physical_len(), 0); + assert_eq!(t.n_dead(), 0); + } + + #[test] + fn single_leaf() { + let mut t = LiveMinSegTree::with_capacity(1, PAD); + t.push(5); + assert_eq!(t.len(), 1); + assert_eq!(t.find_dead(), None); + t.set(0, -1); + assert_eq!(t.find_dead(), Some(0)); + t.kill(0); + assert_eq!(t.len(), 0); + assert_eq!(t.n_dead(), 1); + assert_eq!(t.find_dead(), None); + } + + #[test] + fn push_then_find_dead_basic() { + let mut t = LiveMinSegTree::with_capacity(4, PAD); + // Push 4 lives; none <= 0. + for &v in &[3i64, 7, 5, 2] { + t.push(v); + } + assert_eq!(t.len(), 4); + assert_eq!(t.find_dead(), None); + // Make logical index 2 dead. + t.set(2, -1); + assert_eq!(t.find_dead(), Some(2)); + // Make logical index 3 dead too; rightmost is now 3. 
+ t.set(3, 0); + assert_eq!(t.find_dead(), Some(3)); + } + + #[test] + fn kill_shifts_lives_correctly() { + let mut t = LiveMinSegTree::with_capacity(4, PAD); + for &v in &[5i64, -1, 7, -2] { + t.push(v); + } + // Rightmost dead is logical 3 (life -2). + assert_eq!(t.find_dead(), Some(3)); + t.kill(3); + // After kill: remaining logical lives are [5, -1, 7]. No bump (nothing right of 3). + // Rightmost dead is now logical 1. + assert_eq!(t.len(), 3); + assert_eq!(t.find_dead(), Some(1)); + t.kill(1); + // After kill: remaining live lives in logical order: [5, 7+1] = [5, 8]. + assert_eq!(t.len(), 2); + assert_eq!(t.find_dead(), None); + } + + #[test] + fn find_dead_returns_rightmost() { + let mut t = LiveMinSegTree::with_capacity(8, PAD); + for &v in &[-1i64, -2, -3, -4, -5, -6, -7, -8] { + t.push(v); + } + assert_eq!(t.find_dead(), Some(7)); + } + + #[test] + fn padding_is_not_selected() { + // Tombstones (= padding) must never be returned by find_dead, even + // after suffix bumps from neighbouring kills. + let mut t = LiveMinSegTree::with_capacity(4, PAD); + for &v in &[1i64, 2, 3, 4] { + t.push(v); + } + // Tombstone everything by repeatedly killing the dead leaf, but + // first artificially make every leaf dead so kills cascade. + for i in 0..4 { + t.set(i, -100); + } + for _ in 0..4 { + let p = t.find_dead().unwrap(); + t.kill(p); + } + assert_eq!(t.find_dead(), None); + assert_eq!(t.len(), 0); + assert_eq!(t.n_dead(), 4); + } + + #[test] + fn doubling_preserves_state() { + let mut t = LiveMinSegTree::with_capacity(0, PAD); + // Pushes that span several doublings. + let lives: Vec = (1..=9).map(|i| i as i64).collect(); + for &v in &lives { + t.push(v); + } + assert_eq!(t.len(), 9); + assert_eq!(t.find_dead(), None); + // Force a dead at logical 4 (life 5 + 0 bumps so far). + t.set(4, -1); + assert_eq!(t.find_dead(), Some(4)); + } + + /// Cross-check against the naive reference with a random sequence of + /// pushes / kills / sets. 
+ #[test] + fn matches_naive_under_random_ops() { + let mut next = lcg_rng(); + for trial in 0..50 { + let mut t = LiveMinSegTree::with_capacity(0, PAD); + let mut n = Naive::new(); + for _ in 0..200 { + let op = next() % 10; + if op < 6 || n.len() == 0 { + // Push a life in a wide range so we get both positive + // and negative ones. + let life: i64 = (next() as i64).rem_euclid(26) - 5; + t.push(life); + n.push(life); + } else if op < 9 { + // Kill (prefer find_dead's choice, else random index). + let p = match n.find_dead() { + Some(p) => p, + None => (next() as usize) % n.len(), + }; + let tp = t.find_dead(); + let np = n.find_dead(); + assert_eq!(tp, np, "trial {trial}"); + t.kill(p); + n.kill(p); + } else { + // Overwrite the life of a random logical leaf. + let p = (next() as usize) % n.len(); + let life: i64 = (next() as i64).rem_euclid(26) - 5; + t.set(p, life); + n.set(p, life); + } + assert_eq!(t.len(), n.len(), "trial {trial}"); + assert_eq!(t.find_dead(), n.find_dead(), "trial {trial}"); + } + } + } +} diff --git a/crates/consistent-choose-k/src/sample_treap.rs b/crates/consistent-choose-k/src/sample_treap.rs new file mode 100644 index 0000000..e8ee3eb --- /dev/null +++ b/crates/consistent-choose-k/src/sample_treap.rs @@ -0,0 +1,737 @@ +//! Dynamic order-statistics tree for the sorted samples list of the fast +//! choose-k hasher. Implemented as an **implicit treap** (in-order index = +//! key) with a *relative-offset* encoding of the per-node `life` value. +//! Vec-backed: every node lives in `nodes` at a u32 index, freed slots are +//! reused. +//! +//! Each node carries one element of the in-order sequence; internal nodes +//! and "leaves" are the same thing — internal nodes are also data. +//! +//! All operations are expressed via two primitives: +//! * `merge(a, b)` — concatenate the in-order sequences of two treaps. +//! * `split(root, k)` — split the sequence into the first `k` and the rest. +//! +//! Both run in O(log n) expected. 
Public ops compose them. +//! +//! # Relative-offset encoding +//! +//! Each node stores `min_off: i32` and `life: i32` such that: +//! * `actual_subtree_min(v) == sum of min_off` along the path from the tree +//! root down to `v` (inclusive). +//! * `actual_life(v) == actual_subtree_min(v) + v.life`, with `v.life >= 0`. +//! +//! Invariant at every (non-NIL) node `v`: +//! `min(v.life, v.left.min_off, v.right.min_off) == 0`, where NIL children +//! contribute `+infinity`. Combined with `life >= 0`, this also gives +//! `child.min_off >= 0` at every internal edge. +//! +//! This is the same trick `min_seg_tree.rs` uses: a range-add of `delta` to +//! the entire subtree at `v` is just `nodes[v].min_off += delta` (the +//! reference frame for everything inside `v` shifts together). No lazy +//! tags are needed. +//! +//! # Reference frames +//! +//! Each subtree's *root* `min_off` is interpreted relative to the parent +//! subtree's actual min — i.e., when a subtree is detached (e.g. between a +//! `split` and the following `merge`), its root's `min_off` equals its +//! absolute subtree min. When a subtree is reattached as the child of some +//! node `p`, its root's `min_off` is shifted by `-p.min_off` (and by +//! `+p.min_off` when extracted back out). `split`/`merge` perform exactly +//! these adjustments at every recursive boundary. +//! +//! Index `0` is the **NIL sentinel** representing the empty subtree. It +//! lives permanently in `nodes[0]` so every index dereference is valid +//! without a null check, and is never freed or returned to callers. Its +//! fields are all zero **except** `min_off = i32::MAX` — that value acts +//! as `+infinity` in the `min(life, l.min_off, r.min_off)` computations +//! used by `update`, so a NIL child never "wins" the min (and therefore +//! never spuriously satisfies the invariant). NIL is also never written +//! to, so its sentinel value is preserved: writes inside `update` are +//! 
guarded so a non-zero rebase never clobbers it. + +const NIL: u32 = 0; + +#[derive(Clone, Copy)] +struct Node { + sample: usize, + /// Relative-offset encoding: see module docs. For the NIL sentinel + /// this is `i32::MAX`, acting as `+infinity` in min comparisons. + min_off: i32, + /// `actual_life - actual_subtree_min`. Always `>= 0` (except sentinel). + life: i32, + size: u32, + left: u32, + right: u32, + priority: u32, +} + +impl Node { + /// The permanent NIL sentinel stored at `nodes[0]`. All zero fields + /// except `min_off`, which is `+infinity` so NIL is invisible in min + /// comparisons. + const SENTINEL: Node = Node { + sample: 0, + min_off: i32::MAX, + life: 0, + size: 0, + left: NIL, + right: NIL, + priority: 0, + }; +} + +/// Implicit-key treap with relative-offset encoding on the `life` field. +/// See module docs. +pub struct SampleTreap { + nodes: Vec, + /// Free list of `nodes` indices that can be reused on the next insert. + /// Never contains `NIL`. + free: Vec, + root: u32, + /// Internal RNG state for generating node priorities. Deterministic and + /// instance-local so tests are reproducible. + rng_state: u64, +} + +impl SampleTreap { + /// New empty treap, preallocating space for `capacity` nodes plus the NIL sentinel. + pub fn with_capacity(capacity: usize) -> Self { + let mut nodes = Vec::with_capacity(capacity + 1); + nodes.push(Node::SENTINEL); + Self { + nodes, + free: Vec::new(), + root: NIL, + rng_state: 0x9E37_79B9_7F4A_7C15, + } + } + + /// Number of elements currently in the treap. + pub fn len(&self) -> usize { + self.nodes[self.root as usize].size as usize + } + + /// True iff the treap holds no elements. + #[allow(dead_code)] + pub fn is_empty(&self) -> bool { + self.root == NIL + } + + /// Reads the sample value at in-order position `p`. An out-of-range `p` + /// is caught only by a `debug_assert!`. Time: O(log n) expected.
+ #[allow(dead_code)] + pub fn get_sample(&self, p: usize) -> usize { + debug_assert!(p < self.len(), "index out of range"); + self.read_at(p).0 + } + + /// Reads `(sample, life)` at in-order position `p`. An out-of-range `p` + /// is caught only by a `debug_assert!`. Time: O(log n) expected. + #[allow(dead_code)] + pub fn get(&self, p: usize) -> (usize, i32) { + debug_assert!(p < self.len(), "index out of range"); + self.read_at(p) + } + + /// Appends `(sample, life)` at the end of the in-order sequence. + /// + /// Time: O(log n) expected. + pub fn push_back(&mut self, sample: usize, life: i32) { + let new_node = self.alloc(sample, life); + let root = self.root; + // Skip the redundant `split(root, len)` that an `insert_at(len, ...)` + // would do (it walks the entire right spine for nothing): just + // merge the fresh singleton onto the right. + self.root = self.merge(root, new_node); + } + + /// Inserts `(sample, life)` at in-order position `p` (i.e. the new + /// node's index becomes `p`; everything previously at `>= p` shifts + /// right by one). + /// + /// Time: O(log n) expected. + #[allow(dead_code)] + pub fn insert_at(&mut self, p: usize, sample: usize, life: i32) { + debug_assert!(p <= self.len(), "insertion position out of range"); + let new_node = self.alloc(sample, life); + let root = self.root; + let (left, right) = self.split(root, p); + let m = self.merge(left, new_node); + self.root = self.merge(m, right); + } + + /// Removes the node at in-order position `p` and returns its + /// `(sample, life)` pair (with the absolute life value). + /// + /// Time: O(log n) expected. + #[allow(dead_code)] + pub fn remove_at(&mut self, p: usize) -> (usize, i32) { + debug_assert!(p < self.len(), "removal position out of range"); + let root = self.root; + let (left, right) = self.split(root, p); + let (mid, right) = self.split(right, 1); + debug_assert!(mid != NIL && self.nodes[mid as usize].size == 1); + // After split, `mid` is a singleton at top-level reference frame.
+ // For a singleton: actual_subtree_min == actual_life, and `life` + // (the relative offset) is 0; so `mid.min_off` is the absolute life. + let m = &self.nodes[mid as usize]; + let result = (m.sample, m.min_off + m.life); + self.free.push(mid); + self.root = self.merge(left, right); + result + } + + /// Removes the node at in-order position `p` and decrements the life of + /// every remaining node at positions `[p, len)` (i.e. the original + /// suffix `[p + 1, len)`, which has just shifted left by one) by `1`. + /// + /// Semantically equivalent to: + /// ```ignore + /// let removed = self.remove_at(p); + /// self.add_life_suffix(p, -1); + /// removed + /// ``` + /// but it folds the suffix decrement into the same split/merge pair as + /// the removal, avoiding a second descent down the same spine. + /// + /// Time: O(log n) expected. + pub fn remove_at_decrementing_suffix(&mut self, p: usize) -> (usize, i32) { + debug_assert!(p < self.len(), "removal position out of range"); + let root = self.root; + let (left, right) = self.split(root, p); + let (mid, right) = self.split(right, 1); + debug_assert!(mid != NIL && self.nodes[mid as usize].size == 1); + let m = &self.nodes[mid as usize]; + let result = (m.sample, m.min_off + m.life); + self.free.push(mid); + // The detached `right` subtree holds the original positions + // `[p + 1, len)`, which become `[p, len - 1)` in the merged + // result. Shift its reference frame by `-1` in place — a single + // field bump on the subtree root (cf. `add_life_suffix`). + if right != NIL { + self.nodes[right as usize].min_off -= 1; + } + self.root = self.merge(left, right); + result + } + + /// Adds `delta` to the `life` of every node at in-order positions + /// `[p, len)` (the suffix starting at `p`). + /// + /// Time: O(log n) expected. 
+ #[allow(dead_code)] + pub fn add_life_suffix(&mut self, p: usize, delta: i32) { + debug_assert!(p <= self.len(), "position out of range"); + if p == self.len() || delta == 0 { + return; + } + let root = self.root; + let (left, right) = self.split(root, p); + // Apply `+delta` to the detached `right` subtree. With the + // relative-offset encoding the entire subtree's reference frame + // shifts by `delta` via a single field bump. `right` is non-NIL + // here because `p < len()` guarantees the split leaves at least + // one element on the right. + debug_assert!(right != NIL); + self.nodes[right as usize].min_off += delta; + self.root = self.merge(left, right); + } + + /// Returns the in-order position of the **rightmost** node whose absolute + /// life is `<= 0`, or `None` if no such node exists. + /// + /// Time: O(log n) expected. + pub fn find_rightmost_le_zero(&self) -> Option { + let root = self.root; + if root == NIL { + return None; + } + let root_min = self.nodes[root as usize].min_off; + if root_min > 0 { + return None; + } + Some(self.descend_rightmost_le_zero(root, 0, root_min)) + } + + /// In-order list of `sample` values. Useful for tests and the hasher's + /// `samples()` getter. + #[allow(dead_code)] + pub fn samples(&self) -> Vec { + let mut out = Vec::with_capacity(self.len()); + self.collect_in_order(self.root, &mut out); + out + } + + // ----- Internals: layout, allocation, aggregates --------------------- + + /// Allocate a singleton at the "top-level" reference frame: its + /// `min_off` equals its absolute `life`, and `life`-delta is 0. When + /// merged into the tree, `merge` will lower it relative to its new + /// parent. + fn alloc(&mut self, sample: usize, life: i32) -> u32 { + // 64-bit LCG step (wrapping multiply-add); high 32 bits are well-mixed.
+ self.rng_state = self + .rng_state + .wrapping_mul(6364136223846793005) + .wrapping_add(1442695040888963407); + let priority = (self.rng_state >> 32) as u32; + let node = Node { + sample, + min_off: life, + life: 0, + size: 1, + left: NIL, + right: NIL, + priority, + }; + if let Some(idx) = self.free.pop() { + self.nodes[idx as usize] = node; + idx + } else { + let idx = self.nodes.len() as u32; + self.nodes.push(node); + idx + } + } + + /// Restore the local invariant at `idx`: + /// `min(idx.life, left.min_off, right.min_off) == 0` + /// and recompute `size`. Any common excess `dM` shared by all three is + /// "lifted up" into `idx.min_off` (so the subtree-min path-sum stays + /// correct, the contained `life`-deltas stay non-negative, and the + /// child offsets stay non-negative). + /// + /// NIL children read as `min_off = i32::MAX` so they cannot win the + /// `min()` — that's safe to use directly without a conditional. The + /// writes are still guarded against NIL: if `dM != 0` and one child is + /// NIL, writing `-= dM` would corrupt the sentinel. + fn update(&mut self, idx: u32) { + debug_assert!(idx != NIL, "update called on NIL"); + let (left, right, life) = { + let n = &self.nodes[idx as usize]; + (n.left, n.right, n.life) + }; + let l_min = self.nodes[left as usize].min_off; + let r_min = self.nodes[right as usize].min_off; + let d_m = life.min(l_min).min(r_min); + if d_m != 0 { + // Shift everything inside `idx`'s subtree down by `d_m` (so the + // residual minimum becomes 0) and pay `d_m` into `idx.min_off`. + // Actual subtree values are unchanged; only the relative + // encoding rebases. 
+ let n = &mut self.nodes[idx as usize]; + n.life -= d_m; + n.min_off += d_m; + if left != NIL { + self.nodes[left as usize].min_off -= d_m; + } + if right != NIL { + self.nodes[right as usize].min_off -= d_m; + } + } + let l_size = self.nodes[left as usize].size; + let r_size = self.nodes[right as usize].size; + self.nodes[idx as usize].size = 1 + l_size + r_size; + } + + // ----- Internals: split / merge -------------------------------------- + + /// Split the subtree rooted at `idx` into `(left, right)` where `left` + /// contains the first `k` in-order elements and `right` contains the + /// rest. Both outputs are at the same reference frame as the caller's + /// `idx`. + fn split(&mut self, idx: u32, k: usize) -> (u32, u32) { + if idx == NIL { + return (NIL, NIL); + } + let l_size = self.nodes[self.nodes[idx as usize].left as usize].size as usize; + // Capture `idx.min_off` BEFORE `update(idx)` runs so we lift any + // promoted sibling using the pre-update offset. The promoted + // sibling is no longer a descendant of `idx`, so its reference + // frame is `idx`'s old parent — which corresponds to old + // `idx.min_off`. + let idx_min_off = self.nodes[idx as usize].min_off; + if k <= l_size { + // Recurse into the left child; the returned `l1` is detached + // and rises to be `idx`'s sibling (in the caller's frame). + let (l1, l2) = self.split(self.nodes[idx as usize].left, k); + if l1 != NIL { + self.nodes[l1 as usize].min_off += idx_min_off; + } + self.nodes[idx as usize].left = l2; + self.update(idx); + (l1, idx) + } else { + let (r1, r2) = self.split(self.nodes[idx as usize].right, k - l_size - 1); + if r2 != NIL { + self.nodes[r2 as usize].min_off += idx_min_off; + } + self.nodes[idx as usize].right = r1; + self.update(idx); + (idx, r2) + } + } + + /// Merge two treaps whose in-order sequences are concatenated as + /// `left` then `right`. Both inputs share the same reference frame; + /// the output is at that same frame. 
+ fn merge(&mut self, left: u32, right: u32) -> u32 { + if left == NIL { + return right; + } + if right == NIL { + return left; + } + let lp = self.nodes[left as usize].priority; + let rp = self.nodes[right as usize].priority; + if lp > rp { + // `left` stays root; `right` descends into `left`'s right + // subtree. Lower `right` to `left`'s subtree-min frame. + self.nodes[right as usize].min_off -= self.nodes[left as usize].min_off; + self.nodes[left as usize].right = self.merge(self.nodes[left as usize].right, right); + self.update(left); + left + } else { + self.nodes[left as usize].min_off -= self.nodes[right as usize].min_off; + self.nodes[right as usize].left = self.merge(left, self.nodes[right as usize].left); + self.update(right); + right + } + } + + // ----- Internals: descent helpers ------------------------------------ + + /// Read at position `p`, accumulating `min_off` along the descent path + /// to recover the absolute life value at the target. + fn read_at(&self, mut p: usize) -> (usize, i32) { + let mut idx = self.root; + let mut accum: i32 = 0; + loop { + debug_assert!(idx != NIL, "position out of range"); + accum += self.nodes[idx as usize].min_off; + let n = &self.nodes[idx as usize]; + let l_size = self.nodes[n.left as usize].size as usize; + if p < l_size { + idx = n.left; + } else if p == l_size { + return (n.sample, accum + n.life); + } else { + p -= l_size + 1; + idx = n.right; + } + } + } + + /// Descend to the rightmost node with `actual_life <= 0`. Caller has + /// verified the subtree at `idx` contains such a node (i.e. + /// `accum_at_idx == actual_subtree_min(idx) <= 0`). + fn descend_rightmost_le_zero(&self, mut idx: u32, mut base: usize, mut accum: i32) -> usize { + loop { + debug_assert!(idx != NIL); + debug_assert!(accum <= 0); + let n = &self.nodes[idx as usize]; + let l_size = self.nodes[n.left as usize].size as usize; + // Prefer the right subtree; then this node; then the left + // subtree. 
Guard each min_off arithmetic against NIL (whose + // sentinel min_off = i32::MAX must never be added to a + // possibly-negative accum). + if n.right != NIL { + let r_accum = accum + self.nodes[n.right as usize].min_off; + if r_accum <= 0 { + base += l_size + 1; + accum = r_accum; + idx = n.right; + continue; + } + } + if accum + n.life <= 0 { + return base + l_size; + } + debug_assert!(n.left != NIL); + let l_accum = accum + self.nodes[n.left as usize].min_off; + debug_assert!(l_accum <= 0); + accum = l_accum; + idx = n.left; + } + } + + fn collect_in_order(&self, idx: u32, out: &mut Vec) { + if idx == NIL { + return; + } + let n = &self.nodes[idx as usize]; + self.collect_in_order(n.left, out); + out.push(n.sample); + self.collect_in_order(n.right, out); + } + + /// Returns the in-order list of `(sample, life)` pairs. + pub fn lifetimes(&self) -> Vec<(usize, i32)> { + let mut out = Vec::with_capacity(self.len()); + self.collect_lifetimes(self.root, 0, &mut out); + out + } + + fn collect_lifetimes(&self, idx: u32, accum: i32, out: &mut Vec<(usize, i32)>) { + if idx == NIL { + return; + } + let n = &self.nodes[idx as usize]; + let current_accum = accum + n.min_off; + self.collect_lifetimes(n.left, current_accum, out); + out.push((n.sample, current_accum + n.life)); + self.collect_lifetimes(n.right, current_accum, out); + } +} + +impl std::fmt::Debug for SampleTreap { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_list().entries(self.lifetimes().into_iter()).finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn lcg_rng() -> impl FnMut() -> u64 { + let mut state: u64 = 0xC0FFEE_DEADBEEF; + move || { + state = state + .wrapping_mul(6364136223846793005) + .wrapping_add(1442695040888963407); + state + } + } + + impl SampleTreap { + /// Recursive invariant checker used by tests. 
Verifies: + /// * sizes are correct, + /// * `life >= 0` and (non-root) `min_off >= 0`, + /// * `min(life, left.min_off, right.min_off) == 0`, + /// * actual lives accumulated from root equal the values returned + /// by `get`. + fn check_invariants(&self) { + fn rec(t: &SampleTreap, idx: u32, accum: i32, is_root: bool, pos: &mut usize) { + if idx == NIL { + return; + } + let n = &t.nodes[idx as usize]; + if !is_root { + assert!(n.min_off >= 0, "non-root min_off should be >= 0"); + } + assert!(n.life >= 0, "life-delta should be >= 0"); + let l_min = t.nodes[n.left as usize].min_off; + let r_min = t.nodes[n.right as usize].min_off; + assert_eq!( + n.life.min(l_min).min(r_min), + 0, + "invariant min(life, l, r) == 0 violated", + ); + let l_size = t.nodes[n.left as usize].size; + let r_size = t.nodes[n.right as usize].size; + assert_eq!(n.size, 1 + l_size + r_size, "size mismatch"); + let sub_accum = accum + n.min_off; + rec(t, n.left, sub_accum, false, pos); + // Verify get(pos) returns the same life as our accumulated + // walk would produce. + let got = t.get(*pos); + assert_eq!(got.0, n.sample); + assert_eq!(got.1, sub_accum + n.life); + *pos += 1; + rec(t, n.right, sub_accum, false, pos); + } + let mut pos = 0usize; + rec(self, self.root, 0, true, &mut pos); + assert_eq!(pos, self.len()); + } + } + + /// Naive reference for the same operations, backed by a Vec. + struct Naive { + v: Vec<(usize, i32)>, + } + impl Naive { + fn new() -> Self { + Self { v: Vec::new() } + } + fn len(&self) -> usize { + self.v.len() + } + fn insert_at(&mut self, p: usize, sample: usize, life: i32) { + self.v.insert(p, (sample, life)); + } + fn remove_at(&mut self, p: usize) -> (usize, i32) { + self.v.remove(p) + } + fn remove_at_decrementing_suffix(&mut self, p: usize) -> (usize, i32) { + let removed = self.v.remove(p); + for entry in &mut self.v[p..] 
{ + entry.1 -= 1; + } + removed + } + fn add_life_suffix(&mut self, p: usize, d: i32) { + for i in p..self.v.len() { + self.v[i].1 += d; + } + } + fn find_rightmost_le_zero(&self) -> Option { + self.v.iter().rposition(|&(_, l)| l <= 0) + } + fn samples(&self) -> Vec { + self.v.iter().map(|&(s, _)| s).collect() + } + fn get(&self, p: usize) -> (usize, i32) { + self.v[p] + } + } + + #[test] + fn empty() { + let t = SampleTreap::with_capacity(0); + assert_eq!(t.len(), 0); + assert!(t.is_empty()); + assert_eq!(t.find_rightmost_le_zero(), None); + assert!(t.samples().is_empty()); + } + + #[test] + fn push_back_basic() { + let mut t = SampleTreap::with_capacity(4); + t.push_back(10, 3); + t.push_back(20, 7); + t.push_back(30, 0); + t.push_back(40, 5); + assert_eq!(t.len(), 4); + assert_eq!(t.samples(), vec![10, 20, 30, 40]); + assert_eq!(t.find_rightmost_le_zero(), Some(2)); + assert_eq!(t.get(0), (10, 3)); + assert_eq!(t.get(3), (40, 5)); + t.check_invariants(); + } + + #[test] + fn insert_at_middle() { + let mut t = SampleTreap::with_capacity(0); + t.push_back(10, 3); + t.push_back(30, 5); + t.insert_at(1, 20, 4); + assert_eq!(t.samples(), vec![10, 20, 30]); + assert_eq!(t.get(1), (20, 4)); + t.check_invariants(); + } + + #[test] + fn remove_at_returns_value() { + let mut t = SampleTreap::with_capacity(0); + for (s, l) in [(10, 3), (20, -1), (30, 5)] { + t.push_back(s, l); + } + let removed = t.remove_at(1); + assert_eq!(removed, (20, -1)); + assert_eq!(t.samples(), vec![10, 30]); + t.check_invariants(); + } + + #[test] + fn add_life_suffix_basic() { + let mut t = SampleTreap::with_capacity(0); + for (s, l) in [(10, 1), (20, 1), (30, 1), (40, 1)] { + t.push_back(s, l); + } + t.add_life_suffix(2, -3); + // Lives are now [1, 1, -2, -2]. 
+ assert_eq!(t.find_rightmost_le_zero(), Some(3)); + assert_eq!(t.get(0).1, 1); + assert_eq!(t.get(1).1, 1); + assert_eq!(t.get(2).1, -2); + assert_eq!(t.get(3).1, -2); + t.check_invariants(); + } + + #[test] + fn find_rightmost_picks_rightmost() { + let mut t = SampleTreap::with_capacity(0); + for (s, l) in [(10, -1), (20, -2), (30, -3), (40, -4)] { + t.push_back(s, l); + } + assert_eq!(t.find_rightmost_le_zero(), Some(3)); + t.check_invariants(); + } + + #[test] + fn find_rightmost_none() { + let mut t = SampleTreap::with_capacity(0); + for (s, l) in [(10, 1), (20, 2), (30, 3)] { + t.push_back(s, l); + } + assert_eq!(t.find_rightmost_le_zero(), None); + t.check_invariants(); + } + + #[test] + fn nested_suffix_adds_get_returns_cumulative() { + let mut t = SampleTreap::with_capacity(0); + for i in 0..16i32 { + t.push_back(i as usize, i); + } + // Initial life at p is p. After add_life_suffix(0, -5): p - 5. + t.add_life_suffix(0, -5); + // After add_life_suffix(5, -10): p - 5 if p < 5 else p - 15. 
+ t.add_life_suffix(5, -10); + assert_eq!(t.get(4).1, -1); + assert_eq!(t.get(5).1, -10); + assert_eq!(t.get(7).1, -8); + assert_eq!(t.get(11).1, -4); + assert_eq!(t.get(12).1, -3); + assert_eq!(t.get(15).1, 0); + t.check_invariants(); + } + + #[test] + fn matches_naive_under_random_ops() { + let mut rng = lcg_rng(); + for _trial in 0..50 { + let mut t = SampleTreap::with_capacity(0); + let mut n = Naive::new(); + for _ in 0..200 { + let op = rng() % 12; + if op < 4 || n.len() == 0 { + let sample = (rng() % 1000) as usize; + let life = (rng() as i32).rem_euclid(20) - 5; + let p = (rng() as usize) % (n.len() + 1); + t.insert_at(p, sample, life); + n.insert_at(p, sample, life); + } else if op < 6 { + let p = (rng() as usize) % n.len(); + assert_eq!(t.remove_at(p), n.remove_at(p)); + } else if op < 8 { + let p = (rng() as usize) % n.len(); + assert_eq!( + t.remove_at_decrementing_suffix(p), + n.remove_at_decrementing_suffix(p), + ); + } else if op < 10 { + let p = (rng() as usize) % (n.len() + 1); + let d = (rng() as i32).rem_euclid(11) - 5; + t.add_life_suffix(p, d); + n.add_life_suffix(p, d); + } else if !n.v.is_empty() { + let p = (rng() as usize) % n.len(); + assert_eq!(t.get(p), n.get(p)); + } + assert_eq!(t.len(), n.len()); + assert_eq!(t.find_rightmost_le_zero(), n.find_rightmost_le_zero()); + assert_eq!(t.samples(), n.samples()); + // Compare every (sample, life) — stronger than samples/find + // alone, since wrong positive lives would otherwise hide. 
+ for p in 0..n.len() { + assert_eq!(t.get(p), n.get(p), "mismatch at position {p}"); + } + t.check_invariants(); + } + } + } +} From 64a0bce61d5329ac4e6606b7afd9ea32ba6ffc1f Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 22 May 2026 13:48:24 +0200 Subject: [PATCH 46/49] remember iterator --- crates/consistent-choose-k/src/fast_grow_n.rs | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/crates/consistent-choose-k/src/fast_grow_n.rs b/crates/consistent-choose-k/src/fast_grow_n.rs index 8e374e5..ede1173 100644 --- a/crates/consistent-choose-k/src/fast_grow_n.rs +++ b/crates/consistent-choose-k/src/fast_grow_n.rs @@ -43,13 +43,12 @@ use std::collections::BinaryHeap; use crate::consistent_hash::ConsistentHashIterator; use crate::sample_treap::SampleTreap; -use crate::{ConsistentHasher, ManySeqBuilder}; +use crate::ManySeqBuilder; /// Fast variant of [`crate::ConsistentChooseKHasher`] specialized for /// repeated `grow_n` calls at fixed `k`. See module-level documentation /// for the algorithm. pub struct ConsistentChooseKFastGrowHasher { - builder: H, /// Current universe size. n: usize, /// Fixed sample count (number of sequences tracked in `next_heap`). @@ -58,16 +57,18 @@ pub struct ConsistentChooseKFastGrowHasher { /// component. A `None` next-candidate (sequence exhausted) is *not* /// pushed back; the heap shrinks instead. next_heap: BinaryHeap>, + /// One long-lived `ConsistentHashIterator` per sequence id, kept + /// positioned just past the seq's most recently popped/pushed sample. + /// Avoids rebuilding the iterator (and re-deriving its bucket state) + /// on every `grow_n` event. + iters: Vec>, /// Currently-selected samples in insertion order. Each entry's `life` /// is `seq_id - position`; an entry with `life <= 0` is displaced and /// will be evicted on the next firing. 
samples: SampleTreap, } -impl ConsistentChooseKFastGrowHasher -where - H::Builder: Clone, -{ +impl ConsistentChooseKFastGrowHasher { /// Create a new instance for `k` sequences with `n = 0`. Seeds the /// heap with each sequence's first sample; the sample treap is empty /// until `grow_n` is called enough times for samples to fire. @@ -75,9 +76,12 @@ where /// Time: O(k). pub fn new(builder: H, k: usize) -> Self { let mut next_heap = BinaryHeap::with_capacity(k); + let mut iters = Vec::with_capacity(k); let mut life = vec![0; k]; for seq in 0..k { - for l in ConsistentHashIterator::new(0, builder.seq_builder(seq)) { + let mut iter = ConsistentHashIterator::new(0, builder.seq_builder(seq)); + loop { + let l = iter.next().expect("seq must yield a sample >= k"); let sample = l + seq; if sample >= k { next_heap.push(Reverse((sample, seq))); @@ -85,6 +89,7 @@ where } life[sample] = l.max(life[sample]); } + iters.push(iter); } let mut samples = SampleTreap::with_capacity(k); for (sample, life) in life.into_iter().enumerate() { @@ -92,10 +97,10 @@ where } Self { - builder, n: k, k, next_heap, + iters, samples, } } @@ -126,10 +131,17 @@ where .next_heap .pop() .expect("there are always k elements in the heap!"); - let after = ConsistentHasher::new(self.builder.seq_builder(seq)) - .next(self.n.max(next + 1) - seq) - .expect("") - + seq; + // Advance this seq's cached iterator until the next candidate + // satisfies `>= self.n` (i.e. iter yield `>= self.n - seq`). + // Under the heap invariant the very first `.next()` already + // satisfies the bound, but we keep the inner loop for safety. 
+ let threshold = self.n.max(next + 1) - seq; + let after = loop { + let l = self.iters[seq].next().expect("seq must yield more samples"); + if l >= threshold { + break l + seq; + } + }; self.next_heap.push(Reverse((after, seq))); if next >= self.n { self.n = next + 1; From c6295ad5ab645d98d5e4a8340c42164d1d48c1f5 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 22 May 2026 14:35:19 +0200 Subject: [PATCH 47/49] use bucketiterator --- .../src/consistent_hash.rs | 4 +- crates/consistent-choose-k/src/fast_grow_n.rs | 168 +++++++++++------- 2 files changed, 108 insertions(+), 64 deletions(-) diff --git a/crates/consistent-choose-k/src/consistent_hash.rs b/crates/consistent-choose-k/src/consistent_hash.rs index 6edd585..efabe20 100644 --- a/crates/consistent-choose-k/src/consistent_hash.rs +++ b/crates/consistent-choose-k/src/consistent_hash.rs @@ -63,7 +63,7 @@ impl ManySeqBuilder for H { /// hash iterator which enumerates all the hashes for a specific bucket. /// A bucket covers the range `(1< { +pub(crate) struct BucketIterator { hasher: H, n: usize, // Upper bound for the hash values within the bucket. is_first: bool, @@ -71,7 +71,7 @@ struct BucketIterator { } impl BucketIterator { - fn new(n: usize, bit: u64, hasher: H) -> Self { + pub(crate) fn new(n: usize, bit: u64, hasher: H) -> Self { Self { hasher, n, diff --git a/crates/consistent-choose-k/src/fast_grow_n.rs b/crates/consistent-choose-k/src/fast_grow_n.rs index ede1173..864d41e 100644 --- a/crates/consistent-choose-k/src/fast_grow_n.rs +++ b/crates/consistent-choose-k/src/fast_grow_n.rs @@ -11,39 +11,32 @@ //! # Algorithm sketch //! //! State: -//! * `next_heap`: min-heap of `(next_candidate_sample, seq_id)` — one -//! entry per active sequence, keyed by the sequence's smallest sample -//! strictly greater than its currently-selected sample (or its first -//! sample, if it has none yet). +//! * `next_heap`: min-heap of `(sample, packed_seq)`. `packed_seq` is +//! `seq_id * 2 + owner_bit`. 
For each seq id with at least one entry +//! in the heap, exactly one entry — the largest — has its owner bit +//! set. When that entry is popped, the seq's next active bucket of +//! samples is loaded into the heap (and the new largest becomes the +//! new owner). Each bucket of a seq is materialized as a batch by +//! running the seq's [`BucketIterator`] to exhaustion; this avoids +//! re-running the seq's hash sequence on every single `grow_n` call. +//! * `bits[seq]`: bitmask of buckets *not yet* pushed into the heap. +//! Lower bits correspond to smaller value ranges (`[bit, 2*bit)`), +//! so the lowest set bit is the next bucket to push. +//! * `builders[seq]`: cached per-seq hash builder. //! * `samples`: a [`SampleTreap`] holding `(sample, life)` pairs in //! insertion order, where `life = seq_id - position`. Once `k` samples //! are present, an entry whose `life <= 0` is the *displaced* sample //! that must be evicted on the next firing. //! -//! `grow_n` (semantics: `n += 1`): -//! -//! 1. While the heap's smallest `next_candidate < n_new` (in practice -//! zero or one iteration, since `n` grows by one): -//! 1. Pop `(s, seq_id)`. -//! 2. Push the next candidate for `seq_id` (smallest sample > `s`) -//! back into the heap. -//! 3. Append `(s, life = seq_id - new_position)` at the end of the -//! treap via `push_back`. -//! 4. If the treap now has more than `k` entries (i.e. we displaced -//! a sample), find the rightmost position with `life <= 0` via -//! `find_rightmost_le_zero`, `remove_at` it, and apply -//! `add_life_suffix(p_dead, +1)` so the remaining entries (which -//! shifted left by one) see their `life` increase by one. -//! 2. Set `n = n_new`. -//! -//! Per-call cost: O(log k) expected (heap pop/push + treap ops). +//! Per-call cost: O(log k) expected — amortized constant heap pushes +//! per `grow_n`, plus a single treap pop / push. 
use std::cmp::Reverse; use std::collections::BinaryHeap; -use crate::consistent_hash::ConsistentHashIterator; +use crate::consistent_hash::BucketIterator; use crate::sample_treap::SampleTreap; -use crate::ManySeqBuilder; +use crate::{HashSeqBuilder, ManySeqBuilder}; /// Fast variant of [`crate::ConsistentChooseKHasher`] specialized for /// repeated `grow_n` calls at fixed `k`. See module-level documentation @@ -51,17 +44,18 @@ use crate::ManySeqBuilder; pub struct ConsistentChooseKFastGrowHasher { /// Current universe size. n: usize, - /// Fixed sample count (number of sequences tracked in `next_heap`). + /// Fixed sample count (number of sequences tracked). k: usize, - /// Min-heap of `(next_candidate_sample, seq_id)` keyed by the first - /// component. A `None` next-candidate (sequence exhausted) is *not* - /// pushed back; the heap shrinks instead. + /// Min-heap keyed by `(sample, packed_seq)` where + /// `packed_seq = seq_id * 2 + owner_bit`. The owner bit is set on + /// exactly one entry per seq present in the heap (the largest); when + /// that entry is popped, the seq's next bucket is loaded. next_heap: BinaryHeap>, - /// One long-lived `ConsistentHashIterator` per sequence id, kept - /// positioned just past the seq's most recently popped/pushed sample. - /// Avoids rebuilding the iterator (and re-deriving its bucket state) - /// on every `grow_n` event. - iters: Vec>, + /// Per-seq cached hash builder, used to spin up `BucketIterator`s on + /// refill without re-deriving the builder. + builders: Vec, + /// Per-seq bitmask of buckets not yet pushed into the heap. + bits: Vec, /// Currently-selected samples in insertion order. Each entry's `life` /// is `seq_id - position`; an entry with `life <= 0` is displaced and /// will be evicted on the next firing. @@ -69,27 +63,48 @@ pub struct ConsistentChooseKFastGrowHasher { } impl ConsistentChooseKFastGrowHasher { - /// Create a new instance for `k` sequences with `n = 0`. 
Seeds the - /// heap with each sequence's first sample; the sample treap is empty - /// until `grow_n` is called enough times for samples to fire. + /// Create a new instance for `k` sequences with `n = k`. Seeds the + /// life array from samples `< k` and pushes the first heap-worthy + /// bucket (the bucket containing the first sample `>= k`) into the + /// heap for every seq. /// /// Time: O(k). pub fn new(builder: H, k: usize) -> Self { let mut next_heap = BinaryHeap::with_capacity(k); - let mut iters = Vec::with_capacity(k); + let mut builders = Vec::with_capacity(k); + let mut bits = Vec::with_capacity(k); let mut life = vec![0; k]; for seq in 0..k { - let mut iter = ConsistentHashIterator::new(0, builder.seq_builder(seq)); - loop { - let l = iter.next().expect("seq must yield a sample >= k"); - let sample = l + seq; - if sample >= k { - next_heap.push(Reverse((sample, seq))); - break; + let bld = builder.seq_builder(seq); + let mut seq_bits = bld.bit_mask(); + let mut owner_pushed = false; + // Walk buckets low-bit-first. Push every sample `>= k` into + // the heap, mark the first such (largest in its bucket, since + // BucketIterator yields decreasing) as owner; lower samples + // feed the life array. Stop after the first bucket that + // contributes to the heap; later buckets are kept in + // `seq_bits` for `grow_n` to drain via `refill`. 
+ while seq_bits != 0 && !owner_pushed { + let bit = seq_bits & seq_bits.wrapping_neg(); + seq_bits ^= bit; + let iter = BucketIterator::new(bit as usize * 2, bit, bld.hash_seq(bit)); + for l in iter { + let sample = l + seq; + if sample >= k { + let owner = usize::from(!owner_pushed); + next_heap.push(Reverse((sample, seq * 2 + owner))); + owner_pushed = true; + } else { + life[sample] = l.max(life[sample]); + } } - life[sample] = l.max(life[sample]); } - iters.push(iter); + debug_assert!( + owner_pushed, + "seq {seq} must contribute at least one sample >= k" + ); + bits.push(seq_bits); + builders.push(bld); } let mut samples = SampleTreap::with_capacity(k); for (sample, life) in life.into_iter().enumerate() { @@ -100,7 +115,8 @@ impl ConsistentChooseKFastGrowHasher { n: k, k, next_heap, - iters, + builders, + bits, samples, } } @@ -121,28 +137,22 @@ impl ConsistentChooseKFastGrowHasher { } /// Grow `n` by one and update the choose-k set accordingly. Returns - /// `Some(new_sample)` if a sequence fired (i.e. some sample changed), - /// `None` otherwise. + /// `Some(new_sample)` if a sequence fired (i.e. some sample changed). /// /// Time: O(log k) expected. pub fn grow_n(&mut self) -> Option { loop { - let Reverse((next, seq)) = self + let Reverse((next, packed_seq)) = self .next_heap .pop() - .expect("there are always k elements in the heap!"); - // Advance this seq's cached iterator until the next candidate - // satisfies `>= self.n` (i.e. iter yield `>= self.n - seq`). - // Under the heap invariant the very first `.next()` already - // satisfies the bound, but we keep the inner loop for safety. 
- let threshold = self.n.max(next + 1) - seq; - let after = loop { - let l = self.iters[seq].next().expect("seq must yield more samples"); - if l >= threshold { - break l + seq; - } - }; - self.next_heap.push(Reverse((after, seq))); + .expect("there are always entries in the heap!"); + let seq = packed_seq >> 1; + // If this entry was the owner (largest in heap for `seq`), + // load the seq's next active bucket. We do this whether or + // not the entry is stale: owners always trigger a refill. + if (packed_seq & 1) == 1 { + self.refill(seq); + } if next >= self.n { self.n = next + 1; let pos = self @@ -155,6 +165,40 @@ impl ConsistentChooseKFastGrowHasher { } } } + + /// Push the next active bucket of `seq` into the heap. Skips + /// buckets whose samples are all below `self.n` (would be stale on + /// arrival) and any remaining samples within a straddling bucket + /// that are below `self.n`. Marks the first (largest) pushed sample + /// as the new owner for `seq`. + fn refill(&mut self, seq: usize) { + let bld = &self.builders[seq]; + let n = self.n; + let bits = &mut self.bits[seq]; + while *bits != 0 { + let bit = *bits & bits.wrapping_neg(); + *bits ^= bit; + let iter = BucketIterator::new(bit as usize * 2, bit, bld.hash_seq(bit)); + let mut owner_pushed = false; + for l in iter { + let sample = l + seq; + // BucketIterator yields strictly decreasing, so once we + // drop below `n` everything after is stale too. + if sample < n { + break; + } + let owner = usize::from(!owner_pushed); + self.next_heap.push(Reverse((sample, seq * 2 + owner))); + owner_pushed = true; + } + if owner_pushed { + return; + } + } + // No more buckets — this seq is exhausted. Heap shrinks by one + // until it eventually empties (astronomically rare in practice + // for u64 hash sequences). 
+ } } #[cfg(test)] From 6d5573de802ee2c9a4befdd294de25747ad02d55 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 22 May 2026 14:45:59 +0200 Subject: [PATCH 48/49] Update fast_grow_n.rs --- crates/consistent-choose-k/src/fast_grow_n.rs | 42 +++++++------------ 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/crates/consistent-choose-k/src/fast_grow_n.rs b/crates/consistent-choose-k/src/fast_grow_n.rs index 864d41e..c211deb 100644 --- a/crates/consistent-choose-k/src/fast_grow_n.rs +++ b/crates/consistent-choose-k/src/fast_grow_n.rs @@ -77,30 +77,30 @@ impl ConsistentChooseKFastGrowHasher { for seq in 0..k { let bld = builder.seq_builder(seq); let mut seq_bits = bld.bit_mask(); - let mut owner_pushed = false; + let mut is_owner = true; // Walk buckets low-bit-first. Push every sample `>= k` into // the heap, mark the first such (largest in its bucket, since // BucketIterator yields decreasing) as owner; lower samples // feed the life array. Stop after the first bucket that // contributes to the heap; later buckets are kept in // `seq_bits` for `grow_n` to drain via `refill`. - while seq_bits != 0 && !owner_pushed { + while seq_bits != 0 && is_owner { let bit = seq_bits & seq_bits.wrapping_neg(); seq_bits ^= bit; let iter = BucketIterator::new(bit as usize * 2, bit, bld.hash_seq(bit)); for l in iter { let sample = l + seq; if sample >= k { - let owner = usize::from(!owner_pushed); + let owner = usize::from(is_owner); next_heap.push(Reverse((sample, seq * 2 + owner))); - owner_pushed = true; + is_owner = false; } else { life[sample] = l.max(life[sample]); } } } debug_assert!( - owner_pushed, + !is_owner, "seq {seq} must contribute at least one sample >= k" ); bits.push(seq_bits); @@ -173,31 +173,17 @@ impl ConsistentChooseKFastGrowHasher { /// as the new owner for `seq`. 
fn refill(&mut self, seq: usize) { let bld = &self.builders[seq]; - let n = self.n; let bits = &mut self.bits[seq]; - while *bits != 0 { - let bit = *bits & bits.wrapping_neg(); - *bits ^= bit; - let iter = BucketIterator::new(bit as usize * 2, bit, bld.hash_seq(bit)); - let mut owner_pushed = false; - for l in iter { - let sample = l + seq; - // BucketIterator yields strictly decreasing, so once we - // drop below `n` everything after is stale too. - if sample < n { - break; - } - let owner = usize::from(!owner_pushed); - self.next_heap.push(Reverse((sample, seq * 2 + owner))); - owner_pushed = true; - } - if owner_pushed { - return; - } + let bit = *bits & bits.wrapping_neg(); + *bits ^= bit; + let iter = BucketIterator::new(bit as usize * 2, bit, bld.hash_seq(bit)); + let mut is_owner = true; + for l in iter { + let sample = l + seq; + let owner = usize::from(is_owner); + self.next_heap.push(Reverse((sample, seq * 2 + owner))); + is_owner = false; } - // No more buckets — this seq is exhausted. Heap shrinks by one - // until it eventually empties (astronomically rare in practice - // for u64 hash sequences). } } From 642b62e5b9c22c79a07ad68bf010de2da001a9d2 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 22 May 2026 15:17:52 +0200 Subject: [PATCH 49/49] Update README.md --- crates/consistent-choose-k/README.md | 61 ++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/crates/consistent-choose-k/README.md b/crates/consistent-choose-k/README.md index 4270d68..f97a30d 100644 --- a/crates/consistent-choose-k/README.md +++ b/crates/consistent-choose-k/README.md @@ -147,6 +147,67 @@ replication: it is the unique `k`-out-of-`n` growth process where each new node joins the active set with probability `k/(n+1)`, evicts at most one old node, and preserves a uniform active set after every growth step. 
+## Fast incremental hashers + +For workloads that repeatedly grow or shrink `n` at a fixed `k`, this crate +ships two stateful specializations of `ConsistentChooseKHasher` whose +amortized per-step cost is `O(log k)` rather than `O(k log k)` for a fresh +`new_with_k`. They share the same input contract and produce the same set +as the stateless version after every step — they just avoid recomputing +the whole ranking from scratch. + +| Type | Optimized for | Per-step cost | Construction | +|---------------------------------|---------------|---------------|--------------| +| `ConsistentChooseKHasher` | one-shot | `O(k log k)` total | `new_with_k(builder, n, k)` | +| `ConsistentChooseKFastHasher` | repeated `shrink_n` | `O(log k)` amortized | `new_with_k(builder, n, k)` | +| `ConsistentChooseKFastGrowHasher` | repeated `grow_n` | `O(log k)` amortized | `new(builder, k)` starts at `n = k` | + +### Why two specializations? + +`grow_n` and `shrink_n` keep different invariants, so they want different +in-memory layouts and cannot easily share a single representation: + +* **`ConsistentChooseKFastHasher`** (shrink): stores samples sorted by + value, with a per-position count of "how many already-selected + smaller samples block this slot". `shrink_n` is then a logarithmic + segment-tree descent to the displaced slot. +* **`ConsistentChooseKFastGrowHasher`** (grow): stores samples in + insertion order, with a per-sample "life" `= seq_id - position`. + An entry whose `life ≤ 0` is the slot to evict on the next firing. + `grow_n` is a heap pop, a logarithmic rightmost-non-positive query + on the sample structure, and a constant-time append. + +Both are backed by an implicit-tree `SampleTreap` that supports +`O(log k)` per operation for: insert, remove, point queries, +range life additions with lazy propagation, and rightmost-`life ≤ 0` +search via subtree-min augmentation. 
+ +### Sequence iteration: bucket-batch heap + +Internally each of the `k` consistent-hash sequences advances through +disjoint **buckets** covering value ranges `[b, 2b)` for powers of two +`b`. The fast-grow hasher keeps a single min-heap keyed by +`(sample, packed_seq)` where `packed_seq = seq * 2 + owner_bit`. When +a seq's next bucket is loaded, every sample it produces is pushed at +once, and the largest one is tagged with the owner bit. Popping that +tagged entry is the signal to load the seq's next bucket. This +amortizes the per-pop cost of the hash sequence over an entire bucket +and avoids reconstructing per-seq iterator state. + +### When to use which + +* **Reservoir-style growth** (e.g. ingesting a stream and maintaining + a uniform top-`k`): use `ConsistentChooseKFastGrowHasher`. It is the + realization of the `O(k)`-per-step `grow_n` foreshadowed in the + reservoir-sampling section, and is competitive with Algorithm R + while remaining deterministically reproducible from `key` alone. + (Algorithm L's geometric-skip stays faster when *all* you need is + the sample; the choose-k variant pays for being history-independent.) +* **Shrinking cluster / load-shedding**: use + `ConsistentChooseKFastHasher`. +* **Single point lookup** (one `(n, k)` per key, no follow-up): + stick with the stateless `ConsistentChooseKHasher`. + ## N-Choose-K replication We define the consistent `n-choose-k` replication as follows: