Skip to content

Commit 4c89a72

Browse files
authored
Improve dithering CPU usage (#866)
1 parent b125659 commit 4c89a72

4 files changed

Lines changed: 40 additions & 12 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Changed
1111
- Include build profile in the displayed version information
12+
- [playback] Improve dithering CPU usage by about 33%
1213

1314
### Fixed
1415
- [connect] Partly fix behavior after last track of an album/playlist

Cargo.lock

Lines changed: 12 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

playback/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ ogg = "0.8"
4949
# Dithering
5050
rand = "0.8"
5151
rand_distr = "0.4"
52+
rand_xoshiro = "0.6"
5253

5354
[features]
5455
alsa-backend = ["alsa"]

playback/src/dither.rs

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use rand::rngs::ThreadRng;
1+
use rand::SeedableRng;
22
use rand_distr::{Distribution, Normal, Triangular, Uniform};
33
use std::fmt;
44

@@ -41,20 +41,36 @@ impl fmt::Display for dyn Ditherer {
4141
}
4242
}
4343

44-
// Implementation note: we save the handle to ThreadRng so it doesn't require
45-
// a lookup on each call (which is on each sample!). This is ~2.5x as fast.
46-
// Downside is that it is not Send so we cannot move it around player threads.
44+
// `SmallRng` is 33% faster than `ThreadRng`, but we can do even better.
45+
// `SmallRng` defaults to `Xoshiro256PlusPlus` on 64-bit platforms and
46+
// `Xoshiro128PlusPlus` on 32-bit platforms. These are excellent for the
47+
// general case. In our case of just 64-bit floating-point numbers, we can make
48+
// some optimizations. Compared to `SmallRng`, these hand-picked generators
49+
// improve performance by another 9% on 64-bit platforms and 2% on 32-bit
50+
// platforms.
4751
//
52+
// For reference, see https://prng.di.unimi.it. Note that we do not use
53+
// `Xoroshiro128Plus` or `Xoshiro128Plus` because they display low linear
54+
// complexity in the lower four bits, which is not what we want:
55+
// linearization is the very point of dithering.
56+
#[cfg(target_pointer_width = "64")]
57+
type Rng = rand_xoshiro::Xoshiro256Plus;
58+
#[cfg(not(target_pointer_width = "64"))]
59+
type Rng = rand_xoshiro::Xoshiro128StarStar;
60+
61+
fn create_rng() -> Rng {
62+
Rng::from_entropy()
63+
}
4864

4965
pub struct TriangularDitherer {
50-
cached_rng: ThreadRng,
66+
cached_rng: Rng,
5167
distribution: Triangular<f64>,
5268
}
5369

5470
impl Ditherer for TriangularDitherer {
5571
fn new() -> Self {
5672
Self {
57-
cached_rng: rand::thread_rng(),
73+
cached_rng: create_rng(),
5874
// 2 LSB peak-to-peak needed to linearize the response:
5975
distribution: Triangular::new(-1.0, 1.0, 0.0).unwrap(),
6076
}
@@ -74,14 +90,14 @@ impl TriangularDitherer {
7490
}
7591

7692
pub struct GaussianDitherer {
77-
cached_rng: ThreadRng,
93+
cached_rng: Rng,
7894
distribution: Normal<f64>,
7995
}
8096

8197
impl Ditherer for GaussianDitherer {
8298
fn new() -> Self {
8399
Self {
84-
cached_rng: rand::thread_rng(),
100+
cached_rng: create_rng(),
85101
// 1/2 LSB RMS needed to linearize the response:
86102
distribution: Normal::new(0.0, 0.5).unwrap(),
87103
}
@@ -103,7 +119,7 @@ impl GaussianDitherer {
103119
pub struct HighPassDitherer {
104120
active_channel: usize,
105121
previous_noises: [f64; NUM_CHANNELS],
106-
cached_rng: ThreadRng,
122+
cached_rng: Rng,
107123
distribution: Uniform<f64>,
108124
}
109125

@@ -112,7 +128,7 @@ impl Ditherer for HighPassDitherer {
112128
Self {
113129
active_channel: 0,
114130
previous_noises: [0.0; NUM_CHANNELS],
115-
cached_rng: rand::thread_rng(),
131+
cached_rng: create_rng(),
116132
distribution: Uniform::new_inclusive(-0.5, 0.5), // 1 LSB +/- 1 LSB (previous) = 2 LSB
117133
}
118134
}

0 commit comments

Comments
 (0)