Skip to content

Commit 616809b

Browse files
authored
Quantum-realm level normalisation optimization (#965)
This saves up to 1-2% CPU usage on a Pi 4 depending on how much normalisation is actually being done. * We don't need to test against EPSILON. The factor will never be over 1.0 in basic normalisation mode. * Don't check the normalisation mode EVERY sample. * Do as little math as possible by simplifying all equations as much as possible (while still retaining the textbook equations in comments). * Misc cleanup
1 parent cb194cf commit 616809b

3 files changed

Lines changed: 98 additions & 99 deletions

File tree

playback/src/convert.rs

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,15 @@ pub struct Converter {
2323

2424
impl Converter {
2525
pub fn new(dither_config: Option<DithererBuilder>) -> Self {
26-
if let Some(ref ditherer_builder) = dither_config {
27-
let ditherer = (ditherer_builder)();
28-
info!("Converting with ditherer: {}", ditherer.name());
29-
Self {
30-
ditherer: Some(ditherer),
26+
match dither_config {
27+
Some(ditherer_builder) => {
28+
let ditherer = (ditherer_builder)();
29+
info!("Converting with ditherer: {}", ditherer.name());
30+
Self {
31+
ditherer: Some(ditherer),
32+
}
3133
}
32-
} else {
33-
Self { ditherer: None }
34+
None => Self { ditherer: None },
3435
}
3536
}
3637

@@ -52,18 +53,15 @@ impl Converter {
5253
const SCALE_S16: f64 = 32768.;
5354

5455
pub fn scale(&mut self, sample: f64, factor: f64) -> f64 {
55-
let dither = match self.ditherer {
56-
Some(ref mut d) => d.noise(),
57-
None => 0.0,
58-
};
59-
6056
// From the many float to int conversion methods available, match what
6157
// the reference Vorbis implementation uses: sample * 32768 (for 16 bit)
62-
let int_value = sample * factor + dither;
6358

6459
// Casting float to integer rounds towards zero by default, i.e. it
6560
// truncates, and that generates larger error than rounding to nearest.
66-
int_value.round()
61+
match self.ditherer.as_mut() {
62+
Some(d) => (sample * factor + d.noise()).round(),
63+
None => (sample * factor).round(),
64+
}
6765
}
6866

6967
// Special case for samples packed in a word of greater bit depth (e.g.
@@ -79,11 +77,12 @@ impl Converter {
7977
let max = factor - 1.0;
8078

8179
if int_value < min {
82-
return min;
80+
min
8381
} else if int_value > max {
84-
return max;
82+
max
83+
} else {
84+
int_value
8585
}
86-
int_value
8786
}
8887

8988
pub fn f64_to_f32(&mut self, samples: &[f64]) -> Vec<f32> {
@@ -109,12 +108,7 @@ impl Converter {
109108
pub fn f64_to_s24_3(&mut self, samples: &[f64]) -> Vec<i24> {
110109
samples
111110
.iter()
112-
.map(|sample| {
113-
// Not as DRY as calling f32_to_s24 first, but this saves iterating
114-
// over all samples twice.
115-
let int_value = self.clamping_scale(*sample, Self::SCALE_S24) as i32;
116-
i24::from_s24(int_value)
117-
})
111+
.map(|sample| i24::from_s24(self.clamping_scale(*sample, Self::SCALE_S24) as i32))
118112
.collect()
119113
}
120114

playback/src/dither.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use rand::SeedableRng;
33
use rand_distr::{Distribution, Normal, Triangular, Uniform};
44
use std::fmt;
55

6-
const NUM_CHANNELS: usize = 2;
6+
use crate::NUM_CHANNELS;
77

88
// Dithering lowers digital-to-analog conversion ("requantization") error,
99
// linearizing output, lowering distortion and replacing it with a constant,
@@ -102,7 +102,7 @@ impl GaussianDitherer {
102102

103103
pub struct HighPassDitherer {
104104
active_channel: usize,
105-
previous_noises: [f64; NUM_CHANNELS],
105+
previous_noises: [f64; NUM_CHANNELS as usize],
106106
cached_rng: SmallRng,
107107
distribution: Uniform<f64>,
108108
}
@@ -111,7 +111,7 @@ impl Ditherer for HighPassDitherer {
111111
fn new() -> Self {
112112
Self {
113113
active_channel: 0,
114-
previous_noises: [0.0; NUM_CHANNELS],
114+
previous_noises: [0.0; NUM_CHANNELS as usize],
115115
cached_rng: create_rng(),
116116
distribution: Uniform::new_inclusive(-0.5, 0.5), // 1 LSB +/- 1 LSB (previous) = 2 LSB
117117
}

playback/src/player.rs

Lines changed: 78 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -760,7 +760,16 @@ impl PlayerTrackLoader {
760760
position_ms: u32,
761761
) -> Option<PlayerLoadedTrackData> {
762762
let audio = match AudioItem::get_audio_item(&self.session, spotify_id).await {
763-
Ok(audio) => audio,
763+
Ok(audio) => match self.find_available_alternative(audio).await {
764+
Some(audio) => audio,
765+
None => {
766+
warn!(
767+
"<{}> is not available",
768+
spotify_id.to_uri().unwrap_or_default()
769+
);
770+
return None;
771+
}
772+
},
764773
Err(e) => {
765774
error!("Unable to load audio item: {:?}", e);
766775
return None;
@@ -769,17 +778,6 @@ impl PlayerTrackLoader {
769778

770779
info!("Loading <{}> with Spotify URI <{}>", audio.name, audio.uri);
771780

772-
let audio = match self.find_available_alternative(audio).await {
773-
Some(audio) => audio,
774-
None => {
775-
warn!(
776-
"<{}> is not available",
777-
spotify_id.to_uri().unwrap_or_default()
778-
);
779-
return None;
780-
}
781-
};
782-
783781
if audio.duration < 0 {
784782
error!(
785783
"Track duration for <{}> cannot be {}",
@@ -809,26 +807,24 @@ impl PlayerTrackLoader {
809807
],
810808
};
811809

812-
let entry = formats.iter().find_map(|format| {
813-
if let Some(&file_id) = audio.files.get(format) {
814-
Some((*format, file_id))
815-
} else {
816-
None
817-
}
818-
});
819-
820-
let (format, file_id) = match entry {
821-
Some(t) => t,
822-
None => {
823-
warn!("<{}> is not available in any supported format", audio.name);
824-
return None;
825-
}
826-
};
810+
let (format, file_id) =
811+
match formats
812+
.iter()
813+
.find_map(|format| match audio.files.get(format) {
814+
Some(&file_id) => Some((*format, file_id)),
815+
_ => None,
816+
}) {
817+
Some(t) => t,
818+
None => {
819+
warn!("<{}> is not available in any supported format", audio.name);
820+
return None;
821+
}
822+
};
827823

828824
let bytes_per_second = self.stream_data_rate(format);
829825
let play_from_beginning = position_ms == 0;
830826

831-
// This is only a loop to be able to reload the file if an error occured
827+
// This is only a loop to be able to reload the file if an error occurred
832828
// while opening a cached file.
833829
loop {
834830
let encrypted_file = AudioFile::open(
@@ -1321,25 +1317,30 @@ impl PlayerInternal {
13211317
// For the basic normalisation method, a normalisation factor of 1.0 indicates that
13221318
// there is nothing to normalise (all samples should pass unaltered). For the
13231319
// dynamic method, there may still be peaks that we want to shave off.
1324-
if self.config.normalisation
1325-
&& !(f64::abs(normalisation_factor - 1.0) <= f64::EPSILON
1326-
&& self.config.normalisation_method == NormalisationMethod::Basic)
1327-
{
1328-
// zero-cost shorthands
1329-
let threshold_db = self.config.normalisation_threshold_dbfs;
1330-
let knee_db = self.config.normalisation_knee_db;
1331-
let attack_cf = self.config.normalisation_attack_cf;
1332-
let release_cf = self.config.normalisation_release_cf;
1333-
1334-
for sample in data.iter_mut() {
1335-
*sample *= normalisation_factor; // for both the basic and dynamic limiter
1336-
1337-
// Feedforward limiter in the log domain
1338-
// After: Giannoulis, D., Massberg, M., & Reiss, J.D. (2012). Digital Dynamic
1339-
// Range Compressor Design—A Tutorial and Analysis. Journal of The Audio
1340-
// Engineering Society, 60, 399-408.
1341-
if self.config.normalisation_method == NormalisationMethod::Dynamic
1342-
{
1320+
if self.config.normalisation {
1321+
if self.config.normalisation_method == NormalisationMethod::Basic
1322+
&& normalisation_factor < 1.0
1323+
{
1324+
for sample in data.iter_mut() {
1325+
*sample *= normalisation_factor;
1326+
}
1327+
} else if self.config.normalisation_method
1328+
== NormalisationMethod::Dynamic
1329+
{
1330+
// zero-cost shorthands
1331+
let threshold_db = self.config.normalisation_threshold_dbfs;
1332+
let knee_db = self.config.normalisation_knee_db;
1333+
let attack_cf = self.config.normalisation_attack_cf;
1334+
let release_cf = self.config.normalisation_release_cf;
1335+
1336+
for sample in data.iter_mut() {
1337+
*sample *= normalisation_factor;
1338+
1339+
// Feedforward limiter in the log domain
1340+
// After: Giannoulis, D., Massberg, M., & Reiss, J.D. (2012). Digital Dynamic
1341+
// Range Compressor Design—A Tutorial and Analysis. Journal of The Audio
1342+
// Engineering Society, 60, 399-408.
1343+
13431344
// Some tracks have samples that are precisely 0.0. That's silence
13441345
// and we know we don't need to limit that, in which we can spare
13451346
// the CPU cycles.
@@ -1348,22 +1349,26 @@ impl PlayerInternal {
13481349
// peak detector stuck. Also catch the unlikely case where a sample
13491350
// is decoded as `NaN` or some other non-normal value.
13501351
let limiter_db = if sample.is_normal() {
1351-
// step 1-2: half-wave rectification and conversion into dB
1352-
let abs_sample_db = ratio_to_db(sample.abs());
1353-
1354-
// step 3-4: gain computer with soft knee and subtractor
1355-
let bias_db = abs_sample_db - threshold_db;
1352+
// step 1-4: half-wave rectification and conversion into dB
1353+
// and gain computer with soft knee and subtractor
1354+
let bias_db = ratio_to_db(sample.abs()) - threshold_db;
13561355
let knee_boundary_db = bias_db * 2.0;
13571356

13581357
if knee_boundary_db < -knee_db {
13591358
0.0
13601359
} else if knee_boundary_db.abs() <= knee_db {
1361-
abs_sample_db
1362-
- (abs_sample_db
1363-
- (bias_db + knee_db / 2.0).powi(2)
1364-
/ (2.0 * knee_db))
1360+
// The textbook equation:
1361+
// ratio_to_db(sample.abs()) - (ratio_to_db(sample.abs()) - (bias_db + knee_db / 2.0).powi(2) / (2.0 * knee_db))
1362+
// Simplifies to:
1363+
// ((2.0 * bias_db) + knee_db).powi(2) / (8.0 * knee_db)
1364+
// Which in our case further simplifies to:
1365+
// (knee_boundary_db + knee_db).powi(2) / (8.0 * knee_db)
1366+
// because knee_boundary_db is 2.0 * bias_db.
1367+
(knee_boundary_db + knee_db).powi(2) / (8.0 * knee_db)
13651368
} else {
1366-
abs_sample_db - threshold_db
1369+
// Textbook:
1370+
// ratio_to_db(sample.abs()) - threshold_db, which is already our bias_db.
1371+
bias_db
13671372
}
13681373
} else {
13691374
0.0
@@ -1377,14 +1382,24 @@ impl PlayerInternal {
13771382
|| self.normalisation_peak > 0.0
13781383
{
13791384
// step 5: smooth, decoupled peak detector
1385+
// Textbook:
1386+
// release_cf * self.normalisation_integrator + (1.0 - release_cf) * limiter_db
1387+
// Simplifies to:
1388+
// release_cf * self.normalisation_integrator - release_cf * limiter_db + limiter_db
13801389
self.normalisation_integrator = f64::max(
13811390
limiter_db,
13821391
release_cf * self.normalisation_integrator
1383-
+ (1.0 - release_cf) * limiter_db,
1392+
- release_cf * limiter_db
1393+
+ limiter_db,
13841394
);
1395+
// Textbook:
1396+
// attack_cf * self.normalisation_peak + (1.0 - attack_cf) * self.normalisation_integrator
1397+
// Simplifies to:
1398+
// attack_cf * self.normalisation_peak - attack_cf * self.normalisation_integrator + self.normalisation_integrator
13851399
self.normalisation_peak = attack_cf
13861400
* self.normalisation_peak
1387-
+ (1.0 - attack_cf) * self.normalisation_integrator;
1401+
- attack_cf * self.normalisation_integrator
1402+
+ self.normalisation_integrator;
13881403

13891404
// step 6: make-up gain applied later (volume attenuation)
13901405
// Applying the standard normalisation factor here won't work,
@@ -1897,15 +1912,8 @@ impl PlayerInternal {
18971912
}
18981913

18991914
fn send_event(&mut self, event: PlayerEvent) {
1900-
let mut index = 0;
1901-
while index < self.event_senders.len() {
1902-
match self.event_senders[index].send(event.clone()) {
1903-
Ok(_) => index += 1,
1904-
Err(_) => {
1905-
self.event_senders.remove(index);
1906-
}
1907-
}
1908-
}
1915+
self.event_senders
1916+
.retain(|sender| sender.send(event.clone()).is_ok());
19091917
}
19101918

19111919
fn load_track(
@@ -2079,10 +2087,7 @@ impl<T: Read + Seek> Seek for Subfile<T> {
20792087
};
20802088

20812089
let newpos = self.stream.seek(pos)?;
2082-
if newpos > self.offset {
2083-
Ok(newpos - self.offset)
2084-
} else {
2085-
Ok(0)
2086-
}
2090+
2091+
Ok(newpos.saturating_sub(self.offset))
20872092
}
20882093
}

0 commit comments

Comments
 (0)