Skip to content

Commit

Permalink
Increase performance of stereo_to_mono fn ~4x
Browse files Browse the repository at this point in the history
Benches show average execution time
going from ~650ns to ~175ns,
 with possible reductions further to 160ns
if some accuracy loss is acceptable, by using bitshifts instead.
Given the already extreme gains,
saving an extra 15ns isn't worth being up to ±2 off in the final answer
  • Loading branch information
tazz4843 committed Sep 16, 2024
1 parent f6e41db commit 8f9f2fb
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 12 deletions.
2 changes: 1 addition & 1 deletion scripty_stt/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#![feature(slice_as_chunks)]

#![feature(portable_simd)]
#[macro_use]
extern crate tracing;

Expand Down
60 changes: 49 additions & 11 deletions scripty_stt/src/process_audio.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::simd::i16x64;

use dasp_interpolate::linear::Linear;
use dasp_signal::{from_iter, interpolate::Converter, Signal};

Expand Down Expand Up @@ -30,15 +32,55 @@ pub fn process_audio(
};

if channel_count == 2 {
stereo_to_mono(&src)
stereo_to_mono_simd(&src)
} else if channel_count != 1 {
panic!("Invalid channel count: {}", channel_count)
} else {
src
}
}

pub fn stereo_to_mono(src: &[i16]) -> Vec<i16> {
/// SIMD vector type used by `stereo_to_mono_simd`: 64 lanes of `i16`.
type SimdBitType = i16x64;

/// Number of `i16` lanes in one `SimdBitType` vector.
/// NOTE: despite the name this is a lane (element) count, not a width in bits.
const BIT_WIDTH: usize = SimdBitType::LEN;
/// Number of interleaved input samples consumed per SIMD iteration:
/// one full vector of lanes for each of the two channels.
const TWICE_BIT_WIDTH: usize = BIT_WIDTH * 2;

// Compile-time check that the lane count is even; the build fails otherwise.
// noinspection RsAssertEqual
const _: () = assert!(BIT_WIDTH % 2 == 0);
/// Downmixes interleaved stereo `i16` samples to mono using SIMD arithmetic.
///
/// The input is consumed in blocks of `TWICE_BIT_WIDTH` samples. Each block is
/// de-interleaved into two per-channel arrays of `BIT_WIDTH` samples, both are
/// halved lane-wise and then added together. Halving before adding keeps every
/// sum within the `i16` range.
///
/// Samples that do not fill a complete block are handed to
/// `stereo_to_mono_normal`, and its output is appended to the result.
pub fn stereo_to_mono_simd(samples: &[i16]) -> Vec<i16> {
    let halver = SimdBitType::splat(2);
    let (blocks, tail) = samples.as_chunks::<TWICE_BIT_WIDTH>();

    let mut mono = Vec::with_capacity(samples.len() / 2);

    for block in blocks {
        // Split the block into its two-sample frames.
        let (frames, leftover) = block.as_chunks::<2>();
        if !leftover.is_empty() {
            unreachable!(
                "Remainder array should always be empty if taking chunks of size 2 from an array \
                 whose length is divisible by 2"
            )
        }
        assert_eq!(frames.len(), BIT_WIDTH);

        // De-interleave: first sample of each frame into one array, second into the other.
        let mut first_channel = [0i16; BIT_WIDTH];
        let mut second_channel = [0i16; BIT_WIDTH];
        for ((first, second), frame) in first_channel
            .iter_mut()
            .zip(second_channel.iter_mut())
            .zip(frames)
        {
            *first = frame[0];
            *second = frame[1];
        }

        // Halve each channel lane-wise, then sum the halves.
        let halved_first = SimdBitType::from_array(first_channel) / halver;
        let halved_second = SimdBitType::from_array(second_channel) / halver;
        let averaged = halved_first + halved_second;

        mono.extend_from_slice(&averaged.to_array());
    }

    // Whatever did not fill a whole SIMD block goes through the scalar path.
    mono.extend(stereo_to_mono_normal(tail));

    mono
}

pub fn stereo_to_mono_normal(src: &[i16]) -> Vec<i16> {
// note: we're not doing this the normal way, because in release mode, there are no arithmetic overflow checks
// so we divide the samples by two, and then add them together to get the mono sample
// this causes a mild distortion, but it's not noticeable (since it only affects the LSB)
Expand All @@ -48,13 +90,9 @@ pub fn stereo_to_mono(src: &[i16]) -> Vec<i16> {
trace!("input does not have an even number of samples, ignoring extra samples");
}

let mut dst = Vec::with_capacity(src.len() / 2);
for sample_pair in chunks.0 {
// SAFETY: the length of the chunk is defined at compile time, so we can safely index into it up to two elements
let s1 = unsafe { sample_pair.get_unchecked(0) };
let s2 = unsafe { sample_pair.get_unchecked(1) };
// see the notes above for why we're doing it this way
dst.push((s1 / 2) + (s2 / 2));
}
dst
chunks
.0
.iter()
.map(|[first, last]| (first / 2) + (last / 2))
.collect()
}

0 comments on commit 8f9f2fb

Please sign in to comment.