Skip to content

Commit

Permalink
⚡ faster parallel searchsorted (#15)
Browse files Browse the repository at this point in the history
  • Loading branch information
jvdd authored Jan 17, 2023
1 parent 108c557 commit 1b08886
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 72 deletions.
55 changes: 27 additions & 28 deletions downsample_rs/src/m4/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use ndarray::{s, Array1, ArrayView1};

use rayon::iter::IndexedParallelIterator;
use rayon::prelude::*;
use std::sync::{Arc, Mutex};

// --------------------- WITHOUT X

Expand Down Expand Up @@ -134,7 +133,7 @@ pub(crate) fn m4_generic_with_x<T: Copy>(
#[inline(always)]
pub(crate) fn m4_generic_with_x_parallel<T: Copy + PartialOrd + Send + Sync>(
arr: ArrayView1<T>,
bin_idx_iterator: impl IndexedParallelIterator<Item = (usize, usize)>,
bin_idx_iterator: impl IndexedParallelIterator<Item = impl Iterator<Item = (usize, usize)>>,
n_out: usize,
f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
) -> Array1<usize> {
Expand All @@ -143,30 +142,30 @@ pub(crate) fn m4_generic_with_x_parallel<T: Copy + PartialOrd + Send + Sync>(
return Array1::from((0..arr.len()).collect::<Vec<usize>>());
}

let sampled_indices = Arc::new(Mutex::new(Array1::<usize>::default(n_out)));

// Iterate over the sample_index pointers and the array chunks
bin_idx_iterator
.enumerate()
.for_each(|(i, (start_idx, end_idx))| {
let (min_index, max_index) = f_argminmax(arr.slice(s![start_idx..end_idx]));

sampled_indices.lock().unwrap()[4 * i] = start_idx;

// Add the indexes in sorted order
if min_index < max_index {
sampled_indices.lock().unwrap()[4 * i + 1] = min_index + start_idx;
sampled_indices.lock().unwrap()[4 * i + 2] = max_index + start_idx;
} else {
sampled_indices.lock().unwrap()[4 * i + 1] = max_index + start_idx;
sampled_indices.lock().unwrap()[4 * i + 2] = min_index + start_idx;
}
sampled_indices.lock().unwrap()[4 * i + 3] = end_idx - 1;
});

// Remove the mutex and return the sampled indices
Arc::try_unwrap(sampled_indices)
.unwrap()
.into_inner()
.unwrap()
Array1::from_vec(
bin_idx_iterator
.flat_map(|bin_idx_iterator| {
bin_idx_iterator
.map(|(start, end)| {
let step = unsafe {
ArrayView1::from_shape_ptr(end - start, arr.as_ptr().add(start))
};
let (min_index, max_index) = f_argminmax(step);

// Add the indexes in sorted order
let mut sampled_index = [start, 0, 0, end - 1];
if min_index < max_index {
sampled_index[1] = min_index + start;
sampled_index[2] = max_index + start;
} else {
sampled_index[1] = max_index + start;
sampled_index[2] = min_index + start;
}
sampled_index
})
.collect::<Vec<[usize; 4]>>()
})
.flatten()
.collect::<Vec<usize>>(),
)
}
51 changes: 27 additions & 24 deletions downsample_rs/src/minmax/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use ndarray::{s, Array1, ArrayView1};

use rayon::iter::IndexedParallelIterator;
use rayon::prelude::*;
use std::sync::{Arc, Mutex};

// --------------------- WITHOUT X

Expand Down Expand Up @@ -123,7 +122,7 @@ pub(crate) fn min_max_generic_with_x<T: Copy>(
#[inline(always)]
pub(crate) fn min_max_generic_with_x_parallel<T: Copy + Send + Sync>(
arr: ArrayView1<T>,
bin_idx_iterator: impl IndexedParallelIterator<Item = (usize, usize)>,
bin_idx_iterator: impl IndexedParallelIterator<Item = impl Iterator<Item = (usize, usize)>>,
n_out: usize,
f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
) -> Array1<usize> {
Expand All @@ -132,26 +131,30 @@ pub(crate) fn min_max_generic_with_x_parallel<T: Copy + Send + Sync>(
return Array1::from((0..arr.len()).collect::<Vec<usize>>());
}

// Create a mutex to store the sampled indices
let sampled_indices = Arc::new(Mutex::new(Array1::<usize>::default(n_out)));

// Iterate over the bins
bin_idx_iterator.enumerate().for_each(|(i, (start, end))| {
let (min_index, max_index) = f_argminmax(arr.slice(s![start..end]));

// Add the indexes in sorted order
if min_index < max_index {
sampled_indices.lock().unwrap()[2 * i] = min_index + start;
sampled_indices.lock().unwrap()[2 * i + 1] = max_index + start;
} else {
sampled_indices.lock().unwrap()[2 * i] = max_index + start;
sampled_indices.lock().unwrap()[2 * i + 1] = min_index + start;
}
});

// Remove the mutex and return the sampled indices
Arc::try_unwrap(sampled_indices)
.unwrap()
.into_inner()
.unwrap()
Array1::from_vec(
bin_idx_iterator
.flat_map(|bin_idx_iterator| {
bin_idx_iterator
.map(|(start, end)| {
let step = unsafe {
ArrayView1::from_shape_ptr(end - start, arr.as_ptr().add(start))
};
let (min_index, max_index) = f_argminmax(step);

// Add the indexes in sorted order
let mut sampled_index = [0, 0];
if min_index < max_index {
sampled_index[0] = min_index + start;
sampled_index[1] = max_index + start;
} else {
sampled_index[0] = max_index + start;
sampled_index[1] = min_index + start;
}
sampled_index
})
.collect::<Vec<[usize; 2]>>()
})
.flatten()
.collect::<Vec<usize>>(),
)
}
2 changes: 1 addition & 1 deletion downsample_rs/src/minmax/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ where
Tx: Num + FromPrimitive + AsPrimitive<f64> + Send + Sync,
Ty: Copy + PartialOrd + Send + Sync,
{
assert_eq!(n_out % 2, 0);
assert_eq!(n_out % 2, 0); // TODO can be faster (check last bit)
let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2);
min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, |arr| arr.argminmax())
}
Expand Down
67 changes: 50 additions & 17 deletions downsample_rs/src/searchsorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@ use ndarray::ArrayView1;

use rayon::iter::IndexedParallelIterator;
use rayon::prelude::*;
use std::thread::available_parallelism;

use super::types::Num;
use num_traits::{AsPrimitive, FromPrimitive};

// ---------------------- Binary search ----------------------

// #[inline(always)]
fn binary_search<T: PartialOrd>(arr: ArrayView1<T>, value: T, left: usize, right: usize) -> usize {
fn binary_search<T: Copy + PartialOrd>(
arr: ArrayView1<T>,
value: T,
left: usize,
right: usize,
) -> usize {
let mut size: usize = right - left;
let mut left: usize = left;
let mut right: usize = right;
Expand All @@ -27,7 +33,7 @@ fn binary_search<T: PartialOrd>(arr: ArrayView1<T>, value: T, left: usize, right
}

// #[inline(always)]
fn binary_search_with_mid<T: PartialOrd>(
fn binary_search_with_mid<T: Copy + PartialOrd>(
arr: ArrayView1<T>,
value: T,
left: usize,
Expand Down Expand Up @@ -69,17 +75,17 @@ where
(arr[arr.len() - 1].as_() / nb_bins as f64) - (arr[0].as_() / nb_bins as f64);
let idx_step: usize = arr.len() / nb_bins; // used to pre-guess the mid index
let mut value: f64 = arr[0].as_(); // Search value
let mut idx = 0; // Index of the search value
let mut idx: usize = 0; // Index of the search value
(0..nb_bins).map(move |_| {
let start_idx = idx; // Start index of the bin (previous end index)
let start_idx: usize = idx; // Start index of the bin (previous end index)
value += val_step;
let mid = idx + idx_step;
let mid: usize = idx + idx_step;
let mid = if mid < arr.len() - 1 {
mid
} else {
arr.len() - 2 // TODO: arr.len() - 1 gives error I thought...
};
let search_value = T::from_f64(value).unwrap();
let search_value: T = T::from_f64(value).unwrap();
// Implementation WITHOUT pre-guessing mid is slower!!
// idx = binary_search(arr, search_value, idx, arr.len()-1);
idx = binary_search_with_mid(arr, search_value, idx, arr.len() - 1, mid); // End index of the bin
Expand All @@ -102,7 +108,7 @@ fn sequential_add_mul(start_val: f64, add_val: f64, mul: usize) -> f64 {
pub(crate) fn get_equidistant_bin_idx_iterator_parallel<T>(
arr: ArrayView1<T>,
nb_bins: usize,
) -> impl IndexedParallelIterator<Item = (usize, usize)> + '_
) -> impl IndexedParallelIterator<Item = impl Iterator<Item = (usize, usize)> + '_> + '_
where
T: Num + FromPrimitive + AsPrimitive<f64> + Sync + Send,
{
Expand All @@ -111,14 +117,35 @@ where
let val_step: f64 =
(arr[arr.len() - 1].as_() / nb_bins as f64) - (arr[0].as_() / nb_bins as f64);
let arr0: f64 = arr[0].as_();
(0..nb_bins).into_par_iter().map(move |i| {
let start_value = sequential_add_mul(arr0, val_step, i);
let end_value = start_value + val_step;
let start_value = T::from_f64(start_value).unwrap();
let end_value = T::from_f64(end_value).unwrap();
let start_idx = binary_search(arr, start_value, 0, arr.len() - 1);
let end_idx = binary_search(arr, end_value, start_idx, arr.len() - 1);
(start_idx, end_idx)
let nb_threads = available_parallelism().map(|x| x.get()).unwrap_or(1);
let nb_threads = if nb_threads > nb_bins {
nb_bins
} else {
nb_threads
};
let nb_bins_per_thread = nb_bins / nb_threads;
let nb_bins_last_thread = nb_bins - nb_bins_per_thread * (nb_threads - 1);
// Iterate over the number of threads
// -> for each thread perform the binary search sorted with moving left and
// yield the indices (using the same idea as for the sequential version)
(0..nb_threads).into_par_iter().map(move |i| {
// Search the start of the first bin (of the thread)
let mut value: f64 = sequential_add_mul(arr0, val_step, i * nb_bins_per_thread); // Search value
let start_value: T = T::from_f64(value).unwrap();
let mut idx: usize = binary_search(arr, start_value, 0, arr.len() - 1); // Index of the search value
let nb_bins_thread = if i == nb_threads - 1 {
nb_bins_last_thread
} else {
nb_bins_per_thread
};
// Perform sequential binary search for the end of the bins (of the thread)
(0..nb_bins_thread).map(move |_| {
let start_idx: usize = idx; // Start index of the bin (previous end index)
value += val_step;
let search_value: T = T::from_f64(value).unwrap();
idx = binary_search(arr, search_value, idx, arr.len() - 1); // End index of the bin
(start_idx, idx)
})
})
}

Expand Down Expand Up @@ -207,7 +234,10 @@ mod tests {
let bin_idxs = bin_idxs_iter.map(|x| x.0).collect::<Vec<usize>>();
assert_eq!(bin_idxs, vec![0, 3, 6]);
let bin_idxs_iter = get_equidistant_bin_idx_iterator_parallel(arr.view(), 3);
let bin_idxs = bin_idxs_iter.map(|x| x.0).collect::<Vec<usize>>();
let bin_idxs = bin_idxs_iter
.map(|x| x.map(|x| x.0).collect::<Vec<usize>>())
.flatten()
.collect::<Vec<usize>>();
assert_eq!(bin_idxs, vec![0, 3, 6]);
}

Expand All @@ -225,7 +255,10 @@ mod tests {
let bin_idxs_iter = get_equidistant_bin_idx_iterator(arr.view(), nb_bins);
let bin_idxs = bin_idxs_iter.map(|x| x.0).collect::<Vec<usize>>();
let bin_idxs_iter = get_equidistant_bin_idx_iterator_parallel(arr.view(), nb_bins);
let bin_idxs_parallel = bin_idxs_iter.map(|x| x.0).collect::<Vec<usize>>();
let bin_idxs_parallel = bin_idxs_iter
.map(|x| x.map(|x| x.0).collect::<Vec<usize>>())
.flatten()
.collect::<Vec<usize>>();
assert_eq!(bin_idxs, bin_idxs_parallel);
}
}
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "maturin"
[project]
name = "tsdownsample"
description = "Extremely fast time series downsampling in Rust"
version = "0.1.0a7"
version = "0.1.0"
requires-python = ">=3.7"
dependencies = ["numpy"]
authors = [{name = "Jeroen Van Der Donckt"}]
Expand Down
2 changes: 1 addition & 1 deletion tsdownsample/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
MinMaxLTTBDownsampler,
)

__version__ = "0.1.0a7"
__version__ = "0.1.0"
__author__ = "Jeroen Van Der Donckt"

__all__ = [
Expand Down

0 comments on commit 1b08886

Please sign in to comment.