diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index f5c2413..e6ee845 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -1,6 +1,7 @@ use crate::ms::frames::TimsPeak; -use crate::space::space_generics::HasIntensity; +use crate::space::space_generics::{HasIntensity, IntenseAtIndex}; use crate::utils; +use std::ops::Index; use rayon::prelude::*; @@ -80,13 +81,14 @@ impl ClusterAggregator for TimsPeakAggregator { pub fn aggregate_clusters< T: Send + Clone + Copy, + RE: Index + Sync + Send + ?Sized, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, >( tot_clusters: u64, cluster_labels: Vec>, - elements: &[T], + elements: &RE, def_aggregator: &F, log_level: utils::LogLevel, keep_unclustered: bool, @@ -123,13 +125,14 @@ pub fn aggregate_clusters< fn parallel_aggregate_clusters< T: Send + Clone + Copy, + RE: Index + Sync + Send + ?Sized, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, >( tot_clusters: u64, cluster_labels: Vec>, - elements: &[T], + elements: &RE, def_aggregator: &F, log_level: utils::LogLevel, keep_unclustered: bool, @@ -211,13 +214,14 @@ fn parallel_aggregate_clusters< fn serial_aggregate_clusters< T: Send + Clone + Copy, + RE: Index + Sync + Send + ?Sized, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, >( tot_clusters: u64, cluster_labels: Vec>, - elements: &[T], + elements: &RE, def_aggregator: &F, keep_unclustered: bool, ) -> Vec { diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index fe5eab9..8ab3eb9 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -1,13 +1,13 @@ use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator, ClusterLabel}; use crate::space::kdtree::RadiusKDTree; use crate::space::space_generics::{ - convert_to_bounds_query, DistantAtIndex, HasIntensity, IntenseAtIndex, NDPointConverter, - QueriableIndexedPoints, + convert_to_bounds_query, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, IntenseAtIndex, + NDPoint, NDPointConverter, QueriableIndexedPoints, }; -use crate::utils; +use crate::utils::{self, ContextTimer}; use log::{debug, info, trace}; use rayon::prelude::*; -use std::ops::Add; +use std::ops::{Add, Index}; use crate::aggregation::dbscan::runner::dbscan_label_clusters; @@ -23,12 +23,13 @@ fn reassign_centroid< I: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, G: Sync + Send + ClusterAggregator, R: Send, + RE: Send + Sync + Index + ?Sized, F: Fn() -> G + Send + Sync, >( centroids: Vec, indexed_points: &'a I, centroid_converter: C, - elements: &[T], + elements: &RE, def_aggregator: F, log_level: utils::LogLevel, expansion_factors: &[f32; N], @@ -62,18 +63,38 @@ fn reassign_centroid< // TODO: rename prefiltered peaks argument! // TODO implement a version that takes a sparse distance matrix. +impl AsNDPointsAtIndex for Vec> { + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint { + self[index] + } + + fn num_ndpoints(&self) -> usize { + self.len() + } +} + pub fn dbscan_generic< C: NDPointConverter, C2: NDPointConverter, R: Send, G: Sync + Send + ClusterAggregator, T: HasIntensity + Send + Clone + Copy + Sync, + RE: IntenseAtIndex + + DistantAtIndex + + IntoIterator + + Send + + Sync + + Index + + ?Sized, F: Fn() -> G + Send + Sync, D: Send + Sync, const N: usize, >( converter: C, - prefiltered_peaks: Vec, + prefiltered_peaks: &RE, min_n: usize, min_intensity: u64, def_aggregator: F, @@ -84,7 +105,7 @@ pub fn dbscan_generic< back_converter: Option, ) -> Vec where - Vec: IntenseAtIndex + DistantAtIndex, + ::IntoIter: ExactSizeIterator, { let show_progress = log_level.is_some(); let log_level = match log_level { @@ -94,7 +115,7 @@ where let timer = utils::ContextTimer::new("dbscan_generic", true, log_level); let mut i_timer = timer.start_sub_timer("conversion"); - let (ndpoints, boundary) = converter.convert_vec(&prefiltered_peaks); + let (ndpoints, boundary) = converter.convert_iter(prefiltered_peaks.into_iter()); i_timer.stop(true); let mut i_timer = timer.start_sub_timer("tree"); @@ -106,9 +127,69 @@ where } i_timer.stop(true); + let centroids = dbscan_aggregate( + prefiltered_peaks, + &ndpoints, + &tree, + timer, + min_n, + min_intensity, + def_aggregator, + extra_filter_fun, + log_level, + keep_unclustered, + max_extension_distances, + show_progress, + ); + + match back_converter { + Some(bc) => reassign_centroid( + centroids, + &tree, + bc, + prefiltered_peaks, + &def_aggregator, + log_level, + max_extension_distances, + ), + None => centroids, + } +} + +pub fn dbscan_aggregate< + 'a, + const N: usize, + RE: IntenseAtIndex + + DistantAtIndex + + IntoIterator + + Send + + Sync + + Index + + ?Sized, + IND: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync + Send, + NAI: AsNDPointsAtIndex + std::marker::Sync + Send, + T: HasIntensity + Send + Clone + Copy + Sync, + D: Send + Sync, + G: Sync + Send + ClusterAggregator, + R: Send, + F: Fn() -> G + Send + Sync, +>( + prefiltered_peaks: &RE, + ndpoints: &NAI, + index: &IND, + timer: ContextTimer, + min_n: usize, + min_intensity: u64, + def_aggregator: F, + extra_filter_fun: Option<&(dyn Fn(&D) -> bool + Send + Sync)>, + log_level: utils::LogLevel, + keep_unclustered: bool, + max_extension_distances: &[f32; N], + show_progress: bool, +) -> Vec { let mut i_timer = timer.start_sub_timer("pre-sort"); let mut intensity_sorted_indices = prefiltered_peaks - .iter() + .into_iter() .enumerate() .map(|(i, peak)| (i, peak.intensity())) .collect::>(); @@ -118,9 +199,9 @@ where let mut i_timer = timer.start_sub_timer("dbscan"); let cluster_labels = dbscan_label_clusters( - &tree, - &prefiltered_peaks, - ndpoints.as_slice(), + index, + prefiltered_peaks, + ndpoints, min_n, min_intensity, &intensity_sorted_indices, @@ -133,22 +214,10 @@ where let centroids = aggregate_clusters( cluster_labels.num_clusters, cluster_labels.cluster_labels, - &prefiltered_peaks, + prefiltered_peaks, &def_aggregator, log_level, keep_unclustered, ); - - match back_converter { - Some(bc) => reassign_centroid( - centroids, - &tree, - bc, - &prefiltered_peaks, - &def_aggregator, - log_level, - max_extension_distances, - ), - None => centroids, - } + centroids } diff --git a/src/aggregation/dbscan/denseframe_dbscan.rs b/src/aggregation/dbscan/denseframe_dbscan.rs index 0e31eda..3d5f83f 100644 --- a/src/aggregation/dbscan/denseframe_dbscan.rs +++ b/src/aggregation/dbscan/denseframe_dbscan.rs @@ -47,7 +47,7 @@ pub fn dbscan_denseframe( }; let peak_vec: Vec = dbscan_generic( converter, - prefiltered_peaks, + &prefiltered_peaks, min_n, min_intensity, TimsPeakAggregator::default, diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index 9bd02e6..b266842 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -721,7 +721,7 @@ pub fn dbscan_label_clusters< T: QueriableIndexedPoints<'a, N, usize> + Send + std::marker::Sync, PE: AsNDPointsAtIndex + Send + Sync + ?Sized, D: Send + Sync, - E: HasIntensity + Send + Sync, + E: Send + Sync, >( indexed_points: &'a T, raw_elements: &'a RE, diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index 3cea28f..8d6cc5f 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -1,13 +1,23 @@ use core::panic; +use std::ops::Index; +use crate::aggregation::dbscan::dbscan::dbscan_aggregate; use crate::aggregation::dbscan::denseframe_dbscan::dbscan_denseframe; +use crate::ms::frames::frames::RawTimsPeak; use crate::ms::frames::Converters; use crate::ms::frames::DenseFrame; use crate::ms::frames::DenseFrameWindow; use crate::ms::frames::FrameSlice; +use crate::ms::frames::TimsPeak; use crate::ms::tdf; use crate::ms::tdf::DIAFrameInfo; +use crate::space::space_generics::AsNDPointsAtIndex; +use crate::space::space_generics::DistantAtIndex; +use crate::space::space_generics::IntenseAtIndex; +use crate::space::space_generics::NDPoint; +use crate::space::space_generics::QueriableIndexedPoints; use crate::utils; +use timsrust::ConvertableIndex; use indicatif::ParallelProgressIterator; use log::{info, trace, warn}; @@ -15,6 +25,11 @@ use rayon::prelude::*; use serde::{Deserialize, Serialize}; use timsrust::Frame; +use super::aggregators::aggregate_clusters; +use super::aggregators::ClusterAggregator; +use super::aggregators::TimsPeakAggregator; +use super::dbscan::runner::dbscan_label_clusters; + // TODO I can probably split the ms1 and ms2 ... #[derive(Debug, Serialize, Deserialize, Clone, Copy)] pub struct DenoiseConfig { @@ -135,6 +150,319 @@ fn _denoise_denseframe( denoised_frame } +#[derive(Debug)] +struct FrameSliceWindow<'a> { + window: &'a [FrameSlice<'a>], + reference_index: usize, + cum_lengths: Vec, +} + +#[derive(Debug, Clone, Copy)] +struct MaybeIntenseRawPeak { + intensity: u32, + tof_index: u32, + scan_index: usize, + weight_only: bool, +} + +impl FrameSliceWindow<'_> { + fn new<'a>(window: &'a [FrameSlice<'a>]) -> FrameSliceWindow<'a> { + let cum_lengths = window + .iter() + .map(|x| x.num_ndpoints()) + .scan(0, |acc, x| { + *acc += x; + Some(*acc) + }) + .collect(); + FrameSliceWindow { + window, + reference_index: window.len() / 2, + cum_lengths, + } + } + fn get_window_index( + &self, + index: usize, + ) -> (usize, usize) { + let mut pos = 0; + for (i, cum_length) in self.cum_lengths.iter().enumerate() { + if index < *cum_length { + pos = i; + break; + } + } + let within_window_index = index - self.cum_lengths[pos]; + (pos, within_window_index) + } +} + +impl Index for FrameSliceWindow<'_> { + type Output = MaybeIntenseRawPeak; + + fn index( + &self, + index: usize, + ) -> &Self::Output { + let (pos, within_window_index) = self.get_window_index(index); + let tmp = self.window[pos]; + let (tof, int) = tmp.tof_int_at_index(within_window_index); + let foo = MaybeIntenseRawPeak { + intensity: int, + tof_index: tof, + scan_index: tmp.global_scan_at_index(within_window_index), + weight_only: pos != self.reference_index, + }; + &foo + } +} + +impl IntenseAtIndex for FrameSliceWindow<'_> { + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + let (pos, within_window_index) = self.get_window_index(index); + if pos == self.reference_index { + self.window[self.reference_index].intensity_at_index(within_window_index) + } else { + 0 + } + } + + fn weight_at_index( + &self, + index: usize, + ) -> u64 { + let (pos, within_window_index) = self.get_window_index(index); + self.window[pos].weight_at_index(within_window_index) + } +} + +impl<'a> QueriableIndexedPoints<'a, 2, usize> for FrameSliceWindow<'a> { + fn query_ndpoint( + &'a self, + point: &NDPoint<2>, + ) -> Vec<&'a usize> { + let mut out = Vec::new(); + for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths).enumerate() { + let local_outs = frame.query_ndpoint(point); + for ii in local_outs { + out.push(&(ii + cum_length)); + } + } + out + } + + fn query_ndrange( + &'a self, + boundary: &crate::space::space_generics::NDBoundary<2>, + reference_point: Option<&NDPoint<2>>, + ) -> Vec<&'a usize> { + let mut out = Vec::new(); + for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths).enumerate() { + let local_outs = frame.query_ndrange(boundary, reference_point); + for ii in local_outs { + out.push(&(ii + cum_length)); + } + } + out + } +} + +impl DistantAtIndex for FrameSliceWindow<'_> { + fn distance_at_indices( + &self, + index: usize, + other: usize, + ) -> f32 { + let (pos, within_window_index) = self.get_window_index(index); + let (pos_other, within_window_index_other) = self.get_window_index(other); + panic!("unimplemented"); + 0. + } +} + +impl AsNDPointsAtIndex<2> for FrameSliceWindow<'_> { + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint<2> { + let (pos, within_window_index) = self.get_window_index(index); + self.window[pos].get_ndpoint(within_window_index) + } + + fn num_ndpoints(&self) -> usize { + self.cum_lengths.last().unwrap().clone() + } +} + +#[derive(Default, Debug, Clone, Copy)] +pub struct RawWeightedTimsPeakAggregator { + pub cumulative_weighted_cluster_tof: u64, + pub cumulative_weighted_cluster_scan: u64, + pub cumulative_cluster_weight: u64, + pub cumulative_cluster_intensity: u64, + pub num_peaks: u64, + pub num_intense_peaks: u64, +} + +#[derive(Debug, Clone, Copy)] +struct RawScaleTimsPeak { + intensity: f64, + tof_index: f64, + scan_index: f64, + npeaks: u64, +} + +impl RawScaleTimsPeak { + fn to_timspeak( + &self, + mz_converter: &timsrust::Tof2MzConverter, + ims_converter: &timsrust::Scan2ImConverter, + ) -> TimsPeak { + TimsPeak { + intensity: self.intensity as u32, + mz: mz_converter.convert(self.tof_index), + mobility: ims_converter.convert(self.scan_index) as f32, + npeaks: self.npeaks as u32, + } + } +} + +impl ClusterAggregator for RawWeightedTimsPeakAggregator { + // Calculate the weight-weighted average of the cluster + // for mz and ims. The intensity is kept as is. + fn add( + &mut self, + elem: &MaybeIntenseRawPeak, + ) { + self.cumulative_cluster_intensity += + if elem.weight_only { 0 } else { elem.intensity } as u64; + self.cumulative_cluster_weight += elem.intensity as u64; + self.cumulative_weighted_cluster_tof += elem.tof_index as u64 * elem.intensity as u64; + self.cumulative_weighted_cluster_scan += elem.scan_index as u64 * elem.intensity as u64; + self.num_peaks += 1; + if !elem.weight_only { + self.num_intense_peaks += 1; + }; + } + + fn aggregate(&self) -> RawScaleTimsPeak { + // Use raw + RawScaleTimsPeak { + intensity: self.cumulative_cluster_intensity as f64, + tof_index: self.cumulative_weighted_cluster_tof as f64 + / self.cumulative_cluster_weight as f64, + scan_index: self.cumulative_weighted_cluster_scan as f64 + / self.cumulative_cluster_weight as f64, + npeaks: self.num_intense_peaks, + } + } + + fn combine( + self, + other: Self, + ) -> Self { + Self { + cumulative_weighted_cluster_tof: self.cumulative_weighted_cluster_tof + + other.cumulative_weighted_cluster_tof, + cumulative_weighted_cluster_scan: self.cumulative_weighted_cluster_scan + + other.cumulative_weighted_cluster_scan, + cumulative_cluster_weight: self.cumulative_cluster_weight + + other.cumulative_cluster_weight, + cumulative_cluster_intensity: self.cumulative_cluster_intensity + + other.cumulative_cluster_intensity, + num_peaks: self.num_peaks + other.num_peaks, + num_intense_peaks: self.num_intense_peaks + other.num_intense_peaks, + } + } +} + +fn denoise_frame_slice_window( + frameslice_window: &[FrameSlice], + ims_converter: &timsrust::Scan2ImConverter, + mz_converter: &timsrust::Tof2MzConverter, + dia_frame_info: &DIAFrameInfo, + min_n: usize, + min_intensity: u64, + mz_scaling: f64, + max_mz_extension: f64, + ims_scaling: f32, + max_ims_extension: f32, +) -> DenseFrameWindow { + let timer = utils::ContextTimer::new("dbscan_dfs", true, utils::LogLevel::TRACE); + let fsw = FrameSliceWindow::new(frameslice_window); + // dbscan_aggregate( + // &fsw, + // &fsw, + // &fsw, + // timer, + // min_n, + // min_intensity, + // TimsPeakAggregator::default, + // None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, + // utils::LogLevel::TRACE, + // false, + // &[max_mz_extension as f32, max_ims_extension], + // false, + // ); + + let mut intensity_sorted_indices = frameslice_window + .iter() + .map(|x| x.intensities) + .flat_map(|x| x) + .enumerate() + .map(|(i, x)| (i, *x as u64)) + .collect::>(); + + intensity_sorted_indices.par_sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); + + let mut i_timer = timer.start_sub_timer("dbscan"); + let cluster_labels = dbscan_label_clusters( + &fsw, + &fsw, + &fsw, + min_n, + min_intensity, + &intensity_sorted_indices, + None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, + false, + &[10., 100.], + ); + i_timer.stop(true); + + let centroids = aggregate_clusters( + cluster_labels.num_clusters, + cluster_labels.cluster_labels, + &fsw, + &RawWeightedTimsPeakAggregator::default, + utils::LogLevel::TRACE, + false, + ); + + let out = DenseFrameWindow { + frame: DenseFrame { + raw_peaks: centroids + .into_iter() + .map(|x| x.to_timspeak(mz_converter, ims_converter)) + .collect(), + index: 0, + rt: 0., + frame_type: timsrust::FrameType::MS2(timsrust::AcquisitionType::DIAPASEF), + sorted: None, + }, + ims_min: 0., + ims_max: 0., + mz_start: 0., + mz_end: 0., + group_id: 0, + quad_group_id: 0, + }; + + out +} + fn denoise_frame_slice( frame_window: &FrameSlice, ims_converter: &timsrust::Scan2ImConverter, @@ -290,11 +618,12 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> info!("Denoising window {}/{}", i + 1, num_windows); let progbar = indicatif::ProgressBar::new(sv.len() as u64); let denoised_elements: Vec = sv - .into_par_iter() + .as_slice() + .par_windows(3) .progress_with(progbar) - .map(|x| { - denoise_frame_slice( - &x, + .map(|rt_window_of_slices| { + denoise_frame_slice_window( + rt_window_of_slices, &self.ims_converter, &self.mz_converter, &self.dia_frame_info, diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index d2dcf9d..5d505e5 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -486,7 +486,7 @@ fn _combine_single_window_traces( // TODO make dbscan_generic a runner-class let out_traces: Vec = dbscan_generic( converter, - prefiltered_peaks, + &prefiltered_peaks, min_n, min_intensity.into(), || TraceAggregator { @@ -770,7 +770,7 @@ pub fn combine_pseudospectra( let foo: Vec = dbscan_generic( converter, - traces, + &traces, config.min_n.into(), config.min_neighbor_intensity.into(), PseudoSpectrumAggregator::default, diff --git a/src/ms/frames/dense_frame_window.rs b/src/ms/frames/dense_frame_window.rs new file mode 100644 index 0000000..ef763e7 --- /dev/null +++ b/src/ms/frames/dense_frame_window.rs @@ -0,0 +1,213 @@ +use timsrust::{ConvertableIndex, Frame, Scan2ImConverter, Tof2MzConverter}; + +use crate::ms::{ + frames::MsMsFrameSliceWindowInfo, + tdf::{DIAFrameInfo, ScanRange}, +}; + +use super::{frames::SortingOrder, DenseFrame, FrameSlice, TimsPeak}; +use log::info; + +pub type Converters = (timsrust::Scan2ImConverter, timsrust::Tof2MzConverter); +fn check_peak_sanity(peak: &TimsPeak) { + debug_assert!(peak.intensity > 0); + debug_assert!(peak.mz > 0.); + debug_assert!(peak.mobility > 0.); + debug_assert!(peak.npeaks > 0); +} + +#[derive(Debug, Clone)] +pub struct DenseFrameWindow { + pub frame: DenseFrame, + pub ims_min: f32, + pub ims_max: f32, + pub mz_start: f64, + pub mz_end: f64, + pub group_id: usize, + pub quad_group_id: usize, +} + +impl DenseFrameWindow { + pub fn from_frame_window( + frame_window: &FrameSlice, + ims_converter: &Scan2ImConverter, + mz_converter: &Tof2MzConverter, + dia_info: &DIAFrameInfo, + ) -> DenseFrameWindow { + let (window_group_id, ww_quad_group_id, scan_start) = match frame_window.slice_window_info { + None => { + panic!("No window info") + // This branch points to an error in logic ... + // The window info should always be present in this context. + }, + Some(MsMsFrameSliceWindowInfo::WindowGroup(_)) => { + // This branch should be easy to implement for things like synchro pasef... + // Some details to iron out though ... + panic!("Not implemented") + }, + Some(MsMsFrameSliceWindowInfo::SingleWindow(ref x)) => { + let window_group_id = x.window_group_id; + let ww_quad_group_id = x.within_window_quad_group_id; + let scan_start = frame_window.scan_start; + (window_group_id, ww_quad_group_id, scan_start) + }, + }; + + // NOTE: I am swapping here the 'scan start' to be the `ims_end` because + // the first scans have lower 1/k0 values. + let ims_max = ims_converter.convert(scan_start as u32) as f32; + let ims_min = + ims_converter.convert((frame_window.scan_offsets.len() + scan_start) as u32) as f32; + + debug_assert!(ims_max <= ims_min); + + let scan_range: Option<&ScanRange> = + dia_info.get_quad_windows(window_group_id, ww_quad_group_id); + let scan_range = match scan_range { + Some(x) => x, + None => { + panic!( + "No scan range for window_group_id: {}, within_window_quad_group_id: {}", + window_group_id, ww_quad_group_id + ); + }, + }; + + let frame = DenseFrame::from_frame_window(frame_window, ims_converter, mz_converter); + + DenseFrameWindow { + frame, + ims_min, + ims_max, + mz_start: scan_range.iso_low as f64, + mz_end: scan_range.iso_high as f64, + group_id: window_group_id, + quad_group_id: ww_quad_group_id, + } + } +} + +impl DenseFrame { + pub fn from_frame( + frame: &Frame, + ims_converter: &Scan2ImConverter, + mz_converter: &Tof2MzConverter, + ) -> DenseFrame { + let mut expanded_scan_indices = Vec::with_capacity(frame.tof_indices.len()); + let mut last_scan_offset = frame.scan_offsets[0]; + for (scan_index, index_offset) in frame.scan_offsets[1..].iter().enumerate() { + let num_tofs = index_offset - last_scan_offset; + + let ims = ims_converter.convert(scan_index as u32) as f32; + expanded_scan_indices.extend(vec![ims; num_tofs]); + last_scan_offset = *index_offset; + } + + let peaks = expanded_scan_indices + .iter() + .zip(frame.tof_indices.iter()) + .zip(frame.intensities.iter()) + .map(|((scan_index, tof_index), intensity)| TimsPeak { + intensity: *intensity, + mz: mz_converter.convert(*tof_index), + mobility: *scan_index, + npeaks: 1, + }) + .collect::>(); + + if cfg!(debug_assertions) { + for peak in peaks.iter() { + check_peak_sanity(peak); + } + } + + let index = frame.index; + let rt = frame.rt; + let frame_type = frame.frame_type; + + DenseFrame { + raw_peaks: peaks, + index, + rt, + frame_type, + sorted: None, + } + } + + pub fn from_frame_window( + frame_window: &FrameSlice, + ims_converter: &Scan2ImConverter, + mz_converter: &Tof2MzConverter, + ) -> DenseFrame { + let mut expanded_scan_indices = Vec::with_capacity(frame_window.tof_indices.len()); + let mut last_scan_offset = frame_window.scan_offsets[0]; + for (scan_index, index_offset) in frame_window.scan_offsets[1..].iter().enumerate() { + let num_tofs = index_offset - last_scan_offset; + let scan_index_use = (scan_index + frame_window.scan_start) as u32; + + let ims = ims_converter.convert(scan_index as f64) as f32; + if ims < 0.0 { + info!("Negative IMS value: {}", ims); + info!("scan_index_use: {}", scan_index_use); + info!("scan_index: {}", scan_index); + info!("frame_window.scan_start: {}", frame_window.scan_start); + } + debug_assert!(ims >= 0.0); + expanded_scan_indices.extend(vec![ims; num_tofs]); + last_scan_offset = *index_offset; + } + debug_assert!(last_scan_offset == frame_window.tof_indices.len()); + + let peaks = expanded_scan_indices + .iter() + .zip(frame_window.tof_indices.iter()) + .zip(frame_window.intensities.iter()) + .map(|((scan_index, tof_index), intensity)| TimsPeak { + intensity: *intensity, + mz: mz_converter.convert(*tof_index), + mobility: *scan_index, + npeaks: 1, + }) + .collect::>(); + + if cfg!(debug_assertions) { + for peak in peaks.iter() { + check_peak_sanity(peak); + } + } + + let index = frame_window.parent_frame_index; + let rt = frame_window.rt; + let frame_type = frame_window.frame_type; + + DenseFrame { + raw_peaks: peaks, + index, + rt, + frame_type, + sorted: None, + } + } + + pub fn sort_by_mz(&mut self) { + match self.sorted { + Some(SortingOrder::Mz) => (), + _ => { + self.raw_peaks + .sort_unstable_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()); + self.sorted = Some(SortingOrder::Mz); + }, + } + } + + pub fn sort_by_mobility(&mut self) { + match self.sorted { + Some(SortingOrder::Mobility) => (), + _ => { + self.raw_peaks + .sort_unstable_by(|a, b| a.mobility.partial_cmp(&b.mobility).unwrap()); + self.sorted = Some(SortingOrder::Mobility); + }, + } + } +} diff --git a/src/ms/frames.rs b/src/ms/frames/frame_slice.rs similarity index 71% rename from src/ms/frames.rs rename to src/ms/frames/frame_slice.rs index 3db5c6c..6f8c449 100644 --- a/src/ms/frames.rs +++ b/src/ms/frames/frame_slice.rs @@ -1,73 +1,11 @@ use std::fmt; -use std::ops::Index; -use std::slice::SliceIndex; - -use rand::seq::index; -pub use timsrust::Frame; -pub use timsrust::FrameType; -pub use timsrust::{ - ConvertableIndex, FileReader, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, -}; - -use crate::ms::tdf::{DIAFrameInfo, ScanRange}; -use crate::space::space_generics::NDPoint; -use crate::space::space_generics::{AsNDPointsAtIndex, HasIntensity, IntenseAtIndex}; - -use log::info; - -#[derive(Debug, Clone, Copy)] -pub struct TimsPeak { - pub intensity: u32, - pub mz: f64, - pub mobility: f32, - pub npeaks: u32, -} - -impl HasIntensity for TimsPeak { - fn intensity(&self) -> u64 { - self.intensity as u64 - } -} - -#[derive(Debug, Clone, Copy)] -pub struct RawTimsPeak { - pub intensity: u32, - pub tof_index: u32, - pub scan_index: usize, -} +use timsrust::{Frame, FrameType}; -#[derive(Debug, Clone, Copy)] -pub struct RawTimsPeakReference<'a> { - pub intensity: &'a u32, - pub tof_index: &'a u32, - pub scan_index: &'a usize, -} - -impl HasIntensity for RawTimsPeak { - fn intensity(&self) -> u64 { - self.intensity as u64 - } -} - -impl<'a> HasIntensity for RawTimsPeakReference<'a> { - fn intensity(&self) -> u64 { - *self.intensity as u64 - } -} +use crate::space::space_generics::{ + AsNDPointsAtIndex, IntenseAtIndex, NDBoundary, NDPoint, QueriableIndexedPoints, +}; -fn _check_peak_sanity(peak: &TimsPeak) { - debug_assert!(peak.intensity > 0); - debug_assert!(peak.mz > 0.); - debug_assert!(peak.mobility > 0.); - debug_assert!(peak.npeaks > 0); -} - -#[derive(Debug, Clone, Copy)] -pub enum SortingOrder { - Mz, - Mobility, - Intensity, -} +use super::FrameMsMsWindowInfo; #[derive(Debug, Clone, Copy)] pub enum ScanNumberType { @@ -121,118 +59,6 @@ impl fmt::Display for ScanOutOfBoundsError { } } -/// Information on the context of a window in a frame. -/// -/// This adds to a frame slice the context of the what isolation was used -/// to generate the frame slice. -#[derive(Debug, Clone)] -pub struct FrameMsMsWindowInfo { - pub mz_start: f32, - pub mz_end: f32, - pub window_group_id: usize, - pub within_window_quad_group_id: usize, - pub global_quad_row_id: usize, -} - -pub trait FramePointTolerance { - fn tof_index_range( - &self, - tof_index: u32, - ) -> (u32, u32); - fn scan_range( - &self, - scan_index: ScanNumberType, - ) -> (ScanNumberType, ScanNumberType); -} - -struct AbsoluteFramePointTolerance { - tof_index_tolerance: u32, - scan_tolerance: usize, -} - -impl FramePointTolerance for AbsoluteFramePointTolerance { - fn tof_index_range( - &self, - tof_index: u32, - ) -> (u32, u32) { - let tof_index_tolerance = self.tof_index_tolerance; - ( - tof_index.saturating_sub(tof_index_tolerance), - tof_index.saturating_add(tof_index_tolerance), - ) - } - - fn scan_range( - &self, - scan_index: ScanNumberType, - ) -> (ScanNumberType, ScanNumberType) { - match scan_index { - ScanNumberType::Global(x) => { - let scan_tolerance = self.scan_tolerance; - ( - ScanNumberType::Global(x.saturating_sub(scan_tolerance)), - ScanNumberType::Global(x + scan_tolerance), - ) - }, - ScanNumberType::Local(x) => { - let scan_tolerance = self.scan_tolerance; - ( - ScanNumberType::Local(x.saturating_sub(scan_tolerance)), - ScanNumberType::Local(x + scan_tolerance), - ) - }, - } - } -} - -type Range = (usize, usize); - -pub struct RangeSet { - ranges: Vec, - offset: usize, -} - -impl RangeSet { - fn extend( - &mut self, - other: RangeSet, - ) { - let new_offset = self.offset.min(other.offset); - let vs_self_offset = self.offset - new_offset; - let vs_other_offset = other.offset - new_offset; - - for item in self.ranges.iter_mut() { - item.0 += vs_self_offset; - item.1 += vs_self_offset; - } - - for item in other.ranges.iter() { - self.ranges - .push((item.0 + vs_other_offset, item.1 + vs_other_offset)); - } - - self.ranges.sort_unstable_by(|a, b| a.0.cmp(&b.0)); - } - - fn any_overlap(&self) -> bool { - let mut last_end = 0; - - for range in self.ranges.iter() { - if range.0 < last_end { - return true; - } - last_end = range.1; - } - false - } -} - -#[derive(Debug, Clone)] -pub enum MsMsFrameSliceWindowInfo { - WindowGroup(usize), - SingleWindow(FrameMsMsWindowInfo), -} - /// Unprocessed data from a 'Frame' after breaking by quad isolation_window + ims window. /// /// 1. every tof-index + intensity represents a peak. @@ -531,6 +357,13 @@ impl<'a> FrameSlice<'a> { Some(ranges) } } + + pub fn tof_int_at_index( + &self, + index: usize, + ) -> (u32, u32) { + (self.tof_indices[index], self.intensities[index]) + } } // Tests for the FrameSlice @@ -757,17 +590,16 @@ impl<'a> IntenseAtIndex for FrameSlice<'a> { // } } -impl<'a> AsNDPointsAtIndex<3> for FrameSlice<'a> { +impl<'a> AsNDPointsAtIndex<2> for FrameSlice<'a> { fn get_ndpoint( &self, index: usize, - ) -> NDPoint<3> { - let intensity = self.intensities[index]; + ) -> NDPoint<2> { let tof_index = self.tof_indices[index]; let scan_index = self.global_scan_at_index(index); NDPoint { - values: [tof_index as f32, scan_index as f32, intensity as f32], + values: [tof_index as f32, scan_index as f32], } } @@ -776,209 +608,155 @@ impl<'a> AsNDPointsAtIndex<3> for FrameSlice<'a> { } } -#[derive(Debug, Clone)] -pub struct DenseFrame { - pub raw_peaks: Vec, - pub index: usize, - pub rt: f64, - pub frame_type: FrameType, - pub sorted: Option, -} - -#[derive(Debug, Clone)] -pub struct DenseFrameWindow { - pub frame: DenseFrame, - pub ims_min: f32, - pub ims_max: f32, - pub mz_start: f64, - pub mz_end: f64, - pub group_id: usize, - pub quad_group_id: usize, -} - -impl DenseFrameWindow { - pub fn from_frame_window( - frame_window: &FrameSlice, - ims_converter: &Scan2ImConverter, - mz_converter: &Tof2MzConverter, - dia_info: &DIAFrameInfo, - ) -> DenseFrameWindow { - let (window_group_id, ww_quad_group_id, scan_start) = match frame_window.slice_window_info { - None => { - panic!("No window info") - // This branch points to an error in logic ... - // The window info should always be present in this context. - }, - Some(MsMsFrameSliceWindowInfo::WindowGroup(_)) => { - // This branch should be easy to implement for things like synchro pasef... - // Some details to iron out though ... - panic!("Not implemented") - }, - Some(MsMsFrameSliceWindowInfo::SingleWindow(ref x)) => { - let window_group_id = x.window_group_id; - let ww_quad_group_id = x.within_window_quad_group_id; - let scan_start = frame_window.scan_start; - (window_group_id, ww_quad_group_id, scan_start) - }, - }; - - // NOTE: I am swapping here the 'scan start' to be the `ims_end` because - // the first scans have lower 1/k0 values. - let ims_max = ims_converter.convert(scan_start as u32) as f32; - let ims_min = - ims_converter.convert((frame_window.scan_offsets.len() + scan_start) as u32) as f32; - - debug_assert!(ims_max <= ims_min); - - let scan_range: Option<&ScanRange> = - dia_info.get_quad_windows(window_group_id, ww_quad_group_id); - let scan_range = match scan_range { - Some(x) => x, - None => { - panic!( - "No scan range for window_group_id: {}, within_window_quad_group_id: {}", - window_group_id, ww_quad_group_id - ); +impl<'a> QueriableIndexedPoints<'a, 2, usize> for FrameSlice<'a> { + fn query_ndpoint( + &'a self, + point: &NDPoint<2>, + ) -> Vec<&'a usize> { + let tof_index = point.values[0] as i32; + let scan_index = point.values[1] as usize; + let rangesets = self.matching_rangeset( + tof_index, + ScanNumberType::Global(scan_index), + &AbsoluteFramePointTolerance { + tof_index_tolerance: 1, + scan_tolerance: 1, }, - }; - - let frame = DenseFrame::from_frame_window(frame_window, ims_converter, mz_converter); + ); - DenseFrameWindow { - frame, - ims_min, - ims_max, - mz_start: scan_range.iso_low as f64, - mz_end: scan_range.iso_high as f64, - group_id: window_group_id, - quad_group_id: ww_quad_group_id, + let mut out = Vec::new(); + if let Some(rangesets) = rangesets { + for range in rangesets.ranges.iter() { + for i in range.0..range.1 { + out.push(&i); + } + } } + out } -} -impl DenseFrame { - pub fn from_frame( - frame: &Frame, - ims_converter: &Scan2ImConverter, - mz_converter: &Tof2MzConverter, - ) -> DenseFrame { - let mut expanded_scan_indices = Vec::with_capacity(frame.tof_indices.len()); - let mut last_scan_offset = frame.scan_offsets[0]; - for (scan_index, index_offset) in frame.scan_offsets[1..].iter().enumerate() { - let num_tofs = index_offset - last_scan_offset; - - let ims = ims_converter.convert(scan_index as u32) as f32; - expanded_scan_indices.extend(vec![ims; num_tofs]); - last_scan_offset = *index_offset; + fn query_ndrange( + &'a self, + boundary: &NDBoundary<2>, + reference_point: Option<&NDPoint<2>>, + ) -> Vec<&'a usize> { + let tol = AbsoluteFramePointTolerance { + tof_index_tolerance: (boundary.widths[0] / 2.) as u32, + scan_tolerance: (boundary.widths[1] / 2.) as usize, + }; + let rangesets = self.matching_rangeset( + boundary.centers[0] as i32, + ScanNumberType::Global(boundary.centers[1] as usize), + &tol, + ); + + let mut out = Vec::new(); + if let Some(rangesets) = rangesets { + for range in rangesets.ranges.iter() { + for i in range.0..range.1 { + out.push(&i); + } + } } + out + } +} - let peaks = expanded_scan_indices - .iter() - .zip(frame.tof_indices.iter()) - .zip(frame.intensities.iter()) - .map(|((scan_index, tof_index), intensity)| TimsPeak { - intensity: *intensity, - mz: mz_converter.convert(*tof_index), - mobility: *scan_index, - npeaks: 1, - }) - .collect::>(); +pub trait FramePointTolerance { + fn tof_index_range( + &self, + tof_index: u32, + ) -> (u32, u32); + fn scan_range( + &self, + scan_index: ScanNumberType, + ) -> (ScanNumberType, ScanNumberType); +} - if cfg!(debug_assertions) { - for peak in peaks.iter() { - _check_peak_sanity(peak); - } - } +struct AbsoluteFramePointTolerance { + tof_index_tolerance: u32, + scan_tolerance: usize, +} - let index = frame.index; - let rt = frame.rt; - let frame_type = frame.frame_type; +impl FramePointTolerance for AbsoluteFramePointTolerance { + fn tof_index_range( + &self, + tof_index: u32, + ) -> (u32, u32) { + let tof_index_tolerance = self.tof_index_tolerance; + ( + tof_index.saturating_sub(tof_index_tolerance), + tof_index.saturating_add(tof_index_tolerance), + ) + } - DenseFrame { - raw_peaks: peaks, - index, - rt, - frame_type, - sorted: None, + fn scan_range( + &self, + scan_index: ScanNumberType, + ) -> (ScanNumberType, ScanNumberType) { + match scan_index { + ScanNumberType::Global(x) => { + let scan_tolerance = self.scan_tolerance; + ( + ScanNumberType::Global(x.saturating_sub(scan_tolerance)), + ScanNumberType::Global(x + scan_tolerance), + ) + }, + ScanNumberType::Local(x) => { + let scan_tolerance = self.scan_tolerance; + ( + ScanNumberType::Local(x.saturating_sub(scan_tolerance)), + ScanNumberType::Local(x + scan_tolerance), + ) + }, } } +} - pub fn from_frame_window( - frame_window: &FrameSlice, - ims_converter: &Scan2ImConverter, - mz_converter: &Tof2MzConverter, - ) -> DenseFrame { - let mut expanded_scan_indices = Vec::with_capacity(frame_window.tof_indices.len()); - let mut last_scan_offset = frame_window.scan_offsets[0]; - for (scan_index, index_offset) in frame_window.scan_offsets[1..].iter().enumerate() { - let num_tofs = index_offset - last_scan_offset; - let scan_index_use = (scan_index + frame_window.scan_start) as u32; - - let ims = ims_converter.convert(scan_index as f64) as f32; - if ims < 0.0 { - info!("Negative IMS value: {}", ims); - info!("scan_index_use: {}", scan_index_use); - info!("scan_index: {}", scan_index); - info!("frame_window.scan_start: {}", frame_window.scan_start); - } - debug_assert!(ims >= 0.0); - expanded_scan_indices.extend(vec![ims; num_tofs]); - last_scan_offset = *index_offset; - } - debug_assert!(last_scan_offset == frame_window.tof_indices.len()); - - let peaks = expanded_scan_indices - .iter() - .zip(frame_window.tof_indices.iter()) - .zip(frame_window.intensities.iter()) - .map(|((scan_index, tof_index), intensity)| TimsPeak { - intensity: *intensity, - mz: mz_converter.convert(*tof_index), - mobility: *scan_index, - npeaks: 1, - }) - .collect::>(); +type Range = (usize, usize); - if cfg!(debug_assertions) { - for peak in peaks.iter() { - _check_peak_sanity(peak); - } - } +pub struct RangeSet { + ranges: Vec, + offset: usize, +} - let index = frame_window.parent_frame_index; - let rt = frame_window.rt; - let frame_type = frame_window.frame_type; +impl RangeSet { + fn extend( + &mut self, + other: RangeSet, + ) { + let new_offset = self.offset.min(other.offset); + let vs_self_offset = self.offset - new_offset; + let vs_other_offset = other.offset - new_offset; - DenseFrame { - raw_peaks: peaks, - index, - rt, - frame_type, - sorted: None, + for item in self.ranges.iter_mut() { + item.0 += vs_self_offset; + item.1 += vs_self_offset; } - } - pub fn sort_by_mz(&mut self) { - match self.sorted { - Some(SortingOrder::Mz) => (), - _ => { - self.raw_peaks - .sort_unstable_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()); - self.sorted = Some(SortingOrder::Mz); - }, + for item in other.ranges.iter() { + self.ranges + .push((item.0 + vs_other_offset, item.1 + vs_other_offset)); } + + self.ranges.sort_unstable_by(|a, b| a.0.cmp(&b.0)); } - pub fn sort_by_mobility(&mut self) { - match self.sorted { - Some(SortingOrder::Mobility) => (), - _ => { - self.raw_peaks - .sort_unstable_by(|a, b| a.mobility.partial_cmp(&b.mobility).unwrap()); - self.sorted = Some(SortingOrder::Mobility); - }, + fn any_overlap(&self) -> bool { + let mut last_end = 0; + + for range in self.ranges.iter() { + if range.0 < last_end { + return true; + } + last_end = range.1; } + false } } -pub type Converters = (timsrust::Scan2ImConverter, timsrust::Tof2MzConverter); +#[derive(Debug, Clone)] +pub enum MsMsFrameSliceWindowInfo { + WindowGroup(usize), + SingleWindow(FrameMsMsWindowInfo), +} diff --git a/src/ms/frames/frames.rs b/src/ms/frames/frames.rs new file mode 100644 index 0000000..5e011bc --- /dev/null +++ b/src/ms/frames/frames.rs @@ -0,0 +1,76 @@ +pub use timsrust::Frame; +pub use timsrust::FrameType; +pub use timsrust::{ + ConvertableIndex, FileReader, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, +}; + +use crate::space::space_generics::HasIntensity; + +#[derive(Debug, Clone, Copy)] +pub struct TimsPeak { + pub intensity: u32, + pub mz: f64, + pub mobility: f32, + pub npeaks: u32, +} + +impl HasIntensity for TimsPeak { + fn intensity(&self) -> u64 { + self.intensity as u64 + } +} + +#[derive(Debug, Clone, Copy)] +pub struct RawTimsPeak { + pub intensity: u32, + pub tof_index: u32, + pub scan_index: usize, +} + +#[derive(Debug, Clone, Copy)] +pub struct RawTimsPeakReference<'a> { + pub intensity: &'a u32, + pub tof_index: &'a u32, + pub scan_index: &'a usize, +} + +impl HasIntensity for RawTimsPeak { + fn intensity(&self) -> u64 { + self.intensity as u64 + } +} + +impl<'a> HasIntensity for RawTimsPeakReference<'a> { + fn intensity(&self) -> u64 { + *self.intensity as u64 + } +} + +#[derive(Debug, Clone, Copy)] +pub enum SortingOrder { + Mz, + Mobility, + Intensity, +} + +#[derive(Debug, Clone)] +pub struct DenseFrame { + pub raw_peaks: Vec, + pub index: usize, + pub rt: f64, + pub frame_type: FrameType, + pub sorted: Option, +} + +/// Information on the context of a window in a frame. +/// +/// This adds to a frame slice the context of the what isolation was used +/// to generate the frame slice. +#[derive(Debug, Clone)] +pub struct FrameMsMsWindowInfo { + pub mz_start: f32, + pub mz_end: f32, + pub window_group_id: usize, + pub within_window_quad_group_id: usize, + pub global_quad_row_id: usize, +} diff --git a/src/ms/frames/mod.rs b/src/ms/frames/mod.rs new file mode 100644 index 0000000..11e84b4 --- /dev/null +++ b/src/ms/frames/mod.rs @@ -0,0 +1,6 @@ +pub mod dense_frame_window; +pub mod frame_slice; +pub mod frames; +pub use dense_frame_window::{Converters, DenseFrameWindow}; +pub use frame_slice::{FrameSlice, MsMsFrameSliceWindowInfo}; +pub use frames::{DenseFrame, FrameMsMsWindowInfo, TimsPeak}; diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index 3d5ba83..4cb2d1b 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -204,14 +204,14 @@ pub trait NDPointConverter { &self, elem: &T, ) -> NDPoint; - fn convert_vec( + fn convert_iter( &self, - elems: &[T], - ) -> (Vec>, NDBoundary) { - let points = elems - .iter() - .map(|elem| self.convert(elem)) - .collect::>(); + elems: IT, + ) -> (Vec>, NDBoundary) + where + IT: ExactSizeIterator, + { + let points = elems.map(|elem| self.convert(&elem)).collect::>(); let boundary = NDBoundary::from_ndpoints(&points); (points, boundary) }