Skip to content

Commit

Permalink
Merge pull request #218 from theGreatHerrLebert/david@simulation
Browse files Browse the repository at this point in the history
David@simulation
  • Loading branch information
theGreatHerrLebert committed May 30, 2024
2 parents 785d65e + e5ba245 commit 594df4a
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def simulate_frame_distributions_emg(
verbose: bool = False,
add_noise: bool = False,
normalize: bool = False,
n_steps: int = 1000,
num_threads: int = 4,
) -> pd.DataFrame:

Expand All @@ -53,7 +54,8 @@ def simulate_frame_distributions_emg(
lambdas,
target_p,
step_size,
num_threads=num_threads
num_threads=num_threads,
n_steps=n_steps,
)

abundances = ims.calculate_frame_abundances_emg_par(
Expand All @@ -64,7 +66,8 @@ def simulate_frame_distributions_emg(
sigmas,
lambdas,
rt_cycle_length,
num_threads=num_threads
num_threads=num_threads,
n_steps=n_steps,
)

if verbose:
Expand Down
4 changes: 2 additions & 2 deletions imspy/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "imspy"
version = "0.2.20"
version = "0.2.22"
description = ""
authors = ["theGreatHerrLebert <[email protected]>"]
readme = "README.md"
Expand All @@ -22,7 +22,7 @@ pyopenms = ">=2.6.0"
sagepy = ">=0.2.10"
sagepy-connector = ">=0.2.11"

imspy-connector = ">=0.2.21"
imspy-connector = ">=0.2.22"

scipy = ">=1.7.1"
tqdm = ">=4.66"
Expand Down
2 changes: 1 addition & 1 deletion imspy_connector/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "imspy-connector"
version = "0.2.20"
version = "0.2.22"
edition = "2021"

[lib]
Expand Down
24 changes: 12 additions & 12 deletions imspy_connector/src/py_utility.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ pub fn emg_cdf(x: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
}

#[pyfunction]
pub fn accumulated_cdf_emg(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
mscore::algorithm::utility::emg_cdf_range(lower_limit, upper_limit, mu, sigma, lambda)
pub fn accumulated_cdf_emg(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, lambda: f64, n_steps: Option<usize>) -> f64 {
mscore::algorithm::utility::emg_cdf_range(lower_limit, upper_limit, mu, sigma, lambda, n_steps)
}

#[pyfunction]
pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, target: f64, lower_start: f64, upper_start: f64) -> (f64, f64) {
mscore::algorithm::utility::calculate_bounds_emg(mu, sigma, lambda, step_size, target, lower_start, upper_start)
pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, target: f64, lower_start: f64, upper_start: f64, n_steps: Option<usize>) -> (f64, f64) {
mscore::algorithm::utility::calculate_bounds_emg(mu, sigma, lambda, step_size, target, lower_start, upper_start, n_steps)
}

#[pyfunction]
Expand All @@ -32,25 +32,25 @@ pub fn calculate_bounds_normal(mean: f64, std: f64, z_score: f64) -> (f64, f64)
}

#[pyfunction]
pub fn calculate_frame_occurrence_emg(retention_times: Vec<f64>, rt: f64, sigma: f64, lambda_: f64, target_p: f64, step_size: f64) -> Vec<i32> {
mscore::algorithm::utility::calculate_frame_occurrence_emg(&retention_times, rt, sigma, lambda_, target_p, step_size)
pub fn calculate_frame_occurrence_emg(retention_times: Vec<f64>, rt: f64, sigma: f64, lambda_: f64, target_p: f64, step_size: f64, n_steps: Option<usize>) -> Vec<i32> {
mscore::algorithm::utility::calculate_frame_occurrence_emg(&retention_times, rt, sigma, lambda_, target_p, step_size, n_steps)
}

#[pyfunction]
pub fn calculate_frame_abundance_emg(frame_ids: Vec<i32>, retention_times: Vec<f64>, frame_occurrences: Vec<i32>, rt: f64, sigma: f64, lambda_: f64, rt_cycle_length: f64) -> Vec<f64> {
pub fn calculate_frame_abundance_emg(frame_ids: Vec<i32>, retention_times: Vec<f64>, frame_occurrences: Vec<i32>, rt: f64, sigma: f64, lambda_: f64, rt_cycle_length: f64, n_steps: Option<usize>) -> Vec<f64> {
let time_map: HashMap<i32, f64> = frame_ids.iter().zip(retention_times.iter()).map(|(id, rt)| (*id, *rt)).collect();
mscore::algorithm::utility::calculate_frame_abundance_emg(&time_map, &frame_occurrences, rt, sigma, lambda_, rt_cycle_length)
mscore::algorithm::utility::calculate_frame_abundance_emg(&time_map, &frame_occurrences, rt, sigma, lambda_, rt_cycle_length, n_steps)
}

#[pyfunction]
pub fn calculate_frame_occurrences_emg_par(retention_times: Vec<f64>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, target_p: f64, step_size: f64, num_threads: usize) -> Vec<Vec<i32>> {
mscore::algorithm::utility::calculate_frame_occurrences_emg_par(&retention_times, rts, sigmas, lambdas, target_p, step_size, num_threads)
pub fn calculate_frame_occurrences_emg_par(retention_times: Vec<f64>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, target_p: f64, step_size: f64, num_threads: usize, n_steps: Option<usize>) -> Vec<Vec<i32>> {
mscore::algorithm::utility::calculate_frame_occurrences_emg_par(&retention_times, rts, sigmas, lambdas, target_p, step_size, num_threads, n_steps)
}

#[pyfunction]
pub fn calculate_frame_abundances_emg_par(frame_ids: Vec<i32>, retention_times: Vec<f64>, frame_occurrences: Vec<Vec<i32>>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, rt_cycle_length: f64, num_threads: usize) -> Vec<Vec<f64>> {
pub fn calculate_frame_abundances_emg_par(frame_ids: Vec<i32>, retention_times: Vec<f64>, frame_occurrences: Vec<Vec<i32>>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, rt_cycle_length: f64, num_threads: usize, n_steps: Option<usize>) -> Vec<Vec<f64>> {
let time_map: HashMap<i32, f64> = frame_ids.iter().zip(retention_times.iter()).map(|(id, rt)| (*id, *rt)).collect();
mscore::algorithm::utility::calculate_frame_abundances_emg_par(&time_map, frame_occurrences, rts, sigmas, lambdas, rt_cycle_length, num_threads)
mscore::algorithm::utility::calculate_frame_abundances_emg_par(&time_map, frame_occurrences, rts, sigmas, lambdas, rt_cycle_length, num_threads, n_steps)
}


Expand Down
6 changes: 2 additions & 4 deletions mscore/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@ name = "mscore"
version = "0.2.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
statrs = "0.16.0"
itertools = "0.12.1"
itertools = "0.13.0"
rayon = "1.10.0"
nalgebra = "0.32.5"
serde = { version = "1.0.198", features = ["derive"] }
serde_json = "1.0.116"
regex = "1.10.4"
GSL = "7.0"
# GSL = "7.0"
rand = "0.8.5"

[lib]
Expand Down
72 changes: 60 additions & 12 deletions mscore/src/algorithm/utility.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,52 @@
extern crate rgsl;
// extern crate rgsl;

use std::collections::HashMap;
use rgsl::{IntegrationWorkspace, error::erfc, error::erf};
// use rgsl::{IntegrationWorkspace, error::erfc, error::erf};
use std::f64::consts::SQRT_2;
use rayon::prelude::*;
use rayon::ThreadPoolBuilder;

// Numerical integration using the trapezoidal rule
fn integrate<F>(f: F, a: f64, b: f64, n: usize) -> f64
where
F: Fn(f64) -> f64,
{
let dx = (b - a) / n as f64;
let mut sum = 0.0;
for i in 0..n {
let x = a + i as f64 * dx;
sum += f(x);
}
sum * dx
}

// Complementary error function (erfc)
fn erfc(x: f64) -> f64 {
1.0 - erf(x)
}

// Error function (erf)
fn erf(x: f64) -> f64 {
let t = 1.0 / (1.0 + 0.5 * x.abs());
let tau = t * (-x * x - 1.26551223 + t * (1.00002368 +
t * (0.37409196 + t * (0.09678418 + t * (-0.18628806 +
t * (0.27886807 + t * (-1.13520398 + t * (1.48851587 +
t * (-0.82215223 + t * 0.17087277)))))))))
.exp();
if x >= 0.0 {
1.0 - tau
} else {
tau - 1.0
}
}

// Exponentially modified Gaussian function
fn emg(x: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
let part1 = lambda / 2.0 * (-lambda * (x - mu) + lambda * lambda * sigma * sigma / 2.0).exp();
let part2 = erfc((mu + lambda * sigma * sigma - x) / (sigma * 2.0_f64.sqrt()));
part1 * part2
}

pub fn custom_cdf_normal(x: f64, mean: f64, std_dev: f64) -> f64 {
let z = (x - mean) / std_dev;
0.5 * (1.0 + erf(z / SQRT_2))
Expand All @@ -27,6 +68,7 @@ pub fn emg_function(x: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
prefactor * erfc_part
}

/*
pub fn emg_cdf_range(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
let mut workspace = IntegrationWorkspace::new(1000).expect("IntegrationWorkspace::new failed");
Expand All @@ -42,8 +84,14 @@ pub fn emg_cdf_range(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, la
result
}
*/

pub fn emg_cdf_range(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, lambda: f64, n_steps: Option<usize>) -> f64 {
let n_steps = n_steps.unwrap_or(1000);
integrate(|x| emg(x, mu, sigma, lambda), lower_limit, upper_limit, n_steps)
}

pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, target: f64, lower_start: f64, upper_start: f64) -> (f64, f64) {
pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, target: f64, lower_start: f64, upper_start: f64, n_steps: Option<usize>) -> (f64, f64) {
assert!(0.0 <= target && target <= 1.0, "target must be in [0, 1]");

let lower_initial = mu - lower_start * sigma;
Expand All @@ -53,7 +101,7 @@ pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, ta
let search_space: Vec<f64> = (0..=steps).map(|i| lower_initial + i as f64 * step_size).collect();

let calc_cdf = |low: usize, high: usize| -> f64 {
emg_cdf_range(search_space[low], search_space[high], mu, sigma, lambda)
emg_cdf_range(search_space[low], search_space[high], mu, sigma, lambda, n_steps)
};

// Binary search for cutoff values
Expand Down Expand Up @@ -85,8 +133,8 @@ pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, ta
(search_space[lower_cutoff_index], search_space[upper_cutoff_index])
}

pub fn calculate_frame_occurrence_emg(retention_times: &[f64], rt: f64, sigma: f64, lambda_: f64, target_p: f64, step_size: f64) -> Vec<i32> {
let (rt_min, rt_max) = calculate_bounds_emg(rt, sigma, lambda_, step_size, target_p, 20.0, 60.0);
pub fn calculate_frame_occurrence_emg(retention_times: &[f64], rt: f64, sigma: f64, lambda_: f64, target_p: f64, step_size: f64, n_steps: Option<usize>) -> Vec<i32> {
let (rt_min, rt_max) = calculate_bounds_emg(rt, sigma, lambda_, step_size, target_p, 20.0, 60.0, n_steps);

// Finding the frame closest to rt_min
let first_frame = retention_times.iter()
Expand All @@ -106,13 +154,13 @@ pub fn calculate_frame_occurrence_emg(retention_times: &[f64], rt: f64, sigma: f
(first_frame..=last_frame).map(|x| x as i32).collect()
}

pub fn calculate_frame_abundance_emg(time_map: &HashMap<i32, f64>, occurrences: &[i32], rt: f64, sigma: f64, lambda_: f64, rt_cycle_length: f64) -> Vec<f64> {
pub fn calculate_frame_abundance_emg(time_map: &HashMap<i32, f64>, occurrences: &[i32], rt: f64, sigma: f64, lambda_: f64, rt_cycle_length: f64, n_steps: Option<usize>) -> Vec<f64> {
let mut frame_abundance = Vec::new();

for &occurrence in occurrences {
if let Some(&time) = time_map.get(&occurrence) {
let start = time - rt_cycle_length;
let i = emg_cdf_range(start, time, rt, sigma, lambda_);
let i = emg_cdf_range(start, time, rt, sigma, lambda_, n_steps);
frame_abundance.push(i);
}
}
Expand All @@ -121,24 +169,24 @@ pub fn calculate_frame_abundance_emg(time_map: &HashMap<i32, f64>, occurrences:
}

// retention_times: &[f64], rt: f64, sigma: f64, lambda_: f64
pub fn calculate_frame_occurrences_emg_par(retention_times: &[f64], rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, target_p: f64, step_size: f64, num_threads: usize) -> Vec<Vec<i32>> {
pub fn calculate_frame_occurrences_emg_par(retention_times: &[f64], rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, target_p: f64, step_size: f64, num_threads: usize, n_steps: Option<usize>) -> Vec<Vec<i32>> {
let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
let result = thread_pool.install(|| {
rts.into_par_iter().zip(sigmas.into_par_iter()).zip(lambdas.into_par_iter())
.map(|((rt, sigma), lambda)| {
calculate_frame_occurrence_emg(retention_times, rt, sigma, lambda, target_p, step_size)
calculate_frame_occurrence_emg(retention_times, rt, sigma, lambda, target_p, step_size, n_steps)
})
.collect()
});
result
}

pub fn calculate_frame_abundances_emg_par(time_map: &HashMap<i32, f64>, occurrences: Vec<Vec<i32>>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, rt_cycle_length: f64, num_threads: usize) -> Vec<Vec<f64>> {
pub fn calculate_frame_abundances_emg_par(time_map: &HashMap<i32, f64>, occurrences: Vec<Vec<i32>>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, rt_cycle_length: f64, num_threads: usize, n_steps: Option<usize>) -> Vec<Vec<f64>> {
let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
let result = thread_pool.install(|| {
occurrences.into_par_iter().zip(rts.into_par_iter()).zip(sigmas.into_par_iter()).zip(lambdas.into_par_iter())
.map(|(((occurrences, rt), sigma), lambda)| {
calculate_frame_abundance_emg(time_map, &occurrences, rt, sigma, lambda, rt_cycle_length)
calculate_frame_abundance_emg(time_map, &occurrences, rt, sigma, lambda, rt_cycle_length, n_steps)
})
.collect()
});
Expand Down
2 changes: 1 addition & 1 deletion mscore/src/chemistry/unimod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1109,7 +1109,7 @@ pub fn unimod_modifications_mass() -> HashMap<&'static str, f64> {
/// use mscore::chemistry::unimod::unimod_modifications_mass_numerical;
///
/// let mass = unimod_modifications_mass_numerical();
/// assert_eq!(mass.get(&58), Some(&56.026215));
/// assert_eq!(mass.get(&1), Some(&42.010565));
/// ```
pub fn unimod_modifications_mass_numerical() -> HashMap<u32, f64> {
let mut map = HashMap::new();
Expand Down
14 changes: 9 additions & 5 deletions mscore/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
use mscore::algorithm::utility::emg_cdf_range;
use mscore::algorithm::utility::{calculate_bounds_emg, emg_cdf_range};

fn main() {
let mu = 0.0;
let sigma = 1.0;
let lambda = 1.0;
let lower_limit = 0.0;
let upper_limit = 2.0;
let n_steps: usize = 1000;
let step_size = 0.0001;
let l_s = 25.0;
let u_s = 25.0;

let cdf_range = emg_cdf_range(lower_limit, upper_limit, mu, sigma, lambda);
let (lb, ub) = calculate_bounds_emg(mu, sigma, lambda, step_size, 0.99, l_s, u_s, Some(n_steps));

println!("EMG CDF from {} to {} = {}", lower_limit, upper_limit, cdf_range);
let cdf_range = emg_cdf_range(lb, ub, mu, sigma, lambda, Some(n_steps));

println!("EMG CDF from {} to {} = {}", lb, ub, cdf_range);
}
3 changes: 0 additions & 3 deletions rustdf/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,8 @@ path = "src/lib.rs"
[profile.release]
debug = true

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
regex = "1.10.4"
sage-core = { git = "https://github.com/lazear/sage.git" }
clap = { version = "4.5.4", features = ["derive"] }
libloading = "0.8.3"
rusqlite = { version = "0.31.0", features = ["bundled"] }
Expand Down

0 comments on commit 594df4a

Please sign in to comment.