Merge pull request #218 from theGreatHerrLebert/david@simulation

David@simulation
theGreatHerrLebert · May 30, 2024 · 594df4a · 594df4a
2 parents 785d65e + e5ba245
commit 594df4a
Show file tree

Hide file tree

Showing 9 changed files with 92 additions and 42 deletions.
diff --git a/imspy/imspy/simulation/timsim/jobs/simulate_frame_distributions_emg.py b/imspy/imspy/simulation/timsim/jobs/simulate_frame_distributions_emg.py
@@ -32,6 +32,7 @@ def simulate_frame_distributions_emg(
         verbose: bool = False,
         add_noise: bool = False,
         normalize: bool = False,
+        n_steps: int = 1000,
         num_threads: int = 4,
 ) -> pd.DataFrame:
 
@@ -53,7 +54,8 @@ def simulate_frame_distributions_emg(
         lambdas,
         target_p,
         step_size,
-        num_threads=num_threads
+        num_threads=num_threads,
+        n_steps=n_steps,
     )
 
     abundances = ims.calculate_frame_abundances_emg_par(
@@ -64,7 +66,8 @@ def simulate_frame_distributions_emg(
         sigmas,
         lambdas,
         rt_cycle_length,
-        num_threads=num_threads
+        num_threads=num_threads,
+        n_steps=n_steps,
     )
 
     if verbose:

diff --git a/imspy/pyproject.toml b/imspy/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "imspy"
-version = "0.2.20"
+version = "0.2.22"
 description = ""
 authors = ["theGreatHerrLebert <[email protected]>"]
 readme = "README.md"
@@ -22,7 +22,7 @@ pyopenms = ">=2.6.0"
 sagepy = ">=0.2.10"
 sagepy-connector = ">=0.2.11"
 
-imspy-connector = ">=0.2.21"
+imspy-connector = ">=0.2.22"
 
 scipy = ">=1.7.1"
 tqdm = ">=4.66"

diff --git a/imspy_connector/Cargo.toml b/imspy_connector/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "imspy-connector"
-version = "0.2.20"
+version = "0.2.22"
 edition = "2021"
 
 [lib]

diff --git a/imspy_connector/src/py_utility.rs b/imspy_connector/src/py_utility.rs
@@ -7,13 +7,13 @@ pub fn emg_cdf(x: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
 }
 
 #[pyfunction]
-pub fn accumulated_cdf_emg(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
-    mscore::algorithm::utility::emg_cdf_range(lower_limit, upper_limit, mu, sigma, lambda)
+pub fn accumulated_cdf_emg(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, lambda: f64, n_steps: Option<usize>) -> f64 {
+    mscore::algorithm::utility::emg_cdf_range(lower_limit, upper_limit, mu, sigma, lambda, n_steps)
 }
 
 #[pyfunction]
-pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, target: f64, lower_start: f64, upper_start: f64) -> (f64, f64) {
-    mscore::algorithm::utility::calculate_bounds_emg(mu, sigma, lambda, step_size, target, lower_start, upper_start)
+pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, target: f64, lower_start: f64, upper_start: f64, n_steps: Option<usize>) -> (f64, f64) {
+    mscore::algorithm::utility::calculate_bounds_emg(mu, sigma, lambda, step_size, target, lower_start, upper_start, n_steps)
 }
 
 #[pyfunction]
@@ -32,25 +32,25 @@ pub fn calculate_bounds_normal(mean: f64, std: f64, z_score: f64) -> (f64, f64)
 }
 
 #[pyfunction]
-pub fn calculate_frame_occurrence_emg(retention_times: Vec<f64>, rt: f64, sigma: f64, lambda_: f64, target_p: f64, step_size: f64) -> Vec<i32> {
-    mscore::algorithm::utility::calculate_frame_occurrence_emg(&retention_times, rt, sigma, lambda_, target_p, step_size)
+pub fn calculate_frame_occurrence_emg(retention_times: Vec<f64>, rt: f64, sigma: f64, lambda_: f64, target_p: f64, step_size: f64, n_steps: Option<usize>) -> Vec<i32> {
+    mscore::algorithm::utility::calculate_frame_occurrence_emg(&retention_times, rt, sigma, lambda_, target_p, step_size, n_steps)
 }
 
 #[pyfunction]
-pub fn calculate_frame_abundance_emg(frame_ids: Vec<i32>, retention_times: Vec<f64>, frame_occurrences: Vec<i32>, rt: f64, sigma: f64, lambda_: f64, rt_cycle_length: f64) -> Vec<f64> {
+pub fn calculate_frame_abundance_emg(frame_ids: Vec<i32>, retention_times: Vec<f64>, frame_occurrences: Vec<i32>, rt: f64, sigma: f64, lambda_: f64, rt_cycle_length: f64, n_steps: Option<usize>) -> Vec<f64> {
     let time_map: HashMap<i32, f64> = frame_ids.iter().zip(retention_times.iter()).map(|(id, rt)| (*id, *rt)).collect();
-    mscore::algorithm::utility::calculate_frame_abundance_emg(&time_map, &frame_occurrences, rt, sigma, lambda_, rt_cycle_length)
+    mscore::algorithm::utility::calculate_frame_abundance_emg(&time_map, &frame_occurrences, rt, sigma, lambda_, rt_cycle_length, n_steps)
 }
 
 #[pyfunction]
-pub fn calculate_frame_occurrences_emg_par(retention_times: Vec<f64>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, target_p: f64, step_size: f64, num_threads: usize) -> Vec<Vec<i32>> {
-    mscore::algorithm::utility::calculate_frame_occurrences_emg_par(&retention_times, rts, sigmas, lambdas, target_p, step_size, num_threads)
+pub fn calculate_frame_occurrences_emg_par(retention_times: Vec<f64>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, target_p: f64, step_size: f64, num_threads: usize, n_steps: Option<usize>) -> Vec<Vec<i32>> {
+    mscore::algorithm::utility::calculate_frame_occurrences_emg_par(&retention_times, rts, sigmas, lambdas, target_p, step_size, num_threads, n_steps)
 }
 
 #[pyfunction]
-pub fn calculate_frame_abundances_emg_par(frame_ids: Vec<i32>, retention_times: Vec<f64>, frame_occurrences: Vec<Vec<i32>>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, rt_cycle_length: f64, num_threads: usize) -> Vec<Vec<f64>> {
+pub fn calculate_frame_abundances_emg_par(frame_ids: Vec<i32>, retention_times: Vec<f64>, frame_occurrences: Vec<Vec<i32>>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, rt_cycle_length: f64, num_threads: usize, n_steps: Option<usize>) -> Vec<Vec<f64>> {
     let time_map: HashMap<i32, f64> = frame_ids.iter().zip(retention_times.iter()).map(|(id, rt)| (*id, *rt)).collect();
-    mscore::algorithm::utility::calculate_frame_abundances_emg_par(&time_map, frame_occurrences, rts, sigmas, lambdas, rt_cycle_length, num_threads)
+    mscore::algorithm::utility::calculate_frame_abundances_emg_par(&time_map, frame_occurrences, rts, sigmas, lambdas, rt_cycle_length, num_threads, n_steps)
 }
 
 

diff --git a/mscore/Cargo.toml b/mscore/Cargo.toml
@@ -3,17 +3,15 @@ name = "mscore"
 version = "0.2.0"
 edition = "2021"
 
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
 [dependencies]
 statrs = "0.16.0"
-itertools = "0.12.1"
+itertools = "0.13.0"
 rayon = "1.10.0"
 nalgebra = "0.32.5"
 serde = { version = "1.0.198", features = ["derive"] }
 serde_json = "1.0.116"
 regex = "1.10.4"
-GSL = "7.0"
+# GSL = "7.0"
 rand = "0.8.5"
 
 [lib]

diff --git a/mscore/src/algorithm/utility.rs b/mscore/src/algorithm/utility.rs
@@ -1,11 +1,52 @@
-extern crate rgsl;
+// extern crate rgsl;
 
 use std::collections::HashMap;
-use rgsl::{IntegrationWorkspace, error::erfc, error::erf};
+// use rgsl::{IntegrationWorkspace, error::erfc, error::erf};
 use std::f64::consts::SQRT_2;
 use rayon::prelude::*;
 use rayon::ThreadPoolBuilder;
 
+// Numerical integration using the trapezoidal rule
+fn integrate<F>(f: F, a: f64, b: f64, n: usize) -> f64
+    where
+        F: Fn(f64) -> f64,
+{
+    let dx = (b - a) / n as f64;
+    let mut sum = 0.0;
+    for i in 0..n {
+        let x = a + i as f64 * dx;
+        sum += f(x);
+    }
+    sum * dx
+}
+
+// Complementary error function (erfc)
+fn erfc(x: f64) -> f64 {
+    1.0 - erf(x)
+}
+
+// Error function (erf)
+fn erf(x: f64) -> f64 {
+    let t = 1.0 / (1.0 + 0.5 * x.abs());
+    let tau = t * (-x * x - 1.26551223 + t * (1.00002368 +
+        t * (0.37409196 + t * (0.09678418 + t * (-0.18628806 +
+            t * (0.27886807 + t * (-1.13520398 + t * (1.48851587 +
+                t * (-0.82215223 + t * 0.17087277)))))))))
+        .exp();
+    if x >= 0.0 {
+        1.0 - tau
+    } else {
+        tau - 1.0
+    }
+}
+
+// Exponentially modified Gaussian function
+fn emg(x: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
+    let part1 = lambda / 2.0 * (-lambda * (x - mu) + lambda * lambda * sigma * sigma / 2.0).exp();
+    let part2 = erfc((mu + lambda * sigma * sigma - x) / (sigma * 2.0_f64.sqrt()));
+    part1 * part2
+}
+
 pub fn custom_cdf_normal(x: f64, mean: f64, std_dev: f64) -> f64 {
     let z = (x - mean) / std_dev;
     0.5 * (1.0 + erf(z / SQRT_2))
@@ -27,6 +68,7 @@ pub fn emg_function(x: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
     prefactor * erfc_part
 }
 
+/*
 pub fn emg_cdf_range(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
     let mut workspace = IntegrationWorkspace::new(1000).expect("IntegrationWorkspace::new failed");
 
@@ -42,8 +84,14 @@ pub fn emg_cdf_range(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, la
 
     result
 }
+*/
+
+pub fn emg_cdf_range(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, lambda: f64, n_steps: Option<usize>) -> f64 {
+    let n_steps = n_steps.unwrap_or(1000);
+    integrate(|x| emg(x, mu, sigma, lambda), lower_limit, upper_limit, n_steps)
+}
 
-pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, target: f64, lower_start: f64, upper_start: f64) -> (f64, f64) {
+pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, target: f64, lower_start: f64, upper_start: f64, n_steps: Option<usize>) -> (f64, f64) {
     assert!(0.0 <= target && target <= 1.0, "target must be in [0, 1]");
 
     let lower_initial = mu - lower_start * sigma;
@@ -53,7 +101,7 @@ pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, ta
     let search_space: Vec<f64> = (0..=steps).map(|i| lower_initial + i as f64 * step_size).collect();
 
     let calc_cdf = |low: usize, high: usize| -> f64 {
-        emg_cdf_range(search_space[low], search_space[high], mu, sigma, lambda)
+        emg_cdf_range(search_space[low], search_space[high], mu, sigma, lambda, n_steps)
     };
 
     // Binary search for cutoff values
@@ -85,8 +133,8 @@ pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, ta
     (search_space[lower_cutoff_index], search_space[upper_cutoff_index])
 }
 
-pub fn calculate_frame_occurrence_emg(retention_times: &[f64], rt: f64, sigma: f64, lambda_: f64, target_p: f64, step_size: f64) -> Vec<i32> {
-    let (rt_min, rt_max) = calculate_bounds_emg(rt, sigma, lambda_, step_size, target_p, 20.0, 60.0);
+pub fn calculate_frame_occurrence_emg(retention_times: &[f64], rt: f64, sigma: f64, lambda_: f64, target_p: f64, step_size: f64, n_steps: Option<usize>) -> Vec<i32> {
+    let (rt_min, rt_max) = calculate_bounds_emg(rt, sigma, lambda_, step_size, target_p, 20.0, 60.0, n_steps);
 
     // Finding the frame closest to rt_min
     let first_frame = retention_times.iter()
@@ -106,13 +154,13 @@ pub fn calculate_frame_occurrence_emg(retention_times: &[f64], rt: f64, sigma: f
     (first_frame..=last_frame).map(|x| x as i32).collect()
 }
 
-pub fn calculate_frame_abundance_emg(time_map: &HashMap<i32, f64>, occurrences: &[i32], rt: f64, sigma: f64, lambda_: f64, rt_cycle_length: f64) -> Vec<f64> {
+pub fn calculate_frame_abundance_emg(time_map: &HashMap<i32, f64>, occurrences: &[i32], rt: f64, sigma: f64, lambda_: f64, rt_cycle_length: f64, n_steps: Option<usize>) -> Vec<f64> {
     let mut frame_abundance = Vec::new();
 
     for &occurrence in occurrences {
         if let Some(&time) = time_map.get(&occurrence) {
             let start = time - rt_cycle_length;
-            let i = emg_cdf_range(start, time, rt, sigma, lambda_);
+            let i = emg_cdf_range(start, time, rt, sigma, lambda_, n_steps);
             frame_abundance.push(i);
         }
     }
@@ -121,24 +169,24 @@ pub fn calculate_frame_abundance_emg(time_map: &HashMap<i32, f64>, occurrences:
 }
 
 // retention_times: &[f64], rt: f64, sigma: f64, lambda_: f64
-pub fn calculate_frame_occurrences_emg_par(retention_times: &[f64], rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, target_p: f64, step_size: f64, num_threads: usize) -> Vec<Vec<i32>> {
+pub fn calculate_frame_occurrences_emg_par(retention_times: &[f64], rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, target_p: f64, step_size: f64, num_threads: usize, n_steps: Option<usize>) -> Vec<Vec<i32>> {
     let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
     let result = thread_pool.install(|| {
         rts.into_par_iter().zip(sigmas.into_par_iter()).zip(lambdas.into_par_iter())
             .map(|((rt, sigma), lambda)| {
-                calculate_frame_occurrence_emg(retention_times, rt, sigma, lambda, target_p, step_size)
+                calculate_frame_occurrence_emg(retention_times, rt, sigma, lambda, target_p, step_size, n_steps)
             })
             .collect()
     });
     result
 }
 
-pub fn calculate_frame_abundances_emg_par(time_map: &HashMap<i32, f64>, occurrences: Vec<Vec<i32>>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, rt_cycle_length: f64, num_threads: usize) -> Vec<Vec<f64>> {
+pub fn calculate_frame_abundances_emg_par(time_map: &HashMap<i32, f64>, occurrences: Vec<Vec<i32>>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, rt_cycle_length: f64, num_threads: usize, n_steps: Option<usize>) -> Vec<Vec<f64>> {
     let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
     let result = thread_pool.install(|| {
         occurrences.into_par_iter().zip(rts.into_par_iter()).zip(sigmas.into_par_iter()).zip(lambdas.into_par_iter())
             .map(|(((occurrences, rt), sigma), lambda)| {
-                calculate_frame_abundance_emg(time_map, &occurrences, rt, sigma, lambda, rt_cycle_length)
+                calculate_frame_abundance_emg(time_map, &occurrences, rt, sigma, lambda, rt_cycle_length, n_steps)
             })
             .collect()
     });

diff --git a/mscore/src/chemistry/unimod.rs b/mscore/src/chemistry/unimod.rs
@@ -1109,7 +1109,7 @@ pub fn unimod_modifications_mass() -> HashMap<&'static str, f64> {
 /// use mscore::chemistry::unimod::unimod_modifications_mass_numerical;
 ///
 /// let mass = unimod_modifications_mass_numerical();
-/// assert_eq!(mass.get(&58), Some(&56.026215));
+/// assert_eq!(mass.get(&1), Some(&42.010565));
 /// ```
 pub fn unimod_modifications_mass_numerical() -> HashMap<u32, f64> {
     let mut map = HashMap::new();

diff --git a/mscore/src/main.rs b/mscore/src/main.rs
@@ -1,13 +1,17 @@
-use mscore::algorithm::utility::emg_cdf_range;
+use mscore::algorithm::utility::{calculate_bounds_emg, emg_cdf_range};
 
 fn main() {
     let mu = 0.0;
     let sigma = 1.0;
     let lambda = 1.0;
-    let lower_limit = 0.0;
-    let upper_limit = 2.0;
+    let n_steps: usize = 1000;
+    let step_size = 0.0001;
+    let l_s = 25.0;
+    let u_s = 25.0;
 
-    let cdf_range = emg_cdf_range(lower_limit, upper_limit, mu, sigma, lambda);
+    let (lb, ub) = calculate_bounds_emg(mu, sigma, lambda, step_size, 0.99, l_s, u_s, Some(n_steps));
 
-    println!("EMG CDF from {} to {} = {}", lower_limit, upper_limit, cdf_range);
+    let cdf_range = emg_cdf_range(lb, ub, mu, sigma, lambda, Some(n_steps));
+
+    println!("EMG CDF from {} to {} = {}", lb, ub, cdf_range);
 }
diff --git a/rustdf/Cargo.toml b/rustdf/Cargo.toml
@@ -9,11 +9,8 @@ path = "src/lib.rs"
 [profile.release]
 debug = true
 
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
 [dependencies]
 regex = "1.10.4"
-sage-core = { git = "https://github.com/lazear/sage.git" }
 clap = { version = "4.5.4", features = ["derive"] }
 libloading = "0.8.3"
 rusqlite = { version = "0.31.0", features = ["bundled"] }