From 61eaff53e3ebb9978bd035113f1aee3d84664330 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Thu, 11 Jul 2024 13:45:38 +0200 Subject: [PATCH 01/16] contributions need to sum to 1 --- imspy/imspy/timstof/dbsearch/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dbsearch/utility.py b/imspy/imspy/timstof/dbsearch/utility.py index b37a6f1c..f59b5e51 100644 --- a/imspy/imspy/timstof/dbsearch/utility.py +++ b/imspy/imspy/timstof/dbsearch/utility.py @@ -425,7 +425,7 @@ def log_factorial(n: int, k: int) -> float: def beta_score(fragments_observed, fragments_predicted) -> float: - intensity = np.dot(fragments_observed.intensities, fragments_predicted.intensities) + intensity = np.dot(fragments_observed.intensities, fragments_predicted.intensities / np.sum(fragments_predicted.intensities)) len_b, len_y = 0, 0 From 7e4594d37bae84412af9cab19ac113bdcf1e1491 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Thu, 11 Jul 2024 13:51:28 +0200 Subject: [PATCH 02/16] testing --- imspy/imspy/timstof/dbsearch/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dbsearch/utility.py b/imspy/imspy/timstof/dbsearch/utility.py index f59b5e51..b37a6f1c 100644 --- a/imspy/imspy/timstof/dbsearch/utility.py +++ b/imspy/imspy/timstof/dbsearch/utility.py @@ -425,7 +425,7 @@ def log_factorial(n: int, k: int) -> float: def beta_score(fragments_observed, fragments_predicted) -> float: - intensity = np.dot(fragments_observed.intensities, fragments_predicted.intensities / np.sum(fragments_predicted.intensities)) + intensity = np.dot(fragments_observed.intensities, fragments_predicted.intensities) len_b, len_y = 0, 0 From 5f1931bca69e0624a5b235c1a4205ebe599271de Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Thu, 11 Jul 2024 15:51:47 +0200 Subject: [PATCH 03/16] adding cmd argument --- imspy/imspy/timstof/dbsearch/imspy_dda.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/imspy/imspy/timstof/dbsearch/imspy_dda.py b/imspy/imspy/timstof/dbsearch/imspy_dda.py index ad752a10..2669072a 100644 --- a/imspy/imspy/timstof/dbsearch/imspy_dda.py +++ b/imspy/imspy/timstof/dbsearch/imspy_dda.py @@ -258,7 +258,7 @@ def main(): parser.add_argument("--refinement_verbose", dest="refinement_verbose", action="store_true", help="Refinement verbose") parser.set_defaults(refinement_verbose=False) - parser.add_argument("--score", type=str, default="hyper_score", help="Score type (default: hyper_score)") + parser.add_argument("--re_score_score", type=str, default="hyper_score", help="Score type to be used for re-scoring (default: hyper_score)") args = parser.parse_args() @@ -305,7 +305,7 @@ def main(): start_time = time.time() scores = ["hyper_score", "beta_score"] - assert args.score in scores, f"Score type {args.score} not supported. Supported score types are: {scores}" + assert args.re_score_score in scores, f"Score type {args.re_score_score} not supported. Supported score types are: {scores}" if args.verbose: print(f"found {len(paths)} RAW data folders in {args.path} ...") @@ -703,7 +703,7 @@ def main(): psms = list(sorted(psms, key=lambda psm: (psm.spec_idx, psm.peptide_idx))) psms = re_score_psms(psms=psms, verbose=args.verbose, num_splits=args.re_score_num_splits, - balance=args.balanced_re_score, score=args.score) + balance=args.balanced_re_score, score=args.re_score_score) # serialize all PSMs to JSON binary bts = psms_to_json_bin(psms) From b0aa581b824e87f38f8e271ed44eecccfa5be80b Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Thu, 11 Jul 2024 15:53:26 +0200 Subject: [PATCH 04/16] adding cmd argument --- imspy/imspy/timstof/dbsearch/imspy_dda.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/imspy/imspy/timstof/dbsearch/imspy_dda.py b/imspy/imspy/timstof/dbsearch/imspy_dda.py index 2669072a..c7c70880 100644 --- a/imspy/imspy/timstof/dbsearch/imspy_dda.py +++ b/imspy/imspy/timstof/dbsearch/imspy_dda.py @@ 
-258,7 +258,7 @@ def main(): parser.add_argument("--refinement_verbose", dest="refinement_verbose", action="store_true", help="Refinement verbose") parser.set_defaults(refinement_verbose=False) - parser.add_argument("--re_score_score", type=str, default="hyper_score", help="Score type to be used for re-scoring (default: hyper_score)") + parser.add_argument("--rescore_score", type=str, default="hyper_score", help="Score type to be used for re-scoring (default: hyper_score)") args = parser.parse_args() @@ -305,7 +305,7 @@ def main(): start_time = time.time() scores = ["hyper_score", "beta_score"] - assert args.re_score_score in scores, f"Score type {args.re_score_score} not supported. Supported score types are: {scores}" + assert args.rescore_score in scores, f"Score type {args.rescore_score} not supported. Supported score types are: {scores}" if args.verbose: print(f"found {len(paths)} RAW data folders in {args.path} ...") @@ -703,7 +703,7 @@ def main(): psms = list(sorted(psms, key=lambda psm: (psm.spec_idx, psm.peptide_idx))) psms = re_score_psms(psms=psms, verbose=args.verbose, num_splits=args.re_score_num_splits, - balance=args.balanced_re_score, score=args.re_score_score) + balance=args.balanced_re_score, score=args.rescore_score) # serialize all PSMs to JSON binary bts = psms_to_json_bin(psms) From 894720fbd8b7b7f446065c496e6e15c34fb740d1 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Mon, 15 Jul 2024 14:02:07 +0200 Subject: [PATCH 05/16] adding random state to sampling of data frame --- imspy/imspy/simulation/timsim/simulator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/simulation/timsim/simulator.py b/imspy/imspy/simulation/timsim/simulator.py index b6f57876..554c531f 100644 --- a/imspy/imspy/simulation/timsim/simulator.py +++ b/imspy/imspy/simulation/timsim/simulator.py @@ -308,7 +308,7 @@ def main(): peptides = pd.concat(peptide_list) if args.sample_fraction < 1.0: - peptides = 
peptides.sample(frac=args.sample_fraction) + peptides = peptides.sample(frac=args.sample_fraction, random_state=41) peptides.reset_index(drop=True, inplace=True) if verbose: From 492ffb0f19cfc4f3dde77198744db5a1fac1377e Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Mon, 15 Jul 2024 14:17:02 +0200 Subject: [PATCH 06/16] testing --- imspy/imspy/simulation/timsim/simulator.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/imspy/imspy/simulation/timsim/simulator.py b/imspy/imspy/simulation/timsim/simulator.py index 554c531f..d5a7fb1c 100644 --- a/imspy/imspy/simulation/timsim/simulator.py +++ b/imspy/imspy/simulation/timsim/simulator.py @@ -72,15 +72,19 @@ def main(): help="Use the layout of the reference dataset for the acquisition (default: True)") parser.set_defaults(use_reference_layout=True) + parser.add_argument("--no_sample_peptides", dest="sample_peptides", action="store_false", + help="Sample peptides from the digested fasta (default: True)") + parser.set_defaults(sample_peptides=True) + # Peptide digestion arguments parser.add_argument( - "--sample_fraction", - type=float, - default=0.005, + "--num_sample_peptides", + type=int, + default=25_000, help="Sample fraction, fraction of peptides to be sampled at random from digested fasta (default: 0.005)") parser.add_argument("--missed_cleavages", type=int, default=2, help="Number of missed cleavages (default: 2)") - parser.add_argument("--min_len", type=int, default=9, help="Minimum peptide length (default: 7)") + parser.add_argument("--min_len", type=int, default=8, help="Minimum peptide length (default: 7)") parser.add_argument("--max_len", type=int, default=30, help="Maximum peptide length (default: 30)") parser.add_argument("--cleave_at", type=str, default='KR', help="Cleave at (default: KR)") parser.add_argument("--restrict", type=str, default='P', help="Restrict (default: P)") @@ -307,8 +311,8 @@ def main(): peptides = pd.concat(peptide_list) - if 
args.sample_fraction < 1.0: - peptides = peptides.sample(frac=args.sample_fraction, random_state=41) + if args.sample_peptides: + peptides = peptides.sample(n=args.num_sample_peptides, random_state=41) peptides.reset_index(drop=True, inplace=True) if verbose: From 91183f6f68847a13bae4f3e3f8795417af3752b1 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Mon, 15 Jul 2024 14:19:51 +0200 Subject: [PATCH 07/16] fixing --- imspy/imspy/simulation/timsim/jobs/simulate_retention_time.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/imspy/imspy/simulation/timsim/jobs/simulate_retention_time.py b/imspy/imspy/simulation/timsim/jobs/simulate_retention_time.py index 21a6f5df..121c9bfa 100644 --- a/imspy/imspy/simulation/timsim/jobs/simulate_retention_time.py +++ b/imspy/imspy/simulation/timsim/jobs/simulate_retention_time.py @@ -1,6 +1,7 @@ import pandas as pd -from imspy.algorithm import DeepChromatographyApex, load_tokenizer_from_resources, load_deep_retention_time_predictor +from imspy.algorithm.rt.predictors import DeepChromatographyApex, load_deep_retention_time_predictor +from imspy.algorithm.utility import load_tokenizer_from_resources def simulate_retention_times( From 464032ab5726c823f869ee853ef132ce1923c661 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Tue, 16 Jul 2024 10:19:18 +0200 Subject: [PATCH 08/16] fixing --- imspy/imspy/simulation/timsim/simulator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imspy/imspy/simulation/timsim/simulator.py b/imspy/imspy/simulation/timsim/simulator.py index d5a7fb1c..29d62fc4 100644 --- a/imspy/imspy/simulation/timsim/simulator.py +++ b/imspy/imspy/simulation/timsim/simulator.py @@ -72,7 +72,7 @@ def main(): help="Use the layout of the reference dataset for the acquisition (default: True)") parser.set_defaults(use_reference_layout=True) - parser.add_argument("--no_sample_peptides", dest="sample_peptides", action="store_false", + 
parser.add_argument("--no_peptide_sampling", dest="sample_peptides", action="store_false", help="Sample peptides from the digested fasta (default: True)") parser.set_defaults(sample_peptides=True) @@ -211,7 +211,7 @@ def main(): action="store_true", dest="proteome_mix", ) - parser.set_defaults(proteome_mixture=False) + parser.set_defaults(proteome_mix=False) # Parse the arguments args = parser.parse_args() From 855eb417834b612f695431bdeb69cfb653c8dc8e Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Tue, 16 Jul 2024 11:35:12 +0200 Subject: [PATCH 09/16] fixing --- imspy/imspy/simulation/timsim/jobs/build_acquisition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/simulation/timsim/jobs/build_acquisition.py b/imspy/imspy/simulation/timsim/jobs/build_acquisition.py index 813da1c6..eebf5da5 100644 --- a/imspy/imspy/simulation/timsim/jobs/build_acquisition.py +++ b/imspy/imspy/simulation/timsim/jobs/build_acquisition.py @@ -1,6 +1,6 @@ from imspy.simulation.acquisition import TimsTofAcquisitionBuilderDIA from imspy.simulation.utility import read_acquisition_config -from imspy.timstof import TimsDataset, TimsDatasetDIA +from imspy.timstof import TimsDatasetDIA def build_acquisition( From 613a0f37af2aa8a66a1e84b6dc28a0c7d3a80af1 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Tue, 16 Jul 2024 15:30:25 +0200 Subject: [PATCH 10/16] adding correct dil factors --- .../imspy/simulation/resources/configs/dilution_factors.csv | 6 +++--- imspy/imspy/simulation/timsim/simulator.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/imspy/imspy/simulation/resources/configs/dilution_factors.csv b/imspy/imspy/simulation/resources/configs/dilution_factors.csv index 7ced4261..21f1768d 100644 --- a/imspy/imspy/simulation/resources/configs/dilution_factors.csv +++ b/imspy/imspy/simulation/resources/configs/dilution_factors.csv @@ -1,4 +1,4 @@ proteome,dilution_factor -"HUMAN.fasta",1.0 -"YEAST.fasta",0.33 
-"ECOLI.fasta",0.11 \ No newline at end of file +"HUMAN.fasta",0.65 +"YEAST.fasta",0.15 +"ECOLI.fasta",0.20 \ No newline at end of file diff --git a/imspy/imspy/simulation/timsim/simulator.py b/imspy/imspy/simulation/timsim/simulator.py index 29d62fc4..af2a48f3 100644 --- a/imspy/imspy/simulation/timsim/simulator.py +++ b/imspy/imspy/simulation/timsim/simulator.py @@ -84,7 +84,7 @@ def main(): help="Sample fraction, fraction of peptides to be sampled at random from digested fasta (default: 0.005)") parser.add_argument("--missed_cleavages", type=int, default=2, help="Number of missed cleavages (default: 2)") - parser.add_argument("--min_len", type=int, default=8, help="Minimum peptide length (default: 7)") + parser.add_argument("--min_len", type=int, default=7, help="Minimum peptide length (default: 7)") parser.add_argument("--max_len", type=int, default=30, help="Maximum peptide length (default: 30)") parser.add_argument("--cleave_at", type=str, default='KR', help="Cleave at (default: KR)") parser.add_argument("--restrict", type=str, default='P', help="Restrict (default: P)") From 8509b519d0ddb588703f073e56d2541a1cd15bb1 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Tue, 16 Jul 2024 17:31:40 +0200 Subject: [PATCH 11/16] updating dilution --- imspy/imspy/simulation/resources/configs/dilution_factors.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imspy/imspy/simulation/resources/configs/dilution_factors.csv b/imspy/imspy/simulation/resources/configs/dilution_factors.csv index 21f1768d..9e05add1 100644 --- a/imspy/imspy/simulation/resources/configs/dilution_factors.csv +++ b/imspy/imspy/simulation/resources/configs/dilution_factors.csv @@ -1,4 +1,4 @@ proteome,dilution_factor "HUMAN.fasta",0.65 -"YEAST.fasta",0.15 -"ECOLI.fasta",0.20 \ No newline at end of file +"YEAST.fasta",0.30 +"ECOLI.fasta",0.05 \ No newline at end of file From a05d21e30d16b474c5e1fdad61fcc8b52f21a5d5 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: 
Wed, 17 Jul 2024 10:54:26 +0200 Subject: [PATCH 12/16] adding min charge contrib to cmd arguments of simulator --- .../imspy/simulation/timsim/jobs/simulate_charge_states.py | 7 ++++++- imspy/imspy/simulation/timsim/simulator.py | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/imspy/imspy/simulation/timsim/jobs/simulate_charge_states.py b/imspy/imspy/simulation/timsim/jobs/simulate_charge_states.py index 5f44fd18..54fafc1b 100644 --- a/imspy/imspy/simulation/timsim/jobs/simulate_charge_states.py +++ b/imspy/imspy/simulation/timsim/jobs/simulate_charge_states.py @@ -12,6 +12,7 @@ def simulate_charge_states( mz_upper: float, p_charge: float = 0.5, charge_state_one_probability: float = 0.0, + min_charge_contrib: float = 0.15, ) -> pd.DataFrame: IonSource = DeepChargeStateDistribution( @@ -20,7 +21,11 @@ def simulate_charge_states( ) # IonSource = BinomialChargeStateDistributionModel(charged_probability=p_charge) - peptide_ions = IonSource.simulate_charge_state_distribution_pandas(peptides, charge_state_one_probability=charge_state_one_probability) + peptide_ions = IonSource.simulate_charge_state_distribution_pandas( + peptides, + charge_state_one_probability=charge_state_one_probability, + min_charge_contrib=min_charge_contrib, + ) # merge tables to have sequences with ions, remove mz values outside scope ions = pd.merge(left=peptide_ions, right=peptides, left_on=['peptide_id'], right_on=['peptide_id']) diff --git a/imspy/imspy/simulation/timsim/simulator.py b/imspy/imspy/simulation/timsim/simulator.py index af2a48f3..9ead570f 100644 --- a/imspy/imspy/simulation/timsim/simulator.py +++ b/imspy/imspy/simulation/timsim/simulator.py @@ -143,6 +143,8 @@ def main(): # charge state probabilities parser.add_argument("--p_charge", type=float, default=0.5, help="Probability of being charged (default: 0.5)") + parser.add_argument("--min_charge_contrib", type=float, default=0.15, + help="Minimum charge contribution (default: 0.15)") # Noise settings # -- 
1. RT and IM noise @@ -359,7 +361,8 @@ def main(): peptides=peptides, mz_lower=acquisition_builder.tdf_writer.helper_handle.mz_lower, mz_upper=acquisition_builder.tdf_writer.helper_handle.mz_upper, - p_charge=p_charge + p_charge=p_charge, + min_charge_contrib=args.min_charge_contrib, ) # JOB 6: Simulate ion mobilities From cfc29bbacf596f7ac2c226981a2e686943375128 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Wed, 17 Jul 2024 11:00:33 +0200 Subject: [PATCH 13/16] fixing arguments --- imspy/imspy/simulation/timsim/simulator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/imspy/imspy/simulation/timsim/simulator.py b/imspy/imspy/simulation/timsim/simulator.py index 9ead570f..6d0e573f 100644 --- a/imspy/imspy/simulation/timsim/simulator.py +++ b/imspy/imspy/simulation/timsim/simulator.py @@ -227,15 +227,15 @@ def main(): # Print table print(tabulate(table, headers=["Argument", "Value"], tablefmt="grid")) - # Save the arguments to a file - with open(os.path.join(args.path, 'arguments.txt'), 'w') as f: - f.write(tabulate(table, headers=["Argument", "Value"], tablefmt="grid")) - # Use the arguments path = check_path(args.path) reference_path = check_path(args.reference_path) name = args.name.replace('[PLACEHOLDER]', f'{args.acquisition_type}').replace("'", "") + # Save the arguments to a file, should go into the database folder + with open(os.path.join(path, f'arguments-{name}.txt'), 'w') as f: + f.write(tabulate(table, headers=["Argument", "Value"], tablefmt="grid")) + if args.proteome_mix: factors = get_dilution_factors() From a44293fa6e633d3eea7682febc3b65c5972d43dc Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Wed, 17 Jul 2024 11:03:14 +0200 Subject: [PATCH 14/16] adding all available threads to default num threads --- imspy/imspy/simulation/timsim/simulator.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/imspy/imspy/simulation/timsim/simulator.py b/imspy/imspy/simulation/timsim/simulator.py 
index 6d0e573f..6b64c26a 100644 --- a/imspy/imspy/simulation/timsim/simulator.py +++ b/imspy/imspy/simulation/timsim/simulator.py @@ -138,7 +138,7 @@ def main(): help="Sampling step size for frame distributions (default: 0.001)") # Number of cores to use - parser.add_argument("--num_threads", type=int, default=16, help="Number of threads to use (default: 16)") + parser.add_argument("--num_threads", type=int, default=-1, help="Number of threads to use (default: -1, all available)") parser.add_argument("--batch_size", type=int, default=256, help="Batch size (default: 256)") # charge state probabilities @@ -334,6 +334,10 @@ def main(): columns[-2], columns[-1] = columns[-1], columns[-2] peptides = peptides[columns] + # get the number of available threads of the system if not specified + if args.num_threads == -1: + args.num_threads = os.cpu_count() + # JOB 4: Simulate frame distributions emg peptides = simulate_frame_distributions_emg( peptides=peptides, From 28110292c8dadb3b1eb70df8dc5b7ee9a0c914f6 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Wed, 17 Jul 2024 11:16:10 +0200 Subject: [PATCH 15/16] adding readme --- imspy/imspy/timstof/data.py | 2 +- imspy/imspy/timstof/dda.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/imspy/imspy/timstof/data.py b/imspy/imspy/timstof/data.py index 9c4e1e87..48fb2610 100644 --- a/imspy/imspy/timstof/data.py +++ b/imspy/imspy/timstof/data.py @@ -59,7 +59,7 @@ def get_py_ptr(self): class TimsDataset(ABC): - def __init__(self, data_path: str, in_memory: bool = True): + def __init__(self, data_path: str, in_memory: bool = False): """TimsDataHandle class. Args: diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 0c350626..40a682b3 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -59,8 +59,8 @@ def _load_pasef_meta_data(self): def get_pasef_fragments(self, num_threads: int = 1) -> pd.DataFrame: """Get PASEF fragments. 
- Args: - num_threads (int, optional): Number of threads. Defaults to 1. + Args: num_threads (int, optional): Number of threads. Defaults to 1. CAUTION: As long as the connection to + datasets is established via the Bruker .so / .dll, using multiple threads is unstable. Returns: List[FragmentDDA]: List of PASEF fragments. From 5098bd8fa27071f96dc14c354f353d3d7afac3df Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Wed, 17 Jul 2024 11:49:51 +0200 Subject: [PATCH 16/16] adding README --- imspy/README.md | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/imspy/README.md b/imspy/README.md index e69de29b..4d4829b1 100644 --- a/imspy/README.md +++ b/imspy/README.md @@ -0,0 +1,121 @@ +# imspy - Python package for working with timsTOF raw data + + + +## Raw data access + +### Establish a connection to a timsTOF raw file and access data + +```python +import numpy as np +from imspy.timstof import TimsDataset + +# you can use in-memory mode for faster access, but it requires more memory +tdf = TimsDataset("path/to/rawfolder.d", in_memory=False) + +# show global meta data table +print(tdf.global_meta_data) + +# show frame meta data +print(tdf.meta_data) + +# get the first frame (Bruker frame indices start at 1) +frame = tdf.get_tims_frame(1) + +# you can also use indexing +frame = tdf[1] + +# print data as pandas dataframe +frame.df() + +# get all spectra in a tims frame (sorted by scan = ion mobility) +spectra = frame.to_tims_spectra() + +# get a slice of multiple frames +frames = tdf.get_tims_slice(np.array([1, 2, 3])) + +# or, by using slicing +frames = tdf[1:4] +``` + +### DDA data + +```python +from imspy.timstof import TimsDatasetDDA +# read a DDA dataset +tdf = TimsDatasetDDA("path/to/rawfolder.d", in_memory=False) + +# get raw data of precursors together with their fragment ions +dda_fragments = tdf.get_pasef_fragments() + +# the timsTOF re-fragments precursors below a certain intensity threshold, +# you can aggregate 
the data for increased sensitivity like so: +dda_fragments_grouped = dda_fragments.groupby('precursor_id').agg({ + 'frame_id': 'first', + 'time': 'first', + 'precursor_id': 'first', + # this will sum up the raw data of all fragments with the same precursor_id + 'raw_data': 'sum', + 'scan_begin': 'first', + 'scan_end': 'first', + 'isolation_mz': 'first', + 'isolation_width': 'first', + 'collision_energy': 'first', + 'largest_peak_mz': 'first', + 'average_mz': 'first', + 'monoisotopic_mz': 'first', + 'charge': 'first', + 'average_scan': 'first', + 'intensity': 'first', + 'parent_id': 'first', +}) + +# for convenience, you can calculate the inverse mobility +# of the precursor ion by finding the maximum intensity along the scan dimension +mobility = dda_fragments_grouped.apply( + lambda r: r.raw_data.get_inverse_mobility_along_scan_marginal(), axis=1 +) + +# add the inverse mobility to the grouped data as a new column +dda_fragments_grouped['mobility'] = mobility +``` + +### DIA data + +```python +from imspy.timstof import TimsDatasetDIA +# read a DIA dataset +tdf = TimsDatasetDIA("path/to/rawfolder.d", in_memory=False) +``` + +## The chemistry module + +### Basic usage +```python +``` + +### Working with peptide sequences +```python +``` + +## Algorithms and machine learning + +### ion mobility and retention time prediction +```python +``` + +### Locality sensitive hashing +```python +``` + +### Mixture models +```python +``` + +## Pipeline: DDA data analysis (imspy_dda) +```python +``` + +## Pipeline: Synthetic raw data generation (timsim) +```python +``` \ No newline at end of file