From b1f5221f3daeac3988d37bd15ea8a27f2af7e4c8 Mon Sep 17 00:00:00 2001
From: ArthurDeclercq <arre_declercq@hotmail.com>
Date: Thu, 28 Nov 2024 17:38:22 +0100
Subject: [PATCH 1/7] initial changes to speed up multi modification
 implementation

---
 mumble/mumble.py | 299 ++++++++++++++++++++++++-----------------------
 1 file changed, 150 insertions(+), 149 deletions(-)

diff --git a/mumble/mumble.py b/mumble/mumble.py
index b735f44..5c4aca7 100644
--- a/mumble/mumble.py
+++ b/mumble/mumble.py
@@ -3,8 +3,10 @@
 import itertools
 import os
 import json
-from collections import namedtuple
+import numpy as np
 from pathlib import Path
+from dataclasses import dataclass
+from itertools import product
 
 import pandas as pd
 import pickle
@@ -420,81 +422,21 @@ def _get_name_to_mass_residue_dict(self):
         return:
             dict: Dictionary with name as key and mass and residue as value
         """
-        Modification = namedtuple("modification", ["mass", "residues", "restrictions"])
 
         return {
-            row.name: Modification(row.monoisotopic_mass, row.residue, row.restriction)
+            row.name: Modification(
+                name=row.name,
+                monoisotopic_mass=row.monoisotopic_mass,
+                residue=row.residue,
+                restriction=row.restriction,
+            )
             for row in self.modification_df.groupby(["monoisotopic_mass", "name"])
             .agg({"residue": list, "restriction": list})
             .reset_index()
             .itertuples()
-        }  # TODO: used named tuple here
-
-    def get_localisation(
-        self, psm, modification_name, residue_list, restrictions
-    ) -> set[namedtuple]:
-        """
-        Localise a given modification in a peptide
-
-        Args:
-            psm (psm_utils.PSM): PSM object
-            modification_name (str): Name of the modification
-            residue_list (list): List of residues
-            restrictions (list): List of restrictions
-
-            return:
-                list: List of localised_mass_shift
-        """
-        loc_list = []
-        Localised_mass_shift = namedtuple("Localised_mass_shift", ["loc", "modification"])
-
-        amino_acids_peptide = [x[0] for x in psm.peptidoform.parsed_sequence]
-
-        for residue, restriction in zip(residue_list, restrictions):
-            if (residue == "N-term") and (psm.peptidoform.properties["n_term"] is None):
-                loc_list.append(Localised_mass_shift("N-term", modification_name))
-
-            elif residue == "C-term" and (psm.peptidoform.properties["c_term"] is None):
-                loc_list.append(Localised_mass_shift("C-term", modification_name))
-
-            elif residue == "protein_level":
-                loc_list.extend(
-                    [
-                        Localised_mass_shift(loc, mod)
-                        for loc, mod in self.check_protein_level(psm, modification_name)
-                    ]
-                )
-            elif restriction == "N-term" or restriction == "C-term":
-                if (
-                    restriction == "N-term"
-                    and (psm.peptidoform.properties["n_term"] is None)
-                    and (psm.peptidoform.parsed_sequence[0][0] == residue)
-                ):
-                    loc_list.append(Localised_mass_shift("N-term", modification_name))
-
-                elif (
-                    restriction == "C-term"
-                    and (psm.peptidoform.properties["c_term"] is None)
-                    and (psm.peptidoform.parsed_sequence[-1][0] == residue)
-                ):
-                    loc_list.append(Localised_mass_shift("C-term", modification_name))
-
-                else:
-                    continue
-
-            elif residue in amino_acids_peptide:
-                loc_list.extend(
-                    [
-                        Localised_mass_shift(i, modification_name)
-                        for i, aa in enumerate(amino_acids_peptide)
-                        if (aa == residue) and (psm.peptidoform.parsed_sequence[i][1] is None)
-                    ]
-                )
-
-        # remove duplicate locations
-        return set(loc_list)
+        }
 
-    def localize_mass_shift(self, psm) -> list[namedtuple]:
+    def localize_mass_shift(self, psm):
         """Give potential localisations of a mass shift in a peptide
 
         Args:
@@ -504,87 +446,80 @@ def localize_mass_shift(self, psm) -> list[namedtuple]:
             list: List of Modification_candidate([localised_mass_shift])
         """
         expmass = mz_to_mass(psm.precursor_mz, psm.get_precursor_charge())
-        calcmass = calculate_mass(psm.peptidoform.composition)
+        calcmass = calculate_mass(
+            psm.peptidoform.composition
+        )  # TODO: use mass instead of composition
         mass_shift = expmass - calcmass
 
+        original_peptidoform = (
+            (["N-term"] if not psm.peptidoform.properties["n_term"] else [None])
+            + (["Protein_level"] if self.protein_level_check else [None])
+            + ([x[0] if not x[1] else None for x in psm.peptidoform.parsed_sequence])
+            + (["Protein_level"] if self.protein_level_check else [None])
+            + (["C-term"] if not psm.peptidoform.properties["c_term"] else [None])
+        )
+
         # get all potential modifications
-        try:
-            potential_modifications_indices = self._binary_range_search(
-                self.monoisotopic_masses, mass_shift, self.mass_error
-            )
-            if potential_modifications_indices:
-                potential_modifications_tuples = self.modifications_names[
-                    potential_modifications_indices[0] : potential_modifications_indices[1] + 1
-                ]
-            else:
-                return []
-        except KeyError:
-            return None
-
-        Modification_candidate = namedtuple("Modification_candidate", ["Localised_mass_shifts"])
-
-        # cache to store results for combinations
-        combination_cache = {}
-
-        def check_combination(combination, psm):
-            if not combination:
-                return []
-
-            if combination in combination_cache:
-                return combination_cache[combination]
-
-            if len(combination) == 1:
-                # Case: combination with no child combinations and not cached
-                mod_name = combination[0]
-                residues = self.name_to_mass_residue_dict[mod_name].residues
-                restrictions = self.name_to_mass_residue_dict[mod_name].restrictions
-                localizations = self.get_localisation(psm, mod_name, residues, restrictions)
-                # Store the results as a list of feasible modification candidates
-                result = [
-                    Modification_candidate(Localised_mass_shifts=[localization])
-                    for localization in localizations
-                ]
-                combination_cache[combination] = result
-                return result
+        left_index, rigth_index = self._binary_range_search(
+            self.monoisotopic_masses, mass_shift, self.mass_error
+        )
+        canididate_mass_shifts = self.modifications_names[left_index : rigth_index + 1]
+
+        for modifications in canididate_mass_shifts:
+            # Try localise all modifications for each mass shift candidate
+            peptidoform_candidates = []
+            for mod_name in modifications:
+                localised_mod = self.name_to_mass_residue_dict[mod_name].localise(
+                    original_peptidoform
+                )
+                if localised_mod[1] or localised_mod[-2]:
+                    localised_mod[1], localised_mod[-2] = self.check_protein_level(psm, mod_name)
+                peptidoform_candidates.append(localised_mod)
 
-            else:
-                # Case: combination with child combinations and not cached
-                # child_combinations = [combo for combo in itertools.product(*[[(mod,) for mod in combination]])]
-                child_combinations = itertools.product(combination)
-
-                # Get possible mass shift combinations for each child
-                child_results = []
-                for child in child_combinations:
-                    child_results.append(check_combination(child, psm))
-
-                # Combine child mass shift possibilities
-                combined_results = []
-                for child_result_list in itertools.product(*child_results):
-                    # Flatten the list of Localised_mass_shifts from all child results
-                    all_shifts = [
-                        shift
-                        for result in child_result_list
-                        for shift in result.Localised_mass_shifts
-                    ]
+            final_candidates = self._generate_combinations(peptidoform_candidates)
 
-                    # Check for position conflicts
-                    positions = [shift.loc for shift in all_shifts]
-                    if len(set(positions)) == len(positions):  # No overlap in positions
-                        combined_results.append(
-                            Modification_candidate(Localised_mass_shifts=all_shifts)
-                        )
-
-                combination_cache[combination] = combined_results
-                return combined_results
-
-        feasible_modifications_candidates = []
-        for potential_mods_combination in potential_modifications_tuples:
-            # check every combination recursively
-            feasible_modifications_candidates.extend(
-                check_combination(potential_mods_combination, psm)
-            )
+        return final_candidates
+
+    @staticmethod
+    def generate_combinations(mod_lists):
+        """Give a list of lists of modifications, generate all possible combinations of modifications"""
+        mod_array = np.array(mod_lists, dtype=object)
+
+        # Use np.where to get indices of non-False values for each row
+        valid_indices = [
+            np.where(mod_array[i] != False)[0].tolist() for i in range(mod_array.shape[0])
+        ]
+        # generate all possible combinations of indices
+        valid_combinations = [
+            comb for comb in list(product(*valid_indices)) if len(set(comb)) == len(mod_lists)
+        ]
 
-        return feasible_modifications_candidates if feasible_modifications_candidates else None
+        candidates = []
+        # Generate the candidate modifications
+        for combination in valid_combinations:
+            candidate = [False] * len(mod_array[0])
+            for i, idx in enumerate(combination):
+                name = mod_array[i][idx]
+                if candidate[idx] == "[B]":
+                    candidate = False
+                    break
+
+                elif "[B]" in name:
+                    candidate[idx] = name.replace("[B]", "")
+                    # check that there are no other modifications on before this position
+                    if any(candidate[:idx]):
+                        break
+                    else:
+                        # block everything before this position
+                        candidate[:idx] = ["[B]"] * idx
+                else:
+                    candidate[idx] = name
+            if candidate:
+                # replace [B] with False
+                candidate = [False if x == "[B]" else x for x in candidate]
+                candidates.append(candidate)
+
+        return candidates
 
     @staticmethod
     def _binary_range_search(arr, target, error) -> tuple[int, int]:
@@ -638,7 +573,7 @@ def binary_right_index(arr, value) -> int:
         # Check bounds and validity
         if left <= right and left < len(arr) and right >= 0:
             return (left, right)
-        return ()
+        return (0, 0)
 
     def _get_aa_sub_dict(self):
         """
@@ -705,7 +640,6 @@ def check_protein_level(self, psm, additional_aa):
 
         if psm.is_decoy:
             return []
-        found_additional_amino_acids = []
 
         protein_sequence = self.fasta_file[psm.protein_list[0]].sequence
         peptide_start_position = protein_sequence.find(psm.peptidoform.sequence)
@@ -716,15 +650,19 @@ def check_protein_level(self, psm, additional_aa):
             protein_sequence[peptide_start_position - additional_aa_len : peptide_start_position]
             == additional_aa
         ):
-            found_additional_amino_acids.append(("prepeptide", additional_aa))
+            prepeptide = additional_aa
+        else:
+            prepeptide = False
 
         if (
             protein_sequence[peptide_end_position : peptide_end_position + additional_aa_len]
             == additional_aa
         ):
-            found_additional_amino_acids.append(("postpeptide", additional_aa))
+            postpeptide = additional_aa
+        else:
+            postpeptide = False
 
-        return found_additional_amino_acids
+        return prepeptide, postpeptide
 
 
 class ModificationCache:
@@ -984,6 +922,69 @@ def _read_unimod_file(self, modification_file=None):
             return None, None
 
 
+@dataclass
+class Modification:
+    name: str
+    monoisotopic_mass: float
+    residues: np.ndarray
+    restrictions: np.ndarray
+
+    def __post_init__(self):
+        assert len(self.residues) == len(
+            self.restrictions
+        ), "Residues and restrictions should have the same length"
+
+    def localise(self, original_peptidoform_list):
+        """
+        Localise the modification in a peptide based on residues and restrictions.
+
+        Args:
+            original_peptidoform_list (np.ndarray): Array of the original peptidoform.
+
+        Returns:
+            np.ndarray: Array with modifcation name, [B]Modification name or False for each element.
+        """
+        # Initialize result array with "False"
+        result = np.full(original_peptidoform_list.shape, False, dtype=object)
+
+        # Precompute positions
+        n_term_positions = np.array([0, 2])
+        c_term_positions = np.array(
+            [len(original_peptidoform_list) - 1, len(original_peptidoform_list) - 3]
+        )
+
+        for residue, restriction in zip(self.residues, self.restrictions):
+            # Find matches
+            matches = original_peptidoform_list == residue
+
+            if restriction == "Anywhere":
+                result[matches] = self.name
+
+            elif restriction == "N-term":
+                # Handle N-term restrictions
+                n_term_indices = np.intersect1d(np.where(matches)[0], n_term_positions)
+                result[n_term_indices] = "[B]" + self.name
+                result[np.where(matches)[0][~np.isin(np.where(matches)[0], n_term_positions)]] = (
+                    False
+                )
+                result[np.where(matches)[0][np.isin(np.where(matches)[0], [0])]] = self.name
+
+            elif restriction == "C-term":
+                # Handle C-term restrictions
+                c_term_indices = np.intersect1d(np.where(matches)[0], c_term_positions)
+                result[c_term_indices] = "[B]" + self.name
+                result[np.where(matches)[0][~np.isin(np.where(matches)[0], c_term_positions)]] = (
+                    False
+                )
+                result[
+                    np.where(matches)[0][
+                        np.isin(np.where(matches)[0], [len(original_peptidoform_list) - 1])
+                    ]
+                ] = self.name
+
+        return result
+
+
 class JSONConfigLoader:
     """Loads a single-level configuration from a JSON file."""
 

From b00a2d6a70d2a80e90efb3e000d3d067dc9c7321 Mon Sep 17 00:00:00 2001
From: ArthurDeclercq <arre_declercq@hotmail.com>
Date: Fri, 29 Nov 2024 13:29:29 +0100
Subject: [PATCH 2/7] finish initial changes to modification mapping

---
 mumble/mumble.py | 153 +++++++++++++++++------------------------------
 1 file changed, 56 insertions(+), 97 deletions(-)

diff --git a/mumble/mumble.py b/mumble/mumble.py
index 5c4aca7..b95abf9 100644
--- a/mumble/mumble.py
+++ b/mumble/mumble.py
@@ -118,9 +118,7 @@ def _find_mod_locations(peptidoform):
 
         return locations
 
-    def _return_mass_shifted_peptidoform(
-        self, modification_tuple_list, peptidoform
-    ) -> Peptidoform:
+    def _return_mass_shifted_psms(self, modification_lists, psm) -> Peptidoform:
         """
         Apply a list of modification tuples to a peptidoform.
 
@@ -131,75 +129,43 @@ def _return_mass_shifted_peptidoform(
         Returns:
             list of psm_utils.Peptidoform: List of new peptidoform objects with applied modifications, or None if conflicting modifications exist.
         """
+        mass_shifted_psms = []
 
-        new_peptidoforms = []
-        new_peptidoform = deepcopy(peptidoform)
+        for assigned_modifications in modification_lists:
 
-        existing_mod_locations = self._find_mod_locations(new_peptidoform)
+            mass_shifted_psm = deepcopy(psm)
+            new_peptidoform = mass_shifted_psm.peptidoform  # TODO check if link is broken
 
-        for modification_tuple in modification_tuple_list:
+            # handle terminal modifications
+            new_peptidoform.properties["n_term"] = assigned_modifications[0]
+            new_peptidoform.properties["c_term"] = assigned_modifications[-1]
 
-            new_peptidoform = deepcopy(peptidoform)
-            for localised_mass_shift in modification_tuple.Localised_mass_shifts:
-                loc, mod = localised_mass_shift
-                if loc in existing_mod_locations:
-                    return None
+            # handle peptide modifications
+            for i, mod in enumerate(assigned_modifications[2:-2]):
+                if mod in self.modification_handler.aa_sub_dict.keys():
+                    new_peptidoform.parsed_sequence[i] = (
+                        self.modification_handler.aa_sub_dict[mod][1],
+                        None,
+                    )
                 else:
-                    if loc == "N-term":
-                        new_peptidoform.properties["n_term"] = [proforma.process_tag_tokens(mod)]
-                    elif loc == "C-term":
-                        new_peptidoform.properties["c_term"] = [proforma.process_tag_tokens(mod)]
-                    elif loc == "prepeptide":
-                        new_peptidoform.parsed_sequence = [
-                            (aa, None) for aa in mod
-                        ] + new_peptidoform.parsed_sequence
-
-                    elif loc == "postpeptide":
-                        new_peptidoform.parsed_sequence = new_peptidoform.parsed_sequence + [
-                            (aa, None) for aa in mod
-                        ]
-                    else:
-                        try:
-                            aa = new_peptidoform.parsed_sequence[loc][0]
-                        except IndexError:
-                            logger.warning(f"IndexError for {peptidoform} at {loc} with {mod}")
-                            raise IndexError("Localisation is not in peptide")
-
-                        # If the modification is an amino acid substitution
-                        if mod in self.modification_handler.aa_sub_dict.keys():
-                            if (
-                                aa == self.modification_handler.aa_sub_dict[mod][0]
-                            ):  # TODO named tuple so indexing is not necesary and more clear
-                                new_peptidoform.parsed_sequence[loc] = (
-                                    self.modification_handler.aa_sub_dict[mod][1],
-                                    None,
-                                )
-                        # If the modification is a standard modification
-                        else:
-                            new_peptidoform.parsed_sequence[loc] = (
-                                aa,
-                                [proforma.process_tag_tokens(mod)],
-                            )
-            new_peptidoforms.append(new_peptidoform)
-        return new_peptidoforms
+                    new_peptidoform.parsed_sequence[i] = (
+                        new_peptidoform.parsed_sequence[i][0],
+                        mod,
+                    )
 
-    @staticmethod
-    def _create_new_psm(psm, new_peptidoform) -> PSM:
-        """
-        Create new psm with new peptidoform.
+            # handle peptide additions
+            if additional_aa := assigned_modifications[1]:
+                new_peptidoform.parsed_sequence = [
+                    (aa, None) for aa in additional_aa
+                ] + new_peptidoform.parsed_sequence
+            if additional_aa := assigned_modifications[-2]:
+                new_peptidoform.parsed_sequence = new_peptidoform.parsed_sequence + [
+                    (aa, None) for aa in additional_aa
+                ]
 
-        Args:
-            psm (psm_utils.PSM): PSM object
-            new_peptidoform (psm_utils.Peptidoform): Peptidoform object
+            mass_shifted_psms.append(mass_shifted_psm)
 
-        return:
-            psm_utils.PSM: PSM object
-        """
-        if new_peptidoform is None:
-            return
-        copy_psm = deepcopy(psm)
-        copy_psm.peptidoform = new_peptidoform
-        return copy_psm
+        return mass_shifted_psms
 
     def _get_modified_peptidoforms(self, psm, keep_original=False) -> list:
         """
@@ -218,19 +184,11 @@ def _get_modified_peptidoforms(self, psm, keep_original=False) -> list:
             psm["metadata"]["original_psm"] = True
             modified_peptidoforms.append(psm)
 
-        modification_tuple_list = self.modification_handler.localize_mass_shift(psm)
-        if modification_tuple_list:
-            new_proteoforms_list = self._return_mass_shifted_peptidoform(
-                modification_tuple_list, psm.peptidoform
+        localised_mass_shifts = self.modification_handler._localize_mass_shift(psm)
+        if localised_mass_shifts:
+            modified_peptidoforms.append(
+                self._return_mass_shifted_psms(localised_mass_shifts, psm)
             )
-            for new_proteoform in new_proteoforms_list:
-                new_psm = self._create_new_psm(
-                    psm,
-                    new_proteoform,
-                )
-                if new_psm is not None:
-                    new_psm["metadata"]["original_psm"] = False
-                    modified_peptidoforms.append(new_psm)
 
         return modified_peptidoforms
 
@@ -291,7 +249,7 @@ def add_modified_psms(
 
         for psm in track(
             parsed_psm_list,
-            description="Parsing PSMs in PSMList...",
+            description="Matching modifications to your mass shifts ... ",
             total=len(parsed_psm_list),
         ):
             if (psm.is_decoy) & (not generate_modified_decoys):
@@ -328,7 +286,7 @@ def _parse_psm_list(
         elif type(psm_list) is str:
             self.psm_file_name = Path(psm_list)
             psm_list = read_file(psm_list, filetype=psm_file_type)
-            psm_list.rename_modifications({"+15.9949": "Oxidation"})
+            psm_list.rename_modifications(modification_mapping)
         elif type(psm_list) is not PSMList:
             raise TypeError("psm_list should be a path to a file or a PSMList object")
 
@@ -427,8 +385,8 @@ def _get_name_to_mass_residue_dict(self):
             row.name: Modification(
                 name=row.name,
                 monoisotopic_mass=row.monoisotopic_mass,
-                residue=row.residue,
-                restriction=row.restriction,
+                residues=row.residue,
+                restrictions=row.restriction,
             )
             for row in self.modification_df.groupby(["monoisotopic_mass", "name"])
             .agg({"residue": list, "restriction": list})
@@ -436,7 +394,7 @@ def _get_name_to_mass_residue_dict(self):
             .itertuples()
         }
 
-    def localize_mass_shift(self, psm):
+    def _localize_mass_shift(self, psm):
         """Give potential localisations of a mass shift in a peptide
 
         Args:
@@ -451,7 +409,7 @@ def localize_mass_shift(self, psm):
         )  # TODO: use mass instead of composition
         mass_shift = expmass - calcmass
 
-        original_peptidoform = (
+        peptidofrom_as_list = (
             (["N-term"] if not psm.peptidoform.properties["n_term"] else [None])
             + (["Protein_level"] if self.protein_level_check else [None])
             + ([x[0] if not x[1] else None for x in psm.peptidoform.parsed_sequence])
@@ -470,7 +428,7 @@ def localize_mass_shift(self, psm):
             peptidoform_candidates = []
             for mod_name in modifications:
                 localised_mod = self.name_to_mass_residue_dict[mod_name].localise(
-                    original_peptidoform
+                    peptidofrom_as_list
                 )
                 if localised_mod[1] or localised_mod[-2]:
                     localised_mod[1], localised_mod[-2] = self.check_protein_level(psm, mod_name)
@@ -481,8 +439,8 @@ def localize_mass_shift(self, psm):
         return final_candidates
 
     @staticmethod
-    def generate_combinations(mod_lists):
-        """Give a list of lists of modifications, generate all possible combinations of modifications"""
+    def _generate_combinations(mod_lists):
+        """Give a list of lists of single modification placements, generate all potential combinations"""
         mod_array = np.array(mod_lists, dtype=object)
 
         # Use np.where to get indices of non-False values for each row
@@ -497,7 +455,7 @@ def generate_combinations(mod_lists):
         candidates = []
         # Generate the candidate modifications
         for combination in valid_combinations:
-            candidate = [False] * len(mod_array[0])
+            candidate = [None] * len(mod_array[0])
             for i, idx in enumerate(combination):
                 name = mod_array[i][idx]
                 if candidate[idx] == "[B]":
@@ -508,6 +466,7 @@ def generate_combinations(mod_lists):
                     candidate[idx] = name.replace("[B]", "")
                     # check that there are no other modifications on before this position
                     if any(candidate[:idx]):
+                        candidate = False
                         break
                     else:
                         # block everything before this position
@@ -515,8 +474,8 @@ def generate_combinations(mod_lists):
                 else:
                     candidate[idx] = name
             if candidate:
-                # replace [B] with False
-                candidate = [False if x == "[B]" else x for x in candidate]
+                # replace [B] with None
+                candidate = [None if x == "[B]" else x for x in candidate]
                 candidates.append(candidate)
 
         return candidates
@@ -934,28 +893,28 @@ def __post_init__(self):
             self.restrictions
         ), "Residues and restrictions should have the same length"
 
-    def localise(self, original_peptidoform_list):
+    def localise(self, peptidoform_as_list):
         """
         Localise the modification in a peptide based on residues and restrictions.
 
         Args:
-            original_peptidoform_list (np.ndarray): Array of the original peptidoform.
+            peptidoform_as_list (np.ndarray): Array of the original peptidoform.
 
         Returns:
             np.ndarray: Array with modifcation name, [B]Modification name or False for each element.
         """
-        # Initialize result array with "False"
-        result = np.full(original_peptidoform_list.shape, False, dtype=object)
+        # convert to array
+        peptidoform_as_list = np.array(peptidoform_as_list)
+        # Initialize result array with False
+        result = np.full(peptidoform_as_list.shape, False, dtype=object)
 
         # Precompute positions
         n_term_positions = np.array([0, 2])
-        c_term_positions = np.array(
-            [len(original_peptidoform_list) - 1, len(original_peptidoform_list) - 3]
-        )
+        c_term_positions = np.array([len(peptidoform_as_list) - 1, len(peptidoform_as_list) - 3])
 
         for residue, restriction in zip(self.residues, self.restrictions):
             # Find matches
-            matches = original_peptidoform_list == residue
+            matches = peptidoform_as_list == residue
 
             if restriction == "Anywhere":
                 result[matches] = self.name
@@ -978,7 +937,7 @@ def localise(self, original_peptidoform_list):
                 )
                 result[
                     np.where(matches)[0][
-                        np.isin(np.where(matches)[0], [len(original_peptidoform_list) - 1])
+                        np.isin(np.where(matches)[0], [len(peptidoform_as_list) - 1])
                     ]
                 ] = self.name
 

From c355f6aa715be6c1a1d373b9b7aafbc8ce5e7870 Mon Sep 17 00:00:00 2001
From: ArthurDeclercq <arre_declercq@hotmail.com>
Date: Fri, 29 Nov 2024 18:34:25 +0100
Subject: [PATCH 3/7] bugfixes in modifications mapping and removing unused
 functions

---
 mumble/mumble.py | 64 +++++++++++++++++++-----------------------------
 1 file changed, 25 insertions(+), 39 deletions(-)

diff --git a/mumble/mumble.py b/mumble/mumble.py
index b95abf9..dcb1a00 100644
--- a/mumble/mumble.py
+++ b/mumble/mumble.py
@@ -93,31 +93,6 @@ def _load_parameters(self, overrides: dict) -> dict:
 
         return params
 
-    @staticmethod
-    def _find_mod_locations(peptidoform):
-        """
-        Find the locations of existing modifications in a peptide.
-
-        Args:
-            peptidoform (psm_utils.Peptidoform): Peptidoform object
-
-        return:
-            list: List of locations of existing modifications
-        """
-        locations = []
-
-        if peptidoform.properties["n_term"] is not None:
-            locations.append("N-term")
-
-        if peptidoform.properties["c_term"] is not None:
-            locations.append("C-term")
-
-        for i, aa in enumerate(peptidoform.parsed_sequence):
-            if aa[1] is not None:
-                locations.append(i)
-
-        return locations
-
     def _return_mass_shifted_psms(self, modification_lists, psm) -> Peptidoform:
         """
         Apply a list of modification tuples to a peptidoform.
@@ -137,8 +112,10 @@ def _return_mass_shifted_psms(self, modification_lists, psm) -> Peptidoform:
             new_peptidoform = mass_shifted_psm.peptidoform  # TODO check if link is broken
 
             # handle terminal modifications
-            new_peptidoform.properties["n_term"] = assigned_modifications[0]
-            new_peptidoform.properties["c_term"] = assigned_modifications[-1]
+            if nterm := assigned_modifications[0]:
+                new_peptidoform.properties["n_term"] = [proforma.process_tag_tokens(nterm)]
+            if cterm := assigned_modifications[-1]:
+                new_peptidoform.properties["c_term"] = [proforma.process_tag_tokens(cterm)]
 
             # handle peptide modifications
             for i, mod in enumerate(assigned_modifications[2:-2]):
@@ -147,10 +124,10 @@ def _return_mass_shifted_psms(self, modification_lists, psm) -> Peptidoform:
                         self.modification_handler.aa_sub_dict[mod][1],
                         None,
                     )
-                else:
+                elif mod:
                     new_peptidoform.parsed_sequence[i] = (
                         new_peptidoform.parsed_sequence[i][0],
-                        mod,
+                        [proforma.process_tag_tokens(mod)],
                     )
 
             # handle peptide additions
@@ -418,11 +395,15 @@ def _localize_mass_shift(self, psm):
         )
 
         # get all potential modifications
-        left_index, rigth_index = self._binary_range_search(
+        search_indices = self._binary_range_search(
             self.monoisotopic_masses, mass_shift, self.mass_error
         )
-        canididate_mass_shifts = self.modifications_names[left_index : rigth_index + 1]
-
+        if not search_indices:
+            return []
+        canididate_mass_shifts = self.modifications_names[
+            search_indices[0] : search_indices[1] + 1
+        ]
+        psm_modifications = []
         for modifications in canididate_mass_shifts:
             # Try localise all modifications for each mass shift candidate
             peptidoform_candidates = []
@@ -432,11 +413,16 @@ def _localize_mass_shift(self, psm):
                 )
                 if localised_mod[1] or localised_mod[-2]:
                     localised_mod[1], localised_mod[-2] = self.check_protein_level(psm, mod_name)
-                peptidoform_candidates.append(localised_mod)
 
-            final_candidates = self._generate_combinations(peptidoform_candidates)
+                if not localised_mod.any():
+                    break
+
+                peptidoform_candidates.append(localised_mod)
+            if peptidoform_candidates:
+                final_candidates = self._generate_combinations(peptidoform_candidates)
+                psm_modifications.extend(final_candidates)
 
-        return final_candidates
+        return psm_modifications
 
     @staticmethod
     def _generate_combinations(mod_lists):
@@ -532,7 +518,7 @@ def binary_right_index(arr, value) -> int:
         # Check bounds and validity
         if left <= right and left < len(arr) and right >= 0:
             return (left, right)
-        return (0, 0)
+        return None
 
     def _get_aa_sub_dict(self):
         """
@@ -665,7 +651,7 @@ def _get_cache_file_path(self):
         # Create the cache directory if it doesn't exist
         os.makedirs(cache_dir, exist_ok=True)
 
-        cache_file = os.path.join(cache_dir, "modification_cache.h5")
+        cache_file = os.path.join(cache_dir, "modification_cache.pkl")
         return cache_file
 
     def _load_or_generate_data(self, cache_file: str) -> None:
@@ -730,7 +716,7 @@ def get_unimod_database(self):
             monoisotopic_mass = mod.monoisotopic_mass
 
             for specificity in mod.specificities:
-                classification = specificity.classification
+                classification = specificity.classification.classification
 
                 # Skip based on classification
                 if classification == "Isotopic label":
@@ -916,7 +902,7 @@ def localise(self, peptidoform_as_list):
             # Find matches
             matches = peptidoform_as_list == residue
 
-            if restriction == "Anywhere":
+            if restriction == "anywhere":
                 result[matches] = self.name
 
             elif restriction == "N-term":

From ef46204173d6d300ec80cad62d9e0be0476c30d8 Mon Sep 17 00:00:00 2001
From: ArthurDeclercq <arre_declercq@hotmail.com>
Date: Sat, 30 Nov 2024 18:48:27 +0100
Subject: [PATCH 4/7] fixes to psm_list writing

---
 mumble/mumble.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/mumble/mumble.py b/mumble/mumble.py
index dcb1a00..e3c23ae 100644
--- a/mumble/mumble.py
+++ b/mumble/mumble.py
@@ -163,7 +163,7 @@ def _get_modified_peptidoforms(self, psm, keep_original=False) -> list:
 
         localised_mass_shifts = self.modification_handler._localize_mass_shift(psm)
         if localised_mass_shifts:
-            modified_peptidoforms.append(
+            modified_peptidoforms.extend(
                 self._return_mass_shifted_psms(localised_mass_shifts, psm)
             )
 
@@ -381,9 +381,7 @@ def _localize_mass_shift(self, psm):
             list: List of Modification_candidate([localised_mass_shift])
         """
         expmass = mz_to_mass(psm.precursor_mz, psm.get_precursor_charge())
-        calcmass = calculate_mass(
-            psm.peptidoform.composition
-        )  # TODO: use mass instead of composition
+        calcmass = psm.peptidoform.theoretical_mass
         mass_shift = expmass - calcmass
 
         peptidofrom_as_list = (

From 618b11b603a8e17dc6fee727f2caeb5505b41d3b Mon Sep 17 00:00:00 2001
From: ArthurDeclercq <arre_declercq@hotmail.com>
Date: Mon, 2 Dec 2024 11:35:11 +0100
Subject: [PATCH 5/7] added profiling and multiproccessing to try to speed
 things up

---
 mumble/__main__.py |  27 +++++
 mumble/mumble.py   | 248 ++++++++++++++++++++++++++++-----------------
 2 files changed, 180 insertions(+), 95 deletions(-)

diff --git a/mumble/__main__.py b/mumble/__main__.py
index 705d3dd..fdf180b 100644
--- a/mumble/__main__.py
+++ b/mumble/__main__.py
@@ -1,4 +1,5 @@
 import click
+import cProfile
 
 from mumble import PSMHandler
 
@@ -75,9 +76,27 @@
         "help": "Path to a config file",
         "default": None,
     },
+    "profile": {
+        "is_flag": True,
+        "help": "Profile the code with cProfile",
+        "default": False,
+        "show_default": True,
+    },
 }
 
 
+def profile(fnc, filepath):
+    """A decorator that uses cProfile to profile a function"""
+
+    def inner(*args, **kwargs):
+        with cProfile.Profile() as profiler:
+            return_value = fnc(*args, **kwargs)
+        profiler.dump_stats(filepath + ".profile.prof")
+        return return_value
+
+    return inner
+
+
 @click.command("cli", context_settings={"show_default": True})
 @click.argument("input_file", type=click.Path(exists=True), default=None)
 def main(**kwargs):
@@ -92,6 +111,11 @@ def main(**kwargs):
         for key, value in kwargs.items()
         if ctx.get_parameter_source(key) == click.core.ParameterSource.COMMANDLINE
     }
+    if kwargs["profile"]:
+        pr = cProfile.Profile()
+        pr.enable()
+    else:
+        pr = None
 
     # Initialize PSMHandler with priority for CLI params
     psm_handler = PSMHandler(
@@ -99,6 +123,9 @@ def main(**kwargs):
         **cli_params,
     )
     modified_psm_list = psm_handler.add_modified_psms(kwargs["input_file"])
+    if pr:
+        pr.disable()
+        pr.dump_stats(kwargs["input_file"] + ".profile.prof")
 
     psm_handler.write_modified_psm_list(modified_psm_list)
 
diff --git a/mumble/mumble.py b/mumble/mumble.py
index e3c23ae..a9632e9 100644
--- a/mumble/mumble.py
+++ b/mumble/mumble.py
@@ -7,6 +7,10 @@
 from pathlib import Path
 from dataclasses import dataclass
 from itertools import product
+from functools import lru_cache
+import concurrent.futures
+from functools import partial
+
 
 import pandas as pd
 import pickle
@@ -18,6 +22,7 @@
 from pyteomics.fasta import IndexedFASTA
 from rich.progress import track
 from rich.logging import RichHandler
+from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn
 
 # setup logging
 logging.basicConfig(
@@ -61,7 +66,7 @@ def _load_parameters(self, overrides: dict) -> dict:
         Load configuration parameters from the JSON file and apply overrides.
 
         Args:
-            overrides (dict): Dictionary of parameters to override.
+            overrides (dict): Dictionary of parameters _getto override.
 
         Returns:
             dict: Consolidated configuration parameters.
@@ -93,82 +98,6 @@ def _load_parameters(self, overrides: dict) -> dict:
 
         return params
 
-    def _return_mass_shifted_psms(self, modification_lists, psm) -> Peptidoform:
-        """
-        Apply a list of modification tuples to a peptidoform.
-
-        Args:
-            modification_tuple_list (list of tuples): List of modification tuples containing local mass shifts.
-            peptidoform (psm_utils.Peptidoform): Original peptidoform object to modify.
-
-        Returns:
-            list of psm_utils.Peptidoform: List of new peptidoform objects with applied modifications, or None if conflicting modifications exist.
-        """
-        mass_shifted_psms = []
-
-        for assigned_modifications in modification_lists:
-
-            mass_shifted_psm = deepcopy(psm)
-            new_peptidoform = mass_shifted_psm.peptidoform  # TODO check if link is broken
-
-            # handle terminal modifications
-            if nterm := assigned_modifications[0]:
-                new_peptidoform.properties["n_term"] = [proforma.process_tag_tokens(nterm)]
-            if cterm := assigned_modifications[-1]:
-                new_peptidoform.properties["c_term"] = [proforma.process_tag_tokens(cterm)]
-
-            # handle peptide modifications
-            for i, mod in enumerate(assigned_modifications[2:-2]):
-                if mod in self.modification_handler.aa_sub_dict.keys():
-                    new_peptidoform.parsed_sequence[i] = (
-                        self.modification_handler.aa_sub_dict[mod][1],
-                        None,
-                    )
-                elif mod:
-                    new_peptidoform.parsed_sequence[i] = (
-                        new_peptidoform.parsed_sequence[i][0],
-                        [proforma.process_tag_tokens(mod)],
-                    )
-
-            # handle peptide additions
-            if additional_aa := assigned_modifications[1]:
-                new_peptidoform.parsed_sequence = [
-                    (aa, None) for aa in additional_aa
-                ] + new_peptidoform.parsed_sequence
-            if additional_aa := assigned_modifications[-2]:
-                new_peptidoform.parsed_sequence = new_peptidoform.parsed_sequence + [
-                    (aa, None) for aa in additional_aa
-                ]
-
-            mass_shifted_psms.append(mass_shifted_psm)
-
-        return mass_shifted_psms
-
-    def _get_modified_peptidoforms(self, psm, keep_original=False) -> list:
-        """
-        Get modified peptidoforms derived from a single PSM.
-
-        Args:
-            psm (psm_utils.PSM): Original PSM object.
-            keep_original (bool, optional): Whether to keep the original PSM alongside modified ones. Defaults to False.
-
-        Returns:
-            list: List of modified PSMs, or None if no modifications were applied.
-        """
-        modified_peptidoforms = []
-
-        if keep_original:
-            psm["metadata"]["original_psm"] = True
-            modified_peptidoforms.append(psm)
-
-        localised_mass_shifts = self.modification_handler._localize_mass_shift(psm)
-        if localised_mass_shifts:
-            modified_peptidoforms.extend(
-                self._return_mass_shifted_psms(localised_mass_shifts, psm)
-            )
-
-        return modified_peptidoforms
-
     def get_modified_peptidoforms_list(self, psm, keep_original=False) -> PSMList:
         """
         Get modified peptidoforms derived from 1 PSM in a PSMList.
@@ -180,7 +109,10 @@ def get_modified_peptidoforms_list(self, psm, keep_original=False) -> PSMList:
         return:
             psm_utils.PSMList: PSMList object
         """
-        modified_peptidoforms = self._get_modified_peptidoforms(psm, keep_original=keep_original)
+        modified_peptidoforms = self.modification_handler._get_modified_peptidoforms(
+            psm, keep_original=keep_original, generate_modified_decoys=False
+        )
+
         return PSMList(psm_list=modified_peptidoforms)
 
     def add_modified_psms(
@@ -207,36 +139,65 @@ def add_modified_psms(
                 pass
             else:
                 raise ValueError("No PSM list provided")
-        if not generate_modified_decoys:
+        if generate_modified_decoys is None:
             generate_modified_decoys = self.params["generate_modified_decoys"]
-        if not keep_original:
+        if keep_original is None:
             keep_original = self.params["keep_original"]
         if not psm_file_type:
             psm_file_type = self.params["psm_file_type"]
 
         logger.info(
-            f"Adding modified PSMs to PSMlist {'WITH' if keep_original else 'WITHOUT'} originals, {'INCLUDING' if generate_modified_decoys else 'EXCLUDING'} modfied decoys"
+            f"Adding modified PSMs to PSMlist {'WITH' if keep_original else 'WITHOUT'} originals, {'INCLUDING' if generate_modified_decoys else 'EXCLUDING'} modified decoys"
         )
 
         parsed_psm_list = self._parse_psm_list(
             psm_list, psm_file_type, self.params["modification_mapping"]
         )
         new_psm_list = []
-        num_added_psms = 0
+        total_num_added_psms = 0
+
+        # Define a partial function with fixed parameters
+        process_func = partial(
+            process_single_psm,
+            modification_handler=self.modification_handler,
+            keep_original=keep_original,
+            generate_modified_decoys=generate_modified_decoys,
+        )
 
-        for psm in track(
-            parsed_psm_list,
-            description="Matching modifications to your mass shifts ... ",
-            total=len(parsed_psm_list),
-        ):
-            if (psm.is_decoy) & (not generate_modified_decoys):
-                continue
-            new_psms = self._get_modified_peptidoforms(psm, keep_original=keep_original)
-            if new_psms:
-                num_added_psms += len(new_psms) if not keep_original else len(new_psms) - 1
-                new_psm_list.extend(new_psms)
-        if num_added_psms != 0:
-            logger.info(f"Added {num_added_psms} additional modified PSMs")
+        # Setup the progress bar with enhanced columns
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TextColumn("{task.completed}/{task.total} PSMs processed"),
+            TimeElapsedColumn(),
+            transient=False,
+        ) as progress:
+            task = progress.add_task("Processing PSMs...", total=len(parsed_psm_list))
+
+            # Use ThreadPoolExecutor to avoid pickling issues
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                # Submit all tasks to the executor
+                futures = {executor.submit(process_func, psm): psm for psm in parsed_psm_list}
+
+                for future in concurrent.futures.as_completed(futures):
+                    try:
+                        modified_psms = future.result()
+                        if modified_psms:
+                            if keep_original:
+                                # Assuming the first PSM is the original
+                                total_num_added_psms += len(modified_psms) - 1
+                            else:
+                                total_num_added_psms += len(modified_psms)
+                            new_psm_list.extend(modified_psms)
+                    except Exception as e:
+                        psm = futures[future]
+                        logger.error(f"Error processing PSM {psm}: {e}")
+                    finally:
+                        progress.advance(task)
+
+        if total_num_added_psms != 0:
+            logger.info(f"Added {total_num_added_psms} additional modified PSMs")
         else:
             logger.warning("No modified PSMs found, ensure open modification search was enabled")
 
@@ -299,6 +260,35 @@ def write_modified_psm_list(self, psm_list, output_file=None, psm_file_type=None
         write_file(psm_list=psm_list, filename=output_file, filetype=psm_file_type)
 
 
+def process_single_psm(psm, modification_handler, keep_original, generate_modified_decoys):
+    """
+    Processes a single PSM to generate modified PSMs.
+
+    Args:
+        psm (psm_utils.PSM): The original PSM object.
+        modification_handler (_ModificationHandler): Instance handling modifications.
+        keep_original (bool): Whether to keep the original PSM.
+        generate_modified_decoys (bool): Whether to generate decoys.
+
+    Returns:
+        list: List of modified PSMs.
+    """
+    # Add logging to confirm the function is being called
+
+    if psm.is_decoy and not generate_modified_decoys:
+        return []
+
+    try:
+        modified_peptidoforms = modification_handler._get_modified_peptidoforms(
+            psm, keep_original=keep_original
+        )
+        logger.debug(f"Generated {len(modified_peptidoforms)} modified PSMs for {psm}")
+        return modified_peptidoforms
+    except Exception as e:
+        logger.error(f"Exception in processing PSM {psm}: {e}")
+        return []
+
+
 class _ModificationHandler:
     """Class that handles modifications."""
 
@@ -569,6 +559,61 @@ def _add_amino_acid_combinations(self, number_of_aa=1):
             ]
         )
 
+    def _get_modified_peptidoforms(self, psm, keep_original=False) -> list:
+        modified_peptidoforms = []
+
+        if keep_original:
+            psm["metadata"]["original_psm"] = True
+            modified_peptidoforms.append(psm)
+
+        localised_mass_shifts = self._localize_mass_shift(psm)
+        if localised_mass_shifts:
+            modified_peptidoforms.extend(
+                self._return_mass_shifted_psms(localised_mass_shifts, psm)
+            )
+
+        return modified_peptidoforms
+
+    def _return_mass_shifted_psms(self, modification_lists, psm) -> list:
+        mass_shifted_psms = []
+
+        for assigned_modifications in modification_lists:
+            mass_shifted_psm = deepcopy(psm)
+            new_peptidoform = mass_shifted_psm.peptidoform
+
+            # Handle terminal modifications
+            if nterm := assigned_modifications[0]:
+                new_peptidoform.properties["n_term"] = [self.cached_process_tag_tokens(nterm)]
+            if cterm := assigned_modifications[-1]:
+                new_peptidoform.properties["c_term"] = [self.cached_process_tag_tokens(cterm)]
+
+            # Handle peptide modifications
+            for i, mod in enumerate(assigned_modifications[2:-2]):
+                if mod in self.aa_sub_dict.keys():
+                    new_peptidoform.parsed_sequence[i] = (
+                        self.aa_sub_dict[mod][1],
+                        None,
+                    )
+                elif mod:
+                    new_peptidoform.parsed_sequence[i] = (
+                        new_peptidoform.parsed_sequence[i][0],
+                        [self.cached_process_tag_tokens(mod)],
+                    )
+
+            # Handle peptide additions
+            if additional_aa := assigned_modifications[1]:
+                new_peptidoform.parsed_sequence = [
+                    (aa, None) for aa in additional_aa
+                ] + new_peptidoform.parsed_sequence
+            if additional_aa := assigned_modifications[-2]:
+                new_peptidoform.parsed_sequence = new_peptidoform.parsed_sequence + [
+                    (aa, None) for aa in additional_aa
+                ]
+
+            mass_shifted_psms.append(mass_shifted_psm)
+
+        return mass_shifted_psms
+
     def check_protein_level(self, psm, additional_aa):
         """
         Check if amino acid(s) precedes or follows a peptide in the protein sequence.
@@ -607,6 +652,19 @@ def check_protein_level(self, psm, additional_aa):
 
         return prepeptide, postpeptide
 
+    @lru_cache(maxsize=None)
+    def cached_process_tag_tokens(self, tag):
+        """
+        Process a tag token and cache the result.
+
+        Args:
+            tag (str): Tag token to process.
+
+        Returns:
+            str: Processed tag token.
+        """
+        return proforma.process_tag_tokens(tag)
+
 
 class ModificationCache:
     """Class that handles the cache for modifications."""

From 7f3f45de1d5849b6a6f1ee624a9fe61ccd514165 Mon Sep 17 00:00:00 2001
From: ArthurDeclercq <arre_declercq@hotmail.com>
Date: Tue, 3 Dec 2024 09:18:03 +0100
Subject: [PATCH 6/7] minor changes

---
 mumble/mumble.py | 7 +++----
 pyproject.toml   | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/mumble/mumble.py b/mumble/mumble.py
index a9632e9..3764e16 100644
--- a/mumble/mumble.py
+++ b/mumble/mumble.py
@@ -15,12 +15,11 @@
 import pandas as pd
 import pickle
 from psm_utils.io import read_file, write_file
-from psm_utils import PSMList, PSM, Peptidoform
+from psm_utils import PSMList
 from psm_utils.utils import mz_to_mass
 from pyteomics import proforma
-from pyteomics.mass import std_aa_mass, calculate_mass, unimod
+from pyteomics.mass import std_aa_mass, unimod
 from pyteomics.fasta import IndexedFASTA
-from rich.progress import track
 from rich.logging import RichHandler
 from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn
 
@@ -419,7 +418,7 @@ def _generate_combinations(mod_lists):
 
         # Use np.where to get indices of non-False values for each row
         valid_indices = [
-            np.where(mod_array[i] != False)[0].tolist() for i in range(mod_array.shape[0])
+            np.where(mod_array[i] != False)[0].tolist() for i in range(mod_array.shape[0])  # NOQA
         ]
         # generate all possible combinations of indices
         valid_combinations = [
diff --git a/pyproject.toml b/pyproject.toml
index 860cd47..edc112a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,7 +49,7 @@ target-version = 'py38'
 extend-select = ["T201", "T203"]
 
 [tool.bumpver]
-current_version = "0.0.1"
+current_version = "0.1.2"
 version_pattern = "MAJOR.MINOR.PATCH"
 commit_message  = "Bump version {old_version} -> {new_version}"
 commit          = true

From 4249f6d27978af121a6a83c714acc952e3bc237a Mon Sep 17 00:00:00 2001
From: ArthurDeclercq <arre_declercq@hotmail.com>
Date: Tue, 3 Dec 2024 09:29:21 +0100
Subject: [PATCH 7/7] bump version 0.2.0

---
 mumble/__init__.py | 2 +-
 pyproject.toml     | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mumble/__init__.py b/mumble/__init__.py
index 73e0c75..140e2aa 100644
--- a/mumble/__init__.py
+++ b/mumble/__init__.py
@@ -1,3 +1,3 @@
-__version__ = "0.1.2"
+__version__ = "0.2.0"
 
 from mumble.mumble import PSMHandler
diff --git a/pyproject.toml b/pyproject.toml
index edc112a..ea232d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "mumble"
-version = "0.1.2"
+version = "0.2.0"
 description = "Finding the perfect modification for your mass shift"
 readme = "README.md"
 keywords = [
@@ -49,7 +49,7 @@ target-version = 'py38'
 extend-select = ["T201", "T203"]
 
 [tool.bumpver]
-current_version = "0.1.2"
+current_version = "0.2.0"
 version_pattern = "MAJOR.MINOR.PATCH"
 commit_message  = "Bump version {old_version} -> {new_version}"
 commit          = true
@@ -58,4 +58,4 @@ push            = false
 
 [tool.bumpver.file_patterns]
 "pyproject.toml" = ['current_version = "{version}"', 'version = "{version}"']
-"mumble/__init__.py" = ["{version}"]
+"mumble/__init__.py" = ['__version__ = "{version}"']