From 0e5ec5f08cc20a7c43e315de973140fb1af22bae Mon Sep 17 00:00:00 2001
From: singjc <justincsing@gmail.com>
Date: Fri, 31 May 2024 11:42:57 -0400
Subject: [PATCH 1/7] add: trafoXML accessor

---
 massdash/loaders/access/TrafoXMLAccess.py | 86 +++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 massdash/loaders/access/TrafoXMLAccess.py

diff --git a/massdash/loaders/access/TrafoXMLAccess.py b/massdash/loaders/access/TrafoXMLAccess.py
new file mode 100644
index 0000000..279a589
--- /dev/null
+++ b/massdash/loaders/access/TrafoXMLAccess.py
@@ -0,0 +1,86 @@
+"""
+massdash/loaders/access/TrafoXMLAccess
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+
+import xml.etree.ElementTree as ET
+import pandas as pd
+from typing import List, Tuple
+
+# Library Access
+from ..SpectralLibraryLoader import SpectralLibraryLoader
+
+class TrafoXMLAccess:
+    """
+    A class for accessing and loading data from a TrafoXML file.
+
+    Args:
+        input_file (str): The path to the TrafoXML file.
+        irt_library (str, optional): The path to the IRT library file. Defaults to None.
+
+    Attributes:
+        input_file_str (str): The path to the TrafoXML file.
+        tree (ElementTree): The parsed XML tree from the TrafoXML file.
+        root (Element): The root element of the XML tree.
+        irt_library_str (str): The path to the IRT library file.
+        irt_library (SpectralLibraryLoader): The loaded IRT library.
+
+    Methods:
+        load_transformation_params: Loads the transformation parameters from the TrafoXML file.
+        load_pairs: Loads the transformation pairs from the TrafoXML file.
+        load_pairs_df: Loads the transformation pairs as a pandas DataFrame.
+    """
+
+    def __init__(self, input_file: str, irt_library: str = None) -> None:
+        self.input_file_str = input_file
+        self.tree = ET.parse(self.input_file_str)
+        self.root = self.tree.getroot()
+        self.irt_library_str = irt_library
+        
+        if self.irt_library_str is not None:
+            self.irt_library = SpectralLibraryLoader(self.irt_library_str)
+            self.irt_library.load()
+
+    def load_transformation_params(self) -> dict:
+        """
+        Loads the transformation parameters from the TrafoXML file.
+
+        Returns:
+            dict: A dictionary containing the transformation parameters.
+
+        """
+        transformation = self.root.find('Transformation')
+        params = {param.attrib['name']: param.attrib['value'] for param in transformation.findall('Param')}
+        return params
+
+    def load_pairs(self) -> List[Tuple[float, float]]:
+        """
+        Loads the transformation pairs from the TrafoXML file.
+
+        Returns:
+            List[Tuple[float, float]]: A list of tuples representing the transformation pairs.
+
+        """
+        transformation = self.root.find('Transformation')
+        pairs = [(float(pair.attrib['from']), float(pair.attrib['to'])) for pair in transformation.find('Pairs')]
+        return pairs
+    
+    def load_pairs_df(self) -> pd.DataFrame:
+        """
+        Loads the transformation pairs as a pandas DataFrame.
+
+        Returns:
+            pd.DataFrame: A DataFrame containing the transformation pairs.
+
+        """
+        pairs = self.load_pairs()
+        df = pd.DataFrame(pairs, columns=['experiment_rt', 'library_rt'])
+        
+        # Add irt precursor information to table if irt_library is available
+        if self.irt_library_str is not None:
+            irt_prec_meta = self.irt_library.data[['GeneName', 'ProteinId', 'ModifiedPeptideSequence',
+       'PrecursorMz', 'PrecursorCharge', 'NormalizedRetentionTime',
+       'PrecursorIonMobility']].drop_duplicates()
+            df = pd.merge(df, irt_prec_meta, left_on='library_rt', right_on='NormalizedRetentionTime', how='inner')
+
+        return df

From 90b587328e79de649f8d05587eee2a69b2c693ec Mon Sep 17 00:00:00 2001
From: singjc <justincsing@gmail.com>
Date: Fri, 31 May 2024 11:45:27 -0400
Subject: [PATCH 2/7] change: dict type set to Dict

---
 massdash/loaders/access/TrafoXMLAccess.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/massdash/loaders/access/TrafoXMLAccess.py b/massdash/loaders/access/TrafoXMLAccess.py
index 279a589..67dd0d3 100644
--- a/massdash/loaders/access/TrafoXMLAccess.py
+++ b/massdash/loaders/access/TrafoXMLAccess.py
@@ -3,9 +3,10 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
+from typing import List, Dict, Tuple
 import xml.etree.ElementTree as ET
 import pandas as pd
-from typing import List, Tuple
+
 
 # Library Access
 from ..SpectralLibraryLoader import SpectralLibraryLoader
@@ -41,7 +42,7 @@ def __init__(self, input_file: str, irt_library: str = None) -> None:
             self.irt_library = SpectralLibraryLoader(self.irt_library_str)
             self.irt_library.load()
 
-    def load_transformation_params(self) -> dict:
+    def load_transformation_params(self) -> Dict:
         """
         Loads the transformation parameters from the TrafoXML file.
 

From f4aa597f91a71af13061a5ac73319f992f226389 Mon Sep 17 00:00:00 2001
From: singjc <justincsing@gmail.com>
Date: Sat, 1 Jun 2024 23:39:56 -0400
Subject: [PATCH 3/7] add: loader and plotter for trafoXML

---
 massdash/loaders/TrafoXMLLoader.py        | 36 ++++++++++++
 massdash/loaders/access/TrafoXMLAccess.py |  2 +
 massdash/plotting/DebugPlotter.py         | 67 +++++++++++++++++++++++
 3 files changed, 105 insertions(+)
 create mode 100644 massdash/loaders/TrafoXMLLoader.py
 create mode 100644 massdash/plotting/DebugPlotter.py

diff --git a/massdash/loaders/TrafoXMLLoader.py b/massdash/loaders/TrafoXMLLoader.py
new file mode 100644
index 0000000..622af81
--- /dev/null
+++ b/massdash/loaders/TrafoXMLLoader.py
@@ -0,0 +1,36 @@
+"""
+massdash/loaders/TrafoXMLLoader
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import List, Dict, Union
+
+# Access
+from .access.TrafoXMLAccess import TrafoXMLAccess
+
+class TrafoXMLLoader:
+    def __init__(self, dataFiles: Union[str, List[str]], libraryFile: Union[str, List[str]] = None):
+        ## store the file names
+        if isinstance(dataFiles, str):
+            self.dataFiles_str = [dataFiles]
+        else:
+            self.dataFiles_str = dataFiles
+            
+        if isinstance(libraryFile, str):
+            self.libraryFile_str = [libraryFile]
+        else:
+            self.libraryFile_str = libraryFile
+        print(f"len libraryFile: {len(self.libraryFile_str)}")
+    
+        if self.libraryFile_str is not None and len(self.libraryFile_str) > 1:
+            self.dataFiles = [TrafoXMLAccess(f, l) for f, l in zip(self.dataFiles_str, self.libraryFile_str)]
+        elif self.libraryFile_str is not None and len(self.libraryFile_str) == 1:
+            self.dataFiles = [TrafoXMLAccess(f, self.libraryFile_str[0]) for f in self.dataFiles_str]
+    
+    def __str__(self):
+        return f"TrafoXMLLoader(dataFiles={self.dataFiles_str}, libraryFile={self.libraryFile_str}"
+
+    def __repr__(self):
+        return f"TrafoXMLLoader(dataFiles={self.dataFiles_str}, libraryFile={self.libraryFile_str}"
+    
+    
\ No newline at end of file
diff --git a/massdash/loaders/access/TrafoXMLAccess.py b/massdash/loaders/access/TrafoXMLAccess.py
index 67dd0d3..d93555b 100644
--- a/massdash/loaders/access/TrafoXMLAccess.py
+++ b/massdash/loaders/access/TrafoXMLAccess.py
@@ -4,6 +4,7 @@
 """
 
 from typing import List, Dict, Tuple
+from os.path import basename
 import xml.etree.ElementTree as ET
 import pandas as pd
 
@@ -83,5 +84,6 @@ def load_pairs_df(self) -> pd.DataFrame:
        'PrecursorMz', 'PrecursorCharge', 'NormalizedRetentionTime',
        'PrecursorIonMobility']].drop_duplicates()
             df = pd.merge(df, irt_prec_meta, left_on='library_rt', right_on='NormalizedRetentionTime', how='inner')
+        df['filename'] = basename(self.input_file_str).split('.')[0]
 
         return df
diff --git a/massdash/plotting/DebugPlotter.py b/massdash/plotting/DebugPlotter.py
new file mode 100644
index 0000000..44a7adb
--- /dev/null
+++ b/massdash/plotting/DebugPlotter.py
@@ -0,0 +1,67 @@
+"""
+massdash/plotting/DebugPlotter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import List, Optional, Literal
+
+# Plotting modules
+from bokeh.plotting import figure, show, output_notebook
+from bokeh.models import HoverTool, ColumnDataSource, PrintfTickFormatter, LegendItem, Legend
+from bokeh.palettes import Category20
+
+
+
+class DebugPlotter:
+    def __init__(self):
+        self.fig = None
+        
+    def plot(self, df):
+        # Create a new plot
+        p = figure(title='Retention time transformation', x_axis_label='original RT [s]', y_axis_label='Delta RT [s]',
+           tools=['pan', 'wheel_zoom', 'box_zoom', 'reset', 'save'])
+
+        unique_filenames = df['filename'].unique()
+        colors = Category20[len(unique_filenames)]
+
+        legend_it = []
+        file_number = 1
+        for filename, grouped_df in df.groupby('filename'):
+            
+            color = colors[file_number-1]
+            print(f"File {file_number}: {filename} | color: {color}")
+            # Add the scatter plot
+            source = ColumnDataSource(grouped_df)
+            renderer = p.scatter('experiment_rt', 'library_rt', source=source, size=10, alpha=0.5, color=color)
+            legend_it.append((f"File {file_number}", [renderer]))
+            file_number += 1
+
+        # Configure the minimal hover tool
+        hover_minimal = HoverTool(tooltips=[
+            ('original RT', '@experiment_rt{0.0}'),
+            ('Delta RT', '@library_rt{0.0}'),
+            ('Peptide Sequence', '@ModifiedPeptideSequence')
+        ], name="Minimal Hover")
+        p.add_tools(hover_minimal)
+
+        # Configure the detailed hover tool
+        hover_detailed = HoverTool(tooltips=[
+            ('Protein ID', '@ProteinId'),
+            ('Precursor m/z', '@PrecursorMz{0.4}'),
+            ('Precursor Charge', '@PrecursorCharge'),
+            ('Normalized Retention Time', '@NormalizedRetentionTime{0.2}'),
+            ('Precursor Ion Mobility', '@PrecursorIonMobility{0.6}'),
+            ('Filename', '''<div style="width:200px; word-wrap:break-word;">@filename</div>''')
+        ], name="Detailed Hover")
+        p.add_tools(hover_detailed)
+
+        # Add a legend for the filename
+        legend = Legend(items=legend_it)
+        legend.click_policy="mute"
+        legend.label_text_font_size = '8pt'
+        p.add_layout(legend, 'right')
+
+        # Format the tick labels to remove scientific notation
+        p.xaxis.formatter = PrintfTickFormatter(format='%.2f')
+        p.yaxis.formatter = PrintfTickFormatter(format='%.2f')
+        return p
\ No newline at end of file

From b38a726f718c3bed5c0ff27ff2df4ba65b02bddd Mon Sep 17 00:00:00 2001
From: singjc <justincsing@gmail.com>
Date: Sun, 2 Jun 2024 01:47:40 -0400
Subject: [PATCH 4/7] add: mz and im debug files

---
 massdash/loaders/TrafoXMLLoader.py        | 21 ++++++++++++++----
 massdash/loaders/access/TrafoXMLAccess.py | 27 +++++++++++++++++++++--
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/massdash/loaders/TrafoXMLLoader.py b/massdash/loaders/TrafoXMLLoader.py
index 622af81..4d01835 100644
--- a/massdash/loaders/TrafoXMLLoader.py
+++ b/massdash/loaders/TrafoXMLLoader.py
@@ -9,7 +9,11 @@
 from .access.TrafoXMLAccess import TrafoXMLAccess
 
 class TrafoXMLLoader:
-    def __init__(self, dataFiles: Union[str, List[str]], libraryFile: Union[str, List[str]] = None):
+    def __init__(self, 
+                 dataFiles: Union[str, List[str]], 
+                 libraryFile: Union[str, List[str]] = None, 
+                 mzDebugFile: Union[str, List[str]] = None, 
+                 imDebugFile: Union[str, List[str]] = None):
         ## store the file names
         if isinstance(dataFiles, str):
             self.dataFiles_str = [dataFiles]
@@ -20,12 +24,21 @@ def __init__(self, dataFiles: Union[str, List[str]], libraryFile: Union[str, Lis
             self.libraryFile_str = [libraryFile]
         else:
             self.libraryFile_str = libraryFile
-        print(f"len libraryFile: {len(self.libraryFile_str)}")
+            
+        if isinstance(mzDebugFile, str):
+            self.mzDebugFile_str = [mzDebugFile]
+        else:
+            self.mzDebugFile_str = mzDebugFile
+            
+        if isinstance(imDebugFile, str):
+            self.imDebugFile_str = [imDebugFile]
+        else:
+            self.imDebugFile_str = imDebugFile
     
         if self.libraryFile_str is not None and len(self.libraryFile_str) > 1:
-            self.dataFiles = [TrafoXMLAccess(f, l) for f, l in zip(self.dataFiles_str, self.libraryFile_str)]
+            self.dataFiles = [TrafoXMLAccess(f, l, mz_f, im_f) for f, l, mz_f, im_f in zip(self.dataFiles_str, self.libraryFile_str, self.mzDebugFile_str, self.imDebugFile_str)]
         elif self.libraryFile_str is not None and len(self.libraryFile_str) == 1:
-            self.dataFiles = [TrafoXMLAccess(f, self.libraryFile_str[0]) for f in self.dataFiles_str]
+            self.dataFiles = [TrafoXMLAccess(f, self.libraryFile_str[0], mz_f, im_f) for f, mz_f, im_f in zip(self.dataFiles_str, self.mzDebugFile_str, self.imDebugFile_str)]
     
     def __str__(self):
         return f"TrafoXMLLoader(dataFiles={self.dataFiles_str}, libraryFile={self.libraryFile_str}"
diff --git a/massdash/loaders/access/TrafoXMLAccess.py b/massdash/loaders/access/TrafoXMLAccess.py
index d93555b..60d1fe1 100644
--- a/massdash/loaders/access/TrafoXMLAccess.py
+++ b/massdash/loaders/access/TrafoXMLAccess.py
@@ -33,15 +33,27 @@ class TrafoXMLAccess:
         load_pairs_df: Loads the transformation pairs as a pandas DataFrame.
     """
 
-    def __init__(self, input_file: str, irt_library: str = None) -> None:
+    def __init__(self, 
+                 input_file: str, 
+                 irt_library: str = None, 
+                 mzDebugFile: str = None, 
+                 imDebugFile: str = None) -> None:
         self.input_file_str = input_file
         self.tree = ET.parse(self.input_file_str)
         self.root = self.tree.getroot()
         self.irt_library_str = irt_library
+        self.mzDebugFile_str = mzDebugFile
+        self.imDebugFile_str = imDebugFile
         
         if self.irt_library_str is not None:
             self.irt_library = SpectralLibraryLoader(self.irt_library_str)
             self.irt_library.load()
+        
+        if self.mzDebugFile_str is not None:
+            self.mzDebugFile = pd.read_csv(self.mzDebugFile_str, sep='\t')
+            
+        if self.imDebugFile_str is not None:
+            self.imDebugFile = pd.read_csv(self.imDebugFile_str, sep='\t')
 
     def load_transformation_params(self) -> Dict:
         """
@@ -81,9 +93,20 @@ def load_pairs_df(self) -> pd.DataFrame:
         # Add irt precursor information to table if irt_library is available
         if self.irt_library_str is not None:
             irt_prec_meta = self.irt_library.data[['GeneName', 'ProteinId', 'ModifiedPeptideSequence',
-       'PrecursorMz', 'PrecursorCharge', 'NormalizedRetentionTime',
+       'PrecursorMz', 'PrecursorCharge', 'ProductMz', 'ProductCharge', 'Annotation', 'NormalizedRetentionTime',
        'PrecursorIonMobility']].drop_duplicates()
             df = pd.merge(df, irt_prec_meta, left_on='library_rt', right_on='NormalizedRetentionTime', how='inner')
+            
+        if self.mzDebugFile_str is not None:
+            df = pd.merge(df, self.mzDebugFile, left_on='experiment_rt', right_on='RT', how='inner')
+            # Drop RT column, since it's the same as experiment_rt
+            df = df.drop(columns=['RT'])
+            
+        if self.imDebugFile_str is not None:
+            df = pd.merge(df, self.imDebugFile[['RT', 'im', 'theo_im', 'intensity']], left_on='experiment_rt', right_on='RT', how='inner')
+            # Drop RT column, since it's the same as experiment_rt
+            df = df.drop(columns=['RT'])
+            
         df['filename'] = basename(self.input_file_str).split('.')[0]
 
         return df

From bb9715e4c94f42899df59acf91c308a62952f750 Mon Sep 17 00:00:00 2001
From: singjc <justincsing@gmail.com>
Date: Sun, 2 Jun 2024 01:48:10 -0400
Subject: [PATCH 5/7] refactor: plotting for generalized scatter plot based on
 df

---
 massdash/plotting/DebugPlotter.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/massdash/plotting/DebugPlotter.py b/massdash/plotting/DebugPlotter.py
index 44a7adb..12d81c0 100644
--- a/massdash/plotting/DebugPlotter.py
+++ b/massdash/plotting/DebugPlotter.py
@@ -16,10 +16,10 @@ class DebugPlotter:
     def __init__(self):
         self.fig = None
         
-    def plot(self, df):
+    def plot(self, df, x_col, y_col, title, x_axis_label, y_axis_label):
         # Create a new plot
-        p = figure(title='Retention time transformation', x_axis_label='original RT [s]', y_axis_label='Delta RT [s]',
-           tools=['pan', 'wheel_zoom', 'box_zoom', 'reset', 'save'])
+        p = figure(title=title, x_axis_label=x_axis_label, y_axis_label=y_axis_label,
+                tools=['pan', 'wheel_zoom', 'box_zoom', 'reset', 'save'])
 
         unique_filenames = df['filename'].unique()
         colors = Category20[len(unique_filenames)]
@@ -27,19 +27,18 @@ def plot(self, df):
         legend_it = []
         file_number = 1
         for filename, grouped_df in df.groupby('filename'):
-            
-            color = colors[file_number-1]
-            print(f"File {file_number}: {filename} | color: {color}")
+            color = colors[file_number - 1]
+            print(f"File {file_number}: {filename}")
             # Add the scatter plot
             source = ColumnDataSource(grouped_df)
-            renderer = p.scatter('experiment_rt', 'library_rt', source=source, size=10, alpha=0.5, color=color)
+            renderer = p.scatter(x_col, y_col, source=source, size=10, alpha=0.5, color=color)
             legend_it.append((f"File {file_number}", [renderer]))
             file_number += 1
 
         # Configure the minimal hover tool
         hover_minimal = HoverTool(tooltips=[
-            ('original RT', '@experiment_rt{0.0}'),
-            ('Delta RT', '@library_rt{0.0}'),
+            (x_axis_label, f'@{x_col}{{0.0}}'),
+            (y_axis_label, f'@{y_col}{{0.0}}'),
             ('Peptide Sequence', '@ModifiedPeptideSequence')
         ], name="Minimal Hover")
         p.add_tools(hover_minimal)
@@ -57,7 +56,7 @@ def plot(self, df):
 
         # Add a legend for the filename
         legend = Legend(items=legend_it)
-        legend.click_policy="mute"
+        legend.click_policy = "mute"
         legend.label_text_font_size = '8pt'
         p.add_layout(legend, 'right')
 

From fedabf7761ff922ee33653f35d576715708f8971 Mon Sep 17 00:00:00 2001
From: singjc <justincsing@gmail.com>
Date: Sun, 2 Jun 2024 01:59:56 -0400
Subject: [PATCH 6/7] add: plot method to loader

---
 massdash/loaders/TrafoXMLLoader.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/massdash/loaders/TrafoXMLLoader.py b/massdash/loaders/TrafoXMLLoader.py
index 4d01835..9326095 100644
--- a/massdash/loaders/TrafoXMLLoader.py
+++ b/massdash/loaders/TrafoXMLLoader.py
@@ -46,4 +46,29 @@ def __str__(self):
     def __repr__(self):
         return f"TrafoXMLLoader(dataFiles={self.dataFiles_str}, libraryFile={self.libraryFile_str}"
     
-    
\ No newline at end of file
+    
+    def plot(self, debug_plot_type = "rt", split: bool = False):
+        """
+        Show debug scatter plot(s) of the loaded files with bokeh.
+
+        Args:
+            debug_plot_type (str): Key into debug_plot_type_map: "rt", "mz" or "im".
+            split (bool): If True show one figure per file, otherwise one combined figure.
+        """
+        import pandas as pd
+        from bokeh.plotting import show
+        from massdash.plotting.DebugPlotter import DebugPlotter
+        # Each entry: x_col, y_col, title, x_axis_label, y_axis_label (DebugPlotter.plot arguments)
+        debug_plot_type_map = {
+            "rt": ['experiment_rt', 'library_rt', 'Retention Time Transformation', 'Original RT [s]', 'Delta RT [s]'],
+            "mz": ['mz', 'theo_mz', 'm/z calibration', 'Experiment m/z', 'Theoretical m/z'],
+            "im": ['im', 'theo_im', 'Ion mobility calibration', 'Experiment Ion Mobility', 'Theoretical Ion Mobility']
+        }
+        plot_args = debug_plot_type_map[debug_plot_type]
+        # With split, files are loaded lazily: each one right before its own figure is shown
+        if split:
+            frames = (data_file.load_pairs_df() for data_file in self.dataFiles)
+        else:
+            frames = [pd.concat([data_file.load_pairs_df() for data_file in self.dataFiles])]
+        for df in frames:
+            show(DebugPlotter().plot(df, *plot_args))
\ No newline at end of file

From 0b06fe9a094eb86deaaca8d09c095a90060c1ecc Mon Sep 17 00:00:00 2001
From: singjc <justincsing@gmail.com>
Date: Sun, 2 Jun 2024 02:00:21 -0400
Subject: [PATCH 7/7] fix: if only one file, use default blue color

---
 massdash/plotting/DebugPlotter.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/massdash/plotting/DebugPlotter.py b/massdash/plotting/DebugPlotter.py
index 12d81c0..f6eb6f7 100644
--- a/massdash/plotting/DebugPlotter.py
+++ b/massdash/plotting/DebugPlotter.py
@@ -22,7 +22,12 @@ def plot(self, df, x_col, y_col, title, x_axis_label, y_axis_label):
                 tools=['pan', 'wheel_zoom', 'box_zoom', 'reset', 'save'])
 
         unique_filenames = df['filename'].unique()
-        colors = Category20[len(unique_filenames)]
+        if len(unique_filenames) == 1:
+            colors = ['blue']
+        elif len(unique_filenames) <= 20:
+            colors = Category20[max(len(unique_filenames), 3)]  # Category20 only defines palettes for 3-20 colors
+        else:
+            raise ValueError("Too many files to plot (>20), not enough colors available")
 
         legend_it = []
         file_number = 1