Merge pull request #41 from llegregam/Dev

Dev
llegregam · Sep 18, 2023 · 01ea711 · 01ea711
2 parents 74e32a3 + b0021e5
commit 01ea711
Show file tree

Hide file tree

Showing 7 changed files with 121 additions and 9 deletions.
diff --git a/.github/workflows/deploy_pypi.yml b/.github/workflows/deploy_pypi.yml
@@ -10,10 +10,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@master
-      - name: Set up Python 3.9
+      - name: Set up Python 3.11
         uses: actions/setup-python@v1
         with:
-          python-version: 3.9
+          python-version: 3.11
       - name: Install pypi/build
         run: >-
           python -m

diff --git a/ms_reader/__init__.py b/ms_reader/__init__.py
@@ -1 +1 @@
-__version__ = "1.4.2"
+__version__ = "1.5.0"
diff --git a/ms_reader/app.py b/ms_reader/app.py
@@ -6,6 +6,7 @@
 
 from extract import Extractor
 from ms_reader import __version__, __file__
+from ms_reader.skyline_convert import convert_skyline_input
 
 # Constants
 EXCEL_ENGINE = "openpyxl"
@@ -59,19 +60,29 @@ def convert_df(df):
 st.title(f"Welcome to MS_Reader (v{__version__})")
 check_uptodate()
 
+skyline = st.checkbox(
+    label="Skyline input",
+    value=False,
+    help="Check box if input is a skyline output file"
+)
+
+
 st.subheader("Select input files")
 col1, col2, col3 = st.columns(3)
 with col1:
     data = st.file_uploader("Upload Data")
 with col2:
-    report = st.file_uploader("Upload Report File (optional)")
+    report = st.file_uploader("Upload Report File (optional)", disabled=True if skyline else False)
 with col3:
     metadata = st.file_uploader("Upload Metadata (optional)")
 
 if data:
 
-    # noinspection PyArgumentList
-    data = pd.read_excel(io=data, engine=EXCEL_ENGINE)
+    if skyline:
+        data = convert_skyline_input(data)
+    else:
+        # noinspection PyArgumentList
+        data = pd.read_excel(io=data, engine=EXCEL_ENGINE)
 
     # Add way to drop metabolites from data
     with st.expander("Click to open metabolite remover"):

diff --git a/ms_reader/extract.py b/ms_reader/extract.py
@@ -1,4 +1,4 @@
-"""Module containing the parser for tracefinder data and handling all the
+"""Module containing the parser for TraceFinder data and handling all the
 logic of MS_Reader """
 
 import io
@@ -65,7 +65,7 @@ def __init__(self, data, calrep=None, metadata=None, met_class="CM"):
         columns = [
             "Compound", "Sample_Name", "Area", "Sample Type",
             "Calculated Amt", "Theoretical Amt",
-            "Response Ratio", "Excluded", "%Diff"
+            "Excluded", "%Diff"
         ]
         self.data = self.data[columns].copy()
         self._replace_nf()
@@ -425,6 +425,8 @@ def handle_qc(self) -> bool:
         ]
         self.qc_table.set_index("Compound", inplace=True)
 
+        print(self.qc_table)
+
         # QC is set to have a difference between sample point
         # and QC of 20% maximum
         if (abs(qc_verif["%Diff"].values) > 20).any():
@@ -433,9 +435,15 @@ def handle_qc(self) -> bool:
             qc = True
         self.qc_table = self.qc_table.astype(str)
 
+        # print(self.qc_table)
+        # print(self.qc_table.columns[self.qc_table.columns.duplicated(keep=False)])
+        # print(self.qc_table.index[self.qc_table.index.duplicated(keep=False)])
+        self.qc_table.index = \
+            self.qc_table.index.drop_duplicates(keep="first")
         self.qc_table = self.qc_table.style.apply(self._color_qc, axis=1,
                                                   subset=["%Diff"])
-
+        print(self.qc_table.columns[self.qc_table.columns.duplicated(keep=False)])
+        print(self.qc_table.index[self.qc_table.index.duplicated(keep=False)])
         self.excel_tables.append(("Quality Control", self.qc_table))
         return qc
 

diff --git a/ms_reader/skyline_convert.py b/ms_reader/skyline_convert.py
@@ -0,0 +1,93 @@
+"""
+Converter to convert the skyline input to MS_Reader format
+"""
+
+import pandas as pd
+import numpy as np
+
+# Skyline export column name -> column name used by the MS_Reader format.
+# Passed to DataFrame.rename, so columns not listed here keep their name.
+MAPPING = {
+    "Molecule": "Compound",
+    "File Name": "Filename",
+    "Total Area": "Area",
+    "Quantification": "Calculated Amt",
+    "Explicit Analyte Concentration": "Theoretical Amt",
+    "Exclude From Calibration": "Excluded",
+    "Accuracy": "%Diff"
+}
+
+# Skyline sample type -> sample type label used by the MS_Reader format.
+# Sample types not listed here are passed through unchanged.
+SAMPLE_TYPE_MAPPING = {
+    "Quality Control": "QC Std",
+    "Standard": "Cal Std",
+    "Blank": "Unknown"
+}
+
+def convert_column_names(df):
+    """
+    Rename the Skyline columns to their MS_Reader equivalents.
+
+    :param df: dataframe holding the Skyline export
+    :return: new dataframe in which every column listed in MAPPING is
+             renamed; the other columns are left as they are
+    """
+    return df.rename(columns=MAPPING)
+
+def convert_sample_types(value):
+    """
+    Translate one Skyline sample type into its MS_Reader label.
+
+    :param value: sample type as found in the Skyline export
+    :return: matching entry of SAMPLE_TYPE_MAPPING, or the value itself
+             when it has no entry
+    """
+    return SAMPLE_TYPE_MAPPING.get(value, value)
+
+def convert_accuracy_to_diff(value):
+    """
+    Convert an accuracy percentage into a signed percentage difference.
+
+    :param value: accuracy as a number or as a string such as "95.5%";
+                  may be missing (NaN or None)
+    :return: accuracy minus 100 rounded to one decimal, or NaN when the
+             accuracy is missing
+    """
+    # NaN never compares equal to anything (itself included), so a
+    # "!= np.nan" test is always true. pd.isna is the reliable check and
+    # also covers None, which would otherwise fail in float("None").
+    if pd.isna(value):
+        return np.nan
+    return round(float(str(value).replace("%", "")) - 100, 1)
+
+def convert_calculated_amt(value):
+    """
+    Convert one Skyline quantification entry into a numeric amount.
+
+    :param value: raw cell, e.g. "12.5 uM", "NaN", a "Normalized Area"
+                  entry, an already numeric value or a missing value
+    :return: amount as a float, or NaN when no amount is available
+    """
+    if isinstance(value, str):
+        # Entries reporting a normalized area, or an explicit NaN, carry
+        # no usable amount
+        if "Normalized Area" in value or "NaN" in value:
+            return np.nan
+        return float(value.replace(" uM", ""))
+    if pd.isna(value):
+        return np.nan
+    # Already numeric (column parsed as numbers): keep the value instead
+    # of silently dropping it through an implicit None
+    return float(value)
+
+def handle_na(row):
+    """
+    Tag heavy-labelled compounds and normalise missing values of one row.
+
+    - a compound whose precursor contains "(heavy)" gets a " C13" suffix
+    - a missing area sets both "Area" and "Calculated Amt" to "N/F"
+    - any other missing "Calculated Amt" (None, NaN) is stored as NaN
+
+    :param row: row with "Precursor", "Compound", "Area" and
+                "Calculated Amt" entries (used with DataFrame.apply, axis=1)
+    :return: converted copy of the row
+    """
+    # Work on a copy: pandas does not support functions that mutate the
+    # object handed to them by apply
+    row = row.copy()
+    precursor = row["Precursor"]
+    # A missing precursor is not a string and cannot be searched
+    if isinstance(precursor, str) and "(heavy)" in precursor:
+        row["Compound"] = row["Compound"] + " C13"
+    # pd.isna accepts strings and None, whereas np.isnan raises on them
+    if pd.isna(row["Area"]):
+        row["Area"] = "N/F"
+        row["Calculated Amt"] = "N/F"
+    elif pd.isna(row["Calculated Amt"]):
+        row["Calculated Amt"] = np.nan
+    return row
+
+def convert_skyline_input(skyline_file):
+    """
+    Read a Skyline csv export and convert it to the MS_Reader format.
+
+    Columns are renamed, sample types translated, accuracies turned into
+    percentage differences (missing ones become "N/A"), amounts made
+    numeric and missing values handled row by row.
+
+    :param skyline_file: path or file-like object accepted by pd.read_csv
+    :return: converted dataframe
+    """
+    converted = convert_column_names(pd.read_csv(skyline_file, sep=","))
+    converted["Sample Type"] = converted["Sample Type"].apply(
+        convert_sample_types
+    )
+    differences = converted["%Diff"].apply(convert_accuracy_to_diff)
+    converted["%Diff"] = differences.fillna("N/A")
+    converted["Calculated Amt"] = converted["Calculated Amt"].apply(
+        convert_calculated_amt
+    )
+    # Any error raised above propagates unchanged to the caller
+    return converted.apply(handle_na, axis=1)
+
+if __name__ == "__main__":
+    # Manual conversion helper:
+    #   python skyline_convert.py <skyline export .csv> <output .xlsx>
+    # Paths come from the command line so the script runs on any machine.
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Convert a Skyline export to the MS_Reader format"
+    )
+    parser.add_argument("skyline_csv", help="path to the Skyline csv export")
+    parser.add_argument("output_xlsx", help="path of the Excel file to write")
+    args = parser.parse_args()
+    convert_skyline_input(args.skyline_csv).to_excel(args.output_xlsx)
diff --git a/tests/data/Metadata.xlsx b/tests/data/Metadata.xlsx
diff --git a/tests/data/test_data.xlsx b/tests/data/test_data.xlsx