Merge pull request #45 from llegregam/Dev

Update to 1.6.0
llegregam · Mar 20, 2024 · e932af9 · e932af9
2 parents b661f48 + a8b559b
commit e932af9
Show file tree

Hide file tree

Showing 4 changed files with 35 additions and 18 deletions.
diff --git a/ms_reader/__init__.py b/ms_reader/__init__.py
@@ -1 +1 @@
-__version__ = "1.5.1"
+__version__ = "1.6.0"
diff --git a/ms_reader/app.py b/ms_reader/app.py
@@ -6,7 +6,7 @@
 
 from extract import Extractor
 from ms_reader import __version__, __file__
-from ms_reader.skyline_convert import convert_skyline_input
+from ms_reader.skyline_convert import import_skyline_dataset
 
 # Constants
 EXCEL_ENGINE = "openpyxl"
@@ -79,7 +79,7 @@ def convert_df(df):
 if data:
 
     if skyline:
-        data = convert_skyline_input(data)
+        data = import_skyline_dataset(data)
     else:
         # noinspection PyArgumentList
         data = pd.read_excel(io=data, engine=EXCEL_ENGINE)

diff --git a/ms_reader/skyline_convert.py b/ms_reader/skyline_convert.py
@@ -2,6 +2,8 @@
 Converter to convert the skyline input to MS_Reader format
 """
 
+import re
+
 import pandas as pd
 import numpy as np
 
@@ -39,9 +41,7 @@ def convert_accuracy_to_diff(value):
 def convert_calculated_amt(value):
 
     if type(value) == str:
-        if "Normalized Area" in value:
-            return np.nan
-        if "NaN" in value:
+        if "Normalized Area" in value or "NaN" in value:
             return np.nan
         return float(str(value).replace(" uM", ""))
 
@@ -62,18 +62,35 @@ def handle_na(row):
             print(f"Area value = {row['Area']}\nCalculated Amt value = {row['Calculated Amt']}")
     return row
 
-def convert_skyline_input(skyline_file):
+def import_skyline_dataset(skyline_file):
+    """
+    Import skyline dataset and transform into MS_Reader compatible format
+
+    :param skyline_file: Bytes file containing skyline data (tabular format)
+    """
+
+    # Get copy of file binary to dodge any weird effects when file is read twice by pandas
+    #file = copy(skyline_file)
+    filename_extension = skyline_file.name[-3:]
+    if filename_extension not in ["tsv", "txt"]:
+        raise TypeError(
+            f"Skyline data must be in tabulated format with 'tsv' or 'txt' extension. "
+            f"Detected extension: {filename_extension}"
+        )
+
+    data = pd.read_csv(skyline_file, sep="\t")
+    #if len(data.columns) == 1:
+    #    data = pd.read_csv(file, sep="\t")
+    data = convert_column_names(data)
+    data["Sample Type"] = data["Sample Type"].apply(convert_sample_types)
+    data["%Diff"] = data["%Diff"].apply(convert_accuracy_to_diff).fillna("N/A")
     try:
-        data = pd.read_csv(skyline_file, sep=",")
-        data = convert_column_names(data)
-        data["Sample Type"] = data["Sample Type"].apply(convert_sample_types)
-        data["%Diff"] = data["%Diff"].apply(convert_accuracy_to_diff).fillna("N/A")
         data["Calculated Amt"] = data["Calculated Amt"].apply(convert_calculated_amt)
-        data = data.apply(handle_na, axis=1)
-    except KeyError:
-        raise ValueError("There seems to be an error while parsing file. Please make sure your data is comma separated.")
-    except Exception:
-        raise RuntimeError("Unkown error while converting the skyline intput file.")
+    except ValueError:
+        data["Calculated Amt"].replace(to_replace=re.compile(pattern='∞'), value="NaN", inplace=True)
+        data["Calculated Amt"] = data["Calculated Amt"].apply(convert_calculated_amt)
+    data = data.apply(handle_na, axis=1)
+
     return data
 
 if __name__ == "__main__":
@@ -91,5 +108,5 @@ def convert_skyline_input(skyline_file):
     #converted_df["Calculated Amt"] = converted_df["Calculated Amt"].apply(convert_calculated_amt)
     #converted_df = converted_df.apply(handle_na, axis=1)
     #converted_df.to_excel(r"C:\Users\legregam\Desktop\test\test.xlsx", index=False)
-    data = convert_skyline_input(r"C:\Users\legregam\PycharmProjects\MSReader\tests\data\skyline\Quantif-MC.csv")
+    data = import_skyline_dataset(r"C:\Users\legregam\PycharmProjects\MSReader\tests\data\skyline\Quantif-MC.csv")
     data.to_excel(r"C:\Users\legregam\Desktop\test\test2.xlsx")
diff --git a/setup.cfg b/setup.cfg
@@ -26,7 +26,7 @@ install_requires =
 
 [options.entry_points]
 console_scripts =
-    ms_reader = ms_reader.__main__:main
+    ms_reader = ms_reader.__main__:import_skyline_dataset
 
 [options.extras_require]
 testing=