Skip to content

Commit

Permalink
Merge pull request #41 from llegregam/Dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
llegregam authored Sep 18, 2023
2 parents 74e32a3 + b0021e5 commit 01ea711
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 9 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/deploy_pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: Set up Python 3.9
- name: Set up Python 3.11
uses: actions/setup-python@v1
with:
python-version: 3.9
python-version: 3.11
- name: Install pypi/build
run: >-
python -m
Expand Down
2 changes: 1 addition & 1 deletion ms_reader/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.4.2"
__version__ = "1.5.0"
17 changes: 14 additions & 3 deletions ms_reader/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from extract import Extractor
from ms_reader import __version__, __file__
from ms_reader.skyline_convert import convert_skyline_input

# Constants
EXCEL_ENGINE = "openpyxl"
Expand Down Expand Up @@ -59,19 +60,29 @@ def convert_df(df):
st.title(f"Welcome to MS_Reader (v{__version__})")
check_uptodate()

skyline = st.checkbox(
label="Skyline input",
value=False,
help="Check box if input is a skyline output file"
)


st.subheader("Select input files")
col1, col2, col3 = st.columns(3)
with col1:
data = st.file_uploader("Upload Data")
with col2:
report = st.file_uploader("Upload Report File (optional)")
report = st.file_uploader("Upload Report File (optional)", disabled=True if skyline else False)
with col3:
metadata = st.file_uploader("Upload Metadata (optional)")

if data:

# noinspection PyArgumentList
data = pd.read_excel(io=data, engine=EXCEL_ENGINE)
if skyline:
data = convert_skyline_input(data)
else:
# noinspection PyArgumentList
data = pd.read_excel(io=data, engine=EXCEL_ENGINE)

# Add way to drop metabolites from data
with st.expander("Click to open metabolite remover"):
Expand Down
14 changes: 11 additions & 3 deletions ms_reader/extract.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Module containing the parser for tracefinder data and handling all the
"""Module containing the parser for TraceFinder data and handling all the
logic of MS_Reader """

import io
Expand Down Expand Up @@ -65,7 +65,7 @@ def __init__(self, data, calrep=None, metadata=None, met_class="CM"):
columns = [
"Compound", "Sample_Name", "Area", "Sample Type",
"Calculated Amt", "Theoretical Amt",
"Response Ratio", "Excluded", "%Diff"
"Excluded", "%Diff"
]
self.data = self.data[columns].copy()
self._replace_nf()
Expand Down Expand Up @@ -425,6 +425,8 @@ def handle_qc(self) -> bool:
]
self.qc_table.set_index("Compound", inplace=True)

print(self.qc_table)

# QC is set to have a difference between sample point
# and QC of 20% maximum
if (abs(qc_verif["%Diff"].values) > 20).any():
Expand All @@ -433,9 +435,15 @@ def handle_qc(self) -> bool:
qc = True
self.qc_table = self.qc_table.astype(str)

# print(self.qc_table)
# print(self.qc_table.columns[self.qc_table.columns.duplicated(keep=False)])
# print(self.qc_table.index[self.qc_table.index.duplicated(keep=False)])
self.qc_table.index = \
self.qc_table.index.drop_duplicates(keep="first")
self.qc_table = self.qc_table.style.apply(self._color_qc, axis=1,
subset=["%Diff"])

print(self.qc_table.columns[self.qc_table.columns.duplicated(keep=False)])
print(self.qc_table.index[self.qc_table.index.duplicated(keep=False)])
self.excel_tables.append(("Quality Control", self.qc_table))
return qc

Expand Down
93 changes: 93 additions & 0 deletions ms_reader/skyline_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
Convert Skyline export files to the MS_Reader input format
"""

import pandas as pd
import numpy as np

# Column-name translation table used by convert_column_names():
# keys are the column headers found in the Skyline file, values are the
# names they are renamed to. Note that "Accuracy" becomes "%Diff"; the
# values themselves are shifted by -100 in convert_accuracy_to_diff().
MAPPING = {
    "Molecule": "Compound",
    "File Name": "Filename",
    "Total Area": "Area",
    "Quantification": "Calculated Amt",
    "Explicit Analyte Concentration": "Theoretical Amt",
    "Exclude From Calibration": "Excluded",
    "Accuracy": "%Diff"
}

# Translation table for the values of the "Sample Type" column, consulted by
# convert_sample_types(). Values not listed here are passed through unchanged.
# NOTE(review): the right-hand labels are presumably the sample types the
# MS_Reader extractor expects -- confirm against extract.py.
SAMPLE_TYPE_MAPPING = {
    "Quality Control": "QC Std",
    "Standard": "Cal Std",
    "Blank": "Unknown"
}

def convert_column_names(df):
    """Return a copy of *df* whose columns are renamed according to MAPPING.

    Columns that do not appear as a key in MAPPING keep their name.
    """
    renamed = df.rename(columns=MAPPING)
    return renamed

def convert_sample_types(value):
    """Translate one sample-type label through SAMPLE_TYPE_MAPPING.

    Labels without an entry in the table are returned unchanged.
    """
    # Fall back to the input itself when there is no translation for it.
    return SAMPLE_TYPE_MAPPING.get(value, value)

def convert_accuracy_to_diff(value):
    """Convert an accuracy percentage into a signed difference from 100.

    :param value: accuracy as a string such as ``"105.3%"``, a plain number,
                  or a missing value (``NaN`` / ``None``)
    :return: ``accuracy - 100`` rounded to one decimal place, or ``np.nan``
             when *value* is missing
    :raises ValueError: if *value* is a string that is not a number once the
                        ``%`` sign has been removed
    """
    # NaN never compares equal to anything (``value != np.nan`` is always
    # True), so missing values have to be detected with pd.isna().
    if pd.isna(value):
        return np.nan
    accuracy = float(str(value).replace("%", ""))
    return round(accuracy - 100, 1)

def convert_calculated_amt(value):
    """Convert one "Calculated Amt" cell to a float concentration.

    :param value: cell content; typically a string such as ``"12.5 uM"``,
                  ``"Normalized Area ..."`` or ``"NaN"``, but numbers and
                  missing values are accepted too
    :return: the concentration as a float, or ``np.nan`` when the cell
             holds no usable concentration
    :raises ValueError: if a string is left that is not a number once the
                        ``" uM"`` suffix has been removed
    """
    if isinstance(value, str):
        # Cells reporting a normalized area or a literal NaN carry no
        # concentration.
        if "Normalized Area" in value or "NaN" in value:
            return np.nan
        return float(value.replace(" uM", ""))
    # Non-string cells: missing values (NaN / None) stay missing, anything
    # else is taken as an already-numeric amount.
    if pd.isna(value):
        return np.nan
    return float(value)

def handle_na(row):
    """Normalise one converted row (meant for ``DataFrame.apply(axis=1)``).

    * Appends ``" C13"`` to the compound name when the precursor is flagged
      ``"(heavy)"``.
    * Replaces a missing area, together with its calculated amount, by the
      ``"N/F"`` marker.
    * Turns a ``None`` calculated amount into ``np.nan``.

    :param row: mapping-like row (``pandas.Series`` or ``dict``) providing
                the keys "Precursor", "Compound", "Area" and
                "Calculated Amt"
    :return: a modified copy of *row*; the object passed in is left untouched
    """
    # Mutating the object handed over by DataFrame.apply is not supported by
    # pandas, so work on a copy.
    row = row.copy()

    precursor = row["Precursor"]
    # A missing precursor is a float NaN, on which ``in`` would raise.
    if isinstance(precursor, str) and "(heavy)" in precursor:
        row["Compound"] = row["Compound"] + " C13"

    # pd.isna (unlike np.isnan) also accepts strings and None without raising.
    if pd.isna(row["Area"]):
        row["Area"] = "N/F"
        row["Calculated Amt"] = "N/F"

    if row["Calculated Amt"] is None:
        row["Calculated Amt"] = np.nan

    return row

def convert_skyline_input(skyline_file):
    """Read a Skyline CSV export and convert it to the MS_Reader layout.

    The conversion renames the columns, translates the sample types, turns
    the accuracy percentages into "%Diff" values (missing ones become
    ``"N/A"``), parses the calculated amounts and finally normalises each
    row with ``handle_na``.

    :param skyline_file: path or file-like object accepted by
                         ``pandas.read_csv``
    :return: the converted data as a ``pandas.DataFrame``
    :raises KeyError: if a column needed by the conversion is absent from
                      the file
    """
    # Errors are deliberately left to propagate unchanged to the caller (the
    # former ``try ... except Exception: raise`` wrapper did exactly that).
    data = pd.read_csv(skyline_file, sep=",")
    data = convert_column_names(data)
    data["Sample Type"] = data["Sample Type"].apply(convert_sample_types)
    data["%Diff"] = (
        data["%Diff"].apply(convert_accuracy_to_diff).fillna("N/A")
    )
    data["Calculated Amt"] = data["Calculated Amt"].apply(
        convert_calculated_amt
    )
    return data.apply(handle_na, axis=1)

if __name__ == "__main__":
    # Manual command-line entry point: convert one Skyline CSV export and
    # write the result to an Excel file. The paths are taken from the
    # command line rather than being hard-coded to a developer's machine.
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert a Skyline CSV export to the MS_Reader format"
    )
    parser.add_argument("skyline_csv", help="path to the Skyline CSV file")
    parser.add_argument(
        "output_xlsx", help="path of the Excel file to write"
    )
    args = parser.parse_args()

    data = convert_skyline_input(args.skyline_csv)
    data.to_excel(args.output_xlsx)
Binary file modified tests/data/Metadata.xlsx
Binary file not shown.
Binary file modified tests/data/test_data.xlsx
Binary file not shown.

0 comments on commit 01ea711

Please sign in to comment.