Skip to content

Commit

Permalink
added a check to start of MSDataset so that if the datapath doesn't e…
Browse files Browse the repository at this point in the history
…xist it fails quickly; fixed some tests
  • Loading branch information
duibuqi committed Jan 10, 2024
1 parent 2aea7e3 commit 699ccae
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 53 deletions.
6 changes: 3 additions & 3 deletions Tests/test_msdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1352,7 +1352,7 @@ def test_inferbatches_warns(self):
msData = copy.deepcopy(self.msData)
msData.sampleMetadata.drop('Run Order', axis=1, inplace=True)
msData.sampleMetadata.drop('Acquired Time', axis=1, inplace=True)
self.assertWarnsRegex(UserWarning, 'Unable to infer batches without run order or acquired time info, skipping.',
self.assertWarnsRegex(UserWarning, 'Unable to infer batches without complete run order or acquired time info, skipping.',
msData._inferBatches)

def test_amendbatches(self):
Expand Down Expand Up @@ -1381,7 +1381,7 @@ class test_msdataset_import_undefined(unittest.TestCase):
Test we raise an error when passing a fileType we don't understand.
"""
def test_raise_notimplemented(self):
self.assertRaises(NotImplementedError, nPYc.MSDataset, os.path.join('nopath'), fileType='Unknown filetype')
self.assertRaises(NotImplementedError, nPYc.MSDataset, os.path.join(''), fileType='Unknown filetype')


class test_msdataset_import_QI(unittest.TestCase):
Expand Down Expand Up @@ -2527,7 +2527,7 @@ def test_init(self):

with tempfile.TemporaryDirectory() as tmpdirname:
# Change default SOP to allow exporting acquired time.
dataset.Attributes['sampleMetadataNotExported'].remove('Acquired Time')
#dataset.Attributes['sampleMetadataNotExported'].remove('Acquired Time')
dataset.exportDataset(destinationPath=tmpdirname, saveFormat='CSV', withExclusions=False)

pathName = os.path.join(tmpdirname, 'Testing_sampleMetadata.csv')
Expand Down
97 changes: 60 additions & 37 deletions Tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,51 +8,74 @@
"""
Tests for checking specific data values remain the same after report functionality changes
"""
class test_sample_summary_regression(unittest.TestCase):


class TestSampleSummaryRegression(unittest.TestCase):

def setUp(self):
# load test data specific for this purpose: we know the correct numbers
self.data = nPYc.MSDataset(os.path.join('..', '..',
'npc-standard-project',
'Regression_Testing_Data',
'DEVSET U RPOS xcms_regressionTesting.csv'),
fileType='XCMS',
sop='GenericMS',
noFeatureParams=9)
# Load test data specific for this purpose: we know the correct numbers.
# This data is stored in the npc-standard-project GitHub repo


self.data = nPYc.MSDataset(os.path.join("..", "..",
"npc-standard-project",
"Regression_Testing_Data",
"DEVSET U RPOS xcms_regressionTesting.csv"),
fileType="XCMS",
sop="GenericMS",
noFeatureParams=9)

self.data.addSampleInfo(descriptionFormat='Basic CSV',
filePath=os.path.join('..', '..',
'npc-standard-project',
'Regression_Testing_Data',
'DEVSET U RPOS Basic CSV_regressionTesting.csv'))
self.data.addSampleInfo(descriptionFormat="Basic CSV",
filePath=os.path.join("..", "..",
"npc-standard-project",
"Regression_Testing_Data",
"DEVSET U RPOS Basic CSV_regressionTesting.csv"))

def test_report_samplesummary(self):

sampleSummary = nPYc.reports._generateSampleReport(self.data, returnOutput=True)
def test_setup(self):
self.assertIsNotNone(self.data)

# Check returns against expected
def test_XCMS_metadata_report_correct(self):

sample_summary = nPYc.reports._generateSampleReport(self.data, returnOutput=True)
"""
Check returns against expected. sample_summary is a dictionary of dataframes with keys:
for key in sample_summary.keys():
print(key)
print(sample_summary[key])
"""
# Acquired - Totals
assert sampleSummary['Acquired'].loc['All', 'Total'] == 115
assert sampleSummary['Acquired'].loc['Study Sample', 'Total'] == 8
assert sampleSummary['Acquired'].loc['Study Reference', 'Total'] == 11
assert sampleSummary['Acquired'].loc['Long-Term Reference', 'Total'] == 1
assert sampleSummary['Acquired'].loc['Serial Dilution', 'Total'] == 92
assert sampleSummary['Acquired'].loc['Blank', 'Total'] == 2
assert sampleSummary['Acquired'].loc['Unknown', 'Total'] == 1
self.assertEqual(sample_summary["Acquired"].loc["All", "Total"], 214)
self.assertEqual(sample_summary["Acquired"].loc["Study Sample", "Total"], 78)
self.assertEqual(sample_summary["Acquired"].loc["Study Reference", "Total"], 23)
self.assertEqual(sample_summary["Acquired"].loc["Long-Term Reference", "Total"], 8)
self.assertEqual(sample_summary["Acquired"].loc["Serial Dilution", "Total"], 92)
self.assertEqual(sample_summary["Acquired"].loc["Blank", "Total"], 12)
self.assertEqual(sample_summary["Acquired"].loc["Unknown", "Total"], 1)

# Acquired - Marked for exclusion
assert sampleSummary['Acquired'].loc['All', 'Marked for Exclusion'] == 1
assert sampleSummary['Acquired'].loc['Study Sample', 'Marked for Exclusion'] == 1
assert sampleSummary['Acquired'].loc['Study Reference', 'Marked for Exclusion'] == 0
assert sampleSummary['Acquired'].loc['Long-Term Reference', 'Marked for Exclusion'] == 0
assert sampleSummary['Acquired'].loc['Serial Dilution', 'Marked for Exclusion'] == 0
assert sampleSummary['Acquired'].loc['Blank', 'Marked for Exclusion'] == 0
assert sampleSummary['Acquired'].loc['Unknown', 'Marked for Exclusion'] == 0

# Check details tables
assert sampleSummary['MarkedToExclude Details'].shape == (1, 2)
assert sampleSummary['UnknownType Details'].shape == (1, 1)

if __name__ == '__main__':
self.assertEqual(sample_summary["Acquired"].loc["All", "Marked for Exclusion"], 0)
self.assertEqual(sample_summary["Acquired"].loc["Study Sample", "Marked for Exclusion"], 0)
self.assertEqual(sample_summary["Acquired"].loc["Study Reference", "Marked for Exclusion"], 0)
self.assertEqual(sample_summary["Acquired"].loc["Long-Term Reference", "Marked for Exclusion"], 0)
self.assertEqual(sample_summary["Acquired"].loc["Serial Dilution", "Marked for Exclusion"], 0)
self.assertEqual(sample_summary["Acquired"].loc["Blank", "Marked for Exclusion"], 0)
self.assertEqual(sample_summary["Acquired"].loc["Unknown", "Marked for Exclusion"], 0)

# Acquired - Missing/Excluded
self.assertEqual(sample_summary["Acquired"].loc["All", "Missing/Excluded"], 1)
self.assertEqual(sample_summary["Acquired"].loc["Study Sample", "Missing/Excluded"], 1)
self.assertEqual(sample_summary["Acquired"].loc["Study Reference", "Missing/Excluded"], 0)
self.assertEqual(sample_summary["Acquired"].loc["Long-Term Reference", "Missing/Excluded"], 0)
self.assertEqual(sample_summary["Acquired"].loc["Serial Dilution", "Missing/Excluded"], 0)
self.assertEqual(sample_summary["Acquired"].loc["Blank", "Missing/Excluded"], 0)
self.assertEqual(sample_summary["Acquired"].loc["Unknown", "Missing/Excluded"], 0)

self.assertEqual(sample_summary["NoMetadata Details"].loc[0, "Sample File Name"], "PipelineTesting_RPOS_ToF10_U1W98")
self.assertEqual(sample_summary["UnknownType Details"].loc[0, "Sample File Name"], "PipelineTesting_RPOS_ToF10_U1W98")
self.assertEqual(sample_summary["NotAcquired"].loc[0, "Sample File Name"], "PipelineTesting_RPOS_ToF10_U1W97")
self.assertEqual(sample_summary["Excluded Details"].loc[0, "Sample File Name"], "PipelineTesting_RPOS_ToF10_U1W97")
self.assertEqual(sample_summary["StudySamples Exclusion Details"].loc[0, "Sample File Name"], "PipelineTesting_RPOS_ToF10_U1W97")

if __name__ == "__main__":
unittest.main()
16 changes: 8 additions & 8 deletions Tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,53 +180,53 @@ def test_generatesrdmask(self):
msData.sampleMetadata['Dilution Series'] = [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, numpy.nan, numpy.nan,
numpy.nan, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6]
msData.corrExclusions = msData.sampleMask

# I think batches are inferred from the sample file names?
srdMask = nPYc.utilities.ms.generateLRmask(msData)

cannonicalMask = {'Batch 1, series 1.0': numpy.array([True, True, True, True, True, True,
canonicalMask = {'Batch 1.0, series 1.0': numpy.array([True, True, True, True, True, True,
False, False, False, False, False, False,
False, False, False, False, False, False,
False, False, False,
False, False, False, False, False, False,
False, False, False, False, False, False,
False, False, False, False, False, False], dtype=bool),
'Batch 1, series 2.0': numpy.array([False, False, False, False, False, False,
'Batch 1.0, series 2.0': numpy.array([False, False, False, False, False, False,
True, True, True, True, True, True,
False, False, False, False, False, False,
False, False, False,
False, False, False, False, False, False,
False, False, False, False, False, False,
False, False, False, False, False, False], dtype=bool),
'Batch 1, series 3.0': numpy.array([False, False, False, False, False, False,
'Batch 2.0, series 3.0': numpy.array([False, False, False, False, False, False,
False, False, False, False, False, False,
True, True, True, True, True, True,
False, False, False,
False, False, False, False, False, False,
False, False, False, False, False, False,
False, False, False, False, False, False], dtype=bool),
'Batch 1, series 4.0': numpy.array([False, False, False, False, False, False,
'Batch 2.0, series 4.0': numpy.array([False, False, False, False, False, False,
False, False, False, False, False, False,
False, False, False, False, False, False,
False, False, False,
True, True, True, True, True, True,
False, False, False, False, False, False,
False, False, False, False, False, False], dtype=bool),
'Batch 1, series 5.0': numpy.array([False, False, False, False, False, False,
'Batch 3.0, series 5.0': numpy.array([False, False, False, False, False, False,
False, False, False, False, False, False,
False, False, False, False, False, False,
False, False, False,
False, False, False, False, False, False,
True, True, True, True, True, True,
False, False, False, False, False, False], dtype=bool),
'Batch 1, series 6.0': numpy.array([False, False, False, False, False, False,
'Batch 3.0, series 6.0': numpy.array([False, False, False, False, False, False,
False, False, False, False, False, False,
False, False, False, False, False, False,
False, False, False,
False, False, False, False, False, False,
False, False, False, False, False, False,
True, True, True, True, True, True,], dtype=bool)}

numpy.testing.assert_equal(srdMask, cannonicalMask)
numpy.testing.assert_equal(srdMask, canonicalMask)

def test_generatesrdmask_raises(self):

Expand Down
22 changes: 18 additions & 4 deletions nPYc/objects/_msDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ class MSDataset(Dataset):
Operates on spreadsheets exported from Biocrates MetIDQ. By default loads data from the sheet named 'Data Export'; this may be overridden with the ``sheetName=`` argument. If the number of sample metadata columns differs from the default, this can be overridden with the ``noSampleParams=`` argument.
* nPYc
nPYc import operates on the csv file generated using nPYc exportDataset function ('combinedData' file). This reimport function is meant for further filtering or normalisation without having to run whole process again.
nPYc import operates on the csv file generated using nPYc exportDataset function ('combinedData' file).
This reimport function is meant for further filtering or normalisation without having to run whole process again.
Note that metadata does not need to be imported again.
"""

Expand All @@ -67,6 +68,19 @@ def __init__(self, datapath, fileType='xcms', sop='GenericMS', **kwargs):
"""

super().__init__(sop=sop, **kwargs)

allowed_file_types = ['qi', 'mzmine', 'msdial', 'csv', 'xcms', 'xcmsonline',
'biocrates', 'metaboscape', 'npyc', 'csv export', 'empty']

fileType = fileType.lower()
if fileType in allowed_file_types:
if fileType != 'empty' and not os.path.exists(datapath):
# fail early if a datapath has been supplied which points to a non-existent file
# caveat: datapaths can be empty strings with the 'empty' fileType
raise ValueError("Supplied MS data file '%s' regrettably doesn't exist." % datapath)
else:
raise NotImplementedError("Unfortunately '%s' is not yet recognised as an input format to nPYc.MSDataset." % fileType)

self.corrExclusions = None
self._correlationToDilution = numpy.array(None)
try:
Expand All @@ -92,7 +106,8 @@ def __init__(self, datapath, fileType='xcms', sop='GenericMS', **kwargs):
'deltaMzArtifactual': None}

# Load the output file
fileType = fileType.lower()


if fileType == 'qi':
self._loadQIDataset(datapath)
self.Attributes['FeatureExtractionSoftware'] = 'Progenesis QI'
Expand Down Expand Up @@ -147,8 +162,7 @@ def __init__(self, datapath, fileType='xcms', sop='GenericMS', **kwargs):
elif fileType == 'empty':
# Lets us build an empty object for testing &c
pass
else:
raise NotImplementedError


self._intensityData = self._intensityData.astype(float)
self.featureMetadata['Exclusion Details'] = None
Expand Down
5 changes: 4 additions & 1 deletion nPYc/plotting/_plotBatchAndROCorrection.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,11 @@ def plotBatchAndROCorrection(dataset, datasetcorrected, featureList, addViolin=T
# Check that dimensions are the same
try:
# Attempt to add the intensityData arrays of msData and msDatacorrected
print(msData.intensityData.shape)
print(msDatacorrected.intensityData.shape)
msData.intensityData + msDatacorrected.intensityData
except ValueError:
except ValueError as ve:
print(ve)
# If ValueError occurs (arrays have different dimensions), return an explanatory message
return "msData and msDatacorrected must have the same dimensions"

Expand Down

0 comments on commit 699ccae

Please sign in to comment.