From 1ec5dc4a1070496ed61c900ecd3623407d9a165e Mon Sep 17 00:00:00 2001
From: Caroline Sands
Date: Wed, 7 Aug 2024 12:01:05 +0100
Subject: [PATCH] fixed batch and run order correction and tests

---
 Tests/test_reports.py        | 23 +++++++++++++------
 .../_batchAndROCorrection.py | 16 +++++++------
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/Tests/test_reports.py b/Tests/test_reports.py
index 84564a1..de3375b 100644
--- a/Tests/test_reports.py
+++ b/Tests/test_reports.py
@@ -295,24 +295,33 @@ def test_reports_ms_correlationtodilution(self):
 
 	def test_reports_ms_batchcorrectiontest(self):
 
+#		data = nPYc.MSDataset(
+#			os.path.join('..', '..', 'npc-standard-project', 'Derived_Data', 'UnitTest1_PCSOP.069_QI.csv'),
+#			fileType='QI')
 		data = nPYc.MSDataset(
-			os.path.join('..', '..', 'npc-standard-project', 'Derived_Data', 'UnitTest1_PCSOP.069_QI.csv'),
-			fileType='QI')
+			os.path.join('..', '..', 'npc-standard-project', 'Derived_Data', 'UnitTest1_PCSOP.069_xcms_peakTable.csv'),
+			fileType='XCMS',
+			noFeatureParams=8
+		)
+
+		#data.excludeSamples(['UnitTest1_LPOS_ToF02_ERROR'], on='Sample File Name') ### CAROLINE 070824
+		#data.applyMasks()
 
 		data.addSampleInfo(descriptionFormat='Filenames')
 		data.addSampleInfo(descriptionFormat='Raw Data', filePath=os.path.join('..', '..', 'npc-standard-project', 'Raw_Data', 'ms', 'parameters_data'))
-		# data.sampleMetadata['Correction Batch'] = data.sampleMetadata['Batch']
+		data.addSampleInfo(descriptionFormat='Infer Batches')
+#		data.sampleMetadata['Correction Batch'] = data.sampleMetadata['Batch']
 
 
 		with tempfile.TemporaryDirectory() as tmpdirname:
 			nPYc.reports.generateReport(data, 'batch correction assessment', destinationPath=tmpdirname)
 
-			expectedPath = os.path.join(tmpdirname, 'UnitTest1_PCSOP.069_QI_report_batchCorrectionAssessment.html')
+			expectedPath = os.path.join(tmpdirname, 'UnitTest1_PCSOP.069_xcms_peakTable_report_batchCorrectionAssessment.html')
 			self.assertTrue(os.path.exists(expectedPath))
 
-			testFiles = ['UnitTest1_PCSOP.069_QI_batchPlotFeature_3.17_145.0686m-z.png',
-						 'UnitTest1_PCSOP.069_QI_batchPlotFeature_3.17_262.0378m-z.png',
-						 'UnitTest1_PCSOP.069_QI_TICdetectorBatches.png']
+			testFiles = ['UnitTest1_PCSOP.069_xcms_peakTable_batchPlotFeature_3.17_145.0686m-z.png',
+						 'UnitTest1_PCSOP.069_xcms_peakTable_batchPlotFeature_3.17_262.0378m-z.png',
+						 'UnitTest1_PCSOP.069_xcms_peakTable_TICdetectorBatches.png']
 
 			for testFile in testFiles:
 				expectedPath = os.path.join(tmpdirname, 'graphics', 'report_batchCorrectionAssessment', testFile)
diff --git a/nPYc/batchAndROCorrection/_batchAndROCorrection.py b/nPYc/batchAndROCorrection/_batchAndROCorrection.py
index 862c89d..43ad1b2 100755
--- a/nPYc/batchAndROCorrection/_batchAndROCorrection.py
+++ b/nPYc/batchAndROCorrection/_batchAndROCorrection.py
@@ -100,25 +100,27 @@ def correctMSdataset(data,
 
 	# For each `Correction Batch`, check all samples for correction have at least one `correctionSampleType`
 	batches = numpy.unique(data.sampleMetadata['Correction Batch'])
+	batches = batches[~numpy.isnan(batches)]
 	for batch in batches:
-		if numpy.any((data.sampleMetadata.loc[data.sampleMetadata['Correction Batch'] == batch, 'SampleType'] == correctionSampleType) & (data.sampleMetadata.loc[data.sampleMetadata['Correction Batch'] == batch, 'AssayRole'] == AssayRole.PrecisionReference)):
+		if not numpy.any((data.sampleMetadata.loc[data.sampleMetadata['Correction Batch'] == batch, 'SampleType'] == correctionSampleType) & (data.sampleMetadata.loc[data.sampleMetadata['Correction Batch'] == batch, 'AssayRole'] == AssayRole.PrecisionReference)):
 			raise npycToolboxError("Unable to run batch and run order correction without at least one " + str(correctionSampleType) + " sample in each `Correction Batch`, please check and update dataset accordingly.")
 
 	with warnings.catch_warnings():
 		warnings.simplefilter('ignore', category=RuntimeWarning)
-		correctedP = _batchCorrectionHead(data.intensityData,
-										  data.sampleMetadata['Run Order'].values,
-										  (data.sampleMetadata['SampleType'].values == correctionSampleType) & (data.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference),
-										  data.sampleMetadata.loc[samplesForCorrection==True, 'Correction Batch'],
+		correctedP = _batchCorrectionHead(data.intensityData[samplesForCorrection==True,:],
+										  data.sampleMetadata.loc[samplesForCorrection==True, 'Run Order'].values,
+										  (data.sampleMetadata.loc[samplesForCorrection==True, 'SampleType'].values == correctionSampleType) & (data.sampleMetadata.loc[samplesForCorrection==True, 'AssayRole'].values == AssayRole.PrecisionReference),
+										  data.sampleMetadata.loc[samplesForCorrection==True, 'Correction Batch'].values,
 										  window=window,
 										  method=method,
 										  align=align,
 										  parallelise=parallelise)
 
 	correctedData = copy.deepcopy(data)
-	correctedData.intensityData = correctedP[0]
-	correctedData.fit = correctedP[1]
+	correctedData.intensityData[samplesForCorrection==True,:] = correctedP[0]
+	correctedData.fit = numpy.full(correctedData.intensityData.shape, numpy.nan)
+	correctedData.fit[samplesForCorrection==True,:] = correctedP[1]
 
 	correctedData.Attributes['Log'].append([datetime.now(),'Batch and run order correction applied'])
 
 	return correctedData
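
Illustrative usage sketch (not part of the patch): the snippet below shows how the corrected correctMSdataset behaviour could be exercised end to end, mirroring the dataset setup used in the updated test. It only uses calls visible in this diff (MSDataset, addSampleInfo, correctMSdataset); that correctMSdataset's remaining arguments (window, method, align, parallelise, correctionSampleType) have usable defaults is an assumption not shown here.

# Illustrative sketch only -- not part of the patch.
import os

import numpy
import nPYc
from nPYc.batchAndROCorrection._batchAndROCorrection import correctMSdataset

# Build the same XCMS unit-test dataset the updated test uses.
data = nPYc.MSDataset(
	os.path.join('..', '..', 'npc-standard-project', 'Derived_Data',
				 'UnitTest1_PCSOP.069_xcms_peakTable.csv'),
	fileType='XCMS',
	noFeatureParams=8)
data.addSampleInfo(descriptionFormat='Filenames')
data.addSampleInfo(descriptionFormat='Infer Batches')

# Assumes the remaining correctMSdataset arguments default sensibly.
correctedData = correctMSdataset(data)

# After this fix, only the rows selected for correction are overwritten;
# the fit matrix stays NaN for every sample that was not corrected.
print(numpy.isnan(correctedData.fit).all(axis=1).sum(), 'samples left uncorrected')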