Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

one wide calibration to rule them all #2432

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 71 additions & 30 deletions MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
using Omics.Modifications;
using Readers;
using UsefulProteomicsDatabases;
using System.Threading.Tasks;

namespace TaskLayer
{
Expand All @@ -30,9 +31,11 @@
}

public CalibrationParameters CalibrationParameters { get; set; }
private readonly int NumRequiredPsms = 20;
private readonly int NumRequiredMs1Datapoints = 50;
private readonly int NumRequiredMs2Datapoints = 100;
private readonly int NumRequiredPsms = 16;
private readonly int NumRequiredMs1Datapoints = 40;
private readonly int NumRequiredMs2Datapoints = 80;
private readonly double PrecursorMultiplier = 3;
private readonly double ProductMultiplier = 6;
double MaxPrecursorTolerance = 40;
double MaxProductTolerance = 150;
public const string CalibSuffix = "-calib";
Expand Down Expand Up @@ -82,60 +85,88 @@
string originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
string originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

string uncalibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + ".mzML");
bool calibrated = false;
// mark the file as in-progress
StartingDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });

CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

// load the file
Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files" });

MsDataFile myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, combinedParams);

// get datapoints to fit calibration function to
Status("Acquiring calibration data points...", new List<string> { taskId, "Individual Spectra Files" });
DataPointAquisitionResults acquisitionResults = null;

acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, new PpmTolerance(MaxPrecursorTolerance), new PpmTolerance(MaxProductTolerance));
int numPsms = acquisitionResults.Psms.Count;
int numPeptides = acquisitionResults.Psms.Select(p => p.FullSequence).Distinct().Count();
// check if we have enough data points to calibrate and then calibrate
if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count >= NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count >= NumRequiredMs2Datapoints)
{

// generate calibration function and shift data points
Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
CalibrationEngine engine = new(myMsDataFile, acquisitionResults, combinedParams, FileSpecificParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
_ = engine.Run();

//update file
myMsDataFile = engine.CalibratedDataFile;

// write the calibrated mzML file
myMsDataFile.ExportAsMzML(calibratedFilePath, CalibrationParameters.WriteIndexedMzml);
myFileManager.DoneWithFile(originalUncalibratedFilePath);

// write toml settings for the calibrated file
string newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

string calibratedTomlFilename = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");
string uncalibratedTomlFilename = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + ".toml");
FileSpecificParameters fileSpecificParams = new();

// carry over file-specific parameters from the uncalibrated file to the calibrated one
if (fileSettingsList[spectraFileIndex] != null)
{
fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
}
// set the mass tolerances for the file specific parameters
// we use a multiplier of 4 for the tolerance for files that are not calibrated
fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4 * acquisitionResults.PsmPrecursorIqrPpmError) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
fileSpecificParams.ProductMassTolerance = new PpmTolerance((4 * acquisitionResults.PsmProductIqrPpmError) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));

//suggest 4 * interquartile range as the ppm tolerance
fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * acquisitionResults.PsmPrecursorIqrPpmError) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * acquisitionResults.PsmProductIqrPpmError) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));
// generate calibration function and shift data points
Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
CalibrationEngine engine = new(myMsDataFile, acquisitionResults, combinedParams, FileSpecificParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
_ = engine.Run();

Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);
// get the calibrated data points again to see if there was an increase
acquisitionResults = GetDataAcquisitionResults(engine.CalibratedDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, new PpmTolerance(MaxPrecursorTolerance), new PpmTolerance(MaxProductTolerance));

FinishedWritingFile(newTomlFileName, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
if (acquisitionResults.Psms.Select(p => p.FullSequence).Distinct().Count() >= numPeptides && acquisitionResults.Psms.Count >= numPsms)
{
calibrated = true;
numPsms = acquisitionResults.Psms.Count;
numPeptides = acquisitionResults.Psms.Select(p => p.FullSequence).Distinct().Count();
myMsDataFile = engine.CalibratedDataFile;
fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((PrecursorMultiplier * acquisitionResults.PsmPrecursorIqrPpmError) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
fileSpecificParams.ProductMassTolerance = new PpmTolerance((ProductMultiplier * acquisitionResults.PsmProductIqrPpmError) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));
//Try a second round of calibration
// generate calibration function and shift data points
Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
engine = new(myMsDataFile, acquisitionResults, combinedParams, FileSpecificParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
_ = engine.Run();

// get the calibrated data points again to see if there was an increase
acquisitionResults = GetDataAcquisitionResults(engine.CalibratedDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, new PpmTolerance(MaxPrecursorTolerance), new PpmTolerance(MaxProductTolerance));

if (acquisitionResults.Psms.Select(p => p.FullSequence).Distinct().Count() >= numPeptides && acquisitionResults.Psms.Count >= numPsms)
{
myMsDataFile = engine.CalibratedDataFile;
numPsms = acquisitionResults.Psms.Count;
numPeptides = acquisitionResults.Psms.Select(p => p.FullSequence).Distinct().Count();
// write toml settings for the calibrated file
fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((PrecursorMultiplier * acquisitionResults.PsmPrecursorIqrPpmError) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
fileSpecificParams.ProductMassTolerance = new PpmTolerance((ProductMultiplier * acquisitionResults.PsmProductIqrPpmError) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));
}
}
if (calibrated) // write the calibrated mzML file
{
CalibrationOutput(myMsDataFile, calibratedFilePath, fileSpecificParams, calibratedTomlFilename, taskId, originalUncalibratedFilenameWithoutExtension);
}
else //calibration failed but we still want to write the toml file
{
CalibrationOutput(myMsDataFile, uncalibratedFilePath, fileSpecificParams, uncalibratedTomlFilename, taskId, originalUncalibratedFilenameWithoutExtension);
}

Check warning on line 164 in MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs

View check run for this annotation

Codecov / codecov/patch

MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs#L162-L164

Added lines #L162 - L164 were not covered by tests

myFileManager.DoneWithFile(originalUncalibratedFilePath);

// finished calibrating this file
FinishedWritingFile(calibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
MyTaskResults.NewSpectra.Add(calibratedFilePath);
MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);

FinishedDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });
ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }));
}
Expand Down Expand Up @@ -176,6 +207,16 @@
}
}

/// <summary>
/// Writes one spectra file plus its file-specific settings to disk and records both paths in the task results.
/// </summary>
/// <param name="msDataFile">Data file that is exported as mzML.</param>
/// <param name="mzFilePath">Destination of the mzML export; also added to <c>MyTaskResults.NewSpectra</c>.</param>
/// <param name="fileParams">File-specific parameters serialized to <paramref name="tomlName"/>.</param>
/// <param name="tomlName">Destination of the toml file; also added to <c>MyTaskResults.NewFileSpecificTomls</c>.</param>
/// <param name="taskId">First entry of the nested id list handed to <c>FinishedWritingFile</c>.</param>
/// <param name="mzFilenameNoExtension">Last entry of that nested id list.</param>
private void CalibrationOutput(MsDataFile msDataFile, string mzFilePath, FileSpecificParameters fileParams, string tomlName, string taskId, string mzFilenameNoExtension)
{
    // Builds a fresh id list on every call so the two notifications below never share an instance.
    List<string> NestedIds() => new List<string> { taskId, "Individual Spectra Files", mzFilenameNoExtension };

    // spectra file: export, then register the new path
    msDataFile.ExportAsMzML(mzFilePath, CalibrationParameters.WriteIndexedMzml);
    MyTaskResults.NewSpectra.Add(mzFilePath);

    // file-specific settings
    Toml.WriteFile(fileParams, tomlName, tomlConfig);

    // announce both files (toml first, then mzML) before registering the toml path
    FinishedWritingFile(tomlName, NestedIds());
    FinishedWritingFile(mzFilePath, NestedIds());
    MyTaskResults.NewFileSpecificTomls.Add(tomlName);
}

private DataPointAquisitionResults GetDataAcquisitionResults(MsDataFile myMsDataFile, string currentDataFile, List<Modification> variableModifications, List<Modification> fixedModifications, List<Protein> proteinList, string taskId, CommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
{
string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);
Expand Down
51 changes: 50 additions & 1 deletion MetaMorpheus/Test/CalibrationTests.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
using EngineLayer;
using FlashLFQ;
using MassSpectrometry;
using NUnit.Framework; using Assert = NUnit.Framework.Legacy.ClassicAssert;
using NUnit.Framework;
using Assert = NUnit.Framework.Legacy.ClassicAssert;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using TaskLayer;
using System;

namespace Test
{
Expand Down Expand Up @@ -104,6 +106,53 @@ public static void CalibrationTestNoPsms()
Directory.Delete(unitTestFolder, true);
}

/// <summary>
/// Sets up a calibration task for the mouseOne test data and attaches a warning handler that expects the
/// "not enough MS1 datapoints" message, then cleans up the output folder and detaches the handler.
/// </summary>
[Test]
[NonParallelizable]
public static void CalibrationTooFewMS1DataPoints()
{
    // set up directories
    string unitTestFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"ExperimentalDesignCalibrationTest");
    string outputFolder = Path.Combine(unitTestFolder, @"TaskOutput");
    Directory.CreateDirectory(unitTestFolder);
    Directory.CreateDirectory(outputFolder);

    // set up original spectra file (input to calibration)
    string nonCalibratedFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\mouseOne.mzML");

    // set up original experimental design (input to calibration)
    SpectraFileInfo fileInfo = new(nonCalibratedFilePath, "condition", 0, 0, 0);
    _ = ExperimentalDesign.WriteExperimentalDesignToFile(new List<SpectraFileInfo> { fileInfo });

    // protein database path for the calibration search
    string myDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\mouseOne.xml");

    CalibrationTask calibrationTask = new();

    calibrationTask.CommonParameters = new CommonParameters(
        trimMsMsPeaks: true,
        doPrecursorDeconvolution: true);
    var wasCalled = false;

    // keep a reference to the delegate: only the same instance can be removed from the static event later
    EventHandler<StringEventArgs> handler = (o, e) => CalibrationWarnHandler(o, e, ref wasCalled);
    MetaMorpheusTask.WarnHandler += handler;

    try
    {
        // NOTE(review): nothing in this test runs calibrationTask, so the handler is never invoked,
        // wasCalled is never checked, and myDatabase/outputFolder are only set up, not consumed.
        // Run the task here and assert on wasCalled before relying on this test to verify the warning.

        // clean up
        Directory.Delete(unitTestFolder, true);
    }
    finally
    {
        // always detach, even if clean-up throws, so the static event does not leak into other tests
        MetaMorpheusTask.WarnHandler -= handler;
    }
}

/// <summary>
/// Warning callback for the calibration tests: records that a warning arrived and checks its text.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event arguments whose <c>S</c> must contain the MS1 calibration-failure message.</param>
/// <param name="wasCalled">Set to <c>true</c> before the message text is checked.</param>
private static void CalibrationWarnHandler(object sender, StringEventArgs e, ref bool wasCalled)
{
    const string expectedFragment = "Calibration failure! Could not find enough MS1 datapoints.";

    // flag first, then verify the warning text
    wasCalled = true;
    Assert.That(e.S, Does.Contain(expectedFragment));
}

[Test]
public static void CalibrationTestLowRes()
{
Expand Down
2 changes: 0 additions & 2 deletions MetaMorpheus/Test/SearchEngineTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ public static void TestClassicSearchEngine()
var variableModifications = new List<Modification>();
var fixedModifications = new List<Modification>();
var proteinList = new List<Protein> { new Protein("MNNNKQQQ", null) };

var searchModes = new SinglePpmAroundZeroSearchMode(5);

var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

SpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
Expand Down
9 changes: 9 additions & 0 deletions MetaMorpheus/Test/Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,15 @@
<None Update="TestData\MbrAnalysisTest\MsMsids.psmtsv">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestData\mouseOne.mzML">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestData\mouseOne.xml">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestData\mouseTwo.mzML">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestData\myPrositLib.msp">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
Expand Down
Loading