Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Q value threshold adjustment and bug fix #2426

Merged
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
bd5aefc
update mzlib nuget package to 551
trishorts Aug 26, 2024
d60e32e
Merge remote-tracking branch 'upstream/master'
trishorts Sep 5, 2024
3049ef3
Merge remote-tracking branch 'upstream/master'
trishorts Oct 7, 2024
5231c70
adjust inverted qValue threshold to 1000
trishorts Oct 15, 2024
5186b82
add override qValue threshold for unit testing
trishorts Oct 15, 2024
4f9e99f
calculate notch specific inverted qVAlue
trishorts Oct 15, 2024
4196788
add static bool to override psmcount threshold for PEP calculation in…
trishorts Oct 16, 2024
70f2d7a
final unit tests repaired
trishorts Oct 16, 2024
6ece6ba
ghk
trishorts Oct 16, 2024
10b2333
prevent unit tests from going down pep rabbit hole
trishorts Oct 16, 2024
60c2d69
not supposed to be hard
trishorts Oct 16, 2024
375bcd0
finally got that threshold to behave kinda
trishorts Oct 17, 2024
8340667
gf
trishorts Oct 17, 2024
d1080e6
add NonParallelizable to tests
trishorts Oct 17, 2024
d96c2da
kgh
trishorts Oct 17, 2024
17f994e
Merge remote-tracking branch 'upstream/master' into qValueThresholdAd…
trishorts Oct 21, 2024
02a696d
calibrated when it helps
trishorts Oct 21, 2024
68db4f4
Merge branch 'master' into qValueThresholdAdjustmentAndBugFix
trishorts Oct 30, 2024
df1907e
Merge remote-tracking branch 'upstream/master' into qValueThresholdAd…
trishorts Oct 30, 2024
0eb5136
Merge branch 'qValueThresholdAdjustmentAndBugFix' of https://github.c…
trishorts Oct 30, 2024
e49061d
seems fine
trishorts Oct 30, 2024
d6f0bcb
Merge branch 'master' into qValueThresholdAdjustmentAndBugFix
trishorts Oct 31, 2024
4975e08
Merge branch 'master' into qValueThresholdAdjustmentAndBugFix
elaboy Nov 4, 2024
3afcf09
close but no cigar
trishorts Nov 4, 2024
70705b4
Merge branch 'qValueThresholdAdjustmentAndBugFix' of https://github.c…
trishorts Nov 4, 2024
cef3bb3
jk
trishorts Nov 4, 2024
2bfe86a
dg
trishorts Nov 5, 2024
e4c6569
fh
trishorts Nov 5, 2024
6022547
maybe works finaly
trishorts Nov 5, 2024
35db00c
gk
trishorts Nov 6, 2024
312b02c
eliminate unneccessary changes to calibration task
trishorts Nov 6, 2024
6869d22
eliminate unneccesary spaces
trishorts Nov 6, 2024
6c3dee1
eliminate unneeded usings and spaces
trishorts Nov 6, 2024
45f44ad
same
trishorts Nov 6, 2024
b0bb0fa
more same
trishorts Nov 6, 2024
794353e
d
trishorts Nov 6, 2024
ca4fdc5
kg
trishorts Nov 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 35 additions & 14 deletions MetaMorpheus/EngineLayer/FdrAnalysis/FdrAnalysisEngine.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
using EngineLayer.CrosslinkSearch;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

namespace EngineLayer.FdrAnalysis
{
Expand All @@ -14,7 +12,17 @@ public class FdrAnalysisEngine : MetaMorpheusEngine
private readonly string AnalysisType;
private readonly string OutputFolder; // used for storing PEP training models
private readonly bool DoPEP;

private static bool qvalueThresholdOverride;
private readonly int PsmCountThresholdForInvertedQvalue = 1000;
/// <summary>
/// This is to be used only for unit testing. Threshold for q-value calculation is set to 1000
/// However, many unit tests don't generate that many PSMs. Therefore, this property is used to override the threshold
/// to enable PEP calculation in unit tests with lower number of PSMs
/// </summary>
public static bool QvalueThresholdOverride // property
{
set { qvalueThresholdOverride = value; } // set method
}
public FdrAnalysisEngine(List<SpectralMatch> psms, int massDiffAcceptorNumNotches, CommonParameters commonParameters,
List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, List<string> nestedIds, string analysisType = "PSM",
bool doPEP = true, string outputFolder = null) : base(commonParameters, fileSpecificParameters, nestedIds)
Expand Down Expand Up @@ -71,10 +79,10 @@ private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
.Select(b => b.FirstOrDefault())
.ToList();

if (psms.Count > 100 & DoPEP)
if ((psms.Count > PsmCountThresholdForInvertedQvalue || qvalueThresholdOverride) & DoPEP)
{
CalculateQValue(psms, peptideLevelCalculation: false, pepCalculation: false);
if (peptides.Count > 100 )
if (peptides.Count > PsmCountThresholdForInvertedQvalue || qvalueThresholdOverride)
{
CalculateQValue(peptides, peptideLevelCalculation: true, pepCalculation: false);

Expand Down Expand Up @@ -160,6 +168,7 @@ public void CalculateQValue(List<SpectralMatch> psms, bool peptideLevelCalculati
// Stop if canceled
if (GlobalVariables.StopLoops) { break; }

// we have to keep track of q-values separately for each notch
int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
if (psm.IsDecoy)
{
Expand Down Expand Up @@ -198,7 +207,9 @@ public void CalculateQValue(List<SpectralMatch> psms, bool peptideLevelCalculati
}
else
{
if(psms.Count < 100)
//the QValueThreshodOverride condition here can be problematic in unit tests. If tests fail unexpectedly,
//try setting FdrAnalysisEngine.QvalueThresholdOverride = false; in the test method
if (psms.Count < PsmCountThresholdForInvertedQvalue && !qvalueThresholdOverride)
{

QValueTraditional(psms, peptideLevelAnalysis: peptideLevelCalculation);
Expand All @@ -216,39 +227,49 @@ public void CalculateQValue(List<SpectralMatch> psms, bool peptideLevelCalculati
private void QValueTraditional(List<SpectralMatch> psms, bool peptideLevelAnalysis)
{
double qValue = 0;
double qValueNotch = 0;
double[] qValueNotch = new double[MassDiffAcceptorNumNotches + 1];

for (int i = 0; i < psms.Count; i++)
{
// Stop if canceled
if (GlobalVariables.StopLoops) { break; }

int notch = psms[i].Notch ?? MassDiffAcceptorNumNotches;
qValue = Math.Max(qValue, psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoy / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTarget, 1));
qValueNotch = Math.Max(qValueNotch, psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch, 1));
qValueNotch[notch] = Math.Max(qValueNotch[notch], psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch, 1));

psms[i].GetFdrInfo(peptideLevelAnalysis).QValue = Math.Min(qValue, 1);
psms[i].GetFdrInfo(peptideLevelAnalysis).QValueNotch = Math.Min(qValueNotch, 1);
psms[i].GetFdrInfo(peptideLevelAnalysis).QValueNotch = Math.Min(qValueNotch[notch], 1);
}
}

private static void QValueInverted(List<SpectralMatch> psms, bool peptideLevelAnalysis)
private void QValueInverted(List<SpectralMatch> psms, bool peptideLevelAnalysis)
{
double[] qValueNotch = new double[MassDiffAcceptorNumNotches + 1];
bool[] qValueNotchCalculated = new bool[MassDiffAcceptorNumNotches + 1];
psms.Reverse();
//this calculation is performed from bottom up. So, we begin the loop by computing qValue
//and qValueNotch for the last/lowest scoring psm in the bunch
double qValue = (psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoy + 1) / psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeTarget;
double qValueNotch = (psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch + 1) / psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch;

//Assign FDR values to PSMs
for (int i = 0; i < psms.Count; i++)
{
// Stop if canceled
if (GlobalVariables.StopLoops) { break; }
int notch = psms[i].Notch ?? MassDiffAcceptorNumNotches;

// populate the highest q-Value for each notch
if (!qValueNotchCalculated[notch])
{
qValueNotch[notch] = (psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch + 1) / psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch;
qValueNotchCalculated[notch] = true;
}

qValue = Math.Min(qValue, (psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoy + 1) / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTarget, 1));
qValueNotch = Math.Min(qValueNotch, (psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch + 1) / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch, 1));
qValueNotch[notch] = Math.Min(qValueNotch[notch], (psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch + 1) / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch, 1));

psms[i].GetFdrInfo(peptideLevelAnalysis).QValue = Math.Min(qValue, 1);
psms[i].GetFdrInfo(peptideLevelAnalysis).QValueNotch = Math.Min(qValueNotch, 1);
psms[i].GetFdrInfo(peptideLevelAnalysis).QValueNotch = Math.Min(qValueNotch[notch], 1);
}
psms.Reverse(); //we inverted the psms for this calculation. now we need to put them back into the original order
}
Expand Down
64 changes: 47 additions & 17 deletions MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,56 +82,86 @@
string originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
string originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

bool calibrated = false;
// mark the file as in-progress
StartingDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });

CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

// load the file
Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files" });

MsDataFile myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, combinedParams);

// get datapoints to fit calibration function to
Status("Acquiring calibration data points...", new List<string> { taskId, "Individual Spectra Files" });
DataPointAquisitionResults acquisitionResults = null;

acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, new PpmTolerance(MaxPrecursorTolerance), new PpmTolerance(MaxProductTolerance));
if(acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count >= NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count >= NumRequiredMs2Datapoints)
{
int numPsms = acquisitionResults.Psms.Count;

// check if we have enough data points to calibrate and then calibrate
if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count >= NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count >= NumRequiredMs2Datapoints)
{
// generate calibration function and shift data points
Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
CalibrationEngine engine = new(myMsDataFile, acquisitionResults, combinedParams, FileSpecificParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
_ = engine.Run();

//update file
myMsDataFile = engine.CalibratedDataFile;

// write the calibrated mzML file
myMsDataFile.ExportAsMzML(calibratedFilePath, CalibrationParameters.WriteIndexedMzml);
myFileManager.DoneWithFile(originalUncalibratedFilePath);
// get the calibrated data points again to see if there was an increase
acquisitionResults = GetDataAcquisitionResults(engine.CalibratedDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, new PpmTolerance(MaxPrecursorTolerance), new PpmTolerance(MaxProductTolerance));

// write toml settings for the calibrated file
string newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

FileSpecificParameters fileSpecificParams = new();

// carry over file-specific parameters from the uncalibrated file to the calibrated one
if (fileSettingsList[spectraFileIndex] != null)
{
fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
}

//suggest 4 * interquartile range as the ppm tolerance
fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * acquisitionResults.PsmPrecursorIqrPpmError) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * acquisitionResults.PsmProductIqrPpmError) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));

Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

FinishedWritingFile(newTomlFileName, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
// if we have more PSMs after calibration, we should re-calibrate
if (acquisitionResults.Psms.Count > numPsms)
{
calibrated = true;
numPsms = acquisitionResults.Psms.Count;
//update file
myMsDataFile = engine.CalibratedDataFile;

//suggest 4 * interquartile range as the ppm tolerance
fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * acquisitionResults.PsmPrecursorIqrPpmError) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * acquisitionResults.PsmProductIqrPpmError) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));

// generate calibration function and shift data points
Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
engine = new(myMsDataFile, acquisitionResults, combinedParams, FileSpecificParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
_ = engine.Run();

// get the calibrated data points again to see if there was an increase
acquisitionResults = GetDataAcquisitionResults(engine.CalibratedDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, new PpmTolerance((4.0 * acquisitionResults.PsmPrecursorIqrPpmError) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError)), new PpmTolerance((4.0 * acquisitionResults.PsmProductIqrPpmError) + Math.Abs(acquisitionResults.PsmProductMedianPpmError)));

if (acquisitionResults.Psms.Count > numPsms)
{

Check warning on line 145 in MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs

View check run for this annotation

Codecov / codecov/patch

MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs#L145

Added line #L145 was not covered by tests
//update file
myMsDataFile = engine.CalibratedDataFile;

Check warning on line 147 in MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs

View check run for this annotation

Codecov / codecov/patch

MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs#L147

Added line #L147 was not covered by tests
//suggest 4 * interquartile range as the ppm tolerance
fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * acquisitionResults.PsmPrecursorIqrPpmError) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * acquisitionResults.PsmProductIqrPpmError) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));

Check warning on line 150 in MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs

View check run for this annotation

Codecov / codecov/patch

MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs#L149-L150

Added lines #L149 - L150 were not covered by tests

}

Check warning on line 152 in MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs

View check run for this annotation

Codecov / codecov/patch

MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs#L152

Added line #L152 was not covered by tests

// write the calibrated mzML file
myMsDataFile.ExportAsMzML(calibratedFilePath, CalibrationParameters.WriteIndexedMzml);

}
myFileManager.DoneWithFile(originalUncalibratedFilePath);
if(!calibrated)
{
myMsDataFile.ExportAsMzML(calibratedFilePath, CalibrationParameters.WriteIndexedMzml);
}
Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);
FinishedWritingFile(newTomlFileName, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
// finished calibrating this file
FinishedWritingFile(calibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
MyTaskResults.NewSpectra.Add(calibratedFilePath);
Expand Down
3 changes: 2 additions & 1 deletion MetaMorpheus/Test/MatchIonsOfAllCharges.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
using Omics.Modifications;
using Omics.SpectrumMatch;
using static System.Net.WebRequestMethods;
using EngineLayer.FdrAnalysis;

namespace Test
{
Expand Down Expand Up @@ -408,6 +409,7 @@ public static void TestLibraryGeneration()

public static void TestLibraryUpdate()
{
FdrAnalysisEngine.QvalueThresholdOverride = false;
string thisTaskOutputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SpectralLibrarySearch\UpdateLibrary");
_ = Directory.CreateDirectory(thisTaskOutputFolder);
SearchTask task = Toml.ReadFile<SearchTask>(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SpectralLibrarySearch\SpectralSearchTask.toml"), MetaMorpheusTask.tomlConfig);
Expand All @@ -425,7 +427,6 @@ public static void TestLibraryUpdate()

string rawCopy = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SpectralLibrarySearch\UpdateLibrary\rawCopy.mzML");
System.IO.File.Copy(raw1, rawCopy);

EverythingRunnerEngine UpdateLibrary = new(new List<(string, MetaMorpheusTask)> { ("UpdateSpectraFileOutput", task) }, new List<string> { raw1, raw2 }, new List<DbForTask> { new DbForTask(lib, false), new DbForTask( db1,false), new DbForTask(db2, false) }, thisTaskOutputFolder);

UpdateLibrary.Run();
Expand Down
Loading