Skip to content

Commit

Permalink
Q value threshold adjustment and bug fix (#2426)
Browse files Browse the repository at this point in the history
* update mzlib nuget package to 551

* adjust inverted qValue threshold to 1000

* add override qValue threshold for unit testing

* calculate notch specific inverted qValue

* add static bool to override psmcount threshold for PEP calculation in unit tests

* final unit tests repaired

* ghk

* prevent unit tests from going down pep rabbit hole

* not supposed to be hard

* finally got that threshold to behave kinda

* gf

* add NonParallelizable to tests

* kgh

* calibrated when it helps

* seems fine

* close but no cigar

* jk

* dg

* fh

* maybe works finally

* gk

* eliminate unnecessary changes to calibration task

* eliminate unnecessary spaces

* eliminate unneeded usings and spaces

* same

* more same

* d

* kg

---------

Co-authored-by: Edwin Laboy <[email protected]>
  • Loading branch information
trishorts and elaboy authored Nov 7, 2024
1 parent b98cfad commit 97d1761
Show file tree
Hide file tree
Showing 18 changed files with 107 additions and 77 deletions.
47 changes: 33 additions & 14 deletions MetaMorpheus/EngineLayer/FdrAnalysis/FdrAnalysisEngine.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
using EngineLayer.CrosslinkSearch;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

namespace EngineLayer.FdrAnalysis
{
Expand All @@ -14,7 +12,16 @@ public class FdrAnalysisEngine : MetaMorpheusEngine
private readonly string AnalysisType;
private readonly string OutputFolder; // used for storing PEP training models
private readonly bool DoPEP;

private readonly int PsmCountThresholdForInvertedQvalue = 1000;
/// <summary>
/// For unit testing only. The PSM-count threshold PsmCountThresholdForInvertedQvalue is 1000,
/// but many unit tests do not generate that many PSMs. When this property is true, the count
/// checks against that threshold are bypassed, so the inverted q-value and PEP code paths can
/// run in unit tests with a lower number of PSMs.
/// The setter is private and the property is static, so the value is shared by every engine instance.
/// </summary>
public static bool QvalueThresholdOverride // property
{
get; private set;
}
public FdrAnalysisEngine(List<SpectralMatch> psms, int massDiffAcceptorNumNotches, CommonParameters commonParameters,
List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, List<string> nestedIds, string analysisType = "PSM",
bool doPEP = true, string outputFolder = null) : base(commonParameters, fileSpecificParameters, nestedIds)
Expand Down Expand Up @@ -71,10 +78,10 @@ private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
.Select(b => b.FirstOrDefault())
.ToList();

if (psms.Count > 100 & DoPEP)
if ((psms.Count > PsmCountThresholdForInvertedQvalue || QvalueThresholdOverride) & DoPEP)
{
CalculateQValue(psms, peptideLevelCalculation: false, pepCalculation: false);
if (peptides.Count > 100 )
if (peptides.Count > PsmCountThresholdForInvertedQvalue || QvalueThresholdOverride)
{
CalculateQValue(peptides, peptideLevelCalculation: true, pepCalculation: false);

Expand Down Expand Up @@ -160,6 +167,7 @@ public void CalculateQValue(List<SpectralMatch> psms, bool peptideLevelCalculati
// Stop if canceled
if (GlobalVariables.StopLoops) { break; }

// we have to keep track of q-values separately for each notch
int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
if (psm.IsDecoy)
{
Expand Down Expand Up @@ -198,7 +206,8 @@ public void CalculateQValue(List<SpectralMatch> psms, bool peptideLevelCalculati
}
else
{
if(psms.Count < 100)
// The QvalueThresholdOverride condition here can be problematic in unit tests.
if (psms.Count < PsmCountThresholdForInvertedQvalue && !QvalueThresholdOverride)
{

QValueTraditional(psms, peptideLevelAnalysis: peptideLevelCalculation);
Expand All @@ -216,39 +225,49 @@ public void CalculateQValue(List<SpectralMatch> psms, bool peptideLevelCalculati
/// <summary>
/// Assigns QValue and QValueNotch to every entry of <paramref name="psms"/>, visiting them in list order.
/// Each stored value is a running maximum of decoy/target ratios (so it never decreases along the list)
/// and is capped at 1 when written.
/// NOTE(review): this listing appears to be a rendered diff; both the scalar and the array form of
/// qValueNotch are shown. Presumably the scalar lines are the removed ones -- confirm against the repository.
/// </summary>
/// <param name="psms">Matches to annotate. Presumably already ordered best-first with cumulative counts populated by the caller -- TODO confirm.</param>
/// <param name="peptideLevelAnalysis">Forwarded to GetFdrInfo on every read and write.</param>
private void QValueTraditional(List<SpectralMatch> psms, bool peptideLevelAnalysis)
{
double qValue = 0;
double qValueNotch = 0;
// one running maximum per notch; the extra slot at index MassDiffAcceptorNumNotches is used when Notch is null
double[] qValueNotch = new double[MassDiffAcceptorNumNotches + 1];

for (int i = 0; i < psms.Count; i++)
{
// Stop if canceled
if (GlobalVariables.StopLoops) { break; }

// a null Notch maps to the last slot of the per-notch array
int notch = psms[i].Notch ?? MassDiffAcceptorNumNotches;
// Math.Max(..., 1) in the denominator keeps the ratio from being divided by a zero target count
qValue = Math.Max(qValue, psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoy / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTarget, 1));
qValueNotch = Math.Max(qValueNotch, psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch, 1));
qValueNotch[notch] = Math.Max(qValueNotch[notch], psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch, 1));

// stored values are capped at 1
psms[i].GetFdrInfo(peptideLevelAnalysis).QValue = Math.Min(qValue, 1);
psms[i].GetFdrInfo(peptideLevelAnalysis).QValueNotch = Math.Min(qValueNotch, 1);
psms[i].GetFdrInfo(peptideLevelAnalysis).QValueNotch = Math.Min(qValueNotch[notch], 1);
}
}

/// <summary>
/// Assigns QValue and QValueNotch to every entry of <paramref name="psms"/> by walking the list in reverse order.
/// Each stored value is a running minimum of (decoy + 1) / target ratios and is capped at 1 when written.
/// The list is reversed in place for the walk and reversed again before returning.
/// NOTE(review): this listing appears to be a rendered diff; both the static and the instance signature, and
/// both the scalar and the array form of qValueNotch, are shown. Presumably the static/scalar lines are the
/// removed ones -- confirm against the repository.
/// </summary>
/// <param name="psms">Matches to annotate. Presumably ordered best-first on entry with cumulative counts populated by the caller -- TODO confirm.</param>
/// <param name="peptideLevelAnalysis">Forwarded to GetFdrInfo on every read and write.</param>
private static void QValueInverted(List<SpectralMatch> psms, bool peptideLevelAnalysis)
private void QValueInverted(List<SpectralMatch> psms, bool peptideLevelAnalysis)
{
// per-notch running minimum, plus a flag recording whether that notch has been seeded yet;
// the extra slot at index MassDiffAcceptorNumNotches is used when Notch is null
double[] qValueNotch = new double[MassDiffAcceptorNumNotches + 1];
bool[] qValueNotchCalculated = new bool[MassDiffAcceptorNumNotches + 1];
psms.Reverse();
//this calculation is performed from bottom up. So, we begin the loop by computing qValue
//and qValueNotch for the last/lowest scoring psm in the bunch
// NOTE(review): psms[0] is read without an emptiness check; an empty list would throw here -- confirm callers never pass one.
// NOTE(review): unlike the loop below, these seed denominators are not guarded with Math.Max(..., 1).
double qValue = (psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoy + 1) / psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeTarget;
double qValueNotch = (psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch + 1) / psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch;

//Assign FDR values to PSMs
for (int i = 0; i < psms.Count; i++)
{
// Stop if canceled
if (GlobalVariables.StopLoops) { break; }
int notch = psms[i].Notch ?? MassDiffAcceptorNumNotches;

// populate the highest q-Value for each notch
if (!qValueNotchCalculated[notch])
{
// NOTE(review): the seed is read from psms[0], not psms[i]. psms[0] may belong to a different notch,
// in which case this is not the current notch's own starting value and can make the running minimum
// for this notch too small. psms[i] looks like what was intended -- confirm.
qValueNotch[notch] = (psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch + 1) / psms[0].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch;
qValueNotchCalculated[notch] = true;
}

// running minimum; Math.Max(..., 1) keeps the ratio from being divided by a zero target count
qValue = Math.Min(qValue, (psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoy + 1) / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTarget, 1));
qValueNotch = Math.Min(qValueNotch, (psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch + 1) / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch, 1));
qValueNotch[notch] = Math.Min(qValueNotch[notch], (psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeDecoyNotch + 1) / Math.Max(psms[i].GetFdrInfo(peptideLevelAnalysis).CumulativeTargetNotch, 1));

// stored values are capped at 1
psms[i].GetFdrInfo(peptideLevelAnalysis).QValue = Math.Min(qValue, 1);
psms[i].GetFdrInfo(peptideLevelAnalysis).QValueNotch = Math.Min(qValueNotch, 1);
psms[i].GetFdrInfo(peptideLevelAnalysis).QValueNotch = Math.Min(qValueNotch[notch], 1);
}
psms.Reverse(); //we inverted the psms for this calculation. now we need to put them back into the original order
}
Expand Down
6 changes: 3 additions & 3 deletions MetaMorpheus/TaskLayer/CalibrationTask/CalibrationTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
DataPointAquisitionResults acquisitionResults = null;

acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, new PpmTolerance(MaxPrecursorTolerance), new PpmTolerance(MaxProductTolerance));
if(acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count >= NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count >= NumRequiredMs2Datapoints)
if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count >= NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count >= NumRequiredMs2Datapoints)
{

// generate calibration function and shift data points
Expand Down Expand Up @@ -200,8 +200,8 @@ private DataPointAquisitionResults GetDataAcquisitionResults(MsDataFile myMsData

_ = new FdrAnalysisEngine(allPsms, searchMode.NumNotches, CommonParameters, FileSpecificParameters, new List<string> { taskId, "Individual Spectra Files", fileNameWithoutExtension }, doPEP: false).Run();

List<SpectralMatch> goodIdentifications = allPsms.Where(b =>
b.FdrInfo.QValueNotch < CalibrationParameters.QValueCutoffForCalibratingPSMs
List<SpectralMatch> goodIdentifications = allPsms.Where(b =>
b.FdrInfo.QValueNotch < CalibrationParameters.QValueCutoffForCalibratingPSMs
&& b.FullSequence != null
&& !b.IsDecoy).ToList();

Expand Down
18 changes: 15 additions & 3 deletions MetaMorpheus/Test/EverythingRunnerEngineTestCase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using EngineLayer.FdrAnalysis;
using Nett;
using NUnit.Framework;
using TaskLayer;
Expand Down Expand Up @@ -59,13 +60,25 @@ internal EverythingRunnerEngineTestCase(EverythingRunnerEngineTestCases testCase
WriteMzId = searchTask.SearchParameters.WriteMzId;
}

public static Object myLock = new();
// Runs this test case's task list through EverythingRunnerEngine, writing to OutputDirectory,
// and marks the case as run. Any existing output directory is deleted first.
// NOTE(review): this listing appears to be a rendered diff; the unlocked runner construction directly
// below and the locked one further down both appear. Presumably the unlocked pair is the removed code -- confirm.
internal void Run()
{
if (Directory.Exists(OutputDirectory))
Directory.Delete(OutputDirectory, true);

var runner = new EverythingRunnerEngine(TaskList, DataFileList, DatabaseList, OutputDirectory);
runner.Run();
// FdrAnalysisEngine.QvalueThresholdOverride is static, so the set / run / reset sequence is
// serialized on myLock to keep concurrently running test cases from seeing each other's setting
lock (myLock)
{
System.Reflection.PropertyInfo property = null;
// every case except the two TopDownQValue cases runs with the threshold override switched on
if (TestCase != EverythingRunnerEngineTestCases.TopDownQValue && TestCase != EverythingRunnerEngineTestCases.TopDownQValueSingle)
{
// the property is set through reflection rather than by direct assignment
var type = typeof(FdrAnalysisEngine);
property = type.GetProperty("QvalueThresholdOverride");
property.SetValue(null, true);
}
var runner = new EverythingRunnerEngine(TaskList, DataFileList, DatabaseList, OutputDirectory);
runner.Run();
// NOTE(review): if runner.Run() throws, this reset is skipped and the static override stays true
// for whatever runs next; a try/finally around the run would guarantee the reset.
if (TestCase != EverythingRunnerEngineTestCases.TopDownQValue && TestCase != EverythingRunnerEngineTestCases.TopDownQValueSingle) property.SetValue(null, false);
}
HasRun = true;
}

Expand Down Expand Up @@ -188,4 +201,3 @@ static EverythingRunnerEngineTestCase()
}
}


6 changes: 0 additions & 6 deletions MetaMorpheus/Test/MatchIonsOfAllCharges.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@
using System;
using MassSpectrometry;
using Nett;
using EngineLayer.Gptmd;
using NUnit.Framework.Legacy;
using Omics.Digestion;
using Omics.Modifications;
using Omics.SpectrumMatch;
using static System.Net.WebRequestMethods;

namespace Test
{
Expand Down Expand Up @@ -421,11 +419,8 @@ public static void TestLibraryUpdate()
string raw1 = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML");
string raw2 = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_A549_3_snip.mzML");
string lib = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SpectralLibrarySearch\SpectralLibrary.msp");


string rawCopy = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SpectralLibrarySearch\UpdateLibrary\rawCopy.mzML");
System.IO.File.Copy(raw1, rawCopy);

EverythingRunnerEngine UpdateLibrary = new(new List<(string, MetaMorpheusTask)> { ("UpdateSpectraFileOutput", task) }, new List<string> { raw1, raw2 }, new List<DbForTask> { new DbForTask(lib, false), new DbForTask( db1,false), new DbForTask(db2, false) }, thisTaskOutputFolder);

UpdateLibrary.Run();
Expand Down Expand Up @@ -483,7 +478,6 @@ public static void TestLibraryExistAfterGPTMDsearch()
_ = Directory.CreateDirectory(thisTaskOutputFolder);
SearchTask task = Toml.ReadFile<SearchTask>(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SpectralLibrarySearch\SpectralSearchTask.toml"), MetaMorpheusTask.tomlConfig);


string db = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\hela_snip_for_unitTest.fasta");
string raw = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML");
string lib = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SpectralLibrarySearch\SpectralLibrary.msp");
Expand Down
3 changes: 0 additions & 3 deletions MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1028,8 +1028,6 @@ public static void MetaDraw_TestChimeraScanSpectrumDisplay()
Directory.Delete(outputFolder, true);
}



[Test]
public static void TestMetaDrawErrors()
{
Expand Down Expand Up @@ -1081,7 +1079,6 @@ public static void TestMetaDrawErrors()
}

[Test]
[NonParallelizable]
public static void TestMetaDrawLoadingWithWeirdFileNames()
{
// test loading when the file has a periods, commas, spaces in the name
Expand Down
3 changes: 1 addition & 2 deletions MetaMorpheus/Test/MetaDraw/SpectrumMatchPlotTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Controls;
using EngineLayer;
using GuiFunctions;
Expand All @@ -31,6 +29,7 @@ public class SpectrumMatchPlotTests
[OneTimeSetUp]
public void Setup()
{

outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"MetaDraw_PeakAnnotaitonTest");
string proteinDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\smalldb.fasta");
string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SmallCalibratible_Yeast.mzML");
Expand Down
7 changes: 4 additions & 3 deletions MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using NUnit.Framework; using Assert = NUnit.Framework.Legacy.ClassicAssert;
using NUnit.Framework;
using Assert = NUnit.Framework.Legacy.ClassicAssert;

namespace Test
{
Expand Down Expand Up @@ -49,7 +50,7 @@ public static void AllResultsAndResultsTxtContainsCorrectValues_QValue_BottomUp(
Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target PSMs with q-value <= 0.01: 214", results[13]);
Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target peptides with q-value <= 0.01: 174", results[14]);
Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target protein groups within 1 % FDR: 165", results[15]);

// Search TaGe_SA_A549_3_snip_2 by itself. The results from this should be identical to the file specific results above
// TaGe_SA_A549_3_snip_2 is searched twice. First with two files being searched simultaneously, then with TaGe_SA_A549_3_snip_2 by itself
// This allows us to compare the file specific results produced by in the two file search to the output
Expand Down Expand Up @@ -110,7 +111,7 @@ public static void AllResultTxtContainsCorrectNumberOfResultLines(EverythingRunn
{
var testCase = EverythingRunnerEngineTestCase.GetTestCase(testCaseIdentifier);

int expectedIndividualFileLines = testCase.DataFileList.Count == 1 || !testCase.WriteIndividualResults
int expectedIndividualFileLines = testCase.DataFileList.Count == 1 || !testCase.WriteIndividualResults
? 0 : testCase.DataFileList.Count;
int expectedSummaryLines = 1;
var allResultTxtLines = File.ReadAllLines(Path.Combine(testCase.OutputDirectory, @"allResults.txt"));
Expand Down
3 changes: 1 addition & 2 deletions MetaMorpheus/Test/ProteinGroupTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
using Proteomics.ProteolyticDigestion;
using MassSpectrometry;
using Chemistry;
using EngineLayer.ClassicSearch;
using FlashLFQ;
using TaskLayer;
using ProteinGroup = EngineLayer.ProteinGroup;
using System.IO;
Expand Down Expand Up @@ -206,6 +204,7 @@ public static void TestModificationInfoListInProteinGroupsOutput()
string mzmlName = @"TestData\PrunedDbSpectra.mzml";
string fastaName = @"TestData\DbForPrunedDb.fasta";
string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestPrunedGeneration");

var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(fastaName, false) }, outputFolder);
engine.Run();
string final = Path.Combine(MySetUpClass.outputFolder, "task2", "DbForPrunedDbGPTMDproteinPruned.xml");
Expand Down
1 change: 0 additions & 1 deletion MetaMorpheus/Test/QuantificationTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Omics.Modifications;
using TaskLayer;

Expand Down
Loading

0 comments on commit 97d1761

Please sign in to comment.