Skip to content

Commit

Permalink
Gptmd approach update (#2419)
Browse files Browse the repository at this point in the history
* update mzlib nuget package to 551

* use psms score to revise what ptms are added in gptmd

* now deals w/ variants

* fix unit tests

* unused lines

* load files in background

* add parallelization and eliminate pep

* fix unit tests

* yo

* fix those unit tests

* fix unit test

* test gptmd DissociationType Autodetect

* gptmd test mod before variant

* new test

* delete unreachable code

* unit test for xcorr process spectra and matched ion with unknown mass

* Test MatchFragmentIons when scan has no peaks

* test gptmd task with contaminant database

* return missing comments

* more unit test coverage for modification analysis engine

* fix unit test

* fix missing prose line

* eliminate unused code

* fix merge problems

---------

Co-authored-by: Edwin Laboy <[email protected]>
  • Loading branch information
trishorts and elaboy authored Nov 16, 2024
1 parent 14ef43d commit 5780fb6
Show file tree
Hide file tree
Showing 17 changed files with 36,524 additions and 140 deletions.
196 changes: 130 additions & 66 deletions MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs

Large diffs are not rendered by default.

7 changes: 1 addition & 6 deletions MetaMorpheus/EngineLayer/MetaMorpheusEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,6 @@ public static List<MatchedFragmentIon> MatchFragmentIons(Ms2ScanWithSpecificMass

if (scan.TheScan.MassSpectrum.XcorrProcessed && scan.TheScan.MassSpectrum.XArray.Length != 0)
{
// if the spectrum has no peaks
if (scan.TheScan.MassSpectrum.XArray.Length == 0)
{
return matchedFragmentIons;
}

for (int i = 0; i < theoreticalProducts.Count; i++)
{
Expand Down Expand Up @@ -225,7 +220,7 @@ public static List<MatchedFragmentIon> MatchFragmentIons(Ms2ScanWithSpecificMass

return matchedFragmentIons;
}

//Used only when user wants to generate spectral library.
//Normal search only looks for one match ion for one fragment, and if it accepts it then it doesn't try to look for different charge states of that same fragment.
//But for library generation, we need to find all the matched peaks with all the different charges.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ protected override MetaMorpheusEngineResults RunSpecific()
if (unlocalizedFormulas.ContainsKey(representativePsm.ModsChemicalFormula))
unlocalizedFormulas[representativePsm.ModsChemicalFormula] += 1;
else
unlocalizedFormulas.Add(representativePsm.ModsChemicalFormula, 1);
unlocalizedFormulas.Add(representativePsm.ModsChemicalFormula, 1);
}

myAnalysisResults.CountOfEachModSeenOnProteins = modsOnProteins.GroupBy(b => b.Item2).ToDictionary(b => b.Key, b => b.Count());
Expand Down
68 changes: 50 additions & 18 deletions MetaMorpheus/TaskLayer/GPTMDTask/GPTMDTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
using Proteomics.ProteolyticDigestion;
using System.Globalization;
using Omics.Modifications;
using System.Threading.Tasks;

namespace TaskLayer
{
Expand All @@ -30,14 +31,30 @@ public GptmdTask() : base(MyTask.Gptmd)

protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
MyFileManager myFileManager = new MyFileManager(true);
var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

// start loading first spectra file in the background
Task<MsDataFile> nextFileLoadingTask = new(() => myFileManager.LoadFile(currentRawFileList[0], SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[0])));
nextFileLoadingTask.Start();
LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

// start loading proteins in the background
List<Protein> proteinList = null;
Task<List<Protein>> proteinLoadingTask = new(() =>
{
var proteins = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse,
localizeableModificationTypes,
CommonParameters);
SanitizeProteinDatabase(proteins, TargetContaminantAmbiguity.RemoveContaminant);
return proteins;
});
proteinLoadingTask.Start();

// TODO: print error messages loading GPTMD mods
List<Modification> gptmdModifications = GlobalVariables.AllModsKnown.OfType<Modification>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.ModificationType, b.IdWithMotif))).ToList();
IEnumerable<Tuple<double, double>> combos = LoadCombos(gptmdModifications).ToList();

// load proteins
List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

List<SpectralMatch> allPsms = new List<SpectralMatch>();

Expand All @@ -62,18 +79,13 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
ProseCreatedWhileRunning.Append("precursor mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");

ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. ");

// start the G-PTM-D task
Status("Running G-PTM-D...", new List<string> { taskId });
MyTaskResults = new MyTaskResults(this)
{
NewDatabases = new List<DbForTask>()
};
var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
HashSet<DigestionParams> ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

MyFileManager myFileManager = new MyFileManager(true);

object lock1 = new object();
object lock2 = new object();
Expand All @@ -94,14 +106,39 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
NewCollection(Path.GetFileName(origDataFile), new List<string> { taskId, "Individual Spectra Files", origDataFile });

Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

// ensure that the next file has finished loading from the async method
nextFileLoadingTask.Wait();
var myMsDataFile = nextFileLoadingTask.Result;
// if another file exists, then begin loading it in while the previous is being searched
if (origDataFile != currentRawFileList.Last())
{
int nextFileIndex = spectraFileIndex + 1;
nextFileLoadingTask = new Task<MsDataFile>(() => myFileManager.LoadFile(currentRawFileList[nextFileIndex], SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[nextFileIndex])));
nextFileLoadingTask.Start();
}
Status("Getting ms2 scans...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
myFileManager.DoneWithFile(origDataFile);
SpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

//spectral library search and library generation haven't been applied to GPTMD yet
bool writeSpctralLibrary = false;

// ensure proteins are loaded in before proceeding with search
switch (proteinLoadingTask.IsCompleted)
{
case true when proteinList is null: // has finished loading but not been set
proteinList = proteinLoadingTask.Result;
break;
case true when proteinList.Any(): // has finished loading and already been set
break;
case false: // has not finished loading
proteinLoadingTask.Wait();
proteinList = proteinLoadingTask.Result;
break;
}

new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, null, null, null,
proteinList, searchMode, combinedParams, this.FileSpecificParameters, null, new List<string> { taskId, "Individual Spectra Files", origDataFile }, writeSpctralLibrary).Run();
allPsms.AddRange(allPsmsArray.Where(p => p != null));
Expand All @@ -110,18 +147,11 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
}
ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

allPsms = allPsms.OrderByDescending(b => b.Score)
.ThenBy(b => b.BioPolymerWithSetModsMonoisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.BioPolymerWithSetModsMonoisotopicMass.Value) : double.MaxValue)
.GroupBy(b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.BioPolymerWithSetModsMonoisotopicMass))
.Select(b => b.First()).ToList();

new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, CommonParameters, this.FileSpecificParameters, new List<string> { taskId }).Run();
//Move this text after search because proteins don't get loaded until search begins.
ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. ");

var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");
WritePsmsToTsv(allPsms, writtenFile, new Dictionary<string, int>());
FinishedWritingFile(writtenFile, new List<string> { taskId });
new FdrAnalysisEngine(allPsms.OrderBy(p=>p).ToList(), tempSearchMode.NumNotches, CommonParameters, this.FileSpecificParameters, new List<string> { taskId }, doPEP: false).Run();

// get file-specific precursor mass tolerances for the GPTMD engine
var filePathToPrecursorMassTolerance = new Dictionary<string, Tolerance>();
for (int i = 0; i < currentRawFileList.Count; i++)
{
Expand All @@ -135,6 +165,7 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
}

// run GPTMD engine
Status("Creating the GPTMD Database", new List<string> { taskId });
var gptmdResults = (GptmdResults)new GptmdEngine(allPsms, gptmdModifications, combos, filePathToPrecursorMassTolerance, CommonParameters, this.FileSpecificParameters, new List<string> { taskId }).Run();

// Stop if canceled
Expand Down Expand Up @@ -188,6 +219,7 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
MyTaskResults.AddTaskSummaryText("Mods types and counts:");
MyTaskResults.AddTaskSummaryText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
}
Status("Done", new List<string> { taskId });
return MyTaskResults;
}

Expand Down
4 changes: 2 additions & 2 deletions MetaMorpheus/TaskLayer/MetaMorpheusTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -572,10 +572,10 @@ public MyTaskResults RunTask(string output_folder, List<DbForTask> currentProtei
using (StreamWriter file = new StreamWriter(proseFilePath))
{
file.WriteLine("The data analysis was performed using MetaMorpheus version " + GlobalVariables.MetaMorpheusVersion + ", available at " + "https://github.com/smith-chem-wisc/MetaMorpheus.");
file.WriteLine();
file.WriteLine();
file.Write(ProseCreatedWhileRunning.ToString());
file.WriteLine(SystemInfo.SystemProse().Replace(Environment.NewLine, "") + " ");
file.WriteLine();
file.WriteLine();
file.WriteLine("The total time to perform the " + TaskType + " task on " + currentRawDataFilepathList.Count + " spectra file(s) was " + String.Format("{0:0.00}", MyTaskResults.Time.TotalMinutes) + " minutes.");
file.WriteLine();
file.WriteLine("Published works using MetaMorpheus software are encouraged to cite the appropriate publications listed in the reference guide, found here: https://github.com/smith-chem-wisc/MetaMorpheus/blob/master/README.md.");
Expand Down
38 changes: 4 additions & 34 deletions MetaMorpheus/Test/CustomFragmentationTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,9 @@ public static void CustomFragmentationManyTasks()
CollectionAssert.AreEquivalent(customIons, loadedSearchTask.CommonParameters.CustomIons);
Assert.That(loadedSearchTask.CommonParameters.DissociationType, Is.EqualTo(DissociationType.Custom));

// read gptmd and search results to ensure matched ions are correct
var gptmdResults = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "GPTMD", "GPTMD_Candidates.psmtsv"), out List<string> warnings);
var searchResults = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "Search", "AllPSMs.psmtsv"), out List<string> warnings);
Assert.That(!warnings.Any());
var productIons = gptmdResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons, productIons);

var searchResults = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "Search", "AllPSMs.psmtsv"), out warnings);
Assert.That(!warnings.Any());
productIons = searchResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
var productIons = searchResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons, productIons);

Expand All @@ -156,13 +149,6 @@ public static void CustomFragmentationManyTasks()
CollectionAssert.AreEquivalent(customIons, loadedSearchTask.CommonParameters.CustomIons);
Assert.That(loadedSearchTask.CommonParameters.DissociationType, Is.EqualTo(DissociationType.Custom));

// read gptmd and search results to ensure matched ions are correct
gptmdResults = PsmTsvReader.ReadTsv(Path.Combine(newOutputFolder, "GPTMD", "GPTMD_Candidates.psmtsv"), out warnings);
Assert.That(!warnings.Any());
productIons = gptmdResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons, productIons);

searchResults = PsmTsvReader.ReadTsv(Path.Combine(newOutputFolder, "Search", "AllPSMs.psmtsv"), out warnings);
Assert.That(!warnings.Any());
productIons = searchResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
Expand Down Expand Up @@ -298,16 +284,9 @@ public static void CustomFragmentIonsManySearchTasksContainingDifferentIons()
CollectionAssert.AreEquivalent(customIons3, loadedSearchTask3.CommonParameters.CustomIons);
Assert.That(loadedSearchTask3.CommonParameters.DissociationType == DissociationType.Custom);

// read gptmd and search results to ensure matched ions are correct
var gptmdResults = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "GPTMD", "GPTMD_Candidates.psmtsv"), out List<string> warnings);
var searchResults1 = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "Search1", "AllPSMs.psmtsv"), out List<string> warnings);
Assert.That(!warnings.Any());
var productIons = gptmdResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons2, productIons);

var searchResults1 = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "Search1", "AllPSMs.psmtsv"), out warnings);
Assert.That(!warnings.Any());
productIons = searchResults1.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
var productIons = searchResults1.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons1, productIons);

Expand Down Expand Up @@ -365,15 +344,6 @@ public static void CustomFragmentIonsManySearchTasksContainingDifferentIons()
CollectionAssert.AreEquivalent(customIons3, loadedSearchTask3.CommonParameters.CustomIons);
Assert.That(loadedSearchTask3.CommonParameters.DissociationType == DissociationType.Custom);

// read gptmd and search results to ensure matched ions are correct
gptmdResults = PsmTsvReader.ReadTsv(Path.Combine(newOutputFolder, "GPTMD", "GPTMD_Candidates.psmtsv"),
out warnings);
Assert.That(!warnings.Any());
productIons = gptmdResults
.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons2, productIons);

searchResults1 =
PsmTsvReader.ReadTsv(Path.Combine(newOutputFolder, "Search1", "AllPSMs.psmtsv"), out warnings);
Assert.That(!warnings.Any());
Expand Down
Loading

0 comments on commit 5780fb6

Please sign in to comment.