Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion MetaMorpheus/EngineLayer/EngineLayer.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
<PackageReference Include="Microsoft.ML.CpuMath" Version="3.0.1" />
<PackageReference Include="Microsoft.ML.FastTree" Version="3.0.1" />
<PackageReference Include="Microsoft.NETCore.App" Version="2.2.8" />
<PackageReference Include="mzLib" Version="1.0.569" />
<PackageReference Include="mzLib" Version="9.0.569" />
<PackageReference Include="NETStandard.Library" Version="2.0.3" />
<PackageReference Include="Nett" Version="0.15.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
Expand Down
251 changes: 163 additions & 88 deletions MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs

Large diffs are not rendered by default.

25 changes: 25 additions & 0 deletions MetaMorpheus/EngineLayer/Silac/SilacConversions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
using Omics;
using Omics.Digestion;
using EngineLayer.SpectrumMatch;
using MzLibUtil.PositionFrequencyAnalysis;
using Easy.Common.Extensions;

namespace EngineLayer
{
Expand Down Expand Up @@ -460,6 +462,7 @@ public static void SilacConversionsPostQuantification(List<SilacLabel> allSilacL
{
proteinGroup.FilesForQuantification = allInfo;
proteinGroup.IntensitiesByFile = new Dictionary<SpectraFileInfo, double>();
proteinGroup.ModsInfo = new Dictionary<SpectraFileInfo, QuantifiedProteinGroup>();

foreach (var spectraFile in allInfo)
{
Expand All @@ -472,6 +475,28 @@ public static void SilacConversionsPostQuantification(List<SilacLabel> allSilacL
//needed for decoys/contaminants/proteins that aren't quantified
proteinGroup.IntensitiesByFile.Add(spectraFile, 0);
}

// get modification stoichiometry using FlashLFQ spectraFile-specific intensities
var pgQuantifiedPeptides = flashLfqResults.PeptideModifiedSequences.Where(x => proteinGroup.AllPeptides.Select(x => x.FullSequence).Contains(x.Key)).ToList();

if (pgQuantifiedPeptides.IsNotNullOrEmpty())
{
var peptides = pgQuantifiedPeptides.Where(pep => pep.Value.GetIntensity(spectraFile) > 0)
.Select(pep => (pep.Value.Sequence,
new List<string> { proteinGroup.ProteinGroupName },
pep.Value.GetIntensity(spectraFile))).ToList();
if (!peptides.IsNotNullOrEmpty())
{
proteinGroup.ModsInfo.Add(spectraFile, new QuantifiedProteinGroup(proteinGroup.ProteinGroupName));
continue;
}

PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis();
var proteins = proteinGroup.Proteins.Select(p => new KeyValuePair<string, string>(p.Accession, p.BaseSequence)).ToDictionary();
pfa.SetUpQuantificationObjectsFromFullSequences(peptides, proteins); // uses zero-based indexes for the mods.

proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Stoichiometry is computed one protein group at a time, so taking First() is sufficient here.
}
}
}
}
Expand Down
55 changes: 40 additions & 15 deletions MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
using TaskLayer.MbrAnalysis;
using Chemistry;
using MzLibUtil;
using MzLibUtil.PositionFrequencyAnalysis;
using Omics.Digestion;
using Omics.BioPolymer;
using Omics.Modifications;
Expand All @@ -35,7 +36,7 @@ public class PostSearchAnalysisTask : MetaMorpheusTask
/// <summary>
/// Used for storage of results for writing to Results.tsv. It is explained in the method ConstructResultsDictionary()
/// </summary>
private Dictionary<(string,string),string> ResultsDictionary { get; set; }
private Dictionary<(string, string), string> ResultsDictionary { get; set; }
/// <summary>
/// Used for storage of results for writing digestion product counts to a .tsv.
/// </summary>
Expand Down Expand Up @@ -108,14 +109,14 @@ public MyTaskResults Run()
if (Parameters.SearchParameters.DoLabelFreeQuantification && Parameters.FlashLfqResults != null)
{
SpectralRecoveryResults = SpectralRecoveryRunner.RunSpectralRecoveryAlgorithm(Parameters, CommonParameters, FileSpecificParameters);
}
}
}

if(Parameters.SearchParameters.UpdateSpectralLibrary)
if (Parameters.SearchParameters.UpdateSpectralLibrary)
{
UpdateSpectralLibrary();
}

if (Parameters.SearchParameters.WriteDigestionProductCountFile)
{
WriteDigestionCountByProtein();
Expand Down Expand Up @@ -552,13 +553,15 @@ private void QuantificationAnalysis()
Parameters.FlashLfqResults = flashLfqEngine.Run();
}

// get protein intensity back from FlashLFQ
// get protein intensity and mod stoichiometry back from FlashLFQ
if (ProteinGroups != null && Parameters.FlashLfqResults != null)
{
// get protein intensity back from FlashLFQ
foreach (var proteinGroup in ProteinGroups)
{
proteinGroup.FilesForQuantification = spectraFileInfo;
proteinGroup.IntensitiesByFile = new Dictionary<SpectraFileInfo, double>();
proteinGroup.ModsInfo = new Dictionary<SpectraFileInfo, QuantifiedProteinGroup>();

foreach (var spectraFile in proteinGroup.FilesForQuantification)
{
Expand All @@ -570,6 +573,28 @@ private void QuantificationAnalysis()
{
proteinGroup.IntensitiesByFile.Add(spectraFile, 0);
}

// get modification stoichiometry using FlashLFQ spectraFile-specific intensities
var pgQuantifiedPeptides = Parameters.FlashLfqResults.PeptideModifiedSequences.Where(x => proteinGroup.AllPeptides.Select(x=>x.FullSequence).Contains(x.Key)).ToList();

if (pgQuantifiedPeptides.IsNotNullOrEmpty())
{
var peptides = pgQuantifiedPeptides.Where(pep => pep.Value.GetIntensity(spectraFile) > 0)
.Select(pep => (pep.Value.Sequence,
new List<string> { proteinGroup.ProteinGroupName },
pep.Value.GetIntensity(spectraFile))).ToList();
if (peptides.IsNullOrEmpty())
{
proteinGroup.ModsInfo.Add(spectraFile, new QuantifiedProteinGroup(proteinGroup.ProteinGroupName));
continue;
}

PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis();
var proteins = proteinGroup.Proteins.Select(p => new KeyValuePair<string, string>(p.Accession, p.BaseSequence)).ToDictionary();
pfa.SetUpQuantificationObjectsFromFullSequences(peptides, proteins); // uses zero-based indexes for the mods.

proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Stoichiometry is computed one protein group at a time, so taking First() is sufficient here.
}
}
}
}
Expand Down Expand Up @@ -650,7 +675,7 @@ private void WritePsmResults()
// write summary text
if (psmsForPsmResults.FilteringNotPerformed)
{

Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText(
$"PEP could not be calculated due to an insufficient number of {GlobalVariables.AnalyteType.GetSpectralMatchLabel()}s. Results were filtered by q-value." +
Environment.NewLine);
Expand Down Expand Up @@ -701,9 +726,9 @@ private void WriteIndividualPsmResults()
// generated by analyzing one file by itself. Therefore, the FDR info should change between AllResults and FileSpecific
string strippedFileName = Path.GetFileNameWithoutExtension(psmFileGroup.Key);
var psmsForThisFile = psmFileGroup.ToList();
CalculatePsmAndPeptideFdr(psmsForThisFile,"PSM", false);
CalculatePsmAndPeptideFdr(psmsForThisFile, "PSM", false);
var psmsToWrite = FilteredPsms.Filter(psmsForThisFile,
CommonParameters,
CommonParameters,
includeDecoys: Parameters.SearchParameters.WriteDecoys,
includeContaminants: Parameters.SearchParameters.WriteContaminants,
includeAmbiguous: true,
Expand Down Expand Up @@ -785,7 +810,7 @@ private void UpdateSpectralLibrary()
// Key is a (FullSequence, Charge) tuple
keySelector: g => g.Key,
// Value is the highest scoring psm in the group
elementSelector: g => g.MaxBy(p => p.Score));
elementSelector: g => g.MaxBy(p => p.Score));

//load the original library
var originalLibrarySpectra = Parameters.SpectralLibrary.GetAllLibrarySpectra();
Expand Down Expand Up @@ -864,7 +889,7 @@ private void SpectralLibraryGeneration()
bestPsm.MatchedFragmentIons,
bestPsm.ScanRetentionTime));
}

WriteSpectrumLibrary(spectraLibrary, Parameters.OutputFolder);
}

Expand All @@ -879,7 +904,7 @@ private void WriteProteinResults()
string proteinResultsText = $"All target {GlobalVariables.AnalyteType.GetBioPolymerLabel().ToLower()} groups with q-value <= 0.01 (1% FDR): " + ProteinGroups.Count(b => b.QValue <= 0.01 && !b.IsDecoy);
ResultsDictionary[("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s")] = proteinResultsText;
}

string fileName = $"All{GlobalVariables.AnalyteType.GetBioPolymerLabel()}Groups.tsv";
if (Parameters.SearchParameters.DoLabelFreeQuantification)
{
Expand Down Expand Up @@ -1127,7 +1152,7 @@ public static double[] GetMultiplexIonIntensities(SpectralMatch psm, double[] th
.Where(ion => ion.NeutralTheoreticalProduct.ProductType == Omics.Fragmentation.ProductType.D)
.OrderBy(ion => ion.Mz)
.ToArray();
double[] expIonMzs = diagnosticIons.Select(ion => ion.Mz).ToArray();
double[] expIonMzs = diagnosticIons.Select(ion => ion.Mz).ToArray();
double[] ionIntensities = new double[theoreticalIonMzs.Length];

int expIonIndex = 0;
Expand Down Expand Up @@ -1290,7 +1315,7 @@ private void WriteVariantResults()

foreach (var variant in variants)
{
if (variantPWSM.IntersectsAndIdentifiesVariation(variant).identifies == true)
if (variantPWSM.IntersectsAndIdentifiesVariation(variant).identifies == true && variant.Description.Description.IsNotNullOrEmpty())
{
if (culture.CompareInfo.IndexOf(variant.Description.Description, "missense_variant", CompareOptions.IgnoreCase) >= 0)
{
Expand Down Expand Up @@ -1574,7 +1599,7 @@ private void ConstructResultsDictionary()

if (Parameters.SearchParameters.DoParsimony)
{
ResultsDictionary.Add(("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s"), "");
ResultsDictionary.Add(("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s"), "");
if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles)
{
foreach (var rawFile in Parameters.CurrentRawFileList)
Expand All @@ -1597,7 +1622,7 @@ private string AllResultsTotals()
}
}

var keys = ResultsDictionary.Keys.Where(k => k.Item1 != "All").OrderBy(k=>k.Item1).ToList();
var keys = ResultsDictionary.Keys.Where(k=>k.Item1 != "All").OrderBy(k=>k.Item1).ToList();
if (keys.Any())
{
sb.AppendLine();
Expand Down
7 changes: 6 additions & 1 deletion MetaMorpheus/Test/ProteinGroupTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ public static void ProteinGroupToStringTest()

//string exectedProteinGroupToString = proteinGroup1.ToString();
string exectedProteinGroupToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t\t0\tT\t0\t0\t0\t0\t0";
var out1 = proteinGroup1.ToString().Split("\t");
var out1h = proteinGroup1.GetTabSeparatedHeader().Split("\t");
var out1zipped = out1h.Zip(out1, (a, b) => (a, b)).ToDictionary();
Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupToString));


Expand All @@ -92,6 +95,7 @@ public static void ProteinGroupToStringTest()
ProteinGroup proteinGroup3 = new ProteinGroup(new HashSet<IBioPolymer>(proteinList3),
new HashSet<IBioPolymerWithSetMods>(), new HashSet<IBioPolymerWithSetMods>());
string exectedProteinGroupWithDecoyToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t\t0\tT\t0\t0\t0\t0\t0";
var out2 = proteinGroup1.ToString();
Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupWithDecoyToString));
}

Expand Down Expand Up @@ -245,7 +249,8 @@ public static void TestModificationInfoListInProteinGroupsOutput()
List<string> proteinGroupsOutput = File.ReadAllLines(Path.Combine(outputFolder, "task2", "AllQuantifiedProteinGroups.tsv")).ToList();
string firstDataLine = proteinGroupsOutput[2];
string modInfoListProteinTwo = firstDataLine.Split('\t')[14];
Assert.That(modInfoListProteinTwo, Is.EqualTo("#aa71[Oxidation on S,info:occupancy=0.50(1/2)]"));
Assert.That(modInfoListProteinTwo, Is.EqualTo("P10591:{M#65[Common Variable:Oxidation on M, info: occupancy=1.0000(654315.977066199)]S#71[Less Common:Oxidation on S, info: occupancy=0.1957(654315.977066199)]}" +
"P10592:{M#65[Common Variable:Oxidation on M, info: occupancy=1.0000(654315.977066199)]S#71[Less Common:Oxidation on S, info: occupancy=0.1957(654315.977066199)]}"));

Directory.Delete(outputFolder, true);
}
Expand Down
2 changes: 1 addition & 1 deletion MetaMorpheus/Test/QuantificationTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ public static void TestProteinQuantFileHeaders(bool hasDefinedExperimentalDesign
string condition = hasDefinedExperimentalDesign ? "TestCondition" : "";

// create the protein database
Protein prot = new(peptide, @"");
Protein prot = new(peptide, @"test"); // The protein must be given a non-empty name; otherwise the database loader behaves unpredictably.
string dbName = Path.Combine(unitTestFolder, "testDB.fasta");
UsefulProteomicsDatabases.ProteinDbWriter.WriteFastaDatabase(new List<Protein> { prot }, dbName, ">");

Expand Down
10 changes: 9 additions & 1 deletion MetaMorpheus/Test/RobTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
using Omics.Modifications;
using Omics;
using Transcriptomics;
using EngineLayer.ModernSearch;
using EngineLayer.Indexing;
using System.IO;
using TaskLayer;
using UsefulProteomicsDatabases;

namespace Test
{
Expand Down Expand Up @@ -254,7 +259,10 @@ public static void TestPTMOutput()
ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), null, new List<string>());
f.Run();

Assert.That(proteinGroups.First().ModsInfo[0], Is.EqualTo("#aa5[resMod on S,info:occupancy=0.67(2/3)];#aa10[iModOne on I,info:occupancy=0.33(2/6)];#aa10[iModTwo on I,info:occupancy=0.33(2/6)]"));
// TODO: Consider deleting this test.
// Otherwise, adapt it to run FlashLFQ first and then verify ModsInfo correctly.
Assert.That(proteinGroups.First().ModsInfo, Is.Null);
Assert.That(proteinGroups.First().ModInfoStringFromGroupedFiles(proteinGroups.First().FilesForQuantification), Is.EqualTo(""));
}

[Test]
Expand Down
7 changes: 0 additions & 7 deletions MetaMorpheus/Test/SeqCoverageTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,6 @@ public static void TryFailSequenceCoverage()
var firstSequenceCoverageDisplayListWithMods = fjkd.ProteinGroups.First().SequenceCoverageDisplayListWithMods.First();
Assert.That(firstSequenceCoverageDisplayListWithMods, Is.EqualTo("[mod1 on M]-MM[mod3 on M]KM[mod3 on M]MK-[mod5 on K]"));

var firstModInfo = fjkd.ProteinGroups.First().ModsInfo.First();
Assert.That(firstModInfo.Contains(@"#aa1[mod1 on M,info:occupancy=1.00(2/2)]"));
Assert.That(firstModInfo.Contains(@"#aa2[mod3 on M,info:occupancy=0.50(1/2)]"));
Assert.That(!(firstModInfo.Contains(@"#aa3")));
Assert.That(firstModInfo.Contains(@"#aa4[mod3 on M,info:occupancy=0.50(1/2)]"));
Assert.That(!(firstModInfo.Contains(@"#aa5")));
Assert.That(firstModInfo.Contains(@"#aa6[mod5 on K,info:occupancy=1.00(2/2)]"));
Console.WriteLine("Test output: " + firstSequenceCoverageDisplayList);
}

Expand Down
Loading
Loading