Skip to content

Commit 1075ee2

Browse files
committed
Initial implementation of occupancy by intensity. Requires ptm_stoich branch of mzlib. Code is untestested. Have seen some discrepancies with the quantity of quantified mods and their indexing.
1 parent 9766001 commit 1075ee2

File tree

2 files changed

+111
-40
lines changed

2 files changed

+111
-40
lines changed

MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public ProteinGroup(HashSet<Protein> proteins, HashSet<PeptideWithSetModificatio
8787

8888
public bool DisplayModsOnPeptides { get; set; }
8989

90-
public List<string> ModsInfo { get; private set; }
90+
public List<string> ModsInfo { get; set; }
9191

9292
public Dictionary<SpectraFileInfo, double> IntensitiesByFile { get; set; }
9393

@@ -608,22 +608,28 @@ public void CalculateSequenceCoverage()
608608
}
609609
}
610610

611-
var modStrings = new List<(int aaNum, string part)>();
612-
for (int i = 0; i < pepModTotals.Count; i++)
611+
// modInfo will be updated by the PostSearchAnalysisTask. However, leaving this code
612+
// here for now in case we want to use it in the future.
613+
bool quantifyModsByPSM = false;
614+
if (quantifyModsByPSM)
613615
{
614-
string aa = modIndex[i].index.ToString();
615-
string modName = modIndex[i].modName.ToString();
616-
string occupancy = ((double)pepModTotals[i] / (double)pepTotals[i]).ToString("F2");
617-
string fractOccupancy = $"{pepModTotals[i].ToString()}/{pepTotals[i].ToString()}";
618-
string tempString = ($"#aa{aa}[{modName},info:occupancy={occupancy}({fractOccupancy})]");
619-
modStrings.Add((modIndex[i].index, tempString));
620-
}
616+
var modStrings = new List<(int aaNum, string part)>();
617+
for (int i = 0; i < pepModTotals.Count; i++)
618+
{
619+
string aa = modIndex[i].index.ToString();
620+
string modName = modIndex[i].modName.ToString();
621+
string occupancy = ((double)pepModTotals[i] / (double)pepTotals[i]).ToString("F2");
622+
string fractOccupancy = $"{pepModTotals[i].ToString()}/{pepTotals[i].ToString()}";
623+
string tempString = ($"#aa{aa}[{modName},info:occupancy={occupancy}({fractOccupancy})]");
624+
modStrings.Add((modIndex[i].index, tempString));
625+
}
621626

622-
var modInfoString = string.Join(";", modStrings.OrderBy(x => x.aaNum).Select(x => x.part));
627+
var modInfoString = string.Join(";", modStrings.OrderBy(x => x.aaNum).Select(x => x.part));
623628

624-
if (!string.IsNullOrEmpty(modInfoString))
625-
{
626-
ModsInfo.Add(modInfoString);
629+
if (!string.IsNullOrEmpty(modInfoString))
630+
{
631+
ModsInfo.Add(modInfoString);
632+
}
627633
}
628634
}
629635
}

MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

Lines changed: 91 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using Easy.Common.Extensions;
1+
using Chemistry;
2+
using Easy.Common.Extensions;
23
using EngineLayer;
34
using EngineLayer.FdrAnalysis;
45
using EngineLayer.HistogramAnalysis;
@@ -7,6 +8,9 @@
78
using FlashLFQ;
89
using MassSpectrometry;
910
using MathNet.Numerics.Distributions;
11+
using MzLibUtil;
12+
using Omics.Modifications;
13+
using Omics.SpectrumMatch;
1014
using Proteomics;
1115
using Proteomics.ProteolyticDigestion;
1216
using System;
@@ -16,12 +20,9 @@
1620
using System.IO.Compression;
1721
using System.Linq;
1822
using System.Text;
19-
using UsefulProteomicsDatabases;
2023
using TaskLayer.MbrAnalysis;
21-
using Chemistry;
22-
using MzLibUtil;
23-
using Omics.Modifications;
24-
using Omics.SpectrumMatch;
24+
using TopDownProteomics;
25+
using UsefulProteomicsDatabases;
2526

2627
namespace TaskLayer
2728
{
@@ -34,7 +35,7 @@ public class PostSearchAnalysisTask : MetaMorpheusTask
3435
/// <summary>
3536
/// Used for storage of results for writing to Results.tsv. It is explained in the method ConstructResultsDictionary()
3637
/// </summary>
37-
private Dictionary<(string,string),string> ResultsDictionary { get; set; }
38+
private Dictionary<(string, string), string> ResultsDictionary { get; set; }
3839
/// <summary>
3940
/// Used for storage of results for writing digestion product counts to a .tsv.
4041
/// </summary>
@@ -105,14 +106,14 @@ public MyTaskResults Run()
105106
if (Parameters.SearchParameters.DoLabelFreeQuantification && Parameters.FlashLfqResults != null)
106107
{
107108
SpectralRecoveryResults = SpectralRecoveryRunner.RunSpectralRecoveryAlgorithm(Parameters, CommonParameters, FileSpecificParameters);
108-
}
109+
}
109110
}
110111

111-
if(Parameters.SearchParameters.UpdateSpectralLibrary)
112+
if (Parameters.SearchParameters.UpdateSpectralLibrary)
112113
{
113114
UpdateSpectralLibrary();
114115
}
115-
116+
116117
if (Parameters.SearchParameters.WriteDigestionProductCountFile)
117118
{
118119
WriteDigestionCountByProtein();
@@ -526,9 +527,73 @@ private void QuantificationAnalysis()
526527
Parameters.FlashLfqResults = flashLfqEngine.Run();
527528
}
528529

529-
// get protein intensity back from FlashLFQ
530+
530531
if (ProteinGroups != null && Parameters.FlashLfqResults != null)
531532
{
533+
// get modification stoichiometry using FlashLFQ intensities
534+
var peptides = flashLfqEngine.PeptideModifiedSequencesToQuantify
535+
.Where(pep => Parameters.FlashLfqResults.PeptideModifiedSequences.ContainsKey(pep))
536+
.Select(pep => (Parameters.FlashLfqResults.PeptideModifiedSequences[pep].Sequence,
537+
Parameters.FlashLfqResults.PeptideModifiedSequences[pep].BaseSequence,
538+
Parameters.FlashLfqResults.PeptideModifiedSequences[pep].ProteinGroups.Select(pg => pg.ProteinGroupName).ToList(),
539+
Parameters.FlashLfqResults.PeptideModifiedSequences[pep].GetTotalIntensity())).ToList();
540+
541+
PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis();
542+
pfa.ProteinGroupsOccupancyByPeptide(peptides, true, true, true); // one-based indexes, ignores terminal mods on all peptides.
543+
544+
var proteinGroupsOccupancyByProteins = pfa.Occupancy;
545+
var quantifiedProteinGroups = ProteinGroups.Where(pg => Parameters.FlashLfqResults.ProteinGroups.ContainsKey(pg.ProteinGroupName));
546+
547+
foreach (var proteinGroup in quantifiedProteinGroups)
548+
{
549+
var modInfoString = new StringBuilder();
550+
551+
foreach (var protein in proteinGroup.Proteins)
552+
{
553+
List<string> peptideBaseSequencesSeen = new List<string>();
554+
foreach (var peptide in proteinGroup.AllPeptides)
555+
{
556+
if (proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName].Proteins[protein.Accession].Peptides.ContainsKey(peptide.BaseSequence)
557+
&& !peptideBaseSequencesSeen.Contains(peptide.BaseSequence))
558+
{
559+
proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName]
560+
.Proteins[protein.Accession].Peptides[peptide.BaseSequence]
561+
.PeptideToProteinPositions(peptide.OneBasedStartResidueInProtein);
562+
563+
peptideBaseSequencesSeen.Add(peptide.BaseSequence);
564+
}
565+
}
566+
567+
proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName]
568+
.Proteins[protein.Accession]
569+
.SetProteinModsFromPeptides();
570+
571+
// build modInfoString for this protein
572+
var occupancyPGProtein = proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName].Proteins[protein.Accession];
573+
modInfoString.Append($"");
574+
var aaModStrings = new List<string>();
575+
576+
foreach (var modpos in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein.OrderBy(x => x.Key))
577+
{
578+
var aaModString = new StringBuilder();
579+
aaModString.Append($"aa#{modpos.Key.ToString()}");
580+
581+
foreach (var mod in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein[modpos.Key])
582+
{
583+
aaModString.Append($"[{mod.Key}, info:occupancy={mod.Value.Intensity.ToString()}]");
584+
}
585+
586+
aaModStrings.Add(aaModString.ToString());
587+
}
588+
if (aaModStrings.IsNotNullOrEmpty())
589+
{
590+
modInfoString.Append($"protein:{protein.Accession}{{{string.Join(";", aaModStrings)}}}");
591+
}
592+
}
593+
proteinGroup.ModsInfo.Add(modInfoString.ToString());
594+
}
595+
596+
// get protein intensity back from FlashLFQ
532597
foreach (var proteinGroup in ProteinGroups)
533598
{
534599
proteinGroup.FilesForQuantification = spectraFileInfo;
@@ -548,7 +613,7 @@ private void QuantificationAnalysis()
548613
}
549614
}
550615

551-
//Silac stuff for post-quantification
616+
//Silac stuff for post-quantification
552617
if (Parameters.SearchParameters.SilacLabels != null && Parameters.AllPsms.First() is PeptideSpectralMatch) //if we're doing silac
553618
{
554619
SilacConversions.SilacConversionsPostQuantification(allSilacLabels, startLabel, endLabel, spectraFileInfo, ProteinGroups, Parameters.ListOfDigestionParams,
@@ -612,19 +677,19 @@ private void WritePsmResults()
612677

613678
// write PSMs
614679
string writtenFile = Path.Combine(Parameters.OutputFolder, $"All{GlobalVariables.AnalyteType.GetSpectralMatchLabel()}s.{GlobalVariables.AnalyteType.GetSpectralMatchExtension()}");
615-
WritePsmsToTsv(psmsForPsmResults.OrderByDescending(p=>p).ToList(), writtenFile, writePeptideLevelResults: false);
680+
WritePsmsToTsv(psmsForPsmResults.OrderByDescending(p => p).ToList(), writtenFile, writePeptideLevelResults: false);
616681
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });
617682

618683
// write PSMs for percolator
619684
// percolator native read format is .tab
620685
writtenFile = Path.Combine(Parameters.OutputFolder, $"All{GlobalVariables.AnalyteType.GetSpectralMatchLabel()}s_FormattedForPercolator.tab");
621-
WritePsmsForPercolator(psmsForPsmResults.OrderByDescending(p=>p).ToList(), writtenFile);
686+
WritePsmsForPercolator(psmsForPsmResults.OrderByDescending(p => p).ToList(), writtenFile);
622687
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });
623688

624689
// write summary text
625690
if (psmsForPsmResults.FilteringNotPerformed)
626691
{
627-
692+
628693
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText(
629694
$"PEP could not be calculated due to an insufficient number of {GlobalVariables.AnalyteType.GetSpectralMatchLabel()}s. Results were filtered by q-value." +
630695
Environment.NewLine);
@@ -672,9 +737,9 @@ private void WriteIndividualPsmResults()
672737
// generated by analyzing one file by itself. Therefore, the FDR info should change between AllResults and FileSpecific
673738
string strippedFileName = Path.GetFileNameWithoutExtension(psmFileGroup.Key);
674739
var psmsForThisFile = psmFileGroup.ToList();
675-
CalculatePsmAndPeptideFdr(psmsForThisFile,"PSM", false);
740+
CalculatePsmAndPeptideFdr(psmsForThisFile, "PSM", false);
676741
var psmsToWrite = FilteredPsms.Filter(psmsForThisFile,
677-
CommonParameters,
742+
CommonParameters,
678743
includeDecoys: Parameters.SearchParameters.WriteDecoys,
679744
includeContaminants: Parameters.SearchParameters.WriteContaminants,
680745
includeAmbiguous: true,
@@ -753,7 +818,7 @@ private void UpdateSpectralLibrary()
753818
// Key is a (FullSequence, Charge) tuple
754819
keySelector: g => g.Key,
755820
// Value is the highest scoring psm in the group
756-
elementSelector: g => g.MaxBy(p => p.Score));
821+
elementSelector: g => g.MaxBy(p => p.Score));
757822

758823
//load the original library
759824
var originalLibrarySpectra = Parameters.SpectralLibrary.GetAllLibrarySpectra();
@@ -832,7 +897,7 @@ private void SpectralLibraryGeneration()
832897
bestPsm.MatchedFragmentIons,
833898
bestPsm.ScanRetentionTime));
834899
}
835-
900+
836901
WriteSpectrumLibrary(spectraLibrary, Parameters.OutputFolder);
837902
}
838903

@@ -847,7 +912,7 @@ private void WriteProteinResults()
847912
string proteinResultsText = $"All target {GlobalVariables.AnalyteType.GetBioPolymerLabel().ToLower()} groups with q-value <= 0.01 (1% FDR): " + ProteinGroups.Count(b => b.QValue <= 0.01 && !b.IsDecoy);
848913
ResultsDictionary[("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s")] = proteinResultsText;
849914
}
850-
915+
851916
string fileName = $"All{GlobalVariables.AnalyteType.GetBioPolymerLabel()}Groups.tsv";
852917
if (Parameters.SearchParameters.DoLabelFreeQuantification)
853918
{
@@ -1380,10 +1445,10 @@ public static double[] GetMultiplexIonIntensities(MzSpectrum scan, double[] theo
13801445
int peakIndex = scan.GetClosestPeakIndex(theoreticalIonMzs[0]);
13811446
int lastPeakIndex = Math.Min(scan.GetClosestPeakIndex(theoreticalIonMzs.Last()) + 1, scan.XArray.Length - 1);
13821447
double[] ionIntensities = new double[theoreticalIonMzs.Length];
1383-
1448+
13841449
for (int ionIndex = 0; ionIndex < ionIntensities.Length; ionIndex++)
13851450
{
1386-
while (peakIndex <= lastPeakIndex &&
1451+
while (peakIndex <= lastPeakIndex &&
13871452
scan.XArray[peakIndex] < tolerance.GetMinimumValue(theoreticalIonMzs[ionIndex]))
13881453
{
13891454
peakIndex++;
@@ -1398,7 +1463,7 @@ public static double[] GetMultiplexIonIntensities(MzSpectrum scan, double[] theo
13981463
peakIndex++;
13991464
}
14001465
}
1401-
1466+
14021467
return ionIntensities;
14031468
}
14041469

@@ -1885,7 +1950,7 @@ private void ConstructResultsDictionary()
18851950

18861951
if (Parameters.SearchParameters.DoParsimony)
18871952
{
1888-
ResultsDictionary.Add(("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s"), "");
1953+
ResultsDictionary.Add(("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s"), "");
18891954
if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles)
18901955
{
18911956
foreach (var rawFile in Parameters.CurrentRawFileList)
@@ -1907,8 +1972,8 @@ private string AllResultsTotals()
19071972
sb.AppendLine(ResultsDictionary[key]);
19081973
}
19091974
}
1910-
1911-
var keys = ResultsDictionary.Keys.Where(k=>k.Item1 != "All").OrderBy(k=>k.Item1).ToList();
1975+
1976+
var keys = ResultsDictionary.Keys.Where(k => k.Item1 != "All").OrderBy(k => k.Item1).ToList();
19121977
if (keys.Any())
19131978
{
19141979
sb.AppendLine();

0 commit comments

Comments
 (0)