Skip to content

Commit 34198e6

Browse files
committed
Cleaned up code and made all tests pass
1 parent 10fc9bb commit 34198e6

File tree

2 files changed

+33
-38
lines changed

2 files changed

+33
-38
lines changed

MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,9 +1026,10 @@ private void WritePrunedDatabase()
10261026
// find all biopolymers that have at least one confident PSM and their confident localized modifications -> Used for determining which mods to retain
10271027
var proteinToConfidentModifiedSequences = GetProteinToConfidentModifiedSequences(Parameters.AllPsms, Parameters.SearchParameters.EvidenceRequiredToWriteLocalizedMod, Parameters.SearchParameters.IncludeProteinAmbiguous);
10281028

1029+
if (proteinToConfidentBaseSequences.Count == 0) return;
1030+
10291031
// populate the protein object with the desired modifications with a modify in place operation, original modifications are stored for later restoration
10301032
UpdateProteinModifications(proteinToConfidentModifiedSequences, out var proteinsOriginalModifications, out var originalSequenceVariantModifications);
1031-
10321033
WriteDatabases(proteinToConfidentBaseSequences);
10331034

10341035
// Restore Original Modifications with a modify in place operation
@@ -1142,15 +1143,15 @@ public Dictionary<IBioPolymer, List<IBioPolymerWithSetMods>> GetProteinToConfide
11421143
if (modAndLocationGrouped.Count() <= 1)
11431144
continue;
11441145

1145-
var dissociationTypeCount = modAndLocationGrouped.GroupBy(p => p.dissociationType).Count();
1146-
var digestionAgentCount = modAndLocationGrouped.GroupBy(p => p.digestionAgent).Count();
1146+
var dissociationAndDigestionCount = modAndLocationGrouped.GroupBy(p => (p.dissociationType, p.digestionAgent)).Count();
11471147

11481148
// TODO: Ask Claire about truncy bois. Right now they don't count for anything as they are not true missed cleavages.
1149-
var missedCleavageCount = modAndLocationGrouped.GroupBy(p => p.dissociationType)
1150-
.Sum(dissGroup => dissGroup.GroupBy(p => p.missedCleavages).Count() - 1);
1151-
1152-
var conditionCount = dissociationTypeCount + digestionAgentCount + missedCleavageCount - 2;
1149+
// missed cleavages should only count if they occur with the same digestion agent
1150+
int missedCleavageCount = modAndLocationGrouped.GroupBy(p => p.digestionAgent)
1151+
.Sum(dissGroup =>
1152+
dissGroup.Select(p => p.missedCleavages).Distinct().Count() - 1);
11531153

1154+
var conditionCount = dissociationAndDigestionCount + missedCleavageCount;
11541155
if (conditionCount >= evidenceRequired)
11551156
modificationsToRetain.Add(modAndLocationGrouped.Key);
11561157
}
@@ -1182,17 +1183,19 @@ public Dictionary<IBioPolymer, List<IBioPolymerWithSetMods>> GetProteinToConfide
11821183
.Count(mod => !modificationsToRetain.Contains((mod.Key - covGroup.BioPolymerWithSetMods.OneBasedStartResidue + 1, mod.Value))))
11831184
.ToList();
11841185

1185-
1186+
11861187
// iterate through the sorted list until we cover all modifications or use all biopolymers (we should never hit the second case, but this stops an infinite loop just in case)
11871188
while (modificationsToRetain.Count > 0 && sortedBioPolymers.Count > 0)
11881189
{
1190+
// Select the biopolymer that covers the most uncovered modifications
11891191
var bestBioPolymer = sortedBioPolymers.First();
11901192

11911193
minimumSet.Add(bestBioPolymer.BioPolymerWithSetMods);
11921194
foreach (var mod in bestBioPolymer.CoveredMods)
11931195
modificationsToRetain.Remove(mod);
1194-
sortedBioPolymers.RemoveAt(0);
11951196

1197+
// Remove the selected biopolymer from the list
1198+
sortedBioPolymers.Remove(bestBioPolymer);
11961199

11971200
sortedBioPolymers = sortedBioPolymers
11981201
.Where(covGroup => covGroup.CoveredMods.Overlaps(modificationsToRetain)) // retain only those with mods that are not yet covered

MetaMorpheus/Test/gptmdPrunedDbTests.cs

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -642,18 +642,21 @@ private class TestSpectralMatch : SpectralMatch
642642
{
643643
// Construct the bare minimum of our objects needed to run this section of PostSearchAnalysisTask
644644
public TestSpectralMatch(string fullSequence, int startResidue = 0, int endResidue = 13,
645-
string filePath = "default", string protease = "trypsin")
645+
string filePath = "default", string protease = "trypsin", int missedCleavages = 0)
646646
: base(
647647
new PeptideWithSetModifications(fullSequence, GlobalVariables.AllModsKnownDictionary,
648-
p: _testProtein, oneBasedStartResidueInProtein: startResidue,
648+
p: _testProtein, oneBasedStartResidueInProtein: startResidue, missedCleavages: missedCleavages,
649649
oneBasedEndResidueInProtein: endResidue, digestionParams: new DigestionParams(protease)),
650650
0, 10, 0,
651651
new Ms2ScanWithSpecificMass(
652652
new MsDataScan(
653-
new MzSpectrum([], [], false), 0, 0,
654-
true, Polarity.Positive, 0, default, "", 0,
653+
new MzSpectrum([], [], false),
655654
0, 0,
656-
new double[0, 0], ""), 0, 0, filePath, new CommonParameters(), []),
655+
true, Polarity.Positive, 0,
656+
default, "", 0,
657+
0, 0,
658+
new double[0, 0], ""),
659+
0, 0, filePath, new CommonParameters(), []),
657660
new CommonParameters(), [])
658661
{
659662
FdrInfo = new EngineLayer.FdrAnalysis.FdrInfo
@@ -664,17 +667,6 @@ public TestSpectralMatch(string fullSequence, int startResidue = 0, int endResid
664667
QValueNotch = 0
665668
};
666669
ResolveAllAmbiguities();
667-
668-
// adjust modification index
669-
foreach (var bioPolymer in BestMatchingBioPolymersWithSetMods)
670-
{
671-
var allMods = bioPolymer.Peptide.AllModsOneIsNterminus.ToArray();
672-
bioPolymer.Peptide.AllModsOneIsNterminus.Clear();
673-
foreach (var modification in allMods)
674-
{
675-
bioPolymer.Peptide.AllModsOneIsNterminus.Add(modification.Key + startResidue-1, modification.Value);
676-
}
677-
}
678670
}
679671
};
680672

@@ -759,8 +751,8 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Misse
759751
{
760752
new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]K", 0 , 8),
761753
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8),
762-
new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13),
763-
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPTK", 0 , 13)
754+
new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13, missedCleavages: 1),
755+
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPTK", 0 , 13, missedCleavages: 1)
764756
};
765757
var postSearchAnalysisTask = new PostSearchAnalysisTask()
766758
{
@@ -780,19 +772,19 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Misse
780772
Assert.That(fullSequences, Does.Contain("PE[Common Biological:Carboxylation on E]PTIDEK"));
781773
}
782774

783-
[Test]
775+
[Test] // TODO: UGH
784776
public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_MissedCleavage_TwoSharedMod_AlternatingTerm()
785777
{
786778
// Arrange
787779
var psms = new List<SpectralMatch>
788780
{
789781
new TestSpectralMatch("PEPT[Common Biological:Phosphorylation on T]K", 8, 13),
790782
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8),
791-
new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13),
783+
new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13, missedCleavages: 1),
792784

793-
// both of the below satisfy the criteria of covering all modifications, but only one should be selected
794-
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPT[Common Biological:Phosphorylation on T]K", 0 , 13),
795-
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]K", 0 , 13)
785+
// both of the below satisfy the criteria of covering all modifications, but only the first should be selected, as the second has an extra modification
786+
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPT[Common Biological:Phosphorylation on T]K", 0 , 13, missedCleavages: 1),
787+
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]K", 0 , 13, missedCleavages: 1)
796788
};
797789
var postSearchAnalysisTask = new PostSearchAnalysisTask()
798790
{
@@ -855,7 +847,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe
855847
{
856848
// three mods found in the same condition, one mod of those found in a different condition, two found in the same condition in a missed cleavage product
857849
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "hcd"),
858-
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd"),
850+
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd", missedCleavages: 1),
859851
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "hcd"),
860852
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "etd"),
861853
new TestSpectralMatch("PEPTIDEK", 0 , 8),
@@ -890,7 +882,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe
890882
{
891883
// three mods found in the same condition, two mods of those found in a different condition, one found in the same condition in a missed cleavage product
892884
new TestSpectralMatch("PEPT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "hcd"),
893-
new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd"),
885+
new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd", missedCleavages: 1),
894886
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "hcd"),
895887
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0 , 8, "etd"),
896888
new TestSpectralMatch("PEPTIDEK", 0 , 8),
@@ -927,8 +919,8 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe
927919
// two mods found in the same condition, one mod of those found in a different condition
928920
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0, 8, "top-down", "top-down"),
929921
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0, 8, "top-down", "top-down"),
930-
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down", "top-down"),
931-
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "trypsin", "trypsin"),
922+
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down", "top-down", missedCleavages: 1),
923+
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "trypsin", "trypsin", missedCleavages: 1),
932924
new TestSpectralMatch("PEPTIDEK", 0 , 8),
933925
};
934926
var postSearchAnalysisTask = new PostSearchAnalysisTask()
@@ -961,7 +953,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe
961953
{
962954
// three mods found in the same condition, one mod of those found in a different condition, two found in the same condition in a missed cleavage product
963955
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "top-down"),
964-
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down"),
956+
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down", missedCleavages: 1),
965957
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down"),
966958
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "trypsin"),
967959
new TestSpectralMatch("PEPTIDEK", 0 , 8),
@@ -996,7 +988,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe
996988
{
997989
// three mods found in the same condition, two mods of those found in a different condition, one found in the same condition in a missed cleavage product
998990
new TestSpectralMatch("PEPT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "top-down"),
999-
new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down"),
991+
new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down", missedCleavages: 1),
1000992
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down"),
1001993
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0 , 8, "trypsin"),
1002994
new TestSpectralMatch("PEPTIDEK", 0 , 8),

0 commit comments

Comments
 (0)