Skip to content

Commit

Permalink
Target decoy pairing (#642)
Browse files Browse the repository at this point in the history
* Added hash for identifying matching targets and decoys

* Added hash codes to pair target peptides to their decoys.

* Cleaned up code

* Added comments to better annotate pairing.

Co-authored-by: zdanaceau <[email protected]>
  • Loading branch information
zdanaceau and zdanaceau authored Jul 22, 2022
1 parent 2b961a4 commit 3c77adf
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ public class PeptideWithSetModifications : ProteolyticPeptide
{
public string FullSequence { get; private set; } //sequence with modifications
public readonly int NumFixedMods;

// Property that stores a hash code pairing a target peptide with its decoy.
// If the peptide in question is a decoy, this pairs it to the target it was generated from.
// If the peptide in question is a target, this pairs it to its corresponding decoy.
public int? PairedTargetDecoyHash { get; private set; }
/// <summary>
/// Dictionary of modifications on the peptide. The N terminus is index 1.
/// The key indicates which residue modification is on (with 1 being N terminus).
Expand All @@ -28,14 +31,12 @@ public class PeptideWithSetModifications : ProteolyticPeptide
[NonSerialized] private DigestionParams _digestionParams;
private static readonly double WaterMonoisotopicMass = PeriodicTable.GetElement("H").PrincipalIsotope.AtomicMass * 2 + PeriodicTable.GetElement("O").PrincipalIsotope.AtomicMass;
private readonly string ProteinAccession; // used to get protein object after deserialization


/// <summary>
/// Creates a PeptideWithSetModifications object from a protein. Used when a Protein is digested.
/// </summary>
public PeptideWithSetModifications(Protein protein, DigestionParams digestionParams, int oneBasedStartResidueInProtein,
int oneBasedEndResidueInProtein, CleavageSpecificity cleavageSpecificity, string peptideDescription, int missedCleavages,
Dictionary<int, Modification> allModsOneIsNterminus, int numFixedMods, string baseSequence = null)
Dictionary<int, Modification> allModsOneIsNterminus, int numFixedMods, string baseSequence = null, int? pairedTargetDecoyHash = null)
: base(protein, oneBasedStartResidueInProtein, oneBasedEndResidueInProtein, missedCleavages, cleavageSpecificity, peptideDescription, baseSequence)
{
_allModsOneIsNterminus = allModsOneIsNterminus;
Expand All @@ -44,6 +45,7 @@ public PeptideWithSetModifications(Protein protein, DigestionParams digestionPar
DetermineFullSequence();
ProteinAccession = protein.Accession;
UpdateCleavageSpecificity();
PairedTargetDecoyHash = pairedTargetDecoyHash; // Added PairedTargetDecoyHash as a nullable integer
}

/// <summary>
Expand All @@ -53,7 +55,7 @@ public PeptideWithSetModifications(Protein protein, DigestionParams digestionPar
public PeptideWithSetModifications(string sequence, Dictionary<string, Modification> allKnownMods, int numFixedMods = 0,
DigestionParams digestionParams = null, Protein p = null, int oneBasedStartResidueInProtein = int.MinValue,
int oneBasedEndResidueInProtein = int.MinValue, int missedCleavages = int.MinValue,
CleavageSpecificity cleavageSpecificity = CleavageSpecificity.Full, string peptideDescription = null)
CleavageSpecificity cleavageSpecificity = CleavageSpecificity.Full, string peptideDescription = null, int? pairedTargetDecoyHash = null)
: base(p, oneBasedStartResidueInProtein, oneBasedEndResidueInProtein, missedCleavages, cleavageSpecificity, peptideDescription)
{
if (sequence.Contains("|"))
Expand All @@ -66,6 +68,7 @@ public PeptideWithSetModifications(string sequence, Dictionary<string, Modificat
GetModsAfterDeserialization(allKnownMods);
NumFixedMods = numFixedMods;
_digestionParams = digestionParams;
PairedTargetDecoyHash = pairedTargetDecoyHash; // Added PairedTargetDecoyHash as a nullable integer

if (p != null)
{
Expand Down Expand Up @@ -1132,6 +1135,11 @@ private HashSet<double> AddNeutralLossesFromMods(Modification mod, HashSet<doubl
//Occasionally, this process results in a peptide with exactly the same sequence. Therefore, there is a stop-gap measure
//that returns the mirror image of the original. N-terminal mods are preserved, but other mods are also reversed.
//This should yield a unique decoy for each target sequence.
//This function also adds a hash code to both the original PeptideWithSetModifications and the decoy
//generated by this function, pairing the two together by each other's FullSequence.
//The original target peptide is given a hash code corresponding to the decoy's full sequence,
//and the decoy is given a hash code corresponding to the original target peptide's sequence.
//This hash code is stored in the PairedTargetDecoyHash property of PeptideWithSetModifications.
public PeptideWithSetModifications GetReverseDecoyFromTarget(int[] revisedAminoAcidOrder)
{
Dictionary<int, Modification> newModificationsDictionary = new Dictionary<int, Modification>();
Expand Down Expand Up @@ -1217,16 +1225,28 @@ public PeptideWithSetModifications GetReverseDecoyFromTarget(int[] revisedAminoA
Protein decoyProtein = new Protein(proteinSequence, "DECOY_" + this.Protein.Accession, null, new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>>(), null, null, null, true);
DigestionParams d = this.DigestionParams;

// Creates a hash code corresponding to the target's sequence
int targetHash = GetHashCode();
PeptideWithSetModifications decoyPeptide;
//Make the "peptideDescription" store the corresponding target's sequence
if (newBaseString != this.BaseSequence)
{
return new PeptideWithSetModifications(decoyProtein, d, this.OneBasedStartResidueInProtein, this.OneBasedEndResidueInProtein, this.CleavageSpecificityForFdrCategory, this.FullSequence, this.MissedCleavages, newModificationsDictionary, this.NumFixedMods, newBaseString);
decoyPeptide = new PeptideWithSetModifications(decoyProtein, d, this.OneBasedStartResidueInProtein, this.OneBasedEndResidueInProtein, this.CleavageSpecificityForFdrCategory, this.FullSequence, this.MissedCleavages, newModificationsDictionary, this.NumFixedMods, newBaseString);
// Sets PairedTargetDecoyHash of the original target peptide to the hash code of the decoy sequence
PairedTargetDecoyHash = decoyPeptide.GetHashCode();
// Sets PairedTargetDecoyHash of the decoy peptide to the hash code of the target sequence
decoyPeptide.PairedTargetDecoyHash = targetHash;
return decoyPeptide;

}
else
{
//The reverse decoy procedure failed to create a PeptideWithSetModifications with a different sequence. Therefore,
//we return the mirror image peptide.
return this.GetPeptideMirror(revisedAminoAcidOrder);
decoyPeptide = this.GetPeptideMirror(revisedAminoAcidOrder);
PairedTargetDecoyHash = decoyPeptide.GetHashCode();
decoyPeptide.PairedTargetDecoyHash = targetHash;
return decoyPeptide;
}

}
Expand Down Expand Up @@ -1260,6 +1280,7 @@ public PeptideWithSetModifications GetPeptideMirror(int[] revisedOrderNisOne)
proteinSequence = aStringBuilder.ToString();

Protein decoyProtein = new Protein(proteinSequence, "DECOY_" + this.Protein.Accession, null, new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>>(), null, null, null, true);

DigestionParams d = this.DigestionParams;

//now fill in the revised amino acid order
Expand Down
12 changes: 12 additions & 0 deletions mzLib/Test/TestPeptideWithSetMods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,12 @@ public static void TestReverseDecoyFromTarget()

int[] newAminoAcidPositions = new int["PEPTIDEK".Length];
PeptideWithSetModifications reverse = p.GetReverseDecoyFromTarget(newAminoAcidPositions);
// Hash code corresponding to the target sequence, should be PairedTargetDecoyHash for reverse
int testTargetHash = p.GetHashCode();
// Hash code corresponding to the decoy sequence, should be PairedTargetDecoyHash for target
int testDecoyHash = reverse.GetHashCode();
Assert.AreEqual(reverse.PairedTargetDecoyHash, testTargetHash);
Assert.AreEqual(p.PairedTargetDecoyHash, testDecoyHash);
Assert.AreEqual("EDITPEPK", reverse.BaseSequence);
Assert.AreEqual(new int[] { 6, 5, 4, 3, 2, 1, 0, 7 }, newAminoAcidPositions);
Assert.IsTrue(reverse.Protein.IsDecoy);
Expand Down Expand Up @@ -826,6 +832,12 @@ public static void TestReverseDecoyFromTarget()
newAminoAcidPositions = new int["VTIRTVR".Length];
PeptideWithSetModifications p_tryp = new PeptideWithSetModifications(new Protein("VTIRTVR", "DECOY_TRYP"), new DigestionParams(protease: "trypsin"), 1, 7, CleavageSpecificity.Full, null, 0, VTIRTVR_modsDictionary, 0, null);
PeptideWithSetModifications p_tryp_reverse = p_tryp.GetReverseDecoyFromTarget(newAminoAcidPositions);
// Hash code corresponding to the target sequence, should be PairedTargetDecoyHash for reverse
int testMirrorTargetHash = p_tryp.GetHashCode();
// Hash code corresponding to the decoy sequence, should be PairedTargetDecoyHash for target
int testMirrorDecoyHash = p_tryp_reverse.GetHashCode();
Assert.AreEqual(testMirrorTargetHash, p_tryp_reverse.PairedTargetDecoyHash);
Assert.AreEqual(testMirrorDecoyHash, p_tryp.PairedTargetDecoyHash);
Assert.AreEqual("RVTRITV", p_tryp_reverse.BaseSequence);
Assert.AreEqual(new int[] { 6, 5, 4, 3, 2, 1, 0 }, newAminoAcidPositions);
Assert.IsTrue(p_tryp_reverse.AllModsOneIsNterminus.ContainsKey(1));//n-term acetyl
Expand Down

0 comments on commit 3c77adf

Please sign in to comment.