From 3d0a4e491f2cb5e1db2f921e308a3245a040df0b Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 20 Apr 2022 16:14:45 -0500 Subject: [PATCH] Top down biomarker III (#626) * correct Within calculation * update unit tests * first correct top-down biomarker test * remove unused test code * quotable protease * unquotable * add full length proteoform proteolysis products for biomarker search with unit tests * new strategy for adding proteolysis products * more complete biomarker addition * works for protein.xml databases * add unit test biomarkers with xml database * clean up * more clean up * fix unit test * adjust biomarker generation for three initiator methionine behaviors (cleave, retain, variable) * make sure decoy biomarkers for standard xml proteolysis products are generated * deal appropriately w/ n-terminal methionine cleavage * last little unit test tweaks * delete unused code * add biomarkers during protein database load * greater clarity for biomarker proteolysis product entries * change digest for top-down biomarker search to resolve issues in MM * bro. 
shouldn't have been that hard * cleanup * add unit test * don't need to cover unit tests with unit tests * increase unit test coverage of protein.cs * codemaid * biomarkers on protein with no methionine * unit test booster * ACs comments * more summary statements * deleted mzlib.sln scary * delete empty line * c-terminal products w/ and w/o methionine * delete remaining initiator meth behavior dependencies * remove reference to meth behavior in biomarker test functions * fix affected unit tests * fix more unit tests * Biomarkers changed to Truncations * Biomarker to truncation * added method summaries * MsDataScan tests to boost coverage * more MsDataScan tests * more MsDataFile tests * delete unused comment Co-authored-by: MICHAEL SHORTREED --- mzLib/Proteomics/Protein/Protein.cs | 184 +++++------------- .../ProteolyticDigestion/Protease.cs | 4 +- .../ProteolyticDigestion/ProteinDigestion.cs | 4 +- mzLib/Test/TestDigestionMotif.cs | 127 ++++++------ mzLib/Test/TestMsDataFile.cs | 31 +++ mzLib/Test/TestPeptideWithSetMods.cs | 20 +- mzLib/Test/TestProteinDatabase.cs | 113 +++++------ .../ProteinDbLoader.cs | 10 +- 8 files changed, 211 insertions(+), 282 deletions(-) diff --git a/mzLib/Proteomics/Protein/Protein.cs b/mzLib/Proteomics/Protein/Protein.cs index 2ebf54a33..c97b61a8c 100644 --- a/mzLib/Proteomics/Protein/Protein.cs +++ b/mzLib/Proteomics/Protein/Protein.cs @@ -33,7 +33,7 @@ public Protein(string sequence, string accession, string organism = null, List> oneBasedModifications = null, List proteolysisProducts = null, string name = null, string fullName = null, bool isDecoy = false, bool isContaminant = false, List databaseReferences = null, List sequenceVariations = null, List appliedSequenceVariations = null, string sampleNameForVariants = null, - List disulfideBonds = null, List spliceSites = null, string databaseFilePath = null, bool addBiomarkers = false) + List disulfideBonds = null, List spliceSites = null, string databaseFilePath = null, bool 
addTruncations = false) { // Mandatory BaseSequence = sequence; @@ -65,9 +65,9 @@ public Protein(string sequence, string accession, string organism = null, List(); SpliceSites = spliceSites ?? new List(); - if (addBiomarkers) + if (addTruncations) { - this.AddBiomarkers(); + this.AddTruncations(); } } @@ -237,7 +237,7 @@ public string GetEnsemblFastaHeader() /// Gets peptides for digestion of a protein /// public IEnumerable Digest(DigestionParams digestionParams, List allKnownFixedModifications, - List variableModifications, List silacLabels = null, (SilacLabel startLabel, SilacLabel endLabel)? turnoverLabels = null, bool topDownBiomarkerSearch = false) + List variableModifications, List silacLabels = null, (SilacLabel startLabel, SilacLabel endLabel)? turnoverLabels = null, bool topDownTruncationSearch = false) { //can't be null allKnownFixedModifications = allKnownFixedModifications ?? new List(); @@ -253,7 +253,7 @@ public IEnumerable Digest(DigestionParams digestion IEnumerable unmodifiedPeptides = searchModeType == CleavageSpecificity.Semi ? digestion.SpeedySemiSpecificDigestion(this) : - digestion.Digestion(this, topDownBiomarkerSearch); + digestion.Digestion(this, topDownTruncationSearch); if (digestionParams.KeepNGlycopeptide || digestionParams.KeepOGlycopeptide) { @@ -586,92 +586,61 @@ private IDictionary> SelectValidOneBaseMods(IDictionary< } return validModDictionary; } + /// + /// Protein XML files contain annotated proteolysis products for many proteins (e.g. signal peptides, chain peptides). + /// This method adds N- and C-terminal truncations to these products. 
+ /// - public void AddBiomarkersToProteolysisProducts(int fullProteinOneBasedBegin, int fullProteinOneBasedEnd, bool addNterminalDigestionBiomarkers, bool addCterminalDigestionBiomarkers, InitiatorMethionineBehavior initiatorMethionineBehavior, int minProductBaseSequenceLength, int lengthOfProteolysis, string proteolyisisProductName) + public void AddTruncationsToExistingProteolysisProducts(int fullProteinOneBasedBegin, int fullProteinOneBasedEnd, bool addNterminalDigestionTruncations, bool addCterminalDigestionTruncations, int minProductBaseSequenceLength, int lengthOfProteolysis, string proteolyisisProductName) { bool sequenceContainsNterminus = (fullProteinOneBasedBegin == 1); if (sequenceContainsNterminus) { - if (initiatorMethionineBehavior == InitiatorMethionineBehavior.Retain)//we don't have to do anything here. if the sequence starrts / M or not it is unchanged - { - //Digest C-terminus - if (addCterminalDigestionBiomarkers) - { - AddCterminalBiomarkers(lengthOfProteolysis, fullProteinOneBasedEnd, fullProteinOneBasedBegin, minProductBaseSequenceLength, proteolyisisProductName); - } - - //Digest N-terminus - if (addNterminalDigestionBiomarkers) - { - AddNterminalBiomarkers(lengthOfProteolysis, fullProteinOneBasedBegin, fullProteinOneBasedEnd, minProductBaseSequenceLength, proteolyisisProductName); - } - } - else if (initiatorMethionineBehavior == InitiatorMethionineBehavior.Cleave) + //Digest N-terminus + if (addNterminalDigestionTruncations) { if (BaseSequence.Substring(0, 1) == "M") { - //Digest C-terminus - if (addCterminalDigestionBiomarkers) - { - AddCterminalBiomarkers(lengthOfProteolysis, fullProteinOneBasedEnd, fullProteinOneBasedBegin + 1, minProductBaseSequenceLength, proteolyisisProductName); - } - - //Digest N-terminus - if (addNterminalDigestionBiomarkers) - { - AddNterminalBiomarkers(lengthOfProteolysis, fullProteinOneBasedBegin + 1, fullProteinOneBasedEnd, minProductBaseSequenceLength, proteolyisisProductName); - } + 
AddNterminalTruncations(lengthOfProteolysis + 1, fullProteinOneBasedBegin, fullProteinOneBasedEnd, minProductBaseSequenceLength, proteolyisisProductName); } else { - //Digest C-terminus - if (addCterminalDigestionBiomarkers) - { - AddCterminalBiomarkers(lengthOfProteolysis, fullProteinOneBasedEnd, fullProteinOneBasedBegin, minProductBaseSequenceLength, proteolyisisProductName); - } - - //Digest N-terminus - if (addNterminalDigestionBiomarkers) - { - AddNterminalBiomarkers(lengthOfProteolysis, fullProteinOneBasedBegin, fullProteinOneBasedEnd, minProductBaseSequenceLength, proteolyisisProductName); - } + AddNterminalTruncations(lengthOfProteolysis, fullProteinOneBasedBegin, fullProteinOneBasedEnd, minProductBaseSequenceLength, proteolyisisProductName); } } - else // initiator methionine cleavage is variable we have to deal both with keeping and deleting the M + //Digest C-terminus -- not effected by variable N-terminus behavior + if (addCterminalDigestionTruncations) { - //Digest N-terminus - if (addNterminalDigestionBiomarkers) - { - if (BaseSequence.Substring(0, 1) == "M") - { - AddNterminalBiomarkers(lengthOfProteolysis + 1, fullProteinOneBasedBegin, fullProteinOneBasedEnd, minProductBaseSequenceLength, proteolyisisProductName); - } - } - //Digest C-terminus -- not effected by variable N-terminus behavior - if (addCterminalDigestionBiomarkers) + // if first residue is M, then we have to add c-terminal markers for both with and without the M + if (BaseSequence.Substring(0, 1) == "M") { - AddCterminalBiomarkers(lengthOfProteolysis, fullProteinOneBasedEnd, fullProteinOneBasedBegin, minProductBaseSequenceLength, proteolyisisProductName); + //add sequences WITHOUT methionine + AddCterminalTruncations(lengthOfProteolysis, fullProteinOneBasedEnd, fullProteinOneBasedBegin + 1, minProductBaseSequenceLength, proteolyisisProductName); } + //add sequences with methionine + AddCterminalTruncations(lengthOfProteolysis, fullProteinOneBasedEnd, fullProteinOneBasedBegin, 
minProductBaseSequenceLength, proteolyisisProductName); } } else // sequence does not contain N-terminus { //Digest C-terminus - if (addCterminalDigestionBiomarkers) + if (addCterminalDigestionTruncations) { - AddCterminalBiomarkers(lengthOfProteolysis, fullProteinOneBasedEnd, fullProteinOneBasedBegin, minProductBaseSequenceLength, proteolyisisProductName); + AddCterminalTruncations(lengthOfProteolysis, fullProteinOneBasedEnd, fullProteinOneBasedBegin, minProductBaseSequenceLength, proteolyisisProductName); } //Digest N-terminus - if (addNterminalDigestionBiomarkers) + if (addNterminalDigestionTruncations) { - AddNterminalBiomarkers(lengthOfProteolysis, fullProteinOneBasedBegin, fullProteinOneBasedEnd, minProductBaseSequenceLength, proteolyisisProductName); + AddNterminalTruncations(lengthOfProteolysis, fullProteinOneBasedBegin, fullProteinOneBasedEnd, minProductBaseSequenceLength, proteolyisisProductName); } } } - - private void AddCterminalBiomarkers(int lengthOfProteolysis, int fullProteinOneBasedEnd, int fullProteinOneBasedBegin, int minProductBaseSequenceLength, string proteolyisisProductName) + /// + /// Returns of list of proteoforms with the specified number of C-terminal amino acid truncations subject to minimum length criteria + /// + private void AddCterminalTruncations(int lengthOfProteolysis, int fullProteinOneBasedEnd, int fullProteinOneBasedBegin, int minProductBaseSequenceLength, string proteolyisisProductName) { for (int i = 1; i <= lengthOfProteolysis; i++) { @@ -683,8 +652,11 @@ private void AddCterminalBiomarkers(int lengthOfProteolysis, int fullProteinOneB } } } + /// + /// Returns of list of proteoforms with the specified number of N-terminal amino acid truncations subject to minimum length criteria + /// - private void AddNterminalBiomarkers(int lengthOfProteolysis, int fullProteinOneBasedBegin, int fullProteinOneBasedEnd, int minProductBaseSequenceLength, string proteolyisisProductName) + private void AddNterminalTruncations(int 
lengthOfProteolysis, int fullProteinOneBasedBegin, int fullProteinOneBasedEnd, int minProductBaseSequenceLength, string proteolyisisProductName) { for (int i = 1; i <= lengthOfProteolysis; i++) { @@ -706,29 +678,27 @@ private void AddNterminalBiomarkers(int lengthOfProteolysis, int fullProteinOneB /// /// This needs to be added to the proteolysisProducts list to be searched /// the original products are there but those resulting from N- or C-terminal degradation still need to be added - /// - /// - /// this effects the intact proteoform as well as any original proteolysis products containing the N-terminus + /// + /// /// the same as the min detectable peptide /// the number of amino acids that can be removed from either end. - public void AddBiomarkers(bool addFullProtein = true, bool addForEachOrigninalProteolysisProduct = true, bool addNterminalDigestionBiomarkers = true, bool addCterminalDigestionBiomarkers = true, InitiatorMethionineBehavior initiatorMethionineBehavior = InitiatorMethionineBehavior.Retain, int minProductBaseSequenceLength = 7, int lengthOfProteolysis = 5) + public void AddTruncations(bool addFullProtein = true, bool addForEachOrigninalProteolysisProduct = true, bool addNterminalDigestionTruncations = true, bool addCterminalDigestionTruncations = true, int minProductBaseSequenceLength = 7, int lengthOfProteolysis = 5) { if (addFullProtein) //this loop adds the intact protoeoform and its proteolysis products to the proteolysis products list { - AddIntactProteoformToProteolysisProducts(initiatorMethionineBehavior, minProductBaseSequenceLength); - if (addNterminalDigestionBiomarkers) + AddIntactProteoformToTruncationsProducts(minProductBaseSequenceLength); + if (addNterminalDigestionTruncations) { - AddBiomarkersToProteolysisProducts(1, BaseSequence.Length, true, false, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, "full-length proteoform N-terminal digestion biomarker"); + 
AddTruncationsToExistingProteolysisProducts(1, BaseSequence.Length, true, false, minProductBaseSequenceLength, lengthOfProteolysis, "full-length proteoform N-terminal digestion biomarker"); } - if (addCterminalDigestionBiomarkers) + if (addCterminalDigestionTruncations) { - AddBiomarkersToProteolysisProducts(1, BaseSequence.Length, false, true, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, "full-length proteoform C-terminal digestion biomarker"); + AddTruncationsToExistingProteolysisProducts(1, BaseSequence.Length, false, true, minProductBaseSequenceLength, lengthOfProteolysis, "full-length proteoform C-terminal digestion biomarker"); } } if (addForEachOrigninalProteolysisProduct) // this does not include the original intact proteoform { - RemoveMethionineWhenAppropriateFromExistingProduts(initiatorMethionineBehavior); List existingProducts = ProteolysisProducts.Where(p => !p.Type.Contains("biomarker") && !p.Type.Contains("full-length proteoform")).ToList(); foreach (ProteolysisProduct product in existingProducts) { @@ -741,80 +711,27 @@ public void AddBiomarkers(bool addFullProtein = true, bool addForEachOrigninalPr proteolyisisProductName = product.Type + " " + proteolyisisProductName; } //the original proteolysis product is already on the list so we don't need to duplicate - if (addNterminalDigestionBiomarkers) + if (addNterminalDigestionTruncations) { - AddBiomarkersToProteolysisProducts(product.OneBasedBeginPosition.Value, product.OneBasedEndPosition.Value, true, false, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + AddTruncationsToExistingProteolysisProducts(product.OneBasedBeginPosition.Value, product.OneBasedEndPosition.Value, true, false, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); } - if (addCterminalDigestionBiomarkers) + if (addCterminalDigestionTruncations) { - 
AddBiomarkersToProteolysisProducts(product.OneBasedBeginPosition.Value, product.OneBasedEndPosition.Value, false, true, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + AddTruncationsToExistingProteolysisProducts(product.OneBasedBeginPosition.Value, product.OneBasedEndPosition.Value, false, true, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); } } } } CleaveOnceBetweenProteolysisProducts(); } - /// - /// When a protein has existing proteolysis products, we have to remove methionine when appropriate before creating additional proteolysis products + /// This method adds proteoforms with N- and C-terminal amino acid loss to the list of species included in top-down search /// - /// - /// - public void RemoveMethionineWhenAppropriateFromExistingProduts(InitiatorMethionineBehavior initiatorMethionineBehavior) + public void AddIntactProteoformToTruncationsProducts(int minProductBaseSequenceLength) { - List productsAtNterminusWithMethionine = _proteolysisProducts.Where(p => !p.Type.Contains("biomarker") && !p.Type.Contains("intact") && p.OneBasedBeginPosition == 1).ToList(); - - if (productsAtNterminusWithMethionine.Count > 0) + if (BaseSequence.Length >= minProductBaseSequenceLength) { - if (BaseSequence.Substring(0, 1) == "M") - { - if (productsAtNterminusWithMethionine.Count > 0) - { - List replacementNterminalProducts = new(); - if (initiatorMethionineBehavior == InitiatorMethionineBehavior.Cleave) - { - foreach (ProteolysisProduct product in productsAtNterminusWithMethionine) - { - replacementNterminalProducts.Add(new ProteolysisProduct(2, product.OneBasedEndPosition, product.Type)); - } - _proteolysisProducts.RemoveAll(p => p.OneBasedBeginPosition == 1 && !p.Type.Contains("biomarker") && !p.Type.Contains("Intact")); - _proteolysisProducts.AddRange(replacementNterminalProducts); - } - else if (initiatorMethionineBehavior == InitiatorMethionineBehavior.Variable) - { - //here we 
don't want to do anything, we leave in the products with begin position = 1. Later we'll add an additional proteolysis product so that we get the right number - } - } - } - } - } - - public void AddIntactProteoformToProteolysisProducts(InitiatorMethionineBehavior initiatorMethionineBehavior, int minProductBaseSequenceLength) - { - if (initiatorMethionineBehavior == InitiatorMethionineBehavior.Retain || initiatorMethionineBehavior == InitiatorMethionineBehavior.Variable) - { - //when it's variable, we don't have to add anything here, we'll get an additonal proteolysis product later. - if (BaseSequence.Length >= minProductBaseSequenceLength) - { - _proteolysisProducts.Add(new ProteolysisProduct(1, BaseSequence.Length, "full-length proteoform")); - } - } - else if (initiatorMethionineBehavior == InitiatorMethionineBehavior.Cleave) - { - if (BaseSequence.Substring(0, 1) == "M") - { - if (BaseSequence.Length - 1 >= minProductBaseSequenceLength) - { - _proteolysisProducts.Add(new ProteolysisProduct(2, BaseSequence.Length, "full-length proteoform")); - } - } - else - { - if (BaseSequence.Length >= minProductBaseSequenceLength) - { - _proteolysisProducts.Add(new ProteolysisProduct(1, BaseSequence.Length, "full-length proteoform")); - } - } + _proteolysisProducts.Add(new ProteolysisProduct(1, BaseSequence.Length, "full-length proteoform")); } } @@ -822,7 +739,6 @@ public void AddIntactProteoformToProteolysisProducts(InitiatorMethionineBehavior /// proteins with multiple proteolysis products are not always full cleaved. we observed proteolysis products w/ missed cleavages. /// This method allows for one missed cleavage between proteolysis products. 
/// - /// public void CleaveOnceBetweenProteolysisProducts(int minimumProductLength = 7) { diff --git a/mzLib/Proteomics/ProteolyticDigestion/Protease.cs b/mzLib/Proteomics/ProteolyticDigestion/Protease.cs index b038a2c2a..836e5c5d8 100644 --- a/mzLib/Proteomics/ProteolyticDigestion/Protease.cs +++ b/mzLib/Proteomics/ProteolyticDigestion/Protease.cs @@ -92,7 +92,7 @@ public CleavageSpecificity GetCleavageSpecificity(Protein protein, int startInde /// /// internal List GetUnmodifiedPeptides(Protein protein, int maximumMissedCleavages, InitiatorMethionineBehavior initiatorMethionineBehavior, - int minPeptideLength, int maxPeptideLength, Protease specificProtease, bool topDownBiomarkerSearch = false) + int minPeptideLength, int maxPeptideLength, Protease specificProtease, bool topDownTruncationSearch = false) { List peptides = new List(); @@ -111,7 +111,7 @@ internal List GetUnmodifiedPeptides(Protein protein, int max //top-down else if (CleavageSpecificity == CleavageSpecificity.None) { - if (!topDownBiomarkerSearch)//standard top-down + if (!topDownTruncationSearch)//standard top-down { // retain methionine if ((initiatorMethionineBehavior != InitiatorMethionineBehavior.Cleave || protein[0] != 'M') diff --git a/mzLib/Proteomics/ProteolyticDigestion/ProteinDigestion.cs b/mzLib/Proteomics/ProteolyticDigestion/ProteinDigestion.cs index 5dc97f75e..668dec2a8 100644 --- a/mzLib/Proteomics/ProteolyticDigestion/ProteinDigestion.cs +++ b/mzLib/Proteomics/ProteolyticDigestion/ProteinDigestion.cs @@ -232,9 +232,9 @@ public IEnumerable SpeedySemiSpecificDigestion(Protein prote /// /// /// - public IEnumerable Digestion(Protein protein, bool topDownBiomarkerSearch = false) + public IEnumerable Digestion(Protein protein, bool topDownTruncationSearch = false) { - return Protease.GetUnmodifiedPeptides(protein, MaximumMissedCleavages, InitiatorMethionineBehavior, MinPeptideLength, MaxPeptideLength, DigestionParams.SpecificProtease, topDownBiomarkerSearch); + return 
Protease.GetUnmodifiedPeptides(protein, MaximumMissedCleavages, InitiatorMethionineBehavior, MinPeptideLength, MaxPeptideLength, DigestionParams.SpecificProtease, topDownTruncationSearch); } } } \ No newline at end of file diff --git a/mzLib/Test/TestDigestionMotif.cs b/mzLib/Test/TestDigestionMotif.cs index f13f5a0e0..6eaee424e 100644 --- a/mzLib/Test/TestDigestionMotif.cs +++ b/mzLib/Test/TestDigestionMotif.cs @@ -255,50 +255,48 @@ public static void TestNterminalProteolysis() Protein p = new Protein("MPEPTIDE", "P12345"); int fullProteinOneBasedBegin = 1; int fullProteinOneBasedEnd = 8; - bool addNterminalDegestionBiomarkers = true; - bool addCterminalDigestionBiomarkers = false; - InitiatorMethionineBehavior initiatorMethionineBehavior = InitiatorMethionineBehavior.Retain; + bool addNterminalDegestionTruncations = true; + bool addCterminalDigestionTruncations = false; int minProductBaseSequenceLength = 2; int lengthOfProteolysis = 3; string proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); List products = p.ProteolysisProducts.ToList(); - Assert.AreEqual(3, products.Count); + Assert.AreEqual(4, products.Count); List productSequences = new List(); foreach (ProteolysisProduct product in products) { productSequences.Add(p.BaseSequence.Substring((int)product.OneBasedBeginPosition - 1, (int)product.OneBasedEndPosition - (int)product.OneBasedBeginPosition + 1)); } - List expectedProductSequences = new List { "PEPTIDE", "EPTIDE", "PTIDE" }; + List expectedProductSequences = new List { 
"PEPTIDE", "EPTIDE", "PTIDE", "TIDE" }; CollectionAssert.AreEquivalent(expectedProductSequences, productSequences); p = new Protein("MPEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 8; - addNterminalDegestionBiomarkers = true; - addCterminalDigestionBiomarkers = false; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Cleave; + addNterminalDegestionTruncations = true; + addCterminalDigestionTruncations = false; minProductBaseSequenceLength = 2; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); - Assert.AreEqual(3, products.Count); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); + Assert.AreEqual(4, products.Count); productSequences = new List(); foreach (ProteolysisProduct product in products) { productSequences.Add(p.BaseSequence.Substring((int)product.OneBasedBeginPosition - 1, (int)product.OneBasedEndPosition - (int)product.OneBasedBeginPosition + 1)); } - expectedProductSequences = new List { "EPTIDE", "PTIDE", "TIDE" }; + expectedProductSequences = new List {"PEPTIDE", "EPTIDE", "PTIDE", "TIDE" }; CollectionAssert.AreEquivalent(expectedProductSequences, productSequences); + p = new Protein("PEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 7; - addNterminalDegestionBiomarkers = true; - addCterminalDigestionBiomarkers = false; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Retain; + addNterminalDegestionTruncations = true; + 
addCterminalDigestionTruncations = false; minProductBaseSequenceLength = 2; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); Assert.AreEqual(3, products.Count); productSequences = new List(); @@ -311,13 +309,12 @@ public static void TestNterminalProteolysis() p = new Protein("PEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 7; - addNterminalDegestionBiomarkers = true; - addCterminalDigestionBiomarkers = false; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Cleave; + addNterminalDegestionTruncations = true; + addCterminalDigestionTruncations = false; minProductBaseSequenceLength = 2; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); Assert.AreEqual(3, products.Count); productSequences = new List(); foreach (ProteolysisProduct product in products) @@ -329,13 +326,12 @@ public static 
void TestNterminalProteolysis() p = new Protein("MPEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 8; - addNterminalDegestionBiomarkers = true; - addCterminalDigestionBiomarkers = false; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Retain; + addNterminalDegestionTruncations = true; + addCterminalDigestionTruncations = false; minProductBaseSequenceLength = 6; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); Assert.AreEqual(2, products.Count); productSequences = new List(); @@ -354,50 +350,47 @@ public static void TestCterminalProteolysis() Protein p = new Protein("MPEPTIDE", "P12345"); int fullProteinOneBasedBegin = 1; int fullProteinOneBasedEnd = 8; - bool addNterminalDegestionBiomarkers = false; - bool addCterminalDigestionBiomarkers = true; - InitiatorMethionineBehavior initiatorMethionineBehavior = InitiatorMethionineBehavior.Retain; + bool addNterminalDegestionTruncations = false; + bool addCterminalDigestionTruncations = true; int minProductBaseSequenceLength = 2; int lengthOfProteolysis = 3; string proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, 
fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); List products = p.ProteolysisProducts.ToList(); - Assert.AreEqual(3, products.Count); + Assert.AreEqual(6, products.Count); List productSequences = new List(); foreach (ProteolysisProduct product in products) { productSequences.Add(p.BaseSequence.Substring((int)product.OneBasedBeginPosition - 1, (int)product.OneBasedEndPosition - (int)product.OneBasedBeginPosition + 1)); } - List expectedProductSequences = new List { "MPEPTID", "MPEPTI", "MPEPT" }; + List expectedProductSequences = new List { "MPEPTID", "MPEPTI", "MPEPT", "PEPTID", "PEPTI", "PEPT" }; CollectionAssert.AreEquivalent(expectedProductSequences, productSequences); p = new Protein("MPEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 8; - addNterminalDegestionBiomarkers = false; - addCterminalDigestionBiomarkers = true; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Cleave; + addNterminalDegestionTruncations = false; + addCterminalDigestionTruncations = true; minProductBaseSequenceLength = 2; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); - Assert.AreEqual(3, products.Count); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); + Assert.AreEqual(6, products.Count); productSequences = new List(); foreach (ProteolysisProduct product in products) { 
productSequences.Add(p.BaseSequence.Substring((int)product.OneBasedBeginPosition - 1, (int)product.OneBasedEndPosition - (int)product.OneBasedBeginPosition + 1)); } - expectedProductSequences = new List { "PEPTID", "PEPTI", "PEPT" }; + expectedProductSequences = new List { "MPEPTID", "MPEPTI", "MPEPT", "PEPTID", "PEPTI", "PEPT" }; CollectionAssert.AreEquivalent(expectedProductSequences, productSequences); p = new Protein("PEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 7; - addNterminalDegestionBiomarkers = false; - addCterminalDigestionBiomarkers = true; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Retain; + addNterminalDegestionTruncations = false; + addCterminalDigestionTruncations = true; minProductBaseSequenceLength = 2; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); Assert.AreEqual(3, products.Count); productSequences = new List(); @@ -410,13 +403,12 @@ public static void TestCterminalProteolysis() p = new Protein("PEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 7; - addNterminalDegestionBiomarkers = false; - addCterminalDigestionBiomarkers = true; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Cleave; + addNterminalDegestionTruncations = false; + addCterminalDigestionTruncations = true; minProductBaseSequenceLength = 2; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - 
p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); Assert.AreEqual(3, products.Count); productSequences = new List(); @@ -434,50 +426,47 @@ public static void TestProteolysisBothTermini() Protein p = new Protein("MPEPTIDE", "P12345"); int fullProteinOneBasedBegin = 1; int fullProteinOneBasedEnd = 8; - bool addNterminalDegestionBiomarkers = true; - bool addCterminalDigestionBiomarkers = true; - InitiatorMethionineBehavior initiatorMethionineBehavior = InitiatorMethionineBehavior.Retain; + bool addNterminalDegestionTruncations = true; + bool addCterminalDigestionTruncations = true; int minProductBaseSequenceLength = 2; int lengthOfProteolysis = 3; string proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); List products = p.ProteolysisProducts.ToList(); - Assert.AreEqual(6, products.Count); + Assert.AreEqual(10, products.Count); List productSequences = new List(); foreach (ProteolysisProduct product in products) { productSequences.Add(p.BaseSequence.Substring((int)product.OneBasedBeginPosition - 1, 
(int)product.OneBasedEndPosition - (int)product.OneBasedBeginPosition + 1)); } - List expectedProductSequences = new List { "MPEPTID", "MPEPTI", "MPEPT", "PEPTIDE", "EPTIDE", "PTIDE" }; + List expectedProductSequences = new List { "MPEPTID", "MPEPTI", "MPEPT", "PEPTID", "PEPTI", "PEPT", "PEPTIDE", "EPTIDE", "PTIDE", "TIDE" }; CollectionAssert.AreEquivalent(expectedProductSequences, productSequences); p = new Protein("MPEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 8; - addNterminalDegestionBiomarkers = true; - addCterminalDigestionBiomarkers = true; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Cleave; + addNterminalDegestionTruncations = true; + addCterminalDigestionTruncations = true; minProductBaseSequenceLength = 2; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); - Assert.AreEqual(6, products.Count); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); + Assert.AreEqual(10, products.Count); productSequences = new List(); foreach (ProteolysisProduct product in products) { productSequences.Add(p.BaseSequence.Substring((int)product.OneBasedBeginPosition - 1, (int)product.OneBasedEndPosition - (int)product.OneBasedBeginPosition + 1)); } - expectedProductSequences = new List { "PEPTID", "PEPTI", "PEPT", "EPTIDE", "PTIDE", "TIDE" }; + expectedProductSequences = new List { "MPEPTID", "MPEPTI", "MPEPT", "PEPTIDE", "PEPTID", "PEPTI", "PEPT", "EPTIDE", "PTIDE", "TIDE" }; 
CollectionAssert.AreEquivalent(expectedProductSequences, productSequences); p = new Protein("PEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 7; - addNterminalDegestionBiomarkers = true; - addCterminalDigestionBiomarkers = true; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Retain; + addNterminalDegestionTruncations = true; + addCterminalDigestionTruncations = true; minProductBaseSequenceLength = 2; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); Assert.AreEqual(6, products.Count); productSequences = new List(); @@ -490,13 +479,12 @@ public static void TestProteolysisBothTermini() p = new Protein("PEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 7; - addNterminalDegestionBiomarkers = true; - addCterminalDigestionBiomarkers = true; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Cleave; + addNterminalDegestionTruncations = true; + addCterminalDigestionTruncations = true; minProductBaseSequenceLength = 2; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, 
addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); Assert.AreEqual(6, products.Count); productSequences = new List(); @@ -509,20 +497,19 @@ public static void TestProteolysisBothTermini() p = new Protein("MPEPTIDE", "P12345"); fullProteinOneBasedBegin = 1; fullProteinOneBasedEnd = 8; - addNterminalDegestionBiomarkers = true; - addCterminalDigestionBiomarkers = true; - initiatorMethionineBehavior = InitiatorMethionineBehavior.Cleave; + addNterminalDegestionTruncations = true; + addCterminalDigestionTruncations = true; minProductBaseSequenceLength = 6; lengthOfProteolysis = 3; proteolyisisProductName = "biomarker"; - p.AddBiomarkersToProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionBiomarkers, addCterminalDigestionBiomarkers, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); - Assert.AreEqual(2, products.Count); + p.AddTruncationsToExistingProteolysisProducts(fullProteinOneBasedBegin, fullProteinOneBasedEnd, addNterminalDegestionTruncations, addCterminalDigestionTruncations, minProductBaseSequenceLength, lengthOfProteolysis, proteolyisisProductName); products = p.ProteolysisProducts.ToList(); + Assert.AreEqual(5, products.Count); productSequences = new List(); foreach (ProteolysisProduct product in products) { productSequences.Add(p.BaseSequence.Substring((int)product.OneBasedBeginPosition - 1, (int)product.OneBasedEndPosition - (int)product.OneBasedBeginPosition + 1)); } - expectedProductSequences = new List { "PEPTID", "EPTIDE" }; + expectedProductSequences = new List {"PEPTIDE", "EPTIDE", "PEPTID", "MPEPTID", "MPEPTI" }; CollectionAssert.AreEquivalent(expectedProductSequences, productSequences); } diff --git a/mzLib/Test/TestMsDataFile.cs b/mzLib/Test/TestMsDataFile.cs index 
f1efb02fa..8a90471ed 100644 --- a/mzLib/Test/TestMsDataFile.cs +++ b/mzLib/Test/TestMsDataFile.cs @@ -144,6 +144,37 @@ public void DataFileTest() Assert.AreEqual(0, ok3); } + [Test] + public void TestFunctionsOfMsDataScan() + { + MsDataScan theSpectrum = new MsDataScan(_mzSpectrumA, 1, 1, true, Polarity.Positive, 1, new MzRange(300, 1000), "fake scan filter", MZAnalyzerType.Unknown, _mzSpectrumA.SumOfAllY, 1, null, "scan=1"); + List isolatedMassesAndCharges = theSpectrum.GetIsolatedMassesAndCharges(_mzSpectrumA, 1, 10, 10, 1).ToList(); + Assert.AreEqual(0, isolatedMassesAndCharges.Count); //Isolation range is null, so we get an empty set + + Assert.Throws(() => theSpectrum.RefineSelectedMzAndIntensity(_mzSpectrumA)); //no isolation Mz throws error + + theSpectrum.SetOneBasedPrecursorScanNumber(6); + Assert.AreEqual(6, theSpectrum.OneBasedPrecursorScanNumber); + + theSpectrum.SetNativeID("bubba"); + Assert.AreEqual("bubba", theSpectrum.NativeId); + + theSpectrum.SetIsolationMz(42); + Assert.AreEqual(42, theSpectrum.IsolationMz); + } + + [Test] + public void MoreMsDataFilesTests() + { + MsDataFile fakeDataFile = new MsDataFile(new MsDataScan[1], new SourceFile(@"scan number only nativeID format", "mzML format", null, "SHA-1", @"C:\fake.mzML", null)); + Assert.AreEqual(1, fakeDataFile.NumSpectra); + Assert.AreEqual("scan number only nativeID format", fakeDataFile.SourceFile.NativeIdFormat); + Assert.AreEqual("mzML format", fakeDataFile.SourceFile.MassSpectrometerFileFormat); + Assert.IsNull(fakeDataFile.SourceFile.CheckSum); + Assert.AreEqual("SHA-1", fakeDataFile.SourceFile.FileChecksumType); + Assert.IsNull(fakeDataFile.SourceFile.Id); + } + [Test] public void TestAMoreRealFile() { diff --git a/mzLib/Test/TestPeptideWithSetMods.cs b/mzLib/Test/TestPeptideWithSetMods.cs index c3607ff6a..afe6e0350 100644 --- a/mzLib/Test/TestPeptideWithSetMods.cs +++ b/mzLib/Test/TestPeptideWithSetMods.cs @@ -931,30 +931,30 @@ public static void CountTargetsWithMatchingDecoys() } 
[Test] - public static void TestPeptideWithSetModsReturnsBiomarkersInTopDown() + public static void TestPeptideWithSetModsReturnsTruncationsInTopDown() { string xmlDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", "humanInsulin.xml"); Protein insulin = ProteinDbLoader.LoadProteinXML(xmlDatabase, true, - DecoyType.None, null, false, null, out var unknownModifications, addBiomarkers: true)[0]; + DecoyType.None, null, false, null, out var unknownModifications, addTruncations: true)[0]; Protease protease = new Protease("top-down", CleavageSpecificity.None, "", "", new List(), null); - List insulinBiomarkers = insulin.Digest(new DigestionParams(protease: protease.Name), new List(), new List(), topDownBiomarkerSearch: true).ToList(); - Assert.AreEqual(56, insulinBiomarkers.Count); + List insulinTruncations = insulin.Digest(new DigestionParams(protease: protease.Name), new List(), new List(), topDownTruncationSearch: true).ToList(); + Assert.AreEqual(68, insulinTruncations.Count); } [Test] - public static void TestPeptideWithSetModsReturnsDecoyBiomarkersInTopDown() + public static void TestPeptideWithSetModsReturnsDecoyTruncationsInTopDown() { string xmlDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", "humanInsulin.xml"); List insulinProteins = ProteinDbLoader.LoadProteinXML(xmlDatabase, true, - DecoyType.Reverse, null, false, null, out var unknownModifications, addBiomarkers: true); + DecoyType.Reverse, null, false, null, out var unknownModifications, addTruncations: true); Protease protease = new Protease("top-down", CleavageSpecificity.None, "", "", new List(), null); - List insulintTargetBiomarkers = insulinProteins.Where(p=>!p.IsDecoy).First().Digest(new DigestionParams(protease: protease.Name), new List(), new List(), topDownBiomarkerSearch: true).ToList(); - Assert.AreEqual(56, insulintTargetBiomarkers.Count); - List insulintDecoyBiomarkers = insulinProteins.Where(p => p.IsDecoy).First().Digest(new 
DigestionParams(protease: protease.Name), new List(), new List(), topDownBiomarkerSearch: true).ToList(); - Assert.AreEqual(56, insulintDecoyBiomarkers.Count); + List insulintTargetTruncations = insulinProteins.Where(p=>!p.IsDecoy).First().Digest(new DigestionParams(protease: protease.Name), new List(), new List(), topDownTruncationSearch: true).ToList(); + Assert.AreEqual(68, insulintTargetTruncations.Count); + List insulintDecoyTruncations = insulinProteins.Where(p => p.IsDecoy).First().Digest(new DigestionParams(protease: protease.Name), new List(), new List(), topDownTruncationSearch: true).ToList(); + Assert.AreEqual(68, insulintDecoyTruncations.Count); } [Test] diff --git a/mzLib/Test/TestProteinDatabase.cs b/mzLib/Test/TestProteinDatabase.cs index f017893f2..ee2012065 100644 --- a/mzLib/Test/TestProteinDatabase.cs +++ b/mzLib/Test/TestProteinDatabase.cs @@ -12,62 +12,62 @@ namespace Test public sealed class TestProteinDatabase { [Test] - public static void MakeAnewProteinWithAndWithoutBiomarkers() + public static void MakeAnewProteinWithAndWithoutTruncations() { - Protein noBiomarkerProtein1 = new("MPEPTIDEPEPTIDEPEPTIDE", "ACCESSION", addBiomarkers: false); - Assert.AreEqual(0, noBiomarkerProtein1.ProteolysisProducts.Count()); + Protein noTruncationProtein1 = new("MPEPTIDEPEPTIDEPEPTIDE", "ACCESSION", addTruncations: false); + Assert.AreEqual(0, noTruncationProtein1.ProteolysisProducts.Count()); - noBiomarkerProtein1.AddIntactProteoformToProteolysisProducts(Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Cleave, 7); - Assert.AreEqual(1, noBiomarkerProtein1.ProteolysisProducts.Count()); + noTruncationProtein1.AddIntactProteoformToTruncationsProducts(7); + Assert.AreEqual(1, noTruncationProtein1.ProteolysisProducts.Count()); - Protein noBiomarkerProtein2 = new("MPEPTIDEPEPTIDEPEPTIDE", "ACCESSION", addBiomarkers: false); - Assert.AreEqual(0, noBiomarkerProtein2.ProteolysisProducts.Count()); + Protein noTruncationProtein2 = 
new("MPEPTIDEPEPTIDEPEPTIDE", "ACCESSION", addTruncations: false); + Assert.AreEqual(0, noTruncationProtein2.ProteolysisProducts.Count()); - noBiomarkerProtein2.AddIntactProteoformToProteolysisProducts(Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Retain, 7); - Assert.AreEqual(1, noBiomarkerProtein2.ProteolysisProducts.Count()); + noTruncationProtein2.AddIntactProteoformToTruncationsProducts(7); + Assert.AreEqual(1, noTruncationProtein2.ProteolysisProducts.Count()); - Protein noBiomarkerProtein3 = new("MPEPTIDEPEPTIDEPEPTIDE", "ACCESSION", addBiomarkers: false); - Assert.AreEqual(0, noBiomarkerProtein3.ProteolysisProducts.Count()); + Protein noTruncationProtein3 = new("MPEPTIDEPEPTIDEPEPTIDE", "ACCESSION", addTruncations: false); + Assert.AreEqual(0, noTruncationProtein3.ProteolysisProducts.Count()); - noBiomarkerProtein3.AddIntactProteoformToProteolysisProducts(Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Variable, 7); - Assert.AreEqual(1, noBiomarkerProtein3.ProteolysisProducts.Count()); + noTruncationProtein3.AddIntactProteoformToTruncationsProducts(7); + Assert.AreEqual(1, noTruncationProtein3.ProteolysisProducts.Count()); - Protein biomarkerProtein1 = new("PEPTIDEPEPTIDEPEPTIDE", "ACCESSION", addBiomarkers: true); + Protein biomarkerProtein1 = new("PEPTIDEPEPTIDEPEPTIDE", "ACCESSION", addTruncations: true); Assert.AreEqual(11, biomarkerProtein1.ProteolysisProducts.Count()); - Protein biomarkerProtein2 = new("PEPTIDEPEPTIDEPEPTIDE", "ACCESSION", addBiomarkers: false); - biomarkerProtein2.AddIntactProteoformToProteolysisProducts(Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Cleave, 7); + Protein biomarkerProtein2 = new("PEPTIDEPEPTIDEPEPTIDE", "ACCESSION", addTruncations: false); + biomarkerProtein2.AddIntactProteoformToTruncationsProducts(7); Assert.AreEqual(1, biomarkerProtein2.ProteolysisProducts.Count()); } [Test] - public static void AddBiomarkersToProteolysisProducts() + public static void 
AddTruncationsToProteolysisProducts() { //with xml, here for this protein, there are existing proteolysis products string xmlDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", "humanInsulin.xml"); Protein insulinProteinFromXml1 = ProteinDbLoader.LoadProteinXML(xmlDatabase, true, - DecoyType.None, null, false, null, out var unknownModifications1, addBiomarkers: false)[0]; + DecoyType.None, null, false, null, out var unknownModifications1, addTruncations: false)[0]; Assert.AreEqual(4, insulinProteinFromXml1.ProteolysisProducts.Count()); - insulinProteinFromXml1.AddBiomarkersToProteolysisProducts(1, insulinProteinFromXml1.BaseSequence.Length, true, true, Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Retain, 7, 5, "biomarker"); - Assert.AreEqual(14, insulinProteinFromXml1.ProteolysisProducts.Count()); + insulinProteinFromXml1.AddTruncationsToExistingProteolysisProducts(1, insulinProteinFromXml1.BaseSequence.Length, true, true, 7, 5, "biomarker"); + Assert.AreEqual(20, insulinProteinFromXml1.ProteolysisProducts.Count()); Protein insulinProteinFromXml2 = ProteinDbLoader.LoadProteinXML(xmlDatabase, true, - DecoyType.None, null, false, null, out var unknownModifications2, addBiomarkers: false)[0]; + DecoyType.None, null, false, null, out var unknownModifications2, addTruncations: false)[0]; Assert.AreEqual(4, insulinProteinFromXml2.ProteolysisProducts.Count()); - insulinProteinFromXml2.AddBiomarkersToProteolysisProducts(1, insulinProteinFromXml1.BaseSequence.Length, true, true, Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Cleave, 7, 5, "biomarker"); - Assert.AreEqual(14, insulinProteinFromXml2.ProteolysisProducts.Count()); + insulinProteinFromXml2.AddTruncationsToExistingProteolysisProducts(1, insulinProteinFromXml1.BaseSequence.Length, true, true, 7, 5, "biomarker"); + Assert.AreEqual(20, insulinProteinFromXml2.ProteolysisProducts.Count()); Protein insulinProteinFromXml3 = ProteinDbLoader.LoadProteinXML(xmlDatabase, 
true, - DecoyType.None, null, false, null, out var unknownModifications3, addBiomarkers: false)[0]; + DecoyType.None, null, false, null, out var unknownModifications3, addTruncations: false)[0]; Assert.AreEqual(4, insulinProteinFromXml3.ProteolysisProducts.Count()); - insulinProteinFromXml3.AddBiomarkersToProteolysisProducts(1, insulinProteinFromXml1.BaseSequence.Length, true, true, Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Variable, 7, 5, "biomarker"); - Assert.AreEqual(15, insulinProteinFromXml3.ProteolysisProducts.Count()); + insulinProteinFromXml3.AddTruncationsToExistingProteolysisProducts(1, insulinProteinFromXml1.BaseSequence.Length, true, true, 7, 5, "biomarker"); + Assert.AreEqual(20, insulinProteinFromXml3.ProteolysisProducts.Count()); } [Test] @@ -78,45 +78,40 @@ public static void TestRemoveMethionineWhenAppropriate() Protein insulinProteinFromXml1 = ProteinDbLoader.LoadProteinXML(xmlDatabase, true, - DecoyType.None, null, false, null, out var unknownModifications1, addBiomarkers: false)[0]; + DecoyType.None, null, false, null, out var unknownModifications1, addTruncations: false)[0]; Assert.AreEqual(4, insulinProteinFromXml1.ProteolysisProducts.Count()); - insulinProteinFromXml1.RemoveMethionineWhenAppropriateFromExistingProduts(Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Retain); - Assert.AreEqual(1, insulinProteinFromXml1.ProteolysisProducts.First().OneBasedBeginPosition.Value); Protein insulinProteinFromXml2 = ProteinDbLoader.LoadProteinXML(xmlDatabase, true, - DecoyType.None, null, false, null, out var unknownModifications2, addBiomarkers: false)[0]; + DecoyType.None, null, false, null, out var unknownModifications2, addTruncations: false)[0]; Assert.AreEqual(4, insulinProteinFromXml2.ProteolysisProducts.Count()); - insulinProteinFromXml2.RemoveMethionineWhenAppropriateFromExistingProduts(Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Cleave); - Assert.AreEqual(2, 
insulinProteinFromXml2.ProteolysisProducts.ToList()[3].OneBasedBeginPosition.Value); Protein insulinProteinFromXml3 = ProteinDbLoader.LoadProteinXML(xmlDatabase, true, - DecoyType.None, null, false, null, out var unknownModifications3, addBiomarkers: false)[0]; + DecoyType.None, null, false, null, out var unknownModifications3, addTruncations: false)[0]; Assert.AreEqual(4, insulinProteinFromXml3.ProteolysisProducts.Count()); - insulinProteinFromXml3.RemoveMethionineWhenAppropriateFromExistingProduts(Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Variable); - Assert.AreEqual(1, insulinProteinFromXml3.ProteolysisProducts.ToList()[0].OneBasedBeginPosition.Value); } [Test] - public static void TestAddBiomarkersIntactAndExistingProteolysisProducts() + public static void TestAddTruncationsIntactAndExistingProteolysisProducts() { //Note: existing proteoloysis products are now subjected to additional proteolysis. //with fasta (there are no existing proteolysis products. so we rely on the code to deal with that non-factor) string fastaDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", "humanInsulin.fasta"); Protein insulinProteinFromFasta = ProteinDbLoader.LoadProteinFasta(fastaDatabase, true, DecoyType.None, false, out var dbErrors, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, - ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, addBiomarkers: true)[0]; + ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, addTruncations: true)[0]; - Assert.AreEqual(11, insulinProteinFromFasta.ProteolysisProducts.Count()); + Assert.AreEqual(17, insulinProteinFromFasta.ProteolysisProducts.Count()); Assert.AreEqual(1, insulinProteinFromFasta.ProteolysisProducts.Where(p => p.Type == "full-length proteoform").Count()); - Assert.AreEqual(10, insulinProteinFromFasta.ProteolysisProducts.Where(p => 
p.Type.Contains("biomarker")).Count()); + Assert.AreEqual(16, insulinProteinFromFasta.ProteolysisProducts.Where(p => p.Type.Contains("biomarker")).Count()); + + List expectedBegins = new() { 1, 2, 3, 4, 5, 6, 7, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1 }; + List expectedEnds = new() { 110, 110, 110, 110, 110, 110, 110, 109, 108, 107, 106, 105, 109, 108, 107, 106, 105 }; - List expectedBegins = new List { 1, 2, 3, 4, 5, 6, 1, 1, 1, 1, 1 }; - List expectedEnds = new List { 110, 110, 110, 110, 110, 110, 109, 108, 107, 106, 105 }; CollectionAssert.AreEquivalent(expectedBegins, insulinProteinFromFasta.ProteolysisProducts.Select(p => p.OneBasedBeginPosition).ToList()); CollectionAssert.AreEquivalent(expectedEnds, insulinProteinFromFasta.ProteolysisProducts.Select(p => p.OneBasedEndPosition).ToList()); @@ -124,14 +119,14 @@ public static void TestAddBiomarkersIntactAndExistingProteolysisProducts() string xmlDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", "humanInsulin.xml"); Protein insulinProteinFromXml = ProteinDbLoader.LoadProteinXML(xmlDatabase, true, - DecoyType.None, null, false, null, out var unknownModifications, addBiomarkers: true)[0]; + DecoyType.None, null, false, null, out var unknownModifications, addTruncations: true)[0]; - Assert.AreEqual(56, insulinProteinFromXml.ProteolysisProducts.Count()); + Assert.AreEqual(68, insulinProteinFromXml.ProteolysisProducts.Count()); Assert.AreEqual(1, insulinProteinFromXml.ProteolysisProducts.Where(p => p.Type == "full-length proteoform").Count()); - Assert.AreEqual(50, insulinProteinFromXml.ProteolysisProducts.Where(p => p.Type.Contains("biomarker")).Count()); //4 are original proteolysis products + Assert.AreEqual(62, insulinProteinFromXml.ProteolysisProducts.Where(p => p.Type.Contains("biomarker")).Count()); //4 are original proteolysis products - expectedBegins = new List { 1, 25, 57, 90, 1, 2, 3, 4, 5, 6, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 1, 1, 1, 1, 1, 26, 27, 28, 29, 30, 25, 25, 25, 25, 25, 58, 59, 
60, 61, 62, 57, 57, 57, 57, 57, 91, 92, 93, 94, 95, 90, 90, 90, 90, 90, 25 }; - expectedEnds = new List { 24, 54, 87, 110, 110, 110, 110, 110, 110, 110, 109, 108, 107, 106, 105, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 54, 54, 54, 54, 54, 53, 52, 51, 50, 49, 87, 87, 87, 87, 87, 86, 85, 84, 83, 82, 110, 110, 110, 110, 110, 109, 108, 107, 106, 105, 110 }; + expectedBegins = new List { 1, 25, 57, 90, 1, 2, 3, 4, 5, 6, 7, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 26, 27, 28, 29, 30, 25, 25, 25, 25, 25, 58, 59, 60, 61, 62, 57, 57, 57, 57, 57, 91, 92, 93, 94, 95, 90, 90, 90, 90, 90, 25 }; + expectedEnds = new List { 24, 54, 87, 110, 110, 110, 110, 110, 110, 110, 110, 109, 108, 107, 106, 105, 109, 108, 107, 106, 105, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 23, 22, 21, 20, 19, 54, 54, 54, 54, 54, 53, 52, 51, 50, 49, 87, 87, 87, 87, 87, 86, 85, 84, 83, 82, 110, 110, 110, 110, 110, 109, 108, 107, 106, 105, 110 }; List reportedBegins = insulinProteinFromXml.ProteolysisProducts.Select(p => p.OneBasedBeginPosition.Value).ToList(); List reportedEnds = insulinProteinFromXml.ProteolysisProducts.Select(p => p.OneBasedEndPosition.Value).ToList(); @@ -147,11 +142,11 @@ public static void TestMethionineCleave() //with fasta (there are no existing proteolysis products. 
so we rely on the code to deal with that non-factor) string fastaDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", "humanInsulin.fasta"); Protein insulinProteinFromFasta = ProteinDbLoader.LoadProteinFasta(fastaDatabase, true, DecoyType.None, false, out var dbErrors, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, - ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, addBiomarkers: false)[0]; + ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, addTruncations: false)[0]; Assert.AreEqual(0, insulinProteinFromFasta.ProteolysisProducts.Count()); - insulinProteinFromFasta.AddBiomarkers(initiatorMethionineBehavior: Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Cleave); - Assert.AreEqual(11, insulinProteinFromFasta.ProteolysisProducts.Count()); + insulinProteinFromFasta.AddTruncations(); + Assert.AreEqual(17, insulinProteinFromFasta.ProteolysisProducts.Count()); } [Test] @@ -162,12 +157,12 @@ public static void TestMethionineCleaveNoMethionine() //with fasta (there are no existing proteolysis products. 
so we rely on the code to deal with that non-factor) string fastaDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", "humanInsulin.fasta"); Protein insulinProteinFromFasta = ProteinDbLoader.LoadProteinFasta(fastaDatabase, true, DecoyType.None, false, out var dbErrors, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, - ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, addBiomarkers: false)[0]; + ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, addTruncations: false)[0]; - Protein noMethionine = new Protein(insulinProteinFromFasta.BaseSequence.Substring(1,insulinProteinFromFasta.BaseSequence.Length-1), insulinProteinFromFasta.Accession); + Protein noMethionine = new(insulinProteinFromFasta.BaseSequence.Substring(1,insulinProteinFromFasta.BaseSequence.Length-1), insulinProteinFromFasta.Accession); Assert.AreEqual(0, noMethionine.ProteolysisProducts.Count()); - noMethionine.AddBiomarkers(initiatorMethionineBehavior: Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Cleave); + noMethionine.AddTruncations(); Assert.AreEqual(11, noMethionine.ProteolysisProducts.Count()); } @@ -179,23 +174,23 @@ public static void TestMethionineVariable() //with fasta (there are no existing proteolysis products. 
so we rely on the code to deal with that non-factor) string fastaDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", "humanInsulin.fasta"); Protein insulinProteinFromFasta = ProteinDbLoader.LoadProteinFasta(fastaDatabase, true, DecoyType.None, false, out var dbErrors, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, - ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, addBiomarkers: false)[0]; + ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, addTruncations: false)[0]; Assert.AreEqual(0, insulinProteinFromFasta.ProteolysisProducts.Count()); - insulinProteinFromFasta.AddBiomarkers(initiatorMethionineBehavior: Proteomics.ProteolyticDigestion.InitiatorMethionineBehavior.Variable); - Assert.AreEqual(12, insulinProteinFromFasta.ProteolysisProducts.Count()); + insulinProteinFromFasta.AddTruncations(); + Assert.AreEqual(17, insulinProteinFromFasta.ProteolysisProducts.Count()); } [Test] - public static void TestDoNotWriteBiomarkersToXml() + public static void TestDoNotWriteTruncationsToXml() { //with xml, here for this protein, there are existing proteolysis products string xmlDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "TestProtein.xml"); List proteins = ProteinDbLoader.LoadProteinXML(xmlDatabase, true, - DecoyType.Reverse, null, false, null, out var unknownModifications, addBiomarkers: true); + DecoyType.Reverse, null, false, null, out var unknownModifications, addTruncations: true); - Assert.AreEqual(10, proteins[0].ProteolysisProducts.Where(p => p.Type.Contains("biomarker")).Count()); + Assert.AreEqual(16, proteins[0].ProteolysisProducts.Where(p => p.Type.Contains("biomarker")).Count()); string testOutXml = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "testOutXml.xml"); ProteinDbWriter.WriteXmlDatabase(new Dictionary>>(), 
proteins.Where(p => !p.IsDecoy).ToList(), testOutXml); @@ -204,7 +199,7 @@ List proteins List moreProteins = ProteinDbLoader.LoadProteinXML(testOutXml, true, - DecoyType.Reverse, null, false, null, out var moreUnknownModifications, addBiomarkers: false); + DecoyType.Reverse, null, false, null, out var moreUnknownModifications, addTruncations: false); Assert.AreEqual(0, moreProteins[0].ProteolysisProducts.Where(p => p.Type.Contains("biomarker")).Count()); File.Delete(testOutXml); diff --git a/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs b/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs index 31f41cd6c..2f4db2ce0 100644 --- a/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs +++ b/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs @@ -55,7 +55,7 @@ public static class ProteinDbLoader [SuppressMessage("Microsoft.Usage", "CA2202:Do not dispose objects multiple times")] public static List LoadProteinXML(string proteinDbLocation, bool generateTargets, DecoyType decoyType, IEnumerable allKnownModifications, bool isContaminant, IEnumerable modTypesToExclude, out Dictionary unknownModifications, int maxThreads = -1, - int maxHeterozygousVariants = 4, int minAlleleDepth = 1, bool addBiomarkers = false) + int maxHeterozygousVariants = 4, int minAlleleDepth = 1, bool addTruncations = false) { List prespecified = GetPtmListFromProteinXml(proteinDbLocation); allKnownModifications = allKnownModifications ?? 
new List(); @@ -93,9 +93,9 @@ public static List LoadProteinXML(string proteinDbLocation, bool genera Protein newProtein = block.ParseEndElement(xml, modTypesToExclude, unknownModifications, isContaminant, proteinDbLocation); if (newProtein != null) { - if (addBiomarkers) + if (addTruncations) { - newProtein.AddBiomarkers(); + newProtein.AddTruncations(); } targets.Add(newProtein); } @@ -164,7 +164,7 @@ public static List GetPtmListFromProteinXml(string proteinDbLocati /// public static List LoadProteinFasta(string proteinDbLocation, bool generateTargets, DecoyType decoyType, bool isContaminant, out List errors, FastaHeaderFieldRegex accessionRegex = null, FastaHeaderFieldRegex fullNameRegex = null, FastaHeaderFieldRegex nameRegex = null, - FastaHeaderFieldRegex geneNameRegex = null, FastaHeaderFieldRegex organismRegex = null, int maxThreads = -1, bool addBiomarkers = false) + FastaHeaderFieldRegex geneNameRegex = null, FastaHeaderFieldRegex organismRegex = null, int maxThreads = -1, bool addTruncations = false) { FastaHeaderType? HeaderType = null; HashSet unique_accessions = new HashSet(); @@ -273,7 +273,7 @@ public static List LoadProteinFasta(string proteinDbLocation, bool gene } unique_accessions.Add(accession); Protein protein = new Protein(sequence, accession, organism, geneName, name: name, fullName: fullName, - isContaminant: isContaminant, databaseFilePath: proteinDbLocation, addBiomarkers: addBiomarkers); + isContaminant: isContaminant, databaseFilePath: proteinDbLocation, addTruncations: addTruncations); if (protein.Length == 0) { errors.Add("Line" + line + ", Protein Length of 0: " + protein.Name + " was skipped from database: " + proteinDbLocation);