From 0315587a4cbcaad1994616b683fbef5e119da809 Mon Sep 17 00:00:00 2001
From: Alex
Date: Tue, 4 Jun 2024 15:52:27 -0500
Subject: [PATCH] Edited FlashLFQ to include option for verbose peaks

---
Review notes (this section is ignored by git am):
 * Verbose rows now carry exactly the three fields named in
   VerboseTabSeparatedHeader (no trailing tab, "\t\t" when empty).
 * WritePeakDictionary spaces isotope positions by 1.0033548381 / |z| in m/z
   (new optional chargeState parameter, default 1) and widens the tolerance
   in a loop instead of by recursion.
 * The verbose envelope receives the same intensity as the normal envelope,
   so switching on verbose output cannot change quantification.
 * Dropped the unused "using System.Net;", the no-longer-needed
   "using Easy.Common.Extensions;" in ChromatographicPeak.cs and the
   "int placeholder = 0;" left in the test.
 * Still open: writeVerbosePeaks is inserted in the middle of the
   FlashLfqEngine constructor parameters, which shifts positional arguments
   of existing callers; consider moving it to the end of the list.
 * Still open: TestFlashLfq enables verbose output but asserts nothing
   about the verbose columns.
 * Context lines were re-wrapped from a flattened copy; apply with
   --ignore-whitespace if indentation differs. The post-image blob hashes
   on the index lines are stale.

 mzLib/FlashLFQ/ChromatographicPeak.cs     |  75 +++++++++++++
 mzLib/FlashLFQ/FlashLFQResults.cs         |  12 ++-
 mzLib/FlashLFQ/FlashLfqEngine.cs          |  13 ++-
 mzLib/FlashLFQ/VerboseIsotopicEnvelope.cs | 124 ++++++++++++++++++++++
 mzLib/TestFlashLFQ/TestFlashLFQ.cs        |   2 +-
 5 files changed, 219 insertions(+), 7 deletions(-)
 create mode 100644 mzLib/FlashLFQ/VerboseIsotopicEnvelope.cs

diff --git a/mzLib/FlashLFQ/ChromatographicPeak.cs b/mzLib/FlashLFQ/ChromatographicPeak.cs
index 5d1e238d6..2cf9035bb 100644
--- a/mzLib/FlashLFQ/ChromatographicPeak.cs
+++ b/mzLib/FlashLFQ/ChromatographicPeak.cs
@@ -81,6 +81,9 @@ public static string TabSeparatedHeader
             }
         }
 
+        /// <summary>Header of the verbose peak output: the standard header plus three isotope columns.</summary>
+        public static string VerboseTabSeparatedHeader => TabSeparatedHeader + "\tIsotope Peak Intensity\tIsotope Peak m/z\tIsotope Peak RTs";
+
         /// <summary>
         /// Sets retention time information for a given peak. Used for MBR peaks
         /// </summary>
@@ -254,5 +257,77 @@ public override string ToString()
 
             return sb.ToString();
         }
+
+        /// <summary>
+        /// Writes this peak as one tab-separated row, optionally followed by the isotope columns
+        /// named in <see cref="VerboseTabSeparatedHeader"/>.
+        /// </summary>
+        /// <param name="verbose">If false, the result is identical to <see cref="ToString()"/>.</param>
+        public string ToString(bool verbose)
+        {
+            string peakString = ToString();
+            return verbose
+                ? peakString + "\t" + GetIsotopeInformation()
+                : peakString;
+        }
+
+        /// <summary>
+        /// Builds the three isotope columns of the verbose peak output (intensities, m/z values and
+        /// retention times) from the <see cref="VerboseIsotopicEnvelope"/>s attached to this peak.
+        /// Every bracketed series holds one entry per retention time, in the order of the retention
+        /// time column; "-" marks an isotope peak that was not observed at that time point.
+        /// </summary>
+        /// <returns>
+        /// Exactly three tab-separated fields and no trailing tab, matching the three columns that
+        /// <see cref="VerboseTabSeparatedHeader"/> adds. All three are empty without verbose envelopes.
+        /// </returns>
+        internal string GetIsotopeInformation()
+        {
+            List<IGrouping<double, VerboseIsotopicEnvelope>> envelopesByRetentionTime = IsotopicEnvelopes
+                .OfType<VerboseIsotopicEnvelope>()
+                .OrderBy(e => e.ChargeState)
+                .ThenBy(e => e.IndexedPeak.RetentionTime)
+                .GroupBy(e => e.RetentionTime)
+                .OrderBy(group => group.Key)
+                .ToList();
+
+            if (envelopesByRetentionTime.Count == 0)
+            {
+                return "\t\t";
+            }
+
+            // One slot per retention time for every (isotope number, charge state) pair seen anywhere in
+            // this peak; a slot stays null where that isotope peak was not found.
+            var peaksByIsotopeAndCharge = new Dictionary<(int IsotopeNumber, int ChargeState), IndexedMassSpectralPeak[]>();
+            for (int rtIndex = 0; rtIndex < envelopesByRetentionTime.Count; rtIndex++)
+            {
+                foreach (VerboseIsotopicEnvelope envelope in envelopesByRetentionTime[rtIndex])
+                {
+                    foreach (KeyValuePair<int, IndexedMassSpectralPeak> isotopePeak in envelope.PeakDictionary)
+                    {
+                        var key = (isotopePeak.Key, envelope.ChargeState);
+                        if (!peaksByIsotopeAndCharge.TryGetValue(key, out IndexedMassSpectralPeak[] slots))
+                        {
+                            slots = new IndexedMassSpectralPeak[envelopesByRetentionTime.Count];
+                            peaksByIsotopeAndCharge.Add(key, slots);
+                        }
+
+                        slots[rtIndex] = isotopePeak.Value;
+                    }
+                }
+            }
+
+            StringBuilder intensityString = new StringBuilder();
+            StringBuilder mzString = new StringBuilder();
+            foreach (var entry in peaksByIsotopeAndCharge.OrderBy(kvp => kvp.Key.IsotopeNumber).ThenBy(kvp => kvp.Key.ChargeState))
+            {
+                string name = VerboseIsotopicEnvelope.GetIsotopePeakName(entry.Key);
+                intensityString.Append("[" + name + ": " + string.Join(", ", entry.Value.Select(p => p != null ? p.Intensity.ToString() : "-")) + "];");
+                mzString.Append("[" + name + ": " + string.Join(", ", entry.Value.Select(p => p != null ? p.Mz.ToString() : "-")) + "];");
+            }
+
+            string retentionTimeString = "[" + string.Join(", ", envelopesByRetentionTime.Select(group => group.Key)) + "]";
+            return "\"" + intensityString + "\"\t\"" + mzString + "\"\t" + retentionTimeString;
+        }
     }
 }
\ No newline at end of file
diff --git a/mzLib/FlashLFQ/FlashLFQResults.cs b/mzLib/FlashLFQ/FlashLFQResults.cs
index 6261f1f29..288eba957 100644
--- a/mzLib/FlashLFQ/FlashLFQResults.cs
+++ b/mzLib/FlashLFQ/FlashLFQResults.cs
@@ -15,14 +15,16 @@ public class FlashLfqResults
         public readonly Dictionary<string, ProteinGroup> ProteinGroups;
         public readonly Dictionary<SpectraFileInfo, List<ChromatographicPeak>> Peaks;
         private readonly HashSet<string> _peptideModifiedSequencesToQuantify;
+        public readonly bool WriteVerbosePeaks;
 
-        public FlashLfqResults(List<SpectraFileInfo> spectraFiles, List<Identification> identifications, HashSet<string> peptides = null)
+        public FlashLfqResults(List<SpectraFileInfo> spectraFiles, List<Identification> identifications, HashSet<string> peptides = null, bool writeVerbosePeaks = false)
         {
             SpectraFiles = spectraFiles;
             PeptideModifiedSequences = new Dictionary<string, Peptide>();
             ProteinGroups = new Dictionary<string, ProteinGroup>();
             Peaks = new Dictionary<SpectraFileInfo, List<ChromatographicPeak>>();
-            if(peptides == null || !peptides.Any())
+            WriteVerbosePeaks = writeVerbosePeaks;
+            if (peptides == null || !peptides.Any())
             {
                 peptides = identifications.Select(id => id.ModifiedSequence).ToHashSet();
             }
@@ -560,13 +562,15 @@ public void WriteResults(string peaksOutputPath, string modPeptideOutputPath, st
             {
                 using (StreamWriter output = new StreamWriter(peaksOutputPath))
                 {
-                    output.WriteLine(ChromatographicPeak.TabSeparatedHeader);
+                    output.WriteLine(WriteVerbosePeaks
+                        ? ChromatographicPeak.VerboseTabSeparatedHeader
+                        : ChromatographicPeak.TabSeparatedHeader);
 
                     foreach (var peak in Peaks.SelectMany(p => p.Value)
                         .OrderBy(p => p.SpectraFileInfo.FilenameWithoutExtension)
                         .ThenByDescending(p => p.Intensity))
                     {
-                        output.WriteLine(peak.ToString());
+                        output.WriteLine(peak.ToString(WriteVerbosePeaks));
                     }
                 }
             }
diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs
index 2e877d1f7..23a55896a 100644
--- a/mzLib/FlashLFQ/FlashLfqEngine.cs
+++ b/mzLib/FlashLFQ/FlashLfqEngine.cs
@@ -33,6 +33,7 @@ public class FlashLfqEngine
         public readonly bool Normalize;
         public readonly double DiscriminationFactorToCutPeak;
         public readonly bool QuantifyAmbiguousPeptides;
+        public readonly bool WriteVerbosePeaks;
 
         // MBR settings
         public readonly bool MatchBetweenRuns;
@@ -92,6 +93,7 @@ public FlashLfqEngine(
             bool quantifyAmbiguousPeptides = false,
             bool silent = false,
             int maxThreads = -1,
+            bool writeVerbosePeaks = false,
 
             // MBR settings
             bool matchBetweenRuns = false,
@@ -132,6 +134,7 @@ public FlashLfqEngine(
             QuantifyAmbiguousPeptides = quantifyAmbiguousPeptides;
             Silent = silent;
             IdSpecificChargeState = idSpecificChargeState;
+            WriteVerbosePeaks = writeVerbosePeaks;
             MbrRtWindow = maxMbrWindow;
             RequireMsmsIdInCondition = requireMsmsIdInCondition;
             Normalize = normalize;
@@ -166,7 +169,7 @@ public FlashLfqResults Run()
         {
             _globalStopwatch.Start();
             _ms1Scans = new Dictionary<SpectraFileInfo, Ms1ScanInfo[]>();
-            _results = new FlashLfqResults(_spectraFileInfo, _allIdentifications, PeptidesModifiedSequencesToQuantify);
+            _results = new FlashLfqResults(_spectraFileInfo, _allIdentifications, PeptidesModifiedSequencesToQuantify, WriteVerbosePeaks);
 
             // build m/z index keys
             CalculateTheoreticalIsotopeDistributions();
@@ -1183,6 +1186,7 @@ public List<IsotopicEnvelope> GetIsotopicEnvelopes(
                     // isotope masses are calculated relative to the observed peak
                     double observedMass = peak.Mz.ToMass(chargeState);
                     double observedMassError = observedMass - identification.PeakfindingMass;
+                    List<IndexedMassSpectralPeak> allPeaks = new();
 
                     foreach (var shift in massShiftToIsotopePeaks)
                     {
@@ -1219,6 +1223,7 @@ public List<IsotopicEnvelope> GetIsotopicEnvelopes(
                                 if (shift.Key == 0)
                                 {
                                     experimentalIsotopeIntensities[i] = isotopePeak.Intensity;
+                                    allPeaks.Add(isotopePeak);
                                 }
                             }
                         }
@@ -1243,7 +1248,11 @@ public List<IsotopicEnvelope> GetIsotopicEnvelopes(
                         }
                     }
 
-                    isotopicEnvelopes.Add(new IsotopicEnvelope(peak, chargeState, experimentalIsotopeIntensities.Sum()));
+                    // both envelope types get the same intensity, so verbose output never alters quantification
+                    IsotopicEnvelope isotopicEnvelope = WriteVerbosePeaks
+                        ? new VerboseIsotopicEnvelope(peak, allPeaks, chargeState, identification.MonoisotopicMass, intensity: experimentalIsotopeIntensities.Sum())
+                        : new IsotopicEnvelope(peak, chargeState, experimentalIsotopeIntensities.Sum());
+                    isotopicEnvelopes.Add(isotopicEnvelope);
                 }
             }
 
diff --git a/mzLib/FlashLFQ/VerboseIsotopicEnvelope.cs b/mzLib/FlashLFQ/VerboseIsotopicEnvelope.cs
new file mode 100644
index 000000000..bf8f69a4c
--- /dev/null
+++ b/mzLib/FlashLFQ/VerboseIsotopicEnvelope.cs
@@ -0,0 +1,124 @@
+using Chemistry;
+using MathNet.Numerics;
+using MzLibUtil;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace FlashLFQ
+{
+    /// <summary>
+    /// An <see cref="IsotopicEnvelope"/> that also keeps the individual isotope peaks it was built from,
+    /// keyed by isotope number (0 = monoisotopic), for the verbose peak output.
+    /// </summary>
+    public class VerboseIsotopicEnvelope : IsotopicEnvelope
+    {
+        // Mass difference in Da between neighbouring isotope positions.
+        private const double IsotopeMassSpacing = 1.0033548381;
+
+        // Number of ppm by which the tolerance is widened whenever a peak fits no isotope position.
+        private const int PpmToleranceStep = 5;
+
+        /// <summary>Observed isotope peaks, keyed by isotope number.</summary>
+        public Dictionary<int, IndexedMassSpectralPeak> PeakDictionary { get; }
+
+        /// <summary>Retention time of the most abundant peak, rounded to four decimal places.</summary>
+        public double RetentionTime { get; }
+
+        /// <summary>
+        /// Creates an envelope from a set of isotope peaks that share one charge state.
+        /// </summary>
+        /// <param name="mostAbundantPeak">Peak passed to the base envelope; also supplies the retention time.</param>
+        /// <param name="allPeaks">Isotope peaks to index by isotope number; may be in any order.</param>
+        /// <param name="chargeState">Charge state used to convert the mass to m/z and to space the isotope positions.</param>
+        /// <param name="monoisotopicMass">Neutral monoisotopic mass that isotope numbers are counted from.</param>
+        /// <param name="isotopePpmTolerance">Initial tolerance, in ppm, for matching a peak to an isotope position.</param>
+        /// <param name="intensity">Envelope intensity; if not greater than -1, the summed intensity of <paramref name="allPeaks"/> is used.</param>
+        public VerboseIsotopicEnvelope(
+            IndexedMassSpectralPeak mostAbundantPeak,
+            List<IndexedMassSpectralPeak> allPeaks,
+            int chargeState,
+            double monoisotopicMass,
+            int isotopePpmTolerance = 5,
+            double intensity = -1.0)
+            : base(mostAbundantPeak, chargeState, intensity > -1.0 ? intensity : allPeaks.Sum(p => p.Intensity))
+        {
+            PeakDictionary = WritePeakDictionary(
+                allPeaks.OrderBy(peak => peak.Mz).ToList(),
+                monoisotopicMass.ToMz(chargeState),
+                isotopePpmTolerance,
+                chargeState);
+            RetentionTime = mostAbundantPeak.RetentionTime.Round(4);
+        }
+
+        /// <summary>Formats the envelope as "+charge|intensity|retention time|scan index".</summary>
+        public override string ToString() => "+" + this.ChargeState.ToString() + "|" + this.Intensity.ToString("F0") + "|" + this.IndexedPeak.RetentionTime.ToString("F3") + "|" + this.IndexedPeak.ZeroBasedMs1ScanIndex.ToString();
+
+        /// <summary>
+        /// Assigns each peak its isotope number by walking the expected isotope m/z positions
+        /// (<paramref name="monoisotopicMz"/> + n * 1.0033548381 / |<paramref name="chargeState"/>|) upwards.
+        /// If a peak lies below the next expected position without matching it, the whole assignment
+        /// is restarted with a tolerance 5 ppm wider.
+        /// </summary>
+        /// <param name="peaks">Peaks sorted by ascending m/z.</param>
+        /// <param name="monoisotopicMz">Expected m/z of isotope number 0.</param>
+        /// <param name="isotopePpmTolerance">Initial matching tolerance in ppm.</param>
+        /// <param name="chargeState">
+        /// Charge state of the peaks; isotope positions are 1.0033548381 / |z| apart in m/z.
+        /// Defaults to 1 so existing three-argument calls keep their previous behavior.
+        /// </param>
+        /// <returns>The peaks keyed by isotope number; empty if <paramref name="peaks"/> is empty.</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="chargeState"/> is 0.</exception>
+        public static Dictionary<int, IndexedMassSpectralPeak> WritePeakDictionary(
+            List<IndexedMassSpectralPeak> peaks,
+            double monoisotopicMz,
+            int isotopePpmTolerance,
+            int chargeState = 1)
+        {
+            if (chargeState == 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(chargeState), "Charge state must not be zero.");
+            }
+
+            double mzSpacing = IsotopeMassSpacing / Math.Abs(chargeState);
+
+            // Iterative form of "retry with a wider tolerance": a wider tolerance lowers every window's
+            // lower bound, so for positive m/z values the retries end once no peak falls below a window.
+            for (int tolerancePpm = isotopePpmTolerance; ; tolerancePpm += PpmToleranceStep)
+            {
+                PpmTolerance ppmTolerance = new PpmTolerance(tolerancePpm);
+                var isotopePeaks = new Dictionary<int, IndexedMassSpectralPeak>(peaks.Count);
+                int peakIndex = 0;
+                int isotopeNumber = 0;
+                bool unmatchedPeak = false;
+
+                while (peakIndex < peaks.Count && !unmatchedPeak)
+                {
+                    double expectedMz = monoisotopicMz + mzSpacing * isotopeNumber;
+                    double observedMz = peaks[peakIndex].Mz;
+                    if (ppmTolerance.Within(observedMz, expectedMz))
+                    {
+                        isotopePeaks.Add(isotopeNumber++, peaks[peakIndex++]);
+                    }
+                    else if (ppmTolerance.GetMinimumValue(expectedMz) > observedMz)
+                    {
+                        // the peak sits below this isotope window, so no later window can match it either
+                        unmatchedPeak = true;
+                    }
+                    else
+                    {
+                        isotopeNumber++;
+                    }
+                }
+
+                if (!unmatchedPeak)
+                {
+                    return isotopePeaks;
+                }
+            }
+        }
+
+        /// <summary>Names an isotope peak series, e.g. "i1z2" for isotope number 1 at charge state 2.</summary>
+        public static string GetIsotopePeakName((int isotopeNumber, int chargeState) key) => "i" + key.isotopeNumber.ToString() + "z" + key.chargeState.ToString();
+    }
+}
\ No newline at end of file
diff --git a/mzLib/TestFlashLFQ/TestFlashLFQ.cs b/mzLib/TestFlashLFQ/TestFlashLFQ.cs
index b94e32cf0..49641fbbe 100644
--- a/mzLib/TestFlashLFQ/TestFlashLFQ.cs
+++ b/mzLib/TestFlashLFQ/TestFlashLFQ.cs
@@ -53,7 +53,7 @@ public static void TestFlashLfq()
             Identification id4 = new Identification(mzml, "EGFQVADGPLYR", "EGFQVADGPLYR", 1350.65681, 94.05811, 2, new List<ProteinGroup> { pg });
 
             // create the FlashLFQ engine
-            FlashLfqEngine engine = new FlashLfqEngine(new List<Identification> { id1, id2, id3, id4 }, normalize: true, maxThreads: 1);
+            FlashLfqEngine engine = new FlashLfqEngine(new List<Identification> { id1, id2, id3, id4 }, normalize: true, maxThreads: 1, writeVerbosePeaks: true);
 
             // run the engine
             var results = engine.Run();