From 32658b167a7972c2770ff6f79352504141aec0f4 Mon Sep 17 00:00:00 2001 From: Alexander-Sol <41119316+Alexander-Sol@users.noreply.github.com> Date: Mon, 28 Oct 2024 18:08:15 -0500 Subject: [PATCH] Updated FilteredPsms to only iterate through PSMs once (#2430) * Updated FilterePsms to only iterate through PSMs once * Updated filtering --- MetaMorpheus/TaskLayer/FilteredPsms.cs | 88 +++++++++++++++----------- 1 file changed, 51 insertions(+), 37 deletions(-) diff --git a/MetaMorpheus/TaskLayer/FilteredPsms.cs b/MetaMorpheus/TaskLayer/FilteredPsms.cs index 81e80c0fa..9b8379663 100644 --- a/MetaMorpheus/TaskLayer/FilteredPsms.cs +++ b/MetaMorpheus/TaskLayer/FilteredPsms.cs @@ -112,43 +112,14 @@ public static FilteredPsms Filter(IEnumerable psms, } } - if (!includeHighQValuePsms) - { - filteredPsms = filterType.Equals(FilterType.QValue) - ? psms.Where(p => p.GetFdrInfo(filterAtPeptideLevel) != null - && p.GetFdrInfo(filterAtPeptideLevel).QValue <= filterThreshold - && p.GetFdrInfo(filterAtPeptideLevel).QValueNotch <= filterThreshold).ToList() - : psms.Where(p => p.GetFdrInfo(filterAtPeptideLevel) != null && p.GetFdrInfo(filterAtPeptideLevel).PEP_QValue <= filterThreshold).ToList(); - } - else - { - filteredPsms = psms.ToList(); - } - - if (!includeDecoys) - { - filteredPsms.RemoveAll(p => p.IsDecoy); - } - if (!includeContaminants) - { - filteredPsms.RemoveAll(p => p.IsContaminant); - } - if (!includeAmbiguous) - { - filteredPsms.RemoveAll(p => p.BaseSequence.IsNullOrEmpty()); - } - if (!includeAmbiguousMods) - { - filteredPsms.RemoveAll(p => p.FullSequence.IsNullOrEmpty()); - } - if (filterAtPeptideLevel) - { - //Choose the top scoring PSM for each peptide - filteredPsms = filteredPsms - .OrderByDescending(p => p) - .GroupBy(b => b.FullSequence) - .Select(b => b.FirstOrDefault()).ToList(); - } + filteredPsms = psms.Where(psm => + (includeDecoys || !psm.IsDecoy) + && (includeContaminants || !psm.IsContaminant) + && (includeAmbiguous || !psm.BaseSequence.IsNullOrEmpty()) 
+                && (includeAmbiguousMods || !psm.FullSequence.IsNullOrEmpty()))
+                .FilterByQValue(includeHighQValuePsms, filterThreshold, filterAtPeptideLevel, filterType)
+                .CollapseToPeptides(filterAtPeptideLevel)
+                .ToList();
 
             return new FilteredPsms(filteredPsms, filterType, filterThreshold, filteringNotPerformed, filterAtPeptideLevel);
         }
@@ -163,4 +134,67 @@ System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
             return FilteredPsmsList.GetEnumerator();
         }
     }
+
+    /// <summary>
+    /// Lazy filtering steps chained by FilteredPsms.Filter so that the PSMs are only iterated once.
+    /// </summary>
+    public static class FilteredPsmsExtensions
+    {
+        /// <summary>
+        /// Reduces the matches to one per FullSequence when filtering at the peptide level.
+        /// </summary>
+        /// <param name="psms">The matches to collapse.</param>
+        /// <param name="filterAtPeptideLevel">If false, <paramref name="psms"/> is returned unchanged.</param>
+        /// <returns>The first match of each FullSequence group after sorting the matches in descending order.</returns>
+        public static IEnumerable CollapseToPeptides(this IEnumerable psms, bool filterAtPeptideLevel)
+        {
+            if (!filterAtPeptideLevel)
+            {
+                return psms;
+            }
+
+            // Choose the top scoring PSM for each peptide
+            return psms
+                .OrderByDescending(p => p)
+                .GroupBy(b => b.FullSequence)
+                .Select(b => b.FirstOrDefault());
+        }
+
+        /// <summary>
+        /// Lazily yields the matches whose FDR info passes the threshold; yields every match when <paramref name="includeHighQValuePsms"/> is true.
+        /// </summary>
+        /// <param name="psms">The matches to filter.</param>
+        /// <param name="includeHighQValuePsms">If true, no q-value filtering is applied.</param>
+        /// <param name="qValueThreshold">Inclusive upper bound for the compared values.</param>
+        /// <param name="filterAtPeptideLevel">Passed to GetFdrInfo to select which FDR info is compared.</param>
+        /// <param name="filterType">PepQValue compares PEP_QValue; any other value compares both QValue and QValueNotch.</param>
+        public static IEnumerable FilterByQValue(this IEnumerable psms, bool includeHighQValuePsms, double qValueThreshold, bool filterAtPeptideLevel, FilterType filterType)
+        {
+            foreach (var psm in psms)
+            {
+                if (includeHighQValuePsms)
+                {
+                    yield return psm;
+                    continue;
+                }
+
+                // Fetch the FDR info once. A match with no FDR info cannot pass a threshold,
+                // so skip it instead of dereferencing null.
+                var fdrInfo = psm.GetFdrInfo(filterAtPeptideLevel);
+                if (fdrInfo == null)
+                {
+                    continue;
+                }
+
+                bool passesThreshold = filterType == FilterType.PepQValue
+                    ? fdrInfo.PEP_QValue <= qValueThreshold
+                    : fdrInfo.QValue <= qValueThreshold && fdrInfo.QValueNotch <= qValueThreshold;
+
+                if (passesThreshold)
+                {
+                    yield return psm;
+                }
+            }
+        }
+    }
 }