1
- using Easy . Common . Extensions ;
1
+ using Chemistry ;
2
+ using Easy . Common . Extensions ;
2
3
using EngineLayer ;
3
4
using EngineLayer . FdrAnalysis ;
4
5
using EngineLayer . HistogramAnalysis ;
7
8
using FlashLFQ ;
8
9
using MassSpectrometry ;
9
10
using MathNet . Numerics . Distributions ;
11
+ using MzLibUtil ;
12
+ using Omics . Modifications ;
13
+ using Omics . SpectrumMatch ;
10
14
using Proteomics ;
11
15
using Proteomics . ProteolyticDigestion ;
12
16
using System ;
16
20
using System . IO . Compression ;
17
21
using System . Linq ;
18
22
using System . Text ;
19
- using UsefulProteomicsDatabases ;
20
23
using TaskLayer . MbrAnalysis ;
21
- using Chemistry ;
22
- using MzLibUtil ;
23
- using Omics . Modifications ;
24
- using Omics . SpectrumMatch ;
24
+ using TopDownProteomics ;
25
+ using UsefulProteomicsDatabases ;
25
26
26
27
namespace TaskLayer
27
28
{
@@ -34,7 +35,7 @@ public class PostSearchAnalysisTask : MetaMorpheusTask
34
35
/// <summary>
35
36
/// Stores the results that are written to Results.tsv. See the method ConstructResultsDictionary() for an explanation of its layout.
36
37
/// </summary>
37
- private Dictionary < ( string , string ) , string > ResultsDictionary { get ; set ; }
38
+ private Dictionary < ( string , string ) , string > ResultsDictionary { get ; set ; }
38
39
/// <summary>
39
40
/// Used for storage of results for writing digestion product counts to a .tsv.
40
41
/// </summary>
@@ -105,14 +106,14 @@ public MyTaskResults Run()
105
106
if ( Parameters . SearchParameters . DoLabelFreeQuantification && Parameters . FlashLfqResults != null )
106
107
{
107
108
SpectralRecoveryResults = SpectralRecoveryRunner . RunSpectralRecoveryAlgorithm ( Parameters , CommonParameters , FileSpecificParameters ) ;
108
- }
109
+ }
109
110
}
110
111
111
- if ( Parameters . SearchParameters . UpdateSpectralLibrary )
112
+ if ( Parameters . SearchParameters . UpdateSpectralLibrary )
112
113
{
113
114
UpdateSpectralLibrary ( ) ;
114
115
}
115
-
116
+
116
117
if ( Parameters . SearchParameters . WriteDigestionProductCountFile )
117
118
{
118
119
WriteDigestionCountByProtein ( ) ;
@@ -526,9 +527,73 @@ private void QuantificationAnalysis()
526
527
Parameters . FlashLfqResults = flashLfqEngine . Run ( ) ;
527
528
}
528
529
529
- // get protein intensity back from FlashLFQ
530
+
530
531
if ( ProteinGroups != null && Parameters . FlashLfqResults != null )
531
532
{
533
+ // compute modification stoichiometry (occupancy) from the total FlashLFQ intensity of each quantified peptide
534
+ var peptides = flashLfqEngine . PeptideModifiedSequencesToQuantify
535
+ . Where ( pep => Parameters . FlashLfqResults . PeptideModifiedSequences . ContainsKey ( pep ) )
536
+ . Select ( pep => ( Parameters . FlashLfqResults . PeptideModifiedSequences [ pep ] . Sequence ,
537
+ Parameters . FlashLfqResults . PeptideModifiedSequences [ pep ] . BaseSequence ,
538
+ Parameters . FlashLfqResults . PeptideModifiedSequences [ pep ] . ProteinGroups . Select ( pg => pg . ProteinGroupName ) . ToList ( ) ,
539
+ Parameters . FlashLfqResults . PeptideModifiedSequences [ pep ] . GetTotalIntensity ( ) ) ) . ToList ( ) ;
540
+
541
+ PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis ( ) ;
542
+ pfa . ProteinGroupsOccupancyByPeptide ( peptides , true , true , true ) ; // one-based indexes, ignores terminal mods on all peptides.
543
+
544
+ var proteinGroupsOccupancyByProteins = pfa . Occupancy ;
545
+ var quantifiedProteinGroups = ProteinGroups . Where ( pg => Parameters . FlashLfqResults . ProteinGroups . ContainsKey ( pg . ProteinGroupName ) ) ;
546
+
547
+ foreach ( var proteinGroup in quantifiedProteinGroups )
548
+ {
549
+ var modInfoString = new StringBuilder ( ) ;
550
+
551
+ foreach ( var protein in proteinGroup . Proteins )
552
+ {
553
+ List < string > peptideBaseSequencesSeen = new List < string > ( ) ;
554
+ foreach ( var peptide in proteinGroup . AllPeptides )
555
+ {
556
+ if ( proteinGroupsOccupancyByProteins [ proteinGroup . ProteinGroupName ] . Proteins [ protein . Accession ] . Peptides . ContainsKey ( peptide . BaseSequence )
557
+ && ! peptideBaseSequencesSeen . Contains ( peptide . BaseSequence ) )
558
+ {
559
+ proteinGroupsOccupancyByProteins [ proteinGroup . ProteinGroupName ]
560
+ . Proteins [ protein . Accession ] . Peptides [ peptide . BaseSequence ]
561
+ . PeptideToProteinPositions ( peptide . OneBasedStartResidueInProtein ) ;
562
+
563
+ peptideBaseSequencesSeen . Add ( peptide . BaseSequence ) ;
564
+ }
565
+ }
566
+
567
+ proteinGroupsOccupancyByProteins [ proteinGroup . ProteinGroupName ]
568
+ . Proteins [ protein . Accession ]
569
+ . SetProteinModsFromPeptides ( ) ;
570
+
571
+ // build modInfoString for this protein
572
+ var occupancyPGProtein = proteinGroupsOccupancyByProteins [ proteinGroup . ProteinGroupName ] . Proteins [ protein . Accession ] ;
573
+ modInfoString . Append ( $ "" ) ;
574
+ var aaModStrings = new List < string > ( ) ;
575
+
576
+ foreach ( var modpos in occupancyPGProtein . ModifiedAminoAcidPositionsInProtein . OrderBy ( x => x . Key ) )
577
+ {
578
+ var aaModString = new StringBuilder ( ) ;
579
+ aaModString . Append ( $ "aa#{ modpos . Key . ToString ( ) } ") ;
580
+
581
+ foreach ( var mod in occupancyPGProtein . ModifiedAminoAcidPositionsInProtein [ modpos . Key ] )
582
+ {
583
+ aaModString . Append ( $ "[{ mod . Key } , info:occupancy={ mod . Value . Intensity . ToString ( ) } ]") ;
584
+ }
585
+
586
+ aaModStrings . Add ( aaModString . ToString ( ) ) ;
587
+ }
588
+ if ( aaModStrings . IsNotNullOrEmpty ( ) )
589
+ {
590
+ modInfoString . Append ( $ "protein:{ protein . Accession } {{{string.Join("; ", aaModStrings)}}}" ) ;
591
+ }
592
+ }
593
+ proteinGroup . ModsInfo . Add ( modInfoString . ToString ( ) ) ;
594
+ }
595
+
596
+ // get protein intensity back from FlashLFQ
532
597
foreach ( var proteinGroup in ProteinGroups )
533
598
{
534
599
proteinGroup . FilesForQuantification = spectraFileInfo ;
@@ -548,7 +613,7 @@ private void QuantificationAnalysis()
548
613
}
549
614
}
550
615
551
- //Silac stuff for post-quantification
616
+ //Silac stuff for post-quantification
552
617
if ( Parameters . SearchParameters . SilacLabels != null && Parameters . AllPsms . First ( ) is PeptideSpectralMatch ) //if we're doing silac
553
618
{
554
619
SilacConversions . SilacConversionsPostQuantification ( allSilacLabels , startLabel , endLabel , spectraFileInfo , ProteinGroups , Parameters . ListOfDigestionParams ,
@@ -612,19 +677,19 @@ private void WritePsmResults()
612
677
613
678
// write PSMs
614
679
string writtenFile = Path . Combine ( Parameters . OutputFolder , $ "All{ GlobalVariables . AnalyteType . GetSpectralMatchLabel ( ) } s.{ GlobalVariables . AnalyteType . GetSpectralMatchExtension ( ) } ") ;
615
- WritePsmsToTsv ( psmsForPsmResults . OrderByDescending ( p=> p ) . ToList ( ) , writtenFile , writePeptideLevelResults : false ) ;
680
+ WritePsmsToTsv ( psmsForPsmResults . OrderByDescending ( p => p ) . ToList ( ) , writtenFile , writePeptideLevelResults : false ) ;
616
681
FinishedWritingFile ( writtenFile , new List < string > { Parameters . SearchTaskId } ) ;
617
682
618
683
// write PSMs for percolator
619
684
// percolator native read format is .tab
620
685
writtenFile = Path . Combine ( Parameters . OutputFolder , $ "All{ GlobalVariables . AnalyteType . GetSpectralMatchLabel ( ) } s_FormattedForPercolator.tab") ;
621
- WritePsmsForPercolator ( psmsForPsmResults . OrderByDescending ( p=> p ) . ToList ( ) , writtenFile ) ;
686
+ WritePsmsForPercolator ( psmsForPsmResults . OrderByDescending ( p => p ) . ToList ( ) , writtenFile ) ;
622
687
FinishedWritingFile ( writtenFile , new List < string > { Parameters . SearchTaskId } ) ;
623
688
624
689
// write summary text
625
690
if ( psmsForPsmResults . FilteringNotPerformed )
626
691
{
627
-
692
+
628
693
Parameters . SearchTaskResults . AddPsmPeptideProteinSummaryText (
629
694
$ "PEP could not be calculated due to an insufficient number of { GlobalVariables . AnalyteType . GetSpectralMatchLabel ( ) } s. Results were filtered by q-value." +
630
695
Environment . NewLine ) ;
@@ -672,9 +737,9 @@ private void WriteIndividualPsmResults()
672
737
// generated by analyzing one file by itself. Therefore, the FDR info should change between AllResults and FileSpecific
673
738
string strippedFileName = Path . GetFileNameWithoutExtension ( psmFileGroup . Key ) ;
674
739
var psmsForThisFile = psmFileGroup . ToList ( ) ;
675
- CalculatePsmAndPeptideFdr ( psmsForThisFile , "PSM" , false ) ;
740
+ CalculatePsmAndPeptideFdr ( psmsForThisFile , "PSM" , false ) ;
676
741
var psmsToWrite = FilteredPsms . Filter ( psmsForThisFile ,
677
- CommonParameters ,
742
+ CommonParameters ,
678
743
includeDecoys : Parameters . SearchParameters . WriteDecoys ,
679
744
includeContaminants : Parameters . SearchParameters . WriteContaminants ,
680
745
includeAmbiguous : true ,
@@ -753,7 +818,7 @@ private void UpdateSpectralLibrary()
753
818
// Key is a (FullSequence, Charge) tuple
754
819
keySelector : g => g . Key ,
755
820
// Value is the highest scoring psm in the group
756
- elementSelector : g => g . MaxBy ( p => p . Score ) ) ;
821
+ elementSelector : g => g . MaxBy ( p => p . Score ) ) ;
757
822
758
823
//load the original library
759
824
var originalLibrarySpectra = Parameters . SpectralLibrary . GetAllLibrarySpectra ( ) ;
@@ -832,7 +897,7 @@ private void SpectralLibraryGeneration()
832
897
bestPsm . MatchedFragmentIons ,
833
898
bestPsm . ScanRetentionTime ) ) ;
834
899
}
835
-
900
+
836
901
WriteSpectrumLibrary ( spectraLibrary , Parameters . OutputFolder ) ;
837
902
}
838
903
@@ -847,7 +912,7 @@ private void WriteProteinResults()
847
912
string proteinResultsText = $ "All target { GlobalVariables . AnalyteType . GetBioPolymerLabel ( ) . ToLower ( ) } groups with q-value <= 0.01 (1% FDR): " + ProteinGroups . Count ( b => b . QValue <= 0.01 && ! b . IsDecoy ) ;
848
913
ResultsDictionary [ ( "All" , $ "{ GlobalVariables . AnalyteType . GetBioPolymerLabel ( ) } s") ] = proteinResultsText ;
849
914
}
850
-
915
+
851
916
string fileName = $ "All{ GlobalVariables . AnalyteType . GetBioPolymerLabel ( ) } Groups.tsv";
852
917
if ( Parameters . SearchParameters . DoLabelFreeQuantification )
853
918
{
@@ -1380,10 +1445,10 @@ public static double[] GetMultiplexIonIntensities(MzSpectrum scan, double[] theo
1380
1445
int peakIndex = scan . GetClosestPeakIndex ( theoreticalIonMzs [ 0 ] ) ;
1381
1446
int lastPeakIndex = Math . Min ( scan . GetClosestPeakIndex ( theoreticalIonMzs . Last ( ) ) + 1 , scan . XArray . Length - 1 ) ;
1382
1447
double [ ] ionIntensities = new double [ theoreticalIonMzs . Length ] ;
1383
-
1448
+
1384
1449
for ( int ionIndex = 0 ; ionIndex < ionIntensities . Length ; ionIndex ++ )
1385
1450
{
1386
- while ( peakIndex <= lastPeakIndex &&
1451
+ while ( peakIndex <= lastPeakIndex &&
1387
1452
scan . XArray [ peakIndex ] < tolerance . GetMinimumValue ( theoreticalIonMzs [ ionIndex ] ) )
1388
1453
{
1389
1454
peakIndex ++ ;
@@ -1398,7 +1463,7 @@ public static double[] GetMultiplexIonIntensities(MzSpectrum scan, double[] theo
1398
1463
peakIndex ++ ;
1399
1464
}
1400
1465
}
1401
-
1466
+
1402
1467
return ionIntensities ;
1403
1468
}
1404
1469
@@ -1885,7 +1950,7 @@ private void ConstructResultsDictionary()
1885
1950
1886
1951
if ( Parameters . SearchParameters . DoParsimony )
1887
1952
{
1888
- ResultsDictionary . Add ( ( "All" , $ "{ GlobalVariables . AnalyteType . GetBioPolymerLabel ( ) } s") , "" ) ;
1953
+ ResultsDictionary . Add ( ( "All" , $ "{ GlobalVariables . AnalyteType . GetBioPolymerLabel ( ) } s") , "" ) ;
1889
1954
if ( Parameters . CurrentRawFileList . Count > 1 && Parameters . SearchParameters . WriteIndividualFiles )
1890
1955
{
1891
1956
foreach ( var rawFile in Parameters . CurrentRawFileList )
@@ -1907,8 +1972,8 @@ private string AllResultsTotals()
1907
1972
sb . AppendLine ( ResultsDictionary [ key ] ) ;
1908
1973
}
1909
1974
}
1910
-
1911
- var keys = ResultsDictionary . Keys . Where ( k=> k . Item1 != "All" ) . OrderBy ( k=> k . Item1 ) . ToList ( ) ;
1975
+
1976
+ var keys = ResultsDictionary . Keys . Where ( k => k . Item1 != "All" ) . OrderBy ( k => k . Item1 ) . ToList ( ) ;
1912
1977
if ( keys . Any ( ) )
1913
1978
{
1914
1979
sb . AppendLine ( ) ;
0 commit comments