Skip to content

Commit

Permalink
Merge pull request #403 from Nesvilab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
Felipe da Veiga Leprevost committed Jan 11, 2023
2 parents c2d9335 + 3efe88c commit 39e3618
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 66 deletions.
11 changes: 3 additions & 8 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
### Added
- Adjusted the peptide-to-protein mapping due to I/L replacement rules
- Adjusted the protein FDR calculation rules.


### Changed
- Removed the normalization done at the protein level in labelquant
- Improved the protein filter for small scale analyses

### Fixed
- Adjusted the msstats file format, added Purity to all plex sizes
- Fixed the protein coverage calculations
- Fixed repeated isobaric values that would eventually show up in the protein table

### Fixed
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ BINARY = philosopher
VERSION = $(shell date +%Y%m%d)
BUILD = $(shell date +%Y%m%d%H%M)

TAG = v4.7.0
RC = RC-6
TAG = v4.7.1
RC = RC-2

LDFLAGS = -ldflags "-w -s -extldflags -static -X main.version=${TAG} -X main.build=${BUILD}"

Expand Down
60 changes: 36 additions & 24 deletions lib/id/pep.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ type PeptideIdentification struct {
Intensity float64
PrevAA []byte
NextAA []byte
AlternativeProteins map[string]int
AlternativeProteins map[string]string
MSFragerLoc *MSFraggerLoc
PTM *PTM
Modifications mod.ModificationsSlice
Expand Down Expand Up @@ -399,7 +399,7 @@ func ReadPepXMLInput(xmlFile, decoyTag, temp string, models bool) (PepIDListPtrs
func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag, FileName string) PeptideIdentification {

var psm PeptideIdentification
psm.AlternativeProteins = make(map[string]int)
psm.AlternativeProteins = make(map[string]string)

psm.Index = sq.Index
psm.SpectrumFile = FileName
Expand Down Expand Up @@ -462,7 +462,7 @@ func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag
}

for _, j := range i.AlternativeProteins {
psm.AlternativeProteins[string(j.Protein)]++
psm.AlternativeProteins[string(j.Protein)] = string(j.PepPrevAA) + "#" + string(j.PepNextAA)
}

for _, j := range i.Score {
Expand Down Expand Up @@ -643,62 +643,74 @@ func (p *PepXML4Serialiazation) PromoteProteinIDs() {

var current string
var alt string
var list = make(map[string]int)
var list = make(map[string]string)
var isUniProt bool

if strings.Contains(p.PeptideIdentification[i].Protein, p.DecoyTag) {

current = p.PeptideIdentification[i].Protein

for j := range p.PeptideIdentification[i].AlternativeProteins {
for k, v := range p.PeptideIdentification[i].AlternativeProteins {

if strings.Contains(j, "sp|") {
if strings.Contains(k, "sp|") {
isUniProt = true
}

if !strings.HasPrefix(j, p.DecoyTag) {
list[j]++
if !strings.HasPrefix(k, p.DecoyTag) {
list[k] = v
}
}

}

if len(list) > 0 {

var prevAA string
var nextAA string

// if a Uniprot database is used we give preference to SwissProt proteins
if isUniProt {
for k := range list {
for k, v := range list {

pna := strings.Split(v, "#")

if strings.HasPrefix(k, "sp|") {
alt = k
prevAA = pna[0]
nextAA = pna[1]
break
} else {
alt = k
prevAA = pna[0]
nextAA = pna[1]
}
}
p.PeptideIdentification[i].Protein = alt

// remove the replaces protein from the alternative proteins list
//p.PeptideIdentification[i].AlternativeProteins[list[alt]] = p.PeptideIdentification[i].AlternativeProteins[len(p.PeptideIdentification[i].AlternativeProteins)-1]
//p.PeptideIdentification[i].AlternativeProteins[len(p.PeptideIdentification[i].AlternativeProteins)-1] = ""
//p.PeptideIdentification[i].AlternativeProteins = p.PeptideIdentification[i].AlternativeProteins[:len(p.PeptideIdentification[i].AlternativeProteins)-1]

// add the replaces current to the list
p.PeptideIdentification[i].AlternativeProteins[current]++
p.PeptideIdentification[i].AlternativeProteins[current] = string(p.PeptideIdentification[i].PrevAA) + "#" + string(p.PeptideIdentification[i].NextAA)

p.PeptideIdentification[i].Protein = alt
p.PeptideIdentification[i].PrevAA = []byte(prevAA)
p.PeptideIdentification[i].NextAA = []byte(nextAA)

} else {
for k := range list {
for k, v := range list {

pna := strings.Split(v, "#")

alt = k
prevAA = pna[0]
nextAA = pna[1]
break
}
p.PeptideIdentification[i].Protein = alt

// remove the replaces protein from the alternative proteins list
//p.PeptideIdentification[i].AlternativeProteins[list[alt]] = p.PeptideIdentification[i].AlternativeProteins[len(p.PeptideIdentification[i].AlternativeProteins)-1]
//p.PeptideIdentification[i].AlternativeProteins[len(p.PeptideIdentification[i].AlternativeProteins)-1] = ""
//p.PeptideIdentification[i].AlternativeProteins = p.PeptideIdentification[i].AlternativeProteins[:len(p.PeptideIdentification[i].AlternativeProteins)-1]

// add the replaces current to the list
p.PeptideIdentification[i].AlternativeProteins[current]++
p.PeptideIdentification[i].AlternativeProteins[current] = string(p.PeptideIdentification[i].PrevAA) + "#" + string(p.PeptideIdentification[i].NextAA)

p.PeptideIdentification[i].Protein = alt
p.PeptideIdentification[i].PrevAA = []byte(prevAA)
p.PeptideIdentification[i].NextAA = []byte(nextAA)

}

}
Expand Down
4 changes: 2 additions & 2 deletions lib/inf/inf.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ func ProteinInference(psm id.PepIDList) (id.PepIDList, map[string]string, map[st

if pt != psm[i].Protein {

psm[i].AlternativeProteins[psm[i].Protein]++
psm[i].AlternativeProteins[psm[i].Protein] = string(psm[i].PrevAA) + "#" + string(psm[i].NextAA)

var toRemove string
for j := range psm[i].AlternativeProteins {
Expand All @@ -207,7 +207,7 @@ func ProteinInference(psm id.PepIDList) (id.PepIDList, map[string]string, map[st
}
}

psm[i].AlternativeProteins[psm[i].Protein]++
psm[i].AlternativeProteins[psm[i].Protein] = string(psm[i].PrevAA) + "#" + string(psm[i].NextAA)
delete(psm[i].AlternativeProteins, toRemove)

psm[i].Protein = pt
Expand Down
11 changes: 6 additions & 5 deletions lib/rep/ion.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,18 @@ func (evi *Evidence) AssembleIonReport(ion id.PepIDList, decoyTag string) {
for idx, i := range ion {
pr := &evi.Ions[idx]

//pr.IonForm() = fmt.Sprintf("%s#%d#%.4f", i.Peptide, i.AssumedCharge, i.CalcNeutralPepMass)

pr.Spectra = make(map[id.SpectrumType]int)
pr.MappedGenes = make(map[string]struct{})
pr.MappedProteins = make(map[string]int)
//pr.Modifications.Index = make(map[string]mod.Modification)

pr.Sequence = i.Peptide
pr.ModifiedSequence = i.ModifiedPeptide
pr.MZ = uti.Round(((i.CalcNeutralPepMass + (float64(i.AssumedCharge) * bio.Proton)) / float64(i.AssumedCharge)), 5, 4)
pr.ChargeState = i.AssumedCharge
pr.PeptideMass = i.CalcNeutralPepMass

pr.PrevAA = string(i.PrevAA)
pr.NextAA = string(i.NextAA)

if v, ok := psmIonMap[pr.IonForm()]; ok {
for _, j := range v {
pr.Spectra[j]++
Expand Down Expand Up @@ -92,6 +92,7 @@ func (evi *Evidence) AssembleIonReport(ion id.PepIDList, decoyTag string) {
}
}
pr.Modifications = prModifications.ToSlice()

// is this observation a decoy?
if cla.IsDecoyPSM(i, decoyTag) {
pr.IsDecoy = true
Expand Down Expand Up @@ -146,7 +147,7 @@ func (evi IonEvidenceList) IonReport(workspace, brand, decoyTag string, channels

var headerIndex int
for i := range printSet {
if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.Name) > 0 {
if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.CustomName) > 0 {
headerIndex = i
break
}
Expand Down
13 changes: 10 additions & 3 deletions lib/rep/peptide.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) {

var pepSeqMap = make(map[string]bool) //is this a decoy
var pepCSMap = make(map[string][]uint8)
var pepInt = make(map[string]float64)
var pepProt = make(map[string]string)
var spectra = make(map[string][]id.SpectrumType)
var mappedGenes = make(map[string][]string)
var mappedProts = make(map[string][]string)
var pepInt = make(map[string]float64)
var bestProb = make(map[string]float64)
var prevAA = make(map[string]string)
var nextAA = make(map[string]string)
var spectra = make(map[string][]id.SpectrumType)
var pepMods = make(map[string][]mod.Modification)

for _, i := range pep {
Expand All @@ -42,6 +44,8 @@ func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) {
pepCSMap[i.Peptide] = append(pepCSMap[i.Peptide], i.AssumedCharge)
spectra[i.Peptide] = append(spectra[i.Peptide], i.SpectrumFileName())
pepProt[i.Peptide] = i.Protein
prevAA[i.Peptide] = i.PrevAA
nextAA[i.Peptide] = i.NextAA

if i.Intensity > pepInt[i.Peptide] {
pepInt[i.Peptide] = i.Intensity
Expand Down Expand Up @@ -83,6 +87,9 @@ func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) {

pep.Probability = bestProb[k]

pep.PrevAA = prevAA[k]
pep.NextAA = nextAA[k]

for _, i := range spectra[k] {
pep.Spectra[i] = 0
}
Expand Down Expand Up @@ -166,7 +173,7 @@ func (evi PeptideEvidenceList) PeptideReport(workspace, brand, decoyTag string,

var headerIndex int
for i := range printSet {
if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.Name) > 0 {
if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.CustomName) > 0 {
headerIndex = i
break
}
Expand Down
2 changes: 1 addition & 1 deletion lib/rep/protein.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ func (eviProteins ProteinEvidenceList) ProteinReport(workspace, brand, decoyTag

var headerIndex int
for i := range printSet {
if printSet[i].UniqueLabels != nil && len(printSet[i].UniqueLabels.Channel1.Name) > 0 {
if printSet[i].UniqueLabels != nil && len(printSet[i].UniqueLabels.Channel1.CustomName) > 0 {
headerIndex = i
break
}
Expand Down
3 changes: 2 additions & 1 deletion lib/rep/psm.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,9 @@ func (evi PSMEvidenceList) PSMReport(workspace, brand, decoyTag string, channels

var headerIndex int
for i := range printSet {
if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.Name) > 0 {
if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.CustomName) > 0 {
headerIndex = i
//fmt.Println(headerIndex, printSet[i].Labels.Channel1.Name, printSet[i].Labels.Channel1.CustomName)
break
}
}
Expand Down
39 changes: 19 additions & 20 deletions lib/rep/updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,12 +427,6 @@ func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) {
}
}

type prevNextAA struct {
prev string
next string
}
var pepPrevNextAA = make(map[string]prevNextAA)

replacerIL := strings.NewReplacer("L", "I")
for i := range evi.PSM {

Expand All @@ -449,19 +443,30 @@ func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) {
}
}

peptide := string(evi.PSM[i].PrevAA) + replacerIL.Replace(evi.PSM[i].Peptide) + string(evi.PSM[i].NextAA)
var adjustStart = 0
var adjustEnd = 0
peptide := replacerIL.Replace(evi.PSM[i].Peptide)

// map the peptide to the protein
mstart := strings.Index(replacerIL.Replace(rec.Sequence), peptide)
mend := mstart + len(evi.PSM[i].Peptide)
if evi.PSM[i].PrevAA != "-" && len(evi.PSM[i].PrevAA) == 1 {
peptide = replacerIL.Replace(evi.PSM[i].PrevAA) + peptide
adjustStart = +2
}

if mstart != -1 {
evi.PSM[i].ProteinStart = mstart + 2
evi.PSM[i].ProteinEnd = mend + 1
if evi.PSM[i].PrevAA == "-" && len(evi.PSM[i].PrevAA) == 1 {
adjustStart = +1
}

pepPrevNextAA[evi.PSM[i].Peptide] = prevNextAA{evi.PSM[i].PrevAA, evi.PSM[i].NextAA}
if evi.PSM[i].NextAA != "-" && len(evi.PSM[i].NextAA) == 1 {
peptide = peptide + replacerIL.Replace(evi.PSM[i].NextAA)
adjustEnd = -1
}

// map the peptide to the protein
mstart := strings.Index(replacerIL.Replace(rec.Sequence), peptide)
mend := mstart + len(peptide)

evi.PSM[i].ProteinStart = mstart + adjustStart
evi.PSM[i].ProteinEnd = mend + adjustEnd
}

for i := range evi.Ions {
Expand All @@ -483,9 +488,6 @@ func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) {
evi.Ions[i].MappedGenes[recordMap[k].GeneNames] = struct{}{}
}
}
pnAA := pepPrevNextAA[evi.Ions[i].Sequence]
evi.Ions[i].PrevAA = pnAA.prev
evi.Ions[i].NextAA = pnAA.next
}

for i := range evi.Peptides {
Expand All @@ -506,9 +508,6 @@ func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) {
evi.Peptides[i].MappedGenes[recordMap[k].GeneNames] = struct{}{}
}
}
pnAA := pepPrevNextAA[evi.Peptides[i].Sequence]
evi.Peptides[i].PrevAA = pnAA.prev
evi.Peptides[i].NextAA = pnAA.next
}
}

Expand Down

0 comments on commit 39e3618

Please sign in to comment.