diff --git a/cmd/inspect.go b/cmd/inspect.go index 88233bcd..d3fcad49 100644 --- a/cmd/inspect.go +++ b/cmd/inspect.go @@ -13,10 +13,10 @@ import ( "philosopher/lib/msg" "philosopher/lib/qua" "philosopher/lib/rep" + "philosopher/lib/sys" "github.com/davecgh/go-spew/spew" "github.com/spf13/cobra" - "github.com/vmihailenco/msgpack" ) var object string @@ -34,90 +34,54 @@ var inspectCmd = &cobra.Command{ var o met.Data target := fmt.Sprintf(".meta%smeta.bin", string(filepath.Separator)) - file, _ := os.Open(target) - - dec := msgpack.NewDecoder(file) - e := dec.Decode(&o) - if e != nil { - msg.DecodeMsgPck(e, "fatal") - } + sys.Restore(&o, target, false) if key == "session" { fmt.Println(o.UUID) } else { spew.Dump(o) } + case "psm": var o rep.PSMEvidenceList target := fmt.Sprintf(".meta%spsm.bin", string(filepath.Separator)) - file, _ := os.Open(target) - - dec := msgpack.NewDecoder(file) - e := dec.Decode(&o) - if e != nil { - msg.DecodeMsgPck(e, "fatal") - } + sys.Restore(&o, target, false) spew.Dump(o) + case "peptide": var o rep.PeptideEvidenceList target := fmt.Sprintf(".meta%spep.bin", string(filepath.Separator)) - file, _ := os.Open(target) - - dec := msgpack.NewDecoder(file) - e := dec.Decode(&o) - if e != nil { - msg.DecodeMsgPck(e, "fatal") - } + sys.Restore(&o, target, false) spew.Dump(o) + case "db": var o dat.Base target := fmt.Sprintf(".meta%sdb.bin", string(filepath.Separator)) - file, _ := os.Open(target) - - dec := msgpack.NewDecoder(file) - e := dec.Decode(&o) - if e != nil { - msg.DecodeMsgPck(e, "fatal") - } + sys.Restore(&o, target, false) spew.Dump(o.Records) + case "lfq": var o qua.LFQ target := fmt.Sprintf(".meta%slfq.bin", string(filepath.Separator)) - file, _ := os.Open(target) - - dec := msgpack.NewDecoder(file) - e := dec.Decode(&o) - if e != nil { - msg.DecodeMsgPck(e, "fatal") - } + sys.Restore(&o, target, false) spew.Dump(o.Intensities) + case "razor": var o fil.RazorMap target := fmt.Sprintf(".meta%srazor.bin", string(filepath.Separator)) - file, _ := os.Open(target) - - dec := msgpack.NewDecoder(file) - e := dec.Decode(&o) - if e != nil { - msg.DecodeMsgPck(e, "fatal") - } + sys.Restore(&o, target, false) spew.Dump(o) + case "protein": var o rep.ProteinEvidenceList target := fmt.Sprintf(".meta%spro.bin", string(filepath.Separator)) - file, _ := os.Open(target) - - dec := msgpack.NewDecoder(file) - e := dec.Decode(&o) - if e != nil { - msg.DecodeMsgPck(e, "fatal") - } + sys.Restore(&o, target, false) if len(key) > 0 { @@ -130,6 +94,7 @@ var inspectCmd = &cobra.Command{ } else { spew.Dump(o) } + default: msg.Custom(errors.New("the option is not available"), "fatal") } diff --git a/cmd/proproph.go b/cmd/proproph.go index 94a86c2a..9da8dde0 100644 --- a/cmd/proproph.go +++ b/cmd/proproph.go @@ -48,6 +48,7 @@ func init() { proprophCmd.Flags().BoolVarP(&m.ProteinProphet.Glyc, "glyc", "", false, "highlight peptide N-glycosylation motif") proprophCmd.Flags().BoolVarP(&m.ProteinProphet.Fpkm, "fpkm", "", false, "model protein FPKM values") proprophCmd.Flags().BoolVarP(&m.ProteinProphet.NonSP, "nonsp", "", false, "do not use NSP model") + proprophCmd.Flags().BoolVarP(&m.ProteinProphet.Subgroups, "subgroups", "", false, "do not use NOGROUPS") proprophCmd.Flags().IntVarP(&m.ProteinProphet.Minindep, "minindep", "", 0, "minimum percentage of independent peptides required for a protein") proprophCmd.Flags().Float64VarP(&m.ProteinProphet.Minprob, "minprob", "", 0.05, "PeptideProphet probability threshold") proprophCmd.Flags().IntVarP(&m.ProteinProphet.Maxppmdiff, "maxppmdiff", "", 2000000, "maximum peptide mass difference in ppm") diff --git a/go.mod b/go.mod index ab5ebfb6..372901f6 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,6 @@ go 1.13 require ( github.com/ajstarks/svgo v0.0.0-20200204031535-0cbcf57ea1d8 // indirect github.com/davecgh/go-spew v1.1.1 - github.com/golang/protobuf v1.3.4 // indirect github.com/gorilla/websocket v1.4.1 // indirect github.com/jpillora/go-ogle-analytics v0.0.0-20161213085824-14b04e0594ef github.com/jung-kurt/gofpdf v1.16.2 // indirect @@ -22,15 +21,12 @@ require ( github.com/sirupsen/logrus v1.4.2 github.com/spf13/cobra v0.0.6 github.com/spf13/pflag v1.0.5 // indirect - github.com/stretchr/testify v1.5.1 // indirect - github.com/vmihailenco/msgpack v4.0.4+incompatible + github.com/vmihailenco/msgpack/v5 v5.3.5 golang.org/x/exp v0.0.0-20200228211341-fcea875c7e85 // indirect golang.org/x/image v0.0.0-20200119044424-58c23975cae1 // indirect - golang.org/x/net v0.0.0-20200301022130-244492dfa37a // indirect golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527 // indirect gonum.org/v1/netlib v0.0.0-20200229103305-d71f404090bf // indirect gonum.org/v1/plot v0.7.0 - google.golang.org/appengine v1.6.5 // indirect gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect gopkg.in/yaml.v2 v2.2.8 ) diff --git a/go.sum b/go.sum index 123042ed..4a01cc55 100644 --- a/go.sum +++ b/go.sum @@ -45,8 +45,6 @@ github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4er github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.4 h1:87PNWwrRvUSnqS4dlcBU/ftvOIBep4sYuBLlh6rX2wk= -github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/gorilla/websocket v1.2.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= @@ -100,6 +98,8 @@ github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/profile v1.6.0 h1:hUDfIISABYI59DyeB3OTay/HxSRwTQ8rB/H83k6r5dM= +github.com/pkg/profile v1.6.0/go.mod h1:qBsxPvzyUincmltOk6iyRVxHYg4adc0OFOv72ZdLa18= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= @@ -139,12 +139,14 @@ github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/y github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= -github.com/vmihailenco/msgpack v4.0.4+incompatible h1:dSLoQfGFAo3F6OoNhwUmLwVgaUXK79GlxNBwueZn0xI= -github.com/vmihailenco/msgpack v4.0.4+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk= +github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9znI5mJU= +github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= +github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= @@ -179,10 +181,7 @@ golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -202,11 +201,9 @@ golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527 h1:uYVVQ9WP/Ds2ROhcaGPeIdVq0RIXVLwsHlnvJ+cT1So= golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= @@ -223,8 +220,6 @@ gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZ gonum.org/v1/plot v0.7.0 h1:Otpxyvra6Ie07ft50OX5BrCfS/BWEMvhsCUHwPEJmLI= gonum.org/v1/plot v0.7.0/go.mod h1:2wtU6YrrdQAhAF9+MTd5tOQjrov/zF70b1i99Npjvgo= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM= -google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -239,6 +234,8 @@ gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= modernc.org/cc v1.0.0/go.mod h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw= modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= diff --git a/lib/aba/pep.go b/lib/aba/pep.go index 6fb52b1a..d05e08c9 100644 --- a/lib/aba/pep.go +++ b/lib/aba/pep.go @@ -8,6 +8,7 @@ import ( "io/ioutil" "os" "path/filepath" + "philosopher/lib/mod" "sort" "strconv" "strings" @@ -104,9 +105,6 @@ func peptideLevelAbacus(m met.Data, args []string) { // processPeptideCombinedFile reads and filter the combined peptide report func processPeptideCombinedFile(a met.Abacus) { - var pepID id.PepIDList - var filteredPeptides id.PepIDList - if _, e := os.Stat("combined.pep.xml"); os.IsNotExist(e) { msg.NoParametersFound(errors.New("cannot find the combined.pep.xml file"), "fatal") @@ -116,13 +114,12 @@ func processPeptideCombinedFile(a met.Abacus) { var pep id.PepXML pep.DecoyTag = a.Tag - pepID, _ = id.ReadPepXMLInput("combined.pep.xml", a.Tag, sys.GetTemp(), false) - + pepID, _ := id.ReadPepXMLInput("combined.pep.xml", a.Tag, sys.GetTemp(), false) //uniqPsms := fil.GetUniquePSMs(pepID) uniqPeps := fil.GetUniquePeptides(pepID) //filteredPSMs, _ := fil.PepXMLFDRFilter(uniqPsms, 0.01, "PSM", a.Tag) - filteredPeptides, _ = fil.PepXMLFDRFilter(uniqPeps, 0.01, "Peptide", a.Tag) + filteredPeptides, _ := fil.PepXMLFDRFilter(uniqPeps, 0.01, "Peptide", a.Tag) filteredPeptides.Serialize("pep") } @@ -179,8 +176,8 @@ func SummarizeAttributes(evidences rep.CombinedPeptideEvidenceList, datasets map for _, j := range evi.Peptides { - for _, k := range j.Modifications.Index { - if k.Type == "Assigned" { + for _, k := range j.Modifications.IndexSlice { + if k.Type == mod.Assigned { mass := strconv.FormatFloat(k.MassDiff, 'f', 6, 64) ModsMap[j.Sequence] = append(ModsMap[j.Sequence], mass) } diff --git a/lib/aba/pro.go b/lib/aba/pro.go index b7f2708a..e521c03d 100644 --- a/lib/aba/pro.go +++ b/lib/aba/pro.go @@ -9,7 +9,6 @@ import ( "os" "path/filepath" "sort" - "strconv" "strings" "philosopher/lib/iso" @@ -196,7 +195,7 @@ func processProteinCombinedFile(a met.Abacus, database dat.Base) rep.CombinedPro ce.SupportingSpectra = make(map[string]string) ce.ProteinName = j.ProteinName - ce.Length, _ = strconv.Atoi(j.Length) + ce.Length = j.Length ce.Coverage = j.PercentCoverage ce.GroupNumber = j.GroupNumber ce.SiblingID = j.GroupSiblingID @@ -312,9 +311,9 @@ func getProteinLabelIntensities(combined rep.CombinedProteinEvidenceList, datase for i := range combined { for _, j := range v.Proteins { if combined[i].ProteinID == j.ProteinID && !strings.Contains(j.OriginalHeader, decoyTag) { - combined[i].TotalLabels[k] = j.TotalLabels - combined[i].UniqueLabels[k] = j.UniqueLabels - combined[i].URazorLabels[k] = j.URazorLabels + combined[i].TotalLabels[k] = *j.TotalLabels + combined[i].UniqueLabels[k] = *j.UniqueLabels + combined[i].URazorLabels[k] = *j.URazorLabels break } } @@ -330,9 +329,9 @@ func sumProteinIntensities(combined rep.CombinedProteinEvidenceList, datasets ma for k, v := range datasets { - var ions = make(map[string]float64) + var ions = make(map[id.IonFormType]float64) for _, i := range v.Ions { - ions[i.IonForm] = i.Intensity + ions[i.IonForm()] = i.Intensity } for _, i := range combined { diff --git a/lib/dat/dat.go b/lib/dat/dat.go index 5859df76..1164fb34 100644 --- a/lib/dat/dat.go +++ b/lib/dat/dat.go @@ -21,7 +21,7 @@ import ( "philosopher/lib/sys" "github.com/sirupsen/logrus" - "github.com/vmihailenco/msgpack" + "github.com/vmihailenco/msgpack/v5" ) // Base main structure @@ -234,7 +234,7 @@ func (d *Base) Create(temp, add, enz, tag string, crap, noD, cTag bool) { for k, v := range crapMap { - if cTag { + if cTag && !strings.Contains(k, "_HUMAN") && !strings.Contains(k, "OX=9606") { k = "contam_" + k } @@ -361,17 +361,7 @@ func (d *Base) Serialize() { // Restore reads philosopher results files and restore the data sctructure func (d *Base) Restore() { - - b, e := ioutil.ReadFile(sys.DBBin()) - if e != nil { - msg.MarshalFile(e, "warning") - } - - e = msgpack.Unmarshal(b, &d) - if e != nil { - msg.SerializeFile(e, "warning") - } - + sys.Restore(d, sys.DBBin(), false) } // RestoreWithPath reads philosopher results files and restore the data sctructure @@ -379,15 +369,7 @@ func (d *Base) RestoreWithPath(p string) { path := fmt.Sprintf("%s%s%s", p, string(filepath.Separator), sys.DBBin()) path, _ = filepath.Abs(path) - - file, _ := os.Open(path) - - dec := msgpack.NewDecoder(file) - e := dec.Decode(&d) - if e != nil { - msg.DecodeMsgPck(e, "fatal") - } - + sys.Restore(d, path, false) } // reverseSeq returns its argument string reversed rune-wise left to right. diff --git a/lib/dat/dat_test.go b/lib/dat/dat_test.go index 6c2c4430..cf3fccdf 100644 --- a/lib/dat/dat_test.go +++ b/lib/dat/dat_test.go @@ -73,7 +73,7 @@ func TestBase_ProcessDB(t *testing.T) { } d.ProcessDB(tt.args.file, tt.args.decoyTag) - if len(d.Records) != 20360 { + if len(d.Records) != 20361 { t.Errorf("Number of FASTA entries is incorrect, got %d, want %d", len(d.Records), 20360) } }) diff --git a/lib/ext/cdhit/cdhit.go b/lib/ext/cdhit/cdhit.go index 47fadd88..eec4e46b 100644 --- a/lib/ext/cdhit/cdhit.go +++ b/lib/ext/cdhit/cdhit.go @@ -8,8 +8,6 @@ import ( "philosopher/lib/msg" - ucdhit "philosopher/lib/ext/cdhit/unix" - wcdhit "philosopher/lib/ext/cdhit/win" "philosopher/lib/met" "philosopher/lib/sys" ) @@ -56,25 +54,6 @@ func New() CDhit { return o } -// Deploy generates binaries on workdir -func (c *CDhit) Deploy() { - - if c.OS == sys.Windows() { - - // deploy cd-hit binary - wcdhit.Win64(c.WinBin) - c.DefaultBin = c.WinBin - - } else { - - // deploy cd-hit binary - ucdhit.Unix64(c.UnixBin) - c.DefaultBin = c.UnixBin - - } - -} - // Run runs the cdhit binary with user's information func (c *CDhit) Run(level float64) { diff --git a/lib/ext/cdhit/cdhit_unix.go b/lib/ext/cdhit/cdhit_unix.go new file mode 100644 index 00000000..9f50b337 --- /dev/null +++ b/lib/ext/cdhit/cdhit_unix.go @@ -0,0 +1,15 @@ +//go:build linux +// +build linux + +package cdhit + +import ( + ucdhit "philosopher/lib/ext/cdhit/unix" +) + +// Deploy generates binaries on workdir +func (c *CDhit) Deploy() { + // deploy cd-hit binary + ucdhit.Unix64(c.UnixBin) + c.DefaultBin = c.UnixBin +} diff --git a/lib/ext/cdhit/cdhit_win.go b/lib/ext/cdhit/cdhit_win.go new file mode 100644 index 00000000..27e66d07 --- /dev/null +++ b/lib/ext/cdhit/cdhit_win.go @@ -0,0 +1,15 @@ +//go:build windows +// +build windows + +package cdhit + +import ( + wcdhit "philosopher/lib/ext/cdhit/win" +) + +// Deploy generates binaries on workdir +func (c *CDhit) Deploy() { + // deploy cd-hit binary + wcdhit.Win64(c.WinBin) + c.DefaultBin = c.WinBin +} diff --git a/lib/ext/comet/comet.go b/lib/ext/comet/comet.go index 080656d2..41e3fc04 100644 --- a/lib/ext/comet/comet.go +++ b/lib/ext/comet/comet.go @@ -9,8 +9,6 @@ import ( "path/filepath" "strings" - ucomet "philosopher/lib/ext/comet/unix" - wcomet "philosopher/lib/ext/comet/win" "philosopher/lib/met" "philosopher/lib/msg" "philosopher/lib/sys" @@ -55,7 +53,7 @@ func Run(m met.Data, args []string) met.Data { } // deploy the binaries - cmt.Deploy(m.OS, m.Arch) + cmt.Deploy(m.Arch) if m.Comet.Print { logrus.Info("Printing parameter file") @@ -100,38 +98,6 @@ func Run(m met.Data, args []string) met.Data { return m } -// Deploy generates comet binary on workdir bin directory -func (c *Comet) Deploy(os, arch string) { - - if os == sys.Windows() { - - // deploy comet param file - wcomet.WinParameterFile(c.WinParam) - c.DefaultParam = c.WinParam - - if arch == sys.Arch386() { - wcomet.Win32(c.Win32) - c.DefaultBin = c.Win32 - - } else { - wcomet.Win64(c.Win64) - c.DefaultBin = c.Win64 - } - - } else { - - // deploy comet param file - ucomet.UnixParameterFile(c.UnixParam) - c.DefaultParam = c.UnixParam - - // deploy comet - ucomet.Unix64(c.Unix64) - c.DefaultBin = c.Unix64 - - } - -} - // Execute is the main function to execute Comet func (c *Comet) Execute(cmdArgs []string, param string) { diff --git a/lib/ext/comet/comet_unix.go b/lib/ext/comet/comet_unix.go new file mode 100644 index 00000000..57ab74fd --- /dev/null +++ b/lib/ext/comet/comet_unix.go @@ -0,0 +1,19 @@ +//go:build linux +// +build linux + +package comet + +import ( + ucomet "philosopher/lib/ext/comet/unix" +) + +// Deploy generates comet binary on workdir bin directory +func (c *Comet) Deploy(arch string) { + // deploy comet param file + ucomet.UnixParameterFile(c.UnixParam) + c.DefaultParam = c.UnixParam + + // deploy comet + ucomet.Unix64(c.Unix64) + c.DefaultBin = c.Unix64 +} diff --git a/lib/ext/comet/comet_win.go b/lib/ext/comet/comet_win.go new file mode 100644 index 00000000..a6c93dc1 --- /dev/null +++ b/lib/ext/comet/comet_win.go @@ -0,0 +1,25 @@ +//go:build windows +// +build windows + +package comet + +import ( + wcomet "philosopher/lib/ext/comet/win" + "philosopher/lib/sys" +) + +// Deploy generates comet binary on workdir bin directory +func (c *Comet) Deploy(arch string) { + // deploy comet param file + wcomet.WinParameterFile(c.WinParam) + c.DefaultParam = c.WinParam + + if arch == sys.Arch386() { + wcomet.Win32(c.Win32) + c.DefaultBin = c.Win32 + + } else { + wcomet.Win64(c.Win64) + c.DefaultBin = c.Win64 + } +} diff --git a/lib/ext/interprophet/interprophet.go b/lib/ext/interprophet/interprophet.go index 4c26130f..b0bae24a 100644 --- a/lib/ext/interprophet/interprophet.go +++ b/lib/ext/interprophet/interprophet.go @@ -8,8 +8,6 @@ import ( "path/filepath" "strings" - unix "philosopher/lib/ext/interprophet/unix" - wiPr "philosopher/lib/ext/interprophet/win" "philosopher/lib/met" "philosopher/lib/msg" "philosopher/lib/sys" @@ -48,7 +46,7 @@ func Run(m met.Data, args []string) met.Data { } // deploy the binaries - itp.Deploy(m.OS, m.Distro) + itp.Deploy(m.Distro) // run InterProphet itp.Execute(m.InterProphet, m.Home, m.Temp, args) @@ -58,27 +56,6 @@ func Run(m met.Data, args []string) met.Data { return m } -// Deploy generates comet binary on workdir bin directory -func (i *InterProphet) Deploy(os, distro string) { - - if os == sys.Windows() { - wiPr.WinInterProphetParser(i.WinInterProphetParser) - i.DefaultInterProphetParser = i.WinInterProphetParser - wiPr.LibgccDLL(i.LibgccDLL) - wiPr.Zlib1DLL(i.Zlib1DLL) - } else { - if strings.EqualFold(distro, sys.Debian()) { - unix.UnixInterProphetParser(i.UnixInterProphetParser) - i.DefaultInterProphetParser = i.UnixInterProphetParser - } else if strings.EqualFold(distro, sys.Redhat()) { - unix.UnixInterProphetParser(i.UnixInterProphetParser) - i.DefaultInterProphetParser = i.UnixInterProphetParser - } else { - msg.UnsupportedDistribution(errors.New(""), "fatal") - } - } -} - // Execute IProphet func (i InterProphet) Execute(params met.InterProphet, home, temp string, args []string) []string { diff --git a/lib/ext/interprophet/interprophet_unix.go b/lib/ext/interprophet/interprophet_unix.go new file mode 100644 index 00000000..1f4aba81 --- /dev/null +++ b/lib/ext/interprophet/interprophet_unix.go @@ -0,0 +1,27 @@ +//go:build linux +// +build linux + +package interprophet + +import ( + "errors" + "strings" + + unix "philosopher/lib/ext/interprophet/unix" + "philosopher/lib/msg" + "philosopher/lib/sys" +) + +// Deploy generates comet binary on workdir bin directory +func (i *InterProphet) Deploy(distro string) { + + if strings.EqualFold(distro, sys.Debian()) { + unix.UnixInterProphetParser(i.UnixInterProphetParser) + i.DefaultInterProphetParser = i.UnixInterProphetParser + } else if strings.EqualFold(distro, sys.Redhat()) { + unix.UnixInterProphetParser(i.UnixInterProphetParser) + i.DefaultInterProphetParser = i.UnixInterProphetParser + } else { + msg.UnsupportedDistribution(errors.New(""), "fatal") + } +} diff --git a/lib/ext/interprophet/interprophet_win.go b/lib/ext/interprophet/interprophet_win.go new file mode 100644 index 00000000..d5e0076d --- /dev/null +++ b/lib/ext/interprophet/interprophet_win.go @@ -0,0 +1,17 @@ +//go:build windows +// +build windows + +package interprophet + +import ( + wiPr "philosopher/lib/ext/interprophet/win" +) + +// Deploy generates comet binary on workdir bin directory +func (i *InterProphet) Deploy(distro string) { + + wiPr.WinInterProphetParser(i.WinInterProphetParser) + i.DefaultInterProphetParser = i.WinInterProphetParser + wiPr.LibgccDLL(i.LibgccDLL) + wiPr.Zlib1DLL(i.Zlib1DLL) +} diff --git a/lib/ext/interprophet/unix/bindata.go.zip b/lib/ext/interprophet/unix/bindata.go.zip index 5d3cbd5c..4026462c 100644 Binary files a/lib/ext/interprophet/unix/bindata.go.zip and b/lib/ext/interprophet/unix/bindata.go.zip differ diff --git a/lib/ext/interprophet/win/bindata.go.zip b/lib/ext/interprophet/win/bindata.go.zip index 74441c83..25c9a207 100644 Binary files a/lib/ext/interprophet/win/bindata.go.zip and b/lib/ext/interprophet/win/bindata.go.zip differ diff --git a/lib/ext/peptideprophet/peptideprophet.go b/lib/ext/peptideprophet/peptideprophet.go index d0ecd9cb..f3386dbe 100644 --- a/lib/ext/peptideprophet/peptideprophet.go +++ b/lib/ext/peptideprophet/peptideprophet.go @@ -8,11 +8,8 @@ import ( "path/filepath" "strings" - unix "philosopher/lib/ext/peptideprophet/unix" - wPeP "philosopher/lib/ext/peptideprophet/win" "philosopher/lib/met" "philosopher/lib/msg" - "philosopher/lib/sys" ) // PeptideProphet is the main tool data configuration structure @@ -64,7 +61,7 @@ func Run(m met.Data, args []string) met.Data { } // deploy the binaries - pep.Deploy(m.OS, m.Distro) + pep.Deploy(m.Distro) // run pep.Execute(m.PeptideProphet, m.Home, m.Temp, args) @@ -74,40 +71,6 @@ func Run(m met.Data, args []string) met.Data { return m } -// Deploy PeptideProphet binaries on binary directory -func (p *PeptideProphet) Deploy(os, distro string) { - - if os == sys.Windows() { - wPeP.WinInteractParser(p.WinInteractParser) - p.DefaultInteractParser = p.WinInteractParser - wPeP.WinRefreshParser(p.WinRefreshParser) - p.DefaultRefreshParser = p.WinRefreshParser - wPeP.WinPeptideProphetParser(p.WinPeptideProphetParser) - p.DefaultPeptideProphetParser = p.WinPeptideProphetParser - wPeP.LibgccDLL(p.LibgccDLL) - wPeP.Zlib1DLL(p.Zlib1DLL) - wPeP.Mv(p.Mv) - } else { - if strings.EqualFold(distro, sys.Debian()) { - unix.UnixInteractParser(p.UnixInteractParser) - p.DefaultInteractParser = p.UnixInteractParser - unix.UnixRefreshParser(p.UnixRefreshParser) - p.DefaultRefreshParser = p.UnixRefreshParser - unix.UnixPeptideProphetParser(p.UnixPeptideProphetParser) - p.DefaultPeptideProphetParser = p.UnixPeptideProphetParser - } else if strings.EqualFold(distro, sys.Redhat()) { - unix.UnixInteractParser(p.UnixInteractParser) - p.DefaultInteractParser = p.UnixInteractParser - unix.UnixRefreshParser(p.UnixRefreshParser) - p.DefaultRefreshParser = p.UnixRefreshParser - unix.UnixPeptideProphetParser(p.UnixPeptideProphetParser) - p.DefaultPeptideProphetParser = p.UnixPeptideProphetParser - } else { - msg.UnsupportedDistribution(errors.New(""), "fatal") - } - } -} - // Execute PeptideProphet func (p PeptideProphet) Execute(params met.PeptideProphet, home, temp string, args []string) []string { diff --git a/lib/ext/peptideprophet/peptideprophet_unix.go b/lib/ext/peptideprophet/peptideprophet_unix.go new file mode 100644 index 00000000..84a39394 --- /dev/null +++ b/lib/ext/peptideprophet/peptideprophet_unix.go @@ -0,0 +1,35 @@ +//go:build linux +// +build linux + +package peptideprophet + +import ( + "errors" + "strings" + + unix "philosopher/lib/ext/peptideprophet/unix" + "philosopher/lib/msg" + "philosopher/lib/sys" +) + +// Deploy PeptideProphet binaries on binary directory +func (p *PeptideProphet) Deploy(distro string) { + + if strings.EqualFold(distro, sys.Debian()) { + unix.UnixInteractParser(p.UnixInteractParser) + p.DefaultInteractParser = p.UnixInteractParser + unix.UnixRefreshParser(p.UnixRefreshParser) + p.DefaultRefreshParser = p.UnixRefreshParser + unix.UnixPeptideProphetParser(p.UnixPeptideProphetParser) + p.DefaultPeptideProphetParser = p.UnixPeptideProphetParser + } else if strings.EqualFold(distro, sys.Redhat()) { + unix.UnixInteractParser(p.UnixInteractParser) + p.DefaultInteractParser = p.UnixInteractParser + unix.UnixRefreshParser(p.UnixRefreshParser) + p.DefaultRefreshParser = p.UnixRefreshParser + unix.UnixPeptideProphetParser(p.UnixPeptideProphetParser) + p.DefaultPeptideProphetParser = p.UnixPeptideProphetParser + } else { + msg.UnsupportedDistribution(errors.New(""), "fatal") + } +} diff --git a/lib/ext/peptideprophet/peptideprophet_win.go b/lib/ext/peptideprophet/peptideprophet_win.go new file mode 100644 index 00000000..3b678e75 --- /dev/null +++ b/lib/ext/peptideprophet/peptideprophet_win.go @@ -0,0 +1,21 @@ +//go:build windows +// +build windows + +package peptideprophet + +import ( + wPeP "philosopher/lib/ext/peptideprophet/win" +) + +// Deploy PeptideProphet binaries on binary directory +func (p *PeptideProphet) Deploy(distro string) { + wPeP.WinInteractParser(p.WinInteractParser) + p.DefaultInteractParser = p.WinInteractParser + wPeP.WinRefreshParser(p.WinRefreshParser) + p.DefaultRefreshParser = p.WinRefreshParser + wPeP.WinPeptideProphetParser(p.WinPeptideProphetParser) + p.DefaultPeptideProphetParser = p.WinPeptideProphetParser + wPeP.LibgccDLL(p.LibgccDLL) + wPeP.Zlib1DLL(p.Zlib1DLL) + wPeP.Mv(p.Mv) +} diff --git a/lib/ext/peptideprophet/unix/bindata.go.zip b/lib/ext/peptideprophet/unix/bindata.go.zip index 5113f805..ed870540 100644 Binary files a/lib/ext/peptideprophet/unix/bindata.go.zip and b/lib/ext/peptideprophet/unix/bindata.go.zip differ diff --git a/lib/ext/peptideprophet/win/bindata.go.zip b/lib/ext/peptideprophet/win/bindata.go.zip index 9e386f4c..4e1b3f29 100644 Binary files a/lib/ext/peptideprophet/win/bindata.go.zip and b/lib/ext/peptideprophet/win/bindata.go.zip differ diff --git a/lib/ext/proteinprophet/deploy_unix.go b/lib/ext/proteinprophet/deploy_unix.go new file mode 100644 index 00000000..96947169 --- /dev/null +++ b/lib/ext/proteinprophet/deploy_unix.go @@ -0,0 +1,34 @@ +//go:build linux +// +build linux + +package proteinprophet + +import ( + "errors" + unix "philosopher/lib/ext/proteinprophet/unix" + "philosopher/lib/msg" + "philosopher/lib/sys" + "strings" +) + +// Deploy generates comet binary on workdir bin directory +func (p *ProteinProphet) Deploy(distro string) { + + if strings.EqualFold(distro, sys.Debian()) { + unix.UnixBatchCoverage(p.UnixBatchCoverage) + p.DefaultBatchCoverage = p.UnixBatchCoverage + unix.UnixDatabaseParser(p.UnixDatabaseParser) + p.DefaultDatabaseParser = p.UnixDatabaseParser + unix.UnixProteinProphet(p.UnixProteinProphet) + p.DefaultProteinProphet = p.UnixProteinProphet + } else if strings.EqualFold(distro, sys.Redhat()) { + unix.UnixBatchCoverage(p.UnixBatchCoverage) + p.DefaultBatchCoverage = p.UnixBatchCoverage + unix.UnixDatabaseParser(p.UnixDatabaseParser) + p.DefaultDatabaseParser = p.UnixDatabaseParser + unix.UnixProteinProphet(p.UnixProteinProphet) + p.DefaultProteinProphet = p.UnixProteinProphet + } else { + msg.UnsupportedDistribution(errors.New(""), "fatal") + } +} diff --git a/lib/ext/proteinprophet/deploy_win.go b/lib/ext/proteinprophet/deploy_win.go new file mode 100644 index 00000000..fe740cdf --- /dev/null +++ b/lib/ext/proteinprophet/deploy_win.go @@ -0,0 +1,21 @@ +//go:build windows +// +build windows + +package proteinprophet + +import ( + wPoP "philosopher/lib/ext/proteinprophet/win" +) + +// Deploy generates comet binary on workdir bin directory +func (p *ProteinProphet) Deploy(distro string) { + + wPoP.WinBatchCoverage(p.WinBatchCoverage) + p.DefaultBatchCoverage = p.WinBatchCoverage + wPoP.WinDatabaseParser(p.WinDatabaseParser) + p.DefaultDatabaseParser = p.WinDatabaseParser + wPoP.WinProteinProphet(p.WinProteinProphet) + p.DefaultProteinProphet = p.WinProteinProphet + wPoP.LibgccDLL(p.LibgccDLL) + wPoP.Zlib1DLL(p.Zlib1DLL) +} diff --git a/lib/ext/proteinprophet/proteinprophet.go b/lib/ext/proteinprophet/proteinprophet.go index 5e6fa2b9..b95bb3d0 100644 --- a/lib/ext/proteinprophet/proteinprophet.go +++ b/lib/ext/proteinprophet/proteinprophet.go @@ -8,8 +8,6 @@ import ( "path/filepath" "strings" - unix "philosopher/lib/ext/proteinprophet/unix" - wPoP "philosopher/lib/ext/proteinprophet/win" "philosopher/lib/met" "philosopher/lib/msg" "philosopher/lib/sys" @@ -58,7 +56,7 @@ func Run(m met.Data, args []string) met.Data { } // deploy the binaries - pop.Deploy(m.OS, m.Distro) + pop.Deploy(m.Distro) // run ProteinProphet pop.Execute(m.ProteinProphet, m.Home, m.Temp, args) @@ -68,39 +66,6 @@ func Run(m met.Data, args []string) met.Data { return m } -// Deploy generates comet binary on workdir bin directory -func (p *ProteinProphet) Deploy(os, distro string) { - - if os == sys.Windows() { - wPoP.WinBatchCoverage(p.WinBatchCoverage) - p.DefaultBatchCoverage = p.WinBatchCoverage - wPoP.WinDatabaseParser(p.WinDatabaseParser) - p.DefaultDatabaseParser = p.WinDatabaseParser - wPoP.WinProteinProphet(p.WinProteinProphet) - p.DefaultProteinProphet = p.WinProteinProphet - wPoP.LibgccDLL(p.LibgccDLL) - wPoP.Zlib1DLL(p.Zlib1DLL) - } else { - if strings.EqualFold(distro, sys.Debian()) { - unix.UnixBatchCoverage(p.UnixBatchCoverage) - p.DefaultBatchCoverage = p.UnixBatchCoverage - unix.UnixDatabaseParser(p.UnixDatabaseParser) - p.DefaultDatabaseParser = p.UnixDatabaseParser - unix.UnixProteinProphet(p.UnixProteinProphet) - p.DefaultProteinProphet = p.UnixProteinProphet - } else if strings.EqualFold(distro, sys.Redhat()) { - unix.UnixBatchCoverage(p.UnixBatchCoverage) - p.DefaultBatchCoverage = p.UnixBatchCoverage - unix.UnixDatabaseParser(p.UnixDatabaseParser) - p.DefaultDatabaseParser = p.UnixDatabaseParser - unix.UnixProteinProphet(p.UnixProteinProphet) - p.DefaultProteinProphet = p.UnixProteinProphet - } else { - msg.UnsupportedDistribution(errors.New(""), "fatal") - } - } -} - // Execute ProteinProphet executes peptideprophet func (p ProteinProphet) Execute(params met.ProteinProphet, home, temp string, args []string) []string { @@ -194,6 +159,10 @@ func (p ProteinProphet) appendParams(params met.ProteinProphet, cmd *exec.Cmd) * cmd.Args = append(cmd.Args, "NONSP") } + if !params.Subgroups { + cmd.Args = append(cmd.Args, "NOGROUPS") + } + if params.Accuracy { cmd.Args = append(cmd.Args, "ACCURACY") } diff --git a/lib/ext/ptmprophet/ptmprophet.go b/lib/ext/ptmprophet/ptmprophet.go index b5297adc..736582a1 100644 --- a/lib/ext/ptmprophet/ptmprophet.go +++ b/lib/ext/ptmprophet/ptmprophet.go @@ -8,11 +8,8 @@ import ( "path/filepath" "strings" - unix "philosopher/lib/ext/ptmprophet/unix" - wPeP "philosopher/lib/ext/ptmprophet/win" "philosopher/lib/met" "philosopher/lib/msg" - "philosopher/lib/sys" ) // PTMProphet is the main tool data configuration structure @@ -41,7 +38,7 @@ func Run(m met.Data, args []string) met.Data { var ptm = New(m.Temp) // deploy the binaries - ptm.Deploy(m.OS, m.Distro) + ptm.Deploy(m.Distro) // run ptm.Execute(m.PTMProphet, args) @@ -51,25 +48,6 @@ func Run(m met.Data, args []string) met.Data { return m } -// Deploy PTMProphet binaries on binary directory -func (p *PTMProphet) Deploy(os, distro string) { - - if os == sys.Windows() { - wPeP.WinPTMProphetParser(p.WinPTMProphetParser) - p.DefaultPTMProphetParser = p.WinPTMProphetParser - } else { - if strings.EqualFold(distro, sys.Debian()) { - unix.UnixPTMProphetParser(p.UnixPTMProphetParser) - p.DefaultPTMProphetParser = p.UnixPTMProphetParser - } else if strings.EqualFold(distro, sys.Redhat()) { - unix.UnixPTMProphetParser(p.UnixPTMProphetParser) - p.DefaultPTMProphetParser = p.UnixPTMProphetParser - } else { - msg.UnsupportedDistribution(errors.New(""), "fatal") - } - } -} - // Execute PTMProphet func (p *PTMProphet) Execute(params met.PTMProphet, args []string) []string { diff --git a/lib/ext/ptmprophet/ptmprophet_unix.go b/lib/ext/ptmprophet/ptmprophet_unix.go new file mode 100644 index 00000000..dd494581 --- /dev/null +++ b/lib/ext/ptmprophet/ptmprophet_unix.go @@ -0,0 +1,26 @@ +//go:build linux +// +build linux + +package ptmprophet + +import ( + "errors" + "strings" + + unix "philosopher/lib/ext/ptmprophet/unix" + "philosopher/lib/msg" + "philosopher/lib/sys" +) + +// Deploy PTMProphet binaries on binary directory +func (p *PTMProphet) Deploy(distro string) { + if strings.EqualFold(distro, sys.Debian()) { + unix.UnixPTMProphetParser(p.UnixPTMProphetParser) + p.DefaultPTMProphetParser = p.UnixPTMProphetParser + } else if strings.EqualFold(distro, sys.Redhat()) { + unix.UnixPTMProphetParser(p.UnixPTMProphetParser) + p.DefaultPTMProphetParser = p.UnixPTMProphetParser + } else { + msg.UnsupportedDistribution(errors.New(""), "fatal") + } +} diff --git a/lib/ext/ptmprophet/ptmprophet_win.go b/lib/ext/ptmprophet/ptmprophet_win.go new file mode 100644 index 00000000..449838b2 --- /dev/null +++ b/lib/ext/ptmprophet/ptmprophet_win.go @@ -0,0 +1,14 @@ +//go:build windows +// +build windows + +package ptmprophet + +import ( + wPeP "philosopher/lib/ext/ptmprophet/win" +) + +// Deploy PTMProphet binaries on binary directory +func (p *PTMProphet) Deploy(distro string) { + wPeP.WinPTMProphetParser(p.WinPTMProphetParser) + p.DefaultPTMProphetParser = p.WinPTMProphetParser +} diff --git a/lib/ext/rawfilereader/rawfilereader.go b/lib/ext/rawfilereader/rawfilereader.go index 0b93d280..e2ef3038 100644 --- a/lib/ext/rawfilereader/rawfilereader.go +++ b/lib/ext/rawfilereader/rawfilereader.go @@ -7,10 +7,6 @@ import ( "philosopher/lib/msg" "philosopher/lib/sys" "runtime" - - uDeb "philosopher/lib/ext/rawfilereader/deb64" - uRH "philosopher/lib/ext/rawfilereader/reh64" - wRaw "philosopher/lib/ext/rawfilereader/win" ) // RawFileReader represents the tool configuration @@ -69,33 +65,6 @@ func Run(rawFileName, scanQuery string) string { return stream } -// Deploy generates binaries on workdir -func (c *RawFileReader) Deploy() { - - if c.OS == sys.Windows() { - - // deploy windows binary - wRaw.Win(c.WinBin) - wRaw.ThermoFisherCommonCoreDataDLL(c.ThermoFisherCommonCoreDataDLL) - wRaw.ThermoFisherCommonCoreRawFileReaderDLL(c.ThermoFisherCommonCoreRawFileReaderDLL) - c.DefaultBin = c.WinBin - - } else if c.OS == "linux" && c.Distro == sys.Debian() { - - // deploy debian binary - uDeb.Deb64(c.Deb64Bin) - c.DefaultBin = c.Deb64Bin - - } else { - - // deploy red hat binary - uRH.Reh64(c.ReH64Bin) - c.DefaultBin = c.ReH64Bin - - } - -} - // Execute is the main function to execute RawFileReader func (c *RawFileReader) Execute(rawFileName, scanQuery string) string { diff --git a/lib/ext/rawfilereader/rawfilereader_unix.go b/lib/ext/rawfilereader/rawfilereader_unix.go new file mode 100644 index 00000000..bdd362b9 --- /dev/null +++ b/lib/ext/rawfilereader/rawfilereader_unix.go @@ -0,0 +1,27 @@ +//go:build linux +// +build linux + +package rawfilereader + +import ( + uDeb "philosopher/lib/ext/rawfilereader/deb64" + uRH "philosopher/lib/ext/rawfilereader/reh64" + "philosopher/lib/sys" +) + +// Deploy generates binaries on workdir +func (c *RawFileReader) Deploy() { + if c.OS == "linux" && c.Distro == sys.Debian() { + + // deploy debian binary + uDeb.Deb64(c.Deb64Bin) + c.DefaultBin = c.Deb64Bin + + } else { + + // deploy red hat binary + uRH.Reh64(c.ReH64Bin) + c.DefaultBin = c.ReH64Bin + + } +} diff --git a/lib/ext/rawfilereader/rawfilereader_win.go b/lib/ext/rawfilereader/rawfilereader_win.go new file mode 100644 index 00000000..e023138d --- /dev/null +++ b/lib/ext/rawfilereader/rawfilereader_win.go @@ -0,0 +1,17 @@ +//go:build windows +// +build windows + +package rawfilereader + +import ( + wRaw "philosopher/lib/ext/rawfilereader/win" +) + +// Deploy generates binaries on workdir +func (c *RawFileReader) Deploy() { + // deploy windows binary + wRaw.Win(c.WinBin) + wRaw.ThermoFisherCommonCoreDataDLL(c.ThermoFisherCommonCoreDataDLL) + wRaw.ThermoFisherCommonCoreRawFileReaderDLL(c.ThermoFisherCommonCoreRawFileReaderDLL) + c.DefaultBin = c.WinBin +} diff --git a/lib/fil/fdr.go b/lib/fil/fdr.go index 0c5768a7..bada5f7f 100644 --- a/lib/fil/fdr.go +++ b/lib/fil/fdr.go @@ -5,6 +5,7 @@ import ( "fmt" "sort" "strings" + "sync" "philosopher/lib/cla" "philosopher/lib/id" @@ -15,14 +16,14 @@ import ( ) // PepXMLFDRFilter processes and calculates the FDR at the PSM, Ion or Peptide level -func PepXMLFDRFilter(input map[string]id.PepIDList, targetFDR float64, level, decoyTag string) (id.PepIDList, float64) { +func PepXMLFDRFilter(input map[string]id.PepIDListPtrs, targetFDR float64, level, decoyTag string) (id.PepIDListPtrs, float64) { //var msg string - var targets float64 - var decoys float64 + var targets uint + var decoys uint var calcFDR float64 - var list id.PepIDList - var peplist id.PepIDList + var list id.PepIDListPtrs + var peplist id.PepIDListPtrs var minProb float64 = 10 if strings.EqualFold(level, "PSM") { @@ -30,7 +31,7 @@ func PepXMLFDRFilter(input map[string]id.PepIDList, targetFDR float64, level, de // move all entries to list and count the number of targets and decoys for _, i := range input { for _, j := range i { - if cla.IsDecoyPSM(j, decoyTag) { + if cla.IsDecoyPSM(*j, decoyTag) { decoys++ } else { targets++ @@ -47,7 +48,7 @@ func PepXMLFDRFilter(input map[string]id.PepIDList, targetFDR float64, level, de } for i := range peplist { - if cla.IsDecoyPSM(peplist[i], decoyTag) { + if cla.IsDecoyPSM(*peplist[i], decoyTag) { decoys++ } else { targets++ @@ -63,7 +64,7 @@ func PepXMLFDRFilter(input map[string]id.PepIDList, targetFDR float64, level, de } for i := range peplist { - if cla.IsDecoyPSM(peplist[i], decoyTag) { + if cla.IsDecoyPSM(*peplist[i], decoyTag) { decoys++ } else { targets++ @@ -81,9 +82,9 @@ func PepXMLFDRFilter(input map[string]id.PepIDList, targetFDR float64, level, de for j := limit; j >= 0; j-- { _, ok := scoreMap[list[j].Probability] if !ok { - scoreMap[list[j].Probability] = (decoys / targets) + scoreMap[list[j].Probability] = float64(decoys) / float64(targets) } - if cla.IsDecoyPSM(list[j], decoyTag) { + if cla.IsDecoyPSM(*list[j], decoyTag) { decoys-- } else { targets-- @@ -110,7 +111,7 @@ func PepXMLFDRFilter(input map[string]id.PepIDList, targetFDR float64, level, de } } - var cleanlist id.PepIDList + var cleanlist id.PepIDListPtrs decoys = 0 targets = 0 @@ -118,7 +119,7 @@ func PepXMLFDRFilter(input map[string]id.PepIDList, targetFDR float64, level, de _, ok := probList[list[i].Probability] if ok { cleanlist = append(cleanlist, list[i]) - if cla.IsDecoyPSM(list[i], decoyTag) { + if cla.IsDecoyPSM(*list[i], decoyTag) { decoys++ } else { targets++ @@ -126,7 +127,7 @@ func PepXMLFDRFilter(input map[string]id.PepIDList, targetFDR float64, level, de } } - msg := fmt.Sprintf("Converged to %.2f %% FDR with %0.f %ss", (calcFDR * 100), targets, level) + msg := fmt.Sprintf("Converged to %.2f %% FDR with %d %ss", calcFDR*100, targets, level) logrus.WithFields(logrus.Fields{ "decoy": decoys, "total": (targets + decoys), @@ -608,14 +609,15 @@ func sequentialFDRControl(pep id.PepIDList, pro id.ProtIDList, psm, peptide, ion "ions": len(uniqIons), }).Info("Applying sequential FDR estimation") + wg := sync.WaitGroup{} + wg.Add(3) filteredPSM, _ := PepXMLFDRFilter(uniqPsms, psm, "PSM", decoyTag) - filteredPSM.Serialize("psm") - + go func() { defer wg.Done(); filteredPSM.Serialize("psm") }() filteredPeptides, _ := PepXMLFDRFilter(uniqPeps, peptide, "Peptide", decoyTag) - filteredPeptides.Serialize("pep") - + go func() { defer wg.Done(); filteredPeptides.Serialize("pep") }() filteredIons, _ := PepXMLFDRFilter(uniqIons, ion, "Ion", decoyTag) - filteredIons.Serialize("ion") + go func() { defer wg.Done(); filteredIons.Serialize("ion") }() + wg.Wait() } @@ -667,8 +669,7 @@ func correctRazorAssignment(list id.PepIDList) id.PepIDList { rm.Restore(false) for i := range list { - v, ok := rm[list[i].Peptide] - if ok { + if v, ok := rm[list[i].Peptide]; ok { if list[i].Protein != v.MappedProtein { diff --git a/lib/fil/fil.go b/lib/fil/fil.go index ec59f28b..2973fbce 100644 --- a/lib/fil/fil.go +++ b/lib/fil/fil.go @@ -7,6 +7,7 @@ import ( "path/filepath" "sort" "strings" + "sync" "philosopher/lib/cla" "philosopher/lib/id" @@ -23,7 +24,6 @@ import ( func Run(f met.Data) met.Data { e := rep.New() - var pepxml id.PepXML var pep id.PepIDList var pro id.ProtIDList @@ -99,11 +99,13 @@ func Run(f met.Data) met.Data { processProteinInferenceIdentifications(pepid, razorMap, coverMap, f.Filter.PtFDR, f.Filter.PepFDR, f.Filter.ProtProb, f.Filter.Picked, f.Filter.Tag) } } - + var pepxml id.PepXML pepxml.Restore() + // restoring for the modifications + e.Mods = pepxml.Modifications + e.AssembleSearchParameters(pepxml.SearchParameters) if f.Filter.Seq { - // sequential analysis // filtered psm list and filtered prot list pep.Restore("psm") @@ -124,6 +126,8 @@ func Run(f met.Data) met.Data { // msg.Custom(errors.New("database annotation not found, interrupting the processing"), "fatal") // } + os.RemoveAll(sys.PepxmlBin()) + if f.Filter.Razor || len(f.Filter.RazorBin) > 0 { var psm id.PepIDList psm.Restore("psm") @@ -146,12 +150,6 @@ func Run(f met.Data) met.Data { logrus.Info("Post processing identifications") - // restoring for the modifications - e.Mods = pepxml.Modifications - e.AssembleSearchParameters(pepxml.SearchParameters) - pepxml = id.PepXML{} - os.RemoveAll(sys.PepxmlBin()) - var psm id.PepIDList psm.Restore("psm") e.AssemblePSMReport(psm, f.Filter.Tag) @@ -176,10 +174,9 @@ func Run(f met.Data) met.Data { logrus.Info("Assigning protein identifications to layers") e.UpdateLayerswithDatabase(f.Filter.Tag) - // evaluate modifications in data set if f.Filter.Mapmods { - e.UpdateIonModCount() + //e.UpdateIonModCount() e.UpdatePeptideModCount() } @@ -219,7 +216,7 @@ func Run(f met.Data) met.Data { e.PSM[i].IsURazor = true e.PSM[i].IsUnique = true - e.PSM[i].MappedGenes = make(map[string]int) + e.PSM[i].MappedGenes = make(map[string]struct{}) } if strings.Contains(e.PSM[i].Protein, f.Filter.Tag) { @@ -229,7 +226,6 @@ func Run(f met.Data) met.Data { razor = nil } - if len(f.Filter.Pox) > 0 || f.Filter.Inference { logrus.Info("Processing protein inference") @@ -244,13 +240,11 @@ func Run(f met.Data) met.Data { logrus.Info("Synchronizing PSMs and proteins") - e = e.SyncPSMToProteins(f.Filter.Tag) + e.SyncPSMToProteins(f.Filter.Tag) - e.UpdateNumberOfEnzymaticTermini() + e.UpdateNumberOfEnzymaticTermini(f.Filter.Tag) } - e = e.SyncPSMToPeptides(f.Filter.Tag) - e = e.SyncPSMToPeptideIons(f.Filter.Tag) var countPSM, countPep, countIon, coutProtein int @@ -284,7 +278,6 @@ func Run(f met.Data) met.Data { "ions": countIon, "proteins": coutProtein, }).Info("Total report numbers after FDR filtering, and post-processing") - logrus.Info("Saving") e.SerializeGranular() @@ -292,7 +285,7 @@ func Run(f met.Data) met.Data { } // processPeptideIdentifications reads and process pepXML -func processPeptideIdentifications(p id.PepIDList, decoyTag, mods string, psm, peptide, ion float64) (float64, float64, float64) { +func processPeptideIdentifications(p id.PepIDListPtrs, decoyTag, mods string, psm, peptide, ion float64) (float64, float64, float64) { // report charge profile var t, d int @@ -344,13 +337,16 @@ func processPeptideIdentifications(p id.PepIDList, decoyTag, mods string, psm, p }).Info("Database search results") filteredPSM, psmThreshold := PepXMLFDRFilter(uniqPsms, psm, "PSM", decoyTag) - filteredPSM.Serialize("psm") + wg := sync.WaitGroup{} + wg.Add(3) + go func() { defer wg.Done(); filteredPSM.Serialize("psm") }() filteredPeptides, peptideThreshold := PepXMLFDRFilter(uniqPeps, peptide, "Peptide", decoyTag) - filteredPeptides.Serialize("pep") + go func() { defer wg.Done(); filteredPeptides.Serialize("pep") }() filteredIons, ionThreshold := PepXMLFDRFilter(uniqIons, ion, "Ion", decoyTag) - filteredIons.Serialize("ion") + go func() { defer wg.Done(); filteredIons.Serialize("ion") }() + wg.Wait() // sug-group FDR filtering if len(mods) > 0 { @@ -360,17 +356,18 @@ func processPeptideIdentifications(p id.PepIDList, decoyTag, mods string, psm, p return psmThreshold, peptideThreshold, ionThreshold } -func ptmBasedPSMFiltering(uniqPsms map[string]id.PepIDList, targetFDR float64, decoyTag, mods string) { - - // unmodified = no ptms - // defined = only the ptms defined, nothing else - // remaining or all = one or more ptms that might include the combination of the defined + something else +func ptmBasedPSMFiltering(uniqPsms map[string]id.PepIDListPtrs, targetFDR float64, decoyTag, mods string) { logrus.Info("Separating PSMs based on the modification profile") - unModPSMs := make(map[string]id.PepIDList) - definedModPSMs := make(map[string]id.PepIDList) - restModPSMs := make(map[string]id.PepIDList) + // unmodified: no ptms + unModPSMs := make(map[string]id.PepIDListPtrs) + + // defined: only the ptms defined, nothing else + definedModPSMs := make(map[string]id.PepIDListPtrs) + + // other: one or more ptms that might include the combination of the defined + something else + restModPSMs := make(map[string]id.PepIDListPtrs) modsMap := make(map[string]string) @@ -380,48 +377,36 @@ func ptmBasedPSMFiltering(uniqPsms map[string]id.PepIDList, targetFDR float64, d modsMap[i] = m[0] } - exclusionList := make(map[string]uint8) - psmsWithOtherPTMs := make(map[string]id.PepIDList) - for k, v := range uniqPsms { - if !strings.Contains(v[0].ModifiedPeptide, "[") || len(v[0].ModifiedPeptide) == 0 { - - unModPSMs[k] = v - exclusionList[v[0].Spectrum] = 0 + var other, defined bool - } else { + for _, i := range v[0].Modifications.IndexSlice { - // if PSM contains other mods than the ones defined by the flag, mark them to be ignored - for _, i := range v[0].Modifications.Index { - if i.Variable == "Y" { - m := fmt.Sprintf("%s:%.4f", i.AminoAcid, i.MassDiff) - _, ok := modsMap[m] - if !ok { - psmsWithOtherPTMs[v[0].Spectrum] = v - } - } - } + if i.Variable { - // if PSM contains only the defined mod and the correct amino acid, teh add to defined category - // and mark it for being excluded from rest - for _, i := range v[0].Modifications.Index { m := fmt.Sprintf("%s:%.4f", i.AminoAcid, i.MassDiff) - aa, ok1 := modsMap[m] - _, ok2 := psmsWithOtherPTMs[v[0].Spectrum] - if ok1 && !ok2 && aa == i.AminoAcid { - definedModPSMs[k] = v - exclusionList[v[0].Spectrum] = 0 + _, ok := modsMap[m] + if ok { + defined = true + } else { + other = true } - } + } } - _, ok := exclusionList[v[0].Spectrum] - if !ok { + if other && defined { restModPSMs[k] = v + } else if other && !defined { + restModPSMs[k] = v + } else if !other && defined { + definedModPSMs[k] = v + } else { + unModPSMs[k] = v } + } logrus.Info("Filtering unmodified PSMs") @@ -433,7 +418,7 @@ func ptmBasedPSMFiltering(uniqPsms map[string]id.PepIDList, targetFDR float64, d logrus.Info("Filtering all modified PSMs") filteredAllPSM, _ := PepXMLFDRFilter(restModPSMs, targetFDR, "PSM", decoyTag) - var combinedFiltered id.PepIDList + var combinedFiltered id.PepIDListPtrs combinedFiltered = append(combinedFiltered, filteredUnmodPSM...) @@ -446,7 +431,7 @@ func ptmBasedPSMFiltering(uniqPsms map[string]id.PepIDList, targetFDR float64, d } // chargeProfile ... -func chargeProfile(p id.PepIDList, charge uint8, decoyTag string) (t, d int) { +func chargeProfile(p id.PepIDListPtrs, charge uint8, decoyTag string) (t, d int) { for _, i := range p { if i.AssumedCharge == charge { @@ -462,19 +447,17 @@ func chargeProfile(p id.PepIDList, charge uint8, decoyTag string) (t, d int) { } //GetUniquePSMs selects only unique pepetide ions for the given data structure -func GetUniquePSMs(p id.PepIDList) map[string]id.PepIDList { - - uniqMap := make(map[string]id.PepIDList) +func GetUniquePSMs(p id.PepIDListPtrs) map[string]id.PepIDListPtrs { + uniqMap := make(map[string]id.PepIDListPtrs) for _, i := range p { - uniqMap[i.Spectrum] = append(uniqMap[i.Spectrum], i) + uniqMap[i.SpectrumFileName().Str()] = append(uniqMap[i.SpectrumFileName().Str()], i) } - return uniqMap } //getUniquePeptideIons selects only unique pepetide ions for the given data structure -func getUniquePeptideIons(p id.PepIDList) map[string]id.PepIDList { +func getUniquePeptideIons(p id.PepIDListPtrs) map[string]id.PepIDListPtrs { uniqMap := ExtractIonsFromPSMs(p) @@ -482,9 +465,9 @@ func getUniquePeptideIons(p id.PepIDList) map[string]id.PepIDList { } // ExtractIonsFromPSMs takes a pepidlist and transforms into an ion map -func ExtractIonsFromPSMs(p id.PepIDList) map[string]id.PepIDList { +func ExtractIonsFromPSMs(p id.PepIDListPtrs) map[string]id.PepIDListPtrs { - uniqMap := make(map[string]id.PepIDList) + uniqMap := make(map[string]id.PepIDListPtrs) for _, i := range p { ion := fmt.Sprintf("%s#%d#%.4f", i.Peptide, i.AssumedCharge, i.CalcNeutralPepMass) @@ -500,12 +483,12 @@ func ExtractIonsFromPSMs(p id.PepIDList) map[string]id.PepIDList { } // GetUniquePeptides selects only unique pepetide for the given data structure -func GetUniquePeptides(p id.PepIDList) map[string]id.PepIDList { +func GetUniquePeptides(p id.PepIDListPtrs) map[string]id.PepIDListPtrs { - uniqMap := make(map[string]id.PepIDList) + uniqMap := make(map[string]id.PepIDListPtrs) for _, i := range p { - uniqMap[string(i.Peptide)] = append(uniqMap[string(i.Peptide)], i) + uniqMap[i.Peptide] = append(uniqMap[i.Peptide], i) } // organize id list by score @@ -625,11 +608,11 @@ func processProteinInferenceIdentifications(psm id.PepIDList, razorMap map[strin if ok && pro.ProteinName == razorProtein { - pro.Length = "0" + pro.Length = 0 pro.PercentCoverage = float32(coverMap[pro.ProteinName]) - pro.PctSpectrumIDs = 0.0 - pro.GroupProbability = 1.00 - pro.Confidence = 1.00 + //pro.PctSpectrumIDs = 0.0 + //pro.GroupProbability = 1.00 + //pro.Confidence = 1.00 pro.HasRazor = true if i.Probability > pro.Probability { @@ -643,11 +626,11 @@ func processProteinInferenceIdentifications(psm id.PepIDList, razorMap map[strin _, ok := razorMarked[pro.ProteinName] if !ok { - pro.Length = "0" + pro.Length = 0 pro.PercentCoverage = float32(coverMap[pro.ProteinName]) - pro.PctSpectrumIDs = 0.0 - pro.GroupProbability = 1.00 - pro.Confidence = 1.00 + //pro.PctSpectrumIDs = 0.0 + //pro.GroupProbability = 1.00 + //pro.Confidence = 1.00 pro.HasRazor = false if i.Probability > pro.Probability { @@ -675,14 +658,14 @@ func processProteinInferenceIdentifications(psm id.PepIDList, razorMap map[strin pro.TotalNumberPeptides++ pep := id.PeptideIonIdentification{ - PeptideSequence: i.Peptide, - ModifiedPeptide: i.ModifiedPeptide, - Charge: i.AssumedCharge, - Weight: 1, - GroupWeight: 0, - CalcNeutralPepMass: i.CalcNeutralPepMass, - SharedParentProteins: len(i.AlternativeProteins), - Razor: 1, + PeptideSequence: i.Peptide, + ModifiedPeptide: i.ModifiedPeptide, + Charge: i.AssumedCharge, + Weight: 1, + GroupWeight: 0, + CalcNeutralPepMass: i.CalcNeutralPepMass, + //SharedParentProteins: len(i.AlternativeProteins), + Razor: 1, } for j := range i.AlternativeProteins { @@ -690,22 +673,22 @@ func processProteinInferenceIdentifications(psm id.PepIDList, razorMap map[strin pro.IndistinguishableProtein = append(pro.IndistinguishableProtein, j) } - pep.NumberOfInstances++ + //pep.NumberOfInstances++ if i.Probability > pep.InitialProbability { pep.InitialProbability = i.Probability } if len(i.AlternativeProteins) < 2 { - pep.IsNondegenerateEvidence = true + //pep.IsNondegenerateEvidence = true pep.IsUnique = true } else { - pep.IsNondegenerateEvidence = false + //pep.IsNondegenerateEvidence = false pep.IsUnique = false } pep.Modifications.Index = make(map[string]mod.Modification) - for k, v := range i.Modifications.Index { + for k, v := range i.Modifications.ToMap().Index { pep.Modifications.Index[k] = v } @@ -721,14 +704,14 @@ func processProteinInferenceIdentifications(psm id.PepIDList, razorMap map[strin pro.TotalNumberPeptides++ pep := id.PeptideIonIdentification{ - PeptideSequence: i.Peptide, - ModifiedPeptide: i.ModifiedPeptide, - Charge: i.AssumedCharge, - Weight: 0, - GroupWeight: 0, - CalcNeutralPepMass: i.CalcNeutralPepMass, - SharedParentProteins: len(i.AlternativeProteins), - Razor: 0, + PeptideSequence: i.Peptide, + ModifiedPeptide: i.ModifiedPeptide, + Charge: i.AssumedCharge, + Weight: 0, + GroupWeight: 0, + CalcNeutralPepMass: i.CalcNeutralPepMass, + //SharedParentProteins: len(i.AlternativeProteins), + Razor: 0, } if i.Probability > pep.InitialProbability { @@ -737,18 +720,18 @@ func processProteinInferenceIdentifications(psm id.PepIDList, razorMap map[strin //pep.PeptideParentProtein = i.AlternativeProteins - pep.NumberOfInstances++ + //pep.NumberOfInstances++ if len(i.AlternativeProteins) < 2 { - pep.IsNondegenerateEvidence = true + //pep.IsNondegenerateEvidence = true pep.IsUnique = true } else { - pep.IsNondegenerateEvidence = false + //pep.IsNondegenerateEvidence = false pep.IsUnique = false } pep.Modifications.Index = make(map[string]mod.Modification) - for k, v := range i.Modifications.Index { + for k, v := range i.Modifications.ToMap().Index { pep.Modifications.Index[k] = v } @@ -796,11 +779,11 @@ func proteinProfile(p id.ProtXML) (t, d int) { // extractPSMfromPepXML retrieves all psm from protxml that maps into pepxml files // using protein names from and tags -func extractPSMfromPepXML(filter string, peplist id.PepIDList, pro id.ProtIDList) id.PepIDList { +func extractPSMfromPepXML(filter string, peplist id.PepIDList, pro id.ProtIDList) id.PepIDListPtrs { - var protmap = make(map[string]uint16) - var filterMap = make(map[string]id.PeptideIdentification) - var output id.PepIDList + var protmap = make(map[string]struct{}) + var filterMap = make(map[id.SpectrumType]*id.PeptideIdentification) + var output id.PepIDListPtrs if filter == "sequential" { @@ -808,24 +791,24 @@ func extractPSMfromPepXML(filter string, peplist id.PepIDList, pro id.ProtIDList for _, i := range pro { for _, j := range i.UniqueStrippedPeptides { key := fmt.Sprintf("%s#%s", i.ProteinName, j) - protmap[string(key)] = 0 + protmap[key] = struct{}{} } } - for _, i := range peplist { + for idx, i := range peplist { key := fmt.Sprintf("%s#%s", i.Protein, i.Peptide) _, ok := protmap[key] if ok { - filterMap[string(i.Spectrum)] = i + filterMap[i.SpectrumFileName()] = &peplist[idx] } else { for j := range i.AlternativeProteins { key := fmt.Sprintf("%s#%s", j, i.Peptide) _, ap := protmap[key] if ap { - filterMap[string(i.Spectrum)] = i + filterMap[i.SpectrumFileName()] = &peplist[idx] } } @@ -837,18 +820,18 @@ func extractPSMfromPepXML(filter string, peplist id.PepIDList, pro id.ProtIDList // get all protein names from protxml for _, i := range pro { - protmap[string(i.ProteinName)] = 0 + protmap[string(i.ProteinName)] = struct{}{} } - for _, i := range peplist { + for idx, i := range peplist { _, ok := protmap[string(i.Protein)] if ok { - filterMap[string(i.Spectrum)] = i + filterMap[i.SpectrumFileName()] = &peplist[idx] } else { for j := range i.AlternativeProteins { _, ap := protmap[j] if ap { - filterMap[string(i.Spectrum)] = i + filterMap[i.SpectrumFileName()] = &peplist[idx] } } } diff --git a/lib/fil/fil_test.go b/lib/fil/fil_test.go index c544659d..baf611b9 100644 --- a/lib/fil/fil_test.go +++ b/lib/fil/fil_test.go @@ -12,7 +12,7 @@ import ( func Test_readPepXMLInput(t *testing.T) { tes.SetupTestEnv() - var pepIDList id.PepIDList + var pepIDList id.PepIDListPtrs type args1 struct { xmlFile string @@ -53,13 +53,13 @@ func Test_readPepXMLInput(t *testing.T) { t.Errorf("Index is incorrect, got %d, want %d", got[0].Index, uint32(18992)) } - if got[0].Spectrum != "b1906_293T_proteinID_01A_QE3_122212.60782.60782.2#interact.pep.xml" { - t.Errorf("Spectrum is incorrect, got %s, want %s", got[0].Spectrum, "b1906_293T_proteinID_01A_QE3_122212.60782.60782.2#interact.pep.xml") + if got[0].SpectrumFileName().Str() != "b1906_293T_proteinID_01A_QE3_122212.60782.60782.2#interact.pep.xml" { + t.Errorf("Spectrum is incorrect, got %s, want %s", got[0].SpectrumFileName().Str(), "b1906_293T_proteinID_01A_QE3_122212.60782.60782.2#interact.pep.xml") } - if got[0].Scan != 60782 { - t.Errorf("Scan is incorrect, got %d, want %d", got[0].Scan, 60782) - } + //if got[0].Scan != 60782 { + // t.Errorf("Scan is incorrect, got %d, want %d", got[0].Scan, 60782) + //} if got[0].PrecursorNeutralMass != 1429.7663 { t.Errorf("PrecursorNeutralMass is incorrect, got %f, want %f", got[0].PrecursorNeutralMass, 1429.7663) @@ -81,9 +81,9 @@ func Test_readPepXMLInput(t *testing.T) { t.Errorf("CalcNeutralPepMass is incorrect, got %.2f, want %.2f", got[0].CalcNeutralPepMass, 1429.7664) } - if got[0].NextAA != "Q" { - t.Errorf("NextAA is incorrect, got %s, want %s", got[0].NextAA, "Q") - } + //if got[0].NextAA != "Q" { + // t.Errorf("NextAA is incorrect, got %s, want %s", got[0].NextAA, "Q") + //} if got[0].NumberofMissedCleavages != 0 { t.Errorf("NumberofMissedCleavages is incorrect, got %d, want %d", got[0].NumberofMissedCleavages, 0) diff --git a/lib/fil/razor.go b/lib/fil/razor.go index ecf5d07b..c5d9026c 100644 --- a/lib/fil/razor.go +++ b/lib/fil/razor.go @@ -1,21 +1,17 @@ package fil import ( - "io/ioutil" - "philosopher/lib/msg" "philosopher/lib/sys" - - "github.com/vmihailenco/msgpack" ) // RazorCandidate is a peptide sequence to be evaluated as a razor type RazorCandidate struct { Sequence string + MappedProtein string + MappedproteinsSID map[string]string MappedProteinsW map[string]float64 MappedProteinsGW map[string]float64 MappedProteinsTNP map[string]int - MappedproteinsSID map[string]string - MappedProtein string } // a Map fo Razor candidates @@ -23,30 +19,10 @@ type RazorMap map[string]RazorCandidate // Serialize converts the razor structure to a gob file func (p *RazorMap) Serialize() { - - b, e := msgpack.Marshal(&p) - if e != nil { - msg.MarshalFile(e, "fatal") - } - - e = ioutil.WriteFile(sys.RazorBin(), b, sys.FilePermission()) - if e != nil { - msg.WriteFile(e, "fatal") - } - + sys.Serialize(p, sys.RazorBin()) } // Restore reads razor bin files and restore the data sctructure func (p *RazorMap) Restore(silent bool) { - - b, e := ioutil.ReadFile(sys.RazorBin()) - if e != nil && !silent { - msg.ReadFile(e, "warning") - } - - e = msgpack.Unmarshal(b, &p) - if e != nil && !silent { - msg.DecodeMsgPck(e, "warning") - } - + sys.Restore(p, sys.RazorBin(), silent) } diff --git a/lib/gth/gth.go b/lib/gth/gth.go index f6ce61fb..cc6317f7 100644 --- a/lib/gth/gth.go +++ b/lib/gth/gth.go @@ -18,10 +18,10 @@ type Release struct { AssetsURL string `json:"assets_url"` UploadURL string `json:"upload_url"` HTMLURL string `json:"html_url"` - ID int `json:"id"` TagName string `json:"tag_name"` TargetCommitish string `json:"target_commitish"` Name string `json:"name"` + ID int `json:"id"` Draft bool `json:"draft"` } diff --git a/lib/id/pep.go b/lib/id/pep.go index 28634399..08adcb52 100644 --- a/lib/id/pep.go +++ b/lib/id/pep.go @@ -3,12 +3,12 @@ package id import ( "errors" "fmt" - "io/ioutil" "path" "path/filepath" "sort" "strconv" "strings" + "sync" "time" "philosopher/lib/uti" @@ -20,7 +20,6 @@ import ( "philosopher/lib/sys" "github.com/sirupsen/logrus" - "github.com/vmihailenco/msgpack" "gonum.org/v1/plot" "gonum.org/v1/plot/plotter" "gonum.org/v1/plot/plotutil" @@ -33,50 +32,61 @@ type PepXML struct { SpectraFile string SearchEngine string DecoyTag string - SearchParameters []spc.Parameter Database string Prophet string - Modifications mod.Modifications + SearchParameters []spc.Parameter Models []spc.DistributionPoint + Modifications mod.Modifications PeptideIdentification PepIDList } -// PeptideIdentification struct -type PeptideIdentification struct { - Index uint32 +type PepXML4Serialiazation struct { + FileName string + SpectraFile string + SearchEngine string + DecoyTag string + Database string + Prophet string + SearchParameters []spc.Parameter + Models []spc.DistributionPoint + Modifications mod.Modifications + PeptideIdentification PepIDListPtrs +} + +// PeptideIdentificationOld struct +type PeptideIdentificationOld struct { Spectrum string SpectrumFile string - Scan int Peptide string Protein string ModifiedPeptide string - CompesationVoltage string - AlternativeProteins map[string]int - AssumedCharge uint8 + CompensationVoltage string PrevAA string NextAA string + LocalizationRange string + MSFragerLocalization string + MSFraggerLocalizationScoreWithPTM string + MSFraggerLocalizationScoreWithoutPTM string + Scan int + NumberofMissedCleavages int + IsoMassD int + AssumedCharge uint8 HitRank uint8 MissedCleavages uint8 NumberTolTerm uint8 NumberOfEnzymaticTermini uint8 + IsRejected uint8 NumberTotalProteins uint16 TotalNumberIons uint16 NumberMatchedIons uint16 - NumberofMissedCleavages int + Index uint32 UncalibratedPrecursorNeutralMass float64 PrecursorNeutralMass float64 PrecursorExpMass float64 RetentionTime float64 CalcNeutralPepMass float64 Massdiff float64 - LocalizedPTMSites map[string]int - LocalizedPTMMassDiff map[string]string - LocalizationRange string - MSFragerLocalization string - MSFraggerLocalizationScoreWithPTM string - MSFraggerLocalizationScoreWithoutPTM string Probability float64 - IsoMassD int Expectation float64 Xcorr float64 DeltaCN float64 @@ -88,12 +98,74 @@ type PeptideIdentification struct { DiscriminantValue float64 Intensity float64 IonMobility float64 - IsRejected uint8 + AlternativeProteins map[string]int + LocalizedPTMSites map[string]int + LocalizedPTMMassDiff map[string]string Modifications mod.Modifications } +type SpectrumType struct{ Spectrum, FileName string } + +func (s SpectrumType) Str() string { return fmt.Sprintf("%s#%s", s.Spectrum, s.FileName) } +func (p PeptideIdentification) SpectrumFileName() SpectrumType { + return SpectrumType{p.Spectrum, p.SpectrumFile} +} + +// PeptideIdentification struct +type PeptideIdentification struct { + Spectrum string + SpectrumFile string + Peptide string + Protein string + ModifiedPeptide string + CompensationVoltage string + AssumedCharge uint8 + HitRank uint8 + NumberOfEnzymaticTermini uint8 + NumberofMissedCleavages uint8 + Index uint32 + UncalibratedPrecursorNeutralMass float64 + PrecursorNeutralMass float64 + RetentionTime float64 + CalcNeutralPepMass float64 + Massdiff float64 + Probability float64 + Expectation float64 + Xcorr float64 + DeltaCN float64 + SPRank float64 + Hyperscore float64 + Nextscore float64 + IonMobility float64 + Intensity float64 + AlternativeProteins map[string]int + MSFragerLoc *MSFraggerLoc + PTM *PTM + Modifications mod.ModificationsSlice +} + +type PTM struct { + LocalizedPTMSites map[string]int + LocalizedPTMMassDiff map[string]string +} + +type MSFraggerLoc struct { + MSFragerLocalization string + MSFraggerLocalizationScoreWithPTM string + MSFraggerLocalizationScoreWithoutPTM string +} + // PepIDList is a list of PeptideSpectrumMatch type PepIDList []PeptideIdentification +type PepIDListPtrs []*PeptideIdentification + +func ToPepIDListPtrs(p PepIDList) PepIDListPtrs { + pptrs := make(PepIDListPtrs, len(p), len(p)) + for i := range p { + pptrs[i] = &p[i] + } + return pptrs +} // Len function for Sort func (p PepIDList) Len() int { @@ -110,10 +182,26 @@ func (p PepIDList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } +// Len function for Sort +func (p PepIDListPtrs) Len() int { + return len(p) +} + +// Less function for Sort +func (p PepIDListPtrs) Less(i, j int) bool { + return p[i].Probability > p[j].Probability +} + +// Swap function for Sort +func (p PepIDListPtrs) Swap(i, j int) { + p[i], p[j] = p[j], p[i] +} + // Read is the main function for parsing pepxml data func (p *PepXML) Read(f string) { var xml spc.PepXML + logrus.Info("Parsing ", f) xml.Parse(f) var mpa = xml.MsmsPipelineAnalysis @@ -165,18 +253,21 @@ func (p *PepXML) Read(f string) { for _, i := range mpa.MsmsRunSummary.SearchSummary.AminoAcidModifications { key := fmt.Sprintf("%s#%.4f", i.AminoAcid, i.Mass) - + variableStr := string(i.Variable) + if variableStr != "Y" && variableStr != "N" { + panic(nil) + } + variable := variableStr == "Y" _, ok := p.Modifications.Index[key] if !ok { - m := mod.Modification{ - Index: key, - Type: "Assigned", - MonoIsotopicMass: i.Mass, - MassDiff: uti.ToFixed(i.MassDiff, 4), - Variable: string(i.Variable), - AminoAcid: string(i.AminoAcid), - IsobaricMods: make(map[string]float64), + Index: key, + Type: mod.Assigned, + //MonoIsotopicMass: i.Mass, + MassDiff: uti.ToFixed(i.MassDiff, 4), + Variable: variable, + AminoAcid: string(i.AminoAcid), + //IsobaricMods: make(map[string]float64), } p.Modifications.Index[key] = m @@ -187,20 +278,24 @@ func (p *PepXML) Read(f string) { for _, i := range mpa.MsmsRunSummary.SearchSummary.TerminalModifications { key := fmt.Sprintf("%s-term#%.4f", strings.ToUpper(string(i.Terminus)), i.Mass) - + variableStr := string(i.Variable) + if variableStr != "Y" && variableStr != "N" { + panic(nil) + } + variable := variableStr == "Y" _, ok := p.Modifications.Index[key] if !ok { m := mod.Modification{ - Index: key, - Type: "Assigned", - MonoIsotopicMass: i.Mass, - MassDiff: uti.ToFixed(i.MassDiff, 4), - Variable: string(i.Variable), - AminoAcid: fmt.Sprintf("%s-term", i.Terminus), - IsProteinTerminus: string(i.ProteinTerminus), - Terminus: strings.ToLower(string(i.Terminus)), - IsobaricMods: make(map[string]float64), + Index: key, + Type: mod.Assigned, + //MonoIsotopicMass: i.Mass, + MassDiff: uti.ToFixed(i.MassDiff, 4), + Variable: variable, + AminoAcid: fmt.Sprintf("%s-term", i.Terminus), + //IsProteinTerminus: string(i.ProteinTerminus), + //Terminus: strings.ToLower(string(i.Terminus)), + //IsobaricMods: make(map[string]float64), } p.Modifications.Index[key] = m @@ -219,20 +314,18 @@ func (p *PepXML) Read(f string) { //massDeviation := getMassDeviation(mpa.MsmsRunSummary.SpectrumQuery) // start processing spectra queries - var psmlist PepIDList sq := mpa.MsmsRunSummary.SpectrumQuery - for _, i := range sq { - psm := processSpectrumQuery(i, p.Modifications, p.DecoyTag, p.FileName) - psmlist = append(psmlist, psm) + p.PeptideIdentification = make(PepIDList, len(sq), len(sq)) + for idx, i := range sq { + p.PeptideIdentification[idx] = processSpectrumQuery(i, p.Modifications, p.DecoyTag, p.FileName) } - p.PeptideIdentification = psmlist p.Prophet = string(mpa.AnalysisSummary[0].Analysis) p.Models = models // p.adjustMassDeviation() - if len(psmlist) == 0 { + if len(p.PeptideIdentification) == 0 { msg.NoPSMFound(errors.New(f), "warning") } @@ -240,16 +333,15 @@ func (p *PepXML) Read(f string) { } // ReadPepXMLInput reads one or more fies and organize the data into PSM list -func ReadPepXMLInput(xmlFile, decoyTag, temp string, models bool) (PepIDList, string) { +func ReadPepXMLInput(xmlFile, decoyTag, temp string, models bool) (PepIDListPtrs, string) { - var files = make(map[string]uint8) - var pepIdent PepIDList + var files = make(map[string]struct{}) var params []spc.Parameter var modsIndex = make(map[string]mod.Modification) var searchEngine string if strings.Contains(xmlFile, "pep.xml") || strings.Contains(xmlFile, "pepXML") { - files[xmlFile] = 0 + files[xmlFile] = struct{}{} } else { list := uti.IOReadDir(xmlFile, "pep.xml") @@ -263,7 +355,7 @@ func ReadPepXMLInput(xmlFile, decoyTag, temp string, models bool) (PepIDList, st for _, i := range list { base := filepath.Base(i) if strings.Contains(base, ".mod.") { - files[i] = 0 + files[i] = struct{}{} } } @@ -272,19 +364,27 @@ func ReadPepXMLInput(xmlFile, decoyTag, temp string, models bool) (PepIDList, st for _, i := range list { base := filepath.Base(i) if !strings.Contains(base, ".mod.") { - files[i] = 0 + files[i] = struct{}{} } } } } + pepIdentList := make([]PepIDList, len(files)) + sortedFiles := make([]string, 0, len(files)) for i := range files { + sortedFiles = append(sortedFiles, i) + } + sort.Strings(sortedFiles) + mu := sync.Mutex{} + processSinglePepXML := func(idx int, i string) { var p PepXML p.DecoyTag = decoyTag p.Read(i) - - params = p.SearchParameters + if idx == 0 { + params = p.SearchParameters + } // print models if models { @@ -297,23 +397,41 @@ func ReadPepXMLInput(xmlFile, decoyTag, temp string, models bool) (PepIDList, st } } - pepIdent = append(pepIdent, p.PeptideIdentification...) - + pepIdentList[idx] = p.PeptideIdentification + mu.Lock() for _, k := range p.Modifications.Index { - _, ok := modsIndex[k.Index] - if !ok { + if _, ok := modsIndex[k.Index]; !ok { modsIndex[k.Index] = k } } - - searchEngine = p.SearchEngine + mu.Unlock() + if idx == 0 { + searchEngine = p.SearchEngine + } } - + wg := sync.WaitGroup{} + parallelism := 6 + parallelismTokens := make(chan struct{}, parallelism) + wg.Add(len(sortedFiles)) + for idx, i := range sortedFiles { + go func(idx int, i string) { + defer wg.Done() + parallelismTokens <- struct{}{} + processSinglePepXML(idx, i) + <-parallelismTokens + }(idx, i) + } + wg.Wait() // create a "fake" global pepXML comprising all data - var pepXML PepXML + var pepXML PepXML4Serialiazation pepXML.DecoyTag = decoyTag pepXML.SearchParameters = params - pepXML.PeptideIdentification = pepIdent + pepXML.PeptideIdentification = make(PepIDListPtrs, 0) + for _, pepIdent := range pepIdentList { + for idx := range pepIdent { + pepXML.PeptideIdentification = append(pepXML.PeptideIdentification, &pepIdent[idx]) + } + } pepXML.Modifications.Index = modsIndex // promoting Spectra that matches to both decoys and targets to TRUE hits @@ -323,23 +441,22 @@ func ReadPepXMLInput(xmlFile, decoyTag, temp string, models bool) (PepIDList, st sort.Sort(pepXML.PeptideIdentification) pepXML.Serialize() - return pepIdent, searchEngine + return pepXML.PeptideIdentification, searchEngine } func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag, FileName string) PeptideIdentification { var psm PeptideIdentification - psm.Modifications.Index = make(map[string]mod.Modification) + //psm.Modifications.Index = make(map[string]mod.Modification) psm.AlternativeProteins = make(map[string]int) psm.Index = sq.Index psm.SpectrumFile = FileName - psm.Spectrum = string(sq.Spectrum) - psm.Scan = sq.StartScan + //psm.Scan = sq.StartScan psm.AssumedCharge = sq.AssumedCharge psm.RetentionTime = sq.RetentionTimeSec psm.IonMobility = sq.IonMobility - psm.CompesationVoltage = sq.CompensationVoltage + psm.CompensationVoltage = sq.CompensationVoltage if sq.UncalibratedPrecursorNeutralMass > 0 { psm.PrecursorNeutralMass = sq.PrecursorNeutralMass @@ -352,14 +469,14 @@ func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag for _, i := range sq.SearchResult.SearchHit { psm.HitRank = i.HitRank - psm.PrevAA = string(i.PrevAA) - psm.NextAA = string(i.NextAA) - psm.MissedCleavages = i.MissedCleavages - psm.NumberTolTerm = i.TotalTerm - psm.NumberTotalProteins = i.TotalProteins - psm.TotalNumberIons = i.TotalIons - psm.NumberMatchedIons = i.MatchedIons - psm.IsRejected = i.IsRejected + //psm.PrevAA = string(i.PrevAA) + //psm.NextAA = string(i.NextAA) + //psm.MissedCleavages = i.MissedCleavages + //psm.NumberTolTerm = i.TotalTerm + //psm.NumberTotalProteins = i.TotalProteins + //psm.TotalNumberIons = i.TotalIons + //psm.NumberMatchedIons = i.MatchedIons + //psm.IsRejected = i.IsRejected psm.Peptide = string(i.Peptide) psm.Protein = string(i.Protein) @@ -368,7 +485,7 @@ func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag //psm.Massdiff = uti.ToFixed((i.Massdiff - massDeviation), 4) psm.Massdiff = uti.ToFixed(i.Massdiff, 4) - psm.NumberofMissedCleavages = int(i.MissedCleavages) + psm.NumberofMissedCleavages = i.MissedCleavages psm.NumberOfEnzymaticTermini = i.TotalTerm for _, j := range i.AnalysisResult { @@ -380,7 +497,7 @@ func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag for _, k := range j.PeptideProphetResult.SearchScoreSummary.Parameter { if k.Name == "massd" { - psm.IsoMassD, _ = strconv.Atoi(k.Value) + //psm.IsoMassD, _ = strconv.Atoi(k.Value) } } } @@ -390,11 +507,12 @@ func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag } if string(j.Analysis) == "ptmprophet" { - psm.LocalizedPTMSites = make(map[string]int) - psm.LocalizedPTMMassDiff = make(map[string]string) - for _, k := range j.PTMProphetResult { - psm.LocalizedPTMSites[string(k.PTM)] = len(k.ModAminoAcidProbability) - psm.LocalizedPTMMassDiff[string(k.PTM)] = string(k.PTMPeptide) + if len(j.PTMProphetResult) != 0 { + psm.PTM = &PTM{LocalizedPTMSites: make(map[string]int), LocalizedPTMMassDiff: make(map[string]string)} + for _, k := range j.PTMProphetResult { + psm.PTM.LocalizedPTMSites[string(k.PTM)] = len(k.ModAminoAcidProbability) + psm.PTM.LocalizedPTMMassDiff[string(k.PTM)] = string(k.PTMPeptide) + } } } } @@ -414,11 +532,11 @@ func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag value, _ := strconv.ParseFloat(j.Value, 64) psm.DeltaCN = value } else if string(j.Name) == "deltacnstar" { - value, _ := strconv.ParseFloat(j.Value, 64) - psm.DeltaCNStar = value + //value, _ := strconv.ParseFloat(j.Value, 64) + //psm.DeltaCNStar = value } else if string(j.Name) == "spscore" { - value, _ := strconv.ParseFloat(j.Value, 64) - psm.SPScore = value + //value, _ := strconv.ParseFloat(j.Value, 64) + //psm.SPScore = value } else if string(j.Name) == "sprank" { value, _ := strconv.ParseFloat(j.Value, 64) psm.SPRank = value @@ -431,16 +549,19 @@ func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag } } - psm.LocalizationRange = i.PTMResult.LocalizationPeptide - - psm.MSFragerLocalization = i.PTMResult.LocalizationPeptide - psm.MSFraggerLocalizationScoreWithPTM = i.PTMResult.BestScoreWithPTM - psm.MSFraggerLocalizationScoreWithoutPTM = i.PTMResult.ScoreWithoutPTM + //psm.LocalizationRange = i.PTMResult.LocalizationPeptide + if len(i.PTMResult.LocalizationPeptide+i.PTMResult.BestScoreWithPTM+i.PTMResult.ScoreWithoutPTM) != 0 { + psm.MSFragerLoc = &MSFraggerLoc{ + MSFragerLocalization: i.PTMResult.LocalizationPeptide, + MSFraggerLocalizationScoreWithPTM: i.PTMResult.BestScoreWithPTM, + MSFraggerLocalizationScoreWithoutPTM: i.PTMResult.ScoreWithoutPTM} + } // to be able to accept multiple entries with the same spectrum name, we fuse the // file name to the spectrum name. This is going to be used as an identifiable attribute // Before reporting the filtered PSMs, the file name is removed from the spectrum name. - psm.Spectrum = fmt.Sprintf("%s#%s", psm.Spectrum, FileName) + //psm.Spectrum = fmt.Sprintf("%s#%s", psm.Spectrum, FileName) + psm.Spectrum = string(sq.Spectrum) psm.mapModsFromPepXML(i.ModificationInfo, mods) } @@ -452,7 +573,7 @@ func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag func (p *PeptideIdentification) mapModsFromPepXML(m spc.ModificationInfo, mods mod.Modifications) { p.ModifiedPeptide = string(m.ModifiedPeptide) - + pModificationsIndex := make(map[string]mod.Modification) for _, i := range m.ModAminoacidMass { aa := strings.Split(p.Peptide, "") @@ -468,9 +589,9 @@ func (p *PeptideIdentification) mapModsFromPepXML(m spc.ModificationInfo, mods m m := v newKey := fmt.Sprintf("%s#%d#%.4f", aa[i.Position-1], i.Position, i.Mass) m.Index = newKey - m.Position = strconv.Itoa(i.Position) - m.IsobaricMods = make(map[string]float64) - p.Modifications.Index[newKey] = m + m.Position = i.Position + //m.IsobaricMods = make(map[string]float64) + pModificationsIndex[newKey] = m } else { v, ok = mods.Index[keyPlus] @@ -478,9 +599,9 @@ func (p *PeptideIdentification) mapModsFromPepXML(m spc.ModificationInfo, mods m m := v newKey := fmt.Sprintf("%s#%d#%.4f", aa[i.Position-1], i.Position, i.Mass) m.Index = newKey - m.Position = strconv.Itoa(i.Position) - m.IsobaricMods = make(map[string]float64) - p.Modifications.Index[newKey] = m + m.Position = i.Position + //m.IsobaricMods = make(map[string]float64) + pModificationsIndex[newKey] = m } v, ok = mods.Index[keyMinus] @@ -488,9 +609,9 @@ func (p *PeptideIdentification) mapModsFromPepXML(m spc.ModificationInfo, mods m m := v newKey := fmt.Sprintf("%s#%d#%.4f", aa[i.Position-1], i.Position, i.Mass) m.Index = newKey - m.Position = strconv.Itoa(i.Position) - m.IsobaricMods = make(map[string]float64) - p.Modifications.Index[newKey] = m + m.Position = i.Position + //m.IsobaricMods = make(map[string]float64) + pModificationsIndex[newKey] = m } } } @@ -502,8 +623,8 @@ func (p *PeptideIdentification) mapModsFromPepXML(m spc.ModificationInfo, mods m if ok { m := v m.AminoAcid = "N-term" - m.IsobaricMods = make(map[string]float64) - p.Modifications.Index[key] = m + //m.IsobaricMods = make(map[string]float64) + pModificationsIndex[key] = m } // this rule was added because PTMProphet is changing the mod_nterm_mass @@ -514,8 +635,8 @@ func (p *PeptideIdentification) mapModsFromPepXML(m spc.ModificationInfo, mods m if ok { m := v m.AminoAcid = "N-term" - m.IsobaricMods = make(map[string]float64) - p.Modifications.Index[key] = m + //m.IsobaricMods = make(map[string]float64) + pModificationsIndex[key] = m } } @@ -528,25 +649,27 @@ func (p *PeptideIdentification) mapModsFromPepXML(m spc.ModificationInfo, mods m if ok { m := v m.AminoAcid = "C-term" - m.IsobaricMods = make(map[string]float64) - p.Modifications.Index[key] = m + //m.IsobaricMods = make(map[string]float64) + pModificationsIndex[key] = m } } // if isotopicCorr >= 0.036386 || isotopicCorr <= -0.036386 { key := fmt.Sprintf("%.4f", p.Massdiff) - _, ok := p.Modifications.Index[key] + _, ok := pModificationsIndex[key] if !ok { m := mod.Modification{ - Index: key, - Name: "Unknown", - Type: "Observed", - MassDiff: p.Massdiff, - IsobaricMods: make(map[string]float64), + Index: key, + Name: "Unknown", + Type: mod.Observed, + MassDiff: p.Massdiff, + //IsobaricMods: make(map[string]float64), } - p.Modifications.Index[key] = m + pModificationsIndex[key] = m + } + if len(pModificationsIndex) != 0 { + p.Modifications = mod.Modifications{Index: pModificationsIndex}.ToSlice() } - } // getMassDeviation calculates the mass deviation for a pepXML file based on the 0 mass difference @@ -572,7 +695,7 @@ func (p *PeptideIdentification) mapModsFromPepXML(m spc.ModificationInfo, mods m // PromoteProteinIDs changes the identification in cases where the reference protein is a decoy and // the alternative proteins contains target proteins. -func (p *PepXML) PromoteProteinIDs() { +func (p *PepXML4Serialiazation) PromoteProteinIDs() { for i := range p.PeptideIdentification { @@ -768,34 +891,14 @@ func printModel(v, path string, xAxis, obs, pos, neg []float64) { } -// Serialize converts the whle structure to a gob file -func (p *PepXML) Serialize() { - - b, e := msgpack.Marshal(&p) - if e != nil { - msg.MarshalFile(e, "fatal") - } - - e = ioutil.WriteFile(sys.PepxmlBin(), b, sys.FilePermission()) - if e != nil { - msg.WriteFile(e, "fatal") - } - +// Serialize converts the whole structure to a msgpack file +func (p *PepXML4Serialiazation) Serialize() { + sys.Serialize(p, sys.PepxmlBin()) } // Restore reads philosopher results files and restore the data sctructure func (p *PepXML) Restore() { - - b, e := ioutil.ReadFile(sys.PepxmlBin()) - if e != nil { - msg.ReadFile(e, "warning") - } - - e = msgpack.Unmarshal(b, &p) - if e != nil { - msg.DecodeMsgPck(e, "warning") - } - + sys.Restore(p, sys.PepxmlBin(), false) } // Serialize converts the whle structure to a gob file @@ -812,17 +915,24 @@ func (p *PepIDList) Serialize(level string) { } else { msg.Custom(errors.New("cannot determine binary data class"), "fatal") } + sys.Serialize(p, dest) +} - b, e := msgpack.Marshal(&p) - if e != nil { - msg.MarshalFile(e, "fatal") - } +// Serialize converts the whle structure to a gob file +func (p *PepIDListPtrs) Serialize(level string) { - e = ioutil.WriteFile(dest, b, sys.FilePermission()) - if e != nil { - msg.WriteFile(e, "fatal") - } + var dest string + if level == "psm" { + dest = sys.PSMBin() + } else if level == "pep" { + dest = sys.PepBin() + } else if level == "ion" { + dest = sys.IonBin() + } else { + msg.Custom(errors.New("cannot determine binary data class"), "fatal") + } + sys.Serialize(p, dest) } // Restore reads philosopher results files and restore the data sctructure @@ -839,15 +949,5 @@ func (p *PepIDList) Restore(level string) { } else { msg.Custom(errors.New("cannot determine binary data class"), "fatal") } - - b, e := ioutil.ReadFile(dest) - if e != nil { - msg.ReadFile(e, "fatal") - } - - e = msgpack.Unmarshal(b, &p) - if e != nil { - msg.DecodeMsgPck(e, "fatal") - } - + sys.Restore(p, dest, false) } diff --git a/lib/id/pro.go b/lib/id/pro.go index 8146ef74..c7b10d2b 100644 --- a/lib/id/pro.go +++ b/lib/id/pro.go @@ -3,10 +3,10 @@ package id import ( "errors" "fmt" - "io/ioutil" "log" "os" "path/filepath" + "strconv" "strings" "philosopher/lib/iso" @@ -18,15 +18,14 @@ import ( "philosopher/lib/sys" "github.com/sirupsen/logrus" - "github.com/vmihailenco/msgpack" ) // ProtXML struct type ProtXML struct { FileName string DecoyTag string - Groups GroupList RunOptions string + Groups GroupList } // GroupIdentification tag @@ -38,43 +37,60 @@ type GroupIdentification struct { // ProteinIdentification struct type ProteinIdentification struct { - GroupNumber uint32 - GroupSiblingID string ProteinName string Description string + GroupSiblingID string UniqueStrippedPeptides []string - Length string + IndistinguishableProtein []string + GroupNumber uint32 + Length int + Picked int + TotalNumberPeptides int PercentCoverage float32 - PctSpectrumIDs float32 - GroupProbability float64 Probability float64 - Confidence float64 TopPepProb float64 - IndistinguishableProtein []string - TotalNumberPeptides int PeptideIons []PeptideIonIdentification HasRazor bool - Picked int + //Confidence float64 + //PctSpectrumIDs float32 + //GroupProbability float64 } // PeptideIonIdentification struct type PeptideIonIdentification struct { PeptideSequence string ModifiedPeptide string + PeptideParentProtein []string + Razor int + NumberOfEnzymaticTermini uint8 Charge uint8 InitialProbability float64 Weight float64 GroupWeight float64 CalcNeutralPepMass float64 - NumberOfEnzymaticTermini uint8 - NumberOfInstances int - SharedParentProteins int - Razor int - IsNondegenerateEvidence bool IsUnique bool - PeptideParentProtein []string - Labels iso.Labels + Labels *iso.Labels Modifications mod.Modifications + //NumberOfInstances int + //SharedParentProteins int + //IsNondegenerateEvidence bool +} +type IonFormType struct { + Peptide string + CalcNeutralPepMass float32 + AssumedCharge uint8 +} + +func (e IonFormType) Str() string { + return fmt.Sprintf("%s#%d#%.4f", e.Peptide, e.AssumedCharge, e.CalcNeutralPepMass) +} + +func (e PeptideIonIdentification) IonForm() IonFormType { + t, err := strconv.ParseFloat(fmt.Sprintf("%.4f", e.CalcNeutralPepMass), 32) + if err != nil { + panic(err) + } + return IonFormType{e.PeptideSequence, float32(t), e.Charge} } // GroupList represents a protein group list @@ -120,7 +136,7 @@ func (p *ProtXML) Read(f string) { // correcting group probabilities if jindex == 0 { if i.Probability == 1 && j.Probability == 0 { - j.Probability = float64(i.Probability) + j.Probability = i.Probability break } } @@ -128,19 +144,23 @@ func (p *ProtXML) Read(f string) { var ptid ProteinIdentification ptid.GroupNumber = i.GroupNumber - ptid.GroupProbability = i.Probability + //ptid.GroupProbability = i.Probability ptid.Probability = i.Probability ptid.ProteinName = string(j.ProteinName) ptid.Description = string(j.Annotation.ProteinDescription) ptid.Probability = j.Probability ptid.PercentCoverage = j.PercentCoverage - ptid.PctSpectrumIDs = j.PctSpectrumIDs + //ptid.PctSpectrumIDs = j.PctSpectrumIDs ptid.GroupSiblingID = string(j.GroupSiblingID) ptid.TotalNumberPeptides = j.TotalNumberPeptides ptid.TopPepProb = 0 if strings.EqualFold(j.Parameter.Name, "prot_length") { - ptid.Length = j.Parameter.Value + l, e := strconv.Atoi(j.Parameter.Value) + if e != nil { + panic(e) + } + ptid.Length = l } // collect indistinguishable proteins (Protein to Protein equivalency) @@ -161,17 +181,17 @@ func (p *ProtXML) Read(f string) { pepid.Weight = k.Weight pepid.GroupWeight = k.GroupWeight pepid.CalcNeutralPepMass = k.CalcNeutralPepMass - pepid.SharedParentProteins = len(k.PeptideParentProtein) + //pepid.SharedParentProteins = len(k.PeptideParentProtein) pepid.Modifications.Index = make(map[string]mod.Modification) - pepid.NumberOfInstances = k.NIstances + //pepid.NumberOfInstances = k.NIstances pepid.NumberOfEnzymaticTermini = k.NEnzymaticTermini pepid.Razor = -1 if strings.EqualFold(string(k.IsNondegenerateEvidence), "Y") || strings.EqualFold(string(k.IsNondegenerateEvidence), "y") { - pepid.IsNondegenerateEvidence = true + //pepid.IsNondegenerateEvidence = true pepid.IsUnique = true } else { - pepid.IsNondegenerateEvidence = false + //pepid.IsNondegenerateEvidence = false pepid.IsUnique = false } @@ -260,16 +280,7 @@ func (p *ProtXML) MarkUniquePeptides(w float64) { // Serialize converts the whle structure to a gob file func (p *ProtIDList) Serialize() { - - b, e := msgpack.Marshal(&p) - if e != nil { - msg.MarshalFile(e, "fatal") - } - - e = ioutil.WriteFile(sys.ProBin(), b, sys.FilePermission()) - if e != nil { - msg.WriteFile(e, "fatal") - } + sys.Serialize(p, sys.ProBin()) } // SerializeToTemp converts the whle structure to a gob file and puts in a specific data set folder @@ -291,32 +302,12 @@ func (p *ProtIDList) SerializeToTemp() string { log.Fatal(e) } - b, e := msgpack.Marshal(&p) - if e != nil { - msg.MarshalFile(e, "fatal") - } - dest := fmt.Sprintf("%s%spro.bin", m.Temp, string(filepath.Separator)) - - e = ioutil.WriteFile(dest, b, sys.FilePermission()) - if e != nil { - msg.WriteFile(e, "fatal") - } - + sys.Serialize(p, dest) return dest } // Restore reads philosopher results files and restore the data sctructure func (p *ProtIDList) Restore() { - - b, e := ioutil.ReadFile(sys.ProBin()) - if e != nil { - msg.ReadFile(e, "fatal") - } - - e = msgpack.Unmarshal(b, &p) - if e != nil { - msg.DecodeMsgPck(e, "fatal") - } - + sys.Restore(p, sys.ProBin(), false) } diff --git a/lib/inf/inf.go b/lib/inf/inf.go index 581e9f07..0c657b30 100644 --- a/lib/inf/inf.go +++ b/lib/inf/inf.go @@ -20,7 +20,7 @@ type Peptide struct { CalcNeutralPepMass float64 Probability float64 Weight float64 - Spectra map[string]int + Spectra map[id.SpectrumType]int MappedProteins map[string]int MappedProteinsWithDecoys map[string]int } @@ -49,7 +49,7 @@ func ProteinInference(psm id.PepIDList) (id.PepIDList, map[string]string, map[st var p Peptide p.IonForm = ionForm - p.Spectra = make(map[string]int) + p.Spectra = make(map[id.SpectrumType]int) p.MappedProteins = make(map[string]int) p.MappedProteinsWithDecoys = make(map[string]int) @@ -100,7 +100,7 @@ func ProteinInference(psm id.PepIDList) (id.PepIDList, map[string]string, map[st obj := v obj.Sequence = i.Peptide - obj.Spectra[i.Spectrum]++ + obj.Spectra[i.SpectrumFileName()]++ obj.MappedProteins[i.Protein] = proteinTNP[i.Protein] obj.MappedProteinsWithDecoys[i.Protein] = proteinTNP[i.Protein] diff --git a/lib/met/met.go b/lib/met/met.go index 35e2247f..7b9ffd5b 100644 --- a/lib/met/met.go +++ b/lib/met/met.go @@ -16,7 +16,7 @@ import ( uuid "github.com/satori/go.uuid" "github.com/sirupsen/logrus" - "github.com/vmihailenco/msgpack" + "github.com/vmihailenco/msgpack/v5" ) // Data is the global parameter container @@ -88,74 +88,79 @@ type Database struct { // Comet options and parameters type Comet struct { Param string `yaml:"param"` - ParamFile []byte RawExtension string `yaml:"raw"` RawFiles []string + ParamFile []byte Print bool NoIndex bool `yaml:"noindex"` } // MSFragger options and parameters type MSFragger struct { - JarPath string `yaml:"path"` - Memory int `yaml:"memory"` - Threads int `yaml:"num_threads"` - Extension string `yaml:"extension"` - DataType int `yaml:"data_type"` - DatabaseName string `yaml:"database_name"` - PrecursorMassLower int `yaml:"precursor_mass_lower"` - PrecursorMassUpper int `yaml:"precursor_mass_upper"` - PrecursorMassUnits int `yaml:"precursor_mass_units"` - PrecursorTrueTolerance int `yaml:"precursor_true_tolerance"` - PrecursorTrueUnits int `yaml:"precursor_true_units"` - FragmentMassTolerance float64 `yaml:"fragment_mass_tolerance"` - FragmentMassUnits int `yaml:"fragment_mass_units"` - CalibrateMass int `yaml:"calibrate_mass"` - UseAllModsInFirstSearch int `yaml:"use_all_mods_in_first_search"` - WriteCalibratedMGF int `yaml:"write_calibrated_mgf"` - DecoyPrefix string `yaml:"decoy_prefix"` - EvaluateMassCalibration int `yaml:"evaluate_mass_calibration"` - Deisotope int `yaml:"deisotope"` - Deneutralloss int `yaml:"deneutralloss"` - IsotopeError string `yaml:"isotope_error"` - MassOffsets string `yaml:"mass_offsets"` - PrecursorMassMode string `yaml:"precursor_mass_mode"` - LocalizeDeltaMass int `yaml:"localize_delta_mass"` - DeltaMassExcludeRanges string `yaml:"delta_mass_exclude_ranges"` - FragmentIonSeries string `yaml:"fragment_ion_series"` - IonSeriesDefinitions string `yaml:"ion_series_definitions"` - - SearchEnzymeName1 string `yaml:"search_enzyme_name_1"` - SearchEnzymeCut1 string `yaml:"search_enzyme_cut_1"` - SearchEnzymeNocut1 string `yaml:"search_enzyme_nocut_1"` - AllowedMissedCleavage1 int `yaml:"allowed_missed_cleavage_1"` - SearchEnzymeSense1 string `yaml:"search_enzyme_sense_1"` - - SearchEnzymeName2 string `yaml:"search_enzyme_name_2"` - SearchEnzymeCut2 string `yaml:"search_enzyme_cut_2"` - SearchEnzymeNocut2 string `yaml:"search_enzyme_nocut_2"` - AllowedMissedCleavage2 int `yaml:"allowed_missed_cleavage_2"` - SearchEnzymeSense2 string `yaml:"search_enzyme_sense_2"` - - //SearchEnzymeName string `yaml:"search_enzyme_name"` - //SearchEnzymeCutafter string `yaml:"search_enzyme_cutafter"` - //SearchEnzymeButNotAfter string `yaml:"search_enzyme_butnotafter"` - //AllowedMissedCleavage int `yaml:"allowed_missed_cleavage"` - + JarPath string `yaml:"path"` + Extension string `yaml:"extension"` + DatabaseName string `yaml:"database_name"` + DecoyPrefix string `yaml:"decoy_prefix"` + IsotopeError string `yaml:"isotope_error"` + MassOffsets string `yaml:"mass_offsets"` + PrecursorMassMode string `yaml:"precursor_mass_mode"` + DeltaMassExcludeRanges string `yaml:"delta_mass_exclude_ranges"` + FragmentIonSeries string `yaml:"fragment_ion_series"` + IonSeriesDefinitions string `yaml:"ion_series_definitions"` + SearchEnzymeName1 string `yaml:"search_enzyme_name_1"` + SearchEnzymeCut1 string `yaml:"search_enzyme_cut_1"` + SearchEnzymeNocut1 string `yaml:"search_enzyme_nocut_1"` + SearchEnzymeSense1 string `yaml:"search_enzyme_sense_1"` + SearchEnzymeName2 string `yaml:"search_enzyme_name_2"` + SearchEnzymeCut2 string `yaml:"search_enzyme_cut_2"` + SearchEnzymeNocut2 string `yaml:"search_enzyme_nocut_2"` + SearchEnzymeSense2 string `yaml:"search_enzyme_sense_2"` + OutputFormat string `yaml:"output_format"` + PrecursorCharge string `yaml:"precursor_charge"` + DigestMassRange string `yaml:"digest_mass_range"` + ClearMzRange string `yaml:"clear_mz_range"` + RemovePrecursorRange string `yaml:"remove_precursor_range"` + LabileSearchMode string `yaml:"labile_search_mode"` + RestrictDeltaMassTo string `yaml:"restrict_deltamass_to"` + DiagnosticFragments string `yaml:"diagnostic_fragments"` + YTypeMasses string `yaml:"Y_type_masses"` + VariableMod01 string `yaml:"variable_mod_01"` + VariableMod02 string `yaml:"variable_mod_02"` + VariableMod03 string `yaml:"variable_mod_03"` + VariableMod04 string `yaml:"variable_mod_04"` + VariableMod05 string `yaml:"variable_mod_05"` + VariableMod06 string `yaml:"variable_mod_06"` + VariableMod07 string `yaml:"variable_mod_07"` + RawFiles []string + Memory int `yaml:"memory"` + Threads int `yaml:"num_threads"` + DataType int `yaml:"data_type"` + PrecursorMassLower int `yaml:"precursor_mass_lower"` + PrecursorMassUpper int `yaml:"precursor_mass_upper"` + PrecursorMassUnits int `yaml:"precursor_mass_units"` + PrecursorTrueTolerance int `yaml:"precursor_true_tolerance"` + PrecursorTrueUnits int `yaml:"precursor_true_units"` + FragmentMassUnits int `yaml:"fragment_mass_units"` + CalibrateMass int `yaml:"calibrate_mass"` + UseAllModsInFirstSearch int `yaml:"use_all_mods_in_first_search"` + WriteCalibratedMGF int `yaml:"write_calibrated_mgf"` + EvaluateMassCalibration int `yaml:"evaluate_mass_calibration"` + Deisotope int `yaml:"deisotope"` + Deneutralloss int `yaml:"deneutralloss"` + LocalizeDeltaMass int `yaml:"localize_delta_mass"` + AllowedMissedCleavage1 int `yaml:"allowed_missed_cleavage_1"` + AllowedMissedCleavage2 int `yaml:"allowed_missed_cleavage_2"` NumEnzymeTermini int `yaml:"num_enzyme_termini"` ClipNTermM int `yaml:"clip_nTerm_M"` AllowMultipleVariableModsOnResidue int `yaml:"allow_multiple_variable_mods_on_residue"` MaxVariableModsPerPeptide int `yaml:"max_variable_mods_per_peptide"` MaxVariableModsCombinations int `yaml:"max_variable_mods_combinations"` - OutputFormat string `yaml:"output_format"` OutputReportTopN int `yaml:"output_report_topN"` OutputMaxExpect int `yaml:"output_max_expect"` ReportAlternativeProteins int `yaml:"report_alternative_proteins"` OverrideCharge int `yaml:"override_charge"` - PrecursorCharge string `yaml:"precursor_charge"` DigestMinLength int `yaml:"digest_min_length"` DigestMaxLength int `yaml:"digest_max_length"` - DigestMassRange string `yaml:"digest_mass_range"` MaxFragmentCharge int `yaml:"max_fragment_charge"` TrackZeroTopN int `yaml:"track_zero_topN"` ZeroBinAcceptExpect int `yaml:"zero_bin_accept_expect"` @@ -166,24 +171,12 @@ type MSFragger struct { UseTopNPeaks int `yaml:"use_topN_peaks"` MinFragmentsModelling int `yaml:"min_fragments_modelling"` MinMatchedFragments int `yaml:"min_matched_fragments"` - MinimumRatio float64 `yaml:"minimum_ratio"` - ClearMzRange string `yaml:"clear_mz_range"` RemovePrecursorPeak int `yaml:"remove_precursor_peak"` - RemovePrecursorRange string `yaml:"remove_precursor_range"` IntensityTransform int `yaml:"intensity_transform"` MassDiffToVariableMod int `yaml:"mass_diff_to_variable_mod"` - LabileSearchMode string `yaml:"labile_search_mode"` - RestrictDeltaMassTo string `yaml:"restrict_deltamass_to"` DiagnosticIntensityFilter int `yaml:"diagnostic_intensity_filter"` - DiagnosticFragments string `yaml:"diagnostic_fragments"` - YTypeMasses string `yaml:"Y_type_masses"` - VariableMod01 string `yaml:"variable_mod_01"` - VariableMod02 string `yaml:"variable_mod_02"` - VariableMod03 string `yaml:"variable_mod_03"` - VariableMod04 string `yaml:"variable_mod_04"` - VariableMod05 string `yaml:"variable_mod_05"` - VariableMod06 string `yaml:"variable_mod_06"` - VariableMod07 string `yaml:"variable_mod_07"` + MinimumRatio float64 `yaml:"minimum_ratio"` + FragmentMassTolerance float64 `yaml:"fragment_mass_tolerance"` AddCtermPeptide float64 `yaml:"add_Cterm_peptide"` AddCtermProtein float64 `yaml:"add_Cterm_protein"` AddNTermPeptide float64 `yaml:"add_Nterm_peptide"` @@ -209,20 +202,23 @@ type MSFragger struct { AddTryptophan float64 `yaml:"add_W_tryptophan"` AddTyrosine float64 `yaml:"add_Y_tyrosine"` Param string `yaml:"param"` - RawFiles []string ParamFile []byte + //SearchEnzymeName string `yaml:"search_enzyme_name"` + //SearchEnzymeCutafter string `yaml:"search_enzyme_cutafter"` + //SearchEnzymeButNotAfter string `yaml:"search_enzyme_butnotafter"` + //AllowedMissedCleavage int `yaml:"allowed_missed_cleavage"` } // PeptideProphet options and parameters type PeptideProphet struct { + FileExtension string `yaml:"extension"` + Output string `yaml:"output"` + Database string `yaml:"database"` + Rtcat string `yaml:"rtcat"` + Decoy string `yaml:"decoy"` + Enzyme string `yaml:"enzyme"` + Ignorechg string `yaml:"ignorechg"` InputFiles []string - FileExtension string `yaml:"extension"` - Output string `yaml:"output"` - Database string `yaml:"database"` - Rtcat string `yaml:"rtcat"` - Decoy string `yaml:"decoy"` - Enzyme string `yaml:"enzyme"` - Ignorechg string `yaml:"ignorechg"` Minpiprob float64 `yaml:"minpiprob"` Minrtprob float64 `yaml:"minrtprob"` Minprob float64 `yaml:"minprob"` @@ -293,6 +289,7 @@ type ProteinProphet struct { Glyc bool `yaml:"glyc"` Nogroupwts bool `yaml:"nogroupwts"` NonSP bool `yaml:"nonsp"` + Subgroups bool `yaml:"subgroups"` Accuracy bool `yaml:"accuracy"` Asap bool `yaml:"asap"` Refresh bool `yaml:"refresh"` diff --git a/lib/mod/mod.go b/lib/mod/mod.go index 138e74f3..d4b0c2a2 100644 --- a/lib/mod/mod.go +++ b/lib/mod/mod.go @@ -5,22 +5,52 @@ type Modifications struct { Index map[string]Modification } +type ModTypeType uint8 + +const ( + Assigned ModTypeType = iota + Observed +) + // Modification is the basic attribute for each modification type Modification struct { - Index string - ID string - Name string - Definition string - Variable string - Position string - Type string - MonoIsotopicMass float64 - AverageMass float64 - MassDiff float64 - AminoAcid string - IsProteinTerminus string - Terminus string - IsobaricMods map[string]float64 + Index string + ID string + Name string + Definition string + AminoAcid string + IsobaricMods map[string]float64 + MassDiff float64 + Position int + Type ModTypeType + Variable bool + //IsProteinTerminus string + //Terminus string + //MonoIsotopicMass float64 + //AverageMass float64 +} + +// Modifications is a collection of modifications +type ModificationsSlice struct { + IndexSlice []Modification +} + +func (m Modifications) ToSlice() ModificationsSlice { + IndexSlice := make([]Modification, 0, len(m.Index)) + for k, v := range m.Index { + IndexSlice = append(IndexSlice, v) + if v.Index != k { + panic(nil) + } + } + return ModificationsSlice{IndexSlice: IndexSlice} +} +func (m ModificationsSlice) ToMap() Modifications { + Index := make(map[string]Modification, len(m.IndexSlice)) + for _, e := range m.IndexSlice { + Index[e.Index] = e + } + return Modifications{Index: Index} } // Serialize saves to disk a msgpack version of the Isobaric data structure diff --git a/lib/mzn/mzn.go b/lib/mzn/mzn.go index 32083ad2..df606aa1 100644 --- a/lib/mzn/mzn.go +++ b/lib/mzn/mzn.go @@ -22,8 +22,8 @@ import ( // MsData top struct type MsData struct { FileName string + Spectra Spectra // RefSpectra sync.Map - Spectra Spectra } // Spectra struct diff --git a/lib/obo/obo.go b/lib/obo/obo.go index c931b3e0..8018cd36 100644 --- a/lib/obo/obo.go +++ b/lib/obo/obo.go @@ -36,7 +36,6 @@ type Onto struct { // Term refers to an atomic ontology definition type Term struct { ID string - RecordID int Name string Definition string DateTimePosted string @@ -44,9 +43,10 @@ type Term struct { Comments string Synonyms string IsA string + Composition string + RecordID int MonoIsotopicMass float64 AverageMass float64 - Composition string Sites map[string]uint8 } diff --git a/lib/pip/pip.go b/lib/pip/pip.go index 721246f6..a316dbb1 100644 --- a/lib/pip/pip.go +++ b/lib/pip/pip.go @@ -518,9 +518,11 @@ func CombinedProteinList(meta met.Data, p Directives, dir string, data []string) } } - e := os.RemoveAll(path.Dir(proBin)) - if e != nil { - log.Fatal(e) + if _, err := os.Stat(path.Dir(proBin)); err == nil { + e := os.RemoveAll(path.Dir(proBin)) + if e != nil { + log.Fatal(e) + } } return meta diff --git a/lib/qua/bqt.go b/lib/qua/bqt.go index d52bc916..386f9889 100644 --- a/lib/qua/bqt.go +++ b/lib/qua/bqt.go @@ -29,15 +29,15 @@ type Cluster struct { Status string Existence string GeneNames string + Peptides []string + PeptideIons []string + UniqueClusterPeptides []string Number int TotalPeptideNumber int SharedPeptides int Coverage float32 UniqueClusterTopPepProb float64 TopPepProb float64 - Peptides []string - PeptideIons []string - UniqueClusterPeptides []string Members map[string]uint8 } diff --git a/lib/qua/iso.go b/lib/qua/iso.go index 2679d8ad..59984e36 100644 --- a/lib/qua/iso.go +++ b/lib/qua/iso.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "math" + "philosopher/lib/id" "strings" "philosopher/lib/iso" @@ -391,10 +392,10 @@ func mapLabeledSpectra(labels map[string]iso.Labels, purity float64, evi []rep.P } // the assignment of usage is only done for general PSM, not for phosphoPSMs -func assignUsage(evi rep.Evidence, spectrumMap map[string]iso.Labels) rep.Evidence { +func assignUsage(evi rep.Evidence, spectrumMap map[id.SpectrumType]iso.Labels) rep.Evidence { for i := range evi.PSM { - _, ok := spectrumMap[evi.PSM[i].Spectrum] + _, ok := spectrumMap[evi.PSM[i].SpectrumFileName()] if ok { evi.PSM[i].Labels.IsUsed = true } @@ -417,7 +418,7 @@ func correctUnlabelledSpectra(evi rep.Evidence) rep.Evidence { counter++ } - if len(evi.PSM[i].Modifications.Index) < 1 { + if len(evi.PSM[i].Modifications.IndexSlice) < 1 { evi.PSM[i].Labels.Channel1.Intensity = 0 evi.PSM[i].Labels.Channel2.Intensity = 0 evi.PSM[i].Labels.Channel3.Intensity = 0 @@ -438,7 +439,7 @@ func correctUnlabelledSpectra(evi rep.Evidence) rep.Evidence { evi.PSM[i].Labels.Channel18.Intensity = 0 } else { - for _, j := range evi.PSM[i].Modifications.Index { + for _, j := range evi.PSM[i].Modifications.IndexSlice { //if j.MassDiff == 144.1020 || j.MassDiff == 229.1629 || j.MassDiff == 304.2072 { if j.MassDiff > 144 { flag++ @@ -476,14 +477,18 @@ func correctUnlabelledSpectra(evi rep.Evidence) rep.Evidence { } // rollUpPeptides gathers PSM info and filters them before summing the instensities to the peptide level -func rollUpPeptides(evi rep.Evidence, spectrumMap map[string]iso.Labels, phosphoSpectrumMap map[string]iso.Labels) rep.Evidence { +func rollUpPeptides(evi rep.Evidence, spectrumMap map[id.SpectrumType]iso.Labels, phosphoSpectrumMap map[id.SpectrumType]iso.Labels) rep.Evidence { for j := range evi.Peptides { + + evi.Peptides[j].Labels = &iso.Labels{} + for k := range evi.Peptides[j].Spectra { i, ok := spectrumMap[k] if ok { + //evi.Peptides[j].Labels = &iso.Labels{} evi.Peptides[j].Labels.Channel1.Name = i.Channel1.Name evi.Peptides[j].Labels.Channel1.CustomName = i.Channel1.CustomName evi.Peptides[j].Labels.Channel1.Mz = i.Channel1.Mz @@ -564,11 +569,6 @@ func rollUpPeptides(evi rep.Evidence, spectrumMap map[string]iso.Labels, phospho evi.Peptides[j].Labels.Channel16.Mz = i.Channel16.Mz evi.Peptides[j].Labels.Channel16.Intensity += i.Channel16.Intensity - evi.Peptides[j].Labels.Channel16.Name = i.Channel16.Name - evi.Peptides[j].Labels.Channel16.CustomName = i.Channel16.CustomName - evi.Peptides[j].Labels.Channel16.Mz = i.Channel16.Mz - evi.Peptides[j].Labels.Channel16.Intensity += i.Channel16.Intensity - evi.Peptides[j].Labels.Channel17.Name = i.Channel17.Name evi.Peptides[j].Labels.Channel17.CustomName = i.Channel17.CustomName evi.Peptides[j].Labels.Channel17.Mz = i.Channel17.Mz @@ -582,7 +582,7 @@ func rollUpPeptides(evi rep.Evidence, spectrumMap map[string]iso.Labels, phospho i, ok = phosphoSpectrumMap[k] if ok { - + evi.Peptides[j].PhosphoLabels = &iso.Labels{} evi.Peptides[j].PhosphoLabels.Channel1.Name = i.Channel1.Name evi.Peptides[j].PhosphoLabels.Channel1.CustomName = i.Channel1.CustomName evi.Peptides[j].PhosphoLabels.Channel1.Mz = i.Channel1.Mz @@ -681,14 +681,17 @@ func rollUpPeptides(evi rep.Evidence, spectrumMap map[string]iso.Labels, phospho } // rollUpPeptideIons gathers PSM info and filters them before summing the instensities to the peptide ION level -func rollUpPeptideIons(evi rep.Evidence, spectrumMap map[string]iso.Labels, phosphoSpectrumMap map[string]iso.Labels) rep.Evidence { +func rollUpPeptideIons(evi rep.Evidence, spectrumMap map[id.SpectrumType]iso.Labels, phosphoSpectrumMap map[id.SpectrumType]iso.Labels) rep.Evidence { for j := range evi.Ions { + + evi.Ions[j].Labels = &iso.Labels{} + for k := range evi.Ions[j].Spectra { i, ok := spectrumMap[k] if ok { - + //evi.Ions[j].Labels = &iso.Labels{} evi.Ions[j].Labels.Channel1.Name = i.Channel1.Name evi.Ions[j].Labels.Channel1.CustomName = i.Channel1.CustomName evi.Ions[j].Labels.Channel1.Mz = i.Channel1.Mz @@ -782,7 +785,7 @@ func rollUpPeptideIons(evi rep.Evidence, spectrumMap map[string]iso.Labels, phos i, ok = phosphoSpectrumMap[k] if ok { - + evi.Ions[j].PhosphoLabels = &iso.Labels{} evi.Ions[j].PhosphoLabels.Channel1.Name = i.Channel1.Name evi.Ions[j].PhosphoLabels.Channel1.CustomName = i.Channel1.CustomName evi.Ions[j].PhosphoLabels.Channel1.Mz = i.Channel1.Mz @@ -881,14 +884,20 @@ func rollUpPeptideIons(evi rep.Evidence, spectrumMap map[string]iso.Labels, phos } // rollUpProteins gathers PSM info and filters them before summing the instensities to the peptide ION level -func rollUpProteins(evi rep.Evidence, spectrumMap map[string]iso.Labels, phosphoSpectrumMap map[string]iso.Labels) rep.Evidence { +func rollUpProteins(evi rep.Evidence, spectrumMap map[id.SpectrumType]iso.Labels, phosphoSpectrumMap map[id.SpectrumType]iso.Labels) rep.Evidence { for j := range evi.Proteins { + + evi.Proteins[j].TotalLabels = &iso.Labels{} + evi.Proteins[j].UniqueLabels = &iso.Labels{} + evi.Proteins[j].URazorLabels = &iso.Labels{} + for _, k := range evi.Proteins[j].TotalPeptideIons { for l := range k.Spectra { i, ok := spectrumMap[l] if ok { + //evi.Proteins[j].TotalLabels = &iso.Labels{} evi.Proteins[j].TotalLabels.Channel1.Name = i.Channel1.Name evi.Proteins[j].TotalLabels.Channel1.CustomName = i.Channel1.CustomName evi.Proteins[j].TotalLabels.Channel1.Mz = i.Channel1.Mz @@ -981,6 +990,7 @@ func rollUpProteins(evi rep.Evidence, spectrumMap map[string]iso.Labels, phospho //if k.IsNondegenerateEvidence { if k.IsUnique { + //evi.Proteins[j].UniqueLabels = &iso.Labels{} evi.Proteins[j].UniqueLabels.Channel1.Name = i.Channel1.Name evi.Proteins[j].UniqueLabels.Channel1.CustomName = i.Channel1.CustomName evi.Proteins[j].UniqueLabels.Channel1.Mz = i.Channel1.Mz @@ -1073,6 +1083,7 @@ func rollUpProteins(evi rep.Evidence, spectrumMap map[string]iso.Labels, phospho } if k.IsURazor { + //evi.Proteins[j].URazorLabels = &iso.Labels{} evi.Proteins[j].URazorLabels.Channel1.Name = i.Channel1.Name evi.Proteins[j].URazorLabels.Channel1.CustomName = i.Channel1.CustomName evi.Proteins[j].URazorLabels.Channel1.Mz = i.Channel1.Mz diff --git a/lib/qua/lfq.go b/lib/qua/lfq.go index 47c71e98..540ea17d 100644 --- a/lib/qua/lfq.go +++ b/lib/qua/lfq.go @@ -5,6 +5,7 @@ import ( "fmt" "math" "path/filepath" + "philosopher/lib/id" "sort" "strings" @@ -37,35 +38,35 @@ func peakIntensity(evi rep.Evidence, dir, format string, rTWin, pTWin, tol float logrus.Info("Indexing PSM information") - var psmMap = make(map[string]rep.PSMEvidence) + var psmMap = make(map[id.SpectrumType]rep.PSMEvidence) var sourceMap = make(map[string][]rep.PSMEvidence) - var spectra = make(map[string][]string) - var ppmPrecision = make(map[string]float64) + var spectra = make(map[string][]id.SpectrumType) + var ppmPrecision = make(map[id.SpectrumType]float64) var mzMap = make(map[string]float64) var mzCVMap = make(map[string]string) - var minRT = make(map[string]float64) - var maxRT = make(map[string]float64) - var compVoltageMap = make(map[string]string) - var retentionTime = make(map[string]float64) - var intensity = make(map[string]float64) - var instensityCV = make(map[string]float64) + var minRT = make(map[id.SpectrumType]float64) + var maxRT = make(map[id.SpectrumType]float64) + var compVoltageMap = make(map[id.SpectrumType]string) + var retentionTime = make(map[id.SpectrumType]float64) + var intensity = make(map[id.SpectrumType]float64) + var instensityCV = make(map[id.SpectrumType]float64) - var charges = make(map[string]int) + var charges = make(map[id.SpectrumType]int) // collect attributes from PSM for _, i := range evi.PSM { partName := strings.Split(i.Spectrum, ".") sourceMap[partName[0]] = append(sourceMap[partName[0]], i) - spectra[partName[0]] = append(spectra[partName[0]], i.Spectrum) - - ppmPrecision[i.Spectrum] = tol / math.Pow(10, 6) - mzMap[i.Spectrum] = ((i.PrecursorNeutralMass + (float64(i.AssumedCharge) * bio.Proton)) / float64(i.AssumedCharge)) - minRT[i.Spectrum] = (i.RetentionTime / 60) - rTWin - maxRT[i.Spectrum] = (i.RetentionTime / 60) + rTWin - retentionTime[i.Spectrum] = i.RetentionTime - compVoltageMap[i.Spectrum] = i.CompensationVoltage - charges[i.Spectrum] = int(i.AssumedCharge) - psmMap[i.Spectrum] = i + spectra[partName[0]] = append(spectra[partName[0]], i.SpectrumFileName()) + + ppmPrecision[i.SpectrumFileName()] = tol / math.Pow(10, 6) + mzMap[i.SpectrumFileName().Str()] = ((i.PrecursorNeutralMass + (float64(i.AssumedCharge) * bio.Proton)) / float64(i.AssumedCharge)) + minRT[i.SpectrumFileName()] = (i.RetentionTime / 60) - rTWin + maxRT[i.SpectrumFileName()] = (i.RetentionTime / 60) + rTWin + retentionTime[i.SpectrumFileName()] = i.RetentionTime + compVoltageMap[i.SpectrumFileName()] = i.CompensationVoltage + charges[i.SpectrumFileName()] = int(i.AssumedCharge) + psmMap[i.SpectrumFileName()] = i } // get a sorted list of spectrum names @@ -117,11 +118,11 @@ func peakIntensity(evi rep.Evidence, dir, format string, rTWin, pTWin, tol float mappedPurity := calculateIonPurity(dir, format, mz, sourceMap[s]) for _, j := range mappedPurity { - v, ok := psmMap[j.Spectrum] + v, ok := psmMap[j.SpectrumFileName()] if ok { psm := v psm.Purity = j.Purity - psmMap[j.Spectrum] = psm + psmMap[j.SpectrumFileName()] = psm } } @@ -129,7 +130,7 @@ func peakIntensity(evi rep.Evidence, dir, format string, rTWin, pTWin, tol float if ok { for _, j := range v { - measuredFaims, measured, retrieved := xic(mz.Spectra, minRT[j], maxRT[j], ppmPrecision[j], mzMap[j]) + measuredFaims, measured, retrieved := xic(mz.Spectra, minRT[j], maxRT[j], ppmPrecision[j], mzMap[j.Str()]) if retrieved { @@ -168,13 +169,13 @@ func peakIntensity(evi rep.Evidence, dir, format string, rTWin, pTWin, tol float _, ok := spectra[partName[0]] if ok { if isFaims { - evi.PSM[i].Intensity = instensityCV[evi.PSM[i].Spectrum] + evi.PSM[i].Intensity = instensityCV[evi.PSM[i].SpectrumFileName()] } else { - evi.PSM[i].Intensity = intensity[evi.PSM[i].Spectrum] + evi.PSM[i].Intensity = intensity[evi.PSM[i].SpectrumFileName()] } } - v, ok := psmMap[evi.PSM[i].Spectrum] + v, ok := psmMap[evi.PSM[i].SpectrumFileName()] if ok { evi.PSM[i].Purity = v.Purity } @@ -231,7 +232,7 @@ func calculateIntensities(e rep.Evidence) rep.Evidence { } var peptideIntMap = make(map[string]float64) - var ionIntMap = make(map[string]float64) + var ionIntMap = make(map[id.IonFormType]float64) for _, i := range e.PSM { @@ -244,13 +245,13 @@ func calculateIntensities(e rep.Evidence) rep.Evidence { } // ion intensity : most intense ion - ionV, ok := ionIntMap[i.IonForm] + ionV, ok := ionIntMap[i.IonForm()] if ok { if i.Intensity > ionV { - ionIntMap[i.IonForm] = i.Intensity + ionIntMap[i.IonForm()] = i.Intensity } } else { - ionIntMap[i.IonForm] = i.Intensity + ionIntMap[i.IonForm()] = i.Intensity } } @@ -263,7 +264,7 @@ func calculateIntensities(e rep.Evidence) rep.Evidence { } for i := range e.Ions { - v, ok := ionIntMap[e.Ions[i].IonForm] + v, ok := ionIntMap[e.Ions[i].IonForm()] if ok { e.Ions[i].Intensity = v } @@ -277,7 +278,7 @@ func calculateIntensities(e rep.Evidence) rep.Evidence { var razorInt []float64 for _, k := range e.Proteins[i].TotalPeptideIons { - v, ok := ionIntMap[k.IonForm] + v, ok := ionIntMap[k.IonForm()] if ok { totalInt = append(totalInt, v) diff --git a/lib/qua/qua.go b/lib/qua/qua.go index 0daed40b..3328e2b1 100644 --- a/lib/qua/qua.go +++ b/lib/qua/qua.go @@ -5,6 +5,7 @@ import ( "fmt" "math" "path/filepath" + "philosopher/lib/id" "sort" "strings" @@ -23,7 +24,7 @@ import ( // Pair ... type Pair struct { - Key string + Key id.SpectrumType Value float64 } @@ -54,7 +55,7 @@ func RunLabelFreeQuantification(p met.Quantify) { // RunIsobaricLabelQuantification is the top function for label quantification func RunIsobaricLabelQuantification(p met.Quantify, mods bool) met.Quantify { - var psmMap = make(map[string]rep.PSMEvidence) + var psmMap = make(map[id.SpectrumType]rep.PSMEvidence) var sourceMap = make(map[string][]rep.PSMEvidence) var sourceList []string @@ -72,7 +73,7 @@ func RunIsobaricLabelQuantification(p met.Quantify, mods bool) met.Quantify { for _, i := range evi.PSM { specName := strings.Split(i.Spectrum, ".") sourceMap[specName[0]] = append(sourceMap[specName[0]], i) - psmMap[i.Spectrum] = i + psmMap[i.SpectrumFileName()] = i } for i := range sourceMap { @@ -129,27 +130,27 @@ func RunIsobaricLabelQuantification(p met.Quantify, mods bool) met.Quantify { mappedPSM := mapLabeledSpectra(labels, p.Purity, sourceMap[sourceList[i]]) for _, j := range mappedPurity { - v, ok := psmMap[j.Spectrum] + v, ok := psmMap[j.SpectrumFileName()] if ok { psm := v psm.Purity = j.Purity - psmMap[j.Spectrum] = psm + psmMap[j.SpectrumFileName()] = psm } } for _, j := range mappedPSM { - v, ok := psmMap[j.Spectrum] + v, ok := psmMap[j.SpectrumFileName()] if ok { psm := v psm.Labels = j.Labels - psmMap[j.Spectrum] = psm + psmMap[j.SpectrumFileName()] = psm } } } for i := range evi.PSM { - v, ok := psmMap[evi.PSM[i].Spectrum] + v, ok := psmMap[evi.PSM[i].SpectrumFileName()] if ok { evi.PSM[i].Purity = v.Purity evi.PSM[i].Labels = v.Labels @@ -223,29 +224,39 @@ func cleanPreviousData(evi rep.Evidence, brand, plex string) rep.Evidence { for i := range evi.PSM { if brand == "tmt" { - evi.PSM[i].Labels = tmt.New(plex) + evi.PSM[i].Labels = &iso.Labels{} + *evi.PSM[i].Labels = tmt.New(plex) } else if brand == "itraq" { - evi.PSM[i].Labels = trq.New(plex) + evi.PSM[i].Labels = &iso.Labels{} + *evi.PSM[i].Labels = trq.New(plex) } } for i := range evi.Ions { if brand == "tmt" { - evi.Ions[i].Labels = tmt.New(plex) + evi.Ions[i].Labels = &iso.Labels{} + *evi.Ions[i].Labels = tmt.New(plex) } else if brand == "itraq" { - evi.Ions[i].Labels = trq.New(plex) + evi.Ions[i].Labels = &iso.Labels{} + *evi.Ions[i].Labels = trq.New(plex) } } for i := range evi.Proteins { if brand == "tmt" { - evi.Proteins[i].TotalLabels = tmt.New(plex) - evi.Proteins[i].UniqueLabels = tmt.New(plex) - evi.Proteins[i].URazorLabels = tmt.New(plex) + evi.Proteins[i].TotalLabels = &iso.Labels{} + evi.Proteins[i].UniqueLabels = &iso.Labels{} + evi.Proteins[i].URazorLabels = &iso.Labels{} + *evi.Proteins[i].TotalLabels = tmt.New(plex) + *evi.Proteins[i].UniqueLabels = tmt.New(plex) + *evi.Proteins[i].URazorLabels = tmt.New(plex) } else if brand == "itraq" { - evi.Proteins[i].TotalLabels = trq.New(plex) - evi.Proteins[i].UniqueLabels = trq.New(plex) - evi.Proteins[i].URazorLabels = trq.New(plex) + evi.Proteins[i].TotalLabels = &iso.Labels{} + evi.Proteins[i].UniqueLabels = &iso.Labels{} + evi.Proteins[i].URazorLabels = &iso.Labels{} + *evi.Proteins[i].TotalLabels = trq.New(plex) + *evi.Proteins[i].UniqueLabels = trq.New(plex) + *evi.Proteins[i].URazorLabels = trq.New(plex) } } @@ -451,11 +462,11 @@ func assignLabelNames(labels map[string]iso.Labels, labelNames map[string]string return labels } -func classification(evi rep.Evidence, mods, best bool, remove, purity, probability float64) (map[string]iso.Labels, map[string]iso.Labels) { +func classification(evi rep.Evidence, mods, best bool, remove, purity, probability float64) (map[id.SpectrumType]iso.Labels, map[id.SpectrumType]iso.Labels) { - var spectrumMap = make(map[string]iso.Labels) - var phosphoSpectrumMap = make(map[string]iso.Labels) - var bestMap = make(map[string]uint8) + var spectrumMap = make(map[id.SpectrumType]iso.Labels) + var phosphoSpectrumMap = make(map[id.SpectrumType]iso.Labels) + var bestMap = make(map[id.SpectrumType]uint8) var psmLabelSumList PairList var quantCheckUp bool @@ -463,15 +474,15 @@ func classification(evi rep.Evidence, mods, best bool, remove, purity, probabili for _, i := range evi.PSM { if i.Probability >= probability && i.Purity >= purity { - spectrumMap[i.Spectrum] = i.Labels - bestMap[i.Spectrum] = 0 + spectrumMap[i.SpectrumFileName()] = *i.Labels + bestMap[i.SpectrumFileName()] = 0 - if mods { - _, ok1 := i.LocalizedPTMSites["PTMProphet_STY79.9663"] - _, ok2 := i.LocalizedPTMSites["PTMProphet_STY79.96633"] - _, ok3 := i.LocalizedPTMSites["PTMProphet_STY79.966331"] + if mods && i.PTM != nil { + _, ok1 := i.PTM.LocalizedPTMSites["PTMProphet_STY79.9663"] + _, ok2 := i.PTM.LocalizedPTMSites["PTMProphet_STY79.96633"] + _, ok3 := i.PTM.LocalizedPTMSites["PTMProphet_STY79.966331"] if ok1 || ok2 || ok3 { - phosphoSpectrumMap[i.Spectrum] = i.Labels + phosphoSpectrumMap[i.SpectrumFileName()] = *i.Labels } } @@ -496,7 +507,7 @@ func classification(evi rep.Evidence, mods, best bool, remove, purity, probabili i.Labels.Channel16.Intensity + i.Labels.Channel17.Intensity + i.Labels.Channel18.Intensity - psmLabelSumList = append(psmLabelSumList, Pair{i.Spectrum, sum}) + psmLabelSumList = append(psmLabelSumList, Pair{i.SpectrumFileName(), sum}) if sum > 0 { quantCheckUp = true @@ -515,16 +526,16 @@ func classification(evi rep.Evidence, mods, best bool, remove, purity, probabili var groupedPSMMap = make(map[string][]rep.PSMEvidence) for _, i := range evi.PSM { specName := strings.Split(i.Spectrum, ".") - fqn := fmt.Sprintf("%s#%s", specName[0], i.IonForm) + fqn := fmt.Sprintf("%s#%s", specName[0], i.IonForm().Str()) groupedPSMMap[fqn] = append(groupedPSMMap[fqn], i) } for _, v := range groupedPSMMap { if len(v) == 1 { - bestMap[v[0].Spectrum] = 0 + bestMap[v[0].SpectrumFileName()] = 0 } else { - var bestPSM string + var bestPSM id.SpectrumType var bestPSMInt float64 for _, i := range v { tmtSum := i.Labels.Channel1.Intensity + @@ -547,7 +558,7 @@ func classification(evi rep.Evidence, mods, best bool, remove, purity, probabili i.Labels.Channel18.Intensity if tmtSum > bestPSMInt { - bestPSM = i.Spectrum + bestPSM = i.SpectrumFileName() bestPSMInt = tmtSum } @@ -559,8 +570,8 @@ func classification(evi rep.Evidence, mods, best bool, remove, purity, probabili } } - var toDelete = make(map[string]uint8) - var toDeletePhospho = make(map[string]uint8) + var toDelete = make(map[id.SpectrumType]uint8) + var toDeletePhospho = make(map[id.SpectrumType]uint8) // 3rd check: remove the lower 3% // Ignore all PSMs that fall under the lower 3% based on their summed TMT labels diff --git a/lib/qua/spc.go b/lib/qua/spc.go index 00384686..12171c28 100644 --- a/lib/qua/spc.go +++ b/lib/qua/spc.go @@ -1,15 +1,16 @@ package qua import ( + "philosopher/lib/id" "philosopher/lib/rep" ) // CalculateSpectralCounts add Spc to ions and proteins func CalculateSpectralCounts(e rep.Evidence) rep.Evidence { - var total = make(map[string][]string) - var unique = make(map[string][]string) - var razor = make(map[string][]string) + var total = make(map[string][]id.SpectrumType) + var unique = make(map[string][]id.SpectrumType) + var razor = make(map[string][]id.SpectrumType) var sequences = make(map[string]int) @@ -17,17 +18,17 @@ func CalculateSpectralCounts(e rep.Evidence) rep.Evidence { sequences[i.Peptide]++ - total[i.Protein] = append(total[i.Protein], i.Spectrum) + total[i.Protein] = append(total[i.Protein], i.SpectrumFileName()) for j := range i.MappedProteins { - total[j] = append(total[j], i.Spectrum) + total[j] = append(total[j], i.SpectrumFileName()) } if i.IsUnique { - unique[i.Protein] = append(unique[i.Protein], i.Spectrum) + unique[i.Protein] = append(unique[i.Protein], i.SpectrumFileName()) } if i.IsURazor { - razor[i.Protein] = append(razor[i.Protein], i.Spectrum) + razor[i.Protein] = append(razor[i.Protein], i.SpectrumFileName()) } } @@ -76,10 +77,10 @@ func CalculateSpectralCounts(e rep.Evidence) rep.Evidence { // sequences[i.Peptide]++ // if i.IsUnique { -// uniqueIonPSM[i.Spectrum] = i.ProteinID +// uniqueIonPSM[i.SpectrumFileName()] = i.ProteinID // } // if i.IsURazor { -// razorIonPSM[i.Spectrum] = i.ProteinID +// razorIonPSM[i.SpectrumFileName()] = i.ProteinID // } // } diff --git a/lib/rep/io.go b/lib/rep/io.go index 2813f808..097f2d37 100644 --- a/lib/rep/io.go +++ b/lib/rep/io.go @@ -2,88 +2,45 @@ package rep import ( "fmt" - "io/ioutil" "path/filepath" + "sync" "philosopher/lib/sys" - - "github.com/sirupsen/logrus" - "github.com/vmihailenco/msgpack" ) // SerializeGranular converts the whole structure into sevral small gob files func (evi *Evidence) SerializeGranular() { - + wg := sync.WaitGroup{} + wg.Add(4) // create PSM Bin - SerializePSM(&evi.PSM) - + go func() { defer wg.Done(); SerializePSM(&evi.PSM) }() // create Ion Bin - SerializeIon(&evi.Ions) - + go func() { defer wg.Done(); SerializeIon(&evi.Ions) }() // create Peptides Bin - SerializePeptides(&evi.Peptides) - + go func() { defer wg.Done(); SerializePeptides(&evi.Peptides) }() // create Protein Bin - SerializeProteins(&evi.Proteins) + go func() { defer wg.Done(); SerializeProteins(&evi.Proteins) }() + wg.Wait() } // SerializePSM creates an ev serial with Evidence data func SerializePSM(evi *PSMEvidenceList) { - - b, e := msgpack.Marshal(&evi) - if e != nil { - logrus.Trace("Cannot marshal PSM data:", e) - } - - e = ioutil.WriteFile(sys.PSMBin(), b, sys.FilePermission()) - if e != nil { - logrus.Trace("Cannot serialize PSM data:", e) - } - + sys.Serialize(evi, sys.PSMBin()) } // SerializeIon creates an ev serial with Evidence data func SerializeIon(evi *IonEvidenceList) { - - b, e := msgpack.Marshal(&evi) - if e != nil { - logrus.Trace("Cannot marshal Ions data:", e) - } - - e = ioutil.WriteFile(sys.IonBin(), b, sys.FilePermission()) - if e != nil { - logrus.Trace("Cannot serialize Ions data:", e) - } + sys.Serialize(evi, sys.IonBin()) } // SerializePeptides creates an ev serial with Evidence data func SerializePeptides(evi *PeptideEvidenceList) { - - b, e := msgpack.Marshal(&evi) - if e != nil { - logrus.Trace("Cannot marshal Peptides data:", e) - } - - e = ioutil.WriteFile(sys.PepBin(), b, sys.FilePermission()) - if e != nil { - logrus.Trace("Cannot serialize Peptides data:", e) - } - + sys.Serialize(evi, sys.PepBin()) } // SerializeProteins creates an ev serial with Evidence data func SerializeProteins(evi *ProteinEvidenceList) { - - b, e := msgpack.Marshal(&evi) - if e != nil { - logrus.Trace("Cannot marshal Proteins data:", e) - } - - e = ioutil.WriteFile(sys.ProBin(), b, sys.FilePermission()) - if e != nil { - logrus.Trace("Cannot serialize Proteins data:", e) - } - + sys.Serialize(evi, sys.ProBin()) } // RestoreGranular reads philosopher results files and restore the data sctructure @@ -104,62 +61,22 @@ func (evi *Evidence) RestoreGranular() { // RestorePSM restores PSM data func RestorePSM(evi *PSMEvidenceList) { - - b, e := ioutil.ReadFile(sys.PSMBin()) - if e != nil { - logrus.Fatal("Cannot read file:", e) - } - - e = msgpack.Unmarshal(b, &evi) - if e != nil { - logrus.Fatal("Cannot unmarshal file:", e) - } - + sys.Restore(evi, sys.PSMBin(), false) } // RestoreIon restores Ion data func RestoreIon(evi *IonEvidenceList) { - - b, e := ioutil.ReadFile(sys.IonBin()) - if e != nil { - logrus.Fatal("Cannot read file:", e) - } - - e = msgpack.Unmarshal(b, &evi) - if e != nil { - logrus.Fatal("Cannot unmarshal file:", e) - } - + sys.Restore(evi, sys.IonBin(), false) } // RestorePeptide restores Peptide data func RestorePeptide(evi *PeptideEvidenceList) { - - b, e := ioutil.ReadFile(sys.PepBin()) - if e != nil { - logrus.Fatal("Cannot read file:", e) - } - - e = msgpack.Unmarshal(b, &evi) - if e != nil { - logrus.Fatal("Cannot unmarshal file:", e) - } - + sys.Restore(evi, sys.PepBin(), false) } // RestoreProtein restores Protein data func RestoreProtein(evi *ProteinEvidenceList) { - - b, e := ioutil.ReadFile(sys.ProBin()) - if e != nil { - logrus.Fatal("Cannot read file:", e) - } - - e = msgpack.Unmarshal(b, &evi) - if e != nil { - logrus.Fatal("Cannot unmarshal file:", e) - } - + sys.Restore(evi, sys.ProBin(), false) } // RestoreGranularWithPath reads philosopher results files and restore the data sctructure @@ -180,68 +97,24 @@ func (evi *Evidence) RestoreGranularWithPath(p string) { // RestorePSMWithPath restores PSM data func RestorePSMWithPath(evi *PSMEvidenceList, p string) { - path := fmt.Sprintf("%s%s%s", p, string(filepath.Separator), sys.PSMBin()) - - b, e := ioutil.ReadFile(path) - if e != nil { - logrus.Fatal("Cannot read file:", e) - } - - e = msgpack.Unmarshal(b, &evi) - if e != nil { - logrus.Fatal("Cannot unmarshal file:", e) - } - + sys.Restore(evi, path, false) } // RestoreIonWithPath restores Ion data func RestoreIonWithPath(evi *IonEvidenceList, p string) { - path := fmt.Sprintf("%s%s%s", p, string(filepath.Separator), sys.IonBin()) - - b, e := ioutil.ReadFile(path) - if e != nil { - logrus.Fatal("Cannot read file:", e) - } - - e = msgpack.Unmarshal(b, &evi) - if e != nil { - logrus.Fatal("Cannot unmarshal file:", e) - } - + sys.Restore(evi, path, false) } // RestorePeptideWithPath restores Ion data func RestorePeptideWithPath(evi *PeptideEvidenceList, p string) { - path := fmt.Sprintf("%s%s%s", p, string(filepath.Separator), sys.PepBin()) - - b, e := ioutil.ReadFile(path) - if e != nil { - logrus.Fatal("Cannot read file:", e) - } - - e = msgpack.Unmarshal(b, &evi) - if e != nil { - logrus.Fatal("Cannot unmarshal file:", e) - } - + sys.Restore(evi, path, false) } // RestoreProteinWithPath restores Protein data func RestoreProteinWithPath(evi *ProteinEvidenceList, p string) { - path := fmt.Sprintf("%s%s%s", p, string(filepath.Separator), sys.ProBin()) - - b, e := ioutil.ReadFile(path) - if e != nil { - logrus.Fatal("Cannot read file:", e) - } - - e = msgpack.Unmarshal(b, &evi) - if e != nil { - logrus.Fatal("Cannot unmarshal file:", e) - } - + sys.Restore(evi, path, false) } diff --git a/lib/rep/ion.go b/lib/rep/ion.go index 7c4301a4..bb6f377f 100644 --- a/lib/rep/ion.go +++ b/lib/rep/ion.go @@ -1,6 +1,7 @@ package rep import ( + "bufio" "errors" "fmt" "io" @@ -21,114 +22,110 @@ import ( // AssembleIonReport reports consist on ion reporting func (evi *Evidence) AssembleIonReport(ion id.PepIDList, decoyTag string) { - var list IonEvidenceList - var psmPtMap = make(map[string][]string) - var psmIonMap = make(map[string][]string) - var bestProb = make(map[string]float64) + var psmPtMap = make(map[id.IonFormType][]string) + var psmIonMap = make(map[id.IonFormType][]id.SpectrumType) + var bestProb = make(map[id.IonFormType]float64) - var ionMods = make(map[string][]mod.Modification) + var ionMods = make(map[id.IonFormType][]mod.Modification) // collapse all psm to protein based on Peptide-level identifications for _, i := range evi.PSM { - psmIonMap[i.IonForm] = append(psmIonMap[i.IonForm], i.Spectrum) - psmPtMap[i.Spectrum] = append(psmPtMap[i.Spectrum], i.Protein) + psmIonMap[i.IonForm()] = append(psmIonMap[i.IonForm()], i.SpectrumFileName()) + psmPtMap[i.IonForm()] = append(psmPtMap[i.IonForm()], i.Protein) - if i.Probability > bestProb[i.IonForm] { - bestProb[i.IonForm] = i.Probability + if i.Probability > bestProb[i.IonForm()] { + bestProb[i.IonForm()] = i.Probability } for j := range i.MappedProteins { - psmPtMap[i.IonForm] = append(psmPtMap[i.IonForm], j) + psmPtMap[i.IonForm()] = append(psmPtMap[i.IonForm()], j) } - for _, j := range i.Modifications.Index { - ionMods[i.IonForm] = append(ionMods[i.IonForm], j) + for _, j := range i.Modifications.IndexSlice { + ionMods[i.IonForm()] = append(ionMods[i.IonForm()], j) } } - for _, i := range ion { - var pr IonEvidence + evi.Ions = make(IonEvidenceList, len(ion)) + for idx, i := range ion { + pr := &evi.Ions[idx] - pr.IonForm = fmt.Sprintf("%s#%d#%.4f", i.Peptide, i.AssumedCharge, i.CalcNeutralPepMass) + //pr.IonForm() = fmt.Sprintf("%s#%d#%.4f", i.Peptide, i.AssumedCharge, i.CalcNeutralPepMass) - pr.Spectra = make(map[string]int) - pr.MappedGenes = make(map[string]int) + pr.Spectra = make(map[id.SpectrumType]int) + pr.MappedGenes = make(map[string]struct{}) pr.MappedProteins = make(map[string]int) - pr.Modifications.Index = make(map[string]mod.Modification) - - v, ok := psmIonMap[pr.IonForm] - if ok { - for _, j := range v { - pr.Spectra[j]++ - } - } + //pr.Modifications.Index = make(map[string]mod.Modification) pr.Sequence = i.Peptide pr.ModifiedSequence = i.ModifiedPeptide pr.MZ = uti.Round(((i.CalcNeutralPepMass + (float64(i.AssumedCharge) * bio.Proton)) / float64(i.AssumedCharge)), 5, 4) pr.ChargeState = i.AssumedCharge pr.PeptideMass = i.CalcNeutralPepMass + if v, ok := psmIonMap[pr.IonForm()]; ok { + for _, j := range v { + pr.Spectra[j]++ + } + } pr.PrecursorNeutralMass = i.PrecursorNeutralMass pr.Expectation = i.Expectation pr.NumberOfEnzymaticTermini = i.NumberOfEnzymaticTermini pr.Protein = i.Protein pr.MappedProteins[i.Protein] = 0 pr.Modifications = i.Modifications - pr.Probability = bestProb[pr.IonForm] + pr.Probability = bestProb[pr.IonForm()] // get the mapped proteins - for _, j := range psmPtMap[pr.IonForm] { + for _, j := range psmPtMap[pr.IonForm()] { pr.MappedProteins[j] = 0 } - - mods, ok := ionMods[pr.IonForm] - if ok { + prModifications := pr.Modifications.ToMap() + if mods, ok := ionMods[pr.IonForm()]; ok { for _, j := range mods { - _, okMod := pr.Modifications.Index[j.Index] + _, okMod := prModifications.Index[j.Index] if !okMod { - pr.Modifications.Index[j.Index] = j + prModifications.Index[j.Index] = j } } } - + pr.Modifications = prModifications.ToSlice() // is this bservation a decoy ? if cla.IsDecoyPSM(i, decoyTag) { pr.IsDecoy = true } - list = append(list, pr) } - sort.Sort(list) - evi.Ions = list - + sort.Sort(evi.Ions) } // MetaIonReport reports consist on ion reporting -func (evi Evidence) MetaIonReport(workspace, brand string, channels int, hasDecoys, hasLabels bool) { +func (evi IonEvidenceList) MetaIonReport(workspace, brand string, channels int, hasDecoys, hasLabels bool) { var header string output := fmt.Sprintf("%s%sion.tsv", workspace, string(filepath.Separator)) file, e := os.Create(output) + bw := bufio.NewWriter(file) if e != nil { msg.WriteFile(errors.New("peptide ion output file"), "fatal") } defer file.Close() + defer bw.Flush() // building the printing set tat may or not contain decoys - var printSet IonEvidenceList - for _, i := range evi.Ions { + var printSet []*IonEvidence + for idx, i := range evi { // This inclusion is necessary to avoid unexistent observations from being included after using the filter --mods options if i.Probability > 0 { if !hasDecoys { if !i.IsDecoy { - printSet = append(printSet, i) + printSet = append(printSet, &evi[idx]) } } else { - printSet = append(printSet, i) + printSet = append(printSet, &evi[idx]) } } } @@ -212,14 +209,14 @@ func (evi Evidence) MetaIonReport(workspace, brand string, channels int, hasDeco header = strings.Replace(header, "Channel "+printSet[10].Labels.Channel18.Name, c18, -1) } - _, e = io.WriteString(file, header) + _, e = io.WriteString(bw, header) if e != nil { msg.WriteToFile(errors.New("cannot print Ion to file"), "fatal") } for _, i := range printSet { - assL, obs := getModsList(i.Modifications.Index) + assL, obs := getModsList(i.Modifications.ToMap().Index) var mappedProteins []string for j := range i.MappedProteins { @@ -243,8 +240,8 @@ func (evi Evidence) MetaIonReport(workspace, brand string, channels int, hasDeco line := fmt.Sprintf("%s\t%s\t%s\t%s\t%d\t%.4f\t%d\t%.4f\t%.4f\t%.14f\t%d\t%.4f\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", i.Sequence, i.ModifiedSequence, - i.PrevAA, - i.NextAA, + string(i.PrevAA), + string(i.NextAA), len(i.Sequence), i.MZ, i.ChargeState, @@ -278,10 +275,10 @@ func (evi Evidence) MetaIonReport(workspace, brand string, channels int, hasDeco line, i.Labels.Channel1.Intensity, i.Labels.Channel2.Intensity, - i.Labels.Channel3.Intensity, - i.Labels.Channel4.Intensity, i.Labels.Channel5.Intensity, i.Labels.Channel6.Intensity, + i.Labels.Channel9.Intensity, + i.Labels.Channel10.Intensity, ) case 8: line = fmt.Sprintf("%s\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f", @@ -372,7 +369,7 @@ func (evi Evidence) MetaIonReport(workspace, brand string, channels int, hasDeco line += "\n" - _, e = io.WriteString(file, line) + _, e = io.WriteString(bw, line) if e != nil { msg.WriteToFile(errors.New("cannot print Ions to file"), "fatal") } diff --git a/lib/rep/modification.go b/lib/rep/modification.go index 500de84e..46c14b5c 100644 --- a/lib/rep/modification.go +++ b/lib/rep/modification.go @@ -7,6 +7,7 @@ import ( "math" "os" "path/filepath" + "philosopher/lib/mod" "philosopher/lib/msg" "philosopher/lib/obo" @@ -25,7 +26,7 @@ func (evi *Evidence) MapMods() { o := obo.NewUniModOntology() for _, i := range evi.PSM { - for _, j := range i.Modifications.Index { + for _, j := range i.Modifications.IndexSlice { modMap[j.MassDiff] = obo.Term{} } } @@ -58,8 +59,8 @@ func (evi *Evidence) MapMods() { for i := range evi.PSM { // for fixed and variable modifications - - for k, v := range evi.PSM[i].Modifications.Index { + mods := evi.PSM[i].Modifications.ToMap() + for k, v := range mods.Index { obo, ok := modMap[v.MassDiff] if ok { @@ -70,20 +71,27 @@ func (evi *Evidence) MapMods() { updatedMod.Name = obo.Name updatedMod.Definition = obo.Definition updatedMod.ID = obo.ID - updatedMod.MonoIsotopicMass = obo.MonoIsotopicMass + //updatedMod.MonoIsotopicMass = obo.MonoIsotopicMass + if updatedMod.IsobaricMods == nil { + updatedMod.IsobaricMods = make(map[string]float64) + } updatedMod.IsobaricMods[obo.Name]++ - evi.PSM[i].Modifications.Index[k] = updatedMod + mods.Index[k] = updatedMod } - if updatedMod.Type == "Observed" { + if updatedMod.Type == mod.Observed { updatedMod.Name = obo.Name updatedMod.Definition = obo.Definition updatedMod.ID = obo.ID - updatedMod.MonoIsotopicMass = obo.MonoIsotopicMass + //updatedMod.MonoIsotopicMass = obo.MonoIsotopicMass + if updatedMod.IsobaricMods == nil { + updatedMod.IsobaricMods = make(map[string]float64) + } updatedMod.IsobaricMods[obo.Name] = obo.MonoIsotopicMass - evi.PSM[i].Modifications.Index[k] = updatedMod + mods.Index[k] = updatedMod } } } + evi.PSM[i].Modifications = mods.ToSlice() } } @@ -127,12 +135,12 @@ func (evi *Evidence) AssembleModificationReport() { // for assigned mods // 0 here means something that doest not map to the pepXML header // like multiple mods on n-term - for _, l := range evi.PSM[i].Modifications.Index { + for _, l := range evi.PSM[i].Modifications.IndexSlice { if l.MassDiff > bins[j].LowerMass && l.MassDiff <= bins[j].HigherRight && l.MassDiff != 0 { _, ok := assignChecklist[l.MassDiff] if !ok { - if l.Type == "Assigned" { + if l.Type == mod.Assigned { bins[j].AssignedMods = append(bins[j].AssignedMods, evi.PSM[i]) assignChecklist[l.MassDiff] = 0 } diff --git a/lib/rep/msstats.go b/lib/rep/msstats.go index 75c618bc..64a77671 100644 --- a/lib/rep/msstats.go +++ b/lib/rep/msstats.go @@ -14,6 +14,9 @@ import ( // MetaMSstatsReport report all psms from study that passed the FDR filter func (evi Evidence) MetaMSstatsReport(workspace, brand string, channels int, hasDecoys bool) { + if evi.PSM == nil { + RestorePSM(&evi.PSM) + } var header string output := fmt.Sprintf("%s%smsstats.csv", workspace, string(filepath.Separator)) @@ -88,7 +91,7 @@ func (evi Evidence) MetaMSstatsReport(workspace, brand string, channels int, has fileName = fmt.Sprintf("%s.raw", parts[0]) line := fmt.Sprintf("%s\t%s\t%s\t%s\t%d\t%.4f\t%.4f\t%.4f\t%t\t%s\t%s\t%s", - i.Spectrum, + i.SpectrumFileName().Str(), fileName, i.Peptide, i.ModifiedPeptide, @@ -104,6 +107,17 @@ func (evi Evidence) MetaMSstatsReport(workspace, brand string, channels int, has if brand == "tmt" { switch channels { + case 6: + line = fmt.Sprintf("%s\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f", + line, + i.Purity, + i.Labels.Channel1.Intensity, + i.Labels.Channel2.Intensity, + i.Labels.Channel5.Intensity, + i.Labels.Channel6.Intensity, + i.Labels.Channel9.Intensity, + i.Labels.Channel10.Intensity, + ) case 10: line = fmt.Sprintf("%s\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f", line, diff --git a/lib/rep/mzid.go b/lib/rep/mzid.go index cf2555c0..b22abd0e 100644 --- a/lib/rep/mzid.go +++ b/lib/rep/mzid.go @@ -2,6 +2,7 @@ package rep import ( "fmt" + "philosopher/lib/id" "sort" "strconv" "strings" @@ -38,7 +39,7 @@ func (e Evidence) MzIdentMLReport(version, database string) { dtb.Restore() // spectra evidence reference map - var specRef = make(map[string]string) + var specRef = make(map[id.SpectrumType]string) // peptide evidence reference map var pepRef = make(map[string]string) @@ -212,13 +213,13 @@ func (e Evidence) MzIdentMLReport(version, database string) { }, } - for _, j := range i.Modifications.Index { + for _, j := range i.Modifications.IndexSlice { if j.Name != "Unknown" { mod := psi.Modification{ - AvgMassDelta: j.AverageMass, - MonoIsotopicMassDelta: j.MonoIsotopicMass, - Residues: j.AminoAcid, - Location: j.Position, + //AvgMassDelta: j.AverageMass, + //MonoIsotopicMassDelta: j.MonoIsotopicMass, + Residues: j.AminoAcid, + Location: strconv.Itoa(j.Position), CVParam: []psi.CVParam{ { CVRef: "UNIMOD", @@ -252,8 +253,8 @@ func (e Evidence) MzIdentMLReport(version, database string) { ID: fmt.Sprintf("PepEv_%d", idCounter), IsDecoy: strconv.FormatBool(i.IsDecoy), PeptideRef: i.Peptide, - Pre: i.PrevAA, - Post: i.NextAA, + Pre: string(i.PrevAA), + Post: string(i.NextAA), } pepRef[i.Peptide] = fmt.Sprintf("PepEv_%d", idCounter) @@ -791,7 +792,7 @@ func (e Evidence) MzIdentMLReport(version, database string) { CVRef: "PSI-MS", Accession: "MS:1000796", Name: "spectrum title", - Value: j.Spectrum, + Value: j.SpectrumFileName().Str(), }, { CVRef: "PSI-MS", @@ -1012,7 +1013,7 @@ func (e Evidence) MzIdentMLReport(version, database string) { }, } - specRef[j.Spectrum] = fmt.Sprintf("Spectrum_%d", idCounter) + specRef[j.SpectrumFileName()] = fmt.Sprintf("Spectrum_%d", idCounter) ad.SpectrumIdentificationList[0].SpectrumIdentificationResult = append(ad.SpectrumIdentificationList[0].SpectrumIdentificationResult, *sir) } } diff --git a/lib/rep/peptide.go b/lib/rep/peptide.go index 99167b3f..3342af19 100644 --- a/lib/rep/peptide.go +++ b/lib/rep/peptide.go @@ -1,6 +1,7 @@ package rep import ( + "bufio" "errors" "fmt" "io" @@ -19,32 +20,26 @@ import ( // AssemblePeptideReport reports consist on ion reporting func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) { - var list PeptideEvidenceList var pepSeqMap = make(map[string]bool) //is this a decoy var pepCSMap = make(map[string][]uint8) var pepInt = make(map[string]float64) var pepProt = make(map[string]string) - var spectra = make(map[string][]string) + var spectra = make(map[string][]id.SpectrumType) var mappedGenes = make(map[string][]string) var mappedProts = make(map[string][]string) var bestProb = make(map[string]float64) var pepMods = make(map[string][]mod.Modification) for _, i := range pep { - if !cla.IsDecoyPSM(i, decoyTag) { - pepSeqMap[i.Peptide] = false - } else { - pepSeqMap[i.Peptide] = true - } + pepSeqMap[i.Peptide] = cla.IsDecoyPSM(i, decoyTag) } for _, i := range evi.PSM { - _, ok := pepSeqMap[i.Peptide] - if ok { + if _, ok := pepSeqMap[i.Peptide]; ok { pepCSMap[i.Peptide] = append(pepCSMap[i.Peptide], i.AssumedCharge) - spectra[i.Peptide] = append(spectra[i.Peptide], i.Spectrum) + spectra[i.Peptide] = append(spectra[i.Peptide], i.SpectrumFileName()) pepProt[i.Peptide] = i.Protein if i.Intensity > pepInt[i.Peptide] { @@ -59,7 +54,7 @@ func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) { mappedGenes[i.Peptide] = append(mappedGenes[i.Peptide], j) } - for _, j := range i.Modifications.Index { + for _, j := range i.Modifications.IndexSlice { pepMods[i.Peptide] = append(pepMods[i.Peptide], j) } @@ -71,14 +66,17 @@ func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) { } + evi.Peptides = make(PeptideEvidenceList, len(pepSeqMap)) + idx := 0 for k, v := range pepSeqMap { - var pep PeptideEvidence - pep.Spectra = make(map[string]uint8) + pep := &evi.Peptides[idx] + idx++ + + pep.Spectra = make(map[id.SpectrumType]uint8) pep.ChargeState = make(map[uint8]uint8) - pep.MappedGenes = make(map[string]int) + pep.MappedGenes = make(map[string]struct{}) pep.MappedProteins = make(map[string]int) - pep.Modifications.Index = make(map[string]mod.Modification) pep.Sequence = k @@ -93,7 +91,7 @@ func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) { } for _, i := range mappedGenes[k] { - pep.MappedGenes[i] = 0 + pep.MappedGenes[i] = struct{}{} } for _, i := range mappedProts[k] { @@ -105,48 +103,50 @@ func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) { pep.Protein = d } - mods, ok := pepMods[pep.Sequence] - if ok { + pepModificationsIndex := make(map[string]mod.Modification) + if mods, ok := pepMods[pep.Sequence]; ok { for _, j := range mods { - _, okMod := pep.Modifications.Index[j.Index] + _, okMod := pepModificationsIndex[j.Index] if !okMod { - pep.Modifications.Index[j.Index] = j + pepModificationsIndex[j.Index] = j } } } - + if len(pepModificationsIndex) != 0 { + pep.Modifications = mod.Modifications{Index: pepModificationsIndex}.ToSlice() + } // is this a decoy ? pep.IsDecoy = v - list = append(list, pep) } - sort.Sort(list) - evi.Peptides = list + sort.Sort(evi.Peptides) } // MetaPeptideReport report consist on ion reporting -func (evi Evidence) MetaPeptideReport(workspace, brand string, channels int, hasDecoys, hasLabels bool) { +func (evi PeptideEvidenceList) MetaPeptideReport(workspace, brand string, channels int, hasDecoys, hasLabels bool) { var header string output := fmt.Sprintf("%s%speptide.tsv", workspace, string(filepath.Separator)) file, e := os.Create(output) + bw := bufio.NewWriter(file) if e != nil { msg.WriteFile(errors.New("peptide output file"), "fatal") } defer file.Close() + defer bw.Flush() // building the printing set tat may or not contain decoys - var printSet PeptideEvidenceList - for _, i := range evi.Peptides { + var printSet []*PeptideEvidence + for idx, i := range evi { if !hasDecoys { if !i.IsDecoy { - printSet = append(printSet, i) + printSet = append(printSet, &evi[idx]) } } else { - printSet = append(printSet, i) + printSet = append(printSet, &evi[idx]) } } @@ -229,14 +229,15 @@ func (evi Evidence) MetaPeptideReport(workspace, brand string, channels int, has header = strings.Replace(header, "Channel "+printSet[10].Labels.Channel18.Name, c18, -1) } - _, e = io.WriteString(file, header) + //_, e = io.WriteString(file, header) + _, e = io.WriteString(bw, header) if e != nil { msg.WriteToFile(errors.New("cannot print PSM to file"), "fatal") } for _, i := range printSet { - assL, obs := getModsList(i.Modifications.Index) + assL, obs := getModsList(i.Modifications.ToMap().Index) var mappedProteins []string for j := range i.MappedProteins { @@ -265,8 +266,8 @@ func (evi Evidence) MetaPeptideReport(workspace, brand string, channels int, has line := fmt.Sprintf("%s\t%s\t%s\t%d\t%s\t%.4f\t%d\t%f\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", i.Sequence, - i.PrevAA, - i.NextAA, + string(i.PrevAA), + string(i.NextAA), len(i.Sequence), strings.Join(cs, ", "), i.Probability, @@ -297,10 +298,10 @@ func (evi Evidence) MetaPeptideReport(workspace, brand string, channels int, has line, i.Labels.Channel1.Intensity, i.Labels.Channel2.Intensity, - i.Labels.Channel3.Intensity, - i.Labels.Channel4.Intensity, i.Labels.Channel5.Intensity, i.Labels.Channel6.Intensity, + i.Labels.Channel9.Intensity, + i.Labels.Channel10.Intensity, ) case 8: line = fmt.Sprintf("%s\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f", @@ -391,7 +392,7 @@ func (evi Evidence) MetaPeptideReport(workspace, brand string, channels int, has line += "\n" - _, e = io.WriteString(file, line) + _, e = io.WriteString(bw, line) if e != nil { msg.WriteToFile(errors.New("cannot print Peptides to file"), "fatal") } diff --git a/lib/rep/protein.go b/lib/rep/protein.go index 90e2d279..182a8310 100644 --- a/lib/rep/protein.go +++ b/lib/rep/protein.go @@ -1,13 +1,13 @@ package rep import ( + "bufio" "errors" "fmt" "io" "os" "path/filepath" "sort" - "strconv" "strings" "philosopher/lib/dat" @@ -19,34 +19,30 @@ import ( // AssembleProteinReport creates the post processed protein strcuture func (evi *Evidence) AssembleProteinReport(pro id.ProtIDList, weight float64, decoyTag string) { - var list ProteinEvidenceList - var protMods = make(map[string][]mod.Modification) - var evidenceIons = make(map[string]IonEvidence) - - for _, i := range evi.Ions { - evidenceIons[i.IonForm] = i + var protMods = make(map[id.IonFormType][]mod.Modification) + var evidenceIons = make(map[id.IonFormType]*IonEvidence) + for idx, i := range evi.Ions { + evidenceIons[i.IonForm()] = &evi.Ions[idx] } for _, i := range evi.PSM { - for _, j := range i.Modifications.Index { - protMods[i.IonForm] = append(protMods[i.IonForm], j) + for _, j := range i.Modifications.IndexSlice { + protMods[i.IonForm()] = append(protMods[i.IonForm()], j) } } - - for _, i := range pro { - - var rep ProteinEvidence - - rep.SupportingSpectra = make(map[string]int) - rep.TotalPeptideIons = make(map[string]IonEvidence) - rep.IndiProtein = make(map[string]uint8) - rep.Modifications.Index = make(map[string]mod.Modification) + evi.Proteins = make(ProteinEvidenceList, len(pro)) + for idx, i := range pro { + rep := &evi.Proteins[idx] + rep.SupportingSpectra = make(map[id.SpectrumType]int) + rep.TotalPeptideIons = make(map[id.IonFormType]IonEvidence) + rep.IndiProtein = make(map[string]struct{}) + repModificationsIndex := make(map[string]mod.Modification) rep.ProteinName = i.ProteinName rep.Description = i.Description rep.ProteinGroup = i.GroupNumber rep.ProteinSubGroup = i.GroupSiblingID - rep.Length, _ = strconv.Atoi(i.Length) + rep.Length = i.Length rep.Coverage = i.PercentCoverage rep.UniqueStrippedPeptides = len(i.UniqueStrippedPeptides) rep.Probability = i.Probability @@ -63,21 +59,21 @@ func (evi *Evidence) AssembleProteinReport(pro id.ProtIDList, weight float64, de } for j := range i.IndistinguishableProtein { - rep.IndiProtein[i.IndistinguishableProtein[j]] = 0 + rep.IndiProtein[i.IndistinguishableProtein[j]] = struct{}{} } for _, k := range i.PeptideIons { - ion := fmt.Sprintf("%s#%d#%.4f", k.PeptideSequence, k.Charge, k.CalcNeutralPepMass) + //ion := fmt.Sprintf("%s#%d#%.4f", k.PeptideSequence, k.Charge, k.CalcNeutralPepMass) + ion := k.IonForm() - v, ok := evidenceIons[ion] - if ok { + if v, ok := evidenceIons[ion]; ok { for spec := range v.Spectra { rep.SupportingSpectra[spec]++ } - ref := v + ref := *v ref.Weight = k.Weight ref.GroupWeight = k.GroupWeight @@ -86,7 +82,7 @@ func (evi *Evidence) AssembleProteinReport(pro id.ProtIDList, weight float64, de } delete(ref.MappedProteins, i.ProteinName) - ref.Modifications = k.Modifications + ref.Modifications = k.Modifications.ToSlice() if len(ref.MappedProteins) == 0 && ref.Weight >= weight { ref.IsUnique = true @@ -97,35 +93,34 @@ func (evi *Evidence) AssembleProteinReport(pro id.ProtIDList, weight float64, de if k.Razor == 1 { ref.IsURazor = true } - - mods, ok := protMods[ion] - if ok { + refModifications := ref.Modifications.ToMap() + if mods, ok := protMods[ion]; ok { for _, j := range mods { - _, okMod := ref.Modifications.Index[j.Index] + _, okMod := refModifications.Index[j.Index] if !okMod && k.IsUnique { - ref.Modifications.Index[j.Index] = j - rep.Modifications.Index[j.Index] = j + refModifications.Index[j.Index] = j + repModificationsIndex[j.Index] = j } if !okMod && k.Razor == 1 { - ref.Modifications.Index[j.Index] = j - rep.Modifications.Index[j.Index] = j + refModifications.Index[j.Index] = j + repModificationsIndex[j.Index] = j } } } - + ref.Modifications = refModifications.ToSlice() rep.TotalPeptideIons[ion] = ref } else { var ref IonEvidence ref.MappedProteins = make(map[string]int) - ref.Spectra = make(map[string]int) + ref.Spectra = make(map[id.SpectrumType]int) ref.Protein = i.ProteinName ref.Sequence = k.PeptideSequence - ref.IonForm = ion + //ref.IonForm() = ion ref.ModifiedSequence = k.ModifiedPeptide ref.ChargeState = k.Charge ref.Probability = k.InitialProbability @@ -140,40 +135,40 @@ func (evi *Evidence) AssembleProteinReport(pro id.ProtIDList, weight float64, de } delete(ref.MappedProteins, i.ProteinName) - ref.Modifications = k.Modifications + ref.Modifications = k.Modifications.ToSlice() if len(ref.MappedProteins) == 0 && ref.Weight >= weight { ref.IsUnique = true } else { ref.IsUnique = false } - - mods, ok := protMods[ion] - if ok { + refModifications := ref.Modifications.ToMap() + if mods, ok := protMods[ion]; ok { for _, j := range mods { - _, okMod := ref.Modifications.Index[j.Index] + _, okMod := refModifications.Index[j.Index] if !okMod && k.IsUnique { - ref.Modifications.Index[j.Index] = j - rep.Modifications.Index[j.Index] = j + refModifications.Index[j.Index] = j + repModificationsIndex[j.Index] = j } if !okMod && k.Razor == 1 { - ref.Modifications.Index[j.Index] = j - rep.Modifications.Index[j.Index] = j + refModifications.Index[j.Index] = j + repModificationsIndex[j.Index] = j } } } - + ref.Modifications = refModifications.ToSlice() rep.TotalPeptideIons[ion] = ref } } - + if len(repModificationsIndex) != 0 { + rep.Modifications = mod.Modifications{Index: repModificationsIndex}.ToSlice() + } // if strings.Contains(rep.ProteinName, "Q8WXG9") { // spew.Dump(rep) // } - list = append(list, rep) } var dtb dat.Base @@ -184,35 +179,36 @@ func (evi *Evidence) AssembleProteinReport(pro id.ProtIDList, weight float64, de } // fix the name sand headers and pull database information into protein report - for i := range list { + for i := range evi.Proteins { + pe := &evi.Proteins[i] for _, j := range dtb.Records { desc := strings.Replace(j.Description, "|", " ", -1) - if strings.Contains(j.OriginalHeader, list[i].ProteinName) && strings.EqualFold(list[i].Description, desc) { - //if strings.Contains(j.OriginalHeader, list[i].ProteinName) && strings.Contains(j.OriginalHeader, list[i].Description) { + //if strings.Contains(j.OriginalHeader, list[i].ProteinName) && strings.EqualFold(list[i].Description, desc) { + if strings.Contains(j.OriginalHeader, pe.ProteinName) && strings.Contains(j.OriginalHeader, desc) { - if (j.IsDecoy && list[i].IsDecoy) || (!j.IsDecoy && !list[i].IsDecoy) { + if (j.IsDecoy && pe.IsDecoy) || (!j.IsDecoy && !pe.IsDecoy) { - list[i].OriginalHeader = j.OriginalHeader - list[i].PartHeader = j.PartHeader - list[i].ProteinID = j.ID - list[i].EntryName = j.EntryName - list[i].ProteinExistence = j.ProteinExistence - list[i].GeneNames = j.GeneNames - list[i].Sequence = j.Sequence - list[i].ProteinName = j.ProteinName - list[i].Organism = j.Organism + pe.OriginalHeader = j.OriginalHeader + pe.PartHeader = j.PartHeader + pe.ProteinID = j.ID + pe.EntryName = j.EntryName + pe.ProteinExistence = j.ProteinExistence + pe.GeneNames = j.GeneNames + pe.Sequence = j.Sequence + pe.ProteinName = j.ProteinName + pe.Organism = j.Organism // uniprot entries have the description on ProteinName if len(j.Description) < 1 { - list[i].Description = j.ProteinName + pe.Description = j.ProteinName } else { - list[i].Description = j.Description + pe.Description = j.Description } // updating the protein ions - for _, k := range list[i].TotalPeptideIons { + for _, k := range pe.TotalPeptideIons { k.Protein = j.PartHeader k.ProteinID = j.ID k.GeneName = j.GeneNames @@ -224,33 +220,33 @@ func (evi *Evidence) AssembleProteinReport(pro id.ProtIDList, weight float64, de } } - sort.Sort(list) - evi.Proteins = list + sort.Sort(evi.Proteins) } // MetaProteinReport creates the TSV Protein report -func (evi Evidence) MetaProteinReport(workspace, brand string, channels int, hasDecoys, hasRazor, uniqueOnly, hasLabels bool) { +func (eviProteins ProteinEvidenceList) MetaProteinReport(workspace, brand string, channels int, hasDecoys, hasRazor, uniqueOnly, hasLabels bool) { var header string output := fmt.Sprintf("%s%sprotein.tsv", workspace, string(filepath.Separator)) // create result file file, e := os.Create(output) + bw := bufio.NewWriter(file) if e != nil { msg.WriteFile(errors.New("cannot create protein report"), "error") } defer file.Close() - + defer bw.Flush() // building the printing set tat may or not contain decoys - var printSet ProteinEvidenceList - for _, i := range evi.Proteins { + var printSet []*ProteinEvidence + for idx, i := range eviProteins { if !hasDecoys { if !i.IsDecoy { - printSet = append(printSet, i) + printSet = append(printSet, &eviProteins[idx]) } } else { - printSet = append(printSet, i) + printSet = append(printSet, &eviProteins[idx]) } } @@ -333,7 +329,7 @@ func (evi Evidence) MetaProteinReport(workspace, brand string, channels int, has header = strings.Replace(header, "Channel "+printSet[10].UniqueLabels.Channel18.Name, c18, -1) } - _, e = io.WriteString(file, header) + _, e = io.WriteString(bw, header) if e != nil { msg.WriteToFile(e, "fatal") } @@ -345,7 +341,7 @@ func (evi Evidence) MetaProteinReport(workspace, brand string, channels int, has ip = append(ip, k) } - assL, obs := getModsList(i.Modifications.Index) + assL, obs := getModsList(i.Modifications.ToMap().Index) // var uniqIons int // for _, j := range i.TotalPeptideIons { @@ -368,44 +364,47 @@ func (evi Evidence) MetaProteinReport(workspace, brand string, channels int, has // change between Unique+Razor and Unique only based on parameter defined on labelquant var reportIntensities [18]float64 if uniqueOnly || !hasRazor { - reportIntensities[0] = i.UniqueLabels.Channel1.Intensity - reportIntensities[1] = i.UniqueLabels.Channel2.Intensity - reportIntensities[2] = i.UniqueLabels.Channel3.Intensity - reportIntensities[3] = i.UniqueLabels.Channel4.Intensity - reportIntensities[4] = i.UniqueLabels.Channel5.Intensity - reportIntensities[5] = i.UniqueLabels.Channel6.Intensity - reportIntensities[6] = i.UniqueLabels.Channel7.Intensity - reportIntensities[7] = i.UniqueLabels.Channel8.Intensity - reportIntensities[8] = i.UniqueLabels.Channel9.Intensity - reportIntensities[9] = i.UniqueLabels.Channel10.Intensity - reportIntensities[10] = i.UniqueLabels.Channel11.Intensity - reportIntensities[11] = i.UniqueLabels.Channel12.Intensity - reportIntensities[12] = i.UniqueLabels.Channel13.Intensity - reportIntensities[13] = i.UniqueLabels.Channel14.Intensity - reportIntensities[14] = i.UniqueLabels.Channel15.Intensity - reportIntensities[15] = i.UniqueLabels.Channel16.Intensity - reportIntensities[16] = i.UniqueLabels.Channel17.Intensity - reportIntensities[17] = i.UniqueLabels.Channel18.Intensity - + if i.UniqueLabels != nil { + reportIntensities[0] = i.UniqueLabels.Channel1.Intensity + reportIntensities[1] = i.UniqueLabels.Channel2.Intensity + reportIntensities[2] = i.UniqueLabels.Channel3.Intensity + reportIntensities[3] = i.UniqueLabels.Channel4.Intensity + reportIntensities[4] = i.UniqueLabels.Channel5.Intensity + reportIntensities[5] = i.UniqueLabels.Channel6.Intensity + reportIntensities[6] = i.UniqueLabels.Channel7.Intensity + reportIntensities[7] = i.UniqueLabels.Channel8.Intensity + reportIntensities[8] = i.UniqueLabels.Channel9.Intensity + reportIntensities[9] = i.UniqueLabels.Channel10.Intensity + reportIntensities[10] = i.UniqueLabels.Channel11.Intensity + reportIntensities[11] = i.UniqueLabels.Channel12.Intensity + reportIntensities[12] = i.UniqueLabels.Channel13.Intensity + reportIntensities[13] = i.UniqueLabels.Channel14.Intensity + reportIntensities[14] = i.UniqueLabels.Channel15.Intensity + reportIntensities[15] = i.UniqueLabels.Channel16.Intensity + reportIntensities[16] = i.UniqueLabels.Channel17.Intensity + reportIntensities[17] = i.UniqueLabels.Channel18.Intensity + } } else { - reportIntensities[0] = i.URazorLabels.Channel1.Intensity - reportIntensities[1] = i.URazorLabels.Channel2.Intensity - reportIntensities[2] = i.URazorLabels.Channel3.Intensity - reportIntensities[3] = i.URazorLabels.Channel4.Intensity - reportIntensities[4] = i.URazorLabels.Channel5.Intensity - reportIntensities[5] = i.URazorLabels.Channel6.Intensity - reportIntensities[6] = i.URazorLabels.Channel7.Intensity - reportIntensities[7] = i.URazorLabels.Channel8.Intensity - reportIntensities[8] = i.URazorLabels.Channel9.Intensity - reportIntensities[9] = i.URazorLabels.Channel10.Intensity - reportIntensities[10] = i.URazorLabels.Channel11.Intensity - reportIntensities[11] = i.URazorLabels.Channel12.Intensity - reportIntensities[12] = i.URazorLabels.Channel13.Intensity - reportIntensities[13] = i.URazorLabels.Channel14.Intensity - reportIntensities[14] = i.URazorLabels.Channel15.Intensity - reportIntensities[15] = i.URazorLabels.Channel16.Intensity - reportIntensities[16] = i.URazorLabels.Channel17.Intensity - reportIntensities[17] = i.URazorLabels.Channel18.Intensity + if i.URazorLabels != nil { + reportIntensities[0] = i.URazorLabels.Channel1.Intensity + reportIntensities[1] = i.URazorLabels.Channel2.Intensity + reportIntensities[2] = i.URazorLabels.Channel3.Intensity + reportIntensities[3] = i.URazorLabels.Channel4.Intensity + reportIntensities[4] = i.URazorLabels.Channel5.Intensity + reportIntensities[5] = i.URazorLabels.Channel6.Intensity + reportIntensities[6] = i.URazorLabels.Channel7.Intensity + reportIntensities[7] = i.URazorLabels.Channel8.Intensity + reportIntensities[8] = i.URazorLabels.Channel9.Intensity + reportIntensities[9] = i.URazorLabels.Channel10.Intensity + reportIntensities[10] = i.URazorLabels.Channel11.Intensity + reportIntensities[11] = i.URazorLabels.Channel12.Intensity + reportIntensities[12] = i.URazorLabels.Channel13.Intensity + reportIntensities[13] = i.URazorLabels.Channel14.Intensity + reportIntensities[14] = i.URazorLabels.Channel15.Intensity + reportIntensities[15] = i.URazorLabels.Channel16.Intensity + reportIntensities[16] = i.URazorLabels.Channel17.Intensity + reportIntensities[17] = i.URazorLabels.Channel18.Intensity + } } // proteins with almost no evidences, and completely shared with decoys are eliminated from the analysis, @@ -455,10 +454,10 @@ func (evi Evidence) MetaProteinReport(workspace, brand string, channels int, has line, reportIntensities[0], reportIntensities[1], - reportIntensities[2], - reportIntensities[3], reportIntensities[4], reportIntensities[5], + reportIntensities[8], + reportIntensities[9], ) case 8: line = fmt.Sprintf("%s\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f", @@ -549,7 +548,7 @@ func (evi Evidence) MetaProteinReport(workspace, brand string, channels int, has line += "\n" - _, e = io.WriteString(file, line) + _, e = io.WriteString(bw, line) if e != nil { msg.WriteToFile(e, "fatal") } @@ -558,32 +557,33 @@ func (evi Evidence) MetaProteinReport(workspace, brand string, channels int, has } // ProteinFastaReport saves to disk a filtered FASTA file with FDR aproved proteins -func (evi *Evidence) ProteinFastaReport(workspace string, hasDecoys bool) { +func (eviProteins ProteinEvidenceList) ProteinFastaReport(workspace string, hasDecoys bool) { output := fmt.Sprintf("%s%sprotein.fas", workspace, string(filepath.Separator)) file, e := os.Create(output) + bw := bufio.NewWriter(file) if e != nil { msg.WriteFile(e, "fatal") } defer file.Close() - + defer bw.Flush() // building the printing set tat may or not contain decoys - var printSet ProteinEvidenceList - for _, i := range evi.Proteins { + var printSet []*ProteinEvidence + for idx, i := range eviProteins { if !hasDecoys { if !i.IsDecoy { - printSet = append(printSet, i) + printSet = append(printSet, &eviProteins[idx]) } } else { - printSet = append(printSet, i) + printSet = append(printSet, &eviProteins[idx]) } } for _, i := range printSet { header := i.OriginalHeader line := ">" + header + "\n" + i.Sequence + "\n" - _, e = io.WriteString(file, line) + _, e = io.WriteString(bw, line) if e != nil { msg.WriteToFile(e, "fatal") } diff --git a/lib/rep/psm.go b/lib/rep/psm.go index 87b1738c..41f69251 100644 --- a/lib/rep/psm.go +++ b/lib/rep/psm.go @@ -1,6 +1,7 @@ package rep import ( + "bufio" "errors" "fmt" "io" @@ -22,45 +23,44 @@ import ( // AssemblePSMReport creates the PSM structure for reporting func (evi *Evidence) AssemblePSMReport(pep id.PepIDList, decoyTag string) { - var list PSMEvidenceList - - // collect database information - var dtb dat.Base - dtb.Restore() - var genes = make(map[string]string) var ptid = make(map[string]string) - for _, j := range dtb.Records { - genes[j.PartHeader] = j.GeneNames - ptid[j.PartHeader] = j.ID + { + // collect database information + var dtb dat.Base + dtb.Restore() + + for _, j := range dtb.Records { + genes[j.PartHeader] = j.GeneNames + ptid[j.PartHeader] = j.ID + } } + evi.PSM = make(PSMEvidenceList, len(pep)) + for idx, i := range pep { - for _, i := range pep { - - var p PSMEvidence + p := &evi.PSM[idx] source := strings.Split(i.Spectrum, ".") p.Source = source[0] p.Index = i.Index p.Spectrum = i.Spectrum p.SpectrumFile = i.SpectrumFile - p.Scan = i.Scan - p.PrevAA = "" - p.NextAA = "" - p.NumberOfEnzymaticTermini = int(i.NumberOfEnzymaticTermini) + //p.Scan = i.Scan + //p.PrevAA = "" + //p.NextAA = "" + p.NumberOfEnzymaticTermini = i.NumberOfEnzymaticTermini p.NumberOfMissedCleavages = i.NumberofMissedCleavages p.Peptide = i.Peptide - p.IonForm = fmt.Sprintf("%s#%d#%.4f", i.Peptide, i.AssumedCharge, i.CalcNeutralPepMass) + //p.IonForm() = fmt.Sprintf("%s#%d#%.4f", i.Peptide, i.AssumedCharge, i.CalcNeutralPepMass) p.Protein = i.Protein p.ModifiedPeptide = i.ModifiedPeptide p.AssumedCharge = i.AssumedCharge p.HitRank = i.HitRank - p.PrecursorExpMass = i.PrecursorExpMass + //p.PrecursorExpMass = i.PrecursorExpMass p.RetentionTime = i.RetentionTime p.CalcNeutralPepMass = i.CalcNeutralPepMass p.Massdiff = i.Massdiff - p.LocalizedPTMSites = i.LocalizedPTMSites - p.LocalizedPTMMassDiff = i.LocalizedPTMMassDiff + p.PTM = i.PTM p.Probability = i.Probability p.Expectation = i.Expectation p.Xcorr = i.Xcorr @@ -68,23 +68,20 @@ func (evi *Evidence) AssemblePSMReport(pep id.PepIDList, decoyTag string) { p.SPRank = i.SPRank p.Hyperscore = i.Hyperscore p.Nextscore = i.Nextscore - p.DiscriminantValue = i.DiscriminantValue + //p.DiscriminantValue = i.DiscriminantValue p.Intensity = i.Intensity p.IonMobility = i.IonMobility - p.CompensationVoltage = i.CompesationVoltage - p.MappedGenes = make(map[string]int) + p.CompensationVoltage = i.CompensationVoltage + p.MappedGenes = make(map[string]struct{}) p.MappedProteins = make(map[string]int) p.Modifications = i.Modifications - p.MSFragerLocalization = i.MSFragerLocalization - p.MSFraggerLocalizationScoreWithPTM = i.MSFraggerLocalizationScoreWithPTM - p.MSFraggerLocalizationScoreWithoutPTM = i.MSFraggerLocalizationScoreWithoutPTM - + p.MSFraggerLoc = i.MSFragerLoc if i.UncalibratedPrecursorNeutralMass > 0 { - p.PrecursorNeutralMass = i.PrecursorNeutralMass - p.UncalibratedPrecursorNeutralMass = i.UncalibratedPrecursorNeutralMass + p.PrecursorNeutralMass = float64(i.PrecursorNeutralMass) + p.UncalibratedPrecursorNeutralMass = float64(i.UncalibratedPrecursorNeutralMass) } else { - p.PrecursorNeutralMass = i.PrecursorNeutralMass - p.UncalibratedPrecursorNeutralMass = i.PrecursorNeutralMass + p.PrecursorNeutralMass = float64(i.PrecursorNeutralMass) + p.UncalibratedPrecursorNeutralMass = float64(i.PrecursorNeutralMass) } for j := range i.AlternativeProteins { @@ -125,16 +122,13 @@ func (evi *Evidence) AssemblePSMReport(pep id.PepIDList, decoyTag string) { p.IsUnique = true } - list = append(list, p) } - sort.Sort(list) - evi.PSM = list + sort.Sort(evi.PSM) } // MetaPSMReport report all psms from study that passed the FDR filter -func (evi Evidence) MetaPSMReport(workspace, brand string, channels int, hasDecoys, isComet, hasLoc, hasIonMob, hasLabels bool) { - +func (evi PSMEvidenceList) MetaPSMReport(workspace, brand string, channels int, hasDecoys, isComet, hasLoc, hasIonMob, hasLabels bool) { var header string var modMap = make(map[string]string) var modList []string @@ -145,48 +139,53 @@ func (evi Evidence) MetaPSMReport(workspace, brand string, channels int, hasDeco // create result file file, e := os.Create(output) + bw := bufio.NewWriter(file) if e != nil { msg.WriteFile(errors.New("cannot create report file"), "fatal") } defer file.Close() + defer bw.Flush() // building the printing set tat may or not contain decoys - var printSet PSMEvidenceList - for i := range evi.PSM { + //var printSet PSMEvidenceList + var printSet []*PSMEvidence + for i := range evi { - compositeName := strings.Split(evi.PSM[i].Spectrum, "#") - evi.PSM[i].Spectrum = compositeName[0] + //compositeName := strings.Split(evi[i].Spectrum, "#") + //evi[i].Spectrum = compositeName[0] if !hasDecoys { - if !evi.PSM[i].IsDecoy { - printSet = append(printSet, evi.PSM[i]) + if !evi[i].IsDecoy { + printSet = append(printSet, &evi[i]) } } else { - printSet = append(printSet, evi.PSM[i]) + printSet = append(printSet, &evi[i]) } - for k := range evi.PSM[i].LocalizedPTMMassDiff { - _, ok := modMap[k] - if !ok { - modMap[k] = "" - } else { - modMap[k] = "" + if evi[i].PTM != nil { + for k := range evi[i].PTM.LocalizedPTMMassDiff { + _, ok := modMap[k] + if !ok { + modMap[k] = "" + } else { + modMap[k] = "" + } } } - if len(evi.PSM[i].CompensationVoltage) > 0 { + if len(evi[i].CompensationVoltage) > 0 { hasCompVolt = true } - if !hasIonMob && evi.PSM[i].IonMobility > 0 { + if !hasIonMob && evi[i].IonMobility > 0 { hasIonMob = true } - if evi.PSM[i].Purity > 0 { + if evi[i].Purity > 0 { hasPurity = true } - if len(evi.PSM[i].MSFragerLocalization) > 0 { + if evi[i].MSFraggerLoc != nil && len(evi[i].MSFraggerLoc.MSFragerLocalization) > 0 { hasLoc = true } @@ -244,7 +243,7 @@ func (evi Evidence) MetaPSMReport(workspace, brand string, channels int, hasDeco case 16: header += "\tQuan Usage\tChannel 126\tChannel 127N\tChannel 127C\tChannel 128N\tChannel 128C\tChannel 129N\tChannel 129C\tChannel 130N\tChannel 130C\tChannel 131N\tChannel 131C\tChannel 132N\tChannel 132C\tChannel 133N\tChannel 133C\tChannel 134N" case 18: - header += "\tChannel 126\tChannel 127N\tChannel 127C\tChannel 128N\tChannel 128C\tChannel 129N\tChannel 129C\tChannel 130N\tChannel 130C\tChannel 131N\tChannel 131C\tChannel 132N\tChannel 132C\tChannel 133N\tChannel 133C\tChannel 134N\tChannel 134C\tChannel 135N" + header += "\tQuan Usage\tChannel 126\tChannel 127N\tChannel 127C\tChannel 128N\tChannel 128C\tChannel 129N\tChannel 129C\tChannel 130N\tChannel 130C\tChannel 131N\tChannel 131C\tChannel 132N\tChannel 132C\tChannel 133N\tChannel 133C\tChannel 134N\tChannel 134C\tChannel 135N" default: header += "" } @@ -310,14 +309,14 @@ func (evi Evidence) MetaPSMReport(workspace, brand string, channels int, hasDeco header = strings.Replace(header, "Channel "+printSet[10].Labels.Channel18.Name, c18, -1) } - _, e = io.WriteString(file, header) + _, e = io.WriteString(bw, header) if e != nil { msg.WriteToFile(errors.New("cannot print PSM to file"), "fatal") } for _, i := range printSet { - assL, obs := getModsList(i.Modifications.Index) + assL, obs := getModsList(i.Modifications.ToMap().Index) var mappedProteins []string for j := range i.MappedProteins { @@ -343,8 +342,8 @@ func (evi Evidence) MetaPSMReport(workspace, brand string, channels int, hasDeco i.SpectrumFile, i.Peptide, i.ModifiedPeptide, - i.PrevAA, - i.NextAA, + string(i.PrevAA), + string(i.NextAA), len(i.Peptide), i.AssumedCharge, i.RetentionTime, @@ -387,24 +386,31 @@ func (evi Evidence) MetaPSMReport(workspace, brand string, channels int, hasDeco for _, j := range modList { r := regexp.MustCompile(`\d\.\d{3}`) - matches := r.FindAllString(i.LocalizedPTMMassDiff[j], -1) + PTM := i.PTM + if PTM == nil { + PTM = &id.PTM{LocalizedPTMSites: map[string]int{}, LocalizedPTMMassDiff: map[string]string{}} + } + matches := r.FindAllString(PTM.LocalizedPTMMassDiff[j], -1) max := uti.GetMaxNumber(matches) line = fmt.Sprintf("%s\t%s\t%s", line, - i.LocalizedPTMMassDiff[j], + PTM.LocalizedPTMMassDiff[j], max, ) } } if hasLoc { + MSFraggerLoc := i.MSFraggerLoc + if MSFraggerLoc == nil { + MSFraggerLoc = &id.MSFraggerLoc{} + } line = fmt.Sprintf("%s\t%s\t%s\t%s", line, - i.MSFragerLocalization, - i.MSFraggerLocalizationScoreWithPTM, - i.MSFraggerLocalizationScoreWithoutPTM, - ) + MSFraggerLoc.MSFragerLocalization, + MSFraggerLoc.MSFraggerLocalizationScoreWithPTM, + MSFraggerLoc.MSFraggerLocalizationScoreWithoutPTM) } if hasIonMob { @@ -562,7 +568,7 @@ func (evi Evidence) MetaPSMReport(workspace, brand string, channels int, hasDeco line += "\n" - _, e = io.WriteString(file, line) + _, e = io.WriteString(bw, line) if e != nil { msg.WriteToFile(e, "fatal") } @@ -597,22 +603,23 @@ func (evi *Evidence) PSMLocalizationReport(workspace, decoyTag string, hasRazor, printSet = append(printSet, i) } } - for _, i := range printSet { - for j := range i.LocalizedPTMMassDiff { - line := fmt.Sprintf("%s\t%s\t%s\t%d\t%.4f\t%s\t%d\t%s\n", - i.Spectrum, - i.Peptide, - i.ModifiedPeptide, - i.AssumedCharge, - i.RetentionTime, - j, - i.LocalizedPTMSites[j], - i.LocalizedPTMMassDiff[j], - ) - _, e = io.WriteString(file, line) - if e != nil { - msg.WriteToFile(e, "fatal") + if i.PTM != nil { + for j := range i.PTM.LocalizedPTMMassDiff { + line := fmt.Sprintf("%s\t%s\t%s\t%d\t%.4f\t%s\t%d\t%s\n", + i.SpectrumFileName().Str(), + i.Peptide, + i.ModifiedPeptide, + i.AssumedCharge, + i.RetentionTime, + j, + i.PTM.LocalizedPTMSites[j], + i.PTM.LocalizedPTMMassDiff[j], + ) + _, e = io.WriteString(file, line) + if e != nil { + msg.WriteToFile(e, "fatal") + } } } } diff --git a/lib/rep/rep.go b/lib/rep/rep.go index 7d040ade..03546e59 100644 --- a/lib/rep/rep.go +++ b/lib/rep/rep.go @@ -107,71 +107,90 @@ type SearchParametersEvidence struct { Tyrosine string } +func (p PSMEvidence) SpectrumFileName() id.SpectrumType { + return id.SpectrumType{Spectrum: p.Spectrum, FileName: p.SpectrumFile} +} + // PSMEvidence struct type PSMEvidence struct { - Source string - Index uint32 - Spectrum string - SpectrumFile string - Scan int - NumberOfEnzymaticTermini int - NumberOfMissedCleavages int - ProteinStart int - ProteinEnd int - PrevAA string - NextAA string - Peptide string - IonForm string - Protein string - ProteinDescription string - ProteinID string - EntryName string - GeneName string - ModifiedPeptide string - CompensationVoltage string - MappedProteins map[string]int - MappedGenes map[string]int - AssumedCharge uint8 - HitRank uint8 - UncalibratedPrecursorNeutralMass float64 - PrecursorNeutralMass float64 - PrecursorExpMass float64 - RetentionTime float64 - CalcNeutralPepMass float64 - RawMassdiff float64 - Massdiff float64 - LocalizedPTMSites map[string]int - LocalizedPTMMassDiff map[string]string - LocalizationRange string - MSFragerLocalization string - MSFraggerLocalizationScoreWithPTM string - MSFraggerLocalizationScoreWithoutPTM string - Probability float64 - Expectation float64 - Xcorr float64 - DeltaCN float64 - DeltaCNStar float64 - SPScore float64 - SPRank float64 - Hyperscore float64 - Nextscore float64 - DiscriminantValue float64 - Intensity float64 - IonMobility float64 - Purity float64 - IsDecoy bool - IsUnique bool - IsURazor bool - Labels iso.Labels - Modifications mod.Modifications + Source string + Spectrum string + SpectrumFile string + Peptide string + Protein string + ProteinDescription string + ProteinID string + EntryName string + GeneName string + ModifiedPeptide string + CompensationVoltage string + ProteinStart int + ProteinEnd int + NumberOfEnzymaticTermini uint8 + NumberOfMissedCleavages uint8 + AssumedCharge uint8 + HitRank uint8 + Index uint32 + UncalibratedPrecursorNeutralMass float64 + PrecursorNeutralMass float64 + RetentionTime float64 + CalcNeutralPepMass float64 + RawMassdiff float64 + Massdiff float64 + Probability float64 + Expectation float64 + Xcorr float64 + DeltaCN float64 + DeltaCNStar float64 + SPScore float64 + SPRank float64 + Hyperscore float64 + Nextscore float64 + Intensity float64 + IonMobility float64 + Purity float64 + PrevAA byte + NextAA byte + IsDecoy bool + IsUnique bool + IsURazor bool + PTM *id.PTM + MSFraggerLoc *id.MSFraggerLoc + Labels *iso.Labels + Modifications mod.ModificationsSlice + MappedProteins map[string]int + MappedGenes map[string]struct{} + //Scan int + //PrecursorExpMass float64 + //LocalizationRange string + //DiscriminantValue float64 + //IonForm() string +} + +func (e PSMEvidence) IonForm() id.IonFormType { + t, err := strconv.ParseFloat(fmt.Sprintf("%.4f", e.CalcNeutralPepMass), 32) + if err != nil { + panic(err) + } + return id.IonFormType{e.Peptide, float32(t), e.AssumedCharge} +} + +func (e IonEvidence) IonForm() id.IonFormType { + t, err := strconv.ParseFloat(fmt.Sprintf("%.4f", e.PeptideMass), 32) + if err != nil { + panic(err) + } + return id.IonFormType{e.Sequence, float32(t), e.ChargeState} } // PSMEvidenceList ... type PSMEvidenceList []PSMEvidence -func (a PSMEvidenceList) Len() int { return len(a) } -func (a PSMEvidenceList) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a PSMEvidenceList) Less(i, j int) bool { return a[i].Spectrum < a[j].Spectrum } +func (a PSMEvidenceList) Len() int { return len(a) } +func (a PSMEvidenceList) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a PSMEvidenceList) Less(i, j int) bool { + return a[i].SpectrumFileName().Str() < a[j].SpectrumFileName().Str() +} // RemovePSMByIndex perfomrs a re-slicing by removing an element from a list func RemovePSMByIndex(s []PSMEvidence, i int) []PSMEvidence { @@ -182,16 +201,16 @@ func RemovePSMByIndex(s []PSMEvidence, i int) []PSMEvidence { // IonEvidence groups all valid info about peptide ions for reports type IonEvidence struct { Sequence string - IonForm string ModifiedSequence string - RetentionTime string + Protein string + ProteinID string + GeneName string + EntryName string + ProteinDescription string ChargeState uint8 NumberOfEnzymaticTermini uint8 - PrevAA string - NextAA string - Spectra map[string]int - MappedProteins map[string]int - MappedGenes map[string]int + PrevAA byte + NextAA byte MZ float64 PeptideMass float64 PrecursorNeutralMass float64 @@ -204,14 +223,15 @@ type IonEvidence struct { IsUnique bool IsURazor bool IsDecoy bool - Protein string - ProteinID string - GeneName string - EntryName string - ProteinDescription string - Labels iso.Labels - PhosphoLabels iso.Labels - Modifications mod.Modifications + Labels *iso.Labels + PhosphoLabels *iso.Labels + Modifications mod.ModificationsSlice + Spectra map[id.SpectrumType]int + MappedProteins map[string]int + MappedGenes map[string]struct{} + //IonForm() string + //RetentionTime string + } // IonEvidenceList ... @@ -230,28 +250,28 @@ func RemoveIonsByIndex(s []IonEvidence, i int) []IonEvidence { // PeptideEvidence groups all valid info about peptide ions for reports type PeptideEvidence struct { Sequence string - ChargeState map[uint8]uint8 - Spectra map[string]uint8 - PrevAA string - NextAA string Protein string ProteinID string GeneName string EntryName string ProteinDescription string - MappedProteins map[string]int - MappedGenes map[string]int Spc int - Intensity float64 - Probability float64 ModifiedObservations int UnModifiedObservations int + Intensity float64 + Probability float64 + PrevAA byte + NextAA byte IsUnique bool IsURazor bool IsDecoy bool - Labels iso.Labels - PhosphoLabels iso.Labels - Modifications mod.Modifications + ChargeState map[uint8]uint8 + Spectra map[id.SpectrumType]uint8 + MappedProteins map[string]int + MappedGenes map[string]struct{} + Labels *iso.Labels + PhosphoLabels *iso.Labels + Modifications mod.ModificationsSlice } // PeptideEvidenceList ... @@ -272,27 +292,21 @@ type ProteinEvidence struct { OriginalHeader string PartHeader string ProteinName string - ProteinGroup uint32 ProteinSubGroup string ProteinID string EntryName string Description string Organism string - Length int - Coverage float32 GeneNames string ProteinExistence string Sequence string - SupportingSpectra map[string]int - IndiProtein map[string]uint8 - UniqueStrippedPeptides int - TotalPeptideIons map[string]IonEvidence + Length int TotalSpC int UniqueSpC int URazorSpC int // Unique + razor - TotalPeptides map[string]int - UniquePeptides map[string]int - URazorPeptides map[string]int // Unique + razor + UniqueStrippedPeptides int + ProteinGroup uint32 + Coverage float32 TotalIntensity float64 UniqueIntensity float64 URazorIntensity float64 // Unique + razor @@ -300,13 +314,19 @@ type ProteinEvidence struct { TopPepProb float64 IsDecoy bool IsContaminant bool - TotalLabels iso.Labels - UniqueLabels iso.Labels - URazorLabels iso.Labels // Unique + razor - PhosphoTotalLabels iso.Labels - PhosphoUniqueLabels iso.Labels - PhosphoURazorLabels iso.Labels // Unique + razor - Modifications mod.Modifications + SupportingSpectra map[id.SpectrumType]int + IndiProtein map[string]struct{} + TotalPeptideIons map[id.IonFormType]IonEvidence + TotalPeptides map[string]int + UniquePeptides map[string]int + URazorPeptides map[string]int // Unique + razor + TotalLabels *iso.Labels + UniqueLabels *iso.Labels + URazorLabels *iso.Labels // Unique + razor + PhosphoTotalLabels *iso.Labels + PhosphoUniqueLabels *iso.Labels + PhosphoURazorLabels *iso.Labels // Unique + razor + Modifications mod.ModificationsSlice } // ProteinEvidenceList list @@ -322,20 +342,19 @@ type CombinedProteinEvidence struct { SiblingID string ProteinName string ProteinID string - IndiProtein []string EntryName string Organism string - Length int - Coverage float32 GeneNames string ProteinExistence string Description string + IndiProtein []string Names []string + Length int UniqueStrippedPeptides int - SupportingSpectra map[string]string + Coverage float32 ProteinProbability float64 TopPepProb float64 - PeptideIons []id.PeptideIonIdentification + SupportingSpectra map[string]string TotalSpc map[string]int UniqueSpc map[string]int UrazorSpc map[string]int @@ -348,6 +367,7 @@ type CombinedProteinEvidence struct { TotalLabels map[string]iso.Labels UniqueLabels map[string]iso.Labels URazorLabels map[string]iso.Labels // Unique + razor + PeptideIons []id.PeptideIonIdentification } // CombinedProteinEvidenceList is a list of Combined Protein Evidences @@ -359,13 +379,13 @@ func (a CombinedProteinEvidenceList) Less(i, j int) bool { return a[i].GroupNumb // CombinedPeptideEvidence represents all combined peptides detected type CombinedPeptideEvidence struct { - BestPSM float64 Sequence string Protein string ProteinID string EntryName string Gene string ProteinDescription string + BestPSM float64 ChargeStates map[uint8]uint8 AssignedMassDiffs map[string]uint8 Spc map[string]int @@ -407,9 +427,6 @@ func New() Evidence { // Run is the main entry poit for Report func Run(m met.Data) { - var repo = New() - repo.RestoreGranular() - var isComet bool var hasLoc bool var hasLabels bool @@ -439,24 +456,38 @@ func Run(m met.Data) { } logrus.Info("Creating reports") - - // PSM - repo.MetaPSMReport(m.Home, isoBrand, isoChannels, m.Report.Decoys, isComet, hasLoc, m.Report.IonMob, hasLabels) - - // Ion - repo.MetaIonReport(m.Home, isoBrand, isoChannels, m.Report.Decoys, hasLabels) - - // Peptide - repo.MetaPeptideReport(m.Home, isoBrand, isoChannels, m.Report.Decoys, hasLabels) - + { + var repoPSM PSMEvidenceList + RestorePSM(&repoPSM) + // PSM + repoPSM.MetaPSMReport(m.Home, isoBrand, isoChannels, m.Report.Decoys, isComet, hasLoc, m.Report.IonMob, hasLabels) + } + { + var repoIons IonEvidenceList + RestoreIon(&repoIons) + // Ion + repoIons.MetaIonReport(m.Home, isoBrand, isoChannels, m.Report.Decoys, hasLabels) + } + { + // Peptide + var repoPeptides PeptideEvidenceList + RestorePeptide(&repoPeptides) + repoPeptides.MetaPeptideReport(m.Home, isoBrand, isoChannels, m.Report.Decoys, hasLabels) + } // Protein if len(m.Filter.Pox) > 0 || m.Filter.Inference { - repo.MetaProteinReport(m.Home, isoBrand, isoChannels, m.Report.Decoys, m.Filter.Razor, m.Quantify.Unique, hasLabels) - repo.ProteinFastaReport(m.Home, m.Report.Decoys) + var repoProteins ProteinEvidenceList + RestoreProtein(&repoProteins) + repoProteins.MetaProteinReport(m.Home, isoBrand, isoChannels, m.Report.Decoys, m.Filter.Razor, m.Quantify.Unique, hasLabels) + repoProteins.ProteinFastaReport(m.Home, m.Report.Decoys) } // Modifications + repo := New() if len(repo.Modifications.MassBins) > 0 { + if repo.PSM == nil { + RestorePSM(&repo.PSM) + } repo.ModificationReport(m.Home) if m.PTMProphet.InputFiles != nil || len(m.PTMProphet.InputFiles) > 0 { @@ -473,6 +504,7 @@ func Run(m met.Data) { // MzID if m.Report.MZID { + repo.RestoreGranular() repo.MzIdentMLReport(m.Version, m.Database.Annot) } @@ -485,10 +517,14 @@ func getModsList(m map[string]mod.Modification) ([]string, []string) { var o []string for _, i := range m { - if i.Type == "Assigned" && i.Name != "Unknown" { - a = append(a, fmt.Sprintf("%s%s(%.4f)", i.Position, i.AminoAcid, i.MassDiff)) + if i.Type == mod.Assigned && i.Name != "Unknown" { + pos := "" + if len(i.AminoAcid) == 1 { + pos = strconv.Itoa(i.Position) + } + a = append(a, fmt.Sprintf("%s%s(%.4f)", pos, i.AminoAcid, i.MassDiff)) } - if i.Type == "Observed" && i.Name != "Unknown" { + if i.Type == mod.Observed && i.Name != "Unknown" { for k, v := range i.IsobaricMods { o = append(o, fmt.Sprintf("%s(%f)", k, v)) } diff --git a/lib/rep/updater.go b/lib/rep/updater.go index c68b2f79..22ae1d49 100644 --- a/lib/rep/updater.go +++ b/lib/rep/updater.go @@ -1,8 +1,6 @@ package rep import ( - "fmt" - "regexp" "strings" "philosopher/lib/dat" @@ -22,30 +20,31 @@ type PeptideMap struct { // UpdateNumberOfEnzymaticTermini collects the NTT from ProteinProphet // and passes along to the final Protein structure. -func (evi *Evidence) UpdateNumberOfEnzymaticTermini() { +func (evi *Evidence) UpdateNumberOfEnzymaticTermini(decoyTag string) { // restore the original prot.xml output var p id.ProtIDList p.Restore() // collect the updated ntt for each peptide-protein pair - var nttPeptidetoProptein = make(map[string]uint8) + //var nttPeptidetoProptein = make(map[string]uint8) + type k struct { + a string + b string + } + var nttPeptidetoProptein = make(map[k]uint8) for _, i := range p { for _, j := range i.PeptideIons { - if !strings.Contains(i.ProteinName, "rev_") { - key := fmt.Sprintf("%s#%s", j.PeptideSequence, i.ProteinName) - nttPeptidetoProptein[key] = j.NumberOfEnzymaticTermini + if !strings.Contains(i.ProteinName, decoyTag) { + nttPeptidetoProptein[k{j.PeptideSequence, i.ProteinName}] = j.NumberOfEnzymaticTermini } } } for i := range evi.PSM { - - key := fmt.Sprintf("%s#%s", evi.PSM[i].Peptide, evi.PSM[i].Protein) - ntt, ok := nttPeptidetoProptein[key] - if ok { - evi.PSM[i].NumberOfEnzymaticTermini = int(ntt) + if ntt, ok := nttPeptidetoProptein[k{evi.PSM[i].Peptide, evi.PSM[i].Protein}]; ok { + evi.PSM[i].NumberOfEnzymaticTermini = ntt } } } @@ -53,8 +52,8 @@ func (evi *Evidence) UpdateNumberOfEnzymaticTermini() { // UpdateIonStatus pushes back to ion and psm evideces the uniqueness and razorness status of each peptide and ion func (evi *Evidence) UpdateIonStatus(decoyTag string) { - var uniqueIons = make(map[string]bool) - var razorIons = make(map[string]string) + var uniqueIons = make(map[id.IonFormType]bool) + var razorIons = make(map[id.IonFormType]string) var uniquePeptides = make(map[string]string) var razorPeptides = make(map[string]string) @@ -62,12 +61,12 @@ func (evi *Evidence) UpdateIonStatus(decoyTag string) { for _, j := range i.TotalPeptideIons { if j.IsUnique { - uniqueIons[j.IonForm] = true + uniqueIons[j.IonForm()] = true uniquePeptides[j.Sequence] = i.PartHeader } if j.IsURazor { - razorIons[j.IonForm] = i.PartHeader + razorIons[j.IonForm()] = i.PartHeader razorPeptides[j.Sequence] = i.PartHeader } } @@ -79,7 +78,7 @@ func (evi *Evidence) UpdateIonStatus(decoyTag string) { // wrong classifications. If by any chance the protein gets assigned to // a razor decoy, this mechanism avoids the replacement - rp, rOK := razorIons[evi.PSM[i].IonForm] + rp, rOK := razorIons[evi.PSM[i].IonForm()] if rOK { evi.PSM[i].IsURazor = true @@ -120,7 +119,7 @@ func (evi *Evidence) UpdateIonStatus(decoyTag string) { } } - _, uOK := uniqueIons[evi.PSM[i].IonForm] + _, uOK := uniqueIons[evi.PSM[i].IonForm()] if uOK { evi.PSM[i].IsUnique = true } @@ -130,7 +129,7 @@ func (evi *Evidence) UpdateIonStatus(decoyTag string) { } for i := range evi.Ions { - rp, rOK := razorIons[evi.Ions[i].IonForm] + rp, rOK := razorIons[evi.Ions[i].IonForm()] if rOK { evi.Ions[i].IsURazor = true @@ -144,7 +143,7 @@ func (evi *Evidence) UpdateIonStatus(decoyTag string) { } } - _, uOK := uniqueIons[evi.Ions[i].IonForm] + _, uOK := uniqueIons[evi.Ions[i].IonForm()] if uOK { evi.Ions[i].IsUnique = true } else { @@ -180,14 +179,14 @@ func (evi *Evidence) UpdateIonStatus(decoyTag string) { func (evi *Evidence) UpdateIonModCount() { // recreate the ion list from the main report object - var AllIons = make(map[string]int) - var ModIons = make(map[string]int) - var UnModIons = make(map[string]int) + var AllIons = make(map[id.IonFormType]struct{}) + var ModIons = make(map[id.IonFormType]int) + var UnModIons = make(map[id.IonFormType]int) for _, i := range evi.Ions { - AllIons[i.IonForm] = 0 - ModIons[i.IonForm] = 0 - UnModIons[i.IonForm] = 0 + AllIons[i.IonForm()] = struct{}{} + ModIons[i.IonForm()] = 0 + UnModIons[i.IonForm()] = 0 } // range over PSMs looking for modified and not modified evidences @@ -195,13 +194,13 @@ func (evi *Evidence) UpdateIonModCount() { for _, i := range evi.PSM { // check the map - _, ok := AllIons[i.IonForm] + _, ok := AllIons[i.IonForm()] if ok { if i.Massdiff >= -0.99 && i.Massdiff <= 0.99 { - UnModIons[i.IonForm]++ + UnModIons[i.IonForm()]++ } else { - ModIons[i.IonForm]++ + ModIons[i.IonForm()]++ } } @@ -209,24 +208,19 @@ func (evi *Evidence) UpdateIonModCount() { } // SyncPSMToProteins forces the synchronization between the filtered proteins, and the remaining structures. -func (evi Evidence) SyncPSMToProteins(decoy string) Evidence { +func (evi *Evidence) SyncPSMToProteins(decoy string) { + var totalSpc = make(map[string][]id.SpectrumType, len(evi.PSM)) + var uniqueSpc = make(map[string][]id.SpectrumType, len(evi.PSM)) + var razorSpc = make(map[string][]id.SpectrumType, len(evi.PSM)) - var totalSpc = make(map[string][]string) - var uniqueSpc = make(map[string][]string) - var razorSpc = make(map[string][]string) - - var totalPeptides = make(map[string][]string) - var uniquePeptides = make(map[string][]string) - var razorPeptides = make(map[string][]string) + var totalPeptides = make(map[string][]string, len(evi.PSM)) + var uniquePeptides = make(map[string][]string, len(evi.PSM)) + var razorPeptides = make(map[string][]string, len(evi.PSM)) - var proteinIndex = make(map[string]uint8) - var newPSM PSMEvidenceList - var newIons IonEvidenceList - var newPeptides PeptideEvidenceList - var newProteins ProteinEvidenceList + var proteinIndex = make(map[string]struct{}) for _, i := range evi.Proteins { - proteinIndex[i.PartHeader] = 0 + proteinIndex[i.PartHeader] = struct{}{} } // for _, i := range evi.PSM { @@ -257,20 +251,20 @@ func (evi Evidence) SyncPSMToProteins(decoy string) Evidence { //if !i.IsDecoy { // Total - totalSpc[i.Protein] = append(totalSpc[i.Protein], i.Spectrum) + totalSpc[i.Protein] = append(totalSpc[i.Protein], i.SpectrumFileName()) totalPeptides[i.Protein] = append(totalPeptides[i.Protein], i.Peptide) for j := range i.MappedProteins { - totalSpc[j] = append(totalSpc[j], i.Spectrum) + totalSpc[j] = append(totalSpc[j], i.SpectrumFileName()) totalPeptides[j] = append(totalPeptides[j], i.Peptide) } if i.IsUnique { - uniqueSpc[i.Protein] = append(uniqueSpc[i.Protein], i.Spectrum) + uniqueSpc[i.Protein] = append(uniqueSpc[i.Protein], i.SpectrumFileName()) uniquePeptides[i.Protein] = append(uniquePeptides[i.Protein], i.Peptide) } if i.IsURazor { - razorSpc[i.Protein] = append(razorSpc[i.Protein], i.Spectrum) + razorSpc[i.Protein] = append(razorSpc[i.Protein], i.SpectrumFileName()) razorPeptides[i.Protein] = append(razorPeptides[i.Protein], i.Peptide) } //} @@ -290,7 +284,7 @@ func (evi Evidence) SyncPSMToProteins(decoy string) Evidence { for i := range evi.Proteins { - evi.Proteins[i].SupportingSpectra = make(map[string]int) + evi.Proteins[i].SupportingSpectra = make(map[id.SpectrumType]int) evi.Proteins[i].TotalSpC = 0 evi.Proteins[i].UniqueSpC = 0 evi.Proteins[i].URazorSpC = 0 @@ -298,102 +292,109 @@ func (evi Evidence) SyncPSMToProteins(decoy string) Evidence { evi.Proteins[i].UniquePeptides = make(map[string]int) evi.Proteins[i].URazorPeptides = make(map[string]int) - v, ok := totalSpc[evi.Proteins[i].PartHeader] - if ok { + if v, ok := totalSpc[evi.Proteins[i].PartHeader]; ok { evi.Proteins[i].TotalSpC += len(v) for _, j := range v { evi.Proteins[i].SupportingSpectra[j]++ } } - v, ok = totalPeptides[evi.Proteins[i].PartHeader] - if ok { + if v, ok := totalPeptides[evi.Proteins[i].PartHeader]; ok { for _, j := range v { evi.Proteins[i].TotalPeptides[j]++ } } - v, ok = uniqueSpc[evi.Proteins[i].PartHeader] - if ok { + if v, ok := uniqueSpc[evi.Proteins[i].PartHeader]; ok { evi.Proteins[i].UniqueSpC += len(v) } - v, ok = uniquePeptides[evi.Proteins[i].PartHeader] - if ok { + if v, ok := uniquePeptides[evi.Proteins[i].PartHeader]; ok { for _, j := range v { evi.Proteins[i].UniquePeptides[j]++ } } - v, ok = razorSpc[evi.Proteins[i].PartHeader] - if ok { + if v, ok := razorSpc[evi.Proteins[i].PartHeader]; ok { evi.Proteins[i].URazorSpC += len(v) } - v, ok = razorPeptides[evi.Proteins[i].PartHeader] - if ok { + if v, ok := razorPeptides[evi.Proteins[i].PartHeader]; ok { for _, j := range v { evi.Proteins[i].URazorPeptides[j]++ } } } - proteinIndex = make(map[string]uint8) - for _, i := range evi.Proteins { - if len(i.SupportingSpectra) > 0 { - proteinIndex[i.PartHeader] = 0 - newProteins = append(newProteins, i) + { + proteinIndex = make(map[string]struct{}, len(evi.Proteins)) + newProteins := make([]int, 0, len(evi.Proteins)) + for idx, i := range evi.Proteins { + if len(i.SupportingSpectra) > 0 { + proteinIndex[i.PartHeader] = struct{}{} + newProteins = append(newProteins, idx) + } } + for idx, i := range newProteins { + evi.Proteins[idx] = evi.Proteins[i] + } + evi.Proteins = evi.Proteins[:len(newProteins)] } - evi.Proteins = newProteins - - for _, i := range evi.PSM { - _, ok := proteinIndex[i.Protein] - if ok { - newPSM = append(newPSM, i) + { + newPSM := make([]int, 0, len(evi.PSM)) + for idx, i := range evi.PSM { + if _, ok := proteinIndex[i.Protein]; ok { + newPSM = append(newPSM, idx) + } + } + for idx, i := range newPSM { + evi.PSM[idx] = evi.PSM[i] } + evi.PSM = evi.PSM[:len(newPSM)] } - evi.PSM = newPSM - - for _, i := range evi.Ions { - _, ok := proteinIndex[i.Protein] - if ok { - newIons = append(newIons, i) + { + newIons := make([]int, 0, len(evi.Ions)) + for idx, i := range evi.Ions { + if _, ok := proteinIndex[i.Protein]; ok { + newIons = append(newIons, idx) + } + } + for idx, i := range newIons { + evi.Ions[idx] = evi.Ions[i] } + evi.Ions = evi.Ions[:len(newIons)] } - evi.Ions = newIons - - for _, i := range evi.Peptides { - _, ok := proteinIndex[i.Protein] - if ok { - newPeptides = append(newPeptides, i) + { + newPeptides := make([]int, 0, len(evi.Peptides)) + for idx, i := range evi.Peptides { + if _, ok := proteinIndex[i.Protein]; ok { + newPeptides = append(newPeptides, idx) + } + } + for idx, i := range newPeptides { + evi.Peptides[idx] = evi.Peptides[i] } + evi.Peptides = evi.Peptides[:len(newPeptides)] } - evi.Peptides = newPeptides - - return evi } // SyncPSMToPeptides forces the synchronization between the filtered peptides, and the remaining structures. func (evi Evidence) SyncPSMToPeptides(decoy string) Evidence { - var spc = make(map[string]int) - var spectra = make(map[string][]string) + var spectra = make(map[string][]id.SpectrumType) for _, i := range evi.PSM { if !i.IsDecoy { - spc[i.Peptide]++ - spectra[i.Peptide] = append(spectra[i.Peptide], i.Spectrum) + spectra[i.Peptide] = append(spectra[i.Peptide], i.SpectrumFileName()) } } for i := range evi.Peptides { evi.Peptides[i].Spc = 0 - evi.Peptides[i].Spectra = make(map[string]uint8) + evi.Peptides[i].Spectra = make(map[id.SpectrumType]uint8) - v, ok := spectra[evi.Peptides[i].Sequence] - if ok { + if v, ok := spectra[evi.Peptides[i].Sequence]; ok { //evi.Peptides[i].IsDecoy = false @@ -411,21 +412,19 @@ func (evi Evidence) SyncPSMToPeptides(decoy string) Evidence { // SyncPSMToPeptideIons forces the synchronization between the filtered ions, and the remaining structures. func (evi Evidence) SyncPSMToPeptideIons(decoy string) Evidence { - var ion = make(map[string]int) - var spectra = make(map[string][]string) + var spectra = make(map[id.IonFormType][]id.SpectrumType) for _, i := range evi.PSM { if !i.IsDecoy { - ion[i.IonForm]++ - spectra[i.IonForm] = append(spectra[i.IonForm], i.Spectrum) + spectra[i.IonForm()] = append(spectra[i.IonForm()], i.SpectrumFileName()) } } for i := range evi.Ions { - evi.Ions[i].Spectra = make(map[string]int) + evi.Ions[i].Spectra = make(map[id.SpectrumType]int) - v, ok := spectra[evi.Ions[i].IonForm] + v, ok := spectra[evi.Ions[i].IonForm()] if ok { //evi.Ions[i].IsDecoy = false @@ -441,70 +440,69 @@ func (evi Evidence) SyncPSMToPeptideIons(decoy string) Evidence { // UpdateLayerswithDatabase will fix the protein and gene assignments based on the database data func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) { + type liteRecord struct { + ID string + EntryName string + GeneNames string + Description string + Sequence string + } + var recordMap = make(map[string]liteRecord) - var dtb dat.Base - dtb.Restore() - - var proteinIDMap = make(map[string]string) - var entryNameMap = make(map[string]string) - var geneMap = make(map[string]string) - var descriptionMap = make(map[string]string) - var sequenceMap = make(map[string]string) - var pepPrevAA = make(map[string]string) - var pepNextAA = make(map[string]string) + { + var dtb dat.Base + dtb.Restore() + for _, j := range dtb.Records { + recordMap[j.PartHeader] = liteRecord{j.ID, j.EntryName, j.GeneNames, strings.TrimSpace(j.Description), j.Sequence} + } + } - for _, j := range dtb.Records { - proteinIDMap[j.PartHeader] = j.ID - entryNameMap[j.PartHeader] = j.EntryName - geneMap[j.PartHeader] = j.GeneNames - descriptionMap[j.PartHeader] = strings.TrimSpace(j.Description) - sequenceMap[j.PartHeader] = j.Sequence + type prevNextAA struct { + prev byte + next byte } + var pepPrevNextAA = make(map[string]prevNextAA) + replacerIL := strings.NewReplacer("L", "I") for i := range evi.PSM { - id := evi.PSM[i].Protein - - evi.PSM[i].ProteinID = proteinIDMap[id] - evi.PSM[i].EntryName = entryNameMap[id] - evi.PSM[i].GeneName = geneMap[id] - evi.PSM[i].ProteinDescription = descriptionMap[id] + rec := recordMap[evi.PSM[i].Protein] + evi.PSM[i].ProteinID = rec.ID + evi.PSM[i].EntryName = rec.EntryName + evi.PSM[i].GeneName = rec.GeneNames + evi.PSM[i].ProteinDescription = rec.Description // update mapped genes for k := range evi.PSM[i].MappedProteins { if !strings.Contains(k, decoyTag) { - evi.PSM[i].MappedGenes[geneMap[k]] = 0 + evi.PSM[i].MappedGenes[recordMap[k].GeneNames] = struct{}{} } } // map the peptide to the protein - replacer := strings.NewReplacer("I", "[IL]", "L", "[IL]") - peptideIL := replacer.Replace(evi.PSM[i].Peptide) - - re := regexp.MustCompile(peptideIL) - reMatch := re.FindStringIndex(sequenceMap[id]) + mstart := strings.Index(replacerIL.Replace(rec.Sequence), replacerIL.Replace(evi.PSM[i].Peptide)) + mend := mstart + len(evi.PSM[i].Peptide) - if len(reMatch) > 0 { + if mstart != -1 { + evi.PSM[i].ProteinStart = mstart + 1 + evi.PSM[i].ProteinEnd = mend - evi.PSM[i].ProteinStart = reMatch[0] + 1 - evi.PSM[i].ProteinEnd = reMatch[1] - - if (reMatch[0]) <= 0 { - evi.PSM[i].PrevAA = "-" + if (mstart) <= 0 { + evi.PSM[i].PrevAA = '-' } else { - evi.PSM[i].PrevAA = string(sequenceMap[id][reMatch[0]-1]) + evi.PSM[i].PrevAA = rec.Sequence[mstart-1] } - if (reMatch[1] + 1) >= len(sequenceMap[id]) { - evi.PSM[i].NextAA = "-" + if (mend + 1) >= len(rec.Sequence) { + evi.PSM[i].NextAA = '-' } else { - evi.PSM[i].NextAA = string(sequenceMap[id][reMatch[1]]) + evi.PSM[i].NextAA = rec.Sequence[mend] } } - pepPrevAA[evi.PSM[i].Peptide] = evi.PSM[i].PrevAA - pepNextAA[evi.PSM[i].Peptide] = evi.PSM[i].NextAA + pepPrevNextAA[evi.PSM[i].Peptide] = prevNextAA{evi.PSM[i].PrevAA, evi.PSM[i].NextAA} + } for i := range evi.Ions { @@ -513,21 +511,22 @@ func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) { if evi.Ions[i].IsDecoy { id = strings.Replace(id, decoyTag, "", 1) } + rec := recordMap[id] - evi.Ions[i].ProteinID = proteinIDMap[id] - evi.Ions[i].EntryName = entryNameMap[id] - evi.Ions[i].GeneName = geneMap[id] - evi.Ions[i].ProteinDescription = descriptionMap[id] + evi.Ions[i].ProteinID = rec.ID + evi.Ions[i].EntryName = rec.EntryName + evi.Ions[i].GeneName = rec.GeneNames + evi.Ions[i].ProteinDescription = rec.Description // update mapped genes for k := range evi.Ions[i].MappedProteins { if !strings.Contains(k, decoyTag) { - evi.Ions[i].MappedGenes[geneMap[k]] = 0 + evi.Ions[i].MappedGenes[recordMap[k].GeneNames] = struct{}{} } } - - evi.Ions[i].PrevAA = pepPrevAA[evi.Ions[i].Sequence] - evi.Ions[i].NextAA = pepNextAA[evi.Ions[i].Sequence] + pnAA := pepPrevNextAA[evi.Ions[i].Sequence] + evi.Ions[i].PrevAA = pnAA.prev + evi.Ions[i].NextAA = pnAA.next } for i := range evi.Peptides { @@ -536,30 +535,30 @@ func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) { if evi.Peptides[i].IsDecoy { id = strings.Replace(id, decoyTag, "", 1) } - - evi.Peptides[i].ProteinID = proteinIDMap[id] - evi.Peptides[i].EntryName = entryNameMap[id] - evi.Peptides[i].GeneName = geneMap[id] - evi.Peptides[i].ProteinDescription = descriptionMap[id] + rec := recordMap[id] + evi.Peptides[i].ProteinID = rec.ID + evi.Peptides[i].EntryName = rec.EntryName + evi.Peptides[i].GeneName = rec.GeneNames + evi.Peptides[i].ProteinDescription = rec.Description // update mapped genes for k := range evi.Peptides[i].MappedProteins { if !strings.Contains(k, decoyTag) { - evi.Peptides[i].MappedGenes[geneMap[k]] = 0 + evi.Peptides[i].MappedGenes[recordMap[k].GeneNames] = struct{}{} } } - - evi.Peptides[i].PrevAA = pepPrevAA[evi.Peptides[i].Sequence] - evi.Peptides[i].NextAA = pepNextAA[evi.Peptides[i].Sequence] + pnAA := pepPrevNextAA[evi.Peptides[i].Sequence] + evi.Peptides[i].PrevAA = pnAA.prev + evi.Peptides[i].NextAA = pnAA.next } } // UpdateSupportingSpectra pushes back from PSM to Protein the new supporting spectra from razor results func (evi *Evidence) UpdateSupportingSpectra() { - var ptSupSpec = make(map[string][]string) - var uniqueSpec = make(map[string][]string) - var razorSpec = make(map[string][]string) + var ptSupSpec = make(map[string][]id.SpectrumType) + var uniqueSpec = make(map[id.IonFormType][]id.SpectrumType) + var razorSpec = make(map[id.IonFormType][]id.SpectrumType) var totalPeptides = make(map[string][]string) var uniquePeptides = make(map[string][]string) @@ -569,20 +568,20 @@ func (evi *Evidence) UpdateSupportingSpectra() { _, ok := ptSupSpec[i.Protein] if !ok { - ptSupSpec[i.Protein] = append(ptSupSpec[i.Protein], i.Spectrum) + ptSupSpec[i.Protein] = append(ptSupSpec[i.Protein], i.SpectrumFileName()) } if i.IsUnique { - _, ok := uniqueSpec[i.IonForm] + _, ok := uniqueSpec[i.IonForm()] if !ok { - uniqueSpec[i.IonForm] = append(uniqueSpec[i.IonForm], i.Spectrum) + uniqueSpec[i.IonForm()] = append(uniqueSpec[i.IonForm()], i.SpectrumFileName()) } } if i.IsURazor { - _, ok := razorSpec[i.IonForm] + _, ok := razorSpec[i.IonForm()] if !ok { - razorSpec[i.IonForm] = append(razorSpec[i.IonForm], i.Spectrum) + razorSpec[i.IonForm()] = append(razorSpec[i.IonForm()], i.SpectrumFileName()) } } } @@ -635,14 +634,14 @@ func (evi *Evidence) UpdateSupportingSpectra() { for k := range evi.Proteins[i].TotalPeptideIons { - Up, UOK := uniqueSpec[evi.Proteins[i].TotalPeptideIons[k].IonForm] + Up, UOK := uniqueSpec[evi.Proteins[i].TotalPeptideIons[k].IonForm()] if UOK && evi.Proteins[i].TotalPeptideIons[k].IsUnique { for _, l := range Up { evi.Proteins[i].TotalPeptideIons[k].Spectra[l] = 0 } } - Rp, ROK := razorSpec[evi.Proteins[i].TotalPeptideIons[k].IonForm] + Rp, ROK := razorSpec[evi.Proteins[i].TotalPeptideIons[k].IonForm()] if ROK && evi.Proteins[i].TotalPeptideIons[k].IsURazor { for _, l := range Rp { evi.Proteins[i].TotalPeptideIons[k].Spectra[l] = 0 diff --git a/lib/spc/pepxml.go b/lib/spc/pepxml.go index e3e09796..2197b9fc 100644 --- a/lib/spc/pepxml.go +++ b/lib/spc/pepxml.go @@ -182,17 +182,17 @@ type TerminalModification struct { // SpectrumQuery tag type SpectrumQuery struct { XMLName xml.Name `xml:"spectrum_query"` + CompensationVoltage string `xml:"compensation_voltage,attr"` Spectrum []byte `xml:"spectrum,attr"` SpectrumNativeID []byte `xml:"spectrumNativeID,attr"` StartScan int `xml:"start_scan,attr"` EndScan int `xml:"end_scan,attr"` - UncalibratedPrecursorNeutralMass float64 `xml:"uncalibrated_precursor_neutral_mass,attr"` - PrecursorNeutralMass float64 `xml:"precursor_neutral_mass,attr"` AssumedCharge uint8 `xml:"assumed_charge,attr"` Index uint32 `xml:"index,attr"` RetentionTimeSec float64 `xml:"retention_time_sec,attr"` IonMobility float64 `xml:"ion_mobility,attr"` - CompensationVoltage string `xml:"compensation_voltage,attr"` + UncalibratedPrecursorNeutralMass float64 `xml:"uncalibrated_precursor_neutral_mass,attr"` + PrecursorNeutralMass float64 `xml:"precursor_neutral_mass,attr"` SearchResult SearchResult `xml:"search_result"` } @@ -211,17 +211,17 @@ type SearchHit struct { NextAA []byte `xml:"peptide_next_aa,attr"` Protein []byte `xml:"protein,attr"` ProteinDescr []byte `xml:"protein_descr,attr"` + TotalTerm uint8 `xml:"num_tol_term,attr"` + MissedCleavages uint8 `xml:"num_missed_cleavages,attr"` + IsRejected uint8 `xml:"is_rejected,attr"` TotalProteins uint16 `xml:"num_tot_proteins,attr"` MatchedIons uint16 `xml:"num_matched_ions,attr"` TotalIons uint16 `xml:"tot_num_ions,attr"` + MatchedPeptides uint32 `xml:"num_matched_peptides,attr"` CalcNeutralPepMass float64 `xml:"calc_neutral_pep_mass,attr"` Massdiff float64 `xml:"massdiff,attr"` - TotalTerm uint8 `xml:"num_tol_term,attr"` - MissedCleavages uint8 `xml:"num_missed_cleavages,attr"` - MatchedPeptides uint32 `xml:"num_matched_peptides,attr"` - IsRejected uint8 `xml:"is_rejected,attr"` - Score []SearchScore `xml:"search_score"` ModificationInfo ModificationInfo `xml:"modification_info"` + Score []SearchScore `xml:"search_score"` AnalysisResult []AnalysisResult `xml:"analysis_result"` AlternativeProteins []AlternativeProtein `xml:"alternative_protein"` PTMResult PTMResult `xml:"ptm_result"` @@ -230,9 +230,9 @@ type SearchHit struct { // AlternativeProtein tag type AlternativeProtein struct { XMLName xml.Name `xml:"alternative_protein"` + NumTolTerm int8 `xml:"num_tol_tem,attr"` Protein []byte `xml:"protein,attr"` Description []byte `xml:"protein_descr,attr"` - NumTolTerm int8 `xml:"num_tol_tem,attr"` PepPrevAA []byte `xml:"peptide_prev_aa,attr"` PepNextAA []byte `xml:"peptide_next_aa,attr"` } diff --git a/lib/spc/protxml.go b/lib/spc/protxml.go index 7f18c746..3e1f1298 100644 --- a/lib/spc/protxml.go +++ b/lib/spc/protxml.go @@ -22,16 +22,16 @@ type ProteinSummaryHeader struct { XMLName xml.Name `xml:"protein_summary_header"` ReferenceDatabase []byte `xml:"reference_database,attr"` ResidueSubstitutionList []byte `xml:"residue_substitution_list,attr"` + SampleEnzyme []byte `xml:"sample_enzyme,attr"` MinPeptideProbability float32 `xml:"min_peptide_probability,attr"` MinPeptideWeight float32 `xml:"min_peptide_weight,attr"` NumPredictedCorrectProteins float32 `xml:"num_predicted_correct_prots,attr"` + TotalNumberSpectrumIDs float32 `xml:"total_no_spectrum_ids,attr"` NumInput1Spectra uint32 `xml:"num_input_1_spectra,attr"` NumInput2Spectra uint32 `xml:"num_input_2_spectra,attr"` NumInput3Spectra uint32 `xml:"num_input_3_spectra,attr"` NumInput4Spectra uint32 `xml:"num_input_4_spectra,attr"` NumInput5Spectra uint32 `xml:"num_input_5_spectra,attr"` - TotalNumberSpectrumIDs float32 `xml:"total_no_spectrum_ids,attr"` - SampleEnzyme []byte `xml:"sample_enzyme,attr"` ProgramDetails ProgramDetails `xml:"program_details"` } @@ -70,19 +70,18 @@ type ProteinGroup struct { type Protein struct { XMLName xml.Name `xml:"protein"` ProteinName []byte `xml:"protein_name,attr"` - NumberIndistinguishableProteins int16 `xml:"n_indistinguishable_proteins,attr"` - Probability float64 `xml:"probability,attr"` - PercentCoverage float32 `xml:"percent_coverage,attr"` UniqueStrippedPeptides []byte `xml:"unique_stripped_peptides,attr"` GroupSiblingID []byte `xml:"group_sibling_id,attr"` + NumberIndistinguishableProteins int16 `xml:"n_indistinguishable_proteins,attr"` TotalNumberPeptides int `xml:"total_number_peptides,attr"` TotalNumberIndPeptides int `xml:"total_number_distinct_peptides,attr"` + PercentCoverage float32 `xml:"percent_coverage,attr"` PctSpectrumIDs float32 `xml:"pct_spectrum_ids,attr"` + Probability float64 `xml:"probability,attr"` Parameter Parameter `xml:"parameter"` Annotation Annotation `xml:"annotation"` IndistinguishableProtein []IndistinguishableProtein `xml:"indistinguishable_protein"` Peptide []Peptide `xml:"peptide"` - TopPepProb float64 //Confidence float64 `xml:"confidence,attr"` } diff --git a/lib/spc/spc.go b/lib/spc/spc.go index 2757e07d..58e13d0f 100644 --- a/lib/spc/spc.go +++ b/lib/spc/spc.go @@ -1,12 +1,10 @@ package spc import ( - "bytes" + "bufio" "encoding/xml" - "io/ioutil" "os" "path/filepath" - "philosopher/lib/msg" "github.com/rogpeppe/go-charset/charset" @@ -56,18 +54,18 @@ func (p *PepXML) Parse(f string) { if e != nil { msg.ReadFile(e, "fatal") } - defer xmlFile.Close() - b, _ := ioutil.ReadAll(xmlFile) - - var mpa MsmsPipelineAnalysis - - reader := bytes.NewReader(b) + reader := bufio.NewReader(xmlFile) decoder := xml.NewDecoder(reader) decoder.CharsetReader = charset.NewReader + var mpa MsmsPipelineAnalysis if e = decoder.Decode(&mpa); e != nil { msg.DecodeMsgPck(e, "fatal") } + err := xmlFile.Close() + if err != nil { + panic(err) + } p.MsmsPipelineAnalysis = mpa p.Name = filepath.Base(f) @@ -82,11 +80,10 @@ func (p *ProtXML) Parse(f string) { msg.ReadFile(e, "fatal") } defer xmlFile.Close() - b, _ := ioutil.ReadAll(xmlFile) var ps ProteinSummary - reader := bytes.NewReader(b) + reader := bufio.NewReader(xmlFile) decoder := xml.NewDecoder(reader) decoder.CharsetReader = charset.NewReader diff --git a/lib/sys/sys.go b/lib/sys/sys.go index 0e96d2f7..1e580b29 100644 --- a/lib/sys/sys.go +++ b/lib/sys/sys.go @@ -1,8 +1,10 @@ package sys import ( + "bufio" "errors" "fmt" + "github.com/vmihailenco/msgpack/v5" "io" "os" "os/exec" @@ -229,3 +231,48 @@ func FilePermission() os.FileMode { //return 0644 return 0755 } + +func Serialize(v interface{}, filename string) { + output, e := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, FilePermission()) + if e != nil { + msg.WriteFile(e, "fatal") + panic(e) + } + bo := bufio.NewWriter(output) + enc := msgpack.NewEncoder(bo) + enc.UseInternedStrings(true) + err := enc.Encode(&v) + errFlush := bo.Flush() + if errFlush != nil { + msg.MarshalFile(errFlush, "fatal") + panic(errFlush) + } + _ = output.Close() + if err != nil { + msg.MarshalFile(err, "fatal") + panic(err) + } +} + +func Restore(v interface{}, filename string, silent bool) { + input, e := os.Open(filename) + if e != nil && silent { + return + } + if e != nil { + msg.ReadFile(e, "fatal") + panic(e) + } + bi := bufio.NewReader(input) + dec := msgpack.NewDecoder(bi) + dec.UseInternedStrings(true) + err := dec.Decode(&v) + errClose := input.Close() + if errClose != nil { + panic(errClose) + } + if err != nil && !silent { + msg.DecodeMsgPck(err, "fatal") + panic(err) + } +} diff --git a/lib/uti/uti.go b/lib/uti/uti.go index cd99af43..bb12a727 100644 --- a/lib/uti/uti.go +++ b/lib/uti/uti.go @@ -191,14 +191,15 @@ func GetMaxNumber(list []string) string { // RemoveDuplicateStrings removes duplicates from a slice func RemoveDuplicateStrings(slice []string) []string { - keys := make(map[string]bool) - list := []string{} - + keys := make(map[string]struct{}, len(slice)) + list := make([]string, 0, len(slice)) for _, entry := range slice { if _, value := keys[entry]; !value { - keys[entry] = true + keys[entry] = struct{}{} list = append(list, entry) } } - return list + list2 := make([]string, len(list)) + copy(list2, list) + return list2 } diff --git a/lib/wrk/wrk.go b/lib/wrk/wrk.go index 322bb669..e0ac62d9 100644 --- a/lib/wrk/wrk.go +++ b/lib/wrk/wrk.go @@ -16,7 +16,7 @@ import ( "github.com/pierrre/archivefile/zip" "github.com/sirupsen/logrus" - "github.com/vmihailenco/msgpack" + "github.com/vmihailenco/msgpack/v5" ) // Run is the workspace main entry point diff --git a/main.go b/main.go index 443b8a2e..eb2c64ac 100644 --- a/main.go +++ b/main.go @@ -1,6 +1,9 @@ package main -import "philosopher/cmd" +import ( + "philosopher/cmd" + "runtime/debug" +) var ( // Version code @@ -15,7 +18,7 @@ var ( ) func main() { - + debug.SetGCPercent(20) cmd.Version = version cmd.Build = build