Skip to content

Commit 2aae996

Browse files
committed
Refactor and add a names subcommand
1 parent aeae1c4 commit 2aae996

12 files changed

+282
-94
lines changed

Diff for: cli.go

+24-10
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ var (
1111
Use: "fastats {command}",
1212
Short: "Very simple statistics from fasta files",
1313
Long: ``,
14-
Version: "0.5.0",
14+
Version: "0.6.0",
1515
CompletionOptions: cobra.CompletionOptions{DisableDefaultCmd: true},
1616
}
1717
)
@@ -26,13 +26,16 @@ func main() {
2626
var f bool
2727
var c bool
2828
var p string
29+
var d bool
2930

3031
func init() {
3132
rootCmd.PersistentFlags().BoolVarP(&f, "file", "f", false, "calculate statistics per file (default is per record)")
3233
rootCmd.PersistentFlags().BoolVarP(&c, "count", "c", false, "print counts (default is proportions)")
34+
rootCmd.PersistentFlags().BoolVarP(&d, "description", "d", false, "write record descriptions (default is IDs)")
3335

3436
rootCmd.PersistentFlags().Lookup("file").NoOptDefVal = "true"
3537
rootCmd.PersistentFlags().Lookup("count").NoOptDefVal = "true"
38+
rootCmd.PersistentFlags().Lookup("description").NoOptDefVal = "true"
3639

3740
rootCmd.AddCommand(atCmd)
3841
rootCmd.AddCommand(gcCmd)
@@ -43,6 +46,7 @@ func init() {
4346
rootCmd.AddCommand(softCmd)
4447
rootCmd.AddCommand(patternCmd)
4548
rootCmd.AddCommand(numCmd)
49+
rootCmd.AddCommand(nameCmd)
4650

4751
patternCmd.Flags().StringVarP(&p, "pattern", "p", "", "arbitrary pattern to parse")
4852
}
@@ -52,7 +56,7 @@ var atCmd = &cobra.Command{
5256
Short: "AT content",
5357
DisableFlagsInUseLine: true,
5458
RunE: func(cmd *cobra.Command, args []string) (err error) {
55-
err = pattern(os.Stdout, args, "ATat", f, c)
59+
err = pattern(os.Stdout, args, "ATat", f, c, d)
5660
return err
5761
},
5862
}
@@ -62,7 +66,7 @@ var gcCmd = &cobra.Command{
6266
Short: "GC content",
6367
DisableFlagsInUseLine: true,
6468
RunE: func(cmd *cobra.Command, args []string) (err error) {
65-
err = pattern(os.Stdout, args, "GCgc", f, c)
69+
err = pattern(os.Stdout, args, "GCgc", f, c, d)
6670
return err
6771
},
6872
}
@@ -72,7 +76,7 @@ var atgcCmd = &cobra.Command{
7276
Short: "ATGC content",
7377
DisableFlagsInUseLine: true,
7478
RunE: func(cmd *cobra.Command, args []string) (err error) {
75-
err = pattern(os.Stdout, args, "ATGCatgc", f, c)
79+
err = pattern(os.Stdout, args, "ATGCatgc", f, c, d)
7680
return err
7781
},
7882
}
@@ -82,7 +86,7 @@ var nCmd = &cobra.Command{
8286
Short: "N content",
8387
DisableFlagsInUseLine: true,
8488
RunE: func(cmd *cobra.Command, args []string) (err error) {
85-
err = pattern(os.Stdout, args, "Nn", f, c)
89+
err = pattern(os.Stdout, args, "Nn", f, c, d)
8690
return err
8791
},
8892
}
@@ -92,7 +96,7 @@ var gapCmd = &cobra.Command{
9296
Short: "Gap content",
9397
DisableFlagsInUseLine: true,
9498
RunE: func(cmd *cobra.Command, args []string) (err error) {
95-
err = pattern(os.Stdout, args, "-", f, c)
99+
err = pattern(os.Stdout, args, "-", f, c, d)
96100
return err
97101
},
98102
}
@@ -102,7 +106,7 @@ var softCmd = &cobra.Command{
102106
Short: "Softmasked content",
103107
DisableFlagsInUseLine: true,
104108
RunE: func(cmd *cobra.Command, args []string) (err error) {
105-
err = pattern(os.Stdout, args, "atgcn", f, c)
109+
err = pattern(os.Stdout, args, "atgcn", f, c, d)
106110
return err
107111
},
108112
}
@@ -114,7 +118,7 @@ var patternCmd = &cobra.Command{
114118
Short: "Arbitrary PATTERN content",
115119
DisableFlagsInUseLine: true,
116120
RunE: func(cmd *cobra.Command, args []string) (err error) {
117-
err = pattern(os.Stdout, args, p, f, c)
121+
err = pattern(os.Stdout, args, p, f, c, d)
118122
return err
119123
},
120124
}
@@ -124,7 +128,7 @@ var lenCmd = &cobra.Command{
124128
Short: "Sequence length",
125129
DisableFlagsInUseLine: true,
126130
RunE: func(cmd *cobra.Command, args []string) (err error) {
127-
err = length(os.Stdout, args, p, f, c)
131+
err = length(os.Stdout, args, p, f, c, d)
128132
return err
129133
},
130134
}
@@ -134,7 +138,17 @@ var numCmd = &cobra.Command{
134138
Short: "Number of records",
135139
DisableFlagsInUseLine: true,
136140
RunE: func(cmd *cobra.Command, args []string) (err error) {
137-
err = num(os.Stdout, args, p, f, c)
141+
err = num(os.Stdout, args, p, f, c, d)
142+
return err
143+
},
144+
}
145+
146+
var nameCmd = &cobra.Command{
147+
Use: "names <infile[s]>",
148+
Short: "Record names",
149+
DisableFlagsInUseLine: true,
150+
RunE: func(cmd *cobra.Command, args []string) (err error) {
151+
err = names(os.Stdout, args, p, f, c, d)
138152
return err
139153
},
140154
}

Diff for: fasta.go renamed to fasta/fasta.go

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package main
1+
package fasta
22

33
import (
44
"bufio"
@@ -124,3 +124,13 @@ func (r *Reader) Read() (FastaRecord, error) {
124124

125125
return FR, err
126126
}
127+
128+
func (r *Reader) Seek(offset int) error {
129+
for i := 0; i < offset; i++ {
130+
_, err := r.r.ReadByte()
131+
if err != nil {
132+
return err
133+
}
134+
}
135+
return nil
136+
}

Diff for: fasta_test.go renamed to fasta/fasta_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package main
1+
package fasta
22

33
import (
44
"bytes"

Diff for: len.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@ package main
33
import (
44
"fmt"
55
"io"
6+
7+
"github.com/bjeight/fastats/fasta"
68
)
79

810
// length() is fastats len in the cli. It writes the appropriate header (which depends on the cli
911
// arguments), then passes lengthRecords() + the cli arguments + the writer to collectCommandLine,
1012
// which processes the fasta file(s) from the command line or stdin, depending on what is provided
1113
// by the user.
12-
func length(w io.Writer, filepaths []string, pattern string, file bool, counts bool) error {
14+
func length(w io.Writer, filepaths []string, pattern string, file bool, counts bool, description bool) error {
1315

1416
// print the correct header to stdout, depending on whether the statistics are
1517
// to be calculated per file or per record
@@ -26,7 +28,7 @@ func length(w io.Writer, filepaths []string, pattern string, file bool, counts b
2628
}
2729

2830
// pass lengthRecords + the cli arguments to collectCommandLine() for processing the fasta file(s)
29-
err := collectCommandLine(w, lengthRecords, filepaths, pattern, file, counts)
31+
err := collectCommandLine(w, lengthRecords, filepaths, pattern, file, counts, description)
3032
if err != nil {
3133
return err
3234
}
@@ -35,7 +37,7 @@ func length(w io.Writer, filepaths []string, pattern string, file bool, counts b
3537
}
3638

3739
// lengthRecords does the work of fastats len for one fasta file at a time.
38-
func lengthRecords(r *Reader, args arguments, w io.Writer) error {
40+
func lengthRecords(r *fasta.Reader, args arguments, w io.Writer) error {
3941

4042
// get the file name in case we need to print it to stdout
4143
filename := filenameFromFullPath(args.filepath)
@@ -57,7 +59,7 @@ func lengthRecords(r *Reader, args arguments, w io.Writer) error {
5759
if args.file {
5860
l_total += len(record.Seq)
5961
} else {
60-
s := fmt.Sprintf("%s\t%d\n", record.ID, len(record.Seq))
62+
s := fmt.Sprintf("%s\t%d\n", return_record_name(record, args.description), len(record.Seq))
6163
_, err := w.Write([]byte(s))
6264
if err != nil {
6365
return err

Diff for: len_test.go

+18-14
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@ import (
44
"bytes"
55
"fmt"
66
"testing"
7+
8+
"github.com/bjeight/fastats/fasta"
79
)
810

911
func Test_length(t *testing.T) {
1012
out := new(bytes.Buffer)
11-
err := length(out, []string{}, "", false, false)
13+
err := length(out, []string{}, "", false, false, false)
1214
if err != nil {
1315
t.Error(err)
1416
}
@@ -21,7 +23,7 @@ func Test_length(t *testing.T) {
2123

2224
func Test_lengthFile(t *testing.T) {
2325
out := new(bytes.Buffer)
24-
err := length(out, []string{}, "", true, false)
26+
err := length(out, []string{}, "", true, false, false)
2527
if err != nil {
2628
t.Error(err)
2729
}
@@ -35,7 +37,7 @@ stdin 0
3537

3638
func Test_lengthCounts(t *testing.T) {
3739
out := new(bytes.Buffer)
38-
err := length(out, []string{}, "", false, true)
40+
err := length(out, []string{}, "", false, true, false)
3941
if err != nil {
4042
t.Error(err)
4143
}
@@ -48,7 +50,7 @@ func Test_lengthCounts(t *testing.T) {
4850

4951
func Test_lengthFileCounts(t *testing.T) {
5052
out := new(bytes.Buffer)
51-
err := length(out, []string{}, "", true, true)
53+
err := length(out, []string{}, "", true, true, false)
5254
if err != nil {
5355
t.Error(err)
5456
}
@@ -72,16 +74,17 @@ ATG
7274
ATGN
7375
`)
7476
fastaR := bytes.NewReader(fastaData)
75-
r := NewReader(fastaR)
77+
r := fasta.NewReader(fastaR)
7678
out := new(bytes.Buffer)
7779

7880
lengthRecords(
7981
r,
8082
arguments{
81-
filepath: "/path/to/myfile.fasta",
82-
file: false,
83-
counts: false,
84-
pattern: "",
83+
filepath: "/path/to/myfile.fasta",
84+
file: false,
85+
counts: false,
86+
description: false,
87+
pattern: "",
8588
},
8689
out,
8790
)
@@ -109,16 +112,17 @@ ATG
109112
ATGN
110113
`)
111114
fastaR := bytes.NewReader(fastaData)
112-
r := NewReader(fastaR)
115+
r := fasta.NewReader(fastaR)
113116
out := new(bytes.Buffer)
114117

115118
lengthRecords(
116119
r,
117120
arguments{
118-
filepath: "/path/to/myfile.fasta",
119-
file: true,
120-
counts: false,
121-
pattern: "",
121+
filepath: "/path/to/myfile.fasta",
122+
file: true,
123+
counts: false,
124+
description: false,
125+
pattern: "",
122126
},
123127
out,
124128
)

Diff for: lib.go

+31-18
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,33 @@ import (
55
"os"
66
"path/filepath"
77
"strings"
8+
9+
"github.com/bjeight/fastats/fasta"
810
)
911

1012
type arguments struct {
11-
filepath string
12-
file bool
13-
counts bool
14-
pattern string
13+
filepath string
14+
file bool
15+
counts bool
16+
description bool
17+
pattern string
1518
}
1619

17-
type fastatsFunction func(*Reader, arguments, io.Writer) error
20+
type fastatsFunction func(*fasta.Reader, arguments, io.Writer) error
1821

1922
// For every file provided on the command line, collectCommandLine applies the correct functionality based on the cli arguments.
2023
// If no files are provided, it signals that we should try to read an uncompressed fasta file from stdin.
21-
func collectCommandLine(w io.Writer, fn fastatsFunction, filepaths []string, pattern string, file bool, count bool) error {
24+
func collectCommandLine(w io.Writer, fn fastatsFunction, filepaths []string, pattern string, file bool, count bool, description bool) error {
2225

2326
// for every file provided on the command line...
2427
for _, fp := range filepaths {
2528
// wrap the arguments up in a struct
2629
a := arguments{
27-
filepath: fp,
28-
file: file,
29-
counts: count,
30-
pattern: pattern,
30+
filepath: fp,
31+
file: file,
32+
counts: count,
33+
description: description,
34+
pattern: pattern,
3135
}
3236
// and pass them to the correct function (defined when collectCommandLine is called)
3337
err := applyFastatsFunction(fn, a, w)
@@ -39,10 +43,11 @@ func collectCommandLine(w io.Writer, fn fastatsFunction, filepaths []string, pat
3943
// if there were no files provided on the command line, attempt to read from stdin
4044
if len(filepaths) == 0 {
4145
a := arguments{
42-
filepath: "stdin",
43-
file: file,
44-
counts: count,
45-
pattern: pattern,
46+
filepath: "stdin",
47+
file: file,
48+
counts: count,
49+
pattern: pattern,
50+
description: description,
4651
}
4752
err := applyFastatsFunction(fn, a, w)
4853
if err != nil {
@@ -58,9 +63,9 @@ func collectCommandLine(w io.Writer, fn fastatsFunction, filepaths []string, pat
5863
// it the command line arguments and the writer from the scope above)
5964
func applyFastatsFunction(fn fastatsFunction, args arguments, w io.Writer) error {
6065
// open stdin or a file
61-
var r *Reader
66+
var r *fasta.Reader
6267
if args.filepath == "stdin" {
63-
r = NewReader(os.Stdin)
68+
r = fasta.NewReader(os.Stdin)
6469
} else {
6570
f, err := os.Open(args.filepath)
6671
if err != nil {
@@ -70,9 +75,9 @@ func applyFastatsFunction(fn fastatsFunction, args arguments, w io.Writer) error
7075
// depending on whether the fasta file is compressed or not, provide the correct reader
7176
switch filepath.Ext(args.filepath) {
7277
case ".gz", ".bgz":
73-
r = NewZReader(f)
78+
r = fasta.NewZReader(f)
7479
default:
75-
r = NewReader(f)
80+
r = fasta.NewReader(f)
7681
}
7782
}
7883

@@ -89,3 +94,11 @@ func filenameFromFullPath(filepath string) string {
8994
sa := strings.Split(filepath, "/")
9095
return sa[len(sa)-1]
9196
}
97+
98+
func return_record_name(fr fasta.FastaRecord, description bool) string {
99+
if description {
100+
return fr.Description
101+
} else {
102+
return fr.ID
103+
}
104+
}

0 commit comments

Comments
 (0)