Skip to content

Commit

Permalink
Merge branch 'latest' into add/cli_parse_args
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb authored Aug 12, 2023
2 parents bbf3d64 + f5c72fc commit e9c0f8c
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:

- name: Start Redis
if: startsWith(runner.os, 'Linux') && (matrix.py == '3.9')
uses: supercharge/redis-github-action@1.5.0
uses: supercharge/redis-github-action@1.6.0
with:
redis-version: 6

Expand Down
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion doc/command-line.md
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ sourmash tax metagenome
--gather-csv HSMA33MX_gather_x_gtdbrs202_k31.csv \
--gather-csv PSM6XBW3_gather_x_gtdbrs202_k31.csv \
--taxonomy gtdb-rs202.taxonomy.v2.csv \
--output-format krona --rank species
--output-format lineage_summary --rank species
```

example `lineage_summary`:
Expand Down
6 changes: 3 additions & 3 deletions doc/databases.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ The GTDB genomic representatives are a low-redundancy subset of Genbank genomes,

| K-mer size | Zipfile collection | SBT | LCA |
| -------- | -------- | -------- | ---- |
| 21 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k21.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k21.sbt.zip) | [download (189 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs207gtdb-rs214-reps.k21.lca.json.gz) |
| 31 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k31.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k31.sbt.zip) | [download (221 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs207/gtdb-rs214-reps.k31.lca.json.gz) |
| 51 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k51.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k51.sbt.zip) | [download (230 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs207/gtdb-rs214-reps.k51.lca.json.gz) |
| 21 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k21.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k21.sbt.zip) | [download (189 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k21.lca.json.gz) |
| 31 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k31.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k31.sbt.zip) | [download (221 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k31.lca.json.gz) |
| 51 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k51.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k51.sbt.zip) | [download (230 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k51.lca.json.gz) |

### GTDB R08-RS214 all genomes (403k)

Expand Down
4 changes: 2 additions & 2 deletions src/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ num-iter = "0.1.43"
once_cell = "1.18.0" # once_cell 1.14+ requires Rust 1.56+
rayon = { version = "1.7.0", optional = true }
serde = { version = "1.0.168", features = ["derive"] }
serde_json = "1.0.103"
serde_json = "1.0.104"
primal-check = "0.3.1"
thiserror = "1.0"
typed-builder = "0.14.0"
Expand All @@ -58,7 +58,7 @@ criterion = "0.5.1"
needletail = { version = "0.5.1", default-features = false }
proptest = { version = "1.2.0", default-features = false, features = ["std"]}
rand = "0.8.2"
tempfile = "3.7.0"
tempfile = "3.7.1"

[[bench]]
name = "index"
Expand Down
4 changes: 4 additions & 0 deletions src/sourmash/cli/sig/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ def subparser(subparsers):
'--from-file',
help='a text file containing a list of files to load signatures from'
)
subparser.add_argument(
'-E', '--extension', type=str, default='.sig',
help="write files with this extension ('.sig' by default)"
)
add_ksize_arg(subparser)
add_moltype_args(subparser)
add_picklist_args(subparser)
Expand Down
4 changes: 2 additions & 2 deletions src/sourmash/sig/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ def split(args):
_extend_signatures_with_from_file(args)

output_names = set()
output_scaled_template = '{md5sum}.k={ksize}.scaled={scaled}.{moltype}.dup={dup}.{basename}.sig'
output_num_template = '{md5sum}.k={ksize}.num={num}.{moltype}.dup={dup}.{basename}.sig'
output_scaled_template = '{md5sum}.k={ksize}.scaled={scaled}.{moltype}.dup={dup}.{basename}' + args.extension
output_num_template = '{md5sum}.k={ksize}.num={num}.{moltype}.dup={dup}.{basename}' + args.extension

if args.output_dir:
if not os.path.exists(args.output_dir):
Expand Down
36 changes: 36 additions & 0 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -1465,6 +1465,42 @@ def test_sig_split_3_multisig(c):
assert os.path.exists(c.output(filename))


def test_sig_split_3_multisig_sig_gz(runtmp):
    # Run 'sig split' on 47.fa.sig plus 47+63-multisig.sig, passing
    # '-E .sig.gz', and check that the six expected output files exist
    # with a '.sig.gz' suffix.
    single_sig = utils.get_test_data('47.fa.sig')
    multi_sig = utils.get_test_data('47+63-multisig.sig')

    runtmp.run_sourmash('sig', 'split', single_sig, multi_sig,
                        '-E', '.sig.gz')

    expected_files = (
        '57e2b22f.k=31.scaled=1000.DNA.dup=0.none.sig.gz',
        'bde81a41.k=31.scaled=1000.DNA.dup=0.none.sig.gz',
        'f033bbd8.k=31.scaled=1000.DNA.dup=0.none.sig.gz',
        '87a9aec4.k=31.scaled=1000.DNA.dup=0.none.sig.gz',
        '837bf2a7.k=31.scaled=1000.DNA.dup=0.none.sig.gz',
        '485c3377.k=31.scaled=1000.DNA.dup=0.none.sig.gz',
    )
    for expected in expected_files:
        out_path = runtmp.output(expected)
        assert os.path.exists(out_path)


def test_sig_split_3_multisig_zip(runtmp):
    # Run 'sig split' on 47.fa.sig plus 47+63-multisig.sig, passing
    # '-E .zip', and check that the six expected output files exist
    # with a '.zip' suffix.
    single_sig = utils.get_test_data('47.fa.sig')
    multi_sig = utils.get_test_data('47+63-multisig.sig')

    runtmp.run_sourmash('sig', 'split', single_sig, multi_sig,
                        '-E', '.zip')

    expected_files = (
        '57e2b22f.k=31.scaled=1000.DNA.dup=0.none.zip',
        'bde81a41.k=31.scaled=1000.DNA.dup=0.none.zip',
        'f033bbd8.k=31.scaled=1000.DNA.dup=0.none.zip',
        '87a9aec4.k=31.scaled=1000.DNA.dup=0.none.zip',
        '837bf2a7.k=31.scaled=1000.DNA.dup=0.none.zip',
        '485c3377.k=31.scaled=1000.DNA.dup=0.none.zip',
    )
    for expected in expected_files:
        out_path = runtmp.output(expected)
        assert os.path.exists(out_path)


@utils.in_tempdir
def test_sig_split_4_sbt_prot(c):
# split sbt
Expand Down

0 comments on commit e9c0f8c

Please sign in to comment.