diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 585ca36db8..f8cdf7c4db 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -57,7 +57,7 @@ jobs: - name: Start Redis if: startsWith(runner.os, 'Linux') && (matrix.py == '3.9') - uses: supercharge/redis-github-action@1.5.0 + uses: supercharge/redis-github-action@1.6.0 with: redis-version: 6 diff --git a/Cargo.lock b/Cargo.lock index 62ef488d93..fce327bd3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1344,9 +1344,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.103" +version = "1.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" +checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c" dependencies = [ "itoa 1.0.1", "ryu", @@ -1444,9 +1444,9 @@ checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1" [[package]] name = "tempfile" -version = "3.7.0" +version = "3.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" +checksum = "dc02fddf48964c42031a0b3fe0428320ecf3a73c401040fc0096f97794310651" dependencies = [ "cfg-if", "fastrand", diff --git a/doc/command-line.md b/doc/command-line.md index 3979f652e2..00f7291a42 100644 --- a/doc/command-line.md +++ b/doc/command-line.md @@ -612,7 +612,7 @@ sourmash tax metagenome --gather-csv HSMA33MX_gather_x_gtdbrs202_k31.csv \ --gather-csv PSM6XBW3_gather_x_gtdbrs202_k31.csv \ --taxonomy gtdb-rs202.taxonomy.v2.csv \ - --output-format krona --rank species + --output-format lineage_summary --rank species ``` example `lineage_summary`: diff --git a/doc/databases.md b/doc/databases.md index b95a97a52d..229029ac22 100644 --- a/doc/databases.md +++ b/doc/databases.md @@ -42,9 +42,9 @@ The GTDB genomic representatives are a low-redundancy subset of Genbank genomes, | K-mer size | Zipfile collection | SBT | LCA | | -------- | -------- | -------- | ---- | -| 21 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k21.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k21.sbt.zip) | [download (189 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs207gtdb-rs214-reps.k21.lca.json.gz) | -| 31 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k31.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k31.sbt.zip) | [download (221 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs207/gtdb-rs214-reps.k31.lca.json.gz) | -| 51 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k51.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k51.sbt.zip) | [download (230 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs207/gtdb-rs214-reps.k51.lca.json.gz) | +| 21 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k21.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k21.sbt.zip) | [download (189 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k21.lca.json.gz) | +| 31 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k31.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k31.sbt.zip) | [download (221 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k31.lca.json.gz) | +| 51 | [download (2.2 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k51.zip) | [download (4.4 GB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k51.sbt.zip) | [download (230 MB)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/gtdb-rs214/gtdb-rs214-reps.k51.lca.json.gz) | ### GTDB R08-RS214 all genomes (403k) diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index f710a088ee..2692f22234 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -42,7 +42,7 @@ num-iter = "0.1.43" once_cell = "1.18.0" # once_cell 1.14+ requires Rust 1.56+ rayon = { version = "1.7.0", optional = true } serde = { version = "1.0.168", features = ["derive"] } -serde_json = "1.0.103" +serde_json = "1.0.104" primal-check = "0.3.1" thiserror = "1.0" typed-builder = "0.14.0" @@ -58,7 +58,7 @@ criterion = "0.5.1" needletail = { version = "0.5.1", default-features = false } proptest = { version = "1.2.0", default-features = false, features = ["std"]} rand = "0.8.2" -tempfile = "3.7.0" +tempfile = "3.7.1" [[bench]] name = "index" diff --git a/src/sourmash/cli/sig/split.py b/src/sourmash/cli/sig/split.py index 527ee650ef..e4587b3e0f 100644 --- a/src/sourmash/cli/sig/split.py +++ b/src/sourmash/cli/sig/split.py @@ -59,6 +59,10 @@ def subparser(subparsers): '--from-file', help='a text file containing a list of files to load signatures from' ) + subparser.add_argument( + '-E', '--extension', type=str, default='.sig', + help="write files with this extension ('.sig' by default)" + ) add_ksize_arg(subparser) add_moltype_args(subparser) add_picklist_args(subparser) diff --git a/src/sourmash/sig/__main__.py b/src/sourmash/sig/__main__.py index 225725c40d..7336f7ac79 100644 --- a/src/sourmash/sig/__main__.py +++ b/src/sourmash/sig/__main__.py @@ -139,8 +139,8 @@ def split(args): _extend_signatures_with_from_file(args) output_names = set() - output_scaled_template = '{md5sum}.k={ksize}.scaled={scaled}.{moltype}.dup={dup}.{basename}.sig' - output_num_template = '{md5sum}.k={ksize}.num={num}.{moltype}.dup={dup}.{basename}.sig' + output_scaled_template = '{md5sum}.k={ksize}.scaled={scaled}.{moltype}.dup={dup}.{basename}' + args.extension + output_num_template = '{md5sum}.k={ksize}.num={num}.{moltype}.dup={dup}.{basename}' + args.extension if args.output_dir: if not os.path.exists(args.output_dir): diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py index 8241c8579f..680924568a 100644 --- a/tests/test_cmd_signature.py +++ b/tests/test_cmd_signature.py @@ -1465,6 +1465,42 @@ def test_sig_split_3_multisig(c): assert os.path.exists(c.output(filename)) +def test_sig_split_3_multisig_sig_gz(runtmp): + # split 47 and 47+63-multisig.sig with a .sig.gz extension + c = runtmp + + sig47 = utils.get_test_data('47.fa.sig') + multisig = utils.get_test_data('47+63-multisig.sig') + c.run_sourmash('sig', 'split', sig47, multisig, '-E', '.sig.gz') + + outlist = ['57e2b22f.k=31.scaled=1000.DNA.dup=0.none.sig.gz', + 'bde81a41.k=31.scaled=1000.DNA.dup=0.none.sig.gz', + 'f033bbd8.k=31.scaled=1000.DNA.dup=0.none.sig.gz', + '87a9aec4.k=31.scaled=1000.DNA.dup=0.none.sig.gz', + '837bf2a7.k=31.scaled=1000.DNA.dup=0.none.sig.gz', + '485c3377.k=31.scaled=1000.DNA.dup=0.none.sig.gz'] + for filename in outlist: + assert os.path.exists(c.output(filename)) + + +def test_sig_split_3_multisig_zip(runtmp): + # split 47 and 47+63-multisig.sig with a .zip extension + c = runtmp + + sig47 = utils.get_test_data('47.fa.sig') + multisig = utils.get_test_data('47+63-multisig.sig') + c.run_sourmash('sig', 'split', sig47, multisig, '-E', '.zip') + + outlist = ['57e2b22f.k=31.scaled=1000.DNA.dup=0.none.zip', + 'bde81a41.k=31.scaled=1000.DNA.dup=0.none.zip', + 'f033bbd8.k=31.scaled=1000.DNA.dup=0.none.zip', + '87a9aec4.k=31.scaled=1000.DNA.dup=0.none.zip', + '837bf2a7.k=31.scaled=1000.DNA.dup=0.none.zip', + '485c3377.k=31.scaled=1000.DNA.dup=0.none.zip'] + for filename in outlist: + assert os.path.exists(c.output(filename)) + + @utils.in_tempdir def test_sig_split_4_sbt_prot(c): # split sbt