From ff4c7677fa74a252d650c9b39a91b3394abc50a9 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 12 Aug 2023 07:25:15 -0700 Subject: [PATCH] add support for -E/--extension to sig split --- src/sourmash/cli/sig/split.py | 4 ++++ src/sourmash/sig/__main__.py | 4 ++-- tests/test_cmd_signature.py | 36 +++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/sourmash/cli/sig/split.py b/src/sourmash/cli/sig/split.py index 527ee650ef..e4587b3e0f 100644 --- a/src/sourmash/cli/sig/split.py +++ b/src/sourmash/cli/sig/split.py @@ -59,6 +59,10 @@ def subparser(subparsers): '--from-file', help='a text file containing a list of files to load signatures from' ) + subparser.add_argument( + '-E', '--extension', type=str, default='.sig', + help="write files with this extension ('.sig' by default)" + ) add_ksize_arg(subparser) add_moltype_args(subparser) add_picklist_args(subparser) diff --git a/src/sourmash/sig/__main__.py b/src/sourmash/sig/__main__.py index 225725c40d..7336f7ac79 100644 --- a/src/sourmash/sig/__main__.py +++ b/src/sourmash/sig/__main__.py @@ -139,8 +139,8 @@ def split(args): _extend_signatures_with_from_file(args) output_names = set() - output_scaled_template = '{md5sum}.k={ksize}.scaled={scaled}.{moltype}.dup={dup}.{basename}.sig' - output_num_template = '{md5sum}.k={ksize}.num={num}.{moltype}.dup={dup}.{basename}.sig' + output_scaled_template = '{md5sum}.k={ksize}.scaled={scaled}.{moltype}.dup={dup}.{basename}' + args.extension + output_num_template = '{md5sum}.k={ksize}.num={num}.{moltype}.dup={dup}.{basename}' + args.extension if args.output_dir: if not os.path.exists(args.output_dir): diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py index 14f4874860..15da197df6 100644 --- a/tests/test_cmd_signature.py +++ b/tests/test_cmd_signature.py @@ -1456,6 +1456,42 @@ def test_sig_split_3_multisig(c): assert os.path.exists(c.output(filename)) +def test_sig_split_3_multisig_sig_gz(runtmp): + # split 47 and 47+63-multisig.sig with a .sig.gz extension + c = runtmp + + sig47 = utils.get_test_data('47.fa.sig') + multisig = utils.get_test_data('47+63-multisig.sig') + c.run_sourmash('sig', 'split', sig47, multisig, '-E', '.sig.gz') + + outlist = ['57e2b22f.k=31.scaled=1000.DNA.dup=0.none.sig.gz', + 'bde81a41.k=31.scaled=1000.DNA.dup=0.none.sig.gz', + 'f033bbd8.k=31.scaled=1000.DNA.dup=0.none.sig.gz', + '87a9aec4.k=31.scaled=1000.DNA.dup=0.none.sig.gz', + '837bf2a7.k=31.scaled=1000.DNA.dup=0.none.sig.gz', + '485c3377.k=31.scaled=1000.DNA.dup=0.none.sig.gz'] + for filename in outlist: + assert os.path.exists(c.output(filename)) + + +def test_sig_split_3_multisig_zip(runtmp): + # split 47 and 47+63-multisig.sig with a .zip extension + c = runtmp + + sig47 = utils.get_test_data('47.fa.sig') + multisig = utils.get_test_data('47+63-multisig.sig') + c.run_sourmash('sig', 'split', sig47, multisig, '-E', '.zip') + + outlist = ['57e2b22f.k=31.scaled=1000.DNA.dup=0.none.zip', + 'bde81a41.k=31.scaled=1000.DNA.dup=0.none.zip', + 'f033bbd8.k=31.scaled=1000.DNA.dup=0.none.zip', + '87a9aec4.k=31.scaled=1000.DNA.dup=0.none.zip', + '837bf2a7.k=31.scaled=1000.DNA.dup=0.none.zip', + '485c3377.k=31.scaled=1000.DNA.dup=0.none.zip'] + for filename in outlist: + assert os.path.exists(c.output(filename)) + + @utils.in_tempdir def test_sig_split_4_sbt_prot(c): # split sbt