From d33e0cb52bae5a872aa5976d64faaca5a0482bf1 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 28 Jun 2024 14:11:55 +0200 Subject: [PATCH 01/50] added new amrfinder directory and moved types to card directory --- q2_amr/{types/tests => amrfinderplus}/__init__.py | 0 q2_amr/amrfinderplus/tests/__init__.py | 7 +++++++ q2_amr/amrfinderplus/types/__init__.py | 9 +++++++++ q2_amr/amrfinderplus/types/_format.py | 7 +++++++ q2_amr/amrfinderplus/types/_transformer.py | 7 +++++++ q2_amr/amrfinderplus/types/_type.py | 10 ++++++++++ q2_amr/amrfinderplus/types/tests/__init__.py | 7 +++++++ .../types/tests/test_types_formats_transformers.py | 7 +++++++ q2_amr/card/database.py | 4 ++-- q2_amr/card/heatmap.py | 2 +- q2_amr/card/mags.py | 2 +- q2_amr/card/partition.py | 4 ++-- q2_amr/card/reads.py | 4 ++-- q2_amr/card/tests/test_database.py | 5 ++++- q2_amr/card/tests/test_heatmap.py | 2 +- q2_amr/card/tests/test_mags.py | 2 +- q2_amr/card/tests/test_partition.py | 2 +- q2_amr/card/tests/test_reads.py | 2 +- q2_amr/card/tests/test_utils.py | 5 ++++- q2_amr/{ => card}/types/__init__.py | 0 q2_amr/{ => card}/types/_format.py | 0 q2_amr/{ => card}/types/_transformer.py | 4 ++-- q2_amr/{ => card}/types/_type.py | 0 q2_amr/card/types/tests/__init__.py | 7 +++++++ .../types/tests/data/61mer_analysis.allele.txt | 0 .../types/tests/data/61mer_analysis.gene.txt | 0 .../types/tests/data/61mer_analysis_rgi_summary.txt | 0 q2_amr/{ => card}/types/tests/data/DNA_fasta.fasta | 0 .../{ => card}/types/tests/data/DNA_fasta_-.fasta | 0 .../sample1/allele_mapping_data.txt | 0 .../sample1/overall_mapping_stats.txt | 0 .../sample1/sorted.length_100.bam | Bin .../sample2/allele_mapping_data.txt | 0 .../sample2/overall_mapping_stats.txt | 0 .../sample2/sorted.length_100.bam | Bin .../amr_annotation.json | 0 .../amr_annotation.txt | 0 .../amr_annotation.json | 0 .../amr_annotation.txt | 0 .../amr_annotation.json | 0 .../amr_annotation.txt | 0 .../sample1/gene_mapping_data.txt | 0 .../sample2/gene_mapping_data.txt | 0 .../sample1/bin1/61mer_analysis_rgi_summary.txt | 0 .../61mer_analysis.json | 0 .../61mer_analysis_rgi_summary.txt | 0 .../61mer_analysis.json | 0 .../61mer_analysis_rgi_summary.txt | 0 .../61mer_analysis.json | 0 .../61mer_analysis_rgi_summary.txt | 0 .../sample1/61mer_analysis.allele.txt | 0 .../sample1/61mer_analysis.json | 0 .../sample2/61mer_analysis.allele.txt | 0 .../sample2/61mer_analysis.json | 0 .../sample1/61mer_analysis.gene.txt | 0 .../sample2/61mer_analysis.gene.txt | 0 q2_amr/{ => card}/types/tests/data/card_test.json | 0 .../{ => card}/types/tests/data/card_test_dna.fasta | 0 .../types/tests/data/card_test_protein.fasta | 0 q2_amr/{ => card}/types/tests/data/empty_dict.json | 0 .../tests/data/index-for-model-sequences-test.txt | 0 .../{ => card}/types/tests/data/kmer_json_test.json | 0 .../{ => card}/types/tests/data/kmer_txt_test.txt | 0 .../types/tests/data/mags_61mer_analysis.json | 0 .../types/tests/data/reads_61mer_analysis.json | 0 q2_amr/{ => card}/types/tests/data/rgi_output.json | 0 q2_amr/{ => card}/types/tests/data/rgi_output.txt | 0 .../{ => card}/types/tests/data/rgi_output_dna.fna | 0 .../types/tests/data/rgi_output_protein.fna | 0 .../types/tests/data/tabulated_df_allele.txt | 0 .../types/tests/data/tabulated_df_mags.txt | 0 .../types/tests/test_types_formats_transformers.py | 8 ++++---- q2_amr/plugin_setup.py | 8 ++++---- setup.py | 2 +- 74 files changed, 92 insertions(+), 25 deletions(-) rename q2_amr/{types/tests => amrfinderplus}/__init__.py (100%) create mode 100644 q2_amr/amrfinderplus/tests/__init__.py create mode 100644 q2_amr/amrfinderplus/types/__init__.py create mode 100644 q2_amr/amrfinderplus/types/_format.py create mode 100644 q2_amr/amrfinderplus/types/_transformer.py create mode 100644 q2_amr/amrfinderplus/types/_type.py create mode 100644 q2_amr/amrfinderplus/types/tests/__init__.py create mode 100644 q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py rename q2_amr/{ => card}/types/__init__.py (100%) rename q2_amr/{ => card}/types/_format.py (100%) rename q2_amr/{ => card}/types/_transformer.py (98%) rename q2_amr/{ => card}/types/_type.py (100%) create mode 100644 q2_amr/card/types/tests/__init__.py rename q2_amr/{ => card}/types/tests/data/61mer_analysis.allele.txt (100%) rename q2_amr/{ => card}/types/tests/data/61mer_analysis.gene.txt (100%) rename q2_amr/{ => card}/types/tests/data/61mer_analysis_rgi_summary.txt (100%) rename q2_amr/{ => card}/types/tests/data/DNA_fasta.fasta (100%) rename q2_amr/{ => card}/types/tests/data/DNA_fasta_-.fasta (100%) rename q2_amr/{ => card}/types/tests/data/card_allele_annotation/sample1/allele_mapping_data.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_allele_annotation/sample1/overall_mapping_stats.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_allele_annotation/sample1/sorted.length_100.bam (100%) rename q2_amr/{ => card}/types/tests/data/card_allele_annotation/sample2/allele_mapping_data.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_allele_annotation/sample2/overall_mapping_stats.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_allele_annotation/sample2/sorted.length_100.bam (100%) rename q2_amr/{ => card}/types/tests/data/card_annotation/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.json (100%) rename q2_amr/{ => card}/types/tests/data/card_annotation/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_annotation/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.json (100%) rename q2_amr/{ => card}/types/tests/data/card_annotation/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_annotation/sample2/f5a16381-ea80-49f9-875e-620f333a9293/amr_annotation.json (100%) rename q2_amr/{ => card}/types/tests/data/card_annotation/sample2/f5a16381-ea80-49f9-875e-620f333a9293/amr_annotation.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_gene_annotation/sample1/gene_mapping_data.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_gene_annotation/sample2/gene_mapping_data.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_kmer_analysis_mags/sample1/bin1/61mer_analysis_rgi_summary.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_kmer_analysis_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/61mer_analysis.json (100%) rename q2_amr/{ => card}/types/tests/data/card_kmer_analysis_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/61mer_analysis_rgi_summary.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_kmer_analysis_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/61mer_analysis.json (100%) rename q2_amr/{ => card}/types/tests/data/card_kmer_analysis_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/61mer_analysis_rgi_summary.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_kmer_analysis_mags/sample2/f5a16381-ea80-49f9-875e-620f333a9293/61mer_analysis.json (100%) rename q2_amr/{ => card}/types/tests/data/card_kmer_analysis_mags/sample2/f5a16381-ea80-49f9-875e-620f333a9293/61mer_analysis_rgi_summary.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_reads_allele_kmer_analysis/sample1/61mer_analysis.allele.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_reads_allele_kmer_analysis/sample1/61mer_analysis.json (100%) rename q2_amr/{ => card}/types/tests/data/card_reads_allele_kmer_analysis/sample2/61mer_analysis.allele.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_reads_allele_kmer_analysis/sample2/61mer_analysis.json (100%) rename q2_amr/{ => card}/types/tests/data/card_reads_gene_kmer_analysis/sample1/61mer_analysis.gene.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_reads_gene_kmer_analysis/sample2/61mer_analysis.gene.txt (100%) rename q2_amr/{ => card}/types/tests/data/card_test.json (100%) rename q2_amr/{ => card}/types/tests/data/card_test_dna.fasta (100%) rename q2_amr/{ => card}/types/tests/data/card_test_protein.fasta (100%) rename q2_amr/{ => card}/types/tests/data/empty_dict.json (100%) rename q2_amr/{ => card}/types/tests/data/index-for-model-sequences-test.txt (100%) rename q2_amr/{ => card}/types/tests/data/kmer_json_test.json (100%) rename q2_amr/{ => card}/types/tests/data/kmer_txt_test.txt (100%) rename q2_amr/{ => card}/types/tests/data/mags_61mer_analysis.json (100%) rename q2_amr/{ => card}/types/tests/data/reads_61mer_analysis.json (100%) rename q2_amr/{ => card}/types/tests/data/rgi_output.json (100%) rename q2_amr/{ => card}/types/tests/data/rgi_output.txt (100%) rename q2_amr/{ => card}/types/tests/data/rgi_output_dna.fna (100%) rename q2_amr/{ => card}/types/tests/data/rgi_output_protein.fna (100%) rename q2_amr/{ => card}/types/tests/data/tabulated_df_allele.txt (100%) rename q2_amr/{ => card}/types/tests/data/tabulated_df_mags.txt (100%) rename q2_amr/{ => card}/types/tests/test_types_formats_transformers.py (99%) diff --git a/q2_amr/types/tests/__init__.py b/q2_amr/amrfinderplus/__init__.py similarity index 100% rename from q2_amr/types/tests/__init__.py rename to q2_amr/amrfinderplus/__init__.py diff --git a/q2_amr/amrfinderplus/tests/__init__.py b/q2_amr/amrfinderplus/tests/__init__.py new file mode 100644 index 0000000..bc9c3d2 --- /dev/null +++ b/q2_amr/amrfinderplus/tests/__init__.py @@ -0,0 +1,7 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2019-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- diff --git a/q2_amr/amrfinderplus/types/__init__.py b/q2_amr/amrfinderplus/types/__init__.py new file mode 100644 index 0000000..19b7008 --- /dev/null +++ b/q2_amr/amrfinderplus/types/__init__.py @@ -0,0 +1,9 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2019-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- + +__all__ = [] diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py new file mode 100644 index 0000000..bc9c3d2 --- /dev/null +++ b/q2_amr/amrfinderplus/types/_format.py @@ -0,0 +1,7 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2019-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- diff --git a/q2_amr/amrfinderplus/types/_transformer.py b/q2_amr/amrfinderplus/types/_transformer.py new file mode 100644 index 0000000..bc9c3d2 --- /dev/null +++ b/q2_amr/amrfinderplus/types/_transformer.py @@ -0,0 +1,7 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2019-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- diff --git a/q2_amr/amrfinderplus/types/_type.py b/q2_amr/amrfinderplus/types/_type.py new file mode 100644 index 0000000..680bfcf --- /dev/null +++ b/q2_amr/amrfinderplus/types/_type.py @@ -0,0 +1,10 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2019-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- +from qiime2.core.type import SemanticType + +AMRFinderPlusDatabase = SemanticType("AMRFinderPlusDatabase") diff --git a/q2_amr/amrfinderplus/types/tests/__init__.py b/q2_amr/amrfinderplus/types/tests/__init__.py new file mode 100644 index 0000000..bc9c3d2 --- /dev/null +++ b/q2_amr/amrfinderplus/types/tests/__init__.py @@ -0,0 +1,7 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2019-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py new file mode 100644 index 0000000..bc9c3d2 --- /dev/null +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -0,0 +1,7 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2019-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- diff --git a/q2_amr/card/database.py b/q2_amr/card/database.py index 766f9df..a761db8 100644 --- a/q2_amr/card/database.py +++ b/q2_amr/card/database.py @@ -9,11 +9,11 @@ import requests from tqdm import tqdm -from q2_amr.card.utils import colorify, run_command -from q2_amr.types._format import ( +from q2_amr.card.types._format import ( CARDDatabaseDirectoryFormat, CARDKmerDatabaseDirectoryFormat, ) +from q2_amr.card.utils import colorify, run_command def fetch_card_db() -> (CARDDatabaseDirectoryFormat, CARDKmerDatabaseDirectoryFormat): diff --git a/q2_amr/card/heatmap.py b/q2_amr/card/heatmap.py index b6c93ed..000e9de 100644 --- a/q2_amr/card/heatmap.py +++ b/q2_amr/card/heatmap.py @@ -8,8 +8,8 @@ import pkg_resources import q2templates +from q2_amr.card.types import CARDAnnotationDirectoryFormat from q2_amr.card.utils import run_command -from q2_amr.types import CARDAnnotationDirectoryFormat def heatmap( diff --git a/q2_amr/card/mags.py b/q2_amr/card/mags.py index 7c0dbcf..4a5c72f 100644 --- a/q2_amr/card/mags.py +++ b/q2_amr/card/mags.py @@ -6,8 +6,8 @@ import pandas as pd from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt +from q2_amr.card.types import CARDAnnotationDirectoryFormat, CARDDatabaseDirectoryFormat from q2_amr.card.utils import create_count_table, load_card_db, read_in_txt, run_command -from q2_amr.types import CARDAnnotationDirectoryFormat, CARDDatabaseDirectoryFormat def annotate_mags_card( diff --git a/q2_amr/card/partition.py b/q2_amr/card/partition.py index 50e4479..d52c98b 100644 --- a/q2_amr/card/partition.py +++ b/q2_amr/card/partition.py @@ -5,8 +5,7 @@ import numpy as np from qiime2.util import duplicate -from q2_amr.card.utils import copy_files -from q2_amr.types import ( +from q2_amr.card.types import ( CARDAlleleAnnotationDirectoryFormat, CARDAnnotationDirectoryFormat, CARDGeneAnnotationDirectoryFormat, @@ -14,6 +13,7 @@ CARDReadsAlleleKmerAnalysisDirectoryFormat, CARDReadsGeneKmerAnalysisDirectoryFormat, ) +from q2_amr.card.utils import copy_files def collate_mags_annotations( diff --git a/q2_amr/card/reads.py b/q2_amr/card/reads.py index 44031db..586c67d 100644 --- a/q2_amr/card/reads.py +++ b/q2_amr/card/reads.py @@ -10,12 +10,12 @@ SingleLanePerSampleSingleEndFastqDirFmt, ) -from q2_amr.card.utils import create_count_table, load_card_db, read_in_txt, run_command -from q2_amr.types import ( +from q2_amr.card.types import ( CARDAlleleAnnotationDirectoryFormat, CARDDatabaseDirectoryFormat, CARDGeneAnnotationDirectoryFormat, ) +from q2_amr.card.utils import create_count_table, load_card_db, read_in_txt, run_command def annotate_reads_card( diff --git a/q2_amr/card/tests/test_database.py b/q2_amr/card/tests/test_database.py index 0f5ac2e..21d2067 100644 --- a/q2_amr/card/tests/test_database.py +++ b/q2_amr/card/tests/test_database.py @@ -8,7 +8,10 @@ from qiime2.plugin.testing import TestPluginBase from q2_amr.card.database import download_with_progress_bar, fetch_card_db, preprocess -from q2_amr.types import CARDDatabaseDirectoryFormat, CARDKmerDatabaseDirectoryFormat +from q2_amr.card.types import ( + CARDDatabaseDirectoryFormat, + CARDKmerDatabaseDirectoryFormat, +) class TestAnnotateMagsCard(TestPluginBase): diff --git a/q2_amr/card/tests/test_heatmap.py b/q2_amr/card/tests/test_heatmap.py index d7664b4..7e82f5f 100644 --- a/q2_amr/card/tests/test_heatmap.py +++ b/q2_amr/card/tests/test_heatmap.py @@ -10,7 +10,7 @@ heatmap, run_rgi_heatmap, ) -from q2_amr.types import CARDAnnotationDirectoryFormat +from q2_amr.card.types import CARDAnnotationDirectoryFormat class TestHeatmap(TestPluginBase): diff --git a/q2_amr/card/tests/test_mags.py b/q2_amr/card/tests/test_mags.py index 11a286e..53697cd 100644 --- a/q2_amr/card/tests/test_mags.py +++ b/q2_amr/card/tests/test_mags.py @@ -7,7 +7,7 @@ from qiime2.plugin.testing import TestPluginBase from q2_amr.card.mags import annotate_mags_card, run_rgi_main -from q2_amr.types import CARDAnnotationDirectoryFormat, CARDDatabaseDirectoryFormat +from q2_amr.card.types import CARDAnnotationDirectoryFormat, CARDDatabaseDirectoryFormat class TestAnnotateMagsCard(TestPluginBase): diff --git a/q2_amr/card/tests/test_partition.py b/q2_amr/card/tests/test_partition.py index b6e9b7d..3854ab6 100644 --- a/q2_amr/card/tests/test_partition.py +++ b/q2_amr/card/tests/test_partition.py @@ -13,7 +13,7 @@ partition_reads_allele_annotations, partition_reads_gene_annotations, ) -from q2_amr.types import ( +from q2_amr.card.types import ( CARDAlleleAnnotationDirectoryFormat, CARDAnnotationDirectoryFormat, CARDGeneAnnotationDirectoryFormat, diff --git a/q2_amr/card/tests/test_reads.py b/q2_amr/card/tests/test_reads.py index 749ec77..c183ff8 100644 --- a/q2_amr/card/tests/test_reads.py +++ b/q2_amr/card/tests/test_reads.py @@ -10,7 +10,7 @@ from qiime2.plugin.testing import TestPluginBase from q2_amr.card.reads import annotate_reads_card, run_rgi_bwt -from q2_amr.types import ( +from q2_amr.card.types import ( CARDAlleleAnnotationDirectoryFormat, CARDDatabaseDirectoryFormat, CARDGeneAnnotationDirectoryFormat, diff --git a/q2_amr/card/tests/test_utils.py b/q2_amr/card/tests/test_utils.py index 15aaabf..dd24934 100644 --- a/q2_amr/card/tests/test_utils.py +++ b/q2_amr/card/tests/test_utils.py @@ -6,6 +6,10 @@ import pandas as pd from qiime2.plugin.testing import TestPluginBase +from q2_amr.card.types import ( + CARDDatabaseDirectoryFormat, + CARDKmerDatabaseDirectoryFormat, +) from q2_amr.card.utils import ( colorify, copy_files, @@ -13,7 +17,6 @@ load_card_db, read_in_txt, ) -from q2_amr.types import CARDDatabaseDirectoryFormat, CARDKmerDatabaseDirectoryFormat class TestAnnotateReadsCARD(TestPluginBase): diff --git a/q2_amr/types/__init__.py b/q2_amr/card/types/__init__.py similarity index 100% rename from q2_amr/types/__init__.py rename to q2_amr/card/types/__init__.py diff --git a/q2_amr/types/_format.py b/q2_amr/card/types/_format.py similarity index 100% rename from q2_amr/types/_format.py rename to q2_amr/card/types/_format.py diff --git a/q2_amr/types/_transformer.py b/q2_amr/card/types/_transformer.py similarity index 98% rename from q2_amr/types/_transformer.py rename to q2_amr/card/types/_transformer.py index 1139f7d..4536df8 100644 --- a/q2_amr/types/_transformer.py +++ b/q2_amr/card/types/_transformer.py @@ -18,9 +18,9 @@ from q2_types.genome_data import GenesDirectoryFormat, ProteinsDirectoryFormat from skbio import DNA, Protein -from q2_amr.types import CARDAnnotationDirectoryFormat +from q2_amr.card.types import CARDAnnotationDirectoryFormat +from q2_amr.plugin_setup import plugin -from ..plugin_setup import plugin from ._format import ( CARDAlleleAnnotationDirectoryFormat, CARDAnnotationJSONFormat, diff --git a/q2_amr/types/_type.py b/q2_amr/card/types/_type.py similarity index 100% rename from q2_amr/types/_type.py rename to q2_amr/card/types/_type.py diff --git a/q2_amr/card/types/tests/__init__.py b/q2_amr/card/types/tests/__init__.py new file mode 100644 index 0000000..bc9c3d2 --- /dev/null +++ b/q2_amr/card/types/tests/__init__.py @@ -0,0 +1,7 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2019-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- diff --git a/q2_amr/types/tests/data/61mer_analysis.allele.txt b/q2_amr/card/types/tests/data/61mer_analysis.allele.txt similarity index 100% rename from q2_amr/types/tests/data/61mer_analysis.allele.txt rename to q2_amr/card/types/tests/data/61mer_analysis.allele.txt diff --git a/q2_amr/types/tests/data/61mer_analysis.gene.txt b/q2_amr/card/types/tests/data/61mer_analysis.gene.txt similarity index 100% rename from q2_amr/types/tests/data/61mer_analysis.gene.txt rename to q2_amr/card/types/tests/data/61mer_analysis.gene.txt diff --git a/q2_amr/types/tests/data/61mer_analysis_rgi_summary.txt b/q2_amr/card/types/tests/data/61mer_analysis_rgi_summary.txt similarity index 100% rename from q2_amr/types/tests/data/61mer_analysis_rgi_summary.txt rename to q2_amr/card/types/tests/data/61mer_analysis_rgi_summary.txt diff --git a/q2_amr/types/tests/data/DNA_fasta.fasta b/q2_amr/card/types/tests/data/DNA_fasta.fasta similarity index 100% rename from q2_amr/types/tests/data/DNA_fasta.fasta rename to q2_amr/card/types/tests/data/DNA_fasta.fasta diff --git a/q2_amr/types/tests/data/DNA_fasta_-.fasta b/q2_amr/card/types/tests/data/DNA_fasta_-.fasta similarity index 100% rename from q2_amr/types/tests/data/DNA_fasta_-.fasta rename to q2_amr/card/types/tests/data/DNA_fasta_-.fasta diff --git a/q2_amr/types/tests/data/card_allele_annotation/sample1/allele_mapping_data.txt b/q2_amr/card/types/tests/data/card_allele_annotation/sample1/allele_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/card_allele_annotation/sample1/allele_mapping_data.txt rename to q2_amr/card/types/tests/data/card_allele_annotation/sample1/allele_mapping_data.txt diff --git a/q2_amr/types/tests/data/card_allele_annotation/sample1/overall_mapping_stats.txt b/q2_amr/card/types/tests/data/card_allele_annotation/sample1/overall_mapping_stats.txt similarity index 100% rename from q2_amr/types/tests/data/card_allele_annotation/sample1/overall_mapping_stats.txt rename to q2_amr/card/types/tests/data/card_allele_annotation/sample1/overall_mapping_stats.txt diff --git a/q2_amr/types/tests/data/card_allele_annotation/sample1/sorted.length_100.bam b/q2_amr/card/types/tests/data/card_allele_annotation/sample1/sorted.length_100.bam similarity index 100% rename from q2_amr/types/tests/data/card_allele_annotation/sample1/sorted.length_100.bam rename to q2_amr/card/types/tests/data/card_allele_annotation/sample1/sorted.length_100.bam diff --git a/q2_amr/types/tests/data/card_allele_annotation/sample2/allele_mapping_data.txt b/q2_amr/card/types/tests/data/card_allele_annotation/sample2/allele_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/card_allele_annotation/sample2/allele_mapping_data.txt rename to q2_amr/card/types/tests/data/card_allele_annotation/sample2/allele_mapping_data.txt diff --git a/q2_amr/types/tests/data/card_allele_annotation/sample2/overall_mapping_stats.txt b/q2_amr/card/types/tests/data/card_allele_annotation/sample2/overall_mapping_stats.txt similarity index 100% rename from q2_amr/types/tests/data/card_allele_annotation/sample2/overall_mapping_stats.txt rename to q2_amr/card/types/tests/data/card_allele_annotation/sample2/overall_mapping_stats.txt diff --git a/q2_amr/types/tests/data/card_allele_annotation/sample2/sorted.length_100.bam b/q2_amr/card/types/tests/data/card_allele_annotation/sample2/sorted.length_100.bam similarity index 100% rename from q2_amr/types/tests/data/card_allele_annotation/sample2/sorted.length_100.bam rename to q2_amr/card/types/tests/data/card_allele_annotation/sample2/sorted.length_100.bam diff --git a/q2_amr/types/tests/data/card_annotation/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.json b/q2_amr/card/types/tests/data/card_annotation/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.json similarity index 100% rename from q2_amr/types/tests/data/card_annotation/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.json rename to q2_amr/card/types/tests/data/card_annotation/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.json diff --git a/q2_amr/types/tests/data/card_annotation/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.txt b/q2_amr/card/types/tests/data/card_annotation/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.txt similarity index 100% rename from q2_amr/types/tests/data/card_annotation/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.txt rename to q2_amr/card/types/tests/data/card_annotation/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.txt diff --git a/q2_amr/types/tests/data/card_annotation/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.json b/q2_amr/card/types/tests/data/card_annotation/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.json similarity index 100% rename from q2_amr/types/tests/data/card_annotation/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.json rename to q2_amr/card/types/tests/data/card_annotation/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.json diff --git a/q2_amr/types/tests/data/card_annotation/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.txt b/q2_amr/card/types/tests/data/card_annotation/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.txt similarity index 100% rename from q2_amr/types/tests/data/card_annotation/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.txt rename to q2_amr/card/types/tests/data/card_annotation/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.txt diff --git a/q2_amr/types/tests/data/card_annotation/sample2/f5a16381-ea80-49f9-875e-620f333a9293/amr_annotation.json b/q2_amr/card/types/tests/data/card_annotation/sample2/f5a16381-ea80-49f9-875e-620f333a9293/amr_annotation.json similarity index 100% rename from q2_amr/types/tests/data/card_annotation/sample2/f5a16381-ea80-49f9-875e-620f333a9293/amr_annotation.json rename to q2_amr/card/types/tests/data/card_annotation/sample2/f5a16381-ea80-49f9-875e-620f333a9293/amr_annotation.json diff --git a/q2_amr/types/tests/data/card_annotation/sample2/f5a16381-ea80-49f9-875e-620f333a9293/amr_annotation.txt b/q2_amr/card/types/tests/data/card_annotation/sample2/f5a16381-ea80-49f9-875e-620f333a9293/amr_annotation.txt similarity index 100% rename from q2_amr/types/tests/data/card_annotation/sample2/f5a16381-ea80-49f9-875e-620f333a9293/amr_annotation.txt rename to q2_amr/card/types/tests/data/card_annotation/sample2/f5a16381-ea80-49f9-875e-620f333a9293/amr_annotation.txt diff --git a/q2_amr/types/tests/data/card_gene_annotation/sample1/gene_mapping_data.txt b/q2_amr/card/types/tests/data/card_gene_annotation/sample1/gene_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/card_gene_annotation/sample1/gene_mapping_data.txt rename to q2_amr/card/types/tests/data/card_gene_annotation/sample1/gene_mapping_data.txt diff --git a/q2_amr/types/tests/data/card_gene_annotation/sample2/gene_mapping_data.txt b/q2_amr/card/types/tests/data/card_gene_annotation/sample2/gene_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/card_gene_annotation/sample2/gene_mapping_data.txt rename to q2_amr/card/types/tests/data/card_gene_annotation/sample2/gene_mapping_data.txt diff --git a/q2_amr/types/tests/data/card_kmer_analysis_mags/sample1/bin1/61mer_analysis_rgi_summary.txt b/q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample1/bin1/61mer_analysis_rgi_summary.txt similarity index 100% rename from q2_amr/types/tests/data/card_kmer_analysis_mags/sample1/bin1/61mer_analysis_rgi_summary.txt rename to q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample1/bin1/61mer_analysis_rgi_summary.txt diff --git a/q2_amr/types/tests/data/card_kmer_analysis_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/61mer_analysis.json b/q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/61mer_analysis.json similarity index 100% rename from q2_amr/types/tests/data/card_kmer_analysis_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/61mer_analysis.json rename to q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/61mer_analysis.json diff --git a/q2_amr/types/tests/data/card_kmer_analysis_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/61mer_analysis_rgi_summary.txt b/q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/61mer_analysis_rgi_summary.txt similarity index 100% rename from q2_amr/types/tests/data/card_kmer_analysis_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/61mer_analysis_rgi_summary.txt rename to q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d/61mer_analysis_rgi_summary.txt diff --git a/q2_amr/types/tests/data/card_kmer_analysis_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/61mer_analysis.json b/q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/61mer_analysis.json similarity index 100% rename from q2_amr/types/tests/data/card_kmer_analysis_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/61mer_analysis.json rename to q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/61mer_analysis.json diff --git a/q2_amr/types/tests/data/card_kmer_analysis_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/61mer_analysis_rgi_summary.txt b/q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/61mer_analysis_rgi_summary.txt similarity index 100% rename from q2_amr/types/tests/data/card_kmer_analysis_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/61mer_analysis_rgi_summary.txt rename to q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd/61mer_analysis_rgi_summary.txt diff --git a/q2_amr/types/tests/data/card_kmer_analysis_mags/sample2/f5a16381-ea80-49f9-875e-620f333a9293/61mer_analysis.json b/q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample2/f5a16381-ea80-49f9-875e-620f333a9293/61mer_analysis.json similarity index 100% rename from q2_amr/types/tests/data/card_kmer_analysis_mags/sample2/f5a16381-ea80-49f9-875e-620f333a9293/61mer_analysis.json rename to q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample2/f5a16381-ea80-49f9-875e-620f333a9293/61mer_analysis.json diff --git a/q2_amr/types/tests/data/card_kmer_analysis_mags/sample2/f5a16381-ea80-49f9-875e-620f333a9293/61mer_analysis_rgi_summary.txt b/q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample2/f5a16381-ea80-49f9-875e-620f333a9293/61mer_analysis_rgi_summary.txt similarity index 100% rename from q2_amr/types/tests/data/card_kmer_analysis_mags/sample2/f5a16381-ea80-49f9-875e-620f333a9293/61mer_analysis_rgi_summary.txt rename to q2_amr/card/types/tests/data/card_kmer_analysis_mags/sample2/f5a16381-ea80-49f9-875e-620f333a9293/61mer_analysis_rgi_summary.txt diff --git a/q2_amr/types/tests/data/card_reads_allele_kmer_analysis/sample1/61mer_analysis.allele.txt b/q2_amr/card/types/tests/data/card_reads_allele_kmer_analysis/sample1/61mer_analysis.allele.txt similarity index 100% rename from q2_amr/types/tests/data/card_reads_allele_kmer_analysis/sample1/61mer_analysis.allele.txt rename to q2_amr/card/types/tests/data/card_reads_allele_kmer_analysis/sample1/61mer_analysis.allele.txt diff --git a/q2_amr/types/tests/data/card_reads_allele_kmer_analysis/sample1/61mer_analysis.json b/q2_amr/card/types/tests/data/card_reads_allele_kmer_analysis/sample1/61mer_analysis.json similarity index 100% rename from q2_amr/types/tests/data/card_reads_allele_kmer_analysis/sample1/61mer_analysis.json rename to q2_amr/card/types/tests/data/card_reads_allele_kmer_analysis/sample1/61mer_analysis.json diff --git a/q2_amr/types/tests/data/card_reads_allele_kmer_analysis/sample2/61mer_analysis.allele.txt b/q2_amr/card/types/tests/data/card_reads_allele_kmer_analysis/sample2/61mer_analysis.allele.txt similarity index 100% rename from q2_amr/types/tests/data/card_reads_allele_kmer_analysis/sample2/61mer_analysis.allele.txt rename to q2_amr/card/types/tests/data/card_reads_allele_kmer_analysis/sample2/61mer_analysis.allele.txt diff --git a/q2_amr/types/tests/data/card_reads_allele_kmer_analysis/sample2/61mer_analysis.json b/q2_amr/card/types/tests/data/card_reads_allele_kmer_analysis/sample2/61mer_analysis.json similarity index 100% rename from q2_amr/types/tests/data/card_reads_allele_kmer_analysis/sample2/61mer_analysis.json rename to q2_amr/card/types/tests/data/card_reads_allele_kmer_analysis/sample2/61mer_analysis.json diff --git a/q2_amr/types/tests/data/card_reads_gene_kmer_analysis/sample1/61mer_analysis.gene.txt b/q2_amr/card/types/tests/data/card_reads_gene_kmer_analysis/sample1/61mer_analysis.gene.txt similarity index 100% rename from q2_amr/types/tests/data/card_reads_gene_kmer_analysis/sample1/61mer_analysis.gene.txt rename to q2_amr/card/types/tests/data/card_reads_gene_kmer_analysis/sample1/61mer_analysis.gene.txt diff --git a/q2_amr/types/tests/data/card_reads_gene_kmer_analysis/sample2/61mer_analysis.gene.txt b/q2_amr/card/types/tests/data/card_reads_gene_kmer_analysis/sample2/61mer_analysis.gene.txt similarity index 100% rename from q2_amr/types/tests/data/card_reads_gene_kmer_analysis/sample2/61mer_analysis.gene.txt rename to q2_amr/card/types/tests/data/card_reads_gene_kmer_analysis/sample2/61mer_analysis.gene.txt diff --git a/q2_amr/types/tests/data/card_test.json b/q2_amr/card/types/tests/data/card_test.json similarity index 100% rename from q2_amr/types/tests/data/card_test.json rename to q2_amr/card/types/tests/data/card_test.json diff --git a/q2_amr/types/tests/data/card_test_dna.fasta b/q2_amr/card/types/tests/data/card_test_dna.fasta similarity index 100% rename from q2_amr/types/tests/data/card_test_dna.fasta rename to q2_amr/card/types/tests/data/card_test_dna.fasta diff --git a/q2_amr/types/tests/data/card_test_protein.fasta b/q2_amr/card/types/tests/data/card_test_protein.fasta similarity index 100% rename from q2_amr/types/tests/data/card_test_protein.fasta rename to q2_amr/card/types/tests/data/card_test_protein.fasta diff --git a/q2_amr/types/tests/data/empty_dict.json b/q2_amr/card/types/tests/data/empty_dict.json similarity index 100% rename from q2_amr/types/tests/data/empty_dict.json rename to q2_amr/card/types/tests/data/empty_dict.json diff --git a/q2_amr/types/tests/data/index-for-model-sequences-test.txt b/q2_amr/card/types/tests/data/index-for-model-sequences-test.txt similarity index 100% rename from q2_amr/types/tests/data/index-for-model-sequences-test.txt rename to q2_amr/card/types/tests/data/index-for-model-sequences-test.txt diff --git a/q2_amr/types/tests/data/kmer_json_test.json b/q2_amr/card/types/tests/data/kmer_json_test.json similarity index 100% rename from q2_amr/types/tests/data/kmer_json_test.json rename to q2_amr/card/types/tests/data/kmer_json_test.json diff --git a/q2_amr/types/tests/data/kmer_txt_test.txt b/q2_amr/card/types/tests/data/kmer_txt_test.txt similarity index 100% rename from q2_amr/types/tests/data/kmer_txt_test.txt rename to q2_amr/card/types/tests/data/kmer_txt_test.txt diff --git a/q2_amr/types/tests/data/mags_61mer_analysis.json b/q2_amr/card/types/tests/data/mags_61mer_analysis.json similarity index 100% rename from q2_amr/types/tests/data/mags_61mer_analysis.json rename to q2_amr/card/types/tests/data/mags_61mer_analysis.json diff --git a/q2_amr/types/tests/data/reads_61mer_analysis.json b/q2_amr/card/types/tests/data/reads_61mer_analysis.json similarity index 100% rename from q2_amr/types/tests/data/reads_61mer_analysis.json rename to q2_amr/card/types/tests/data/reads_61mer_analysis.json diff --git a/q2_amr/types/tests/data/rgi_output.json b/q2_amr/card/types/tests/data/rgi_output.json similarity index 100% rename from q2_amr/types/tests/data/rgi_output.json rename to q2_amr/card/types/tests/data/rgi_output.json diff --git a/q2_amr/types/tests/data/rgi_output.txt b/q2_amr/card/types/tests/data/rgi_output.txt similarity index 100% rename from q2_amr/types/tests/data/rgi_output.txt rename to q2_amr/card/types/tests/data/rgi_output.txt diff --git a/q2_amr/types/tests/data/rgi_output_dna.fna b/q2_amr/card/types/tests/data/rgi_output_dna.fna similarity index 100% rename from q2_amr/types/tests/data/rgi_output_dna.fna rename to q2_amr/card/types/tests/data/rgi_output_dna.fna diff --git a/q2_amr/types/tests/data/rgi_output_protein.fna b/q2_amr/card/types/tests/data/rgi_output_protein.fna similarity index 100% rename from q2_amr/types/tests/data/rgi_output_protein.fna rename to q2_amr/card/types/tests/data/rgi_output_protein.fna diff --git a/q2_amr/types/tests/data/tabulated_df_allele.txt b/q2_amr/card/types/tests/data/tabulated_df_allele.txt similarity index 100% rename from q2_amr/types/tests/data/tabulated_df_allele.txt rename to q2_amr/card/types/tests/data/tabulated_df_allele.txt diff --git a/q2_amr/types/tests/data/tabulated_df_mags.txt b/q2_amr/card/types/tests/data/tabulated_df_mags.txt similarity index 100% rename from q2_amr/types/tests/data/tabulated_df_mags.txt rename to q2_amr/card/types/tests/data/tabulated_df_mags.txt diff --git a/q2_amr/types/tests/test_types_formats_transformers.py b/q2_amr/card/types/tests/test_types_formats_transformers.py similarity index 99% rename from q2_amr/types/tests/test_types_formats_transformers.py rename to q2_amr/card/types/tests/test_types_formats_transformers.py index a871e99..2a0ac25 100644 --- a/q2_amr/types/tests/test_types_formats_transformers.py +++ b/q2_amr/card/types/tests/test_types_formats_transformers.py @@ -24,12 +24,12 @@ from qiime2.plugin.testing import TestPluginBase from skbio import DNA, Protein -from q2_amr.types import ( +from q2_amr.card.types import ( CARDAlleleAnnotationDirectoryFormat, CARDDatabaseDirectoryFormat, CARDGeneAnnotationDirectoryFormat, ) -from q2_amr.types._format import ( +from q2_amr.card.types._format import ( CARDAnnotationDirectoryFormat, CARDAnnotationTXTFormat, CARDDatabaseFormat, @@ -47,7 +47,7 @@ CARDWildcardIndexFormat, GapDNAFASTAFormat, ) -from q2_amr.types._transformer import ( +from q2_amr.card.types._transformer import ( _read_from_card_file, card_annotation_df_to_fasta, extract_sequence, @@ -56,7 +56,7 @@ class AMRTypesTestPluginBase(TestPluginBase): - package = "q2_amr.types.tests" + package = "q2_amr.card.types.tests" def setUp(self): super().setUp() diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index bc8fdb9..00ac3f5 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -43,14 +43,14 @@ partition_reads_gene_annotations, ) from q2_amr.card.reads import annotate_reads_card -from q2_amr.types import ( +from q2_amr.card.types import ( CARDAnnotationJSONFormat, CARDAnnotationTXTFormat, CARDDatabase, CARDDatabaseDirectoryFormat, CARDDatabaseFormat, ) -from q2_amr.types._format import ( +from q2_amr.card.types._format import ( CARDAlleleAnnotationDirectoryFormat, CARDAlleleAnnotationFormat, CARDAnnotationDirectoryFormat, @@ -71,7 +71,7 @@ CARDWildcardIndexFormat, GapDNAFASTAFormat, ) -from q2_amr.types._type import ( +from q2_amr.card.types._type import ( CARDAlleleAnnotation, CARDAnnotation, CARDGeneAnnotation, @@ -524,4 +524,4 @@ CARDReadsAlleleKmerAnalysisDirectoryFormat, ) -importlib.import_module("q2_amr.types._transformer") +importlib.import_module("q2_amr.card.types._transformer") diff --git a/setup.py b/setup.py index 8c4bbd3..c42b266 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ "assets/rgi/annotation_stats/*", "assets/rgi/heatmap/*", ], - "q2_amr.types.tests": [ + "q2_amr.card.types.tests": [ "data/*", "data/*/*/*", "data/*/*/*/*", From 32e8b7cc23d04e17d4952f3ebbe88000e43f03db Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Mon, 1 Jul 2024 14:46:32 +0200 Subject: [PATCH 02/50] dirformat with filecollections --- q2_amr/amrfinderplus/types/__init__.py | 11 +++++- q2_amr/amrfinderplus/types/_format.py | 35 +++++++++++++++++++ .../tests/test_types_formats_transformers.py | 13 +++++++ q2_amr/plugin_setup.py | 14 ++++++++ 4 files changed, 72 insertions(+), 1 deletion(-) diff --git a/q2_amr/amrfinderplus/types/__init__.py b/q2_amr/amrfinderplus/types/__init__.py index 19b7008..31ef4ea 100644 --- a/q2_amr/amrfinderplus/types/__init__.py +++ b/q2_amr/amrfinderplus/types/__init__.py @@ -5,5 +5,14 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +from q2_amr.amrfinderplus.types._format import ( + AMRFinderPlusDatabaseDirectoryFormat, + BinaryFormat, + TextFormat, +) -__all__ = [] +__all__ = [ + "AMRFinderPlusDatabaseDirectoryFormat", + "TextFormat", + "BinaryFormat", +] diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index bc9c3d2..a1b6f67 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -5,3 +5,38 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat +from qiime2.plugin import model + + +class TextFormat(model.TextFileFormat): + def _validate_(self, level): + pass + + +class BinaryFormat(model.BinaryFileFormat): + def _validate_(self, level): + pass + + +class AMRFinderPlusDatabaseDirectoryFormat(model.DirectoryFormat): + AMR_LIB = model.File("AMR.LIB", format=TextFormat) + AMR_LIB_comp = model.FileCollection(r"AMR\.LIB\.h3.$", format=BinaryFormat) + AMRProt = model.File("AMRProt", format=ProteinFASTAFormat) + AMRProt_blast = model.FileCollection(r"AMRProt\.p..$", format=BinaryFormat) + AMRProt_mutation = model.File("AMRProt-mutation.tab", format=TextFormat) + AMRProt_suppress = model.File("AMRProt-suppress", format=TextFormat) + AMRProt_susceptible = model.File("AMRProt-susceptible.tab", format=TextFormat) + changes = model.File("changes.txt", format=TextFormat) + db_version = model.File("database_format_version.txt", format=TextFormat) + fam = model.File("fam.tab", format=TextFormat) + taxgroup = model.File("taxgroup.tab", format=TextFormat) + version = model.File("version.txt", format=TextFormat) + AMR_DNA = model.FileCollection( + r"^AMR_DNA-[a-zA-Z_]+$", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_comp = model.FileCollection( + r"^AMR_DNA-[a-zA-Z_]+\.n..$", format=BinaryFormat + ) + AMR_CDS_comp = model.FileCollection(r"^AMR_CDS\.n..$", format=BinaryFormat) + AMR_CDS = model.File("AMR_CDS", format=MixedCaseDNAFASTAFormat) diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index bc9c3d2..ea15625 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -5,3 +5,16 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +from qiime2.plugin.testing import TestPluginBase + +from q2_amr.amrfinderplus.types._format import AMRFinderPlusDatabaseDirectoryFormat + + +class TestAMRFinderPlusDatabaseTypesAndFormats(TestPluginBase): + package = "q2_amr.amrfinderplus.types.tests" + + def test_amrfinderplus_database_directory_format_validate_positive(self): + format = AMRFinderPlusDatabaseDirectoryFormat( + "/Users/rischv/Documents/data/amrfinder/database", mode="r" + ) + format.validate() diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index 00ac3f5..6f12790 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -28,6 +28,12 @@ from qiime2.plugin import Citations, Plugin from q2_amr import __version__ +from q2_amr.amrfinderplus.types._format import ( + AMRFinderPlusDatabaseDirectoryFormat, + BinaryFormat, + TextFormat, +) +from q2_amr.amrfinderplus.types._type import AMRFinderPlusDatabase from q2_amr.card.database import fetch_card_db from q2_amr.card.heatmap import heatmap from q2_amr.card.mags import annotate_mags_card @@ -468,6 +474,7 @@ CARDReadsGeneKmerAnalysis, CARDReadsAlleleKmerAnalysis, CARDMAGsKmerAnalysis, + AMRFinderPlusDatabase, ) plugin.register_semantic_type_to_format( @@ -498,6 +505,10 @@ SampleData[CARDMAGsKmerAnalysis], artifact_format=CARDMAGsKmerAnalysisDirectoryFormat, ) +plugin.register_semantic_type_to_format( + AMRFinderPlusDatabase, + artifact_format=AMRFinderPlusDatabaseDirectoryFormat, +) plugin.register_formats( CARDKmerDatabaseDirectoryFormat, CARDKmerJSONFormat, @@ -522,6 +533,9 @@ CARDReadsKmerAnalysisJSONFormat, CARDReadsGeneKmerAnalysisDirectoryFormat, CARDReadsAlleleKmerAnalysisDirectoryFormat, + AMRFinderPlusDatabaseDirectoryFormat, + TextFormat, + BinaryFormat, ) importlib.import_module("q2_amr.card.types._transformer") From d1b2ca6bd0ddc48a465de9a700096d5d5fc01035 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Mon, 1 Jul 2024 15:17:22 +0200 Subject: [PATCH 03/50] dirformat with validating all filepaths --- q2_amr/amrfinderplus/types/_format.py | 321 +++++++++++++++++- .../types/tests/data/database/AMR.LIB | 0 .../types/tests/data/database/AMR.LIB.h3f | 0 .../types/tests/data/database/AMR.LIB.h3i | 0 .../types/tests/data/database/AMR.LIB.h3m | 0 .../types/tests/data/database/AMR.LIB.h3p | 0 .../types/tests/data/database/AMRProt | 0 .../tests/data/database/AMRProt-mutation.tab | 0 .../tests/data/database/AMRProt-suppress | 0 .../data/database/AMRProt-susceptible.tab | 0 .../types/tests/data/database/AMRProt.pdb | 0 .../types/tests/data/database/AMRProt.phr | 0 .../types/tests/data/database/AMRProt.pin | 0 .../types/tests/data/database/AMRProt.pjs | 0 .../types/tests/data/database/AMRProt.psq | 0 .../types/tests/data/database/AMRProt.ptf | 0 .../types/tests/data/database/AMRProt.pto | 0 .../types/tests/data/database/AMR_CDS | 0 .../types/tests/data/database/AMR_CDS.ndb | 0 .../types/tests/data/database/AMR_CDS.nhr | 0 .../types/tests/data/database/AMR_CDS.nin | 0 .../types/tests/data/database/AMR_CDS.njs | 0 .../types/tests/data/database/AMR_CDS.not | 0 .../types/tests/data/database/AMR_CDS.nsq | 0 .../types/tests/data/database/AMR_CDS.ntf | 0 .../types/tests/data/database/AMR_CDS.nto | 0 .../database/AMR_DNA-Acinetobacter_baumannii | 0 .../AMR_DNA-Acinetobacter_baumannii.ndb | 0 .../AMR_DNA-Acinetobacter_baumannii.nhr | 0 .../AMR_DNA-Acinetobacter_baumannii.nin | 0 .../AMR_DNA-Acinetobacter_baumannii.njs | 0 .../AMR_DNA-Acinetobacter_baumannii.not | 0 .../AMR_DNA-Acinetobacter_baumannii.nsq | 0 .../AMR_DNA-Acinetobacter_baumannii.ntf | 0 .../AMR_DNA-Acinetobacter_baumannii.nto | 0 .../AMR_DNA-Acinetobacter_baumannii.tab | 0 .../tests/data/database/AMR_DNA-Campylobacter | 0 .../data/database/AMR_DNA-Campylobacter.ndb | 0 .../data/database/AMR_DNA-Campylobacter.nhr | 0 .../data/database/AMR_DNA-Campylobacter.nin | 0 .../data/database/AMR_DNA-Campylobacter.njs | 0 .../data/database/AMR_DNA-Campylobacter.not | 0 .../data/database/AMR_DNA-Campylobacter.nsq | 0 .../data/database/AMR_DNA-Campylobacter.ntf | 0 .../data/database/AMR_DNA-Campylobacter.nto | 0 .../data/database/AMR_DNA-Campylobacter.tab | 0 .../database/AMR_DNA-Clostridioides_difficile | 0 .../AMR_DNA-Clostridioides_difficile.ndb | 0 .../AMR_DNA-Clostridioides_difficile.nhr | 0 .../AMR_DNA-Clostridioides_difficile.nin | 0 .../AMR_DNA-Clostridioides_difficile.njs | 0 .../AMR_DNA-Clostridioides_difficile.not | 0 .../AMR_DNA-Clostridioides_difficile.nsq | 0 .../AMR_DNA-Clostridioides_difficile.ntf | 0 .../AMR_DNA-Clostridioides_difficile.nto | 0 .../AMR_DNA-Clostridioides_difficile.tab | 0 .../database/AMR_DNA-Enterococcus_faecalis | 0 .../AMR_DNA-Enterococcus_faecalis.ndb | 0 .../AMR_DNA-Enterococcus_faecalis.nhr | 0 .../AMR_DNA-Enterococcus_faecalis.nin | 0 .../AMR_DNA-Enterococcus_faecalis.njs | 0 .../AMR_DNA-Enterococcus_faecalis.not | 0 .../AMR_DNA-Enterococcus_faecalis.nsq | 0 .../AMR_DNA-Enterococcus_faecalis.ntf | 0 .../AMR_DNA-Enterococcus_faecalis.nto | 0 .../AMR_DNA-Enterococcus_faecalis.tab | 0 .../database/AMR_DNA-Enterococcus_faecium | 0 .../database/AMR_DNA-Enterococcus_faecium.ndb | 0 .../database/AMR_DNA-Enterococcus_faecium.nhr | 0 .../database/AMR_DNA-Enterococcus_faecium.nin | 0 .../database/AMR_DNA-Enterococcus_faecium.njs | 0 .../database/AMR_DNA-Enterococcus_faecium.not | 0 .../database/AMR_DNA-Enterococcus_faecium.nsq | 0 .../database/AMR_DNA-Enterococcus_faecium.ntf | 0 .../database/AMR_DNA-Enterococcus_faecium.nto | 0 .../database/AMR_DNA-Enterococcus_faecium.tab | 0 .../tests/data/database/AMR_DNA-Escherichia | 0 .../data/database/AMR_DNA-Escherichia.ndb | 0 .../data/database/AMR_DNA-Escherichia.nhr | 0 .../data/database/AMR_DNA-Escherichia.nin | 0 .../data/database/AMR_DNA-Escherichia.njs | 0 .../data/database/AMR_DNA-Escherichia.not | 0 .../data/database/AMR_DNA-Escherichia.nsq | 0 .../data/database/AMR_DNA-Escherichia.ntf | 0 .../data/database/AMR_DNA-Escherichia.nto | 0 .../data/database/AMR_DNA-Escherichia.tab | 0 .../data/database/AMR_DNA-Klebsiella_oxytoca | 0 .../database/AMR_DNA-Klebsiella_oxytoca.ndb | 0 .../database/AMR_DNA-Klebsiella_oxytoca.nhr | 0 .../database/AMR_DNA-Klebsiella_oxytoca.nin | 0 .../database/AMR_DNA-Klebsiella_oxytoca.njs | 0 .../database/AMR_DNA-Klebsiella_oxytoca.not | 0 .../database/AMR_DNA-Klebsiella_oxytoca.nsq | 0 .../database/AMR_DNA-Klebsiella_oxytoca.ntf | 0 .../database/AMR_DNA-Klebsiella_oxytoca.nto | 0 .../database/AMR_DNA-Klebsiella_oxytoca.tab | 0 .../database/AMR_DNA-Neisseria_gonorrhoeae | 0 .../AMR_DNA-Neisseria_gonorrhoeae.ndb | 0 .../AMR_DNA-Neisseria_gonorrhoeae.nhr | 0 .../AMR_DNA-Neisseria_gonorrhoeae.nin | 0 .../AMR_DNA-Neisseria_gonorrhoeae.njs | 0 .../AMR_DNA-Neisseria_gonorrhoeae.not | 0 .../AMR_DNA-Neisseria_gonorrhoeae.nsq | 0 .../AMR_DNA-Neisseria_gonorrhoeae.ntf | 0 .../AMR_DNA-Neisseria_gonorrhoeae.nto | 0 .../AMR_DNA-Neisseria_gonorrhoeae.tab | 0 .../tests/data/database/AMR_DNA-Salmonella | 0 .../data/database/AMR_DNA-Salmonella.ndb | 0 .../data/database/AMR_DNA-Salmonella.nhr | 0 .../data/database/AMR_DNA-Salmonella.nin | 0 .../data/database/AMR_DNA-Salmonella.njs | 0 .../data/database/AMR_DNA-Salmonella.not | 0 .../data/database/AMR_DNA-Salmonella.nsq | 0 .../data/database/AMR_DNA-Salmonella.ntf | 0 .../data/database/AMR_DNA-Salmonella.nto | 0 .../data/database/AMR_DNA-Salmonella.tab | 0 .../database/AMR_DNA-Staphylococcus_aureus | 0 .../AMR_DNA-Staphylococcus_aureus.ndb | 0 .../AMR_DNA-Staphylococcus_aureus.nhr | 0 .../AMR_DNA-Staphylococcus_aureus.nin | 0 .../AMR_DNA-Staphylococcus_aureus.njs | 0 .../AMR_DNA-Staphylococcus_aureus.not | 0 .../AMR_DNA-Staphylococcus_aureus.nsq | 0 .../AMR_DNA-Staphylococcus_aureus.ntf | 0 .../AMR_DNA-Staphylococcus_aureus.nto | 0 .../AMR_DNA-Staphylococcus_aureus.tab | 0 .../database/AMR_DNA-Streptococcus_pneumoniae | 0 .../AMR_DNA-Streptococcus_pneumoniae.ndb | 0 .../AMR_DNA-Streptococcus_pneumoniae.nhr | 0 .../AMR_DNA-Streptococcus_pneumoniae.nin | 0 .../AMR_DNA-Streptococcus_pneumoniae.njs | 0 .../AMR_DNA-Streptococcus_pneumoniae.not | 0 .../AMR_DNA-Streptococcus_pneumoniae.nsq | 0 .../AMR_DNA-Streptococcus_pneumoniae.ntf | 0 .../AMR_DNA-Streptococcus_pneumoniae.nto | 0 .../AMR_DNA-Streptococcus_pneumoniae.tab | 0 .../types/tests/data/database/changes.txt | 0 .../data/database/database_format_version.txt | 0 .../types/tests/data/database/fam.tab | 0 .../types/tests/data/database/taxgroup.tab | 0 .../types/tests/data/database/version.txt | 0 .../tests/test_types_formats_transformers.py | 2 +- 142 files changed, 314 insertions(+), 9 deletions(-) create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3f create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3i create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3m create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3p create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt-mutation.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt-suppress create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt-susceptible.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pdb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt.phr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pjs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt.psq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt.ptf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.ndb create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nhr create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nin create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.njs create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.not create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nsq create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.ntf create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nto create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/changes.txt create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/database_format_version.txt create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/fam.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/taxgroup.tab create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/version.txt diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index a1b6f67..6b5ed9b 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -21,9 +21,7 @@ def _validate_(self, level): class AMRFinderPlusDatabaseDirectoryFormat(model.DirectoryFormat): AMR_LIB = model.File("AMR.LIB", format=TextFormat) - AMR_LIB_comp = model.FileCollection(r"AMR\.LIB\.h3.$", format=BinaryFormat) AMRProt = model.File("AMRProt", format=ProteinFASTAFormat) - AMRProt_blast = model.FileCollection(r"AMRProt\.p..$", format=BinaryFormat) AMRProt_mutation = model.File("AMRProt-mutation.tab", format=TextFormat) AMRProt_suppress = model.File("AMRProt-suppress", format=TextFormat) AMRProt_susceptible = model.File("AMRProt-susceptible.tab", format=TextFormat) @@ -32,11 +30,318 @@ class AMRFinderPlusDatabaseDirectoryFormat(model.DirectoryFormat): fam = model.File("fam.tab", format=TextFormat) taxgroup = model.File("taxgroup.tab", format=TextFormat) version = model.File("version.txt", format=TextFormat) - AMR_DNA = model.FileCollection( - r"^AMR_DNA-[a-zA-Z_]+$", format=MixedCaseDNAFASTAFormat + AMR_CDS = model.File("AMR_CDS", format=MixedCaseDNAFASTAFormat) + AMR_LIB_h3f = model.File("AMR.LIB.h3f", format=BinaryFormat) + AMR_LIB_h3i = model.File("AMR.LIB.h3i", format=BinaryFormat) + AMR_LIB_h3m = model.File("AMR.LIB.h3m", format=BinaryFormat) + AMR_LIB_h3p = model.File("AMR.LIB.h3p", format=BinaryFormat) + AMRProt_pdb = model.File("AMRProt.pdb", format=BinaryFormat) + AMRProt_phr = model.File("AMRProt.phr", format=BinaryFormat) + AMRProt_pin = model.File("AMRProt.pin", format=BinaryFormat) + AMRProt_pjs = model.File("AMRProt.pjs", format=BinaryFormat) + AMRProt_pot = model.File("AMRProt.pot", format=BinaryFormat) + AMRProt_psq = model.File("AMRProt.psq", format=BinaryFormat) + AMRProt_ptf = model.File("AMRProt.ptf", format=BinaryFormat) + AMRProt_pto = model.File("AMRProt.pto", format=BinaryFormat) + AMR_CDS_ndb = model.File("AMR_CDS.ndb", format=BinaryFormat) + AMR_CDS_nhr = model.File("AMR_CDS.nhr", format=BinaryFormat) + AMR_CDS_nin = model.File("AMR_CDS.nin", format=BinaryFormat) + AMR_CDS_njs = model.File("AMR_CDS.njs", format=BinaryFormat) + AMR_CDS_not = model.File("AMR_CDS.not", format=BinaryFormat) + AMR_CDS_nsq = model.File("AMR_CDS.nsq", format=BinaryFormat) + AMR_CDS_ntf = model.File("AMR_CDS.ntf", format=BinaryFormat) + AMR_CDS_nto = model.File("AMR_CDS.nto", format=BinaryFormat) + AMR_DNA_Acinetobacter_baumannii_ndb = model.File( + "AMR_DNA-Acinetobacter_baumannii.ndb", format=BinaryFormat ) - AMR_DNA_comp = model.FileCollection( - r"^AMR_DNA-[a-zA-Z_]+\.n..$", format=BinaryFormat + AMR_DNA_Acinetobacter_baumannii_nhr = model.File( + "AMR_DNA-Acinetobacter_baumannii.nhr", format=BinaryFormat ) - AMR_CDS_comp = model.FileCollection(r"^AMR_CDS\.n..$", format=BinaryFormat) - AMR_CDS = model.File("AMR_CDS", format=MixedCaseDNAFASTAFormat) + AMR_DNA_Acinetobacter_baumannii_nin = model.File( + "AMR_DNA-Acinetobacter_baumannii.nin", format=BinaryFormat + ) + AMR_DNA_Acinetobacter_baumannii_njs = model.File( + "AMR_DNA-Acinetobacter_baumannii.njs", format=BinaryFormat + ) + AMR_DNA_Acinetobacter_baumannii_not = model.File( + "AMR_DNA-Acinetobacter_baumannii.not", format=BinaryFormat + ) + AMR_DNA_Acinetobacter_baumannii_nsq = model.File( + "AMR_DNA-Acinetobacter_baumannii.nsq", format=BinaryFormat + ) + AMR_DNA_Acinetobacter_baumannii_ntf = model.File( + "AMR_DNA-Acinetobacter_baumannii.ntf", format=BinaryFormat + ) + AMR_DNA_Acinetobacter_baumannii_nto = model.File( + "AMR_DNA-Acinetobacter_baumannii.nto", format=BinaryFormat + ) + AMR_DNA_Campylobacter_ndb = model.File( + "AMR_DNA-Campylobacter.ndb", format=BinaryFormat + ) + AMR_DNA_Campylobacter_nhr = model.File( + "AMR_DNA-Campylobacter.nhr", format=BinaryFormat + ) + AMR_DNA_Campylobacter_nin = model.File( + "AMR_DNA-Campylobacter.nin", format=BinaryFormat + ) + AMR_DNA_Campylobacter_njs = model.File( + "AMR_DNA-Campylobacter.njs", format=BinaryFormat + ) + AMR_DNA_Campylobacter_not = model.File( + "AMR_DNA-Campylobacter.not", format=BinaryFormat + ) + AMR_DNA_Campylobacter_nsq = model.File( + "AMR_DNA-Campylobacter.nsq", format=BinaryFormat + ) + AMR_DNA_Campylobacter_ntf = model.File( + "AMR_DNA-Campylobacter.ntf", format=BinaryFormat + ) + AMR_DNA_Campylobacter_nto = model.File( + "AMR_DNA-Campylobacter.nto", format=BinaryFormat + ) + AMR_DNA_Clostridioides_difficile_ndb = model.File( + "AMR_DNA-Clostridioides_difficile.ndb", format=BinaryFormat + ) + AMR_DNA_Clostridioides_difficile_nhr = model.File( + "AMR_DNA-Clostridioides_difficile.nhr", format=BinaryFormat + ) + AMR_DNA_Clostridioides_difficile_nin = model.File( + "AMR_DNA-Clostridioides_difficile.nin", format=BinaryFormat + ) + AMR_DNA_Clostridioides_difficile_njs = model.File( + "AMR_DNA-Clostridioides_difficile.njs", format=BinaryFormat + ) + AMR_DNA_Clostridioides_difficile_not = model.File( + "AMR_DNA-Clostridioides_difficile.not", format=BinaryFormat + ) + AMR_DNA_Clostridioides_difficile_nsq = model.File( + "AMR_DNA-Clostridioides_difficile.nsq", format=BinaryFormat + ) + AMR_DNA_Clostridioides_difficile_ntf = model.File( + "AMR_DNA-Clostridioides_difficile.ntf", format=BinaryFormat + ) + AMR_DNA_Clostridioides_difficile_nto = model.File( + "AMR_DNA-Clostridioides_difficile.nto", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecalis_ndb = model.File( + "AMR_DNA-Enterococcus_faecalis.ndb", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecalis_nhr = model.File( + "AMR_DNA-Enterococcus_faecalis.nhr", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecalis_nin = model.File( + "AMR_DNA-Enterococcus_faecalis.nin", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecalis_njs = model.File( + "AMR_DNA-Enterococcus_faecalis.njs", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecalis_not = model.File( + "AMR_DNA-Enterococcus_faecalis.not", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecalis_nsq = model.File( + "AMR_DNA-Enterococcus_faecalis.nsq", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecalis_ntf = model.File( + "AMR_DNA-Enterococcus_faecalis.ntf", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecalis_nto = model.File( + "AMR_DNA-Enterococcus_faecalis.nto", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecium_ndb = model.File( + "AMR_DNA-Enterococcus_faecium.ndb", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecium_nhr = model.File( + "AMR_DNA-Enterococcus_faecium.nhr", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecium_nin = model.File( + "AMR_DNA-Enterococcus_faecium.nin", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecium_njs = model.File( + "AMR_DNA-Enterococcus_faecium.njs", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecium_not = model.File( + "AMR_DNA-Enterococcus_faecium.not", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecium_nsq = model.File( + "AMR_DNA-Enterococcus_faecium.nsq", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecium_ntf = model.File( + "AMR_DNA-Enterococcus_faecium.ntf", format=BinaryFormat + ) + AMR_DNA_Enterococcus_faecium_nto = model.File( + "AMR_DNA-Enterococcus_faecium.nto", format=BinaryFormat + ) + AMR_DNA_Escherichia_ndb = model.File("AMR_DNA-Escherichia.ndb", format=BinaryFormat) + AMR_DNA_Escherichia_nhr = model.File("AMR_DNA-Escherichia.nhr", format=BinaryFormat) + AMR_DNA_Escherichia_nin = model.File("AMR_DNA-Escherichia.nin", format=BinaryFormat) + AMR_DNA_Escherichia_njs = model.File("AMR_DNA-Escherichia.njs", format=BinaryFormat) + AMR_DNA_Escherichia_not = model.File("AMR_DNA-Escherichia.not", format=BinaryFormat) + AMR_DNA_Escherichia_nsq = model.File("AMR_DNA-Escherichia.nsq", format=BinaryFormat) + AMR_DNA_Escherichia_ntf = model.File("AMR_DNA-Escherichia.ntf", format=BinaryFormat) + AMR_DNA_Escherichia_nto = model.File("AMR_DNA-Escherichia.nto", format=BinaryFormat) + AMR_DNA_Klebsiella_oxytoca_ndb = model.File( + "AMR_DNA-Klebsiella_oxytoca.ndb", format=BinaryFormat + ) + AMR_DNA_Klebsiella_oxytoca_nhr = model.File( + "AMR_DNA-Klebsiella_oxytoca.nhr", format=BinaryFormat + ) + AMR_DNA_Klebsiella_oxytoca_nin = model.File( + "AMR_DNA-Klebsiella_oxytoca.nin", format=BinaryFormat + ) + AMR_DNA_Klebsiella_oxytoca_njs = model.File( + "AMR_DNA-Klebsiella_oxytoca.njs", format=BinaryFormat + ) + AMR_DNA_Klebsiella_oxytoca_not = model.File( + "AMR_DNA-Klebsiella_oxytoca.not", format=BinaryFormat + ) + AMR_DNA_Klebsiella_oxytoca_nsq = model.File( + "AMR_DNA-Klebsiella_oxytoca.nsq", format=BinaryFormat + ) + AMR_DNA_Klebsiella_oxytoca_ntf = model.File( + "AMR_DNA-Klebsiella_oxytoca.ntf", format=BinaryFormat + ) + AMR_DNA_Klebsiella_oxytoca_nto = model.File( + "AMR_DNA-Klebsiella_oxytoca.nto", format=BinaryFormat + ) + AMR_DNA_Neisseria_gonorrhoeae_ndb = model.File( + "AMR_DNA-Neisseria_gonorrhoeae.ndb", format=BinaryFormat + ) + AMR_DNA_Neisseria_gonorrhoeae_nhr = model.File( + "AMR_DNA-Neisseria_gonorrhoeae.nhr", format=BinaryFormat + ) + AMR_DNA_Neisseria_gonorrhoeae_nin = model.File( + "AMR_DNA-Neisseria_gonorrhoeae.nin", format=BinaryFormat + ) + AMR_DNA_Neisseria_gonorrhoeae_njs = model.File( + "AMR_DNA-Neisseria_gonorrhoeae.njs", format=BinaryFormat + ) + AMR_DNA_Neisseria_gonorrhoeae_not = model.File( + "AMR_DNA-Neisseria_gonorrhoeae.not", format=BinaryFormat + ) + AMR_DNA_Neisseria_gonorrhoeae_nsq = model.File( + "AMR_DNA-Neisseria_gonorrhoeae.nsq", format=BinaryFormat + ) + AMR_DNA_Neisseria_gonorrhoeae_ntf = model.File( + "AMR_DNA-Neisseria_gonorrhoeae.ntf", format=BinaryFormat + ) + AMR_DNA_Neisseria_gonorrhoeae_nto = model.File( + "AMR_DNA-Neisseria_gonorrhoeae.nto", format=BinaryFormat + ) + AMR_DNA_Salmonella_nhr = model.File("AMR_DNA-Salmonella.nhr", format=BinaryFormat) + AMR_DNA_Salmonella_ndb = model.File("AMR_DNA-Salmonella.ndb", format=BinaryFormat) + AMR_DNA_Salmonella_nin = model.File("AMR_DNA-Salmonella.nin", format=BinaryFormat) + AMR_DNA_Salmonella_njs = model.File("AMR_DNA-Salmonella.njs", format=BinaryFormat) + AMR_DNA_Salmonella_not = model.File("AMR_DNA-Salmonella.not", format=BinaryFormat) + AMR_DNA_Salmonella_nsq = model.File("AMR_DNA-Salmonella.nsq", format=BinaryFormat) + AMR_DNA_Salmonella_ntf = model.File("AMR_DNA-Salmonella.ntf", format=BinaryFormat) + AMR_DNA_Salmonella_nto = model.File("AMR_DNA-Salmonella.nto", format=BinaryFormat) + AMR_DNA_Staphylococcus_aureus_ndb = model.File( + "AMR_DNA-Staphylococcus_aureus.ndb", format=BinaryFormat + ) + AMR_DNA_Staphylococcus_aureus_nhr = model.File( + "AMR_DNA-Staphylococcus_aureus.nhr", format=BinaryFormat + ) + AMR_DNA_Staphylococcus_aureus_nin = model.File( + "AMR_DNA-Staphylococcus_aureus.nin", format=BinaryFormat + ) + AMR_DNA_Staphylococcus_aureus_njs = model.File( + "AMR_DNA-Staphylococcus_aureus.njs", format=BinaryFormat + ) + AMR_DNA_Staphylococcus_aureus_not = model.File( + "AMR_DNA-Staphylococcus_aureus.not", format=BinaryFormat + ) + AMR_DNA_Staphylococcus_aureus_nsq = model.File( + "AMR_DNA-Staphylococcus_aureus.nsq", format=BinaryFormat + ) + AMR_DNA_Staphylococcus_aureus_ntf = model.File( + "AMR_DNA-Staphylococcus_aureus.ntf", format=BinaryFormat + ) + AMR_DNA_Staphylococcus_aureus_nto = model.File( + "AMR_DNA-Staphylococcus_aureus.nto", format=BinaryFormat + ) + AMR_DNA_Streptococcus_pneumoniae_ndb = model.File( + "AMR_DNA-Streptococcus_pneumoniae.ndb", format=BinaryFormat + ) + AMR_DNA_Streptococcus_pneumoniae_nhr = model.File( + "AMR_DNA-Streptococcus_pneumoniae.nhr", format=BinaryFormat + ) + AMR_DNA_Streptococcus_pneumoniae_nin = model.File( + "AMR_DNA-Streptococcus_pneumoniae.nin", format=BinaryFormat + ) + AMR_DNA_Streptococcus_pneumoniae_njs = model.File( + "AMR_DNA-Streptococcus_pneumoniae.njs", format=BinaryFormat + ) + AMR_DNA_Streptococcus_pneumoniae_not = model.File( + "AMR_DNA-Streptococcus_pneumoniae.not", format=BinaryFormat + ) + AMR_DNA_Streptococcus_pneumoniae_nsq = model.File( + "AMR_DNA-Streptococcus_pneumoniae.nsq", format=BinaryFormat + ) + AMR_DNA_Streptococcus_pneumoniae_ntf = model.File( + "AMR_DNA-Streptococcus_pneumoniae.ntf", format=BinaryFormat + ) + AMR_DNA_Streptococcus_pneumoniae_nto = model.File( + "AMR_DNA-Streptococcus_pneumoniae.nto", format=BinaryFormat + ) + AMR_DNA_Acinetobacter_baumannii = model.File( + "AMR_DNA-Acinetobacter_baumannii", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Campylobacter = model.File( + "AMR_DNA-Campylobacter", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Clostridioides_difficile = model.File( + "AMR_DNA-Clostridioides_difficile", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Enterococcus_faecalis = model.File( + "AMR_DNA-Enterococcus_faecalis", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Enterococcus_faecium = model.File( + "AMR_DNA-Enterococcus_faecium", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Escherichia = model.File( + "AMR_DNA-Escherichia", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Klebsiella_oxytoca = model.File( + "AMR_DNA-Klebsiella_oxytoca", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Neisseria_gonorrhoeae = model.File( + "AMR_DNA-Neisseria_gonorrhoeae", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Salmonella = model.File( + "AMR_DNA-Salmonella", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Staphylococcus_aureus = model.File( + "AMR_DNA-Staphylococcus_aureus", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Streptococcus_pneumoniae = model.File( + "AMR_DNA-Streptococcus_pneumoniae", format=MixedCaseDNAFASTAFormat + ) + AMR_DNA_Acinetobacter_baumannii_tab = model.File( + "AMR_DNA-Acinetobacter_baumannii.tab", format=TextFormat + ) + AMR_DNA_Campylobacter_tab = model.File( + "AMR_DNA-Campylobacter.tab", format=TextFormat + ) + AMR_DNA_Clostridioides_difficile_tab = model.File( + "AMR_DNA-Clostridioides_difficile.tab", format=TextFormat + ) + AMR_DNA_Enterococcus_faecalis_tab = model.File( + "AMR_DNA-Enterococcus_faecalis.tab", format=TextFormat + ) + AMR_DNA_Enterococcus_faecium_tab = model.File( + "AMR_DNA-Enterococcus_faecium.tab", format=TextFormat + ) + AMR_DNA_Klebsiella_oxytoca_tab = model.File( + "AMR_DNA-Klebsiella_oxytoca.tab", format=TextFormat + ) + AMR_DNA_Neisseria_gonorrhoeae_tab = model.File( + "AMR_DNA-Neisseria_gonorrhoeae.tab", format=TextFormat + ) + AMR_DNA_Salmonella_tab = model.File("AMR_DNA-Salmonella.tab", format=TextFormat) + AMR_DNA_Staphylococcus_aureus_tab = model.File( + "AMR_DNA-Staphylococcus_aureus.tab", format=TextFormat + ) + AMR_DNA_Streptococcus_pneumoniae_tab = model.File( + "AMR_DNA-Streptococcus_pneumoniae.tab", format=TextFormat + ) + AMR_DNA_Escherichia_tab = model.File("AMR_DNA-Escherichia.tab", format=TextFormat) diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB b/q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3f b/q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3f new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3i b/q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3i new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3m b/q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3m new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3p b/q2_amr/amrfinderplus/types/tests/data/database/AMR.LIB.h3p new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt-mutation.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt-mutation.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt-suppress b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt-suppress new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt-susceptible.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt-susceptible.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pdb b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pdb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.phr b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.phr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pin b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pjs b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pjs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.psq b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.psq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.ptf b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.ptf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pto b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS b/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_CDS.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Acinetobacter_baumannii.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Campylobacter.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Clostridioides_difficile.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecalis.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Enterococcus_faecium.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Escherichia.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Klebsiella_oxytoca.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Neisseria_gonorrhoeae.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Salmonella.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Staphylococcus_aureus.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.ndb b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.ndb new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nhr b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nhr new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nin b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nin new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.njs b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.njs new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.not b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.not new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nsq b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nsq new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.ntf b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.ntf new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nto b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.nto new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.tab b/q2_amr/amrfinderplus/types/tests/data/database/AMR_DNA-Streptococcus_pneumoniae.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/changes.txt b/q2_amr/amrfinderplus/types/tests/data/database/changes.txt new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/database_format_version.txt b/q2_amr/amrfinderplus/types/tests/data/database/database_format_version.txt new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/fam.tab b/q2_amr/amrfinderplus/types/tests/data/database/fam.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/taxgroup.tab b/q2_amr/amrfinderplus/types/tests/data/database/taxgroup.tab new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/data/database/version.txt b/q2_amr/amrfinderplus/types/tests/data/database/version.txt new file mode 100644 index 0000000..e69de29 diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index ea15625..fdf7351 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -15,6 +15,6 @@ class TestAMRFinderPlusDatabaseTypesAndFormats(TestPluginBase): def test_amrfinderplus_database_directory_format_validate_positive(self): format = AMRFinderPlusDatabaseDirectoryFormat( - "/Users/rischv/Documents/data/amrfinder/database", mode="r" + self.get_data_path("database"), mode="r" ) format.validate() From facc75dfb823bc3384cda1d30e82c0783353fd06 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 2 Jul 2024 11:47:59 +0200 Subject: [PATCH 04/50] added test data to package data --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index c42b266..f59ab18 100644 --- a/setup.py +++ b/setup.py @@ -38,6 +38,9 @@ "data/*/*/*/*", "data/*/*/*/*/*", ], + "q2_amr.amrfinderplus.types.tests": [ + "data/*/*", + ], }, zip_safe=False, ) From f9481956d0044107c5525c45e6551911dad33eb0 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 2 Jul 2024 17:08:11 +0200 Subject: [PATCH 05/50] added amrprot.pot file to git --- q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pot | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pot diff --git a/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pot b/q2_amr/amrfinderplus/types/tests/data/database/AMRProt.pot new file mode 100644 index 0000000..e69de29 From 0670d7e36acfe1ce81441b8ceef99aeb569fb6d2 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 3 Jul 2024 16:56:35 +0200 Subject: [PATCH 06/50] added new annotation format --- q2_amr/amrfinderplus/types/__init__.py | 2 + q2_amr/amrfinderplus/types/_format.py | 44 ++++++++++++++ q2_amr/amrfinderplus/types/_type.py | 1 + .../tests/data/annotation/amr_annotation.tsv | 3 + .../annotation/amr_annotation_coordiantes.tsv | 3 + .../data/annotation/amr_annotation_wrong.tsv | 1 + .../tests/test_types_formats_transformers.py | 59 ++++++++++++++++++- q2_amr/plugin_setup.py | 2 + 8 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation.tsv create mode 100644 q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_coordiantes.tsv create mode 100644 q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_wrong.tsv diff --git a/q2_amr/amrfinderplus/types/__init__.py b/q2_amr/amrfinderplus/types/__init__.py index 31ef4ea..e7c374d 100644 --- a/q2_amr/amrfinderplus/types/__init__.py +++ b/q2_amr/amrfinderplus/types/__init__.py @@ -7,12 +7,14 @@ # ---------------------------------------------------------------------------- from q2_amr.amrfinderplus.types._format import ( AMRFinderPlusDatabaseDirectoryFormat, + ARMFinderPlusAnnotationFormat, BinaryFormat, TextFormat, ) __all__ = [ "AMRFinderPlusDatabaseDirectoryFormat", + "ARMFinderPlusAnnotationFormat", "TextFormat", "BinaryFormat", ] diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index 6b5ed9b..a01f206 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -5,7 +5,9 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import pandas as pd from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat +from qiime2.core.exceptions import ValidationError from qiime2.plugin import model @@ -345,3 +347,45 @@ class AMRFinderPlusDatabaseDirectoryFormat(model.DirectoryFormat): "AMR_DNA-Streptococcus_pneumoniae.tab", format=TextFormat ) AMR_DNA_Escherichia_tab = model.File("AMR_DNA-Escherichia.tab", format=TextFormat) + + +class ARMFinderPlusAnnotationFormat(model.TextFileFormat): + def _validate(self, n_records=None): + header_coordinates = [ + "Protein identifier", + "Contig id", + "Start", + "Stop", + "Strand", + "Gene symbol", + "Sequence name", + "Scope", + "Element type", + "Element subtype", + "Class", + "Subclass", + "Method", + "Target length", + "Reference sequence length", + "% Coverage of reference sequence", + "% Identity to reference sequence", + "Alignment length", + "Accession of closest sequence", + "Name of closest sequence", + "HMM id", + "HMM description", + ] + header = header_coordinates[:1] + header_coordinates[5:] + header_obs = pd.read_csv(str(self), sep="\t", nrows=0).columns.tolist() + if header != header_obs and header_coordinates != header_obs: + raise ValidationError( + "Header line does not match ARMFinderPlusAnnotation format. Must " + "consist of the following values: " + + ", ".join(header_coordinates) + + ".\nWhile Contig id, Start, Stop and Strand are optional." + + ".\n\nFound instead: " + + ", ".join(header_obs) + ) + + def _validate_(self, level): + self._validate() diff --git a/q2_amr/amrfinderplus/types/_type.py b/q2_amr/amrfinderplus/types/_type.py index 680bfcf..c14e0e0 100644 --- a/q2_amr/amrfinderplus/types/_type.py +++ b/q2_amr/amrfinderplus/types/_type.py @@ -8,3 +8,4 @@ from qiime2.core.type import SemanticType AMRFinderPlusDatabase = SemanticType("AMRFinderPlusDatabase") +ARMFinderPlusAnnotation = SemanticType("ARMFinderPlusAnnotation") diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation.tsv new file mode 100644 index 0000000..d69d0c2 --- /dev/null +++ b/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_coordiantes.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_coordiantes.tsv new file mode 100644 index 0000000..06adaea --- /dev/null +++ b/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_coordiantes.tsv @@ -0,0 +1,3 @@ +Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +NA contig01 101 958 + blaTEM-156 class A beta-lactamase TEM-156 core AMR AMR BETA-LACTAM BETA-LACTAM ALLELEX 286 286 100.00 100.00 286 WP_061158039.1 class A beta-lactamase TEM-156 NA NA +NA contig02 1 1191 + blaPDC PDC family class C beta-lactamase core AMR AMR BETA-LACTAM CEPHALOSPORIN BLASTX 397 397 100.00 99.75 397 WP_061189306.1 class C beta-lactamase PDC-114 NA NA diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_wrong.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_wrong.tsv new file mode 100644 index 0000000..1f1fa8b --- /dev/null +++ b/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_wrong.tsv @@ -0,0 +1 @@ +Incorrect Header 1 Incorrect Header 2 Incorrect Header 3 diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index fdf7351..b67646d 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -5,12 +5,16 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +from qiime2.core.exceptions import ValidationError from qiime2.plugin.testing import TestPluginBase -from q2_amr.amrfinderplus.types._format import AMRFinderPlusDatabaseDirectoryFormat +from q2_amr.amrfinderplus.types._format import ( + AMRFinderPlusDatabaseDirectoryFormat, + ARMFinderPlusAnnotationFormat, +) -class TestAMRFinderPlusDatabaseTypesAndFormats(TestPluginBase): +class TestAMRFinderPlusTypesAndFormats(TestPluginBase): package = "q2_amr.amrfinderplus.types.tests" def test_amrfinderplus_database_directory_format_validate_positive(self): @@ -18,3 +22,54 @@ def test_amrfinderplus_database_directory_format_validate_positive(self): self.get_data_path("database"), mode="r" ) format.validate() + + def test_amrfinderplus_annotation_format_validate_positive(self): + filepath = self.get_data_path("annotation/amr_annotation.tsv") + format = ARMFinderPlusAnnotationFormat(filepath, mode="r") + format.validate() + + def test_amrfinderplus_annotation_format_validate_positive_coordinates(self): + filepath = self.get_data_path("annotation/amr_annotation_coordiantes.tsv") + format = ARMFinderPlusAnnotationFormat(filepath, mode="r") + format.validate() + + def test_amrfinderplus_annotation_format_validation_error(self): + with self.assertRaises(ValidationError) as context: + path = self.get_data_path("annotation/amr_annotation_wrong.tsv") + format = ARMFinderPlusAnnotationFormat(path, mode="r") + format.validate() + + header_coordinates = [ + "Protein identifier", + "Contig id", + "Start", + "Stop", + "Strand", + "Gene symbol", + "Sequence name", + "Scope", + "Element type", + "Element subtype", + "Class", + "Subclass", + "Method", + "Target length", + "Reference sequence length", + "% Coverage of reference sequence", + "% Identity to reference sequence", + "Alignment length", + "Accession of closest sequence", + "Name of closest sequence", + "HMM id", + "HMM description", + ] + expected_message = ( + "Header line does not match ARMFinderPlusAnnotation format. Must " + "consist of the following values: " + + ", ".join(header_coordinates) + + ".\nWhile Contig id, Start, Stop and Strand are optional." + + "\n\nFound instead: " + + "Incorrect Header 1, Incorrect Header 2, Incorrect Header 3" + ) + + self.assertEqual(str(context.exception), expected_message) diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index 3341c9a..56c0664 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -30,6 +30,7 @@ from q2_amr import __version__ from q2_amr.amrfinderplus.types._format import ( AMRFinderPlusDatabaseDirectoryFormat, + ARMFinderPlusAnnotationFormat, BinaryFormat, TextFormat, ) @@ -1145,6 +1146,7 @@ AMRFinderPlusDatabaseDirectoryFormat, TextFormat, BinaryFormat, + ARMFinderPlusAnnotationFormat, ) importlib.import_module("q2_amr.card.types._transformer") From bfafdb8251412db675f737091f8410c2001fd8a7 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 4 Jul 2024 10:48:15 +0200 Subject: [PATCH 07/50] added sampledata and feature data dir fmts --- q2_amr/amrfinderplus/types/_format.py | 29 ++++++++++ q2_amr/amrfinderplus/types/_type.py | 9 +++- .../amr_annotation.tsv} | 0 .../amr_annotation.tsv | 0 .../amr_annotation.tsv} | 0 .../tests/test_types_formats_transformers.py | 54 +++++++++++++++++-- q2_amr/plugin_setup.py | 17 +++++- 7 files changed, 104 insertions(+), 5 deletions(-) rename q2_amr/amrfinderplus/types/tests/data/annotation/{amr_annotation_coordiantes.tsv => coordinates/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv} (100%) rename q2_amr/amrfinderplus/types/tests/data/annotation/{ => no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd}/amr_annotation.tsv (100%) rename q2_amr/amrfinderplus/types/tests/data/{annotation/amr_annotation_wrong.tsv => annotation_wrong/amr_annotation.tsv} (100%) diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index a01f206..f05545e 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -5,8 +5,11 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import os + import pandas as pd from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat +from q2_types.per_sample_sequences._format import MultiDirValidationMixin from qiime2.core.exceptions import ValidationError from qiime2.plugin import model @@ -389,3 +392,29 @@ def _validate(self, n_records=None): def _validate_(self, level): self._validate() + + +class ARMFinderPlusAnnotationsDirFmt(MultiDirValidationMixin, model.DirectoryFormat): + tsv = model.FileCollection( + r".+amr_annotation.tsv$", format=ARMFinderPlusAnnotationFormat + ) + + @tsv.set_path_maker + def json_path_maker(self, sample_id, mag_id): + return f"{sample_id}/{mag_id}/amr_annotation.tsv$" + + def sample_dict(self): + sample_dict = {} + for sample in self.path.iterdir(): + mag_dict = {} + for mag in sample.iterdir(): + mag_dict[mag.name] = [ + os.path.join(mag, "amr_annotation.tsv"), + ] + sample_dict[sample.name] = mag_dict + return sample_dict + + +ARMFinderPlusAnnotationDirFmt = model.SingleFileDirectoryFormat( + "ARMFinderPlusAnnotationDirFmt", "amr_annotation.tsv", ARMFinderPlusAnnotationFormat +) diff --git a/q2_amr/amrfinderplus/types/_type.py b/q2_amr/amrfinderplus/types/_type.py index c14e0e0..214ac0d 100644 --- a/q2_amr/amrfinderplus/types/_type.py +++ b/q2_amr/amrfinderplus/types/_type.py @@ -5,7 +5,14 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +from q2_types.feature_data import FeatureData +from q2_types.sample_data import SampleData from qiime2.core.type import SemanticType AMRFinderPlusDatabase = SemanticType("AMRFinderPlusDatabase") -ARMFinderPlusAnnotation = SemanticType("ARMFinderPlusAnnotation") +ARMFinderPlusAnnotations = SemanticType( + "ARMFinderPlusAnnotations", variant_of=SampleData.field["type"] +) +ARMFinderPlusAnnotation = SemanticType( + "ARMFinderPlusAnnotation", variant_of=FeatureData.field["type"] +) diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_coordiantes.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv similarity index 100% rename from q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_coordiantes.tsv rename to q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.tsv similarity index 100% rename from q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation.tsv rename to q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.tsv diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_wrong.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation_wrong/amr_annotation.tsv similarity index 100% rename from q2_amr/amrfinderplus/types/tests/data/annotation/amr_annotation_wrong.tsv rename to q2_amr/amrfinderplus/types/tests/data/annotation_wrong/amr_annotation.tsv diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index b67646d..9ea964b 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -5,12 +5,16 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import os + from qiime2.core.exceptions import ValidationError from qiime2.plugin.testing import TestPluginBase from q2_amr.amrfinderplus.types._format import ( AMRFinderPlusDatabaseDirectoryFormat, + ARMFinderPlusAnnotationDirFmt, ARMFinderPlusAnnotationFormat, + ARMFinderPlusAnnotationsDirFmt, ) @@ -24,18 +28,25 @@ def test_amrfinderplus_database_directory_format_validate_positive(self): format.validate() def test_amrfinderplus_annotation_format_validate_positive(self): - filepath = self.get_data_path("annotation/amr_annotation.tsv") + filepath = self.get_data_path( + "annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd" + "/amr_annotation.tsv" + ) + format = ARMFinderPlusAnnotationFormat(filepath, mode="r") format.validate() def test_amrfinderplus_annotation_format_validate_positive_coordinates(self): - filepath = self.get_data_path("annotation/amr_annotation_coordiantes.tsv") + filepath = self.get_data_path( + "annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d" + "/amr_annotation.tsv" + ) format = ARMFinderPlusAnnotationFormat(filepath, mode="r") format.validate() def test_amrfinderplus_annotation_format_validation_error(self): with self.assertRaises(ValidationError) as context: - path = self.get_data_path("annotation/amr_annotation_wrong.tsv") + path = self.get_data_path("annotation_wrong/amr_annotation.tsv") format = ARMFinderPlusAnnotationFormat(path, mode="r") format.validate() @@ -73,3 +84,40 @@ def test_amrfinderplus_annotation_format_validation_error(self): ) self.assertEqual(str(context.exception), expected_message) + + def test_amrfinderplus_annotations_directory_format_sample_dict(self): + dirpath = self.get_data_path("annotation") + annotations = ARMFinderPlusAnnotationsDirFmt(dirpath, mode="r") + + obs = annotations.sample_dict() + + exp = { + "coordinates": { + "e026af61-d911-4de3-a957-7e8bf837f30d": [ + os.path.join( + annotations.path, + "coordinates", + "e026af61-d911-4de3-a957-7e8bf837f30d", + "amr_annotation.tsv", + ), + ] + }, + "no_coordinates": { + "aa447c99-ecd9-4c4a-a53b-4df6999815dd": [ + os.path.join( + annotations.path, + "no_coordinates", + "aa447c99-ecd9-4c4a-a53b-4df6999815dd", + "amr_annotation.tsv", + ), + ], + }, + } + self.assertEqual(obs, exp) + + def test_amrfinderplus_annotation_directory_format(self): + dirpath = self.get_data_path( + "annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d" + ) + annotations = ARMFinderPlusAnnotationDirFmt(dirpath, mode="r") + assert isinstance(annotations, ARMFinderPlusAnnotationDirFmt) diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index 56c0664..f8f2fe0 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -7,6 +7,7 @@ # ---------------------------------------------------------------------------- import importlib +from q2_types.feature_data import FeatureData from q2_types.feature_table import FeatureTable, Frequency from q2_types.per_sample_sequences import ( MAGs, @@ -30,11 +31,17 @@ from q2_amr import __version__ from q2_amr.amrfinderplus.types._format import ( AMRFinderPlusDatabaseDirectoryFormat, + ARMFinderPlusAnnotationDirFmt, ARMFinderPlusAnnotationFormat, + ARMFinderPlusAnnotationsDirFmt, BinaryFormat, TextFormat, ) -from q2_amr.amrfinderplus.types._type import AMRFinderPlusDatabase +from q2_amr.amrfinderplus.types._type import ( + AMRFinderPlusDatabase, + ARMFinderPlusAnnotation, + ARMFinderPlusAnnotations, +) from q2_amr.card.database import fetch_card_db from q2_amr.card.heatmap import heatmap from q2_amr.card.kmer import ( @@ -1119,6 +1126,14 @@ AMRFinderPlusDatabase, artifact_format=AMRFinderPlusDatabaseDirectoryFormat, ) +plugin.register_semantic_type_to_format( + SampleData[ARMFinderPlusAnnotations], + artifact_format=ARMFinderPlusAnnotationsDirFmt, +) +plugin.register_semantic_type_to_format( + FeatureData[ARMFinderPlusAnnotation], + artifact_format=ARMFinderPlusAnnotationDirFmt, +) plugin.register_formats( CARDKmerDatabaseDirectoryFormat, CARDKmerJSONFormat, From bb9220c0db339970fcff22386ec3ab3b4d79e843 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 4 Jul 2024 11:37:22 +0200 Subject: [PATCH 08/50] register all formats --- q2_amr/amrfinderplus/types/__init__.py | 4 ++++ q2_amr/plugin_setup.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/q2_amr/amrfinderplus/types/__init__.py b/q2_amr/amrfinderplus/types/__init__.py index e7c374d..5a6cbb9 100644 --- a/q2_amr/amrfinderplus/types/__init__.py +++ b/q2_amr/amrfinderplus/types/__init__.py @@ -7,7 +7,9 @@ # ---------------------------------------------------------------------------- from q2_amr.amrfinderplus.types._format import ( AMRFinderPlusDatabaseDirectoryFormat, + ARMFinderPlusAnnotationDirFmt, ARMFinderPlusAnnotationFormat, + ARMFinderPlusAnnotationsDirFmt, BinaryFormat, TextFormat, ) @@ -15,6 +17,8 @@ __all__ = [ "AMRFinderPlusDatabaseDirectoryFormat", "ARMFinderPlusAnnotationFormat", + "ARMFinderPlusAnnotationsDirFmt", + "ARMFinderPlusAnnotationDirFmt", "TextFormat", "BinaryFormat", ] diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index f8f2fe0..e6f0095 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -1162,6 +1162,8 @@ TextFormat, BinaryFormat, ARMFinderPlusAnnotationFormat, + ARMFinderPlusAnnotationsDirFmt, + ARMFinderPlusAnnotationDirFmt, ) importlib.import_module("q2_amr.card.types._transformer") From 317e5cb12504c782b9f8e5e070616a9a4c4ceacc Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 4 Jul 2024 12:09:33 +0200 Subject: [PATCH 09/50] using filecollections for the database format --- q2_amr/amrfinderplus/types/_format.py | 356 +++------------------ q2_amr/amrfinderplus/types/_transformer.py | 7 - 2 files changed, 38 insertions(+), 325 deletions(-) delete mode 100644 q2_amr/amrfinderplus/types/_transformer.py diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index 6b5ed9b..b8f8da8 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -20,328 +20,48 @@ def _validate_(self, level): class AMRFinderPlusDatabaseDirectoryFormat(model.DirectoryFormat): - AMR_LIB = model.File("AMR.LIB", format=TextFormat) - AMRProt = model.File("AMRProt", format=ProteinFASTAFormat) - AMRProt_mutation = model.File("AMRProt-mutation.tab", format=TextFormat) - AMRProt_suppress = model.File("AMRProt-suppress", format=TextFormat) - AMRProt_susceptible = model.File("AMRProt-susceptible.tab", format=TextFormat) + amr_lib = model.File("AMR.LIB", format=TextFormat) + amr_lib_comp = model.FileCollection(r"^AMR\.LIB\.h3.$", format=BinaryFormat) + amrprot = model.File("AMRProt", format=ProteinFASTAFormat) + amrprot_blast = model.FileCollection(r"^AMRProt\.p..$", format=BinaryFormat) + amrprot_mutation = model.File("AMRProt-mutation.tab", format=TextFormat) + amrprot_suppress = model.File("AMRProt-suppress", format=TextFormat) + amrprot_susceptible = model.File("AMRProt-susceptible.tab", format=TextFormat) changes = model.File("changes.txt", format=TextFormat) db_version = model.File("database_format_version.txt", format=TextFormat) fam = model.File("fam.tab", format=TextFormat) taxgroup = model.File("taxgroup.tab", format=TextFormat) version = model.File("version.txt", format=TextFormat) - AMR_CDS = model.File("AMR_CDS", format=MixedCaseDNAFASTAFormat) - AMR_LIB_h3f = model.File("AMR.LIB.h3f", format=BinaryFormat) - AMR_LIB_h3i = model.File("AMR.LIB.h3i", format=BinaryFormat) - AMR_LIB_h3m = model.File("AMR.LIB.h3m", format=BinaryFormat) - AMR_LIB_h3p = model.File("AMR.LIB.h3p", format=BinaryFormat) - AMRProt_pdb = model.File("AMRProt.pdb", format=BinaryFormat) - AMRProt_phr = model.File("AMRProt.phr", format=BinaryFormat) - AMRProt_pin = model.File("AMRProt.pin", format=BinaryFormat) - AMRProt_pjs = model.File("AMRProt.pjs", format=BinaryFormat) - AMRProt_pot = model.File("AMRProt.pot", format=BinaryFormat) - AMRProt_psq = model.File("AMRProt.psq", format=BinaryFormat) - AMRProt_ptf = model.File("AMRProt.ptf", format=BinaryFormat) - AMRProt_pto = model.File("AMRProt.pto", format=BinaryFormat) - AMR_CDS_ndb = model.File("AMR_CDS.ndb", format=BinaryFormat) - AMR_CDS_nhr = model.File("AMR_CDS.nhr", format=BinaryFormat) - AMR_CDS_nin = model.File("AMR_CDS.nin", format=BinaryFormat) - AMR_CDS_njs = model.File("AMR_CDS.njs", format=BinaryFormat) - AMR_CDS_not = model.File("AMR_CDS.not", format=BinaryFormat) - AMR_CDS_nsq = model.File("AMR_CDS.nsq", format=BinaryFormat) - AMR_CDS_ntf = model.File("AMR_CDS.ntf", format=BinaryFormat) - AMR_CDS_nto = model.File("AMR_CDS.nto", format=BinaryFormat) - AMR_DNA_Acinetobacter_baumannii_ndb = model.File( - "AMR_DNA-Acinetobacter_baumannii.ndb", format=BinaryFormat + amr_dna = model.FileCollection( + r"^AMR_DNA-[a-zA-Z_]+$", format=MixedCaseDNAFASTAFormat ) - AMR_DNA_Acinetobacter_baumannii_nhr = model.File( - "AMR_DNA-Acinetobacter_baumannii.nhr", format=BinaryFormat + amr_dna_comp = model.FileCollection( + r"^AMR_DNA-[a-zA-Z_]+\.n..$", format=BinaryFormat ) - AMR_DNA_Acinetobacter_baumannii_nin = model.File( - "AMR_DNA-Acinetobacter_baumannii.nin", format=BinaryFormat - ) - AMR_DNA_Acinetobacter_baumannii_njs = model.File( - "AMR_DNA-Acinetobacter_baumannii.njs", format=BinaryFormat - ) - AMR_DNA_Acinetobacter_baumannii_not = model.File( - "AMR_DNA-Acinetobacter_baumannii.not", format=BinaryFormat - ) - AMR_DNA_Acinetobacter_baumannii_nsq = model.File( - "AMR_DNA-Acinetobacter_baumannii.nsq", format=BinaryFormat - ) - AMR_DNA_Acinetobacter_baumannii_ntf = model.File( - "AMR_DNA-Acinetobacter_baumannii.ntf", format=BinaryFormat - ) - AMR_DNA_Acinetobacter_baumannii_nto = model.File( - "AMR_DNA-Acinetobacter_baumannii.nto", format=BinaryFormat - ) - AMR_DNA_Campylobacter_ndb = model.File( - "AMR_DNA-Campylobacter.ndb", format=BinaryFormat - ) - AMR_DNA_Campylobacter_nhr = model.File( - "AMR_DNA-Campylobacter.nhr", format=BinaryFormat - ) - AMR_DNA_Campylobacter_nin = model.File( - "AMR_DNA-Campylobacter.nin", format=BinaryFormat - ) - AMR_DNA_Campylobacter_njs = model.File( - "AMR_DNA-Campylobacter.njs", format=BinaryFormat - ) - AMR_DNA_Campylobacter_not = model.File( - "AMR_DNA-Campylobacter.not", format=BinaryFormat - ) - AMR_DNA_Campylobacter_nsq = model.File( - "AMR_DNA-Campylobacter.nsq", format=BinaryFormat - ) - AMR_DNA_Campylobacter_ntf = model.File( - "AMR_DNA-Campylobacter.ntf", format=BinaryFormat - ) - AMR_DNA_Campylobacter_nto = model.File( - "AMR_DNA-Campylobacter.nto", format=BinaryFormat - ) - AMR_DNA_Clostridioides_difficile_ndb = model.File( - "AMR_DNA-Clostridioides_difficile.ndb", format=BinaryFormat - ) - AMR_DNA_Clostridioides_difficile_nhr = model.File( - "AMR_DNA-Clostridioides_difficile.nhr", format=BinaryFormat - ) - AMR_DNA_Clostridioides_difficile_nin = model.File( - "AMR_DNA-Clostridioides_difficile.nin", format=BinaryFormat - ) - AMR_DNA_Clostridioides_difficile_njs = model.File( - "AMR_DNA-Clostridioides_difficile.njs", format=BinaryFormat - ) - AMR_DNA_Clostridioides_difficile_not = model.File( - "AMR_DNA-Clostridioides_difficile.not", format=BinaryFormat - ) - AMR_DNA_Clostridioides_difficile_nsq = model.File( - "AMR_DNA-Clostridioides_difficile.nsq", format=BinaryFormat - ) - AMR_DNA_Clostridioides_difficile_ntf = model.File( - "AMR_DNA-Clostridioides_difficile.ntf", format=BinaryFormat - ) - AMR_DNA_Clostridioides_difficile_nto = model.File( - "AMR_DNA-Clostridioides_difficile.nto", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecalis_ndb = model.File( - "AMR_DNA-Enterococcus_faecalis.ndb", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecalis_nhr = model.File( - "AMR_DNA-Enterococcus_faecalis.nhr", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecalis_nin = model.File( - "AMR_DNA-Enterococcus_faecalis.nin", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecalis_njs = model.File( - "AMR_DNA-Enterococcus_faecalis.njs", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecalis_not = model.File( - "AMR_DNA-Enterococcus_faecalis.not", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecalis_nsq = model.File( - "AMR_DNA-Enterococcus_faecalis.nsq", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecalis_ntf = model.File( - "AMR_DNA-Enterococcus_faecalis.ntf", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecalis_nto = model.File( - "AMR_DNA-Enterococcus_faecalis.nto", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecium_ndb = model.File( - "AMR_DNA-Enterococcus_faecium.ndb", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecium_nhr = model.File( - "AMR_DNA-Enterococcus_faecium.nhr", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecium_nin = model.File( - "AMR_DNA-Enterococcus_faecium.nin", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecium_njs = model.File( - "AMR_DNA-Enterococcus_faecium.njs", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecium_not = model.File( - "AMR_DNA-Enterococcus_faecium.not", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecium_nsq = model.File( - "AMR_DNA-Enterococcus_faecium.nsq", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecium_ntf = model.File( - "AMR_DNA-Enterococcus_faecium.ntf", format=BinaryFormat - ) - AMR_DNA_Enterococcus_faecium_nto = model.File( - "AMR_DNA-Enterococcus_faecium.nto", format=BinaryFormat - ) - AMR_DNA_Escherichia_ndb = model.File("AMR_DNA-Escherichia.ndb", format=BinaryFormat) - AMR_DNA_Escherichia_nhr = model.File("AMR_DNA-Escherichia.nhr", format=BinaryFormat) - AMR_DNA_Escherichia_nin = model.File("AMR_DNA-Escherichia.nin", format=BinaryFormat) - AMR_DNA_Escherichia_njs = model.File("AMR_DNA-Escherichia.njs", format=BinaryFormat) - AMR_DNA_Escherichia_not = model.File("AMR_DNA-Escherichia.not", format=BinaryFormat) - AMR_DNA_Escherichia_nsq = model.File("AMR_DNA-Escherichia.nsq", format=BinaryFormat) - AMR_DNA_Escherichia_ntf = model.File("AMR_DNA-Escherichia.ntf", format=BinaryFormat) - AMR_DNA_Escherichia_nto = model.File("AMR_DNA-Escherichia.nto", format=BinaryFormat) - AMR_DNA_Klebsiella_oxytoca_ndb = model.File( - "AMR_DNA-Klebsiella_oxytoca.ndb", format=BinaryFormat - ) - AMR_DNA_Klebsiella_oxytoca_nhr = model.File( - "AMR_DNA-Klebsiella_oxytoca.nhr", format=BinaryFormat - ) - AMR_DNA_Klebsiella_oxytoca_nin = model.File( - "AMR_DNA-Klebsiella_oxytoca.nin", format=BinaryFormat - ) - AMR_DNA_Klebsiella_oxytoca_njs = model.File( - "AMR_DNA-Klebsiella_oxytoca.njs", format=BinaryFormat - ) - AMR_DNA_Klebsiella_oxytoca_not = model.File( - "AMR_DNA-Klebsiella_oxytoca.not", format=BinaryFormat - ) - AMR_DNA_Klebsiella_oxytoca_nsq = model.File( - "AMR_DNA-Klebsiella_oxytoca.nsq", format=BinaryFormat - ) - AMR_DNA_Klebsiella_oxytoca_ntf = model.File( - "AMR_DNA-Klebsiella_oxytoca.ntf", format=BinaryFormat - ) - AMR_DNA_Klebsiella_oxytoca_nto = model.File( - "AMR_DNA-Klebsiella_oxytoca.nto", format=BinaryFormat - ) - AMR_DNA_Neisseria_gonorrhoeae_ndb = model.File( - "AMR_DNA-Neisseria_gonorrhoeae.ndb", format=BinaryFormat - ) - AMR_DNA_Neisseria_gonorrhoeae_nhr = model.File( - "AMR_DNA-Neisseria_gonorrhoeae.nhr", format=BinaryFormat - ) - AMR_DNA_Neisseria_gonorrhoeae_nin = model.File( - "AMR_DNA-Neisseria_gonorrhoeae.nin", format=BinaryFormat - ) - AMR_DNA_Neisseria_gonorrhoeae_njs = model.File( - "AMR_DNA-Neisseria_gonorrhoeae.njs", format=BinaryFormat - ) - AMR_DNA_Neisseria_gonorrhoeae_not = model.File( - "AMR_DNA-Neisseria_gonorrhoeae.not", format=BinaryFormat - ) - AMR_DNA_Neisseria_gonorrhoeae_nsq = model.File( - "AMR_DNA-Neisseria_gonorrhoeae.nsq", format=BinaryFormat - ) - AMR_DNA_Neisseria_gonorrhoeae_ntf = model.File( - "AMR_DNA-Neisseria_gonorrhoeae.ntf", format=BinaryFormat - ) - AMR_DNA_Neisseria_gonorrhoeae_nto = model.File( - "AMR_DNA-Neisseria_gonorrhoeae.nto", format=BinaryFormat - ) - AMR_DNA_Salmonella_nhr = model.File("AMR_DNA-Salmonella.nhr", format=BinaryFormat) - AMR_DNA_Salmonella_ndb = model.File("AMR_DNA-Salmonella.ndb", format=BinaryFormat) - AMR_DNA_Salmonella_nin = model.File("AMR_DNA-Salmonella.nin", format=BinaryFormat) - AMR_DNA_Salmonella_njs = model.File("AMR_DNA-Salmonella.njs", format=BinaryFormat) - AMR_DNA_Salmonella_not = model.File("AMR_DNA-Salmonella.not", format=BinaryFormat) - AMR_DNA_Salmonella_nsq = model.File("AMR_DNA-Salmonella.nsq", format=BinaryFormat) - AMR_DNA_Salmonella_ntf = model.File("AMR_DNA-Salmonella.ntf", format=BinaryFormat) - AMR_DNA_Salmonella_nto = model.File("AMR_DNA-Salmonella.nto", format=BinaryFormat) - AMR_DNA_Staphylococcus_aureus_ndb = model.File( - "AMR_DNA-Staphylococcus_aureus.ndb", format=BinaryFormat - ) - AMR_DNA_Staphylococcus_aureus_nhr = model.File( - "AMR_DNA-Staphylococcus_aureus.nhr", format=BinaryFormat - ) - AMR_DNA_Staphylococcus_aureus_nin = model.File( - "AMR_DNA-Staphylococcus_aureus.nin", format=BinaryFormat - ) - AMR_DNA_Staphylococcus_aureus_njs = model.File( - "AMR_DNA-Staphylococcus_aureus.njs", format=BinaryFormat - ) - AMR_DNA_Staphylococcus_aureus_not = model.File( - "AMR_DNA-Staphylococcus_aureus.not", format=BinaryFormat - ) - AMR_DNA_Staphylococcus_aureus_nsq = model.File( - "AMR_DNA-Staphylococcus_aureus.nsq", format=BinaryFormat - ) - AMR_DNA_Staphylococcus_aureus_ntf = model.File( - "AMR_DNA-Staphylococcus_aureus.ntf", format=BinaryFormat - ) - AMR_DNA_Staphylococcus_aureus_nto = model.File( - "AMR_DNA-Staphylococcus_aureus.nto", format=BinaryFormat - ) - AMR_DNA_Streptococcus_pneumoniae_ndb = model.File( - "AMR_DNA-Streptococcus_pneumoniae.ndb", format=BinaryFormat - ) - AMR_DNA_Streptococcus_pneumoniae_nhr = model.File( - "AMR_DNA-Streptococcus_pneumoniae.nhr", format=BinaryFormat - ) - AMR_DNA_Streptococcus_pneumoniae_nin = model.File( - "AMR_DNA-Streptococcus_pneumoniae.nin", format=BinaryFormat - ) - AMR_DNA_Streptococcus_pneumoniae_njs = model.File( - "AMR_DNA-Streptococcus_pneumoniae.njs", format=BinaryFormat - ) - AMR_DNA_Streptococcus_pneumoniae_not = model.File( - "AMR_DNA-Streptococcus_pneumoniae.not", format=BinaryFormat - ) - AMR_DNA_Streptococcus_pneumoniae_nsq = model.File( - "AMR_DNA-Streptococcus_pneumoniae.nsq", format=BinaryFormat - ) - AMR_DNA_Streptococcus_pneumoniae_ntf = model.File( - "AMR_DNA-Streptococcus_pneumoniae.ntf", format=BinaryFormat - ) - AMR_DNA_Streptococcus_pneumoniae_nto = model.File( - "AMR_DNA-Streptococcus_pneumoniae.nto", format=BinaryFormat - ) - AMR_DNA_Acinetobacter_baumannii = model.File( - "AMR_DNA-Acinetobacter_baumannii", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Campylobacter = model.File( - "AMR_DNA-Campylobacter", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Clostridioides_difficile = model.File( - "AMR_DNA-Clostridioides_difficile", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Enterococcus_faecalis = model.File( - "AMR_DNA-Enterococcus_faecalis", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Enterococcus_faecium = model.File( - "AMR_DNA-Enterococcus_faecium", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Escherichia = model.File( - "AMR_DNA-Escherichia", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Klebsiella_oxytoca = model.File( - "AMR_DNA-Klebsiella_oxytoca", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Neisseria_gonorrhoeae = model.File( - "AMR_DNA-Neisseria_gonorrhoeae", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Salmonella = model.File( - "AMR_DNA-Salmonella", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Staphylococcus_aureus = model.File( - "AMR_DNA-Staphylococcus_aureus", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Streptococcus_pneumoniae = model.File( - "AMR_DNA-Streptococcus_pneumoniae", format=MixedCaseDNAFASTAFormat - ) - AMR_DNA_Acinetobacter_baumannii_tab = model.File( - "AMR_DNA-Acinetobacter_baumannii.tab", format=TextFormat - ) - AMR_DNA_Campylobacter_tab = model.File( - "AMR_DNA-Campylobacter.tab", format=TextFormat - ) - AMR_DNA_Clostridioides_difficile_tab = model.File( - "AMR_DNA-Clostridioides_difficile.tab", format=TextFormat - ) - AMR_DNA_Enterococcus_faecalis_tab = model.File( - "AMR_DNA-Enterococcus_faecalis.tab", format=TextFormat - ) - AMR_DNA_Enterococcus_faecium_tab = model.File( - "AMR_DNA-Enterococcus_faecium.tab", format=TextFormat - ) - AMR_DNA_Klebsiella_oxytoca_tab = model.File( - "AMR_DNA-Klebsiella_oxytoca.tab", format=TextFormat - ) - AMR_DNA_Neisseria_gonorrhoeae_tab = model.File( - "AMR_DNA-Neisseria_gonorrhoeae.tab", format=TextFormat - ) - AMR_DNA_Salmonella_tab = model.File("AMR_DNA-Salmonella.tab", format=TextFormat) - AMR_DNA_Staphylococcus_aureus_tab = model.File( - "AMR_DNA-Staphylococcus_aureus.tab", format=TextFormat - ) - AMR_DNA_Streptococcus_pneumoniae_tab = model.File( - "AMR_DNA-Streptococcus_pneumoniae.tab", format=TextFormat - ) - AMR_DNA_Escherichia_tab = model.File("AMR_DNA-Escherichia.tab", format=TextFormat) + amr_dna_tab = model.FileCollection(r"^AMR_DNA-[a-zA-Z_]+\.tab$", format=TextFormat) + amr_cds_comp = model.FileCollection(r"^AMR_CDS\.n..$", format=BinaryFormat) + amr_cds = model.File("AMR_CDS", format=MixedCaseDNAFASTAFormat) + + @amr_lib_comp.set_path_maker + def amr_lib_comp_path_maker(self): + return r"^AMR\.LIB\.h3.$" + + @amrprot_blast.set_path_maker + def amrprot_blast_path_maker(self): + return r"^AMRProt\.p..$" + + @amr_dna.set_path_maker + def amr_dna_path_maker(self): + return r"^AMR_DNA-[a-zA-Z_]+$" + + @amr_dna_comp.set_path_maker + def amr_dna_comp_path_maker(self): + return r"^AMR_DNA-[a-zA-Z_]+\.n..$" + + @amr_cds_comp.set_path_maker + def amr_cds_comp_path_maker(self): + return r"^AMR_CDS\.n..$" + + @amr_dna_tab.set_path_maker + def amr_dna_tab_path_maker(self): + return r"^AMR_DNA-[a-zA-Z_]+\.tab$" diff --git a/q2_amr/amrfinderplus/types/_transformer.py b/q2_amr/amrfinderplus/types/_transformer.py deleted file mode 100644 index bc9c3d2..0000000 --- a/q2_amr/amrfinderplus/types/_transformer.py +++ /dev/null @@ -1,7 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2019-2023, QIIME 2 development team. -# -# Distributed under the terms of the Modified BSD License. -# -# The full license is in the file LICENSE, distributed with this software. -# ---------------------------------------------------------------------------- From 0bf7f205047f7d2d87e7abe7ce0d07b035e54a1a Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 4 Jul 2024 13:12:04 +0200 Subject: [PATCH 10/50] renamed to dirfmt --- q2_amr/amrfinderplus/types/__init__.py | 4 ++-- q2_amr/amrfinderplus/types/_format.py | 2 +- .../types/tests/test_types_formats_transformers.py | 6 ++---- q2_amr/plugin_setup.py | 6 +++--- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/q2_amr/amrfinderplus/types/__init__.py b/q2_amr/amrfinderplus/types/__init__.py index 31ef4ea..1e2e7e1 100644 --- a/q2_amr/amrfinderplus/types/__init__.py +++ b/q2_amr/amrfinderplus/types/__init__.py @@ -6,13 +6,13 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusDatabaseDirectoryFormat, + AMRFinderPlusDatabaseDirFmt, BinaryFormat, TextFormat, ) __all__ = [ - "AMRFinderPlusDatabaseDirectoryFormat", + "AMRFinderPlusDatabaseDirFmt", "TextFormat", "BinaryFormat", ] diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index b8f8da8..7f98c71 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -19,7 +19,7 @@ def _validate_(self, level): pass -class AMRFinderPlusDatabaseDirectoryFormat(model.DirectoryFormat): +class AMRFinderPlusDatabaseDirFmt(model.DirectoryFormat): amr_lib = model.File("AMR.LIB", format=TextFormat) amr_lib_comp = model.FileCollection(r"^AMR\.LIB\.h3.$", format=BinaryFormat) amrprot = model.File("AMRProt", format=ProteinFASTAFormat) diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index fdf7351..f413052 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -7,14 +7,12 @@ # ---------------------------------------------------------------------------- from qiime2.plugin.testing import TestPluginBase -from q2_amr.amrfinderplus.types._format import AMRFinderPlusDatabaseDirectoryFormat +from q2_amr.amrfinderplus.types._format import AMRFinderPlusDatabaseDirFmt class TestAMRFinderPlusDatabaseTypesAndFormats(TestPluginBase): package = "q2_amr.amrfinderplus.types.tests" def test_amrfinderplus_database_directory_format_validate_positive(self): - format = AMRFinderPlusDatabaseDirectoryFormat( - self.get_data_path("database"), mode="r" - ) + format = AMRFinderPlusDatabaseDirFmt(self.get_data_path("database"), mode="r") format.validate() diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index 3341c9a..828622b 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -29,7 +29,7 @@ from q2_amr import __version__ from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusDatabaseDirectoryFormat, + AMRFinderPlusDatabaseDirFmt, BinaryFormat, TextFormat, ) @@ -1116,7 +1116,7 @@ ) plugin.register_semantic_type_to_format( AMRFinderPlusDatabase, - artifact_format=AMRFinderPlusDatabaseDirectoryFormat, + artifact_format=AMRFinderPlusDatabaseDirFmt, ) plugin.register_formats( CARDKmerDatabaseDirectoryFormat, @@ -1142,7 +1142,7 @@ CARDReadsKmerAnalysisJSONFormat, CARDReadsGeneKmerAnalysisDirectoryFormat, CARDReadsAlleleKmerAnalysisDirectoryFormat, - AMRFinderPlusDatabaseDirectoryFormat, + AMRFinderPlusDatabaseDirFmt, TextFormat, BinaryFormat, ) From 8378b45cd57cdc8e68a1a3c6f7ef266136e4af7e Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 4 Jul 2024 14:14:42 +0200 Subject: [PATCH 11/50] overwrite all pathmakers with code from busco moshpit --- q2_amr/amrfinderplus/types/_format.py | 32 +++++++++------------------ 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index 7f98c71..71af13e 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -19,6 +19,10 @@ def _validate_(self, level): pass +def _path_maker(name): + return str(name) + + class AMRFinderPlusDatabaseDirFmt(model.DirectoryFormat): amr_lib = model.File("AMR.LIB", format=TextFormat) amr_lib_comp = model.FileCollection(r"^AMR\.LIB\.h3.$", format=BinaryFormat) @@ -42,26 +46,10 @@ class AMRFinderPlusDatabaseDirFmt(model.DirectoryFormat): amr_cds_comp = model.FileCollection(r"^AMR_CDS\.n..$", format=BinaryFormat) amr_cds = model.File("AMR_CDS", format=MixedCaseDNAFASTAFormat) - @amr_lib_comp.set_path_maker - def amr_lib_comp_path_maker(self): - return r"^AMR\.LIB\.h3.$" - - @amrprot_blast.set_path_maker - def amrprot_blast_path_maker(self): - return r"^AMRProt\.p..$" - - @amr_dna.set_path_maker - def amr_dna_path_maker(self): - return r"^AMR_DNA-[a-zA-Z_]+$" - - @amr_dna_comp.set_path_maker - def amr_dna_comp_path_maker(self): - return r"^AMR_DNA-[a-zA-Z_]+\.n..$" - - @amr_cds_comp.set_path_maker - def amr_cds_comp_path_maker(self): - return r"^AMR_CDS\.n..$" + def __init__(self, path, mode): + super().__init__(path, mode) - @amr_dna_tab.set_path_maker - def amr_dna_tab_path_maker(self): - return r"^AMR_DNA-[a-zA-Z_]+\.tab$" + # Overwrite path maker methods for all file collections + for var_name, var_value in vars(self.__class__).items(): + if isinstance(var_value, model.FileCollection): + var_value.set_path_maker(_path_maker) From 82a1558c62f025d9efda89a23f6e399ff77d2e8d Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 4 Jul 2024 14:36:33 +0200 Subject: [PATCH 12/50] added field to annotation format --- q2_amr/amrfinderplus/types/_format.py | 1 + .../e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv | 6 +++--- .../aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.tsv | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index 0b7c7e1..2b7b4d6 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -97,6 +97,7 @@ def _validate(self, n_records=None): "Name of closest sequence", "HMM id", "HMM description", + "Hierarchy node", ] header = header_coordinates[:1] + header_coordinates[5:] header_obs = pd.read_csv(str(self), sep="\t", nrows=0).columns.tolist() diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv index 06adaea..20e52d1 100644 --- a/q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv +++ b/q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv @@ -1,3 +1,3 @@ -Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description -NA contig01 101 958 + blaTEM-156 class A beta-lactamase TEM-156 core AMR AMR BETA-LACTAM BETA-LACTAM ALLELEX 286 286 100.00 100.00 286 WP_061158039.1 class A beta-lactamase TEM-156 NA NA -NA contig02 1 1191 + blaPDC PDC family class C beta-lactamase core AMR AMR BETA-LACTAM CEPHALOSPORIN BLASTX 397 397 100.00 99.75 397 WP_061189306.1 class C beta-lactamase PDC-114 NA NA +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.tsv index d69d0c2..20e52d1 100644 --- a/q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.tsv +++ b/q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.tsv @@ -1,3 +1,3 @@ -Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description -aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase -blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam From f42d8450862d790f83afaacbec6f700560525165 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 4 Jul 2024 15:27:22 +0200 Subject: [PATCH 13/50] changed name of file in annotation format to allow oter names --- q2_amr/amrfinderplus/types/_format.py | 10 ++++------ .../types/tests/test_types_formats_transformers.py | 5 +++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index 2b7b4d6..821c5b0 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -116,13 +116,11 @@ def _validate_(self, level): class ARMFinderPlusAnnotationsDirFmt(MultiDirValidationMixin, model.DirectoryFormat): - tsv = model.FileCollection( - r".+amr_annotation.tsv$", format=ARMFinderPlusAnnotationFormat - ) + annotation = model.FileCollection(r".+\.tsv$", format=ARMFinderPlusAnnotationFormat) - @tsv.set_path_maker - def json_path_maker(self, sample_id, mag_id): - return f"{sample_id}/{mag_id}/amr_annotation.tsv$" + @annotation.set_path_maker + def annotation_path_maker(self, sample_id, mag_id): + return rf"{sample_id}/{mag_id}/.+\.tsv" def sample_dict(self): sample_dict = {} diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index 3674ab7..b6d8ab6 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -119,3 +119,8 @@ def test_amrfinderplus_annotation_directory_format(self): ) annotations = ARMFinderPlusAnnotationDirFmt(dirpath, mode="r") assert isinstance(annotations, ARMFinderPlusAnnotationDirFmt) + + def test_amrfinderplus_annotations_directory_format(self): + dirpath = self.get_data_path("annotation") + annotations = ARMFinderPlusAnnotationsDirFmt(dirpath, mode="r") + assert isinstance(annotations, ARMFinderPlusAnnotationsDirFmt) From 7e31553817a40de71cd6a0085ea14b03f6b616e8 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 5 Jul 2024 11:37:10 +0200 Subject: [PATCH 14/50] added mags action --- q2_amr/amrfinderplus/mags.py | 127 ++++++++++++++++++++++++++++++++ q2_amr/card/mags.py | 5 +- q2_amr/card/reads.py | 3 +- q2_amr/card/tests/test_reads.py | 6 +- q2_amr/card/tests/test_utils.py | 11 ++- q2_amr/card/utils.py | 11 +-- q2_amr/plugin_setup.py | 117 +++++++++++++++++++++++++++++ 7 files changed, 263 insertions(+), 17 deletions(-) create mode 100644 q2_amr/amrfinderplus/mags.py diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py new file mode 100644 index 0000000..2f73d25 --- /dev/null +++ b/q2_amr/amrfinderplus/mags.py @@ -0,0 +1,127 @@ +import os +import shutil +import subprocess +import tempfile + +import pandas as pd +from q2_types.genome_data import GenesDirectoryFormat +from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt + +from q2_amr.amrfinderplus.types import ( + AMRFinderPlusDatabaseDirFmt, + ARMFinderPlusAnnotationsDirFmt, +) +from q2_amr.card.utils import create_count_table, read_in_txt, run_command + + +def annotate_mags_amrfinderplus( + mags: MultiMAGSequencesDirFmt, + amrfinderplus_db: AMRFinderPlusDatabaseDirFmt, + organism: str = None, + plus: bool = False, + report_all_equal: bool = False, + ident_min: float = None, + coverage_min: float = 0.5, + translation_table: int = 11, + threads: int = None, +) -> (ARMFinderPlusAnnotationsDirFmt, ARMFinderPlusAnnotationsDirFmt, pd.DataFrame): + manifest = mags.manifest.view(pd.DataFrame) + + annotations = ARMFinderPlusAnnotationsDirFmt() + mutations = ARMFinderPlusAnnotationsDirFmt() + genes = GenesDirectoryFormat() + + frequency_list = [] + + with tempfile.TemporaryDirectory() as tmp: + for samp_mag in list(manifest.index): + input_sequence = manifest.loc[samp_mag, "filename"] + run_amrfinderplus_n( + tmp, + amrfinderplus_db, + input_sequence, + organism, + plus, + report_all_equal, + ident_min, + coverage_min, + translation_table, + threads, + ) + + for dir_format, file_name in zip( + [annotations, mutations, genes], + ["amr_annotations.tsv", "amr_mutations.tsv", str(samp_mag)[1]], + ): + if dir_format in [annotations, mutations]: + des_dir = os.path.join(str(dir_format), samp_mag[0], samp_mag[1]) + os.makedirs(des_dir, exist_ok=True) + shutil.move(os.path.join(tmp, file_name), des_dir) + + frequency_df = read_in_txt( + path=os.path.join(tmp, "amr_annotations.tsv"), + samp_bin_name=str(os.path.join(samp_mag[0], samp_mag[1])), + data_type="mags", + colname="Gene symbol", + ) + + frequency_list.append(frequency_df) + + feature_table = create_count_table(df_list=frequency_list) + return ( + annotations, + mutations, + genes, + feature_table, + ) + + +def run_amrfinderplus_n( + tmp, + amrfinderplus_db, + input_sequence, + organism: str = None, + plus: bool = False, + report_all_equal: bool = False, + ident_min: float = None, + coverage_min: float = None, + translation_table: int = None, + threads: int = None, +): + cmd = [ + "amrfinder", + "-n", + input_sequence, + "--database", + str(amrfinderplus_db), + "-o", + f"{tmp}/amr_annotations.tsv", + "--print_node", + "--alignment_tool", + "--nucleotide_output", + f"{tmp}/amr_genes.fasta", + "--mutation_all", + f"{tmp}/amr_mutations.fasta", + ] + if threads: + cmd.extend(["--threads", str(threads)]) + if organism: + cmd.extend(["--organism", organism]) + if plus: + cmd.append("--plus") + if report_all_equal: + cmd.append("--report_all_equal") + if ident_min: + cmd.extend(["--ident_min", str(ident_min)]) + if coverage_min: + cmd.extend(["--coverage_min", str(coverage_min)]) + if translation_table: + cmd.extend(["--translation_table", str(translation_table)]) + try: + run_command(cmd, tmp, verbose=True) + except subprocess.CalledProcessError as e: + raise Exception( + "An error was encountered while running AMRFinderPlus, " + f"(return code {e.returncode}), please inspect " + "stdout and stderr to learn more." + ) diff --git a/q2_amr/card/mags.py b/q2_amr/card/mags.py index 4a5c72f..8ba020c 100644 --- a/q2_amr/card/mags.py +++ b/q2_amr/card/mags.py @@ -46,7 +46,10 @@ def annotate_mags_card( shutil.move(f"{tmp}/output.json", json_path) samp_bin_name = os.path.join(samp_bin[0], samp_bin[1]) frequency_df = read_in_txt( - path=txt_path, samp_bin_name=samp_bin_name, data_type="mags" + path=txt_path, + samp_bin_name=samp_bin_name, + data_type="mags", + colname="Best_Hit_ARO", ) frequency_list.append(frequency_df) feature_table = create_count_table(df_list=frequency_list) diff --git a/q2_amr/card/reads.py b/q2_amr/card/reads.py index 05debc1..0d85d69 100644 --- a/q2_amr/card/reads.py +++ b/q2_amr/card/reads.py @@ -144,11 +144,12 @@ def _annotate_reads_card( path_txt = os.path.join( samp_tmp_dir, f"output.{map_type}_mapping_data.txt" ) + colname = "Reference Sequence" if map_type == "allele" else "ARO Term" frequency_table = read_in_txt( path=path_txt, samp_bin_name=samp, data_type="reads", - map_type=map_type, + colname=colname, ) table_list.append(frequency_table) diff --git a/q2_amr/card/tests/test_reads.py b/q2_amr/card/tests/test_reads.py index c3abaef..88a5de2 100644 --- a/q2_amr/card/tests/test_reads.py +++ b/q2_amr/card/tests/test_reads.py @@ -119,10 +119,12 @@ def annotate_reads_card_test_body(self, read_type): path=f"{tmp_dir}/{samp}/output.{map_type}_mapping_data.txt", samp_bin_name=samp, data_type="reads", - map_type=map_type, + colname=colname, ) for samp in ["sample1", "sample2"] - for map_type in ["allele", "gene"] + for map_type, colname in zip( + ["allele", "gene"], ["Reference Sequence", "ARO Term"] + ) ] # Expected call objects for mock_create_count_table diff --git a/q2_amr/card/tests/test_utils.py b/q2_amr/card/tests/test_utils.py index a8eb7ca..dc64094 100644 --- a/q2_amr/card/tests/test_utils.py +++ b/q2_amr/card/tests/test_utils.py @@ -151,6 +151,7 @@ def test_read_in_txt_mags(self): samp_bin_name="sample1/bin1", exp=self.mag_count_df, data_type="mags", + colname="Best_Hit_ARO", ) def test_read_in_txt_reads_allele(self): @@ -160,7 +161,7 @@ def test_read_in_txt_reads_allele(self): samp_bin_name="sample1", exp=self.allele_count_df, data_type="reads", - map_type="allele", + colname="Reference Sequence", ) def test_read_in_txt_reads_gene(self): @@ -170,15 +171,13 @@ def test_read_in_txt_reads_gene(self): samp_bin_name="sample1", exp=self.gene_count_df, data_type="reads", - map_type="gene", + colname="ARO Term", ) - def read_in_txt_test_body( - self, filename, samp_bin_name, exp, data_type, map_type=None - ): + def read_in_txt_test_body(self, filename, samp_bin_name, exp, data_type, colname): # Create expected and observed count dataframes and compare them obs = read_in_txt( - self.get_data_path(filename), samp_bin_name, data_type, map_type + self.get_data_path(filename), samp_bin_name, data_type, colname ) pd.testing.assert_frame_equal(exp, obs) diff --git a/q2_amr/card/utils.py b/q2_amr/card/utils.py index 8ed0441..6615109 100644 --- a/q2_amr/card/utils.py +++ b/q2_amr/card/utils.py @@ -95,20 +95,17 @@ def load_card_db( return kmer_size -def read_in_txt(path: str, samp_bin_name: str, data_type: str, map_type=None): +def read_in_txt(path: str, samp_bin_name: str, data_type: str, colname: str): # Read in txt file to pd.Dataframe df = pd.read_csv(path, sep="\t") # Process the df depending on the data type and mapping type if data_type == "reads": - colname = "Reference Sequence" if map_type == "allele" else "ARO Term" df = df[[colname, "All Mapped Reads"]] df.rename(columns={"All Mapped Reads": samp_bin_name}, inplace=True) - else: - df = df["Best_Hit_ARO"].value_counts().reset_index() - - # Rename the columns - df.columns = ["Best_Hit_ARO", samp_bin_name] + elif data_type == "mags": + df = df[colname].value_counts().reset_index() + df.columns = [colname, samp_bin_name] df = df.astype(str) return df diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index b4f763a..5eba666 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -9,6 +9,7 @@ from q2_types.feature_data import FeatureData from q2_types.feature_table import FeatureTable, Frequency +from q2_types.genome_data import Genes, GenomeData from q2_types.per_sample_sequences import ( MAGs, PairedEndSequencesWithQuality, @@ -19,6 +20,7 @@ Bool, Choices, Collection, + Float, Int, List, Properties, @@ -29,6 +31,7 @@ from qiime2.plugin import Citations, Plugin from q2_amr import __version__ +from q2_amr.amrfinderplus.mags import annotate_mags_amrfinderplus from q2_amr.amrfinderplus.types._format import ( AMRFinderPlusDatabaseDirFmt, ARMFinderPlusAnnotationDirFmt, @@ -1081,6 +1084,120 @@ citations=[citations["alcock_card_2023"]], ) +organisms = [ + "Acinetobacter_baumannii", + "Burkholderia_cepacia", + "Burkholderia_pseudomallei", + "Campylobacter", + "Citrobacter_freundii", + "Clostridioides_difficile", + "Enterobacter_asburiae", + "Enterobacter_cloacae", + "Enterococcus_faecalis", + "Enterococcus_faecium", + "Escherichia", + "Klebsiella_oxytoca", + "Klebsiella_pneumoniae", + "Neisseria_gonorrhoeae", + "Neisseria_meningitidis", + "Pseudomonas_aeruginosa", + "Salmonella", + "Serratia_marcescens", + "Staphylococcus_aureus", + "Staphylococcus_pseudintermedius", + "Streptococcus_agalactiae", + "Streptococcus_pneumoniae", + "Streptococcus_pyogenes", + "Vibrio_cholerae", + "Vibrio_parahaemolyticus", + "Vibrio_vulnificus", +] + +translation_tables = [ + "1", + "2", + "3", + "4", + "5", + "6", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "33", +] + +plugin.methods.register_function( + function=annotate_mags_amrfinderplus, + inputs={"mags": SampleData[MAGs], "amrfinderplus_db": AMRFinderPlusDatabase}, + parameters={ + "organism": Str % Choices(organisms), + "plus": Bool, + "report_all_equal": Bool, + "ident_min": Float % Range(-1, 1, inclusive_start=True, inclusive_end=True), + "coverage_min": Float % Range(0, 1, inclusive_start=True, inclusive_end=True), + "translation_table": Str % Choices(translation_tables), + "threads": Int % Range(0, None, inclusive_start=False), + }, + outputs=[ + ("annotations", SampleData[ARMFinderPlusAnnotations]), + ("mutations", SampleData[ARMFinderPlusAnnotations]), + ("genes", GenomeData[Genes]), + ("feature_table", FeatureTable[Frequency]), + ], + input_descriptions={ + "mags": "MAGs to be annotated with AMRFinderPlus.", + "amrfinderplus_db": "AMRFinderPlus Database.", + }, + parameter_descriptions={ + "organism": "Taxon used for screening known resistance causing point mutations " + "and blacklisting of common, non-informative genes.", + "plus": "Provide results from 'Plus' genes such as virulence factors, " + "stress-response genes, etc.", + "report_all_equal": "Report all equally scoring BLAST and HMM matches. This " + "will report multiple lines for a single element if there " + "are multiple reference proteins that have the same score. " + "On those lines the fields Accession of closest sequence " + "and Name of closest sequence will be different showing " + "each of the database proteins that are equally close to " + "the query sequence.", + "ident_min": "Minimum identity for a blast-based hit hit (Methods BLAST or " + "PARTIAL). -1 means use the curated threshold if it exists and " + "0.9 otherwise. Setting this value to something other than -1 " + "will override curated similarity cutoffs. We only recommend " + "using this option if you have a specific reason.", + "coverage_min": "Minimum proportion of reference gene covered for a " + "BLAST-based hit (Methods BLAST or PARTIAL).", + "translation_table": "Translation table used for BLASTX.", + "threads": "The number of threads to use for processing. AMRFinderPlus " + "defaults to 4 on hosts with >= 4 cores. Setting this number higher" + " than the number of cores on the running host may cause blastp to " + "fail. Using more than 4 threads may speed up searches.", + }, + output_descriptions={ + "amr_annotations": "AMR annotation as .txt and .json file.", + "feature_table": "Frequency table of ARGs in all samples.", + }, + name="Annotate MAGs with AMRFinderPlus.", + description="Annotate MAGs with antimicrobial resistance genes with AMRFinderPlus.", + citations=[], +) + # Registrations plugin.register_semantic_types( CARDDatabase, From 514688c7080bc9310dc83d0614bab702093a70fa Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 5 Jul 2024 11:39:55 +0200 Subject: [PATCH 15/50] registered annotations types in plusgin setup --- q2_amr/plugin_setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index b4f763a..4d6983c 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -1092,6 +1092,8 @@ CARDReadsAlleleKmerAnalysis, CARDMAGsKmerAnalysis, AMRFinderPlusDatabase, + ARMFinderPlusAnnotations, + ARMFinderPlusAnnotation, ) plugin.register_semantic_type_to_format( From a017eeb13be24fa9cd2e165a874858dbc92f69ab Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 5 Jul 2024 12:02:29 +0200 Subject: [PATCH 16/50] changes --- q2_amr/amrfinderplus/mags.py | 37 ++++++++++++++++++++---------------- q2_amr/plugin_setup.py | 20 +++++++++++++++++-- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py index 2f73d25..51cab65 100644 --- a/q2_amr/amrfinderplus/mags.py +++ b/q2_amr/amrfinderplus/mags.py @@ -22,9 +22,14 @@ def annotate_mags_amrfinderplus( report_all_equal: bool = False, ident_min: float = None, coverage_min: float = 0.5, - translation_table: int = 11, + translation_table: str = "11", threads: int = None, -) -> (ARMFinderPlusAnnotationsDirFmt, ARMFinderPlusAnnotationsDirFmt, pd.DataFrame): +) -> ( + ARMFinderPlusAnnotationsDirFmt, + ARMFinderPlusAnnotationsDirFmt, + GenesDirectoryFormat, + pd.DataFrame, +): manifest = mags.manifest.view(pd.DataFrame) annotations = ARMFinderPlusAnnotationsDirFmt() @@ -49,6 +54,13 @@ def annotate_mags_amrfinderplus( threads, ) + frequency_df = read_in_txt( + path=os.path.join(tmp, "amr_annotations.tsv"), + samp_bin_name=str(os.path.join(samp_mag[0], samp_mag[1])), + data_type="mags", + colname="Gene symbol", + ) + for dir_format, file_name in zip( [annotations, mutations, genes], ["amr_annotations.tsv", "amr_mutations.tsv", str(samp_mag)[1]], @@ -58,13 +70,6 @@ def annotate_mags_amrfinderplus( os.makedirs(des_dir, exist_ok=True) shutil.move(os.path.join(tmp, file_name), des_dir) - frequency_df = read_in_txt( - path=os.path.join(tmp, "amr_annotations.tsv"), - samp_bin_name=str(os.path.join(samp_mag[0], samp_mag[1])), - data_type="mags", - colname="Gene symbol", - ) - frequency_list.append(frequency_df) feature_table = create_count_table(df_list=frequency_list) @@ -80,13 +85,13 @@ def run_amrfinderplus_n( tmp, amrfinderplus_db, input_sequence, - organism: str = None, - plus: bool = False, - report_all_equal: bool = False, - ident_min: float = None, - coverage_min: float = None, - translation_table: int = None, - threads: int = None, + organism, + plus, + report_all_equal, + ident_min, + coverage_min, + translation_table, + threads, ): cmd = [ "amrfinder", diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index ebbcabd..dd3079b 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -1190,8 +1190,24 @@ "fail. Using more than 4 threads may speed up searches.", }, output_descriptions={ - "amr_annotations": "AMR annotation as .txt and .json file.", - "feature_table": "Frequency table of ARGs in all samples.", + "annotations": "AMR annotation as .txt and .json file.", + "mutations": "Report of genotypes at all locations screened for point " + "mutations. These files allow you to distinguish between called " + "point mutations that were the sensitive variant and the point " + "mutations that could not be called because the sequence was not " + "found. This file will contain all detected variants from the " + "reference sequence, so it could be used as an initial screen for " + "novel variants. Note 'Gene symbols' for mutations not in the " + "database (identifiable by [UNKNOWN] in the Sequence name field) " + "have offsets that are relative to the start of the sequence " + "indicated in the field 'Accession of closest sequence' while " + "'Gene symbols' from known point-mutation sites have gene symbols " + "that match the Pathogen Detection Reference Gene Catalog " + "standardized nomenclature for point mutations.", + "genes": "Sequences that were identified by AMRFinderPlus as AMR genes. This " + "will include the entire region that aligns to the references for " + "point mutations.", + "feature_table": "Presence/Absence table of ARGs in all samples.", }, name="Annotate MAGs with AMRFinderPlus.", description="Annotate MAGs with antimicrobial resistance genes with AMRFinderPlus.", From 07e9f52ef6f7713debc7491197a7ad63ed9dac91 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 5 Jul 2024 13:42:25 +0200 Subject: [PATCH 17/50] Revert "overwrite all pathmakers with code from busco moshpit" This reverts commit 8378b45cd57cdc8e68a1a3c6f7ef266136e4af7e. --- q2_amr/amrfinderplus/types/_format.py | 32 ++++++++++++++++++--------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index 71af13e..7f98c71 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -19,10 +19,6 @@ def _validate_(self, level): pass -def _path_maker(name): - return str(name) - - class AMRFinderPlusDatabaseDirFmt(model.DirectoryFormat): amr_lib = model.File("AMR.LIB", format=TextFormat) amr_lib_comp = model.FileCollection(r"^AMR\.LIB\.h3.$", format=BinaryFormat) @@ -46,10 +42,26 @@ class AMRFinderPlusDatabaseDirFmt(model.DirectoryFormat): amr_cds_comp = model.FileCollection(r"^AMR_CDS\.n..$", format=BinaryFormat) amr_cds = model.File("AMR_CDS", format=MixedCaseDNAFASTAFormat) - def __init__(self, path, mode): - super().__init__(path, mode) + @amr_lib_comp.set_path_maker + def amr_lib_comp_path_maker(self): + return r"^AMR\.LIB\.h3.$" + + @amrprot_blast.set_path_maker + def amrprot_blast_path_maker(self): + return r"^AMRProt\.p..$" + + @amr_dna.set_path_maker + def amr_dna_path_maker(self): + return r"^AMR_DNA-[a-zA-Z_]+$" + + @amr_dna_comp.set_path_maker + def amr_dna_comp_path_maker(self): + return r"^AMR_DNA-[a-zA-Z_]+\.n..$" + + @amr_cds_comp.set_path_maker + def amr_cds_comp_path_maker(self): + return r"^AMR_CDS\.n..$" - # Overwrite path maker methods for all file collections - for var_name, var_value in vars(self.__class__).items(): - if isinstance(var_value, model.FileCollection): - var_value.set_path_maker(_path_maker) + @amr_dna_tab.set_path_maker + def amr_dna_tab_path_maker(self): + return r"^AMR_DNA-[a-zA-Z_]+\.tab$" From 9237c737e964f31fac98ddc8a1118e7b36699bb5 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 5 Jul 2024 14:25:42 +0200 Subject: [PATCH 18/50] working action --- q2_amr/amrfinderplus/mags.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py index 51cab65..a0641d5 100644 --- a/q2_amr/amrfinderplus/mags.py +++ b/q2_amr/amrfinderplus/mags.py @@ -52,6 +52,7 @@ def annotate_mags_amrfinderplus( coverage_min, translation_table, threads, + samp_mag[1], ) frequency_df = read_in_txt( @@ -63,11 +64,17 @@ def annotate_mags_amrfinderplus( for dir_format, file_name in zip( [annotations, mutations, genes], - ["amr_annotations.tsv", "amr_mutations.tsv", str(samp_mag)[1]], + [ + "amr_annotations.tsv", + "amr_mutations.tsv", + f"{samp_mag[1]}_amr_genes.fasta", + ], ): if dir_format in [annotations, mutations]: des_dir = os.path.join(str(dir_format), samp_mag[0], samp_mag[1]) os.makedirs(des_dir, exist_ok=True) + else: + des_dir = str(dir_format) shutil.move(os.path.join(tmp, file_name), des_dir) frequency_list.append(frequency_df) @@ -92,6 +99,7 @@ def run_amrfinderplus_n( coverage_min, translation_table, threads, + mag_id, ): cmd = [ "amrfinder", @@ -102,11 +110,10 @@ def run_amrfinderplus_n( "-o", f"{tmp}/amr_annotations.tsv", "--print_node", - "--alignment_tool", "--nucleotide_output", - f"{tmp}/amr_genes.fasta", + f"{tmp}/{mag_id}_amr_genes.fasta", "--mutation_all", - f"{tmp}/amr_mutations.fasta", + f"{tmp}/amr_mutations.tsv", ] if threads: cmd.extend(["--threads", str(threads)]) From 34a34b88cea0ed08d96fa0940cc681683e623b35 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 5 Jul 2024 14:47:26 +0200 Subject: [PATCH 19/50] removed nested structure of annotaion type --- q2_amr/amrfinderplus/types/_format.py | 19 ++-------- ...de3-a957-7e8bf837f30d_amr_annotations.tsv} | 0 ...c4a-a53b-4df6999815dd_amr_annotations.tsv} | 0 .../tests/test_types_formats_transformers.py | 38 ++----------------- 4 files changed, 7 insertions(+), 50 deletions(-) rename q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/{e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv => e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv} (100%) rename q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/{aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.tsv => aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv} (100%) diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index 821c5b0..1337fe3 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -5,8 +5,6 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import os - import pandas as pd from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat from q2_types.per_sample_sequences._format import MultiDirValidationMixin @@ -116,22 +114,13 @@ def _validate_(self, level): class ARMFinderPlusAnnotationsDirFmt(MultiDirValidationMixin, model.DirectoryFormat): - annotation = model.FileCollection(r".+\.tsv$", format=ARMFinderPlusAnnotationFormat) + annotation = model.FileCollection( + r".+_amr_(annotations|mutations)\.tsv$", format=ARMFinderPlusAnnotationFormat + ) @annotation.set_path_maker def annotation_path_maker(self, sample_id, mag_id): - return rf"{sample_id}/{mag_id}/.+\.tsv" - - def sample_dict(self): - sample_dict = {} - for sample in self.path.iterdir(): - mag_dict = {} - for mag in sample.iterdir(): - mag_dict[mag.name] = [ - os.path.join(mag, "amr_annotation.tsv"), - ] - sample_dict[sample.name] = mag_dict - return sample_dict + return rf"{sample_id}/{mag_id}_amr_(annotations|mutations)\.tsv$" ARMFinderPlusAnnotationDirFmt = model.SingleFileDirectoryFormat( diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv similarity index 100% rename from q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d/amr_annotation.tsv rename to q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv similarity index 100% rename from q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd/amr_annotation.tsv rename to q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index b6d8ab6..f0672d5 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -5,8 +5,6 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import os - from qiime2.core.exceptions import ValidationError from qiime2.plugin.testing import TestPluginBase @@ -27,8 +25,8 @@ def test_amrfinderplus_database_directory_format_validate_positive(self): def test_amrfinderplus_annotation_format_validate_positive(self): filepath = self.get_data_path( - "annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd" - "/amr_annotation.tsv" + "annotation/no_coordinates/" + "aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv" ) format = ARMFinderPlusAnnotationFormat(filepath, mode="r") @@ -37,7 +35,7 @@ def test_amrfinderplus_annotation_format_validate_positive(self): def test_amrfinderplus_annotation_format_validate_positive_coordinates(self): filepath = self.get_data_path( "annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d" - "/amr_annotation.tsv" + "_amr_annotations.tsv" ) format = ARMFinderPlusAnnotationFormat(filepath, mode="r") format.validate() @@ -83,36 +81,6 @@ def test_amrfinderplus_annotation_format_validation_error(self): self.assertEqual(str(context.exception), expected_message) - def test_amrfinderplus_annotations_directory_format_sample_dict(self): - dirpath = self.get_data_path("annotation") - annotations = ARMFinderPlusAnnotationsDirFmt(dirpath, mode="r") - - obs = annotations.sample_dict() - - exp = { - "coordinates": { - "e026af61-d911-4de3-a957-7e8bf837f30d": [ - os.path.join( - annotations.path, - "coordinates", - "e026af61-d911-4de3-a957-7e8bf837f30d", - "amr_annotation.tsv", - ), - ] - }, - "no_coordinates": { - "aa447c99-ecd9-4c4a-a53b-4df6999815dd": [ - os.path.join( - annotations.path, - "no_coordinates", - "aa447c99-ecd9-4c4a-a53b-4df6999815dd", - "amr_annotation.tsv", - ), - ], - }, - } - self.assertEqual(obs, exp) - def test_amrfinderplus_annotation_directory_format(self): dirpath = self.get_data_path( "annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d" From 4e38e218f28d7eb0d0e70f45993ab97c3c5c2514 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 5 Jul 2024 15:01:54 +0200 Subject: [PATCH 20/50] working action with non nested output format --- q2_amr/amrfinderplus/mags.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py index a0641d5..40656da 100644 --- a/q2_amr/amrfinderplus/mags.py +++ b/q2_amr/amrfinderplus/mags.py @@ -41,6 +41,10 @@ def annotate_mags_amrfinderplus( with tempfile.TemporaryDirectory() as tmp: for samp_mag in list(manifest.index): input_sequence = manifest.loc[samp_mag, "filename"] + + sample_id = samp_mag[0] + mag_id = samp_mag[1] + run_amrfinderplus_n( tmp, amrfinderplus_db, @@ -52,12 +56,12 @@ def annotate_mags_amrfinderplus( coverage_min, translation_table, threads, - samp_mag[1], + mag_id, ) frequency_df = read_in_txt( - path=os.path.join(tmp, "amr_annotations.tsv"), - samp_bin_name=str(os.path.join(samp_mag[0], samp_mag[1])), + path=os.path.join(tmp, f"{mag_id}_amr_annotations.tsv"), + samp_bin_name=str(os.path.join(sample_id, mag_id)), data_type="mags", colname="Gene symbol", ) @@ -65,13 +69,13 @@ def annotate_mags_amrfinderplus( for dir_format, file_name in zip( [annotations, mutations, genes], [ - "amr_annotations.tsv", - "amr_mutations.tsv", - f"{samp_mag[1]}_amr_genes.fasta", + f"{mag_id}_amr_annotations.tsv", + f"{mag_id}_amr_mutations.tsv", + f"{mag_id}_amr_genes.fasta", ], ): if dir_format in [annotations, mutations]: - des_dir = os.path.join(str(dir_format), samp_mag[0], samp_mag[1]) + des_dir = os.path.join(str(dir_format), sample_id) os.makedirs(des_dir, exist_ok=True) else: des_dir = str(dir_format) @@ -108,12 +112,12 @@ def run_amrfinderplus_n( "--database", str(amrfinderplus_db), "-o", - f"{tmp}/amr_annotations.tsv", + f"{tmp}/{mag_id}_amr_annotations.tsv", "--print_node", "--nucleotide_output", f"{tmp}/{mag_id}_amr_genes.fasta", "--mutation_all", - f"{tmp}/amr_mutations.tsv", + f"{tmp}/{mag_id}_amr_mutations.tsv", ] if threads: cmd.extend(["--threads", str(threads)]) From 16db485a69fc972245c963b6d8d8fea941ab84b3 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Mon, 8 Jul 2024 12:35:38 +0200 Subject: [PATCH 21/50] changed magid to id in mags annotaiton --- q2_amr/amrfinderplus/mags.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py index 40656da..05444fa 100644 --- a/q2_amr/amrfinderplus/mags.py +++ b/q2_amr/amrfinderplus/mags.py @@ -103,7 +103,7 @@ def run_amrfinderplus_n( coverage_min, translation_table, threads, - mag_id, + id, ): cmd = [ "amrfinder", @@ -112,12 +112,12 @@ def run_amrfinderplus_n( "--database", str(amrfinderplus_db), "-o", - f"{tmp}/{mag_id}_amr_annotations.tsv", + f"{tmp}/{id}_amr_annotations.tsv", "--print_node", "--nucleotide_output", - f"{tmp}/{mag_id}_amr_genes.fasta", + f"{tmp}/{id}_amr_genes.fasta", "--mutation_all", - f"{tmp}/{mag_id}_amr_mutations.tsv", + f"{tmp}/{id}_amr_mutations.tsv", ] if threads: cmd.extend(["--threads", str(threads)]) From 8a902edc237437d3551a92b681ae9e0977882b10 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Mon, 8 Jul 2024 17:20:27 +0200 Subject: [PATCH 22/50] moved run fucntion into utils added protein option --- q2_amr/amrfinderplus/mags.py | 79 +++++++---------------------------- q2_amr/amrfinderplus/utils.py | 77 ++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 64 deletions(-) create mode 100644 q2_amr/amrfinderplus/utils.py diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py index 05444fa..0e8827b 100644 --- a/q2_amr/amrfinderplus/mags.py +++ b/q2_amr/amrfinderplus/mags.py @@ -1,6 +1,5 @@ import os import shutil -import subprocess import tempfile import pandas as pd @@ -11,7 +10,8 @@ AMRFinderPlusDatabaseDirFmt, ARMFinderPlusAnnotationsDirFmt, ) -from q2_amr.card.utils import create_count_table, read_in_txt, run_command +from q2_amr.amrfinderplus.utils import run_amrfinderplus_n +from q2_amr.card.utils import create_count_table, read_in_txt def annotate_mags_amrfinderplus( @@ -46,17 +46,19 @@ def annotate_mags_amrfinderplus( mag_id = samp_mag[1] run_amrfinderplus_n( - tmp, - amrfinderplus_db, - input_sequence, - organism, - plus, - report_all_equal, - ident_min, - coverage_min, - translation_table, - threads, - mag_id, + working_dir=tmp, + amrfinderplus_db=amrfinderplus_db, + dna_sequence=input_sequence, + protein_sequence=None, + gff=None, + organism=organism, + plus=plus, + report_all_equal=report_all_equal, + ident_min=ident_min, + coverage_min=coverage_min, + translation_table=translation_table, + threads=threads, + id=mag_id, ) frequency_df = read_in_txt( @@ -90,54 +92,3 @@ def annotate_mags_amrfinderplus( genes, feature_table, ) - - -def run_amrfinderplus_n( - tmp, - amrfinderplus_db, - input_sequence, - organism, - plus, - report_all_equal, - ident_min, - coverage_min, - translation_table, - threads, - id, -): - cmd = [ - "amrfinder", - "-n", - input_sequence, - "--database", - str(amrfinderplus_db), - "-o", - f"{tmp}/{id}_amr_annotations.tsv", - "--print_node", - "--nucleotide_output", - f"{tmp}/{id}_amr_genes.fasta", - "--mutation_all", - f"{tmp}/{id}_amr_mutations.tsv", - ] - if threads: - cmd.extend(["--threads", str(threads)]) - if organism: - cmd.extend(["--organism", organism]) - if plus: - cmd.append("--plus") - if report_all_equal: - cmd.append("--report_all_equal") - if ident_min: - cmd.extend(["--ident_min", str(ident_min)]) - if coverage_min: - cmd.extend(["--coverage_min", str(coverage_min)]) - if translation_table: - cmd.extend(["--translation_table", str(translation_table)]) - try: - run_command(cmd, tmp, verbose=True) - except subprocess.CalledProcessError as e: - raise Exception( - "An error was encountered while running AMRFinderPlus, " - f"(return code {e.returncode}), please inspect " - "stdout and stderr to learn more." - ) diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py new file mode 100644 index 0000000..b404d90 --- /dev/null +++ b/q2_amr/amrfinderplus/utils.py @@ -0,0 +1,77 @@ +import subprocess + +from q2_amr.card.utils import run_command + + +def run_amrfinderplus_n( + working_dir, + amrfinderplus_db, + dna_sequence, + protein_sequence, + gff, + organism, + plus, + report_all_equal, + ident_min, + coverage_min, + translation_table, + threads, + id, +): + cmd = [ + "amrfinder", + "--database", + str(amrfinderplus_db), + "-o", + f"{working_dir}/{id}_amr_annotations.tsv", + "--print_node", + ] + if dna_sequence: + cmd.extend( + [ + "-n", + dna_sequence, + "--nucleotide_output", + f"{working_dir}/{id}_amr_genes.fasta", + ] + ) + if protein_sequence: + cmd.extend( + [ + "-p", + protein_sequence, + "--protein_output", + f"{working_dir}/{id}_amr_proteins.fasta", + ] + ) + if gff: + cmd.extend(["-g", gff]) + if threads: + cmd.extend(["--threads", str(threads)]) + if organism: + cmd.extend( + [ + "--organism", + organism, + "--mutation_all", + f"{working_dir}/{id}_amr_mutations.tsv", + ] + ) + if plus: + cmd.append("--plus") + if report_all_equal: + cmd.append("--report_all_equal") + if ident_min: + cmd.extend(["--ident_min", str(ident_min)]) + if coverage_min: + cmd.extend(["--coverage_min", str(coverage_min)]) + if translation_table: + cmd.extend(["--translation_table", str(translation_table)]) + try: + run_command(cmd, working_dir, verbose=True) + except subprocess.CalledProcessError as e: + raise Exception( + "An error was encountered while running AMRFinderPlus, " + f"(return code {e.returncode}), please inspect " + "stdout and stderr to learn more." + ) From 0cb8492067f483dc3af8a06d4887856369ded799 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 9 Jul 2024 14:39:35 +0200 Subject: [PATCH 23/50] changed utils to not inlcude _ in filenames --- q2_amr/amrfinderplus/mags.py | 2 +- q2_amr/amrfinderplus/utils.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py index 0e8827b..bca7193 100644 --- a/q2_amr/amrfinderplus/mags.py +++ b/q2_amr/amrfinderplus/mags.py @@ -58,7 +58,7 @@ def annotate_mags_amrfinderplus( coverage_min=coverage_min, translation_table=translation_table, threads=threads, - id=mag_id, + id=mag_id + "_", ) frequency_df = read_in_txt( diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index b404d90..3ed20fa 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -23,7 +23,7 @@ def run_amrfinderplus_n( "--database", str(amrfinderplus_db), "-o", - f"{working_dir}/{id}_amr_annotations.tsv", + f"{working_dir}/{id}amr_annotations.tsv", "--print_node", ] if dna_sequence: @@ -32,7 +32,7 @@ def run_amrfinderplus_n( "-n", dna_sequence, "--nucleotide_output", - f"{working_dir}/{id}_amr_genes.fasta", + f"{working_dir}/{id}amr_genes.fasta", ] ) if protein_sequence: @@ -41,7 +41,7 @@ def run_amrfinderplus_n( "-p", protein_sequence, "--protein_output", - f"{working_dir}/{id}_amr_proteins.fasta", + f"{working_dir}/{id}amr_proteins.fasta", ] ) if gff: @@ -54,7 +54,7 @@ def run_amrfinderplus_n( "--organism", organism, "--mutation_all", - f"{working_dir}/{id}_amr_mutations.tsv", + f"{working_dir}/{id}amr_mutations.tsv", ] ) if plus: From 4f09ee752628197e5e3b8d9e414a8ddcd00e8e8a Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 9 Jul 2024 15:09:14 +0200 Subject: [PATCH 24/50] changed type of featuredata one to also include mutations in name --- q2_amr/amrfinderplus/types/_format.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index 1337fe3..ac08235 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -124,5 +124,7 @@ def annotation_path_maker(self, sample_id, mag_id): ARMFinderPlusAnnotationDirFmt = model.SingleFileDirectoryFormat( - "ARMFinderPlusAnnotationDirFmt", "amr_annotation.tsv", ARMFinderPlusAnnotationFormat + "ARMFinderPlusAnnotationDirFmt", + r"amr_(annotations|mutations)\.tsv$", + ARMFinderPlusAnnotationFormat, ) From bbaca6ef61d742576c18a18586ae8298671b3b1f Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 10 Jul 2024 15:05:47 +0200 Subject: [PATCH 25/50] changed type and path_maker --- q2_amr/amrfinderplus/types/_format.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index ac08235..9c2e251 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -115,12 +115,13 @@ def _validate_(self, level): class ARMFinderPlusAnnotationsDirFmt(MultiDirValidationMixin, model.DirectoryFormat): annotation = model.FileCollection( - r".+_amr_(annotations|mutations)\.tsv$", format=ARMFinderPlusAnnotationFormat + r".+amr_(annotations|mutations)\.tsv$", format=ARMFinderPlusAnnotationFormat ) @annotation.set_path_maker def annotation_path_maker(self, sample_id, mag_id): - return rf"{sample_id}/{mag_id}_amr_(annotations|mutations)\.tsv$" + prefix = f"{sample_id}/{mag_id}_" if mag_id else f"{sample_id}/" + return f"{prefix}amr_annotations.tsv" ARMFinderPlusAnnotationDirFmt = model.SingleFileDirectoryFormat( From 757721471c81999c7f352d841268ec8302629104 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 10 Jul 2024 15:36:40 +0200 Subject: [PATCH 26/50] added sampledata contigs as input --- q2_amr/amrfinderplus/mags.py | 82 ++++++++++++++++++++++------------- q2_amr/amrfinderplus/utils.py | 11 ++--- q2_amr/plugin_setup.py | 12 +++-- 3 files changed, 67 insertions(+), 38 deletions(-) diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py index bca7193..a0000f2 100644 --- a/q2_amr/amrfinderplus/mags.py +++ b/q2_amr/amrfinderplus/mags.py @@ -1,10 +1,11 @@ import os import shutil import tempfile +from typing import Union import pandas as pd from q2_types.genome_data import GenesDirectoryFormat -from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt +from q2_types.per_sample_sequences import ContigSequencesDirFmt, MultiMAGSequencesDirFmt from q2_amr.amrfinderplus.types import ( AMRFinderPlusDatabaseDirFmt, @@ -14,8 +15,8 @@ from q2_amr.card.utils import create_count_table, read_in_txt -def annotate_mags_amrfinderplus( - mags: MultiMAGSequencesDirFmt, +def annotate_sample_data_amrfinderplus( + sequences: Union[MultiMAGSequencesDirFmt, ContigSequencesDirFmt], amrfinderplus_db: AMRFinderPlusDatabaseDirFmt, organism: str = None, plus: bool = False, @@ -30,25 +31,43 @@ def annotate_mags_amrfinderplus( GenesDirectoryFormat, pd.DataFrame, ): - manifest = mags.manifest.view(pd.DataFrame) - annotations = ARMFinderPlusAnnotationsDirFmt() mutations = ARMFinderPlusAnnotationsDirFmt() genes = GenesDirectoryFormat() - frequency_list = [] - with tempfile.TemporaryDirectory() as tmp: - for samp_mag in list(manifest.index): - input_sequence = manifest.loc[samp_mag, "filename"] + # Create list of paths to all mags or contigs + if isinstance(sequences, MultiMAGSequencesDirFmt): + manifest = sequences.manifest.view(pd.DataFrame) + files = manifest["filename"] + else: + files = [ + os.path.join(str(sequences), file) for file in os.listdir(str(sequences)) + ] - sample_id = samp_mag[0] - mag_id = samp_mag[1] + with tempfile.TemporaryDirectory() as tmp: + # Iterate over paths of mags or contigs + for file in files: + # Set sample and mag ids and output file pats for mag or contig + if isinstance(sequences, MultiMAGSequencesDirFmt): + index_value = manifest.query("filename == @file").index[0] + sample_id = index_value[0] + mag_id = index_value[1] + annotations_path = os.path.join(tmp, f"{mag_id}_amr_annotations.tsv") + mutations_path = os.path.join(tmp, f"{mag_id}_amr_mutations.tsv") + genes_path = os.path.join(tmp, f"{mag_id}_amr_genes.fasta") + else: + sample_id = os.path.splitext(os.path.basename(file))[0][:-8] + mag_id = "" + annotations_path = os.path.join(tmp, "amr_annotations.tsv") + mutations_path = os.path.join(tmp, "amr_mutations.tsv") + genes_path = os.path.join(tmp, f"{sample_id}_amr_genes.fasta") + # Run amrfinderplus run_amrfinderplus_n( working_dir=tmp, amrfinderplus_db=amrfinderplus_db, - dna_sequence=input_sequence, + dna_sequence=file, protein_sequence=None, gff=None, organism=organism, @@ -58,32 +77,37 @@ def annotate_mags_amrfinderplus( coverage_min=coverage_min, translation_table=translation_table, threads=threads, - id=mag_id + "_", + mag_id=mag_id, + sample_id=sample_id, ) + # Create frequency dataframe and append it to list frequency_df = read_in_txt( - path=os.path.join(tmp, f"{mag_id}_amr_annotations.tsv"), + path=os.path.join(tmp, annotations_path), samp_bin_name=str(os.path.join(sample_id, mag_id)), data_type="mags", colname="Gene symbol", ) + frequency_list.append(frequency_df) - for dir_format, file_name in zip( - [annotations, mutations, genes], - [ - f"{mag_id}_amr_annotations.tsv", - f"{mag_id}_amr_mutations.tsv", - f"{mag_id}_amr_genes.fasta", - ], - ): - if dir_format in [annotations, mutations]: - des_dir = os.path.join(str(dir_format), sample_id) - os.makedirs(des_dir, exist_ok=True) - else: - des_dir = str(dir_format) - shutil.move(os.path.join(tmp, file_name), des_dir) + # Move mutations file. If it is not created, create an empty mutations file + des_dir_mutations = os.path.join(str(mutations), sample_id) + os.makedirs(des_dir_mutations, exist_ok=True) + if organism: + shutil.move(mutations_path, des_dir_mutations) + else: + with open( + os.path.join(str(mutations), os.path.basename(mutations_path)), "w" + ): + pass - frequency_list.append(frequency_df) + # Move annotations file + des_dir_annotations = os.path.join(str(annotations), sample_id) + os.makedirs(des_dir_annotations, exist_ok=True) + shutil.move(annotations_path, des_dir_annotations) + + # Move genes file + shutil.move(genes_path, str(genes)) feature_table = create_count_table(df_list=frequency_list) return ( diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index 3ed20fa..12785eb 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -16,14 +16,15 @@ def run_amrfinderplus_n( coverage_min, translation_table, threads, - id, + mag_id, + sample_id, ): cmd = [ "amrfinder", "--database", str(amrfinderplus_db), "-o", - f"{working_dir}/{id}amr_annotations.tsv", + f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_annotations.tsv", "--print_node", ] if dna_sequence: @@ -32,7 +33,7 @@ def run_amrfinderplus_n( "-n", dna_sequence, "--nucleotide_output", - f"{working_dir}/{id}amr_genes.fasta", + f"{working_dir}/{mag_id if mag_id else sample_id}_amr_genes.fasta", ] ) if protein_sequence: @@ -41,7 +42,7 @@ def run_amrfinderplus_n( "-p", protein_sequence, "--protein_output", - f"{working_dir}/{id}amr_proteins.fasta", + f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_proteins.fasta", ] ) if gff: @@ -54,7 +55,7 @@ def run_amrfinderplus_n( "--organism", organism, "--mutation_all", - f"{working_dir}/{id}amr_mutations.tsv", + f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_mutations.tsv", ] ) if plus: diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index dd3079b..cd11888 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -11,6 +11,7 @@ from q2_types.feature_table import FeatureTable, Frequency from q2_types.genome_data import Genes, GenomeData from q2_types.per_sample_sequences import ( + Contigs, MAGs, PairedEndSequencesWithQuality, SequencesWithQuality, @@ -31,7 +32,7 @@ from qiime2.plugin import Citations, Plugin from q2_amr import __version__ -from q2_amr.amrfinderplus.mags import annotate_mags_amrfinderplus +from q2_amr.amrfinderplus.mags import annotate_sample_data_amrfinderplus from q2_amr.amrfinderplus.types._format import ( AMRFinderPlusDatabaseDirFmt, ARMFinderPlusAnnotationDirFmt, @@ -1143,8 +1144,11 @@ ] plugin.methods.register_function( - function=annotate_mags_amrfinderplus, - inputs={"mags": SampleData[MAGs], "amrfinderplus_db": AMRFinderPlusDatabase}, + function=annotate_sample_data_amrfinderplus, + inputs={ + "sequences": SampleData[MAGs | Contigs], + "amrfinderplus_db": AMRFinderPlusDatabase, + }, parameters={ "organism": Str % Choices(organisms), "plus": Bool, @@ -1161,7 +1165,7 @@ ("feature_table", FeatureTable[Frequency]), ], input_descriptions={ - "mags": "MAGs to be annotated with AMRFinderPlus.", + "sequences": "MAGs to be annotated with AMRFinderPlus.", "amrfinderplus_db": "AMRFinderPlus Database.", }, parameter_descriptions={ From b97152ffcdf4157bfabda714e735a664d31afb2a Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 10 Jul 2024 16:04:41 +0200 Subject: [PATCH 27/50] added validation positive for emty files --- q2_amr/amrfinderplus/types/_format.py | 25 +++++++++++-------- .../tests/test_types_formats_transformers.py | 11 ++++++++ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index 9c2e251..c74f5d3 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -71,7 +71,7 @@ def amr_dna_tab_path_maker(self): class ARMFinderPlusAnnotationFormat(model.TextFileFormat): - def _validate(self, n_records=None): + def _validate(self): header_coordinates = [ "Protein identifier", "Contig id", @@ -98,16 +98,19 @@ def _validate(self, n_records=None): "Hierarchy node", ] header = header_coordinates[:1] + header_coordinates[5:] - header_obs = pd.read_csv(str(self), sep="\t", nrows=0).columns.tolist() - if header != header_obs and header_coordinates != header_obs: - raise ValidationError( - "Header line does not match ARMFinderPlusAnnotation format. Must " - "consist of the following values: " - + ", ".join(header_coordinates) - + ".\nWhile Contig id, Start, Stop and Strand are optional." - + ".\n\nFound instead: " - + ", ".join(header_obs) - ) + try: + header_obs = pd.read_csv(str(self), sep="\t", nrows=0).columns.tolist() + if header != header_obs and header_coordinates != header_obs: + raise ValidationError( + "Header line does not match ARMFinderPlusAnnotation format. Must " + "consist of the following values: " + + ", ".join(header_coordinates) + + ".\nWhile Contig id, Start, Stop and Strand are optional." + + ".\n\nFound instead: " + + ", ".join(header_obs) + ) + except pd.errors.EmptyDataError: + pass def _validate_(self, level): self._validate() diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index f0672d5..3164780 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -5,6 +5,9 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import os +import tempfile + from qiime2.core.exceptions import ValidationError from qiime2.plugin.testing import TestPluginBase @@ -40,6 +43,14 @@ def test_amrfinderplus_annotation_format_validate_positive_coordinates(self): format = ARMFinderPlusAnnotationFormat(filepath, mode="r") format.validate() + def test_amrfinderplus_annotation_format_validate_positive_empty(self): + with tempfile.TemporaryDirectory() as temp_dir: + temp_file_path = os.path.join(temp_dir, "amr_annotations.tsv") + with open(temp_file_path, "w"): + pass + format = ARMFinderPlusAnnotationFormat(temp_file_path, mode="r") + format.validate() + def test_amrfinderplus_annotation_format_validation_error(self): with self.assertRaises(ValidationError) as context: path = self.get_data_path("annotation_wrong/amr_annotation.tsv") From fa019d504c472a95a5774a45337df28bd5dd9135 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 10 Jul 2024 16:24:58 +0200 Subject: [PATCH 28/50] fixed bug in mutations empty file creation --- q2_amr/amrfinderplus/mags.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py index a0000f2..05224f2 100644 --- a/q2_amr/amrfinderplus/mags.py +++ b/q2_amr/amrfinderplus/mags.py @@ -97,7 +97,12 @@ def annotate_sample_data_amrfinderplus( shutil.move(mutations_path, des_dir_mutations) else: with open( - os.path.join(str(mutations), os.path.basename(mutations_path)), "w" + os.path.join( + str(mutations), + des_dir_mutations, + os.path.basename(mutations_path), + ), + "w", ): pass From 86babac72bfd952b24a0e2bc670a2e66ebd22d3b Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 10 Jul 2024 16:27:16 +0200 Subject: [PATCH 29/50] fixed other bug in mutations empty file creation --- q2_amr/amrfinderplus/mags.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py index 05224f2..54e8719 100644 --- a/q2_amr/amrfinderplus/mags.py +++ b/q2_amr/amrfinderplus/mags.py @@ -97,11 +97,7 @@ def annotate_sample_data_amrfinderplus( shutil.move(mutations_path, des_dir_mutations) else: with open( - os.path.join( - str(mutations), - des_dir_mutations, - os.path.basename(mutations_path), - ), + os.path.join(des_dir_mutations, os.path.basename(mutations_path)), "w", ): pass From 783870d4ac80459cda05169238a9df7ac1ea84c5 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 11 Jul 2024 13:37:57 +0200 Subject: [PATCH 30/50] changed utils protein and nucleotide naming --- q2_amr/amrfinderplus/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index 12785eb..ae43cb0 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -33,7 +33,8 @@ def run_amrfinderplus_n( "-n", dna_sequence, "--nucleotide_output", - f"{working_dir}/{mag_id if mag_id else sample_id}_amr_genes.fasta", + f"{working_dir}/\ + {mag_id + '_' if mag_id else sample_id + '_'}amr_genes.fasta", ] ) if protein_sequence: @@ -42,7 +43,7 @@ def run_amrfinderplus_n( "-p", protein_sequence, "--protein_output", - f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_proteins.fasta", + f"{working_dir}/{sample_id}_amr_proteins.fasta", ] ) if gff: From 4b9c200eadc2746cd464b9de761c0f9cd48b4ac3 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 11 Jul 2024 13:39:17 +0200 Subject: [PATCH 31/50] changed utils protein and nucleotide naming 2 --- q2_amr/amrfinderplus/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index ae43cb0..6105152 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -33,8 +33,7 @@ def run_amrfinderplus_n( "-n", dna_sequence, "--nucleotide_output", - f"{working_dir}/\ - {mag_id + '_' if mag_id else sample_id + '_'}amr_genes.fasta", + f"{working_dir}/{mag_id if mag_id else sample_id}_amr_genes.fasta", ] ) if protein_sequence: From 107c039b7c6b02e18e9caf3ed0b19030f6786b49 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 11 Jul 2024 13:44:19 +0200 Subject: [PATCH 32/50] Revert "changed utils protein and nucleotide naming" This reverts commit 783870d4ac80459cda05169238a9df7ac1ea84c5. --- q2_amr/amrfinderplus/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index 6105152..12785eb 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -42,7 +42,7 @@ def run_amrfinderplus_n( "-p", protein_sequence, "--protein_output", - f"{working_dir}/{sample_id}_amr_proteins.fasta", + f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_proteins.fasta", ] ) if gff: From 92c840046c479c9d44c3fa2d7b50017e000b4277 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 11 Jul 2024 13:44:25 +0200 Subject: [PATCH 33/50] Revert "changed utils protein and nucleotide naming 2" This reverts commit 4b9c200eadc2746cd464b9de761c0f9cd48b4ac3. --- q2_amr/amrfinderplus/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index 12785eb..44df7f9 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -33,7 +33,8 @@ def run_amrfinderplus_n( "-n", dna_sequence, "--nucleotide_output", - f"{working_dir}/{mag_id if mag_id else sample_id}_amr_genes.fasta", + f"{working_dir}/\ + {mag_id + '_' if mag_id else sample_id + '_'}amr_genes.fasta", ] ) if protein_sequence: From 3813a2b081535e3d4bc880731fa5ba8624195976 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 11 Jul 2024 14:26:21 +0200 Subject: [PATCH 34/50] changed mag and samplename addition to main function --- q2_amr/amrfinderplus/mags.py | 42 ++++++++++++++++++----------------- q2_amr/amrfinderplus/utils.py | 11 ++++----- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/mags.py index 54e8719..ba849a1 100644 --- a/q2_amr/amrfinderplus/mags.py +++ b/q2_amr/amrfinderplus/mags.py @@ -53,15 +53,9 @@ def annotate_sample_data_amrfinderplus( index_value = manifest.query("filename == @file").index[0] sample_id = index_value[0] mag_id = index_value[1] - annotations_path = os.path.join(tmp, f"{mag_id}_amr_annotations.tsv") - mutations_path = os.path.join(tmp, f"{mag_id}_amr_mutations.tsv") - genes_path = os.path.join(tmp, f"{mag_id}_amr_genes.fasta") else: sample_id = os.path.splitext(os.path.basename(file))[0][:-8] mag_id = "" - annotations_path = os.path.join(tmp, "amr_annotations.tsv") - mutations_path = os.path.join(tmp, "amr_mutations.tsv") - genes_path = os.path.join(tmp, f"{sample_id}_amr_genes.fasta") # Run amrfinderplus run_amrfinderplus_n( @@ -77,13 +71,11 @@ def annotate_sample_data_amrfinderplus( coverage_min=coverage_min, translation_table=translation_table, threads=threads, - mag_id=mag_id, - sample_id=sample_id, ) # Create frequency dataframe and append it to list frequency_df = read_in_txt( - path=os.path.join(tmp, annotations_path), + path=os.path.join(tmp, "amr_annotations.tsv"), samp_bin_name=str(os.path.join(sample_id, mag_id)), data_type="mags", colname="Gene symbol", @@ -91,24 +83,34 @@ def annotate_sample_data_amrfinderplus( frequency_list.append(frequency_df) # Move mutations file. If it is not created, create an empty mutations file - des_dir_mutations = os.path.join(str(mutations), sample_id) - os.makedirs(des_dir_mutations, exist_ok=True) + des_path_mutations = os.path.join( + str(mutations), + sample_id, + f"{mag_id + '_' if mag_id else ''}amr_mutations.tsv", + ) + os.makedirs(os.path.dirname(des_path_mutations), exist_ok=True) if organism: - shutil.move(mutations_path, des_dir_mutations) + shutil.move(os.path.join(tmp, "amr_mutations.tsv"), des_path_mutations) else: - with open( - os.path.join(des_dir_mutations, os.path.basename(mutations_path)), - "w", - ): + with open(des_path_mutations, "w"): pass # Move annotations file - des_dir_annotations = os.path.join(str(annotations), sample_id) - os.makedirs(des_dir_annotations, exist_ok=True) - shutil.move(annotations_path, des_dir_annotations) + des_path_annotations = os.path.join( + str(annotations), + sample_id, + f"{mag_id + '_' if mag_id else ''}amr_annotations.tsv", + ) + os.makedirs(os.path.dirname(des_path_annotations), exist_ok=True) + shutil.move(os.path.join(tmp, "amr_annotations.tsv"), des_path_annotations) # Move genes file - shutil.move(genes_path, str(genes)) + shutil.move( + os.path.join(tmp, "amr_genes.fasta"), + os.path.join( + str(genes), f"{mag_id if mag_id else sample_id}_amr_genes.fasta" + ), + ) feature_table = create_count_table(df_list=frequency_list) return ( diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index 44df7f9..ee30541 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -16,15 +16,13 @@ def run_amrfinderplus_n( coverage_min, translation_table, threads, - mag_id, - sample_id, ): cmd = [ "amrfinder", "--database", str(amrfinderplus_db), "-o", - f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_annotations.tsv", + f"{working_dir}/amr_annotations.tsv", "--print_node", ] if dna_sequence: @@ -33,8 +31,7 @@ def run_amrfinderplus_n( "-n", dna_sequence, "--nucleotide_output", - f"{working_dir}/\ - {mag_id + '_' if mag_id else sample_id + '_'}amr_genes.fasta", + f"{working_dir}/amr_genes.fasta", ] ) if protein_sequence: @@ -43,7 +40,7 @@ def run_amrfinderplus_n( "-p", protein_sequence, "--protein_output", - f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_proteins.fasta", + f"{working_dir}/amr_proteins.fasta", ] ) if gff: @@ -56,7 +53,7 @@ def run_amrfinderplus_n( "--organism", organism, "--mutation_all", - f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_mutations.tsv", + f"{working_dir}/amr_mutations.tsv", ] ) if plus: From 14a3b62b9486e236249b477f216bbdb49cc95258 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 12 Jul 2024 15:04:48 +0200 Subject: [PATCH 35/50] added tests for utils and sample data --- .../amrfinderplus/{mags.py => sample_data.py} | 0 q2_amr/amrfinderplus/tests/data/MANIFEST_mags | 2 + .../amrfinderplus/tests/test_sample_data.py | 101 ++++++++++++++++++ q2_amr/amrfinderplus/tests/test_utils.py | 91 ++++++++++++++++ q2_amr/plugin_setup.py | 9 +- 5 files changed, 199 insertions(+), 4 deletions(-) rename q2_amr/amrfinderplus/{mags.py => sample_data.py} (100%) create mode 100644 q2_amr/amrfinderplus/tests/data/MANIFEST_mags create mode 100644 q2_amr/amrfinderplus/tests/test_sample_data.py create mode 100644 q2_amr/amrfinderplus/tests/test_utils.py diff --git a/q2_amr/amrfinderplus/mags.py b/q2_amr/amrfinderplus/sample_data.py similarity index 100% rename from q2_amr/amrfinderplus/mags.py rename to q2_amr/amrfinderplus/sample_data.py diff --git a/q2_amr/amrfinderplus/tests/data/MANIFEST_mags b/q2_amr/amrfinderplus/tests/data/MANIFEST_mags new file mode 100644 index 0000000..675101b --- /dev/null +++ b/q2_amr/amrfinderplus/tests/data/MANIFEST_mags @@ -0,0 +1,2 @@ +sample-id,mag-id,filename +sample1,mag1,sample1/mag1.fasta diff --git a/q2_amr/amrfinderplus/tests/test_sample_data.py b/q2_amr/amrfinderplus/tests/test_sample_data.py new file mode 100644 index 0000000..4d1c07c --- /dev/null +++ b/q2_amr/amrfinderplus/tests/test_sample_data.py @@ -0,0 +1,101 @@ +import os +import shutil +from unittest.mock import MagicMock, patch + +from q2_types.per_sample_sequences import ContigSequencesDirFmt, MultiMAGSequencesDirFmt +from qiime2.plugin.testing import TestPluginBase + +from q2_amr.amrfinderplus.sample_data import annotate_sample_data_amrfinderplus +from q2_amr.amrfinderplus.types import AMRFinderPlusDatabaseDirFmt + + +class TestAnnotateMagsCard(TestPluginBase): + package = "q2_amr.amrfinderplus.tests" + + def mock_run_amrfinderplus_n( + self, + working_dir, + amrfinderplus_db, + dna_sequence, + protein_sequence, + gff, + organism, + plus, + report_all_equal, + ident_min, + coverage_min, + translation_table, + threads, + ): + with open(os.path.join(working_dir, "amr_annotations.tsv"), "w"): + pass + if organism: + with open(os.path.join(working_dir, "amr_mutations.tsv"), "w"): + pass + if dna_sequence: + with open(os.path.join(working_dir, "amr_genes.fasta"), "w"): + pass + + files_contigs = [ + "amr_annotations.tsv", + "amr_mutations.tsv", + "sample1_amr_genes.fasta", + ] + + files_mags = [ + "mag1_amr_annotations.tsv", + "mag1_amr_mutations.tsv", + "mag1_amr_genes.fasta", + ] + + def test_annotate_sample_data_amrfinderplus_mags(self): + manifest = self.get_data_path("MANIFEST_mags") + sequences = MultiMAGSequencesDirFmt() + shutil.copy(manifest, os.path.join(str(sequences), "MANIFEST")) + self._helper(sequences=sequences, organism=None, files=self.files_mags) + + def test_annotate_sample_data_amrfinderplus_mags_organism(self): + manifest = self.get_data_path("MANIFEST_mags") + sequences = MultiMAGSequencesDirFmt() + shutil.copy(manifest, os.path.join(str(sequences), "MANIFEST")) + self._helper(sequences, "Escherichia", files=self.files_mags) + + def test_annotate_sample_data_amrfinderplus_contigs(self): + sequences = ContigSequencesDirFmt() + with open(os.path.join(str(sequences), "sample1_contigs.fasta"), "w"): + pass + self._helper(sequences=sequences, organism=None, files=self.files_contigs) + + def test_annotate_sample_data_amrfinderplus_contigs_organism(self): + sequences = ContigSequencesDirFmt() + with open(os.path.join(str(sequences), "sample1_contigs.fasta"), "w"): + pass + self._helper( + sequences=sequences, organism="Escherichia", files=self.files_contigs + ) + + def _helper(self, sequences, organism, files): + amrfinderplus_db = AMRFinderPlusDatabaseDirFmt() + mock_create_count_table = MagicMock() + mock_read_in_txt = MagicMock() + with patch( + "q2_amr.amrfinderplus.sample_data.run_amrfinderplus_n", + side_effect=self.mock_run_amrfinderplus_n, + ), patch( + "q2_amr.amrfinderplus.sample_data.read_in_txt", mock_read_in_txt + ), patch( + "q2_amr.amrfinderplus.sample_data.create_count_table", + mock_create_count_table, + ): + result = annotate_sample_data_amrfinderplus( + sequences=sequences, + amrfinderplus_db=amrfinderplus_db, + organism=organism, + ) + self.assertTrue( + os.path.exists(os.path.join(str(result[0]), "sample1", files[0])) + ) + self.assertTrue( + os.path.exists(os.path.join(str(result[1]), "sample1", files[1])) + ) + self.assertTrue(os.path.exists(os.path.join(str(result[2]), files[2]))) diff --git a/q2_amr/amrfinderplus/tests/test_utils.py b/q2_amr/amrfinderplus/tests/test_utils.py new file mode 100644 index 0000000..f464e77 --- /dev/null +++ b/q2_amr/amrfinderplus/tests/test_utils.py @@ -0,0 +1,91 @@ +from unittest.mock import patch + +from qiime2.plugin.testing import TestPluginBase + +from q2_amr.amrfinderplus.utils import run_amrfinderplus_n + + +class TestAnnotateMagsCard(TestPluginBase): + package = "q2_amr.amrfinderplus.tests" + + @patch("q2_amr.amrfinderplus.utils.run_command") + def test_run_amrfinderplus_n(self, mock_run_command): + run_amrfinderplus_n( + working_dir="path_dir", + amrfinderplus_db="amrfinderplus_db", + dna_sequence="dna_sequence", + protein_sequence="protein_sequence", + gff="gff", + organism="Escherichia", + plus=True, + report_all_equal=True, + ident_min=1, + coverage_min=1, + translation_table="11", + threads=4, + ) + mock_run_command.assert_called_once_with( + [ + "amrfinder", + "--database", + "amrfinderplus_db", + "-o", + "path_dir/amr_annotations.tsv", + "--print_node", + "-n", + "dna_sequence", + "--nucleotide_output", + "path_dir/amr_genes.fasta", + "-p", + "protein_sequence", + "--protein_output", + "path_dir/amr_proteins.fasta", + "-g", + "gff", + "--threads", + "4", + "--organism", + "Escherichia", + "--mutation_all", + "path_dir/amr_mutations.tsv", + "--plus", + "--report_all_equal", + "--ident_min", + "1", + "--coverage_min", + "1", + "--translation_table", + "11", + ], + "path_dir", + verbose=True, + ) + + @patch("q2_amr.amrfinderplus.utils.run_command") + def test_run_amrfinderplus_n_minimal(self, mock_run_command): + run_amrfinderplus_n( + working_dir="path_dir", + amrfinderplus_db="amrfinderplus_db", + dna_sequence=None, + protein_sequence=None, + gff=None, + organism=None, + plus=False, + report_all_equal=False, + ident_min=None, + coverage_min=None, + translation_table=None, + threads=None, + ) + mock_run_command.assert_called_once_with( + [ + "amrfinder", + "--database", + "amrfinderplus_db", + "-o", + "path_dir/amr_annotations.tsv", + "--print_node", + ], + "path_dir", + verbose=True, + ) diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index cd11888..04b9aff 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -32,7 +32,7 @@ from qiime2.plugin import Citations, Plugin from q2_amr import __version__ -from q2_amr.amrfinderplus.mags import annotate_sample_data_amrfinderplus +from q2_amr.amrfinderplus.sample_data import annotate_sample_data_amrfinderplus from q2_amr.amrfinderplus.types._format import ( AMRFinderPlusDatabaseDirFmt, ARMFinderPlusAnnotationDirFmt, @@ -1165,7 +1165,7 @@ ("feature_table", FeatureTable[Frequency]), ], input_descriptions={ - "sequences": "MAGs to be annotated with AMRFinderPlus.", + "sequences": "MAGs or contigs to be annotated with AMRFinderPlus.", "amrfinderplus_db": "AMRFinderPlus Database.", }, parameter_descriptions={ @@ -1213,8 +1213,9 @@ "point mutations.", "feature_table": "Presence/Absence table of ARGs in all samples.", }, - name="Annotate MAGs with AMRFinderPlus.", - description="Annotate MAGs with antimicrobial resistance genes with AMRFinderPlus.", + name="Annotate MAGs or contigs with AMRFinderPlus.", + description="Annotate sample data MAGs or contigs with antimicrobial resistance " + "genes with AMRFinderPlus.", citations=[], ) From db37f811f5b4fafe5f53491b3880f6b697d23c36 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 12 Jul 2024 15:08:32 +0200 Subject: [PATCH 36/50] changed the way manifest is loaded --- q2_amr/amrfinderplus/tests/data/MANIFEST_mags | 2 -- q2_amr/amrfinderplus/tests/test_sample_data.py | 9 ++++----- 2 files changed, 4 insertions(+), 7 deletions(-) delete mode 100644 q2_amr/amrfinderplus/tests/data/MANIFEST_mags diff --git a/q2_amr/amrfinderplus/tests/data/MANIFEST_mags b/q2_amr/amrfinderplus/tests/data/MANIFEST_mags deleted file mode 100644 index 675101b..0000000 --- a/q2_amr/amrfinderplus/tests/data/MANIFEST_mags +++ /dev/null @@ -1,2 +0,0 @@ -sample-id,mag-id,filename -sample1,mag1,sample1/mag1.fasta diff --git a/q2_amr/amrfinderplus/tests/test_sample_data.py b/q2_amr/amrfinderplus/tests/test_sample_data.py index 4d1c07c..0b8e701 100644 --- a/q2_amr/amrfinderplus/tests/test_sample_data.py +++ b/q2_amr/amrfinderplus/tests/test_sample_data.py @@ -1,5 +1,4 @@ import os -import shutil from unittest.mock import MagicMock, patch from q2_types.per_sample_sequences import ContigSequencesDirFmt, MultiMAGSequencesDirFmt @@ -49,15 +48,15 @@ def mock_run_amrfinderplus_n( ] def test_annotate_sample_data_amrfinderplus_mags(self): - manifest = self.get_data_path("MANIFEST_mags") sequences = MultiMAGSequencesDirFmt() - shutil.copy(manifest, os.path.join(str(sequences), "MANIFEST")) + with open(os.path.join(str(sequences), "MANIFEST"), "w") as file: + file.write("sample-id,mag-id,filename\nsample1,mag1,sample1/mag1.fasta\n") self._helper(sequences=sequences, organism=None, files=self.files_mags) def test_annotate_sample_data_amrfinderplus_mags_organism(self): - manifest = self.get_data_path("MANIFEST_mags") sequences = MultiMAGSequencesDirFmt() - shutil.copy(manifest, os.path.join(str(sequences), "MANIFEST")) + with open(os.path.join(str(sequences), "MANIFEST"), "w") as file: + file.write("sample-id,mag-id,filename\nsample1,mag1,sample1/mag1.fasta\n") self._helper(sequences, "Escherichia", files=self.files_mags) def test_annotate_sample_data_amrfinderplus_contigs(self): From a0343d940caa01bb6f267afc052a9981dcad17e1 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 16 Jul 2024 15:29:18 +0200 Subject: [PATCH 37/50] added database_format_version --- q2_amr/amrfinderplus/types/_format.py | 1 + .../types/tests/data/database/database_format_version.txt | 0 2 files changed, 1 insertion(+) create mode 100644 q2_amr/amrfinderplus/types/tests/data/database/database_format_version.txt diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index ba03052..c8feef3 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -33,6 +33,7 @@ class AMRFinderPlusDatabaseDirFmt(model.DirectoryFormat): fam = model.File("fam.tab", format=TextFormat) taxgroup = model.File("taxgroup.tab", format=TextFormat) version = model.File("version.txt", format=TextFormat) + db_fmt_version = model.File("database_format_version.txt", format=TextFormat) amr_dna = model.FileCollection( r"^AMR_DNA-[a-zA-Z_]+$", format=MixedCaseDNAFASTAFormat ) diff --git a/q2_amr/amrfinderplus/types/tests/data/database/database_format_version.txt b/q2_amr/amrfinderplus/types/tests/data/database/database_format_version.txt new file mode 100644 index 0000000..e69de29 From 06e809a081d413f7084be3a34b86b166e4a78eb0 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 16 Jul 2024 15:56:24 +0200 Subject: [PATCH 38/50] added cureated_indet as parameter --- q2_amr/amrfinderplus/sample_data.py | 1 + q2_amr/amrfinderplus/tests/test_utils.py | 4 ++++ q2_amr/amrfinderplus/utils.py | 5 ++++- q2_amr/plugin_setup.py | 11 +++++++---- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/q2_amr/amrfinderplus/sample_data.py b/q2_amr/amrfinderplus/sample_data.py index f63adb3..ffef175 100644 --- a/q2_amr/amrfinderplus/sample_data.py +++ b/q2_amr/amrfinderplus/sample_data.py @@ -22,6 +22,7 @@ def annotate_sample_data_amrfinderplus( plus: bool = False, report_all_equal: bool = False, ident_min: float = None, + curated_ident: bool = False, coverage_min: float = 0.5, translation_table: str = "11", threads: int = None, diff --git a/q2_amr/amrfinderplus/tests/test_utils.py b/q2_amr/amrfinderplus/tests/test_utils.py index f464e77..efe9e05 100644 --- a/q2_amr/amrfinderplus/tests/test_utils.py +++ b/q2_amr/amrfinderplus/tests/test_utils.py @@ -20,6 +20,7 @@ def test_run_amrfinderplus_n(self, mock_run_command): plus=True, report_all_equal=True, ident_min=1, + curated_ident=False, coverage_min=1, translation_table="11", threads=4, @@ -73,6 +74,7 @@ def test_run_amrfinderplus_n_minimal(self, mock_run_command): plus=False, report_all_equal=False, ident_min=None, + curated_ident=True, coverage_min=None, translation_table=None, threads=None, @@ -85,6 +87,8 @@ def test_run_amrfinderplus_n_minimal(self, mock_run_command): "-o", "path_dir/amr_annotations.tsv", "--print_node", + "--ident_min", + "-1", ], "path_dir", verbose=True, diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index ee30541..8141761 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -13,6 +13,7 @@ def run_amrfinderplus_n( plus, report_all_equal, ident_min, + curated_ident, coverage_min, translation_table, threads, @@ -60,8 +61,10 @@ def run_amrfinderplus_n( cmd.append("--plus") if report_all_equal: cmd.append("--report_all_equal") - if ident_min: + if ident_min and not curated_ident: cmd.extend(["--ident_min", str(ident_min)]) + if curated_ident: + cmd.extend(["--ident_min", "-1"]) if coverage_min: cmd.extend(["--coverage_min", str(coverage_min)]) if translation_table: diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index 08ec28f..1838f44 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -1153,7 +1153,8 @@ "organism": Str % Choices(organisms), "plus": Bool, "report_all_equal": Bool, - "ident_min": Float % Range(-1, 1, inclusive_start=True, inclusive_end=True), + "ident_min": Float % Range(0, 1, inclusive_start=True, inclusive_end=True), + "curated_ident": Bool, "coverage_min": Float % Range(0, 1, inclusive_start=True, inclusive_end=True), "translation_table": Str % Choices(translation_tables), "threads": Int % Range(0, None, inclusive_start=False), @@ -1180,11 +1181,13 @@ "and Name of closest sequence will be different showing " "each of the database proteins that are equally close to " "the query sequence.", - "ident_min": "Minimum identity for a blast-based hit hit (Methods BLAST or " - "PARTIAL). -1 means use the curated threshold if it exists and " - "0.9 otherwise. Setting this value to something other than -1 " + "ident_min": "Minimum identity for a blast-based hit (Methods BLAST or " + "PARTIAL). Setting this value to something other than -1 " "will override curated similarity cutoffs. We only recommend " "using this option if you have a specific reason.", + "curated_ident": "Use the curated threshold for a blast-based hit, if it " + "exists and 0.9 otherwise. This will overwrite the value specified with the " + "'ident_min' parameter", "coverage_min": "Minimum proportion of reference gene covered for a " "BLAST-based hit (Methods BLAST or PARTIAL).", "translation_table": "Translation table used for BLASTX.", From bff959573f269b8738efb3961825fe6bf1742f2d Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 16 Jul 2024 15:58:25 +0200 Subject: [PATCH 39/50] bugfix missing parameter --- q2_amr/amrfinderplus/sample_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/q2_amr/amrfinderplus/sample_data.py b/q2_amr/amrfinderplus/sample_data.py index ffef175..317cbc4 100644 --- a/q2_amr/amrfinderplus/sample_data.py +++ b/q2_amr/amrfinderplus/sample_data.py @@ -69,6 +69,7 @@ def annotate_sample_data_amrfinderplus( plus=plus, report_all_equal=report_all_equal, ident_min=ident_min, + curated_ident=curated_ident, coverage_min=coverage_min, translation_table=translation_table, threads=threads, From c9a12a9985b33ffdaafe76bf84190d10012f835b Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 16 Jul 2024 16:14:56 +0200 Subject: [PATCH 40/50] bug parameter added in mocked function --- q2_amr/amrfinderplus/tests/test_sample_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/q2_amr/amrfinderplus/tests/test_sample_data.py b/q2_amr/amrfinderplus/tests/test_sample_data.py index 0b8e701..f925a2e 100644 --- a/q2_amr/amrfinderplus/tests/test_sample_data.py +++ b/q2_amr/amrfinderplus/tests/test_sample_data.py @@ -22,6 +22,7 @@ def mock_run_amrfinderplus_n( plus, report_all_equal, ident_min, + curated_ident, coverage_min, translation_table, threads, From 1d94b24c3fce05f6c5e80c2c1c40ec71562f6d8d Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 16 Jul 2024 17:17:10 +0200 Subject: [PATCH 41/50] renaming tests --- q2_amr/amrfinderplus/tests/test_sample_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_amr/amrfinderplus/tests/test_sample_data.py b/q2_amr/amrfinderplus/tests/test_sample_data.py index f925a2e..fdc901c 100644 --- a/q2_amr/amrfinderplus/tests/test_sample_data.py +++ b/q2_amr/amrfinderplus/tests/test_sample_data.py @@ -8,7 +8,7 @@ from q2_amr.amrfinderplus.types import AMRFinderPlusDatabaseDirFmt -class TestAnnotateMagsCard(TestPluginBase): +class TestAnnotateSampleDataAMRFinderPlus(TestPluginBase): package = "q2_amr.amrfinderplus.tests" def mock_run_amrfinderplus_n( From f642503773061556e2a1cb357e85dd11a3228b31 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 17 Jul 2024 11:43:50 +0200 Subject: [PATCH 42/50] chnages after review --- q2_amr/amrfinderplus/sample_data.py | 8 ++++---- q2_amr/amrfinderplus/tests/test_sample_data.py | 6 +++--- q2_amr/amrfinderplus/tests/test_utils.py | 8 ++++---- q2_amr/amrfinderplus/utils.py | 16 ++++++++++------ q2_amr/card/utils.py | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/q2_amr/amrfinderplus/sample_data.py b/q2_amr/amrfinderplus/sample_data.py index 317cbc4..6a5265e 100644 --- a/q2_amr/amrfinderplus/sample_data.py +++ b/q2_amr/amrfinderplus/sample_data.py @@ -47,9 +47,9 @@ def annotate_sample_data_amrfinderplus( ] with tempfile.TemporaryDirectory() as tmp: - # Iterate over paths of mags or contigs + # Iterate over paths of MAGs or contigs for file in files: - # Set sample and mag ids and output file pats for mag or contig + # Set sample and MAG IDs if isinstance(sequences, MultiMAGSequencesDirFmt): index_value = manifest.query("filename == @file").index[0] sample_id = index_value[0] @@ -62,8 +62,8 @@ def annotate_sample_data_amrfinderplus( run_amrfinderplus_n( working_dir=tmp, amrfinderplus_db=amrfinderplus_db, - dna_sequence=file, - protein_sequence=None, + dna_sequences=file, + protein_sequences=None, gff=None, organism=organism, plus=plus, diff --git a/q2_amr/amrfinderplus/tests/test_sample_data.py b/q2_amr/amrfinderplus/tests/test_sample_data.py index fdc901c..9f705af 100644 --- a/q2_amr/amrfinderplus/tests/test_sample_data.py +++ b/q2_amr/amrfinderplus/tests/test_sample_data.py @@ -15,8 +15,8 @@ def mock_run_amrfinderplus_n( self, working_dir, amrfinderplus_db, - dna_sequence, - protein_sequence, + dna_sequences, + protein_sequences, gff, organism, plus, @@ -32,7 +32,7 @@ def mock_run_amrfinderplus_n( if organism: with open(os.path.join(working_dir, "amr_mutations.tsv"), "w"): pass - if dna_sequence: + if dna_sequences: with open(os.path.join(working_dir, "amr_genes.fasta"), "w"): pass diff --git a/q2_amr/amrfinderplus/tests/test_utils.py b/q2_amr/amrfinderplus/tests/test_utils.py index efe9e05..3e5bd8a 100644 --- a/q2_amr/amrfinderplus/tests/test_utils.py +++ b/q2_amr/amrfinderplus/tests/test_utils.py @@ -13,8 +13,8 @@ def test_run_amrfinderplus_n(self, mock_run_command): run_amrfinderplus_n( working_dir="path_dir", amrfinderplus_db="amrfinderplus_db", - dna_sequence="dna_sequence", - protein_sequence="protein_sequence", + dna_sequences="dna_sequence", + protein_sequences="protein_sequence", gff="gff", organism="Escherichia", plus=True, @@ -67,8 +67,8 @@ def test_run_amrfinderplus_n_minimal(self, mock_run_command): run_amrfinderplus_n( working_dir="path_dir", amrfinderplus_db="amrfinderplus_db", - dna_sequence=None, - protein_sequence=None, + dna_sequences=None, + protein_sequences=None, gff=None, organism=None, plus=False, diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index 8141761..199957c 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -6,8 +6,8 @@ def run_amrfinderplus_n( working_dir, amrfinderplus_db, - dna_sequence, - protein_sequence, + dna_sequences, + protein_sequences, gff, organism, plus, @@ -26,20 +26,22 @@ def run_amrfinderplus_n( f"{working_dir}/amr_annotations.tsv", "--print_node", ] - if dna_sequence: + # Creates nucleotide fasta output if DNA sequences are given as input + if dna_sequences: cmd.extend( [ "-n", - dna_sequence, + dna_sequences, "--nucleotide_output", f"{working_dir}/amr_genes.fasta", ] ) - if protein_sequence: + # Creates protein fasta output if protein sequences are given as input + if protein_sequences: cmd.extend( [ "-p", - protein_sequence, + protein_sequences, "--protein_output", f"{working_dir}/amr_proteins.fasta", ] @@ -48,6 +50,7 @@ def run_amrfinderplus_n( cmd.extend(["-g", gff]) if threads: cmd.extend(["--threads", str(threads)]) + # Creates all mutations output if an organism is specified if organism: cmd.extend( [ @@ -61,6 +64,7 @@ def run_amrfinderplus_n( cmd.append("--plus") if report_all_equal: cmd.append("--report_all_equal") + # If curated_ident is True, it will overwrite the value specified with ident_min if ident_min and not curated_ident: cmd.extend(["--ident_min", str(ident_min)]) if curated_ident: diff --git a/q2_amr/card/utils.py b/q2_amr/card/utils.py index eb98907..129bb7a 100644 --- a/q2_amr/card/utils.py +++ b/q2_amr/card/utils.py @@ -99,7 +99,7 @@ def read_in_txt(path: str, samp_bin_name: str, data_type: str, colname: str): # Read in txt file to pd.Dataframe df = pd.read_csv(path, sep="\t") - # Process the df depending on the data type and mapping type + # Process the df depending on the data type if data_type == "reads": df = df[[colname, "All Mapped Reads"]] df.rename(columns={"All Mapped Reads": samp_bin_name}, inplace=True) From e4c6bfde731777e6d2a585d5472e44dd8960339c Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 17 Jul 2024 11:47:22 +0200 Subject: [PATCH 43/50] added s in utils sequneces --- q2_amr/amrfinderplus/tests/test_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/q2_amr/amrfinderplus/tests/test_utils.py b/q2_amr/amrfinderplus/tests/test_utils.py index 3e5bd8a..1e7ae59 100644 --- a/q2_amr/amrfinderplus/tests/test_utils.py +++ b/q2_amr/amrfinderplus/tests/test_utils.py @@ -13,8 +13,8 @@ def test_run_amrfinderplus_n(self, mock_run_command): run_amrfinderplus_n( working_dir="path_dir", amrfinderplus_db="amrfinderplus_db", - dna_sequences="dna_sequence", - protein_sequences="protein_sequence", + dna_sequences="dna_sequences", + protein_sequences="protein_sequences", gff="gff", organism="Escherichia", plus=True, @@ -34,11 +34,11 @@ def test_run_amrfinderplus_n(self, mock_run_command): "path_dir/amr_annotations.tsv", "--print_node", "-n", - "dna_sequence", + "dna_sequences", "--nucleotide_output", "path_dir/amr_genes.fasta", "-p", - "protein_sequence", + "protein_sequences", "--protein_output", "path_dir/amr_proteins.fasta", "-g", From 7a05e7087f9bd513e38003bf61d353c18f6bcbbc Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 17 Jul 2024 12:27:04 +0200 Subject: [PATCH 44/50] changed plugin setup description --- q2_amr/plugin_setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index 134a42b..ea20a55 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -1212,7 +1212,7 @@ "fail. Using more than 4 threads may speed up searches.", }, output_descriptions={ - "annotations": "AMR annotation as .txt and .json file.", + "annotations": "Annotated AMR genes and mutations.", "mutations": "Report of genotypes at all locations screened for point " "mutations. These files allow you to distinguish between called " "point mutations that were the sensitive variant and the point " From 57e20da3fef3461a53e1cf09fffa1602096c3589 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 23 Jul 2024 11:13:47 +0200 Subject: [PATCH 45/50] removed AMRFinderplusannotation type --- q2_amr/amrfinderplus/types/__init__.py | 2 -- q2_amr/amrfinderplus/types/_format.py | 20 ++++++---------- q2_amr/amrfinderplus/types/_type.py | 6 ++--- .../tests/test_types_formats_transformers.py | 23 +++++++++++++++---- q2_amr/plugin_setup.py | 8 ++----- 5 files changed, 29 insertions(+), 30 deletions(-) diff --git a/q2_amr/amrfinderplus/types/__init__.py b/q2_amr/amrfinderplus/types/__init__.py index c84d73b..8dfb549 100644 --- a/q2_amr/amrfinderplus/types/__init__.py +++ b/q2_amr/amrfinderplus/types/__init__.py @@ -6,7 +6,6 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusAnnotationDirFmt, AMRFinderPlusAnnotationFormat, AMRFinderPlusAnnotationsDirFmt, AMRFinderPlusDatabaseDirFmt, @@ -18,7 +17,6 @@ "AMRFinderPlusDatabaseDirFmt", "AMRFinderPlusAnnotationFormat", "AMRFinderPlusAnnotationsDirFmt", - "AMRFinderPlusAnnotationDirFmt", "TextFormat", "BinaryFormat", ] diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index c8feef3..390d16f 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -5,6 +5,8 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import os + import pandas as pd from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat from q2_types.per_sample_sequences._format import MultiDirValidationMixin @@ -110,18 +112,10 @@ def _validate_(self, level): class AMRFinderPlusAnnotationsDirFmt(MultiDirValidationMixin, model.DirectoryFormat): - annotation = model.FileCollection( - r".*amr_(annotations|mutations)\.tsv$", format=AMRFinderPlusAnnotationFormat + annotations = model.FileCollection( + r".*amr_(annotations|all_mutations)\.tsv$", format=AMRFinderPlusAnnotationFormat ) - @annotation.set_path_maker - def annotation_path_maker(self, sample_id, mag_id): - prefix = f"{sample_id}/{mag_id}_" if mag_id else f"{sample_id}/" - return f"{prefix}amr_annotations.tsv" - - -AMRFinderPlusAnnotationDirFmt = model.SingleFileDirectoryFormat( - "AMRFinderPlusAnnotationDirFmt", - r"amr_(annotations|mutations)\.tsv$", - AMRFinderPlusAnnotationFormat, -) + @annotations.set_path_maker + def annotations_path_maker(self, name, id, dir_name=""): + return os.path.join(dir_name, f"{id}_amr_{name}.tsv") diff --git a/q2_amr/amrfinderplus/types/_type.py b/q2_amr/amrfinderplus/types/_type.py index 13d0e90..5cc2f5e 100644 --- a/q2_amr/amrfinderplus/types/_type.py +++ b/q2_amr/amrfinderplus/types/_type.py @@ -11,8 +11,6 @@ AMRFinderPlusDatabase = SemanticType("AMRFinderPlusDatabase") AMRFinderPlusAnnotations = SemanticType( - "AMRFinderPlusAnnotations", variant_of=SampleData.field["type"] -) -AMRFinderPlusAnnotation = SemanticType( - "AMRFinderPlusAnnotation", variant_of=FeatureData.field["type"] + "AMRFinderPlusAnnotations", + variant_of=[SampleData.field["type"], FeatureData.field["type"]], ) diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index 2b2ea6f..d905785 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -12,7 +12,6 @@ from qiime2.plugin.testing import TestPluginBase from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusAnnotationDirFmt, AMRFinderPlusAnnotationFormat, AMRFinderPlusAnnotationsDirFmt, AMRFinderPlusDatabaseDirFmt, @@ -92,14 +91,28 @@ def test_amrfinderplus_annotation_format_validation_error(self): self.assertEqual(str(context.exception), expected_message) - def test_amrfinderplus_annotation_directory_format(self): + def test_amrfinderplus_annotations_dir_fmt_feature(self): dirpath = self.get_data_path( "annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d" ) - annotations = AMRFinderPlusAnnotationDirFmt(dirpath, mode="r") - assert isinstance(annotations, AMRFinderPlusAnnotationDirFmt) + annotations = AMRFinderPlusAnnotationsDirFmt(dirpath, mode="r") + assert isinstance(annotations, AMRFinderPlusAnnotationsDirFmt) - def test_amrfinderplus_annotations_directory_format(self): + def test_amrfinderplus_annotations_dir_fmt_sample(self): dirpath = self.get_data_path("annotation") annotations = AMRFinderPlusAnnotationsDirFmt(dirpath, mode="r") assert isinstance(annotations, AMRFinderPlusAnnotationsDirFmt) + + def test_amrfinderplus_annotations_dir_fmt_path_maker_dir_name(self): + fmt = AMRFinderPlusAnnotationsDirFmt() + path = fmt.annotations_path_maker( + name="annotations", id="id", dir_name="dir_name" + ) + self.assertEqual( + str(path), os.path.join(str(fmt), "dir_name/id_amr_annotations.tsv") + ) + + def test_amrfinderplus_annotations_dir_fmt_path_maker(self): + fmt = AMRFinderPlusAnnotationsDirFmt() + path = fmt.annotations_path_maker(name="annotations", id="id") + self.assertEqual(str(path), os.path.join(str(fmt), "id_amr_annotations.tsv")) diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index ea20a55..27dfb4e 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -35,7 +35,6 @@ from q2_amr.amrfinderplus.database import fetch_amrfinderplus_db from q2_amr.amrfinderplus.sample_data import annotate_sample_data_amrfinderplus from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusAnnotationDirFmt, AMRFinderPlusAnnotationFormat, AMRFinderPlusAnnotationsDirFmt, AMRFinderPlusDatabaseDirFmt, @@ -43,7 +42,6 @@ TextFormat, ) from q2_amr.amrfinderplus.types._type import ( - AMRFinderPlusAnnotation, AMRFinderPlusAnnotations, AMRFinderPlusDatabase, ) @@ -1249,7 +1247,6 @@ CARDMAGsKmerAnalysis, AMRFinderPlusDatabase, AMRFinderPlusAnnotations, - AMRFinderPlusAnnotation, ) plugin.register_semantic_type_to_format( @@ -1289,8 +1286,8 @@ artifact_format=AMRFinderPlusAnnotationsDirFmt, ) plugin.register_semantic_type_to_format( - FeatureData[AMRFinderPlusAnnotation], - artifact_format=AMRFinderPlusAnnotationDirFmt, + FeatureData[AMRFinderPlusAnnotations], + artifact_format=AMRFinderPlusAnnotationsDirFmt, ) plugin.register_formats( CARDKmerDatabaseDirectoryFormat, @@ -1321,7 +1318,6 @@ BinaryFormat, AMRFinderPlusAnnotationFormat, AMRFinderPlusAnnotationsDirFmt, - AMRFinderPlusAnnotationDirFmt, ) importlib.import_module("q2_amr.card.types._transformer") From 16b413fd6575c498dfaf7a8337c12d1f22b76388 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 23 Jul 2024 11:37:44 +0200 Subject: [PATCH 46/50] removed multidirvalidation mixing --- q2_amr/amrfinderplus/types/_format.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index 390d16f..bcda9e8 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -9,7 +9,6 @@ import pandas as pd from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat -from q2_types.per_sample_sequences._format import MultiDirValidationMixin from qiime2.core.exceptions import ValidationError from qiime2.plugin import model @@ -111,7 +110,7 @@ def _validate_(self, level): self._validate() -class AMRFinderPlusAnnotationsDirFmt(MultiDirValidationMixin, model.DirectoryFormat): +class AMRFinderPlusAnnotationsDirFmt(model.DirectoryFormat): annotations = model.FileCollection( r".*amr_(annotations|all_mutations)\.tsv$", format=AMRFinderPlusAnnotationFormat ) From 869e03c5bedb15e0a61e1687797299584965df15 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 23 Jul 2024 15:43:10 +0200 Subject: [PATCH 47/50] chnages in annotation action --- q2_amr/amrfinderplus/sample_data.py | 24 ++++++++++--------- .../amrfinderplus/tests/test_sample_data.py | 6 ++--- q2_amr/amrfinderplus/tests/test_utils.py | 2 +- q2_amr/amrfinderplus/utils.py | 2 +- q2_amr/plugin_setup.py | 14 +++++------ 5 files changed, 25 insertions(+), 23 deletions(-) diff --git a/q2_amr/amrfinderplus/sample_data.py b/q2_amr/amrfinderplus/sample_data.py index 6a5265e..9a7c4fa 100644 --- a/q2_amr/amrfinderplus/sample_data.py +++ b/q2_amr/amrfinderplus/sample_data.py @@ -32,9 +32,9 @@ def annotate_sample_data_amrfinderplus( GenesDirectoryFormat, pd.DataFrame, ): - annotations = AMRFinderPlusAnnotationsDirFmt() - mutations = AMRFinderPlusAnnotationsDirFmt() - genes = GenesDirectoryFormat() + amr_annotations = AMRFinderPlusAnnotationsDirFmt() + amr_all_mutations = AMRFinderPlusAnnotationsDirFmt() + amr_genes = GenesDirectoryFormat() frequency_list = [] # Create list of paths to all mags or contigs @@ -86,20 +86,22 @@ def annotate_sample_data_amrfinderplus( # Move mutations file. If it is not created, create an empty mutations file des_path_mutations = os.path.join( - str(mutations), + str(amr_all_mutations), sample_id, - f"{mag_id + '_' if mag_id else ''}amr_mutations.tsv", + f"{mag_id + '_' if mag_id else ''}amr_all_mutations.tsv", ) os.makedirs(os.path.dirname(des_path_mutations), exist_ok=True) if organism: - shutil.move(os.path.join(tmp, "amr_mutations.tsv"), des_path_mutations) + shutil.move( + os.path.join(tmp, "amr_all_mutations.tsv"), des_path_mutations + ) else: with open(des_path_mutations, "w"): pass # Move annotations file des_path_annotations = os.path.join( - str(annotations), + str(amr_annotations), sample_id, f"{mag_id + '_' if mag_id else ''}amr_annotations.tsv", ) @@ -110,14 +112,14 @@ def annotate_sample_data_amrfinderplus( shutil.move( os.path.join(tmp, "amr_genes.fasta"), os.path.join( - str(genes), f"{mag_id if mag_id else sample_id}_amr_genes.fasta" + str(amr_genes), f"{mag_id if mag_id else sample_id}_amr_genes.fasta" ), ) feature_table = create_count_table(df_list=frequency_list) return ( - annotations, - mutations, - genes, + amr_annotations, + amr_all_mutations, + amr_genes, feature_table, ) diff --git a/q2_amr/amrfinderplus/tests/test_sample_data.py b/q2_amr/amrfinderplus/tests/test_sample_data.py index 9f705af..8c2ff05 100644 --- a/q2_amr/amrfinderplus/tests/test_sample_data.py +++ b/q2_amr/amrfinderplus/tests/test_sample_data.py @@ -30,7 +30,7 @@ def mock_run_amrfinderplus_n( with open(os.path.join(working_dir, "amr_annotations.tsv"), "w"): pass if organism: - with open(os.path.join(working_dir, "amr_mutations.tsv"), "w"): + with open(os.path.join(working_dir, "amr_all_mutations.tsv"), "w"): pass if dna_sequences: with open(os.path.join(working_dir, "amr_genes.fasta"), "w"): @@ -38,13 +38,13 @@ def mock_run_amrfinderplus_n( files_contigs = [ "amr_annotations.tsv", - "amr_mutations.tsv", + "amr_all_mutations.tsv", "sample1_amr_genes.fasta", ] files_mags = [ "mag1_amr_annotations.tsv", - "mag1_amr_mutations.tsv", + "mag1_amr_all_mutations.tsv", "mag1_amr_genes.fasta", ] diff --git a/q2_amr/amrfinderplus/tests/test_utils.py b/q2_amr/amrfinderplus/tests/test_utils.py index 1e7ae59..4b7f436 100644 --- a/q2_amr/amrfinderplus/tests/test_utils.py +++ b/q2_amr/amrfinderplus/tests/test_utils.py @@ -48,7 +48,7 @@ def test_run_amrfinderplus_n(self, mock_run_command): "--organism", "Escherichia", "--mutation_all", - "path_dir/amr_mutations.tsv", + "path_dir/amr_all_mutations.tsv", "--plus", "--report_all_equal", "--ident_min", diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index 199957c..793ee37 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -57,7 +57,7 @@ def run_amrfinderplus_n( "--organism", organism, "--mutation_all", - f"{working_dir}/amr_mutations.tsv", + f"{working_dir}/amr_all_mutations.tsv", ] ) if plus: diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index 27dfb4e..703cecb 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -1173,9 +1173,9 @@ "threads": Int % Range(0, None, inclusive_start=False), }, outputs=[ - ("annotations", SampleData[AMRFinderPlusAnnotations]), - ("mutations", SampleData[AMRFinderPlusAnnotations]), - ("genes", GenomeData[Genes]), + ("amr_annotations", SampleData[AMRFinderPlusAnnotations]), + ("amr_all_mutations", SampleData[AMRFinderPlusAnnotations]), + ("amr_genes", GenomeData[Genes]), ("feature_table", FeatureTable[Frequency]), ], input_descriptions={ @@ -1210,8 +1210,8 @@ "fail. Using more than 4 threads may speed up searches.", }, output_descriptions={ - "annotations": "Annotated AMR genes and mutations.", - "mutations": "Report of genotypes at all locations screened for point " + "amr_annotations": "Annotated AMR genes and mutations.", + "amr_all_mutations": "Report of genotypes at all locations screened for point " "mutations. These files allow you to distinguish between called " "point mutations that were the sensitive variant and the point " "mutations that could not be called because the sequence was not " @@ -1224,8 +1224,8 @@ "'Gene symbols' from known point-mutation sites have gene symbols " "that match the Pathogen Detection Reference Gene Catalog " "standardized nomenclature for point mutations.", - "genes": "Sequences that were identified by AMRFinderPlus as AMR genes. This " - "will include the entire region that aligns to the references for " + "amr_genes": "Sequences that were identified by AMRFinderPlus as AMR genes. " + "This will include the entire region that aligns to the references for " "point mutations.", "feature_table": "Presence/Absence table of ARGs in all samples.", }, From e3f2abbf473348d5d4ab6155d5ebbe96668a7f4c Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 23 Jul 2024 16:59:03 +0200 Subject: [PATCH 48/50] added three new parameters --- q2_amr/amrfinderplus/sample_data.py | 5 ++ .../amrfinderplus/tests/test_sample_data.py | 3 + q2_amr/amrfinderplus/tests/test_utils.py | 56 +++++++++++++++++++ q2_amr/amrfinderplus/utils.py | 16 ++++++ q2_amr/plugin_setup.py | 5 ++ 5 files changed, 85 insertions(+) diff --git a/q2_amr/amrfinderplus/sample_data.py b/q2_amr/amrfinderplus/sample_data.py index 9a7c4fa..06de759 100644 --- a/q2_amr/amrfinderplus/sample_data.py +++ b/q2_amr/amrfinderplus/sample_data.py @@ -25,6 +25,8 @@ def annotate_sample_data_amrfinderplus( curated_ident: bool = False, coverage_min: float = 0.5, translation_table: str = "11", + report_common: bool = False, + gpipe_org: bool = False, threads: int = None, ) -> ( AMRFinderPlusAnnotationsDirFmt, @@ -72,6 +74,9 @@ def annotate_sample_data_amrfinderplus( curated_ident=curated_ident, coverage_min=coverage_min, translation_table=translation_table, + annotation_format=None, + report_common=report_common, + gpipe_org=gpipe_org, threads=threads, ) diff --git a/q2_amr/amrfinderplus/tests/test_sample_data.py b/q2_amr/amrfinderplus/tests/test_sample_data.py index 8c2ff05..9d4557b 100644 --- a/q2_amr/amrfinderplus/tests/test_sample_data.py +++ b/q2_amr/amrfinderplus/tests/test_sample_data.py @@ -25,6 +25,9 @@ def mock_run_amrfinderplus_n( curated_ident, coverage_min, translation_table, + annotation_format, + report_common, + gpipe_org, threads, ): with open(os.path.join(working_dir, "amr_annotations.tsv"), "w"): diff --git a/q2_amr/amrfinderplus/tests/test_utils.py b/q2_amr/amrfinderplus/tests/test_utils.py index 4b7f436..0bd6b73 100644 --- a/q2_amr/amrfinderplus/tests/test_utils.py +++ b/q2_amr/amrfinderplus/tests/test_utils.py @@ -23,6 +23,9 @@ def test_run_amrfinderplus_n(self, mock_run_command): curated_ident=False, coverage_min=1, translation_table="11", + annotation_format="prodigal", + report_common=True, + gpipe_org=True, threads=4, ) mock_run_command.assert_called_once_with( @@ -57,6 +60,10 @@ def test_run_amrfinderplus_n(self, mock_run_command): "1", "--translation_table", "11", + "--annotation_format", + "prodigal", + "--report_common", + "--gpipe_org", ], "path_dir", verbose=True, @@ -77,6 +84,9 @@ def test_run_amrfinderplus_n_minimal(self, mock_run_command): curated_ident=True, coverage_min=None, translation_table=None, + annotation_format=None, + report_common=False, + gpipe_org=False, threads=None, ) mock_run_command.assert_called_once_with( @@ -93,3 +103,49 @@ def test_run_amrfinderplus_n_minimal(self, mock_run_command): "path_dir", verbose=True, ) + + @patch("q2_amr.amrfinderplus.utils.run_command") + def test_run_amrfinderplus_n_value_error_report_common(self, mock_run_command): + with self.assertRaisesRegex( + ValueError, "--p-report-common requires " "--p-plus and --p-organism" + ): + run_amrfinderplus_n( + working_dir="path_dir", + amrfinderplus_db="amrfinderplus_db", + dna_sequences=None, + protein_sequences=None, + gff=None, + organism=None, + plus=False, + report_all_equal=False, + ident_min=None, + curated_ident=True, + coverage_min=None, + translation_table=None, + annotation_format=None, + report_common=True, + gpipe_org=False, + threads=None, + ) + + @patch("q2_amr.amrfinderplus.utils.run_command") + def test_run_amrfinderplus_n_value_error_gpipe_org(self, mock_run_command): + with self.assertRaisesRegex(ValueError, "--p-gpipe_org requires --p-organism"): + run_amrfinderplus_n( + working_dir="path_dir", + amrfinderplus_db="amrfinderplus_db", + dna_sequences=None, + protein_sequences=None, + gff=None, + organism=None, + plus=False, + report_all_equal=False, + ident_min=None, + curated_ident=True, + coverage_min=None, + translation_table=None, + annotation_format=None, + report_common=False, + gpipe_org=True, + threads=None, + ) diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index 793ee37..51dfcb3 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -16,8 +16,17 @@ def run_amrfinderplus_n( curated_ident, coverage_min, translation_table, + annotation_format, + report_common, + gpipe_org, threads, ): + # Check for unallowed parameter combinations + if report_common and (not plus or not organism): + raise ValueError("--p-report-common requires --p-plus and --p-organism") + if gpipe_org and not organism: + raise ValueError("--p-gpipe_org requires --p-organism") + cmd = [ "amrfinder", "--database", @@ -73,6 +82,13 @@ def run_amrfinderplus_n( cmd.extend(["--coverage_min", str(coverage_min)]) if translation_table: cmd.extend(["--translation_table", str(translation_table)]) + if annotation_format: + cmd.extend(["--annotation_format", str(annotation_format)]) + if report_common: + cmd.append("--report_common") + if gpipe_org: + cmd.append("--gpipe_org") + try: run_command(cmd, working_dir, verbose=True) except subprocess.CalledProcessError as e: diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index 703cecb..465127f 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -1170,6 +1170,8 @@ "curated_ident": Bool, "coverage_min": Float % Range(0, 1, inclusive_start=True, inclusive_end=True), "translation_table": Str % Choices(translation_tables), + "report_common": Bool, + "gpipe_org": Bool, "threads": Int % Range(0, None, inclusive_start=False), }, outputs=[ @@ -1204,6 +1206,9 @@ "coverage_min": "Minimum proportion of reference gene covered for a " "BLAST-based hit (Methods BLAST or PARTIAL).", "translation_table": "Translation table used for BLASTX.", + "report_common": "Report proteins common to a taxonomy group.", + "gpipe_org": "Use Pathogen Detection taxgroup names as arguments to the " + "organism option", "threads": "The number of threads to use for processing. AMRFinderPlus " "defaults to 4 on hosts with >= 4 cores. Setting this number higher" " than the number of cores on the running host may cause blastp to " From f636e7304abe9358ed9f5d590619027b51a0a057 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 24 Jul 2024 11:59:09 +0200 Subject: [PATCH 49/50] added typemap for parameters --- q2_amr/plugin_setup.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index 465127f..cdc5bd6 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -1127,6 +1127,35 @@ "Vibrio_vulnificus", ] +organisms_gpipe = [ + "Acinetobacter", + "Burkholderia_cepacia_complex", + "Burkholderia_pseudomallei", + "Campylobacter", + "Citrobacter_freundii", + "Clostridioides_difficile", + "Enterobacter_asburiae", + "Enterobacter_cloacae", + "Enterococcus_faecalis", + "Enterococcus_faecium", + "Escherichia_coli_Shigella", + "Klebsiella_oxytoca", + "Klebsiella", + "Neisseria_gonorrhoeae", + "Neisseria_meningitidis", + "Pseudomonas_aeruginosa", + "Salmonella", + "Serratia", + "Staphylococcus_aureus", + "Staphylococcus_pseudintermedius", + "Streptococcus_agalactiae", + "Streptococcus_pneumoniae", + "Streptococcus_pyogenes", + "Vibrio_cholerae", + "Vibrio_parahaemolyticus", + "Vibrio_vulnificus", +] + translation_tables = [ "1", "2", @@ -1156,6 +1185,13 @@ "33", ] +P_gpipe_org, P_organism, _ = TypeMap( + { + (Bool % Choices(True), Str % Choices(organisms_gpipe)): Int, + (Bool % Choices(False), Str % Choices(organisms)): Int, + } +) + plugin.methods.register_function( function=annotate_sample_data_amrfinderplus, inputs={ @@ -1163,7 +1199,7 @@ "amrfinderplus_db": AMRFinderPlusDatabase, }, parameters={ - "organism": Str % Choices(organisms), + "organism": P_organism, "plus": Bool, "report_all_equal": Bool, "ident_min": Float % Range(0, 1, inclusive_start=True, inclusive_end=True), @@ -1171,7 +1207,7 @@ "coverage_min": Float % Range(0, 1, inclusive_start=True, inclusive_end=True), "translation_table": Str % Choices(translation_tables), "report_common": Bool, - "gpipe_org": Bool, + "gpipe_org": P_gpipe_org, "threads": Int % Range(0, None, inclusive_start=False), }, outputs=[ From 1986cb87af2e5903459f14c071fb8085cac9f8ad Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 26 Jul 2024 12:50:39 +0200 Subject: [PATCH 50/50] added stop --- q2_amr/plugin_setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index cdc5bd6..b57aeb6 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -1244,7 +1244,7 @@ "translation_table": "Translation table used for BLASTX.", "report_common": "Report proteins common to a taxonomy group.", "gpipe_org": "Use Pathogen Detection taxgroup names as arguments to the " - "organism option", + "organism option.", "threads": "The number of threads to use for processing. AMRFinderPlus " "defaults to 4 on hosts with >= 4 cores. Setting this number higher" " than the number of cores on the running host may cause blastp to "