From 89898b301cf0f52561242e5de6546569fd6d995f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Cavalcante?= Date: Sat, 30 Mar 2024 20:38:22 -0300 Subject: [PATCH] refactor: Use txts instead of tsvs (#2) * refactor: Detect txt instead of tsv files - Will make it easier to integrate with existing nf-core modules * refactor: Fix schema validation when using txt files * style: Add black to test/file_finder * style: Add black to remaining init files --- microview/file_finder.py | 8 +++++--- microview/schemas/__init__.py | 1 + microview/schemas/kaiju_report.schema.json | 8 ++++---- microview/templates/__init__.py | 1 + tests/conftest.py | 6 +++--- .../{centrifuge_test.tsv => centrifuge_test.txt} | 0 tests/test_data/contrast_table.csv | 4 ++-- tests/test_data/{kaiju_test.tsv => kaiju_test.txt} | 0 tests/test_data/{kaiju_test_2.tsv => kaiju_test_2.txt} | 0 tests/test_data/{kraken_test.tsv => kraken_test.txt} | 0 tests/test_file_finder.py | 2 -- tests/test_parse_taxonomy.py | 4 ---- 12 files changed, 16 insertions(+), 18 deletions(-) rename tests/test_data/{centrifuge_test.tsv => centrifuge_test.txt} (100%) rename tests/test_data/{kaiju_test.tsv => kaiju_test.txt} (100%) rename tests/test_data/{kaiju_test_2.tsv => kaiju_test_2.txt} (100%) rename tests/test_data/{kraken_test.tsv => kraken_test.txt} (100%) diff --git a/microview/file_finder.py b/microview/file_finder.py index 5edd42f..1810f72 100644 --- a/microview/file_finder.py +++ b/microview/file_finder.py @@ -96,7 +96,7 @@ def detect_report_type(report_paths: List[Path], console) -> List[Sample]: one the report path, the other a string specifying the report type. """ kaiju_validated = [ - get_validation_dict(report, schema=kaiju_report_schema) + get_validation_dict(report, format="tsv", schema=kaiju_report_schema) for report in report_paths ] kaiju_reports = [ @@ -107,7 +107,9 @@ def detect_report_type(report_paths: List[Path], console) -> List[Sample]: # TODO: Improve Kraken validation kraken_validated = [ - get_validation_dict(report, checks=[checks.table_dimensions(num_fields=6)]) + get_validation_dict( + report, format="tsv", checks=[checks.table_dimensions(num_fields=6)] + ) for report in report_paths ] kraken_reports = [ @@ -155,7 +157,7 @@ def find_reports(reports_path: Path, console) -> List[Sample]: List[Sample]: List of samples, an object comprising two attributes, one the report path, the other a string specifying the report type. """ - file_paths: List[Path] = list(reports_path.glob("*tsv")) + file_paths: List[Path] = list(reports_path.glob("*txt")) samples = detect_report_type(file_paths, console) return samples diff --git a/microview/schemas/__init__.py b/microview/schemas/__init__.py index 98fde35..12f03db 100644 --- a/microview/schemas/__init__.py +++ b/microview/schemas/__init__.py @@ -1,6 +1,7 @@ """ MicroView module containing schemas for data validation """ + from pathlib import Path HERE = Path(__file__).parent.resolve() diff --git a/microview/schemas/kaiju_report.schema.json b/microview/schemas/kaiju_report.schema.json index e42a531..7ec9012 100644 --- a/microview/schemas/kaiju_report.schema.json +++ b/microview/schemas/kaiju_report.schema.json @@ -10,25 +10,25 @@ "format": "default", "name": "percent", "type": "number", - "required": false + "required": true }, { "format": "default", "name": "reads", "type": "integer", - "required": false + "required": true }, { "format": "default", "name": "taxon_id", "type": "integer", - "required": false + "required": true }, { "format": "default", "name": "taxon_name", "type": "string", - "required": false + "required": true } ], "missingValues": ["NA"] diff --git a/microview/templates/__init__.py b/microview/templates/__init__.py index 7844d47..41817aa 100644 --- a/microview/templates/__init__.py +++ b/microview/templates/__init__.py @@ -1,6 +1,7 @@ """ MicroView module containing Jinja2 templates """ + from pathlib import Path from jinja2 import Environment, FileSystemLoader diff --git a/tests/conftest.py b/tests/conftest.py index 3041467..2fc5e42 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,17 +38,17 @@ def all_sample_counts(): @pytest.fixture def get_kraken_data(): - return Path(__file__).parent.resolve() / "test_data" / "kraken_test.tsv" + return Path(__file__).parent.resolve() / "test_data" / "kraken_test.txt" @pytest.fixture def get_kaiju_data(): - return Path(__file__).parent.resolve() / "test_data" / "kaiju_test.tsv" + return Path(__file__).parent.resolve() / "test_data" / "kaiju_test.txt" @pytest.fixture def get_centrifuge_data(): - return Path(__file__).parent.resolve() / "test_data" / "centrifuge_test.tsv" + return Path(__file__).parent.resolve() / "test_data" / "centrifuge_test.txt" @pytest.fixture diff --git a/tests/test_data/centrifuge_test.tsv b/tests/test_data/centrifuge_test.txt similarity index 100% rename from tests/test_data/centrifuge_test.tsv rename to tests/test_data/centrifuge_test.txt diff --git a/tests/test_data/contrast_table.csv b/tests/test_data/contrast_table.csv index 3d96436..a893a0a 100644 --- a/tests/test_data/contrast_table.csv +++ b/tests/test_data/contrast_table.csv @@ -1,3 +1,3 @@ sample,group -kaiju_test.tsv,one -kaiju_test_2.tsv,two +kaiju_test.txt,one +kaiju_test_2.txt,two diff --git a/tests/test_data/kaiju_test.tsv b/tests/test_data/kaiju_test.txt similarity index 100% rename from tests/test_data/kaiju_test.tsv rename to tests/test_data/kaiju_test.txt diff --git a/tests/test_data/kaiju_test_2.tsv b/tests/test_data/kaiju_test_2.txt similarity index 100% rename from tests/test_data/kaiju_test_2.tsv rename to tests/test_data/kaiju_test_2.txt diff --git a/tests/test_data/kraken_test.tsv b/tests/test_data/kraken_test.txt similarity index 100% rename from tests/test_data/kraken_test.tsv rename to tests/test_data/kraken_test.txt diff --git a/tests/test_file_finder.py b/tests/test_file_finder.py index 8d5c4c0..e17dc18 100644 --- a/tests/test_file_finder.py +++ b/tests/test_file_finder.py @@ -28,14 +28,12 @@ def test_detect_centrifuge(get_centrifuge_data): def test_validate_source_table(get_contrast_data): - validated = get_validation_dict(get_contrast_data, schema=contrast_table_schema) assert validated["errors"] == 0 def test_invalidate_source_table(get_failing_contrast_data): - validated = get_validation_dict( get_failing_contrast_data, schema=contrast_table_schema ) diff --git a/tests/test_parse_taxonomy.py b/tests/test_parse_taxonomy.py index b27ff09..50e082c 100644 --- a/tests/test_parse_taxonomy.py +++ b/tests/test_parse_taxonomy.py @@ -7,7 +7,6 @@ def test_get_taxon_counts(parsed_stats): - results = get_taxon_counts(parsed_stats) assert results["sample1"]["tax1"] == 5 @@ -15,7 +14,6 @@ def test_get_taxon_counts(parsed_stats): def test_build_taxonomy_stats(parsed_stats): - n_reads = get_read_assignment(parsed_stats) assert n_reads["sample2"]["assigned"] == 93.75 @@ -23,14 +21,12 @@ def test_build_taxonomy_stats(parsed_stats): def test_get_common_taxas(all_sample_counts): - most_common = get_common_taxas(all_sample_counts) assert most_common["sample2"]["tax2"] == 66.67 def test_calculate_abund_diver(all_sample_counts): - abund_div_df = calculate_abund_diver(all_sample_counts) assert round(abund_div_df[0]["Shannon Diversity"][1], 2) == 0.92