refactor: Use txts instead of tsvs (#2)

* refactor: Detect txt instead of tsv files
  - Will make it easier to integrate with existing nf-core modules
* refactor: Fix schema validation when using txt files
* style: Add black to test/file_finder
* style: Add black to remaining init files
dalmolingroup · Mar 30, 2024 · 89898b3 · 89898b3
1 parent 6d2947f
commit 89898b3
Show file tree

Hide file tree

Showing 12 changed files with 16 additions and 18 deletions.
diff --git a/microview/file_finder.py b/microview/file_finder.py
@@ -96,7 +96,7 @@ def detect_report_type(report_paths: List[Path], console) -> List[Sample]:
           one the report path, the other a string specifying the report type.
     """
     kaiju_validated = [
-        get_validation_dict(report, schema=kaiju_report_schema)
+        get_validation_dict(report, format="tsv", schema=kaiju_report_schema)
         for report in report_paths
     ]
     kaiju_reports = [
@@ -107,7 +107,9 @@ def detect_report_type(report_paths: List[Path], console) -> List[Sample]:
 
     # TODO: Improve Kraken validation
     kraken_validated = [
-        get_validation_dict(report, checks=[checks.table_dimensions(num_fields=6)])
+        get_validation_dict(
+            report, format="tsv", checks=[checks.table_dimensions(num_fields=6)]
+        )
         for report in report_paths
     ]
     kraken_reports = [
@@ -155,7 +157,7 @@ def find_reports(reports_path: Path, console) -> List[Sample]:
         List[Sample]: List of samples, an object comprising two attributes,
           one the report path, the other a string specifying the report type.
     """
-    file_paths: List[Path] = list(reports_path.glob("*tsv"))
+    file_paths: List[Path] = list(reports_path.glob("*txt"))
     samples = detect_report_type(file_paths, console)
     return samples
 

diff --git a/microview/schemas/__init__.py b/microview/schemas/__init__.py
@@ -1,6 +1,7 @@
 """
 MicroView module containing schemas for data validation
 """
+
 from pathlib import Path
 
 HERE = Path(__file__).parent.resolve()

diff --git a/microview/schemas/kaiju_report.schema.json b/microview/schemas/kaiju_report.schema.json
@@ -10,25 +10,25 @@
       "format": "default",
       "name": "percent",
       "type": "number",
-      "required": false
+      "required": true
     },
     {
       "format": "default",
       "name": "reads",
       "type": "integer",
-      "required": false
+      "required": true
     },
     {
       "format": "default",
       "name": "taxon_id",
       "type": "integer",
-      "required": false
+      "required": true
     },
     {
       "format": "default",
       "name": "taxon_name",
       "type": "string",
-      "required": false
+      "required": true
     }
   ],
   "missingValues": ["NA"]

diff --git a/microview/templates/__init__.py b/microview/templates/__init__.py
@@ -1,6 +1,7 @@
 """
 MicroView module containing Jinja2 templates
 """
+
 from pathlib import Path
 
 from jinja2 import Environment, FileSystemLoader

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -38,17 +38,17 @@ def all_sample_counts():
 
 @pytest.fixture
 def get_kraken_data():
-    return Path(__file__).parent.resolve() / "test_data" / "kraken_test.tsv"
+    return Path(__file__).parent.resolve() / "test_data" / "kraken_test.txt"
 
 
 @pytest.fixture
 def get_kaiju_data():
-    return Path(__file__).parent.resolve() / "test_data" / "kaiju_test.tsv"
+    return Path(__file__).parent.resolve() / "test_data" / "kaiju_test.txt"
 
 
 @pytest.fixture
 def get_centrifuge_data():
-    return Path(__file__).parent.resolve() / "test_data" / "centrifuge_test.tsv"
+    return Path(__file__).parent.resolve() / "test_data" / "centrifuge_test.txt"
 
 
 @pytest.fixture

diff --git a/tests/test_data/centrifuge_test.tsv → tests/test_data/centrifuge_test.txt b/tests/test_data/centrifuge_test.tsv → tests/test_data/centrifuge_test.txt
diff --git a/tests/test_data/contrast_table.csv b/tests/test_data/contrast_table.csv
@@ -1,3 +1,3 @@
 sample,group
-kaiju_test.tsv,one
-kaiju_test_2.tsv,two
+kaiju_test.txt,one
+kaiju_test_2.txt,two
diff --git a/tests/test_data/kaiju_test.tsv → tests/test_data/kaiju_test.txt b/tests/test_data/kaiju_test.tsv → tests/test_data/kaiju_test.txt
diff --git a/tests/test_data/kaiju_test_2.tsv → tests/test_data/kaiju_test_2.txt b/tests/test_data/kaiju_test_2.tsv → tests/test_data/kaiju_test_2.txt
diff --git a/tests/test_data/kraken_test.tsv → tests/test_data/kraken_test.txt b/tests/test_data/kraken_test.tsv → tests/test_data/kraken_test.txt
diff --git a/tests/test_file_finder.py b/tests/test_file_finder.py
@@ -28,14 +28,12 @@ def test_detect_centrifuge(get_centrifuge_data):
 
 
 def test_validate_source_table(get_contrast_data):
-
     validated = get_validation_dict(get_contrast_data, schema=contrast_table_schema)
 
     assert validated["errors"] == 0
 
 
 def test_invalidate_source_table(get_failing_contrast_data):
-
     validated = get_validation_dict(
         get_failing_contrast_data, schema=contrast_table_schema
     )

diff --git a/tests/test_parse_taxonomy.py b/tests/test_parse_taxonomy.py
@@ -7,30 +7,26 @@
 
 
 def test_get_taxon_counts(parsed_stats):
-
     results = get_taxon_counts(parsed_stats)
 
     assert results["sample1"]["tax1"] == 5
     assert results["sample2"]["tax2"] == 10
 
 
 def test_build_taxonomy_stats(parsed_stats):
-
     n_reads = get_read_assignment(parsed_stats)
 
     assert n_reads["sample2"]["assigned"] == 93.75
     assert n_reads["sample2"]["unassigned"] == 6.25
 
 
 def test_get_common_taxas(all_sample_counts):
-
     most_common = get_common_taxas(all_sample_counts)
 
     assert most_common["sample2"]["tax2"] == 66.67
 
 
 def test_calculate_abund_diver(all_sample_counts):
-
     abund_div_df = calculate_abund_diver(all_sample_counts)
 
     assert round(abund_div_df[0]["Shannon Diversity"][1], 2) == 0.92