From 89898b301cf0f52561242e5de6546569fd6d995f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Cavalcante?= <jvfecav@gmail.com>
Date: Sat, 30 Mar 2024 20:38:22 -0300
Subject: [PATCH] refactor: Use txts instead of tsvs (#2)

* refactor: Detect txt instead of tsv files

- This makes it easier to integrate with existing nf-core modules

* refactor: Fix schema validation when using txt files

* style: Apply black formatting to tests/test_file_finder.py

* style: Apply black formatting to remaining __init__ files
---
 microview/file_finder.py                                  | 8 +++++---
 microview/schemas/__init__.py                             | 1 +
 microview/schemas/kaiju_report.schema.json                | 8 ++++----
 microview/templates/__init__.py                           | 1 +
 tests/conftest.py                                         | 6 +++---
 .../{centrifuge_test.tsv => centrifuge_test.txt}          | 0
 tests/test_data/contrast_table.csv                        | 4 ++--
 tests/test_data/{kaiju_test.tsv => kaiju_test.txt}        | 0
 tests/test_data/{kaiju_test_2.tsv => kaiju_test_2.txt}    | 0
 tests/test_data/{kraken_test.tsv => kraken_test.txt}      | 0
 tests/test_file_finder.py                                 | 2 --
 tests/test_parse_taxonomy.py                              | 4 ----
 12 files changed, 16 insertions(+), 18 deletions(-)
 rename tests/test_data/{centrifuge_test.tsv => centrifuge_test.txt} (100%)
 rename tests/test_data/{kaiju_test.tsv => kaiju_test.txt} (100%)
 rename tests/test_data/{kaiju_test_2.tsv => kaiju_test_2.txt} (100%)
 rename tests/test_data/{kraken_test.tsv => kraken_test.txt} (100%)

diff --git a/microview/file_finder.py b/microview/file_finder.py
index 5edd42f..1810f72 100644
--- a/microview/file_finder.py
+++ b/microview/file_finder.py
@@ -96,7 +96,7 @@ def detect_report_type(report_paths: List[Path], console) -> List[Sample]:
           one the report path, the other a string specifying the report type.
     """
     kaiju_validated = [
-        get_validation_dict(report, schema=kaiju_report_schema)
+        get_validation_dict(report, format="tsv", schema=kaiju_report_schema)
         for report in report_paths
     ]
     kaiju_reports = [
@@ -107,7 +107,9 @@ def detect_report_type(report_paths: List[Path], console) -> List[Sample]:
 
     # TODO: Improve Kraken validation
     kraken_validated = [
-        get_validation_dict(report, checks=[checks.table_dimensions(num_fields=6)])
+        get_validation_dict(
+            report, format="tsv", checks=[checks.table_dimensions(num_fields=6)]
+        )
         for report in report_paths
     ]
     kraken_reports = [
@@ -155,7 +157,7 @@ def find_reports(reports_path: Path, console) -> List[Sample]:
         List[Sample]: List of samples, an object comprising two attributes,
           one the report path, the other a string specifying the report type.
     """
-    file_paths: List[Path] = list(reports_path.glob("*tsv"))
+    file_paths: List[Path] = list(reports_path.glob("*txt"))
     samples = detect_report_type(file_paths, console)
     return samples
 
diff --git a/microview/schemas/__init__.py b/microview/schemas/__init__.py
index 98fde35..12f03db 100644
--- a/microview/schemas/__init__.py
+++ b/microview/schemas/__init__.py
@@ -1,6 +1,7 @@
 """
 MicroView module containing schemas for data validation
 """
+
 from pathlib import Path
 
 HERE = Path(__file__).parent.resolve()
diff --git a/microview/schemas/kaiju_report.schema.json b/microview/schemas/kaiju_report.schema.json
index e42a531..7ec9012 100644
--- a/microview/schemas/kaiju_report.schema.json
+++ b/microview/schemas/kaiju_report.schema.json
@@ -10,25 +10,25 @@
       "format": "default",
       "name": "percent",
       "type": "number",
-      "required": false
+      "required": true
     },
     {
       "format": "default",
       "name": "reads",
       "type": "integer",
-      "required": false
+      "required": true
     },
     {
       "format": "default",
       "name": "taxon_id",
       "type": "integer",
-      "required": false
+      "required": true
     },
     {
       "format": "default",
       "name": "taxon_name",
       "type": "string",
-      "required": false
+      "required": true
     }
   ],
   "missingValues": ["NA"]
diff --git a/microview/templates/__init__.py b/microview/templates/__init__.py
index 7844d47..41817aa 100644
--- a/microview/templates/__init__.py
+++ b/microview/templates/__init__.py
@@ -1,6 +1,7 @@
 """
 MicroView module containing Jinja2 templates
 """
+
 from pathlib import Path
 
 from jinja2 import Environment, FileSystemLoader
diff --git a/tests/conftest.py b/tests/conftest.py
index 3041467..2fc5e42 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -38,17 +38,17 @@ def all_sample_counts():
 
 @pytest.fixture
 def get_kraken_data():
-    return Path(__file__).parent.resolve() / "test_data" / "kraken_test.tsv"
+    return Path(__file__).parent.resolve() / "test_data" / "kraken_test.txt"
 
 
 @pytest.fixture
 def get_kaiju_data():
-    return Path(__file__).parent.resolve() / "test_data" / "kaiju_test.tsv"
+    return Path(__file__).parent.resolve() / "test_data" / "kaiju_test.txt"
 
 
 @pytest.fixture
 def get_centrifuge_data():
-    return Path(__file__).parent.resolve() / "test_data" / "centrifuge_test.tsv"
+    return Path(__file__).parent.resolve() / "test_data" / "centrifuge_test.txt"
 
 
 @pytest.fixture
diff --git a/tests/test_data/centrifuge_test.tsv b/tests/test_data/centrifuge_test.txt
similarity index 100%
rename from tests/test_data/centrifuge_test.tsv
rename to tests/test_data/centrifuge_test.txt
diff --git a/tests/test_data/contrast_table.csv b/tests/test_data/contrast_table.csv
index 3d96436..a893a0a 100644
--- a/tests/test_data/contrast_table.csv
+++ b/tests/test_data/contrast_table.csv
@@ -1,3 +1,3 @@
 sample,group
-kaiju_test.tsv,one
-kaiju_test_2.tsv,two
+kaiju_test.txt,one
+kaiju_test_2.txt,two
diff --git a/tests/test_data/kaiju_test.tsv b/tests/test_data/kaiju_test.txt
similarity index 100%
rename from tests/test_data/kaiju_test.tsv
rename to tests/test_data/kaiju_test.txt
diff --git a/tests/test_data/kaiju_test_2.tsv b/tests/test_data/kaiju_test_2.txt
similarity index 100%
rename from tests/test_data/kaiju_test_2.tsv
rename to tests/test_data/kaiju_test_2.txt
diff --git a/tests/test_data/kraken_test.tsv b/tests/test_data/kraken_test.txt
similarity index 100%
rename from tests/test_data/kraken_test.tsv
rename to tests/test_data/kraken_test.txt
diff --git a/tests/test_file_finder.py b/tests/test_file_finder.py
index 8d5c4c0..e17dc18 100644
--- a/tests/test_file_finder.py
+++ b/tests/test_file_finder.py
@@ -28,14 +28,12 @@ def test_detect_centrifuge(get_centrifuge_data):
 
 
 def test_validate_source_table(get_contrast_data):
-
     validated = get_validation_dict(get_contrast_data, schema=contrast_table_schema)
 
     assert validated["errors"] == 0
 
 
 def test_invalidate_source_table(get_failing_contrast_data):
-
     validated = get_validation_dict(
         get_failing_contrast_data, schema=contrast_table_schema
     )
diff --git a/tests/test_parse_taxonomy.py b/tests/test_parse_taxonomy.py
index b27ff09..50e082c 100644
--- a/tests/test_parse_taxonomy.py
+++ b/tests/test_parse_taxonomy.py
@@ -7,7 +7,6 @@
 
 
 def test_get_taxon_counts(parsed_stats):
-
     results = get_taxon_counts(parsed_stats)
 
     assert results["sample1"]["tax1"] == 5
@@ -15,7 +14,6 @@ def test_get_taxon_counts(parsed_stats):
 
 
 def test_build_taxonomy_stats(parsed_stats):
-
     n_reads = get_read_assignment(parsed_stats)
 
     assert n_reads["sample2"]["assigned"] == 93.75
@@ -23,14 +21,12 @@ def test_build_taxonomy_stats(parsed_stats):
 
 
 def test_get_common_taxas(all_sample_counts):
-
     most_common = get_common_taxas(all_sample_counts)
 
     assert most_common["sample2"]["tax2"] == 66.67
 
 
 def test_calculate_abund_diver(all_sample_counts):
-
     abund_div_df = calculate_abund_diver(all_sample_counts)
 
     assert round(abund_div_df[0]["Shannon Diversity"][1], 2) == 0.92