Skip to content

Commit

Permalink
refactor: Use txts instead of tsvs (#2)
Browse files Browse the repository at this point in the history
* refactor: Detect txt instead of tsv files

- Will make it easier to integrate with existing nf-core modules

* refactor: Fix schema validation when using txt files

* style: Add black to test/file_finder

* style: Add black to remaining init files
  • Loading branch information
jvfe authored Mar 30, 2024
1 parent 6d2947f commit 89898b3
Show file tree
Hide file tree
Showing 12 changed files with 16 additions and 18 deletions.
8 changes: 5 additions & 3 deletions microview/file_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def detect_report_type(report_paths: List[Path], console) -> List[Sample]:
one being the report path, the other a string specifying the report type.
"""
kaiju_validated = [
get_validation_dict(report, schema=kaiju_report_schema)
get_validation_dict(report, format="tsv", schema=kaiju_report_schema)
for report in report_paths
]
kaiju_reports = [
Expand All @@ -107,7 +107,9 @@ def detect_report_type(report_paths: List[Path], console) -> List[Sample]:

# TODO: Improve Kraken validation
kraken_validated = [
get_validation_dict(report, checks=[checks.table_dimensions(num_fields=6)])
get_validation_dict(
report, format="tsv", checks=[checks.table_dimensions(num_fields=6)]
)
for report in report_paths
]
kraken_reports = [
Expand Down Expand Up @@ -155,7 +157,7 @@ def find_reports(reports_path: Path, console) -> List[Sample]:
List[Sample]: List of samples, each an object comprising two attributes,
one being the report path, the other a string specifying the report type.
"""
file_paths: List[Path] = list(reports_path.glob("*tsv"))
file_paths: List[Path] = list(reports_path.glob("*txt"))
samples = detect_report_type(file_paths, console)
return samples

Expand Down
1 change: 1 addition & 0 deletions microview/schemas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
MicroView module containing schemas for data validation
"""

from pathlib import Path

HERE = Path(__file__).parent.resolve()
Expand Down
8 changes: 4 additions & 4 deletions microview/schemas/kaiju_report.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,25 @@
"format": "default",
"name": "percent",
"type": "number",
"required": false
"required": true
},
{
"format": "default",
"name": "reads",
"type": "integer",
"required": false
"required": true
},
{
"format": "default",
"name": "taxon_id",
"type": "integer",
"required": false
"required": true
},
{
"format": "default",
"name": "taxon_name",
"type": "string",
"required": false
"required": true
}
],
"missingValues": ["NA"]
Expand Down
1 change: 1 addition & 0 deletions microview/templates/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
MicroView module containing Jinja2 templates
"""

from pathlib import Path

from jinja2 import Environment, FileSystemLoader
Expand Down
6 changes: 3 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,17 @@ def all_sample_counts():

@pytest.fixture
def get_kraken_data():
return Path(__file__).parent.resolve() / "test_data" / "kraken_test.tsv"
return Path(__file__).parent.resolve() / "test_data" / "kraken_test.txt"


@pytest.fixture
def get_kaiju_data():
return Path(__file__).parent.resolve() / "test_data" / "kaiju_test.tsv"
return Path(__file__).parent.resolve() / "test_data" / "kaiju_test.txt"


@pytest.fixture
def get_centrifuge_data():
return Path(__file__).parent.resolve() / "test_data" / "centrifuge_test.tsv"
return Path(__file__).parent.resolve() / "test_data" / "centrifuge_test.txt"


@pytest.fixture
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions tests/test_data/contrast_table.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
sample,group
kaiju_test.tsv,one
kaiju_test_2.tsv,two
kaiju_test.txt,one
kaiju_test_2.txt,two
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 0 additions & 2 deletions tests/test_file_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,12 @@ def test_detect_centrifuge(get_centrifuge_data):


def test_validate_source_table(get_contrast_data):

validated = get_validation_dict(get_contrast_data, schema=contrast_table_schema)

assert validated["errors"] == 0


def test_invalidate_source_table(get_failing_contrast_data):

validated = get_validation_dict(
get_failing_contrast_data, schema=contrast_table_schema
)
Expand Down
4 changes: 0 additions & 4 deletions tests/test_parse_taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,26 @@


def test_get_taxon_counts(parsed_stats):

results = get_taxon_counts(parsed_stats)

assert results["sample1"]["tax1"] == 5
assert results["sample2"]["tax2"] == 10


def test_build_taxonomy_stats(parsed_stats):

n_reads = get_read_assignment(parsed_stats)

assert n_reads["sample2"]["assigned"] == 93.75
assert n_reads["sample2"]["unassigned"] == 6.25


def test_get_common_taxas(all_sample_counts):

most_common = get_common_taxas(all_sample_counts)

assert most_common["sample2"]["tax2"] == 66.67


def test_calculate_abund_diver(all_sample_counts):

abund_div_df = calculate_abund_diver(all_sample_counts)

assert round(abund_div_df[0]["Shannon Diversity"][1], 2) == 0.92

0 comments on commit 89898b3

Please sign in to comment.