Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
yaseminbridges committed Sep 30, 2024
2 parents d86ecca + f6d5344 commit 765390e
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 9 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ tool_specific_configuration_options:
phenotype_data_version: 2302
cache_type:
cache_caffeine_spec:
output_formats: [JSON,HTML] # options include HTML, JSON, TSV_VARIANT, TSV_GENE, VCF
post_process:
score_name: combinedScore
sort_order: DESCENDING
Expand All @@ -56,6 +57,8 @@ The analysis configuration file (in this case: `preset-exome-analysis.yml`) shou

The whitelist paths for the hg19 and hg38 dbs need only be specified for Exomiser v13.3.0 and earlier (unless specifying your own whitelist), as Exomiser v14.0.0 now includes this in the db.

To save on disk space, we recommend limiting the Exomiser output to JSON. This can be specified by setting the `output_formats` field in the `config.yaml` to `[JSON]`.

If using optional databases, such as REMM/CADD/local frequency, the optional data input should look like so in the input
directory:

Expand Down Expand Up @@ -118,7 +121,7 @@ The overall structure of the input directory should look like this with the cadd
```
### Setting up the testdata directory

The Exomiser plugin for PhEval accepts phenopackets and vcf files as an input for running Exomiser. The plugin can be run in `phenotype_only` mode, where only phenopackets are required as an input, however, this *must* be specified in the `config.yaml`.
The Exomiser plugin for PhEval accepts phenopackets and vcf files as an input for running Exomiser. The plugin can be run in `phenotype_only` mode, where only phenopackets are required as an input; however, this *must* be specified in the `config.yaml` by setting `variant_analysis: False`.

The testdata directory should include subdirectories named `phenopackets` and `vcf` if running with variant prioritisation.

Expand Down Expand Up @@ -157,4 +160,4 @@ To fix the error, `setuptools` needs to be downgraded to version 66:
```shell
pip uninstall setuptools
pip install -U setuptools=="66"
```
```
1 change: 1 addition & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ tool_specific_configuration_options:
# either none, simple, or caffeine
cache_type: none
cache_caffeine_spec:
output_formats: [JSON] # list of formats; options include HTML, JSON, TSV_VARIANT, TSV_GENE, VCF
post_process:
# For Exomiser, valid ranking methods include combinedScore, priorityScore, variantScore or pValue
score_name: combinedScore
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pheval_exomiser"
version = "0.2.2"
version = "0.2.3"
description = ""
authors = ["Yasemin Bridges <[email protected]>",
"Julius Jacobsen <[email protected]>",
Expand Down
34 changes: 28 additions & 6 deletions src/pheval_exomiser/prepare/create_batch_commands.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/usr/bin/python
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from typing import List, Optional

import click
from phenopackets import Family, Phenopacket
Expand Down Expand Up @@ -30,9 +29,10 @@ class ExomiserCommandLineArguments:
raw_results_dir: Path or None = None
variant_analysis: bool or None = None
output_options_file: Optional[Path] = None
output_formats: List[str] or None = None


def get_all_files_from_output_opt_directory(
    output_options_dir: Optional[Path],
) -> Optional[List[Path]]:
    """Return the output options files for a directory, or None if no directory is given.

    Args:
        output_options_dir: Directory holding output options files, or None.

    Returns:
        The result of ``all_files(output_options_dir)``, or None when
        ``output_options_dir`` is None.
    """
    # ``X or None`` in an annotation evaluates to just ``X``; Optional states the real contract.
    if output_options_dir is None:
        return None
    return all_files(output_options_dir)

Expand All @@ -46,10 +46,11 @@ def __init__(
phenopacket_path: Path,
phenopacket: Phenopacket or Family,
variant_analysis: bool,
output_options_dir_files: list[Path] or None,
output_options_dir_files: List[Path] or None,
output_options_file: Path or None,
raw_results_dir: Path or None,
analysis_yaml: Path or None,
output_formats: List[str] or None,
):
self.environment = environment
self.phenopacket_path = phenopacket_path
Expand All @@ -59,6 +60,7 @@ def __init__(
self.output_options_file = output_options_file
self.results_dir = raw_results_dir
self.analysis_yaml = analysis_yaml
self.output_formats = output_formats

def assign_output_options_file(self) -> Path or None:
"""Return the path of a single output option yaml if specified,
Expand Down Expand Up @@ -86,13 +88,15 @@ def add_phenotype_only_arguments(self) -> ExomiserCommandLineArguments:
else None
),
raw_results_dir=RAW_RESULTS_TARGET_DIRECTORY_DOCKER,
output_formats=self.output_formats,
)
elif self.environment == "local":
return ExomiserCommandLineArguments(
sample=Path(self.phenopacket_path),
variant_analysis=self.variant_analysis,
output_options_file=output_options_file,
raw_results_dir=self.results_dir,
output_formats=self.output_formats,
)

def add_variant_analysis_arguments(self, vcf_dir: Path) -> ExomiserCommandLineArguments:
Expand All @@ -109,6 +113,7 @@ def add_variant_analysis_arguments(self, vcf_dir: Path) -> ExomiserCommandLineAr
variant_analysis=self.variant_analysis,
raw_results_dir=self.results_dir,
analysis_yaml=self.analysis_yaml,
output_formats=self.output_formats,
)
elif self.environment == "docker":
return ExomiserCommandLineArguments(
Expand Down Expand Up @@ -143,7 +148,8 @@ def create_command_arguments(
output_options_dir: Path or None = None,
output_options_file: Path or None = None,
analysis_yaml: Path or None = None,
) -> list[ExomiserCommandLineArguments]:
output_formats: List[str] or None = None,
) -> List[ExomiserCommandLineArguments]:
"""Return a list of Exomiser command line arguments for a directory of phenopackets."""
phenopacket_paths = files_with_suffix(phenopacket_dir, ".json")
commands = []
Expand All @@ -160,6 +166,7 @@ def create_command_arguments(
output_options_file,
results_dir,
analysis_yaml,
output_formats,
).add_command_line_arguments(vcf_dir)
)
return commands
Expand Down Expand Up @@ -212,10 +219,22 @@ def write_output_options(self, command_arguments: ExomiserCommandLineArguments)
except IOError:
print("Error writing ", self.file)

def write_output_format(self, command_arguments: ExomiserCommandLineArguments) -> None:
    """Append the ``--output-format`` option to the command currently being written.

    Nothing is written when ``command_arguments.output_formats`` is None or empty,
    so the command never ends up with a flag that has no value.

    Args:
        command_arguments: Command line arguments whose ``output_formats`` are
            joined with commas and written to ``self.file``.
    """
    output_formats = command_arguments.output_formats
    if not output_formats:
        return
    if isinstance(output_formats, str):
        # A scalar such as "JSON" would otherwise be joined character by character.
        output_formats = [output_formats]
    try:
        self.file.write(" --output-format " + ",".join(output_formats))
    except IOError:
        print("Error writing ", self.file)

def write_analysis_command(self, command_arguments: ExomiserCommandLineArguments):
    """Write one full analysis command followed by a newline.

    Delegates, in order, to ``write_basic_analysis_command``, ``write_results_dir``,
    ``write_output_options`` and ``write_output_format``, then terminates the line.
    """
    command_sections = (
        self.write_basic_analysis_command,
        self.write_results_dir,
        self.write_output_options,
        self.write_output_format,
    )
    for write_section in command_sections:
        write_section(command_arguments)
    self.file.write("\n")

def write_basic_phenotype_only_command(
Expand All @@ -239,6 +258,7 @@ def write_basic_phenotype_only_command(
def write_phenotype_only_command(self, command_arguments: ExomiserCommandLineArguments):
    """Write one full phenotype-only command followed by a newline.

    Delegates, in order, to ``write_basic_phenotype_only_command``,
    ``write_output_options`` and ``write_output_format``, then terminates the line.
    """
    command_sections = (
        self.write_basic_phenotype_only_command,
        self.write_output_options,
        self.write_output_format,
    )
    for write_section in command_sections:
        write_section(command_arguments)
    self.file.write("\n")

def write_local_commands(self, command_arguments: ExomiserCommandLineArguments):
Expand All @@ -261,7 +281,7 @@ class BatchFileWriter:

def __init__(
self,
command_arguments_list: list[ExomiserCommandLineArguments],
command_arguments_list: List[ExomiserCommandLineArguments],
variant_analysis: bool,
output_dir: Path,
batch_prefix: str,
Expand Down Expand Up @@ -326,6 +346,7 @@ def create_batch_file(
results_dir: Path,
output_options_dir: Path = None,
output_options_file: Path = None,
output_formats: List[str] = None,
) -> None:
"""Create Exomiser batch files."""
command_arguments = create_command_arguments(
Expand All @@ -337,6 +358,7 @@ def create_batch_file(
output_options_dir,
output_options_file,
analysis,
output_formats,
)
(
BatchFileWriter(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
from typing import List

from pydantic import BaseModel, Field

Expand Down Expand Up @@ -54,6 +55,7 @@ class ExomiserConfigurations(BaseModel):
analysis_configuration_file (Path): The file name of the analysis configuration file located in the input_dir
max_jobs (int): Maximum number of jobs to run in a batch
application_properties (ApplicationProperties): application.properties configurations
output_formats (List[str]): List of raw output formats.
post_process (PostProcessing): Post-processing configurations
"""

Expand All @@ -62,4 +64,5 @@ class ExomiserConfigurations(BaseModel):
analysis_configuration_file: Path = Field(...)
max_jobs: int = Field(...)
application_properties: ApplicationProperties = Field(...)
output_formats: List[str] = Field(None)
post_process: PostProcessing = Field(...)
6 changes: 6 additions & 0 deletions src/pheval_exomiser/run/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ def prepare_batch_files(
"""Prepare the exomiser batch files"""
print("...preparing batch files...")
vcf_dir_name = Path(testdata_dir).joinpath("vcf")
output_formats = (
config.output_formats + ["JSON"]
if config.output_formats and "JSON" not in config.output_formats
else config.output_formats
)
create_batch_file(
environment=config.environment,
analysis=input_dir.joinpath(config.analysis_configuration_file),
Expand All @@ -43,6 +48,7 @@ def prepare_batch_files(
output_options_dir=None,
results_dir=raw_results_dir,
variant_analysis=variant_analysis,
output_formats=output_formats,
)


Expand Down
11 changes: 11 additions & 0 deletions tests/test_create_batch_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def setUpClass(cls) -> None:
output_options_file=None,
raw_results_dir=Path("/path/to/results_dir"),
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
)
cls.command_creator_output_options_file = CommandCreator(
environment="local",
Expand All @@ -152,6 +153,7 @@ def setUpClass(cls) -> None:
),
raw_results_dir=Path("/path/to/results_dir"),
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
)
cls.command_creator_none = CommandCreator(
environment="local",
Expand All @@ -162,6 +164,7 @@ def setUpClass(cls) -> None:
output_options_file=None,
raw_results_dir=Path("/path/to/results_dir"),
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
)
cls.command_creator_phenotype_only = CommandCreator(
environment="local",
Expand All @@ -172,6 +175,7 @@ def setUpClass(cls) -> None:
output_options_file=None,
raw_results_dir=Path("/path/to/results_dir"),
analysis_yaml=None,
output_formats=["JSON", "HTML"],
)
cls.command_creator_phenotype_only_output_options = CommandCreator(
environment="local",
Expand All @@ -184,6 +188,7 @@ def setUpClass(cls) -> None:
),
raw_results_dir=Path("/path/to/results_dir"),
analysis_yaml=None,
output_formats=["JSON"],
)

def test_assign_output_options_file_from_dir(self):
Expand All @@ -210,6 +215,7 @@ def test_add_phenotype_only_arguments(self):
vcf_assembly=None,
raw_results_dir=Path("/path/to/results_dir"),
variant_analysis=False,
output_formats=["JSON", "HTML"],
),
)

Expand All @@ -225,6 +231,7 @@ def test_add_phenotype_only_arguments_output_options(self):
output_options_file=Path(
"/full/path/to/some/alternate/output_options/phenopacket-output_options.yml"
),
output_formats=["JSON"],
),
)

Expand All @@ -243,6 +250,7 @@ def test_add_variant_analysis_arguments(self):
"/full/path/to/some/alternate/output_options/phenopacket-output_options.yml"
),
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
),
)

Expand All @@ -256,6 +264,7 @@ def test_add_variant_analysis_arguments_none(self):
raw_results_dir=Path("/path/to/results_dir"),
variant_analysis=False,
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
),
)

Expand All @@ -274,6 +283,7 @@ def test_add_command_line_arguments(self):
"/full/path/to/some/alternate/output_options/phenopacket-output_options.yml"
),
analysis_yaml=Path("/path/to/exomiser_analysis.yaml"),
output_formats=["JSON"],
),
)

Expand All @@ -286,5 +296,6 @@ def test_add_command_line_arguments_phenotype_only(self):
vcf_assembly=None,
raw_results_dir=Path("/path/to/results_dir"),
variant_analysis=False,
output_formats=["JSON", "HTML"],
),
)
1 change: 1 addition & 0 deletions tests/test_write_application_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def setUp(cls) -> None:
hg19_whitelist_path="2302_hg19_clinvar_whitelist.tsv.gz",
hg38_whitelist_path="2302_hg38_clinvar_whitelist.tsv.gz",
),
output_formats=["JSON"],
post_process=PostProcessing(score_name="combinedScore", sort_order="descending"),
),
)
Expand Down

0 comments on commit 765390e

Please sign in to comment.