Skip to content

Commit d6221b0

Browse files
committed
Move input validation out of tests, into pipeline code itself
1 parent 5a3c628 commit d6221b0

File tree

2 files changed

+33
-23
lines changed

2 files changed

+33
-23
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,17 @@
1-
import pytest
21
from cattrs import structure, structure_attrs_fromdict
32
import hail as hl
43
import json
54

6-
from data_pipeline.pipelines.gnomad_v4_variants import (
7-
pipeline as gnomad_v4_variant_pipeline,
8-
)
5+
from loguru import logger
6+
7+
from data_pipeline.pipeline import Pipeline
98

109
from data_pipeline.datasets.gnomad_v4.types.initial_globals import Globals
1110
from data_pipeline.datasets.gnomad_v4.types.initial_variant import InitialVariant
1211
from data_pipeline.datasets.gnomad_v4.types.prepare_variants_step1 import Variant as Step1Variant
1312
from data_pipeline.datasets.gnomad_v4.types.prepare_variants_step2 import Variant as Step2Variant
1413
from data_pipeline.datasets.gnomad_v4.types.prepare_variants_step3 import Variant as Step3Variant
1514

16-
step1_task = gnomad_v4_variant_pipeline.get_task("prepare_gnomad_v4_exome_variants")
17-
1815

1916
def ht_to_json(ht: hl.Table, field: str = "row"):
2017
if field == "row":
@@ -32,45 +29,43 @@ def ht_to_json(ht: hl.Table, field: str = "row"):
3229
return objs
3330

3431

35-
@pytest.mark.mock_data
36-
def test_globals_input_validation():
37-
input_path = gnomad_v4_variant_pipeline.get_task("prepare_gnomad_v4_exome_variants").get_inputs()["input_path"]
32+
def validate_globals_input(pipeline: Pipeline):
    """Check that the step 1 input table's globals structure into ``Globals``.

    Looks up the ``input_path`` input of the
    ``prepare_gnomad_v4_exome_variants`` task, reads it as a Hail table,
    converts its globals with ``ht_to_json`` and structures the first
    resulting object into ``Globals`` using cattrs.

    Nothing is caught here, so any exception raised while reading the table
    or structuring the data propagates to the caller. A log line is emitted
    only when structuring completes.
    """
    task = pipeline.get_task("prepare_gnomad_v4_exome_variants")
    table = hl.read_table(task.get_inputs()["input_path"])
    globals_json = ht_to_json(table, "globals")
    # The structured object is discarded; the call is made only so that a
    # mismatch between the data and the Globals type surfaces as an error.
    structure(globals_json[0], Globals)
    logger.info("Validated prepare_gnomad_v4_exome_variants input globals")
4239

4340

44-
@pytest.mark.mock_data
45-
def test_validate_variant_input():
46-
input_path = gnomad_v4_variant_pipeline.get_task("prepare_gnomad_v4_exome_variants").get_inputs()["input_path"]
41+
def validate_variant_input(pipeline: Pipeline):
    """Check that every row of the step 1 input table fits ``InitialVariant``.

    Looks up the ``input_path`` input of the
    ``prepare_gnomad_v4_exome_variants`` task, reads it as a Hail table,
    converts its rows with ``ht_to_json`` and structures each one into
    ``InitialVariant`` using cattrs.

    Nothing is caught here, so any exception raised while reading the table
    or structuring a row propagates to the caller. A log line is emitted
    only when every row has been structured.
    """
    input_path = pipeline.get_task("prepare_gnomad_v4_exome_variants").get_inputs()["input_path"]
    ht = hl.read_table(input_path)
    # Plain loop instead of a list comprehension: the structured objects are
    # wanted only for the side effect of raising on a mismatch, so there is
    # no reason to accumulate them all in a throwaway list.
    for variant in ht_to_json(ht):
        structure_attrs_fromdict(variant, InitialVariant)
    logger.info("Validated prepare_gnomad_v4_exome_variants input variants")
5047

5148

52-
@pytest.mark.mock_data
53-
def test_validate_step1_output():
54-
output_path = gnomad_v4_variant_pipeline.get_task("prepare_gnomad_v4_exome_variants").get_output_path()
49+
def validate_step1_output(pipeline: Pipeline):
    """Check that every row of the step 1 output table fits ``Step1Variant``.

    Reads the Hail table at the output path of the
    ``prepare_gnomad_v4_exome_variants`` task, converts its rows with
    ``ht_to_json`` and structures each one into ``Step1Variant`` using
    cattrs. No sampling is applied; all rows returned by ``ht_to_json`` are
    checked.

    Nothing is caught here, so any exception raised while reading the table
    or structuring a row propagates to the caller. A log line is emitted
    only when every row has been structured.
    """
    output_path = pipeline.get_task("prepare_gnomad_v4_exome_variants").get_output_path()
    ht = hl.read_table(output_path)
    # Plain loop instead of a list comprehension: the structured objects are
    # wanted only for the side effect of raising on a mismatch, so there is
    # no reason to accumulate them all in a throwaway list.
    for variant in ht_to_json(ht):
        structure_attrs_fromdict(variant, Step1Variant)
    logger.info("Validated prepare_gnomad_v4_exome_variants (step 1) output")
5956

6057

61-
@pytest.mark.mock_data
62-
def test_validate_step2_output():
63-
output_path = gnomad_v4_variant_pipeline.get_task("annotate_gnomad_v4_exome_variants").get_output_path()
58+
def validate_step2_output(pipeline: Pipeline):
    """Check that every row of the step 2 output table fits ``Step2Variant``.

    Reads the Hail table at the output path of the
    ``annotate_gnomad_v4_exome_variants`` task, converts its rows with
    ``ht_to_json`` and structures each one into ``Step2Variant`` using
    cattrs.

    Nothing is caught here, so any exception raised while reading the table
    or structuring a row propagates to the caller. A log line is emitted
    only when every row has been structured.
    """
    output_path = pipeline.get_task("annotate_gnomad_v4_exome_variants").get_output_path()
    ht = hl.read_table(output_path)
    # Plain loop instead of a list comprehension: the structured objects are
    # wanted only for the side effect of raising on a mismatch, so there is
    # no reason to accumulate them all in a throwaway list.
    for variant in ht_to_json(ht):
        structure_attrs_fromdict(variant, Step2Variant)
    logger.info("Validated annotate_gnomad_v4_exome_variants (step 2) output")
6764

6865

69-
@pytest.mark.mock_data
70-
def test_validate_step3_output():
71-
output_path = gnomad_v4_variant_pipeline.get_task(
72-
"annotate_gnomad_v4_exome_transcript_consequences"
73-
).get_output_path()
66+
def validate_step3_output(pipeline: Pipeline):
    """Check that every row of the step 3 output table fits ``Step3Variant``.

    Reads the Hail table at the output path of the
    ``annotate_gnomad_v4_exome_transcript_consequences`` task, converts its
    rows with ``ht_to_json`` and structures each one into ``Step3Variant``
    using cattrs.

    Nothing is caught here, so any exception raised while reading the table
    or structuring a row propagates to the caller. A log line is emitted
    only when every row has been structured.
    """
    output_path = pipeline.get_task("annotate_gnomad_v4_exome_transcript_consequences").get_output_path()
    ht = hl.read_table(output_path)
    # Plain loop instead of a list comprehension: the structured objects are
    # wanted only for the side effect of raising on a mismatch, so there is
    # no reason to accumulate them all in a throwaway list.
    for variant in ht_to_json(ht):
        structure_attrs_fromdict(variant, Step3Variant)
    logger.info("Validated annotate_gnomad_v4_exome_transcript_consequences (step 3) output")

data-pipeline/src/data_pipeline/pipelines/gnomad_v4_variants.py

+15
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
from loguru import logger
23

34

45
from data_pipeline.config import PipelineConfig, get_data_environment, DataEnvironment
@@ -12,6 +13,13 @@
1213
annotate_variants,
1314
annotate_transcript_consequences,
1415
)
16+
from data_pipeline.pipelines.gnomad_v4_validation import (
17+
validate_globals_input,
18+
validate_step1_output,
19+
validate_step2_output,
20+
validate_step3_output,
21+
validate_variant_input,
22+
)
1523

1624
DATA_ENV = os.getenv("DATA_ENV", "mock")
1725

@@ -106,3 +114,10 @@
106114

107115
if __name__ == "__main__":
    # Script entry point: run the pipeline first, then validate its inputs
    # and the output of each step against the expected types.
    run_pipeline(pipeline)

    logger.info("Validating pipeline IO formats")
    # Validators run in this fixed order; the first one that raises stops
    # the script, since nothing here catches the exception.
    validators = (
        validate_globals_input,
        validate_variant_input,
        validate_step1_output,
        validate_step2_output,
        validate_step3_output,
    )
    for validate in validators:
        validate(pipeline)

0 commit comments

Comments
 (0)