From 7bd709f327749bc592a1c791bf91e38708ea30c7 Mon Sep 17 00:00:00 2001 From: David Betancur Date: Thu, 10 Oct 2024 21:15:17 -0400 Subject: [PATCH] Bugfix wrong matrix for multiome, updated to be dynamic based on dataset previously processed. --- .../airflow/dags/azimuth_annotations.py | 4 ++-- src/ingest-pipeline/airflow/dags/utils.py | 22 +++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/ingest-pipeline/airflow/dags/azimuth_annotations.py b/src/ingest-pipeline/airflow/dags/azimuth_annotations.py index c5ad7bae..396ed393 100644 --- a/src/ingest-pipeline/airflow/dags/azimuth_annotations.py +++ b/src/ingest-pipeline/airflow/dags/azimuth_annotations.py @@ -88,7 +88,7 @@ def build_cwltool_cmd1(**kwargs): organ_list = list(set(ds_rslt["organs"])) organ_code = organ_list[0] if len(organ_list) == 1 else "multi" - assay = get_assay_previous_version(**kwargs) + assay, matrix = get_assay_previous_version(**kwargs) command = [ *get_cwltool_base_cmd(tmpdir), @@ -96,7 +96,7 @@ def build_cwltool_cmd1(**kwargs): "--reference", organ_code, "--matrix", - "expr.h5ad", + matrix, "--secondary-analysis-matrix", "secondary_analysis.h5ad", "--assay", diff --git a/src/ingest-pipeline/airflow/dags/utils.py b/src/ingest-pipeline/airflow/dags/utils.py index 5b92ccf4..2c0042fc 100644 --- a/src/ingest-pipeline/airflow/dags/utils.py +++ b/src/ingest-pipeline/airflow/dags/utils.py @@ -351,22 +351,26 @@ def my_callable(**kwargs): return ds_rslt["dataset_info"] -def get_assay_previous_version(**kwargs) -> str: +def get_assay_previous_version(**kwargs) -> tuple: dataset_type = get_dataname_previous_version(**kwargs).split("__")[0] if dataset_type == "salmon_rnaseq_10x": - return "10x_v3" + return "10x_v3", "expr.h5ad" if dataset_type == "salmon_rnaseq_10x_sn": - return "10x_v3_sn" + return "10x_v3_sn", "expr.h5ad" if dataset_type == "salmon_rnaseq_10x_v2": - return "10x_v2" + return "10x_v2", "expr.h5ad" if dataset_type == "salmon_rnaseq_10x_v2_sn": - return "10x_v2_sn" + return "10x_v2_sn", "expr.h5ad" if dataset_type == "salmon_rnaseq_sciseq": - return "sciseq" - if dataset_type == "salmon_rnaseq_snareseq" or dataset_type == "multiome_snareseq": - return "snareseq" + return "sciseq", "expr.h5ad" + if dataset_type == "salmon_rnaseq_snareseq": + return "snareseq", "expr.h5ad" if dataset_type == "salmon_rnaseq_slideseq": - return "slideseq" + return "slideseq", "expr.h5ad" + if dataset_type == "multiome_10x": + return "10x_V3_sn", "mudata_raw.h5mu" + if dataset_type == "multiome_snareseq": + return "snareseq", "mudata_raw.h5mu" def get_parent_dataset_paths_list(**kwargs) -> List[Path]: