Skip to content

Commit

Permalink
Merge pull request #229 from hubmapconsortium/devel
Browse files Browse the repository at this point in the history
Merging devel to master on the way to a new release branch
  • Loading branch information
jswelling authored Feb 19, 2021
2 parents 620e7c3 + be6c56c commit c0f696a
Show file tree
Hide file tree
Showing 49 changed files with 1,104 additions and 13,585 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@
[submodule "src/ingest-pipeline/airflow/dags/cwl/ome-tiff-pyramid-ims"]
path = src/ingest-pipeline/airflow/dags/cwl/ome-tiff-pyramid-ims
url = https://github.com/hubmapconsortium/ome-tiff-pyramid
[submodule "src/ingest-pipeline/submodules/ingest-validation-tests"]
path = src/ingest-pipeline/submodules/ingest-validation-tests
url = git@github.com:hubmapconsortium/ingest-validation-tests.git
2 changes: 1 addition & 1 deletion build_number
Original file line number Diff line number Diff line change
@@ -1 +1 @@
477
591
6 changes: 4 additions & 2 deletions docker/ingest-pipeline/config/airflow.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,9 @@ expose_stacktrace = True

# Set to true to turn on authentication:
# https://airflow.apache.org/security.html#web-authentication
authenticate = False
authenticate = True

#auth_backend =

# Filter the list of dags by owner name (requires authentication to be enabled)
filter_by_owner = False
Expand Down Expand Up @@ -1052,4 +1054,4 @@ fs_group =
# The Key-value pairs to be given to worker pods.
# The worker pods will be given these static labels, as well as some additional dynamic labels
# to identify the task.
# Should be supplied in the format: ``key = value``
# Should be supplied in the format: ``key = value``
4 changes: 3 additions & 1 deletion docker/ingest-pipeline/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ export PATH=/home/airflow/.local/bin:$PATH

# Install custom python package if requirements.txt is present
pip install --upgrade pip
pip install --user 'apache-airflow[celery,crypto,postgres,redis,ssh]'
pip install --user flask-admin
pip install --user 'apache-airflow[celery,crypto,postgres,redis,ssh]<2.0.0'
if [[ -e "/requirements.txt" ]]; then
$(command -v pip) install --user -r /requirements.txt
fi

# Global defaults and back-compat
export AIRFLOW__CORE__FERNET_KEY=`python -c 'from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)'`
export AIRFLOW__WEBSERVER__SECRET_KEY=`openssl rand -hex 30`

# Load DAGs examples (default: Yes)
if [[ -z "$AIRFLOW__CORE__LOAD_EXAMPLES" && "${LOAD_EX:=n}" == n ]]; then
Expand Down
2 changes: 1 addition & 1 deletion src/ingest-pipeline/airflow/dags/bulk_atacseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
) as dag:
pipeline_name = 'bulk-atac-seq'
cwl_workflows = get_absolute_workflows(
Path(pipeline_name, 'bulk-atac-seq-pipeline.cwl'),
Path('sc-atac-seq-pipeline', 'bulk-atac-seq-pipeline.cwl'),
)

def build_dataset_name(**kwargs):
Expand Down
56 changes: 55 additions & 1 deletion src/ingest-pipeline/airflow/dags/codex_cytokit.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
Path(pipeline_name, 'pipeline.cwl'),
Path('portal-containers', 'ome-tiff-offsets.cwl'),
Path('portal-containers', 'sprm-to-json.cwl'),
Path('portal-containers', 'sprm-to-anndata.cwl'),
)

def build_dataset_name(**kwargs):
Expand Down Expand Up @@ -214,10 +215,60 @@ def build_cwltool_cmd3(**kwargs):
task_id='maybe_keep_cwl3',
python_callable=utils.pythonop_maybe_keep,
provide_context=True,
op_kwargs = {'next_op' : 'move_data',
op_kwargs = {'next_op' : 'prepare_cwl4',
'bail_op' : 'set_dataset_error',
'test_op' : 'pipeline_exec_cwl3'}
)

# No-op task; 'prepare_cwl4' is the task id that maybe_keep_cwl3 names as its next_op.
prepare_cwl4 = DummyOperator(task_id='prepare_cwl4')

def build_cwltool_cmd4(**kwargs):
    """Build the cwltool invocation for the fourth CWL workflow.

    Expects the Airflow task context in ``kwargs``: ``dag_run`` (whose
    ``conf`` must contain ``parent_lz_path``) and ``run_id``.  The temp
    directory, parent data directory and data directory are printed for
    the task log.

    Returns whatever ``join_quote_command_str`` produces for the command:
    the base cwltool command, ``cwl_workflows[3]``, and an ``--input_dir``
    pointing at ``<tmpdir>/cwl_out/sprm_outputs``.
    """
    run_conf = kwargs['dag_run'].conf
    tmpdir = utils.get_tmp_dir_path(kwargs['run_id'])
    print('tmpdir: ', tmpdir)

    # Only reported in the log here; the command itself does not use it.
    parent_data_dir = run_conf['parent_lz_path']
    print('parent_data_dir: ', parent_data_dir)

    # This stage reads input from stage 1
    data_dir = tmpdir / 'cwl_out'
    print('data_dir: ', data_dir)

    command = list(get_cwltool_base_cmd(tmpdir))
    command.extend([
        cwl_workflows[3],
        '--input_dir',
        data_dir / 'sprm_outputs',
    ])
    return join_quote_command_str(command)


# Runs build_cwltool_cmd4 with the task context; the bash task
# 'pipeline_exec_cwl4' pulls this task's result via xcom_pull.
t_build_cmd4 = PythonOperator(
    python_callable=build_cwltool_cmd4,
    provide_context=True,
    task_id='build_cmd4',
)


# Shell task for the fourth CWL stage.  The templated script:
#   1. resolves the run's temp directory via tmp_dir_path(run_id),
#   2. changes into its cwl_out subdirectory,
#   3. runs the command pulled from the 'build_cmd4' task's XCom, appending
#      stdout and stderr to session.log in the temp directory,
#   4. echoes that command's exit status.
# NOTE(review): the echoed status is presumably what maybe_keep_cwl4 inspects
# through its test_op setting -- confirm against utils.pythonop_maybe_keep.
t_pipeline_exec_cwl4 = BashOperator(
task_id='pipeline_exec_cwl4',
bash_command=""" \
tmp_dir={{tmp_dir_path(run_id)}} ; \
cd ${tmp_dir}/cwl_out ; \
{{ti.xcom_pull(task_ids='build_cmd4')}} >> ${tmp_dir}/session.log 2>&1 ; \
echo $?
"""
)

# Branch after stage 4: utils.pythonop_maybe_keep is given the task to test
# ('pipeline_exec_cwl4') and the two possible successors, 'move_data' and
# 'set_dataset_error'.
t_maybe_keep_cwl4 = BranchPythonOperator(
    task_id='maybe_keep_cwl4',
    python_callable=utils.pythonop_maybe_keep,
    provide_context=True,
    op_kwargs=dict(
        next_op='move_data',
        bail_op='set_dataset_error',
        test_op='pipeline_exec_cwl4',
    ),
)


t_send_create_dataset = PythonOperator(
Expand Down Expand Up @@ -265,6 +316,7 @@ def build_cwltool_cmd3(**kwargs):
'pipeline_exec_cwl1',
'pipeline_exec_cwl2',
'pipeline_exec_cwl3',
'pipeline_exec_cwl4',
'move_data',
],
cwl_workflows=cwl_workflows,
Expand All @@ -287,10 +339,12 @@ def build_cwltool_cmd3(**kwargs):
>> prepare_cwl1 >> t_build_cmd1 >> t_pipeline_exec_cwl1 >> t_maybe_keep_cwl1
>> prepare_cwl2 >> t_build_cmd2 >> t_pipeline_exec_cwl2 >> t_maybe_keep_cwl2
>> prepare_cwl3 >> t_build_cmd3 >> t_pipeline_exec_cwl3 >> t_maybe_keep_cwl3
>> prepare_cwl4 >> t_build_cmd4 >> t_pipeline_exec_cwl4 >> t_maybe_keep_cwl4
>> t_move_data >> t_expand_symlinks >> t_send_status >> t_join)
t_maybe_keep_cwl1 >> t_set_dataset_error
t_maybe_keep_cwl2 >> t_set_dataset_error
t_maybe_keep_cwl3 >> t_set_dataset_error
t_maybe_keep_cwl4 >> t_set_dataset_error
t_set_dataset_error >> t_join
t_join >> t_cleanup_tmpdir

Expand Down
1 change: 0 additions & 1 deletion src/ingest-pipeline/airflow/dags/cwl/bulk-atac-seq
Submodule bulk-atac-seq deleted from 302f1f
2 changes: 1 addition & 1 deletion src/ingest-pipeline/airflow/dags/cwl/portal-containers
Submodule portal-containers updated 58 files
+0 −1 .gitignore
+47 −0 README.md
+22 −0 containers/sprm-to-anndata/Dockerfile
+3 −0 containers/sprm-to-anndata/README.md
+1 −0 containers/sprm-to-anndata/VERSION
+95 −0 containers/sprm-to-anndata/context/main.py
+36 −0 containers/sprm-to-anndata/context/requirements-freeze.txt
+6 −0 containers/sprm-to-anndata/context/requirements.txt
+4 −0 containers/sprm-to-anndata/test-input/R001_X001_Y001.ome.tiff-cell_channel_mean.csv
+4 −0 containers/sprm-to-anndata/test-input/R001_X001_Y001.ome.tiff-cell_cluster.csv
+4 −0 containers/sprm-to-anndata/test-input/R001_X001_Y001.ome.tiff-cell_polygons_spatial.csv
+3 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/.zgroup
+22 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/X/.zarray
+ containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/X/0.0
+9 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obs/.zattrs
+3 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obs/.zgroup
+20 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obs/Covariance/.zarray
+ containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obs/Covariance/0
+24 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obs/ID/.zarray
+ containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obs/ID/0
+20 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obs/Mean/.zarray
+ containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obs/Mean/0
+3 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obsm/.zgroup
+26 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obsm/poly/.zarray
+ containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obsm/poly/0.0.0.0
+24 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obsm/xy/.zarray
+ containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/obsm/xy/0.0.0
+6 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/var/.zattrs
+3 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/var/.zgroup
+24 −0 containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/var/_index/.zarray
+ containers/sprm-to-anndata/test-output-expected/R001_X001_Y001-anndata.zarr/var/_index/0
+7 −0 sprm-to-anndata-manifest.json
+19 −0 sprm-to-anndata.cwl
+1 −0 workflows/sprm-to-anndata/README.md
+4 −0 workflows/sprm-to-anndata/test-input/R001_X001_Y001.ome.tiff-cell_channel_mean.csv
+4 −0 workflows/sprm-to-anndata/test-input/R001_X001_Y001.ome.tiff-cell_cluster.csv
+4 −0 workflows/sprm-to-anndata/test-input/R001_X001_Y001.ome.tiff-cell_polygons_spatial.csv
+3 −0 workflows/sprm-to-anndata/test-job.yml
+3 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/.zgroup
+22 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/X/.zarray
+ workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/X/0.0
+9 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obs/.zattrs
+3 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obs/.zgroup
+20 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obs/Covariance/.zarray
+ workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obs/Covariance/0
+24 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obs/ID/.zarray
+ workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obs/ID/0
+20 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obs/Mean/.zarray
+ workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obs/Mean/0
+3 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obsm/.zgroup
+26 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obsm/poly/.zarray
+ workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obsm/poly/0.0.0.0
+24 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obsm/xy/.zarray
+ workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/obsm/xy/0.0.0
+6 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/var/.zattrs
+3 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/var/.zgroup
+24 −0 workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/var/_index/.zarray
+ workflows/sprm-to-anndata/test-output-expected/hubmap_ui/R001_X001_Y001-anndata.zarr/var/_index/0
2 changes: 1 addition & 1 deletion src/ingest-pipeline/airflow/dags/cwl/salmon-rnaseq
Submodule salmon-rnaseq updated 46 files
+2 −0 .gitignore
+11 −0 .pre-commit-config.yaml
+7 −0 .travis.yml
+32 −0 README.rst
+54 −0 bin/analysis/add_slideseq_coordinates.py
+32 −0 bin/analysis/adjust_barcodes.py
+187 −0 bin/analysis/alevin_to_anndata.py
+48 −0 bin/analysis/annotate_cells.py
+19 −14 bin/analysis/annotate_sciseq_barcodes.py
+25 −21 bin/analysis/correct_snareseq_barcodes.py
+26 −33 bin/analysis/expand_sciseq_barcodes.py
+75 −0 bin/analysis/extract_slideseq_barcodes.py
+13 −13 bin/analysis/fastqc_wrapper.py
+17 −10 bin/analysis/make_expression_matrix.py
+29 −0 bin/analysis/plot_utils.py
+92 −0 bin/analysis/scanpy_entry_point.py
+65 −0 bin/analysis/scvelo_analysis.py
+38 −13 bin/common/common.py
+26 −26 bin/salmon/bulk_salmon_wrapper.py
+60 −36 bin/salmon/salmon_wrapper.py
+0 −27 bin/scanpy/adjust_barcodes.py
+0 −163 bin/scanpy/alevin_to_anndata.py
+0 −29 bin/scanpy/annotate_cells.py
+0 −104 bin/scanpy/scanpy_entry_point.py
+2 −2 docker/analysis/Dockerfile
+22 −0 docker/analysis/requirements.txt
+18 −0 docker/barcode_adj/Dockerfile
+5 −0 docker/barcode_adj/requirements.txt
+1 −1 docker/salmon/Dockerfile
+1 −1 docker/salmon/requirements.txt
+0 −19 docker/scanpy/requirements.txt
+2 −1 docker_images.txt
+47 −5 pipeline-manifest.json
+77 −15 pipeline.cwl
+7 −0 pyproject.toml
+2 −0 requirements-test.txt
+1 −1 steps/adjust-barcodes.cwl
+5 −5 steps/alevin-to-anndata.cwl
+11 −3 steps/annotate-cells.cwl
+1 −1 steps/bulk-salmon.cwl
+1 −1 steps/expression-matrix.cwl
+1 −1 steps/fastqc.cwl
+1 −1 steps/salmon.cwl
+24 −4 steps/scanpy-analysis.cwl
+30 −0 steps/scvelo-analysis.cwl
+15 −0 test.sh
2 changes: 1 addition & 1 deletion src/ingest-pipeline/airflow/dags/cwl/sc-atac-seq-pipeline
Submodule sc-atac-seq-pipeline updated 59 files
+2 −0 .gitignore
+11 −0 .pre-commit-config.yaml
+1 −18 .travis.yml
+6 −0 README.md
+0 −107 bin/add_barcodes_to_reads.pl
+97 −0 bin/add_barcodes_to_reads.py
+37 −0 bin/adjust_barcodes.py
+29 −19 bin/alignment_qc.py
+24 −71 bin/bulk/bulk_gather_sequence_files.py
+27 −13 bin/bulk/sort_bam_file.py
+26 −20 bin/bulk/sort_index_frag.py
+46 −65 bin/concat_fastq.py
+63 −0 bin/fastqc_wrapper.py
+49 −45 bin/index_reference_genome.py
+55 −0 bin/sciseq_add_barcodes_to_read_ids.py
+6 −6 bin/snaptools_defaults.py
+13 −11 bin/snaptools_wrapper.py
+21 −2 bin/utils.py
+51 −38 bulk-atac-seq-pipeline.cwl
+4 −23 bulk_gather_sequence_bundles.cwl
+0 −168 compare_snap_files.R
+0 −13 compare_snap_files.sh
+26 −13 create_snap_and_analyze.cwl
+10 −0 docker/barcode-adj/Dockerfile
+5 −0 docker/barcode-adj/requirements.txt
+64 −1 docker/pipeline-code/Dockerfile
+4 −3 docker_images.txt
+7 −0 pyproject.toml
+1 −0 requirements.txt
+2 −0 requirements_host.txt
+0 −93 sc_atac_seq_wrapper.py
+0 −76 sciseq_add_barcodes_to_read_ids.py
+27 −0 steps/adjust-barcodes.cwl
+1 −1 steps/analyze_snap_steps/snapanalysis_add_pmat_tool.cwl
+1 −1 steps/analyze_snap_steps/snapanalysis_analyze.cwl
+1 −1 steps/analyze_snap_steps/snapanalysis_motif.cwl
+1 −1 steps/analyze_snap_steps/snapanalysis_select_barcode.cwl
+3 −5 steps/bulk_analysis.cwl
+7 −55 steps/bulk_process.cwl
+3 −5 steps/call_peaks.cwl
+20 −5 steps/concat-fastq.cwl
+1 −1 steps/create_snap_steps/alignment_qc.cwl
+1 −1 steps/create_snap_steps/snaptools_add_barcodes_to_reads_tool.cwl
+1 −1 steps/create_snap_steps/snaptools_align_paired_end_tool.cwl
+1 −1 steps/create_snap_steps/snaptools_create_cell_by_bin_matrix_tool.cwl
+1 −1 steps/create_snap_steps/snaptools_create_fragment_file.cwl
+0 −87 steps/create_snap_steps/snaptools_fastqc_tool.cwl
+1 −1 steps/create_snap_steps/snaptools_index_ref_genome_tool.cwl
+1 −1 steps/create_snap_steps/snaptools_preprocess_reads_tool.cwl
+1 −1 steps/create_snap_steps/snaptools_remove_blacklist.cwl
+8 −5 steps/create_snap_steps/sort_bam_file_tool.cwl
+27 −0 steps/fastqc.cwl
+26 −0 steps/merge_bam.cwl
+1 −2 steps/motif_and_tf_analysis.cwl
+29 −26 steps/snaptools_create_snap_file.cwl
+24 −0 test.sh
+0 −7 tests/create_snap_and_analyze_local.json
+0 −59 tests/run_sc_atac_seq_tests.sh
+0 −37 tests/sc_atac_seq_integration_test.sh
1 change: 0 additions & 1 deletion src/ingest-pipeline/airflow/dags/cwl/sci-atac-seq-pipeline
Submodule sci-atac-seq-pipeline deleted from dba3c8
1 change: 0 additions & 1 deletion src/ingest-pipeline/airflow/dags/cwl/sn-atac-seq-pipeline
Submodule sn-atac-seq-pipeline deleted from 8471d6
Loading

0 comments on commit c0f696a

Please sign in to comment.