From 558d9b114ce05b04981e44accd7a9f1234568b31 Mon Sep 17 00:00:00 2001 From: David Betancur Date: Tue, 24 Oct 2023 15:25:12 -0400 Subject: [PATCH] Adding support for extras directory inside data folder for an upload. --- .../airflow/dags/reorganize_upload.py | 3 --- src/ingest-pipeline/misc/tools/split_and_create.py | 14 ++++++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/ingest-pipeline/airflow/dags/reorganize_upload.py b/src/ingest-pipeline/airflow/dags/reorganize_upload.py index 7fa37844..70518df2 100644 --- a/src/ingest-pipeline/airflow/dags/reorganize_upload.py +++ b/src/ingest-pipeline/airflow/dags/reorganize_upload.py @@ -30,9 +30,6 @@ from misc.tools.split_and_create import reorganize -sys.path.append(airflow_conf.as_dict()['connections']['SRC_PATH'] - .strip("'").strip('"')) -sys.path.pop() # Following are defaults which can be overridden later on default_args = { diff --git a/src/ingest-pipeline/misc/tools/split_and_create.py b/src/ingest-pipeline/misc/tools/split_and_create.py index c7f18a61..96caa5ee 100755 --- a/src/ingest-pipeline/misc/tools/split_and_create.py +++ b/src/ingest-pipeline/misc/tools/split_and_create.py @@ -213,10 +213,20 @@ def populate(row, source_entity, entity_factory, dryrun=False): extras_path.mkdir(0o770) source_data_path = source_entity.full_path / old_data_path for elt in source_data_path.glob('*'): + dst_file = kid_path / elt.name if dryrun: - print(f'rename {elt} to {kid_path / elt.name}') + if dst_file.exists() and dst_file.is_dir(): + for sub_elt in elt.glob('*'): + sub_elt.rename(kid_path / elt.name / sub_elt.name) + print(f'rename {sub_elt} to {kid_path / elt.name / sub_elt.name}') + continue + print(f'rename {elt} to {dst_file}') else: - elt.rename(kid_path / elt.name) + if dst_file.exists() and dst_file.is_dir(): + for sub_elt in elt.glob('*'): + sub_elt.rename(kid_path / elt.name / sub_elt.name) + continue + elt.rename(dst_file) if dryrun: print(f'copy {old_contrib_path} to {extras_path}') else: