diff --git a/.gitignore b/.gitignore index f15be313..a6a295f3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ __pycache__ db.sqlite3 media/* media +.envrc docker/dev-local diff --git a/src/ingest-pipeline/airflow/dags/error_catching/failure_callback.py b/src/ingest-pipeline/airflow/dags/error_catching/failure_callback.py deleted file mode 100644 index e469dcb5..00000000 --- a/src/ingest-pipeline/airflow/dags/error_catching/failure_callback.py +++ /dev/null @@ -1,164 +0,0 @@ -import json -from pprint import pprint -from requests.exceptions import HTTPError -from requests import codes -import traceback - -from airflow.providers.http.hooks.http import HttpHook -from airflow.utils.email import send_email - -from utils import get_auth_tok - - -class FailureCallbackException(Exception): - pass - - -class FailureCallback: - """ - List should be overwritten by each subclass with appropriate values. - """ - external_exceptions = [] - # TODO: Switch to curator email(s) - internal_email_recipients = ["gesina@psc.edu"] - - def __init__(self, context, execute_methods=True): - self.context = context - self.uuid = self.context.get("task_instance").xcom_pull(key="uuid") - self.auth_tok = get_auth_tok(**context) - self.dag_run = self.context.get("dag_run") - self.task = self.context.get("task") - self.exception = self.context.get("exception") - self.exception_name = type(self.exception).__name__ - - if execute_methods: - self.set_status() - self.send_notifications() - - def send_notifications(self): - """ - Subclasses should override the send_notifications method - in order to add self.send_asana_notification() or other - custom notifications. - """ - self.send_failure_email() - - # This is simplified from pythonop_get_dataset_state in utils - def get_submission_context(self): - method = "GET" - headers = { - "authorization": f"Bearer {self.auth_tok}", - "content-type": "application/json", - "X-Hubmap-Application": "ingest-pipeline", - } - http_hook = HttpHook(method, http_conn_id="entity_api_connection") - - endpoint = f"entities/{self.uuid}" - - try: - response = http_hook.run( - endpoint, headers=headers, extra_options={"check_response": False} - ) - response.raise_for_status() - submission_data = response.json() - return submission_data - except HTTPError as e: - print(f"ERROR: {e}") - if e.response.status_code == codes.unauthorized: - raise RuntimeError("entity database authorization was rejected?") - else: - print("benign error") - return {} - - def get_status_and_message(self): - """ - Error message might need to be overwritten when - subclassed for various DAGs. 
- """ - return { - "status": "Invalid", - "validation_message": f"Process {self.dag_run.dag_id} started {self.dag_run.execution_date} failed at task {self.task.task_id} with error {self.exception_name} {self.exception}", - } - - def set_status(self): - """ - The failure callback needs to set the dataset status, - otherwise it will remain in the "Processing" state - """ - data = self.get_status_and_message() - endpoint = f"/entities/{self.uuid}" - headers = { - "authorization": "Bearer " + self.auth_tok, - "X-Hubmap-Application": "ingest-pipeline", - "content-type": "application/json", - } - extra_options = [] - http_conn_id = "entity_api_connection" - http_hook = HttpHook("PUT", http_conn_id=http_conn_id) - print("data: ") - pprint(data) - response = http_hook.run( - endpoint, - json.dumps(data), - headers, - extra_options, - ) - - def get_failure_email_template( - self, formatted_exception=None, external_template=False, submission_data=None, **kwargs - ): - """ - Generic template, can be overridden or super() called - in subclass. - """ - subject = f"DAG {self.dag_run.dag_id} failed at task {self.task.task_id}" - if formatted_exception: - msg = f""" - DAG run: {self.dag_run.id} {self.dag_run.dag_id}
- Task: {self.task.task_id}<br>
- Execution date: {self.dag_run.execution_date}<br>
- Run id: {self.dag_run.run_id}<br>
- Error: {self.exception_name}<br>
- Traceback: {formatted_exception} - """ - - else: - msg = f""" - DAG run: {self.dag_run.id} {self.dag_run.dag_id}<br>
- Task: {self.task.task_id}<br>
- Execution date: {self.dag_run.execution_date}<br>
- Run id: {self.dag_run.run_id}<br>
- Error: {self.exception_name}<br>
- """ - return subject, msg - - def send_failure_email(self, **kwargs): - # traceback logic borrowed from https://stackoverflow.com/questions/51822029/get-exception-details-on-airflow-on-failure-callback-context - try: - formatted_exception = "".join( - traceback.TracebackException.from_exception(self.exception).format() - ).replace("\n", "
") - except: - formatted_exception = None - self.submission_data = self.get_submission_context() - subject, msg = self.get_failure_email_template( - formatted_exception=formatted_exception, submission_data=self.submission_data, **kwargs - ) - send_email(to=self.internal_email_recipients, subject=subject, html_content=msg) - if self.exception_name in self.external_exceptions: - try: - created_by_user_email = self.submission_data.get("created_by_user_email") - subject, msg = self.get_failure_email_template( - formatted_exception=formatted_exception, - external_template=True, - submission_data=self.submission_data, - **kwargs, - ) - send_email(to=[created_by_user_email], subject=subject, html_content=msg) - except: - raise FailureCallbackException( - "Failure retrieving created_by_user_email or sending email." - ) - - def send_asana_notification(self, **kwargs): - pass diff --git a/src/ingest-pipeline/airflow/dags/error_catching/validate_upload_failure_callback.py b/src/ingest-pipeline/airflow/dags/error_catching/validate_upload_failure_callback.py deleted file mode 100644 index dc8cae5d..00000000 --- a/src/ingest-pipeline/airflow/dags/error_catching/validate_upload_failure_callback.py +++ /dev/null @@ -1,54 +0,0 @@ -from airflow.utils.email import send_email - -from .failure_callback import FailureCallback, FailureCallbackException - - -class ValidateUploadFailure(FailureCallback): - # Should probably be importing custom exceptions rather than comparing strings - external_exceptions = [ - "ValueError", - "PreflightError", - "ValidatorError", - "DirectoryValidationErrors", - "FileNotFoundError", - ] - - def get_failure_email_template( - self, - formatted_exception=None, - external_template=False, - submission_data=None, - report_txt=False, - ): - if external_template: - if report_txt and submission_data: - subject = f"Your {submission_data.get('entity_type')} has failed!" - msg = f""" - Error: {report_txt} - """ - return subject, msg - else: - if report_txt and submission_data: - subject = f"{submission_data.get('entity_type')} {self.uuid} has failed!" - msg = f""" - Error: {report_txt} - """ - return subject, msg - return super().get_failure_email_template(formatted_exception) - - def send_failure_email(self, **kwargs): - super().send_failure_email(**kwargs) - if "report_txt" in kwargs: - try: - created_by_user_email = self.submission_data.get("created_by_user_email") - subject, msg = self.get_failure_email_template( - formatted_exception=None, - external_template=True, - submission_data=self.submission_data, - **kwargs, - ) - send_email(to=[created_by_user_email], subject=subject, html_content=msg) - except: - raise FailureCallbackException( - "Failure retrieving created_by_user_email or sending email in ValidateUploadFailure." 
- ) diff --git a/src/ingest-pipeline/airflow/dags/export_and_backup/plugins/dataset_published.py b/src/ingest-pipeline/airflow/dags/export_and_backup/plugins/dataset_published.py index 14ee43aa..6d3595c2 100644 --- a/src/ingest-pipeline/airflow/dags/export_and_backup/plugins/dataset_published.py +++ b/src/ingest-pipeline/airflow/dags/export_and_backup/plugins/dataset_published.py @@ -1,13 +1,11 @@ -from requests.exceptions import HTTPError -from requests import codes import traceback -from airflow.configuration import conf as airflow_conf -from airflow.hooks.http_hook import HttpHook - from export_and_backup.export_and_backup_plugin import ExportAndBackupPlugin, add_path +from status_change.status_utils import get_hubmap_id_from_uuid from utils import get_auth_tok +from airflow.configuration import conf as airflow_conf + with add_path(airflow_conf.as_dict()["connections"]["SRC_PATH"].strip("'").strip('"')): from submodules import hubmapinventory @@ -18,7 +16,7 @@ class PublishedBackupPlugin(ExportAndBackupPlugin): def run_plugin(self): return "PublishedBackupPlugin ran successfully" - ## Future functionality + # Future functionality # Back-up published datasets to appropriate location (S3) # Also stage for inclusion in 6-month Glacier backup? # Not sure how datasets are updated post-publication; that is likely a separate process-- @@ -32,34 +30,8 @@ def __init__(self, **kwargs): self.kwargs = kwargs self.token = get_auth_tok(**self.kwargs) - # Should I add this to utils instead, so it can be reused? - def get_hubmap_id(self, uuid): - method = 'GET' - headers = { - 'authorization': f'Bearer {self.token}', - 'content-type': 'application/json', - 'X-Hubmap-Application': 'ingest-pipeline', - } - http_hook = HttpHook(method, http_conn_id='entity_api_connection') - - endpoint = f'entities/{uuid}' - - try: - response = http_hook.run(endpoint, - headers=headers, - extra_options={'check_response': False}) - response.raise_for_status() - return response.json() - except HTTPError as e: - print(f'ERROR: {e}') - if e.response.status_code == codes.unauthorized: - raise RuntimeError('entity database authorization was rejected?') - else: - print('benign error') - return {} - def run_plugin(self): - hubmap_id = self.get_hubmap_id(self.kwargs["uuid"])["hubmap_id"] + hubmap_id = get_hubmap_id_from_uuid(self.token, self.kwargs["uuid"])["hubmap_id"] dbgap_study_id = self.kwargs.get("dbgap_study_id", None) # instance will need to be changed try: diff --git a/src/ingest-pipeline/airflow/dags/obsolete/test_scan_and_begin_processing.py b/src/ingest-pipeline/airflow/dags/obsolete/test_scan_and_begin_processing.py index 375f2a23..a1e5c339 100644 --- a/src/ingest-pipeline/airflow/dags/obsolete/test_scan_and_begin_processing.py +++ b/src/ingest-pipeline/airflow/dags/obsolete/test_scan_and_begin_processing.py @@ -68,7 +68,7 @@ def test_params(*argv, **kwargs): pprint(kwargs) return 'maybe this will make it run only once' - + # TODO: this code looks potentially out of date? Did not update with StatusManager yet. 
def send_status_msg(**kwargs): md_extract_retcode = int(kwargs['ti'].xcom_pull(task_ids="run_md_extract")) print('md_extract_retcode: ', md_extract_retcode) diff --git a/src/ingest-pipeline/airflow/dags/status_change/failure_callback.py b/src/ingest-pipeline/airflow/dags/status_change/failure_callback.py new file mode 100644 index 00000000..d4fcdd6a --- /dev/null +++ b/src/ingest-pipeline/airflow/dags/status_change/failure_callback.py @@ -0,0 +1,66 @@ +import logging +from pprint import pprint + +from status_change.status_manager import StatusChanger +from status_change.status_utils import formatted_exception +from utils import get_auth_tok + + +class FailureCallbackException(Exception): + pass + + +class FailureCallback: + """ + List should be overridden by each subclass with appropriate values. + """ + + def __init__(self, context): + self.context = context + self.uuid = self.context.get("task_instance").xcom_pull(key="uuid") + self.context["uuid"] = self.uuid + self.auth_tok = get_auth_tok(**context) + self.dag_run = self.context.get("dag_run") + self.task = self.context.get("task") + exception = self.context.get("exception") + self.formatted_exception = formatted_exception(exception) + + self.pre_set_status() + + def get_extra_fields(self): + """ + Error message might need to be overwritten when + subclassed for various DAGs. + 'Error' is the default for FailureCallback, which indicates a pipeline has failed. + """ + return { + "validation_message": f""" + Process {self.dag_run.dag_id} started {self.dag_run.execution_date} + failed at task {self.task.task_id}. + {f'Error: {self.formatted_exception}' if self.formatted_exception else ""} + """, + } + + def pre_set_status(self): + # Allows for alterations to props, before calling StatusChanger + # This was added to support some email functions and is perhaps + # at the moment over-engineered. 
+ self.set_status() + + def set_status(self): + """ + The failure callback needs to set the dataset status, + otherwise it will remain in the "Processing" state + """ + data = self.get_extra_fields() + logging.info("data: ") + logging.info(pprint(data)) + StatusChanger( + self.uuid, + self.auth_tok, + "error", + { + "extra_fields": self.get_extra_fields(), + "extra_options": {}, + }, + ).on_status_change() diff --git a/src/ingest-pipeline/airflow/dags/status_change/status_manager.py b/src/ingest-pipeline/airflow/dags/status_change/status_manager.py new file mode 100644 index 00000000..805fa78d --- /dev/null +++ b/src/ingest-pipeline/airflow/dags/status_change/status_manager.py @@ -0,0 +1,257 @@ +from __future__ import annotations + +import json +import logging +from enum import Enum +from functools import cached_property +from typing import Any, Dict, TypedDict, Union + +from airflow.providers.http.hooks.http import HttpHook + +from .status_utils import get_submission_context + + +class Statuses(str, Enum): + # Dataset Hold and Deprecated are not currently in use but are valid for Entity API + DATASET_DEPRECATED = "deprecated" + DATASET_ERROR = "error" + DATASET_HOLD = "hold" + DATASET_INVALID = "invalid" + DATASET_NEW = "new" + DATASET_PROCESSING = "processing" + DATASET_PUBLISHED = "published" + DATASET_QA = "qa" + PUBLICATION_ERROR = "error" + PUBLICATION_HOLD = "hold" + PUBLICATION_INVALID = "invalid" + PUBLICATION_NEW = "new" + PUBLICATION_PROCESSING = "processing" + PUBLICATION_PUBLISHED = "published" + PUBLICATION_QA = "qa" + PUBLICATION_SUBMITTED = "submitted" + UPLOAD_ERROR = "error" + UPLOAD_INVALID = "invalid" + UPLOAD_NEW = "new" + UPLOAD_PROCESSING = "processing" + UPLOAD_REORGANIZED = "reorganized" + UPLOAD_SUBMITTED = "submitted" + UPLOAD_VALID = "valid" + + +# Needed some way to disambiguate statuses shared by datasets and uploads +ENTITY_STATUS_MAP = { + "dataset": { + "deprecated": Statuses.DATASET_DEPRECATED, + "error": Statuses.DATASET_ERROR, + "hold": Statuses.DATASET_HOLD, + "invalid": Statuses.DATASET_INVALID, + "new": Statuses.DATASET_NEW, + "processing": Statuses.DATASET_PROCESSING, + "published": Statuses.DATASET_PUBLISHED, + "qa": Statuses.DATASET_QA, + }, + "publication": { + "error": Statuses.PUBLICATION_ERROR, + "hold": Statuses.PUBLICATION_HOLD, + "invalid": Statuses.PUBLICATION_INVALID, + "new": Statuses.PUBLICATION_NEW, + "processing": Statuses.PUBLICATION_PROCESSING, + "published": Statuses.PUBLICATION_PUBLISHED, + "qa": Statuses.PUBLICATION_QA, + "submitted": Statuses.PUBLICATION_SUBMITTED, + }, + "upload": { + "error": Statuses.UPLOAD_ERROR, + "invalid": Statuses.UPLOAD_INVALID, + "new": Statuses.UPLOAD_NEW, + "processing": Statuses.UPLOAD_PROCESSING, + "reorganized": Statuses.UPLOAD_REORGANIZED, + "submitted": Statuses.UPLOAD_SUBMITTED, + "valid": Statuses.UPLOAD_VALID, + }, +} + + +class StatusChangerExtras(TypedDict): + extra_fields: dict[str, Any] + extra_options: dict[str, Any] + + +class StatusChangerException(Exception): + pass + + +""" +Example usage, simple path (e.g. 
status string, no validation message): + from status_manager import StatusChanger + StatusChanger( + "uuid_string", + "token_string", + "status", + ).on_status_change() + +Example usage, optional params path: + from status_manager import StatusChanger, Statuses + StatusChanger( + "uuid_string", + "token_string", + Statuses.STATUS_ENUM or "status", + # optional { + "extra_fields": {}, + "extra_options": {}, + }, + #optional entity_type="Dataset"|"Upload"|"Publication" + #optional http_conn_id="entity_api_connection" + ).on_status_change() +""" + + +class StatusChanger: + def __init__( + self, + uuid: str, + token: str, + # NOTE: status is currently required; should it be possible + # to add extra info without updating status? + status: Statuses | str, + extras: StatusChangerExtras | None = None, + entity_type: str | None = None, + http_conn_id: str = "entity_api_connection", + verbose: bool = True, + ): + self.uuid = uuid + self.token = token + self.http_conn_id = http_conn_id + self.verbose = verbose + self.status = ( + self.check_status(status) + if isinstance(status, Statuses) + else self.get_status(status, entity_type) + ) + self.extras = ( + extras + if extras + else { + "extra_fields": {}, + "extra_options": {}, + } + ) + + def get_status(self, status: str, entity_type: str | None) -> Union[Statuses, None]: + """ + If status is passed as a string, get the entity type and match + to correct entry in ENTITY_STATUS_MAP. Also check current status, + because ingest-pipeline will error if you try to set the same status + over the existing status. + Potential TODO: could stop any operation involving "Published" + statuses at this stage. + """ + if entity_type is None: + try: + entity_type = self.entity_data["entity_type"] + assert entity_type is not None + except Exception as e: + raise StatusChangerException( + f""" + Could not find entity type for {self.uuid}. + Error {e} + """ + ) + try: + entity_status = ENTITY_STATUS_MAP[entity_type.lower()][status.lower()] + except KeyError: + raise StatusChangerException( + f""" + Could not retrieve status for {self.uuid}. + Check that status is valid for entity type. + Status not changed. + """ + ) + return self.check_status(entity_status) + + @cached_property + def entity_data(self): + return get_submission_context(self.token, self.uuid) + + def check_status(self, status: Statuses) -> Union[Statuses, None]: + if status == self.entity_data["status"].lower(): + return None + return status + + def format_status_data(self) -> Dict[str, str | Dict]: + data = {} + if self.status: + data["status"] = self.status + # Double-check that you're not accidentally overwriting status + if (extra_status := self.extras.get("status")) is not None and isinstance( + extra_status, str + ): + assert ( + extra_status.lower() == self.status + ), f"Entity {self.uuid} passed multiple statuses ({self.status} and {extra_status})." 
+ data.update(self.extras["extra_fields"]) + logging.info(f"COMPILED DATA: {data}") + return data + + def set_entity_api_status(self) -> Dict: + endpoint = f"/entities/{self.uuid}" + headers = { + "authorization": "Bearer " + self.token, + "X-Hubmap-Application": "ingest-pipeline", + "content-type": "application/json", + } + http_hook = HttpHook("PUT", http_conn_id=self.http_conn_id) + data = self.format_status_data() + if self.extras["extra_options"].get("check_response") is None: + self.extras["extra_options"].update({"check_response": True}) + logging.info( + f""" + data: + {data} + """ + ) + try: + if self.verbose: + logging.info(f"Updating {self.uuid} with data {data}...") + response = http_hook.run( + endpoint, json.dumps(data), headers, self.extras["extra_options"] + ) + return response.json() + except Exception as e: + raise StatusChangerException( + f""" + Encountered error with request to change status/fields + for {self.uuid}, status not set. + Error: {e} + """ + ) + + def update_asana(self) -> None: + # Separating logic for updating Asana into a separate PR + # UpdateAsana(self.uuid, self.token, self.status).update_process_stage() + pass + + def send_email(self) -> None: + # This is underdeveloped and also requires a separate PR + pass + + status_map = {} + """ + Default behavior is to call both set_entity_api_status and update_asana. + Add any statuses to map that require a different process. + Example: + { + # "Statuses.UPLOAD_INVALID": [set_entity_api_status, update_asana, send_email], + # "Statuses.DATASET_INVALID": [set_entity_api_status, update_asana, send_email], + # "Statuses.DATASET_PROCESSING": [set_entity_api_status], + } + """ + + def on_status_change(self) -> None: + if self.status in self.status_map: + for func in self.status_map[self.status]: + func(self) + else: + self.set_entity_api_status() + self.send_email() + self.update_asana() diff --git a/src/ingest-pipeline/airflow/dags/status_change/status_utils.py b/src/ingest-pipeline/airflow/dags/status_change/status_utils.py new file mode 100644 index 00000000..f7ca6822 --- /dev/null +++ b/src/ingest-pipeline/airflow/dags/status_change/status_utils.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import traceback +from typing import Any, Dict + +from requests import codes +from requests.exceptions import HTTPError + +from airflow.hooks.http_hook import HttpHook + + +# This is simplified from pythonop_get_dataset_state in utils +def get_submission_context(token: str, uuid: str) -> Dict[str, Any]: + """ + uuid can also be a HuBMAP ID. 
+ """ + method = "GET" + headers = { + "authorization": f"Bearer {token}", + "content-type": "application/json", + "X-Hubmap-Application": "ingest-pipeline", + } + http_hook = HttpHook(method, http_conn_id="entity_api_connection") + + endpoint = f"entities/{uuid}" + + try: + response = http_hook.run( + endpoint, headers=headers, extra_options={"check_response": False} + ) + response.raise_for_status() + return response.json() + except HTTPError as e: + print(f"ERROR: {e}") + if e.response.status_code == codes.unauthorized: + raise RuntimeError("entity database authorization was rejected?") + else: + print("benign error") + return {} + + +def get_hubmap_id_from_uuid(token: str, uuid: str) -> str | None: + method = "GET" + headers = { + "authorization": f"Bearer {token}", + "content-type": "application/json", + "X-Hubmap-Application": "ingest-pipeline", + } + http_hook = HttpHook(method, http_conn_id="entity_api_connection") + + endpoint = f"entities/{uuid}" + + try: + response = http_hook.run( + endpoint, headers=headers, extra_options={"check_response": False} + ) + response.raise_for_status() + return response.json().get("hubmap_id") + except HTTPError as e: + print(f"ERROR: {e}") + if e.response.status_code == codes.unauthorized: + raise RuntimeError("entity database authorization was rejected?") + else: + print("benign error") + return None + + +def formatted_exception(exception): + """ + traceback logic from + https://stackoverflow.com/questions/51822029/get-exception-details-on-airflow-on-failure-callback-context + """ + if not ( + formatted_exception := "".join( + traceback.TracebackException.from_exception(exception).format() + ).replace("\n", "
") + ): + return None + return formatted_exception diff --git a/src/ingest-pipeline/airflow/dags/status_change/tests/test_status_changer.py b/src/ingest-pipeline/airflow/dags/status_change/tests/test_status_changer.py new file mode 100644 index 00000000..e938246c --- /dev/null +++ b/src/ingest-pipeline/airflow/dags/status_change/tests/test_status_changer.py @@ -0,0 +1,183 @@ +import unittest +from functools import cached_property +from unittest.mock import patch + +from status_manager import StatusChanger, StatusChangerException, Statuses +from utils import pythonop_set_dataset_state + + +class TestStatusChanger(unittest.TestCase): + @cached_property + @patch("status_manager.HttpHook.run") + def upload_valid(self, hhr_mock): + return StatusChanger( + "upload_valid_uuid", + "upload_valid_token", + "Valid", + { + "extra_fields": {}, + "extra_options": {}, + }, + entity_type="Upload", + ) + + @patch("status_manager.HttpHook.run") + def test_unrecognized_status(self, hhr_mock): + with self.assertRaises(StatusChangerException): + StatusChanger( + "invalid_status_uuid", + "invalid_status_token", + "invalid_status_string", + { + "extra_fields": {}, + "extra_options": {}, + }, + entity_type="Upload", + ) + + def test_recognized_status(self): + data = self.upload_valid.format_status_data() + self.assertEqual(data["status"], self.upload_valid.status) + + @patch("status_manager.HttpHook.run") + def test_extra_fields(self, hhr_mock): + with_extra_field = StatusChanger( + "extra_field_uuid", + "extra_field_token", + Statuses.UPLOAD_PROCESSING, + { + "extra_fields": {"test_extra_field": True}, + "extra_options": {}, + }, + ) + data = with_extra_field.format_status_data() + self.assertIn("test_extra_field", data) + self.assertEqual(data["test_extra_field"], True) + + @patch("status_manager.HttpHook.run") + def test_extra_options(self, hhr_mock): + with_extra_option = StatusChanger( + "extra_options_uuid", + "extra_options_token", + Statuses.UPLOAD_VALID, + {"extra_fields": {}, "extra_options": {"check_response": False}}, + verbose=False, + ) + with_extra_option.set_entity_api_status() + self.assertIn({"check_response": False}, hhr_mock.call_args.args) + without_extra_option = StatusChanger( + "extra_options_uuid", + "extra_options_token", + Statuses.UPLOAD_VALID, + {"extra_fields": {}, "extra_options": {}}, + verbose=False, + ) + without_extra_option.set_entity_api_status() + self.assertIn({"check_response": True}, hhr_mock.call_args.args) + + @patch("status_manager.HttpHook.run") + def test_extra_options_and_fields(self, hhr_mock): + with_extra_option_and_field = StatusChanger( + "extra_options_uuid", + "extra_options_token", + Statuses.UPLOAD_VALID, + { + "extra_fields": {"test_extra_field": True}, + "extra_options": {"check_response": False}, + }, + verbose=False, + ) + with_extra_option_and_field.set_entity_api_status() + self.assertIn({"check_response": False}, hhr_mock.call_args.args) + self.assertIn('{"status": "valid", "test_extra_field": true}', hhr_mock.call_args.args) + + @patch("status_manager.HttpHook.run") + def test_valid_status_in_request(self, hhr_mock): + self.upload_valid.set_entity_api_status() + self.assertIn('{"status": "valid"}', hhr_mock.call_args.args) + + @patch("status_manager.HttpHook.run") + def test_http_conn_id(self, hhr_mock): + with_http_conn_id = StatusChanger( + "http_conn_uuid", + "http_conn_token", + Statuses.DATASET_NEW, + { + "extra_fields": {}, + "extra_options": {}, + }, + http_conn_id="test_conn_id", + ) + assert with_http_conn_id.http_conn_id == "test_conn_id" + 
+ @patch("status_manager.HttpHook.run") + @patch("status_manager.StatusChanger.send_email") + def test_status_map(self, test_send_email, hhr_mock): + self.assertFalse(test_send_email.called) + self.upload_valid.status_map = {Statuses.UPLOAD_VALID: [self.upload_valid.send_email]} + self.upload_valid.on_status_change() + self.assertTrue(test_send_email.called) + + @staticmethod + def my_callable(**kwargs): + return kwargs["uuid"] + + @patch("utils.StatusChanger") + @patch("utils.get_auth_tok") + def test_pythonop_set_dataset_state_valid(self, gat_mock, sc_mock): + uuid = "test_uuid" + token = "test_token" + gat_mock.return_value = token + message = "Test message" + # Not passing a ds_state kwarg sets status to Processing + pythonop_set_dataset_state( + crypt_auth_tok=token, + dataset_uuid_callable=self.my_callable, + uuid=uuid, + message=message, + ) + sc_mock.assert_called_with( + uuid, + token, + "Processing", + { + "extra_fields": {"pipeline_message": message}, + "extra_options": {}, + }, + http_conn_id="entity_api_connection", + ) + # Pass a valid ds_state and assert it was passed properly + pythonop_set_dataset_state( + crypt_auth_tok=token, + dataset_uuid_callable=self.my_callable, + uuid=uuid, + message=message, + ds_state="QA", + ) + sc_mock.assert_called_with( + uuid, + token, + "QA", + { + "extra_fields": {"pipeline_message": message}, + "extra_options": {}, + }, + http_conn_id="entity_api_connection", + ) + + @patch("status_manager.HttpHook.run") + @patch("utils.get_auth_tok") + def test_pythonop_set_dataset_state_invalid(self, gat_mock, hhr_mock): + uuid = "test_uuid" + token = "test_token" + gat_mock.return_value = token + message = "Test message" + # Pass an invalid ds_state + with self.assertRaises(Exception): + pythonop_set_dataset_state( + crypt_auth_tok=token, + dataset_uuid_callable=self.my_callable, + uuid=uuid, + message=message, + ds_state="Unknown", + ) diff --git a/src/ingest-pipeline/airflow/dags/utils.py b/src/ingest-pipeline/airflow/dags/utils.py index 1f3bc83e..d7230b74 100644 --- a/src/ingest-pipeline/airflow/dags/utils.py +++ b/src/ingest-pipeline/airflow/dags/utils.py @@ -1,137 +1,122 @@ +import json import math import os +import re +import shlex +import sys import urllib.parse +import uuid from abc import ABC, abstractmethod from collections import namedtuple +from copy import deepcopy from functools import lru_cache -import json from os import environ, fspath, walk -from os.path import ( - basename, dirname, relpath, split, join, getsize, - realpath, exists -) +from os.path import basename, dirname, exists, getsize, join, realpath, relpath, split from pathlib import Path from pprint import pprint -import re -import shlex -import sys -import uuid -from subprocess import check_output, CalledProcessError +from subprocess import CalledProcessError, check_output from typing import ( - Any, Callable, Dict, Iterable, List, Mapping, Optional, - Pattern, Tuple, TypeVar, Union + Any, + Callable, + Dict, + Iterable, + List, + Mapping, + Optional, + Pattern, + Tuple, + TypeVar, + Union, ) -from requests.exceptions import HTTPError -from requests import codes -from copy import deepcopy +import cwltool # used to find its path import yaml -from airflow import DAG -from airflow.models.baseoperator import BaseOperator -from airflow.configuration import conf as airflow_conf -from airflow.hooks.http_hook import HttpHook from cryptography.fernet import Fernet - -from hubmap_commons.schema_tools import ( - assert_json_matches_schema, - set_schema_base_path -) +from 
hubmap_commons.schema_tools import assert_json_matches_schema, set_schema_base_path from hubmap_commons.type_client import TypeClient +from requests import codes +from requests.exceptions import HTTPError +from status_change.status_manager import StatusChanger -import cwltool # used to find its path - +from airflow import DAG +from airflow.configuration import conf as airflow_conf +from airflow.hooks.http_hook import HttpHook +from airflow.models.baseoperator import BaseOperator -airflow_conf.read(join(environ['AIRFLOW_HOME'], 'instance', 'app.cfg')) +airflow_conf.read(join(environ["AIRFLOW_HOME"], "instance", "app.cfg")) try: - sys.path.append(airflow_conf.as_dict()['connections']['SRC_PATH'] - .strip("'").strip('"')) + sys.path.append(airflow_conf.as_dict()["connections"]["SRC_PATH"].strip("'").strip('"')) from misc.tools.survey import ENDPOINTS + sys.path.pop() -except KeyError: +except Exception: ENDPOINTS = {} JSONType = Union[str, int, float, bool, None, Dict[str, Any], List[Any]] # Some functions accept a `str` or `List[str]` and return that same type -StrOrListStr = TypeVar('StrOrListStr', str, List[str]) +StrOrListStr = TypeVar("StrOrListStr", str, List[str]) PathStrOrList = Union[str, Path, Iterable[Union[str, Path]]] -SCHEMA_BASE_PATH = join(dirname(dirname(dirname(realpath(__file__)))), - 'schemata') -SCHEMA_BASE_URI = 'http://schemata.hubmapconsortium.org/' +SCHEMA_BASE_PATH = join(dirname(dirname(dirname(realpath(__file__)))), "schemata") +SCHEMA_BASE_URI = "http://schemata.hubmapconsortium.org/" # Some constants -PIPELINE_BASE_DIR = Path(__file__).resolve().parent / 'cwl' +PIPELINE_BASE_DIR = Path(__file__).resolve().parent / "cwl" -RE_ID_WITH_SLICES = re.compile(r'([a-zA-Z0-9\-]*)-(\d*)_(\d*)') +RE_ID_WITH_SLICES = re.compile(r"([a-zA-Z0-9\-]*)-(\d*)_(\d*)") -RE_GIT_URL_PATTERN = re.compile(r'(^git@github.com:)(.*)(\.git)') +RE_GIT_URL_PATTERN = re.compile(r"(^git@github.com:)(.*)(\.git)") # default maximum for number of files for which info should be returned in_line # rather than via an alternative scratch file MAX_IN_LINE_FILES = 500 -GIT = 'git' +GIT = "git" GIT_CLONE_COMMAND = [ GIT, - 'clone', - '{repository}', + "clone", + "{repository}", ] GIT_FETCH_COMMAND = [ GIT, - 'fetch', + "fetch", ] GIT_CHECKOUT_COMMAND = [ GIT, - 'checkout', - '{ref}', -] -GIT_LOG_COMMAND = [ - GIT, - 'log', - '-n1', - '--oneline' -] -GIT_ORIGIN_COMMAND = [ - GIT, - 'config', - '--get', - 'remote.origin.url' -] -GIT_ROOT_COMMAND = [ - GIT, - 'rev-parse', - '--show-toplevel' + "checkout", + "{ref}", ] -SHA1SUM_COMMAND = [ - 'sha1sum', - '{fname}' +GIT_LOG_COMMAND = [GIT, "log", "-n1", "--oneline"] +GIT_ORIGIN_COMMAND = [GIT, "config", "--get", "remote.origin.url"] +GIT_ROOT_COMMAND = [GIT, "rev-parse", "--show-toplevel"] +SHA1SUM_COMMAND = ["sha1sum", "{fname}"] +FILE_TYPE_MATCHERS = [ + (r"^.*\.csv$", "csv"), # format is (regex, type) + (r"^.*\.hdf5$", "hdf5"), + (r"^.*\.h5ad$", "h5ad"), + (r"^.*\.pdf$", "pdf"), + (r"^.*\.json$", "json"), + (r"^.*\.arrow$", "arrow"), + (r"(^.*\.fastq$)|(^.*\.fastq.gz$)", "fastq"), + (r"(^.*\.yml$)|(^.*\.yaml$)", "yaml"), ] -FILE_TYPE_MATCHERS = [(r'^.*\.csv$', 'csv'), # format is (regex, type) - (r'^.*\.hdf5$', 'hdf5'), - (r'^.*\.h5ad$', 'h5ad'), - (r'^.*\.pdf$', 'pdf'), - (r'^.*\.json$', 'json'), - (r'^.*\.arrow$', 'arrow'), - (r'(^.*\.fastq$)|(^.*\.fastq.gz$)', 'fastq'), - (r'(^.*\.yml$)|(^.*\.yaml$)', 'yaml') - ] COMPILED_TYPE_MATCHERS: Optional[List[Tuple[Pattern, str]]] = None """ Lazy construction: a list of tuples (collection_type_regex, 
assay_type_regex, workflow) """ -WORKFLOW_MAP_FILENAME = 'workflow_map.yml' # Expected to be found in this same dir -WORKFLOW_MAP_SCHEMA = 'workflow_map_schema.yml' +WORKFLOW_MAP_FILENAME = "workflow_map.yml" # Expected to be found in this same dir +WORKFLOW_MAP_SCHEMA = "workflow_map_schema.yml" COMPILED_WORKFLOW_MAP: Optional[List[Tuple[Pattern, Pattern, str]]] = None """ Lazy construction; a list of tuples (dag_id_reges, task_id_regex, {key:value}) """ -RESOURCE_MAP_FILENAME = 'resource_map.yml' # Expected to be found in this same dir -RESOURCE_MAP_SCHEMA = 'resource_map_schema.yml' +RESOURCE_MAP_FILENAME = "resource_map.yml" # Expected to be found in this same dir +RESOURCE_MAP_SCHEMA = "resource_map_schema.yml" COMPILED_RESOURCE_MAP: Optional[List[Tuple[Pattern, int, Dict[str, Any]]]] = None TYPE_CLIENT: Optional[TypeClient] = None @@ -139,12 +124,12 @@ # Parameters used to generate scRNA and scATAC analysis DAGs; these # are the only fields which differ between assays and DAGs SequencingDagParameters = namedtuple( - 'SequencingDagParameters', + "SequencingDagParameters", [ - 'dag_id', - 'pipeline_name', - 'assay', - 'dataset_type', + "dag_id", + "pipeline_name", + "assay", + "dataset_type", ], ) @@ -177,16 +162,20 @@ def __init__(self): self.matchers = [] @classmethod - def read_manifest(cls, pipeline_file_manifest: Path) -> Iterable[Tuple[Pattern, str, str, bool, bool]]: + def read_manifest( + cls, pipeline_file_manifest: Path + ) -> Iterable[Tuple[Pattern, str, str, bool, bool]]: with open(pipeline_file_manifest) as f: manifest = json.load(f) - localized_assert_json_matches_schema(manifest, 'pipeline_file_manifest.yml') + localized_assert_json_matches_schema(manifest, "pipeline_file_manifest.yml") for annotation in manifest: - pattern = re.compile(annotation['pattern']) - is_qa_qc = annotation.get('is_qa_qc', False) - is_data_product = annotation.get('is_data_product', False) - yield pattern, annotation['description'], annotation['edam_ontology_term'], is_qa_qc, is_data_product + pattern = re.compile(annotation["pattern"]) + is_qa_qc = annotation.get("is_qa_qc", False) + is_data_product = annotation.get("is_data_product", False) + yield pattern, annotation["description"], annotation[ + "edam_ontology_term" + ], is_qa_qc, is_data_product @classmethod def create_from_files(cls, pipeline_file_manifests: Iterable[Path]): @@ -203,7 +192,13 @@ def get_file_metadata(self, file_path: Path) -> ManifestMatch: the "first-match" behavior is deliberate. """ path_str = fspath(file_path) - for pattern, description_template, ontology_term, is_qa_qc, is_data_product in self.matchers: + for ( + pattern, + description_template, + ontology_term, + is_qa_qc, + is_data_product, + ) in self.matchers: # TODO: walrus operator m = pattern.search(path_str) if m: @@ -217,8 +212,9 @@ class DummyFileMatcher(FileMatcher): Drop-in replacement for PipelineFileMatcher which allows everything and always provides empty descriptions and ontology terms. """ + def get_file_metadata(self, file_path: Path) -> ManifestMatch: - return True, '', '', False + return True, "", "", False class HMDAG(DAG): @@ -227,13 +223,14 @@ class HMDAG(DAG): Defaults are applied to the DAG itself, and to any Tasks added to the DAG. """ + def __init__(self, dag_id: str, **kwargs): """ Provide "max_active_runs" from the lanes resource, if it is not already present. 
""" - if 'max_active_runs' not in kwargs: - kwargs['max_active_runs'] = get_lanes_resource(dag_id) + if "max_active_runs" not in kwargs: + kwargs["max_active_runs"] = get_lanes_resource(dag_id) super().__init__(dag_id, **kwargs) def add_task(self, task: BaseOperator): @@ -252,7 +249,7 @@ def add_task(self, task: BaseOperator): task.queue = res_queue super().add_task(task) - + def find_pipeline_manifests(cwl_files: Iterable[Path]) -> List[Path]: """ Constructs manifest paths from CWL files (strip '.cwl', append @@ -261,7 +258,7 @@ def find_pipeline_manifests(cwl_files: Iterable[Path]) -> List[Path]: """ manifests = [] for cwl_file in cwl_files: - manifest_file = cwl_file.with_name(f'{cwl_file.stem}-manifest.json') + manifest_file = cwl_file.with_name(f"{cwl_file.stem}-manifest.json") if manifest_file.is_file(): manifests.append(manifest_file) return manifests @@ -275,10 +272,7 @@ def get_absolute_workflows(*workflows: Path) -> List[Path]: already absolute, they are returned unchanged; if relative, they are anchored to `PIPELINE_BASE_DIR` """ - return [ - PIPELINE_BASE_DIR / workflow - for workflow in workflows - ] + return [PIPELINE_BASE_DIR / workflow for workflow in workflows] def get_named_absolute_workflows(**workflow_kwargs: Path) -> Dict[str, Path]: @@ -292,19 +286,16 @@ def get_named_absolute_workflows(**workflow_kwargs: Path) -> Dict[str, Path]: if the input paths were already absolute, they are returned unchanged; if relative, they are anchored to `PIPELINE_BASE_DIR` """ - return { - name: PIPELINE_BASE_DIR / workflow - for name, workflow in workflow_kwargs.items() - } + return {name: PIPELINE_BASE_DIR / workflow for name, workflow in workflow_kwargs.items()} def build_dataset_name(dag_id: str, pipeline_str: str, **kwargs) -> str: - parent_submission_str = '_'.join(get_parent_dataset_uuids_list(**kwargs)) - return f'{dag_id}__{parent_submission_str}__{pipeline_str}' + parent_submission_str = "_".join(get_parent_dataset_uuids_list(**kwargs)) + return f"{dag_id}__{parent_submission_str}__{pipeline_str}" def get_parent_dataset_uuids_list(**kwargs) -> List[str]: - uuid_list = kwargs['dag_run'].conf['parent_submission_id'] + uuid_list = kwargs["dag_run"].conf["parent_submission_id"] if not isinstance(uuid_list, list): uuid_list = [uuid_list] return uuid_list @@ -317,7 +308,7 @@ def get_parent_dataset_uuid(**kwargs) -> str: def get_parent_dataset_paths_list(**kwargs) -> List[Path]: - path_list = kwargs['dag_run'].conf['parent_lz_path'] + path_list = kwargs["dag_run"].conf["parent_lz_path"] if not isinstance(path_list, list): path_list = [path_list] return [Path(p) for p in path_list] @@ -339,9 +330,13 @@ def get_parent_data_dirs_list(**kwargs) -> List[Path]: ctx_md_list = ctx["metadata"] if not isinstance(ctx_md_list, list): ctx_md_list = [ctx_md_list] - assert len(data_dir_list) == len(ctx_md_list), "lengths of data directory and md lists do not match" - return [Path(data_dir) / ctx_md['metadata']['data_path'] - for data_dir, ctx_md in zip(data_dir_list, ctx_md_list)] + assert len(data_dir_list) == len( + ctx_md_list + ), "lengths of data directory and md lists do not match" + return [ + Path(data_dir) / ctx_md["metadata"]["data_path"] + for data_dir, ctx_md in zip(data_dir_list, ctx_md_list) + ] def get_parent_data_dir(**kwargs) -> Path: @@ -351,12 +346,11 @@ def get_parent_data_dir(**kwargs) -> Path: def get_previous_revision_uuid(**kwargs) -> Optional[str]: - return kwargs['dag_run'].conf.get('previous_version_uuid', None) + return kwargs["dag_run"].conf.get("previous_version_uuid", 
None) def get_dataset_uuid(**kwargs) -> str: - return kwargs['ti'].xcom_pull(key='derived_dataset_uuid', - task_ids="send_create_dataset") + return kwargs["ti"].xcom_pull(key="derived_dataset_uuid", task_ids="send_create_dataset") def get_uuid_for_error(**kwargs) -> Optional[str]: @@ -380,18 +374,17 @@ def get_git_commits(file_list: StrOrListStr) -> StrOrListStr: else: unroll = False for fname in file_list: - log_command = [piece.format(fname=fname) - for piece in GIT_LOG_COMMAND] + log_command = [piece.format(fname=fname) for piece in GIT_LOG_COMMAND] try: dirnm = dirname(fname) - if dirnm == '': - dirnm = '.' + if dirnm == "": + dirnm = "." line = check_output(log_command, cwd=dirnm) except CalledProcessError as e: # Git will fail if this is not running from a git repo - line = 'DeadBeef git call failed: {}'.format(e.output) - line = line.encode('utf-8') - hashval = line.split()[0].strip().decode('utf-8') + line = "DeadBeef git call failed: {}".format(e.output) + line = line.encode("utf-8") + hashval = line.split()[0].strip().decode("utf-8") rslt.append(hashval) if unroll: return rslt[0] @@ -406,7 +399,7 @@ def _convert_git_to_proper_url(raw_url: str) -> str: """ m = RE_GIT_URL_PATTERN.fullmatch(raw_url) if m: - return f'https://github.com/{m[2]}' + return f"https://github.com/{m[2]}" else: return raw_url @@ -422,18 +415,17 @@ def get_git_origins(file_list: StrOrListStr) -> StrOrListStr: else: unroll = False for fname in file_list: - command = [piece.format(fname=fname) - for piece in GIT_ORIGIN_COMMAND] + command = [piece.format(fname=fname) for piece in GIT_ORIGIN_COMMAND] try: dirnm = dirname(fname) - if dirnm == '': - dirnm = '.' + if dirnm == "": + dirnm = "." line = check_output(command, cwd=dirnm) except CalledProcessError as e: # Git will fail if this is not running from a git repo - line = 'https://unknown/unknown.git git call failed: {}'.format(e.output) - line = line.encode('utf-8') - url = line.split()[0].strip().decode('utf-8') + line = "https://unknown/unknown.git git call failed: {}".format(e.output) + line = line.encode("utf-8") + url = line.split()[0].strip().decode("utf-8") url = _convert_git_to_proper_url(url) rslt.append(url) if unroll: @@ -454,17 +446,16 @@ def get_git_root_paths(file_list: Iterable[str]) -> Union[str, List[str]]: else: unroll = False for fname in file_list: - command = [piece.format(fname=fname) - for piece in GIT_ROOT_COMMAND] + command = [piece.format(fname=fname) for piece in GIT_ROOT_COMMAND] try: dirnm = dirname(fname) - if dirnm == '': - dirnm = '.' + if dirnm == "": + dirnm = "." root_path = check_output(command, cwd=dirnm) except CalledProcessError as e: - print(f'Exception {e}') - root_path = dirname(fname).encode('utf-8') - rslt.append(root_path.strip().decode('utf-8')) + print(f"Exception {e}") + root_path = dirname(fname).encode("utf-8") + rslt.append(root_path.strip().decode("utf-8")) if unroll: return rslt[0] else: @@ -474,19 +465,18 @@ def get_git_root_paths(file_list: Iterable[str]) -> Union[str, List[str]]: def get_git_provenance_dict(file_list: PathStrOrList) -> Mapping[str, str]: """ Given a list of file paths, return a list of dicts of the form: - + [{:}, ...] 
""" if isinstance(file_list, (str, Path)): # sadly, a str is an Iterable[str] file_list = [file_list] - return {basename(fname): get_git_commits(realpath(fname)) - for fname in file_list} + return {basename(fname): get_git_commits(realpath(fname)) for fname in file_list} def get_git_provenance_list(file_list: Iterable[str]) -> List[Mapping[str, Any]]: """ Given a list of file paths, return a list of dicts of the form: - + [{'name':, 'hash':, 'origin':},...] """ if isinstance(file_list, str): # sadly, a str is an Iterable[str] @@ -497,14 +487,16 @@ def get_git_provenance_list(file_list: Iterable[str]) -> List[Mapping[str, Any]] root_l = get_git_root_paths(file_list) rel_name_l = [relpath(name, root) for name, root in zip(name_l, root_l)] # Make sure each repo appears only once - repo_d = {origin: {'name': name, 'hash': hashed} - for origin, name, hashed in zip(origin_l, rel_name_l, hash_l)} + repo_d = { + origin: {"name": name, "hash": hashed} + for origin, name, hashed in zip(origin_l, rel_name_l, hash_l) + } rslt = [] for origin in repo_d: dct = repo_d[origin].copy() - dct['origin'] = origin - if not dct['name'].endswith('cwl'): - del dct['name'] # include explicit names for workflows only + dct["origin"] = origin + if not dct["name"].endswith("cwl"): + del dct["name"] # include explicit names for workflows only rslt.append(dct) # pprint(rslt) return rslt @@ -524,13 +516,13 @@ def _get_file_type(path: Path) -> str: # print('testing ', regex, tpnm) if regex.match(fspath(path)): return tpnm - return 'unknown' + return "unknown" def get_file_metadata(root_dir: str, matcher: FileMatcher) -> List[Mapping[str, Any]]: """ Given a root directory, return a list of the form: - + [ { 'rel_path': , @@ -543,17 +535,23 @@ def get_file_metadata(root_dir: str, matcher: FileMatcher) -> List[Mapping[str, }, ... 
] - + containing an entry for every file below the given root directory: """ root_path = Path(root_dir) rslt = [] - for dirpth, dirnames, fnames in walk(root_dir): + for dirpth, _, fnames in walk(root_dir): dp = Path(dirpth) for fn in fnames: full_path = dp / fn relative_path = full_path.relative_to(root_path) - add_to_index, description, ontology_term, is_qa_qc, is_data_product = matcher.get_file_metadata(relative_path) + ( + add_to_index, + description, + ontology_term, + is_qa_qc, + is_data_product, + ) = matcher.get_file_metadata(relative_path) if add_to_index: # sha1sum disabled because of run time issues on large data collections # line = check_output([word.format(fname=full_path) @@ -561,13 +559,13 @@ def get_file_metadata(root_dir: str, matcher: FileMatcher) -> List[Mapping[str, # cs = line.split()[0].strip().decode('utf-8') rslt.append( { - 'rel_path': fspath(relative_path), - 'type': _get_file_type(full_path), - 'size': getsize(full_path), - 'description': description, - 'edam_term': ontology_term, - 'is_qa_qc': is_qa_qc, - 'is_data_product': is_data_product, + "rel_path": fspath(relative_path), + "type": _get_file_type(full_path), + "size": getsize(full_path), + "description": description, + "edam_term": ontology_term, + "is_qa_qc": is_qa_qc, + "is_data_product": is_data_product, # 'sha1sum': cs, } ) @@ -575,10 +573,10 @@ def get_file_metadata(root_dir: str, matcher: FileMatcher) -> List[Mapping[str, def get_file_metadata_dict( - root_dir: str, - alt_file_dir: str, - pipeline_file_manifests: List[Path], - max_in_line_files: int = MAX_IN_LINE_FILES, + root_dir: str, + alt_file_dir: str, + pipeline_file_manifests: List[Path], + max_in_line_files: int = MAX_IN_LINE_FILES, ) -> Mapping[str, Any]: """ This routine returns file metadata, either directly as JSON in the form @@ -592,13 +590,13 @@ def get_file_metadata_dict( matcher = PipelineFileMatcher.create_from_files(pipeline_file_manifests) file_info = get_file_metadata(root_dir, matcher) if len(file_info) > max_in_line_files: - localized_assert_json_matches_schema(file_info, 'file_info_schema.yml') - fpath = join(alt_file_dir, '{}.json'.format(uuid.uuid4())) - with open(fpath, 'w') as f: - json.dump({'files': file_info}, f) - return {'files_info_alt_path': relpath(fpath, _get_scratch_base_path())} + localized_assert_json_matches_schema(file_info, "file_info_schema.yml") + fpath = join(alt_file_dir, "{}.json".format(uuid.uuid4())) + with open(fpath, "w") as f: + json.dump({"files": file_info}, f) + return {"files_info_alt_path": relpath(fpath, _get_scratch_base_path())} else: - return {'files': file_info} + return {"files": file_info} def pythonop_trigger_target(**kwargs) -> None: @@ -606,12 +604,12 @@ def pythonop_trigger_target(**kwargs) -> None: When used as the python_callable of a PythonOperator,this just logs data provided to the running DAG. """ - ctx = kwargs['dag_run'].conf - run_id = kwargs['run_id'] - print('run_id: ', run_id) - print('dag_run.conf:') + ctx = kwargs["dag_run"].conf + run_id = kwargs["run_id"] + print("run_id: ", run_id) + print("dag_run.conf:") pprint(ctx) - print('kwargs:') + print("kwargs:") pprint(kwargs) @@ -623,13 +621,13 @@ def pythonop_maybe_keep(**kwargs) -> str: 'test_op': the operator providing the success code 'test_key': xcom key to test. 
Defaults to None for return code """ - bail_op = kwargs['bail_op'] if 'bail_op' in kwargs else 'no_keep' - test_op = kwargs['test_op'] - test_key = kwargs['test_key'] if 'test_key' in kwargs else None - retcode = int(kwargs['ti'].xcom_pull(task_ids=test_op, key=test_key)) - print('%s key %s: %s\n' % (test_op, test_key, retcode)) + bail_op = kwargs["bail_op"] if "bail_op" in kwargs else "no_keep" + test_op = kwargs["test_op"] + test_key = kwargs["test_key"] if "test_key" in kwargs else None + retcode = int(kwargs["ti"].xcom_pull(task_ids=test_op, key=test_key)) + print("%s key %s: %s\n" % (test_op, test_key, retcode)) if retcode == 0: - return kwargs['next_op'] + return kwargs["next_op"] else: return bail_op @@ -639,17 +637,21 @@ def get_auth_tok(**kwargs) -> str: Recover the authorization token from the environment, and decrpyt it. """ - crypt_auth_tok = (kwargs['crypt_auth_tok'] if 'crypt_auth_tok' in kwargs - else kwargs['dag_run'].conf['crypt_auth_tok']) - auth_tok = ''.join(e for e in decrypt_tok(crypt_auth_tok.encode()) - if e.isalnum()) # strip out non-alnum characters + crypt_auth_tok = ( + kwargs["crypt_auth_tok"] + if "crypt_auth_tok" in kwargs + else kwargs["dag_run"].conf["crypt_auth_tok"] + ) + auth_tok = "".join( + e for e in decrypt_tok(crypt_auth_tok.encode()) if e.isalnum() + ) # strip out non-alnum characters return auth_tok def pythonop_send_create_dataset(**kwargs) -> str: """ Requests creation of a new dataset. Returns dataset info via XCOM - + Accepts the following via the caller's op_kwargs: 'http_conn_id' : the http connection to be used 'parent_dataset_uuid_callable' : called with **kwargs; returns uuid @@ -665,30 +667,30 @@ def pythonop_send_create_dataset(**kwargs) -> str: or 'dataset_types_callable' : called with **kwargs; returns the types list of the new dataset - + Returns the following via XCOM: (no key) : data_directory_path for the new dataset 'derived_dataset_uuid' : uuid for the created dataset 'group_uuid' : group uuid for the created dataset """ - for arg in ['parent_dataset_uuid_callable', 'http_conn_id']: + for arg in ["parent_dataset_uuid_callable", "http_conn_id"]: assert arg in kwargs, "missing required argument {}".format(arg) - for arg_options in [['dataset_types', 'dataset_types_callable']]: + for arg_options in [["dataset_types", "dataset_types_callable"]]: assert any([arg in kwargs for arg in arg_options]) - http_conn_id = kwargs['http_conn_id'] + http_conn_id = kwargs["http_conn_id"] # ctx = kwargs['dag_run'].conf headers = { - 'authorization': 'Bearer ' + get_auth_tok(**kwargs), - 'content-type': 'application/json', - 'X-Hubmap-Application': 'ingest-pipeline' + "authorization": "Bearer " + get_auth_tok(**kwargs), + "content-type": "application/json", + "X-Hubmap-Application": "ingest-pipeline", } - if 'dataset_types' in kwargs: - dataset_types = kwargs['dataset_types'] + if "dataset_types" in kwargs: + dataset_types = kwargs["dataset_types"] else: - dataset_types = kwargs['dataset_types_callable'](**kwargs) + dataset_types = kwargs["dataset_types_callable"](**kwargs) if not isinstance(dataset_types, list): dataset_types = [dataset_types] canonical_types = set() # to avoid duplicates @@ -699,78 +701,75 @@ def pythonop_send_create_dataset(**kwargs) -> str: contains_seq |= type_info.contains_pii # canonical_types = list(canonical_types) - source_uuids = kwargs['parent_dataset_uuid_callable'](**kwargs) + source_uuids = kwargs["parent_dataset_uuid_callable"](**kwargs) if not isinstance(source_uuids, list): source_uuids = [source_uuids] - 
dataset_name = kwargs['dataset_name_callable'](**kwargs) - + dataset_name = kwargs["dataset_name_callable"](**kwargs) + try: - response = HttpHook('GET', http_conn_id=http_conn_id).run( - endpoint=f'entities/{source_uuids[0]}', + response = HttpHook("GET", http_conn_id=http_conn_id).run( + endpoint=f"entities/{source_uuids[0]}", headers=headers, - extra_options={'check_response': False} + extra_options={"check_response": False}, ) response.raise_for_status() response_json = response.json() - if 'group_uuid' not in response_json: - print(f'response from GET on entities{source_uuids[0]}:') + if "group_uuid" not in response_json: + print(f"response from GET on entities{source_uuids[0]}:") pprint(response_json) - raise ValueError('entities response did not contain group_uuid') - parent_group_uuid = response_json['group_uuid'] + raise ValueError("entities response did not contain group_uuid") + parent_group_uuid = response_json["group_uuid"] data = { "direct_ancestor_uuids": source_uuids, "dataset_info": dataset_name, "data_types": dataset_types, "group_uuid": parent_group_uuid, - "contains_human_genetic_sequences": contains_seq + "contains_human_genetic_sequences": contains_seq, } - if 'previous_revision_uuid_callable' in kwargs: - previous_revision_uuid = kwargs['previous_revision_uuid_callable'](**kwargs) + if "previous_revision_uuid_callable" in kwargs: + previous_revision_uuid = kwargs["previous_revision_uuid_callable"](**kwargs) if previous_revision_uuid is not None: - data['previous_revision_uuid'] = previous_revision_uuid - print('data for dataset creation:') + data["previous_revision_uuid"] = previous_revision_uuid + print("data for dataset creation:") pprint(data) - response = HttpHook('POST', http_conn_id=http_conn_id).run( - endpoint='datasets', - data=json.dumps(data), - headers=headers, - extra_options={} + response = HttpHook("POST", http_conn_id=http_conn_id).run( + endpoint="datasets", data=json.dumps(data), headers=headers, extra_options={} ) response.raise_for_status() response_json = response.json() - print('response to dataset creation:') + print("response to dataset creation:") pprint(response_json) - for elt in ['uuid', 'group_uuid']: + for elt in ["uuid", "group_uuid"]: if elt not in response_json: - raise ValueError(f'datasets response did not contain {elt}') - uuid = response_json['uuid'] - group_uuid = response_json['group_uuid'] - - response = HttpHook('GET', http_conn_id=http_conn_id).run( - endpoint=f'datasets/{uuid}/file-system-abs-path', + raise ValueError(f"datasets response did not contain {elt}") + uuid = response_json["uuid"] + group_uuid = response_json["group_uuid"] + + response = HttpHook("GET", http_conn_id=http_conn_id).run( + endpoint=f"datasets/{uuid}/file-system-abs-path", headers=headers, - extra_options={'check_response': False} + extra_options={"check_response": False}, ) response.raise_for_status() response_json = response.json() - if 'path' not in response_json: - print(f'response from datasets/{uuid}/file-system-abs-path:') + if "path" not in response_json: + print(f"response from datasets/{uuid}/file-system-abs-path:") pprint(response_json) - raise ValueError(f'datasets/{uuid}/file-system-abs-path' - ' did not return a path') - abs_path = response_json['path'] + raise ValueError(f"datasets/{uuid}/file-system-abs-path" " did not return a path") + abs_path = response_json["path"] except HTTPError as e: - print(f'ERROR: {e}') + print(f"ERROR: {e}") if e.response.status_code == codes.unauthorized: - raise RuntimeError(f'authorization for {endpoint} 
was rejected?') + # TODO: endpoint is undefined + raise RuntimeError(f"authorization for {endpoint} was rejected?") else: - raise RuntimeError(f'misc error {e} on {endpoint}') - - kwargs['ti'].xcom_push(key='group_uuid', value=group_uuid) - kwargs['ti'].xcom_push(key='derived_dataset_uuid', value=uuid) + raise RuntimeError(f"misc error {e} on {endpoint}") + + kwargs["ti"].xcom_push(key="group_uuid", value=group_uuid) + kwargs["ti"].xcom_push(key="derived_dataset_uuid", value=uuid) return abs_path @@ -779,7 +778,7 @@ def pythonop_set_dataset_state(**kwargs) -> None: Sets the status of a dataset, to 'Processing' if no specific state is specified. NOTE that this routine cannot change a dataset into or out of the Published state. - + Accepts the following via the caller's op_kwargs: 'dataset_uuid_callable' : called with **kwargs; returns the uuid of the dataset to be modified @@ -788,34 +787,22 @@ def pythonop_set_dataset_state(**kwargs) -> None: 'message' : update message, saved as dataset metadata element "pipeline_messsage". The default is not to save any message. """ - for arg in ['dataset_uuid_callable']: + for arg in ["dataset_uuid_callable"]: assert arg in kwargs, "missing required argument {}".format(arg) - dataset_uuid = kwargs['dataset_uuid_callable'](**kwargs) - http_conn_id = kwargs.get('http_conn_id', 'entity_api_connection') - endpoint = f'/entities/{dataset_uuid}' - ds_state = kwargs['ds_state'] if 'ds_state' in kwargs else 'Processing' - message = kwargs.get('message', None) - headers = { - 'authorization': 'Bearer ' + get_auth_tok(**kwargs), - 'content-type': 'application/json', - 'X-Hubmap-Application': 'ingest-pipeline'} - extra_options = {} - - http_hook = HttpHook('PUT', - http_conn_id=http_conn_id) - - data = {'status': ds_state} - if message is not None: - data['pipeline_message'] = message - print('data: ') - pprint(data) - - response = http_hook.run(endpoint, - json.dumps(data), - headers, - extra_options) - print('response: ') - pprint(response.json()) + dataset_uuid = kwargs["dataset_uuid_callable"](**kwargs) + http_conn_id = kwargs.get("http_conn_id", "entity_api_connection") + status = kwargs["ds_state"] if "ds_state" in kwargs else "Processing" + message = kwargs.get("message", None) + StatusChanger( + dataset_uuid, + get_auth_tok(**kwargs), + status, + { + "extra_fields": {"pipeline_message": message} if message else {}, + "extra_options": {}, + }, + http_conn_id=http_conn_id, + ).on_status_change() def restructure_entity_metadata(raw_metadata: JSONType) -> JSONType: @@ -830,15 +817,15 @@ def restructure_entity_metadata(raw_metadata: JSONType) -> JSONType: de-restructured version can be used by workflows in liu of the original. 
""" md = {} - if 'ingest_metadata' in raw_metadata: - if 'metadata' in raw_metadata['ingest_metadata']: - md['metadata'] = deepcopy(raw_metadata['ingest_metadata']['metadata']) - if 'extra_metadata' in raw_metadata['ingest_metadata']: - md.update(raw_metadata['ingest_metadata']['extra_metadata']) - if 'contributors' in raw_metadata: - md['contributors'] = deepcopy(raw_metadata['contributors']) - if 'antibodies' in raw_metadata: - md['antibodies'] = deepcopy(raw_metadata['antibodies']) + if "ingest_metadata" in raw_metadata: + if "metadata" in raw_metadata["ingest_metadata"]: + md["metadata"] = deepcopy(raw_metadata["ingest_metadata"]["metadata"]) + if "extra_metadata" in raw_metadata["ingest_metadata"]: + md.update(raw_metadata["ingest_metadata"]["extra_metadata"]) + if "contributors" in raw_metadata: + md["contributors"] = deepcopy(raw_metadata["contributors"]) + if "antibodies" in raw_metadata: + md["antibodies"] = deepcopy(raw_metadata["antibodies"]) # print('reconstructed metadata follows') # pprint(md) return md @@ -848,52 +835,54 @@ def pythonop_get_dataset_state(**kwargs) -> JSONType: """ Gets the status JSON structure for a dataset. Works for Uploads and Publications as well as Datasets. - + Accepts the following via the caller's op_kwargs: 'dataset_uuid_callable' : called with **kwargs; returns the uuid of the Dataset or Upload to be examined """ - for arg in ['dataset_uuid_callable']: + for arg in ["dataset_uuid_callable"]: assert arg in kwargs, "missing required argument {}".format(arg) - uuid = kwargs['dataset_uuid_callable'](**kwargs) - method = 'GET' + uuid = kwargs["dataset_uuid_callable"](**kwargs) + method = "GET" auth_tok = get_auth_tok(**kwargs) headers = { - 'authorization': f'Bearer {auth_tok}', - 'content-type': 'application/json', - 'X-Hubmap-Application': 'ingest-pipeline', - } - http_hook = HttpHook(method, http_conn_id='entity_api_connection') + "authorization": f"Bearer {auth_tok}", + "content-type": "application/json", + "X-Hubmap-Application": "ingest-pipeline", + } + http_hook = HttpHook(method, http_conn_id="entity_api_connection") - endpoint = f'entities/{uuid}' + endpoint = f"entities/{uuid}" try: - response = http_hook.run(endpoint, - headers=headers, - extra_options={'check_response': False}) + response = http_hook.run( + endpoint, headers=headers, extra_options={"check_response": False} + ) response.raise_for_status() ds_rslt = response.json() - print('ds rslt:') + print("ds rslt:") pprint(ds_rslt) except HTTPError as e: - print(f'ERROR: {e}') + print(f"ERROR: {e}") if e.response.status_code == codes.unauthorized: - raise RuntimeError('entity database authorization was rejected?') + raise RuntimeError("entity database authorization was rejected?") else: - print('benign error') + print("benign error") return {} - for key in ['status', 'uuid', 'entity_type']: + for key in ["status", "uuid", "entity_type"]: assert key in ds_rslt, f"Dataset status for {uuid} has no {key}" - if ds_rslt['entity_type'] in ['Dataset', 'Publication']: - assert 'data_types' in ds_rslt, f"Dataset status for {uuid} has no data_types" - data_types = ds_rslt['data_types'] - parent_dataset_uuid_list = [ancestor['uuid'] - for ancestor in ds_rslt['direct_ancestors'] - if ancestor['entity_type'] == 'Dataset'] + if ds_rslt["entity_type"] in ["Dataset", "Publication"]: + assert "data_types" in ds_rslt, f"Dataset status for {uuid} has no data_types" + data_types = ds_rslt["data_types"] + parent_dataset_uuid_list = [ + ancestor["uuid"] + for ancestor in ds_rslt["direct_ancestors"] + if 
ancestor["entity_type"] == "Dataset" + ] metadata = restructure_entity_metadata(ds_rslt) endpoint = f"datasets/{ds_rslt['uuid']}/file-system-abs-path" - elif ds_rslt['entity_type'] == 'Upload': + elif ds_rslt["entity_type"] == "Upload": data_types = [] metadata = {} endpoint = f"uploads/{ds_rslt['uuid']}/file-system-abs-path" @@ -901,76 +890,71 @@ def pythonop_get_dataset_state(**kwargs) -> JSONType: else: raise RuntimeError(f"Unknown entity_type {ds_rslt['entity_type']}") try: - http_hook = HttpHook(method, http_conn_id='ingest_api_connection') - response = http_hook.run(endpoint, - headers=headers, - extra_options={'check_response': False}) + http_hook = HttpHook(method, http_conn_id="ingest_api_connection") + response = http_hook.run( + endpoint, headers=headers, extra_options={"check_response": False} + ) response.raise_for_status() path_query_rslt = response.json() - print('path_query rslt:') + print("path_query rslt:") pprint(path_query_rslt) except HTTPError as e: - print(f'ERROR: {e}') + print(f"ERROR: {e}") if e.response.status_code == codes.unauthorized: - raise RuntimeError('entity database authorization was rejected?') + raise RuntimeError("entity database authorization was rejected?") else: - print('benign error') + print("benign error") return {} - assert 'path' in path_query_rslt, (f"Dataset path for {uuid} produced" - " no path") - full_path = path_query_rslt['path'] + assert "path" in path_query_rslt, f"Dataset path for {uuid} produced" " no path" + full_path = path_query_rslt["path"] rslt = { - 'entity_type': ds_rslt['entity_type'], - 'status': ds_rslt['status'], - 'uuid': ds_rslt['uuid'], - 'parent_dataset_uuid_list': parent_dataset_uuid_list, - 'data_types': data_types, - 'local_directory_full_path': full_path, - 'metadata': metadata, + "entity_type": ds_rslt["entity_type"], + "status": ds_rslt["status"], + "uuid": ds_rslt["uuid"], + "parent_dataset_uuid_list": parent_dataset_uuid_list, + "data_types": data_types, + "local_directory_full_path": full_path, + "metadata": metadata, } - if ds_rslt['entity_type'] == 'Dataset': - http_hook = HttpHook('GET', http_conn_id='entity_api_connection') + if ds_rslt["entity_type"] == "Dataset": + http_hook = HttpHook("GET", http_conn_id="entity_api_connection") endpoint = f"datasets/{ds_rslt['uuid']}/organs" try: - response = http_hook.run(endpoint, - headers=headers, - extra_options={'check_response': False}) + response = http_hook.run( + endpoint, headers=headers, extra_options={"check_response": False} + ) response.raise_for_status() organs_query_rslt = response.json() - print('organs_query_rslt:') + print("organs_query_rslt:") pprint(organs_query_rslt) - rslt['organs'] = [entry['organ'] for entry in organs_query_rslt] + rslt["organs"] = [entry["organ"] for entry in organs_query_rslt] except HTTPError as e: - print(f'ERROR: {e}') + print(f"ERROR: {e}") if e.response.status_code == codes.unauthorized: - raise RuntimeError('entity database authorization was rejected?') + raise RuntimeError("entity database authorization was rejected?") else: - print('benign error') + print("benign error") return {} - + return rslt def _uuid_lookup(uuid, **kwargs): - http_conn_id = 'uuid_api_connection' - endpoint = 'hmuuid/{}'.format(uuid) - method = 'GET' - headers = {'authorization': 'Bearer ' + get_auth_tok(**kwargs)} -# print('headers:') -# pprint(headers) + http_conn_id = "uuid_api_connection" + endpoint = "hmuuid/{}".format(uuid) + method = "GET" + headers = {"authorization": "Bearer " + get_auth_tok(**kwargs)} + # print('headers:') + # 
pprint(headers) extra_options = {} - http_hook = HttpHook(method, - http_conn_id=http_conn_id) + http_hook = HttpHook(method, http_conn_id=http_conn_id) - response = http_hook.run(endpoint, - None, - headers, - extra_options) -# print('response: ') -# pprint(response.json()) + response = http_hook.run(endpoint, None, headers, extra_options) + # print('response: ') + # pprint(response.json()) return response.json() @@ -980,8 +964,8 @@ def _generate_slices(id: str) -> Iterable[str]: base, lidx, hidx = mo.groups() lidx = int(lidx) hidx = int(hidx) - for idx in range(lidx, hidx+1): - yield f'{base}-{idx}' + for idx in range(lidx, hidx + 1): + yield f"{base}-{idx}" else: yield id @@ -991,12 +975,12 @@ def assert_id_known(id: str, **kwargs) -> None: Is the given id string known to the uuid database? Id strings with suffixes like myidstr-n1_n2 where n1 and n2 are integers are interpreted as representing multiple ids with suffix integers in the range n1 to n2 inclusive. - + Raises AssertionError if the ID is not known. """ for slice in _generate_slices(id): tissue_info = _uuid_lookup(slice, **kwargs) - assert tissue_info and len(tissue_info) >= 1, f'tissue_id {slice} not found on lookup' + assert tissue_info and len(tissue_info) >= 1, f"tissue_id {slice} not found on lookup" def pythonop_md_consistency_tests(**kwargs) -> int: @@ -1004,41 +988,39 @@ def pythonop_md_consistency_tests(**kwargs) -> int: Perform simple consistency checks of the metadata stored as YAML in kwargs['metadata_fname']. This includes accessing the UUID api via its Airflow connection ID to verify uuids. """ - md_path = join(get_tmp_dir_path(kwargs['run_id']), kwargs['metadata_fname']) + md_path = join(get_tmp_dir_path(kwargs["run_id"]), kwargs["metadata_fname"]) if exists(md_path): - with open(md_path, 'r') as f: + with open(md_path, "r") as f: md = yaml.safe_load(f) - # print('metadata from {} follows:'.format(md_path)) - # pprint(md) - if '_from_metadatatsv' in md and md['_from_metadatatsv']: + # print('metadata from {} follows:'.format(md_path)) + # pprint(md) + if "_from_metadatatsv" in md and md["_from_metadatatsv"]: try: - for elt in ['tissue_id', 'donor_id']: - assert elt in md, 'metadata is missing {}'.format(elt) - assert md['tissue_id'].startswith(md['donor_id']+'-'), 'tissue_id does not match' - assert_id_known(md['tissue_id'], **kwargs) + for elt in ["tissue_id", "donor_id"]: + assert elt in md, "metadata is missing {}".format(elt) + assert md["tissue_id"].startswith(md["donor_id"] + "-"), "tissue_id does not match" + assert_id_known(md["tissue_id"], **kwargs) return 0 except AssertionError as e: - kwargs['ti'].xcom_push(key='err_msg', - value='Assertion Failed: {}'.format(e)) + kwargs["ti"].xcom_push(key="err_msg", value="Assertion Failed: {}".format(e)) return 1 else: return 0 else: - kwargs['ti'].xcom_push(key='err_msg', - value='Expected metadata file is missing') + kwargs["ti"].xcom_push(key="err_msg", value="Expected metadata file is missing") return 1 - + def _get_scratch_base_path() -> Path: - dct = airflow_conf.as_dict(display_sensitive=True)['connections'] - if 'WORKFLOW_SCRATCH' in dct: - scratch_path = dct['WORKFLOW_SCRATCH'] - elif 'workflow_scratch' in dct: + dct = airflow_conf.as_dict(display_sensitive=True)["connections"] + if "WORKFLOW_SCRATCH" in dct: + scratch_path = dct["WORKFLOW_SCRATCH"] + elif "workflow_scratch" in dct: # support for lower case is necessary setting the scratch path via the # environment variable AIRFLOW__CONNECTIONS__WORKFLOW_SCRATCH - scratch_path = dct['workflow_scratch'] + 
scratch_path = dct["workflow_scratch"] else: - raise KeyError('WORKFLOW_SCRATCH') # preserve original code behavior + raise KeyError("WORKFLOW_SCRATCH") # preserve original code behavior scratch_path = scratch_path.strip("'").strip('"') # remove quotes that may be on the string return Path(scratch_path) @@ -1059,21 +1041,22 @@ def get_cwltool_bin_path() -> Path: while cwltool_dir: part1, part2 = split(cwltool_dir) cwltool_dir = part1 - if part2 == 'lib': + if part2 == "lib": break - assert cwltool_dir, 'Failed to find cwltool bin directory' - cwltool_dir = Path(cwltool_dir, 'bin') + assert cwltool_dir, "Failed to find cwltool bin directory" + cwltool_dir = Path(cwltool_dir, "bin") return cwltool_dir def get_cwltool_base_cmd(tmpdir: Path) -> List[str]: return [ - 'env', - 'TMPDIR={}'.format(tmpdir), - '_JAVA_OPTIONS={}'.format('-XX:ActiveProcessorCount=2'), - 'cwltool', - '--timestamps', - '--preserve-environment', '_JAVA_OPTIONS', + "env", + "TMPDIR={}".format(tmpdir), + "_JAVA_OPTIONS={}".format("-XX:ActiveProcessorCount=2"), + "cwltool", + "--timestamps", + "--preserve-environment", + "_JAVA_OPTIONS", # The trailing slashes in the next two lines are deliberate. # cwltool treats these path prefixes as *strings*, not as # directories in which new temporary dirs should be created, so @@ -1081,184 +1064,21 @@ def get_cwltool_base_cmd(tmpdir: Path) -> List[str]: # like '/tmp/cwl-tmpXXXXXXXX' with 'XXXXXXXX' as a random string. # Adding the trailing slash is ensures that temporary directories # are created as *subdirectories* of 'cwl-tmp' and 'cwl-out-tmp'. - '--tmpdir-prefix={}/'.format(tmpdir / 'cwl-tmp'), - '--tmp-outdir-prefix={}/'.format(tmpdir / 'cwl-out-tmp'), + "--tmpdir-prefix={}/".format(tmpdir / "cwl-tmp"), + "--tmp-outdir-prefix={}/".format(tmpdir / "cwl-out-tmp"), ] -def make_send_status_msg_function_old( - dag_file: str, - retcode_ops: List[str], - cwl_workflows: List[Path], - http_conn_id: str = 'ingest_api_connection', - uuid_src_task_id: str = 'send_create_dataset', - dataset_uuid_fun: Optional[Callable[..., str]] = None, - dataset_lz_path_fun: Optional[Callable[..., str]] = None, - metadata_fun: Optional[Callable[..., dict]] = None, - include_file_metadata: Optional[bool] = True -) -> Callable[..., None]: - """ - The function which is generated by this function will return a boolean, - True if the message which was ultimately sent was for a success and - False otherwise. This return value is not necessary in most circumstances - but is useful when the generated function is being wrapped. - - The user can specify dataset_uuid_fun and dataset_lz_path_fun, or leave - both to their empty default values and specify 'uuid_src_task_id'. - - `dag_file` should always be `__file__` wherever this function is used, - to include the DAG file in the provenance. This could be "automated" with - something like `sys._getframe(1).f_code.co_filename`, but that doesn't - seem worth it at the moment - - 'http_conn_id' is the Airflow connection id associated with the /datasets/status service. - - 'dataset_uuid_fun' is a function which returns the uuid of the dataset to be - updated, or None. If given, it will be called with **kwargs arguments. - - 'dataset_lz_path_fun' is a function which returns the full path of the dataset - data directory, or None. If given, it will be called with **kwargs arguments. - If the return value of this callable is None or the empty string, no file metadata - will be ultimately be included in the status message. 
- - 'uuid_src_task_id' is the Airflow task_id of a task providing the uuid via - the XCOM key 'derived_dataset_uuid' and the dataset data directory - via the None key. This is used only if dataset_uuid is None or dataset_lz_path - is None. - - 'metadata_fun' is a function which returns additional metadata in JSON form, - or None. If given, it will be called with **kwargs arguments. This function - will only be evaluated if retcode_ops have all returned 0. - - 'include_file_metadata is a boolean defaulting to True which indicates whether - file metadata should be included in the transmitted metadata structure. If False, - no file metadata will be included. Note that file metadata may also be excluded - based on the return value of 'dataset_lz_path_fun' above. - """ - def send_status_msg(**kwargs) -> bool: - retcodes = [ - kwargs['ti'].xcom_pull(task_ids=op) - for op in retcode_ops - ] - retcodes = [int(rc or '0') for rc in retcodes] - print('retcodes: ', {k: v for k, v in zip(retcode_ops, retcodes)}) - success = all(rc == 0 for rc in retcodes) - if dataset_uuid_fun is None: - dataset_uuid = kwargs['ti'].xcom_pull( - key='derived_dataset_uuid', - task_ids=uuid_src_task_id, - ) - else: - dataset_uuid = dataset_uuid_fun(**kwargs) - if dataset_lz_path_fun is None: - ds_dir = kwargs['ti'].xcom_pull(task_ids=uuid_src_task_id) - else: - ds_dir = dataset_lz_path_fun(**kwargs) - endpoint = '/datasets/status' - method = 'PUT' - headers = { - 'authorization': 'Bearer ' + get_auth_tok(**kwargs), - 'content-type': 'application/json', - } - extra_options = {} - return_status = True # mark false on failure - - http_hook = HttpHook(method, http_conn_id=http_conn_id) - - if success: - md = {} - files_for_provenance = [dag_file, *cwl_workflows] - - if 'dag_provenance' in kwargs['dag_run'].conf: - md['dag_provenance'] = kwargs['dag_run'].conf['dag_provenance'].copy() - new_prv_dct = get_git_provenance_dict(files_for_provenance) - md['dag_provenance'].update(new_prv_dct) - else: - dag_prv = (kwargs['dag_run'].conf['dag_provenance_list'] - if 'dag_provenance_list' in kwargs['dag_run'].conf - else []) - dag_prv.extend(get_git_provenance_list(files_for_provenance)) - md['dag_provenance_list'] = dag_prv - - if metadata_fun: - md['metadata'] = metadata_fun(**kwargs) - - if dataset_lz_path_fun: - dataset_dir_abs_path = dataset_lz_path_fun(**kwargs) - if dataset_dir_abs_path: - ######################################################################### - # Added by Zhou 6/16/2021 for registering thumbnail image - # This is the only place that uses this hardcoded extras/thumbnail.jpg - thumbnail_file_abs_path = join(dataset_dir_abs_path, - 'extras/thumbnail.jpg') - if exists(thumbnail_file_abs_path): - md['thumbnail_file_abs_path'] = thumbnail_file_abs_path - ######################################################################### - - manifest_files = find_pipeline_manifests(cwl_workflows) - if include_file_metadata and ds_dir is not None and not ds_dir == '': - md.update( - get_file_metadata_dict( - ds_dir, - get_tmp_dir_path(kwargs['run_id']), - manifest_files, - ), - ) - try: - assert_json_matches_schema(md, 'dataset_metadata_schema.yml') - data = { - 'dataset_id': dataset_uuid, - 'status': 'QA', - 'message': 'the process ran', - 'metadata': md, - } - except AssertionError as e: - print('invalid metadata follows:') - pprint(md) - data = { - 'dataset_id': dataset_uuid, - 'status': 'Error', - 'message': 'internal error; schema violation: {}'.format(e), - 'metadata': {}, - } - return_status = False - else: - log_fname 
= Path(get_tmp_dir_path(kwargs['run_id']), 'session.log') - with open(log_fname, 'r') as f: - err_txt = '\n'.join(f.readlines()) - data = { - 'dataset_id': dataset_uuid, - 'status': 'Invalid', - 'message': err_txt, - } - return_status = False - print('data: ') - pprint(data) - - response = http_hook.run( - endpoint, - json.dumps(data), - headers, - extra_options, - ) - print('response: ') - pprint(response.json()) - - return return_status - - return send_status_msg - - def make_send_status_msg_function( - dag_file: str, - retcode_ops: List[str], - cwl_workflows: List[Path], - uuid_src_task_id: str = 'send_create_dataset', - dataset_uuid_fun: Optional[Callable[..., str]] = None, - dataset_lz_path_fun: Optional[Callable[..., str]] = None, - metadata_fun: Optional[Callable[..., dict]] = None, - include_file_metadata: Optional[bool] = True -) -> Callable[..., None]: + dag_file: str, + retcode_ops: List[str], + cwl_workflows: List[Path], + uuid_src_task_id: str = "send_create_dataset", + dataset_uuid_fun: Optional[Callable[..., str]] = None, + dataset_lz_path_fun: Optional[Callable[..., str]] = None, + metadata_fun: Optional[Callable[..., dict]] = None, + include_file_metadata: Optional[bool] = True, +) -> Callable[..., bool]: """ The function which is generated by this function will return a boolean, True if the message which was ultimately sent was for a success and @@ -1300,10 +1120,11 @@ def make_send_status_msg_function( # Does the string represent a "true" value, or an int that is 1 def __is_true(val): - if val is None: return False + if val is None: + return False if isinstance(val, str): uval = val.upper().strip() - if uval in ['TRUE', 'T', '1', 'Y', 'YES']: + if uval in ["TRUE", "T", "1", "Y", "YES"]: return True else: return False @@ -1313,53 +1134,44 @@ def __is_true(val): return False def send_status_msg(**kwargs) -> bool: - retcodes = [ - kwargs['ti'].xcom_pull(task_ids=op) - for op in retcode_ops - ] - retcodes = [int(rc or '0') for rc in retcodes] - print('retcodes: ', {k: v for k, v in zip(retcode_ops, retcodes)}) + retcodes = [kwargs["ti"].xcom_pull(task_ids=op) for op in retcode_ops] + retcodes = [int(rc or "0") for rc in retcodes] + print("retcodes: ", {k: v for k, v in zip(retcode_ops, retcodes)}) success = all(rc == 0 for rc in retcodes) if dataset_uuid_fun is None: - dataset_uuid = kwargs['ti'].xcom_pull( - key='derived_dataset_uuid', + dataset_uuid = kwargs["ti"].xcom_pull( + key="derived_dataset_uuid", task_ids=uuid_src_task_id, ) else: dataset_uuid = dataset_uuid_fun(**kwargs) if dataset_lz_path_fun is None: - ds_dir = kwargs['ti'].xcom_pull(task_ids=uuid_src_task_id) + ds_dir = kwargs["ti"].xcom_pull(task_ids=uuid_src_task_id) else: ds_dir = dataset_lz_path_fun(**kwargs) - endpoint = '/entities/' + dataset_uuid - method = 'PUT' - headers = { - 'authorization': 'Bearer ' + get_auth_tok(**kwargs), - 'content-type': 'application/json', - 'X-Hubmap-Application': 'ingest-pipeline', - } - extra_options = {} return_status = True # mark false on failure - - http_hook = HttpHook(method, http_conn_id='entity_api_connection') + status = None + extra_fields = {} if success: md = {} files_for_provenance = [dag_file, *cwl_workflows] - if 'dag_provenance' in kwargs['dag_run'].conf: - md['dag_provenance'] = kwargs['dag_run'].conf['dag_provenance'].copy() + if "dag_provenance" in kwargs["dag_run"].conf: + md["dag_provenance"] = kwargs["dag_run"].conf["dag_provenance"].copy() new_prv_dct = get_git_provenance_dict(files_for_provenance) - md['dag_provenance'].update(new_prv_dct) + 
md["dag_provenance"].update(new_prv_dct) else: - dag_prv = (kwargs['dag_run'].conf['dag_provenance_list'] - if 'dag_provenance_list' in kwargs['dag_run'].conf - else []) + dag_prv = ( + kwargs["dag_run"].conf["dag_provenance_list"] + if "dag_provenance_list" in kwargs["dag_run"].conf + else [] + ) dag_prv.extend(get_git_provenance_list(files_for_provenance)) - md['dag_provenance_list'] = dag_prv + md["dag_provenance_list"] = dag_prv if metadata_fun: - md['metadata'] = metadata_fun(**kwargs) + md["metadata"] = metadata_fun(**kwargs) thumbnail_file_abs_path = [] if dataset_lz_path_fun: @@ -1368,8 +1180,7 @@ def send_status_msg(**kwargs) -> bool: ######################################################################### # Added by Zhou 6/16/2021 for registering thumbnail image # This is the only place that uses this hardcoded extras/thumbnail.jpg - thumbnail_file_abs_path = join(dataset_dir_abs_path, - 'extras/thumbnail.jpg') + thumbnail_file_abs_path = join(dataset_dir_abs_path, "extras/thumbnail.jpg") if exists(thumbnail_file_abs_path): thumbnail_file_abs_path = thumbnail_file_abs_path else: @@ -1377,88 +1188,93 @@ def send_status_msg(**kwargs) -> bool: ######################################################################### manifest_files = find_pipeline_manifests(cwl_workflows) - if include_file_metadata and ds_dir is not None and not ds_dir == '': + if include_file_metadata and ds_dir is not None and not ds_dir == "": md.update( get_file_metadata_dict( ds_dir, - get_tmp_dir_path(kwargs['run_id']), + get_tmp_dir_path(kwargs["run_id"]), manifest_files, ), ) # Refactoring metadata structure + contacts = [] if metadata_fun: - md['files'] = md['metadata'].pop('files_info_alt_path', []) - md['extra_metadata'] = {'collectiontype': md['metadata'].pop('collectiontype', None)} - md['thumbnail_file_abs_path'] = thumbnail_file_abs_path - antibodies = md['metadata'].pop('antibodies', []) - contributors = md['metadata'].pop('contributors', []) - md['metadata'] = md['metadata'].pop('metadata', []) - contacts = [] + md["files"] = md["metadata"].pop("files_info_alt_path", []) + md["extra_metadata"] = { + "collectiontype": md["metadata"].pop("collectiontype", None) + } + md["thumbnail_file_abs_path"] = thumbnail_file_abs_path + antibodies = md["metadata"].pop("antibodies", []) + contributors = md["metadata"].pop("contributors", []) + md["metadata"] = md["metadata"].pop("metadata", []) for contrib in contributors: - if 'is_contact' in contrib: - v = contrib['is_contact'] + if "is_contact" in contrib: + v = contrib["is_contact"] if __is_true(val=v): contacts.append(contrib) def my_callable(**kwargs): return dataset_uuid - ds_rslt = pythonop_get_dataset_state( - dataset_uuid_callable=my_callable, - **kwargs - ) + ds_rslt = pythonop_get_dataset_state(dataset_uuid_callable=my_callable, **kwargs) if not ds_rslt: - status = 'QA' + status = "QA" else: - status = ds_rslt.get('status', 'QA') - if status in ['Processing', 'New', 'Error']: - status = 'QA' + status = ds_rslt.get("status", "QA") + if status in ["Processing", "New"]: + status = "QA" if metadata_fun: if not contacts: - contacts = ds_rslt.get('contacts', []) + contacts = ds_rslt.get("contacts", []) try: - assert_json_matches_schema(md, 'dataset_metadata_schema.yml') - data = { - 'pipeline_message': 'the process ran', - 'ingest_metadata': md, + assert_json_matches_schema(md, "dataset_metadata_schema.yml") + extra_fields = { + "pipeline_message": "the process ran", + "ingest_metadata": md, } if metadata_fun: - data.update({'antibodies': antibodies, - 
'contributors': contributors, - 'contacts': contacts}) - if status not in ['Published']: - data.update({'status': status}) + extra_fields.update( + { + "antibodies": antibodies, + "contributors": contributors, + "contacts": contacts, + } + ) + if status in ["Published"]: + status = None except AssertionError as e: - print('invalid metadata follows:') + print("invalid metadata follows:") pprint(md) - data = { - 'status': 'Error', - 'pipeline_message': 'internal error; schema violation: {}'.format(e), - 'ingest_metadata': {}, + status = "Error" + extra_fields = { + "status": "Error", + "pipeline_message": "internal error; schema violation: {}".format(e), + "ingest_metadata": {}, } return_status = False else: - log_fname = Path(get_tmp_dir_path(kwargs['run_id']), 'session.log') - with open(log_fname, 'r') as f: - err_txt = '\n'.join(f.readlines()) - data = { - 'status': 'Invalid', - 'pipeline_message': err_txt, + log_fname = Path(get_tmp_dir_path(kwargs["run_id"]), "session.log") + with open(log_fname, "r") as f: + err_txt = "\n".join(f.readlines()) + status = "Invalid" + extra_fields = { + "status": "Invalid", + "pipeline_message": err_txt, } return_status = False - print('data: ') - pprint(data) - - response = http_hook.run( - endpoint, - json.dumps(data), - headers, - extra_options, - ) - print('response: ') - pprint(response.json()) + entity_type = ds_rslt.get("entity_type") + StatusChanger( + dataset_uuid, + get_auth_tok(**kwargs), + status, + { + "extra_fields": extra_fields, + "extra_options": {}, + }, + entity_type=entity_type if entity_type else None, + ).on_status_change() return return_status @@ -1471,8 +1287,8 @@ def map_queue_name(raw_queue_name: str) -> str: provided queue name. This allows job separation under Celery. """ conf_dict = airflow_conf.as_dict() - if 'QUEUE_NAME_TEMPLATE' in conf_dict.get('connections', {}): - template = conf_dict['connections']['QUEUE_NAME_TEMPLATE'] + if "QUEUE_NAME_TEMPLATE" in conf_dict.get("connections", {}): + template = conf_dict["connections"]["QUEUE_NAME_TEMPLATE"] template = template.strip("'").strip('"') # remove quotes that may be on the config string rslt = template.format(raw_queue_name) return rslt @@ -1480,22 +1296,28 @@ def map_queue_name(raw_queue_name: str) -> str: return raw_queue_name -def create_dataset_state_error_callback(dataset_uuid_callable: Callable[[Any], str]) -> Callable[[Mapping, Any], - None]: +def create_dataset_state_error_callback( + dataset_uuid_callable: Callable[[Any], str] +) -> Callable[[Mapping, Any], None]: def set_dataset_state_error(context_dict: Mapping, **kwargs) -> None: """ This routine is meant to be """ - msg = 'An internal error occurred in the {} workflow step {}'.format(context_dict['dag'].dag_id, - context_dict['task'].task_id) + msg = "An internal error occurred in the {} workflow step {}".format( + context_dict["dag"].dag_id, context_dict["task"].task_id + ) new_kwargs = kwargs.copy() new_kwargs.update(context_dict) - new_kwargs.update({'dataset_uuid_callable': dataset_uuid_callable, - 'http_conn_id': 'entity_api_connection', - 'ds_state': 'Error', - 'message': msg - }) + new_kwargs.update( + { + "dataset_uuid_callable": dataset_uuid_callable, + "http_conn_id": "entity_api_connection", + "ds_state": "Error", + "message": msg, + } + ) pythonop_set_dataset_state(**new_kwargs) + return set_dataset_state_error @@ -1509,7 +1331,7 @@ def localized_assert_json_matches_schema(jsn: JSONType, schemafile: str) -> None try: return assert_json_matches_schema(jsn, schemafile) # localized by 
set_schema_base_path except AssertionError as e: - print('ASSERTION FAILED: {}'.format(e)) + print("ASSERTION FAILED: {}".format(e)) raise @@ -1520,14 +1342,14 @@ def _get_workflow_map() -> List[Tuple[Pattern, Pattern, str]]: global COMPILED_WORKFLOW_MAP if COMPILED_WORKFLOW_MAP is None: map_path = join(dirname(__file__), WORKFLOW_MAP_FILENAME) - with open(map_path, 'r') as f: + with open(map_path, "r") as f: map = yaml.safe_load(f) localized_assert_json_matches_schema(map, WORKFLOW_MAP_SCHEMA) cmp_map = [] - for dct in map['workflow_map']: - ct_re = re.compile(dct['collection_type']) - at_re = re.compile(dct['assay_type']) - cmp_map.append((ct_re, at_re, dct['workflow'])) + for dct in map["workflow_map"]: + ct_re = re.compile(dct["collection_type"]) + at_re = re.compile(dct["assay_type"]) + cmp_map.append((ct_re, at_re, dct["workflow"])) COMPILED_WORKFLOW_MAP = cmp_map return COMPILED_WORKFLOW_MAP @@ -1539,29 +1361,25 @@ def _get_resource_map() -> List[Tuple[Pattern, Pattern, Dict[str, str]]]: global COMPILED_RESOURCE_MAP if COMPILED_RESOURCE_MAP is None: map_path = join(dirname(__file__), RESOURCE_MAP_FILENAME) - with open(map_path, 'r') as f: + with open(map_path, "r") as f: map = yaml.safe_load(f) localized_assert_json_matches_schema(map, RESOURCE_MAP_SCHEMA) cmp_map = [] - for dct in map['resource_map']: - dag_re = re.compile(dct['dag_re']) - dag_dct = {key: dct[key] for key in dct - if key not in ['dag_re', 'tasks']} + for dct in map["resource_map"]: + dag_re = re.compile(dct["dag_re"]) + dag_dct = {key: dct[key] for key in dct if key not in ["dag_re", "tasks"]} tasks = [] - for inner_dct in dct['tasks']: - assert 'task_re' in inner_dct, ('schema should guarantee' - ' "task_re" is present?') - task_re = re.compile(inner_dct['task_re']) - task_dct = {key: inner_dct[key] for key in inner_dct - if key not in ['task_re']} + for inner_dct in dct["tasks"]: + assert "task_re" in inner_dct, "schema should guarantee" ' "task_re" is present?' + task_re = re.compile(inner_dct["task_re"]) + task_dct = {key: inner_dct[key] for key in inner_dct if key not in ["task_re"]} tasks.append((task_re, task_dct)) cmp_map.append((dag_re, dag_dct, tasks)) COMPILED_RESOURCE_MAP = cmp_map return COMPILED_RESOURCE_MAP -def _lookup_resource_record(dag_id: str, - task_id: Optional[str] = None) -> Tuple[int, Dict]: +def _lookup_resource_record(dag_id: str, task_id: Optional[str] = None) -> Tuple[int, Dict]: """ Look up the resource map entry for the given dag_id and task_id. The first match is returned. If the task_id is None, the first record matching only @@ -1577,25 +1395,28 @@ def _lookup_resource_record(dag_id: str, rslt.update(task_dict) break else: - raise ValueError(f'Resource map entry for dag_id <{dag_id}>' - f' has no match for task_id <{task_id}>') + raise ValueError( + f"Resource map entry for dag_id <{dag_id}>" + f" has no match for task_id <{task_id}>" + ) return rslt else: - raise ValueError('No resource map entry found for' - f' dag_id <{dag_id}> task_id <{task_id}>') + raise ValueError( + "No resource map entry found for" f" dag_id <{dag_id}> task_id <{task_id}>" + ) + - def get_queue_resource(dag_id: str, task_id: Optional[str] = None) -> str: """ Look up the queue defined for this dag_id and task_id in the current - resource map. If the task_id is None, the lookup is done with - task_id='__default__', which presumably only matches the wildcard case. + resource map. If the task_id is None, the lookup is done with + task_id='__default__', which presumably only matches the wildcard case. 
""" if task_id is None: - task_id = '__default__' + task_id = "__default__" rec = _lookup_resource_record(dag_id, task_id) - assert 'queue' in rec, 'schema should guarantee "queue" is present?' - return map_queue_name(rec['queue']) + assert "queue" in rec, 'schema should guarantee "queue" is present?' + return map_queue_name(rec["queue"]) def get_lanes_resource(dag_id: str) -> int: @@ -1604,8 +1425,8 @@ def get_lanes_resource(dag_id: str) -> int: resource map. """ rec = _lookup_resource_record(dag_id) - assert 'lanes' in rec, 'schema should guarantee "lanes" is present?' - return int(rec['lanes']) + assert "lanes" in rec, 'schema should guarantee "lanes" is present?' + return int(rec["lanes"]) def get_preserve_scratch_resource(dag_id: str) -> bool: @@ -1614,9 +1435,8 @@ def get_preserve_scratch_resource(dag_id: str) -> bool: resource map. """ rec = _lookup_resource_record(dag_id) - assert 'preserve_scratch' in rec, ('schema should guarantee' - ' "preserve_scratch" is present?') - return bool(rec['preserve_scratch']) + assert "preserve_scratch" in rec, "schema should guarantee" ' "preserve_scratch" is present?' + return bool(rec["preserve_scratch"]) def get_threads_resource(dag_id: str, task_id: Optional[str] = None) -> int: @@ -1624,18 +1444,22 @@ def get_threads_resource(dag_id: str, task_id: Optional[str] = None) -> int: Look up the number of threads defined for this dag_id and task_id in the current resource map. If the task_id is None, the lookup is done with task_id='__default__', which presumably only matches the wildcard - case. + case. """ if task_id is None: - task_id = '__default__' + task_id = "__default__" rec = _lookup_resource_record(dag_id, task_id) - assert any(['threads' in rec, 'coreuse' in rec]), 'schema should guarantee "threads" or "coreuse" is present?' - if rec.get('coreuse'): - return math.ceil(os.cpu_count() * (int(rec.get('coreuse')) / 100)) \ - if int(rec.get('coreuse')) > 0 \ + assert any( + ["threads" in rec, "coreuse" in rec] + ), 'schema should guarantee "threads" or "coreuse" is present?' + if rec.get("coreuse"): + return ( + math.ceil(os.cpu_count() * (int(rec.get("coreuse")) / 100)) + if int(rec.get("coreuse")) > 0 else math.ceil(os.cpu_count() / 4) + ) else: - return int(rec.get('threads')) + return int(rec.get("threads")) def get_type_client() -> TypeClient: @@ -1651,14 +1475,14 @@ def _get_type_client() -> TypeClient: """ global TYPE_CLIENT if TYPE_CLIENT is None: - conn = HttpHook.get_connection('search_api_connection') - if conn.host.startswith('https'): - conn.host = urllib.parse.unquote(conn.host).split('https://')[1] - conn.conn_type = 'https' + conn = HttpHook.get_connection("search_api_connection") + if conn.host.startswith("https"): + conn.host = urllib.parse.unquote(conn.host).split("https://")[1] + conn.conn_type = "https" if conn.port is None: - url = f'{conn.conn_type}://{conn.host}' + url = f"{conn.conn_type}://{conn.host}" else: - url = f'{conn.conn_type}://{conn.host}:{conn.port}' + url = f"{conn.conn_type}://{conn.host}:{conn.port}" TYPE_CLIENT = TypeClient(url) return TYPE_CLIENT @@ -1685,9 +1509,9 @@ def downstream_workflow_iter(collectiontype: str, assay_type: StrOrListStr) -> I collectiontype and assay_type. Each workflow name is expected to correspond to a known workflow, e.g. an Airflow DAG implemented by workflow_name.py . 
""" - collectiontype = collectiontype or '' + collectiontype = collectiontype or "" assay_type = _canonicalize_assay_type_if_possible(assay_type) - assay_type = assay_type or '' + assay_type = assay_type or "" for ct_re, at_re, workflow in _get_workflow_map(): if isinstance(assay_type, str): at_match = at_re.match(assay_type) @@ -1698,42 +1522,43 @@ def downstream_workflow_iter(collectiontype: str, assay_type: StrOrListStr) -> I def encrypt_tok(cleartext_tok: str) -> bytes: - key = airflow_conf.as_dict(display_sensitive=True)['core']['fernet_key'] + key = airflow_conf.as_dict(display_sensitive=True)["core"]["fernet_key"] fernet = Fernet(key.encode()) return fernet.encrypt(cleartext_tok.encode()) def decrypt_tok(crypt_tok: bytes) -> str: - key = airflow_conf.as_dict(display_sensitive=True)['core']['fernet_key'] + key = airflow_conf.as_dict(display_sensitive=True)["core"]["fernet_key"] fernet = Fernet(key.encode()) return fernet.decrypt(crypt_tok).decode() def join_quote_command_str(pieces: List[Any]): - command_str = ' '.join(shlex.quote(str(piece)) for piece in pieces) - print('final command_str:', command_str) + command_str = " ".join(shlex.quote(str(piece)) for piece in pieces) + print("final command_str:", command_str) return command_str def _strip_url(url): - return url.split(':')[1].strip('/') + return url.split(":")[1].strip("/") def find_matching_endpoint(host_url: str) -> str: """ Find the identity of the 'instance' of Airflow infrastructure based on environment information. - + host_url: the URL of entity-api in the current context returns: an instance string, for example 'PROD' or 'DEV' """ assert ENDPOINTS, "Context information is unavailable" stripped_url = _strip_url(host_url) - print(f'stripped_url: {stripped_url}') - candidates = [ep for ep in ENDPOINTS - if stripped_url == _strip_url(ENDPOINTS[ep]['entity_url'])] - assert len(candidates) == 1, f'Found {candidates}, expected 1 match' + print(f"stripped_url: {stripped_url}") + candidates = [ + ep for ep in ENDPOINTS if stripped_url == _strip_url(ENDPOINTS[ep]["entity_url"]) + ] + assert len(candidates) == 1, f"Found {candidates}, expected 1 match" return candidates[0] @@ -1742,7 +1567,7 @@ def main(): This provides some unit tests. To run it, you will need to define the 'search_api_connection' connection ID and the Fernet key. The easiest way to do that is with something like: - + export AIRFLOW_CONN_SEARCH_API_CONNECTION='https://search.api.hubmapconsortium.org/v3/ fernet_key=`python -c 'from cryptography.fernet import Fernet ; print(Fernet.generate_key().decode())'` export AIRFLOW__CORE__FERNET_KEY=${fernet_key} @@ -1751,34 +1576,41 @@ def main(): print(get_git_commits([__file__])) print(get_git_provenance_dict(__file__)) dirnm = dirname(__file__) - if dirnm == '': - dirnm = '.' + if dirnm == "": + dirnm = "." 
for elt in get_file_metadata(dirnm, DummyFileMatcher()): print(elt) pprint(get_git_provenance_list(__file__)) - md = {'metadata': {'my_string': 'hello world'}, - 'files': get_file_metadata(dirnm, DummyFileMatcher()), - 'dag_provenance_list': get_git_provenance_list(__file__)} + md = { + "metadata": {"my_string": "hello world"}, + "files": get_file_metadata(dirnm, DummyFileMatcher()), + "dag_provenance_list": get_git_provenance_list(__file__), + } try: - localized_assert_json_matches_schema(md, 'dataset_metadata_schema.yml') - print('ASSERT passed') + localized_assert_json_matches_schema(md, "dataset_metadata_schema.yml") + print("ASSERT passed") except AssertionError as e: - print(f'ASSERT failed {e}') - - assay_pairs = [('devtest', 'devtest'), ('codex', 'CODEX'), - ('codex', 'SOMEOTHER'), ('someother', 'CODEX'), - ('someother', 'salmon_sn_rnaseq_10x'), ('someother', 'salmon_rnaseq_10x_sn')] + print(f"ASSERT failed {e}") + + assay_pairs = [ + ("devtest", "devtest"), + ("codex", "CODEX"), + ("codex", "SOMEOTHER"), + ("someother", "CODEX"), + ("someother", "salmon_sn_rnaseq_10x"), + ("someother", "salmon_rnaseq_10x_sn"), + ] for collectiontype, assay_type in assay_pairs: - print('collectiontype {}, assay_type {}:'.format(collectiontype, assay_type)) + print("collectiontype {}, assay_type {}:".format(collectiontype, assay_type)) for elt in downstream_workflow_iter(collectiontype, assay_type): - print(' -> {}'.format(elt)) + print(" -> {}".format(elt)) - print(f'cwltool bin path: {get_cwltool_bin_path()}') + print(f"cwltool bin path: {get_cwltool_bin_path()}") - s = 'hello world' + s = "hello world" crypt_s = encrypt_tok(s) s2 = decrypt_tok(crypt_s) - print('crypto test: {} -> {} -> {}'.format(s, crypt_s, s2)) + print("crypto test: {} -> {} -> {}".format(s, crypt_s, s2)) if __name__ == "__main__": diff --git a/src/ingest-pipeline/airflow/dags/validate_upload.py b/src/ingest-pipeline/airflow/dags/validate_upload.py index 96cf3cd7..8fbc8b96 100644 --- a/src/ingest-pipeline/airflow/dags/validate_upload.py +++ b/src/ingest-pipeline/airflow/dags/validate_upload.py @@ -1,14 +1,17 @@ -import json +from __future__ import annotations + +import logging import sys from datetime import datetime, timedelta from pathlib import Path from pprint import pprint -# from error_catching.validate_upload_failure_callback import ValidateUploadFailure from hubmap_operators.common_operators import ( CleanupTmpDirOperator, CreateTmpDirOperator, ) +from status_change.failure_callback import FailureCallback +from status_change.status_manager import StatusChanger, Statuses from utils import ( HMDAG, get_auth_tok, @@ -22,7 +25,6 @@ from airflow.configuration import conf as airflow_conf from airflow.exceptions import AirflowException from airflow.operators.python import PythonOperator -from airflow.providers.http.hooks.http import HttpHook sys.path.append(airflow_conf.as_dict()["connections"]["SRC_PATH"].strip("'").strip('"')) @@ -40,7 +42,7 @@ "email": ["gesina@psc.edu"], "email_on_failure": False, "email_on_retry": False, - # "on_failure_callback": ValidateUploadFailure, + "on_failure_callback": FailureCallback, "retries": 1, "retry_delay": timedelta(minutes=1), "xcom_push": True, @@ -132,42 +134,33 @@ def run_validation(**kwargs): def send_status_msg(**kwargs): validation_file_path = Path(kwargs["ti"].xcom_pull(key="validation_file_path")) - uuid = kwargs["ti"].xcom_pull(key="uuid") - endpoint = f"/entities/{uuid}" - headers = { - "authorization": "Bearer " + get_auth_tok(**kwargs), - "X-Hubmap-Application": 
"ingest-pipeline", - "content-type": "application/json", - } - extra_options = [] - http_conn_id = "entity_api_connection" - http_hook = HttpHook("PUT", http_conn_id=http_conn_id) with open(validation_file_path) as f: report_txt = f.read() if report_txt.startswith("No errors!"): - data = { - "status": "Valid", + status = Statuses.UPLOAD_VALID + extra_fields = { "validation_message": "", } else: - data = { - "status": "Invalid", + status = Statuses.UPLOAD_INVALID + extra_fields = { "validation_message": report_txt, } - # context = kwargs["ti"].get_template_context() - # ValidateUploadFailure(context, execute_methods=False).send_failure_email( - # report_txt=report_txt - # ) - print("data: ") - pprint(data) - response = http_hook.run( - endpoint, - json.dumps(data), - headers, - extra_options, + logging.info( + f""" + status: {status.value} + validation_message: {extra_fields['validation_message']} + """ ) - print("response: ") - pprint(response.json()) + StatusChanger( + kwargs["ti"].xcom_pull(key="uuid"), + get_auth_tok(**kwargs), + status, + { + "extra_fields": extra_fields, + "extra_options": {}, + }, + ).on_status_change() t_send_status = PythonOperator( task_id="send_status", diff --git a/src/ingest-pipeline/misc/tools/split_and_create.py b/src/ingest-pipeline/misc/tools/split_and_create.py index 27bfdcaf..521814ba 100755 --- a/src/ingest-pipeline/misc/tools/split_and_create.py +++ b/src/ingest-pipeline/misc/tools/split_and_create.py @@ -1,28 +1,30 @@ #! /usr/bin/env python import argparse +import json import re +import time from pathlib import Path -from shutil import copytree, copy2 -from typing import TypeVar, List from pprint import pprint -import time -import json -import requests +from shutil import copy2, copytree +from typing import List, TypeVar + import pandas as pd +import requests +from status_change.status_manager import StatusChanger, Statuses # There has got to be a better solution for this, but I can't find it try: - from survey import (Dataset, EntityFactory, Upload, ENDPOINTS) + from survey import ENDPOINTS, Dataset, EntityFactory, Upload except ImportError: - from .survey import (Dataset, EntityFactory, Upload, ENDPOINTS) + from .survey import ENDPOINTS, Dataset, EntityFactory, Upload -DEFAULT_FROZEN_DF_FNAME = 'frozen_source_df{}.tsv' # must work with frozen_name.format(suffix) +DEFAULT_FROZEN_DF_FNAME = "frozen_source_df{}.tsv" # must work with frozen_name.format(suffix) FAKE_UUID_GENERATOR = None -SCRATCH_PATH = '/tmp/split_and_create' +SCRATCH_PATH = "/tmp/split_and_create" -StrOrListStr = TypeVar('StrOrListStr', str, List[str]) +StrOrListStr = TypeVar("StrOrListStr", str, List[str]) # # The following are used to try to deal with bad assay type information in the original @@ -31,10 +33,10 @@ # FALLBACK_ASSAY_TYPE_TRANSLATIONS = { # 'SNARE-Seq2-AC': 'SNARE-ATACseq2', - 'SNARE-Seq2-AC': 'SNAREseq', + "SNARE-Seq2-AC": "SNAREseq", # 'SNARE2-RNAseq': 'SNARE-RNAseq2', - 'SNARE2-RNAseq': 'sciRNAseq', - 'scRNAseq-10xGenomics-v2': 'scRNA-Seq-10x', + "SNARE2-RNAseq": "sciRNAseq", + "scRNAseq-10xGenomics-v2": "scRNA-Seq-10x", } @@ -46,35 +48,34 @@ # def _remove_na(row: pd.Series, parent_assay_type: StrOrListStr) -> pd.Series: new_row = row.copy() - key = 'transposition_kit_number' - if key in row and row[key].lower() == 'na': - new_row[key] = '' + key = "transposition_kit_number" + if key in row and row[key].lower() == "na": + new_row[key] = "" return new_row -SEQ_RD_FMT_TEST_RX = re.compile(r'\d+\+\d+\+\d+\+\d+') +SEQ_RD_FMT_TEST_RX = re.compile(r"\d+\+\d+\+\d+\+\d+") def 
_reformat_seq_read(row: pd.Series, parent_assay_type: StrOrListStr) -> pd.Series: new_row = row.copy() - key = 'sequencing_read_format' + key = "sequencing_read_format" if key in row and SEQ_RD_FMT_TEST_RX.match(row[key]): - new_row[key] = row[key].replace('+', '/') + new_row[key] = row[key].replace("+", "/") return new_row def _fix_snare_atac_assay_type(row: pd.Series, parent_assay_type: StrOrListStr) -> pd.Series: new_row = row.copy() - key1 = 'assay_type' - key2 = 'canonical_assay_type' - if (key1 in row and key2 in row - and row[key1] == 'SNARE-seq2' and row[key2] == 'SNAREseq'): - new_row[key2] = 'SNARE-seq2' + key1 = "assay_type" + key2 = "canonical_assay_type" + if key1 in row and key2 in row and row[key1] == "SNARE-seq2" and row[key2] == "SNAREseq": + new_row[key2] = "SNARE-seq2" return new_row SPECIAL_CASE_TRANSFORMATIONS = [ - (re.compile('SNAREseq'), [_remove_na, _reformat_seq_read, _fix_snare_atac_assay_type]) + (re.compile("SNAREseq"), [_remove_na, _reformat_seq_read, _fix_snare_atac_assay_type]) ] @@ -82,7 +83,7 @@ def create_fake_uuid_generator(): """This is used to simulate unique uuids for dryrun executions""" count = 0 while True: - rslt = 'fakeuuid_%08x'%count + rslt = "fakeuuid_%08x" % count count += 1 yield rslt @@ -92,10 +93,10 @@ def get_canonical_assay_type(row, entity_factory, default_type): Convert assay type to canonical form, with fallback """ try: - rslt = entity_factory.type_client.getAssayType(row['assay_type']).name + rslt = entity_factory.type_client.getAssayType(row["assay_type"]).name except Exception: print(f"fallback {row['assay_type']} {default_type}") - rslt = FALLBACK_ASSAY_TYPE_TRANSLATIONS.get(row['assay_type'], default_type) + rslt = FALLBACK_ASSAY_TYPE_TRANSLATIONS.get(row["assay_type"], default_type) print(f"{row['assay_type']} -> {rslt}") return rslt @@ -105,46 +106,49 @@ def create_new_uuid(row, source_entity, entity_factory, dryrun=False): Use the entity_factory to create a new dataset, with safety checks """ global FAKE_UUID_GENERATOR - canonical_assay_type = row['canonical_assay_type'] - orig_assay_type = row['assay_type'] - rec_identifier = row['data_path'].strip('/') - assert rec_identifier and rec_identifier != '.', 'Bad data_path!' + canonical_assay_type = row["canonical_assay_type"] + orig_assay_type = row["assay_type"] + rec_identifier = row["data_path"].strip("/") + assert rec_identifier and rec_identifier != ".", "Bad data_path!" 
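# --- Hedged sketch of the special-case cleanup applied above: _reformat_seq_read
# --- rewrites sequencing_read_format values like "12+8+8+50" as "12/8/8/50" when
# --- they match the SEQ_RD_FMT_TEST_RX pattern. The sample row below is invented
# --- for illustration and is not taken from any real metadata.tsv.
import re

import pandas as pd

SEQ_RD_FMT_RX = re.compile(r"\d+\+\d+\+\d+\+\d+")

sample_row = pd.Series({"assay_type": "SNARE-seq2", "sequencing_read_format": "12+8+8+50"})
fixed_row = sample_row.copy()
key = "sequencing_read_format"
if key in sample_row and SEQ_RD_FMT_RX.match(sample_row[key]):
    fixed_row[key] = sample_row[key].replace("+", "/")
print(fixed_row[key])  # -> 12/8/8/50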
info_txt_root = None if isinstance(source_entity, Dataset): - assert 'lab_dataset_id' in source_entity.prop_dct, (f'Dataset {source_entity.uuid}' - ' has no lab_dataset_id') - info_txt_root = source_entity.prop_dct['lab_dataset_id'] + assert "lab_dataset_id" in source_entity.prop_dct, ( + f"Dataset {source_entity.uuid}" " has no lab_dataset_id" + ) + info_txt_root = source_entity.prop_dct["lab_dataset_id"] elif isinstance(source_entity, Upload): - if 'title' in source_entity.prop_dct: - info_txt_root = source_entity.prop_dct['title'] + if "title" in source_entity.prop_dct: + info_txt_root = source_entity.prop_dct["title"] else: - print(f'WARNING: Upload {source_entity.uuid} has no title') + print(f"WARNING: Upload {source_entity.uuid} has no title") info_txt_root = f"Upload {source_entity.prop_dct['hubmap_id']}" - assert info_txt_root is not None, 'Expected a Dataset or an Upload' - info_txt = info_txt_root + ' : ' + rec_identifier + assert info_txt_root is not None, "Expected a Dataset or an Upload" + info_txt = info_txt_root + " : " + rec_identifier try: type_info = entity_factory.type_client.getAssayType(canonical_assay_type) except Exception: - print(f'tried {orig_assay_type}, canoncal version {canonical_assay_type}') - print(f'options are {list(entity_factory.type_client.iterAssayNames())}') + print(f"tried {orig_assay_type}, canoncal version {canonical_assay_type}") + print(f"options are {list(entity_factory.type_client.iterAssayNames())}") type_info = entity_factory.type_client.getAssayType(orig_assay_type) contains_human_genetic_sequences = type_info.contains_pii # Check consistency in case this is a Dataset, which will have this info - if 'contains_human_genetic_sequences' in source_entity.prop_dct: - assert (contains_human_genetic_sequences - == source_entity.prop_dct['contains_human_genetic_sequences']) - group_uuid = source_entity.prop_dct['group_uuid'] - if 'description' in row: - description = str(row['description']) - elif 'description' in source_entity.prop_dct: - description = source_entity.prop_dct['description'] + ' : ' + rec_identifier - elif 'lab_dataset_id' in source_entity.prop_dct: - description = source_entity.prop_dct['lab_dataset_id'] + ' : ' + rec_identifier + if "contains_human_genetic_sequences" in source_entity.prop_dct: + assert ( + contains_human_genetic_sequences + == source_entity.prop_dct["contains_human_genetic_sequences"] + ) + group_uuid = source_entity.prop_dct["group_uuid"] + if "description" in row: + description = str(row["description"]) + elif "description" in source_entity.prop_dct: + description = source_entity.prop_dct["description"] + " : " + rec_identifier + elif "lab_dataset_id" in source_entity.prop_dct: + description = source_entity.prop_dct["lab_dataset_id"] + " : " + rec_identifier else: - description = ': ' + rec_identifier - sample_id_list = row['tissue_id'] + description = ": " + rec_identifier + sample_id_list = row["tissue_id"] direct_ancestor_uuids = [] - for sample_id in sample_id_list.split(','): + for sample_id in sample_id_list.split(","): sample_id = sample_id.strip() sample_uuid = entity_factory.id_to_uuid(sample_id) print(f"including tissue_id {sample_id} ({sample_uuid})") @@ -156,7 +160,7 @@ def create_new_uuid(row, source_entity, entity_factory, dryrun=False): if FAKE_UUID_GENERATOR is None: FAKE_UUID_GENERATOR = create_fake_uuid_generator() uuid = FAKE_UUID_GENERATOR.__next__() - print(f'Not creating uuid {uuid} with assay_type {canonical_assay_type}') + print(f"Not creating uuid {uuid} with assay_type 
{canonical_assay_type}") return uuid else: rslt = entity_factory.create_dataset( @@ -165,82 +169,81 @@ def create_new_uuid(row, source_entity, entity_factory, dryrun=False): assay_type=canonical_assay_type, direct_ancestor_uuids=direct_ancestor_uuids, group_uuid=group_uuid, - description=description + description=description, ) - return rslt['uuid'] + return rslt["uuid"] def populate(row, source_entity, entity_factory, dryrun=False): """ Build the contents of the newly created dataset using info from the parent """ - uuid = row['new_uuid'] - old_data_path = row['data_path'] - row['data_path'] = '.' - old_contrib_path = Path(row['contributors_path']) - new_contrib_path = Path('extras') / old_contrib_path.name - row['contributors_path'] = str(new_contrib_path) - if 'antibodies_path' in row: - old_antibodies_path = Path(row['antibodies_path']) - new_antibodies_path = Path('extras') / old_antibodies_path.name - row['antibodies_path'] = str(new_antibodies_path) + uuid = row["new_uuid"] + old_data_path = row["data_path"] + row["data_path"] = "." + old_contrib_path = Path(row["contributors_path"]) + new_contrib_path = Path("extras") / old_contrib_path.name + row["contributors_path"] = str(new_contrib_path) + if "antibodies_path" in row: + old_antibodies_path = Path(row["antibodies_path"]) + new_antibodies_path = Path("extras") / old_antibodies_path.name + row["antibodies_path"] = str(new_antibodies_path) else: old_antibodies_path = None # row['assay_type'] = row['canonical_assay_type'] row_df = pd.DataFrame([row]) - row_df = row_df.drop(columns=['canonical_assay_type', 'new_uuid']) + row_df = row_df.drop(columns=["canonical_assay_type", "new_uuid"]) if dryrun: kid_path = Path(SCRATCH_PATH) / uuid kid_path.mkdir(0o770, parents=True, exist_ok=True) - print(f'writing this metadata to {kid_path}:') + print(f"writing this metadata to {kid_path}:") print(row_df) else: kid_path = Path(entity_factory.get_full_path(uuid)) - row_df.to_csv(kid_path / f'{uuid}-metadata.tsv', header=True, sep='\t', index=False) - extras_path = kid_path / 'extras' + row_df.to_csv(kid_path / f"{uuid}-metadata.tsv", header=True, sep="\t", index=False) + extras_path = kid_path / "extras" if extras_path.exists(): - assert extras_path.is_dir(), f'{extras_path} is not a directory' + assert extras_path.is_dir(), f"{extras_path} is not a directory" else: - source_extras_path = source_entity.full_path / 'extras' + source_extras_path = source_entity.full_path / "extras" if source_extras_path.exists(): if dryrun: - print(f'copy {source_extras_path} to {extras_path}') + print(f"copy {source_extras_path} to {extras_path}") else: copytree(source_extras_path, extras_path) else: if dryrun: - print(f'creating {extras_path}') + print(f"creating {extras_path}") extras_path.mkdir(0o770) source_data_path = source_entity.full_path / old_data_path - for elt in source_data_path.glob('*'): + for elt in source_data_path.glob("*"): dst_file = kid_path / elt.name if dryrun: if dst_file.exists() and dst_file.is_dir(): - for sub_elt in elt.glob('*'): - print(f'rename {sub_elt} to {kid_path / elt.name / sub_elt.name}') + for sub_elt in elt.glob("*"): + print(f"rename {sub_elt} to {kid_path / elt.name / sub_elt.name}") continue - print(f'rename {elt} to {dst_file}') + print(f"rename {elt} to {dst_file}") else: if dst_file.exists() and dst_file.is_dir(): - for sub_elt in elt.glob('*'): + for sub_elt in elt.glob("*"): sub_elt.rename(kid_path / elt.name / sub_elt.name) continue elt.rename(dst_file) if dryrun: - print(f'copy {old_contrib_path} to {extras_path}') 
+ print(f"copy {old_contrib_path} to {extras_path}") else: copy2(source_entity.full_path / old_contrib_path, extras_path) if old_antibodies_path is not None: if dryrun: - print(f'copy {old_antibodies_path} to {extras_path}') + print(f"copy {old_antibodies_path} to {extras_path}") else: copy2(source_entity.full_path / old_antibodies_path, extras_path) print(f"{old_data_path} -> {uuid} -> full path: {kid_path}") def apply_special_case_transformations( - df: pd.DataFrame, - parent_assay_type: StrOrListStr + df: pd.DataFrame, parent_assay_type: StrOrListStr ) -> pd.DataFrame: """ Sometimes special case transformations must be applied, for example because the @@ -260,54 +263,52 @@ def update_upload_entity(child_uuid_list, source_entity, dryrun=False, verbose=F if isinstance(source_entity, Upload): if dryrun: print(f'set status of <{source_entity.uuid}> to "Reorganized"') - print(f'set <{source_entity.uuid}> dataset_uuids_to_link to {child_uuid_list}') + print(f"set <{source_entity.uuid}> dataset_uuids_to_link to {child_uuid_list}") else: # Set Upload status to "Reorganized" - # Set links from Upload to split Datasets" - entity_url = ENDPOINTS[source_entity.entity_factory.instance]['entity_url'] - data = { - "status": "Reorganized", - "dataset_uuids_to_link": child_uuid_list - } - endpoint = f'{entity_url}/entities/{source_entity.uuid}' - print(f'sending to {endpoint}:') - pprint(data) - r = requests.put(endpoint, - data=json.dumps(data), - headers={ - 'Authorization': f'Bearer {source_entity.entity_factory.auth_tok}', - 'Content-Type': 'application/json', - 'X-Hubmap-Application': 'ingest-pipeline' - }) - if r.status_code >= 300: - r.raise_for_status() - if verbose: - print('response:') - pprint(r.json()) - else: - print(f'{source_entity.uuid} status is Reorganized') - + # Set links from Upload to split Datasets + print(f"Setting status of {source_entity.uuid} to 'Reorganized'") + StatusChanger( + source_entity.uuid, + source_entity.entity_factory.auth_tok, + Statuses.UPLOAD_REORGANIZED, + { + "extra_fields": {"dataset_uuids_to_link": child_uuid_list}, + "extra_options": {}, + }, + verbose=verbose, + ).on_status_change() + if not verbose: + print(f"{source_entity.uuid} status is Reorganized") + + # TODO: click in with UpdateAsana data = {"status": "Submitted"} for uuid in child_uuid_list: - endpoint = f'{entity_url}/entities/{uuid}' - print(f'sending to {endpoint}: {data}') - r = requests.put(endpoint, - data=json.dumps(data), - headers={ - 'Authorization': f'Bearer {source_entity.entity_factory.auth_tok}', - 'Content-Type': 'application/json', - 'X-Hubmap-Application': 'ingest-pipeline' - }) + endpoint = f"{entity_url}/entities/{uuid}" + print(f"sending to {endpoint}: {data}") + r = requests.put( + endpoint, + data=json.dumps(data), + headers={ + "Authorization": f"Bearer {source_entity.entity_factory.auth_tok}", + "Content-Type": "application/json", + "X-Hubmap-Application": "ingest-pipeline", + }, + ) if r.status_code >= 300: r.raise_for_status() if verbose: - print('response:') + print("response:") pprint(r.json()) else: - print(f'Reorganized new: {uuid} from Upload: {source_entity.uuid} status is Submitted') + print( + f"Reorganized new: {uuid} from Upload: {source_entity.uuid} status is Submitted" + ) else: - print(f'source entity <{source_entity.uuid}> is not an upload,' - ' so its status was not updated') + print( + f"source entity <{source_entity.uuid}> is not an upload," + " so its status was not updated" + ) def submit_uuid(uuid, entity_factory, dryrun=False): @@ -315,13 +316,13 @@ 
def submit_uuid(uuid, entity_factory, dryrun=False): Submit the given dataset, causing it to be ingested. """ if dryrun: - print(f'Not submitting uuid {uuid}.') + print(f"Not submitting uuid {uuid}.") return uuid else: uuid_entity_to_submit = entity_factory.get(uuid) rslt = entity_factory.submit_dataset( uuid=uuid, - contains_human_genetic_sequences=uuid_entity_to_submit.contains_human_genetic_sequences + contains_human_genetic_sequences=uuid_entity_to_submit.contains_human_genetic_sequences, ) return rslt @@ -341,67 +342,72 @@ def reorganize(source_uuid, **kwargs) -> None: must be formattable as frozen_df_fname.format(index_string) kwargs['verbose']: if present and True, increase verbosity of output """ - auth_tok = kwargs['auth_tok'] - mode = kwargs['mode'] - ingest = kwargs['ingest'] - dryrun = kwargs['dryrun'] - instance = kwargs['instance'] - frozen_df_fname = kwargs['frozen_df_fname'] - verbose = kwargs.get('verbose', False) + auth_tok = kwargs["auth_tok"] + mode = kwargs["mode"] + ingest = kwargs["ingest"] + dryrun = kwargs["dryrun"] + instance = kwargs["instance"] + frozen_df_fname = kwargs["frozen_df_fname"] + verbose = kwargs.get("verbose", False) dag_config = {} entity_factory = EntityFactory(auth_tok, instance=instance) - print(f'Decomposing {source_uuid}') + print(f"Decomposing {source_uuid}") source_entity = entity_factory.get(source_uuid) - if mode in ['all', 'stop']: - if hasattr(source_entity, 'data_types'): + if mode in ["all", "stop"]: + if hasattr(source_entity, "data_types"): assert isinstance(source_entity.data_types, str) source_data_types = source_entity.data_types else: source_data_types = None - source_metadata_files = list(source_entity.full_path.glob('*metadata.tsv')) + source_metadata_files = list(source_entity.full_path.glob("*metadata.tsv")) for src_idx, smf in enumerate(source_metadata_files): - source_df = pd.read_csv(smf, sep='\t') - source_df['canonical_assay_type'] = source_df.apply(get_canonical_assay_type, - axis=1, - entity_factory=entity_factory, - default_type=source_data_types) - source_df['new_uuid'] = source_df.apply(create_new_uuid, axis=1, - source_entity=source_entity, - entity_factory=entity_factory, - dryrun=dryrun) + source_df = pd.read_csv(smf, sep="\t") + source_df["canonical_assay_type"] = source_df.apply( + get_canonical_assay_type, + axis=1, + entity_factory=entity_factory, + default_type=source_data_types, + ) + source_df["new_uuid"] = source_df.apply( + create_new_uuid, + axis=1, + source_entity=source_entity, + entity_factory=entity_factory, + dryrun=dryrun, + ) source_df = apply_special_case_transformations(source_df, source_data_types) - print(source_df[['data_path', 'canonical_assay_type', 'new_uuid']]) - this_frozen_df_fname = frozen_df_fname.format('_' + str(src_idx)) - source_df.to_csv(this_frozen_df_fname, sep='\t', header=True, index=False) - print(f'wrote {this_frozen_df_fname}') + print(source_df[["data_path", "canonical_assay_type", "new_uuid"]]) + this_frozen_df_fname = frozen_df_fname.format("_" + str(src_idx)) + source_df.to_csv(this_frozen_df_fname, sep="\t", header=True, index=False) + print(f"wrote {this_frozen_df_fname}") - if mode == 'stop': + if mode == "stop": return - if mode in ['all', 'unstop']: - dag_config = {'uuid_list': [], 'collection_type': ''} + if mode in ["all", "unstop"]: + dag_config = {"uuid_list": [], "collection_type": ""} child_uuid_list = [] - source_metadata_files = list(source_entity.full_path.glob('*metadata.tsv')) + source_metadata_files = 
list(source_entity.full_path.glob("*metadata.tsv")) for src_idx, _ in enumerate(source_metadata_files): - this_frozen_df_fname = frozen_df_fname.format('_' + str(src_idx)) - source_df = pd.read_csv(this_frozen_df_fname, sep='\t') - print(f'read {this_frozen_df_fname}') + this_frozen_df_fname = frozen_df_fname.format("_" + str(src_idx)) + source_df = pd.read_csv(this_frozen_df_fname, sep="\t") + print(f"read {this_frozen_df_fname}") for _, row in source_df.iterrows(): - dag_config['uuid_list'].append(row['new_uuid']) - child_uuid_list.append(row['new_uuid']) + dag_config["uuid_list"].append(row["new_uuid"]) + child_uuid_list.append(row["new_uuid"]) populate(row, source_entity, entity_factory, dryrun=dryrun) update_upload_entity(child_uuid_list, source_entity, dryrun=dryrun, verbose=verbose) if ingest: - print('Beginning ingestion') + print("Beginning ingestion") for uuid in child_uuid_list: submit_uuid(uuid, entity_factory, dryrun) if not dryrun: - while entity_factory.get(uuid).status not in ['QA', 'Invalid', 'Error']: + while entity_factory.get(uuid).status not in ["QA", "Invalid", "Error"]: time.sleep(30) print(json.dumps(dag_config)) @@ -412,29 +418,33 @@ def main(): main """ parser = argparse.ArgumentParser() - simplified_frozen_df_fname = DEFAULT_FROZEN_DF_FNAME.format('') # no suffix - parser.add_argument("uuid", - help=("input .txt file containing uuids or" - " .csv or .tsv file with uuid column")) - parser.add_argument("--stop", - help=("stop after creating child uuids and writing" - f" {simplified_frozen_df_fname}"), - action="store_true") - parser.add_argument("--unstop", - help=("do not create child uuids;" - f" read {simplified_frozen_df_fname} and continue"), - action="store_true") - parser.add_argument("--instance", - help=("instance to use." - f" One of {list(ENDPOINTS)} (default %(default)s)"), - default='PROD') - parser.add_argument("--dryrun", - help=("describe the steps that would be taken but" - " do not make changes"), - action="store_true") - parser.add_argument("--ingest", - help="automatically ingest the generated datasets", - action="store_true") + simplified_frozen_df_fname = DEFAULT_FROZEN_DF_FNAME.format("") # no suffix + parser.add_argument( + "uuid", help=("input .txt file containing uuids or" " .csv or .tsv file with uuid column") + ) + parser.add_argument( + "--stop", + help=("stop after creating child uuids and writing" f" {simplified_frozen_df_fname}"), + action="store_true", + ) + parser.add_argument( + "--unstop", + help=("do not create child uuids;" f" read {simplified_frozen_df_fname} and continue"), + action="store_true", + ) + parser.add_argument( + "--instance", + help=("instance to use." f" One of {list(ENDPOINTS)} (default %(default)s)"), + default="PROD", + ) + parser.add_argument( + "--dryrun", + help=("describe the steps that would be taken but" " do not make changes"), + action="store_true", + ) + parser.add_argument( + "--ingest", help="automatically ingest the generated datasets", action="store_true" + ) args = parser.parse_args() @@ -454,11 +464,11 @@ def main(): dryrun = args.dryrun ingest = args.ingest if args.stop: - mode = 'stop' + mode = "stop" elif args.unstop: - mode = 'unstop' + mode = "unstop" else: - mode = 'all' + mode = "all" print( """ @@ -466,16 +476,18 @@ def main(): files around on PROD. Be very sure you know what it does before you run it! 
""" ) - auth_tok = input('auth_tok: ') - - reorganize(source_uuid, - auth_tok=auth_tok, - mode=mode, - ingest=ingest, - dryrun=dryrun, - instance=instance, - frozen_df_fname=DEFAULT_FROZEN_DF_FNAME) + auth_tok = input("auth_tok: ") + + reorganize( + source_uuid, + auth_tok=auth_tok, + mode=mode, + ingest=ingest, + dryrun=dryrun, + instance=instance, + frozen_df_fname=DEFAULT_FROZEN_DF_FNAME, + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/ingest-pipeline/submodules/ingest-validation-tools b/src/ingest-pipeline/submodules/ingest-validation-tools index 08fc7c69..80f78d30 160000 --- a/src/ingest-pipeline/submodules/ingest-validation-tools +++ b/src/ingest-pipeline/submodules/ingest-validation-tools @@ -1 +1 @@ -Subproject commit 08fc7c6922c7f6a73b913e520b73055f0349bd73 +Subproject commit 80f78d30542d3b084bbd56f07be9c4eceff6e9d0