diff --git a/src/ingest-pipeline/airflow/dags/cwl/portal-containers b/src/ingest-pipeline/airflow/dags/cwl/portal-containers index 7e22a888..a9f03dc9 160000 --- a/src/ingest-pipeline/airflow/dags/cwl/portal-containers +++ b/src/ingest-pipeline/airflow/dags/cwl/portal-containers @@ -1 +1 @@ -Subproject commit 7e22a8884bb3e59fc31db01275002b81598aa068 +Subproject commit a9f03dc918d86ecb3154d0e79796451ddaa72a41 diff --git a/src/ingest-pipeline/airflow/dags/cwl/salmon-rnaseq b/src/ingest-pipeline/airflow/dags/cwl/salmon-rnaseq index ce04e2cf..07f43493 160000 --- a/src/ingest-pipeline/airflow/dags/cwl/salmon-rnaseq +++ b/src/ingest-pipeline/airflow/dags/cwl/salmon-rnaseq @@ -1 +1 @@ -Subproject commit ce04e2cf5cd180448eb6107806b20d867d0411c6 +Subproject commit 07f434937a27f0159030b86d9c22efaf8a1d0553 diff --git a/src/ingest-pipeline/airflow/dags/cwl/sprm b/src/ingest-pipeline/airflow/dags/cwl/sprm index 3bc79b4a..d9d98c65 160000 --- a/src/ingest-pipeline/airflow/dags/cwl/sprm +++ b/src/ingest-pipeline/airflow/dags/cwl/sprm @@ -1 +1 @@ -Subproject commit 3bc79b4abb37243f0b8ff48d8ecdf95882f0b199 +Subproject commit d9d98c65542bd15472a504b08d8f4411c23afc2c diff --git a/src/ingest-pipeline/airflow/dags/salmon_rnaseq.py b/src/ingest-pipeline/airflow/dags/salmon_rnaseq.py index 61eb814c..40819b3a 100644 --- a/src/ingest-pipeline/airflow/dags/salmon_rnaseq.py +++ b/src/ingest-pipeline/airflow/dags/salmon_rnaseq.py @@ -320,9 +320,15 @@ def get_salmon_dag_params(assay: str) -> SequencingDagParameters: SequencingDagParameters( dag_id="salmon_rnaseq_10x", pipeline_name="salmon-rnaseq", - assay="10x", + assay="10x_v3", dataset_type="salmon_rnaseq_10x", ), + SequencingDagParameters( + dag_id="salmon_rnaseq_10x_sn", + pipeline_name="salmon-rnaseq", + assay="10x_v3_sn", + dataset_type="salmon_rnaseq_10x_sn", + ), get_salmon_dag_params("sciseq"), get_salmon_dag_params("slideseq"), get_salmon_dag_params("snareseq"), diff --git a/src/ingest-pipeline/airflow/dags/sc_atac_seq.py b/src/ingest-pipeline/airflow/dags/sc_atac_seq.py index 1796f462..244f3bee 100644 --- a/src/ingest-pipeline/airflow/dags/sc_atac_seq.py +++ b/src/ingest-pipeline/airflow/dags/sc_atac_seq.py @@ -79,6 +79,7 @@ def build_cwltool_cmd1(**kwargs): command = [ *get_cwltool_base_cmd(tmpdir), + cwl_workflows[0], "--assay", params.assay, "--outdir", diff --git a/src/ingest-pipeline/airflow/dags/workflow_map.yml b/src/ingest-pipeline/airflow/dags/workflow_map.yml index 3b09203d..8c4e0069 100644 --- a/src/ingest-pipeline/airflow/dags/workflow_map.yml +++ b/src/ingest-pipeline/airflow/dags/workflow_map.yml @@ -58,7 +58,7 @@ workflow_map: 'workflow': 'sc_atac_seq_sn' - 'collection_type': 'generic_metadatatsv' 'assay_type': 'snRNAseq' - 'workflow': 'salmon_rnaseq_10x' + 'workflow': 'salmon_rnaseq_10x_sn' - 'collection_type': 'bulkatacseq_collection' 'assay_type': 'ATACseq-bulk' 'workflow': 'bulk_atacseq' diff --git a/src/ingest-pipeline/misc/tools/metadata_tsv_splitter.ipynb b/src/ingest-pipeline/misc/tools/metadata_tsv_splitter.ipynb index 16833ea7..9d9c6f10 100644 --- a/src/ingest-pipeline/misc/tools/metadata_tsv_splitter.ipynb +++ b/src/ingest-pipeline/misc/tools/metadata_tsv_splitter.ipynb @@ -9,7 +9,13 @@ "import numpy as np\n", "import pandas as pd\n", "import os\n", - "import csv" + "import csv\n", + "from pathlib import Path\n", + "from pprint import pprint\n", + "from collections import defaultdict\n", + "from datetime import datetime\n", + "from urllib.parse import urlparse, parse_qs\n", + "from io import StringIO" ] }, { @@ -25,15 +31,25 @@ 
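
Note on the hunks above: the sc_atac_seq.py change fixes a real omission (without cwl_workflows[0] the assembled cwltool command line never named the workflow to run), and the workflow_map.yml change is what actually routes snRNAseq datasets to the new salmon_rnaseq_10x_sn DAG. A minimal sketch of that routing, assuming entries shaped like the ones in the hunk; lookup_workflow is an illustrative helper, not the pipeline's real lookup code:

    from typing import Optional
    import yaml  # PyYAML

    def lookup_workflow(map_path: str, collection_type: str, assay_type: str) -> Optional[str]:
        """Return the DAG registered for the first matching workflow_map entry, or None."""
        with open(map_path) as f:
            entries = yaml.safe_load(f)['workflow_map']
        for entry in entries:
            if (entry['collection_type'] == collection_type
                    and entry['assay_type'] == assay_type):
                return entry['workflow']
        return None  # no workflow registered for this combination

    # After this change:
    # lookup_workflow('workflow_map.yml', 'generic_metadatatsv', 'snRNAseq')
    # yields 'salmon_rnaseq_10x_sn' instead of 'salmon_rnaseq_10x'.
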
"metadata": {}, "outputs": [], "source": [ - "input_dir = \"/home/welling/git/hubmap/ingest-pipeline/src/ingest-pipeline/md/florida_10x_md\"\n", - "in_fname = os.path.join(input_dir, \"UFLA_10XscRNAseq_metadata_050520_-_UFLA_10XscRNAseq_metadata_050520.tsv\")" + "# Where the constructed tree of metadata files will go\n", + "build_tree_root = Path(\"/home/welling/git/hubmap/ingest-pipeline/src/ingest-pipeline/misc/tools/build_tree_root\")" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "Read the records" + "input_dir = \"/home/welling/git/hubmap/ingest-pipeline/src/ingest-pipeline/md/all_md\"\n", + "#base_path = Path(input_dir) / 'UCSD_R2_snRNAseq'\n", + "#base_path = Path(input_dir) / 'UFLA_R2_10X'\n", + "#base_path = Path(input_dir) / 'UFLA_R2_CODEX'\n", + "#base_path = Path(input_dir) / 'STAN_R2_snRNA'\n", + "base_path = Path(input_dir) / 'CALT_R2_sciATAC'\n", + "#in_fname = os.path.join(input_dir, \"UFLA_CODEX_LY_Metadata_110920.tsv\")\n", + "#for path in base_path.glob('**/*.xlsx'):\n", + "# print(path.stem)" ] }, { @@ -42,20 +58,36 @@ "metadata": {}, "outputs": [], "source": [ - "recs = []\n", - "with open(in_fname, 'r', newline='') as f:\n", - " dialect = csv.Sniffer().sniff(f.read(128))\n", - " f.seek(0)\n", - " reader = csv.DictReader(f, dialect=dialect)\n", - " for row in reader:\n", - " recs.append({k : v for k, v in row.items()})" + "df_d = {}\n", + "metadata_df = None\n", + "for path in base_path.glob('**/*.xlsx'):\n", + " print(path)\n", + " df = pd.read_excel(path)\n", + " true_stem = Path(path.stem)\n", + " while Path(true_stem.stem) != true_stem:\n", + " true_stem = Path(true_stem.stem)\n", + " df_d[true_stem] = df\n", + " if 'assay_type' in df.columns:\n", + " if metadata_df is None:\n", + " metadata_df = df.copy()\n", + " else:\n", + " metadata_df = metadata_df.append(df)\n", + " print(f'{true_stem} -> {df_d[true_stem].columns}')\n", + "\n", + "# special logic needed to straighten out CALT sciATAC\n", + "for key in df_d:\n", + " if 'contributors' in str(key):\n", + " df_d[Path('contributors')] = df_d[key]\n", + " break" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "Check what we've read. Beware of columns named '' or ' ' - they indicate a trailing tab and thus an empty column in the input file!" 
+    "assert metadata_df is not None, \"metadata file not found\""
   ]
  },
  {
@@ -64,7 +96,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "print(reader.fieldnames)"
+    "metadata_df.columns"
   ]
  },
  {
@@ -73,7 +105,14 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "print(recs[0])"
+    "metadata_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Begin block of mapping data used to guess uuids from tissue display ids ###"
   ]
  },
  {
@@ -82,7 +121,160 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "print(dialect.delimiter) # should be tab"
+    "samp_to_uuid_map = {}\n",
+    "stan_snrna_uuid_map = { # maps sample TMC ID to uuid\n",
+    "    \"STAN0008-LI-1-1\":\"a04d0138ed6b28810c5afa01d392bbd5\", # misplaced\n",
+    "    \"STAN0008-LI-2-1\":\"a078805198f9f7f022b83de898a608a9\", # misplaced\n",
+    "    \"STAN0008-LI-3-1\":\"c3d36358b184be55ac977abea5755447\", # misplaced\n",
+    "    \"STAN0008-LI-4-1\":\"57288d8a0a9374ea83f90582df8eafa2\", # misplaced\n",
+    "    \"STAN0007-LI-1-1\":\"f1937797246fa4592bff6166d7666de5\", # misplaced\n",
+    "    \"STAN0007-LI-3-1\":\"e18254c67c8a0bb625f3748a2501a0bb\", # misplaced\n",
+    "    \"STAN0007-LI-4-1\":\"492574f47224661fe8674f60373e44f4\", # misplaced\n",
+    "    \"STAN0007-LI-2-1\":\"da93581ef554e25ec1c7a12500a56b74\", # misplaced\n",
+    "}\n",
+    "samp_to_uuid_map.update(stan_snrna_uuid_map)\n",
+    "calt_sciatacseq_txt = \"\"\"\n",
+    "0bf827ea01b64963d39a10cac69bc386,CALT0006-HT-2\n",
+    "2d4d2f368c6f74cc3aa17177924003b8,CALT0012-HT-1\n",
+    "48da185436f006156d7e5c1941bfb147,CALT0005-SP-1\n",
+    "58ebb89caf1512e9452d1f9e0e1efa8e,CALT0003-HT\n",
+    "616a1aa904dfb1299f86910db2a20fbe,CALT0011-LV-1\n",
+    "76bfd5a517c681e5f672fecff2057111,CALT0012-HT-2\n",
+    "8ea82dc9f26bb2c01f19ddd19b3812b6,CALT0004-PA-2\n",
+    "8f6b8e19c21a664d67a467c3a08b5630,CALT0003-HT-2\n",
+    "93cc8c450db50a224dce243a43131d3c,CALT0010-HT-1\n",
+    "a0df02bda8befa491f86b0d41f2810ed,CALT0005-RL-1\n",
+    "a6a7f2b0b419aefb6f8ffb9bfa9ce7d5,CALT0004-RL-1\n",
+    "acaf75b8292db4a79dc14e3021742217,CALT0005-HT-1\n",
+    "ad26d1046084c5640f911a84e5cd0cee,CALT0003-HT-5\n",
+    "b2db3414cedf8805d20df3cf753842ca,CALT0011-HT-1\n",
+    "bc19b2d489ddef9e135a67bcc9746695,CALT0006-PA-1\n",
+    "bd435ed6aa55e9f57d783ce630d746bf,CALT0003-HT-3\n",
+    "bf88e07da70ee088e31c7f568e41b196,CALT0011-HT-2\n",
+    "d4fc9da8a21cbb323d55f38983fb3dbb,CALT0006-HT-1\n",
+    "dd39ed081ffc887d85fc8225c71b37dc,CALT0009-HT-1\n",
+    "e4b371ea3ed4c3ca77791b34b829803f,CALT0004-HT-1\n",
+    "ead5cc01250b4f9ea73dd91503c313a5,CALT0007-HT-1\n",
+    "eb4958e8b5dd073e8a4a80bd613b2d64,CALT0009-LV-1\n",
+    "f1b9f55b12e16d1e11a5ebbd863b5787,CALT0005-PA-1\n",
+    "\"\"\"\n",
+    "\n",
+    "calt_sciatacseq_map = {}\n",
+    "for line in StringIO(calt_sciatacseq_txt):\n",
+    "    words = line.strip().split(',')\n",
+    "    if len(words) == 2:\n",
+    "        uuid, samp = words\n",
+    "        calt_sciatacseq_map[samp] = uuid\n",
+    "samp_to_uuid_map.update(calt_sciatacseq_map)\n",
+    "pprint(samp_to_uuid_map)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### End block of mapping data used to guess uuids from tissue display ids ###"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The following block produces inv_uuid_map, used for the special case of UCSD snRNAseq data ##"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true,
+    "tags": [
+     "hide-input"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "#\n",
+    "# /tmp/junk.txt below was produced by running 'find -type f -print' on the HIVE host\n",
+    "# and grepping for 'contributors'\n",
+    "#\n",
+ 
"#uuid_map = {}\n", + "#for line in open('/tmp/junk.txt'):\n", + "# words = line.strip().split('/')\n", + "# uuid_map[words[1]] = words[-1]\n", + "uuid_map = {\n", + " '0487454555924b54dd3f5b5232e3c77e': 'BUKMAP_20190529L_10X-R_contributors.tsv',\n", + " '05197e30394fd88affff0a9c214c8c4c': 'LAPMAP_20200317J_10X-R_contributors.tsv',\n", + " '065295e6e58b1d3555a261c1bfe3b3fe': 'LAPMAP_20200317M_10X-R_contributors.tsv',\n", + " '06ff98c01295ca5ea504a676f73f9a09': 'BUKMAP_20200304B_10X-R_contributors.tsv',\n", + " '0736735768692d6ca0cd96149b743be1': 'LAPMAP_20200317N_10X-R_contributors.tsv',\n", + " '0c3ce767d87527f41fd705fd469390a0': 'BUKMAP_20200302A_10X-R_contributors.tsv',\n", + " '17be751d961c5baf6c1dbe2e70c5d93c': 'BUKMAP_20191104B_10X-R_contributors.tsv',\n", + " '1b83223cf1f4446c625adbfb375ab3fd': 'BUKMAP_20200205D_10X-R_contributors.tsv',\n", + " '24eaa9730abe57c1c22f74573b846a6f': 'BUKMAP_20191029_10X-R_contributors.tsv',\n", + " '26b642ddbae00e7ff6570ddd57557e26': 'LAPMAP_20200317I_10X-R_contributors.tsv',\n", + " '2d27debfce3d25040af54fb77b25427b': 'BUKMAP_20200707A_10X-R_contributors.tsv',\n", + " '3b1490026022f850e4d3c3fb5e2283c9': 'LAPMAP_20191217E_10X-R_contributors.tsv',\n", + " '3fe18ec025f612ca2c5308d4c234da50': 'LAPMAP_20200317H_10X-R_contributors.tsv',\n", + " '46e8ffd2350efd19f771c6fb6a51f6cc': 'BUKMAP_20200304F_10X-R_contributors.tsv',\n", + " '488f364142c308a9692e0b529f6697dd': 'BUKMAP_20190822F_10X-R_contributors.tsv',\n", + " '4bef8fa6eab2d3eb8734bf418c0634ef': 'BUKMAP_20190829B_10X-R_contributors.tsv',\n", + " '4ea7a4cf1a6ff0df0cc33c1236633112': 'BUKMAP_20200205F_10X-R_contributors.tsv',\n", + " '68e6dfa4807ca615883f73a5067115cb': 'LAPMAP_20200317L_10X-R_contributors.tsv',\n", + " '69a0ada10f4f119f99ce5f66cf3b1a94': 'BUKMAP_20200702C_10X-R_contributors.tsv',\n", + " '6a75230d8d1063fcc8568537212211f5': 'BUKMAP_20200302B_10X-R_contributors.tsv',\n", + " '8e5c8f0cc61aad4fcbc5cc119bdf4c96': 'LAPMAP_20200317K_10X-R_contributors.tsv',\n", + " '9049f48b97dc5edc737b67783a47e918': 'BUKMAP_20200702D_10X-R_contributors.tsv',\n", + " '99c5c80509be87d2356d19a9ed8b22ff': 'BUKMAP_20191104A_10X-R_contributors.tsv',\n", + " 'a8652e9e3c545e61e1ffe9d54a8f1fd2': 'LAPMAP_20191217G_10X-R_contributors.tsv',\n", + " 'c686b93a809ec1f54a0d96bc25d3d207': 'BUKMAP_20190607L_10X-R_contributors.tsv',\n", + " 'cd887a6beabc794992876ad7ee591f69': 'BUKMAP_20200304A_10X-R_contributors.tsv',\n", + " 'cfc125d6d916f121e92a8406a0502a38': 'BUKMAP_20200707C_10X-R_contributors.tsv',\n", + " 'ec88a6b161dce97a2361b1479c69a036': 'BUKMAP_20191009_10X-R_contributors.tsv',\n", + " 'f1b130f1200ae1fabe56cb506245490c': 'BUKMAP_20191010_10X-R_contributors.tsv',\n", + " 'fa6d9c732c7f239422ec6b357136fcd4': 'BUKMAP_20200707B_10X-R_contributors.tsv',\n", + " 'fd0c0fcde5a331c9dfff52b520c7d792': 'BUKMAP_20200205A_10X-R_contributors.tsv'\n", + "}\n", + "pprint(uuid_map)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "# Checking that we can use filenames as unique keys\n", + "counts = defaultdict(int)\n", + "for key, val in uuid_map.items():\n", + " counts[val] += 1\n", + "for key, val in counts.items():\n", + " print(f'{key}: {val}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "inv_uuid_map = {val:key for key, val in uuid_map.items()}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "## End block of special-purpose support code for UCSD snRNAseq ##" ] }, { @@ -102,21 +294,46 @@ " return s and len(s) == 32 and all([c in '0123456789abcdef' for c in list(s)])\n", "\n", "def get_uuid(s):\n", - " words = s.split('/')\n", - " while words:\n", - " if is_uuid(words[0]):\n", - " return words[0]\n", + " if s.startswith(('http:', 'https:')):\n", + " parsed = urlparse(s)\n", + " if parsed.netloc == 'app.globus.org':\n", + " origin_path = parse_qs(urlparse(s).query)['origin_path'][0]\n", + " return get_uuid(origin_path)\n", " else:\n", - " words = words[1:]\n", - " return None" + " raise RuntimeError(f'Unrecognized URL {s}')\n", + " else:\n", + " words = s.split('/')\n", + " while words:\n", + " if is_uuid(words[0]):\n", + " return words[0]\n", + " else:\n", + " words = words[1:]\n", + "\n", + "#for idx, row in metadata_df.iterrows():\n", + "# print(get_uuid(row['data_path']))" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], + "source": [ + "def reformat_datetime(dt_str):\n", + " return datetime.fromisoformat(dt_str).strftime(\"%Y-%m-%d %H:%M\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "Write the output single-line metadata.tsv files.\n", - "*Output is written to the directory in which this notebook is running.*" + "def get_true_stem(some_path):\n", + " true_stem = Path(some_path.stem)\n", + " while true_stem != Path(true_stem.stem):\n", + " true_stem = Path(true_stem.stem)\n", + " return true_stem" ] }, { @@ -125,18 +342,80 @@ "metadata": {}, "outputs": [], "source": [ - "for rawrec in recs:\n", - " rec = rawrec.copy() # don't stomp on original\n", - " #display(rec)\n", - " uuid = get_uuid(rec['data_path'])\n", - " out_fname = '{}-metadata.tsv'.format(uuid)\n", - " print(out_fname)\n", - " rec['metadata_path'] = '.'\n", - " rec['data_path'] = '.'\n", - " with open(out_fname, 'w', newline='') as f:\n", - " writer = csv.DictWriter(f, dialect=dialect, fieldnames=reader.fieldnames)\n", - " writer.writeheader()\n", - " writer.writerow(rec)" + "def fix_antibodies_df(df):\n", + " \"\"\"\n", + " This adds columns to get the antibodies dataframe past the current versions of the\n", + " antibodies.tsv table schema.\n", + " \"\"\"\n", + " column_names = [elt for elt in df.columns]\n", + " assert 'conjugated_cat_number' in column_names, 'conjugated_cat_number is not present'\n", + " offset = [idx for idx, val in enumerate(column_names) if val == 'conjugated_cat_number'][0]\n", + " new_column_names = (['version']\n", + " + column_names[:offset]\n", + " + ['concentration_value', 'concentration_unit']\n", + " + column_names[offset:])\n", + " new_column_names = (column_names[:offset]\n", + " + column_names[offset:])\n", + " print(new_column_names)\n", + " out_df = df.copy().reindex(columns=new_column_names)\n", + " #out_df['version'] = 2\n", + " return out_df\n", + "\n", + "#print([k for k in df_d])\n", + "#display(fix_antibodies_df(df_d[Path('UFLA_antibodies_121120')]).head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert not build_tree_root.exists(), f'delete or move aside {build_tree_root}'\n", + "build_tree_root.mkdir()\n", + "for idx, row in metadata_df.iterrows():\n", + " c_p = row['contributors_path']\n", + " if row['contributors_path'] in inv_uuid_map:\n", + " uuid = inv_uuid_map[row['contributors_path']]\n", + " elif row['tissue_id'] in samp_to_uuid_map:\n", 
+ " uuid = samp_to_uuid_map[row['tissue_id']]\n", + " else:\n", + " uuid = get_uuid(row['data_path'])\n", + " if not uuid:\n", + " print(f'No uuid found for record {idx}')\n", + " continue\n", + " print(f'row {idx} -> {uuid}')\n", + " uuid_path = build_tree_root / uuid\n", + " uuid_path.mkdir()\n", + " path_str = row['contributors_path']\n", + " if path_str.startswith('/'): # common error\n", + " path_str = path_str[1:]\n", + " contributors_path = Path(path_str)\n", + " if 'antibodies_path' in row:\n", + " path_str = row['antibodies_path']\n", + " if path_str.startswith('/'): # common error\n", + " path_str = path_str[1:]\n", + " antibodies_path = Path(path_str)\n", + " row['antibodies_path'] = str(Path('extras').joinpath(antibodies_path))\n", + " else:\n", + " antibodies_path = None\n", + " print(contributors_path.stem)\n", + " print([k for k in df_d])\n", + " assert get_true_stem(contributors_path) in df_d, f\"Cannot find contributors dataframe {contributors_path}\"\n", + " row['contributors_path'] = str(Path('extras').joinpath(contributors_path))\n", + " row['data_path'] = '.'\n", + " for col in metadata_df.columns:\n", + " if col.endswith('_datetime'):\n", + " row[col] = reformat_datetime(str(row[col]))\n", + " row_df = pd.DataFrame([row])\n", + " row_df.to_csv(uuid_path / f'{uuid}-metadata.tsv', header=True, sep='\\t', index=False)\n", + " (uuid_path / 'extras').mkdir()\n", + " df_d[get_true_stem(contributors_path)].to_csv(uuid_path / row['contributors_path'],\n", + " header=True, sep='\\t', index=False)\n", + " if antibodies_path:\n", + " df = df_d[get_true_stem(antibodies_path)]\n", + " fix_antibodies_df(df).to_csv(uuid_path / row['antibodies_path'],\n", + " header=True, sep='\\t', index=False)\n" ] }, { @@ -145,6 +424,62 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### This block should get reintegrated into the flow ###" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "recs = []\n", + "with open(in_fname, 'r', newline='') as f:\n", + " dialect = csv.Sniffer().sniff(f.read(128))\n", + " f.seek(0)\n", + " reader = csv.DictReader(f, dialect=dialect)\n", + " for row in reader:\n", + " recs.append({k : v for k, v in row.items()})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check what we've read. Beware of columns named '' or ' ' - they indicate a trailing tab and thus an empty column in the input file!" 
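
Stepping back to the tree-building loop above: each dataset ends up as build_tree_root/&lt;uuid&gt;/&lt;uuid&gt;-metadata.tsv plus an extras/ subdirectory holding the contributors tsv (and the antibodies tsv when one exists). A small sketch for eyeballing the result, using the build_tree_root defined at the top of the notebook:

    # Print each generated uuid directory and its files, e.g.
    #   <uuid> -> ['<uuid>-metadata.tsv', 'extras/..._contributors.tsv']
    for uuid_path in sorted(build_tree_root.iterdir()):
        files = sorted(str(p.relative_to(uuid_path))
                       for p in uuid_path.rglob('*') if p.is_file())
        print(f'{uuid_path.name} -> {files}')
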
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(reader.fieldnames)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(recs[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(dialect.delimiter) # should be tab" + ] } ], "metadata": { diff --git a/src/ingest-pipeline/misc/tools/new_dataset_survey.py b/src/ingest-pipeline/misc/tools/new_dataset_survey.py index f29febdb..1fca60f6 100755 --- a/src/ingest-pipeline/misc/tools/new_dataset_survey.py +++ b/src/ingest-pipeline/misc/tools/new_dataset_survey.py @@ -75,7 +75,8 @@ def main(): main """ parser = argparse.ArgumentParser() - parser.add_argument("uuid_txt", help="input files containing uuids") + parser.add_argument("uuid_txt", + help="input .txt file containing uuids or .csv or .tsv file with uuid column") parser.add_argument("--out", help="name of the output .tsv file", required=True) args = parser.parse_args() auth_tok = input('auth_tok: ') diff --git a/src/ingest-pipeline/misc/tools/post_validation_cleanup.sh b/src/ingest-pipeline/misc/tools/post_validation_cleanup.sh new file mode 100755 index 00000000..c8ef299d --- /dev/null +++ b/src/ingest-pipeline/misc/tools/post_validation_cleanup.sh @@ -0,0 +1,42 @@ +#!/bin/bash -ex + +uuid=$1 +echo $uuid +echo `basename "$PWD"` +if [[ `basename "$PWD"` != $uuid ]]; then + echo "run this from the $uuid directory" + exit -1 +else + if [ -e validation_report.txt ] ; then + if [[ `cat validation_report.txt` == 'No errors!' ]] ; then + rm validation_report.txt + else + echo "Validation report is not clean" + exit -1 + fi + else + echo "No validation report found" + exit -1 + fi + pushd extras + for fname in *metadata.tsv.orig *contributors.tsv.orig ; do + if [ -e ${fname} ] ; then + rm $fname + else + echo "nothing to remove for ${fname}" + fi + done + for fname in *.fastq ; do + if [ ! -e ${fname} ] ; then + echo 'no fastq files found' + break + fi + if [ -e ../${fname}.gz ] ; then + rm $fname + fi + done + popd +fi + + + diff --git a/src/ingest-pipeline/misc/tools/restructure_dataset.sh b/src/ingest-pipeline/misc/tools/restructure_dataset.sh new file mode 100755 index 00000000..8243a245 --- /dev/null +++ b/src/ingest-pipeline/misc/tools/restructure_dataset.sh @@ -0,0 +1,38 @@ +#!/bin/bash -ex + +uuid=$1 +echo $uuid +echo `basename "$PWD"` +if [[ `basename "$PWD"` != $uuid ]]; then + echo "run this from the $uuid directory" + exit -1 +else + mkdir -p extras + for fname in *metadata.tsv *contributors.tsv ; do + if [ -e ${fname} ] ; then + mv $fname extras/${fname}.orig + else + echo "nothing to move for ${fname}" + fi + done + for fname in *.fastq ; do + if [ ! -e ${fname} ] ; then + echo 'no fastq files found' + break + fi + if [ -e ${fname}.gz ] ; then + mv $fname extras + else + gzip $fname + fi + done +fi +metafiles_tar="/tmp/build_tree.tar" +pushd .. 
+for fname in `tar -tf $metafiles_tar | grep $uuid` ; do
+    echo $fname
+    tar -xvf $metafiles_tar $fname
+done
+popd
+
+
diff --git a/src/ingest-pipeline/misc/tools/survey.py b/src/ingest-pipeline/misc/tools/survey.py
index 5cf75498..671be57b 100755
--- a/src/ingest-pipeline/misc/tools/survey.py
+++ b/src/ingest-pipeline/misc/tools/survey.py
@@ -167,7 +167,7 @@ def describe(self, prefix='', file=sys.stdout):
             self.kids[kid].describe(prefix=prefix+'    ', file=file)
 
 
-    def build_rec(self):
+    def build_rec(self, include_all_children=False):
         """
         Returns a dict containing:
 
@@ -180,6 +180,9 @@ def build_rec(self):
             QA_child.data_types[0] (verifying there is only 1 entry)
             QA_child.status (which must be QA or Published)
             note
+
+        If include_all_children=True, all child datasets are included rather
+        than just those that are QA or Published.
         """
         rec = {'uuid': self.uuid, 'display_doi': self.display_doi, 'status': self.status,
                'group_name': self.group_name}
@@ -201,20 +204,26 @@ def build_rec(self):
                 rec['sample_display_doi'] = samp.display_doi
             else:
                 rec['sample_display_doi'] = 'multiple'
-        qa_kids = [self.kids[uuid] for uuid in self.kids if self.kids[uuid].status in ['QA', 'Published']]
-        if any(qa_kids):
-            if len(qa_kids) > 1:
-                rec['note'] = 'Multiple QA derived datasets'
-            this_kid = qa_kids[0]
-            rec['qa_child_uuid'] = this_kid.uuid
-            rec['qa_child_display_doi'] = this_kid.display_doi
-            rec['qa_child_data_type'] = this_kid.data_types[0]
-            rec['qa_child_status'] = this_kid.status
+        if include_all_children:
+            filtered_kids = list(self.kids.values())
+            uuid_hdr, doi_hdr, data_type_hdr, status_hdr, multi_note = ('child_uuid', 'child_display_doi',
+                                                                        'child_data_type', 'child_status',
+                                                                        'Multiple derived datasets')
+        else:
+            filtered_kids = [kid for kid in self.kids.values() if kid.status in ['QA', 'Published']]
+            uuid_hdr, doi_hdr, data_type_hdr, status_hdr, multi_note = ('qa_child_uuid', 'qa_child_display_doi',
+                                                                        'qa_child_data_type', 'qa_child_status',
+                                                                        'Multiple QA derived datasets')
+        if filtered_kids:
+            rec['note'] = multi_note if len(filtered_kids) > 1 else ''
+            this_kid = filtered_kids[0]
+            rec[uuid_hdr] = this_kid.uuid
+            rec[doi_hdr] = this_kid.display_doi
+            rec[data_type_hdr] = this_kid.data_types[0]
+            rec[status_hdr] = this_kid.status
         else:
-            rec['qa_child_uuid'] = None
-            rec['qa_child_display_doi'] = None
-            rec['qa_child_data_type'] = None
-            rec['qa_child_status'] = None
+            for key in [uuid_hdr, doi_hdr, data_type_hdr, status_hdr]:
+                rec[key] = None
             rec['note'] = ''
         return rec
@@ -299,8 +308,10 @@ def main():
     main
     """
     parser = argparse.ArgumentParser()
-    parser.add_argument("metadatatsv", help="input .tsv or .xlsx file")
+    parser.add_argument("metadatatsv", help="input .tsv or .xlsx file, or a list of uuids in a .txt file")
     parser.add_argument("--out", help="name of the output .tsv file", required=True)
+    parser.add_argument("--include_all_children", action="store_true",
+                        help="include all children, not just those in the QA or Published states")
     args = parser.parse_args()
     auth_tok = input('auth_tok: ')
     entity_factory = EntityFactory(auth_tok)
@@ -308,6 +319,13 @@ def main():
         in_df = pd.read_csv(args.metadatatsv, sep='\t')
     elif args.metadatatsv.endswith('.xlsx'):
         in_df = pd.read_excel(args.metadatatsv)
+    elif args.metadatatsv.endswith('.txt'):
+        # a list of bare uuids, one per line
+        recs = []
+        for line in open(args.metadatatsv):
+            assert is_uuid(line.strip()), f'text file {args.metadatatsv} contains non-uuid {line.strip()}'
+            recs.append({'data_path': line.strip()})
+        in_df = 
pd.DataFrame(recs) else: raise RuntimeError('Unrecognized input file format') in_df['uuid'] = in_df.apply(get_uuid, axis=1) @@ -319,7 +337,7 @@ def main(): ds = entity_factory.get(uuid) ds.describe() new_uuids = ds.all_uuids() - rec = ds.build_rec() + rec = ds.build_rec(include_all_children=args.include_all_children) if any([uuid in known_uuids for uuid in new_uuids]): old_note = rec['note'] if 'note' in rec else '' rec['note'] = 'UUID COLLISION! ' + old_note @@ -328,9 +346,13 @@ def main(): out_df = pd.DataFrame(out_recs).rename(columns={'sample_display_doi':'sample_doi', 'sample_hubmap_display_id':'sample_display_id', 'qa_child_uuid':'derived_uuid', + 'child_uuid':'derived_uuid', 'qa_child_display_doi':'derived_doi', + 'child_display_doi':'derived_doi', 'qa_child_data_type':'derived_data_type', - 'qa_child_status':'derived_status'}) + 'child_data_type':'derived_data_type', + 'qa_child_status':'derived_status', + 'child_status':'derived_status'}) out_df.to_csv(args.out, sep='\t', index=False)
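
One subtlety in the final rename above: it maps both the qa_child_* and child_* column families onto the same derived_* names. That is safe only because build_rec emits exactly one family per run; if both ever coexisted, pandas rename would quietly produce duplicate column labels rather than raising. A quick illustration with a hypothetical two-column frame:

    import pandas as pd

    df = pd.DataFrame([{'qa_child_uuid': 'a', 'child_uuid': 'b'}])
    out = df.rename(columns={'qa_child_uuid': 'derived_uuid',
                             'child_uuid': 'derived_uuid'})
    print(list(out.columns))  # ['derived_uuid', 'derived_uuid'] -- duplicated label
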