Create transfer module to package spool files into tar and upload to DANDI #51
base: main
Changes from all commits
@@ -0,0 +1,120 @@
""" | ||
Convert a collection of spool .dat files generated by light sheet microscopy to compressed tar files and upload with the DANDI client. | ||
""" | ||
|
||
# stdlib | ||
import os | ||
from datetime import datetime | ||
from pathlib import Path | ||
|
||
# externals | ||
import tarfile | ||
import cyclopts | ||
import dandi.download | ||
import dandi.upload | ||
|
||
# internals | ||
from linc_convert.modalities.lsm.cli import lsm | ||
|
||
transfer = cyclopts.App(name="transfer", help_format="markdown") | ||
lsm.command(transfer) | ||
|
||
@transfer.default | ||
def dandi_transfer(input_dir, dandiset_url, subject, output_dir='.', max_size_gb=2.00, upload=False): | ||
""" | ||
Upload .dat files to DANDI in batched, compressed tar archives. | ||
|
||
Parameters | ||
---------- | ||
input_dir : str | ||
Directory containing .dat files to upload | ||
[Review comment] An alternative could be for the input to be a list of files, rather than doing the glob ourselves in the function. Dat files can still be filtered using a command-line glob. But I understand that the directory-based interface might be easier to use for Emin et al.
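The command-line glob example the comment alludes to is not preserved in this capture. As a hedged sketch, the suggested interface might look like the following; the signature is hypothetical, and only `transfer` and the parameter names echo this PR:

# Hypothetical alternative signature: take the matched files directly and let
# the shell expand a glob such as /data/spool/*.dat on the command line.
from pathlib import Path

@transfer.default
def dandi_transfer(dat_files: list[Path], dandiset_url: str, subject: str,
                   output_dir: str = '.', max_size_gb: float = 2.0, upload: bool = False):
    dat_files = sorted(dat_files)  # no Path(input_dir).glob("*.dat") inside the function
    ...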
    dandiset_url : str
        URL of the dandiset to upload to (e.g., https://lincbrain.org/dandiset/000010)
    subject : str
        Subject label used to name the archive directory and tar files (sub-<subject>)
    output_dir : str, optional
        Directory in which to save the downloaded dandiset (default: '.')
[Review comment] We delete everything in the end, right? Should we just use a …
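The comment is cut off in this capture. If the suggestion is a temporary directory (an assumption on my part), a minimal sketch could be:

# Assumption: download into a throwaway directory that is removed automatically,
# instead of exposing output_dir as a parameter.
import tempfile

with tempfile.TemporaryDirectory() as tmp_dir:
    dandi.download.download(dandiset_url, output_dir=tmp_dir)
    # ... build archives under tmp_dir and upload them from there ...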
    max_size_gb : float, optional
        Maximum size of each archive in GB (default: 2)
    upload : bool, optional
        Upload the archives to DANDI (default: False)
    """
    max_size_bytes = int(max_size_gb * 1024 * 1024 * 1024)

    # Download the existing dandiset so new assets can be added alongside it
    dandi.download.download(dandiset_url, output_dir=output_dir)

    dandiset_id = dandiset_url.split('/')[-1]
    dandiset_directory = f'{output_dir}/{dandiset_id}'

    # Ensure a dataset_description.json exists at the dandiset root
    if not os.path.exists(f'{dandiset_directory}/dataset_description.json'):
        with open(f'{dandiset_directory}/dataset_description.json', 'w') as f:
            f.write('{}')

    archive_directory = f'{dandiset_directory}/sourcedata/sub-{subject}'
    os.makedirs(archive_directory, exist_ok=True)

    dat_files = list(Path(input_dir).glob("*.dat"))
    dat_files_size = len(dat_files)
    if dat_files_size:
        print(f"Found {dat_files_size} .dat files in '{input_dir}'.")
    else:
        print(f"No .dat files found in '{input_dir}'.")
        return

    batch = 0
    file_number = 0

    while file_number < dat_files_size:

        print(f"\n[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Creating archive batch {batch}")

        archive_path = os.path.join(archive_directory, f"sub-{subject}_desc-batch{batch}.tar")
        archive = tarfile.open(archive_path, "w")

        batch_size = 0
        batch_files = 0

        # Fill the archive until it reaches the size limit or the files run out.
        # The size is checked before adding, so a batch can exceed max_size_bytes
        # by up to one file.
        while batch_size < max_size_bytes and file_number < dat_files_size:

            file_path = dat_files[file_number]
            file_size = os.path.getsize(file_path)

            print(f"Adding '{file_path.name}' ({file_size/1024**2:.2f}MB, {file_number}) to archive.")
            archive.add(file_path, arcname=file_path.name)

            batch_size += file_size
            batch_files += 1
            file_number += 1

        archive.close()
        print(f"Archive created with {batch_files} files and {batch_size / 1024**2:.2f}MB size.")

        if upload:
            print(f"Uploading {archive_path}.")

            # Pick the DANDI instance that matches the dandiset URL
            if 'lincbrain.org' in dandiset_url:
                dandi_instance = 'linc'
            elif 'dandiarchive.org' in dandiset_url:
                dandi_instance = 'dandi'
            else:
                raise ValueError(f"Unknown DANDI instance: {dandiset_url}")

            # Retry until the upload succeeds (e.g., after a transient network error)
            success = False
            while not success:
                try:
                    dandi.upload.upload([dandiset_directory],
                                        dandi_instance=dandi_instance,
                                        )
                    success = True
                    print("Upload successful.")
                except Exception as e:
                    print(f"Upload failed with error: {e}")

            # Remove the local archive once it has been uploaded, to free disk space
            os.remove(archive_path)

        batch += 1

        print(f"Progress: {file_number}/{dat_files_size} files processed ({file_number/dat_files_size*100:.2f}%).")

    print(f"{file_number} files processed successfully.")
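For reference, a hypothetical end-to-end invocation of the new command, calling the function directly; the import path and argument values are illustrative, and only the signature comes from this PR:

# Hypothetical usage of the transfer command defined above;
# the module path is assumed, not confirmed by this PR.
from linc_convert.modalities.lsm.transfer import dandi_transfer

dandi_transfer(
    input_dir="/data/spool",                               # directory of .dat spool files
    dandiset_url="https://lincbrain.org/dandiset/000010",  # URL from the docstring example
    subject="mouse01",                                     # archives named sub-mouse01_desc-batch<N>.tar
    max_size_gb=2.0,                                       # ~2 GB per tar archive
    upload=True,                                           # push each batch to the 'linc' instance
)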