Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
84 commits
Select commit Hold shift + click to select a range
a1f81aa
Create module to package spool files into tar and upload to DANDI
kabilar May 13, 2025
8114a91
Remove print statement
kabilar May 13, 2025
07aa8e5
Update init
kabilar May 14, 2025
ad3dd76
Update default value
kabilar May 15, 2025
05284de
Revert gzip usage
kabilar May 15, 2025
ffce115
Add API key to GitHub Action
kabilar May 16, 2025
9f143aa
Add test for `transfer.py` module
kabilar May 16, 2025
41dc406
Merge branch 'main' of https://github.com/lincbrain/linc-convert into…
kabilar May 16, 2025
a6938de
Add manual trigger for CI
kabilar May 16, 2025
fe7043d
Add `dandi` to dependencies
kabilar May 16, 2025
53e3397
Fix pyproject file
kabilar May 16, 2025
b27dfa7
Add `upload` flag
kabilar May 16, 2025
3b26e22
Fix `linc-convert` call
kabilar May 16, 2025
b67cbd2
Try different order in github action
kabilar May 16, 2025
692ef28
Revert order change
kabilar May 16, 2025
8029ed3
Update subprocess call
kabilar May 16, 2025
638946c
Fix subprocess call
kabilar May 16, 2025
83e1bad
Wait until subprocess finishes
kabilar May 16, 2025
b438146
Only remove the `tar` if uploading
kabilar May 16, 2025
40c7827
Fix CI
kabilar May 16, 2025
bd38d6c
Fix subprocess call
kabilar May 16, 2025
bfe7ab2
Define API key explicitly
kabilar May 16, 2025
0475c78
Fix CI
kabilar May 16, 2025
b7746e2
[WIP] Add print statement
kabilar May 16, 2025
b91ce99
Revert CI change
kabilar May 16, 2025
9191f15
Update subprocess call
kabilar May 16, 2025
2f7792b
Add print statement for subprocess call
kabilar May 16, 2025
aa61768
Clean up test
kabilar May 16, 2025
85531cb
Fix subprocess call
kabilar May 16, 2025
300c0b1
Update subprocess call
kabilar May 16, 2025
560e5c0
Switch from `popen` to `run`
kabilar May 16, 2025
bdafa32
Pass environment variable to subprocess call
kabilar May 16, 2025
3d5f61b
[WIP] Use Python module instead of CLI
kabilar May 17, 2025
81acaae
[WIP] Try authenticating with `DandiAPIClient`
kabilar May 17, 2025
3d332c7
[WIP] Try authenticating with `DandiAPIClient`
kabilar May 17, 2025
ba3719d
[WIP] Explicitly pass token
kabilar May 17, 2025
0b94fde
Add print statement (TODO: refresh API token)
kabilar May 17, 2025
8415680
Update ci
kabilar May 17, 2025
2edfac4
Remove print statements after API key refresh
kabilar May 17, 2025
0aa4fb7
Fix ci
kabilar May 17, 2025
5113e9c
[WIP] Try using `pytest-env`
kabilar May 17, 2025
d534d4f
Update `DandiAPIClient` call
kabilar May 17, 2025
73fb121
Fix pyproject file
kabilar May 17, 2025
4e46ee2
Revert changes
kabilar May 17, 2025
3d15db3
Update pytest call
kabilar May 17, 2025
88f2130
Add back DandiAPIClient call
kabilar May 17, 2025
dc2a5cc
Update ci
kabilar May 17, 2025
8da0196
Revert pyproject pytest changes
kabilar May 17, 2025
4937101
Switch to downloading the data within the workflow
kabilar May 17, 2025
0a9e79c
Fix ci
kabilar May 17, 2025
ec900cd
Fix ci
kabilar May 17, 2025
a85a862
Fix ci
kabilar May 17, 2025
65189fb
Fix ci
kabilar May 17, 2025
c198bfc
Fix ci
kabilar May 17, 2025
66d96a7
Fix ci
kabilar May 17, 2025
91f8099
Fix ci
kabilar May 17, 2025
0991eb9
Fix ci
kabilar May 17, 2025
6f79772
Fix ci
kabilar May 17, 2025
4604aba
Fix ci
kabilar May 17, 2025
dce4c71
Switch to using S3 bucket
kabilar May 17, 2025
b561fa2
Fix ci
kabilar May 17, 2025
4cd8a74
Fix ci
kabilar May 17, 2025
c4116bc
Fix ci
kabilar May 17, 2025
ef1517a
Remove os.remove call
kabilar May 17, 2025
405cfc5
Fix directory
kabilar May 17, 2025
53b8cd8
Switch to downloading data with dandi Python API
kabilar May 17, 2025
e084266
Add api key and trigger
kabilar May 17, 2025
328e51b
Fix ci
kabilar May 17, 2025
48d21b1
Switch argument name and remove subprocess call
kabilar May 17, 2025
5fc47fc
Add print statements
kabilar May 17, 2025
e03f0aa
Add print statement
kabilar May 17, 2025
b0917af
Update print statements
kabilar May 17, 2025
5879286
Change path handling
kabilar May 17, 2025
7102116
Update print statement
kabilar May 17, 2025
a3cadb3
Fix path handling
kabilar May 17, 2025
e6fd12a
Update print statement
kabilar May 17, 2025
13006c2
Remove extra os.remove call
kabilar May 17, 2025
143e0b0
Revert changes
kabilar May 17, 2025
331e510
Remove print statements
kabilar May 17, 2025
ddf7ad0
Remove extra imports
kabilar May 17, 2025
8fb8c3c
Merge branch 'main' of https://github.com/kabilar/linc-convert into l…
kabilar May 17, 2025
46c5bef
Parse dandi instance from dandiset url
kabilar Jun 12, 2025
367ba0b
Pin `dandi` to minimum version
kabilar Jun 12, 2025
d044e40
Add a loop to continually attempt the data upload until successful
kabilar Jun 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
name: CI
on: pull_request
on:
pull_request:
push:
branches:
- main
workflow_dispatch:

jobs:
ci:
Expand Down Expand Up @@ -36,5 +41,7 @@ jobs:
- name: Install the project dependencies
run: poetry install --with dev --extras all
- name: Run the automated tests
env:
DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }}
run: poetry run pytest -v
working-directory: ./tests
4 changes: 2 additions & 2 deletions linc_convert/modalities/lsm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
try:
import tifffile as _ # noqa: F401

__all__ = ["cli", "mosaic", "multi_slice", "spool"]
from . import cli, mosaic, multi_slice, spool
__all__ = ["cli", "mosaic", "multi_slice", "spool", "transfer"]
from . import cli, mosaic, multi_slice, spool, transfer
except ImportError:
pass
120 changes: 120 additions & 0 deletions linc_convert/modalities/lsm/transfer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""
Convert a collection of spool .dat files generated by light sheet microscopy to compressed tar files and upload with the DANDI client.
"""

# stdlib
import os
from datetime import datetime
from pathlib import Path

# externals
import tarfile
import cyclopts
import dandi.download
import dandi.upload

# internals
from linc_convert.modalities.lsm.cli import lsm

transfer = cyclopts.App(name="transfer", help_format="markdown")
lsm.command(transfer)

@transfer.default
def dandi_transfer(input_dir, dandiset_url, subject, output_dir='.', max_size_gb=2.00, upload=False):
    """
    Upload .dat files to DANDI in batched tar archives.

    Downloads the existing dandiset metadata, packs all ``*.dat`` files in
    ``input_dir`` into tar archives of at most ``max_size_gb`` each under
    ``sourcedata/sub-<subject>/``, and (optionally) uploads each archive,
    deleting the local tar after a successful upload.

    Parameters
    ----------
    input_dir : str
        Directory containing .dat files to upload
    dandiset_url : str
        URL for the dandiset to upload
        (e.g., https://lincbrain.org/dandiset/000010)
    subject : str
        Subject label used in the archive names
        (``sub-<subject>_desc-batch<N>.tar``)
    output_dir : str, optional
        Directory to save the Dandiset directory (default: '.')
    max_size_gb : float, optional
        Maximum size for each archive in GB (default: 2)
    upload : bool, optional
        Upload data to DANDI (default: False)

    Raises
    ------
    ValueError
        If ``upload`` is requested and the DANDI instance cannot be
        determined from ``dandiset_url``.
    """
    max_size_bytes = int(max_size_gb * 1024 * 1024 * 1024)

    # Resolve the DANDI instance up front so a bad URL fails fast,
    # before any download or tarring work is done.
    dandi_instance = None
    if upload:
        if 'lincbrain.org' in dandiset_url:
            dandi_instance = 'linc'
        elif 'dandiarchive.org' in dandiset_url:
            dandi_instance = 'dandi'
        else:
            raise ValueError(f"Unknown DANDI instance: {dandiset_url}")

    dandi.download.download(dandiset_url, output_dir=output_dir)

    dandiset_id = dandiset_url.split('/')[-1]
    dandiset_directory = Path(output_dir) / dandiset_id

    # DANDI requires a dataset_description.json; create an empty stub if
    # the dandiset does not already have one.
    description_file = dandiset_directory / 'dataset_description.json'
    if not description_file.exists():
        description_file.write_text('{}')

    archive_directory = dandiset_directory / 'sourcedata' / f'sub-{subject}'
    archive_directory.mkdir(parents=True, exist_ok=True)

    # Sort so batch contents are deterministic across runs (glob order is
    # filesystem-dependent).
    dat_files = sorted(Path(input_dir).glob("*.dat"))
    dat_files_size = len(dat_files)
    if dat_files_size:
        print(f"Found {dat_files_size} .dat files in '{input_dir}'.")
    else:
        print(f"No .dat files found in '{input_dir}'.")
        return

    batch = 0
    file_number = 0

    while file_number < dat_files_size:

        print(f"\n[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Creating archive batch {batch}")

        archive_path = archive_directory / f"sub-{subject}_desc-batch{batch}.tar"

        batch_size = 0
        batch_files = 0

        # Context manager guarantees the tar is closed (and valid) even if
        # adding a member raises.
        with tarfile.open(archive_path, "w") as archive:
            while batch_size < max_size_bytes and file_number < dat_files_size:

                file_path = dat_files[file_number]
                file_size = os.path.getsize(file_path)

                print(f"Adding '{file_path.name}' ({file_size/1024**2:.2f}MB, {file_number}) to archive.")
                archive.add(file_path, arcname=file_path.name)

                batch_size += file_size
                batch_files += 1
                file_number += 1

        print(f"Archive created with {batch_files} files and {batch_size / 1024**2:.2f}MB size.")

        if upload:
            print(f"Uploading {archive_path}.")

            # Retry until the upload succeeds (large transfers over flaky
            # connections), pausing between attempts so we do not hammer
            # the server in a tight loop.
            while True:
                try:
                    dandi.upload.upload([str(dandiset_directory)],
                                        dandi_instance=dandi_instance,
                                        )
                    print("Upload successful.")
                    break
                except Exception as e:
                    print(f"Upload failed with error: {str(e)}")
                    time.sleep(10)

            # Only delete the local tar once it is safely on the server.
            os.remove(archive_path)

        batch += 1

        print(f"Progress: {file_number}/{dat_files_size} files processed ({file_number/dat_files_size*100:.2f}%).")

    print(f"{file_number} files uploaded successfully.")
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,18 @@ zarr = "^2.0.0"
nifti-zarr = "*"
# optionals
glymur = { version = "*", optional = true }
dandi = { version = ">=0.68.1", optional = true }
tifffile = { version = "*", optional = true }
h5py = { version = "*", optional = true }
scipy = { version = "*", optional = true }
wkw = { version = "*", optional = true }

[tool.poetry.extras]
df = ["glymur"]
lsm = ["tifffile"]
lsm = ["dandi", "tifffile"]
psoct = ["h5py", "scipy"]
wk = ["wkw"]
all = ["glymur", "tifffile", "h5py", "scipy", "wkw"]
all = ["glymur", "dandi", "tifffile", "h5py", "scipy", "wkw"]

[tool.poetry.group.dev]
optional = true
Expand Down
35 changes: 34 additions & 1 deletion tests/test_lsm.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from pathlib import Path

import filecmp
import numpy as np
import os
import tarfile
import tifffile

from helper import _cmp_zarr_archives
from linc_convert.modalities.lsm import mosaic
from linc_convert.modalities.lsm import mosaic, transfer


def _write_test_data(directory: str) -> None:
Expand All @@ -26,3 +29,33 @@ def test_lsm(tmp_path):
output_zarr = tmp_path / "output.zarr"
mosaic.convert(str(tmp_path), out=str(output_zarr))
assert _cmp_zarr_archives(str(output_zarr), "data/lsm.zarr.zip")

def test_transfer():
    """Round-trip check: tar the .dat files without uploading, then extract
    the archives and verify names and total byte counts match the input."""

    input_dir = './000051/sourcedata/sub-test1'

    # NOTE: `dandi_transfer` has no `dandi_instance` parameter — the
    # instance is derived from the dandiset URL, so it must not be passed
    # here (doing so raises TypeError).
    transfer.dandi_transfer(input_dir=input_dir,
                            dandiset_url='https://lincbrain.org/dandiset/000051',
                            subject='test1',
                            output_dir='.',
                            max_size_gb=0.02,
                            upload=False)

    extract_dir = './sub-test1'
    # exist_ok so the test is rerunnable without a manual cleanup step.
    os.makedirs(extract_dir, exist_ok=True)

    tar_files = list(Path(input_dir).glob("*.tar"))
    for tar_file in tar_files:
        with tarfile.open(tar_file, "r") as tar:
            tar.extractall(path=extract_dir)
        os.remove(tar_file)

    dirs_cmp = filecmp.dircmp(input_dir, extract_dir)

    assert len(dirs_cmp.left_only) == 0 and len(dirs_cmp.right_only) == 0, "Files do not match"

    def _dir_size(directory):
        # os.listdir returns bare names, so os.path.isfile(name) would be
        # evaluated against the CWD and silently sum to 0; iterate full
        # paths instead.
        return sum(p.stat().st_size for p in Path(directory).iterdir() if p.is_file())

    assert _dir_size(input_dir) == _dir_size(extract_dir), "File sizes do not match"
Loading