Commits
162 commits
4b7382b
wip: zarr 3 and sharding support
calvinchai Apr 1, 2025
fd64096
wip: zarr 3 and sharding, add auto shard size
calvinchai Apr 3, 2025
0713701
wip: cleanup
calvinchai Apr 3, 2025
5b331d6
wip
calvinchai Apr 3, 2025
450bb1e
wip
calvinchai Apr 3, 2025
da01c5f
wip
calvinchai Apr 3, 2025
c8480b7
wip: added a trivial pipeline for testing
calvinchai Apr 7, 2025
4336097
wip: added a trivial pipeline for testing
calvinchai Apr 7, 2025
3fa46ff
Merge branch 'refs/heads/zarr3-temp' into zarr3
calvinchai Apr 7, 2025
6ba6374
wip: add logging
calvinchai Apr 7, 2025
d4ec4ad
wip: try catch for wk modality
calvinchai Apr 7, 2025
3e9239a
wip
calvinchai Apr 7, 2025
2f9d868
wip: refactor generate_pyramid.py
calvinchai Apr 8, 2025
4d14b81
wip: psoct
calvinchai Apr 8, 2025
577d434
wip: add a trivial pipeline for psoct for testing purpose
calvinchai Apr 8, 2025
b525f6c
wip
calvinchai Apr 9, 2025
1c7c1c2
wip
calvinchai Apr 10, 2025
caca57f
wip
calvinchai Apr 10, 2025
843a698
wip
calvinchai Apr 17, 2025
5ccaad5
wip
calvinchai Apr 21, 2025
61d00b9
wip: oct data conversion
calvinchai Apr 22, 2025
581166c
wip: psoct
calvinchai Apr 23, 2025
8ce8336
wip: oct data conversion, optimization
calvinchai Apr 24, 2025
1a3c473
Merge branch 'main' into zarr3
calvinchai Apr 24, 2025
3c94fe6
wip: oct data conversion, optimization
calvinchai Apr 24, 2025
b9d0a44
merge fix
calvinchai Apr 24, 2025
d2c00e8
feat: parallelization for pyramid generation
calvinchai May 1, 2025
5589d90
wip: psoct
calvinchai May 15, 2025
382181f
wip: psoct
calvinchai May 21, 2025
5778ce6
wip: psoct
calvinchai May 22, 2025
2b4dc36
wip: psoct, sum in loop
calvinchai May 29, 2025
e66b7e4
wip: psoct
calvinchai Jun 5, 2025
c84b16d
wip: psoct
calvinchai Jun 5, 2025
b0052e6
Merge branch 'zarr3' of https://github.com/lincbrain/linc-convert int…
kabilar Jun 13, 2025
fbb89d2
Add attribution for ps-oct code
kabilar Jun 13, 2025
dfcc911
Merge pull request #55 from kabilar/zarr3-psoct
kabilar Jun 17, 2025
78931ce
wip: psoct
calvinchai Jun 11, 2025
8d55c7b
wip: psoct, attempts optimize dask pipeline
calvinchai Jun 12, 2025
99cb6f6
wip: psoct, finalize optimization
calvinchai Jun 17, 2025
cc80221
stage: remove testing modalities
calvinchai Jun 20, 2025
39b0e65
stage: remove mosaic pipelines for review
calvinchai Jun 20, 2025
516e93c
refactor: change tests file structure
calvinchai Jun 20, 2025
21ff852
fix: zarr config replace
calvinchai Jun 20, 2025
95fddfc
tests: psoct pipeline
calvinchai Jun 20, 2025
536bf23
refactor
calvinchai Jun 23, 2025
21bb3e1
fix: add exception
calvinchai Jun 23, 2025
bcc119c
feat: use tensorstore in pyramid generation
calvinchai Jun 23, 2025
6b38bae
tests: add unit test for psoct pipelines
calvinchai Jun 23, 2025
ac83ab0
chore: add backward func
calvinchai Jun 23, 2025
21b937d
fix: generate pyramid use correct zarr version
calvinchai Jun 23, 2025
d75a4a9
feat: use abstract classes for zarrio
calvinchai Jun 23, 2025
5a28dc8
fix: add dependency, dask
calvinchai Jun 23, 2025
22adc21
tests: refactor test helper function
calvinchai Jun 24, 2025
28b3126
tests: add sample data
calvinchai Jun 24, 2025
81cdef9
tests: shadowed old tests for pipelines that needs revise for zarr in…
calvinchai Jun 24, 2025
3d52f2a
tests: updated oct pipeline unit tests
calvinchai Jun 24, 2025
883edcc
fix: zarr 3 requires python >=3.11, update project requirements
calvinchai Jun 24, 2025
1b6373e
chore: disable file logger for now,
calvinchai Jun 24, 2025
fcab95c
fix: dependency add tensorstore
calvinchai Jun 24, 2025
fdd5fcf
feat: update multi_slice.py to use unified zarrio interface
calvinchai Jun 24, 2025
78f2428
chore: remove unused code
calvinchai Jun 24, 2025
0c8f3e0
feat(wip): zarrio for tensorstore
calvinchai Jun 24, 2025
995c059
feat: zarrio implementation for tensorstore
calvinchai Jun 25, 2025
026d3c9
feat: zarrio implementation for tensorstore, test update
calvinchai Jun 25, 2025
20c16cd
feat: pyramid generation level data persist
calvinchai Jun 25, 2025
3719aec
refactor: split zarrio into files
calvinchai Jun 26, 2025
8c35154
update dependencies
calvinchai Jun 26, 2025
fc05df0
test: update testing util
calvinchai Jun 26, 2025
1f03f24
refactor
calvinchai Jun 26, 2025
78930a9
fix: tensorstore fix chunk size
calvinchai Jun 26, 2025
c598f7c
refactor
calvinchai Jun 26, 2025
82fc791
tests: recover lsm test
calvinchai Jun 26, 2025
7438b3d
style: cleanup
calvinchai Jun 26, 2025
299b985
fix: use zarr-python for metadata
calvinchai Jun 26, 2025
f6edb8c
fix: improve tensorstore import error handling
calvinchai Jun 26, 2025
6846335
chore: cleanup
calvinchai Jul 1, 2025
3f65c55
chore: improve type hint
calvinchai Jul 1, 2025
e944d39
cleanup
calvinchai Jul 1, 2025
997f37c
test: add linc test data dir
calvinchai Jul 1, 2025
76fa219
test: ignore real test data files
calvinchai Jul 1, 2025
29fb116
chore: improve type hint
calvinchai Jul 1, 2025
ccf4249
Merge branch 'main' into zarr3
calvinchai Jul 1, 2025
809b8b2
fix: add dependencies
calvinchai Jul 1, 2025
59f68d2
tests: move dir
calvinchai Jul 1, 2025
eb5153d
tests: update .gitignore
calvinchai Jul 1, 2025
5daaad1
tests: optimize oct tests
calvinchai Jul 1, 2025
fabd7af
tests: refactor
calvinchai Jul 1, 2025
b301477
tests: update oct test data
calvinchai Jul 1, 2025
16398a8
tests: add heavy tests data from dandi
calvinchai Jul 1, 2025
14c06e0
tests: cleanup
calvinchai Jul 3, 2025
ec871b0
wip: refresh df modality
calvinchai Jul 3, 2025
9e3a223
feat: add optional argument to skip compare nii header for fast debug…
calvinchai Jul 8, 2025
237bdf2
tests: add pytests config
calvinchai Jul 10, 2025
3e3e608
docs
calvinchai Jul 10, 2025
9bb8245
tests: refresh regression test for df pipelines
calvinchai Jul 10, 2025
92cfefd
tests: refresh regression test for df pipelines
calvinchai Jul 10, 2025
3e0b0be
fix: update single_slice.py for df pipeline with new apis
calvinchai Jul 15, 2025
170ce6b
test: update df single_slice tests
calvinchai Jul 15, 2025
8e177f4
feat: unified ome_metadata and nifti_header write interface
calvinchai Jul 15, 2025
9ad0f04
feat: update using new interface
calvinchai Jul 15, 2025
43acb8b
tests: multi driver tests
calvinchai Jul 15, 2025
44a9c78
chore
calvinchai Jul 15, 2025
972b184
tests: use multi-driver tests
calvinchai Jul 15, 2025
af0ee98
style
calvinchai Jul 15, 2025
ae73445
test: improve testing logic and fix typo
calvinchai Jul 15, 2025
4db69b3
feat: lsm mosaic use new api
calvinchai Jul 15, 2025
bb3fe91
refactor
calvinchai Jul 17, 2025
3fe8c69
feat: update lsm pipelines to new api
calvinchai Jul 17, 2025
171f02e
tests
calvinchai Jul 17, 2025
45a7c8d
tests
calvinchai Jul 17, 2025
f93b95b
tests
calvinchai Jul 17, 2025
0851a63
tests
calvinchai Jul 17, 2025
22e2f1b
style
calvinchai Jul 17, 2025
e069e87
style
calvinchai Jul 17, 2025
aa26601
cleanup
calvinchai Jul 17, 2025
90215b4
style
calvinchai Jul 18, 2025
5888310
docs
calvinchai Jul 21, 2025
74054a0
fix: skip wkw unit test
calvinchai Jul 21, 2025
7fd73fd
fix: update chunk correctly
calvinchai Jul 21, 2025
b4b508a
docs
calvinchai Jul 21, 2025
03bb132
fix: api update
calvinchai Jul 21, 2025
4c0da18
fix: api update
calvinchai Jul 21, 2025
cc43bbe
docs
calvinchai Jul 21, 2025
ab24a58
fix: update api
calvinchai Jul 21, 2025
eaa8d6c
fix: update api
calvinchai Jul 21, 2025
80b9eb0
fix: update api
calvinchai Jul 21, 2025
89c4d6a
docs and style
calvinchai Jul 22, 2025
5692afa
docs
calvinchai Jul 24, 2025
d9635b4
style: fix indent
calvinchai Aug 1, 2025
c384fa4
style: fix indent
calvinchai Aug 1, 2025
044b6ba
style: fix indent
calvinchai Aug 1, 2025
9d826da
fix: remove duplicated unused function
calvinchai Aug 1, 2025
d3c70b4
fix: add to __all__ only when import succeeded
calvinchai Aug 1, 2025
3ce5ff9
fix: remove redundant import
calvinchai Aug 1, 2025
bbf81a1
chore: remove unused code
calvinchai Aug 4, 2025
204ffba
style
calvinchai Aug 4, 2025
54cb047
fix: wrong comment
calvinchai Aug 4, 2025
26f3c84
feat: add setitem handling for zarrio
calvinchai Aug 4, 2025
0575741
style
calvinchai Aug 4, 2025
768b26a
fix: ensure string
calvinchai Aug 4, 2025
8963632
fix: make sure out is string
calvinchai Aug 4, 2025
44b8cee
feat: fsspec support
calvinchai Aug 4, 2025
4d5748d
docs
calvinchai Aug 4, 2025
3a53075
feat: use mean for default pyramid generation. support passing function
calvinchai Aug 4, 2025
0718426
fix: use mean for pyramid generation
calvinchai Aug 4, 2025
2b0ac6b
fix: wrong element used
calvinchai Aug 5, 2025
c58116c
BREAKING CHANGE: use zarr-python to manage zarr group in ZarrTSGroup
calvinchai Aug 7, 2025
cbf5601
chore: remove unused func
calvinchai Aug 7, 2025
46a759a
refactor
calvinchai Aug 7, 2025
34fbff7
docs
calvinchai Aug 7, 2025
83649ab
cleanup
calvinchai Aug 7, 2025
b3502d2
feat: factory implemented
calvinchai Aug 7, 2025
4427642
feat: read array attributes with tsarray
calvinchai Aug 7, 2025
a0de94d
cleanup
calvinchai Aug 7, 2025
79e0eee
feat: read array attributes with tsarray
calvinchai Aug 7, 2025
fd88fe1
revert: stop using zarr-python for managing groups.
calvinchai Aug 11, 2025
6338e9f
feat: support attributes natively
calvinchai Aug 12, 2025
2c9c02c
feat: support metadata natively
calvinchai Aug 12, 2025
517a6a6
cleanup: remove unused file
calvinchai Aug 12, 2025
de8440d
docs, style: fix ruff errors
calvinchai Aug 12, 2025
69ce85c
feat: get rid of default zarr-python usage
calvinchai Aug 12, 2025
aefbb34
feat: get rid of default zarr-python usage
calvinchai Aug 12, 2025
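Most of these commits replace direct zarr-python calls with a backend-agnostic `zarrio` layer: abstract group/array classes, a factory driven by `ZarrConfig`, and a tensorstore-backed implementation (`ZarrTSGroup`). A minimal sketch of that pattern follows, assuming names and signatures pieced together from the commit messages and the diff below; the actual interface in the package may differ.

```python
# Illustrative sketch only -- the real linc-convert zarrio API may differ.
from abc import ABC, abstractmethod
from typing import Any


class ZarrGroupBase(ABC):
    """Backend-agnostic handle to an OME-Zarr group (hypothetical base class)."""

    def __init__(self, config: Any) -> None:
        self.config = config

    @abstractmethod
    def create_array(self, name: str, shape: tuple, dtype: str, zarr_config: Any) -> None:
        """Create one pyramid-level array, chunked/sharded per the config."""

    @abstractmethod
    def write_ome_metadata(self, axes: list, space_scale: list, **kwargs: Any) -> None:
        """Write OME-Zarr multiscale metadata covering the levels created so far."""

    @abstractmethod
    def write_nifti_header(self, header: Any) -> None:
        """Store a serialized NIfTI header next to the pyramid (nifti-zarr)."""


class ZarrTSGroup(ZarrGroupBase):
    """Stand-in for the tensorstore-backed implementation (stubbed here)."""

    def create_array(self, name, shape, dtype, zarr_config):
        print(f"tensorstore backend: create {name} shape={shape} dtype={dtype}")

    def write_ome_metadata(self, axes, space_scale, **kwargs):
        print(f"tensorstore backend: multiscales axes={axes} scale={space_scale}")

    def write_nifti_header(self, header):
        print("tensorstore backend: write nifti header")


def from_config(zarr_config: Any) -> ZarrGroupBase:
    """Factory: choose a backend from the config (only the stub backend shown)."""
    return ZarrTSGroup(zarr_config)
```

In the `multi_slice.py` diff below, the converter only goes through this surface (`from_config(zarr_config)`, `create_array`, `write_ome_metadata`, `write_nifti_header`), which is presumably what lets the same pipeline target either zarr-python or tensorstore.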
4 changes: 3 additions & 1 deletion .gitignore
@@ -157,4 +157,6 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/

tests/data/000051/
4 changes: 1 addition & 3 deletions conda.yaml
@@ -2,16 +2,14 @@ name: linc-convert
channels:
- conda-forge
dependencies:
- python=3.10
- python
- ipython
- typer
- numpy
- glymur
- zarr
- nibabel
- tifffile
- wkw
- tensorstore
- pytest
- ruff
- tifffile
1 change: 1 addition & 0 deletions linc_convert/__init__.py
@@ -1,4 +1,5 @@
"""Data conversion tools for the LINC project."""

__all__ = ["modalities", "utils"]

from . import modalities, utils
1 change: 1 addition & 0 deletions linc_convert/modalities/__init__.py
@@ -1,4 +1,5 @@
"""Converters for all imaging modalities."""

__all__ = ["df", "lsm", "wk", "psoct"]

from . import df, lsm, psoct, wk
1 change: 1 addition & 0 deletions linc_convert/modalities/df/__init__.py
@@ -4,6 +4,7 @@
import glymur as _ # noqa: F401

__all__ = ["cli", "multi_slice", "single_slice"]

from . import cli, multi_slice, single_slice
except ImportError:
pass
189 changes: 53 additions & 136 deletions linc_convert/modalities/df/multi_slice.py
@@ -6,50 +6,44 @@
"""

# stdlib
import ast
import json
import os
from typing import Unpack

# externals
import glymur
import nibabel as nib
import numpy as np
import zarr
from cyclopts import App


# internals
from linc_convert import utils
from linc_convert.modalities.df.cli import df
from linc_convert.utils.j2k import WrappedJ2K, get_pixelsize
from linc_convert.utils.io.j2k import WrappedJ2K, get_pixelsize
from linc_convert.utils.io.zarr import from_config
from linc_convert.utils.math import ceildiv, floordiv
from linc_convert.utils.orientation import center_affine, orientation_to_affine
from linc_convert.utils.zarr.compressor import make_compressor
from linc_convert.utils.zarr.zarr_config import ZarrConfig
from linc_convert.utils.zarr_config import ZarrConfig, update_default_config

HOME = "/space/aspasia/2/users/linc/000003"

# Path to LincBrain dataset
LINCSET = os.path.join(HOME, "sourcedata")
LINCOUT = os.path.join(HOME, "rawdata")


ms = App(name="multislice", help_format="markdown")
df.command(ms)


@ms.default
def convert(
inp: list[str],
*,
out: str,
zarr_config: ZarrConfig = None,
max_load: int = 16384,
orientation: str = "coronal",
center: bool = True,
thickness: float | None = None,
**kwargs
) -> None:
inp: list[str],
*,
zarr_config: ZarrConfig = None,
orientation: str = "coronal",
center: bool = True,
thickness: float | None = None,
**kwargs: Unpack[ZarrConfig],
) -> None:
"""
Convert JPEG2000 files generated by MBF-Neurolucida into a Zarr pyramid.

@@ -71,7 +65,7 @@ def convert(
* the second letter corresponds to the vertical dimension and
indicates the anatomical meaning of the _bottom_ of the jp2 image,
* the third letter corresponds to the slice dimension and
indicates the anatomical meaninff of the _end_ of the stack.
indicates the anatomical meaning of the _end_ of the stack.

We also provide the aliases

@@ -85,35 +79,17 @@
----------
inp
Path to the input slices
out
Path to the output Zarr directory [<INP>.ome.zarr]
max_load
Maximum input chunk size
orientation
Orientation of the slice
center
Set RAS[0, 0, 0] at FOV center
thickness
Slice thickness
"""
zarr_config = utils.zarr.zarr_config.update(zarr_config, **kwargs)
chunk: int = zarr_config.chunk[0]
compressor: str = zarr_config.compressor
compressor_opt: str = zarr_config.compressor_opt
nii: bool = zarr_config.nii

# Default output path
if not out:
out = os.path.splitext(inp[0])[0]
out += ".nii.zarr" if nii else ".ome.zarr"
nii = nii or out.endswith(".nii.zarr")

if isinstance(compressor_opt, str):
compressor_opt = ast.literal_eval(compressor_opt)

# Prepare Zarr group
omz = zarr.storage.DirectoryStore(out)
omz = zarr.group(store=omz, overwrite=True)
zarr_config = update_default_config(zarr_config, **kwargs)
zarr_config.set_default_name(os.path.splitext(inp[0])[0])
max_load = zarr_config.max_load
omz = from_config(zarr_config)

nblevel, has_channel, dtype_jp2 = float("inf"), float("inf"), ""

@@ -132,24 +108,16 @@
if has_channel:
new_size += (3,)
print(len(inp), new_size, nblevel, has_channel)

# Prepare chunking options
opt = {
"chunks": list(new_size[2:]) + [1] + [chunk, chunk],
"dimension_separator": r"/",
"order": "F",
"dtype": dtype_jp2,
"fill_value": 0,
"compressor": make_compressor(compressor, **compressor_opt),
}
print(opt)
chunks = list(new_size[2:]) + [1] + list(zarr_config.chunk[-2:])
zarr_config.chunk = tuple(chunks)
print(new_size)
# Write each level
for level in range(nblevel):
shape = [ceildiv(s, 2**level) for s in new_size[:2]]
shape = [ceildiv(s, 2 ** level) for s in new_size[:2]]
shape = [new_size[2]] + [len(inp)] + shape

omz.create_dataset(f"{level}", shape=shape, **opt)
# omz.create_dataset(f"{level}", shape=shape, **opt)
omz.create_array(str(level), shape, dtype=dtype_jp2, zarr_config=zarr_config)
array = omz[f"{level}"]

# Write each slice
@@ -159,109 +127,67 @@
subdat = WrappedJ2K(j2k, level=level)
subdat_size = subdat.shape
print(
"Convert level",
level,
"with shape",
shape,
"for slice",
idx,
"with size",
subdat_size,
)
"Convert level",
level,
"with shape",
shape,
"for slice",
idx,
"with size",
subdat_size,
)

# offset while attaching
x = floordiv(shape[-2] - subdat_size[-2], 2)
y = floordiv(shape[-1] - subdat_size[-1], 2)

for channel in range(3):
if max_load is None or (
subdat_size[-2] < max_load and subdat_size[-1] < max_load
subdat_size[-2] < max_load and subdat_size[-1] < max_load
):
array[
channel, idx, x : x + subdat_size[-2], y : y + subdat_size[-1]
] = subdat[channel : channel + 1, ...][0]
channel, idx, x: x + subdat_size[-2], y: y + subdat_size[-1]
] = subdat[channel: channel + 1, ...][0]
else:
ni = ceildiv(subdat_size[-2], max_load)
nj = ceildiv(subdat_size[-1], max_load)

for i in range(ni):
for j in range(nj):
print(f"\r{i+1}/{ni}, {j+1}/{nj}", end=" ")
print(f"\r{i + 1}/{ni}, {j + 1}/{nj}", end=" ")
start_x, end_x = (
i * max_load,
min((i + 1) * max_load, subdat_size[-2]),
)
)
start_y, end_y = (
j * max_load,
min((j + 1) * max_load, subdat_size[-1]),
)
)

array[
channel,
idx,
x + start_x : x + end_x,
y + start_y : y + end_y,
channel,
idx,
x + start_x: x + end_x,
y + start_y: y + end_y,
] = subdat[
channel : channel + 1,
channel: channel + 1,
start_x:end_x,
start_y:end_y,
][0]
][0]

print("")

# Write OME-Zarr multiscale metadata
print("Write metadata")
multiscales = [
{
"version": "0.4",
"axes": [
{"name": "z", "type": "space", "unit": "micrometer"},
{"name": "y", "type": "distance", "unit": "micrometer"},
{"name": "x", "type": "space", "unit": "micrometer"},
],
"datasets": [],
"type": "jpeg2000",
"name": "",
}
]
axes = ["z", "y", "x"]
if has_channel:
multiscales[0]["axes"].insert(0, {"name": "c", "type": "channel"})

for n in range(nblevel):
shape0 = omz["0"].shape[-2:]
shape = omz[str(n)].shape[-2:]
multiscales[0]["datasets"].append({})
level = multiscales[0]["datasets"][-1]
level["path"] = str(n)

# I assume that wavelet transforms end up aligning voxel edges
# across levels, so the effective scaling is the shape ratio,
# and there is a half voxel shift wrt to the "center of first voxel"
# frame
level["coordinateTransformations"] = [
{
"type": "scale",
"scale": [1.0] * has_channel
+ [
1.0,
(shape0[0] / shape[0]) * vxh,
(shape0[1] / shape[1]) * vxw,
],
},
{
"type": "translation",
"translation": [0.0] * has_channel
+ [
0.0,
(shape0[0] / shape[0] - 1) * vxh * 0.5,
(shape0[1] / shape[1] - 1) * vxw * 0.5,
],
},
]
multiscales[0]["coordinateTransformations"] = [
{"scale": [1.0] * (3 + has_channel), "type": "scale"}
]
omz.attrs["multiscales"] = multiscales
axes.insert(0, "c")
omz.write_ome_metadata(
axes=axes,
space_scale=[1.0] + list(get_pixelsize(j2k)),
multiscales_type="jpeg2000",
no_pool=0,
)

# Write NIfTI-Zarr header
# NOTE: we use nifti2 because dimensions typically do not fit in a short
@@ -280,19 +206,10 @@ def convert(
header.set_sform(affine)
header.set_xyzt_units(nib.nifti1.unit_codes.code["micron"])
header.structarr["magic"] = b"n+2\0"
header = np.frombuffer(header.structarr.tobytes(), dtype="u1")
opt = {
"chunks": [len(header)],
"dimension_separator": r"/",
"order": "F",
"dtype": "|u1",
"fill_value": None,
"compressor": None,
}
omz.create_dataset("nifti", data=header, shape=shape, **opt)
omz.write_nifti_header(header)

# Write sidecar .json file
json_name = os.path.splitext(out)[0]
json_name = os.path.splitext(zarr_config.out)[0]
json_name += ".json"
dic = {}
dic["PixelSize"] = json.dumps([vxw, vxh])
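For review, a worked example of the chunk layout computed above (`chunks = list(new_size[2:]) + [1] + list(zarr_config.chunk[-2:])`), assuming an RGB multi-slice volume and a 1024-pixel in-plane tile in the config; the actual default tile size may differ.

```python
# Worked example of the chunk computation in the diff above (illustrative values).
new_size = (38_000, 29_000, 3)   # (height, width, channels) at the finest level
config_chunk = (1024, 1024)      # in-plane tile size from ZarrConfig (assumed)

chunks = list(new_size[2:]) + [1] + list(config_chunk[-2:])
print(chunks)  # [3, 1, 1024, 1024]
# i.e. full channel axis, one jp2 slice per chunk, 1024 x 1024 tiles in-plane,
# matching the per-level array shape order [channel, slice, y, x].
```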
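The removed multiscale-metadata block also encoded reasoning worth keeping in view while checking `write_ome_metadata`: wavelet levels are assumed to align voxel edges, so each level's scale is its shape ratio to level 0 and its origin picks up a half-voxel shift in the "center of first voxel" frame. Restated as a standalone sketch (same formulas as the deleted code, channel axis omitted; `vxh`/`vxw` are the level-0 voxel sizes):

```python
# Per-level coordinate transform from the removed metadata block, restated for review.
def level_transform(shape0, shape_level, vxh, vxw):
    """Return (scale, translation) for one pyramid level with edge-aligned levels."""
    sy = shape0[0] / shape_level[0]   # downsampling factor along y
    sx = shape0[1] / shape_level[1]   # downsampling factor along x
    scale = [1.0, sy * vxh, sx * vxw]                                # z, y, x
    translation = [0.0, (sy - 1) * vxh * 0.5, (sx - 1) * vxw * 0.5]  # half-voxel shift
    return scale, translation


# Illustrative numbers: level 2 of a 4096 x 4096 slice with 1 micron pixels.
print(level_transform((4096, 4096), (1024, 1024), 1.0, 1.0))
# ([1.0, 4.0, 4.0], [0.0, 1.5, 1.5])
```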