From 8b49a36b047e373ec595b0fefd705c72a664563f Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 3 Nov 2025 16:18:56 -0800 Subject: [PATCH 01/10] feat: redefine private attributes as properties and adjust their type annotations Redefine `_parent_run_directory` and `_run_directory` of `RunConfig` as private properties since they are computed based on the value of a field of the model and are never modified. Additionally, `None` type annotations are removed from these properties for they are really never `None` --- src/nwb2bids/_converters/_run_config.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/nwb2bids/_converters/_run_config.py b/src/nwb2bids/_converters/_run_config.py index 8e63b167..ca838596 100644 --- a/src/nwb2bids/_converters/_run_config.py +++ b/src/nwb2bids/_converters/_run_config.py @@ -40,8 +40,6 @@ class RunConfig(pydantic.BaseModel): file_mode: typing.Literal["move", "copy", "symlink", "auto"] = "auto" cache_directory: pydantic.DirectoryPath | None = None run_id: str | None = None - _parent_run_directory: pathlib.Path | None = pydantic.PrivateAttr(default=None) - _run_directory: pathlib.Path | None = pydantic.PrivateAttr(default=None) model_config = pydantic.ConfigDict( validate_assignment=True, # Re-validate model on mutation @@ -60,14 +58,24 @@ def model_post_init(self, context: typing.Any, /) -> None: if self.cache_directory is None: self.cache_directory = _get_home_directory() - self._parent_run_directory = self.cache_directory / "runs" + + # Ensure run directory and its parent exist self._parent_run_directory.mkdir(exist_ok=True) - self._run_directory = self._parent_run_directory / self.run_id self._run_directory.mkdir(exist_ok=True) self.notifications_file_path.touch() self.notifications_json_file_path.touch() + @property + def _parent_run_directory(self) -> pathlib.Path: + """The parent directory where all run-specific directories are stored.""" + return self.cache_directory / "runs" + + @property + def _run_directory(self) -> pathlib.Path: + """The directory specific to this run.""" + return self._parent_run_directory / self.run_id + @pydantic.computed_field @property def notifications_file_path(self) -> pathlib.Path: From 0091975a46247d3f9c001edbe2df0d06fe9c7925 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 3 Nov 2025 17:02:03 -0800 Subject: [PATCH 02/10] feat: default `run_id` through default factory and remove `None` type annotation Set the default of `run_id` using the default factory mechanism. This allows a more declarative definition of the model and `model_post_init` should be reserved for setting something that requires multi-field access. Additionally, the `None` type annotation is removed. `run_id` was never `None` once it was set. The `None` value was only used as an indication of absence of user provided value and to signal the setting of a default. Setting the default of `run_id` using the default factory mechanism obviates the `None` type annotation. --- src/nwb2bids/_converters/_run_config.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/nwb2bids/_converters/_run_config.py b/src/nwb2bids/_converters/_run_config.py index ca838596..73cefce1 100644 --- a/src/nwb2bids/_converters/_run_config.py +++ b/src/nwb2bids/_converters/_run_config.py @@ -39,7 +39,7 @@ class RunConfig(pydantic.BaseModel): additional_metadata_file_path: pydantic.FilePath | None = None file_mode: typing.Literal["move", "copy", "symlink", "auto"] = "auto" cache_directory: pydantic.DirectoryPath | None = None - run_id: str | None = None + run_id: typing.Annotated[str, pydantic.Field(default_factory=_generate_run_id)] model_config = pydantic.ConfigDict( validate_assignment=True, # Re-validate model on mutation @@ -47,8 +47,6 @@ class RunConfig(pydantic.BaseModel): def model_post_init(self, context: typing.Any, /) -> None: """Ensure the various run directories and files are created.""" - if self.run_id is None: - self.run_id = _generate_run_id() if self.bids_directory is None: self.bids_directory = pathlib.Path.cwd() From 649415e73eb19a53b0b4aec12a0fa8d84a0ce242 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 3 Nov 2025 17:15:02 -0800 Subject: [PATCH 03/10] feat: default `cache_directory` through default factory and remove `None` type annotation Set the default of `cache_directory` using the default factory mechanism. This allows a more declarative definition of the model and `model_post_init` should be reserved for setting something that requires multi-field access. Additionally, the `None` type annotation is removed. `cache_directory` was never `None` once it was set. The `None` value was only used as an indication of absence of user provided value and to signal the setting of a default. Setting the default of `cache_directory` using the default factory mechanism obviates the `None` type annotation. --- src/nwb2bids/_converters/_run_config.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/nwb2bids/_converters/_run_config.py b/src/nwb2bids/_converters/_run_config.py index 73cefce1..2d4512d3 100644 --- a/src/nwb2bids/_converters/_run_config.py +++ b/src/nwb2bids/_converters/_run_config.py @@ -38,7 +38,7 @@ class RunConfig(pydantic.BaseModel): bids_directory: pydantic.DirectoryPath | None = None additional_metadata_file_path: pydantic.FilePath | None = None file_mode: typing.Literal["move", "copy", "symlink", "auto"] = "auto" - cache_directory: pydantic.DirectoryPath | None = None + cache_directory: typing.Annotated[pydantic.DirectoryPath, pydantic.Field(default_factory=_get_home_directory)] run_id: typing.Annotated[str, pydantic.Field(default_factory=_generate_run_id)] model_config = pydantic.ConfigDict( @@ -54,9 +54,6 @@ def model_post_init(self, context: typing.Any, /) -> None: self._validate_existing_directory_as_bids() self._determine_file_mode() - if self.cache_directory is None: - self.cache_directory = _get_home_directory() - # Ensure run directory and its parent exist self._parent_run_directory.mkdir(exist_ok=True) self._run_directory.mkdir(exist_ok=True) From 60fa15d5b2dac43ece75462fd5d171ceecf216b6 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 3 Nov 2025 17:28:24 -0800 Subject: [PATCH 04/10] feat: default `bids_directory` through default factory and remove `None` type annotation Set the default of `bids_directory` using the default factory mechanism. This allows a more declarative definition of the model and `model_post_init` should be reserved for setting something that requires multi-field access. Additionally, the `None` type annotation is removed. `bids_directory` was never `None` once it was set. The `None` value was only used as an indication of absence of user provided value and to signal the setting of a default. Setting the default of `bids_directory` using the default factory mechanism obviates the `None` type annotation. --- src/nwb2bids/_converters/_run_config.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/nwb2bids/_converters/_run_config.py b/src/nwb2bids/_converters/_run_config.py index 2d4512d3..1b644699 100644 --- a/src/nwb2bids/_converters/_run_config.py +++ b/src/nwb2bids/_converters/_run_config.py @@ -35,7 +35,7 @@ class RunConfig(pydantic.BaseModel): The default ID uses runtime timestamp information of the form "date-%Y%m%d_time-%H%M%S." """ - bids_directory: pydantic.DirectoryPath | None = None + bids_directory: typing.Annotated[pydantic.DirectoryPath, pydantic.Field(default_factory=pathlib.Path.cwd)] additional_metadata_file_path: pydantic.FilePath | None = None file_mode: typing.Literal["move", "copy", "symlink", "auto"] = "auto" cache_directory: typing.Annotated[pydantic.DirectoryPath, pydantic.Field(default_factory=_get_home_directory)] @@ -48,9 +48,6 @@ class RunConfig(pydantic.BaseModel): def model_post_init(self, context: typing.Any, /) -> None: """Ensure the various run directories and files are created.""" - if self.bids_directory is None: - self.bids_directory = pathlib.Path.cwd() - self._validate_existing_directory_as_bids() self._determine_file_mode() From 7db118ea20737af506273d2fbd0c8e4b9802a773 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 4 Nov 2025 10:04:56 -0800 Subject: [PATCH 05/10] feat: default `file_mode` through default factory and remove its `"auto"` value option Set the default of `file_mode` using the default factory mechanism. This allows a more declarative definition of the model and `model_post_init` should be reserved for setting something that requires multi-field access. Additionally, the `"auto"` value option is removed. `file_mode` was never `"auto"` once it was set. The `"auto"` value was only used as an indication of absence of user provided value and to signal the setting of a default. Setting the default of `file_mode` using the default factory mechanism obviates the `"auto"` value option and allows the type annotation of the `file_mode` field to be consistent with the possible values that it holds in operation. --- src/nwb2bids/_converters/_run_config.py | 35 +++++-------------------- src/nwb2bids/_core/_file_mode.py | 25 ++++++++++++++++++ 2 files changed, 32 insertions(+), 28 deletions(-) create mode 100644 src/nwb2bids/_core/_file_mode.py diff --git a/src/nwb2bids/_converters/_run_config.py b/src/nwb2bids/_converters/_run_config.py index 1b644699..6c663078 100644 --- a/src/nwb2bids/_converters/_run_config.py +++ b/src/nwb2bids/_converters/_run_config.py @@ -1,10 +1,10 @@ import json import pathlib -import tempfile import typing import pydantic +from .._core._file_mode import _determine_file_mode from .._core._home import _get_home_directory from .._core._run_id import _generate_run_id @@ -19,12 +19,13 @@ class RunConfig(pydantic.BaseModel): additional_metadata_file_path : file path, optional The path to a YAML file containing additional metadata not included within the NWB files that you wish to include in the BIDS dataset. - file_mode : one of "move", "copy", "symlink", or "auto", default: "auto" + file_mode : one of "move", "copy", or "symlink" Specifies how to handle the NWB files when converting to BIDS format. - "move": Move the files to the BIDS directory. - "copy": Copy the files to the BIDS directory. - "symlink": Create symbolic links to the files in the BIDS directory. - - "auto": Decides between all the above based on the system, with preference for linking when possible. + - if not specified, decide between all the above based on the system, + with preference for linking when possible. cache_directory : directory path The directory where run specific files (e.g., notifications, sanitization reports) will be stored. Defaults to `~/.nwb2bids`. @@ -37,7 +38,9 @@ class RunConfig(pydantic.BaseModel): bids_directory: typing.Annotated[pydantic.DirectoryPath, pydantic.Field(default_factory=pathlib.Path.cwd)] additional_metadata_file_path: pydantic.FilePath | None = None - file_mode: typing.Literal["move", "copy", "symlink", "auto"] = "auto" + file_mode: typing.Annotated[ + typing.Literal["move", "copy", "symlink"], pydantic.Field(default_factory=_determine_file_mode) + ] cache_directory: typing.Annotated[pydantic.DirectoryPath, pydantic.Field(default_factory=_get_home_directory)] run_id: typing.Annotated[str, pydantic.Field(default_factory=_generate_run_id)] @@ -49,7 +52,6 @@ def model_post_init(self, context: typing.Any, /) -> None: """Ensure the various run directories and files are created.""" self._validate_existing_directory_as_bids() - self._determine_file_mode() # Ensure run directory and its parent exist self._parent_run_directory.mkdir(exist_ok=True) @@ -110,26 +112,3 @@ def _validate_existing_directory_as_bids(self) -> None: "missing 'BIDSVersion' in 'dataset_description.json'." ) raise ValueError(message) - - def _determine_file_mode(self) -> None: - if self.file_mode != "auto": - return - - with tempfile.TemporaryDirectory(prefix="nwb2bids-") as temp_dir_str: - temp_dir_path = pathlib.Path(temp_dir_str) - - # Create a test file to determine if symlinks are supported - test_file_path = temp_dir_path / "test_file.txt" - test_file_path.touch() - - try: - # Create a symlink to the test file - (temp_dir_path / "test_symlink.txt").symlink_to(target=test_file_path) - except (OSError, PermissionError, NotImplementedError): # Windows can sometimes have trouble with symlinks - # TODO: log a INFO message here when logging is set up - self.file_mode = "copy" - return - else: - # If symlink creation was successful, return "symlink" - self.file_mode = "symlink" - return diff --git a/src/nwb2bids/_core/_file_mode.py b/src/nwb2bids/_core/_file_mode.py new file mode 100644 index 00000000..1391d5f5 --- /dev/null +++ b/src/nwb2bids/_core/_file_mode.py @@ -0,0 +1,25 @@ +import pathlib +import tempfile +import typing + + +def _determine_file_mode() -> typing.Literal["symlink", "copy"]: + """ + Determine what file mode to use in creating a BIDS dataset based on the system + """ + with tempfile.TemporaryDirectory(prefix="nwb2bids-") as temp_dir_str: + temp_dir_path = pathlib.Path(temp_dir_str) + + # Create a test file to determine if symlinks are supported + test_file_path = temp_dir_path / "test_file.txt" + test_file_path.touch() + + try: + # Create a symlink to the test file + (temp_dir_path / "test_symlink.txt").symlink_to(target=test_file_path) + except (OSError, PermissionError, NotImplementedError): # Windows can sometimes have trouble with symlinks + # TODO: log a INFO message here when logging is set up + return "copy" + else: + # If symlink creation was successful, return "symlink" + return "symlink" From 74b2be2c854133d7d6248e613367d4575582e7fc Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 4 Nov 2025 12:16:58 -0800 Subject: [PATCH 06/10] feat: modify `_validate_existing_directory_as_bids` into a field validator Doing it this way makes the definition of the model more declarative and also avoids using `model_post_init` which should be preserved for validations that requires access to multiple fields of the model --- src/nwb2bids/_converters/_run_config.py | 82 +++++++++++++++---------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/src/nwb2bids/_converters/_run_config.py b/src/nwb2bids/_converters/_run_config.py index 6c663078..1f44d407 100644 --- a/src/nwb2bids/_converters/_run_config.py +++ b/src/nwb2bids/_converters/_run_config.py @@ -9,6 +9,49 @@ from .._core._run_id import _generate_run_id +def _validate_existing_directory_as_bids(directory: pathlib.Path) -> pathlib.Path: + + dataset_description_file_path = directory / "dataset_description.json" + current_directory_contents = {path.stem for path in directory.iterdir() if not path.name.startswith(".")} - { + "README", + "CHANGES", + "derivatives", + "dandiset", + } + + if len(current_directory_contents) == 0: + # The directory is considered empty, not containing any meaningful content. + # Populate the directory with `dataset_description.json` to make it + # a valid (though minimal) BIDS dataset. + + default_dataset_description = {"BIDSVersion": "1.10"} + with dataset_description_file_path.open(mode="w") as file_stream: + json.dump(obj=default_dataset_description, fp=file_stream, indent=4) + else: + # The directory is considered non-empty + + if not dataset_description_file_path.exists(): + # The directory is without `dataset_description.json` + # It is an invalid BIDS dataset. + + message = ( + f"The directory ({directory}) exists and is not empty, but is not a valid BIDS dataset: " + "missing 'dataset_description.json'." + ) + raise ValueError(message) + + with dataset_description_file_path.open(mode="r") as file_stream: + dataset_description = json.load(fp=file_stream) + if dataset_description.get("BIDSVersion", None) is None: + message = ( + f"The directory ({directory}) exists but is not a valid BIDS dataset: " + "missing 'BIDSVersion' in 'dataset_description.json'." + ) + raise ValueError(message) + + return directory + + class RunConfig(pydantic.BaseModel): """ Specifies configuration options for a single run of NWB to BIDS conversion. @@ -36,7 +79,11 @@ class RunConfig(pydantic.BaseModel): The default ID uses runtime timestamp information of the form "date-%Y%m%d_time-%H%M%S." """ - bids_directory: typing.Annotated[pydantic.DirectoryPath, pydantic.Field(default_factory=pathlib.Path.cwd)] + bids_directory: typing.Annotated[ + pydantic.DirectoryPath, + pydantic.Field(default_factory=pathlib.Path.cwd), + pydantic.AfterValidator(_validate_existing_directory_as_bids), + ] additional_metadata_file_path: pydantic.FilePath | None = None file_mode: typing.Annotated[ typing.Literal["move", "copy", "symlink"], pydantic.Field(default_factory=_determine_file_mode) @@ -46,12 +93,10 @@ class RunConfig(pydantic.BaseModel): model_config = pydantic.ConfigDict( validate_assignment=True, # Re-validate model on mutation + validate_default=True, # Validate default values as well ) def model_post_init(self, context: typing.Any, /) -> None: - """Ensure the various run directories and files are created.""" - - self._validate_existing_directory_as_bids() # Ensure run directory and its parent exist self._parent_run_directory.mkdir(exist_ok=True) @@ -83,32 +128,3 @@ def notifications_json_file_path(self) -> pathlib.Path: """The file path leading to a JSON dump of the notifications.""" notifications_file_path = self._run_directory / f"{self.run_id}_notifications.json" return notifications_file_path - - def _validate_existing_directory_as_bids(self) -> None: - bids_directory = self.bids_directory - - dataset_description_file_path = bids_directory / "dataset_description.json" - current_directory_contents = { - path.stem for path in bids_directory.iterdir() if not path.name.startswith(".") - } - {"README", "CHANGES", "derivatives", "dandiset"} - if len(current_directory_contents) == 0: - default_dataset_description = {"BIDSVersion": "1.10"} - with dataset_description_file_path.open(mode="w") as file_stream: - json.dump(obj=default_dataset_description, fp=file_stream, indent=4) - return - - if not dataset_description_file_path.exists(): - message = ( - f"The directory ({bids_directory}) exists and is not empty, but is not a valid BIDS dataset: " - "missing 'dataset_description.json'." - ) - raise ValueError(message) - - with dataset_description_file_path.open(mode="r") as file_stream: - dataset_description = json.load(fp=file_stream) - if dataset_description.get("BIDSVersion", None) is None: - message = ( - f"The directory ({bids_directory}) exists but is not a valid BIDS dataset: " - "missing 'BIDSVersion' in 'dataset_description.json'." - ) - raise ValueError(message) From 27f6ad8f6021162523660b01d60ce77a46d62e39 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 4 Nov 2025 12:30:13 -0800 Subject: [PATCH 07/10] feat: filter values of CLI arguments to set `RunConfig` Doing this allows CLI arguments to remain the same to users while making the `RunConfig` more succinct. For example, `RunConfig.run_id` can now be of type `str` instead of `str | None` even though it doesn't hold a value of `None` in operation. --- src/nwb2bids/_command_line_interface/_main.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/nwb2bids/_command_line_interface/_main.py b/src/nwb2bids/_command_line_interface/_main.py index af77a076..5b6e46ba 100644 --- a/src/nwb2bids/_command_line_interface/_main.py +++ b/src/nwb2bids/_command_line_interface/_main.py @@ -79,13 +79,21 @@ def _run_convert_nwb_dataset( raise ValueError(message) handled_nwb_paths = [pathlib.Path(nwb_path) for nwb_path in nwb_paths] - run_config = RunConfig( - bids_directory=bids_directory, - additional_metadata_file_path=additional_metadata_file_path, - file_mode=file_mode, - cache_directory=cache_directory, - run_id=run_id, - ) + run_config_kwargs = { + k: v + for k, v in { + "bids_directory": bids_directory, + "additional_metadata_file_path": additional_metadata_file_path, + "file_mode": file_mode, + "cache_directory": cache_directory, + "run_id": run_id, + }.items() + # Filter out values that indicate absence of direct user input or + # signal to use default + if k != "file_mode" and v is not None or k == "file_mode" and v != "auto" + } + + run_config = RunConfig.model_validate(run_config_kwargs) converter = convert_nwb_dataset(nwb_paths=handled_nwb_paths, run_config=run_config) messages = converter.messages From 4e581908d1b9c8f04cc373756090eefe8efd2d0a Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 4 Nov 2025 13:05:04 -0800 Subject: [PATCH 08/10] fix: ensure `dataset_description.json` is a file When validating a directory as a BIDS directory, ensure it is a file instead of just ensuring its mere existence. This rules out the situation where `dataset_description.json` is a directory --- src/nwb2bids/_converters/_run_config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nwb2bids/_converters/_run_config.py b/src/nwb2bids/_converters/_run_config.py index 1f44d407..42ea444f 100644 --- a/src/nwb2bids/_converters/_run_config.py +++ b/src/nwb2bids/_converters/_run_config.py @@ -30,13 +30,13 @@ def _validate_existing_directory_as_bids(directory: pathlib.Path) -> pathlib.Pat else: # The directory is considered non-empty - if not dataset_description_file_path.exists(): - # The directory is without `dataset_description.json` + if not dataset_description_file_path.is_file(): + # The directory is without `dataset_description.json` file # It is an invalid BIDS dataset. message = ( f"The directory ({directory}) exists and is not empty, but is not a valid BIDS dataset: " - "missing 'dataset_description.json'." + "missing 'dataset_description.json' file." ) raise ValueError(message) From 7d1c0cee2d9ea9851a2508eef11ae33c855b2047 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 4 Nov 2025 13:25:07 -0800 Subject: [PATCH 09/10] fix: handle situation that `dataset_description.json` is not a valid json file when validating a directory as a BIDS directory --- src/nwb2bids/_converters/_run_config.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/nwb2bids/_converters/_run_config.py b/src/nwb2bids/_converters/_run_config.py index 42ea444f..d330b219 100644 --- a/src/nwb2bids/_converters/_run_config.py +++ b/src/nwb2bids/_converters/_run_config.py @@ -41,7 +41,15 @@ def _validate_existing_directory_as_bids(directory: pathlib.Path) -> pathlib.Pat raise ValueError(message) with dataset_description_file_path.open(mode="r") as file_stream: - dataset_description = json.load(fp=file_stream) + try: + dataset_description = json.load(fp=file_stream) + except json.JSONDecodeError as exc: + message = ( + f"The directory ({directory}) exists and contains a 'dataset_description.json' file, " + "but it is not a valid JSON file." + ) + raise ValueError(message) from exc + if dataset_description.get("BIDSVersion", None) is None: message = ( f"The directory ({directory}) exists but is not a valid BIDS dataset: " From ca5ff77f2fcd82a78d9e32dadaa730ec6992f2fe Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 4 Nov 2025 15:09:29 -0800 Subject: [PATCH 10/10] style: add parentheses to make intent explicit Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/nwb2bids/_command_line_interface/_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nwb2bids/_command_line_interface/_main.py b/src/nwb2bids/_command_line_interface/_main.py index 5b6e46ba..1c84fbfd 100644 --- a/src/nwb2bids/_command_line_interface/_main.py +++ b/src/nwb2bids/_command_line_interface/_main.py @@ -90,7 +90,7 @@ def _run_convert_nwb_dataset( }.items() # Filter out values that indicate absence of direct user input or # signal to use default - if k != "file_mode" and v is not None or k == "file_mode" and v != "auto" + if (k != "file_mode" and v is not None) or (k == "file_mode" and v != "auto") } run_config = RunConfig.model_validate(run_config_kwargs)