diff --git a/src/nwb2bids/_command_line_interface/_main.py b/src/nwb2bids/_command_line_interface/_main.py index af77a076..1c84fbfd 100644 --- a/src/nwb2bids/_command_line_interface/_main.py +++ b/src/nwb2bids/_command_line_interface/_main.py @@ -79,13 +79,21 @@ def _run_convert_nwb_dataset( raise ValueError(message) handled_nwb_paths = [pathlib.Path(nwb_path) for nwb_path in nwb_paths] - run_config = RunConfig( - bids_directory=bids_directory, - additional_metadata_file_path=additional_metadata_file_path, - file_mode=file_mode, - cache_directory=cache_directory, - run_id=run_id, - ) + run_config_kwargs = { + k: v + for k, v in { + "bids_directory": bids_directory, + "additional_metadata_file_path": additional_metadata_file_path, + "file_mode": file_mode, + "cache_directory": cache_directory, + "run_id": run_id, + }.items() + # Filter out values that indicate absence of direct user input or + # signal to use default + if (k != "file_mode" and v is not None) or (k == "file_mode" and v != "auto") + } + + run_config = RunConfig.model_validate(run_config_kwargs) converter = convert_nwb_dataset(nwb_paths=handled_nwb_paths, run_config=run_config) messages = converter.messages diff --git a/src/nwb2bids/_converters/_run_config.py b/src/nwb2bids/_converters/_run_config.py index 8e63b167..d330b219 100644 --- a/src/nwb2bids/_converters/_run_config.py +++ b/src/nwb2bids/_converters/_run_config.py @@ -1,14 +1,65 @@ import json import pathlib -import tempfile import typing import pydantic +from .._core._file_mode import _determine_file_mode from .._core._home import _get_home_directory from .._core._run_id import _generate_run_id +def _validate_existing_directory_as_bids(directory: pathlib.Path) -> pathlib.Path: + + dataset_description_file_path = directory / "dataset_description.json" + current_directory_contents = {path.stem for path in directory.iterdir() if not path.name.startswith(".")} - { + "README", + "CHANGES", + "derivatives", + "dandiset", + } + + if len(current_directory_contents) == 0: + # The directory is considered empty, not containing any meaningful content. + # Populate the directory with `dataset_description.json` to make it + # a valid (though minimal) BIDS dataset. + + default_dataset_description = {"BIDSVersion": "1.10"} + with dataset_description_file_path.open(mode="w") as file_stream: + json.dump(obj=default_dataset_description, fp=file_stream, indent=4) + else: + # The directory is considered non-empty + + if not dataset_description_file_path.is_file(): + # The directory is without `dataset_description.json` file + # It is an invalid BIDS dataset. + + message = ( + f"The directory ({directory}) exists and is not empty, but is not a valid BIDS dataset: " + "missing 'dataset_description.json' file." + ) + raise ValueError(message) + + with dataset_description_file_path.open(mode="r") as file_stream: + try: + dataset_description = json.load(fp=file_stream) + except json.JSONDecodeError as exc: + message = ( + f"The directory ({directory}) exists and contains a 'dataset_description.json' file, " + "but it is not a valid JSON file." + ) + raise ValueError(message) from exc + + if dataset_description.get("BIDSVersion", None) is None: + message = ( + f"The directory ({directory}) exists but is not a valid BIDS dataset: " + "missing 'BIDSVersion' in 'dataset_description.json'." + ) + raise ValueError(message) + + return directory + + class RunConfig(pydantic.BaseModel): """ Specifies configuration options for a single run of NWB to BIDS conversion. @@ -19,12 +70,13 @@ class RunConfig(pydantic.BaseModel): additional_metadata_file_path : file path, optional The path to a YAML file containing additional metadata not included within the NWB files that you wish to include in the BIDS dataset. - file_mode : one of "move", "copy", "symlink", or "auto", default: "auto" + file_mode : one of "move", "copy", or "symlink" Specifies how to handle the NWB files when converting to BIDS format. - "move": Move the files to the BIDS directory. - "copy": Copy the files to the BIDS directory. - "symlink": Create symbolic links to the files in the BIDS directory. - - "auto": Decides between all the above based on the system, with preference for linking when possible. + - if not specified, decide between all the above based on the system, + with preference for linking when possible. cache_directory : directory path The directory where run specific files (e.g., notifications, sanitization reports) will be stored. Defaults to `~/.nwb2bids`. @@ -35,39 +87,42 @@ class RunConfig(pydantic.BaseModel): The default ID uses runtime timestamp information of the form "date-%Y%m%d_time-%H%M%S." """ - bids_directory: pydantic.DirectoryPath | None = None + bids_directory: typing.Annotated[ + pydantic.DirectoryPath, + pydantic.Field(default_factory=pathlib.Path.cwd), + pydantic.AfterValidator(_validate_existing_directory_as_bids), + ] additional_metadata_file_path: pydantic.FilePath | None = None - file_mode: typing.Literal["move", "copy", "symlink", "auto"] = "auto" - cache_directory: pydantic.DirectoryPath | None = None - run_id: str | None = None - _parent_run_directory: pathlib.Path | None = pydantic.PrivateAttr(default=None) - _run_directory: pathlib.Path | None = pydantic.PrivateAttr(default=None) + file_mode: typing.Annotated[ + typing.Literal["move", "copy", "symlink"], pydantic.Field(default_factory=_determine_file_mode) + ] + cache_directory: typing.Annotated[pydantic.DirectoryPath, pydantic.Field(default_factory=_get_home_directory)] + run_id: typing.Annotated[str, pydantic.Field(default_factory=_generate_run_id)] model_config = pydantic.ConfigDict( validate_assignment=True, # Re-validate model on mutation + validate_default=True, # Validate default values as well ) def model_post_init(self, context: typing.Any, /) -> None: - """Ensure the various run directories and files are created.""" - if self.run_id is None: - self.run_id = _generate_run_id() - - if self.bids_directory is None: - self.bids_directory = pathlib.Path.cwd() - - self._validate_existing_directory_as_bids() - self._determine_file_mode() - if self.cache_directory is None: - self.cache_directory = _get_home_directory() - self._parent_run_directory = self.cache_directory / "runs" + # Ensure run directory and its parent exist self._parent_run_directory.mkdir(exist_ok=True) - self._run_directory = self._parent_run_directory / self.run_id self._run_directory.mkdir(exist_ok=True) self.notifications_file_path.touch() self.notifications_json_file_path.touch() + @property + def _parent_run_directory(self) -> pathlib.Path: + """The parent directory where all run-specific directories are stored.""" + return self.cache_directory / "runs" + + @property + def _run_directory(self) -> pathlib.Path: + """The directory specific to this run.""" + return self._parent_run_directory / self.run_id + @pydantic.computed_field @property def notifications_file_path(self) -> pathlib.Path: @@ -81,55 +136,3 @@ def notifications_json_file_path(self) -> pathlib.Path: """The file path leading to a JSON dump of the notifications.""" notifications_file_path = self._run_directory / f"{self.run_id}_notifications.json" return notifications_file_path - - def _validate_existing_directory_as_bids(self) -> None: - bids_directory = self.bids_directory - - dataset_description_file_path = bids_directory / "dataset_description.json" - current_directory_contents = { - path.stem for path in bids_directory.iterdir() if not path.name.startswith(".") - } - {"README", "CHANGES", "derivatives", "dandiset"} - if len(current_directory_contents) == 0: - default_dataset_description = {"BIDSVersion": "1.10"} - with dataset_description_file_path.open(mode="w") as file_stream: - json.dump(obj=default_dataset_description, fp=file_stream, indent=4) - return - - if not dataset_description_file_path.exists(): - message = ( - f"The directory ({bids_directory}) exists and is not empty, but is not a valid BIDS dataset: " - "missing 'dataset_description.json'." - ) - raise ValueError(message) - - with dataset_description_file_path.open(mode="r") as file_stream: - dataset_description = json.load(fp=file_stream) - if dataset_description.get("BIDSVersion", None) is None: - message = ( - f"The directory ({bids_directory}) exists but is not a valid BIDS dataset: " - "missing 'BIDSVersion' in 'dataset_description.json'." - ) - raise ValueError(message) - - def _determine_file_mode(self) -> None: - if self.file_mode != "auto": - return - - with tempfile.TemporaryDirectory(prefix="nwb2bids-") as temp_dir_str: - temp_dir_path = pathlib.Path(temp_dir_str) - - # Create a test file to determine if symlinks are supported - test_file_path = temp_dir_path / "test_file.txt" - test_file_path.touch() - - try: - # Create a symlink to the test file - (temp_dir_path / "test_symlink.txt").symlink_to(target=test_file_path) - except (OSError, PermissionError, NotImplementedError): # Windows can sometimes have trouble with symlinks - # TODO: log a INFO message here when logging is set up - self.file_mode = "copy" - return - else: - # If symlink creation was successful, return "symlink" - self.file_mode = "symlink" - return diff --git a/src/nwb2bids/_core/_file_mode.py b/src/nwb2bids/_core/_file_mode.py new file mode 100644 index 00000000..1391d5f5 --- /dev/null +++ b/src/nwb2bids/_core/_file_mode.py @@ -0,0 +1,25 @@ +import pathlib +import tempfile +import typing + + +def _determine_file_mode() -> typing.Literal["symlink", "copy"]: + """ + Determine what file mode to use in creating a BIDS dataset based on the system + """ + with tempfile.TemporaryDirectory(prefix="nwb2bids-") as temp_dir_str: + temp_dir_path = pathlib.Path(temp_dir_str) + + # Create a test file to determine if symlinks are supported + test_file_path = temp_dir_path / "test_file.txt" + test_file_path.touch() + + try: + # Create a symlink to the test file + (temp_dir_path / "test_symlink.txt").symlink_to(target=test_file_path) + except (OSError, PermissionError, NotImplementedError): # Windows can sometimes have trouble with symlinks + # TODO: log a INFO message here when logging is set up + return "copy" + else: + # If symlink creation was successful, return "symlink" + return "symlink"