Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions src/nwb2bids/_command_line_interface/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,21 @@ def _run_convert_nwb_dataset(
raise ValueError(message)
handled_nwb_paths = [pathlib.Path(nwb_path) for nwb_path in nwb_paths]

run_config = RunConfig(
bids_directory=bids_directory,
additional_metadata_file_path=additional_metadata_file_path,
file_mode=file_mode,
cache_directory=cache_directory,
run_id=run_id,
)
run_config_kwargs = {
k: v
for k, v in {
"bids_directory": bids_directory,
"additional_metadata_file_path": additional_metadata_file_path,
"file_mode": file_mode,
"cache_directory": cache_directory,
"run_id": run_id,
}.items()
# Filter out values that indicate absence of direct user input or
# signal to use default
if (k != "file_mode" and v is not None) or (k == "file_mode" and v != "auto")
}

run_config = RunConfig.model_validate(run_config_kwargs)
converter = convert_nwb_dataset(nwb_paths=handled_nwb_paths, run_config=run_config)

messages = converter.messages
Expand Down
151 changes: 77 additions & 74 deletions src/nwb2bids/_converters/_run_config.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,65 @@
import json
import pathlib
import tempfile
import typing

import pydantic

from .._core._file_mode import _determine_file_mode
from .._core._home import _get_home_directory
from .._core._run_id import _generate_run_id


def _validate_existing_directory_as_bids(directory: pathlib.Path) -> pathlib.Path:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might as well move this out like you did the file mode?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(into a separate file)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can do that. I don't have a strong preference. Do you want to _core?

However, I want you to know that _validate_existing_directory_as_bids is a field validator in the RunConfig now. The argument it takes, the values it returns, and the errors it raises, are dictated by Pydantic. I.e. It may not have any other use, so it may be a good idea to just keep it close the model that uses it, as it is really part of the model in some sense.


dataset_description_file_path = directory / "dataset_description.json"
current_directory_contents = {path.stem for path in directory.iterdir() if not path.name.startswith(".")} - {
"README",
"CHANGES",
"derivatives",
"dandiset",
}

if len(current_directory_contents) == 0:
# The directory is considered empty, not containing any meaningful content.
# Populate the directory with `dataset_description.json` to make it
# a valid (though minimal) BIDS dataset.

default_dataset_description = {"BIDSVersion": "1.10"}
with dataset_description_file_path.open(mode="w") as file_stream:
json.dump(obj=default_dataset_description, fp=file_stream, indent=4)
else:
# The directory is considered non-empty

if not dataset_description_file_path.is_file():
# The directory is without `dataset_description.json` file
# It is an invalid BIDS dataset.

message = (
f"The directory ({directory}) exists and is not empty, but is not a valid BIDS dataset: "
"missing 'dataset_description.json' file."
)
raise ValueError(message)

with dataset_description_file_path.open(mode="r") as file_stream:
try:
dataset_description = json.load(fp=file_stream)
except json.JSONDecodeError as exc:
message = (
f"The directory ({directory}) exists and contains a 'dataset_description.json' file, "
"but it is not a valid JSON file."
)
raise ValueError(message) from exc

if dataset_description.get("BIDSVersion", None) is None:
message = (
f"The directory ({directory}) exists but is not a valid BIDS dataset: "
"missing 'BIDSVersion' in 'dataset_description.json'."
)
raise ValueError(message)

return directory


class RunConfig(pydantic.BaseModel):
"""
Specifies configuration options for a single run of NWB to BIDS conversion.
Expand All @@ -19,12 +70,13 @@ class RunConfig(pydantic.BaseModel):
additional_metadata_file_path : file path, optional
The path to a YAML file containing additional metadata not included within the NWB files
that you wish to include in the BIDS dataset.
file_mode : one of "move", "copy", "symlink", or "auto", default: "auto"
file_mode : one of "move", "copy", or "symlink"
Comment on lines -22 to +73
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yarik specifically requested the "auto" value previously

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can re-add it if he really wants it, but the it will not be a consistent model.

I preserved "auto" as an option in the CLI, so it is as user friend to the CLI users. Adding it into the config model doesn't make much sense since once an a model object is initialized, the field never hold a value of "auto". This behavior is actually reflected in your code.

if file_mode == "copy":
shutil.copy(src=nwbfile_path, dst=session_file_path)
elif file_mode == "move":
shutil.move(src=nwbfile_path, dst=session_file_path)
elif file_mode == "symlink":
session_file_path.symlink_to(target=nwbfile_path)

There is no checking for the "auto" value in that example, which is appropriate.

@yarikoptic What do you think? It may not be what you want initially, but I think it works better.

Specifies how to handle the NWB files when converting to BIDS format.
- "move": Move the files to the BIDS directory.
- "copy": Copy the files to the BIDS directory.
- "symlink": Create symbolic links to the files in the BIDS directory.
- "auto": Decides between all the above based on the system, with preference for linking when possible.
- if not specified, decide between all the above based on the system,
with preference for linking when possible.
cache_directory : directory path
The directory where run specific files (e.g., notifications, sanitization reports) will be stored.
Defaults to `~/.nwb2bids`.
Expand All @@ -35,39 +87,42 @@ class RunConfig(pydantic.BaseModel):
The default ID uses runtime timestamp information of the form "date-%Y%m%d_time-%H%M%S."
"""

bids_directory: pydantic.DirectoryPath | None = None
bids_directory: typing.Annotated[
pydantic.DirectoryPath,
pydantic.Field(default_factory=pathlib.Path.cwd),
pydantic.AfterValidator(_validate_existing_directory_as_bids),
]
additional_metadata_file_path: pydantic.FilePath | None = None
file_mode: typing.Literal["move", "copy", "symlink", "auto"] = "auto"
cache_directory: pydantic.DirectoryPath | None = None
run_id: str | None = None
_parent_run_directory: pathlib.Path | None = pydantic.PrivateAttr(default=None)
_run_directory: pathlib.Path | None = pydantic.PrivateAttr(default=None)
file_mode: typing.Annotated[
typing.Literal["move", "copy", "symlink"], pydantic.Field(default_factory=_determine_file_mode)
]
cache_directory: typing.Annotated[pydantic.DirectoryPath, pydantic.Field(default_factory=_get_home_directory)]
run_id: typing.Annotated[str, pydantic.Field(default_factory=_generate_run_id)]

model_config = pydantic.ConfigDict(
validate_assignment=True, # Re-validate model on mutation
validate_default=True, # Validate default values as well
)

def model_post_init(self, context: typing.Any, /) -> None:
"""Ensure the various run directories and files are created."""
if self.run_id is None:
self.run_id = _generate_run_id()

if self.bids_directory is None:
self.bids_directory = pathlib.Path.cwd()

self._validate_existing_directory_as_bids()
self._determine_file_mode()

if self.cache_directory is None:
self.cache_directory = _get_home_directory()
self._parent_run_directory = self.cache_directory / "runs"
# Ensure run directory and its parent exist
self._parent_run_directory.mkdir(exist_ok=True)
self._run_directory = self._parent_run_directory / self.run_id
self._run_directory.mkdir(exist_ok=True)

self.notifications_file_path.touch()
self.notifications_json_file_path.touch()

@property
def _parent_run_directory(self) -> pathlib.Path:
"""The parent directory where all run-specific directories are stored."""
return self.cache_directory / "runs"
Comment on lines +116 to +119
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it is only being set once, isn't it cleaner to set it when I originally did? (otherwise, this line gets re-called every time you access the property)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Further, marking it as a PrivateAttr with the rest of the fields make it more obvious that is there and has all the other pydantic side effects you would expect

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it is only being set once, isn't it cleaner to set it when I originally did? (otherwise, this line gets re-called every time you access the property)

It may be cleaner. However, what I see is that this value along with _run_directory are based on the value of another attribute in the model, and since this RunConfig is mutable (Pydantic models don't have true immutability), defining these two values as private attributes can render them outdated once the containing model object is mutated. Additionally, since these two values are based on the value of another attribute, it seems more appropriate to me that they are read-only.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Further, marking it as a PrivateAttr with the rest of the fields make it more obvious that is there and has all the other pydantic side effects you would expect

I don't know which side effects you are referring to. The main point having a private attribute in a Pydantic model is that you can keep information in the model that behaves very differently than a regular field, https://docs.pydantic.dev/latest/concepts/models/#private-model-attributes.


@property
def _run_directory(self) -> pathlib.Path:
"""The directory specific to this run."""
return self._parent_run_directory / self.run_id

@pydantic.computed_field
@property
def notifications_file_path(self) -> pathlib.Path:
Expand All @@ -81,55 +136,3 @@ def notifications_json_file_path(self) -> pathlib.Path:
"""The file path leading to a JSON dump of the notifications."""
notifications_file_path = self._run_directory / f"{self.run_id}_notifications.json"
return notifications_file_path

def _validate_existing_directory_as_bids(self) -> None:
bids_directory = self.bids_directory

dataset_description_file_path = bids_directory / "dataset_description.json"
current_directory_contents = {
path.stem for path in bids_directory.iterdir() if not path.name.startswith(".")
} - {"README", "CHANGES", "derivatives", "dandiset"}
if len(current_directory_contents) == 0:
default_dataset_description = {"BIDSVersion": "1.10"}
with dataset_description_file_path.open(mode="w") as file_stream:
json.dump(obj=default_dataset_description, fp=file_stream, indent=4)
return

if not dataset_description_file_path.exists():
message = (
f"The directory ({bids_directory}) exists and is not empty, but is not a valid BIDS dataset: "
"missing 'dataset_description.json'."
)
raise ValueError(message)

with dataset_description_file_path.open(mode="r") as file_stream:
dataset_description = json.load(fp=file_stream)
if dataset_description.get("BIDSVersion", None) is None:
message = (
f"The directory ({bids_directory}) exists but is not a valid BIDS dataset: "
"missing 'BIDSVersion' in 'dataset_description.json'."
)
raise ValueError(message)

def _determine_file_mode(self) -> None:
if self.file_mode != "auto":
return

with tempfile.TemporaryDirectory(prefix="nwb2bids-") as temp_dir_str:
temp_dir_path = pathlib.Path(temp_dir_str)

# Create a test file to determine if symlinks are supported
test_file_path = temp_dir_path / "test_file.txt"
test_file_path.touch()

try:
# Create a symlink to the test file
(temp_dir_path / "test_symlink.txt").symlink_to(target=test_file_path)
except (OSError, PermissionError, NotImplementedError): # Windows can sometimes have trouble with symlinks
# TODO: log a INFO message here when logging is set up
self.file_mode = "copy"
return
else:
# If symlink creation was successful, return "symlink"
self.file_mode = "symlink"
return
25 changes: 25 additions & 0 deletions src/nwb2bids/_core/_file_mode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pathlib
import tempfile
import typing


def _determine_file_mode() -> typing.Literal["symlink", "copy"]:
"""
Determine what file mode to use in creating a BIDS dataset based on the system
"""
with tempfile.TemporaryDirectory(prefix="nwb2bids-") as temp_dir_str:
temp_dir_path = pathlib.Path(temp_dir_str)

# Create a test file to determine if symlinks are supported
test_file_path = temp_dir_path / "test_file.txt"
test_file_path.touch()

try:
# Create a symlink to the test file
(temp_dir_path / "test_symlink.txt").symlink_to(target=test_file_path)
except (OSError, PermissionError, NotImplementedError): # Windows can sometimes have trouble with symlinks
# TODO: log a INFO message here when logging is set up
return "copy"
else:
# If symlink creation was successful, return "symlink"
return "symlink"
Loading