-
Notifications
You must be signed in to change notification settings - Fork 4
Revise config model #175
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Revise config model #175
Changes from all commits
8b49a36
0091975
649415e
60fa15d
7db118e
74b2be2
27f6ad8
4e58190
7d1c0ce
ca5ff77
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,14 +1,65 @@ | ||||||||||||||
| import json | ||||||||||||||
| import pathlib | ||||||||||||||
| import tempfile | ||||||||||||||
| import typing | ||||||||||||||
|
|
||||||||||||||
| import pydantic | ||||||||||||||
|
|
||||||||||||||
| from .._core._file_mode import _determine_file_mode | ||||||||||||||
| from .._core._home import _get_home_directory | ||||||||||||||
| from .._core._run_id import _generate_run_id | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| def _validate_existing_directory_as_bids(directory: pathlib.Path) -> pathlib.Path: | ||||||||||||||
|
|
||||||||||||||
| dataset_description_file_path = directory / "dataset_description.json" | ||||||||||||||
| current_directory_contents = {path.stem for path in directory.iterdir() if not path.name.startswith(".")} - { | ||||||||||||||
| "README", | ||||||||||||||
| "CHANGES", | ||||||||||||||
| "derivatives", | ||||||||||||||
| "dandiset", | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if len(current_directory_contents) == 0: | ||||||||||||||
| # The directory is considered empty, not containing any meaningful content. | ||||||||||||||
| # Populate the directory with `dataset_description.json` to make it | ||||||||||||||
| # a valid (though minimal) BIDS dataset. | ||||||||||||||
|
|
||||||||||||||
| default_dataset_description = {"BIDSVersion": "1.10"} | ||||||||||||||
| with dataset_description_file_path.open(mode="w") as file_stream: | ||||||||||||||
| json.dump(obj=default_dataset_description, fp=file_stream, indent=4) | ||||||||||||||
| else: | ||||||||||||||
| # The directory is considered non-empty | ||||||||||||||
|
|
||||||||||||||
| if not dataset_description_file_path.is_file(): | ||||||||||||||
| # The directory is without `dataset_description.json` file | ||||||||||||||
| # It is an invalid BIDS dataset. | ||||||||||||||
|
|
||||||||||||||
| message = ( | ||||||||||||||
| f"The directory ({directory}) exists and is not empty, but is not a valid BIDS dataset: " | ||||||||||||||
| "missing 'dataset_description.json' file." | ||||||||||||||
| ) | ||||||||||||||
| raise ValueError(message) | ||||||||||||||
|
|
||||||||||||||
| with dataset_description_file_path.open(mode="r") as file_stream: | ||||||||||||||
| try: | ||||||||||||||
| dataset_description = json.load(fp=file_stream) | ||||||||||||||
| except json.JSONDecodeError as exc: | ||||||||||||||
| message = ( | ||||||||||||||
| f"The directory ({directory}) exists and contains a 'dataset_description.json' file, " | ||||||||||||||
| "but it is not a valid JSON file." | ||||||||||||||
| ) | ||||||||||||||
| raise ValueError(message) from exc | ||||||||||||||
|
|
||||||||||||||
| if dataset_description.get("BIDSVersion", None) is None: | ||||||||||||||
| message = ( | ||||||||||||||
| f"The directory ({directory}) exists but is not a valid BIDS dataset: " | ||||||||||||||
| "missing 'BIDSVersion' in 'dataset_description.json'." | ||||||||||||||
| ) | ||||||||||||||
| raise ValueError(message) | ||||||||||||||
|
|
||||||||||||||
| return directory | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| class RunConfig(pydantic.BaseModel): | ||||||||||||||
| """ | ||||||||||||||
| Specifies configuration options for a single run of NWB to BIDS conversion. | ||||||||||||||
|
|
@@ -19,12 +70,13 @@ class RunConfig(pydantic.BaseModel): | |||||||||||||
| additional_metadata_file_path : file path, optional | ||||||||||||||
| The path to a YAML file containing additional metadata not included within the NWB files | ||||||||||||||
| that you wish to include in the BIDS dataset. | ||||||||||||||
| file_mode : one of "move", "copy", "symlink", or "auto", default: "auto" | ||||||||||||||
| file_mode : one of "move", "copy", or "symlink" | ||||||||||||||
|
Comment on lines
-22
to
+73
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yarik specifically requested the "auto" value previously
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can re-add it if he really wants it, but the it will not be a consistent model. I preserved nwb2bids/src/nwb2bids/_converters/_session_converter.py Lines 128 to 133 in de9064b
There is no checking for the @yarikoptic What do you think? It may not be what you want initially, but I think it works better. |
||||||||||||||
| Specifies how to handle the NWB files when converting to BIDS format. | ||||||||||||||
| - "move": Move the files to the BIDS directory. | ||||||||||||||
| - "copy": Copy the files to the BIDS directory. | ||||||||||||||
| - "symlink": Create symbolic links to the files in the BIDS directory. | ||||||||||||||
| - "auto": Decides between all the above based on the system, with preference for linking when possible. | ||||||||||||||
| - if not specified, decide between all the above based on the system, | ||||||||||||||
| with preference for linking when possible. | ||||||||||||||
| cache_directory : directory path | ||||||||||||||
| The directory where run specific files (e.g., notifications, sanitization reports) will be stored. | ||||||||||||||
| Defaults to `~/.nwb2bids`. | ||||||||||||||
|
|
@@ -35,39 +87,42 @@ class RunConfig(pydantic.BaseModel): | |||||||||||||
| The default ID uses runtime timestamp information of the form "date-%Y%m%d_time-%H%M%S." | ||||||||||||||
| """ | ||||||||||||||
|
|
||||||||||||||
| bids_directory: pydantic.DirectoryPath | None = None | ||||||||||||||
| bids_directory: typing.Annotated[ | ||||||||||||||
| pydantic.DirectoryPath, | ||||||||||||||
| pydantic.Field(default_factory=pathlib.Path.cwd), | ||||||||||||||
| pydantic.AfterValidator(_validate_existing_directory_as_bids), | ||||||||||||||
| ] | ||||||||||||||
| additional_metadata_file_path: pydantic.FilePath | None = None | ||||||||||||||
| file_mode: typing.Literal["move", "copy", "symlink", "auto"] = "auto" | ||||||||||||||
| cache_directory: pydantic.DirectoryPath | None = None | ||||||||||||||
| run_id: str | None = None | ||||||||||||||
| _parent_run_directory: pathlib.Path | None = pydantic.PrivateAttr(default=None) | ||||||||||||||
| _run_directory: pathlib.Path | None = pydantic.PrivateAttr(default=None) | ||||||||||||||
| file_mode: typing.Annotated[ | ||||||||||||||
| typing.Literal["move", "copy", "symlink"], pydantic.Field(default_factory=_determine_file_mode) | ||||||||||||||
| ] | ||||||||||||||
| cache_directory: typing.Annotated[pydantic.DirectoryPath, pydantic.Field(default_factory=_get_home_directory)] | ||||||||||||||
| run_id: typing.Annotated[str, pydantic.Field(default_factory=_generate_run_id)] | ||||||||||||||
CodyCBakerPhD marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
|
|
||||||||||||||
| model_config = pydantic.ConfigDict( | ||||||||||||||
| validate_assignment=True, # Re-validate model on mutation | ||||||||||||||
| validate_default=True, # Validate default values as well | ||||||||||||||
| ) | ||||||||||||||
|
|
||||||||||||||
| def model_post_init(self, context: typing.Any, /) -> None: | ||||||||||||||
| """Ensure the various run directories and files are created.""" | ||||||||||||||
| if self.run_id is None: | ||||||||||||||
| self.run_id = _generate_run_id() | ||||||||||||||
|
|
||||||||||||||
| if self.bids_directory is None: | ||||||||||||||
| self.bids_directory = pathlib.Path.cwd() | ||||||||||||||
|
|
||||||||||||||
| self._validate_existing_directory_as_bids() | ||||||||||||||
| self._determine_file_mode() | ||||||||||||||
|
|
||||||||||||||
| if self.cache_directory is None: | ||||||||||||||
| self.cache_directory = _get_home_directory() | ||||||||||||||
| self._parent_run_directory = self.cache_directory / "runs" | ||||||||||||||
| # Ensure run directory and its parent exist | ||||||||||||||
| self._parent_run_directory.mkdir(exist_ok=True) | ||||||||||||||
| self._run_directory = self._parent_run_directory / self.run_id | ||||||||||||||
| self._run_directory.mkdir(exist_ok=True) | ||||||||||||||
|
|
||||||||||||||
| self.notifications_file_path.touch() | ||||||||||||||
| self.notifications_json_file_path.touch() | ||||||||||||||
|
|
||||||||||||||
| @property | ||||||||||||||
| def _parent_run_directory(self) -> pathlib.Path: | ||||||||||||||
| """The parent directory where all run-specific directories are stored.""" | ||||||||||||||
| return self.cache_directory / "runs" | ||||||||||||||
|
Comment on lines
+116
to
+119
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since it is only being set once, isn't it cleaner to set it when I originally did? (otherwise, this line gets re-called every time you access the property)
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Further, marking it as a PrivateAttr with the rest of the fields make it more obvious that is there and has all the other pydantic side effects you would expect
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It may be cleaner. However, what I see is that this value along with
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I don't know which side effects you are referring to. The main point having a private attribute in a Pydantic model is that you can keep information in the model that behaves very differently than a regular field, https://docs.pydantic.dev/latest/concepts/models/#private-model-attributes. |
||||||||||||||
|
|
||||||||||||||
| @property | ||||||||||||||
| def _run_directory(self) -> pathlib.Path: | ||||||||||||||
| """The directory specific to this run.""" | ||||||||||||||
| return self._parent_run_directory / self.run_id | ||||||||||||||
|
|
||||||||||||||
| @pydantic.computed_field | ||||||||||||||
| @property | ||||||||||||||
| def notifications_file_path(self) -> pathlib.Path: | ||||||||||||||
|
|
@@ -81,55 +136,3 @@ def notifications_json_file_path(self) -> pathlib.Path: | |||||||||||||
| """The file path leading to a JSON dump of the notifications.""" | ||||||||||||||
| notifications_file_path = self._run_directory / f"{self.run_id}_notifications.json" | ||||||||||||||
| return notifications_file_path | ||||||||||||||
|
|
||||||||||||||
| def _validate_existing_directory_as_bids(self) -> None: | ||||||||||||||
| bids_directory = self.bids_directory | ||||||||||||||
|
|
||||||||||||||
| dataset_description_file_path = bids_directory / "dataset_description.json" | ||||||||||||||
| current_directory_contents = { | ||||||||||||||
| path.stem for path in bids_directory.iterdir() if not path.name.startswith(".") | ||||||||||||||
| } - {"README", "CHANGES", "derivatives", "dandiset"} | ||||||||||||||
| if len(current_directory_contents) == 0: | ||||||||||||||
| default_dataset_description = {"BIDSVersion": "1.10"} | ||||||||||||||
| with dataset_description_file_path.open(mode="w") as file_stream: | ||||||||||||||
| json.dump(obj=default_dataset_description, fp=file_stream, indent=4) | ||||||||||||||
| return | ||||||||||||||
|
|
||||||||||||||
| if not dataset_description_file_path.exists(): | ||||||||||||||
| message = ( | ||||||||||||||
| f"The directory ({bids_directory}) exists and is not empty, but is not a valid BIDS dataset: " | ||||||||||||||
| "missing 'dataset_description.json'." | ||||||||||||||
| ) | ||||||||||||||
| raise ValueError(message) | ||||||||||||||
|
|
||||||||||||||
| with dataset_description_file_path.open(mode="r") as file_stream: | ||||||||||||||
| dataset_description = json.load(fp=file_stream) | ||||||||||||||
| if dataset_description.get("BIDSVersion", None) is None: | ||||||||||||||
| message = ( | ||||||||||||||
| f"The directory ({bids_directory}) exists but is not a valid BIDS dataset: " | ||||||||||||||
| "missing 'BIDSVersion' in 'dataset_description.json'." | ||||||||||||||
| ) | ||||||||||||||
| raise ValueError(message) | ||||||||||||||
|
|
||||||||||||||
| def _determine_file_mode(self) -> None: | ||||||||||||||
| if self.file_mode != "auto": | ||||||||||||||
| return | ||||||||||||||
|
|
||||||||||||||
| with tempfile.TemporaryDirectory(prefix="nwb2bids-") as temp_dir_str: | ||||||||||||||
| temp_dir_path = pathlib.Path(temp_dir_str) | ||||||||||||||
|
|
||||||||||||||
| # Create a test file to determine if symlinks are supported | ||||||||||||||
| test_file_path = temp_dir_path / "test_file.txt" | ||||||||||||||
| test_file_path.touch() | ||||||||||||||
|
|
||||||||||||||
| try: | ||||||||||||||
| # Create a symlink to the test file | ||||||||||||||
| (temp_dir_path / "test_symlink.txt").symlink_to(target=test_file_path) | ||||||||||||||
| except (OSError, PermissionError, NotImplementedError): # Windows can sometimes have trouble with symlinks | ||||||||||||||
| # TODO: log a INFO message here when logging is set up | ||||||||||||||
| self.file_mode = "copy" | ||||||||||||||
| return | ||||||||||||||
| else: | ||||||||||||||
| # If symlink creation was successful, return "symlink" | ||||||||||||||
| self.file_mode = "symlink" | ||||||||||||||
| return | ||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| import pathlib | ||
| import tempfile | ||
| import typing | ||
|
|
||
|
|
||
| def _determine_file_mode() -> typing.Literal["symlink", "copy"]: | ||
| """ | ||
| Determine what file mode to use in creating a BIDS dataset based on the system | ||
| """ | ||
| with tempfile.TemporaryDirectory(prefix="nwb2bids-") as temp_dir_str: | ||
| temp_dir_path = pathlib.Path(temp_dir_str) | ||
|
|
||
| # Create a test file to determine if symlinks are supported | ||
| test_file_path = temp_dir_path / "test_file.txt" | ||
| test_file_path.touch() | ||
|
|
||
| try: | ||
| # Create a symlink to the test file | ||
| (temp_dir_path / "test_symlink.txt").symlink_to(target=test_file_path) | ||
| except (OSError, PermissionError, NotImplementedError): # Windows can sometimes have trouble with symlinks | ||
| # TODO: log a INFO message here when logging is set up | ||
| return "copy" | ||
| else: | ||
| # If symlink creation was successful, return "symlink" | ||
| return "symlink" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might as well move this out like you did the file mode?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(into a separate file)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can do that. I don't have a strong preference. Do you want to
_core?However, I want you to know that
_validate_existing_directory_as_bidsis a field validator in theRunConfignow. The argument it takes, the values it returns, and the errors it raises, are dictated by Pydantic. I.e. It may not have any other use, so it may be a good idea to just keep it close the model that uses it, as it is really part of the model in some sense.