-
Notifications
You must be signed in to change notification settings - Fork 2
[ENH] use datalad copy files #207
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| """Module to handle copying data out of source datalad datasets.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from pathlib import Path | ||
|
|
||
| from datalad import api | ||
|
|
||
| from cohort_creator._utils import ( | ||
| get_filters, | ||
| list_all_files_with_filter, | ||
| no_files_found_msg, | ||
| ) | ||
| from cohort_creator.logger import cc_logger | ||
|
|
||
| cc_log = cc_logger() | ||
|
|
||
|
|
||
def copy_top_files(src_pth: Path, target_pth: Path, datatypes: list[str]) -> None:
    """Copy the top-level files of the BIDS dataset src_pth into target_pth.

    The dataset description, the participants files and the README files are
    always looked for. Task-level events / bold sidecars are added when
    "func" is among the datatypes, and the T1w sidecar when "anat" is.

    Parameters
    ----------
    src_pth : Path
        Folder whose direct children are globbed for top-level files.
    target_pth : Path
        Folder the matching files are copied into.
    datatypes : list[str]
        Requested datatypes; only "func" and "anat" add extra patterns.

    Notes
    -----
    A file whose name already exists in target_pth is skipped.
    A FileNotFoundError raised by the copy is logged and not re-raised.
    """
    patterns = ["dataset_description.json", "participants.*", "README*"]
    if "func" in datatypes:
        patterns += ["*task-*_events.tsv", "*task-*_events.json", "*task-*_bold.json"]
    if "anat" in datatypes:
        patterns += ["*T1w.json"]

    for pattern in patterns:
        for source_file in src_pth.glob(pattern):
            destination = target_pth / source_file.name
            if destination.exists():
                # Never overwrite something that is already in the target.
                cc_log.debug(f" file already present:\n '{destination}'")
                continue
            try:
                api.copy_file(path=source_file, target_dir=target_pth)
            except FileNotFoundError:
                cc_log.error(f" Could not find file '{source_file}'")
|
|
||
|
|
||
def copy_this_subject(
    subject: str,
    datatypes: list[str],
    dataset_type: str,
    src_pth: Path,
    target_pth: Path,
    space: str = "MNI152NLin2009cAsym",
    task: str = "*",
    sessions: list[str] | list[None] | None = None,
    bids_filter: None | dict[str, dict[str, dict[str, str]]] = None,
) -> None:
    """Copy the files of one subject from src_pth into target_pth.

    For each requested datatype, the filters are built with ``get_filters``
    and the matching files are listed with ``list_all_files_with_filter``.
    Every listed file is then copied with ``datalad.api.copy_file`` into the
    matching sub-folder of ``target_pth``, which is created when missing.

    Parameters
    ----------
    subject : str
        Subject label forwarded to the file listing (e.g. "sub-01").
    datatypes : list[str]
        Datatypes to copy; each one is processed independently.
    dataset_type : str
        Forwarded to ``get_filters`` and ``list_all_files_with_filter``.
    src_pth : Path
        Folder the files are listed from.
    target_pth : Path
        Folder the files are copied into.
    space : str
        Forwarded to the file listing.
    task : str
        Forwarded to the file listing.
    sessions : list[str] | list[None] | None
        Forwarded to the file listing; ``None`` is replaced by ``[None]``.
    bids_filter : dict | None
        Forwarded to ``get_filters``.

    Notes
    -----
    A datatype with no matching file only triggers a warning.
    A file already present in the target is skipped.
    A FileNotFoundError raised by the copy is logged and not re-raised.
    """
    if sessions is None:
        sessions = [None]

    # The root only depends on src_pth, so compute it once, outside the loops.
    # When the source lives in a "derivatives" folder, everything from
    # "/derivatives" onwards is dropped before resolving the listed files.
    # NOTE(review): presumably the listed files are relative to the dataset
    # that contains the derivatives folder - confirm against
    # list_all_files_with_filter.
    # as_posix() keeps the "/derivatives" split working where the OS path
    # separator is not "/".
    dataset_root = src_pth
    if "derivatives" in str(dataset_root):
        dataset_root = Path(Path(dataset_root).as_posix().split("/derivatives")[0])

    for datatype_ in datatypes:
        filters = get_filters(
            dataset_type=dataset_type, datatype=datatype_, bids_filter=bids_filter
        )
        files = list_all_files_with_filter(
            data_pth=src_pth,
            dataset_type=dataset_type,
            filters=filters,
            subject=subject,
            sessions=sessions,
            datatype=datatype_,
            task=task,
            space=space,
        )
        if not files:
            cc_log.warning(no_files_found_msg(src_pth, subject, datatype_, filters))
            continue

        cc_log.debug(f" {subject} - copying files:\n {files}")

        for f in files:
            # Mirror the folder of the file inside the target.
            target_dir = target_pth / Path(f).parent
            target_dir.mkdir(exist_ok=True, parents=True)
            if (target_pth / f).exists():
                cc_log.debug(f" file already present:\n '{f}'")
                continue
            try:
                api.copy_file(path=dataset_root / f, target_dir=target_dir)
            except FileNotFoundError:
                cc_log.error(f" Could not find file '{f}' in {dataset_root}")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| """Utilities.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from cohort_creator._utils import sourcedata | ||
| from cohort_creator.copy_files import copy_this_subject, copy_top_files | ||
| from cohort_creator.main import install_datasets | ||
|
|
||
|
|
||
def test_copy_top_files(output_dir):
    """Check that top-level files of ds000001 land in the study bids folder."""
    install_datasets(
        datasets=["ds000001", "foo"], output_dir=output_dir, dataset_types=["raw"]
    )
    bids_dir = output_dir / "study-ds000001" / "bids"

    copy_top_files(
        src_pth=sourcedata(output_dir) / "ds000001",
        target_pth=bids_dir,
        datatypes=["anat", "func"],
    )

    # One functional sidecar plus the two files that are always requested.
    for expected in (
        "task-balloonanalogrisktask_bold.json",
        "README",
        "dataset_description.json",
    ):
        assert (bids_dir / expected).exists()
|
|
||
|
|
||
def test_copy_this_subject(output_dir):
    """Check that the T1w image of sub-01 is copied into the study bids folder."""
    dataset_type = "raw"
    install_datasets(
        datasets=["ds000001"], output_dir=output_dir, dataset_types=[dataset_type]
    )
    bids_dir = output_dir / "study-ds000001" / "bids"

    copy_this_subject(
        subject="sub-01",
        datatypes=["anat"],
        dataset_type=dataset_type,
        src_pth=sourcedata(output_dir) / "ds000001",
        target_pth=bids_dir,
    )

    copied_t1w = bids_dir / "sub-01" / "anat" / "sub-01_T1w.nii.gz"
    assert copied_t1w.exists()
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. issue (testing): The assertion in The file path in the assertion ( |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| from pathlib import Path | ||
|
|
||
| from cohort_creator._cli import create_yoda | ||
| from cohort_creator._utils import sourcedata | ||
| from cohort_creator.copy_files import copy_this_subject | ||
| from cohort_creator.main import install_datasets | ||
|
|
||
# Scratch script: exercise copy_this_subject on one subject of ds000001.
# Everything is written under a "tmp" folder in the current working directory.
output_dir = Path.cwd() / "tmp"
# NOTE(review): presumably sets up the YODA folder layout in output_dir - confirm.
create_yoda(output_dir)
dataset_types = ["raw"]
datatypes = ["anat"]
# The dataset has to be installed before any of its files can be copied.
install_datasets(datasets=["ds000001"], output_dir=output_dir, dataset_types=dataset_types)
# Copy only the "anat" files of sub-01 into the study's bids folder.
copy_this_subject(
    subject="sub-01",
    datatypes=datatypes,
    dataset_type=dataset_types[0],
    src_pth=sourcedata(output_dir) / "ds000001",
    target_pth=output_dir / "study-ds000001" / "bids",
)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
suggestion (testing): Test `test_copy_files` lacks validation for non-existent files.
It would be beneficial to include a test case that validates the behavior when the expected output files do not exist after the `copy_files` operation. This could help ensure the function's robustness in handling errors or unexpected conditions.