-
Notifications
You must be signed in to change notification settings - Fork 2
[ENH] use datalad copy files #207
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| """Module to handle copying data out of source datalad datasets.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import shutil | ||
| from pathlib import Path | ||
|
|
||
| import pandas as pd | ||
|
|
||
| from cohort_creator.logger import cc_logger | ||
|
|
||
| # from datalad import api | ||
| # from datalad.support.exceptions import IncompleteResultsError | ||
|
|
||
|
|
||
| cc_log = cc_logger() | ||
|
|
||
|
|
||
| def copy_files( | ||
| output_dir: Path, | ||
| datasets: pd.DataFrame, | ||
| participants: pd.DataFrame | None, | ||
| dataset_types: list[str], | ||
| datatypes: list[str], | ||
| task: str, | ||
| space: str, | ||
| bids_filter: None | dict[str, dict[str, dict[str, str]]] = None, | ||
| ): | ||
| pass | ||
|
||
|
|
||
|
|
||
def copy_top_files(src_pth: Path, target_pth: Path, datatypes: list[str]) -> None:
    """Copy top files from BIDS src_pth to BIDS target_pth.

    Only files sitting directly in ``src_pth`` are considered (the glob is
    not recursive). The dataset description, participants files and README
    are always copied; ``func`` adds the top-level events / bold sidecars
    and ``anat`` adds the top-level T1w sidecar.

    Parameters
    ----------
    src_pth
        Root folder of the source BIDS dataset.
    target_pth
        Root folder of the target BIDS dataset; created if it is missing.
    datatypes
        BIDS datatypes of interest; only ``"func"`` and ``"anat"`` change
        which files are copied.

    Files already present in ``target_pth`` are left untouched. A source
    file that cannot be found when copying is logged as an error and skipped.
    """
    top_files = ["dataset_description.json", "participants.*", "README*"]
    if "func" in datatypes:
        top_files.extend(["*task-*_events.tsv", "*task-*_events.json", "*task-*_bold.json"])
    if "anat" in datatypes:
        top_files.append("*T1w.json")

    # Without an existing directory shutil.copy would either write every
    # source file to a single file named like the directory, or fail with a
    # FileNotFoundError that would be mis-reported as a missing source file.
    target_pth.mkdir(parents=True, exist_ok=True)

    for top_file_ in top_files:
        for f in src_pth.glob(top_file_):
            destination = target_pth / f.name
            if destination.exists():
                cc_log.debug(f" file already present:\n '{destination}'")
                continue
            try:
                # follow_symlinks=True copies the content the link points to
                # rather than re-creating the link in the target.
                shutil.copy(src=f, dst=destination, follow_symlinks=True)
            except FileNotFoundError:
                # Presumably a link whose content has not been fetched --
                # TODO confirm this is the only expected cause.
                cc_log.error(f" Could not find file '{f}'")
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,47 @@ | ||||||||||
| """Utilities.""" | ||||||||||
|
|
||||||||||
| from __future__ import annotations | ||||||||||
|
|
||||||||||
| import pandas as pd | ||||||||||
|
|
||||||||||
| from cohort_creator.copy_files import copy_files | ||||||||||
|
||||||||||
| from cohort_creator.main import install_datasets | ||||||||||
|
|
||||||||||
|
|
||||||||||
def test_copy_top_files(output_dir):
    """Smoke test: install the raw datasets that top-level files are copied from."""
    # TODO: call copy_top_files(src_pth=..., target_pth=..., datatypes=["anat"])
    # and assert on the copied files; for now only the installation is run.
    install_datasets(
        datasets=["ds000001", "foo"],
        output_dir=output_dir,
        dataset_types=["raw"],
    )
|
|
||||||||||
|
|
||||||||||
def test_copy_files(output_dir):
    """Check that copy_files puts the requested participant's T1w in the output."""
    participants = pd.DataFrame(
        {"DatasetID": ["ds000001"], "SubjectID": ["sub-01"], "SessionID": [""]}
    )
    datasets = pd.DataFrame(
        {
            "DatasetID": ["ds000001"],
            "PortalURI": ["https://github.com/OpenNeuroDatasets-JSONLD/ds000001.git"],
        }
    )
    dataset_types = ["raw"]
    datatypes = ["anat"]
    install_datasets(
        datasets=["ds000001", "foo"], output_dir=output_dir, dataset_types=dataset_types
    )
    copy_files(
        output_dir=output_dir,
        datasets=datasets,
        participants=participants,
        dataset_types=dataset_types,
        datatypes=datatypes,
        space="not_used_for_raw",
        task="*",
    )

    # The expected file must belong to the requested participant (sub-01)
    # and carry the regular ".nii.gz" extension.
    expected_file = (
        output_dir / "study-ds000001" / "bids" / "sub-01" / "anat" / "sub-01_T1w.nii.gz"
    )
    assert expected_file.exists()
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. suggestion (testing): Test `copy_files` for missing output files. It would be beneficial to include a test case that validates the behavior when the expected output files do not exist after the `copy_files` call.
Suggested change
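There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. issue (testing): The assertion in `test_copy_files` seems incorrect. The file path in the assertion (its subject label and file extension) does not match the participant requested by the test. |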
||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
suggestion (code_clarification): Consider documenting the `copy_files` function.