Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 0 additions & 19 deletions cohort_creator/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,25 +120,6 @@ def filter_excluded_participants(pth: Path, participants: list[str] | None) -> N
participants_df.to_csv(participants_tsv, sep="\t", index=False)


def copy_top_files(src_pth: Path, target_pth: Path, datatypes: list[str]) -> None:
"""Copy top files from BIDS src_pth to BIDS target_pth."""
top_files = ["dataset_description.json", "participants.*", "README*"]
if "func" in datatypes:
top_files.extend(["*task-*_events.tsv", "*task-*_events.json", "*task-*_bold.json"])
if "anat" in datatypes:
top_files.append("*T1w.json")

for top_file_ in top_files:
for f in src_pth.glob(top_file_):
if (target_pth / f.name).exists():
cc_log.debug(f" file already present:\n '{(target_pth / f.name)}'")
continue
try:
shutil.copy(src=f, dst=target_pth, follow_symlinks=True)
except FileNotFoundError:
cc_log.error(f" Could not find file '{f}'")


def check_tsv_content(tsv_file: Path | str) -> pd.DataFrame:
tsv_file = Path(tsv_file).resolve()
if not tsv_file.exists():
Expand Down
48 changes: 48 additions & 0 deletions cohort_creator/copy_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Module to handle copying data out of source datalad datasets."""

from __future__ import annotations

import shutil
from pathlib import Path

import pandas as pd

from cohort_creator.logger import cc_logger

# from datalad import api
# from datalad.support.exceptions import IncompleteResultsError


cc_log = cc_logger()


def copy_files(
    output_dir: Path,
    datasets: pd.DataFrame,
    participants: pd.DataFrame | None,
    dataset_types: list[str],
    datatypes: list[str],
    task: str,
    space: str,
    bids_filter: None | dict[str, dict[str, dict[str, str]]] = None,
) -> None:
    """Copy data for the requested cohort out of the source datalad datasets.

    Parameters
    ----------
    output_dir :
        Root of the cohort output (the yoda dataset the cohort is built in).
    datasets :
        One row per dataset; expected to carry at least ``DatasetID`` and
        ``PortalURI`` columns (see ``tests/test_copy_files.py``).
    participants :
        One row per participant/session to copy (``DatasetID``, ``SubjectID``,
        ``SessionID``), or ``None``.
    dataset_types :
        Dataset flavors to copy, e.g. ``["raw"]``.
    datatypes :
        BIDS datatypes to copy, e.g. ``["anat", "func"]``.
    task :
        Task filter; ``"*"`` for all tasks.
    space :
        Output space for derivatives; unused for raw datasets.
    bids_filter :
        Optional extra BIDS entity filters, keyed per dataset type / datatype.

    Returns
    -------
    None

    Notes
    -----
    Implementation is pending; currently a no-op.
    """
    # TODO(review): implement — see the copy_top_files helper below for the
    # top-level-file part of the copy.
    pass
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (code_refinement): Implementation of copy_files is pending; ensure it's completed.



def copy_top_files(src_pth: Path, target_pth: Path, datatypes: list[str]) -> None:
    """Copy top files from BIDS src_pth to BIDS target_pth.

    Always copies the dataset description, participants files and README;
    adds task-level sidecars when "func" is requested and the T1w sidecar
    when "anat" is requested. Files already present in target_pth are kept.
    """
    patterns = ["dataset_description.json", "participants.*", "README*"]
    if "func" in datatypes:
        patterns += ["*task-*_events.tsv", "*task-*_events.json", "*task-*_bold.json"]
    if "anat" in datatypes:
        patterns += ["*T1w.json"]

    for pattern in patterns:
        for source_file in src_pth.glob(pattern):
            if (target_pth / source_file.name).exists():
                cc_log.debug(f" file already present:\n '{(target_pth / source_file.name)}'")
                continue
            try:
                # follow_symlinks=True so datalad-annexed symlinks copy content
                shutil.copy(src=source_file, dst=target_pth, follow_symlinks=True)
            except FileNotFoundError:
                cc_log.error(f" Could not find file '{source_file}'")
2 changes: 1 addition & 1 deletion cohort_creator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

from cohort_creator._utils import (
add_study_tsv,
copy_top_files,
create_ds_description,
create_tsv_participant_session_in_datasets,
dataset_path,
Expand All @@ -42,6 +41,7 @@
sourcedata,
)
from cohort_creator.bagelify import bagelify, new_bagel
from cohort_creator.copy_files import copy_top_files
from cohort_creator.data.utils import is_known_dataset
from cohort_creator.logger import cc_logger

Expand Down
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import pytest
from datalad import api

from cohort_creator._cli import create_yoda


def root_dir():
return Path(__file__).parent.parent
Expand All @@ -19,6 +21,12 @@ def bids_examples():
return path_test_data() / "bids-examples"


@pytest.fixture
def output_dir(tmp_path):
    """Return a temporary directory initialized as a yoda output dataset."""
    yoda_dir = tmp_path
    create_yoda(output_dir=yoda_dir)
    return yoda_dir


@pytest.fixture
def install_dataset():
def _install_dataset(dataset_name: str):
Expand Down
8 changes: 0 additions & 8 deletions tests/test_cohort_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,11 @@
from __future__ import annotations

import pandas as pd
import pytest

from cohort_creator._cli import create_yoda
from cohort_creator._utils import sourcedata
from cohort_creator.main import construct_cohort, get_data, install_datasets


@pytest.fixture
def output_dir(tmp_path):
create_yoda(output_dir=tmp_path)
return tmp_path


def test_install_datasets(output_dir, caplog):
install_datasets(
datasets=["ds000001", "foo"],
Expand Down
47 changes: 47 additions & 0 deletions tests/test_copy_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Utilities."""

from __future__ import annotations

import pandas as pd

from cohort_creator.copy_files import copy_files
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Consider adding a test for copy_top_files function.

The test_copy_top_files function currently does not assert any outcomes or behaviors. Adding assertions to verify that the expected top files are copied correctly would enhance the test's effectiveness and coverage.

from cohort_creator.main import install_datasets


def test_copy_top_files(tmp_path):
    """copy_top_files copies BIDS top-level metadata files into the target.

    Uses a synthetic source layout so the test asserts actual behavior
    (the previous version installed a dataset but never called the function
    under test nor asserted anything).
    """
    src_pth = tmp_path / "src"
    src_pth.mkdir()
    target_pth = tmp_path / "target"
    target_pth.mkdir()

    (src_pth / "dataset_description.json").write_text("{}")
    (src_pth / "README").write_text("readme")
    (src_pth / "participants.tsv").write_text("participant_id\nsub-01\n")
    (src_pth / "task-main_bold.json").write_text("{}")
    # not requested via datatypes=["func"], must NOT be copied
    (src_pth / "T1w.json").write_text("{}")

    copy_top_files(src_pth=src_pth, target_pth=target_pth, datatypes=["func"])

    assert (target_pth / "dataset_description.json").exists()
    assert (target_pth / "README").exists()
    assert (target_pth / "participants.tsv").exists()
    assert (target_pth / "task-main_bold.json").exists()
    assert not (target_pth / "T1w.json").exists()


def test_copy_files(output_dir):
    """copy_files copies the requested participant's data into the study bids folder."""
    participants = pd.DataFrame(
        {"DatasetID": ["ds000001"], "SubjectID": ["sub-01"], "SessionID": [""]}
    )
    datasets = pd.DataFrame(
        {
            "DatasetID": ["ds000001"],
            "PortalURI": ["https://github.com/OpenNeuroDatasets-JSONLD/ds000001.git"],
        }
    )
    dataset_types = ["raw"]
    datatypes = ["anat"]
    install_datasets(
        datasets=["ds000001", "foo"], output_dir=output_dir, dataset_types=dataset_types
    )
    copy_files(
        output_dir=output_dir,
        datasets=datasets,
        participants=participants,
        dataset_types=dataset_types,
        datatypes=datatypes,
        space="not_used_for_raw",
        task="*",
    )

    # Assert on the subject actually requested above (sub-01), with the
    # correct ".nii.gz" extension — the old assertion checked
    # "sub-03_T1w.nii.gzz", which can never exist.
    assert (
        output_dir / "study-ds000001" / "bids" / "sub-01" / "anat" / "sub-01_T1w.nii.gz"
    ).exists()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Test test_copy_files lacks validation for non-existent files.

It would be beneficial to include a test case that validates the behavior when the expected output files do not exist after the copy_files operation. This could help in ensuring the function's robustness in handling errors or unexpected conditions.

Suggested change
).exists()
assert not (
output_dir / "study-ds000001" / "bids" / "sub-01" / "anat" / "sub-03_T1w.nii.gzz"
).exists()

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (testing): The assertion in test_copy_files may reference an incorrect file path.

The file path in the assertion (sub-03_T1w.nii.gzz) does not match the participant ID (sub-01) used in the test setup. This discrepancy could lead to false positives or negatives in test outcomes. Please verify the intended file path and participant ID.