diff --git a/.gitignore b/.gitignore index 7c01937..9073dc3 100644 --- a/.gitignore +++ b/.gitignore @@ -125,3 +125,7 @@ dmypy.json .pyre/ .vscode/ + +# Test Data +physutils/tests/data/bids-dir +tmp.* diff --git a/physutils/io.py b/physutils/io.py index b6d460c..76a7f75 100644 --- a/physutils/io.py +++ b/physutils/io.py @@ -5,9 +5,11 @@ import importlib import json +import os import os.path as op import numpy as np +from bids import BIDSLayout from loguru import logger from physutils import physio @@ -15,13 +17,131 @@ EXPECTED = ["data", "fs", "history", "metadata"] +def load_from_bids( + bids_path, + subject, + session=None, + task=None, + run=None, + recording=None, + extension="tsv.gz", + suffix="physio", +): + """ + Load physiological data from BIDS-formatted directory + + Parameters + ---------- + bids_path : str + Path to BIDS-formatted directory + subject : str + Subject identifier + session : str + Session identifier + task : str + Task identifier + run : str + Run identifier + suffix : str + Suffix of file to load + + Returns + ------- + data : :class:`physutils.Physio` + Loaded physiological data + """ + + # check if file exists and is in BIDS format + if not op.exists(bids_path): + raise FileNotFoundError(f"Provided path {bids_path} does not exist") + + layout = BIDSLayout(bids_path, validate=False) + bids_file = layout.get( + subject=subject, + session=session, + task=task, + run=run, + suffix=suffix, + extension=extension, + recording=recording, + ) + logger.debug(f"BIDS file found: {bids_file}") + if len(bids_file) == 0: + raise FileNotFoundError( + f"No files found for subject {subject}, session {session}, task {task}, run {run}, recording {recording}" + ) + if len(bids_file) > 1: + raise ValueError( + f"Multiple files found for subject {subject}, session {session}, task {task}, run {run}, recording {recording}" + ) + + config_file = bids_file[0].get_metadata() + fs = config_file["SamplingFrequency"] + t_start = config_file["StartTime"] if "StartTime" in config_file else 0 + columns = config_file["Columns"] + logger.debug(f"Loaded structure contains columns: {columns}") + + physio_objects = {} + data = np.loadtxt(bids_file[0].path) + + if "time" in columns: + idx_0 = np.argmax(data[:, columns.index("time")] >= t_start) + else: + idx_0 = 0 + logger.warning( + "No time column found in file. Assuming data starts at the beginning of the file" + ) + + for col in columns: + col_physio_type = None + if any([x in col.lower() for x in ["cardiac", "ppg", "ecg", "card", "pulse"]]): + col_physio_type = "cardiac" + elif any( + [ + x in col.lower() + for x in ["respiratory", "rsp", "resp", "breath", "co2", "o2"] + ] + ): + col_physio_type = "respiratory" + elif any([x in col.lower() for x in ["trigger", "tr"]]): + col_physio_type = "trigger" + elif any([x in col.lower() for x in ["time"]]): + continue + else: + logger.warning( + f"Column {col}'s type cannot be determined. Additional features may be missing." + ) + + if col_physio_type in ["cardiac", "respiratory"]: + physio_objects[col] = physio.Physio( + data[idx_0:, columns.index(col)], + fs=fs, + history=[physio._get_call(exclude=[])], + ) + physio_objects[col]._physio_type = col_physio_type + physio_objects[col]._label = ( + bids_file[0].filename.split(".")[0].replace("_physio", "") + ) + + if col_physio_type == "trigger": + # TODO: Implement trigger loading using the MRI data object + logger.warning("MRI trigger characteristics extraction not yet implemented") + physio_objects[col] = physio.Physio( + data[idx_0:, columns.index(col)], + fs=fs, + history=[physio._get_call(exclude=[])], + ) + + return physio_objects + + def load_physio(data, *, fs=None, dtype=None, history=None, allow_pickle=False): """ Returns `Physio` object with provided data Parameters ---------- - data : str or array_like or Physio_like + data : str, os.path.PathLike or array_like or Physio_like Input physiological data. If array_like, should be one-dimensional fs : float, optional Sampling rate of `data`. Default: None @@ -46,7 +166,7 @@ def load_physio(data, *, fs=None, dtype=None, history=None, allow_pickle=False): # first check if the file was made with `save_physio`; otherwise, try to # load it as a plain text file and instantiate a history - if isinstance(data, str): + if isinstance(data, str) or isinstance(data, os.PathLike): try: inp = dict(np.load(data, allow_pickle=allow_pickle)) for attr in EXPECTED: diff --git a/physutils/physio.py b/physutils/physio.py index a0dc577..8c9ce8f 100644 --- a/physutils/physio.py +++ b/physutils/physio.py @@ -220,10 +220,12 @@ def new_physio_like( if suppdata is None: suppdata = ref_physio._suppdata if copy_suppdata else None - + label = ref_physio.label if copy_label else None physio_type = ref_physio.physio_type if copy_physio_type else None - computed_metrics = list(ref_physio.computed_metrics) if copy_computed_metrics else [] + computed_metrics = ( + dict(ref_physio.computed_metrics) if copy_computed_metrics else {} + ) # make new class out = ref_physio.__class__( @@ -340,7 +342,7 @@ def __init__( reject=np.empty(0, dtype=int), ) self._suppdata = None if suppdata is None else np.asarray(suppdata).squeeze() - self._computed_metrics = [] + self._computed_metrics = dict() def __array__(self): return self.data @@ -542,3 +544,49 @@ def neurokit2phys( metadata = dict(peaks=peaks) return cls(data, fs=fs, metadata=metadata, **kwargs) + + +class MRIConfig: + """ + Class to hold MRI configuration information + + Parameters + ---------- + slice_timings : 1D array_like + Slice timings in seconds + n_scans : int + Number of volumes in the MRI scan + tr : float + Repetition time in seconds + """ + + def __init__(self, slice_timings=None, n_scans=None, tr=None): + if np.ndim(slice_timings) > 1: + raise ValueError("Slice timings must be a 1-dimensional array.") + + self._slice_timings = np.asarray(slice_timings) + self._n_scans = int(n_scans) + self._tr = float(tr) + logger.debug(f"Initializing new MRIConfig object: {self}") + + def __str__(self): + return "{name}(n_scans={n_scans}, tr={tr})".format( + name=self.__class__.__name__, + n_scans=self._n_scans, + tr=self._tr, + ) + + @property + def slice_timings(self): + """Slice timings in seconds""" + return self._slice_timings + + @property + def n_scans(self): + """Number of volumes in the MRI scan""" + return self._n_scans + + @property + def tr(self): + """Repetition time in seconds""" + return self._tr diff --git a/physutils/tests/test_io.py b/physutils/tests/test_io.py index b420243..b64078a 100644 --- a/physutils/tests/test_io.py +++ b/physutils/tests/test_io.py @@ -7,7 +7,11 @@ import pytest from physutils import io, physio -from physutils.tests.utils import filter_physio, get_test_data_path +from physutils.tests.utils import ( + create_random_bids_structure, + filter_physio, + get_test_data_path, +) def test_load_physio(caplog): @@ -46,6 +50,43 @@ def test_load_physio(caplog): io.load_physio([1, 2, 3]) +def test_load_from_bids(): + create_random_bids_structure("physutils/tests/data", recording_id="cardiac") + phys_array = io.load_from_bids( + "physutils/tests/data/bids-dir", + subject="01", + session="01", + task="rest", + run="01", + recording="cardiac", + ) + + for col in phys_array.keys(): + assert isinstance(phys_array[col], physio.Physio) + # The data saved are the ones after t_0 = -3s + assert phys_array[col].data.size == 80000 + assert phys_array[col].fs == 10000.0 + assert phys_array[col].history[0][0] == "physutils.io.load_from_bids" + + +def test_load_from_bids_no_rec(): + create_random_bids_structure("physutils/tests/data") + phys_array = io.load_from_bids( + "physutils/tests/data/bids-dir", + subject="01", + session="01", + task="rest", + run="01", + ) + + for col in phys_array.keys(): + assert isinstance(phys_array[col], physio.Physio) + # The data saved are the ones after t_0 = -3s + assert phys_array[col].data.size == 80000 + assert phys_array[col].fs == 10000.0 + assert phys_array[col].history[0][0] == "physutils.io.load_from_bids" + + def test_save_physio(tmpdir): pckl = io.load_physio(get_test_data_path("ECG.phys"), allow_pickle=True) out = io.save_physio(tmpdir.join("tmp").purebasename, pckl) diff --git a/physutils/tests/utils.py b/physutils/tests/utils.py index 7a6998f..fe9c30f 100644 --- a/physutils/tests/utils.py +++ b/physutils/tests/utils.py @@ -2,9 +2,12 @@ Utilities for testing """ +import json +from os import makedirs from os.path import join as pjoin import numpy as np +import pandas as pd from pkg_resources import resource_filename from scipy import signal @@ -77,3 +80,91 @@ def filter_physio(data, cutoffs, method, *, order=3): filtered = physio.new_physio_like(data, signal.filtfilt(b, a, data)) return filtered + + +def create_random_bids_structure(data_dir, recording_id=None): + + dataset_description = { + "Name": "Example BIDS Dataset", + "BIDSVersion": "1.7.0", + "License": "", + "Authors": ["Author1", "Author2"], + "Acknowledgements": "", + "HowToAcknowledge": "", + "Funding": "", + "ReferencesAndLinks": "", + "DatasetDOI": "", + } + + physio_json = { + "SamplingFrequency": 10000.0, + "StartTime": -3, + "Columns": [ + "time", + "respiratory_chest", + "trigger", + "cardiac", + "respiratory_CO2", + "respiratory_O2", + ], + } + + # Create BIDS structure directory + subject_id = "01" + session_id = "01" + task_id = "rest" + run_id = "01" + recording_id = recording_id + + bids_dir = pjoin( + data_dir, "bids-dir", f"sub-{subject_id}", f"ses-{session_id}", "func" + ) + makedirs(bids_dir, exist_ok=True) + + # Create dataset_description.json + with open(pjoin(data_dir, "bids-dir", "dataset_description.json"), "w") as f: + json.dump(dataset_description, f, indent=4) + + if recording_id is not None: + filename_body = f"sub-{subject_id}_ses-{session_id}_task-{task_id}_run-{run_id}_recording-{recording_id}" + else: + filename_body = f"sub-{subject_id}_ses-{session_id}_task-{task_id}_run-{run_id}" + + # Create physio.json + with open( + pjoin( + bids_dir, + f"{filename_body}_physio.json", + ), + "w", + ) as f: + json.dump(physio_json, f, indent=4) + + # Initialize tsv file with random data columns and a time column + num_rows = 100000 + num_cols = 6 + time_offset = 2 + time = ( + np.arange(num_rows) / physio_json["SamplingFrequency"] + + physio_json["StartTime"] + - time_offset + ) + data = np.column_stack((time, np.random.rand(num_rows, num_cols - 1).round(8))) + df = pd.DataFrame(data) + + # Compress dataframe into tsv.gz + tsv_gz_file = pjoin( + bids_dir, + f"{filename_body}_physio.tsv.gz", + ) + + df.to_csv( + tsv_gz_file, + sep="\t", + index=False, + header=False, + float_format="%.8e", + compression="gzip", + ) + + return bids_dir diff --git a/setup.cfg b/setup.cfg index df85b76..05d8cb3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -21,10 +21,11 @@ provides = [options] python_requires = >=3.6.1 install_requires = - matplotlib >=3.9 + matplotlib numpy >=1.9.3 scipy loguru + pybids tests_require = pytest >=3.6 test_suite = pytest