Skip to content

Commit

Permalink
Fix run_id for BIDS dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
sinhaharsh committed Feb 9, 2024
1 parent 435fc76 commit eae4be7
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 8 deletions.
26 changes: 23 additions & 3 deletions MRdataset/bids.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from abc import ABC
from pathlib import Path

from protocol import BidsImagingSequence
from re import search

from MRdataset import logger
from MRdataset.base import BaseDataset
from MRdataset.config import VALID_BIDS_DATATYPES
from MRdataset.dicom_utils import is_bids_file
from MRdataset.utils import folders_with_min_files, valid_dirs, read_json
from protocol import BidsImagingSequence


class BidsDataset(BaseDataset, ABC):
Expand Down Expand Up @@ -100,6 +100,7 @@ def _process(self, folder):
"""Processes the folder and returns a list of sequences."""
json_files = self._filter_json_files(folder)
sequences = []
last_id = 0
for i, file in enumerate(json_files):
try:
seq = BidsImagingSequence(bidsfile=file, path=folder)
Expand All @@ -121,11 +122,30 @@ def _process(self, folder):

# None of the datasets we processed (over 20) had run information,
# even though BIDS allows it. So the run id is read from the filename
# when present, and falls back to sequential run-0x ids otherwise.
run_id = f'run-{str(i + 1).zfill(2)}'
run_id, last_id = self.get_run_id(file, last_id)
seq.set_session_info(subject_id=subject_id,
session_id=session_id,
run_id=run_id,
name=name)
if seq.is_valid():
sequences.append(seq)
return sequences

@staticmethod
def get_run_id(filename, last_id):
    """
    Use regex to extract run id from filename.
    Example filename : sub-01_ses-imagery01_task-imagery_run-01_bold.json

    Only the final path component is inspected, so a parent directory
    whose name happens to contain ``run-<n>`` is never mistaken for the
    run entity of the file itself.

    Parameters
    ----------
    filename : str or Path
        Path (or bare name) of the file to inspect.
    last_id : int
        Numeric run id returned for the previously processed file; used
        to generate the next id when the filename carries none.

    Returns
    -------
    tuple
        ``(run_id, id_num)``. ``run_id`` is the matched ``run-<digits>``
        text exactly as it appears in the filename, or a generated
        zero-padded ``run-XX`` string. ``id_num`` is its integer value,
        to be passed back as ``last_id`` on the next call.
    """
    # Regular expression pattern
    pattern = r'run-\d+'
    # Match against the basename only; directories are not BIDS entities
    # of the file and must not contribute a run id.
    basename = Path(str(filename)).name
    match = search(pattern, basename)

    if match:
        run_id = match.group(0)
        new_id_num = int(run_id.split('-')[-1])
    else:
        # No run entity in the filename: continue the running counter.
        new_id_num = last_id + 1
        run_id = f'run-{str(new_id_num).zfill(2)}'
    return run_id, new_id_num
10 changes: 9 additions & 1 deletion MRdataset/dicom_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
""" Utility functions for dicom files """
import warnings
from pathlib import Path
from re import search
from typing import Union

import dicom2nifti
import pydicom

from MRdataset import logger

with warnings.catch_warnings():
Expand All @@ -29,6 +29,14 @@ def is_bids_file(filename: Union[str, Path]):
# TODO: Add some criteria to skip certain files
if 'derivatives' in str(filename):
return False

# Regular expression pattern
pattern = r'sub-\d+'
# Extracting substring using regex
match = search(pattern, str(filename))
if not match:
return False

return True


Expand Down
8 changes: 4 additions & 4 deletions MRdataset/tests/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pathlib import Path

import pydicom

from MRdataset.dicom_utils import is_bids_file
from MRdataset.tests.config import compl_data_xnat
from MRdataset.utils import convert2ascii

Expand Down Expand Up @@ -100,16 +100,16 @@ def make_compliant_bids_dataset(num_subjects,
echo_train_length,
flip_angle) -> Path:
src_dir, dest_dir = setup_directories(sample_bids_dataset())
json_list = list(src_dir.glob('**/*.json'))
json_list = filter(is_bids_file, src_dir.glob('**/*.json'))
subject_names = set()
i = -1

while len(subject_names) < num_subjects:
i += 1

try:
filepath = json_list[i]
except IndexError:
filepath = next(json_list)
except StopIteration:
break

try:
Expand Down

0 comments on commit eae4be7

Please sign in to comment.