diff --git a/.github/workflows/codacy.yml b/.github/workflows/codacy.yml index 67dfc76..ce958e9 100644 --- a/.github/workflows/codacy.yml +++ b/.github/workflows/codacy.yml @@ -30,7 +30,7 @@ jobs: permissions: contents: read # for actions/checkout to fetch code security-events: write # for github/codeql-action/upload-sarif to upload SARIF results - actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status name: Codacy Security Scan runs-on: ubuntu-latest steps: diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 08894d5..ced3824 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -32,10 +32,28 @@ jobs: pip install . - name: Lint with flake8 run: | + make lint # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | pytest +# - name: Coverage +# run: | +# coverage run --rcfile=.coveragerc --source mrQA -m pytest +# coverage report -m +# coverage xml +# - name: Run codacy-coverage-reporter +# uses: codacy/codacy-coverage-reporter-action@v1 +# with: +# # project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} +# # or +# api-token: ${{ secrets.CODACY_API_TOKEN }} +# organization-provider: gh +# username: sinhaharsh +# project-name: mrQA +# coverage-reports: coverage.xml +# # or a comma-separated list for multiple reports +# # coverage-reports: , diff --git a/.gitignore b/.gitignore index 3e25d06..bb053a0 100644 --- a/.gitignore +++ b/.gitignore @@ -106,4 +106,8 @@ ENV/ /update_today.txt # codacy -results.sarif +*.sarif + +# mri protocol +*.xml +*.secrets diff --git a/Makefile b/Makefile index 6049484..76eb8a9 100644 --- a/Makefile +++ b/Makefile @@ -62,8 +62,12 @@ coverage: ## check code coverage quickly with the default Python coverage run --rcfile=.coveragerc --source mrQA -m pytest coverage report -m coverage html + coverage xml $(BROWSER) htmlcov/index.html +act: + act --secret-file .secrets + docs: ## generate Sphinx HTML documentation, including API docs $(MAKE) -C docs clean $(MAKE) -C docs html @@ -82,3 +86,11 @@ dist: clean ## builds source and wheel package install: clean ## install the package to the active Python's site-packages python setup.py install + +merge: + git switch mrds-issue-12 + git push + git switch master + git merge mrds-issue-12 + git push origin master + git switch mrds-issue-12 diff --git a/README.rst b/README.rst index 5053413..9d4bf4a 100644 --- a/README.rst +++ b/README.rst @@ -7,8 +7,8 @@ mrQA : automatic protocol compliance checks on MR datasets .. image:: https://app.codacy.com/project/badge/Grade/8cd263e1eaa0480d8fac50eba0094401 :target: https://app.codacy.com/gh/sinhaharsh/mrQA/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade -.. 
image:: https://github.com/sinhaharsh/mrQA/actions/workflows/continuos-integration.yml/badge.svg - :target: https://github.com/sinhaharsh/mrQA/actions/workflows/continuos-integration.yml +.. image:: https://github.com/sinhaharsh/mrQA/actions/workflows/continuous-integration.yml/badge.svg + :target: https://github.com/sinhaharsh/mrQA/actions/workflows/continuous-integration.yml .. image:: https://raw.githubusercontent.com/jupyter/design/master/logos/Badges/nbviewer_badge.svg diff --git a/mrQA/base.py b/mrQA/base.py index 9174011..9151a24 100644 --- a/mrQA/base.py +++ b/mrQA/base.py @@ -1,6 +1,7 @@ import json import tempfile from abc import ABC, abstractmethod +from datetime import timedelta from typing import List from MRdataset import valid_dirs @@ -213,8 +214,28 @@ def add_sequence_pair_names(self, list_seqs): """ self._vt_sequences.add(list_seqs) + def _is_scanned_before(self, date, seq): + # Provide an option to include those subjects that were + # scanned after the given date + content_date = seq['ContentDate'].get_value() + # Suppose date for report generation is 2023-11-21 01:00:00 am + # However content date doesn't have time information, so it is + # 2023-11-21 00:00:00 am. Now, if we compare the two dates, date for + # report generation will always be greater than content date, + # even though the scan could have been performed on the same day. + # Hence, we add 1 day to content date, so that the two dates + # can be compared. + + # A better option is to use content time, but not all scanners + # provide content time. Hence, we use content date + 1 day. This means + # that the scan will be skipped only if it was performed at least + # 1 day before the date of report generation. + if date >= content_date + timedelta(days=1): + return True + return False + def generate_hz_log(self, parameters, suppl_params, filter_fn=None, - verbosity=1): + verbosity=1, date=None): sequences = self.get_sequence_ids() nc_log = {} for seq_id in sequences: @@ -224,25 +245,37 @@ def generate_hz_log(self, parameters, suppl_params, filter_fn=None, if param_name not in nc_log: # empty nc_log[param_name] = [] - nc_dict = {} - nc_dict['subject'] = sub - nc_dict['sequence_name'] = seq_id + if self._is_scanned_before(date, seq): + continue + nc_dict = self._populate_nc_dict(param_tuple=param_tupl, + sub=sub, path=path, + seq=seq, seq_ids=seq_id, + suppl_params=suppl_params, + verbosity=verbosity) + nc_log[param_name].append(nc_dict) + return nc_log - # if additional parameters have to be included in the log - if suppl_params: - for i in suppl_params: - nc_dict[i] = seq[i].get_value() + def _populate_nc_dict(self, param_tuple, seq_ids, sub, path, seq, + suppl_params, verbosity): - if verbosity > 1: - nc_dict['values'] = [p.get_value() for p in param_tupl] - if verbosity > 2: - nc_dict['path'] = str(path) + nc_dict = {} + nc_dict['date'] = str(seq['ContentDate'].get_value().date()) + nc_dict['subject'] = sub + nc_dict['sequence_name'] = seq_ids - nc_log[param_name].append(nc_dict) - return nc_log + # if additional parameters have to be included in the log + if suppl_params: + for i in suppl_params: + nc_dict[i] = seq[i].get_value() + + if verbosity > 1: + nc_dict['values'] = [p.get_value() for p in param_tuple] + if verbosity > 2: + nc_dict['path'] = str(path) + return nc_dict def generate_nc_log(self, parameters, filter_fn=None, output_dir=None, - suppl_params=None, audit='vt', verbosity=1): + suppl_params=None, audit='vt', verbosity=1, date=None): """ Generate a log of all non-compliant parameters in the 
dataset. Apart from returning the log, it also dumps the log as a json file @@ -250,11 +283,11 @@ def generate_nc_log(self, parameters, filter_fn=None, output_dir=None, nc_log = {} if audit == 'hz': nc_log = self.generate_hz_log(parameters, suppl_params, - filter_fn, verbosity) + filter_fn, verbosity, date=date) filename = self.name + '_hz_log.json' elif audit == 'vt': nc_log = self.generate_vt_log(parameters, suppl_params, - filter_fn, verbosity) + filter_fn, verbosity, date=date) filename = self.name + '_vt_log.json' if audit not in ['vt', 'hz']: raise ValueError('Expected one of [vt, hz], got {}'.format(audit)) @@ -267,7 +300,7 @@ def generate_nc_log(self, parameters, filter_fn=None, output_dir=None, return nc_log def generate_vt_log(self, parameters, suppl_params, filter_fn=None, - verbosity=1): + verbosity=1, date=None): nc_log = {} sequence_pairs = self.get_vt_sequences() @@ -276,22 +309,22 @@ def generate_vt_log(self, parameters, suppl_params, filter_fn=None, # want to highlight the issues in field-map and epi sequences. for pair in filter(filter_fn, sequence_pairs): for param_name in parameters: - for param_tupl, sub, path, seq in self.get_vt_param_values( - pair, param_name): + for param_tuple, sub, path, seq in self.get_vt_param_values( + pair, param_name): if param_name not in nc_log: # empty nc_log[param_name] = [] - nc_dict = {} - nc_dict['subject'] = sub - nc_dict['sequence_names'] = pair - - if verbosity > 1: - nc_dict['values'] = [p.get_value() for p in param_tupl] - if verbosity > 2: - nc_dict['path'] = str(path) + # Provide a date to include those subjects that were + # scanned after the given date + if self._is_scanned_before(date, seq): + continue + nc_dict = self._populate_nc_dict(param_tuple=param_tuple, + sub=sub, path=path, + seq=seq, seq_ids=pair, + suppl_params=suppl_params, + verbosity=verbosity) nc_log[param_name].append(nc_dict) - return nc_log def get_nc_param_ids(self, seq_id): diff --git a/mrQA/cli.py b/mrQA/cli.py index 7fe4f58..84602b7 100644 --- a/mrQA/cli.py +++ b/mrQA/cli.py @@ -8,7 +8,7 @@ from mrQA import check_compliance from mrQA import logger -from mrQA.config import PATH_CONFIG +from mrQA.config import PATH_CONFIG, THIS_DIR from mrQA.utils import is_writable @@ -27,7 +27,8 @@ def get_parser(): help='directory containing downloaded dataset with ' 'dicom files, supports nested hierarchies') required.add_argument('--config', type=str, - help='path to config file') + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') optional.add_argument('-o', '--output-dir', type=str, help='specify the directory where the report' ' would be saved. 
By default, the --data_source ' diff --git a/mrQA/config.py b/mrQA/config.py index 715fc22..f2b3073 100644 --- a/mrQA/config.py +++ b/mrQA/config.py @@ -6,6 +6,8 @@ from MRdataset.config import MRException from protocol import UnspecifiedType +THIS_DIR = Path(__file__).parent.resolve() + def configure_logger(log, output_dir, mode='w', level='WARNING'): """ @@ -34,14 +36,14 @@ def configure_logger(log, output_dir, mode='w', level='WARNING'): output_dir.mkdir(parents=True, exist_ok=True) options = { - "warn": { - 'level': logging.WARN, - 'file': output_dir / 'warn.log', + "warn" : { + 'level' : logging.WARN, + 'file' : output_dir / 'warn.log', 'formatter': warn_formatter }, "error": { - 'level': logging.ERROR, - 'file': output_dir / 'error.log', + 'level' : logging.ERROR, + 'file' : output_dir / 'error.log', 'formatter': error_formatter } } @@ -64,12 +66,13 @@ def configure_logger(log, output_dir, mode='w', level='WARNING'): PATH_CONFIG = { 'data_source': Path.home() / 'scan_data', - 'output_dir': Path.home() / 'mrqa_reports', + 'output_dir' : Path.home() / 'mrqa_reports', } DATE_SEPARATOR = '_DATE_' ATTRIBUTE_SEPARATOR = '_ATTR_' - +DATETIME_FORMAT = '%m_%d_%Y_%H_%M_%S' +DATE_FORMAT = '%m_%d_%Y' Unspecified = UnspecifiedType() @@ -78,9 +81,9 @@ def past_records_fpath(folder): return Path(folder / 'past_record.txt') -def status_fpath(folder): +def status_fpath(folder, audit): """Constructs the path to the status file""" - return Path(folder / 'non_compliance_log.txt') + return Path(folder / f'{audit}_non_compliance_log.txt') def report_fpath(folder_path, fname): @@ -105,6 +108,7 @@ def __init__(self, name): super().__init__( f"Could not compute majority for {name}") + # # class ReferenceNotSetForModality(MRException): # """Custom error that is raised when majority cannot be computed.""" diff --git a/mrQA/formatter.py b/mrQA/formatter.py index b811063..b106263 100644 --- a/mrQA/formatter.py +++ b/mrQA/formatter.py @@ -160,9 +160,9 @@ def collect_hz_audit_results(self, logger.error('Reference protocol is empty. Cannot generate' ' report for horizontal audit.') self.skip_hz_report = True - if not (compliant_ds.get_sequence_ids() or - non_compliant_ds.get_sequence_ids() or - undetermined_ds.get_sequence_ids()): + if not (compliant_ds.get_sequence_ids() + or non_compliant_ds.get_sequence_ids() + or undetermined_ds.get_sequence_ids()): logger.error('It seems the dataset has not been checked for ' 'horizontal audit. Skipping horizontal audit report') self.skip_hz_report = True @@ -214,8 +214,8 @@ def collect_vt_audit_results(self, logger.error('No sequences found in dataset. Cannot generate' 'report') self.skip_vt_report = True - if not (compliant_ds.get_sequence_ids() or - non_compliant_ds.get_sequence_ids()): + if not (compliant_ds.get_sequence_ids() + or non_compliant_ds.get_sequence_ids()): logger.error('It seems the dataset has not been checked for ' 'vertical audit. 
Skipping vertical audit report') self.skip_vt_report = True diff --git a/mrQA/monitor.py b/mrQA/monitor.py index 3e99aa5..e5e90e0 100644 --- a/mrQA/monitor.py +++ b/mrQA/monitor.py @@ -1,13 +1,14 @@ """Console script for mrQA.""" import argparse import sys +from datetime import datetime, timedelta from pathlib import Path from typing import Union, List from MRdataset import import_dataset, load_mr_dataset from mrQA import logger -from mrQA.config import PATH_CONFIG +from mrQA.config import PATH_CONFIG, THIS_DIR, DATETIME_FORMAT from mrQA.project import check_compliance from mrQA.utils import is_writable, folders_modified_since, \ get_last_valid_record, log_latest_non_compliance @@ -30,7 +31,8 @@ def get_parser(): help='directory containing downloaded dataset with ' 'dicom files, supports nested hierarchies') required.add_argument('--config', type=str, - help='path to config file') + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') optional.add_argument('-o', '--output-dir', type=str, help='specify the directory where the report' ' would be saved. By default, the --data_source ' @@ -120,7 +122,8 @@ def parse_args(): return args -def main(): +def cli(): + """Console script for mrQA monitor.""" args = parse_args() monitor(name=args.name, data_source=args.data_source, @@ -170,6 +173,8 @@ def monitor(name: str, """ output_dir = Path(output_dir) last_record = get_last_valid_record(output_dir) + last_reported_on = None + if last_record: last_reported_on, last_report_path, last_mrds_path = last_record # TODO: delete old logs, only keep latest 3-4 reports in the folder @@ -203,21 +208,32 @@ def monitor(name: str, output_dir=output_dir) new_dataset = None - compliance_summary_dict, _ = check_compliance(dataset=dataset, - output_dir=output_dir, - decimals=decimals, - verbose=verbose, - tolerance=tolerance, - reference_path=reference_path, - config_path=config_path) - - log_latest_non_compliance( - ncomp_data=compliance_summary_dict['non_compliant'], - latest_data=new_dataset, - output_dir=output_dir, ) + if last_reported_on is None: + # if this is the first time, set last_reported_on to 1 year ago + last_reported_on = datetime.now() - timedelta(days=365) + last_reported_on = last_reported_on.strftime(DATETIME_FORMAT) + + hz_audit_results, vt_audit_results = check_compliance( + dataset=dataset, + output_dir=output_dir, + decimals=decimals, + verbose=verbose, + tolerance=tolerance, + reference_path=reference_path, + config_path=config_path) + + log_latest_non_compliance(dataset=hz_audit_results['non_compliant'], + config_path=config_path, + output_dir=output_dir, audit='hz', + date=last_reported_on) + + log_latest_non_compliance(dataset=vt_audit_results['non_compliant'], + config_path=config_path, + output_dir=output_dir, audit='vt', + date=last_reported_on) return if __name__ == '__main__': - sys.exit(main()) # pragma: no cover + sys.exit(cli()) # pragma: no cover diff --git a/mrQA/parallel_utils.py b/mrQA/parallel_utils.py index c092f18..4f98e29 100644 --- a/mrQA/parallel_utils.py +++ b/mrQA/parallel_utils.py @@ -188,7 +188,7 @@ def _create_slurm_script(output_script_path: Union[str, Path], # Add flags to python command if verbose: python_cmd += ' --verbose' - python_cmd += ' --is_partial' + python_cmd += ' --is-partial' # Create the slurm script file with open(output_script_path, 'w', encoding='utf-8') as fp: diff --git a/mrQA/project.py b/mrQA/project.py index 4a4086e..e3e6e48 100644 --- a/mrQA/project.py +++ b/mrQA/project.py @@ -164,11 +164,11 @@ def 
horizontal_audit(dataset: BaseDataset, compliant_ds, non_compliant_ds, undetermined_ds = _init_datasets(dataset) eval_dict = { - 'complete_ds': dataset, - 'reference': ref_protocol, - 'compliant': compliant_ds, + 'complete_ds' : dataset, + 'reference' : ref_protocol, + 'compliant' : compliant_ds, 'non_compliant': non_compliant_ds, - 'undetermined': undetermined_ds, + 'undetermined' : undetermined_ds, } if not (ref_protocol and hz_audit_config): @@ -274,11 +274,11 @@ def vertical_audit(dataset: BaseDataset, report_type='vt') compliant_ds, non_compliant_ds, _ = _init_datasets(dataset) eval_dict = { - 'complete_ds': dataset, - 'compliant': compliant_ds, - 'non_compliant': non_compliant_ds, + 'complete_ds' : dataset, + 'compliant' : compliant_ds, + 'non_compliant' : non_compliant_ds, 'sequence_pairs': [], - 'parameters': [] + 'parameters' : [] } if not vt_audit_config: return eval_dict @@ -290,8 +290,8 @@ def vertical_audit(dataset: BaseDataset, # If no sequence pairs are provided, then compare all possible pairs if chosen_pairs is None: - logger.warn('No sequence pairs provided. Comparing all possible ' - 'sequence pairs.') + logger.warning('No sequence pairs provided. Comparing all possible ' + 'sequence pairs.') chosen_pairs = list(combinations(dataset.get_sequence_ids(), 2)) # check pair are queryable, all the pairs are not present # throw an error if any of the pair is not present @@ -344,11 +344,11 @@ def vertical_audit(dataset: BaseDataset, ) # TODO: add option for num_sequences > 2 eval_dict = { - 'complete_ds': dataset, - 'compliant': compliant_ds, - 'non_compliant': non_compliant_ds, + 'complete_ds' : dataset, + 'compliant' : compliant_ds, + 'non_compliant' : non_compliant_ds, 'sequence_pairs': used_pairs, - 'parameters': include_params + 'parameters' : include_params } return eval_dict diff --git a/mrQA/run_parallel.py b/mrQA/run_parallel.py index e92589e..6cd27c9 100644 --- a/mrQA/run_parallel.py +++ b/mrQA/run_parallel.py @@ -9,7 +9,7 @@ from mrQA import check_compliance from mrQA import logger -from mrQA.config import PATH_CONFIG +from mrQA.config import PATH_CONFIG, THIS_DIR from mrQA.parallel_utils import _check_args, _make_file_folders, \ _run_single_batch, _create_slurm_script, _get_num_workers, \ _get_terminal_folders @@ -36,7 +36,8 @@ def get_parser(): help='directory containing downloaded dataset with ' 'dicom files, supports nested hierarchies') required.add_argument('--config', type=str, - help='path to config file') + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') optional.add_argument('-o', '--output-dir', type=str, help='specify the directory where the report' ' would be saved. By default, the --data_source ' @@ -45,8 +46,8 @@ def get_parser(): help='specify the path to the output mrds file. 
') optional.add_argument('-n', '--name', type=str, help='provide a identifier/name for the dataset') - optional.add_argument('-s', '--subjects-per-job', type=int, default=5, - help='number of subjects to process per job') + optional.add_argument('-j', '--job-size', type=int, default=5, + help='number of folders to process per job') optional.add_argument('-e', '--conda-env', type=str, default='mrcheck', help='name of conda environment to use') optional.add_argument('-c', '--conda-dist', type=str, default='anaconda3', @@ -83,7 +84,7 @@ def cli(): output_dir=args.output_dir, out_mrds_path=args.out_mrds_path, name=args.name, - subjects_per_job=args.subjects_per_job, + job_size=args.job_size, conda_env=args.conda_env, conda_dist=args.conda_dist, config_path=args.config, @@ -145,28 +146,28 @@ def process_parallel(data_source: Union[str, Path], output_dir: Union[str, Path], out_mrds_path: Union[str, Path], name: str = None, - subjects_per_job: int = 5, + job_size: int = 5, conda_env: str = 'mrcheck', conda_dist: str = 'anaconda3', config_path: Union[str, Path] = None, hpc: bool = False): """ Given a folder(or List[folder]) it will divide the work into smaller - jobs. Each job will contain a fixed number of subjects. These jobs can be + jobs. Each job will contain a fixed number of folders. These jobs can be executed in parallel to save time. Parameters ---------- data_source: str | Path - Valid path to the folder containing the subject folders + Valid path to the folder containing the multiple folders output_dir: str | Path Valid path to the folder where the output will be saved out_mrds_path: str | Path Valid path to the final output .mrds.pkl file name: str Name of the final output file - subjects_per_job: int - Number of subjects to be processed in each job + job_size: int + Number of folders to be processed in each job conda_env: str Name of the conda environment to be used conda_dist: str @@ -184,7 +185,7 @@ def process_parallel(data_source: Union[str, Path], debug=False, config_path=config_path, data_source=data_source, - folders_per_job=subjects_per_job, + folders_per_job=job_size, conda_env=conda_env, conda_dist=conda_dist, output_dir=output_dir, @@ -213,7 +214,7 @@ def submit_job(scripts_list_filepath: Union[str, Path], hpc: bool = False) -> None: """ Given a folder(or List[folder]) it will divide the work into smaller - jobs. Each job will contain a fixed number of subjects. These jobs can be + jobs. Each job will contain a fixed number of folders. These jobs can be executed in parallel to save time. Parameters @@ -250,7 +251,7 @@ def create_script(data_source: Union[str, Path, Iterable] = None, config_path: Union[str, Path] = None): """ Given a folder(or List[folder]) it will divide the work into smaller - jobs. Each job will contain a fixed number of subjects. These jobs can be + jobs. Each job will contain a fixed number of folders. These jobs can be executed in parallel to save time. Parameters @@ -266,7 +267,7 @@ def create_script(data_source: Union[str, Path, Iterable] = None, debug: bool If True, the dataset will be created locally. This is useful for testing folders_per_job: int - Number of subjects per job. Recommended value is 50 or 100 + Number of folders per job. 
Recommended value is 50 or 100 hpc: bool If True, the scripts will be generated for HPC, not for local execution conda_dist: str @@ -294,11 +295,11 @@ def create_script(data_source: Union[str, Path, Iterable] = None, scripts_path_list = [] mrds_path_list = [] - # create a slurm job script for each sub_group of subject ids + # create a slurm job script for each sub_group of folders for fnames_filepath in fnames_path_list: # Filename of the bash script should be same as text file. - # Say batch0000.txt points to set of 10 subjects. Then create a - # slurm script file batch0000.sh which will run for these 10 subjects, + # Say batch0000.txt points to set of 10 folders. Then create a + # slurm script file batch0000.sh which will run for these 10 folders, # and the final partial mrds pickle file will have the name # batch0000.mrds.pkl script_filename = fnames_filepath.stem + '.sh' @@ -333,9 +334,9 @@ def split_folders_list(data_source: Union[str, Path], output_dir: Union[str, Path], folders_per_job: int = 50): """ - Splits a given set of subjects into multiple jobs and creates separate - text files containing the list of subjects. Each text file - contains the list of subjects to be processed in a single job. + Splits a given set of folders into multiple jobs and creates separate + text files containing the list of folders. Each text file + contains the list of folders to be processed in a single job. Parameters ---------- @@ -345,37 +346,37 @@ def split_folders_list(data_source: Union[str, Path], Path to the output directory per_batch_ids : Union[str, Path] filepath to a file which has paths to all txt files for all jobs. - Each of these txt files contains a list of subject ids for + Each of these txt files contains a list of folder ids for corresponding job. 
output_dir : Union[str, Path] Name of the output directory folders_per_job : int - Number of subjects to process in each job + Number of folders to process in each job Returns ------- batch_ids_path_list : Sized - Paths to the text files, each containing a list of subjects + Paths to the text files, each containing a list of folders """ all_fnames_path = Path(all_fnames_path) # List of paths to the txt files, - # each containing the list of subjects per job + # each containing the list of folders per job batch_fnames_path_list = [] folder_list = _get_terminal_folders(data_source, all_fnames_path) - # Get the list of subjects for each job + # Get the list of folders for each job workers = _get_num_workers(folders_per_job, folder_list) folder_subsets = split_list(folder_list, num_chunks=workers) # Create a text file for each job for i, subset in enumerate(folder_subsets): - # Create a text file containing the list of subjects for each job + # Create a text file containing the list of folders for each job batch_filepath = output_dir / f'batch{i:04}.txt' # Store to the path given to the text file list2txt(batch_filepath, subset) # Add the path to the text file ( containing the - # list of subjects for each job) to a list, return the list + # list of folders for each job) to a list, return the list batch_fnames_path_list.append(batch_filepath) list2txt(fpath=per_batch_ids, list_=batch_fnames_path_list) return batch_fnames_path_list diff --git a/mrQA/run_subset.py b/mrQA/run_subset.py index b96ad0a..72cd252 100644 --- a/mrQA/run_subset.py +++ b/mrQA/run_subset.py @@ -8,10 +8,23 @@ from MRdataset import import_dataset, save_mr_dataset, BaseDataset from mrQA import logger +from mrQA.config import THIS_DIR from mrQA.utils import txt2list -def cli(): +def parse_args(): + parser = get_parser() + args = parser.parse_args() + + if args.verbose: + logger.setLevel('WARNING') + else: + logger.setLevel('ERROR') + + return args + + +def get_parser(): """Console script for mrQA.""" parser = argparse.ArgumentParser( description='Protocol Compliance of MRI scans', @@ -21,36 +34,37 @@ def cli(): required = parser.add_argument_group('required arguments') optional = parser.add_argument_group('optional arguments') - required.add_argument('-o', '--output_path', type=str, + required.add_argument('-o', '--output-path', type=str, required=True, help='complete path to pickle file for storing ' 'partial dataset') - required.add_argument('-b', '--batch_ids_file', type=str, + required.add_argument('-b', '--batch-ids-file', type=str, required=True, help='text file path specifying the folders to read') optional.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help='show this help message and exit') - optional.add_argument('--is_partial', action='store_true', + optional.add_argument('--is-partial', action='store_true', help='flag dataset as a partial dataset') # TODO: use this flag to store cache optional.add_argument('-v', '--verbose', action='store_true', help='allow verbose output on console') required.add_argument('--config', type=str, - help='path to config file') + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') if len(sys.argv) < 2: logger.critical('Too few arguments!') parser.print_help() parser.exit(1) - args = parser.parse_args() - output_path = Path(args.output_path).resolve() + return parser - if args.verbose: - logger.setLevel('WARNING') - else: - logger.setLevel('ERROR') + +def cli(): + """Console script for mrQA subset.""" + args = parse_args() + output_path 
= Path(args.output_path).resolve() if not output_path.exists(): partial_dataset = read_subset(output_dir=Path(args.output_path).parent, diff --git a/examples/check_status.py b/mrQA/tests/check_status.py similarity index 69% rename from examples/check_status.py rename to mrQA/tests/check_status.py index 59c71a7..45df14a 100644 --- a/examples/check_status.py +++ b/mrQA/tests/check_status.py @@ -1,14 +1,21 @@ -from pathlib import Path -from mrQA import monitor import tempfile -import shutil +from pathlib import Path -from mrQA.tests.test_utils import copy2dest +from mrQA import monitor +from mrQA.tests.conftest import THIS_DIR +from mrQA.tests.simulate import copy2dest -def run(folder_path): +# @settings(max_examples=10, deadline=None) +# @given(args=dcm_dataset_strategy) +def run(folder_path): # args): + # ds1, attributes = args + # assume(attributes['num_subjects'] > 4) + # folder_path = attributes['fake_ds_dir'] folder_path = Path(folder_path).resolve() - config_path = Path('./mri-config.json').resolve() + # config_path = attributes['config_path'] + config_path = THIS_DIR / 'resources/mri-config.json' + # make a temporary output folder using tempfile with tempfile.TemporaryDirectory() as tmpdirname: output_dir = Path(tmpdirname) / 'output' @@ -28,9 +35,8 @@ def run(folder_path): decimals=2, config_path=config_path, verbose=False, - reference_path='./wpc-6106.xml' ) - copy2dest(output_dir, tmpdirname, '/tmp') + # copy2dest(output_dir, tmpdirname, '/tmp') print('simulation-over') diff --git a/mrQA/tests/conftest.py b/mrQA/tests/conftest.py index bc70301..3772bc2 100644 --- a/mrQA/tests/conftest.py +++ b/mrQA/tests/conftest.py @@ -7,6 +7,7 @@ from hypothesis.strategies import SearchStrategy from mrQA.tests.simulate import make_compliant_test_dataset +from mrQA.tests.utils import download param_strategy: tp.Final[SearchStrategy[Tuple]] = st.tuples( st.text(min_size=1, max_size=10), @@ -21,6 +22,19 @@ THIS_DIR = Path(__file__).parent.resolve() +def sample_protocol(): + """Download a sample protocol from GitHub""" + # Using an example XML file from the following GitHub repository + # https://github.com/lrq3000/mri_protocol + url = 'https://raw.githubusercontent.com/lrq3000/mri_protocol/master/SiemensVidaProtocol/Coma%20Science%20Group.xml' # noqa + filename = THIS_DIR / 'coma_science.xml' + xml_file = Path(filename) + + if not xml_file.is_file(): + download(url, filename) + return filename + + @st.composite def create_dataset(draw_from: st.DrawFn) -> Tuple: name, num_subjects, repetition_time, echo_train_length, flip_angle = draw_from(param_strategy) @@ -31,6 +45,7 @@ def create_dataset(draw_from: st.DrawFn) -> Tuple: ds = DicomDataset(name=name, data_source=fake_ds_dir, config_path=THIS_DIR / 'resources/mri-config.json') + ref_protocol_path = sample_protocol() attributes = { 'name': name, 'num_subjects': num_subjects, @@ -38,7 +53,8 @@ def create_dataset(draw_from: st.DrawFn) -> Tuple: 'echo_train_length': echo_train_length, 'flip_angle': flip_angle, 'fake_ds_dir': fake_ds_dir, - 'config_path': THIS_DIR / 'resources/mri-config.json' + 'config_path': THIS_DIR / 'resources/mri-config.json', + 'ref_protocol_path': ref_protocol_path, } return ds, attributes diff --git a/mrQA/tests/resources/invalid-json.json b/mrQA/tests/resources/invalid-json.json new file mode 100644 index 0000000..e6fc2ad --- /dev/null +++ b/mrQA/tests/resources/invalid-json.json @@ -0,0 +1,44 @@ +{ + "begin": "03_12_2024", + "end": "03_12_2000", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": 
false, + "sbref": false, + "derived": false + }, + "use_echonumbers": true, + "vertical_audit": { + "stratify_by": null, + "include_parameters": [ + "Rows", + "Columns", + "AcquisitionMatrix", + "PixelSpacing", + "PhaseEncodingDirection", + "ShimMode", + "ShimSetting" + ] + }, + "horizontal_audit": { + "stratify_by": null, + "include_parameters": [ + "EchoTime", + "RepetitionTime", + "FlipAngle", + "EchoTrainLength" + ] + }, + "plots": { + "include_parameters": [ + "ContentDate", + "PatientSex", + "PatientAge", + "PatientWeight", + "OperatorsName", + "InstitutionName" + "Manufacturer" + ] + } +} diff --git a/mrQA/tests/resources/test-config.json b/mrQA/tests/resources/test-config.json new file mode 100644 index 0000000..15c5851 --- /dev/null +++ b/mrQA/tests/resources/test-config.json @@ -0,0 +1,15 @@ +{ + "begin": "03_12_2024", + "end": "03_12_2000", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": false, + "sbref": false, + "derived": false + }, + "use_echonumbers": true, + "vertical_audit": { + "stratify_by": null + } +} diff --git a/mrQA/tests/simulate.py b/mrQA/tests/simulate.py index b222f61..a98370a 100644 --- a/mrQA/tests/simulate.py +++ b/mrQA/tests/simulate.py @@ -1,9 +1,11 @@ import tempfile import zipfile from collections import defaultdict +from datetime import datetime from pathlib import Path import pydicom +from pydicom import dcmread from mrQA.utils import convert2ascii @@ -115,3 +117,22 @@ def setup_directories(src): raise FileNotFoundError("Temporary directory not found") return src_dir, dest_dir + + +def copy2dest(folder, src, dest): + file_list = [] + date = datetime.now() + for file in folder.rglob('*'): + if file.is_file(): + try: + dicom = dcmread(file) + except: + continue + dicom.ContentDate = date.strftime('%Y%m%d') + rel_path = file.relative_to(src) + new_abs_path = dest / rel_path + parent = new_abs_path.parent + parent.mkdir(exist_ok=True, parents=True) + dicom.save_as(new_abs_path) + file_list.append(file) + return file_list diff --git a/mrQA/tests/test_cli.py b/mrQA/tests/test_cli.py index 0590115..1455364 100644 --- a/mrQA/tests/test_cli.py +++ b/mrQA/tests/test_cli.py @@ -5,15 +5,21 @@ from pathlib import Path from time import sleep +import pytest +from MRdataset import load_mr_dataset from hypothesis import given, settings, assume from mrQA.cli import cli from mrQA.config import DATE_SEPARATOR +from mrQA.monitor import cli as monitor_cli +from mrQA.run_parallel import cli as parallel_cli +from mrQA.run_subset import cli as subset_cli from mrQA.tests.conftest import dcm_dataset_strategy +from mrQA.utils import list2txt @settings(max_examples=5, deadline=None) -@given(args=(dcm_dataset_strategy)) +@given(args=dcm_dataset_strategy) def test_binary_mrqa(args): ds1, attributes = args assume(len(ds1.name) > 0) @@ -31,22 +37,230 @@ def test_binary_mrqa(args): '--output-dir', tempdir]) report_paths = list(Path(tempdir).glob('*.html')) # check if report was generated + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) + return + + +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_binary_mrqa_with_reference_protocol(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + subprocess.run(['mrqa', + '--data-source', attributes['fake_ds_dir'], + '--config', attributes['config_path'], + '--name', ds1.name, + '--format', 'dicom', + '--decimals', '3', + '--tolerance', 
'0.1', + '--verbose', + '--ref-protocol-path', attributes['ref_protocol_path'], + '--output-dir', tempdir]) + report_paths = list(Path(tempdir).glob('*.html')) + assert_report_paths(report_paths, tempdir, attributes, ds1) + return + + +@settings(max_examples=10, deadline=None) +@given(args=dcm_dataset_strategy) +def test_cli_with_reference_protocol(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + sys.argv = shlex.split( + f'mrqa --data-source {attributes["fake_ds_dir"]}' + f' --config {attributes["config_path"]}' + f' --name {ds1.name}' + f' --format dicom' + ' --decimals 3' + ' --tolerance 0.1' + ' --verbose' + f' --ref-protocol-path {attributes["ref_protocol_path"]}' + f' --output-dir {tempdir}') + cli() + + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert_report_paths(report_paths, tempdir, attributes, ds1) + return + + +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_binary_parallel(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries if attributes['num_subjects'] > 2: - assert len(report_paths) > 0 - report_path = report_paths[0] - assert str(report_path.parent) == str(tempdir) - assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0] + subprocess.run(['mrqa_parallel', + '--data-source', attributes['fake_ds_dir'], + '--config', attributes['config_path'], + '--name', ds1.name, + '--decimals', '3', + '--tolerance', '0.1', + '--verbose', + '--job-size', '1', + '--out-mrds-path', Path(tempdir)/'test.mrds.pkl', + '--output-dir', tempdir]) + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) + return + + +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_binary_mrqa_monitor(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + subprocess.run(['mrqa_monitor', + '--data-source', attributes['fake_ds_dir'], + '--config', attributes['config_path'], + '--name', ds1.name, + '--format', 'dicom', + '--decimals', '3', + '--tolerance', '0.1', + '--verbose', + '--output-dir', tempdir]) + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) + return + + +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_cli_mrqa_monitor(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + sys.argv = shlex.split( + f'mrqa_monitor --data-source {attributes["fake_ds_dir"]} ' + f' --config {attributes["config_path"]} ' + f' --name {ds1.name} ' + '--format dicom ' + '--decimals 3 ' + '--tolerance 0.1 ' + '--verbose ' + f'--output-dir {tempdir}') + monitor_cli() + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) + return + + +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_cli_run_subset(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as 
tempdir: + # shlex doesn't test work with binaries + folders = [f for f in Path(attributes['fake_ds_dir']).iterdir() + if f.is_dir()] + batch_file = Path(tempdir) / 'batch.txt' + list2txt(batch_file, folders) + + sys.argv = shlex.split( + f'mrqa_subset ' + f' --config {attributes["config_path"]} ' + f' -b {batch_file} ' + '--verbose ' + f'--output-path {tempdir}/test.mrds.pkl') + subset_cli() + ds2 = load_mr_dataset(f"/{tempdir}/test.mrds.pkl") + assert ds1 == ds2 + return + + +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_cli_parallel(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + sys.argv = shlex.split( + f'mrqa_parallel --data-source {attributes["fake_ds_dir"]} ' + f' --config {attributes["config_path"]} ' + f' --name {ds1.name} ' + '--job-size 1 ' + '--decimals 3 ' + '--tolerance 0.1 ' + '--verbose ' + f'--out-mrds-path {tempdir}/test.mrds.pkl ' + f'--output-dir {tempdir}') + if attributes['num_subjects'] < 2: + with pytest.raises(RuntimeError): + parallel_cli() else: - assert not report_paths + parallel_cli() + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) return -def test_binary_parallel(): - pass +def assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, ds1): + if attributes['num_subjects'] > 2: + assert_report_paths(report_paths, tempdir, attributes, ds1) + else: + assert not report_paths -def test_binary_monitor(): - pass +def assert_report_paths(report_paths, tempdir, attributes, ds1): + assert len(report_paths) > 0 + report_path = report_paths[0] + assert str(report_path.parent) == str(tempdir) + assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0] + + + + +@settings(max_examples=10, deadline=None) +@given(args=dcm_dataset_strategy) +def test_binary_monitor_with_reference_protocol(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + subprocess.run(['mrqa_monitor', + '--data-source', attributes['fake_ds_dir'], + '--config', attributes['config_path'], + '--name', ds1.name, + '--format', 'dicom', + '--decimals', '3', + '--tolerance', '0.1', + '--verbose', + '--ref-protocol-path', attributes['ref_protocol_path'], + '--output-dir', tempdir]) + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert len(report_paths) > 0 + report_path = report_paths[0] + assert str(report_path.parent) == str(tempdir) + assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0] + return def test_binary_subset(): @@ -54,7 +268,7 @@ def test_binary_subset(): @settings(max_examples=10, deadline=None) -@given(args=(dcm_dataset_strategy)) +@given(args=dcm_dataset_strategy) def test_report_generated(args): ds1, attributes = args assume(len(ds1.name) > 0) @@ -71,13 +285,8 @@ def test_report_generated(args): cli() report_paths = list(Path(tempdir).glob('*.html')) # check if report was generated - if attributes['num_subjects'] > 2: - assert len(report_paths) > 0 - report_path = report_paths[0] - assert str(report_path.parent) == str(tempdir) - assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0] - else: - assert not report_paths + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) # wait for 2 seconds, otherwise the next test will fail. 
# This happens if report is generated with the same timestamp, then # the number of reports will be 1 because the previous report will be @@ -86,15 +295,16 @@ def test_report_generated(args): # re-run with mrds pkl path mrds_paths = list(Path(tempdir).glob('*.mrds.pkl')) assert len(mrds_paths) > 0 - sys.argv = shlex.split(f'mrqa --data-source {attributes["fake_ds_dir"]} ' - f'--config {attributes["config_path"]} ' - f'--name {ds1.name} ' - f'--format dicom ' - '--decimals 3 ' - '--tolerance 0.1 ' - '--verbose ' - f'--output-dir {tempdir} ' - f'--mrds-pkl-path {mrds_paths[0]} ') + sys.argv = shlex.split( + f'mrqa --data-source {attributes["fake_ds_dir"]} ' + f'--config {attributes["config_path"]} ' + f'--name {ds1.name} ' + f'--format dicom ' + '--decimals 3 ' + '--tolerance 0.1 ' + '--verbose ' + f'--output-dir {tempdir} ' + f'--mrds-pkl-path {mrds_paths[0]} ') cli() report_paths = list(Path(tempdir).glob('*.html')) # check if report was generated diff --git a/mrQA/tests/test_parallel.py b/mrQA/tests/test_parallel.py index 421da5c..52bbb67 100644 --- a/mrQA/tests/test_parallel.py +++ b/mrQA/tests/test_parallel.py @@ -38,7 +38,7 @@ def test_equivalence_seq_vs_parallel(): output_dir=output_dir, out_mrds_path=output_path['parallel'], name='parallel', - subjects_per_job=5, + job_size=5, config_path=config_path, hpc=False, ) diff --git a/mrQA/tests/test_utils.py b/mrQA/tests/test_utils.py index 58e4270..705d33d 100644 --- a/mrQA/tests/test_utils.py +++ b/mrQA/tests/test_utils.py @@ -1,127 +1,394 @@ -# import math -# import shutil -# from datetime import datetime -# from pathlib import Path -# -# import numpy as np -# import pytest -# from MRdataset import load_mr_dataset, import_dataset -# from mrQA import check_compliance -# from mrQA.config import report_fpath, mrds_fpath, past_records_fpath, \ -# DATE_SEPARATOR -# from mrQA.utils import files_in_path -# from mrQA.utils import folders_modified_since, get_last_valid_record, txt2list -# -# -# def test_modified_folders(last_reported_on, -# temp_input_src, -# temp_output_dest, -# data_source, -# file_set): -# modified_files = folders_modified_since( -# input_dir=temp_input_src, -# last_reported_on=last_reported_on, -# output_dir=temp_output_dest) -# expected = get_relative_paths(file_set, data_source) -# got = get_relative_paths(modified_files, temp_input_src) -# assert len(expected) == len(got) -# assert sorted(expected) == sorted(got) -# -# -# def test_output_files_created(fname, folder): -# # add special delimiter to strip time from fname -# time_fname = fname.split(DATE_SEPARATOR)[-1] -# utc = datetime.strptime(time_fname, '%m_%d_%Y_%H_%M_%S').timestamp() -# report_path = report_fpath(folder, fname) -# mrds_path = mrds_fpath(folder, fname) -# records_path = past_records_fpath(folder) -# last_record = get_last_valid_record(folder) -# assert report_path.is_file() -# assert mrds_path.is_file() -# assert records_path.is_file() -# assert math.isclose(float(last_record[0]), utc) -# assert last_record[1] == str(report_path) -# assert last_record[2] == str(mrds_path) -# -# -# def test_same_dataset(mrds_path, -# temp_input_src, -# tempdir, -# name): -# # Read the dataset created by monitor -# monitor_dataset = load_mr_dataset(mrds_path) -# -# # Read full dataset, acts as ground truth -# ds = import_dataset(data_source=temp_input_src, -# name=name) -# report_path = check_compliance(ds, output_dir=tempdir/'complete_eval') -# mrds_path2 = mrds_fpath(report_path.parent, report_path.stem) -# complete_dataset = load_mr_dataset(mrds_path2) -# print() -# # 
Both datasets should be the same -# # assert is_same_dataset(complete_dataset, monitor_dataset) -# -# -# def get_temp_input_folder(name, temp_dir): -# temp_folder_path = temp_dir / name -# if temp_folder_path.is_dir(): -# shutil.rmtree(temp_folder_path) -# temp_folder_path.mkdir(exist_ok=False, parents=True) -# return temp_folder_path -# -# -# def get_temp_output_folder(name, temp_dir): -# # Set up output directories -# output_dir = temp_dir / 'output_dir' -# if output_dir.is_dir(): -# shutil.rmtree(output_dir) -# output_dir.mkdir(exist_ok=False, parents=True) -# output_folder_path = output_dir / name -# return output_folder_path -# -# -# def pick_random_sets(per_batch_id_list, n, rng): -# rand_id_list_paths = rng.choice(per_batch_id_list, n) -# folder_sets = [txt2list(f) for f in rand_id_list_paths] -# return folder_sets -# -# -# def create_random_file_sets(temp_input_src, n, max_folders, rng): -# # TODO: dataset is not random -# unique_folders = set() -# for f in temp_input_src.rglob('*'): -# if f.is_file() and f.suffix not in ['.html', '.txt']: -# folder_path = f.parent -# unique_folders.add(folder_path) -# unique_folders = sorted(list(unique_folders)) -# -# rng.shuffle(unique_folders) -# testing_set = unique_folders[:max_folders] -# print(testing_set[:5]) -# try: -# folder_sets = np.array_split(testing_set, n) -# except ValueError as e: -# with pytest.raises(ValueError): -# raise ValueError(f"Could not split list of dicom files." -# f" Got n = {n}") from e -# return None -# return folder_sets -# -# -# def get_relative_paths(file_list, data_root): -# rel_paths = [] -# for file in file_list: -# rel_path = Path(file).relative_to(data_root) -# rel_paths.append(str(rel_path)) -# return rel_paths -# -# -# def copy2dest(folder_list, src, dest): -# file_list = files_in_path(folder_list) -# for file in file_list: -# rel_path = file.relative_to(src) -# new_abs_path = dest / rel_path -# parent = new_abs_path.parent -# parent.mkdir(exist_ok=True, parents=True) -# shutil.copy(file, parent) -# return file_list +import re +import tempfile +from datetime import datetime, timedelta, date +from pathlib import Path + +import pytest +from hypothesis import given, settings, assume +from hypothesis.strategies import lists, integers, dates, text, composite, \ + characters, booleans, tuples +from protocol import SiemensMRImagingProtocol, MRImagingProtocol + +from mrQA.tests.conftest import sample_protocol, THIS_DIR, dcm_dataset_strategy +from mrQA.utils import split_list, convert2ascii, next_month, previous_month, \ + has_substring, filter_epi_fmap_pairs, get_protocol_from_file, \ + get_config_from_file, valid_paths, folders_with_min_files, \ + find_terminal_folders, save_audit_results, is_folder_with_no_subfolders, \ + get_reference_protocol, get_config, is_writable + + +@given( + dir_index=lists(integers(), min_size=1), + num_chunks=integers(min_value=1) +) +def test_split_list_hypothesis(dir_index, num_chunks): + if num_chunks < 0: # Ensure num_chunks is greater than 0 + with pytest.raises(ValueError): + split_list(dir_index, num_chunks) + return + + result = list(split_list(dir_index, num_chunks)) + + if len(dir_index) < num_chunks: # Ensure dir_index has enough elements + # Assertions for the result based on the expected behavior of split_list + num_chunks = len(dir_index) + + # Assertions for the result based on the expected behavior of split_list + assert len(result) == num_chunks + assert sum(map(len, result)) == len(dir_index) + + +def test_split_list_value_errors(): + with pytest.raises(ValueError): 
+        split_list([], 1)
+    with pytest.raises(ValueError):
+        split_list([1], 0)
+    with pytest.raises(ValueError):
+        split_list([1], -1)
+
+
+# Define a strategy for generating strings
+@composite
+def strings(draw):
+    return draw(text())
+
+
+# Define a strategy for generating ASCII strings
+@composite
+def ascii_strings(draw):
+    return draw(text(
+        alphabet=characters(whitelist_categories=('L', 'N', 'P', 'Z', 'S'))))
+
+
+# Define a strategy for generating booleans; named boolean_values to avoid
+# shadowing the imported hypothesis `booleans` strategy
+@composite
+def boolean_values(draw):
+    return draw(booleans())
+
+
+# Property-based test: the output should contain only ASCII characters
+@given(strings())
+def test_contains_only_ascii(value):
+    result = convert2ascii(value, allow_unicode=False)
+    assert all(ord(char) < 128 for char in result)
+
+
+# Property-based test: the output should not contain spaces or
+# dashes at the beginning or end
+@given(strings())
+def test_no_spaces_or_dashes_at_ends(value):
+    result = convert2ascii(value, False)
+    assert not result.startswith((' ', '-'))
+    assert not result.endswith((' ', '-'))
+
+
+# Property-based test: the output should not contain consecutive
+# spaces or dashes
+@given(ascii_strings())
+def test_no_consecutive_spaces_or_dashes(value):
+    result = convert2ascii(value, allow_unicode=False)
+    assert '  ' not in result
+    assert '--' not in result
+
+
+# Property-based test: the output should not contain any special characters
+@given(ascii_strings())
+def test_no_special_characters(value):
+    result = convert2ascii(value, allow_unicode=False)
+    assert re.match(r'^[a-zA-Z0-9_-]*$', result)
+
+
+# Property-based test: converting twice should be the same as converting once
+@given(ascii_strings())
+def test_double_conversion_is_same(value):
+    result1 = convert2ascii(value, allow_unicode=False)
+    result2 = convert2ascii(result1, allow_unicode=False)
+    assert result1 == result2
+
+
+def test_next_month():
+    # Test cases with specific dates
+    assert next_month(datetime(2023, 1, 15)) == datetime(2023, 2, 1)
+    assert next_month(datetime(2022, 12, 5)) == datetime(2023, 1, 1)
+    # Add more test cases as needed
+
+
+@given(dt=dates())
+def test_next_month_hypothesis(dt):
+    result = next_month(dt)
+
+    # Ensure the result is a datetime object
+    assert isinstance(result, date)
+
+    # Ensure the result is the first day of the next month
+    expected_result = (dt.replace(day=28) + timedelta(days=5)).replace(day=1)
+    assert result == expected_result
+
+
+def test_previous_month():
+    # Test cases with specific dates
+    assert previous_month(datetime(2023, 2, 15)) == datetime(2023, 1, 1)
+    assert previous_month(datetime(2023, 1, 1)) == datetime(2022, 12, 1)
+    # Add more test cases as needed
+
+
+@given(dt=dates())
+def test_previous_month_hypothesis(dt):
+    result = previous_month(dt)
+
+    # Ensure the result is a datetime object
+    assert isinstance(result, date)
+
+    # Ensure the result is the first day of the previous month
+    expected_result = (dt.replace(day=1) - timedelta(days=1)).replace(day=1)
+    assert result == expected_result
+
+
+def test_has_substring():
+    # Test cases with specific inputs
+    assert has_substring("hello world", ["hello", "world"])
+    assert has_substring("python", ["java", "python", "cpp"])
+    assert not has_substring("apple", ["orange", "banana"])
+    # Add more test cases as needed
+
+
+@given(
+    input_string=text(),
+    substrings=lists(text(), min_size=1)
+)
+def test_has_substring_hypothesis(input_string, substrings):
+    result = has_substring(input_string, substrings)
+
+    # Ensure the result is a boolean
+    assert isinstance(result, bool)
+ + # Ensure the result is True if and only if at least one substring is + # present in the input_string + expected_result = any(substring in input_string for substring in substrings) + assert result == expected_result + + +def test_filter_epi_fmap_pairs(): + # Test cases with specific inputs + assert filter_epi_fmap_pairs(("epi_bold", "fmap_fieldmap")) + assert filter_epi_fmap_pairs(("rest_fmri", "map")) + assert not filter_epi_fmap_pairs(("dti", "asl")) + # Add more test cases as needed + + +@given( + pair=tuples(text(), text()) +) +def test_filter_epi_fmap_pairs_hypothesis(pair): + result = filter_epi_fmap_pairs(pair) + assert filter_epi_fmap_pairs(('epi', 'fmap')) + assert filter_epi_fmap_pairs(('fmap', 'epi')) + # Ensure the result is a boolean + assert isinstance(result, bool) + + +def test_get_protocol_from_file(): + ref_protocol = sample_protocol() + protocol = get_protocol_from_file(str(ref_protocol)) + + assert isinstance(protocol, SiemensMRImagingProtocol) + + with pytest.raises(FileNotFoundError): + get_protocol_from_file("nonexistent_file.txt") + + with pytest.raises(ValueError): + get_protocol_from_file(THIS_DIR / 'resources/mri-config.json') + + +def test_get_config_from_file(): + config = get_config_from_file(THIS_DIR / 'resources/mri-config.json') + with pytest.raises(TypeError): + get_config_from_file(config) + with pytest.raises(FileNotFoundError): + get_config_from_file("nonexistent_file.txt") + with pytest.raises(ValueError): + get_config_from_file(THIS_DIR / 'resources/invalid-json.json') + + +def test_valid_paths(): + with pytest.raises(ValueError): + valid_paths(None) + with pytest.raises(FileNotFoundError): + valid_paths('nonexistent_file.txt') + with pytest.raises(FileNotFoundError): + valid_paths(['nonexistent_file.txt']) + + +# Test find_terminal_folders with terminal folders +def test_find_terminal_folders_with_terminals(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + folder2 = folder1 / "folder2" + folder2.mkdir() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [folder2] + + folder3 = folder2 / "folder3" + folder3.mkdir() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [folder3] + + +# Test find_terminal_folders with single folder +def test_find_terminal_folders_single_folder(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder = root / "folder" + folder.mkdir() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [folder] + + +# Test find_terminal_folders with non-existent folder +def test_find_terminal_folders_nonexistent_folder(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) / "nonexistent_folder" + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [] + + +def test_folder_with_min_files_nonexistent_folder(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) / "nonexistent_folder" + with pytest.raises(ValueError): + a = list(folders_with_min_files(root, pattern="*.dcm", min_count=1)) + with pytest.raises(ValueError): + a = list(folders_with_min_files([], pattern="*.dcm", min_count=0)) + + +# Test find_terminal_folders with files +def test_find_terminal_folders_with_files(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + file = root / "file.txt" + file.touch() + + terminal_folders = find_terminal_folders(root) + assert 
terminal_folders == [root] + + +# Test find_terminal_folders with nested terminal folders +def test_find_terminal_folders_nested_terminals(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + folder2 = folder1 / "folder2" + folder2.mkdir() + folder3 = folder2 / "folder3" + folder3.mkdir() + + terminal_folders = find_terminal_folders(folder1) + assert terminal_folders == [folder3] + + +# Test find_terminal_folders with multiple terminal folders +def test_find_terminal_folders_multiple_terminals(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + folder2 = root / "folder2" + folder2.mkdir() + folder3 = root / "folder3" + folder3.mkdir() + + terminal_folders = find_terminal_folders(root) + assert set(terminal_folders) == {folder1, folder2, folder3} + + +def test_find_folders_with_min_files(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + file = folder1 / "file.dcm" + file.touch() + folder2 = root / "folder2" + folder2.mkdir() + file = folder2 / "file.dcm" + file.touch() + folder3 = root / "folder3" + folder3.mkdir() + file = folder3 / "file.dcm" + file.touch() + + terminal_folders = folders_with_min_files(root, + pattern="*.dcm", + min_count=1) + assert set(terminal_folders) == {folder1, folder2, folder3} + + +def test_save_audit_results(): + with pytest.raises(OSError): + save_audit_results('/sys/firmware/hz.adt.pkl', {}) + + +# Test when folder has subfolders +def test_has_subfolders(): + with tempfile.TemporaryDirectory() as tmpdirname: + folder_path = Path(tmpdirname) + subfolder = folder_path / "subfolder" + subfolder.mkdir(parents=True, exist_ok=True) + + has_no_subfolders, subfolders = is_folder_with_no_subfolders( + folder_path) + assert has_no_subfolders is False + assert subfolder in subfolders + + +# Test when folder has no subfolders +def test_no_subfolders(): + with tempfile.TemporaryDirectory() as tmpdirname: + folder_path = Path(tmpdirname) + + has_no_subfolders, subfolders = is_folder_with_no_subfolders( + folder_path) + assert has_no_subfolders is True + assert subfolders == [] + + +# Test when folder doesn't exist +def test_nonexistent_folder(): + folder_path = Path("nonexistent_folder") + + with pytest.raises(FileNotFoundError): + is_folder_with_no_subfolders(folder_path) + + +@settings(max_examples=1, deadline=None) +@given(args=(dcm_dataset_strategy)) +def test_get_reference_protocol(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + config = get_config_from_file(attributes['config_path']) + protocol = get_reference_protocol(ds1, config, 'nonexistent_file.txt') + assert isinstance(protocol, MRImagingProtocol) + + +def test_get_config(): + with pytest.raises(FileNotFoundError): + get_config("nonexistent_file.txt") + with pytest.raises(ValueError): + get_config(THIS_DIR / 'resources/mri-config.json', + report_type='horizontal') + config_path = THIS_DIR / 'resources/test-config.json' + config = get_config(config_path, report_type='hz') + config = get_config(config_path, report_type='vt') + assert isinstance(config, dict) + + +def test_is_writable(): + assert not is_writable('/sys/firmware/') diff --git a/mrQA/tests/utils.py b/mrQA/tests/utils.py new file mode 100644 index 0000000..f499fc1 --- /dev/null +++ b/mrQA/tests/utils.py @@ -0,0 +1,139 @@ +# import math +# import shutil +# from datetime import datetime +# from 
pathlib import Path +# +# import numpy as np +# import pytest +# from MRdataset import load_mr_dataset, import_dataset +# from mrQA import check_compliance +# from mrQA.config import report_fpath, mrds_fpath, past_records_fpath, \ +# DATE_SEPARATOR +# from mrQA.utils import files_in_path +# from mrQA.utils import folders_modified_since, get_last_valid_record, txt2list +# +# +# def test_modified_folders(last_reported_on, +# temp_input_src, +# temp_output_dest, +# data_source, +# file_set): +# modified_files = folders_modified_since( +# input_dir=temp_input_src, +# last_reported_on=last_reported_on, +# output_dir=temp_output_dest) +# expected = get_relative_paths(file_set, data_source) +# got = get_relative_paths(modified_files, temp_input_src) +# assert len(expected) == len(got) +# assert sorted(expected) == sorted(got) +# +# +# def test_output_files_created(fname, folder): +# # add special delimiter to strip time from fname +# time_fname = fname.split(DATE_SEPARATOR)[-1] +# utc = datetime.strptime(time_fname, '%m_%d_%Y_%H_%M_%S').timestamp() +# report_path = report_fpath(folder, fname) +# mrds_path = mrds_fpath(folder, fname) +# records_path = past_records_fpath(folder) +# last_record = get_last_valid_record(folder) +# assert report_path.is_file() +# assert mrds_path.is_file() +# assert records_path.is_file() +# assert math.isclose(float(last_record[0]), utc) +# assert last_record[1] == str(report_path) +# assert last_record[2] == str(mrds_path) +# +# +# def test_same_dataset(mrds_path, +# temp_input_src, +# tempdir, +# name): +# # Read the dataset created by monitor +# monitor_dataset = load_mr_dataset(mrds_path) +# +# # Read full dataset, acts as ground truth +# ds = import_dataset(data_source=temp_input_src, +# name=name) +# report_path = check_compliance(ds, output_dir=tempdir/'complete_eval') +# mrds_path2 = mrds_fpath(report_path.parent, report_path.stem) +# complete_dataset = load_mr_dataset(mrds_path2) +# print() +# # Both datasets should be the same +# # assert is_same_dataset(complete_dataset, monitor_dataset) +# +# +# def get_temp_input_folder(name, temp_dir): +# temp_folder_path = temp_dir / name +# if temp_folder_path.is_dir(): +# shutil.rmtree(temp_folder_path) +# temp_folder_path.mkdir(exist_ok=False, parents=True) +# return temp_folder_path +# +# +# def get_temp_output_folder(name, temp_dir): +# # Set up output directories +# output_dir = temp_dir / 'output_dir' +# if output_dir.is_dir(): +# shutil.rmtree(output_dir) +# output_dir.mkdir(exist_ok=False, parents=True) +# output_folder_path = output_dir / name +# return output_folder_path +# +# +# def pick_random_sets(per_batch_id_list, n, rng): +# rand_id_list_paths = rng.choice(per_batch_id_list, n) +# folder_sets = [txt2list(f) for f in rand_id_list_paths] +# return folder_sets +# +# +# def create_random_file_sets(temp_input_src, n, max_folders, rng): +# # TODO: dataset is not random +# unique_folders = set() +# for f in temp_input_src.rglob('*'): +# if f.is_file() and f.suffix not in ['.html', '.txt']: +# folder_path = f.parent +# unique_folders.add(folder_path) +# unique_folders = sorted(list(unique_folders)) +# +# rng.shuffle(unique_folders) +# testing_set = unique_folders[:max_folders] +# print(testing_set[:5]) +# try: +# folder_sets = np.array_split(testing_set, n) +# except ValueError as e: +# with pytest.raises(ValueError): +# raise ValueError(f"Could not split list of dicom files." 
+# f" Got n = {n}") from e +# return None +# return folder_sets +# +# +# def get_relative_paths(file_list, data_root): +# rel_paths = [] +# for file in file_list: +# rel_path = Path(file).relative_to(data_root) +# rel_paths.append(str(rel_path)) +# return rel_paths +# +# +# def copy2dest(folder_list, src, dest): +# file_list = files_in_path(folder_list) +# for file in file_list: +# rel_path = file.relative_to(src) +# new_abs_path = dest / rel_path +# parent = new_abs_path.parent +# parent.mkdir(exist_ok=True, parents=True) +# shutil.copy(file, parent) +# return file_list + +from requests import get # to make GET request + + +def download(url, file_name): + """Download file from url and save to file_name""" + # open in binary mode + with open(file_name, "wb") as file: + # get request + response = get(url) + # write to file + file.write(response.content) diff --git a/mrQA/utils.py b/mrQA/utils.py index c37d14b..bbb3a30 100644 --- a/mrQA/utils.py +++ b/mrQA/utils.py @@ -3,9 +3,7 @@ import re import tempfile import time -import typing import unicodedata -import warnings from collections import Counter from datetime import datetime, timedelta, timezone from itertools import takewhile @@ -23,7 +21,7 @@ from mrQA.config import past_records_fpath, report_fpath, mrds_fpath, \ subject_list_dir, DATE_SEPARATOR, CannotComputeMajority, \ Unspecified, \ - EqualCount, status_fpath, ATTRIBUTE_SEPARATOR + EqualCount, status_fpath, ATTRIBUTE_SEPARATOR, DATETIME_FORMAT, DATE_FORMAT def get_reference_protocol(dataset: BaseDataset, @@ -84,9 +82,10 @@ def get_config(config_path: Union[str, Path], report_type='hz') -> dict: else: include_params = audit_config.get('include_parameters', None) if include_params is None: - logger.warn('Parameters to be included in the compliance check are ' - 'not provided. All parameters will be included in the ' - f'{key}') + logger.warning( + 'Parameters to be included in the compliance check are ' + 'not provided. All parameters will be included in the ' + f'{key}') return audit_config @@ -122,69 +121,69 @@ def is_writable(dir_path): return True -def files_under_folder(fpath: Union[str, Path], - ext: str = None) -> typing.Iterable[Path]: - """ - Generates all the files inside the folder recursively. If ext is given - returns file which have that extension. - - Parameters - ---------- - fpath: str - filepath of the directory - ext: str - filter_fn files with given extension. For ex. return only .nii files - - Returns - ------- - generates filepaths - """ - if not Path(fpath).is_dir(): - raise FileNotFoundError(f"Folder doesn't exist : {fpath}") - folder_path = Path(fpath).resolve() - if ext: - pattern = '*' + ext - else: - pattern = '*' - for file in folder_path.rglob(pattern): - if file.is_file(): - # If it is a regular file and not a directory, return filepath - yield file - - -def files_in_path(fp_list: Union[Iterable, str, Path], - ext: Optional[str] = None): - """ - If given a single folder, returns the list of all files in the directory. - If given a list of folders, returns concatenated list of all the files - inside each directory. - - Parameters - ---------- - fp_list : List[Path] - List of folder paths - ext : str - Used to filter_fn files, and select only those which have this extension - Returns - ------- - List of paths - """ - if isinstance(fp_list, Iterable): - files = [] - for i in fp_list: - if str(i) == '' or str(i) == '.' or i == Path(): - logger.warning("Found an empty string. 
Skipping") - continue - if Path(i).is_dir(): - files.extend(list(files_under_folder(i, ext))) - elif Path(i).is_file(): - files.append(i) - return sorted(list(set(files))) - elif isinstance(fp_list, str) or isinstance(fp_list, Path): - return sorted(list(files_under_folder(fp_list, ext))) - else: - raise NotImplementedError("Expected either Iterable or str type. Got" - f"{type(fp_list)}") +# def files_under_folder(fpath: Union[str, Path], +# ext: str = None) -> typing.Iterable[Path]: +# """ +# Generates all the files inside the folder recursively. If ext is given +# returns file which have that extension. +# +# Parameters +# ---------- +# fpath: str +# filepath of the directory +# ext: str +# filter_fn files with given extension. For ex. return only .nii files +# +# Returns +# ------- +# generates filepaths +# """ +# if not Path(fpath).is_dir(): +# raise FileNotFoundError(f"Folder doesn't exist : {fpath}") +# folder_path = Path(fpath).resolve() +# if ext: +# pattern = '*' + ext +# else: +# pattern = '*' +# for file in folder_path.rglob(pattern): +# if file.is_file(): +# # If it is a regular file and not a directory, return filepath +# yield file + + +# def files_in_path(fp_list: Union[Iterable, str, Path], +# ext: Optional[str] = None): +# """ +# If given a single folder, returns the list of all files in the directory. +# If given a list of folders, returns concatenated list of all the files +# inside each directory. +# +# Parameters +# ---------- +# fp_list : List[Path] +# List of folder paths +# ext : str +# Used to filter_fn files, and select only those which have this ext +# Returns +# ------- +# List of paths +# """ +# if isinstance(fp_list, Iterable): +# files = [] +# for i in fp_list: +# if str(i) == '' or str(i) == '.' or i == Path(): +# logger.warning("Found an empty string. Skipping") +# continue +# if Path(i).is_dir(): +# files.extend(list(files_under_folder(i, ext))) +# elif Path(i).is_file(): +# files.append(i) +# return sorted(list(set(files))) +# elif isinstance(fp_list, str) or isinstance(fp_list, Path): +# return sorted(list(files_under_folder(fp_list, ext))) +# else: +# raise NotImplementedError("Expected either Iterable or str type. 
Got" +# f"{type(fp_list)}") def get_items_upto_count(dict_: Counter, rank: int = 1): @@ -213,7 +212,7 @@ def get_items_upto_count(dict_: Counter, rank: int = 1): def timestamp(): """Generate a timestamp as a string""" - time_string = time.strftime('%m_%d_%Y_%H_%M_%S') + time_string = time.strftime(DATETIME_FORMAT) return time_string @@ -240,16 +239,16 @@ def make_output_paths(output_dir, dataset): subject lists for each modality """ ts = timestamp() - utc = datetime.strptime(ts, '%m_%d_%Y_%H_%M_%S').timestamp() + # utc = datetime.strptime(ts, '%m_%d_%Y_%H_%M_%S').timestamp() filename = f'{dataset.name}{DATE_SEPARATOR}{ts}' report_path = report_fpath(output_dir, filename) mrds_path = mrds_fpath(output_dir, filename) sub_lists_dir_path = subject_list_dir(output_dir, filename) - log_report_history(output_dir, mrds_path, report_path, ts, utc) + log_report_history(output_dir, mrds_path, report_path, ts) return report_path, mrds_path, sub_lists_dir_path -def log_report_history(output_dir, mrds_path, report_path, ts, utc): +def log_report_history(output_dir, mrds_path, report_path, ts): """ Log the report generation history to a text file @@ -270,8 +269,8 @@ def log_report_history(output_dir, mrds_path, report_path, ts, utc): if not records_filepath.parent.is_dir(): records_filepath.parent.mkdir(parents=True) with open(records_filepath, 'a', encoding='utf-8') as fp: - fp.write(f'{utc},{report_path},' - f'{mrds_path},{ts}\n') + fp.write(f'{ts},{report_path},' + f'{mrds_path}\n') def majority_values(list_seqs: list, @@ -408,7 +407,7 @@ def _check_args_validity(list_: List) -> bool: raise ValueError('List is empty.') for seq in list_: if len(seq) == 0: - raise ValueError('Atleast one of sequences is empty.') + raise ValueError('At least one of sequences is empty.') if len(list_) < 3: logger.info('Cannot compute majority attribute values. ' 'Got less than 3 values for each ' @@ -422,14 +421,14 @@ def split_list(dir_index: Sized, num_chunks: int) -> Iterable: Adapted from https://stackoverflow.com/questions/2130016/splitting-a-list-into-n-parts-of-approximately-equal-length # noqa Given a list of n elements, split it into k parts, where k = num_chunks. - Each part has atleast n/k elements. And the remaining elements + Each part has at least n/k elements. And the remaining elements n % k are distributed uniformly among the sub-parts such that each part has almost same number of elements. The first n % k will have floor(n/k) + 1 elements. Parameters ---------- - dir_index : list + dir_index : Sized list to split num_chunks : int number of parts @@ -446,14 +445,15 @@ def split_list(dir_index: Sized, num_chunks: int) -> Iterable: if not is_integer_number(num_chunks): raise ValueError(f'Number of chunks must be an integer. 
' f'Got {num_chunks}') - if num_chunks == 0: + if num_chunks < 1: raise ValueError('Cannot divide list into chunks of size 0') if len(dir_index) == 0: raise ValueError('List of directories is empty!') if len(dir_index) < num_chunks: - warnings.warn(f'Got num_chunks={num_chunks}, list_size={len(dir_index)}' - f'Expected num_chunks < list_size', - stacklevel=2) + logger.warning( + f'Got num_chunks={num_chunks}, list_size={len(dir_index)}' + f'Expected num_chunks < list_size', + stacklevel=2) num_chunks = len(dir_index) k, m = divmod(len(dir_index), num_chunks) # k, m = (len(dir_index)//num_chunks, len(dir_index)%num_chunks) @@ -763,9 +763,9 @@ def _cli_report(hz_audit: dict, report_name): non_compliant_ds = hz_audit['non_compliant'] compliant_ds = hz_audit['compliant'] undetermined_ds = hz_audit['undetermined'] - if not (compliant_ds.get_sequence_ids() or - non_compliant_ds.get_sequence_ids() or - undetermined_ds.get_sequence_ids()): + if not (compliant_ds.get_sequence_ids() + or non_compliant_ds.get_sequence_ids() + or undetermined_ds.get_sequence_ids()): logger.error('No report generated for horizontal audit.') return @@ -816,7 +816,7 @@ def _datasets_processed(dir_path, ignore_case=True): def _get_time(time_format: str, last_reported_on: str): - str_format = '%m/%d/%Y %H:%M:%S' + str_format = DATETIME_FORMAT if time_format == 'timestamp': mod_time = datetime.fromtimestamp(float(last_reported_on)).strftime( str_format) @@ -869,7 +869,7 @@ def folders_modified_since(last_reported_on: str, """ modified_folders = set() - mod_time = _get_time(time_format, last_reported_on) + mod_time = get_datetime(last_reported_on) out_path = Path(output_dir) / 'modified_folders_since.txt' if out_path.is_file(): out_path.unlink() @@ -925,9 +925,8 @@ def get_last_valid_record(folder_path: Path) -> Optional[tuple]: num_records = len(lines) if i < -num_records: return None - last_line = lines[i] - last_reported_on, last_report_path, last_mrds_path, _ = \ - last_line.split(',') + last_line = lines[i].strip('\n').split(',') + last_reported_on, last_report_path, last_mrds_path = last_line if Path(last_mrds_path).is_file(): return last_reported_on, last_report_path, last_mrds_path i -= 1 @@ -942,7 +941,7 @@ def get_timestamps(): ts = datetime.timestamp(now) date_time = now.strftime('%m/%d/%Y %H:%M:%S%z') return { - 'utc': ts, + 'utc' : ts, 'date_time': date_time } @@ -1082,54 +1081,60 @@ def find_terminal_folders(root, leave=True, position=0): else: for sd2 in level2_subdirs: terminal.extend(find_terminal_folders(sd2, leave=False, - position=1)) + position=1)) return terminal -def log_latest_non_compliance(ncomp_data, latest_data, output_dir): - """ - Log the latest non-compliance data from recent sessions to a file - - Parameters - ---------- - ncomp_data - latest_data - output_dir +def get_datetime(date): + try: + date = datetime.strptime(date, DATETIME_FORMAT) + except ValueError as exc: + if 'unconverted data remains' in str(exc): + try: + date = datetime.strptime(date, DATE_FORMAT) + except ValueError as exc: + raise ValueError(f'Invalid date format. 
' + f'Use one of ' + f'[{DATE_FORMAT}, {DATETIME_FORMAT}]') from exc + return date - Returns - ------- +def log_latest_non_compliance(dataset, config_path, + filter_fn=None, + audit='hz', date=None, output_dir=None): """ - if latest_data is None: - return - full_status = [] - for seq_id in latest_data.get_sequence_ids(): - # Don't rename run_id as run, it will conflict with subprocess.run - for sub, sess, run_id, seq in latest_data.traverse_horizontal(seq_id): - try: - nc_param_dict = ncomp_data.get_nc_params( - subject_id=sub, session_id=sess, - run_id=run_id, seq_id=seq_id) - status = { - 'ts': seq.timestamp, - 'subject': sub, - 'sequence': seq_id, - 'ds_name': latest_data.name, - 'nc_params': ';'.join(nc_param_dict.keys()) - } - full_status.append(status) - except KeyError: - continue - status_filepath = status_fpath(output_dir) + Log the latest non-compliance data from recent sessions to a file + """ + nc_log = {} + ds_name = None + date = get_datetime(date) + + config = get_config(config_path=config_path, report_type=audit) + parameters = config.get("include_parameters", None) + + if audit == 'hz': + ds_name = dataset.name + nc_log = dataset.generate_nc_log(parameters, filter_fn, + date=date, + audit='hz', verbosity=1, + output_dir=None) + elif audit == 'vt': + ds_name = dataset.name + nc_log = dataset.generate_nc_log(parameters, filter_fn, + date=date, + audit='vt', verbosity=1, + output_dir=None) + + status_filepath = status_fpath(output_dir, audit) if not status_filepath.parent.is_dir(): status_filepath.parent.mkdir(parents=True) - with open(status_filepath, 'a', encoding='utf-8') as fp: - for i in full_status: - fp.write( - f" {i['ts']}, {i['ds_name']}, {i['sequence']}, {i['subject']}, " - f"{i['nc_params']} \n") + with open(status_filepath, 'w', encoding='utf-8') as fp: + for parameter in nc_log: + for i in nc_log[parameter]: + fp.write(f" {i['date']}, {ds_name}, {i['sequence_name']}," + f" {i['subject']}, {parameter} \n") return None # status_filepath @@ -1170,12 +1175,12 @@ def valid_paths(files: Union[List, str]) -> Union[List[Path], Path]: raise ValueError('Expected a valid path or Iterable, Got NoneType') if isinstance(files, str) or isinstance(files, Path): if not Path(files).is_file(): - raise OSError('Invalid File {0}'.format(files)) + raise FileNotFoundError('Invalid File {0}'.format(files)) return Path(files).resolve() elif isinstance(files, Iterable): for file in files: if not Path(file).is_file(): - raise OSError('Invalid File {0}'.format(file)) + raise FileNotFoundError('Invalid File {0}'.format(file)) return [Path(f).resolve() for f in files] else: raise NotImplementedError('Expected str or Path or Iterable, ' @@ -1209,7 +1214,7 @@ def modify_sequence_name(seq: "BaseSequence", stratify_by: str, stratify_value = '' seq_name_with_stratify = ATTRIBUTE_SEPARATOR.join([seq.name, - stratify_value]) + stratify_value]) if datasets: for ds in datasets: ds.set_modified_seq_name(seq.name, seq_name_with_stratify) @@ -1254,7 +1259,7 @@ def get_config_from_file(config_path: Union[Path, str]) -> dict: return config -def get_protocol_from_file(reference_path: Path, +def get_protocol_from_file(reference_path: Union[Path, str], vendor: str = 'siemens') -> MRImagingProtocol: """ Extracts the reference protocol from the file. Supports only Siemens @@ -1262,7 +1267,7 @@ def get_protocol_from_file(reference_path: Path, Parameters ---------- - reference_path : Union[Path, str] + reference_path : Path | str Path to the reference protocol file vendor: str Vendor of the scanner. 
Default is Siemens @@ -1344,11 +1349,11 @@ def filter_epi_fmap_pairs(pair): epi_substrings = ['epi', 'bold', 'rest', 'fmri', 'pasl', 'asl', 'dsi', 'dti', 'dwi'] fmap_substrings = ['fmap', 'fieldmap', 'map'] - if (has_substring(pair[0].lower(), epi_substrings) and - has_substring(pair[1].lower(), fmap_substrings)): + if (has_substring(pair[0].lower(), epi_substrings) + and has_substring(pair[1].lower(), fmap_substrings)): return True - if (has_substring(pair[1].lower(), epi_substrings) and - has_substring(pair[0].lower(), fmap_substrings)): + if (has_substring(pair[1].lower(), epi_substrings) + and has_substring(pair[0].lower(), fmap_substrings)): return True return False @@ -1358,13 +1363,14 @@ def has_substring(input_string, substrings): for substring in substrings: if substring in input_string: return True + return False def previous_month(dt): """Return the first day of the previous month.""" - return dt.replace(day=1) - timedelta(days=1) + return (dt.replace(day=1) - timedelta(days=1)).replace(day=1) def next_month(dt): """Return the first day of the next month.""" - return dt.replace(day=28) + timedelta(days=5) + return (dt.replace(day=28) + timedelta(days=5)).replace(day=1) diff --git a/requirements_dev.txt b/requirements_dev.txt index c7b7da4..cb08f1e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -8,4 +8,6 @@ hypothesis pytest bokeh flake8 +requests +coverage diff --git a/setup.cfg b/setup.cfg index 2edef3e..7bfd1a1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,7 +20,11 @@ exclude = docs, */_version.py, */tests/*.py, + plotting.py filename = *.py +# E203 - whitespace before ':' +ignore = + E203, W503 max-line-length = 80 max-complexity = 12 accept-encodings = utf-8
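The download helper added in mrQA/tests/utils.py opens the destination file before issuing the GET request, so a failed request still leaves an empty file behind and HTTP errors pass silently. A minimal hardened sketch, not part of the patch, is shown below; the timeout and chunk size are arbitrary illustrative choices.

import requests


def download(url, file_name, timeout=30):
    """Download url to file_name, failing loudly on HTTP errors."""
    # issue the request first so a failed download never creates the file
    response = requests.get(url, stream=True, timeout=timeout)
    response.raise_for_status()
    with open(file_name, "wb") as file:
        # stream the body in chunks instead of buffering it all in memory
        for chunk in response.iter_content(chunk_size=8192):
            file.write(chunk)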
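The new get_datetime helper in mrQA/utils.py accepts either a full timestamp or a bare date by retrying with a second format string. The actual DATETIME_FORMAT and DATE_FORMAT constants are defined in mrQA.config and do not appear in this patch, so the sketch below uses assumed placeholder values and a simplified fallback (it retries on any ValueError rather than inspecting the error message); the function name parse_report_date is likewise hypothetical.

from datetime import datetime

# assumed placeholders; the real constants live in mrQA.config
DATETIME_FORMAT = '%m_%d_%Y_%H_%M_%S'
DATE_FORMAT = '%m_%d_%Y'


def parse_report_date(date_str):
    """Parse a timestamp that may or may not carry a time component."""
    try:
        return datetime.strptime(date_str, DATETIME_FORMAT)
    except ValueError:
        # fall back to the date-only form, re-raising with both formats listed
        try:
            return datetime.strptime(date_str, DATE_FORMAT)
        except ValueError as exc:
            raise ValueError(f'Invalid date format. Use one of '
                             f'[{DATE_FORMAT}, {DATETIME_FORMAT}]') from exc


print(parse_report_date('11_21_2023_01_30_00'))  # full datetime
print(parse_report_date('11_21_2023'))           # date only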
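The reworked previous_month and next_month helpers now both return the first day of the adjacent month, which matches their docstrings; adding five days to day 28 always lands in the following month because no month has more than 31 days. The assertions below are illustrative checks, not part of the patch.

from datetime import date, timedelta


def previous_month(dt):
    """Return the first day of the previous month."""
    return (dt.replace(day=1) - timedelta(days=1)).replace(day=1)


def next_month(dt):
    """Return the first day of the next month."""
    return (dt.replace(day=28) + timedelta(days=5)).replace(day=1)


assert previous_month(date(2023, 3, 15)) == date(2023, 2, 1)
assert previous_month(date(2023, 1, 15)) == date(2022, 12, 1)  # year rollover
assert next_month(date(2023, 12, 15)) == date(2024, 1, 1)      # year rollover
assert next_month(date(2023, 1, 31)) == date(2023, 2, 1)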