From 803be417b7aea8a78b8845e1d8e6347ef435fa25 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 11:35:21 -0500 Subject: [PATCH 01/23] add default config file --- .gitignore | 5 ++++- examples/check_status.py | 7 +++---- mrQA/cli.py | 5 +++-- mrQA/config.py | 2 ++ mrQA/monitor.py | 5 +++-- mrQA/run_parallel.py | 5 +++-- mrQA/run_subset.py | 4 +++- 7 files changed, 21 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 3e25d06..94fccfb 100644 --- a/.gitignore +++ b/.gitignore @@ -106,4 +106,7 @@ ENV/ /update_today.txt # codacy -results.sarif +*.sarif + +# mri protocol +*.xml diff --git a/examples/check_status.py b/examples/check_status.py index 59c71a7..7ad0948 100644 --- a/examples/check_status.py +++ b/examples/check_status.py @@ -1,9 +1,8 @@ -from pathlib import Path -from mrQA import monitor import tempfile -import shutil +from pathlib import Path -from mrQA.tests.test_utils import copy2dest +from mrQA import monitor +from mrQA.tests.utils import copy2dest def run(folder_path): diff --git a/mrQA/cli.py b/mrQA/cli.py index 7fe4f58..84602b7 100644 --- a/mrQA/cli.py +++ b/mrQA/cli.py @@ -8,7 +8,7 @@ from mrQA import check_compliance from mrQA import logger -from mrQA.config import PATH_CONFIG +from mrQA.config import PATH_CONFIG, THIS_DIR from mrQA.utils import is_writable @@ -27,7 +27,8 @@ def get_parser(): help='directory containing downloaded dataset with ' 'dicom files, supports nested hierarchies') required.add_argument('--config', type=str, - help='path to config file') + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') optional.add_argument('-o', '--output-dir', type=str, help='specify the directory where the report' ' would be saved. By default, the --data_source ' diff --git a/mrQA/config.py b/mrQA/config.py index 715fc22..6265fa3 100644 --- a/mrQA/config.py +++ b/mrQA/config.py @@ -6,6 +6,8 @@ from MRdataset.config import MRException from protocol import UnspecifiedType +THIS_DIR = Path(__file__).parent.resolve() + def configure_logger(log, output_dir, mode='w', level='WARNING'): """ diff --git a/mrQA/monitor.py b/mrQA/monitor.py index 3e99aa5..7e9a9e7 100644 --- a/mrQA/monitor.py +++ b/mrQA/monitor.py @@ -7,7 +7,7 @@ from MRdataset import import_dataset, load_mr_dataset from mrQA import logger -from mrQA.config import PATH_CONFIG +from mrQA.config import PATH_CONFIG, THIS_DIR from mrQA.project import check_compliance from mrQA.utils import is_writable, folders_modified_since, \ get_last_valid_record, log_latest_non_compliance @@ -30,7 +30,8 @@ def get_parser(): help='directory containing downloaded dataset with ' 'dicom files, supports nested hierarchies') required.add_argument('--config', type=str, - help='path to config file') + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') optional.add_argument('-o', '--output-dir', type=str, help='specify the directory where the report' ' would be saved. 
By default, the --data_source ' diff --git a/mrQA/run_parallel.py b/mrQA/run_parallel.py index e92589e..a31a859 100644 --- a/mrQA/run_parallel.py +++ b/mrQA/run_parallel.py @@ -9,7 +9,7 @@ from mrQA import check_compliance from mrQA import logger -from mrQA.config import PATH_CONFIG +from mrQA.config import PATH_CONFIG, THIS_DIR from mrQA.parallel_utils import _check_args, _make_file_folders, \ _run_single_batch, _create_slurm_script, _get_num_workers, \ _get_terminal_folders @@ -36,7 +36,8 @@ def get_parser(): help='directory containing downloaded dataset with ' 'dicom files, supports nested hierarchies') required.add_argument('--config', type=str, - help='path to config file') + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') optional.add_argument('-o', '--output-dir', type=str, help='specify the directory where the report' ' would be saved. By default, the --data_source ' diff --git a/mrQA/run_subset.py b/mrQA/run_subset.py index b96ad0a..e641ed8 100644 --- a/mrQA/run_subset.py +++ b/mrQA/run_subset.py @@ -8,6 +8,7 @@ from MRdataset import import_dataset, save_mr_dataset, BaseDataset from mrQA import logger +from mrQA.config import THIS_DIR from mrQA.utils import txt2list @@ -37,7 +38,8 @@ def cli(): optional.add_argument('-v', '--verbose', action='store_true', help='allow verbose output on console') required.add_argument('--config', type=str, - help='path to config file') + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') if len(sys.argv) < 2: logger.critical('Too few arguments!') From 699790e9d21f09fe6c4ca2093dbb8b4c9f159bd3 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 11:36:26 -0500 Subject: [PATCH 02/23] minor spelling errors --- README.rst | 4 ++-- mrQA/utils.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 2d5d430..4107030 100644 --- a/README.rst +++ b/README.rst @@ -7,8 +7,8 @@ mrQA : automatic protocol compliance checks on MR datasets .. image:: https://app.codacy.com/project/badge/Grade/8cd263e1eaa0480d8fac50eba0094401 :target: https://app.codacy.com/gh/sinhaharsh/mrQA/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade -.. image:: https://github.com/sinhaharsh/mrQA/actions/workflows/continuos-integration.yml/badge.svg - :target: https://github.com/sinhaharsh/mrQA/actions/workflows/continuos-integration.yml +.. image:: https://github.com/sinhaharsh/mrQA/actions/workflows/continuous-integration.yml/badge.svg + :target: https://github.com/sinhaharsh/mrQA/actions/workflows/continuous-integration.yml .. image:: https://raw.githubusercontent.com/jupyter/design/master/logos/Badges/nbviewer_badge.svg diff --git a/mrQA/utils.py b/mrQA/utils.py index c37d14b..d2a327b 100644 --- a/mrQA/utils.py +++ b/mrQA/utils.py @@ -408,7 +408,7 @@ def _check_args_validity(list_: List) -> bool: raise ValueError('List is empty.') for seq in list_: if len(seq) == 0: - raise ValueError('Atleast one of sequences is empty.') + raise ValueError('At least one of sequences is empty.') if len(list_) < 3: logger.info('Cannot compute majority attribute values. ' 'Got less than 3 values for each ' @@ -422,14 +422,14 @@ def split_list(dir_index: Sized, num_chunks: int) -> Iterable: Adapted from https://stackoverflow.com/questions/2130016/splitting-a-list-into-n-parts-of-approximately-equal-length # noqa Given a list of n elements, split it into k parts, where k = num_chunks. - Each part has atleast n/k elements. 
And the remaining elements + Each part has at least n/k elements. And the remaining elements n % k are distributed uniformly among the sub-parts such that each part has almost same number of elements. The first n % k will have floor(n/k) + 1 elements. Parameters ---------- - dir_index : list + dir_index : Sized list to split num_chunks : int number of parts From e54ae9113bc5e016c01e968422d2d9e3e1ee1447 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 11:37:01 -0500 Subject: [PATCH 03/23] test cli with reference protocol --- mrQA/tests/conftest.py | 18 ++++++++++++++++- mrQA/tests/test_cli.py | 27 ++++++++++++++++++++++++++ mrQA/tests/{test_utils.py => utils.py} | 12 ++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) rename mrQA/tests/{test_utils.py => utils.py} (93%) diff --git a/mrQA/tests/conftest.py b/mrQA/tests/conftest.py index bc70301..3772bc2 100644 --- a/mrQA/tests/conftest.py +++ b/mrQA/tests/conftest.py @@ -7,6 +7,7 @@ from hypothesis.strategies import SearchStrategy from mrQA.tests.simulate import make_compliant_test_dataset +from mrQA.tests.utils import download param_strategy: tp.Final[SearchStrategy[Tuple]] = st.tuples( st.text(min_size=1, max_size=10), @@ -21,6 +22,19 @@ THIS_DIR = Path(__file__).parent.resolve() +def sample_protocol(): + """Download a sample protocol from GitHub""" + # Using an example XML file from the following GitHub repository + # https://github.com/lrq3000/mri_protocol + url = 'https://raw.githubusercontent.com/lrq3000/mri_protocol/master/SiemensVidaProtocol/Coma%20Science%20Group.xml' # noqa + filename = THIS_DIR / 'coma_science.xml' + xml_file = Path(filename) + + if not xml_file.is_file(): + download(url, filename) + return filename + + @st.composite def create_dataset(draw_from: st.DrawFn) -> Tuple: name, num_subjects, repetition_time, echo_train_length, flip_angle = draw_from(param_strategy) @@ -31,6 +45,7 @@ def create_dataset(draw_from: st.DrawFn) -> Tuple: ds = DicomDataset(name=name, data_source=fake_ds_dir, config_path=THIS_DIR / 'resources/mri-config.json') + ref_protocol_path = sample_protocol() attributes = { 'name': name, 'num_subjects': num_subjects, @@ -38,7 +53,8 @@ def create_dataset(draw_from: st.DrawFn) -> Tuple: 'echo_train_length': echo_train_length, 'flip_angle': flip_angle, 'fake_ds_dir': fake_ds_dir, - 'config_path': THIS_DIR / 'resources/mri-config.json' + 'config_path': THIS_DIR / 'resources/mri-config.json', + 'ref_protocol_path': ref_protocol_path, } return ds, attributes diff --git a/mrQA/tests/test_cli.py b/mrQA/tests/test_cli.py index 0590115..93208d0 100644 --- a/mrQA/tests/test_cli.py +++ b/mrQA/tests/test_cli.py @@ -41,6 +41,33 @@ def test_binary_mrqa(args): return +@settings(max_examples=5, deadline=None) +@given(args=(dcm_dataset_strategy)) +def test_binary_mrqa_with_reference_protocol(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + subprocess.run(['mrqa', + '--data-source', attributes['fake_ds_dir'], + '--config', attributes['config_path'], + '--name', ds1.name, + '--format', 'dicom', + '--decimals', '3', + '--tolerance', '0.1', + '--verbose', + '--ref-protocol-path', attributes['ref_protocol_path'], + '--output-dir', tempdir]) + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert len(report_paths) > 0 + report_path = report_paths[0] + assert str(report_path.parent) == str(tempdir) + assert ds1.name in 
report_path.stem.split(DATE_SEPARATOR)[0] + return + + def test_binary_parallel(): pass diff --git a/mrQA/tests/test_utils.py b/mrQA/tests/utils.py similarity index 93% rename from mrQA/tests/test_utils.py rename to mrQA/tests/utils.py index 58e4270..f499fc1 100644 --- a/mrQA/tests/test_utils.py +++ b/mrQA/tests/utils.py @@ -125,3 +125,15 @@ # parent.mkdir(exist_ok=True, parents=True) # shutil.copy(file, parent) # return file_list + +from requests import get # to make GET request + + +def download(url, file_name): + """Download file from url and save to file_name""" + # open in binary mode + with open(file_name, "wb") as file: + # get request + response = get(url) + # write to file + file.write(response.content) From de647e7783c2c6c91ed9a5cf50be55addbbb65ea Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 17:11:16 -0500 Subject: [PATCH 04/23] fix cli for parallel --- mrQA/run_parallel.py | 50 ++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/mrQA/run_parallel.py b/mrQA/run_parallel.py index a31a859..6cd27c9 100644 --- a/mrQA/run_parallel.py +++ b/mrQA/run_parallel.py @@ -46,8 +46,8 @@ def get_parser(): help='specify the path to the output mrds file. ') optional.add_argument('-n', '--name', type=str, help='provide a identifier/name for the dataset') - optional.add_argument('-s', '--subjects-per-job', type=int, default=5, - help='number of subjects to process per job') + optional.add_argument('-j', '--job-size', type=int, default=5, + help='number of folders to process per job') optional.add_argument('-e', '--conda-env', type=str, default='mrcheck', help='name of conda environment to use') optional.add_argument('-c', '--conda-dist', type=str, default='anaconda3', @@ -84,7 +84,7 @@ def cli(): output_dir=args.output_dir, out_mrds_path=args.out_mrds_path, name=args.name, - subjects_per_job=args.subjects_per_job, + job_size=args.job_size, conda_env=args.conda_env, conda_dist=args.conda_dist, config_path=args.config, @@ -146,28 +146,28 @@ def process_parallel(data_source: Union[str, Path], output_dir: Union[str, Path], out_mrds_path: Union[str, Path], name: str = None, - subjects_per_job: int = 5, + job_size: int = 5, conda_env: str = 'mrcheck', conda_dist: str = 'anaconda3', config_path: Union[str, Path] = None, hpc: bool = False): """ Given a folder(or List[folder]) it will divide the work into smaller - jobs. Each job will contain a fixed number of subjects. These jobs can be + jobs. Each job will contain a fixed number of folders. These jobs can be executed in parallel to save time. 
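    A minimal sketch of the intended call (every path below is a
    hypothetical placeholder, not a default shipped with mrQA)::

        process_parallel(data_source='/data/dicom_root',
                         output_dir='/tmp/mrqa_out',
                         out_mrds_path='/tmp/mrqa_out/study.mrds.pkl',
                         name='study',
                         job_size=10)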
Parameters ---------- data_source: str | Path - Valid path to the folder containing the subject folders + Valid path to the folder containing the multiple folders output_dir: str | Path Valid path to the folder where the output will be saved out_mrds_path: str | Path Valid path to the final output .mrds.pkl file name: str Name of the final output file - subjects_per_job: int - Number of subjects to be processed in each job + job_size: int + Number of folders to be processed in each job conda_env: str Name of the conda environment to be used conda_dist: str @@ -185,7 +185,7 @@ def process_parallel(data_source: Union[str, Path], debug=False, config_path=config_path, data_source=data_source, - folders_per_job=subjects_per_job, + folders_per_job=job_size, conda_env=conda_env, conda_dist=conda_dist, output_dir=output_dir, @@ -214,7 +214,7 @@ def submit_job(scripts_list_filepath: Union[str, Path], hpc: bool = False) -> None: """ Given a folder(or List[folder]) it will divide the work into smaller - jobs. Each job will contain a fixed number of subjects. These jobs can be + jobs. Each job will contain a fixed number of folders. These jobs can be executed in parallel to save time. Parameters @@ -251,7 +251,7 @@ def create_script(data_source: Union[str, Path, Iterable] = None, config_path: Union[str, Path] = None): """ Given a folder(or List[folder]) it will divide the work into smaller - jobs. Each job will contain a fixed number of subjects. These jobs can be + jobs. Each job will contain a fixed number of folders. These jobs can be executed in parallel to save time. Parameters @@ -267,7 +267,7 @@ def create_script(data_source: Union[str, Path, Iterable] = None, debug: bool If True, the dataset will be created locally. This is useful for testing folders_per_job: int - Number of subjects per job. Recommended value is 50 or 100 + Number of folders per job. Recommended value is 50 or 100 hpc: bool If True, the scripts will be generated for HPC, not for local execution conda_dist: str @@ -295,11 +295,11 @@ def create_script(data_source: Union[str, Path, Iterable] = None, scripts_path_list = [] mrds_path_list = [] - # create a slurm job script for each sub_group of subject ids + # create a slurm job script for each sub_group of folders for fnames_filepath in fnames_path_list: # Filename of the bash script should be same as text file. - # Say batch0000.txt points to set of 10 subjects. Then create a - # slurm script file batch0000.sh which will run for these 10 subjects, + # Say batch0000.txt points to set of 10 folders. Then create a + # slurm script file batch0000.sh which will run for these 10 folders, # and the final partial mrds pickle file will have the name # batch0000.mrds.pkl script_filename = fnames_filepath.stem + '.sh' @@ -334,9 +334,9 @@ def split_folders_list(data_source: Union[str, Path], output_dir: Union[str, Path], folders_per_job: int = 50): """ - Splits a given set of subjects into multiple jobs and creates separate - text files containing the list of subjects. Each text file - contains the list of subjects to be processed in a single job. + Splits a given set of folders into multiple jobs and creates separate + text files containing the list of folders. Each text file + contains the list of folders to be processed in a single job. Parameters ---------- @@ -346,37 +346,37 @@ def split_folders_list(data_source: Union[str, Path], Path to the output directory per_batch_ids : Union[str, Path] filepath to a file which has paths to all txt files for all jobs. 
- Each of these txt files contains a list of subject ids for + Each of these txt files contains a list of folder ids for corresponding job. output_dir : Union[str, Path] Name of the output directory folders_per_job : int - Number of subjects to process in each job + Number of folders to process in each job Returns ------- batch_ids_path_list : Sized - Paths to the text files, each containing a list of subjects + Paths to the text files, each containing a list of folders """ all_fnames_path = Path(all_fnames_path) # List of paths to the txt files, - # each containing the list of subjects per job + # each containing the list of folders per job batch_fnames_path_list = [] folder_list = _get_terminal_folders(data_source, all_fnames_path) - # Get the list of subjects for each job + # Get the list of folders for each job workers = _get_num_workers(folders_per_job, folder_list) folder_subsets = split_list(folder_list, num_chunks=workers) # Create a text file for each job for i, subset in enumerate(folder_subsets): - # Create a text file containing the list of subjects for each job + # Create a text file containing the list of folders for each job batch_filepath = output_dir / f'batch{i:04}.txt' # Store to the path given to the text file list2txt(batch_filepath, subset) # Add the path to the text file ( containing the - # list of subjects for each job) to a list, return the list + # list of folders for each job) to a list, return the list batch_fnames_path_list.append(batch_filepath) list2txt(fpath=per_batch_ids, list_=batch_fnames_path_list) return batch_fnames_path_list From 50d37b589192744b241d9148aaa1ec932e39220d Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 17:11:22 -0500 Subject: [PATCH 05/23] fix cli for subset --- mrQA/run_subset.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/mrQA/run_subset.py b/mrQA/run_subset.py index e641ed8..72cd252 100644 --- a/mrQA/run_subset.py +++ b/mrQA/run_subset.py @@ -12,7 +12,19 @@ from mrQA.utils import txt2list -def cli(): +def parse_args(): + parser = get_parser() + args = parser.parse_args() + + if args.verbose: + logger.setLevel('WARNING') + else: + logger.setLevel('ERROR') + + return args + + +def get_parser(): """Console script for mrQA.""" parser = argparse.ArgumentParser( description='Protocol Compliance of MRI scans', @@ -22,17 +34,17 @@ def cli(): required = parser.add_argument_group('required arguments') optional = parser.add_argument_group('optional arguments') - required.add_argument('-o', '--output_path', type=str, + required.add_argument('-o', '--output-path', type=str, required=True, help='complete path to pickle file for storing ' 'partial dataset') - required.add_argument('-b', '--batch_ids_file', type=str, + required.add_argument('-b', '--batch-ids-file', type=str, required=True, help='text file path specifying the folders to read') optional.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help='show this help message and exit') - optional.add_argument('--is_partial', action='store_true', + optional.add_argument('--is-partial', action='store_true', help='flag dataset as a partial dataset') # TODO: use this flag to store cache optional.add_argument('-v', '--verbose', action='store_true', @@ -46,13 +58,13 @@ def cli(): parser.print_help() parser.exit(1) - args = parser.parse_args() - output_path = Path(args.output_path).resolve() + return parser - if args.verbose: - logger.setLevel('WARNING') - else: - logger.setLevel('ERROR') + +def cli(): + 
"""Console script for mrQA subset.""" + args = parse_args() + output_path = Path(args.output_path).resolve() if not output_path.exists(): partial_dataset = read_subset(output_dir=Path(args.output_path).parent, From a6d1356a5699910d04ced08bd0638a3e33c413cc Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 17:11:50 -0500 Subject: [PATCH 06/23] additional changes for cli --- mrQA/monitor.py | 5 +++-- mrQA/parallel_utils.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mrQA/monitor.py b/mrQA/monitor.py index 7e9a9e7..fd2550c 100644 --- a/mrQA/monitor.py +++ b/mrQA/monitor.py @@ -121,7 +121,8 @@ def parse_args(): return args -def main(): +def cli(): + """Console script for mrQA monitor.""" args = parse_args() monitor(name=args.name, data_source=args.data_source, @@ -221,4 +222,4 @@ def monitor(name: str, if __name__ == '__main__': - sys.exit(main()) # pragma: no cover + sys.exit(cli()) # pragma: no cover diff --git a/mrQA/parallel_utils.py b/mrQA/parallel_utils.py index c092f18..4f98e29 100644 --- a/mrQA/parallel_utils.py +++ b/mrQA/parallel_utils.py @@ -188,7 +188,7 @@ def _create_slurm_script(output_script_path: Union[str, Path], # Add flags to python command if verbose: python_cmd += ' --verbose' - python_cmd += ' --is_partial' + python_cmd += ' --is-partial' # Create the slurm script file with open(output_script_path, 'w', encoding='utf-8') as fp: From fe323cf7a2f7b3e87acbc699f03a2f4616049d38 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 17:12:23 -0500 Subject: [PATCH 07/23] add tests for cli (parallel, monitor, and subset) --- mrQA/tests/test_cli.py | 250 +++++++++++++++++++++++++++++++++++------ 1 file changed, 216 insertions(+), 34 deletions(-) diff --git a/mrQA/tests/test_cli.py b/mrQA/tests/test_cli.py index 93208d0..ba49e3c 100644 --- a/mrQA/tests/test_cli.py +++ b/mrQA/tests/test_cli.py @@ -5,15 +5,21 @@ from pathlib import Path from time import sleep +import pytest +from MRdataset import load_mr_dataset from hypothesis import given, settings, assume from mrQA.cli import cli from mrQA.config import DATE_SEPARATOR +from mrQA.monitor import cli as monitor_cli +from mrQA.run_parallel import cli as parallel_cli +from mrQA.run_subset import cli as subset_cli from mrQA.tests.conftest import dcm_dataset_strategy +from mrQA.utils import list2txt @settings(max_examples=5, deadline=None) -@given(args=(dcm_dataset_strategy)) +@given(args=dcm_dataset_strategy) def test_binary_mrqa(args): ds1, attributes = args assume(len(ds1.name) > 0) @@ -31,18 +37,13 @@ def test_binary_mrqa(args): '--output-dir', tempdir]) report_paths = list(Path(tempdir).glob('*.html')) # check if report was generated - if attributes['num_subjects'] > 2: - assert len(report_paths) > 0 - report_path = report_paths[0] - assert str(report_path.parent) == str(tempdir) - assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0] - else: - assert not report_paths + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) return @settings(max_examples=5, deadline=None) -@given(args=(dcm_dataset_strategy)) +@given(args=dcm_dataset_strategy) def test_binary_mrqa_with_reference_protocol(args): ds1, attributes = args assume(len(ds1.name) > 0) @@ -59,21 +60,206 @@ def test_binary_mrqa_with_reference_protocol(args): '--verbose', '--ref-protocol-path', attributes['ref_protocol_path'], '--output-dir', tempdir]) + report_paths = list(Path(tempdir).glob('*.html')) + assert_report_paths(report_paths, tempdir, attributes, ds1) + return + + 
+@settings(max_examples=10, deadline=None) +@given(args=dcm_dataset_strategy) +def test_cli_with_reference_protocol(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + sys.argv = shlex.split( + f'mrqa --data-source {attributes["fake_ds_dir"]}' + f' --config {attributes["config_path"]}' + f' --name {ds1.name}' + f' --format dicom' + ' --decimals 3' + ' --tolerance 0.1' + ' --verbose' + f' --ref-protocol-path {attributes["ref_protocol_path"]}' + f' --output-dir {tempdir}') + cli() + report_paths = list(Path(tempdir).glob('*.html')) # check if report was generated - assert len(report_paths) > 0 - report_path = report_paths[0] - assert str(report_path.parent) == str(tempdir) - assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0] + assert_report_paths(report_paths, tempdir, attributes, ds1) return -def test_binary_parallel(): - pass +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_binary_parallel(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + subprocess.run(['mrqa_parallel', + '--data-source', attributes['fake_ds_dir'], + '--config', attributes['config_path'], + '--name', ds1.name, + '--decimals', '3', + '--tolerance', '0.1', + '--verbose', + '--job-size', '1', + f'--out-mrds-path {tempdir}/test.mrds.pkl ' + '--output-dir', tempdir]) + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) + return -def test_binary_monitor(): - pass +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_binary_mrqa_monitor(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + subprocess.run(['mrqa_monitor', + '--data-source', attributes['fake_ds_dir'], + '--config', attributes['config_path'], + '--name', ds1.name, + '--format', 'dicom', + '--decimals', '3', + '--tolerance', '0.1', + '--verbose', + '--output-dir', tempdir]) + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) + return + + +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_cli_mrqa_monitor(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + sys.argv = shlex.split( + f'mrqa_monitor --data-source {attributes["fake_ds_dir"]} ' + f' --config {attributes["config_path"]} ' + f' --name {ds1.name} ' + '--format dicom ' + '--decimals 3 ' + '--tolerance 0.1 ' + '--verbose ' + f'--output-dir {tempdir}') + monitor_cli() + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) + return + + +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_cli_run_subset(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + folders = [f for f in Path(attributes['fake_ds_dir']).iterdir() + if f.is_dir()] + batch_file = Path(tempdir) / 'batch.txt' + list2txt(batch_file, folders) + + sys.argv = 
shlex.split( + f'mrqa_subset ' + f' --config {attributes["config_path"]} ' + f' -b {batch_file} ' + '--verbose ' + f'--output-path {tempdir}/test.mrds.pkl') + subset_cli() + ds2 = load_mr_dataset(f"/{tempdir}/test.mrds.pkl") + assert ds1 == ds2 + return + + +@settings(max_examples=5, deadline=None) +@given(args=dcm_dataset_strategy) +def test_cli_parallel(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + sys.argv = shlex.split( + f'mrqa_parallel --data-source {attributes["fake_ds_dir"]} ' + f' --config {attributes["config_path"]} ' + f' --name {ds1.name} ' + '--job-size 1 ' + '--decimals 3 ' + '--tolerance 0.1 ' + '--verbose ' + f'--out-mrds-path {tempdir}/test.mrds.pkl ' + f'--output-dir {tempdir}') + if attributes['num_subjects'] < 2: + with pytest.raises(RuntimeError): + parallel_cli() + else: + parallel_cli() + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) + return + + +def assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, ds1): + if attributes['num_subjects'] > 2: + assert_report_paths(report_paths, tempdir, attributes, ds1) + else: + assert not report_paths + + +def assert_report_paths(report_paths, tempdir, attributes, ds1): + assert len(report_paths) > 0 + report_path = report_paths[0] + assert str(report_path.parent) == str(tempdir) + assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0] + + + + +@settings(max_examples=10, deadline=None) +@given(args=dcm_dataset_strategy) +def test_binary_monitor_with_reference_protocol(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + with tempfile.TemporaryDirectory() as tempdir: + # shlex doesn't test work with binaries + subprocess.run(['mrqa_monitor', + '--data-source', attributes['fake_ds_dir'], + '--config', attributes['config_path'], + '--name', ds1.name, + '--format', 'dicom', + '--decimals', '3', + '--tolerance', '0.1', + '--verbose', + '--ref-protocol-path', attributes['ref_protocol_path'], + '--output-dir', tempdir]) + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert len(report_paths) > 0 + report_path = report_paths[0] + assert str(report_path.parent) == str(tempdir) + assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0] + return def test_binary_subset(): @@ -81,7 +267,7 @@ def test_binary_subset(): @settings(max_examples=10, deadline=None) -@given(args=(dcm_dataset_strategy)) +@given(args=dcm_dataset_strategy) def test_report_generated(args): ds1, attributes = args assume(len(ds1.name) > 0) @@ -98,13 +284,8 @@ def test_report_generated(args): cli() report_paths = list(Path(tempdir).glob('*.html')) # check if report was generated - if attributes['num_subjects'] > 2: - assert len(report_paths) > 0 - report_path = report_paths[0] - assert str(report_path.parent) == str(tempdir) - assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0] - else: - assert not report_paths + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) # wait for 2 seconds, otherwise the next test will fail. 
# This happens if report is generated with the same timestamp, then # the number of reports will be 1 because the previous report will be @@ -113,15 +294,16 @@ def test_report_generated(args): # re-run with mrds pkl path mrds_paths = list(Path(tempdir).glob('*.mrds.pkl')) assert len(mrds_paths) > 0 - sys.argv = shlex.split(f'mrqa --data-source {attributes["fake_ds_dir"]} ' - f'--config {attributes["config_path"]} ' - f'--name {ds1.name} ' - f'--format dicom ' - '--decimals 3 ' - '--tolerance 0.1 ' - '--verbose ' - f'--output-dir {tempdir} ' - f'--mrds-pkl-path {mrds_paths[0]} ') + sys.argv = shlex.split( + f'mrqa --data-source {attributes["fake_ds_dir"]} ' + f'--config {attributes["config_path"]} ' + f'--name {ds1.name} ' + f'--format dicom ' + '--decimals 3 ' + '--tolerance 0.1 ' + '--verbose ' + f'--output-dir {tempdir} ' + f'--mrds-pkl-path {mrds_paths[0]} ') cli() report_paths = list(Path(tempdir).glob('*.html')) # check if report was generated From 58d414c648f2f14ebb4394ebe68877428ad6e18f Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 17:12:46 -0500 Subject: [PATCH 08/23] add tests for utils.py --- mrQA/tests/test_utils.py | 394 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 394 insertions(+) create mode 100644 mrQA/tests/test_utils.py diff --git a/mrQA/tests/test_utils.py b/mrQA/tests/test_utils.py new file mode 100644 index 0000000..705d33d --- /dev/null +++ b/mrQA/tests/test_utils.py @@ -0,0 +1,394 @@ +import re +import tempfile +from datetime import datetime, timedelta, date +from pathlib import Path + +import pytest +from hypothesis import given, settings, assume +from hypothesis.strategies import lists, integers, dates, text, composite, \ + characters, booleans, tuples +from protocol import SiemensMRImagingProtocol, MRImagingProtocol + +from mrQA.tests.conftest import sample_protocol, THIS_DIR, dcm_dataset_strategy +from mrQA.utils import split_list, convert2ascii, next_month, previous_month, \ + has_substring, filter_epi_fmap_pairs, get_protocol_from_file, \ + get_config_from_file, valid_paths, folders_with_min_files, \ + find_terminal_folders, save_audit_results, is_folder_with_no_subfolders, \ + get_reference_protocol, get_config, is_writable + + +@given( + dir_index=lists(integers(), min_size=1), + num_chunks=integers(min_value=1) +) +def test_split_list_hypothesis(dir_index, num_chunks): + if num_chunks < 0: # Ensure num_chunks is greater than 0 + with pytest.raises(ValueError): + split_list(dir_index, num_chunks) + return + + result = list(split_list(dir_index, num_chunks)) + + if len(dir_index) < num_chunks: # Ensure dir_index has enough elements + # Assertions for the result based on the expected behavior of split_list + num_chunks = len(dir_index) + + # Assertions for the result based on the expected behavior of split_list + assert len(result) == num_chunks + assert sum(map(len, result)) == len(dir_index) + + +def test_split_list_value_errors(): + with pytest.raises(ValueError): + split_list([], 1) + with pytest.raises(ValueError): + split_list([1], 0) + with pytest.raises(ValueError): + split_list([1], -1) + + +# Define a strategy for generating strings +@composite +def strings(draw): + return draw(text()) + + +# Define a strategy for generating ASCII strings +@composite +def ascii_strings(draw): + return draw(text( + alphabet=characters(whitelist_categories=('L', 'N', 'P', 'Z', 'S')))) + + +# Define a strategy for generating booleans +@composite +def booleans(draw): + return draw(booleans()) + + +# Property-based test: the output should 
contain only ASCII characters +@given(strings()) +def test_contains_only_ascii(value): + result = convert2ascii(value, allow_unicode=False) + assert all(ord(char) < 128 for char in result) + + +# Property-based test: the output should not contain spaces or +# dashes at the beginning or end +@given(strings()) +def test_no_spaces_or_dashes_at_ends(value): + result = convert2ascii(value, False) + assert not result.startswith((' ', '-')) + assert not result.endswith((' ', '-')) + + +# Property-based test: the output should not contain consecutive +# spaces or dashes +@given(ascii_strings()) +def test_no_consecutive_spaces_or_dashes(value): + result = convert2ascii(value, allow_unicode=False) + assert ' ' not in result + assert '--' not in result + + +# Property-based test: the output should not contain any special characters +@given(ascii_strings()) +def test_no_special_characters(value): + result = convert2ascii(value, allow_unicode=False) + assert re.match(r'^[a-zA-Z0-9_-]*$', result) + + +# Property-based test: converting twice should be the same as converting once +@given(ascii_strings()) +def test_double_conversion_is_same(value): + result1 = convert2ascii(value, allow_unicode=False) + result2 = convert2ascii(result1, allow_unicode=False) + assert result1 == result2 + + +def test_next_month(): + # Test cases with specific dates + assert next_month(datetime(2023, 1, 15)) == datetime(2023, 2, 1) + assert next_month(datetime(2022, 12, 5)) == datetime(2023, 1, 1) + # Add more test cases as needed + + +@given(dt=dates()) +def test_next_month_hypothesis(dt): + result = next_month(dt) + + # Ensure the result is a datetime object + assert isinstance(result, date) + + # Ensure the result is the first day of the next month + expected_result = (dt.replace(day=28) + timedelta(days=5)).replace(day=1) + assert result == expected_result + + +def test_previous_month(): + # Test cases with specific dates + assert previous_month(datetime(2023, 2, 15)) == datetime(2023, 1, 1) + assert previous_month(datetime(2023, 1, 1)) == datetime(2022, 12, 1) + # Add more test cases as needed + + +@given(dt=dates()) +def test_previous_month_hypothesis(dt): + result = previous_month(dt) + + # Ensure the result is a datetime object + assert isinstance(result, date) + + # Ensure the result is the first day of the previous month + expected_result = (dt.replace(day=1) - timedelta(days=1)).replace(day=1) + assert result == expected_result + + +def test_has_substring(): + # Test cases with specific inputs + assert has_substring("hello world", ["hello", "world"]) + assert has_substring("python", ["java", "python", "cpp"]) + assert not has_substring("apple", ["orange", "banana"]) + # Add more test cases as needed + + +@given( + input_string=text(), + substrings=lists(text(), min_size=1) +) +def test_has_substring_hypothesis(input_string, substrings): + result = has_substring(input_string, substrings) + + # Ensure the result is a boolean + assert isinstance(result, bool) + + # Ensure the result is True if and only if at least one substring is + # present in the input_string + expected_result = any(substring in input_string for substring in substrings) + assert result == expected_result + + +def test_filter_epi_fmap_pairs(): + # Test cases with specific inputs + assert filter_epi_fmap_pairs(("epi_bold", "fmap_fieldmap")) + assert filter_epi_fmap_pairs(("rest_fmri", "map")) + assert not filter_epi_fmap_pairs(("dti", "asl")) + # Add more test cases as needed + + +@given( + pair=tuples(text(), text()) +) +def 
test_filter_epi_fmap_pairs_hypothesis(pair): + result = filter_epi_fmap_pairs(pair) + assert filter_epi_fmap_pairs(('epi', 'fmap')) + assert filter_epi_fmap_pairs(('fmap', 'epi')) + # Ensure the result is a boolean + assert isinstance(result, bool) + + +def test_get_protocol_from_file(): + ref_protocol = sample_protocol() + protocol = get_protocol_from_file(str(ref_protocol)) + + assert isinstance(protocol, SiemensMRImagingProtocol) + + with pytest.raises(FileNotFoundError): + get_protocol_from_file("nonexistent_file.txt") + + with pytest.raises(ValueError): + get_protocol_from_file(THIS_DIR / 'resources/mri-config.json') + + +def test_get_config_from_file(): + config = get_config_from_file(THIS_DIR / 'resources/mri-config.json') + with pytest.raises(TypeError): + get_config_from_file(config) + with pytest.raises(FileNotFoundError): + get_config_from_file("nonexistent_file.txt") + with pytest.raises(ValueError): + get_config_from_file(THIS_DIR / 'resources/invalid-json.json') + + +def test_valid_paths(): + with pytest.raises(ValueError): + valid_paths(None) + with pytest.raises(FileNotFoundError): + valid_paths('nonexistent_file.txt') + with pytest.raises(FileNotFoundError): + valid_paths(['nonexistent_file.txt']) + + +# Test find_terminal_folders with terminal folders +def test_find_terminal_folders_with_terminals(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + folder2 = folder1 / "folder2" + folder2.mkdir() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [folder2] + + folder3 = folder2 / "folder3" + folder3.mkdir() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [folder3] + + +# Test find_terminal_folders with single folder +def test_find_terminal_folders_single_folder(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder = root / "folder" + folder.mkdir() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [folder] + + +# Test find_terminal_folders with non-existent folder +def test_find_terminal_folders_nonexistent_folder(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) / "nonexistent_folder" + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [] + + +def test_folder_with_min_files_nonexistent_folder(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) / "nonexistent_folder" + with pytest.raises(ValueError): + a = list(folders_with_min_files(root, pattern="*.dcm", min_count=1)) + with pytest.raises(ValueError): + a = list(folders_with_min_files([], pattern="*.dcm", min_count=0)) + + +# Test find_terminal_folders with files +def test_find_terminal_folders_with_files(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + file = root / "file.txt" + file.touch() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [root] + + +# Test find_terminal_folders with nested terminal folders +def test_find_terminal_folders_nested_terminals(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + folder2 = folder1 / "folder2" + folder2.mkdir() + folder3 = folder2 / "folder3" + folder3.mkdir() + + terminal_folders = find_terminal_folders(folder1) + assert terminal_folders == [folder3] + + +# Test find_terminal_folders with multiple terminal folders +def 
test_find_terminal_folders_multiple_terminals(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + folder2 = root / "folder2" + folder2.mkdir() + folder3 = root / "folder3" + folder3.mkdir() + + terminal_folders = find_terminal_folders(root) + assert set(terminal_folders) == {folder1, folder2, folder3} + + +def test_find_folders_with_min_files(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + file = folder1 / "file.dcm" + file.touch() + folder2 = root / "folder2" + folder2.mkdir() + file = folder2 / "file.dcm" + file.touch() + folder3 = root / "folder3" + folder3.mkdir() + file = folder3 / "file.dcm" + file.touch() + + terminal_folders = folders_with_min_files(root, + pattern="*.dcm", + min_count=1) + assert set(terminal_folders) == {folder1, folder2, folder3} + + +def test_save_audit_results(): + with pytest.raises(OSError): + save_audit_results('/sys/firmware/hz.adt.pkl', {}) + + +# Test when folder has subfolders +def test_has_subfolders(): + with tempfile.TemporaryDirectory() as tmpdirname: + folder_path = Path(tmpdirname) + subfolder = folder_path / "subfolder" + subfolder.mkdir(parents=True, exist_ok=True) + + has_no_subfolders, subfolders = is_folder_with_no_subfolders( + folder_path) + assert has_no_subfolders is False + assert subfolder in subfolders + + +# Test when folder has no subfolders +def test_no_subfolders(): + with tempfile.TemporaryDirectory() as tmpdirname: + folder_path = Path(tmpdirname) + + has_no_subfolders, subfolders = is_folder_with_no_subfolders( + folder_path) + assert has_no_subfolders is True + assert subfolders == [] + + +# Test when folder doesn't exist +def test_nonexistent_folder(): + folder_path = Path("nonexistent_folder") + + with pytest.raises(FileNotFoundError): + is_folder_with_no_subfolders(folder_path) + + +@settings(max_examples=1, deadline=None) +@given(args=(dcm_dataset_strategy)) +def test_get_reference_protocol(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + config = get_config_from_file(attributes['config_path']) + protocol = get_reference_protocol(ds1, config, 'nonexistent_file.txt') + assert isinstance(protocol, MRImagingProtocol) + + +def test_get_config(): + with pytest.raises(FileNotFoundError): + get_config("nonexistent_file.txt") + with pytest.raises(ValueError): + get_config(THIS_DIR / 'resources/mri-config.json', + report_type='horizontal') + config_path = THIS_DIR / 'resources/test-config.json' + config = get_config(config_path, report_type='hz') + config = get_config(config_path, report_type='vt') + assert isinstance(config, dict) + + +def test_is_writable(): + assert not is_writable('/sys/firmware/') From 28be3d06b3b2a16fa11b01dfffecd62c2c03da52 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 17:13:40 -0500 Subject: [PATCH 09/23] comment out deprecated functions in utils.py --- mrQA/utils.py | 126 +++++++++++++++++++++++++------------------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/mrQA/utils.py b/mrQA/utils.py index d2a327b..e777000 100644 --- a/mrQA/utils.py +++ b/mrQA/utils.py @@ -122,69 +122,69 @@ def is_writable(dir_path): return True -def files_under_folder(fpath: Union[str, Path], - ext: str = None) -> typing.Iterable[Path]: - """ - Generates all the files inside the folder recursively. If ext is given - returns file which have that extension. 
- - Parameters - ---------- - fpath: str - filepath of the directory - ext: str - filter_fn files with given extension. For ex. return only .nii files - - Returns - ------- - generates filepaths - """ - if not Path(fpath).is_dir(): - raise FileNotFoundError(f"Folder doesn't exist : {fpath}") - folder_path = Path(fpath).resolve() - if ext: - pattern = '*' + ext - else: - pattern = '*' - for file in folder_path.rglob(pattern): - if file.is_file(): - # If it is a regular file and not a directory, return filepath - yield file - - -def files_in_path(fp_list: Union[Iterable, str, Path], - ext: Optional[str] = None): - """ - If given a single folder, returns the list of all files in the directory. - If given a list of folders, returns concatenated list of all the files - inside each directory. - - Parameters - ---------- - fp_list : List[Path] - List of folder paths - ext : str - Used to filter_fn files, and select only those which have this extension - Returns - ------- - List of paths - """ - if isinstance(fp_list, Iterable): - files = [] - for i in fp_list: - if str(i) == '' or str(i) == '.' or i == Path(): - logger.warning("Found an empty string. Skipping") - continue - if Path(i).is_dir(): - files.extend(list(files_under_folder(i, ext))) - elif Path(i).is_file(): - files.append(i) - return sorted(list(set(files))) - elif isinstance(fp_list, str) or isinstance(fp_list, Path): - return sorted(list(files_under_folder(fp_list, ext))) - else: - raise NotImplementedError("Expected either Iterable or str type. Got" - f"{type(fp_list)}") +# def files_under_folder(fpath: Union[str, Path], +# ext: str = None) -> typing.Iterable[Path]: +# """ +# Generates all the files inside the folder recursively. If ext is given +# returns file which have that extension. +# +# Parameters +# ---------- +# fpath: str +# filepath of the directory +# ext: str +# filter_fn files with given extension. For ex. return only .nii files +# +# Returns +# ------- +# generates filepaths +# """ +# if not Path(fpath).is_dir(): +# raise FileNotFoundError(f"Folder doesn't exist : {fpath}") +# folder_path = Path(fpath).resolve() +# if ext: +# pattern = '*' + ext +# else: +# pattern = '*' +# for file in folder_path.rglob(pattern): +# if file.is_file(): +# # If it is a regular file and not a directory, return filepath +# yield file + + +# def files_in_path(fp_list: Union[Iterable, str, Path], +# ext: Optional[str] = None): +# """ +# If given a single folder, returns the list of all files in the directory. +# If given a list of folders, returns concatenated list of all the files +# inside each directory. +# +# Parameters +# ---------- +# fp_list : List[Path] +# List of folder paths +# ext : str +# Used to filter_fn files, and select only those which have this extension +# Returns +# ------- +# List of paths +# """ +# if isinstance(fp_list, Iterable): +# files = [] +# for i in fp_list: +# if str(i) == '' or str(i) == '.' or i == Path(): +# logger.warning("Found an empty string. Skipping") +# continue +# if Path(i).is_dir(): +# files.extend(list(files_under_folder(i, ext))) +# elif Path(i).is_file(): +# files.append(i) +# return sorted(list(set(files))) +# elif isinstance(fp_list, str) or isinstance(fp_list, Path): +# return sorted(list(files_under_folder(fp_list, ext))) +# else: +# raise NotImplementedError("Expected either Iterable or str type. 
Got" +# f"{type(fp_list)}") def get_items_upto_count(dict_: Counter, rank: int = 1): From 0b9c99dd66022ed27370dade5b061aced5245229 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 17:14:17 -0500 Subject: [PATCH 10/23] fix code as per tests of utils.py --- mrQA/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mrQA/utils.py b/mrQA/utils.py index e777000..6fac45b 100644 --- a/mrQA/utils.py +++ b/mrQA/utils.py @@ -3,7 +3,6 @@ import re import tempfile import time -import typing import unicodedata import warnings from collections import Counter @@ -446,7 +445,7 @@ def split_list(dir_index: Sized, num_chunks: int) -> Iterable: if not is_integer_number(num_chunks): raise ValueError(f'Number of chunks must be an integer. ' f'Got {num_chunks}') - if num_chunks == 0: + if num_chunks < 1: raise ValueError('Cannot divide list into chunks of size 0') if len(dir_index) == 0: raise ValueError('List of directories is empty!') @@ -1170,12 +1169,12 @@ def valid_paths(files: Union[List, str]) -> Union[List[Path], Path]: raise ValueError('Expected a valid path or Iterable, Got NoneType') if isinstance(files, str) or isinstance(files, Path): if not Path(files).is_file(): - raise OSError('Invalid File {0}'.format(files)) + raise FileNotFoundError('Invalid File {0}'.format(files)) return Path(files).resolve() elif isinstance(files, Iterable): for file in files: if not Path(file).is_file(): - raise OSError('Invalid File {0}'.format(file)) + raise FileNotFoundError('Invalid File {0}'.format(file)) return [Path(f).resolve() for f in files] else: raise NotImplementedError('Expected str or Path or Iterable, ' @@ -1254,7 +1253,7 @@ def get_config_from_file(config_path: Union[Path, str]) -> dict: return config -def get_protocol_from_file(reference_path: Path, +def get_protocol_from_file(reference_path: Union[Path, str], vendor: str = 'siemens') -> MRImagingProtocol: """ Extracts the reference protocol from the file. Supports only Siemens @@ -1262,7 +1261,7 @@ def get_protocol_from_file(reference_path: Path, Parameters ---------- - reference_path : Union[Path, str] + reference_path : Path | str Path to the reference protocol file vendor: str Vendor of the scanner. 
Default is Siemens @@ -1358,13 +1357,14 @@ def has_substring(input_string, substrings): for substring in substrings: if substring in input_string: return True + return False def previous_month(dt): """Return the first day of the previous month.""" - return dt.replace(day=1) - timedelta(days=1) + return (dt.replace(day=1) - timedelta(days=1)).replace(day=1) def next_month(dt): """Return the first day of the next month.""" - return dt.replace(day=28) + timedelta(days=5) + return (dt.replace(day=28) + timedelta(days=5)).replace(day=1) From ad7a87ecd79ef8aa0f292cc7d91e15cce5582aec Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 17:15:17 -0500 Subject: [PATCH 11/23] minor changes --- Makefile | 1 + mrQA/tests/test_parallel.py | 2 +- requirements_dev.txt | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6049484..abda245 100644 --- a/Makefile +++ b/Makefile @@ -62,6 +62,7 @@ coverage: ## check code coverage quickly with the default Python coverage run --rcfile=.coveragerc --source mrQA -m pytest coverage report -m coverage html + coverage xml $(BROWSER) htmlcov/index.html docs: ## generate Sphinx HTML documentation, including API docs diff --git a/mrQA/tests/test_parallel.py b/mrQA/tests/test_parallel.py index 421da5c..52bbb67 100644 --- a/mrQA/tests/test_parallel.py +++ b/mrQA/tests/test_parallel.py @@ -38,7 +38,7 @@ def test_equivalence_seq_vs_parallel(): output_dir=output_dir, out_mrds_path=output_path['parallel'], name='parallel', - subjects_per_job=5, + job_size=5, config_path=config_path, hpc=False, ) diff --git a/requirements_dev.txt b/requirements_dev.txt index c7b7da4..f8cf48e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -8,4 +8,5 @@ hypothesis pytest bokeh flake8 +requests From 14bed4dadb2bc5b0a6dd618c13d784fe13503a26 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 17:15:36 -0500 Subject: [PATCH 12/23] add invalid json for tests --- mrQA/tests/resources/invalid-json.json | 44 ++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 mrQA/tests/resources/invalid-json.json diff --git a/mrQA/tests/resources/invalid-json.json b/mrQA/tests/resources/invalid-json.json new file mode 100644 index 0000000..e6fc2ad --- /dev/null +++ b/mrQA/tests/resources/invalid-json.json @@ -0,0 +1,44 @@ +{ + "begin": "03_12_2024", + "end": "03_12_2000", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": false, + "sbref": false, + "derived": false + }, + "use_echonumbers": true, + "vertical_audit": { + "stratify_by": null, + "include_parameters": [ + "Rows", + "Columns", + "AcquisitionMatrix", + "PixelSpacing", + "PhaseEncodingDirection", + "ShimMode", + "ShimSetting" + ] + }, + "horizontal_audit": { + "stratify_by": null, + "include_parameters": [ + "EchoTime", + "RepetitionTime", + "FlipAngle", + "EchoTrainLength" + ] + }, + "plots": { + "include_parameters": [ + "ContentDate", + "PatientSex", + "PatientAge", + "PatientWeight", + "OperatorsName", + "InstitutionName" + "Manufacturer" + ] + } +} From 6aa6b32828d219b87d12cbb4f3e7c012f1d96780 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Mon, 20 Nov 2023 17:16:17 -0500 Subject: [PATCH 13/23] add test config --- mrQA/tests/resources/test-config.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 mrQA/tests/resources/test-config.json diff --git a/mrQA/tests/resources/test-config.json b/mrQA/tests/resources/test-config.json new file mode 100644 index 0000000..15c5851 --- /dev/null +++ 
b/mrQA/tests/resources/test-config.json @@ -0,0 +1,15 @@ +{ + "begin": "03_12_2024", + "end": "03_12_2000", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": false, + "sbref": false, + "derived": false + }, + "use_echonumbers": true, + "vertical_audit": { + "stratify_by": null + } +} From b02cd0299bde11d1d1a078ded3b7e86d3fdde954 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Tue, 21 Nov 2023 14:40:40 -0500 Subject: [PATCH 14/23] fix test for mrqa_parallel --- mrQA/tests/test_cli.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/mrQA/tests/test_cli.py b/mrQA/tests/test_cli.py index ba49e3c..1455364 100644 --- a/mrQA/tests/test_cli.py +++ b/mrQA/tests/test_cli.py @@ -98,20 +98,21 @@ def test_binary_parallel(args): ds1.load() with tempfile.TemporaryDirectory() as tempdir: # shlex doesn't test work with binaries - subprocess.run(['mrqa_parallel', - '--data-source', attributes['fake_ds_dir'], - '--config', attributes['config_path'], - '--name', ds1.name, - '--decimals', '3', - '--tolerance', '0.1', - '--verbose', - '--job-size', '1', - f'--out-mrds-path {tempdir}/test.mrds.pkl ' - '--output-dir', tempdir]) - report_paths = list(Path(tempdir).glob('*.html')) - # check if report was generated - assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, - ds1) + if attributes['num_subjects'] > 2: + subprocess.run(['mrqa_parallel', + '--data-source', attributes['fake_ds_dir'], + '--config', attributes['config_path'], + '--name', ds1.name, + '--decimals', '3', + '--tolerance', '0.1', + '--verbose', + '--job-size', '1', + '--out-mrds-path', Path(tempdir)/'test.mrds.pkl', + '--output-dir', tempdir]) + report_paths = list(Path(tempdir).glob('*.html')) + # check if report was generated + assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, + ds1) return From b027e6a0157d1a2797d4f6138ca5d41a38b8f22f Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Tue, 21 Nov 2023 14:42:31 -0500 Subject: [PATCH 15/23] add function copy2dest for monitor tests, modify check_status.py --- {examples => mrQA/tests}/check_status.py | 17 ++++++++++++----- mrQA/tests/simulate.py | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) rename {examples => mrQA/tests}/check_status.py (69%) diff --git a/examples/check_status.py b/mrQA/tests/check_status.py similarity index 69% rename from examples/check_status.py rename to mrQA/tests/check_status.py index 7ad0948..45df14a 100644 --- a/examples/check_status.py +++ b/mrQA/tests/check_status.py @@ -2,12 +2,20 @@ from pathlib import Path from mrQA import monitor -from mrQA.tests.utils import copy2dest +from mrQA.tests.conftest import THIS_DIR +from mrQA.tests.simulate import copy2dest -def run(folder_path): +# @settings(max_examples=10, deadline=None) +# @given(args=dcm_dataset_strategy) +def run(folder_path): # args): + # ds1, attributes = args + # assume(attributes['num_subjects'] > 4) + # folder_path = attributes['fake_ds_dir'] folder_path = Path(folder_path).resolve() - config_path = Path('./mri-config.json').resolve() + # config_path = attributes['config_path'] + config_path = THIS_DIR / 'resources/mri-config.json' + # make a temporary output folder using tempfile with tempfile.TemporaryDirectory() as tmpdirname: output_dir = Path(tmpdirname) / 'output' @@ -27,9 +35,8 @@ def run(folder_path): decimals=2, config_path=config_path, verbose=False, - reference_path='./wpc-6106.xml' ) - copy2dest(output_dir, tmpdirname, '/tmp') + # copy2dest(output_dir, 
tmpdirname, '/tmp')
     print('simulation-over')

diff --git a/mrQA/tests/simulate.py b/mrQA/tests/simulate.py
index b222f61..a98370a 100644
--- a/mrQA/tests/simulate.py
+++ b/mrQA/tests/simulate.py
@@ -1,9 +1,11 @@
 import tempfile
 import zipfile
 from collections import defaultdict
+from datetime import datetime
 from pathlib import Path
 
 import pydicom
+from pydicom import dcmread
 
 from mrQA.utils import convert2ascii
 
@@ -115,3 +117,22 @@ def setup_directories(src):
             raise FileNotFoundError("Temporary directory not found")
 
     return src_dir, dest_dir
+
+
+def copy2dest(folder, src, dest):
+    file_list = []
+    date = datetime.now()
+    for file in folder.rglob('*'):
+        if file.is_file():
+            try:
+                dicom = dcmread(file)
+            except Exception:  # skip files that are not readable as DICOM
+                continue
+            dicom.ContentDate = date.strftime('%Y%m%d')
+            rel_path = file.relative_to(src)
+            new_abs_path = dest / rel_path
+            parent = new_abs_path.parent
+            parent.mkdir(exist_ok=True, parents=True)
+            dicom.save_as(new_abs_path)
+            file_list.append(file)
+    return file_list

From 4165d1eb2f70cbfed8ba6ae864d5e52fe49be40e Mon Sep 17 00:00:00 2001
From: Harsh Sinha
Date: Tue, 21 Nov 2023 14:43:32 -0500
Subject: [PATCH 16/23] add option to include non-compliance in subjects that
 were scanned after a given date

---
 mrQA/base.py | 91 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 62 insertions(+), 29 deletions(-)

diff --git a/mrQA/base.py b/mrQA/base.py
index 9174011..9151a24 100644
--- a/mrQA/base.py
+++ b/mrQA/base.py
@@ -1,6 +1,7 @@
 import json
 import tempfile
 from abc import ABC, abstractmethod
+from datetime import timedelta
 from typing import List
 
 from MRdataset import valid_dirs
@@ -213,8 +214,28 @@ def add_sequence_pair_names(self, list_seqs):
         """
         self._vt_sequences.add(list_seqs)
 
+    def _is_scanned_before(self, date, seq):
+        # Provide an option to include those subjects that were
+        # scanned after the given date
+        content_date = seq['ContentDate'].get_value()
+        # Suppose the date for report generation is 2023-11-21 01:00:00 am.
+        # However, content date doesn't have time information, so it is
+        # 2023-11-21 00:00:00 am. If we compare the two dates, the date for
+        # report generation will always be greater than the content date,
+        # even though the scan could have been performed on the same day.
+        # Hence, we add 1 day to the content date, so that the two dates
+        # can be compared.
+
+        # A better option is to use content time, but not all scanners
+        # provide content time. Hence, we use content date + 1 day. This means
+        # that the scan will be skipped only if it was performed at least
+        # 1 day before the date of report generation.
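+        # For example (hypothetical values): a report generated at
+        # 2023-11-21 01:00:00 compared against a ContentDate of
+        # 2023-11-21 gives content_date + timedelta(days=1) ==
+        # 2023-11-22 00:00:00, so the check below evaluates to False
+        # and the same-day scan is still included in the log.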
+ if date >= content_date + timedelta(days=1): + return True + return False + def generate_hz_log(self, parameters, suppl_params, filter_fn=None, - verbosity=1): + verbosity=1, date=None): sequences = self.get_sequence_ids() nc_log = {} for seq_id in sequences: @@ -224,25 +245,37 @@ def generate_hz_log(self, parameters, suppl_params, filter_fn=None, if param_name not in nc_log: # empty nc_log[param_name] = [] - nc_dict = {} - nc_dict['subject'] = sub - nc_dict['sequence_name'] = seq_id + if self._is_scanned_before(date, seq): + continue + nc_dict = self._populate_nc_dict(param_tuple=param_tupl, + sub=sub, path=path, + seq=seq, seq_ids=seq_id, + suppl_params=suppl_params, + verbosity=verbosity) + nc_log[param_name].append(nc_dict) + return nc_log - # if additional parameters have to be included in the log - if suppl_params: - for i in suppl_params: - nc_dict[i] = seq[i].get_value() + def _populate_nc_dict(self, param_tuple, seq_ids, sub, path, seq, + suppl_params, verbosity): - if verbosity > 1: - nc_dict['values'] = [p.get_value() for p in param_tupl] - if verbosity > 2: - nc_dict['path'] = str(path) + nc_dict = {} + nc_dict['date'] = str(seq['ContentDate'].get_value().date()) + nc_dict['subject'] = sub + nc_dict['sequence_name'] = seq_ids - nc_log[param_name].append(nc_dict) - return nc_log + # if additional parameters have to be included in the log + if suppl_params: + for i in suppl_params: + nc_dict[i] = seq[i].get_value() + + if verbosity > 1: + nc_dict['values'] = [p.get_value() for p in param_tuple] + if verbosity > 2: + nc_dict['path'] = str(path) + return nc_dict def generate_nc_log(self, parameters, filter_fn=None, output_dir=None, - suppl_params=None, audit='vt', verbosity=1): + suppl_params=None, audit='vt', verbosity=1, date=None): """ Generate a log of all non-compliant parameters in the dataset. Apart from returning the log, it also dumps the log as a json file @@ -250,11 +283,11 @@ def generate_nc_log(self, parameters, filter_fn=None, output_dir=None, nc_log = {} if audit == 'hz': nc_log = self.generate_hz_log(parameters, suppl_params, - filter_fn, verbosity) + filter_fn, verbosity, date=date) filename = self.name + '_hz_log.json' elif audit == 'vt': nc_log = self.generate_vt_log(parameters, suppl_params, - filter_fn, verbosity) + filter_fn, verbosity, date=date) filename = self.name + '_vt_log.json' if audit not in ['vt', 'hz']: raise ValueError('Expected one of [vt, hz], got {}'.format(audit)) @@ -267,7 +300,7 @@ def generate_nc_log(self, parameters, filter_fn=None, output_dir=None, return nc_log def generate_vt_log(self, parameters, suppl_params, filter_fn=None, - verbosity=1): + verbosity=1, date=None): nc_log = {} sequence_pairs = self.get_vt_sequences() @@ -276,22 +309,22 @@ def generate_vt_log(self, parameters, suppl_params, filter_fn=None, # want to highlight the issues in field-map and epi sequences. 
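         # (For example, filter_epi_fmap_pairs in mrQA.utils - touched by a
         # later hunk in this series - is one such filter_fn: it keeps only
         # (epi, fieldmap)-style sequence-name pairs.)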
        for pair in filter(filter_fn, sequence_pairs):
             for param_name in parameters:
-                for param_tupl, sub, path, seq in self.get_vt_param_values(
-                        pair, param_name):
+                for param_tuple, sub, path, seq in self.get_vt_param_values(
+                        pair, param_name):
                     if param_name not in nc_log:  # empty
                         nc_log[param_name] = []
-                    nc_dict = {}
-                    nc_dict['subject'] = sub
-                    nc_dict['sequence_names'] = pair
-
-                    if verbosity > 1:
-                        nc_dict['values'] = [p.get_value() for p in param_tupl]
-                    if verbosity > 2:
-                        nc_dict['path'] = str(path)
+                    # Provide a date to include those subjects that were
+                    # scanned after the given date
+                    if self._is_scanned_before(date, seq):
+                        continue
+                    nc_dict = self._populate_nc_dict(param_tuple=param_tuple,
+                                                     sub=sub, path=path,
+                                                     seq=seq, seq_ids=pair,
+                                                     suppl_params=suppl_params,
+                                                     verbosity=verbosity)
                     nc_log[param_name].append(nc_dict)
-
         return nc_log
 
     def get_nc_param_ids(self, seq_id):

From e20a69414fa55567b13da110520de8ebb562d131 Mon Sep 17 00:00:00 2001
From: Harsh Sinha
Date: Tue, 21 Nov 2023 14:44:27 -0500
Subject: [PATCH 17/23] start using nc_log function for daily status updates

---
 mrQA/config.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/mrQA/config.py b/mrQA/config.py
index 6265fa3..f2b3073 100644
--- a/mrQA/config.py
+++ b/mrQA/config.py
@@ -36,14 +36,14 @@ def configure_logger(log, output_dir, mode='w', level='WARNING'):
     output_dir.mkdir(parents=True, exist_ok=True)
 
     options = {
-        "warn": {
-            'level': logging.WARN,
-            'file': output_dir / 'warn.log',
+        "warn" : {
+            'level' : logging.WARN,
+            'file' : output_dir / 'warn.log',
             'formatter': warn_formatter
         },
         "error": {
-            'level': logging.ERROR,
-            'file': output_dir / 'error.log',
+            'level' : logging.ERROR,
+            'file' : output_dir / 'error.log',
             'formatter': error_formatter
         }
     }
@@ -66,12 +66,13 @@ def configure_logger(log, output_dir, mode='w', level='WARNING'):
 
 PATH_CONFIG = {
     'data_source': Path.home() / 'scan_data',
-    'output_dir': Path.home() / 'mrqa_reports',
+    'output_dir' : Path.home() / 'mrqa_reports',
 }
 
 DATE_SEPARATOR = '_DATE_'
 ATTRIBUTE_SEPARATOR = '_ATTR_'
-
+DATETIME_FORMAT = '%m_%d_%Y_%H_%M_%S'
+DATE_FORMAT = '%m_%d_%Y'
 
 Unspecified = UnspecifiedType()
 
@@ -80,9 +81,9 @@ def past_records_fpath(folder):
     return Path(folder / 'past_record.txt')
 
 
-def status_fpath(folder):
+def status_fpath(folder, audit):
     """Constructs the path to the status file"""
-    return Path(folder / 'non_compliance_log.txt')
+    return Path(folder / f'{audit}_non_compliance_log.txt')
 
 
 def report_fpath(folder_path, fname):
@@ -107,6 +108,7 @@ def __init__(self, name):
         super().__init__(
             f"Could not compute majority for {name}")
 
+
 #
 # class ReferenceNotSetForModality(MRException):
 #     """Custom error that is raised when majority cannot be computed."""

From c64aac360c3dc793c743344c8e0e347014657453 Mon Sep 17 00:00:00 2001
From: Harsh Sinha
Date: Tue, 21 Nov 2023 14:45:30 -0500
Subject: [PATCH 18/23] modify log_latest_non_compliance to generate logs for
 both horizontal and vertical audits

---
 mrQA/utils.py | 85 +++++++++++++++++++++++++++------------------------
 1 file changed, 45 insertions(+), 40 deletions(-)

diff --git a/mrQA/utils.py b/mrQA/utils.py
index 6fac45b..a47d7ec 100644
--- a/mrQA/utils.py
+++ b/mrQA/utils.py
@@ -4,7 +4,6 @@ import tempfile
 import time
 import unicodedata
-import warnings
 from collections import Counter
 from datetime import datetime, timedelta, timezone
 from itertools import takewhile
@@ -22,7 +21,7 @@
 from mrQA.config import past_records_fpath, report_fpath, mrds_fpath, \
     subject_list_dir, 
DATE_SEPARATOR, CannotComputeMajority, \ Unspecified, \ - EqualCount, status_fpath, ATTRIBUTE_SEPARATOR + EqualCount, status_fpath, ATTRIBUTE_SEPARATOR, DATETIME_FORMAT, DATE_FORMAT def get_reference_protocol(dataset: BaseDataset, @@ -1081,54 +1080,60 @@ def find_terminal_folders(root, leave=True, position=0): else: for sd2 in level2_subdirs: terminal.extend(find_terminal_folders(sd2, leave=False, - position=1)) + position=1)) return terminal -def log_latest_non_compliance(ncomp_data, latest_data, output_dir): - """ - Log the latest non-compliance data from recent sessions to a file - - Parameters - ---------- - ncomp_data - latest_data - output_dir +def get_datetime(date): + try: + date = datetime.strptime(date, DATETIME_FORMAT) + except ValueError as exc: + if 'unconverted data remains' in str(exc): + try: + date = datetime.strptime(date, DATE_FORMAT) + except ValueError as exc: + raise ValueError(f'Invalid date format. ' + f'Use one of ' + f'[{DATE_FORMAT}, {DATETIME_FORMAT}]') from exc + return date - Returns - ------- +def log_latest_non_compliance(dataset, config_path, + filter_fn=None, + audit='hz', date=None, output_dir=None): """ - if latest_data is None: - return - full_status = [] - for seq_id in latest_data.get_sequence_ids(): - # Don't rename run_id as run, it will conflict with subprocess.run - for sub, sess, run_id, seq in latest_data.traverse_horizontal(seq_id): - try: - nc_param_dict = ncomp_data.get_nc_params( - subject_id=sub, session_id=sess, - run_id=run_id, seq_id=seq_id) - status = { - 'ts': seq.timestamp, - 'subject': sub, - 'sequence': seq_id, - 'ds_name': latest_data.name, - 'nc_params': ';'.join(nc_param_dict.keys()) - } - full_status.append(status) - except KeyError: - continue - status_filepath = status_fpath(output_dir) + Log the latest non-compliance data from recent sessions to a file + """ + nc_log = {} + ds_name = None + date = get_datetime(date) + + config = get_config(config_path=config_path, report_type=audit) + parameters = config.get("include_parameters", None) + + if audit == 'hz': + ds_name = dataset.name + nc_log = dataset.generate_nc_log(parameters, filter_fn, + date=date, + audit='hz', verbosity=1, + output_dir=None) + elif audit == 'vt': + ds_name = dataset.name + nc_log = dataset.generate_nc_log(parameters, filter_fn, + date=date, + audit='vt', verbosity=1, + output_dir=None) + + status_filepath = status_fpath(output_dir, audit) if not status_filepath.parent.is_dir(): status_filepath.parent.mkdir(parents=True) - with open(status_filepath, 'a', encoding='utf-8') as fp: - for i in full_status: - fp.write( - f" {i['ts']}, {i['ds_name']}, {i['sequence']}, {i['subject']}, " - f"{i['nc_params']} \n") + with open(status_filepath, 'w', encoding='utf-8') as fp: + for parameter in nc_log: + for i in nc_log[parameter]: + fp.write(f" {i['date']}, {ds_name}, {i['sequence_name']}," + f" {i['subject']}, {parameter} \n") return None # status_filepath From e5fba003b722d2ef22107a9e40cd6f68665661b9 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Tue, 21 Nov 2023 14:46:53 -0500 Subject: [PATCH 19/23] make lint, remove utc time from log, directly use datetime format, import time format from config --- mrQA/utils.py | 55 ++++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/mrQA/utils.py b/mrQA/utils.py index a47d7ec..bbb3a30 100644 --- a/mrQA/utils.py +++ b/mrQA/utils.py @@ -82,9 +82,10 @@ def get_config(config_path: Union[str, Path], report_type='hz') -> dict: else: include_params = 
audit_config.get('include_parameters', None)
     if include_params is None:
-        logger.warn('Parameters to be included in the compliance check are '
-                    'not provided. All parameters will be included in the '
-                    f'{key}')
+        logger.warning(
+            'Parameters to be included in the compliance check are '
+            'not provided. All parameters will be included in the '
+            f'{key}')
     return audit_config
 
 
@@ -162,7 +163,7 @@ def is_writable(dir_path):
 #     fp_list : List[Path]
 #         List of folder paths
 #     ext : str
-#         Used to filter_fn files, and select only those which have this extension
+#         Used to filter files, and select only those which have this ext
 #     Returns
 #     -------
 #     List of paths
@@ -211,7 +212,7 @@ def get_items_upto_count(dict_: Counter, rank: int = 1):
 
 def timestamp():
     """Generate a timestamp as a string"""
-    time_string = time.strftime('%m_%d_%Y_%H_%M_%S')
+    time_string = time.strftime(DATETIME_FORMAT)
     return time_string
 
 
@@ -238,16 +239,16 @@ def make_output_paths(output_dir, dataset):
         subject lists for each modality
     """
     ts = timestamp()
-    utc = datetime.strptime(ts, '%m_%d_%Y_%H_%M_%S').timestamp()
+    # utc = datetime.strptime(ts, '%m_%d_%Y_%H_%M_%S').timestamp()
     filename = f'{dataset.name}{DATE_SEPARATOR}{ts}'
     report_path = report_fpath(output_dir, filename)
     mrds_path = mrds_fpath(output_dir, filename)
     sub_lists_dir_path = subject_list_dir(output_dir, filename)
-    log_report_history(output_dir, mrds_path, report_path, ts, utc)
+    log_report_history(output_dir, mrds_path, report_path, ts)
     return report_path, mrds_path, sub_lists_dir_path
 
 
-def log_report_history(output_dir, mrds_path, report_path, ts, utc):
+def log_report_history(output_dir, mrds_path, report_path, ts):
     """
     Log the report generation history to a text file
 
@@ -268,8 +269,8 @@
     if not records_filepath.parent.is_dir():
         records_filepath.parent.mkdir(parents=True)
     with open(records_filepath, 'a', encoding='utf-8') as fp:
-        fp.write(f'{utc},{report_path},'
-                 f'{mrds_path},{ts}\n')
+        fp.write(f'{ts},{report_path},'
+                 f'{mrds_path}\n')
 
 
 def majority_values(list_seqs: list,
@@ -449,9 +450,10 @@ def split_list(dir_index: Sized, num_chunks: int) -> Iterable:
     if len(dir_index) == 0:
         raise ValueError('List of directories is empty!')
     if len(dir_index) < num_chunks:
-        warnings.warn(f'Got num_chunks={num_chunks}, list_size={len(dir_index)}'
-                      f'Expected num_chunks < list_size',
-                      stacklevel=2)
+        logger.warning(
+            f'Got num_chunks={num_chunks}, list_size={len(dir_index)}. '
+            f'Expected num_chunks < list_size',
+            stacklevel=2)
         num_chunks = len(dir_index)
     k, m = divmod(len(dir_index), num_chunks)
     # k, m = (len(dir_index)//num_chunks, len(dir_index)%num_chunks)
@@ -761,9 +763,9 @@ def _cli_report(hz_audit: dict, report_name):
     non_compliant_ds = hz_audit['non_compliant']
     compliant_ds = hz_audit['compliant']
     undetermined_ds = hz_audit['undetermined']
-    if not (compliant_ds.get_sequence_ids() or
-            non_compliant_ds.get_sequence_ids() or
-            undetermined_ds.get_sequence_ids()):
+    if not (compliant_ds.get_sequence_ids()
+            or non_compliant_ds.get_sequence_ids()
+            or undetermined_ds.get_sequence_ids()):
         logger.error('No report generated for horizontal audit.')
         return
@@ -814,7 +816,7 @@ def _datasets_processed(dir_path, ignore_case=True):
 
 def _get_time(time_format: str, last_reported_on: str):
-    str_format = '%m/%d/%Y %H:%M:%S'
+    str_format = DATETIME_FORMAT
     if time_format == 'timestamp':
         mod_time = datetime.fromtimestamp(float(last_reported_on)).strftime(
             str_format)
@@ -867,7 +869,7 @@ def 
folders_modified_since(last_reported_on: str, """ modified_folders = set() - mod_time = _get_time(time_format, last_reported_on) + mod_time = get_datetime(last_reported_on) out_path = Path(output_dir) / 'modified_folders_since.txt' if out_path.is_file(): out_path.unlink() @@ -923,9 +925,8 @@ def get_last_valid_record(folder_path: Path) -> Optional[tuple]: num_records = len(lines) if i < -num_records: return None - last_line = lines[i] - last_reported_on, last_report_path, last_mrds_path, _ = \ - last_line.split(',') + last_line = lines[i].strip('\n').split(',') + last_reported_on, last_report_path, last_mrds_path = last_line if Path(last_mrds_path).is_file(): return last_reported_on, last_report_path, last_mrds_path i -= 1 @@ -940,7 +941,7 @@ def get_timestamps(): ts = datetime.timestamp(now) date_time = now.strftime('%m/%d/%Y %H:%M:%S%z') return { - 'utc': ts, + 'utc' : ts, 'date_time': date_time } @@ -1213,7 +1214,7 @@ def modify_sequence_name(seq: "BaseSequence", stratify_by: str, stratify_value = '' seq_name_with_stratify = ATTRIBUTE_SEPARATOR.join([seq.name, - stratify_value]) + stratify_value]) if datasets: for ds in datasets: ds.set_modified_seq_name(seq.name, seq_name_with_stratify) @@ -1348,11 +1349,11 @@ def filter_epi_fmap_pairs(pair): epi_substrings = ['epi', 'bold', 'rest', 'fmri', 'pasl', 'asl', 'dsi', 'dti', 'dwi'] fmap_substrings = ['fmap', 'fieldmap', 'map'] - if (has_substring(pair[0].lower(), epi_substrings) and - has_substring(pair[1].lower(), fmap_substrings)): + if (has_substring(pair[0].lower(), epi_substrings) + and has_substring(pair[1].lower(), fmap_substrings)): return True - if (has_substring(pair[1].lower(), epi_substrings) and - has_substring(pair[0].lower(), fmap_substrings)): + if (has_substring(pair[1].lower(), epi_substrings) + and has_substring(pair[0].lower(), fmap_substrings)): return True return False From 4a5ae19d31b70960081899d80bf3e6f8f82ea9e9 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Tue, 21 Nov 2023 14:48:38 -0500 Subject: [PATCH 20/23] make lint, use log_latest_non_compliance in monitor.py --- mrQA/monitor.py | 40 +++++++++++++++++++++++++++------------- mrQA/project.py | 28 ++++++++++++++-------------- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/mrQA/monitor.py b/mrQA/monitor.py index fd2550c..e5e90e0 100644 --- a/mrQA/monitor.py +++ b/mrQA/monitor.py @@ -1,13 +1,14 @@ """Console script for mrQA.""" import argparse import sys +from datetime import datetime, timedelta from pathlib import Path from typing import Union, List from MRdataset import import_dataset, load_mr_dataset from mrQA import logger -from mrQA.config import PATH_CONFIG, THIS_DIR +from mrQA.config import PATH_CONFIG, THIS_DIR, DATETIME_FORMAT from mrQA.project import check_compliance from mrQA.utils import is_writable, folders_modified_since, \ get_last_valid_record, log_latest_non_compliance @@ -172,6 +173,8 @@ def monitor(name: str, """ output_dir = Path(output_dir) last_record = get_last_valid_record(output_dir) + last_reported_on = None + if last_record: last_reported_on, last_report_path, last_mrds_path = last_record # TODO: delete old logs, only keep latest 3-4 reports in the folder @@ -205,18 +208,29 @@ def monitor(name: str, output_dir=output_dir) new_dataset = None - compliance_summary_dict, _ = check_compliance(dataset=dataset, - output_dir=output_dir, - decimals=decimals, - verbose=verbose, - tolerance=tolerance, - reference_path=reference_path, - config_path=config_path) - - log_latest_non_compliance( - 
ncomp_data=compliance_summary_dict['non_compliant'], - latest_data=new_dataset, - output_dir=output_dir, ) + if last_reported_on is None: + # if this is the first time, set last_reported_on to 1 year ago + last_reported_on = datetime.now() - timedelta(days=365) + last_reported_on = last_reported_on.strftime(DATETIME_FORMAT) + + hz_audit_results, vt_audit_results = check_compliance( + dataset=dataset, + output_dir=output_dir, + decimals=decimals, + verbose=verbose, + tolerance=tolerance, + reference_path=reference_path, + config_path=config_path) + + log_latest_non_compliance(dataset=hz_audit_results['non_compliant'], + config_path=config_path, + output_dir=output_dir, audit='hz', + date=last_reported_on) + + log_latest_non_compliance(dataset=vt_audit_results['non_compliant'], + config_path=config_path, + output_dir=output_dir, audit='vt', + date=last_reported_on) return diff --git a/mrQA/project.py b/mrQA/project.py index 4a4086e..e3e6e48 100644 --- a/mrQA/project.py +++ b/mrQA/project.py @@ -164,11 +164,11 @@ def horizontal_audit(dataset: BaseDataset, compliant_ds, non_compliant_ds, undetermined_ds = _init_datasets(dataset) eval_dict = { - 'complete_ds': dataset, - 'reference': ref_protocol, - 'compliant': compliant_ds, + 'complete_ds' : dataset, + 'reference' : ref_protocol, + 'compliant' : compliant_ds, 'non_compliant': non_compliant_ds, - 'undetermined': undetermined_ds, + 'undetermined' : undetermined_ds, } if not (ref_protocol and hz_audit_config): @@ -274,11 +274,11 @@ def vertical_audit(dataset: BaseDataset, report_type='vt') compliant_ds, non_compliant_ds, _ = _init_datasets(dataset) eval_dict = { - 'complete_ds': dataset, - 'compliant': compliant_ds, - 'non_compliant': non_compliant_ds, + 'complete_ds' : dataset, + 'compliant' : compliant_ds, + 'non_compliant' : non_compliant_ds, 'sequence_pairs': [], - 'parameters': [] + 'parameters' : [] } if not vt_audit_config: return eval_dict @@ -290,8 +290,8 @@ def vertical_audit(dataset: BaseDataset, # If no sequence pairs are provided, then compare all possible pairs if chosen_pairs is None: - logger.warn('No sequence pairs provided. Comparing all possible ' - 'sequence pairs.') + logger.warning('No sequence pairs provided. 
Comparing all possible ' + 'sequence pairs.') chosen_pairs = list(combinations(dataset.get_sequence_ids(), 2)) # check pair are queryable, all the pairs are not present # throw an error if any of the pair is not present @@ -344,11 +344,11 @@ def vertical_audit(dataset: BaseDataset, ) # TODO: add option for num_sequences > 2 eval_dict = { - 'complete_ds': dataset, - 'compliant': compliant_ds, - 'non_compliant': non_compliant_ds, + 'complete_ds' : dataset, + 'compliant' : compliant_ds, + 'non_compliant' : non_compliant_ds, 'sequence_pairs': used_pairs, - 'parameters': include_params + 'parameters' : include_params } return eval_dict From 5f6b3760058d51b6ab2029f784e3f9bb222fc8e8 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Tue, 21 Nov 2023 14:50:08 -0500 Subject: [PATCH 21/23] make lint, changes to setup configurations --- .gitignore | 1 + Makefile | 3 +++ mrQA/formatter.py | 10 +++++----- requirements_dev.txt | 1 + setup.cfg | 4 ++++ 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 94fccfb..bb053a0 100644 --- a/.gitignore +++ b/.gitignore @@ -110,3 +110,4 @@ ENV/ # mri protocol *.xml +*.secrets diff --git a/Makefile b/Makefile index abda245..5bc0589 100644 --- a/Makefile +++ b/Makefile @@ -65,6 +65,9 @@ coverage: ## check code coverage quickly with the default Python coverage xml $(BROWSER) htmlcov/index.html +act: + act --secret-file my.secrets + docs: ## generate Sphinx HTML documentation, including API docs $(MAKE) -C docs clean $(MAKE) -C docs html diff --git a/mrQA/formatter.py b/mrQA/formatter.py index b811063..b106263 100644 --- a/mrQA/formatter.py +++ b/mrQA/formatter.py @@ -160,9 +160,9 @@ def collect_hz_audit_results(self, logger.error('Reference protocol is empty. Cannot generate' ' report for horizontal audit.') self.skip_hz_report = True - if not (compliant_ds.get_sequence_ids() or - non_compliant_ds.get_sequence_ids() or - undetermined_ds.get_sequence_ids()): + if not (compliant_ds.get_sequence_ids() + or non_compliant_ds.get_sequence_ids() + or undetermined_ds.get_sequence_ids()): logger.error('It seems the dataset has not been checked for ' 'horizontal audit. Skipping horizontal audit report') self.skip_hz_report = True @@ -214,8 +214,8 @@ def collect_vt_audit_results(self, logger.error('No sequences found in dataset. Cannot generate' 'report') self.skip_vt_report = True - if not (compliant_ds.get_sequence_ids() or - non_compliant_ds.get_sequence_ids()): + if not (compliant_ds.get_sequence_ids() + or non_compliant_ds.get_sequence_ids()): logger.error('It seems the dataset has not been checked for ' 'vertical audit. 
Skipping vertical audit report') self.skip_vt_report = True diff --git a/requirements_dev.txt b/requirements_dev.txt index f8cf48e..cb08f1e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -9,4 +9,5 @@ pytest bokeh flake8 requests +coverage diff --git a/setup.cfg b/setup.cfg index 2edef3e..7bfd1a1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,7 +20,11 @@ exclude = docs, */_version.py, */tests/*.py, + plotting.py filename = *.py +# E203 - whitespace before ':' +ignore = + E203, W503 max-line-length = 80 max-complexity = 12 accept-encodings = utf-8 From efedeb1328d0bb3c1ea00b86ca42ccfdf3d2549f Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Tue, 21 Nov 2023 15:33:04 -0500 Subject: [PATCH 22/23] make changes to workflow --- .github/workflows/continuous-integration.yml | 22 ++++++++++++++++++-- Makefile | 2 +- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 08894d5..ced3824 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -32,10 +32,28 @@ jobs: pip install . - name: Lint with flake8 run: | + make lint # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | pytest +# - name: Coverage +# run: | +# coverage run --rcfile=.coveragerc --source mrQA -m pytest +# coverage report -m +# coverage xml +# - name: Run codacy-coverage-reporter +# uses: codacy/codacy-coverage-reporter-action@v1 +# with: +# # project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} +# # or +# api-token: ${{ secrets.CODACY_API_TOKEN }} +# organization-provider: gh +# username: sinhaharsh +# project-name: mrQA +# coverage-reports: coverage.xml +# # or a comma-separated list for multiple reports +# # coverage-reports: , diff --git a/Makefile b/Makefile index 5bc0589..34ba80c 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ coverage: ## check code coverage quickly with the default Python $(BROWSER) htmlcov/index.html act: - act --secret-file my.secrets + act --secret-file .secrets docs: ## generate Sphinx HTML documentation, including API docs $(MAKE) -C docs clean From d14dacc0c1a67326ec08ae4812cd0b6f50568b82 Mon Sep 17 00:00:00 2001 From: Harsh Sinha Date: Tue, 21 Nov 2023 15:49:41 -0500 Subject: [PATCH 23/23] add shortcut to merge --- .github/workflows/codacy.yml | 2 +- Makefile | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/codacy.yml b/.github/workflows/codacy.yml index 67dfc76..ce958e9 100644 --- a/.github/workflows/codacy.yml +++ b/.github/workflows/codacy.yml @@ -30,7 +30,7 @@ jobs: permissions: contents: read # for actions/checkout to fetch code security-events: write # for github/codeql-action/upload-sarif to upload SARIF results - actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status name: Codacy Security Scan runs-on: ubuntu-latest steps: diff --git a/Makefile 
b/Makefile index 34ba80c..76eb8a9 100644 --- a/Makefile +++ b/Makefile @@ -86,3 +86,11 @@ dist: clean ## builds source and wheel package install: clean ## install the package to the active Python's site-packages python setup.py install + +merge: + git switch mrds-issue-12 + git push + git switch master + git merge mrds-issue-12 + git push origin master + git switch mrds-issue-12
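
A minimal usage sketch of the date-gated logging flow these patches introduce,
assuming a previously saved .mrds.pkl dataset; all paths below are
hypothetical, while the function names and signatures follow the diffs above:

    from datetime import datetime, timedelta
    from pathlib import Path

    from MRdataset import load_mr_dataset

    from mrQA.config import DATETIME_FORMAT
    from mrQA.project import check_compliance
    from mrQA.utils import log_latest_non_compliance

    output_dir = Path('/tmp/mrqa_reports')          # hypothetical location
    config_path = Path('mri-config.json')           # hypothetical config
    dataset = load_mr_dataset('/tmp/ds.mrds.pkl')   # hypothetical dataset

    # check_compliance returns the horizontal and vertical audit results
    hz_results, vt_results = check_compliance(dataset=dataset,
                                              output_dir=output_dir,
                                              config_path=config_path)

    # On a first run there is no prior record; monitor() falls back to one
    # year ago, formatted with the DATETIME_FORMAT added in config.py
    last_reported_on = (datetime.now()
                        - timedelta(days=365)).strftime(DATETIME_FORMAT)

    # One dated log per audit: <output_dir>/hz_non_compliance_log.txt and
    # <output_dir>/vt_non_compliance_log.txt (see status_fpath)
    for audit, results in (('hz', hz_results), ('vt', vt_results)):
        log_latest_non_compliance(dataset=results['non_compliant'],
                                  config_path=config_path,
                                  output_dir=output_dir,
                                  audit=audit,
                                  date=last_reported_on)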