Commit 91307cf

Merge branch 'mrds-issue-12'
sinhaharsh committed Nov 21, 2023
2 parents 868ee9b + d14dacc
Showing 26 changed files with 1,257 additions and 424 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codacy.yml
@@ -30,7 +30,7 @@ jobs:
permissions:
contents: read # for actions/checkout to fetch code
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
name: Codacy Security Scan
runs-on: ubuntu-latest
steps:
22 changes: 20 additions & 2 deletions .github/workflows/continuous-integration.yml
@@ -32,10 +32,28 @@ jobs:
pip install .
- name: Lint with flake8
run: |
make lint
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest
# - name: Coverage
# run: |
# coverage run --rcfile=.coveragerc --source mrQA -m pytest
# coverage report -m
# coverage xml
# - name: Run codacy-coverage-reporter
# uses: codacy/codacy-coverage-reporter-action@v1
# with:
# # project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
# # or
# api-token: ${{ secrets.CODACY_API_TOKEN }}
# organization-provider: gh
# username: sinhaharsh
# project-name: mrQA
# coverage-reports: coverage.xml
# # or a comma-separated list for multiple reports
# # coverage-reports: <PATH_TO_REPORT>, <PATH_TO_REPORT>
6 changes: 5 additions & 1 deletion .gitignore
@@ -106,4 +106,8 @@ ENV/
/update_today.txt

# codacy
results.sarif
*.sarif

# mri protocol
*.xml
*.secrets
12 changes: 12 additions & 0 deletions Makefile
@@ -62,8 +62,12 @@ coverage: ## check code coverage quickly with the default Python
coverage run --rcfile=.coveragerc --source mrQA -m pytest
coverage report -m
coverage html
coverage xml
$(BROWSER) htmlcov/index.html

act:
act --secret-file .secrets

docs: ## generate Sphinx HTML documentation, including API docs
$(MAKE) -C docs clean
$(MAKE) -C docs html
@@ -82,3 +86,11 @@ dist: clean ## builds source and wheel package

install: clean ## install the package to the active Python's site-packages
python setup.py install

merge:
git switch mrds-issue-12
git push
git switch master
git merge mrds-issue-12
git push origin master
git switch mrds-issue-12
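
For local workflow runs, the new `act` target invokes nektos/act with `--secret-file .secrets`. A minimal sketch of that git-ignored file, assuming act's dotenv-style key=value format; the token names mirror the commented-out Codacy step in the CI workflow above and are illustrative:

    # .secrets, read by `act --secret-file .secrets`; ignored via the new *.secrets rule
    CODACY_PROJECT_TOKEN=<project-token>
    CODACY_API_TOKEN=<api-token>
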
4 changes: 2 additions & 2 deletions README.rst
@@ -7,8 +7,8 @@ mrQA : automatic protocol compliance checks on MR datasets
.. image:: https://app.codacy.com/project/badge/Grade/8cd263e1eaa0480d8fac50eba0094401
:target: https://app.codacy.com/gh/sinhaharsh/mrQA/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade

.. image:: https://github.com/sinhaharsh/mrQA/actions/workflows/continuos-integration.yml/badge.svg
:target: https://github.com/sinhaharsh/mrQA/actions/workflows/continuos-integration.yml
.. image:: https://github.com/sinhaharsh/mrQA/actions/workflows/continuous-integration.yml/badge.svg
:target: https://github.com/sinhaharsh/mrQA/actions/workflows/continuous-integration.yml


.. image:: https://raw.githubusercontent.com/jupyter/design/master/logos/Badges/nbviewer_badge.svg
91 changes: 62 additions & 29 deletions mrQA/base.py
@@ -1,6 +1,7 @@
import json
import tempfile
from abc import ABC, abstractmethod
from datetime import timedelta
from typing import List

from MRdataset import valid_dirs
@@ -213,8 +214,28 @@ def add_sequence_pair_names(self, list_seqs):
"""
self._vt_sequences.add(list_seqs)

def _is_scanned_before(self, date, seq):
# Provide an option to include those subjects that were
# scanned after the given date
content_date = seq['ContentDate'].get_value()
# Suppose date for report generation is 2023-11-21 01:00:00 am
# However content date doesn't have time information, so it is
# 2023-11-21 00:00:00 am. Now, if we compare the two dates, date for
# report generation will always be greater than content date,
# even though the scan could have been performed on the same day.
# Hence, we add 1 day to content date, so that the two dates
# can be compared.

# A better option is to use content time, but not all scanners
# provide content time. Hence, we use content date + 1 day. This means
# that the scan will be skipped only if it was performed at least
# 1 day before the date of report generation.
if date >= content_date + timedelta(days=1):
return True
return False

def generate_hz_log(self, parameters, suppl_params, filter_fn=None,
verbosity=1):
verbosity=1, date=None):
sequences = self.get_sequence_ids()
nc_log = {}
for seq_id in sequences:
@@ -224,37 +245,49 @@ def generate_hz_log(self, parameters, suppl_params, filter_fn=None,
if param_name not in nc_log: # empty
nc_log[param_name] = []

nc_dict = {}
nc_dict['subject'] = sub
nc_dict['sequence_name'] = seq_id
if self._is_scanned_before(date, seq):
continue
nc_dict = self._populate_nc_dict(param_tuple=param_tupl,
sub=sub, path=path,
seq=seq, seq_ids=seq_id,
suppl_params=suppl_params,
verbosity=verbosity)
nc_log[param_name].append(nc_dict)
return nc_log

# if additional parameters have to be included in the log
if suppl_params:
for i in suppl_params:
nc_dict[i] = seq[i].get_value()
def _populate_nc_dict(self, param_tuple, seq_ids, sub, path, seq,
suppl_params, verbosity):

if verbosity > 1:
nc_dict['values'] = [p.get_value() for p in param_tupl]
if verbosity > 2:
nc_dict['path'] = str(path)
nc_dict = {}
nc_dict['date'] = str(seq['ContentDate'].get_value().date())
nc_dict['subject'] = sub
nc_dict['sequence_name'] = seq_ids

nc_log[param_name].append(nc_dict)
return nc_log
# if additional parameters have to be included in the log
if suppl_params:
for i in suppl_params:
nc_dict[i] = seq[i].get_value()

if verbosity > 1:
nc_dict['values'] = [p.get_value() for p in param_tuple]
if verbosity > 2:
nc_dict['path'] = str(path)
return nc_dict

def generate_nc_log(self, parameters, filter_fn=None, output_dir=None,
suppl_params=None, audit='vt', verbosity=1):
suppl_params=None, audit='vt', verbosity=1, date=None):
"""
Generate a log of all non-compliant parameters in the dataset.
Apart from returning the log, it also dumps the log as a json file
"""
nc_log = {}
if audit == 'hz':
nc_log = self.generate_hz_log(parameters, suppl_params,
filter_fn, verbosity)
filter_fn, verbosity, date=date)
filename = self.name + '_hz_log.json'
elif audit == 'vt':
nc_log = self.generate_vt_log(parameters, suppl_params,
filter_fn, verbosity)
filter_fn, verbosity, date=date)
filename = self.name + '_vt_log.json'
if audit not in ['vt', 'hz']:
raise ValueError('Expected one of [vt, hz], got {}'.format(audit))
@@ -267,7 +300,7 @@ def generate_nc_log(self, parameters, filter_fn=None, output_dir=None,
return nc_log

def generate_vt_log(self, parameters, suppl_params, filter_fn=None,
verbosity=1):
verbosity=1, date=None):

nc_log = {}
sequence_pairs = self.get_vt_sequences()
@@ -276,22 +309,22 @@ def generate_vt_log(self, parameters, suppl_params, filter_fn=None,
# want to highlight the issues in field-map and epi sequences.
for pair in filter(filter_fn, sequence_pairs):
for param_name in parameters:
for param_tupl, sub, path, seq in self.get_vt_param_values(
pair, param_name):
for param_tuple, sub, path, seq in self.get_vt_param_values(
pair, param_name):
if param_name not in nc_log: # empty
nc_log[param_name] = []

nc_dict = {}
nc_dict['subject'] = sub
nc_dict['sequence_names'] = pair

if verbosity > 1:
nc_dict['values'] = [p.get_value() for p in param_tupl]
if verbosity > 2:
nc_dict['path'] = str(path)
# Provide a date to include those subjects that were
# scanned after the given date
if self._is_scanned_before(date, seq):
continue

nc_dict = self._populate_nc_dict(param_tuple=param_tuple,
sub=sub, path=path,
seq=seq, seq_ids=pair,
suppl_params=suppl_params,
verbosity=verbosity)
nc_log[param_name].append(nc_dict)

return nc_log

def get_nc_param_ids(self, seq_id):
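
A minimal sketch of the date-cutoff comparison that the new `_is_scanned_before` helper performs, with illustrative values (as the in-code comment notes, `ContentDate` carries no time component, so it falls at midnight):

    from datetime import datetime, timedelta

    report_date = datetime(2023, 11, 21, 1, 0)  # report generated at 01:00 am
    content_date = datetime(2023, 11, 21)       # scan date, truncated to midnight

    # A naive comparison would mark a same-day scan as "scanned before":
    print(report_date >= content_date)                       # True
    # With one day of grace, same-day scans stay in the report:
    print(report_date >= content_date + timedelta(days=1))   # False
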
5 changes: 3 additions & 2 deletions mrQA/cli.py
@@ -8,7 +8,7 @@

from mrQA import check_compliance
from mrQA import logger
from mrQA.config import PATH_CONFIG
from mrQA.config import PATH_CONFIG, THIS_DIR
from mrQA.utils import is_writable


@@ -27,7 +27,8 @@ def get_parser():
help='directory containing downloaded dataset with '
'dicom files, supports nested hierarchies')
required.add_argument('--config', type=str,
help='path to config file')
help='path to config file',
default=THIS_DIR / 'resources/mri-config.json')
optional.add_argument('-o', '--output-dir', type=str,
help='specify the directory where the report'
' would be saved. By default, the --data_source '
22 changes: 13 additions & 9 deletions mrQA/config.py
@@ -6,6 +6,8 @@
from MRdataset.config import MRException
from protocol import UnspecifiedType

THIS_DIR = Path(__file__).parent.resolve()


def configure_logger(log, output_dir, mode='w', level='WARNING'):
"""
@@ -34,14 +36,14 @@ def configure_logger(log, output_dir, mode='w', level='WARNING'):
output_dir.mkdir(parents=True, exist_ok=True)

options = {
"warn": {
'level': logging.WARN,
'file': output_dir / 'warn.log',
"warn" : {
'level' : logging.WARN,
'file' : output_dir / 'warn.log',
'formatter': warn_formatter
},
"error": {
'level': logging.ERROR,
'file': output_dir / 'error.log',
'level' : logging.ERROR,
'file' : output_dir / 'error.log',
'formatter': error_formatter
}
}
Expand All @@ -64,12 +66,13 @@ def configure_logger(log, output_dir, mode='w', level='WARNING'):

PATH_CONFIG = {
'data_source': Path.home() / 'scan_data',
'output_dir': Path.home() / 'mrqa_reports',
'output_dir' : Path.home() / 'mrqa_reports',
}

DATE_SEPARATOR = '_DATE_'
ATTRIBUTE_SEPARATOR = '_ATTR_'

DATETIME_FORMAT = '%m_%d_%Y_%H_%M_%S'
DATE_FORMAT = '%m_%d_%Y'
Unspecified = UnspecifiedType()


@@ -78,9 +81,9 @@ def past_records_fpath(folder):
return Path(folder / 'past_record.txt')


def status_fpath(folder):
def status_fpath(folder, audit):
"""Constructs the path to the status file"""
return Path(folder / 'non_compliance_log.txt')
return Path(folder / f'{audit}_non_compliance_log.txt')


def report_fpath(folder_path, fname):
@@ -105,6 +108,7 @@ def __init__(self, name):
super().__init__(
f"Could not compute majority for {name}")


#
# class ReferenceNotSetForModality(MRException):
# """Custom error that is raised when majority cannot be computed."""
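
Illustrative use of the updated `status_fpath` signature (hypothetical folder path; the audit tag is interpolated into the filename as shown in the diff above):

    from pathlib import Path
    from mrQA.config import status_fpath

    status_fpath(Path('/tmp/mrqa_reports'), audit='hz')
    # -> Path('/tmp/mrqa_reports/hz_non_compliance_log.txt')
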
10 changes: 5 additions & 5 deletions mrQA/formatter.py
@@ -160,9 +160,9 @@ def collect_hz_audit_results(self,
logger.error('Reference protocol is empty. Cannot generate'
' report for horizontal audit.')
self.skip_hz_report = True
if not (compliant_ds.get_sequence_ids() or
non_compliant_ds.get_sequence_ids() or
undetermined_ds.get_sequence_ids()):
if not (compliant_ds.get_sequence_ids()
or non_compliant_ds.get_sequence_ids()
or undetermined_ds.get_sequence_ids()):
logger.error('It seems the dataset has not been checked for '
'horizontal audit. Skipping horizontal audit report')
self.skip_hz_report = True
@@ -214,8 +214,8 @@ def collect_vt_audit_results(self,
logger.error('No sequences found in dataset. Cannot generate'
' report')
self.skip_vt_report = True
if not (compliant_ds.get_sequence_ids() or
non_compliant_ds.get_sequence_ids()):
if not (compliant_ds.get_sequence_ids()
or non_compliant_ds.get_sequence_ids()):
logger.error('It seems the dataset has not been checked for '
'vertical audit. Skipping vertical audit report')
self.skip_vt_report = True