Commit 91307cf

Merge branch 'mrds-issue-12'
sinhaharsh committed Nov 21, 2023
2 parents 868ee9b + d14dacc
Showing 26 changed files with 1,257 additions and 424 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codacy.yml
@@ -30,7 +30,7 @@ jobs:
permissions:
contents: read # for actions/checkout to fetch code
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
name: Codacy Security Scan
runs-on: ubuntu-latest
steps:
22 changes: 20 additions & 2 deletions .github/workflows/continuous-integration.yml
@@ -32,10 +32,28 @@ jobs:
pip install .
- name: Lint with flake8
run: |
make lint
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest
# - name: Coverage
# run: |
# coverage run --rcfile=.coveragerc --source mrQA -m pytest
# coverage report -m
# coverage xml
# - name: Run codacy-coverage-reporter
# uses: codacy/codacy-coverage-reporter-action@v1
# with:
# # project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
# # or
# api-token: ${{ secrets.CODACY_API_TOKEN }}
# organization-provider: gh
# username: sinhaharsh
# project-name: mrQA
# coverage-reports: coverage.xml
# # or a comma-separated list for multiple reports
# # coverage-reports: <PATH_TO_REPORT>, <PATH_TO_REPORT>
6 changes: 5 additions & 1 deletion .gitignore
@@ -106,4 +106,8 @@ ENV/
/update_today.txt

# codacy
results.sarif
*.sarif

# mri protocol
*.xml
*.secrets
12 changes: 12 additions & 0 deletions Makefile
@@ -62,8 +62,12 @@ coverage: ## check code coverage quickly with the default Python
coverage run --rcfile=.coveragerc --source mrQA -m pytest
coverage report -m
coverage html
coverage xml
$(BROWSER) htmlcov/index.html

act:
act --secret-file .secrets

docs: ## generate Sphinx HTML documentation, including API docs
$(MAKE) -C docs clean
$(MAKE) -C docs html
@@ -82,3 +86,11 @@ dist: clean ## builds source and wheel package

install: clean ## install the package to the active Python's site-packages
python setup.py install

merge:
git switch mrds-issue-12
git push
git switch master
git merge mrds-issue-12
git push origin master
git switch mrds-issue-12
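
For local workflow runs, the new `act` target invokes nektos/act with `--secret-file .secrets`. A minimal sketch of that git-ignored file, assuming act's dotenv-style key=value format; the token names mirror the commented-out Codacy step in the CI workflow above and are illustrative:

    # .secrets, read by `act --secret-file .secrets`; ignored via the new *.secrets rule
    CODACY_PROJECT_TOKEN=<project-token>
    CODACY_API_TOKEN=<api-token>
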
4 changes: 2 additions & 2 deletions README.rst
@@ -7,8 +7,8 @@ mrQA : automatic protocol compliance checks on MR datasets
.. image:: https://app.codacy.com/project/badge/Grade/8cd263e1eaa0480d8fac50eba0094401
:target: https://app.codacy.com/gh/sinhaharsh/mrQA/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade

.. image:: https://github.com/sinhaharsh/mrQA/actions/workflows/continuos-integration.yml/badge.svg
:target: https://github.com/sinhaharsh/mrQA/actions/workflows/continuos-integration.yml
.. image:: https://github.com/sinhaharsh/mrQA/actions/workflows/continuous-integration.yml/badge.svg
:target: https://github.com/sinhaharsh/mrQA/actions/workflows/continuous-integration.yml


.. image:: https://raw.githubusercontent.com/jupyter/design/master/logos/Badges/nbviewer_badge.svg
91 changes: 62 additions & 29 deletions mrQA/base.py
@@ -1,6 +1,7 @@
import json
import tempfile
from abc import ABC, abstractmethod
from datetime import timedelta
from typing import List

from MRdataset import valid_dirs
@@ -213,8 +214,28 @@ def add_sequence_pair_names(self, list_seqs):
"""
self._vt_sequences.add(list_seqs)

def _is_scanned_before(self, date, seq):
# Provide an option to include those subjects that were
# scanned after the given date
content_date = seq['ContentDate'].get_value()
# Suppose date for report generation is 2023-11-21 01:00:00 am
# However content date doesn't have time information, so it is
# 2023-11-21 00:00:00 am. Now, if we compare the two dates, date for
# report generation will always be greater than content date,
# even though the scan could have been performed on the same day.
# Hence, we add 1 day to content date, so that the two dates
# can be compared.

# A better option is to use content time, but not all scanners
# provide content time. Hence, we use content date + 1 day. This means
# that the scan will be skipped only if it was performed at least
# 1 day before the date of report generation.
if date >= content_date + timedelta(days=1):
return True
return False

def generate_hz_log(self, parameters, suppl_params, filter_fn=None,
verbosity=1):
verbosity=1, date=None):
sequences = self.get_sequence_ids()
nc_log = {}
for seq_id in sequences:
@@ -224,37 +245,49 @@ def generate_hz_log(self, parameters, suppl_params, filter_fn=None,
if param_name not in nc_log: # empty
nc_log[param_name] = []

nc_dict = {}
nc_dict['subject'] = sub
nc_dict['sequence_name'] = seq_id
if self._is_scanned_before(date, seq):
continue
nc_dict = self._populate_nc_dict(param_tuple=param_tupl,
sub=sub, path=path,
seq=seq, seq_ids=seq_id,
suppl_params=suppl_params,
verbosity=verbosity)
nc_log[param_name].append(nc_dict)
return nc_log

# if additional parameters have to be included in the log
if suppl_params:
for i in suppl_params:
nc_dict[i] = seq[i].get_value()
def _populate_nc_dict(self, param_tuple, seq_ids, sub, path, seq,
suppl_params, verbosity):

if verbosity > 1:
nc_dict['values'] = [p.get_value() for p in param_tupl]
if verbosity > 2:
nc_dict['path'] = str(path)
nc_dict = {}
nc_dict['date'] = str(seq['ContentDate'].get_value().date())
nc_dict['subject'] = sub
nc_dict['sequence_name'] = seq_ids

nc_log[param_name].append(nc_dict)
return nc_log
# if additional parameters have to be included in the log
if suppl_params:
for i in suppl_params:
nc_dict[i] = seq[i].get_value()

if verbosity > 1:
nc_dict['values'] = [p.get_value() for p in param_tuple]
if verbosity > 2:
nc_dict['path'] = str(path)
return nc_dict

def generate_nc_log(self, parameters, filter_fn=None, output_dir=None,
suppl_params=None, audit='vt', verbosity=1):
suppl_params=None, audit='vt', verbosity=1, date=None):
"""
Generate a log of all non-compliant parameters in the dataset.
Apart from returning the log, it also dumps the log as a json file
"""
nc_log = {}
if audit == 'hz':
nc_log = self.generate_hz_log(parameters, suppl_params,
filter_fn, verbosity)
filter_fn, verbosity, date=date)
filename = self.name + '_hz_log.json'
elif audit == 'vt':
nc_log = self.generate_vt_log(parameters, suppl_params,
filter_fn, verbosity)
filter_fn, verbosity, date=date)
filename = self.name + '_vt_log.json'
if audit not in ['vt', 'hz']:
raise ValueError('Expected one of [vt, hz], got {}'.format(audit))
@@ -267,7 +300,7 @@ def generate_nc_log(self, parameters, filter_fn=None, output_dir=None,
return nc_log

def generate_vt_log(self, parameters, suppl_params, filter_fn=None,
verbosity=1):
verbosity=1, date=None):

nc_log = {}
sequence_pairs = self.get_vt_sequences()
@@ -276,22 +309,22 @@ def generate_vt_log(self, parameters, suppl_params, filter_fn=None,
# want to highlight the issues in field-map and epi sequences.
for pair in filter(filter_fn, sequence_pairs):
for param_name in parameters:
for param_tupl, sub, path, seq in self.get_vt_param_values(
pair, param_name):
for param_tuple, sub, path, seq in self.get_vt_param_values(
pair, param_name):
if param_name not in nc_log: # empty
nc_log[param_name] = []

nc_dict = {}
nc_dict['subject'] = sub
nc_dict['sequence_names'] = pair

if verbosity > 1:
nc_dict['values'] = [p.get_value() for p in param_tupl]
if verbosity > 2:
nc_dict['path'] = str(path)
# Provide a date to include those subjects that were
# scanned after the given date
if self._is_scanned_before(date, seq):
continue

nc_dict = self._populate_nc_dict(param_tuple=param_tuple,
sub=sub, path=path,
seq=seq, seq_ids=pair,
suppl_params=suppl_params,
verbosity=verbosity)
nc_log[param_name].append(nc_dict)

return nc_log

def get_nc_param_ids(self, seq_id):
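
A minimal sketch of the date-cutoff comparison that the new `_is_scanned_before` helper performs, with illustrative values (as the in-code comment notes, `ContentDate` carries no time component, so it falls at midnight):

    from datetime import datetime, timedelta

    report_date = datetime(2023, 11, 21, 1, 0)  # report generated at 01:00 am
    content_date = datetime(2023, 11, 21)       # scan date, truncated to midnight

    # A naive comparison would mark a same-day scan as "scanned before":
    print(report_date >= content_date)                       # True
    # With one day of grace, same-day scans stay in the report:
    print(report_date >= content_date + timedelta(days=1))   # False
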
5 changes: 3 additions & 2 deletions mrQA/cli.py
@@ -8,7 +8,7 @@

from mrQA import check_compliance
from mrQA import logger
from mrQA.config import PATH_CONFIG
from mrQA.config import PATH_CONFIG, THIS_DIR
from mrQA.utils import is_writable


@@ -27,7 +27,8 @@ def get_parser():
help='directory containing downloaded dataset with '
'dicom files, supports nested hierarchies')
required.add_argument('--config', type=str,
help='path to config file')
help='path to config file',
default=THIS_DIR / 'resources/mri-config.json')
optional.add_argument('-o', '--output-dir', type=str,
help='specify the directory where the report'
' would be saved. By default, the --data_source '
22 changes: 13 additions & 9 deletions mrQA/config.py
@@ -6,6 +6,8 @@
from MRdataset.config import MRException
from protocol import UnspecifiedType

THIS_DIR = Path(__file__).parent.resolve()


def configure_logger(log, output_dir, mode='w', level='WARNING'):
"""
@@ -34,14 +36,14 @@ def configure_logger(log, output_dir, mode='w', level='WARNING'):
output_dir.mkdir(parents=True, exist_ok=True)

options = {
"warn": {
'level': logging.WARN,
'file': output_dir / 'warn.log',
"warn" : {
'level' : logging.WARN,
'file' : output_dir / 'warn.log',
'formatter': warn_formatter
},
"error": {
'level': logging.ERROR,
'file': output_dir / 'error.log',
'level' : logging.ERROR,
'file' : output_dir / 'error.log',
'formatter': error_formatter
}
}
Expand All @@ -64,12 +66,13 @@ def configure_logger(log, output_dir, mode='w', level='WARNING'):

PATH_CONFIG = {
'data_source': Path.home() / 'scan_data',
'output_dir': Path.home() / 'mrqa_reports',
'output_dir' : Path.home() / 'mrqa_reports',
}

DATE_SEPARATOR = '_DATE_'
ATTRIBUTE_SEPARATOR = '_ATTR_'

DATETIME_FORMAT = '%m_%d_%Y_%H_%M_%S'
DATE_FORMAT = '%m_%d_%Y'
Unspecified = UnspecifiedType()


@@ -78,9 +81,9 @@ def past_records_fpath(folder):
return Path(folder / 'past_record.txt')


def status_fpath(folder):
def status_fpath(folder, audit):
"""Constructs the path to the status file"""
return Path(folder / 'non_compliance_log.txt')
return Path(folder / f'{audit}_non_compliance_log.txt')


def report_fpath(folder_path, fname):
@@ -105,6 +108,7 @@ def __init__(self, name):
super().__init__(
f"Could not compute majority for {name}")


#
# class ReferenceNotSetForModality(MRException):
# """Custom error that is raised when majority cannot be computed."""
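
Illustrative use of the updated `status_fpath` signature (hypothetical folder path; the audit tag is interpolated into the filename as shown in the diff above):

    from pathlib import Path
    from mrQA.config import status_fpath

    status_fpath(Path('/tmp/mrqa_reports'), audit='hz')
    # -> Path('/tmp/mrqa_reports/hz_non_compliance_log.txt')
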
10 changes: 5 additions & 5 deletions mrQA/formatter.py
@@ -160,9 +160,9 @@ def collect_hz_audit_results(self,
logger.error('Reference protocol is empty. Cannot generate'
' report for horizontal audit.')
self.skip_hz_report = True
if not (compliant_ds.get_sequence_ids() or
non_compliant_ds.get_sequence_ids() or
undetermined_ds.get_sequence_ids()):
if not (compliant_ds.get_sequence_ids()
or non_compliant_ds.get_sequence_ids()
or undetermined_ds.get_sequence_ids()):
logger.error('It seems the dataset has not been checked for '
'horizontal audit. Skipping horizontal audit report')
self.skip_hz_report = True
@@ -214,8 +214,8 @@ def collect_vt_audit_results(self,
logger.error('No sequences found in dataset. Cannot generate'
' report')
self.skip_vt_report = True
if not (compliant_ds.get_sequence_ids() or
non_compliant_ds.get_sequence_ids()):
if not (compliant_ds.get_sequence_ids()
or non_compliant_ds.get_sequence_ids()):
logger.error('It seems the dataset has not been checked for '
'vertical audit. Skipping vertical audit report')
self.skip_vt_report = True