Skip to content

Commit

Permalink
Merge pull request #150 from obophenotype/dendrogram_check
Browse files Browse the repository at this point in the history
#100 github action for cell_set_preferred_alias uniqueness testing added
  • Loading branch information
hkir-dev committed Jul 26, 2021
2 parents f042ecd + 459e349 commit 7f218fe
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/dendrogram_check.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
name: Validate Dendrograms

# Since this validation currently fails, it was added as a separate action and disabled. In future it can be integrated into the main build task.
on:
# Triggers the workflow on push events that change a dendrogram JSON file or this workflow file
push:
paths:
- 'src/dendrograms/**.json'
- '.github/workflows/dendrogram_check.yaml'

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

jobs:
validate-dendrograms:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- name: install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: validate dendrograms
run: python ./src/scripts/dendrogram_validator.py
64 changes: 64 additions & 0 deletions src/scripts/dendrogram_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import logging
import os
from dendrogram_tools import dend_json_2_nodes_n_edges
from abc import ABC, abstractmethod
from os.path import isfile, join

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Directory holding the dendrogram JSON files, addressed relative to the
# (symlink-resolved) location of this script.
_SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
DENDROGRAM_FOLDER = join(_SCRIPT_DIR, "../dendrograms")


class BaseChecker(ABC):
    """Abstract interface for dendrogram checkers."""

    @abstractmethod
    def check(self, dend_file, dendrogram):
        """
        Run this checker against a single dendrogram.

        Concrete subclasses must override this method; the base
        implementation does nothing and returns None.

        :param dend_file: identifier of the dendrogram being checked.
        :param dendrogram: the loaded dendrogram data.
        """


class PrefAliasUniquenessChecker(BaseChecker):
    """
    cell_set_preferred_alias should be unique within any one dendrogram - ignoring nodes with no
    cell_set_preferred_alias (key missing or value empty), no two nodes should have the same one
    """

    def __init__(self):
        # Kept for interface compatibility; check() in this class does not populate it.
        self.reports = []

    def check(self, dend_file, dendrogram):
        """
        Log every repeated cell_set_preferred_alias found among the dendrogram nodes.

        :param dend_file: name of the dendrogram, used only in the log messages.
        :param dendrogram: mapping whose 'nodes' entry is an iterable of node dicts.
        :return: True when no alias occurs more than once, False otherwise.
        """
        # A set gives constant-time membership tests instead of rescanning a list per node.
        seen_aliases = set()
        is_valid = True
        for node in dendrogram['nodes']:
            # .get() so that nodes lacking the key are ignored rather than raising KeyError.
            alias = node.get('cell_set_preferred_alias')
            if not alias:
                continue
            if alias in seen_aliases:
                is_valid = False
                log.error("cell_set_preferred_alias '{}' is duplicate in {}"
                          .format(alias, dend_file))
            else:
                seen_aliases.add(alias)
        return is_valid


class ValidationError(Exception):
    """
    Error signalling a failed validation; carries a human-readable message.

    The message is available both as ``str(error)`` and as ``error.message``.
    """

    def __init__(self, message):
        """
        :param message: description of the failure.
        """
        # Pass the message to Exception so str(error) and tracebacks display it.
        super().__init__(message)
        self.message = message


def main():
    """
    Validate every ``.json`` file directly inside DENDROGRAM_FOLDER.

    All files are checked before the outcome is reported, so a single run logs
    the issues of every dendrogram.

    :raises ValidationError: if at least one dendrogram fails a check.
    """
    log.info("Dendrogram validation started.")
    any_failed = False
    for entry in os.listdir(DENDROGRAM_FOLDER):
        entry_path = join(DENDROGRAM_FOLDER, entry)
        if not isfile(entry_path):
            continue
        stem, suffix = os.path.splitext(entry)
        if suffix != ".json":
            continue
        dendrogram = dend_json_2_nodes_n_edges(entry_path)
        # No early exit: keep checking so issues in later files are logged too.
        if not PrefAliasUniquenessChecker().check(stem, dendrogram):
            any_failed = True

    if any_failed:
        raise ValidationError("Dendrogram validation failed and issues logged.")


if __name__ == '__main__':
    # No handler is configured elsewhere in this script, so without this call the
    # INFO-level progress message emitted by main() would be silently dropped.
    logging.basicConfig(level=logging.INFO)
    main()

0 comments on commit 7f218fe

Please sign in to comment.