Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nerdm.utils: add functions stripping features from JSON Schemas #61

Merged
merged 5 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker/ejsonschema/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ RUN update-alternatives --install /usr/lib/uwsgi/plugins/python3_plugin.so \

RUN python -m pip install "setuptools<66.0.0"
RUN python -m pip install json-spec jsonschema==2.4.0 requests \
pytest==4.6.5 filelock crossrefapi pyyaml
pytest==4.6.5 filelock crossrefapi pyyaml jsonpath_ng
RUN python -m pip install --no-dependencies jsonmerge==1.3.0

WORKDIR /root
Expand Down
25 changes: 25 additions & 0 deletions python/nistoar/base/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from collections.abc import Mapping
from urllib.parse import urlparse

import jsonpath_ng as jp

from . import OARException

oar_home = None
Expand Down Expand Up @@ -476,3 +478,26 @@ def lookup_config_server(serverport):
"""
raise NotImplementedError()

NO_VALUE=NotImplemented
RAISE=NO_VALUE
def hget_jp(obj: Mapping, path: str, default=None):
    """
    look up a single value inside a hierarchical dictionary (e.g. a JSON or configuration
    structure) by its location path.  The path is parsed as a JSONPath expression
    (https://goessner.net/articles/JsonPath/); if it matches more than one location, only
    the first match is returned, so this function is best used with paths that resolve to
    exactly one value.
    :param dict obj:  the dictionary to search for a matching value.
    :param str path:  a JSONPath-compliant string giving the location of the value to return
                      (the initial "$." is optional)
    :param default:   the value to return when the path does not match anything (None if not
                      specified).  Pass RAISE to have a KeyError raised instead.
    :raises KeyError: if default is RAISE and the path does not resolve to an existing location.
    """
    try:
        matches = jp.parse(path).find(obj)
        value = matches[0].value
    except IndexError:
        # nothing matched the path
        if default is not RAISE:
            return default
        raise KeyError(path)
    return value

hget = hget_jp

84 changes: 84 additions & 0 deletions python/nistoar/nerdm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
Utility functions and classes for interrogating and manipulating NERDm metadata objects
"""
import re
import jsonpath_ng as jp
from collections.abc import Mapping, Sequence
from typing import Union, List

from nistoar.base.config import hget

META_PREFIXES = "_$"

Expand Down Expand Up @@ -181,3 +186,82 @@ def cmp_versions(ver1, ver2):
return 0
return +1

_doc_properties = "title description asOntology notes comments valueDocumentation equivalentTo".split()
def declutter_schema(schema: Mapping, post2020: bool=False):
    """
    remove documentation nodes from a JSON Schema object in situ.

    The annotation properties named in ``_doc_properties`` are deleted from the given schema
    and, recursively, from every sub-schema found under its ``"properties"`` node, its
    definitions node, and its ``"allOf"``, ``"anyOf"``, and ``"oneOf"`` lists.
    :param dict schema:   the JSON Schema (or sub-schema) to strip; it is modified in place.
                          A value that is not a dictionary (e.g. a boolean schema) is left
                          untouched.
    :param bool post2020: if True, look for type definitions under ``"$defs"`` rather than
                          ``"definitions"``.  The setting applies at every level of the
                          recursion.
    """
    if not isinstance(schema, Mapping):
        # boolean schemas and other non-object nodes carry no documentation
        return

    for prop in _doc_properties:
        if prop in schema:
            del schema[prop]

    # Only the values under "properties" and the definitions node are schemas; the keys
    # are property/type names (which may legitimately be called, say, "title").
    deftag = "$defs" if post2020 else "definitions"
    for tag in ("properties", deftag):
        children = schema.get(tag)
        if isinstance(children, Mapping):
            for child in children.values():
                declutter_schema(child, post2020)

    for seq in ("allOf", "anyOf", "oneOf"):
        members = schema.get(seq)
        if isinstance(members, (list, tuple)):
            for itm in members:
                declutter_schema(itm, post2020)


def unrequire_props_in(schema: Mapping, locations: Union[str, List[str]], post2020: bool=False):
    """
    strip the ``"required"`` property from the schema definitions found at the given locations
    within a JSON Schema.  Each location is looked up with ``hget()``; if it resolves to a
    non-empty dictionary, that dictionary's ``"required"`` property is deleted (if present), and
    the same is done for every member of its ``"allOf"``, ``"anyOf"``, and ``"oneOf"`` lists.
    Locations that do not resolve to a dictionary are silently ignored.  The schema is
    modified in place.
    :param dict schema:        a dictionary representing a JSON Schema
    :param str|list locations: the location, or list of locations, of the internal schemas to
                               edit, given in the path syntax accepted by ``hget()``;
                               an example might be "definitions.Resource".  ("$" is what
                               this function uses to refer to a sub-schema itself when it
                               recurses.)
    :param bool post2020:      passed along unchanged to the recursive calls; it is not
                               otherwise consulted by this function.
    """
    paths = [ locations ] if isinstance(locations, str) else locations

    for path in paths:
        target = hget(schema, path)
        if not target or not isinstance(target, Mapping):
            # nothing (usable) at this location
            continue

        if "required" in target:
            del target["required"]

        for keyword in ("allOf", "anyOf", "oneOf"):
            if keyword not in target:
                continue
            members = target[keyword]
            if isinstance(members, Sequence):
                for member in members:
                    unrequire_props_in(member, "$", post2020)


def loosen_schema(schema: Mapping, directives: Mapping, opts=None):
    """
    apply the given loosening directives to the given JSON Schema.  The schema is modified in
    place.  The directives are given as a dictionary in which the following properties are
    supported:
    ``derequire``
        a list of type definitions within the schema from which the required property
        should be removed (via :py:func:`~nistoar.nerdm.utils.unrequire_props_in`).  Each
        type name listed will be assumed to be an item under the definitions node in the
        schema this directive is applied to.
    ``dedocument``
        a boolean indicating whether the documentation annotations should be removed from
        the schema (via :py:func:`~nistoar.nerdm.utils.declutter_schema`).  If not set, the
        default is determined by opts.dedoc if opts is given or True, otherwise.
    ``post2020``
        if true, the definitions node is taken to be ``"$defs"`` rather than
        ``"definitions"``, both when removing documentation and when locating the
        ``derequire`` types.
    :param dict schema:      the schema document as a JSON Schema schema dictionary
    :param dict directives:  the dictionary of directives to apply
    :param opts:             an options object (containing scripts command-line options); only
                             its ``dedoc`` attribute is consulted, and only when the
                             ``dedocument`` directive is not set.
    """
    p2020 = bool(directives.get("post2020"))

    # fall back to the command-line preference only when the directive itself is silent
    dedoc_default = True
    if opts is not None:
        dedoc_default = getattr(opts, "dedoc", True)
    if directives.get("dedocument", dedoc_default):
        # pass the dialect along so that the proper definitions node gets decluttered
        declutter_schema(schema, p2020)

    deftag = "$defs" if p2020 else "definitions"

    # NOTE(review): these locations are resolved as JSONPath by unrequire_props_in(); a bare
    # "$defs" segment may need quoting to be parsed correctly -- confirm against jsonpath_ng.
    dereqtps = [ deftag+'.'+t for t in directives.get("derequire", []) ]
    unrequire_props_in(schema, dereqtps, p2020)

29 changes: 28 additions & 1 deletion python/tests/nistoar/base/test_config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import os, sys, pdb, shutil, logging, json, re, importlib
import unittest as test
from pathlib import Path
from nistoar.testing import *

import nistoar.base.config as config

datadir = os.path.join(os.path.dirname(__file__), "data")
testdir = Path(__file__).resolve().parents[0]
datadir = str(testdir / "data")
basedir = testdir.parents[3]
schemadir = basedir / 'model'
tmpd = None

def setUpModule():
Expand Down Expand Up @@ -71,6 +75,29 @@ def test_merge_config(self):
self.assertEqual(out['zub'], 'dub')
self.assertEqual(out['tell'], {"a": 1})

def test_hget_jp(self):
    """
    exercise config.hget_jp() against the NERDm schema: successful look-ups, the handling
    of paths that do not resolve (default value vs. RAISE), and that returned values are
    references into the original structure rather than copies.
    """
    schemafile = schemadir / 'nerdm-schema.json'
    with open(schemafile) as fd:
        schema = json.load(fd)

    # paths that resolve to a value
    found = config.hget_jp(schema, "definitions.Resource.properties.title.title")
    self.assertEqual(found, "Title")
    found = config.hget_jp(schema, "definitions.ResourceReference.allOf[1].required")
    self.assertEqual(found, ["title"])

    # paths that do not resolve
    missing = "definitions.goober.title"
    self.assertIsNone(config.hget_jp(schema, missing))
    self.assertEqual(config.hget_jp(schema, missing, "Dr."), "Dr.")
    self.assertRaises(KeyError, config.hget_jp, schema, missing, config.RAISE)

    self.assertRaises(KeyError, config.hget_jp, schema,
                      "definitions.ResourceReference.allOf[23].required", config.RAISE)

    # make sure results are not copies of the original
    ressch = config.hget_jp(schema, "definitions.Resource")
    self.assertIn("required", ressch)
    del ressch['required']
    self.assertRaises(KeyError, config.hget_jp, schema,
                      "definitions.Resource.required", config.RAISE)


class TestLogConfig(test.TestCase):

def resetLogfile(self):
Expand Down
131 changes: 131 additions & 0 deletions python/tests/nistoar/nerdm/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
import os, sys, pdb, shutil, logging, json
import unittest as test
from pathlib import Path
from collections import OrderedDict

from nistoar.nerdm import utils
from nistoar.nerdm import constants as const

testdir = Path(__file__).resolve().parents[0]
basedir = testdir.parents[3]
schemadir = basedir / 'model'

class TestUtils(test.TestCase):

def test_meta_prop_ch(self):
Expand Down Expand Up @@ -100,6 +106,131 @@ def test_schema_version_cmp(self):
self.assertEqual(utils.cmp_versions(utils.get_nerdm_schema_version(data), "0.5"), 1)
self.assertEqual(utils.cmp_versions(utils.get_nerdm_schema_version(data), "2.5"), -1)
self.assertEqual(utils.cmp_versions(utils.get_nerdm_schema_version(data), "1.3"), 0)

def test_declutter_schema(self):
    """
    ensure that declutter_schema() strips the documentation properties from the top level
    of the NERDm schema as well as from its definitions and their properties.
    """
    with open(schemadir/'nerdm-schema.json') as fd:
        schema = json.load(fd)

    # (location, whether a value is expected there before decluttering)
    before = [
        ("title",                                             True),
        ("description",                                       True),
        ("definitions.Resource.title",                        False),
        ("definitions.Resource.description",                  True),
        ("definitions.Resource.notes",                        False),
        ("definitions.Resource.properties.title.title",       True),
        ("definitions.Resource.properties.title.notes",       True),
        ("definitions.Resource.properties.title.description", True),
        ("definitions.Resource.properties.title.asOntology",  True),
    ]
    for path, isset in before:
        check = self.assertTrue if isset else self.assertFalse
        check(utils.hget(schema, path))

    utils.declutter_schema(schema)

    # none of the documentation should remain
    for path, _ in before:
        self.assertFalse(utils.hget(schema, path))

def test_declutter_schema_post2020(self):
    """
    ensure that declutter_schema() in post-2020 mode ignores the "definitions" node: only
    the top-level documentation of the (pre-2020) NERDm schema should get removed.
    """
    with open(schemadir/'nerdm-schema.json') as fd:
        schema = json.load(fd)

    toplevel = ["title", "description"]

    # (location, whether a value is expected there before decluttering)
    nested = [
        ("definitions.Resource.title",                        False),
        ("definitions.Resource.description",                  True),
        ("definitions.Resource.notes",                        False),
        ("definitions.Resource.properties.title.title",       True),
        ("definitions.Resource.properties.title.notes",       True),
        ("definitions.Resource.properties.title.description", True),
        ("definitions.Resource.properties.title.asOntology",  True),
    ]

    def check_nested():
        for path, isset in nested:
            if isset:
                self.assertTrue(utils.hget(schema, path))
            else:
                self.assertFalse(utils.hget(schema, path))

    for path in toplevel:
        self.assertTrue(utils.hget(schema, path))
    check_nested()

    utils.declutter_schema(schema, True)

    # the file is not post-2020 compliant, so only the top level documentation will be found
    for path in toplevel:
        self.assertFalse(utils.hget(schema, path))
    check_nested()

def test_unrequire_props_in(self):
    """
    ensure that unrequire_props_in() removes "required" only from the requested definitions
    (including from within their allOf members), whether the locations are given as a single
    string or as a list, and that locations that do not exist are tolerated.
    """
    with open(schemadir/'nerdm-schema.json') as fd:
        schema = json.load(fd)

    resource = "definitions.Resource.required"
    reference = "definitions.ResourceReference.allOf[1].required"
    topic = "definitions.Topic.required"
    organization = "definitions.Organization.required"

    def check(removed):
        # every watched "required" must still be set unless it is listed as removed
        for path in (resource, reference, topic, organization):
            if path in removed:
                self.assertFalse(utils.hget(schema, path))
            else:
                self.assertTrue(utils.hget(schema, path))

    check(())

    utils.unrequire_props_in(schema, "definitions.Resource")
    check((resource,))

    utils.unrequire_props_in(schema, ["definitions.ResourceReference"])
    check((resource, reference))

    utils.unrequire_props_in(schema, ["definitions.Resource",
                                      "definitions.Topic",
                                      "goober",
                                      "definitions.Organization"])
    check((resource, reference, topic, organization))

def test_loosen_schema(self):
    """
    ensure that loosen_schema() with both directives removes all of the documentation and
    the "required" property of only the listed type.
    """
    with open(schemadir/"nerdm-schema.json") as fd:
        schema = json.load(fd, object_pairs_hook=OrderedDict)

    watched = [
        "title",
        "description",
        "definitions.Resource.required",
        "definitions.Resource.description",
        "definitions.Organization.required",
        "definitions.Organization.description",
    ]
    for path in watched:
        self.assertTrue(utils.hget(schema, path))

    directives = {"derequire": ["Resource"], "dedocument": True}
    utils.loosen_schema(schema, directives)

    # Organization was not listed under derequire, so only its "required" survives
    survivors = ["definitions.Organization.required"]
    for path in watched:
        if path in survivors:
            self.assertTrue(utils.hget(schema, path))
        else:
            self.assertFalse(utils.hget(schema, path))

def test_loosen_schema_no_dedoc(self):
    """
    ensure that loosen_schema() leaves the documentation alone when the dedocument directive
    is False while still removing the "required" property of the listed type.
    """
    with open(schemadir/"nerdm-schema.json") as fd:
        schema = json.load(fd, object_pairs_hook=OrderedDict)

    watched = [
        "title",
        "description",
        "definitions.Resource.required",
        "definitions.Resource.description",
        "definitions.Organization.required",
        "definitions.Organization.description",
    ]
    for path in watched:
        self.assertTrue(utils.hget(schema, path))

    directives = {"derequire": ["Resource"], "dedocument": False}
    utils.loosen_schema(schema, directives)

    # only the "required" of the listed type should be gone
    removed = ["definitions.Resource.required"]
    for path in watched:
        if path in removed:
            self.assertFalse(utils.hget(schema, path))
        else:
            self.assertTrue(utils.hget(schema, path))



class TestVersion(test.TestCase):

Expand Down
2 changes: 1 addition & 1 deletion scripts/makedist.nerdmdocs
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ echo '+' PACKAGE_NAME=$PACKAGE_NAME
echo '+' version=$version

# build the components
# set -x
installdir=$BUILD_DIR/docs
set -x
mkdir -p $installdir

# export schema files
Expand Down
24 changes: 15 additions & 9 deletions scripts/record_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# The default package name (oar-sdp) can be over-ridden by the environment
# variable PACKAGE_NAME
#
import os, sys, json, re
import os, sys, json, re, traceback as tb
from collections import OrderedDict

prog = os.path.basename(sys.argv[0])
Expand Down Expand Up @@ -80,17 +80,23 @@ def ejschemadep():

def jmergedep():
import jsonmerge
eggre = re.compile(r'^jsonmerge-(.*)\.egg-info$')
eggre = re.compile(r'^jsonmerge-(.*)\.egg')
modfile = jsonmerge.__file__
libdir = os.path.dirname(os.path.dirname(modfile))
vers="(unknown)"
try:
egginfo = [d for d in os.listdir(libdir) if eggre.match(d)]
if len(egginfo) > 0:
m = eggre.match(egginfo[0])
vers = m.group(1)
except Exception as ex:
tb.print_exc()
m = eggre.match(os.path.basename(libdir))
if m:
# zipped egg
vers = m.group(1)
else:
# it's the dist-packages dir; look for the egg-info file
try:
egginfo = [d for d in os.listdir(libdir) if eggre.match(d)]
if len(egginfo) > 0:
m = eggre.match(egginfo[0])
vers = m.group(1)
except Exception as ex:
tb.print_exc()
return OrderedDict([
("name", "jsonmerge"),
("version", vers)
Expand Down
Loading