Skip to content

Commit

Permalink
Register imports and SHACL rule inheritance
Browse files Browse the repository at this point in the history
Squashed commit of the following:

commit 7932abb
Author: Alejandro Villar <[email protected]>
Date:   Thu Sep 28 18:55:03 2023 +0200

    Implement SHACL rule inheritance

commit e952a95
Author: Alejandro Villar <[email protected]>
Date:   Wed Sep 27 09:51:26 2023 +0200

    Recursive register imports, fix output

commit 3efa4dc
Author: Alejandro Villar <[email protected]>
Date:   Thu Sep 21 10:39:26 2023 +0200

    WIP register import
  • Loading branch information
avillar committed Sep 28, 2023
1 parent b8707d8 commit c6b4b09
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 40 deletions.
12 changes: 11 additions & 1 deletion ogc/bblocks/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from ogc.bblocks.postprocess import postprocess
from ogc.na import ingest_json

MAIN_BBR = 'https://blocks.ogc.org/register.json'
DEFAULT_IMPORT_MARKER = 'default'

templates_dir = Path(__file__).parent / 'templates'
uplift_context_file = Path(__file__).parent / 'register-context.yaml'

Expand Down Expand Up @@ -126,13 +129,19 @@
id_prefix = 'ogc.'
annotated_path = Path(args.annotated_path)
schema_mapping_config = {}
imported_registers = []
if bb_config_file and bb_config_file.is_file():
bb_config = load_yaml(filename=bb_config_file)
id_prefix = bb_config.get('identifier-prefix', id_prefix)
if id_prefix and id_prefix[-1] != '.':
id_prefix += '.'
subdirs = id_prefix.split('.')[1:]
schema_mapping_config = bb_config.get('schema-mapping', {})
imported_registers = bb_config.get('imports')
if imported_registers is None:
imported_registers = [MAIN_BBR]
else:
imported_registers = [ir if ir != DEFAULT_IMPORT_MARKER else MAIN_BBR for ir in imported_registers if ir]

# 1. Postprocess BBs
print(f"Running postprocess...", file=sys.stderr)
Expand All @@ -147,7 +156,8 @@
schema_default_base_url=schema_mapping_config.get('default'),
schema_identifier_url_mappings=schema_mapping_config.get('mappings'),
test_outputs_path=args.test_outputs_path,
github_base_url=args.github_base_url)
github_base_url=args.github_base_url,
imported_registers=imported_registers)

# 2. Uplift register.json
print(f"Running semantic uplift of {register_file}", file=sys.stderr)
Expand Down
33 changes: 22 additions & 11 deletions ogc/bblocks/postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from ogc.bblocks.generate_docs import DocGenerator
from ogc.bblocks.util import write_superbblocks_schemas, annotate_schema, BuildingBlock, \
write_jsonld_context, BuildingBlockRegister
write_jsonld_context, BuildingBlockRegister, ImportedBuildingBlocks
from ogc.bblocks.validate import validate_test_resources
from ogc.bblocks.transform import apply_transforms, transformers

Expand All @@ -35,7 +35,8 @@ def postprocess(registered_items_path: str | Path = 'registereditems',
schema_default_base_url: str | None = None,
schema_identifier_url_mappings: list[dict[str, str]] = None,
test_outputs_path: str | Path = 'build/tests',
github_base_url: str | None = None) -> list[BuildingBlock]:
github_base_url: str | None = None,
imported_registers: list[str] | None = None) -> list[BuildingBlock]:

cwd = Path().resolve()

Expand All @@ -53,6 +54,18 @@ def postprocess(registered_items_path: str | Path = 'registereditems',
templates_dir=templates_dir,
id_prefix=id_prefix)

if not isinstance(registered_items_path, Path):
registered_items_path = Path(registered_items_path)

child_bblocks = []
super_bblocks = {}
imported_bblocks = ImportedBuildingBlocks(imported_registers)
bbr = BuildingBlockRegister(registered_items_path,
fail_on_error=fail_on_error,
prefix=id_prefix,
annotated_path=annotated_path,
imported_bblocks=imported_bblocks)

def do_postprocess(bblock: BuildingBlock) -> bool:

try:
Expand Down Expand Up @@ -109,6 +122,7 @@ def do_postprocess(bblock: BuildingBlock) -> bool:
print(f" > Running tests for {bblock.identifier}", file=sys.stderr)
validation_passed, test_count = validate_test_resources(bblock,
registered_items_path=registered_items_path,
bblocks_register=bbr,
outputs_path=test_outputs_path)
bblock.metadata['validationPassed'] = validation_passed
if not validation_passed:
Expand Down Expand Up @@ -149,12 +163,6 @@ def do_postprocess(bblock: BuildingBlock) -> bool:
else:
print("No transformers found", file=sys.stderr)

child_bblocks = []
super_bblocks = {}
bbr = BuildingBlockRegister(registered_items_path,
fail_on_error=fail_on_error,
prefix=id_prefix,
annotated_path=annotated_path)
for building_block in bbr.bblocks.values():
if filter_ids and building_block.identifier not in filter_ids:
continue
Expand Down Expand Up @@ -223,11 +231,15 @@ def do_postprocess(bblock: BuildingBlock) -> bool:
print(f"{building_block.identifier} failed postprocessing, skipping...", file=sys.stderr)

if output_file:
output_register_json = {
'imports': imported_registers or [],
'bblocks': output_bblocks,
}
if output_file == '-':
print(json.dumps(output_bblocks, indent=2))
print(json.dumps(output_register_json, indent=2))
else:
with open(output_file, 'w') as f:
json.dump(output_bblocks, f, indent=2)
json.dump(output_register_json, f, indent=2)

print(f"Finished processing {len(output_bblocks)} building blocks", file=sys.stderr)
return output_bblocks
Expand Down Expand Up @@ -300,7 +312,6 @@ def _main():
output_file=None if args.no_output else args.output_register,
filter_ids=args.filter_id,
base_url=args.base_url,
metadata_schema=args.metadata_schema,
templates_dir=args.templates_dir,
fail_on_error=args.fail_on_error,
id_prefix=args.identifier_prefix)
Expand Down
2 changes: 2 additions & 0 deletions ogc/bblocks/register-context.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
transform:
# Extract bblocks
- if type != "array" then .bblocks else . end
# Add @type from itemClass with initial capital letter
- '[ .[] | ."@type" = [("bblocks:" + (.itemClass[:1]|ascii_upcase) + .itemClass[1:]), "skos:Concept"] ]'
# Add ConceptScheme
Expand Down
3 changes: 2 additions & 1 deletion ogc/bblocks/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
from ogc.na.util import load_yaml

d = load_yaml(filename='/home/alx5000/work/Proyectos/ogc/3d-csdm-schema/_sources/csdm/test/example.json')
schema = load_yaml(filename='/home/alx5000/work/Proyectos/ogc/3d-csdm-schema/build-local/annotated/csdm/test/schema.json')
schema = load_yaml(filename='/home/alx5000/work/Proyectos/ogc/3d-csdm-schema/build-local/annotated/csdm/test/schema'
'.json')
jsonschema.validate(d, schema)
2 changes: 1 addition & 1 deletion ogc/bblocks/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def apply_transforms(bblock: BuildingBlock,
with open(output_fn, 'w') as f:
f.write(transform_result)

except Exception as e:
except Exception:
with open(output_fn.with_stem(output_fn.name + '.error'), 'w') as f:
f.write('Error generating transformed file:\n')
f.write(traceback.format_exc())
89 changes: 76 additions & 13 deletions ogc/bblocks/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import re
import sys
from collections import deque
from functools import lru_cache
from pathlib import Path
from typing import Any, Sequence, Callable, AnyStr

Expand Down Expand Up @@ -105,6 +106,7 @@ def __init__(self, identifier: str, metadata_file: Path,
default_shacl_rules = fp / 'rules.shacl'
if default_shacl_rules.is_file():
shacl_rules.append('rules.shacl')
self.shacl_rules = [r if is_url(r) else fp / r for r in shacl_rules]

self.transforms_file = fp / 'transforms.yaml'
self.transforms = self._load_transforms()
Expand Down Expand Up @@ -182,19 +184,51 @@ def jsonld_context_contents(self):
return self._lazy_properties['jsonld_context_contents']


class ImportedBuildingBlocks:

def __init__(self, metadata_urls: list[str] | None):
self.bblocks: dict[str, dict] = {}
self.imported_registers: dict[str, list[str]] = {}
if metadata_urls:
pending_urls = deque(metadata_urls)
while pending_urls:
metadata_url = pending_urls.popleft()
new_pending = self.load(metadata_url)
pending_urls.extend(u for u in new_pending if u not in self.imported_registers)

def load(self, metadata_url: str) -> list[str]:
r = requests.get(metadata_url)
r.raise_for_status()
imported = r.json()
if isinstance(imported, list):
bblock_list = imported
dependencies = []
else:
bblock_list = imported['bblocks']
dependencies = imported.get('imports', [])
self.imported_registers[metadata_url] = []
for bblock in bblock_list:
bblock['register'] = self
self.bblocks[bblock['itemIdentifier']] = bblock
self.imported_registers[metadata_url].append(bblock['itemIdentifier'])
return dependencies


class BuildingBlockRegister:

def __init__(self,
registered_items_path: Path,
annotated_path: Path = Path(),
fail_on_error: bool = False,
prefix: str = 'ogc.',
find_dependencies=True):
find_dependencies=True,
imported_bblocks: ImportedBuildingBlocks | None = None):

self.registered_items_path = registered_items_path
self.annotated_path = annotated_path
self.prefix = prefix
self.bblocks: dict[str, BuildingBlock] = {}
self.imported_bblocks = imported_bblocks.bblocks if imported_bblocks else {}

self.bblock_paths: dict[Path, BuildingBlock] = {}

Expand All @@ -216,11 +250,19 @@ def __init__(self,
traceback.print_exception(e, file=sys.stderr)
print('=========', file=sys.stderr)

self.imported_bblock_schemas: dict[str, str] = {}
if find_dependencies:
dep_graph = nx.DiGraph()

for identifier, imported_bblock in self.imported_bblocks.items():
dep_graph.add_node(identifier)
dep_graph.add_edges_from([(d, identifier) for d in imported_bblock.get('dependsOn', ())])
imported_bblock.get('dependsOn', [])
for schema_url in imported_bblock.get('schema', {}).values():
self.imported_bblock_schemas[schema_url] = identifier

for bblock in self.bblocks.values():
found_deps = self.find_dependencies(bblock)
found_deps = self._resolve_bblock_deps(bblock)
deps = bblock.metadata.get('dependsOn')
if isinstance(deps, str):
found_deps.add(deps)
Expand All @@ -234,9 +276,12 @@ def __init__(self,
if cycles:
cycles_str = '\n - '.join(' -> '.join(reversed(c)) + ' -> ' + c[-1] for c in cycles)
raise BuildingBlockError(f"Circular dependencies found: \n - {cycles_str}")
self.bblocks = {b: self.bblocks[b] for b in nx.topological_sort(dep_graph) if b in self.bblocks}
self.bblocks: dict[str, BuildingBlock] = {b: self.bblocks[b]
for b in nx.topological_sort(dep_graph)
if b in self.bblocks}
self.dep_graph = dep_graph

def find_dependencies(self, bblock: BuildingBlock) -> set[str]:
def _resolve_bblock_deps(self, bblock: BuildingBlock) -> set[str]:
if not bblock.schema.is_file():
return set()
bblock_schema = load_yaml(filename=bblock.schema)
Expand All @@ -251,6 +296,8 @@ def walk_schema(schema):
if ref.startswith('bblocks://'):
# Get id directly from bblocks:// URI
deps.add(ref[len('bblocks://'):])
elif ref in self.imported_bblock_schemas:
deps.add(self.imported_bblock_schemas[ref])
else:
ref_parent_path = bblock.files_path.joinpath(ref).resolve().parent
ref_bblock = self.bblock_paths.get(ref_parent_path)
Expand All @@ -275,15 +322,31 @@ def walk_schema(schema):

return deps


class ImportedBuildingBlockRegister:
    """Building blocks declared by a single remote register document.

    The document at ``metadata_url`` is expected to be a JSON array of
    building block metadata objects, each one with an ``itemIdentifier``.
    """

    def __init__(self, metadata_url: str):
        """Download the register and index its building blocks by identifier.

        :param metadata_url: URL of the register document
        """
        self.url = metadata_url

        response = requests.get(metadata_url)
        response.raise_for_status()

        # Build the index locally so that `bblocks` is only set once the
        # whole document has been processed.
        by_identifier = {}
        for entry in response.json():
            by_identifier[entry['itemIdentifier']] = entry
        self.bblocks = by_identifier
def find_dependencies(self, identifier: str) -> "list[dict | BuildingBlock]":
    """Return a building block followed by everything it depends on.

    The dependency chain is followed transitively through ``dependsOn``,
    across both local building blocks (``self.bblocks``) and imported ones
    (``self.imported_bblocks``). Entries are listed in depth-first pre-order
    and each building block appears only once, at its first occurrence.

    No ``lru_cache`` is used here: on a method it would keep the register
    alive in a global cache and hand the same mutable list to every caller.
    The walk is iterative and tracks visited identifiers, so it also
    terminates on circular ``dependsOn`` declarations.

    :param identifier: identifier of the building block to start from
    :return: a new list with the ``BuildingBlock`` (local) or metadata dict
        (imported) of the building block itself and of its transitive
        dependencies; empty if the identifier is unknown
    """
    resolved = []
    visited = set()
    pending = [identifier]
    while pending:
        current = pending.pop()
        if current in visited:
            continue
        visited.add(current)

        if current in self.bblocks:
            entry = self.bblocks[current]
            metadata = entry.metadata
        elif current in self.imported_bblocks:
            entry = metadata = self.imported_bblocks[current]
        else:
            # Unknown identifiers contribute nothing
            continue
        resolved.append(entry)

        depends_on = metadata.get('dependsOn') or ()
        if isinstance(depends_on, str):
            # A single dependency may be declared as a plain string
            depends_on = (depends_on,)
        # Push in reverse so that dependencies are visited in declared order
        pending.extend(reversed(list(depends_on)))

    return resolved

def get_inherited_shacl_rules(self, identifier: str) -> set[str | Path]:
    """Gather the SHACL rules contributed by a building block's dependency chain.

    Every entry returned by ``find_dependencies(identifier)`` contributes its
    rules: ``BuildingBlock`` instances through their ``shacl_rules``
    attribute, plain metadata dicts through their ``shaclRules`` key.

    :param identifier: identifier of the building block to look up
    :return: the union of all contributed rules
    """
    def rules_of(dependency):
        # Metadata dicts carry the rules under a key; BuildingBlock objects
        # expose them as an attribute (which may be empty or None).
        if not isinstance(dependency, BuildingBlock):
            return dependency.get('shaclRules', ())
        return dependency.shacl_rules or ()

    inherited = set()
    for dependency in self.find_dependencies(identifier):
        inherited.update(rules_of(dependency))
    return inherited


@dataclasses.dataclass
Expand Down
26 changes: 13 additions & 13 deletions ogc/bblocks/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from rdflib.term import Node, URIRef, BNode
from yaml import MarkedYAMLError

from ogc.bblocks.util import BuildingBlock
from ogc.bblocks.util import BuildingBlock, BuildingBlockRegister
import traceback
import pyshacl
import jsonref
Expand Down Expand Up @@ -276,6 +276,7 @@ def validate_inner():

def validate_test_resources(bblock: BuildingBlock,
registered_items_path: Path,
bblocks_register: BuildingBlockRegister,
outputs_path: str | Path | None = None) -> tuple[bool, int]:
result = True
test_count = 0
Expand All @@ -287,18 +288,17 @@ def validate_test_resources(bblock: BuildingBlock,
shacl_error = None

shacl_files = []
if bblock.shaclRules:
try:
for shacl_file in bblock.shaclRules:
if isinstance(shacl_file, Path) or (isinstance(shacl_file, str) and not is_url(shacl_file)):
# assume file
shacl_file = bblock.files_path / shacl_file
shacl_files.append(os.path.relpath(shacl_file, registered_items_path))
else:
shacl_files.append(shacl_file)
shacl_graph.parse(shacl_file, format='turtle')
except Exception as e:
shacl_error = str(e)
try:
for shacl_file in bblocks_register.get_inherited_shacl_rules(bblock.identifier):
if isinstance(shacl_file, Path) or (isinstance(shacl_file, str) and not is_url(shacl_file)):
# assume file
shacl_file = bblock.files_path / shacl_file
shacl_files.append(os.path.relpath(shacl_file, registered_items_path))
else:
shacl_files.append(shacl_file)
shacl_graph.parse(shacl_file, format='turtle')
except Exception as e:
shacl_error = str(e)

json_error = None
schema_validator = None
Expand Down

0 comments on commit c6b4b09

Please sign in to comment.