diff --git a/ogc/bblocks/entrypoint.py b/ogc/bblocks/entrypoint.py index 8c1e082..411fff6 100644 --- a/ogc/bblocks/entrypoint.py +++ b/ogc/bblocks/entrypoint.py @@ -10,6 +10,9 @@ from ogc.bblocks.postprocess import postprocess from ogc.na import ingest_json +MAIN_BBR = 'https://blocks.ogc.org/register.json' +DEFAULT_IMPORT_MARKER = 'default' + templates_dir = Path(__file__).parent / 'templates' uplift_context_file = Path(__file__).parent / 'register-context.yaml' @@ -126,6 +129,7 @@ id_prefix = 'ogc.' annotated_path = Path(args.annotated_path) schema_mapping_config = {} + imported_registers = [] if bb_config_file and bb_config_file.is_file(): bb_config = load_yaml(filename=bb_config_file) id_prefix = bb_config.get('identifier-prefix', id_prefix) @@ -133,6 +137,11 @@ id_prefix += '.' subdirs = id_prefix.split('.')[1:] schema_mapping_config = bb_config.get('schema-mapping', {}) + imported_registers = bb_config.get('imports') + if imported_registers is None: + imported_registers = [MAIN_BBR] + else: + imported_registers = [ir if ir != DEFAULT_IMPORT_MARKER else MAIN_BBR for ir in imported_registers if ir] # 1. Postprocess BBs print(f"Running postprocess...", file=sys.stderr) @@ -147,7 +156,8 @@ schema_default_base_url=schema_mapping_config.get('default'), schema_identifier_url_mappings=schema_mapping_config.get('mappings'), test_outputs_path=args.test_outputs_path, - github_base_url=args.github_base_url) + github_base_url=args.github_base_url, + imported_registers=imported_registers) # 2. Uplift register.json print(f"Running semantic uplift of {register_file}", file=sys.stderr) diff --git a/ogc/bblocks/postprocess.py b/ogc/bblocks/postprocess.py index 24d4db2..3841b11 100644 --- a/ogc/bblocks/postprocess.py +++ b/ogc/bblocks/postprocess.py @@ -16,7 +16,7 @@ from ogc.bblocks.generate_docs import DocGenerator from ogc.bblocks.util import write_superbblocks_schemas, annotate_schema, BuildingBlock, \ - write_jsonld_context, BuildingBlockRegister + write_jsonld_context, BuildingBlockRegister, ImportedBuildingBlocks from ogc.bblocks.validate import validate_test_resources from ogc.bblocks.transform import apply_transforms, transformers @@ -35,7 +35,8 @@ def postprocess(registered_items_path: str | Path = 'registereditems', schema_default_base_url: str | None = None, schema_identifier_url_mappings: list[dict[str, str]] = None, test_outputs_path: str | Path = 'build/tests', - github_base_url: str | None = None) -> list[BuildingBlock]: + github_base_url: str | None = None, + imported_registers: list[str] | None = None) -> list[BuildingBlock]: cwd = Path().resolve() @@ -53,6 +54,18 @@ def postprocess(registered_items_path: str | Path = 'registereditems', templates_dir=templates_dir, id_prefix=id_prefix) + if not isinstance(registered_items_path, Path): + registered_items_path = Path(registered_items_path) + + child_bblocks = [] + super_bblocks = {} + imported_bblocks = ImportedBuildingBlocks(imported_registers) + bbr = BuildingBlockRegister(registered_items_path, + fail_on_error=fail_on_error, + prefix=id_prefix, + annotated_path=annotated_path, + imported_bblocks=imported_bblocks) + def do_postprocess(bblock: BuildingBlock) -> bool: try: @@ -109,6 +122,7 @@ def do_postprocess(bblock: BuildingBlock) -> bool: print(f" > Running tests for {bblock.identifier}", file=sys.stderr) validation_passed, test_count = validate_test_resources(bblock, registered_items_path=registered_items_path, + bblocks_register=bbr, outputs_path=test_outputs_path) bblock.metadata['validationPassed'] = validation_passed if not validation_passed: @@ -149,12 +163,6 @@ def do_postprocess(bblock: BuildingBlock) -> bool: else: print("No transformers found", file=sys.stderr) - child_bblocks = [] - super_bblocks = {} - bbr = BuildingBlockRegister(registered_items_path, - fail_on_error=fail_on_error, - prefix=id_prefix, - annotated_path=annotated_path) for building_block in bbr.bblocks.values(): if filter_ids and building_block.identifier not in filter_ids: continue @@ -223,11 +231,15 @@ def do_postprocess(bblock: BuildingBlock) -> bool: print(f"{building_block.identifier} failed postprocessing, skipping...", file=sys.stderr) if output_file: + output_register_json = { + 'imports': imported_registers or [], + 'bblocks': output_bblocks, + } if output_file == '-': - print(json.dumps(output_bblocks, indent=2)) + print(json.dumps(output_register_json, indent=2)) else: with open(output_file, 'w') as f: - json.dump(output_bblocks, f, indent=2) + json.dump(output_register_json, f, indent=2) print(f"Finished processing {len(output_bblocks)} building blocks", file=sys.stderr) return output_bblocks @@ -300,7 +312,6 @@ def _main(): output_file=None if args.no_output else args.output_register, filter_ids=args.filter_id, base_url=args.base_url, - metadata_schema=args.metadata_schema, templates_dir=args.templates_dir, fail_on_error=args.fail_on_error, id_prefix=args.identifier_prefix) diff --git a/ogc/bblocks/register-context.yaml b/ogc/bblocks/register-context.yaml index 916e1ff..07774a1 100644 --- a/ogc/bblocks/register-context.yaml +++ b/ogc/bblocks/register-context.yaml @@ -1,4 +1,6 @@ transform: + # Extract bblocks + - if type != "array" then .bblocks else . end # Add @type from itemClass with initial capital letter - '[ .[] | ."@type" = [("bblocks:" + (.itemClass[:1]|ascii_upcase) + .itemClass[1:]), "skos:Concept"] ]' # Add ConceptScheme diff --git a/ogc/bblocks/test.py b/ogc/bblocks/test.py index ca78655..8333de7 100644 --- a/ogc/bblocks/test.py +++ b/ogc/bblocks/test.py @@ -2,5 +2,6 @@ from ogc.na.util import load_yaml d = load_yaml(filename='/home/alx5000/work/Proyectos/ogc/3d-csdm-schema/_sources/csdm/test/example.json') -schema = load_yaml(filename='/home/alx5000/work/Proyectos/ogc/3d-csdm-schema/build-local/annotated/csdm/test/schema.json') +schema = load_yaml(filename='/home/alx5000/work/Proyectos/ogc/3d-csdm-schema/build-local/annotated/csdm/test/schema' + '.json') jsonschema.validate(d, schema) diff --git a/ogc/bblocks/transform.py b/ogc/bblocks/transform.py index 068cd69..b0e84c0 100644 --- a/ogc/bblocks/transform.py +++ b/ogc/bblocks/transform.py @@ -64,7 +64,7 @@ def apply_transforms(bblock: BuildingBlock, with open(output_fn, 'w') as f: f.write(transform_result) - except Exception as e: + except Exception: with open(output_fn.with_stem(output_fn.name + '.error'), 'w') as f: f.write('Error generating transformed file:\n') f.write(traceback.format_exc()) diff --git a/ogc/bblocks/util.py b/ogc/bblocks/util.py index c3acbb2..ffddf04 100644 --- a/ogc/bblocks/util.py +++ b/ogc/bblocks/util.py @@ -7,6 +7,7 @@ import re import sys from collections import deque +from functools import lru_cache from pathlib import Path from typing import Any, Sequence, Callable, AnyStr @@ -105,6 +106,7 @@ def __init__(self, identifier: str, metadata_file: Path, default_shacl_rules = fp / 'rules.shacl' if default_shacl_rules.is_file(): shacl_rules.append('rules.shacl') + self.shacl_rules = [r if is_url(r) else fp / r for r in shacl_rules] self.transforms_file = fp / 'transforms.yaml' self.transforms = self._load_transforms() @@ -182,6 +184,36 @@ def jsonld_context_contents(self): return self._lazy_properties['jsonld_context_contents'] +class ImportedBuildingBlocks: + + def __init__(self, metadata_urls: list[str] | None): + self.bblocks: dict[str, dict] = {} + self.imported_registers: dict[str, list[str]] = {} + if metadata_urls: + pending_urls = deque(metadata_urls) + while pending_urls: + metadata_url = pending_urls.popleft() + new_pending = self.load(metadata_url) + pending_urls.extend(u for u in new_pending if u not in self.imported_registers) + + def load(self, metadata_url: str) -> list[str]: + r = requests.get(metadata_url) + r.raise_for_status() + imported = r.json() + if isinstance(imported, list): + bblock_list = imported + dependencies = [] + else: + bblock_list = imported['bblocks'] + dependencies = imported.get('imports', []) + self.imported_registers[metadata_url] = [] + for bblock in bblock_list: + bblock['register'] = self + self.bblocks[bblock['itemIdentifier']] = bblock + self.imported_registers[metadata_url].append(bblock['itemIdentifier']) + return dependencies + + class BuildingBlockRegister: def __init__(self, @@ -189,12 +221,14 @@ def __init__(self, annotated_path: Path = Path(), fail_on_error: bool = False, prefix: str = 'ogc.', - find_dependencies=True): + find_dependencies=True, + imported_bblocks: ImportedBuildingBlocks | None = None): self.registered_items_path = registered_items_path self.annotated_path = annotated_path self.prefix = prefix self.bblocks: dict[str, BuildingBlock] = {} + self.imported_bblocks = imported_bblocks.bblocks if imported_bblocks else {} self.bblock_paths: dict[Path, BuildingBlock] = {} @@ -216,11 +250,19 @@ def __init__(self, traceback.print_exception(e, file=sys.stderr) print('=========', file=sys.stderr) + self.imported_bblock_schemas: dict[str, str] = {} if find_dependencies: dep_graph = nx.DiGraph() + for identifier, imported_bblock in self.imported_bblocks.items(): + dep_graph.add_node(identifier) + dep_graph.add_edges_from([(d, identifier) for d in imported_bblock.get('dependsOn', ())]) + imported_bblock.get('dependsOn', []) + for schema_url in imported_bblock.get('schema', {}).values(): + self.imported_bblock_schemas[schema_url] = identifier + for bblock in self.bblocks.values(): - found_deps = self.find_dependencies(bblock) + found_deps = self._resolve_bblock_deps(bblock) deps = bblock.metadata.get('dependsOn') if isinstance(deps, str): found_deps.add(deps) @@ -234,9 +276,12 @@ def __init__(self, if cycles: cycles_str = '\n - '.join(' -> '.join(reversed(c)) + ' -> ' + c[-1] for c in cycles) raise BuildingBlockError(f"Circular dependencies found: \n - {cycles_str}") - self.bblocks = {b: self.bblocks[b] for b in nx.topological_sort(dep_graph) if b in self.bblocks} + self.bblocks: dict[str, BuildingBlock] = {b: self.bblocks[b] + for b in nx.topological_sort(dep_graph) + if b in self.bblocks} + self.dep_graph = dep_graph - def find_dependencies(self, bblock: BuildingBlock) -> set[str]: + def _resolve_bblock_deps(self, bblock: BuildingBlock) -> set[str]: if not bblock.schema.is_file(): return set() bblock_schema = load_yaml(filename=bblock.schema) @@ -251,6 +296,8 @@ def walk_schema(schema): if ref.startswith('bblocks://'): # Get id directly from bblocks:// URI deps.add(ref[len('bblocks://'):]) + elif ref in self.imported_bblock_schemas: + deps.add(self.imported_bblock_schemas[ref]) else: ref_parent_path = bblock.files_path.joinpath(ref).resolve().parent ref_bblock = self.bblock_paths.get(ref_parent_path) @@ -275,15 +322,31 @@ def walk_schema(schema): return deps - -class ImportedBuildingBlockRegister: - - def __init__(self, metadata_url: str): - self.url = metadata_url - r = requests.get(metadata_url) - r.raise_for_status() - bblock_list = r.json() - self.bblocks = {b['itemIdentifier']: b for b in bblock_list} + @lru_cache + def find_dependencies(self, identifier: str) -> list[dict | BuildingBlock]: + if identifier in self.bblocks: + bblock = self.bblocks[identifier] + metadata = bblock.metadata + elif identifier in self.imported_bblocks: + bblock = None + metadata = self.imported_bblocks[identifier] + else: + return [] + + dependencies = [bblock or metadata] + for d in metadata.get('dependsOn', ()): + dependencies.extend(self.find_dependencies(d)) + + return dependencies + + def get_inherited_shacl_rules(self, identifier: str) -> set[str | Path]: + rules = set() + for dep in self.find_dependencies(identifier): + if isinstance(dep, BuildingBlock): + rules.update(dep.shacl_rules or ()) + else: + rules.update(dep.get('shaclRules', ())) + return rules @dataclasses.dataclass diff --git a/ogc/bblocks/validate.py b/ogc/bblocks/validate.py index 0361ef2..ce7c34d 100644 --- a/ogc/bblocks/validate.py +++ b/ogc/bblocks/validate.py @@ -18,7 +18,7 @@ from rdflib.term import Node, URIRef, BNode from yaml import MarkedYAMLError -from ogc.bblocks.util import BuildingBlock +from ogc.bblocks.util import BuildingBlock, BuildingBlockRegister import traceback import pyshacl import jsonref @@ -276,6 +276,7 @@ def validate_inner(): def validate_test_resources(bblock: BuildingBlock, registered_items_path: Path, + bblocks_register: BuildingBlockRegister, outputs_path: str | Path | None = None) -> tuple[bool, int]: result = True test_count = 0 @@ -287,18 +288,17 @@ def validate_test_resources(bblock: BuildingBlock, shacl_error = None shacl_files = [] - if bblock.shaclRules: - try: - for shacl_file in bblock.shaclRules: - if isinstance(shacl_file, Path) or (isinstance(shacl_file, str) and not is_url(shacl_file)): - # assume file - shacl_file = bblock.files_path / shacl_file - shacl_files.append(os.path.relpath(shacl_file, registered_items_path)) - else: - shacl_files.append(shacl_file) - shacl_graph.parse(shacl_file, format='turtle') - except Exception as e: - shacl_error = str(e) + try: + for shacl_file in bblocks_register.get_inherited_shacl_rules(bblock.identifier): + if isinstance(shacl_file, Path) or (isinstance(shacl_file, str) and not is_url(shacl_file)): + # assume file + shacl_file = bblock.files_path / shacl_file + shacl_files.append(os.path.relpath(shacl_file, registered_items_path)) + else: + shacl_files.append(shacl_file) + shacl_graph.parse(shacl_file, format='turtle') + except Exception as e: + shacl_error = str(e) json_error = None schema_validator = None