diff --git a/ogc/bblocks/models.py b/ogc/bblocks/models.py index 6b27564..9ee8247 100644 --- a/ogc/bblocks/models.py +++ b/ogc/bblocks/models.py @@ -102,6 +102,8 @@ def __init__(self, identifier: str, metadata_file: Path, ('ontology.ttl', 'ontology.owl')) self.output_ontology = self.annotated_path / 'ontology.ttl' + self.remote_cache_dir = self.annotated_path.parent / 'remote_cache' + def _find_path_or_url(self, metadata_property: str, default_filenames: tuple[str, ...]): ref = self.metadata.get(metadata_property) if ref: @@ -157,7 +159,7 @@ def schema_contents(self): if 'schema_contents' not in self._lazy_properties: if not self.schema.exists: return None - self._lazy_properties['schema_contents'] = load_file(self.schema.value) + self._lazy_properties['schema_contents'] = load_file(self.schema.value, self.remote_cache_dir) return self._lazy_properties['schema_contents'] @property @@ -174,7 +176,7 @@ def annotated_schema_contents(self): if 'annotated_schema_contents' not in self._lazy_properties: if not self.annotated_schema.is_file(): return None - self._lazy_properties['annotated_schema_contents'] = load_file(self.annotated_schema) + self._lazy_properties['annotated_schema_contents'] = load_file(self.annotated_schema, self.remote_cache_dir) return self._lazy_properties['annotated_schema_contents'] @property @@ -184,7 +186,7 @@ def jsonld_context_contents(self): if 'jsonld_context_contents' not in self._lazy_properties: if not self.jsonld_context.is_file(): return None - self._lazy_properties['jsonld_context_contents'] = load_file(self.jsonld_context) + self._lazy_properties['jsonld_context_contents'] = load_file(self.jsonld_context, self.remote_cache_dir) return self._lazy_properties['jsonld_context_contents'] @property @@ -192,7 +194,7 @@ def ontology_graph(self) -> Graph | None: if 'ontology_graph' not in self._lazy_properties: if not self.ontology.exists: return None - self._lazy_properties['ontology_graph'] = Graph().parse(self.ontology.value) + self._lazy_properties['ontology_graph'] = Graph().parse(self.ontology.value, self.remote_cache_dir) return self._lazy_properties['ontology_graph'] @property @@ -202,7 +204,7 @@ def output_openapi_contents(self): if 'output_openapi_contents' not in self._lazy_properties: if not self.output_openapi.is_file(): return None - self._lazy_properties['output_openapi_contents'] = load_file(self.output_openapi) + self._lazy_properties['output_openapi_contents'] = load_file(self.output_openapi, self.remote_cache_dir) return self._lazy_properties['output_openapi_contents'] def get_extra_test_resources(self) -> Generator[dict, None, None]: diff --git a/ogc/bblocks/postprocess.py b/ogc/bblocks/postprocess.py index 859e4e0..b7a994a 100644 --- a/ogc/bblocks/postprocess.py +++ b/ogc/bblocks/postprocess.py @@ -227,10 +227,15 @@ def do_postprocess(bblock: BuildingBlock, light: bool = False) -> bool: super_bblocks[building_block.files_path] = building_block continue - if (filter_id is None or building_block.identifier == filter_id): + if filter_id is None or building_block.identifier == filter_id: if not steps or 'annotate' in steps: if building_block.schema.exists: + + if building_block.schema.is_url: + # Force caching remote file + building_block.schema_contents + # Annotate schema print(f"Annotating schema for {building_block.identifier}", file=sys.stderr) @@ -238,6 +243,8 @@ def do_postprocess(bblock: BuildingBlock, light: bool = False) -> bool: if is_url(building_block.ldContext): # Use URL directly default_jsonld_context = building_block.ldContext + # Force caching remote file + building_block.jsonld_context_contents else: # Use path relative to bblock.json default_jsonld_context = building_block.files_path / building_block.ldContext @@ -296,6 +303,8 @@ def do_postprocess(bblock: BuildingBlock, light: bool = False) -> bool: print("Exception when processing ontology for", building_block.identifier, file=sys.stderr) traceback.print_exception(e, file=sys.stderr) elif building_block.ontology.is_url: + # Force cache + building_block.ontology_graph building_block.metadata['ontology'] = building_block.ontology.value child_bblocks.append(building_block) diff --git a/ogc/bblocks/util.py b/ogc/bblocks/util.py index b931b05..5eef566 100644 --- a/ogc/bblocks/util.py +++ b/ogc/bblocks/util.py @@ -5,7 +5,9 @@ import json import os.path import re +import sys from collections import deque +from hashlib import sha256 from pathlib import Path from typing import Any, Sequence, Callable from urllib.parse import urljoin, urlparse, urlunparse @@ -42,12 +44,24 @@ def load_file_cached(fn): return load_file(fn) -def load_file(fn): +def load_file(fn, remote_cache_dir: Path | None = None): + print("LOADING", fn, remote_cache_dir) if isinstance(fn, PathOrUrl): fn = fn.value if isinstance(fn, str) and is_url(fn): r = requests.get(fn) r.raise_for_status() + + if remote_cache_dir: + url_hash = sha256(fn.encode('utf-8')).hexdigest() + try: + remote_cache_dir.mkdir(exist_ok=True, parents=True) + with open(remote_cache_dir / url_hash, 'wb') as f: + f.write(r.content) + except Exception as e: + print(f"Warning: could not store cached version of remote file in {remote_cache_dir / url_hash}: {e}", + file=sys.stderr) + return r.text with open(fn) as f: return f.read()