Skip to content

Commit

Permalink
Cache remote files to avoid CORS errors
Browse files: browse the repository at this point in the history
  • Loading branch information
avillar committed Jul 2, 2024
1 parent 0bb2535 commit 5b312a0
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 7 deletions.
12 changes: 7 additions & 5 deletions ogc/bblocks/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def __init__(self, identifier: str, metadata_file: Path,
('ontology.ttl', 'ontology.owl'))
self.output_ontology = self.annotated_path / 'ontology.ttl'

self.remote_cache_dir = self.annotated_path / 'remote_cache'

def _find_path_or_url(self, metadata_property: str, default_filenames: tuple[str, ...]):
ref = self.metadata.get(metadata_property)
if ref:
Expand Down Expand Up @@ -157,7 +159,7 @@ def schema_contents(self):
if 'schema_contents' not in self._lazy_properties:
if not self.schema.exists:
return None
self._lazy_properties['schema_contents'] = load_file(self.schema.value)
self._lazy_properties['schema_contents'] = load_file(self.schema.value, self.remote_cache_dir)
return self._lazy_properties['schema_contents']

@property
Expand All @@ -174,7 +176,7 @@ def annotated_schema_contents(self):
if 'annotated_schema_contents' not in self._lazy_properties:
if not self.annotated_schema.is_file():
return None
self._lazy_properties['annotated_schema_contents'] = load_file(self.annotated_schema)
self._lazy_properties['annotated_schema_contents'] = load_file(self.annotated_schema, self.remote_cache_dir)
return self._lazy_properties['annotated_schema_contents']

@property
Expand All @@ -184,15 +186,15 @@ def jsonld_context_contents(self):
if 'jsonld_context_contents' not in self._lazy_properties:
if not self.jsonld_context.is_file():
return None
self._lazy_properties['jsonld_context_contents'] = load_file(self.jsonld_context)
self._lazy_properties['jsonld_context_contents'] = load_file(self.jsonld_context, self.remote_cache_dir)
return self._lazy_properties['jsonld_context_contents']

@property
def ontology_graph(self) -> Graph | None:
if 'ontology_graph' not in self._lazy_properties:
if not self.ontology.exists:
return None
self._lazy_properties['ontology_graph'] = Graph().parse(self.ontology.value)
self._lazy_properties['ontology_graph'] = Graph().parse(self.ontology.value, self.remote_cache_dir)
return self._lazy_properties['ontology_graph']

@property
Expand All @@ -202,7 +204,7 @@ def output_openapi_contents(self):
if 'output_openapi_contents' not in self._lazy_properties:
if not self.output_openapi.is_file():
return None
self._lazy_properties['output_openapi_contents'] = load_file(self.output_openapi)
self._lazy_properties['output_openapi_contents'] = load_file(self.output_openapi, self.remote_cache_dir)
return self._lazy_properties['output_openapi_contents']

def get_extra_test_resources(self) -> Generator[dict, None, None]:
Expand Down
16 changes: 15 additions & 1 deletion ogc/bblocks/postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,17 +227,24 @@ def do_postprocess(bblock: BuildingBlock, light: bool = False) -> bool:
super_bblocks[building_block.files_path] = building_block
continue

if (filter_id is None or building_block.identifier == filter_id):
if filter_id is None or building_block.identifier == filter_id:
if not steps or 'annotate' in steps:

if building_block.schema.exists:

if building_block.schema.is_url:
# Force caching remote file
building_block.schema_contents

# Annotate schema
print(f"Annotating schema for {building_block.identifier}", file=sys.stderr)

if building_block.ldContext:
if is_url(building_block.ldContext):
# Use URL directly
default_jsonld_context = building_block.ldContext
# Force caching remote file
building_block.jsonld_context_contents
else:
# Use path relative to bblock.json
default_jsonld_context = building_block.files_path / building_block.ldContext
Expand Down Expand Up @@ -296,8 +303,15 @@ def do_postprocess(bblock: BuildingBlock, light: bool = False) -> bool:
print("Exception when processing ontology for", building_block.identifier, file=sys.stderr)
traceback.print_exception(e, file=sys.stderr)
elif building_block.ontology.is_url:
# Force cache
building_block.ontology_graph
building_block.metadata['ontology'] = building_block.ontology.value

if base_url and building_block.remote_cache_dir.is_dir():
building_block.metadata['remoteCacheDir'] = (
base_url + os.path.relpath(building_block.remote_cache_dir.resolve(), cwd) + '/'
)

child_bblocks.append(building_block)

if not steps or 'jsonld' in steps:
Expand Down
15 changes: 14 additions & 1 deletion ogc/bblocks/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import json
import os.path
import re
import sys
from collections import deque
from hashlib import sha256
from pathlib import Path
from typing import Any, Sequence, Callable
from urllib.parse import urljoin, urlparse, urlunparse
Expand Down Expand Up @@ -42,12 +44,23 @@ def load_file_cached(fn):
return load_file(fn)


def load_file(fn):
def load_file(fn, remote_cache_dir: Path | None = None):
if isinstance(fn, PathOrUrl):
fn = fn.value
if isinstance(fn, str) and is_url(fn):
r = requests.get(fn)
r.raise_for_status()

if remote_cache_dir:
url_hash = sha256(fn.encode('utf-8')).hexdigest()
try:
remote_cache_dir.mkdir(exist_ok=True, parents=True)
with open(remote_cache_dir / url_hash, 'wb') as f:
f.write(r.content)
except Exception as e:
print(f"Warning: could not store cached version of remote file in {remote_cache_dir / url_hash}: {e}",
file=sys.stderr)

return r.text
with open(fn) as f:
return f.read()
Expand Down

0 comments on commit 5b312a0

Please sign in to comment.