Skip to content

Commit

Permalink
Cache remote files to avoid CORS errors
Browse files Browse the repository at this point in the history
  • Loading branch information
avillar committed Jul 2, 2024
1 parent 0bb2535 commit e3b5660
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 7 deletions.
12 changes: 7 additions & 5 deletions ogc/bblocks/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def __init__(self, identifier: str, metadata_file: Path,
('ontology.ttl', 'ontology.owl'))
self.output_ontology = self.annotated_path / 'ontology.ttl'

self.remote_cache_dir = self.annotated_path.parent / 'remote_cache'

def _find_path_or_url(self, metadata_property: str, default_filenames: tuple[str, ...]):
ref = self.metadata.get(metadata_property)
if ref:
Expand Down Expand Up @@ -157,7 +159,7 @@ def schema_contents(self):
if 'schema_contents' not in self._lazy_properties:
if not self.schema.exists:
return None
self._lazy_properties['schema_contents'] = load_file(self.schema.value)
self._lazy_properties['schema_contents'] = load_file(self.schema.value, self.remote_cache_dir)
return self._lazy_properties['schema_contents']

@property
Expand All @@ -174,7 +176,7 @@ def annotated_schema_contents(self):
if 'annotated_schema_contents' not in self._lazy_properties:
if not self.annotated_schema.is_file():
return None
self._lazy_properties['annotated_schema_contents'] = load_file(self.annotated_schema)
self._lazy_properties['annotated_schema_contents'] = load_file(self.annotated_schema, self.remote_cache_dir)
return self._lazy_properties['annotated_schema_contents']

@property
Expand All @@ -184,15 +186,15 @@ def jsonld_context_contents(self):
if 'jsonld_context_contents' not in self._lazy_properties:
if not self.jsonld_context.is_file():
return None
self._lazy_properties['jsonld_context_contents'] = load_file(self.jsonld_context)
self._lazy_properties['jsonld_context_contents'] = load_file(self.jsonld_context, self.remote_cache_dir)
return self._lazy_properties['jsonld_context_contents']

@property
def ontology_graph(self) -> Graph | None:
if 'ontology_graph' not in self._lazy_properties:
if not self.ontology.exists:
return None
self._lazy_properties['ontology_graph'] = Graph().parse(self.ontology.value)
self._lazy_properties['ontology_graph'] = Graph().parse(self.ontology.value, self.remote_cache_dir)
return self._lazy_properties['ontology_graph']

@property
Expand All @@ -202,7 +204,7 @@ def output_openapi_contents(self):
if 'output_openapi_contents' not in self._lazy_properties:
if not self.output_openapi.is_file():
return None
self._lazy_properties['output_openapi_contents'] = load_file(self.output_openapi)
self._lazy_properties['output_openapi_contents'] = load_file(self.output_openapi, self.remote_cache_dir)
return self._lazy_properties['output_openapi_contents']

def get_extra_test_resources(self) -> Generator[dict, None, None]:
Expand Down
11 changes: 10 additions & 1 deletion ogc/bblocks/postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,17 +227,24 @@ def do_postprocess(bblock: BuildingBlock, light: bool = False) -> bool:
super_bblocks[building_block.files_path] = building_block
continue

if (filter_id is None or building_block.identifier == filter_id):
if filter_id is None or building_block.identifier == filter_id:
if not steps or 'annotate' in steps:

if building_block.schema.exists:

if building_block.schema.is_url:
# Force caching remote file
building_block.schema_contents

# Annotate schema
print(f"Annotating schema for {building_block.identifier}", file=sys.stderr)

if building_block.ldContext:
if is_url(building_block.ldContext):
# Use URL directly
default_jsonld_context = building_block.ldContext
# Force caching remote file
building_block.jsonld_context_contents
else:
# Use path relative to bblock.json
default_jsonld_context = building_block.files_path / building_block.ldContext
Expand Down Expand Up @@ -296,6 +303,8 @@ def do_postprocess(bblock: BuildingBlock, light: bool = False) -> bool:
print("Exception when processing ontology for", building_block.identifier, file=sys.stderr)
traceback.print_exception(e, file=sys.stderr)
elif building_block.ontology.is_url:
# Force cache
building_block.ontology_graph
building_block.metadata['ontology'] = building_block.ontology.value

child_bblocks.append(building_block)
Expand Down
16 changes: 15 additions & 1 deletion ogc/bblocks/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import json
import os.path
import re
import sys
from collections import deque
from hashlib import sha256
from pathlib import Path
from typing import Any, Sequence, Callable
from urllib.parse import urljoin, urlparse, urlunparse
Expand Down Expand Up @@ -42,12 +44,24 @@ def load_file_cached(fn):
return load_file(fn)


def load_file(fn):
def load_file(fn, remote_cache_dir: Path | None = None):
    """Return the text contents of a local file or of a remote URL.

    Args:
        fn: What to load. A ``PathOrUrl`` is unwrapped to its ``value``
            first; a string recognised by ``is_url`` is fetched over HTTP;
            anything else is passed to ``open()`` as a local path.
        remote_cache_dir: Optional directory in which a copy of every
            downloaded file is stored. The copy is named after the SHA-256
            hex digest of the URL. Ignored for local files.

    Returns:
        The file contents as a string.

    Raises:
        requests.HTTPError: If the remote server answers with an error status.
        OSError: If a local file cannot be opened or read.
    """
    if isinstance(fn, PathOrUrl):
        fn = fn.value

    if isinstance(fn, str) and is_url(fn):
        # Without a timeout an unresponsive host would block forever.
        r = requests.get(fn, timeout=60)
        r.raise_for_status()

        if remote_cache_dir:
            url_hash = sha256(fn.encode('utf-8')).hexdigest()
            try:
                cache_dir = Path(remote_cache_dir)
                cache_dir.mkdir(exist_ok=True, parents=True)
                # Store the raw bytes so the cached copy is exactly what
                # the server sent, independent of text decoding.
                (cache_dir / url_hash).write_bytes(r.content)
            except OSError as e:
                # Caching is best-effort: warn, but still return the
                # downloaded contents.
                print(f"Warning: could not store cached version of remote file in {remote_cache_dir / url_hash}: {e}",
                      file=sys.stderr)

        return r.text

    with open(fn) as f:
        return f.read()
Expand Down

0 comments on commit e3b5660

Please sign in to comment.