Skip to content

Commit

Permalink
Add EPM/JSON-LD writers and SHACL I/O functions (#81)
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt authored Oct 30, 2023
1 parent 4a6f882 commit 14b3d72
Show file tree
Hide file tree
Showing 5 changed files with 231 additions and 1 deletion.
43 changes: 43 additions & 0 deletions docs/source/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,49 @@ This works with both :class:`pathlib.Path` and vanilla strings.
urlretrieve(url, path)
converter = curies.load_jsonld_context(path)
Loading SHACL
~~~~~~~~~~~~~
The `Shapes Constraint Language (SHACL) <https://bioregistry.io/sh>`_ can be used to represent
prefix maps directly in RDF using the ``sh:prefix`` and ``sh:namespace`` predicates. Therefore, the
simple ChEBI example from before can be represented using:

.. code-block:: turtle
@prefix sh: <http://www.w3.org/ns/shacl#> .
[
sh:declare
[
sh:prefix "CHEBI" ;
sh:namespace "http://purl.obolibrary.org/obo/CHEBI_"
]
] .
A SHACL context can be loaded from a remote file via HTTP, HTTPS, or FTP with

.. code-block:: python
import curies
url = "https://raw.githubusercontent.com/biopragmatics/bioregistry/main/exports/contexts/semweb.context.ttl"
converter = curies.load_shacl(url)
A SHACL context stored in a local file can be loaded with the following.
This works with both :class:`pathlib.Path` and vanilla strings.

.. code-block:: python
from pathlib import Path
from urllib.request import urlretrieve
import curies
url = "https://raw.githubusercontent.com/biopragmatics/bioregistry/main/exports/contexts/semweb.context.ttl"
path = Path.home().joinpath("Downloads", "semweb.context.ttl")
urlretrieve(url, path)
converter = curies.load_shacl(path)
Modifying a Context
-------------------
Incremental Converters
Expand Down
8 changes: 8 additions & 0 deletions src/curies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
load_extended_prefix_map,
load_jsonld_context,
load_prefix_map,
load_shacl,
write_extended_prefix_map,
write_jsonld_context,
write_shacl,
)
from .reconciliation import remap_curie_prefixes, remap_uri_prefixes, rewire
from .sources import (
Expand Down Expand Up @@ -42,6 +46,10 @@
"load_prefix_map",
"load_extended_prefix_map",
"load_jsonld_context",
"load_shacl",
"write_extended_prefix_map",
"write_jsonld_context",
"write_shacl",
# sources
"get_obo_converter",
"get_prefixcommons_converter",
Expand Down
117 changes: 117 additions & 0 deletions src/curies/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from collections import defaultdict
from functools import partial
from pathlib import Path
from textwrap import dedent
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -50,6 +51,11 @@
"load_extended_prefix_map",
"load_prefix_map",
"load_jsonld_context",
"load_shacl",
# Writers
"write_extended_prefix_map",
"write_jsonld_context",
"write_shacl",
]

X = TypeVar("X")
Expand Down Expand Up @@ -873,6 +879,39 @@ def from_rdflib(
prefix_map = {prefix: str(namespace) for prefix, namespace in graph_or_manager.namespaces()}
return cls.from_prefix_map(prefix_map, **kwargs)

@classmethod
def from_shacl(
    cls,
    graph: Union[str, Path, "rdflib.Graph"],
    format: Optional[str] = None,
    **kwargs: Any,
) -> "Converter":
    """Get a converter from SHACL prefix declarations.

    The prefix map is read from all ``sh:declare`` nodes that carry both a
    ``sh:prefix`` and a ``sh:namespace`` value.

    :param graph: A RDFLib graph, a Path, a string representing a file path, or a string URL
    :param format: The RDF format, if a file path is given. It is handed to RDFLib's
        parser unchanged.
    :param kwargs: Keyword arguments to pass to :meth:`from_prefix_map`
    :return: A converter
    """
    if isinstance(graph, (str, Path)):
        # RDFLib is imported lazily so it is only needed when a location is given
        import rdflib

        temporary_graph = rdflib.Graph()
        temporary_graph.parse(location=graph, format=format)
        graph = temporary_graph

    # The ``sh`` prefix is declared inside the query itself so that it does not
    # depend on whichever namespace bindings the given graph happens to have.
    query = """\
        PREFIX sh: <http://www.w3.org/ns/shacl#>

        SELECT ?curie_prefix ?uri_prefix
        WHERE {
            ?bnode1 sh:declare ?bnode2 .
            ?bnode2 sh:prefix ?curie_prefix .
            ?bnode2 sh:namespace ?uri_prefix .
        }
    """
    results = graph.query(query)
    # Both values are literals, so they are coerced to plain strings. If a prefix
    # is declared more than once, only one of its namespaces is retained.
    prefix_map = {str(k): str(v) for k, v in results}
    return cls.from_prefix_map(prefix_map, **kwargs)

def get_prefixes(self) -> Set[str]:
    """Collect the ``prefix`` of every record in this converter into a set."""
    prefixes: Set[str] = set()
    for entry in self.records:
        prefixes.add(entry.prefix)
    return prefixes
Expand Down Expand Up @@ -1747,3 +1786,81 @@ def load_jsonld_context(data: LocationOr[Dict[str, Any]], **kwargs: Any) -> Conv
>>> "rdf" in converter.prefix_map
"""
return Converter.from_jsonld(data, **kwargs)


def load_shacl(data: LocationOr["rdflib.Graph"], **kwargs: Any) -> Converter:
    """Get a converter from RDF that declares prefixes with SHACL.

    The prefix map is taken from the ``sh:prefix`` and ``sh:namespace`` values
    of the ``sh:declare`` nodes in the data.

    :param data:
        A path to an RDF file, a string URL, or a RDFlib graph
    :param kwargs: Keyword arguments to pass to :meth:`curies.Converter.from_shacl`,
        such as ``format`` when ``data`` is a location
    :return:
        A converter
    """
    return Converter.from_shacl(data, **kwargs)


def write_extended_prefix_map(converter: Converter, path: Union[str, Path]) -> None:
    """Write an extended prefix map as JSON to a file.

    :param converter: The converter whose records are written
    :param path: The path of the file to write, as a string or a :class:`pathlib.Path`

    The file is always encoded as UTF-8. Non-ASCII characters are written
    verbatim (``ensure_ascii=False``), so relying on the platform's default
    encoding could fail or produce a file that is not portable.
    """
    path = _ensure_path(path)
    records = [_record_to_dict(record) for record in converter.records]
    path.write_text(
        json.dumps(records, indent=4, sort_keys=True, ensure_ascii=False),
        encoding="utf-8",
    )


def _record_to_dict(record: Record) -> Mapping[str, Union[str, List[str]]]:
    """Turn a record into a plain dictionary, leaving out empty synonym collections."""
    data: Dict[str, Union[str, List[str]]] = {}
    data["prefix"] = record.prefix
    data["uri_prefix"] = record.uri_prefix
    # Synonyms are optional: each key is only emitted when there is something
    # to list, and the values are sorted so the output is deterministic.
    for key in ("prefix_synonyms", "uri_prefix_synonyms"):
        synonyms = getattr(record, key)
        if synonyms:
            data[key] = sorted(synonyms)
    return data


def _ensure_path(path: Union[str, Path]) -> Path:
if isinstance(path, str):
path = Path(path).resolve()
return path


def write_jsonld_context(converter: Converter, path: Union[str, Path]) -> None:
    """Dump the converter's bijective map to a file as a JSON-LD context.

    The map is stored under the ``@context`` key of the written JSON object.
    """
    target = _ensure_path(path)
    with target.open("w") as handle:
        document = {"@context": converter.bimap}
        json.dump(document, handle, indent=4, sort_keys=True)


def _escape_turtle_string(value: str) -> str:
    """Escape characters that may not appear raw in a double-quoted Turtle string."""
    return (
        value.replace("\\", "\\\\")  # must come first so later escapes are not doubled
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
        .replace("\t", "\\t")
    )


def write_shacl(converter: Converter, path: Union[str, Path]) -> None:
    """Write the converter's bijective map as SHACL in turtle RDF to a file.

    :param converter: The converter whose bijective map is written
    :param path: The path of the file to write, as a string or a :class:`pathlib.Path`

    Each entry becomes a ``sh:declare`` node with a ``sh:prefix`` string and a
    ``sh:namespace`` value typed as ``xsd:anyURI``, which is the datatype SHACL
    prescribes for namespaces. Entries are written in sorted order and the file
    is encoded as UTF-8. If the bijective map is empty, only the ``@prefix``
    header is written, since a ``sh:declare`` without an object is not valid Turtle.
    """
    path = _ensure_path(path)
    header = dedent(
        """\
        @prefix sh: <http://www.w3.org/ns/shacl#> .
        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
        """
    )
    entries = ",\n".join(
        f'    [ sh:prefix "{_escape_turtle_string(prefix)}" ; '
        f'sh:namespace "{_escape_turtle_string(uri_prefix)}"^^xsd:anyURI ]'
        for prefix, uri_prefix in sorted(converter.bimap.items())
    )
    if not entries:
        path.write_text(header, encoding="utf-8")
        return
    declarations = dedent(
        """\
        [
            sh:declare
        {entries}
        ] .
        """
    )
    path.write_text(header + declarations.format(entries=entries), encoding="utf-8")
3 changes: 2 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pandas as pd
import rdflib

import curies
from curies.api import (
CompressionError,
Converter,
Expand Down Expand Up @@ -227,7 +228,7 @@ def test_invalid_record(self):
def test_invalid_records(self):
"""Test throwing an error for duplicated URI prefixes."""
with self.assertRaises(DuplicateURIPrefixes) as e:
Converter.from_prefix_map(
curies.load_prefix_map(
{
"CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
"nope": "http://purl.obolibrary.org/obo/CHEBI_",
Expand Down
61 changes: 61 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Test writing I/O."""

import unittest
from pathlib import Path
from tempfile import TemporaryDirectory

import curies
from curies import Converter

CHEBI_URI_PREFIX = "http://purl.obolibrary.org/obo/CHEBI_"


class TestIO(unittest.TestCase):
    """Round-trip tests for the writer and loader functions."""

    def setUp(self) -> None:
        """Create a converter with one record that has a synonym of each kind."""
        self.prefix = "CHEBI"
        self.uri_prefix = CHEBI_URI_PREFIX
        self.prefix_synonym = "p"
        self.uri_prefix_synonym = "u"
        record = {
            "prefix": self.prefix,
            "prefix_synonyms": [self.prefix_synonym],
            "uri_prefix": self.uri_prefix,
            "uri_prefix_synonyms": [self.uri_prefix_synonym],
        }
        self.converter = Converter.from_extended_prefix_map([record])

    def test_write_epm(self):
        """Check that an extended prefix map survives being written and read back."""
        with TemporaryDirectory() as directory:
            path = Path(directory) / "test.json"
            curies.write_extended_prefix_map(self.converter, path)
            reloaded = curies.load_extended_prefix_map(path)
            self.assertEqual(self.converter.records, reloaded.records)

    def test_write_jsonld_with_bimap(self):
        """Check that a JSON-LD context written from the bimap can be read back."""
        expected = {self.prefix: self.uri_prefix}
        with TemporaryDirectory() as directory:
            path = Path(directory) / "test.json"
            # The writer is given a string here to exercise the non-Path input
            curies.write_jsonld_context(self.converter, path.as_posix())
            reloaded = curies.load_jsonld_context(path)
            self.assertEqual(expected, reloaded.prefix_map)
            self.assertEqual(
                {self.uri_prefix: self.prefix},
                reloaded.reverse_prefix_map,
                msg="the prefix synonym should not survive round trip",
            )
            self.assertEqual(expected, reloaded.bimap)

    def test_shacl(self):
        """Check that the bimap survives a SHACL round trip."""
        with TemporaryDirectory() as directory:
            path = Path(directory) / "test.ttl"
            curies.write_shacl(self.converter, path)
            reloaded = curies.load_shacl(path)
            self.assertEqual(self.converter.bimap, reloaded.bimap)

0 comments on commit 14b3d72

Please sign in to comment.