Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First attempt at using Open Annotation Data Model v2 #119

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions annotator/annotation.py
Original file line number Diff line number Diff line change
@@ -54,6 +54,7 @@ def save(self, *args, **kwargs):

super(Annotation, self).save(*args, **kwargs)


@classmethod
def search_raw(cls, query=None, params=None, raw_result=False,
user=None, authorization_enabled=None):
161 changes: 161 additions & 0 deletions annotator/oa_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import logging

log = logging.getLogger(__name__)

# An ordered mapping lets the renderer emit '@context' first in the JSON-LD
# output. Prefer the stdlib class, fall back to the ``ordereddict`` backport,
# and as a last resort use a plain dict (only the key order is lost).
try:
    from collections import OrderedDict
except ImportError:
    try:
        from ordereddict import OrderedDict
    except ImportError:
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the
        # supported spelling.
        log.warning(
            "No OrderedDict available, JSON-LD content will be unordered. "
            "Use Python>=2.7 or install ordereddict module to fix.")
        OrderedDict = dict


class OARenderer(object):
    """Renders an annotation as an Open Annotation JSON-LD document."""

    def __init__(self, jsonld_baserurl=None):
        # When set, the value is emitted as the '@base' entry of the
        # JSON-LD context; None leaves that entry out.
        self.jsonld_baseurl = jsonld_baserurl

    def render(self, annotation):
        """Return the JSON-LD formatted RDF representation of `annotation`.

        The result is an ordered mapping whose first key is '@context',
        followed by the annotation id, type, bodies, targets, provenance
        fields and motivations.
        """
        ld_context = [
            "http://www.w3.org/ns/oa-context-20130208.json",
            {'annotator': 'http://annotatorjs.org/ns/'},
        ]
        if self.jsonld_baseurl is not None:
            ld_context.append({'@base': self.jsonld_baseurl})

        # Both the body list and the motivations are derived from the
        # comment text and the tags, so extract them once up front.
        comment_bodies = get_textual_bodies(annotation)
        tag_bodies = get_tags(annotation)

        # The JSON-LD spec recommends putting @context at the top of the
        # document, hence the ordered mapping built from ordered pairs.
        return OrderedDict([
            ('@context', ld_context),
            ('@id', annotation['id']),
            ('@type', 'oa:Annotation'),
            ('hasBody', has_body(comment_bodies, tag_bodies)),
            ('hasTarget', has_target(annotation)),
            ('annotatedBy', annotated_by(annotation)),
            ('annotatedAt', annotated_at(annotation)),
            ('serializedBy', serialized_by()),
            ('serializedAt', serialized_at(annotation)),
            ('motivatedBy', motivated_by(comment_bodies, tag_bodies)),
        ])


def has_body(textual_bodies, tags):
    """Combine all annotation bodies into one new list.

    The textual (comment) bodies come first, followed by one body per tag.
    """
    combined = list(textual_bodies)
    combined.extend(tags)
    return combined


def get_textual_bodies(annotation):
    """Return the annotation's comment as a one-element list of bodies.

    An annotation whose 'text' is missing or empty yields an empty list;
    empty text is deliberately treated the same as no text at all.
    """
    comment = annotation.get('text')
    if comment:
        return [{
            '@type': ['dctypes:Text', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': comment,
        }]
    return []


def get_tags(annotation):
    """Return one oa:Tag body per tag of the annotation.

    An annotation whose 'tags' entry is missing, None or empty yields an
    empty list.
    """
    # Use .get() plus a truthiness test (as the text and range helpers do)
    # so that an explicit ``'tags': None`` is treated as "no tags" instead
    # of raising a TypeError when iterated.
    tags = annotation.get('tags')
    if not tags:
        return []
    return [
        {
            '@type': ['oa:Tag', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': tag,
        }
        for tag in tags
    ]


def motivated_by(textual_bodies, tags):
    """List the motivations behind the annotation.

    'oa:commenting' is included when there are textual bodies and
    'oa:tagging' when there are tags, in that order; the result may hold
    both, one, or neither.
    """
    candidates = (
        ('oa:commenting', textual_bodies),
        ('oa:tagging', tags),
    )
    return [motivation for motivation, bodies in candidates if bodies]


def has_target(annotation):
    """Return the list of targets of the annotation.

    Without a 'uri' there is no target at all. With a 'uri' and one or
    more 'ranges', each range becomes an oa:SpecificResource carrying a
    text range selector. With a 'uri' but no ranges, the page URL itself
    is the only target.
    """
    if 'uri' not in annotation:
        return []

    page_url = annotation['uri']
    ranges = annotation.get('ranges')
    if not ranges:
        # The annotation targets the page as a whole.
        return [page_url]

    # One specific resource (page + selector) per selected range.
    return [
        {
            '@type': 'oa:SpecificResource',
            'hasSource': page_url,
            'hasSelector': {
                '@type': 'annotator:TextRangeSelector',
                'annotator:startContainer': selected['start'],
                'annotator:endContainer': selected['end'],
                'annotator:startOffset': selected['startOffset'],
                'annotator:endOffset': selected['endOffset'],
            },
        }
        for selected in ranges
    ]


def annotated_by(annotation):
    """Describe the user that created the annotation.

    Returns an empty dict when the annotation has no (or an empty) 'user'.
    """
    creator = annotation.get('user')
    if creator:
        return {
            '@type': 'foaf:Agent',  # Could be either a person or a bot
            'foaf:name': creator,
        }
    return {}


def annotated_at(annotation):
    """Return the annotation's creation date, or None if it has none."""
    created = annotation.get('created')
    return created if created else None


def serialized_by():
    """Describe the software agent that produced the serialization.

    Returns a JSON-LD node identifying annotator-store by id, type, name
    and homepage.
    """
    return {
        '@id': 'annotator:annotator-store',
        # The PROV-O class for software is spelled prov:SoftwareAgent;
        # 'prov:Software-agent' does not exist in the PROV namespace.
        '@type': 'prov:SoftwareAgent',
        'foaf:name': 'annotator-store',
        'foaf:homepage': {'@id': 'http://annotatorjs.org'},
    }  # todo: add version number


def serialized_at(annotation):
    """Return the time the serialization last changed, or None if unknown."""
    # Per the spec[1] this is not the current time but the last time the
    # annotation graph was updated, i.e. the annotation's 'updated' value.
    # [1]: https://hypothes.is/a/R6uHQyVTQYqBc4-1V9X56Q
    last_update = annotation.get('updated')
    return last_update if last_update else None
238 changes: 238 additions & 0 deletions tests/test_oa_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
import copy
from nose.tools import *

from . import TestCase
from annotator.oa_renderer import OARenderer

# Sample annotation, in the store's native dict format, used as the input
# for the renderer tests.
annotation = {
    'created': '2015-03-07T09:48:34.891753+00:00',
    'id': 'test-annotation-id-1',
    'ranges': [{
        'type': 'RangeSelector',
        'startOffset': 0,
        'endOffset': 30,
        'end': '/div[1]/div[5]/div[1]/div[5]/div[1]/div[2]',
        'start': '/div[1]/div[5]/div[1]/div[5]/div[1]/div[1]',
    }],
    # Adjacent string literals are concatenated verbatim, so the break
    # between the two verses must be written explicitly; without it the
    # text reads "...have not beenAs others were...".
    'text': 'From childhood\'s hour I have not been\n'
            'As others were-I have not seen',
    'tags': ['Edgar Allan Poe', 'Alone', 'Poem'],
    'updated': '2015-03-07T09:49:34.891769+00:00',
    'uri': 'http://www.poetryfoundation.org/poem/175776',
    'user': 'nameless.raven',
}

# Expected Open Annotation JSON-LD rendering of the sample ``annotation``
# when the renderer has no base URL (the context has no '@base' entry).
# Values are taken from ``annotation`` wherever the output echoes the input.
oa_rendered_annotation = {
    '@context': [
        "http://www.w3.org/ns/oa-context-20130208.json",
        {'annotator': 'http://annotatorjs.org/ns/'}
    ],
    '@id': annotation['id'],
    '@type': 'oa:Annotation',
    # Bodies: the text comment first, then one oa:Tag body per tag.
    'hasBody': [
        {
            '@type': ['dctypes:Text', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': annotation['text']
        },
        {
            '@type': ['oa:Tag', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': annotation['tags'][0]
        },
        {
            '@type': ['oa:Tag', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': annotation['tags'][1]
        },
        {
            '@type': ['oa:Tag', 'cnt:ContentAsText'],
            'dc:format': 'text/plain',
            'cnt:chars': annotation['tags'][2]
        }
    ],
    # Targets: one specific resource for the single range of the sample.
    'hasTarget': [
        {
            '@type': 'oa:SpecificResource',
            'hasSource': annotation['uri'],
            'hasSelector': {
                '@type': 'annotator:TextRangeSelector',
                'annotator:startContainer': annotation['ranges'][0]['start'],
                'annotator:endContainer': annotation['ranges'][0]['end'],
                'annotator:startOffset': annotation['ranges'][0]['startOffset'],
                'annotator:endOffset': annotation['ranges'][0]['endOffset']
            }
        }
    ],
    'annotatedBy': {
        '@type': 'foaf:Agent',
        'foaf:name': annotation['user']
    },
    'annotatedAt': annotation['created'],
    'serializedBy': {
        '@id': 'annotator:annotator-store',
        # NOTE(review): PROV-O spells this class prov:SoftwareAgent --
        # confirm which spelling is intended here.
        '@type': 'prov:Software-agent',
        'foaf:name': 'annotator-store',
        'foaf:homepage': {'@id': 'http://annotatorjs.org'},
    },
    'serializedAt': annotation['updated'],
    'motivatedBy': ['oa:commenting', 'oa:tagging']
}


class TestOARenderer(TestCase):
    """Tests for the Open Annotation JSON-LD output of OARenderer.

    Sizes are compared with ``==`` throughout: ``is`` tests object identity,
    which for integers only works by accident of small-int caching and
    triggers a SyntaxWarning on Python 3.8+.
    """

    def setup(self):
        super(TestOARenderer, self).setup()
        # Default renderer under test: no JSON-LD base URL configured.
        self.renderer = OARenderer()

    def teardown(self):
        super(TestOARenderer, self).teardown()

    def test_context_without_jsonld_baseurl(self):
        rendered = self.renderer.render(annotation)

        assert '@context' in rendered
        context = rendered['@context']
        exp_context = oa_rendered_annotation['@context']
        assert len(context) == 2
        assert context[0] == exp_context[0]
        assert context[1] == exp_context[1]

    def test_context_with_jsonld_baseurl(self):
        jsonld_baseurl = 'http://jsonld_baseurl.com'
        renderer = OARenderer(jsonld_baseurl)
        rendered = renderer.render(annotation)

        # The base URL is appended as a third context entry.
        assert '@context' in rendered
        context = rendered['@context']
        assert len(context) == 3
        assert '@base' in context[2]
        assert context[2]['@base'] == jsonld_baseurl

    def test_id(self):
        rendered = self.renderer.render(annotation)
        assert '@id' in rendered
        assert rendered['@id'] == oa_rendered_annotation['@id']

    def test_type(self):
        rendered = self.renderer.render(annotation)
        assert '@type' in rendered
        assert rendered['@type'] == oa_rendered_annotation['@type']

    def test_has_body(self):
        rendered = self.renderer.render(annotation)

        # One text body followed by the three tag bodies.
        assert 'hasBody' in rendered
        hasBody = rendered['hasBody']
        assert len(hasBody) == 4

        assert hasBody[0] == oa_rendered_annotation['hasBody'][0]
        assert hasBody[1] == oa_rendered_annotation['hasBody'][1]
        assert hasBody[2] == oa_rendered_annotation['hasBody'][2]
        assert hasBody[3] == oa_rendered_annotation['hasBody'][3]

        assert 'motivatedBy' in rendered
        assert len(rendered['motivatedBy']) == 2
        assert rendered['motivatedBy'][0] == 'oa:commenting'
        assert rendered['motivatedBy'][1] == 'oa:tagging'

    def test_has_body_without_tags(self):
        # Work on a copy so the shared fixture is left untouched.
        copied_annotation = copy.deepcopy(annotation)
        del copied_annotation['tags']
        rendered = self.renderer.render(copied_annotation)

        assert 'hasBody' in rendered
        hasBody = rendered['hasBody']
        assert len(hasBody) == 1
        assert hasBody[0] == oa_rendered_annotation['hasBody'][0]

        assert 'motivatedBy' in rendered
        assert len(rendered['motivatedBy']) == 1
        assert rendered['motivatedBy'][0] == 'oa:commenting'

    def test_has_body_without_text(self):
        copied_annotation = copy.deepcopy(annotation)
        del copied_annotation['text']
        rendered = self.renderer.render(copied_annotation)

        # Only the tag bodies remain, shifted up by one position.
        assert 'hasBody' in rendered
        hasBody = rendered['hasBody']
        assert len(hasBody) == 3
        assert hasBody[0] == oa_rendered_annotation['hasBody'][1]
        assert hasBody[1] == oa_rendered_annotation['hasBody'][2]
        assert hasBody[2] == oa_rendered_annotation['hasBody'][3]

        assert 'motivatedBy' in rendered
        assert len(rendered['motivatedBy']) == 1
        assert rendered['motivatedBy'][0] == 'oa:tagging'

    def test_has_body_empty(self):
        copied_annotation = copy.deepcopy(annotation)
        del copied_annotation['text']
        del copied_annotation['tags']
        rendered = self.renderer.render(copied_annotation)

        assert 'hasBody' in rendered
        hasBody = rendered['hasBody']
        assert len(hasBody) == 0

        assert 'motivatedBy' in rendered
        assert len(rendered['motivatedBy']) == 0

    def test_has_target(self):
        rendered = self.renderer.render(annotation)

        assert 'hasTarget' in rendered
        hasTarget = rendered['hasTarget']
        assert len(hasTarget) == 1
        assert hasTarget[0] == oa_rendered_annotation['hasTarget'][0]

        assert 'hasSelector' in hasTarget[0]
        hasSelector = hasTarget[0]['hasSelector']
        oa_selector = oa_rendered_annotation['hasTarget'][0]['hasSelector']
        assert hasSelector == oa_selector

    def test_has_target_without_ranges(self):
        copied_annotation = copy.deepcopy(annotation)
        del copied_annotation['ranges']
        rendered = self.renderer.render(copied_annotation)

        # Without ranges the page URL itself is the only target.
        assert 'hasTarget' in rendered
        hasTarget = rendered['hasTarget']
        assert len(hasTarget) == 1
        assert hasTarget[0] == annotation['uri']

    def test_has_target_without_uri(self):
        copied_annotation = copy.deepcopy(annotation)
        del copied_annotation['uri']
        rendered = self.renderer.render(copied_annotation)

        assert 'hasTarget' in rendered
        hasTarget = rendered['hasTarget']
        assert len(hasTarget) == 0

    def test_annotated_by(self):
        rendered = self.renderer.render(annotation)

        assert 'annotatedBy' in rendered
        expected = oa_rendered_annotation['annotatedBy']
        assert rendered['annotatedBy'] == expected

    def test_annotated_by_without_user(self):
        copied_annotation = copy.deepcopy(annotation)
        del copied_annotation['user']
        rendered = self.renderer.render(copied_annotation)

        assert 'annotatedBy' in rendered
        assert rendered['annotatedBy'] == {}

    def test_annotated_at(self):
        rendered = self.renderer.render(annotation)

        assert 'annotatedAt' in rendered
        expected = oa_rendered_annotation['annotatedAt']
        assert rendered['annotatedAt'] == expected

    def test_serialized_at(self):
        rendered = self.renderer.render(annotation)

        assert 'serializedAt' in rendered
        expected = oa_rendered_annotation['serializedAt']
        assert rendered['serializedAt'] == expected