openannotation · Treora · Jul 22, 2014 · Jul 25, 2014 · Jul 26, 2014 · Jul 29, 2014
diff --git a/annotator/annotation.py b/annotator/annotation.py
@@ -64,6 +64,7 @@ def save(self, *args, **kwargs):
 
         super(Annotation, self).save(*args, **kwargs)
 
+
     @classmethod
     def search_raw(cls, query=None, params=None, user=None,
                    authorization_enabled=None, **kwargs):

diff --git a/annotator/openannotation.py b/annotator/openannotation.py
@@ -0,0 +1,166 @@
+import logging
+log = logging.getLogger(__name__)
+
+# Prefer the stdlib OrderedDict, then the ordereddict backport, then plain dict.
+try:
+    from collections import OrderedDict
+except ImportError:
+    try:
+        from ordereddict import OrderedDict
+    except ImportError:
+        log.warning("No OrderedDict available, JSON-LD content will be unordered. "
+                    "Use Python>=2.7 or install ordereddict module to fix.")
+        OrderedDict = dict
+
+from annotator.annotation import Annotation
+
+class OAAnnotation(Annotation):
+    """A helper class to represent an annotation according to the Open
+       Annotation Data Model: http://www.openannotation.org/spec/core/core.html
+
+       Currently it only generates JSON-LD.
+    """
+
+    # The ID of the annotation will be relative to the base URL, if it is set.
+    jsonld_baseurl = None
+
+    @property
+    def jsonld(self):
+        """The JSON-LD formatted RDF representation of the annotation."""
+
+        context = [
+            "http://www.w3.org/ns/oa-context-20130208.json",
+            {'annotator': 'http://annotatorjs.org/ns/'}
+        ]
+
+        if self.jsonld_baseurl is not None:
+            context.append({'@base': self.jsonld_baseurl})
+
+        # The JSON-LD spec recommends to put @context at the top of the
+        # document, so we'll be nice and use an ordered dictionary.
+        annotation = OrderedDict()
+        annotation['@context'] = context
+        annotation['@id'] = self['id']
+        annotation['@type'] = 'oa:Annotation'
+        annotation['hasBody'] = self.has_body
+        annotation['hasTarget'] = self.has_target
+        annotation['annotatedBy'] = self.annotated_by
+        annotation['annotatedAt'] = self.annotated_at
+        annotation['serializedBy'] = self.serialized_by
+        annotation['serializedAt'] = self.serialized_at
+        annotation['motivatedBy'] = self.motivated_by
+        return annotation
+
+    @property
+    def has_body(self):
+        """Return all annotation bodies: the text comment and each tag"""
+        bodies = []
+        bodies += self.textual_bodies
+        bodies += self.tags
+        return bodies
+
+    @property
+    def textual_bodies(self):
+        """A list with a single text body or an empty list"""
+        if not self.get('text'):
+            # Note that we treat an empty text as not having text at all.
+            return []
+        body = {
+            '@type': ['dctypes:Text', 'cnt:ContentAsText'],
+            'dc:format': 'text/plain',
+            'cnt:chars': self['text'],
+        }
+        return [body]
+
+    @property
+    def tags(self):
+        """A list of oa:Tag items; empty if tags are missing, None or empty"""
+        if not self.get('tags'):
+            return []
+        return [
+            {
+                '@type': ['oa:Tag', 'cnt:ContentAsText'],
+                'dc:format': 'text/plain',
+                'cnt:chars': tag,
+            }
+            for tag in self['tags']
+        ]
+
+    @property
+    def motivated_by(self):
+        """Motivations for the annotation.
+
+           Currently any combination of commenting and/or tagging.
+        """
+        motivations = []
+        if self.textual_bodies:
+            motivations.append('oa:commenting')
+        if self.tags:
+            motivations.append('oa:tagging')
+        return motivations
+
+    @property
+    def has_target(self):
+        """The targets of the annotation.
+
+           Returns a selector for each range of the page content that was
+           selected, or if a range is absent the url of the page itself.
+        """
+        targets = []
+        if 'uri' not in self:
+            return targets
+        if self.get('ranges'):
+            # Build the selector for each quote
+            for range_selector in self['ranges']:
+                selector = {
+                    '@type': 'annotator:TextRangeSelector',
+                    'annotator:startContainer': range_selector['start'],
+                    'annotator:endContainer': range_selector['end'],
+                    'annotator:startOffset': range_selector['startOffset'],
+                    'annotator:endOffset': range_selector['endOffset'],
+                }
+                target = {
+                    '@type': 'oa:SpecificResource',
+                    'hasSource': self['uri'],
+                    'hasSelector': selector,
+                }
+                targets.append(target)
+        else:
+            # The annotation targets the page as a whole
+            targets.append(self['uri'])
+        return targets
+
+    @property
+    def annotated_by(self):
+        """The user that created the annotation, or [] if no user is set."""
+        if not self.get('user'):
+            return []
+        return {
+            '@type': 'foaf:Agent',  # It could be either a person or a bot
+            'foaf:name': self['user'],
+        }
+
+    @property
+    def annotated_at(self):
+        """The annotation's creation date, or None if 'created' is not set."""
+        if self.get('created'):
+            return self['created']
+
+    @property
+    def serialized_by(self):
+        """The software used for serializing."""
+        return {
+            '@id': 'annotator:annotator-store',
+            '@type': 'prov:Software-agent',
+            'foaf:name': 'annotator-store',
+            'foaf:homepage': {'@id': 'http://annotatorjs.org'},
+        }  # TODO: add version number
+
+    @property
+    def serialized_at(self):
+        """The last time the serialization changed, or None if not set."""
+        # Following the spec[1], we do not use the current time, but the last
+        # time the annotation graph has been updated.
+        # [1]: https://hypothes.is/a/R6uHQyVTQYqBc4-1V9X56Q
+        if self.get('updated'):
+            return self['updated']
diff --git a/annotator/store.py b/annotator/store.py
@@ -1,16 +1,20 @@
 """
-This module implements a Flask-based JSON API to talk with the annotation store via the
-Annotation model.
+This module implements a Flask-based JSON API to talk with the annotation store
+via the Annotation model.
 It defines these routes:
   * Root
-  * Index
+  * Index (OA)
   * Create
-  * Read
+  * Read (OA)
   * Update
   * Delete
-  * Search
+  * Search (OA)
   * Raw ElasticSearch search
 See their descriptions in `root`'s definition for more detail.
+
+Routes marked with OA (the read-only endpoints) will render the annotations in
+JSON-LD following the Open Annotation Data Model if the user agent prefers this
+(by accepting application/ld+json).
 """
 from __future__ import absolute_import
 
@@ -25,6 +29,7 @@
 
 from annotator.atoi import atoi
 from annotator.annotation import Annotation
+from annotator.openannotation import OAAnnotation
 
 store = Blueprint('store', __name__)
 
@@ -39,6 +44,31 @@ def jsonify(obj, *args, **kwargs):
     return Response(res, mimetype='application/json', *args, **kwargs)
 
 
+"""
+Define renderers that can be used for presenting the annotation. Note that we
+currently only use JSON-based types. The renderer returns not a string but a
+jsonifiable object.
+"""
+def render_jsonld(annotation):
+    """Return the annotation as JSON-LD, with IDs relative to the read URL."""
+    base_url = url_for('.read_annotation', id='', _external=True)
+    wrapped = OAAnnotation(annotation)
+    wrapped.jsonld_baseurl = base_url
+    return wrapped.jsonld
+
+renderers = {  # content type -> function returning a jsonifiable object
+    'application/ld+json': render_jsonld,
+    'application/json': lambda annotation: annotation,  # passed through as-is
+}
+types_by_preference = ['application/json', 'application/ld+json']  # default first
+
+def render(annotation, content_type=None):
+    """Render the annotation as content_type, negotiating it when omitted."""
+    chosen = (preferred_content_type(types_by_preference)
+              if content_type is None else content_type)
+    return renderers[chosen](annotation)
+
+
 @store.before_request
 def before_request():
     if not hasattr(g, 'annotation_class'):
@@ -144,7 +174,9 @@ def index():
         user = None
 
     annotations = g.annotation_class.search(user=user)
-    return jsonify(annotations)
+
+    return jsonify(list(map(render, annotations)))
+
 
 # CREATE
 @store.route('/annotations', methods=['POST'])
@@ -190,7 +222,8 @@ def read_annotation(id):
     if failure:
         return failure
 
-    return jsonify(annotation)
+
+    return jsonify(render(annotation))
 
 
 # UPDATE
@@ -282,7 +315,7 @@ def search_annotations():
     total = g.annotation_class.count(**kwargs)
 
     return jsonify({'total': total,
-                    'rows': results})
+                    'rows': list(map(render, results))})
 
 
 # RAW ES SEARCH
@@ -418,3 +451,18 @@ def _update_query_raw(qo, params, k, v):
 
     elif k == 'search_type':
         params[k] = v
+
+def preferred_content_type(possible_types):
+    """Pick the content (MIME) type that the user agent likes best.
+
+       When several types tie, or the request carries no Accept header,
+       the type listed earliest in possible_types is chosen.
+
+       Arguments:
+       possible_types -- Sequence of content types, in order of preference.
+
+       Returns the selected content type.
+    """
+    fallback = possible_types[0]
+    accepted = request.accept_mimetypes
+    return accepted.best_match(possible_types, fallback)
diff --git a/tests/test_openannotation.py b/tests/test_openannotation.py
@@ -0,0 +1,93 @@
+import re
+
+from annotator.annotation import Annotation
+from annotator.openannotation import OAAnnotation
+from annotator.elasticsearch import _add_created, _add_updated
+
+class TestOpenAnnotation(object):
+    """Checks the JSON-LD produced by OAAnnotation for a sample annotation."""
+
+    def _make_annotation(self):
+        """Build a sample OAAnnotation, run through _add_created/_add_updated."""
+        annotation_fields = {
+            'id': '1234',
+            'text': 'blablabla',
+            'uri': 'http://localhost:4000/dev.html',
+            'ranges': [
+                {
+                    'start': '/ul[1]/li[1]',
+                    'end': '/ul[1]/li[1]',
+                    'startOffset': 0,
+                    'endOffset': 26,
+                }
+            ],
+            'user': 'alice',
+            'quote': 'Lorem ipsum dolor sit amet',
+            'consumer': 'mockconsumer',
+            'permissions': {'read': [], 'admin': [],
+                            'update': [], 'delete': []},
+        }
+        annotation = OAAnnotation(annotation_fields)
+        _add_created(annotation)
+        _add_updated(annotation)
+        return annotation
+
+    def test_basics(self):
+        """Verify @id, @type, body, target, creator and the two timestamps."""
+        ann = self._make_annotation()
+
+        # Get the JSON-LD (as a dictionary)
+        ann_ld = ann.jsonld
+
+        # Check the values of some basic fields
+        ldid = ann_ld['@id']
+        assert ldid == '1234', "Incorrect annotation @id: {0}!={1}".format(ldid, '1234')
+        assert ann_ld['@type'] == 'oa:Annotation'
+        assert ann_ld['hasBody'] == [{
+            "cnt:chars": "blablabla",
+            "@type": [
+                "dctypes:Text",
+                "cnt:ContentAsText"
+            ],
+            "dc:format": "text/plain"
+        }], "Incorrect hasBody: {0}".format(ann_ld['hasBody'])
+
+        assert ann_ld['hasTarget'] == [{
+            "hasSource": "http://localhost:4000/dev.html",
+            "hasSelector": {
+                "annotator:endContainer": "/ul[1]/li[1]",
+                "annotator:startOffset": 0,
+                "annotator:startContainer": "/ul[1]/li[1]",
+                "@type": "annotator:TextRangeSelector",
+                "annotator:endOffset": 26
+            },
+            "@type": "oa:SpecificResource"
+        }], "Incorrect hasTarget: {0}".format(ann_ld['hasTarget'])
+
+        assert ann_ld['annotatedBy'] == {
+            '@type': 'foaf:Agent',
+            'foaf:name': 'alice',
+        }, "Incorrect annotatedBy: {0}".format(ann_ld['annotatedBy'])
+
+        # Timestamp pattern; each "n" is a placeholder expanded to \d below.
+        date_str = r"nnnn-nn-nnTnn:nn:nn(\.nnnnnn)?([+-]nn.nn|Z)"
+        date_regex = re.compile(date_str.replace("n", r"\d"))
+        assert date_regex.match(ann_ld['annotatedAt']), "Incorrect annotatedAt: {0}".format(ann_ld['annotatedAt'])
+        assert date_regex.match(ann_ld['serializedAt']), "Incorrect serializedAt: {0}".format(ann_ld['serializedAt'])
+
+
+def assemble_context(context_value):
+    """Merge a JSON-LD @context value (dict or list of parts) into one dict."""
+    if isinstance(context_value, dict):
+        return context_value
+    if isinstance(context_value, list):
+        # Merge all dict parts; non-dict parts (e.g. context URLs) are skipped
+        context = {}
+        for context_piece in context_value:
+            if isinstance(context_piece, dict):
+                context.update(context_piece)
+        return context
+    if isinstance(context_value, str):
+        # XXX: we do not retrieve an externally defined context
+        raise NotImplementedError
+    raise AssertionError("@context should be dict, list, or str")