diff --git a/.github/workflows/check-release.yml b/.github/workflows/check-release.yml index 785b971..3b28a70 100644 --- a/.github/workflows/check-release.yml +++ b/.github/workflows/check-release.yml @@ -24,6 +24,3 @@ jobs: uses: jupyter-server/jupyter_releaser/.github/actions/check-release@v1 with: token: ${{ secrets.GITHUB_TOKEN }} - - name: Run Link Check - if: ${{ matrix.group == 'link_check' }} - uses: jupyter-server/jupyter_releaser/.github/actions/check-links@v1 diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 3f692cd..77c9d46 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -30,6 +30,7 @@ jobs: - name: Install the Python dependencies run: | pip install -e ".[test]" codecov + pip list - name: Run the tests if: ${{ !startsWith(matrix.python-version, 'pypy') && !startsWith(matrix.os, 'windows') }} run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e8ea07..89cb4c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,7 +31,7 @@ repos: hooks: - id: mypy exclude: examples/simple/setup.py - additional_dependencies: [types-requests] + additional_dependencies: [types-requests, types-PyYAML] - repo: https://github.com/pre-commit/mirrors-prettier rev: v2.6.2 diff --git a/CHANGELOG.md b/CHANGELOG.md index bfebf63..bd8020e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,6 @@ All notable changes to this project will be documented in this file. -## 0.0.0 +## 0.1.0 diff --git a/README.md b/README.md index bd1cd72..890e62c 100644 --- a/README.md +++ b/README.md @@ -2,87 +2,18 @@ _An event system for Jupyter Applications and extensions._ -Jupyter Events enables Jupyter Applications (e.g. Jupyter Server, Jupyter Notebook, JupyterLab, JupyterHub, etc.) to emit **events**—i.e. actions by application users—to remote (or local) destinations as **structured** data. 
It works with Python's standard `logging` library to handle the transmission of events allowing users to send events to local files, over the web, etc. +Jupyter Events enables Jupyter Python Applications (e.g. Jupyter Server, JupyterLab Server, JupyterHub, etc.) to emit **events**—structured data describing things happening inside the application. Other software (e.g. client applications like JupyterLab) can _listen_ and respond to these events. ## Install -The Jupyter Events library can be installed from PyPI. +Install Jupyter Events directly from PyPI: ``` pip install jupyter_events ``` -## Basic Usage +or conda-forge: -Jupyter Events provides a configurable traitlets object, `EventLogger`, for emitting structured event data in Python. It leverages Python's standard `logging` library for filtering, routing, and emitting events. All events are validated (using [jsonschema](https://pypi.org/project/jsonschema/)) against registered [JSON schemas](https://json-schema.org/). - -Let's look at a basic example of an `EventLogger`. - -```python -import logging -from jupyter_events import EventLogger - - -logger = EventLogger( - # Use logging handlers to route where events - # should be record. - handlers=[ - logging.FileHandler('events.log') - ], - # List schemas of events that should be recorded. - allowed_schemas=[ - 'uri.to.event.schema' - ] -) -``` - -EventLogger has two configurable traits: - -- `handlers`: a list of Python's `logging` handlers. -- `allowed_schemas`: a list of event schemas to record. - -Event schemas must be registered with the `EventLogger` for events to be recorded. 
An event schema looks something like: - -```json -{ - "$id": "url.to.event.schema", - "title": "My Event", - "description": "All events must have a name property.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "description": "Name of event", - "type": "string" - } - }, - "required": ["name"], - "version": 1 -} ``` - -2 fields are required: - -- `$id`: a valid URI to identify the schema (and possibly fetch it from a remote address). -- `version`: the version of the schema. - -The other fields follow standard JSON schema structure. - -Schemas can be registered from a Python `dict` object, a file, or a URL. This example loads the above example schema from file. - -```python -# Register the schema. -logger.register_schema_file('schema.json') -``` - -Events are recorded using the `record_event` method. This method validates the event data and routes the JSON string to the Python `logging` handlers listed in the `EventLogger`. - -```python -# Record an example event. -event = {'name': 'example event'} -logger.record_event( - schema_id='url.to.event.schema', - version=1, - event=event -) +conda install -c conda-forge jupyter_events ``` diff --git a/docs/_static/jupyter_logo.png b/docs/_static/jupyter_logo.png new file mode 100644 index 0000000..1cea1a8 Binary files /dev/null and b/docs/_static/jupyter_logo.png differ diff --git a/docs/conf.py b/docs/conf.py index ec3fbbb..0d57f43 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,11 +29,14 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions: List = [] +extensions: List = ["myst_parser", "jupyterlite_sphinx"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] +source_suffix = [".rst", ".md"] + + # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
# This pattern also affects html_static_path and html_extra_path. @@ -45,10 +48,39 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = "sphinx_rtd_theme" +html_theme = "pydata_sphinx_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] +html_logo = "_static/jupyter_logo.png" + master_doc = "index" + +# Configure jupyterlite to import jupyter_events package +jupyterlite_contents = ["demo/demo-notebook.ipynb"] + +html_theme_options = { + "logo": { + "text": "Jupyter Events", + }, + "icon_links": [ + { + # Label for this link + "name": "GitHub", + # URL where the link will redirect + "url": "https://github.com/jupyter/jupyter_events", # required + # Icon class (if "type": "fontawesome"), or path to local image (if "type": "local") + "icon": "fab fa-github-square", + # The type of image to be used (see below for details) + "type": "fontawesome", + }, + { + "name": "jupyter.org", + "url": "https://jupyter.org", + "icon": "_static/jupyter_logo.png", + "type": "local", + }, + ], +} diff --git a/docs/demo/demo-notebook.ipynb b/docs/demo/demo-notebook.ipynb new file mode 100644 index 0000000..0c68fab --- /dev/null +++ b/docs/demo/demo-notebook.ipynb @@ -0,0 +1,32 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import piplite\n", + "\n", + "piplite.install(\"jupyter_events\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from jupyter_events.logger import EventLogger\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/demo/index.md 
b/docs/demo/index.md new file mode 100644 index 0000000..be3c23f --- /dev/null +++ b/docs/demo/index.md @@ -0,0 +1,10 @@ +# Try it out + +Try out the Jupyter Events Library in the example notebook below (powered by JupyterLite): + +```{retrolite} demo-notebook.ipynb +--- +width: 100% +height: 1200px +--- +``` diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..1ed8455 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,35 @@ +# Jupyter Events + +_An event system for Jupyter Applications and extensions._ + +Jupyter Events enables Jupyter Python Applications (e.g. Jupyter Server, JupyterLab Server, JupyterHub, etc.) to emit **events**—structured data describing things happening inside the application. Other software (e.g. client applications like JupyterLab) can _listen_ and respond to these events. + +## Install + +Install Jupyter Events directly from PyPI: + +``` +pip install jupyter_events +``` + +or conda-forge: + +``` +conda install -c conda-forge jupyter_events +``` + +## Contents + +```{toctree} +--- +maxdepth: 2 +--- +user_guide/index +demo/index +``` + +## Indices and tables + +- {ref}`genindex` +- {ref}`modindex` +- {ref}`search` diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 5f48ca4..0000000 --- a/docs/index.rst +++ /dev/null @@ -1,51 +0,0 @@ -.. jupyter_events documentation master file, created by - sphinx-quickstart on Fri Sep 27 16:34:00 2019. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Jupyter Events -============== - -**Configurable event system for Jupyter applications and extensions.** - - -Jupyter Events provides a configurable traitlets object, EventLogger, -for structured event-logging in Python. It leverages Python's -standard logging library for filtering, handling, and recording -events. All events are validated (using jsonschema) against -registered JSON schemas. 
- -The most common way to use Jupyter's event system is to configure -the ``EventLogger`` objects in Jupyter Applications, -(e.g. JupyterLab, Jupyter Notebook, JupyterHub). See the -page ":ref:`using-events`" - -If you're looking to add events to an application that -you're developing, check out the page ":ref:`adding-events`" - - - -Installation ------------- - -Jupyter's Events library can be installed from PyPI. - -.. code-block:: - - pip install jupyter_events - - -.. toctree:: - :maxdepth: 1 - :caption: Table of Contents: - - pages/configure - pages/application - pages/schemas - -Indices and tables ------------------- - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/pages/application.rst b/docs/pages/application.rst deleted file mode 100644 index 963e460..0000000 --- a/docs/pages/application.rst +++ /dev/null @@ -1,52 +0,0 @@ -.. _adding-events: - -Adding the Event System to an application -========================================= - -Jupyter Events enables you to log events from your running application. -(It's designed to work best with traitlet's `Application` object -for simple configuration.) To use the system, begin by creating an -instance of ``EventLogger``: - -.. code-block:: python - - - from jupyter_events import EventLogger - - class MyApplication: - - def __init__(self): - ... - # The arguments - self.eventlogger = EventLogger( - ... - # Either pass the traits (see below) here, - # or enable users of your application to configure - # the EventLogger's traits. - ) - - -EventLogger has two configurable traits: - - - ``handlers``: a list of Python's logging handlers that - handle the recording of incoming events. - - ``allowed_schemas``: a dictionary of options for each schema - describing what data should be collected. - -Next, you'll need to register event schemas for your application. -You can register schemas using the ``register_schema_file`` -(JSON or YAML format) or ``register_schema`` methods. 
- - -Once your have an instance of ``EventLogger`` and your registered -schemas, you can use the ``record_event`` method to log that event. - -.. code-block:: python - - # Record an example event. - event = {'name': 'example event'} - self.eventlogger.record_event( - schema_id='url.to.event.schema', - version=1, - event=event - ) diff --git a/docs/pages/configure.rst b/docs/pages/configure.rst deleted file mode 100644 index f1ef567..0000000 --- a/docs/pages/configure.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _using-events: - -Using Jupyter Events in Jupyter applications -============================================ - -Most people will use ``jupyter_events`` to log events data from Jupyter -applications, (e.g. JupyterLab, Jupyter Server, JupyterHub, etc). - -In this case, you'll be able to record events provided by schemas within -those applications. To start, you'll need to configure each -application's ``EventLogger`` object. - -This usually means two things: - -1. Define a set of ``logging`` handlers (from Python's standard library) -to tell Jupyter Events where to send your event data -(e.g. file, remote storage, etc.) -2. List the names of events to collect and the properties/categories -to collect from each of those events. (see the example below for more details). - -Here is an example of a Jupyter configuration file, e.g. ``jupyter_config.d``, -that demonstrates how to configure an eventlog. - -.. code-block:: python - - from logging import FileHandler - - # Log events to a local file on disk. - handler = FileHandler('events.txt') - - # Explicitly list the types of events - # to record and what properties or what categories - # of data to begin collecting. 
- allowed_schemas = { - "uri.to.schema": { - "allowed_properties": ["name", "email"], - "allowed_categories": ["category.jupyter.org/user-identifier"] - } - } - - c.EventLogger.handlers = [handler] - c.EventLogger.allowed_schemas = allowed_schemas diff --git a/docs/pages/schemas.rst b/docs/pages/schemas.rst deleted file mode 100644 index 4fd2ade..0000000 --- a/docs/pages/schemas.rst +++ /dev/null @@ -1,64 +0,0 @@ -Writing a schema for Jupyter Events -=================================== - -All Schemas should be a valid `JSON schema`_ and can be written in valid -YAML or JSON. - -At a minimum, valid Jupyter Event schema requires have the following keys: - -- ``$id`` : a URI to identify (and possibly locate) the schema. -- ``version`` : schema version. -- ``title`` : name of the schema -- ``description`` : documentation for the schema -- ``properties`` : attributes of the event being emitted. - - Each property should have the following attributes: - - + ``title`` : name of the property - + ``description``: documentation for this property. - + ``categories``: list of types of data being collected - -- ``required``: list of required properties. - -Here is a minimal example of a valid JSON schema for an event. - -.. code-block:: yaml - - $id: event.jupyter.org/example-event - version: 1 - title: My Event - description: | - All events must have a name property - type: object - properties: - thing: - title: Thing - categories: - - category.jupyter.org/unrestricted - description: A random thing. - user: - title: User name - categories: - - category.jupyter.org/user-identifier - description: Name of user who initiated event - required: - - thing - - user - - -.. _JSON schema: https://json-schema.org/ - - -Property Categories -------------------- - -Each property can be labelled with ``categories`` field. This makes it easier to -filter properties based on a category. We recommend that schema authors use valid -URIs for these labels, e.g. 
something like ``category.jupyter.org/unrestricted``. - -Below is a list of common category labels that Jupyter Events recommends using: - -* ``category.jupyter.org/unrestricted`` -* ``category.jupyter.org/user-identifier`` -* ``category.jupyter.org/user-identifiable-information`` -* ``category.jupyter.org/action-timestamp`` diff --git a/docs/requirements.txt b/docs/requirements.txt index 483a4e9..b9c17d5 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1,2 @@ -sphinx_rtd_theme +myst_parser +pydata_sphinx_theme diff --git a/docs/user_guide/application.md b/docs/user_guide/application.md new file mode 100644 index 0000000..15a5925 --- /dev/null +++ b/docs/user_guide/application.md @@ -0,0 +1,78 @@ +# Adding `EventLogger` to a Jupyter application + +To begin using Jupyter Events in your Python application, create an instance of the `EventLogger` object in your application. + +```python +from jupyter_core.application import JupyterApp +from jupyter_events import EventLogger + + +class MyApplication(JupyterApp): + + classes = [EventLogger, ...] + eventlogger = Instance(EventLogger) + + def initialize(self, *args, **kwargs): + self.eventlogger = EventLogger(parent=self) + ... +``` + +Register an event schema with the logger. + +```python + schema = """ + $id: myapplication.org/my-method + version: 1 + title: My Method Executed + description: My method was executed one time. + properties: + msg: + title: Message + type: string + """ + + self.eventlogger.register_event_schema( + schema=schema + ) +``` + +Call `.emit(...)` within the application to emit an instance of the event. + +```python + def my_method(self): + # Do something + ... + # Emit event telling listeners that this event happened. + self.eventlogger.emit( + id="myapplication.org/my-method", + version=1, + data={"msg": "Hello, world!"} + ) + # Do something else... + ... +``` + +Great! Now your application is logging events from within. 
Deployers of your application can configure the system to listen to this event using Jupyter's configuration system. This usually means reading a `jupyter_config.py` file like this: + +```python +# A Jupyter configuration file +from logging import StreamHandler + +handler = StreamHandler() +c.EventLogger.handlers = [handler] +``` + +Now when we run our application and call the method, the event will be emitted to the console: + +``` +app = MyApplication.launch_instance(config_file="jupyter_config.py") +app.my_method() +``` + +``` +{'__timestamp__': '2022-08-09T17:15:27.458048Z', + '__schema__': 'myapplication.org/my-method', + '__schema_version__': 1, + '__metadata_version__': 1, + 'msg': 'Hello, world!'} +``` diff --git a/docs/user_guide/configure.md b/docs/user_guide/configure.md new file mode 100644 index 0000000..7beffe4 --- /dev/null +++ b/docs/user_guide/configure.md @@ -0,0 +1,18 @@ +# Configure applications to emit events + +Jupyter applications can be configured to emit events by registering +logging `Handler`s with an Application's `EventLogger` object. + +This is usually done using a Jupyter configuration file, e.g. `jupyter_config.py`: + +```python +from logging import FileHandler + +# Log events to a local file on disk. +handler = FileHandler('events.txt') + +# Register the handler with the EventLogger +# so that emitted events are written +# to the file above. +c.EventLogger.handlers = [handler] +``` diff --git a/docs/user_guide/defining-schema.md b/docs/user_guide/defining-schema.md new file mode 100644 index 0000000..29a4d98 --- /dev/null +++ b/docs/user_guide/defining-schema.md @@ -0,0 +1,41 @@ +(defining-schema)= + +# Defining an event schema + +All Jupyter Events schemas are valid [JSON schema](https://json-schema.org/) and can be written in valid YAML or JSON.
More specifically, these schemas are validated against Jupyter Event's "meta"-JSON schema, [here](https://github.com/jupyter/jupyter_events/tree/main/jupyter_events/schemas/event-metaschema.yml). + +A common pattern is to define these schemas in separate files and register them with an `EventLogger` using the `.register_event_schema(...)` method: + +```python +schema_filepath = "/path/to/schema.yaml" + +logger = EventLogger() +logger.register_event_schema(schema_filepath) +``` + +At a minimum, a valid Jupyter event schema requires the following keys: + +- `$id` : a URI to identify (and possibly locate) the schema. +- `version` : the schema version. +- `properties` : attributes of the event being emitted. + +Beyond these required items, any valid JSON should be possible. Here is a simple example of a valid JSON schema for an event. + +```yaml +$id: event.jupyter.org/example-event +version: 1 +title: My Event +description: | + Some information about my event +type: object +properties: + thing: + title: Thing + description: A random thing. + user: + title: User name + description: Name of user who initiated event +required: + - thing + - user +``` diff --git a/docs/user_guide/event-schemas.md b/docs/user_guide/event-schemas.md new file mode 100644 index 0000000..3af3962 --- /dev/null +++ b/docs/user_guide/event-schemas.md @@ -0,0 +1,55 @@ +# What is an event schema? + +A Jupyter event schema defines the _shape_ and _type_ of an emitted event instance. This is a key piece of Jupyter Events. It tells the event listeners what they should expect when an event occurs. + +In the {ref}`first-event`, you saw how to register a schema with the `EventLogger`. + +In the next section, {ref}`defining-schema`, you will learn how to define a new schema.
+ +_So what exactly happens when we register a schema?_ + +```python +from jupyter_events.logger import EventLogger + +schema = """ +$id: myapplication.org/example-event +version: 1 +title: Example Event +description: An interesting event to collect +properties: + name: + title: Name of Event + type: string +""" + +logger = EventLogger() +logger.register_event_schema(schema) +``` + +First, the schema is validated against [Jupyter Event's metaschema](https://github.com/jupyter/jupyter_events/tree/main/jupyter_events/schemas/event-metaschema.yml). This ensures that your schema adheres minimally to Jupyter Event's expected form (read about how to define a schema [here](../user_guide/defining-schema)). + +Second, a `jsonschema.Validator` is created and cached for each one of your event schemas in a "schema registry" object. + +```python +print(logger.schemas) +``` + +``` +Validator class: Draft7Validator +Schema: { + "$id": "myapplication.org/example-event", + "version": 1, + "title": "Example Event", + "description": "An interesting event to collect", + "properties": { + "name": { + "title": "Name of Event", + "type": "string" + } + } +} +``` + +The registry's validators will be used to check incoming events to ensure all outgoing, emitted events are registered and follow the expected form. + +Lastly, if an incoming event is not found in the registry, it does not get emitted. This ensures that we only collect data that we explicitly register with the logger. diff --git a/docs/user_guide/first-event.md b/docs/user_guide/first-event.md new file mode 100644 index 0000000..ed72827 --- /dev/null +++ b/docs/user_guide/first-event.md @@ -0,0 +1,62 @@ +(first-event)= + +# Logging your first event! + +The `EventLogger` is the main object in Jupyter Events. + +```python +from jupyter_events.logger import EventLogger + +logger = EventLogger() +``` + +To begin emitting events from a Python application, you need to tell the `EventLogger` what events you'd like to emit.
To do this, we should register our event's schema (more on this later) with the logger. + +```python +schema = """ +$id: myapplication.org/example-event +version: 1 +title: Example Event +description: An interesting event to collect +properties: + name: + title: Name of Event + type: string +""" + + +logger.register_event_schema(schema) +``` + +Now that the logger knows about the event, it needs to know _where_ to send it. To do this, we register a logging _Handler_ —borrowed from Python's standard [`logging`](https://docs.python.org/3/library/logging.html) library—to route the events to the proper place. + +```python +# We will import one of the handlers from Python's logging library +from logging import StreamHandler + +handler = StreamHandler() + +logger.register_handler(handler) +``` + +The logger knows about the event and where to send it; all that's left is to emit an instance of the event! + +```python +logger.emit( + id="myapplication.org/example-event", + version=1, + data={ + "name": "My Event" + } +) +``` + +On emission, the following data will get printed to your console by the `StreamHandler` instance: + +``` +{'__timestamp__': '2022-08-09T17:15:27.458048Z', + '__schema__': 'myapplication.org/example-event', + '__schema_version__': 1, + '__metadata_version__': 1, + 'name': 'My Event'} +``` diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md new file mode 100644 index 0000000..bb56b2e --- /dev/null +++ b/docs/user_guide/index.md @@ -0,0 +1,12 @@ +# User Guide + +```{toctree} +--- +maxdepth: 2 +--- +first-event +event-schemas +defining-schema +configure +application +``` diff --git a/jupyter_events/_categories.py b/jupyter_events/_categories.py deleted file mode 100644 index e7e8045..0000000 --- a/jupyter_events/_categories.py +++ /dev/null @@ -1,185 +0,0 @@ -from collections import deque - -from jsonschema import Draft7Validator, validators -from jsonschema.exceptions import ValidationError - - -class ExtractCategories(ValidationError): -
""" - A special `jsonschema.ValidationError` that carries information about the - `categories` keyword, intended to be yielded whenever a `categories` keyword - is encountered during `jsonschema` JSON validation. - - The primary use case for this class is to make use of the JSON validation - mechanism implemented by `jsonschema` to extract all categories associated - with each property in a JSON instance based on a JSON schema. It is not - intended to be used as an actual validation error. - """ - - def __init__(self, property, categories, *args, **kwargs): - super(ValidationError, self).__init__(*args, **kwargs) - self.property = property - self.categories = categories - - -def extend_with_categories(validator_class): - """ - Extend a `jsonschema.IValidator` class so that it yields a `_ExtractCategories` - whenever a `categories` keyword is encountered during JSON validation - - Parameters - ---------- - validator_class : jsonschema.IValidator - an existing validator class - - Returns - ------- - jsonschema.IValidator - a new `jsonschema.IValidator` class extending the one provided - - Examples - -------- - from jsonschema import Draft7Validator - - - CategoryExtractor = extend_with_categories(Draft7Validator) - """ - validate_properties = validator_class.VALIDATORS["properties"] - - def get_categories(validator, properties, instance, schema): - for property, subschema in properties.items(): - if "categories" in subschema: - yield ExtractCategories(property, subschema["categories"], message=None) - - yield from validate_properties( - validator, - properties, - instance, - schema, - ) - - return validators.extend( - validator_class, - {"properties": get_categories}, - ) - - -JSONSchemaValidator = Draft7Validator -CategoryExtractor = extend_with_categories(JSONSchemaValidator) - - -# Ignore categories under any of these jsonschema keywords -IGNORE_CATEGORIES_SCHEMA_KEYWORDS = {"if", "not", "anyOf", "oneOf", "then", "else"} - - -def 
extract_categories_from_errors(errors): - for e in errors: - if isinstance(e, ExtractCategories) and not any( - p in IGNORE_CATEGORIES_SCHEMA_KEYWORDS for p in e.absolute_schema_path - ): - yield e - else: - yield from extract_categories_from_errors(e.context) - - -def extract_categories_from_event(event, schema): - """ - Generate a `dict` of `_ExtractCategories` whose keys are pointers to the properties - - Parameters - ---------- - event : dict - Event data - - schema : dict - A JSON schema - - Returns - ------- - dict - A mapping from properties in the event to their categories. - - In each entry, the key is a pointer to a property in the event - (in the form of a tuple) and the value is a `_ExtractCategories` - containing the categories associated with that property. - """ - return { - tuple(c.absolute_path + deque([c.property])): c - for c in extract_categories_from_errors( - CategoryExtractor(schema).iter_errors(event) - ) - } - - -def filter_categories_from_event(event, schema, allowed_categories, allowed_properties): - """ - Filter properties from an event based on their categories. - - Only whitelisted properties and properties whose categories are allowed are kept. - - Parameters - ---------- - event : dict - The input event - - schema : dict - A JSON schema that makes use of the the `categories` keyword to - specify what categories are associated with a certain property. - - allowed_categories : set - Specify which categories are allowed - - allowed_properties : set - Whitelist certain top level properties. - - These properties are included in the output event even if not all of - their properties are allowed. 
- - Returns - ------- - dict - The output event after category filtering - - """ - categories = extract_categories_from_event(event, schema) - - # Top-level properties without declared categories are set to null - for property in event.keys(): - path = (property,) - if path not in categories: - event[property] = None - - # Allow only properties whose categories are included in allowed_categories - # and whose top-level parent is included in allowed_properties - not_allowed = ( - c - for p, c in categories.items() - if not ( - set(c.categories).issubset(allowed_categories) or p[0] in allowed_properties - ) - ) - - for c in not_allowed: - # In case both a sub property and its parent, e.g. ['user', 'name'] and - # ['user'], do not have all the allowed categories and are to be removed, - # if the parent is removed first then attempting to access - # the descendent would either return None or raise an IndexError or - # KeyError. Just skip it. - try: - item = deep_get(event, c.absolute_path) - except IndexError: - continue - except KeyError: - continue - - if item is not None: - item[c.property] = None - - return event - - -def deep_get(instance, path): - result = instance - while result is not None and path: - result = result[path.popleft()] - return result diff --git a/jupyter_events/_version.py b/jupyter_events/_version.py index 1dd3d0b..f695eb1 100644 --- a/jupyter_events/_version.py +++ b/jupyter_events/_version.py @@ -1,2 +1,2 @@ -version_info = (0, 2, 0, ".dev", "0") -__version__ = "0.2.0.dev0" +version_info = (0, 1, 0, "", "") +__version__ = "0.1.0" diff --git a/jupyter_events/categories.py b/jupyter_events/categories.py deleted file mode 100644 index 8fff253..0000000 --- a/jupyter_events/categories.py +++ /dev/null @@ -1,2 +0,0 @@ -# flake8: noqa -from ._categories import JSONSchemaValidator, filter_categories_from_event diff --git a/jupyter_events/logger.py b/jupyter_events/logger.py index 61fbd03..e917a8e 100644 --- a/jupyter_events/logger.py +++ 
b/jupyter_events/logger.py @@ -3,72 +3,60 @@ """ import json import logging +import warnings from datetime import datetime +from pathlib import PurePath +from typing import Union from pythonjsonlogger import jsonlogger - -try: - from ruamel.yaml import YAML -except ImportError as e: - # check for known conda bug that prevents - # pip from installing ruamel.yaml dependency - try: - import ruamel_yaml # noqa - except ImportError: - # nope, regular import error; raise original - raise e - else: - # have conda fork ruamel_yaml, but not ruamel.yaml. - # this is a bug in the ruamel_yaml conda package - # mistakenly identifying itself as ruamel.yaml to pip. - # conda install the 'real' ruamel.yaml to fix - raise ImportError( - "Missing dependency ruamel.yaml. Try: `conda install ruamel.yaml`" - ) - +from traitlets import Instance, default from traitlets.config import Config, Configurable from . import EVENTS_METADATA_VERSION -from .categories import JSONSchemaValidator, filter_categories_from_event -from .traits import Handlers, SchemaOptions - -yaml = YAML(typ="safe") +from .schema_registry import SchemaRegistry +from .traits import Handlers -def _skip_message(record, **kwargs): +class SchemaNotRegistered(Warning): + """A warning to raise when an event is given to the logger + but its schema has not been registered with the EventLogger """ - Remove 'message' from log record. - It is always emitted with 'null', and we do not want it, - since we are always emitting events only - """ - del record["message"] - return json.dumps(record, **kwargs) + + +# Only show this warning on the first instance +# of each event type that fails to emit. +warnings.simplefilter("once", SchemaNotRegistered) class EventLogger(Configurable): """ - Send structured events to a logging sink + An Event logger for emitting structured events. + + Event schemas must be registered with the + EventLogger using the `register_event_schema` + method.
Every schema + will be validated against Jupyter Event's metaschema. """ handlers = Handlers( - [], + default_value=[], allow_none=True, help="""A list of logging.Handler instances to send events to. - When set to None (the default), events are discarded. + When set to None (the default), all events are discarded. """, ).tag(config=True) - allowed_schemas = SchemaOptions( - {}, - allow_none=True, - help=""" - Fully qualified names of schemas to record. - - Each schema you want to record must be manually specified. - The default, an empty list, means no events are recorded. + schemas = Instance( + SchemaRegistry, + help="""The SchemaRegistry for caching validated schemas + and their jsonschema validators. """, - ).tag(config=True) + ) + + @default("schemas") + def _default_schemas(self) -> SchemaRegistry: + return SchemaRegistry() def __init__(self, *args, **kwargs): # We need to initialize the configurable before @@ -82,13 +70,10 @@ def __init__(self, *args, **kwargs): self.log.propagate = False # We will use log.info to emit self.log.setLevel(logging.INFO) - self.schemas = {} # Add each handler to the logger and format the handlers. if self.handlers: - formatter = jsonlogger.JsonFormatter(json_serializer=_skip_message) for handler in self.handlers: - handler.setFormatter(formatter) - self.log.addHandler(handler) + self.register_handler(handler) def _load_config(self, cfg, section_names=None, traits=None): """Load EventLogger traits from a Config object, patching the @@ -107,101 +92,48 @@ def get_handlers(): eventlogger_cfg = Config({"EventLogger": my_cfg}) super()._load_config(eventlogger_cfg, section_names=None, traits=None) - def register_schema_file(self, filename): - """ - Convenience function for registering a JSON schema from a filepath - - Supports both JSON & YAML files. - - Parameters - ---------- - filename: str, path object or file-like object - Path to the schema file or a file object to register. 
- """ - # Just use YAML loader for everything, since all valid JSON is valid YAML - - # check if input is a file-like object - if hasattr(filename, "read") and hasattr(filename, "write"): - self.register_schema(yaml.load(filename)) - else: - with open(filename) as f: - self.register_schema(yaml.load(f)) + def register_event_schema(self, schema: Union[dict, str, PurePath]): + """Register this schema with the schema registry. - def register_schema(self, schema): + Get this registered schema using the EventLogger.schema.get() method. """ - Register a given JSON Schema with this event emitter - - 'version' and '$id' are required fields. - """ - # Check if our schema itself is valid - # This throws an exception if it isn't valid - JSONSchemaValidator.check_schema(schema) - - # Check that the properties we require are present - required_schema_fields = {"$id", "version", "properties"} - for rsf in required_schema_fields: - if rsf not in schema: - raise ValueError(f"{rsf} is required in schema specification") - - if (schema["$id"], schema["version"]) in self.schemas: - raise ValueError( - "Schema {} version {} has already been registered.".format( - schema["$id"], schema["version"] - ) - ) - - for p, attrs in schema["properties"].items(): - if p.startswith("__"): - raise ValueError( - "Schema {} has properties beginning with __, which is not allowed" - ) - - # Validate "categories" property in proposed schema. - try: - cats = attrs["categories"] - # Categories must be a list. - if not isinstance(cats, list): - raise ValueError( - 'The "categories" field in a registered schemas must be a list.' - ) - except KeyError: - raise KeyError( - 'All properties must have a "categories" field that describes ' - 'the type of data being collected. 
The "{}" property does not ' - "have a category field.".format(p) - ) + self.schemas.register(schema) - self.schemas[(schema["$id"], schema["version"])] = schema + def register_handler(self, handler: logging.Handler): + """Register a new logging handler to the Event Logger. - def get_allowed_properties(self, schema_name): - """Get the allowed properties for an allowed schema.""" - config = self.allowed_schemas[schema_name] - try: - return set(config["allowed_properties"]) - except KeyError: - return set() - - def get_allowed_categories(self, schema_name): - """ - Return a set of allowed categories for a given schema - from the EventLog's config. + All outgoing messages will be formatted as a JSON string. """ - config = self.allowed_schemas[schema_name] - try: - allowed_categories = config["allowed_categories"] - allowed_categories.append("unrestricted") - return set(allowed_categories) - except KeyError: - return {"unrestricted"} - def record_event(self, schema_name, version, event, timestamp_override=None): + def _skip_message(record, **kwargs): + """ + Remove 'message' from log record. + It is always emitted with 'null', and we do not want it, + since we are always emitting events only + """ + del record["message"] + return json.dumps(record, **kwargs) + + formatter = jsonlogger.JsonFormatter(json_serializer=_skip_message) + handler.setFormatter(formatter) + self.log.addHandler(handler) + if handler not in self.handlers: + self.handlers.append(handler) + + def remove_handler(self, handler: logging.Handler): + """Remove a logging handler from the logger and list of handlers.""" + self.log.removeHandler(handler) + if handler in self.handlers: + self.handlers.remove(handler) + + def emit(self, schema_id: str, version: int, data: dict, timestamp_override=None): """ Record given event with schema has occurred. 
Parameters ---------- - schema_name: str - Name of the schema + schema_id: str + $id of the schema version: str The schema version event: dict @@ -214,22 +146,20 @@ def record_event(self, schema_name, version, event, timestamp_override=None): dict The recorded event data """ - if not (self.handlers and schema_name in self.allowed_schemas): - # if handler isn't set up or schema is not explicitly whitelisted, - # don't do anything + # If no handlers are routing these events, there's no need to proceed. + if not self.handlers: return - if (schema_name, version) not in self.schemas: - raise ValueError( - "Schema {schema_name} version {version} not registered".format( - schema_name=schema_name, version=version - ) + # If the schema hasn't been registered, raise a warning to make sure + # this was intended. + if (schema_id, version) not in self.schemas: + warnings.warn( + f"({schema_id}, {version}) has not been registered yet. If " + "this was not intentional, please register the schema using the " + "`register_event_schema` method.", + SchemaNotRegistered, ) - - schema = self.schemas[(schema_name, version)] - - # Validate the event data. - JSONSchemaValidator(schema).validate(event) + return # Generate the empty event capsule. if timestamp_override is None: @@ -238,20 +168,12 @@ def record_event(self, schema_name, version, event, timestamp_override=None): timestamp = timestamp_override capsule = { "__timestamp__": timestamp.isoformat() + "Z", - "__schema__": schema_name, + "__schema__": schema_id, "__schema_version__": version, "__metadata_version__": EVENTS_METADATA_VERSION, } - - # Filter properties in the incoming event based on the - # allowed categories and properties from the eventlog config. 
- allowed_categories = self.get_allowed_categories(schema_name) - allowed_properties = self.get_allowed_properties(schema_name) - - filtered_event = filter_categories_from_event( - event, schema, allowed_categories, allowed_properties - ) - capsule.update(filtered_event) - + # Process this event, i.e. validate and redact (in place) + self.schemas.validate_event(schema_id, version, data) + capsule.update(data) self.log.info(capsule) return capsule diff --git a/jupyter_events/schema.py b/jupyter_events/schema.py new file mode 100644 index 0000000..b972e44 --- /dev/null +++ b/jupyter_events/schema.py @@ -0,0 +1,115 @@ +import json +from pathlib import Path, PurePath +from typing import Tuple, Union + +from jsonschema import validators +from jsonschema.protocols import Validator + +from . import yaml +from .validators import validate_schema + + +class EventSchemaLoadingError(Exception): + pass + + +class EventSchema: + """A validated schema that can be used. + + On instantiation, validate the schema against + Jupyter Event's metaschema. + + Parameters + ---------- + schema: dict or str + JSON schema to validate against Jupyter Events. + + validator_class: jsonschema.validators + The validator class from jsonschema used to validate instances + of this event schema. The schema itself will be validated + against Jupyter Event's metaschema to ensure that + any schema registered here follows the expected form + of Jupyter Events. + + resolver: + RefResolver for nested JSON schema references. + """ + + def __init__( + self, + schema: Union[dict, str, PurePath], + validator_class: Validator = validators.Draft7Validator, + resolver=None, + ): + _schema = self._load_schema(schema) + # Validate the schema against Jupyter Events metaschema. 
+ validate_schema(_schema) + # Create a validator for this schema + self._validator = validator_class(_schema, resolver=resolver) + self._schema = _schema + + def __repr__(self): + out = f"Validator class: {self._validator.__class__.__name__}\n" + out += f"Schema: {json.dumps(self._schema, indent=2)}" + return out + + @staticmethod + def _load_schema(schema: Union[dict, str, PurePath]) -> dict: + """Load a JSON schema from different sources/data types. + + `schema` could be a dictionary, string, or pathlib object representing + a schema file on disk. + + Returns a dictionary with schema data. + """ + if isinstance(schema, str): + # Path accepts any string, so this + # won't throw an error if give it + # a schema string (not a file name). We + # need to do further checks to ensure this + # is a file to read from. + fpath = Path(schema) + # Check if this is an existing file. + # If if doesn't exists, it could mean + # two different things: 1. this is the + # wrong file path or 2. this is actually + # a schema in the form of a string. + if fpath.exists(): + _schema = yaml.load(schema) + # Try loading this string as a schema object. + else: + _schema = yaml.loads(schema) + # If _schema is still a string (not a schema dict), + # it means + if isinstance(_schema, str): + raise EventSchemaLoadingError( + "We tried reading the `schema` string as a file path, but " + "the path did not exist. Then, we tried deserializing the " + "`schema` string, but a string was returned where a schema " + "dictionary was expected. Please check `schema` to make " + "sures it is either a valid file path or schema string." + ) + # Load from a PurePath. 
+ elif isinstance(schema, PurePath): + _schema = yaml.load(schema) + else: + _schema = schema + return _schema + + @property + def id(self) -> str: + """Schema $id field.""" + return self._schema["$id"] + + @property + def version(self) -> int: + """Schema's version.""" + return self._schema["version"] + + @property + def registry_key(self) -> Tuple[str, int]: + return (self.id, self.version) + + def validate(self, data: dict) -> None: + """Validate an incoming instance of this event schema.""" + self._validator.validate(data) diff --git a/jupyter_events/schema_registry.py b/jupyter_events/schema_registry.py new file mode 100644 index 0000000..adb1d4f --- /dev/null +++ b/jupyter_events/schema_registry.py @@ -0,0 +1,73 @@ +from typing import Tuple, Union + +from .schema import EventSchema + + +class SchemaRegistryException(Exception): + """Exception class for Jupyter Events Schema Registry Errors.""" + + +class SchemaRegistry: + """A convenient API for storing and searching a group of schemas.""" + + def __init__(self, schemas: dict = None): + self._schemas = schemas or {} + + def __contains__(self, registry_key: Tuple[str, int]): + """Syntax sugar to check if a schema is found in the registry""" + return registry_key in self._schemas + + def __repr__(self) -> str: + return ",\n".join([str(s) for s in self._schemas.values()]) + + def _add(self, schema_obj: EventSchema): + if schema_obj.registry_key in self._schemas: + raise SchemaRegistryException( + f"The schema, {schema_obj.id} " + f"(version {schema_obj.version}), is already " + "registered. Try removing it and registering it again." + ) + self._schemas[schema_obj.registry_key] = schema_obj + + def register(self, schema: Union[dict, str, EventSchema]): + """Add a valid schema to the registry. 
+ + All schemas are validated against the Jupyter Events meta-schema + found here: + """ + if not isinstance(schema, EventSchema): + schema = EventSchema(schema) + self._add(schema) + + def get(self, id: str, version: int) -> EventSchema: + """Fetch a given schema. If the schema is not found, + this will raise a KeyError. + """ + try: + return self._schemas[(id, version)] + except KeyError: + raise KeyError( + f"The requested schema, {id} " + f"(version {version}), was not found in the " + "schema registry. Are you sure it was previously registered?" + ) + + def remove(self, id: str, version: int) -> None: + """Remove a given schema. If the schema is not found, + this will raise a KeyError. + """ + try: + del self._schemas[(id, version)] + except KeyError: + raise KeyError( + f"The requested schema, {id} " + f"(version {version}), was not found in the " + "schema registry. Are you sure it was previously registered?" + ) + + def validate_event(self, id: str, version: int, data: dict) -> None: + """Validate an event against a schema within this + registry. + """ + schema = self.get(id, version) + schema.validate(data) diff --git a/jupyter_events/schemas/event-metaschema.yml b/jupyter_events/schemas/event-metaschema.yml new file mode 100644 index 0000000..913e572 --- /dev/null +++ b/jupyter_events/schemas/event-metaschema.yml @@ -0,0 +1,26 @@ +$schema: http://json-schema.org/draft-07/schema +$id: http://event.jupyter.org/event-metaschema +version: 1 +title: Event Metaschema +description: | + A meta schema for validating that all registered Jupyter Event + schemas are appropriately defined. 
+type: object +properties: + version: + type: integer + title: + type: string + description: + type: string + properties: + type: object + additionalProperties: + $ref: http://event.jupyter.org/property-metaschema + propertyNames: + pattern: ^(?!__.*) + +required: + - $id + - version + - properties diff --git a/jupyter_events/schemas/property-metaschema.yml b/jupyter_events/schemas/property-metaschema.yml new file mode 100644 index 0000000..64e2e82 --- /dev/null +++ b/jupyter_events/schemas/property-metaschema.yml @@ -0,0 +1,28 @@ +$schema: http://json-schema.org/draft-07/schema +$id: http://event.jupyter.org/property-metaschema +version: 1 +title: Property Metaschema +description: | + A metaschema for validating properties within + an event schema + +properties: + title: + type: string + description: + type: string + properties: + type: object + additionalProperties: + $ref: http://event.jupyter.org/property-metaschema + propertyNames: + pattern: ^(?!__.*) + + items: + $ref: http://event.jupyter.org/property-metaschema + +additionalProperties: + $ref: http://event.jupyter.org/property-metaschema + +propertyNames: + pattern: ^(?!__.*) diff --git a/jupyter_events/traits.py b/jupyter_events/traits.py index 848bd4b..020320c 100644 --- a/jupyter_events/traits.py +++ b/jupyter_events/traits.py @@ -42,37 +42,3 @@ def validate(self, obj, value): return value else: self.error(obj, value) - - -class SchemaOptions(TraitType): - """A trait for handling options for recording schemas.""" - - info_text = "either a dictionary with schema options or a list with schema names." - - def validate(self, obj, val): - # If the type is a dictionary. - if type(val) is dict: - for schema_name, data in val.items(): - given_keys = set(data.keys()) - # Compare against keys expected. - allowed_keys = {"allowed_categories", "allowed_properties"} - # There should be no extra keys (anything other than - # allowed_keys) in the schema options. 
- unknown_keys = given_keys.difference(allowed_keys) - if unknown_keys: - # Throw an error if there are unknown keys. - raise TraitError( - "The schema option, {schema_name}, includes " - "unknown key(s): {unknown_keys}".format( - schema_name=schema_name, unknown_keys=",".join(unknown_keys) - ) - ) - validated_val = val - # If the type is a list (for backwards compatibility). - elif type(val) is list: - validated_val = {} - for schema_name in val: - validated_val[schema_name] = {} - else: - raise TraitError("SchemaOptions must be of type dict or list.") - return validated_val diff --git a/jupyter_events/validators.py b/jupyter_events/validators.py new file mode 100644 index 0000000..e64d671 --- /dev/null +++ b/jupyter_events/validators.py @@ -0,0 +1,36 @@ +import pathlib + +from jsonschema import RefResolver, ValidationError, validators + +from . import yaml + +METASCHEMA_PATH = pathlib.Path(__file__).parent.joinpath("schemas") +EVENT_METASCHEMA_FILEPATH = METASCHEMA_PATH.joinpath("event-metaschema.yml") +EVENT_METASCHEMA = yaml.load(EVENT_METASCHEMA_FILEPATH) +PROPERTY_METASCHEMA_FILEPATH = METASCHEMA_PATH.joinpath("property-metaschema.yml") +PROPERTY_METASCHEMA = yaml.load(PROPERTY_METASCHEMA_FILEPATH) +SCHEMA_STORE = { + PROPERTY_METASCHEMA["$id"]: PROPERTY_METASCHEMA, +} +METASCHEMA_RESOLVER = RefResolver( + base_uri=EVENT_METASCHEMA["$id"], referrer=EVENT_METASCHEMA, store=SCHEMA_STORE +) +JUPYTER_EVENTS_VALIDATOR = validators.Draft202012Validator( + schema=EVENT_METASCHEMA, resolver=METASCHEMA_RESOLVER +) + + +def validate_schema(schema: dict): + try: + # Validate the schema against Jupyter Events metaschema. + JUPYTER_EVENTS_VALIDATOR.validate(schema) + except ValidationError as err: + reserved_property_msg = " does not match '^(?!__.*)'" + if reserved_property_msg in str(err): + bad_property = str(err)[: -(len(reserved_property_msg))] + raise ValidationError( + f"{bad_property} is an invalid property name because it " + "starts with `__`. 
Properties starting with 'dunder' " + "are reserved for Jupyter Events." + ) + raise err diff --git a/jupyter_events/yaml.py b/jupyter_events/yaml.py new file mode 100644 index 0000000..072f6a2 --- /dev/null +++ b/jupyter_events/yaml.py @@ -0,0 +1,28 @@ +# mypy: ignore-errors +import pathlib + +from yaml import dump as ydump +from yaml import load as yload + +try: + from yaml import CDumper as Dumper + from yaml import CLoader as Loader +except ImportError: + from yaml import Dumper, Loader + + +def loads(stream): + return yload(stream, Loader=Loader) + + +def dumps(stream): + return ydump(stream, Dumper=Dumper) + + +def load(fpath): + data = pathlib.Path(str(fpath)).read_text() + return loads(data) + + +def dump(data, outpath): + pathlib.Path(outpath).write_text(dumps(data)) diff --git a/pyproject.toml b/pyproject.toml index 4eeb5dd..e919bc7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ build-backend = "hatchling.build" [project] name = "jupyter-events" description = "Jupyter Event System library" +readme = "README.md" requires-python = ">=3.7" authors = [ { name = "Jupyter Development Team", email = "jupyter@googlegroups.com" }, @@ -26,17 +27,13 @@ classifiers = [ dependencies = [ "jsonschema", "python-json-logger", - "ruamel.yaml", + "pyyaml", "traitlets", ] dynamic = [ "version", ] -[project.readme] -file = "README.md" -content-type = "text/markdown" - [project.license] file = 'COPYING.md' @@ -55,7 +52,7 @@ test = [ path = "jupyter_events/_version.py" [tool.tbump.version] -current = "0.2.0.dev0" +current = "0.1.0" regex = ''' (?P\d+)\.(?P\d+)\.(?P\d+) ((?Pa|b|rc|.dev)(?P\d+))? diff --git a/tests/schemas/bad/nested-reserved-property.yaml b/tests/schemas/bad/nested-reserved-property.yaml new file mode 100644 index 0000000..4ea004d --- /dev/null +++ b/tests/schemas/bad/nested-reserved-property.yaml @@ -0,0 +1,32 @@ +$id: http://event.jupyter.org/test +version: 1 +title: Schema with Array +description: | + A schema for an array of objects. 
+type: object +properties: + users: + title: Test User Array + description: | + Test User array. + type: array + items: + type: object + title: User + properties: + name: + type: string + title: Name + hobbies: + type: array + title: Hobbies + items: + type: object + title: Hobby + properties: + __badName: + title: Sport Name + type: string + position: + title: Position + type: string diff --git a/tests/schemas/bad/reserved-property.yaml b/tests/schemas/bad/reserved-property.yaml new file mode 100644 index 0000000..919a937 --- /dev/null +++ b/tests/schemas/bad/reserved-property.yaml @@ -0,0 +1,11 @@ +$id: http://event.jupyter.org/test +version: 1 +title: Simple Test Schema +description: | + A simple schema for testing +type: object +properties: + __badName: + title: Test Property + description: Test property. + type: string diff --git a/tests/schemas/good/array.yaml b/tests/schemas/good/array.yaml new file mode 100644 index 0000000..a917374 --- /dev/null +++ b/tests/schemas/good/array.yaml @@ -0,0 +1,22 @@ +$id: http://event.jupyter.org/test +version: 1 +title: Schema with Array +description: | + A schema for an array of objects. +type: object +properties: + users: + title: Test User Array + description: | + Test User array. + type: array + items: + type: object + title: User + properties: + email: + type: string + title: Email + id: + type: string + title: Name diff --git a/tests/schemas/good/basic.yaml b/tests/schemas/good/basic.yaml new file mode 100644 index 0000000..33a73e4 --- /dev/null +++ b/tests/schemas/good/basic.yaml @@ -0,0 +1,11 @@ +$id: http://event.jupyter.org/test +version: 1 +title: Simple Test Schema +description: | + A simple schema for testing +type: object +properties: + prop: + title: Test Property + description: Test property. 
+ type: string diff --git a/tests/schemas/good/nested-array.yaml b/tests/schemas/good/nested-array.yaml new file mode 100644 index 0000000..f54c3cf --- /dev/null +++ b/tests/schemas/good/nested-array.yaml @@ -0,0 +1,32 @@ +$id: http://event.jupyter.org/test +version: 1 +title: Schema with Array +description: | + A schema for an array of objects. +type: object +properties: + users: + title: Test User Array + description: | + Test User array. + type: array + items: + type: object + title: User + properties: + name: + type: string + title: Name + hobbies: + type: array + title: Hobbies + items: + type: object + title: Hobby + properties: + sport: + title: Sport Name + type: string + position: + title: Position + type: string diff --git a/tests/test_allowed_schemas.py b/tests/test_allowed_schemas.py deleted file mode 100644 index 7fba879..0000000 --- a/tests/test_allowed_schemas.py +++ /dev/null @@ -1,199 +0,0 @@ -from textwrap import dedent as _ - -import pytest -from ruamel.yaml import YAML - -from jupyter_events.logger import EventLogger - -from .utils import get_event_data - -SCHEMA_ID = "test.event" -VERSION = 1 - - -@pytest.fixture -def schema(): - return { - "$id": SCHEMA_ID, - "title": "Test Event", - "version": VERSION, - "description": "Test Event.", - "type": "object", - "properties": { - "nothing-exciting": { - "description": "a property with nothing exciting happening", - "categories": ["unrestricted"], - "type": "string", - }, - "id": { - "description": "user ID", - "categories": ["user-identifier"], - "type": "string", - }, - "email": { - "description": "email address", - "categories": ["user-identifiable-information"], - "type": "string", - }, - }, - } - - -def test_raised_exception_for_nonlist_categories(): - # Bad schema in yaml form. 
- yaml_schema = _( - """\ - $id: test.schema - title: Test Event - version: 1 - type: object - properties: - test_property: - description: testing a property - categories: user-identifier - type: string - """ - ) - yaml = YAML(typ="safe") - schema = yaml.load(yaml_schema) - - # Register schema with an EventLogger - e = EventLogger( - allowed_schemas={SCHEMA_ID: {"allowed_categories": ["user-identifier"]}}, - ) - - # This schema does not have categories as a list. - with pytest.raises(ValueError) as err: - e.register_schema(schema) - # Verify that the error message is the expected error message. - assert "must be a list." in str(err.value) - - -def test_missing_categories_label(): - # Bad schema in yaml form. - yaml_schema = _( - """\ - $id: test.schema - title: Test Event - version: 1 - type: object - properties: - test_property: - description: testing a property - type: string - """ - ) - yaml = YAML(typ="safe") - schema = yaml.load(yaml_schema) - - # Register schema with an EventLogger - e = EventLogger( - allowed_schemas={SCHEMA_ID: {"allowed_categories": ["random-category"]}} - ) - - # This schema does not have categories as a list. - with pytest.raises(KeyError) as err: - e.register_schema(schema) - # Verify that the error message is the expected error message. 
- assert 'All properties must have a "categories"' in str(err.value) - - -EVENT_DATA = { - "nothing-exciting": "hello, world", - "id": "test id", - "email": "test@testemail.com", -} - - -@pytest.mark.parametrize( - "allowed_schemas,expected_output", - [ - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": []}}, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "id": None, - "email": None, - }, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["unrestricted"]}}, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "id": None, - "email": None, - }, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["user-identifier"]}}, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "id": "test id", - "email": None, - }, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["user-identifiable-information"]}}, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "id": None, - "email": "test@testemail.com", - }, - ), - ( - # User configuration for allowed_schemas - { - SCHEMA_ID: { - "allowed_categories": [ - "user-identifier", - "user-identifiable-information", - ] - } - }, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "id": "test id", - "email": "test@testemail.com", - }, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_properties": ["id"]}}, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "id": "test id", - "email": None, - }, - ), - ( - # User configuration for allowed_schemas - { - SCHEMA_ID: { - "allowed_properties": ["id"], - "allowed_categories": ["user-identifiable-information"], - } - }, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, 
world", - "id": "test id", - "email": "test@testemail.com", - }, - ), - ], -) -def test_allowed_schemas(schema, allowed_schemas, expected_output): - event_data = get_event_data(EVENT_DATA, schema, SCHEMA_ID, VERSION, allowed_schemas) - - # Verify that *exactly* the right properties are recorded. - assert expected_output == event_data diff --git a/tests/test_category_filtering.py b/tests/test_category_filtering.py deleted file mode 100644 index 55d1197..0000000 --- a/tests/test_category_filtering.py +++ /dev/null @@ -1,440 +0,0 @@ -import pytest - -from .utils import get_event_data - -SCHEMA_ID = "test.event" -VERSION = 1 - - -NESTED_CATEGORY_SCHEMA = { - "$id": SCHEMA_ID, - "title": "Test Event", - "version": VERSION, - "description": "Test Event.", - "type": "object", - "properties": { - "nothing-exciting": { - "description": "a property with nothing exciting happening", - "categories": ["unrestricted"], - "type": "string", - }, - "user": { - "description": "user", - "categories": ["user-identifier"], - "type": "object", - "properties": { - "email": { - "description": "email address", - "categories": ["user-identifiable-information"], - "type": "string", - }, - "id": {"description": "user ID", "type": "string"}, - }, - }, - }, -} - - -NESTED_EVENT_DATA = { - "nothing-exciting": "hello, world", - "user": { - "id": "test id", - "email": "test@testemail.com", - }, -} - - -NESTED_CATEGORY_TEST_CASES = [ - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": []}}, - # Expected properties in the recorded event - {"nothing-exciting": "hello, world", "user": None}, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["unrestricted"]}}, - # Expected properties in the recorded event - {"nothing-exciting": "hello, world", "user": None}, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["user-identifier"]}}, - # Expected properties in the recorded event - 
{"nothing-exciting": "hello, world", "user": {"id": "test id", "email": None}}, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["user-identifiable-information"]}}, - # Expected properties in the recorded event - {"nothing-exciting": "hello, world", "user": None}, - ), - ( - # User configuration for allowed_schemas - { - SCHEMA_ID: { - "allowed_categories": [ - "user-identifier", - "user-identifiable-information", - ] - } - }, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "user": { - "id": "test id", - "email": "test@testemail.com", - }, - }, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_properties": ["user"]}}, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "user": { - "id": "test id", - "email": "test@testemail.com", - }, - }, - ), -] - - -@pytest.mark.parametrize("allowed_schemas,expected_output", NESTED_CATEGORY_TEST_CASES) -def test_category_filtering(allowed_schemas, expected_output): - event_data = get_event_data( - NESTED_EVENT_DATA, NESTED_CATEGORY_SCHEMA, SCHEMA_ID, VERSION, allowed_schemas - ) - - # Verify that *exactly* the right properties are recorded. 
- assert expected_output == event_data - - -NESTED_CATEGORY_ARRAY_SCHEMA = { - "$id": SCHEMA_ID, - "title": "Test Event", - "version": VERSION, - "description": "Test Event.", - "type": "object", - "properties": { - "nothing-exciting": { - "description": "a property with nothing exciting happening", - "categories": ["unrestricted"], - "type": "string", - }, - "users": { - "description": "user", - "categories": ["user-identifier"], - "type": "array", - "items": { - "properties": { - "email": { - "description": "email address", - "categories": ["user-identifiable-information"], - "type": "string", - }, - "id": {"description": "user ID", "type": "string"}, - } - }, - }, - }, -} - - -ARRAY_EVENT_DATA = { - "nothing-exciting": "hello, world", - "users": [ - { - "id": "test id 0", - "email": "test0@testemail.com", - }, - { - "id": "test id 1", - "email": "test1@testemail.com", - }, - ], -} - - -@pytest.mark.parametrize( - "allowed_schemas,expected_output", - [ - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": []}}, - # Expected properties in the recorded event - {"nothing-exciting": "hello, world", "users": None}, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["unrestricted"]}}, - # Expected properties in the recorded event - {"nothing-exciting": "hello, world", "users": None}, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["user-identifier"]}}, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "users": [ - { - "id": "test id 0", - "email": None, - }, - { - "id": "test id 1", - "email": None, - }, - ], - }, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["user-identifiable-information"]}}, - # Expected properties in the recorded event - {"nothing-exciting": "hello, world", "users": None}, - ), - ( - # User configuration for allowed_schemas - { - SCHEMA_ID: { - 
"allowed_categories": [ - "user-identifier", - "user-identifiable-information", - ] - } - }, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "users": [ - { - "id": "test id 0", - "email": "test0@testemail.com", - }, - { - "id": "test id 1", - "email": "test1@testemail.com", - }, - ], - }, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_properties": ["users"]}}, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "users": [ - { - "id": "test id 0", - "email": "test0@testemail.com", - }, - { - "id": "test id 1", - "email": "test1@testemail.com", - }, - ], - }, - ), - ], -) -def test_array_category_filtering(allowed_schemas, expected_output): - event_data = get_event_data( - ARRAY_EVENT_DATA, - NESTED_CATEGORY_ARRAY_SCHEMA, - SCHEMA_ID, - VERSION, - allowed_schemas, - ) - - # Verify that *exactly* the right properties are recorded. - assert expected_output == event_data - - -ADDITIONAL_PROP_EVENT_DATA = { - "nothing-exciting": "hello, world", - "user": { - "id": "test id", - "email": "test@testemail.com", - }, - "extra": 1234, -} - - -@pytest.mark.parametrize( - "allowed_schemas,expected_output", - [ - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": []}}, - # Expected properties in the recorded event - {"nothing-exciting": "hello, world", "user": None, "extra": None}, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["unrestricted"]}}, - # Expected properties in the recorded event - {"nothing-exciting": "hello, world", "user": None, "extra": None}, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_categories": ["user-identifier"]}}, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "user": {"id": "test id", "email": None}, - "extra": None, - }, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: 
{"allowed_categories": ["user-identifiable-information"]}}, - # Expected properties in the recorded event - {"nothing-exciting": "hello, world", "user": None, "extra": None}, - ), - ( - # User configuration for allowed_schemas - { - SCHEMA_ID: { - "allowed_categories": [ - "user-identifier", - "user-identifiable-information", - ] - } - }, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "user": { - "id": "test id", - "email": "test@testemail.com", - }, - "extra": None, - }, - ), - ( - # User configuration for allowed_schemas - {SCHEMA_ID: {"allowed_properties": ["user"]}}, - # Expected properties in the recorded event - { - "nothing-exciting": "hello, world", - "user": { - "id": "test id", - "email": "test@testemail.com", - }, - "extra": None, - }, - ), - ], -) -def test_no_additional_properties(allowed_schemas, expected_output): - event_data = get_event_data( - ADDITIONAL_PROP_EVENT_DATA, - NESTED_CATEGORY_SCHEMA, - SCHEMA_ID, - VERSION, - allowed_schemas, - ) - - # Verify that *exactly* the right properties are recorded. 
- assert expected_output == event_data - - -NESTED_CATEGORY_SCHEMA_ALLOF = { - "$id": SCHEMA_ID, - "title": "Test Event", - "version": VERSION, - "description": "Test Event.", - "type": "object", - "properties": { - "nothing-exciting": { - "description": "a property with nothing exciting happening", - "categories": ["unrestricted"], - "type": "string", - }, - "user": { - "description": "user", - "categories": ["user-identifier"], - "type": "object", - "allOf": [ - { - "properties": { - "email": { - "description": "email address", - "categories": ["user-identifiable-information"], - "type": "string", - } - } - }, - {"properties": {"id": {"description": "user ID", "type": "string"}}}, - ], - }, - }, -} - - -NESTED_CATEGORY_SCHEMA_REF = { - "$id": SCHEMA_ID, - "title": "Test Event", - "version": VERSION, - "description": "Test Event.", - "type": "object", - "properties": { - "nothing-exciting": { - "description": "a property with nothing exciting happening", - "categories": ["unrestricted"], - "type": "string", - }, - "user": { - "description": "user", - "categories": ["user-identifier"], - "type": "object", - "$ref": "#/definitions/properties", - }, - }, - "definitions": { - "properties": { - "properties": { - "email": { - "description": "email address", - "categories": ["user-identifiable-information"], - "type": "string", - }, - "id": {"description": "user ID", "type": "string"}, - } - } - }, -} - - -@pytest.mark.parametrize("allowed_schemas,expected_output", NESTED_CATEGORY_TEST_CASES) -def test_category_filtering_ref(allowed_schemas, expected_output): - event_data = get_event_data( - NESTED_EVENT_DATA, - NESTED_CATEGORY_SCHEMA_REF, - SCHEMA_ID, - VERSION, - allowed_schemas, - ) - - # Verify that *exactly* the right properties are recorded. 
- assert expected_output == event_data - - -@pytest.mark.parametrize("allowed_schemas,expected_output", NESTED_CATEGORY_TEST_CASES) -def test_category_filtering_allof(allowed_schemas, expected_output): - event_data = get_event_data( - NESTED_EVENT_DATA, - NESTED_CATEGORY_SCHEMA_ALLOF, - SCHEMA_ID, - VERSION, - allowed_schemas, - ) - - # Verify that *exactly* the right properties are recorded. - assert expected_output == event_data diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py deleted file mode 100644 index 0e77fd3..0000000 --- a/tests/test_eventlog.py +++ /dev/null @@ -1,52 +0,0 @@ -import logging - -import pytest -from traitlets import TraitError -from traitlets.config.loader import PyFileConfigLoader - -from jupyter_events.logger import EventLogger - -GOOD_CONFIG = """ -import logging - -c.EventLogger.handlers = [ - logging.StreamHandler() -] -""" - -BAD_CONFIG = """ -import logging - -c.EventLogger.handlers = [ - 0 -] -""" - - -def get_config_from_file(path, content): - # Write config file - filename = "config.py" - config_file = path / filename - config_file.write_text(content) - - # Load written file. 
- loader = PyFileConfigLoader(filename, path=str(path)) - cfg = loader.load_config() - return cfg - - -def test_good_config_file(tmp_path): - cfg = get_config_from_file(tmp_path, GOOD_CONFIG) - - # Pass config to EventLogger - e = EventLogger(config=cfg) - - assert len(e.handlers) > 0 - assert isinstance(e.handlers[0], logging.Handler) - - -def test_bad_config_file(tmp_path): - cfg = get_config_from_file(tmp_path, BAD_CONFIG) - - with pytest.raises(TraitError): - EventLogger(config=cfg) diff --git a/tests/test_register_schema.py b/tests/test_logger.py similarity index 57% rename from tests/test_register_schema.py rename to tests/test_logger.py index ef64175..0962332 100644 --- a/tests/test_register_schema.py +++ b/tests/test_logger.py @@ -5,9 +5,58 @@ import jsonschema import pytest -from ruamel.yaml import YAML +from jsonschema.exceptions import ValidationError +from traitlets import TraitError +from traitlets.config.loader import PyFileConfigLoader +from jupyter_events import yaml from jupyter_events.logger import EventLogger +from jupyter_events.schema_registry import SchemaRegistryException + +GOOD_CONFIG = """ +import logging + +c.EventLogger.handlers = [ + logging.StreamHandler() +] +""" + +BAD_CONFIG = """ +import logging + +c.EventLogger.handlers = [ + 0 +] +""" + + +def get_config_from_file(path, content): + # Write config file + filename = "config.py" + config_file = path / filename + config_file.write_text(content) + + # Load written file. 
+ loader = PyFileConfigLoader(filename, path=str(path)) + cfg = loader.load_config() + return cfg + + +def test_good_config_file(tmp_path): + cfg = get_config_from_file(tmp_path, GOOD_CONFIG) + + # Pass config to EventLogger + e = EventLogger(config=cfg) + + assert len(e.handlers) > 0 + assert isinstance(e.handlers[0], logging.Handler) + + +def test_bad_config_file(tmp_path): + cfg = get_config_from_file(tmp_path, BAD_CONFIG) + + with pytest.raises(TraitError): + EventLogger(config=cfg) def test_register_invalid_schema(): @@ -15,8 +64,8 @@ def test_register_invalid_schema(): Invalid JSON Schemas should fail registration """ el = EventLogger() - with pytest.raises(jsonschema.SchemaError): - el.register_schema( + with pytest.raises(ValidationError): + el.register_event_schema( { # Totally invalid "properties": True @@ -31,11 +80,11 @@ def test_missing_required_properties(): They aren't required by JSON Schema itself """ el = EventLogger() - with pytest.raises(ValueError): - el.register_schema({"properties": {}}) + with pytest.raises(ValidationError): + el.register_event_schema({"properties": {}}) - with pytest.raises(ValueError): - el.register_schema( + with pytest.raises(ValidationError): + el.register_event_schema( { "$id": "something", "$version": 1, # This should been 'version' @@ -43,25 +92,6 @@ def test_missing_required_properties(): ) -def test_reserved_properties(): - """ - User schemas can't have properties starting with __ - - These are reserved - """ - el = EventLogger() - with pytest.raises(ValueError): - el.register_schema( - { - "$id": "test/test", - "version": 1, - "properties": { - "__fail__": {"type": "string", "categories": ["unrestricted"]}, - }, - } - ) - - def test_timestamp_override(): """ Simple test for overriding timestamp @@ -70,33 +100,28 @@ def test_timestamp_override(): "$id": "test/test", "version": 1, "properties": { - "something": {"type": "string", "categories": ["unrestricted"]}, + "something": { + "type": "string", + "title": 
"test", + }, }, } output = io.StringIO() handler = logging.StreamHandler(output) el = EventLogger(handlers=[handler]) - el.register_schema(schema) - el.allowed_schemas = ["test/test"] + el.register_event_schema(schema) timestamp_override = datetime.utcnow() - timedelta(days=1) - el.record_event( - "test/test", - 1, - { - "something": "blah", - }, - timestamp_override=timestamp_override, + el.emit( + "test/test", 1, {"something": "blah"}, timestamp_override=timestamp_override ) handler.flush() - event_capsule = json.loads(output.getvalue()) - assert event_capsule["__timestamp__"] == timestamp_override.isoformat() + "Z" -def test_record_event(): +def test_emit(): """ Simple test for emitting valid events """ @@ -104,17 +129,19 @@ def test_record_event(): "$id": "test/test", "version": 1, "properties": { - "something": {"type": "string", "categories": ["unrestricted"]}, + "something": { + "type": "string", + "title": "test", + }, }, } output = io.StringIO() handler = logging.StreamHandler(output) el = EventLogger(handlers=[handler]) - el.register_schema(schema) - el.allowed_schemas = ["test/test"] + el.register_event_schema(schema) - el.record_event( + el.emit( "test/test", 1, { @@ -136,120 +163,103 @@ def test_record_event(): } -def test_register_schema_file(tmp_path): +def test_register_event_schema(tmp_path): """ Register schema from a file """ schema = { "$id": "test/test", "version": 1, + "type": "object", "properties": { - "something": {"type": "string", "categories": ["unrestricted"]}, + "something": { + "type": "string", + "title": "test", + }, }, } el = EventLogger() - - yaml = YAML(typ="safe") - schema_file = tmp_path.joinpath("schema.yml") yaml.dump(schema, schema_file) - el.register_schema_file(str(schema_file)) + el.register_event_schema(schema_file) + assert ("test/test", 1) in el.schemas - assert schema in el.schemas.values() - -def test_register_schema_file_object(tmp_path): +def test_register_event_schema_object(tmp_path): """ Register schema from a 
file """ schema = { "$id": "test/test", "version": 1, + "type": "object", "properties": { - "something": {"type": "string", "categories": ["unrestricted"]}, + "something": { + "type": "string", + "title": "test", + }, }, } el = EventLogger() - - yaml = YAML(typ="safe") - schema_file = tmp_path.joinpath("schema.yml") yaml.dump(schema, schema_file) - with open(str(schema_file)) as f: - el.register_schema_file(f) + el.register_event_schema(schema_file) - assert schema in el.schemas.values() + assert ("test/test", 1) in el.schemas -def test_allowed_schemas(): - """ - Events should be emitted only if their schemas are allowed - """ - schema = { - "$id": "test/test", - "version": 1, - "properties": { - "something": {"type": "string", "categories": ["unrestricted"]}, - }, - } - - output = io.StringIO() - handler = logging.StreamHandler(output) - el = EventLogger(handlers=[handler]) - # Just register schema, but do not mark it as allowed - el.register_schema(schema) - - el.record_event( - "test/test", - 1, - { - "something": "blah", - }, - ) - handler.flush() - - assert output.getvalue() == "" - - -def test_record_event_badschema(): +def test_emit_badschema(): """ Fail fast when an event doesn't conform to its schema """ schema = { "$id": "test/test", "version": 1, + "type": "object", "properties": { - "something": {"type": "string", "categories": ["unrestricted"]}, - "status": {"enum": ["success", "failure"], "categories": ["unrestricted"]}, + "something": { + "type": "string", + "title": "test", + }, + "status": { + "enum": ["success", "failure"], + "title": "test 2", + }, }, } el = EventLogger(handlers=[logging.NullHandler()]) - el.register_schema(schema) + el.register_event_schema(schema) el.allowed_schemas = ["test/test"] with pytest.raises(jsonschema.ValidationError): - el.record_event( - "test/test", 1, {"something": "blah", "status": "hi"} # 'not-in-enum' - ) + el.emit("test/test", 1, {"something": "blah", "status": "hi"}) # 'not-in-enum' def 
test_unique_logger_instances(): schema0 = { "$id": "test/test0", "version": 1, + "type": "object", "properties": { - "something": {"type": "string", "categories": ["unrestricted"]}, + "something": { + "type": "string", + "title": "test", + }, }, } schema1 = { "$id": "test/test1", "version": 1, + "type": "object", "properties": { - "something": {"type": "string", "categories": ["unrestricted"]}, + "something": { + "type": "string", + "title": "test", + }, }, } @@ -259,21 +269,21 @@ def test_unique_logger_instances(): handler1 = logging.StreamHandler(output1) el0 = EventLogger(handlers=[handler0]) - el0.register_schema(schema0) + el0.register_event_schema(schema0) el0.allowed_schemas = ["test/test0"] el1 = EventLogger(handlers=[handler1]) - el1.register_schema(schema1) + el1.register_event_schema(schema1) el1.allowed_schemas = ["test/test1"] - el0.record_event( + el0.emit( "test/test0", 1, { "something": "blah", }, ) - el1.record_event( + el1.emit( "test/test1", 1, { @@ -312,20 +322,28 @@ def test_register_duplicate_schemas(): schema0 = { "$id": "test/test", "version": 1, + "type": "object", "properties": { - "something": {"type": "string", "categories": ["unrestricted"]}, + "something": { + "type": "string", + "title": "test", + }, }, } schema1 = { "$id": "test/test", "version": 1, + "type": "object", "properties": { - "somethingelse": {"type": "string", "categories": ["unrestricted"]}, + "something": { + "type": "string", + "title": "test", + }, }, } el = EventLogger() - el.register_schema(schema0) - with pytest.raises(ValueError): - el.register_schema(schema1) + el.register_event_schema(schema0) + with pytest.raises(SchemaRegistryException): + el.register_event_schema(schema1) diff --git a/tests/test_schema.py b/tests/test_schema.py new file mode 100644 index 0000000..2211bf5 --- /dev/null +++ b/tests/test_schema.py @@ -0,0 +1,60 @@ +import pytest +from jsonschema.exceptions import ValidationError + +from jupyter_events import yaml +from jupyter_events.schema 
import EventSchema, EventSchemaLoadingError +from jupyter_events.validators import validate_schema + +from .utils import SCHEMA_PATH + +BAD_SCHEMAS = [ + ["reserved-property.yaml", "Properties starting with 'dunder'"], + ["nested-reserved-property.yaml", "Properties starting with 'dunder'"], +] + + +@pytest.mark.parametrize("schema_file,validation_error_msg", BAD_SCHEMAS) +def test_bad_validations(schema_file, validation_error_msg): + """ + Validation fails because the schema is missing + a redactionPolicies field. + """ + # Read the schema file + with open(SCHEMA_PATH / "bad" / schema_file) as f: + schema = yaml.loads(f) + # Assert that the schema files for a known reason. + with pytest.raises(ValidationError) as err: + validate_schema(schema) + assert validation_error_msg in err.value.message + + +GOOD_SCHEMAS = ["array.yaml", "nested-array.yaml", "basic.yaml"] + + +@pytest.mark.parametrize("schema_file", GOOD_SCHEMAS) +def test_good_validations(schema_file): + """ + Validation fails because the schema is missing + a redactionPolicies field. + """ + # Read the schema file + with open(SCHEMA_PATH / "good" / schema_file) as f: + schema = yaml.loads(f) + # Assert that the schema files for a known reason. 
+ validate_schema(schema) + + +@pytest.mark.parametrize( + "schema", + [ + # Non existent paths + "non-existent-file.yml", + "non/existent/path", + "non/existent/path/file.yaml", + # Valid yaml string, but not a valid object + "random string", + ], +) +def test_loading_string_error(schema): + with pytest.raises(EventSchemaLoadingError): + EventSchema(schema) diff --git a/tests/test_traits.py b/tests/test_traits.py index 1740f18..a6878f2 100644 --- a/tests/test_traits.py +++ b/tests/test_traits.py @@ -3,7 +3,7 @@ import pytest from traitlets import HasTraits, TraitError -from jupyter_events.traits import Handlers, SchemaOptions +from jupyter_events.traits import Handlers class HasHandlers(HasTraits): @@ -27,46 +27,3 @@ def test_mixed_handlers_values(): handlers = [logging.NullHandler(), 1] with pytest.raises(TraitError): HasHandlers(handlers=handlers) - - -class HasSchemaOptions(HasTraits): - schema_options = SchemaOptions({}, allow_none=True) - - -@pytest.mark.parametrize( - "schema_options", - [ - # schema_options can be a list of schema_names. In this case, - # the SchemaOptions trait will turn this list into a dictionary - # with the list items as keys the values as empty dictionaries. - ["schema_name_1", "schema_name_2"], - # Empty nested config are okay. - {"schema_name_1": {}}, - # Nested config with empty values is okay too. - {"schema_name_1": {"allowed_categories": []}}, - # Test complete config for good measure. - {"schema_name_1": {"allowed_categories": ["value"]}}, - # Test multiple values. - {"schema_name_1": {"allowed_categories": ["value"]}, "schema_name_2": {}}, - ], -) -def test_good_schema_options(schema_options): - obj = HasSchemaOptions(schema_options=schema_options) - assert type(obj.schema_options) == dict - - -@pytest.mark.parametrize( - "schema_options", - [ - # Raise an error if Schema Options has unknown attribute. - {"schema_name_1": {"unknown_attribute": []}}, - # Test multiple values. 
- { - "schema_name_1": {"allowed_categories": ["value"]}, - "schema_name_2": {"unknown_attribute": []}, - }, - ], -) -def test_bad_schema_options(schema_options): - with pytest.raises(TraitError): - HasSchemaOptions(schema_options=schema_options) diff --git a/tests/utils.py b/tests/utils.py index 0c2362c..2286364 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,22 +1,25 @@ import io import json import logging +import pathlib from copy import deepcopy from jupyter_events.logger import EventLogger +SCHEMA_PATH = pathlib.Path(__file__).parent / "schemas" -def get_event_data(event, schema, schema_id, version, allowed_schemas): + +def get_event_data(event, schema, schema_id, version, unredacted_policies): sink = io.StringIO() # Create a handler that captures+records events with allowed tags. handler = logging.StreamHandler(sink) - e = EventLogger(handlers=[handler], allowed_schemas=allowed_schemas) + e = EventLogger(handlers=[handler], unredacted_policies=unredacted_policies) e.register_schema(schema) # Record event and read output - e.record_event(schema_id, version, deepcopy(event)) + e.emit(schema_id, version, deepcopy(event)) recorded_event = json.loads(sink.getvalue()) return {