Skip to content

Commit

Permalink
validate JSON types/subtypes (#1647)
Browse files Browse the repository at this point in the history
As our default type is `code.custom`, which is not yet part of the spec,
this validation currently produces a lot of warnings.

Closes #1305
  • Loading branch information
beniwohli authored Oct 13, 2022
1 parent 26c2844 commit 6097d44
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 3 deletions.
2 changes: 1 addition & 1 deletion elasticapm/instrumentation/packages/asyncio/aiopg.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ async def call(self, module, method, wrapped, instance, args, kwargs):
else:
raise AssertionError("call from uninstrumented method")
async with async_capture_span(
name, leaf=True, span_type="db", span_subtype="postgres", span_action=action, extra=context
name, leaf=True, span_type="db", span_subtype="postgresql", span_action=action, extra=context
):
return await wrapped(*args, **kwargs)

Expand Down
47 changes: 46 additions & 1 deletion tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import codecs
import gzip
import io
import itertools
import json
import logging
import logging.handlers
Expand All @@ -40,8 +41,10 @@
import sys
import tempfile
import time
import warnings
import zlib
from collections import defaultdict
from typing import Optional

import jsonschema
import mock
Expand Down Expand Up @@ -70,6 +73,9 @@
METRICSET_SCHEMA = os.path.join(cur_dir, "upstream", "json-specs", "metricset.json")
METADATA_SCHEMA = os.path.join(cur_dir, "upstream", "json-specs", "metadata.json")

# Span type/subtype spec used by validate_span_type_subtype(). Read it with an explicit
# encoding (like the JSON schemas loaded below) instead of the platform default.
with open(os.path.join(cur_dir, "upstream", "json-specs", "span_types.json"), encoding="utf8") as f:
    SPAN_TYPES = json.load(f)


with codecs.open(ERRORS_SCHEMA, encoding="utf8") as errors_json, codecs.open(
TRANSACTIONS_SCHEMA, encoding="utf8"
Expand Down Expand Up @@ -113,6 +119,33 @@
}


def validate_span_type_subtype(item: dict) -> Optional[str]:
    """
    Validate span type/subtype against spec.

    At first, only warnings are issued. At a later point, it should return the message as string
    which will cause a validation error.

    A missing, ``None`` or empty subtype is accepted silently when the span type lists no subtypes,
    or when the type sets ``allow_null_subtype`` or ``allow_unlisted_subtype``.

    :param item: serialized span; ``type`` is required, ``subtype`` may be missing or ``None``
    :return: currently always ``None``; problems are reported as ``UserWarning``
    """
    type_name = item["type"]
    if type_name not in SPAN_TYPES:
        warnings.warn(f'Span type "{type_name}" not found in JSON spec', UserWarning)
        return None

    type_spec = SPAN_TYPES[type_name]
    # Listed subtypes are looked up by name further down, so fall back to an empty mapping.
    subtypes = type_spec.get("subtypes") or {}
    allow_unlisted = type_spec.get("allow_unlisted_subtype", False)
    subtype = item.get("subtype")

    if not subtype:
        # No subtype set: only a problem if the type lists subtypes and does not opt out of requiring one.
        if subtypes and not allow_unlisted and not type_spec.get("allow_null_subtype", False):
            warnings.warn(f'Subtype "{subtype}" not allowed for span type "{type_name}"', UserWarning)
        return None

    if subtype not in subtypes:
        if not allow_unlisted:
            if subtypes:
                warnings.warn(f'Subtype "{subtype}" not allowed for span type "{type_name}"', UserWarning)
            else:
                warnings.warn(
                    f'Span type "{type_name}" has no subtypes, but subtype "{subtype}" is set', UserWarning
                )
        return None

    # The subtype is listed; additionally check that the spec marks it as used by the Python agent.
    if "python" not in subtypes[subtype].get("__used_by", []):
        warnings.warn(f'"{type_name}.{subtype}" not marked as used by Python', UserWarning)
    return None


class ValidatingWSGIApp(ContentServer):
def __init__(self, **kwargs):
self.skip_validate = kwargs.pop("skip_validate", False)
Expand Down Expand Up @@ -147,6 +180,11 @@ def __call__(self, environ, start_response):
except jsonschema.ValidationError as e:
fail += 1
content += "/".join(map(str, e.absolute_schema_path)) + ": " + e.message + "\n"
if item_type == "span":
result = validate_span_type_subtype(item)
if result:
fail += 1
content += result
code = 202 if not fail else 400
response = Response(status=code)
response.headers.clear()
Expand Down Expand Up @@ -199,7 +237,10 @@ def elasticapm_client(request):
sys.excepthook = original_exceptionhook
execution_context.set_transaction(None)
execution_context.unset_span(clear_all=True)
assert not client._transport.validation_errors
if client._transport.validation_errors:
    # validation_errors maps event type -> list of messages; flatten all lists into one sequence.
    # (Unpacking the lists as separate arguments to str.join raises TypeError once two event types have errors.)
    pytest.fail(
        "Validation errors:" + "\n".join(itertools.chain.from_iterable(client._transport.validation_errors.values()))
    )


@pytest.fixture()
Expand Down Expand Up @@ -336,6 +377,10 @@ def queue(self, event_type, data, flush=False):
validator.validate(data)
except jsonschema.ValidationError as e:
self.validation_errors[event_type].append(e.message)
if event_type == "span":
result = validate_span_type_subtype(data)
if result:
self.validation_errors[event_type].append(result)

def start_thread(self, pid=None):
# don't call the parent method, but the one from ThreadManager
Expand Down
2 changes: 1 addition & 1 deletion tests/instrumentation/asyncio_tests/aiopg_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ async def test_select_sleep(instrument, cursor, elasticapm_client):
assert 100 < span["duration"] < 110
assert transaction["id"] == span["transaction_id"]
assert span["type"] == "db"
assert span["subtype"] == "postgres"
assert span["subtype"] == "postgresql"
assert span["action"] == "query"
assert span["sync"] == False

Expand Down
65 changes: 65 additions & 0 deletions tests/utils/span_type_subtype_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# BSD 3-Clause License
#
# Copyright (c) 2022, Elasticsearch BV
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import pytest

import elasticapm


def test_span_type_not_found(elasticapm_client):
    """Capturing a span with type "bar" must emit the "not found in JSON spec" ``UserWarning``."""
    expected_warning = 'Span type "bar" not found in JSON spec'
    elasticapm_client.begin_transaction("test")
    with pytest.warns(UserWarning, match=expected_warning), elasticapm.capture_span("foo", span_type="bar"):
        pass
    elasticapm_client.end_transaction("test")


def test_span_type_no_subtypes(elasticapm_client):
    """Capturing a "process" span with subtype "foo" must emit the "has no subtypes" ``UserWarning``."""
    expected_warning = 'Span type "process" has no subtypes, but subtype "foo" is set'
    elasticapm_client.begin_transaction("test")
    span = elasticapm.capture_span("foo", span_type="process", span_subtype="foo")
    with pytest.warns(UserWarning, match=expected_warning), span:
        pass
    elasticapm_client.end_transaction("test")


def test_span_type_subtype_not_allowed(elasticapm_client):
    """Capturing a "db" span with subtype "anonexistingdb" must emit the "not allowed" ``UserWarning``."""
    expected_warning = 'Subtype "anonexistingdb" not allowed for span type "db"'
    elasticapm_client.begin_transaction("test")
    span = elasticapm.capture_span("foo", span_type="db", span_subtype="anonexistingdb")
    with pytest.warns(UserWarning, match=expected_warning), span:
        pass
    elasticapm_client.end_transaction("test")


def test_span_type_not_used_by_python(elasticapm_client):
    """Capturing a "json"/"parse" span must emit the "not marked as used by Python" ``UserWarning``."""
    expected_warning = '"json.parse" not marked as used by Python'
    elasticapm_client.begin_transaction("test")
    span = elasticapm.capture_span("foo", span_type="json", span_subtype="parse")
    with pytest.warns(UserWarning, match=expected_warning), span:
        pass
    elasticapm_client.end_transaction("test")

0 comments on commit 6097d44

Please sign in to comment.