Replace OpenTracing extension with one for OpenTelemetry? #649

ajbouh · 2021-08-16T04:15:07Z

https://opentracing.io/ now carries a banner announcing that it's been supplanted by OpenTelemetry

Are you interested in a PR that adds OpenTelemetry based tracing?

rafalp · 2021-08-24T17:07:00Z

Yup! That would be helpful, thanks!

kamilglod · 2021-09-02T10:22:59Z

here is my basic implementation based on OpenTracing:

from copy import deepcopy
from functools import partial
from inspect import isawaitable
from typing import Any, Callable, Dict, Optional

from graphql import GraphQLResolveInfo
from opentelemetry import trace
from opentelemetry.trace.status import Status, StatusCode
from ariadne.types import Extension, Resolver
from ariadne.contrib.tracing.utils import format_path, should_trace

ArgFilter = Callable[[Dict[str, Any], GraphQLResolveInfo], Dict[str, Any]]


class OpentelemetryExtension(Extension):
    _arg_filter: Optional[ArgFilter]
    _tracer: trace.Tracer

    def __init__(
        self,
        *,
        arg_filter: Optional[ArgFilter] = None,
        tracer_provider: Optional[trace.TracerProvider] = None,
    ):
        self._arg_filter = arg_filter
        self._tracer = trace.get_tracer(__name__, "0.1.0", tracer_provider)

    async def resolve(self, next_: Resolver, parent: Any, info: GraphQLResolveInfo, **kwargs):
        if not should_trace(info):
            result = next_(parent, info, **kwargs)
            if isawaitable(result):
                result = await result
            return result

        with self._tracer.start_as_current_span(info.field_name, kind=trace.SpanKind.SERVER) as span:
            span.set_attribute("graphql.parentType", info.parent_type.name)

            graphql_path = ".".join(map(str, format_path(info.path)))  # pylint: disable=bad-builtin
            span.set_attribute("graphql.path", graphql_path)

            if kwargs:
                filtered_kwargs = self.filter_resolver_args(kwargs, info)
                for kwarg, value in filtered_kwargs.items():
                    span.set_attribute(f"graphql.param.{kwarg}", value)

            result = next_(parent, info, **kwargs)
            if isawaitable(result):
                result = await result

            span.set_status(Status(StatusCode.OK))
            return result

    def filter_resolver_args(self, args: Dict[str, Any], info: GraphQLResolveInfo) -> Dict[str, Any]:
        if not self._arg_filter:
            return args

        return self._arg_filter(deepcopy(args), info)


def opentracing_extension(*, arg_filter: Optional[ArgFilter] = None):
    return partial(OpentelemetryExtension, arg_filter=arg_filter)

nilansaha · 2022-01-27T01:18:15Z

I would suggest extracting the context headers from incoming request and adding that to the trace as well. Very useful for context propagating across services. My team uses it with a federated GQL setup.
Essentially changing

with self._tracer.start_as_current_span(info.field_name, kind=trace.SpanKind.SERVER) as span:

to something like

from opentelemetry.propagate import extract

with self._tracer.start_as_current_span(info.field_name, kind=trace.SpanKind.SERVER, context=extract(request.headers)) as span:

You have to change request.headers to wherever the incoming requests header is stored in the GraphQLResolveInfo. Sorry I dont remember of the top of my head.

process0 · 2022-01-27T01:28:09Z

It's info.context["request"].headers

        ctx = extract(
            carrier=info.context["request"].headers
        )

        with self._tracer.start_as_current_span(
            info.field_name, kind=trace.SpanKind.SERVER, context=ctx
        ) as span:

nilansaha · 2022-01-27T01:33:46Z

Gotta love open-source. Everyone contributing and suddenly we have something super useful.

rmorshea · 2022-06-30T03:15:13Z

This implementation by @kamilglod doesn't seem quite right. The spans are not correctly grouped together since there's not root span created at request_started and closed at request_finished as in OpenTracing.

rmorshea · 2022-06-30T03:20:49Z

Here's my corrected implementation:

from __future__ import annotations

from copy import deepcopy
from functools import partial
from inspect import isawaitable
from typing import Any, Callable

from ariadne.contrib.tracing.utils import format_path, should_trace
from ariadne.types import Extension, Resolver
from graphql import GraphQLResolveInfo
from opentelemetry import trace
from opentelemetry.propagate import extract
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.resources import get_aggregated_resources
from opentelemetry.sdk.trace.export import (
    ConsoleSpanExporter,
    SimpleSpanProcessor,
)
from opentelemetry.trace.status import Status, StatusCode

def opentracing_extension(*, arg_filter: ArgFilter | None = None):

    # REPLACE THESE BASED ON YOUR ENVIRONMENT
    span_processor = SimpleSpanProcessor(ConsoleSpanExporter())
    resources = []
    id_generator = None

    return partial(
        OpentelemetryExtension,
        arg_filter=arg_filter,
        tracer_provider=TracerProvider(
            # Processes traces in batches as opposed to immediately one after the other
            active_span_processor=span_processor,
            resource=get_aggregated_resources(resources),
            id_generator=id_generator,
        ),
    )


ArgFilter = Callable[[dict[str, Any], GraphQLResolveInfo], dict[str, Any]]


class OpenTelemetryExtension(Extension):
    """OpenTelemetry extension for Ariadne"""

    _root_span: trace.Span

    def __init__(
        self,
        *,
        arg_filter: _ArgFilter | None = None,
        tracer_provider: trace.TracerProvider | None = None,
    ):
        self._arg_filter = arg_filter
        self._tracer = trace.get_tracer(__name__, "0.1.0", tracer_provider)

    def request_started(self, context: Any) -> None:
        """Begin the root span for this request and add any request headers as context"""
        self._root_span_manager = self._tracer.start_as_current_span(
            _graphql_span_name(),
            kind=trace.SpanKind.SERVER,
            context=extract(carrier=context["request"].headers),
        )
        self._root_span = self._root_span_manager.__enter__()

    def request_finished(self, context: Any) -> None:
        """Close the root span"""
        self._root_span_manager.__exit__(Exception, Exception(), None)

    async def resolve(
        self, next_: Resolver, parent: Any, info: GraphQLResolveInfo, **kwargs: Any
    ) -> Any:
        """Begin a new span at each query resolution step"""
        if not should_trace(info):
            result = next_(parent, info, **kwargs)
            if isawaitable(result):
                result = await result
            return result

        try:
            with self._tracer.start_as_current_span(_graphql_span_name(info)) as span:
                for kwarg, value in self.filter_args(kwargs, info).items():
                    span.set_attribute(f"graphql.param.{kwarg}", value)

                result = next_(parent, info, **kwargs)
                if isawaitable(result):
                    result = await result

                span.set_status(Status(StatusCode.OK))
                return result
        except Exception:
            self._root_span.set_status(Status(StatusCode.ERROR))

    def filter_args(self, args: dict[str, Any], info: GraphQLResolveInfo) -> dict[str, Any]:
        return self._arg_filter(args, info) if self._arg_filter else args

artursmet · 2022-07-06T09:11:13Z

@rmorshea That's a great snippet! Could you open a pull request and place this extension in ariadne.contrib?

rmorshea · 2022-07-08T01:20:03Z

There's still little problems I'm discovering. For example span.set_attribute(f"graphql.param.{kwarg}", value) doesn't work because value might be an invalid type. Will try to put something up when I've confirmed it all works as expected.

dacevedo12 · 2023-01-31T23:36:52Z

I've used this approach in production but unfortunately had to back down due to a negative impact in performance, even for simple things like the introspection query.

Beware of middlewares and extensions #708

dacevedo12 · 2023-01-31T23:46:29Z

I'm unsure whether the aforementioned performance issue is specific to Ariadne though.

Looking at other frameworks built on top of graphql-python don't seem to have issues with it
https://github.com/strawberry-graphql/strawberry/blob/main/strawberry/extensions/tracing/opentelemetry.py

rafalp · 2023-02-02T14:14:31Z

@dacevedo12 I don't know how they don't have issues with it if its same query executor underneath 🤔

Maybe the skip resolver logic at Ariadne is bogged? I'll be more than happy to investigate this properly when I get 0.18 out of the door sometime next week. I know we will need to expand our benchmarks suite first so we have big and repeatable datasets and schema closer to what real apps are doing.

dacevedo12 · 2023-02-02T15:53:22Z

It's weird indeed. After debugging a bit I've managed to track it down to the get_field_resolver function (https://github.com/graphql-python/graphql-core/blob/55547d8cb4e4abba9e45a8f8e603f9303e351aa3/src/graphql/execution/middleware.py#L53), which is called from https://github.com/graphql-python/graphql-core/blob/072c59e89052a6b28e0c3c8b2710afc885fce2d0/src/graphql/execution/execute.py#L535

That codepath made the introspection query take more than 2 seconds in my schema, which usually takes around 200-600ms.

Will try to put together a reproducible demo this weekend

rafalp · 2023-02-21T13:12:56Z

@dacevedo12 any luck with a demo? I'm currently releasing Ariadne 0.18 where default middleware manager can be swapped for custom one. But I don't know yet if there's much space for performance improvement there.

dacevedo12 · 2023-02-26T00:43:43Z

Sorry for the delay, it's been a busy couple of weeks. Yes, I've managed to put together a minimal reproducible demo here:

https://github.com/dacevedo12/ariadne-perf-demo

Changing extensions=[] to extensions=[DemoExtension] turns into a 2x increase in the response time for the introspection query

Things I've noticed so far:

It happens with ariadne 0.17.1 and 0.18.1
It happens with and without a framework (tried with starlette and bare ariadne asgi app)
It happens with uvloop and asyncio eventloops
It does not happen with sync execution, which kinda points to Significant performance hit when using async resolvers graphql-python/graphql-core#190

Cito · 2023-02-26T11:56:07Z

Thanks @dacevedo12. We should separate the performance degradation due to middleware from that due to async execution. I did not look deeply into this, but it looks like Ariadne's default exensions are async middleware, so they may turn every resolver into an async one.

Note that middleware is one of the features that do not exist in GraphQL.js and was taken over from GraphQL-core 2 in order to stay compatible, not because it is a good solution except for a few special cases. I can well imagine that it causes a performant loss because it is run on every resolver, and this gets worse when every resolver is made async due to the issues mentioned above.

dacevedo12 · 2023-02-27T15:49:12Z

Yeah, it's not middlewares but the way those are implemented that turns all resolvers into async ones, which in the current state of affairs with graphql-core means a significant slowdown. The dots have been connected 🙌

In the meantime, it seems unfeasible to implement this kind of tracing properly with async python... even things like https://github.com/DataDog/dd-trace-py/blob/1.x/ddtrace/contrib/graphql/patch.py rely on middleware, but it doesn't bother awaiting the result, which yields inaccurate span durations

dacevedo12 · 2023-03-10T16:17:37Z

I've figured that you can have a def resolve( method and return a wrapped coroutine to prevent this issue:

with self._tracer.start_as_current_span(
    "graphql.execute",
    attributes=kwargs,
    name=path,
):
    result = super().resolve(next_, obj, info, **kwargs)

    if is_awaitable(result):

        async def await_result() -> Any:
            with self._tracer.start_as_current_span(
                "graphql.execute.async"
            ):
                return await result

        return await_result()
    return result

Note that this approach will create another span.

Still, this is an ariadne-specific instrumentation though. I'll be working on a graphql-core instrumentor for opentelemetry (that doesn't rely on middlewares) so that all gql python libraries can benefit from it

Will link the PR once I have it

dacevedo12 · 2023-03-13T23:15:04Z

Here it is open-telemetry/opentelemetry-python-contrib#1719

rafalp · 2023-06-02T14:28:44Z

Just a quick update that I've got Open Telemetry working with Ariadne 0.19 and I am hoping to wrap up extensions work soonish so Ariadne 0.20 with both OpenTelemetry and OpenTracing extensions seeing performance improvements based on feedback from @Cito <3

dacevedo12 · 2023-06-03T20:26:28Z

Looking great, kudos @rafalp

My attempt at submitting a gql core instrumentor wasn't successful. They say they're short staffed and prefer each library adding OTel support on their own.

rafalp added enhancement New feature or request help wanted Extra attention is needed labels Aug 24, 2021

rmorshea mentioned this issue May 27, 2022

Support For Ariadne aws/aws-xray-sdk-python#339

Open

dacevedo12 mentioned this issue Jan 31, 2023

Graphql query resolution gets very slow with middlewares. #708

Closed

dacevedo12 mentioned this issue Feb 26, 2023

Significant performance hit when using async resolvers graphql-python/graphql-core#190

Open

dacevedo12 mentioned this issue Feb 27, 2023

GraphQL instrumentation open-telemetry/opentelemetry-python-contrib#1698

Open

rafalp self-assigned this Apr 19, 2023

rafalp removed the help wanted Extra attention is needed label Apr 19, 2023

This comment was marked as off-topic.

Sign in to view

rafalp added this to the Next release milestone May 8, 2023

rafalp mentioned this issue Jun 2, 2023

Improve extensions performance & add OpenTelemetry #1091

Merged

4 tasks

rafalp closed this as completed in #1091 Jun 6, 2023

AndersonTorres mentioned this issue Nov 4, 2023

treewide: delete opentracing NixOS/nixpkgs#265032

Closed

13 tasks

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Replace OpenTracing extension with one for OpenTelemetry? #649

Replace OpenTracing extension with one for OpenTelemetry? #649

ajbouh commented Aug 16, 2021

rafalp commented Aug 24, 2021

kamilglod commented Sep 2, 2021

nilansaha commented Jan 27, 2022 •

edited

Loading

process0 commented Jan 27, 2022

nilansaha commented Jan 27, 2022

rmorshea commented Jun 30, 2022 •

edited

Loading

rmorshea commented Jun 30, 2022 •

edited

Loading

artursmet commented Jul 6, 2022

rmorshea commented Jul 8, 2022

dacevedo12 commented Jan 31, 2023

dacevedo12 commented Jan 31, 2023

rafalp commented Feb 2, 2023

dacevedo12 commented Feb 2, 2023 •

edited

Loading

rafalp commented Feb 21, 2023

dacevedo12 commented Feb 26, 2023 •

edited

Loading

Cito commented Feb 26, 2023

dacevedo12 commented Feb 27, 2023 •

edited

Loading

dacevedo12 commented Mar 10, 2023 •

edited

Loading

dacevedo12 commented Mar 13, 2023

This comment was marked as off-topic.

This comment was marked as off-topic.

rafalp commented Jun 2, 2023

dacevedo12 commented Jun 3, 2023

Replace OpenTracing extension with one for OpenTelemetry? #649

Replace OpenTracing extension with one for OpenTelemetry? #649

Comments

ajbouh commented Aug 16, 2021

rafalp commented Aug 24, 2021

kamilglod commented Sep 2, 2021

nilansaha commented Jan 27, 2022 • edited Loading

process0 commented Jan 27, 2022

nilansaha commented Jan 27, 2022

rmorshea commented Jun 30, 2022 • edited Loading

rmorshea commented Jun 30, 2022 • edited Loading

artursmet commented Jul 6, 2022

rmorshea commented Jul 8, 2022

dacevedo12 commented Jan 31, 2023

dacevedo12 commented Jan 31, 2023

rafalp commented Feb 2, 2023

dacevedo12 commented Feb 2, 2023 • edited Loading

rafalp commented Feb 21, 2023

dacevedo12 commented Feb 26, 2023 • edited Loading

Cito commented Feb 26, 2023

dacevedo12 commented Feb 27, 2023 • edited Loading

dacevedo12 commented Mar 10, 2023 • edited Loading

dacevedo12 commented Mar 13, 2023

This comment was marked as off-topic.

This comment was marked as off-topic.

rafalp commented Jun 2, 2023

dacevedo12 commented Jun 3, 2023

nilansaha commented Jan 27, 2022 •

edited

Loading

rmorshea commented Jun 30, 2022 •

edited

Loading

rmorshea commented Jun 30, 2022 •

edited

Loading

dacevedo12 commented Feb 2, 2023 •

edited

Loading

dacevedo12 commented Feb 26, 2023 •

edited

Loading

dacevedo12 commented Feb 27, 2023 •

edited

Loading

dacevedo12 commented Mar 10, 2023 •

edited

Loading