Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ddtrace/tracer: report datadog.tracer.api.errors health metric #3024

Merged
merged 7 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions ddtrace/tracer/transport.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,16 +151,18 @@ func (t *httpTransport) send(p *payload) (body io.ReadCloser, err error) {
}
req.Header.Set(traceCountHeader, strconv.Itoa(p.itemCount()))
req.Header.Set(headerComputedTopLevel, "yes")
if t, ok := traceinternal.GetGlobalTracer().(*tracer); ok {
if t.config.tracingAsTransport || t.config.canComputeStats() {
var tr *tracer
var haveTracer bool
if tr, haveTracer = traceinternal.GetGlobalTracer().(*tracer); haveTracer {
if tr.config.tracingAsTransport || tr.config.canComputeStats() {
// tracingAsTransport uses this header to disable the trace agent's stats computation
// while making canComputeStats() always false to also disable client stats computation.
req.Header.Set("Datadog-Client-Computed-Stats", "yes")
}
droppedTraces := int(atomic.SwapUint32(&t.droppedP0Traces, 0))
partialTraces := int(atomic.SwapUint32(&t.partialTraces, 0))
droppedSpans := int(atomic.SwapUint32(&t.droppedP0Spans, 0))
if stats := t.statsd; stats != nil {
droppedTraces := int(atomic.SwapUint32(&tr.droppedP0Traces, 0))
partialTraces := int(atomic.SwapUint32(&tr.partialTraces, 0))
droppedSpans := int(atomic.SwapUint32(&tr.droppedP0Spans, 0))
if stats := tr.statsd; stats != nil {
stats.Count("datadog.tracer.dropped_p0_traces", int64(droppedTraces),
[]string{fmt.Sprintf("partial:%s", strconv.FormatBool(partialTraces > 0))}, 1)
stats.Count("datadog.tracer.dropped_p0_spans", int64(droppedSpans), nil, 1)
Expand All @@ -170,9 +172,11 @@ func (t *httpTransport) send(p *payload) (body io.ReadCloser, err error) {
}
response, err := t.client.Do(req)
if err != nil {
reportAPIErrorsMetric(haveTracer, response, err, tr)
return nil, err
}
if code := response.StatusCode; code >= 400 {
reportAPIErrorsMetric(haveTracer, response, err, tr)
// error, check the body for context information and
// return a nice error.
msg := make([]byte, 1000)
Expand All @@ -187,6 +191,20 @@ func (t *httpTransport) send(p *payload) (body io.ReadCloser, err error) {
return response.Body, nil
}

// reportAPIErrorsMetric increments the datadog.tracer.api.errors health metric
// on the tracer's statsd client, tagged with the reason the send failed.
//
// The reason tag is "server_response_<status code>" when response is non-nil,
// "network_failure" when only err is non-nil, and empty when both are nil.
// Nothing is reported when haveTracer is false, or when t or its statsd
// client is nil.
func reportAPIErrorsMetric(haveTracer bool, response *http.Response, err error, t *tracer) {
	// send treats a nil statsd client as "no metrics"; do the same here
	// instead of panicking while handling an error.
	if !haveTracer || t == nil || t.statsd == nil {
		return
	}
	var reason string
	switch {
	case response != nil:
		// A response takes precedence over err when both are set.
		reason = fmt.Sprintf("server_response_%d", response.StatusCode)
	case err != nil:
		reason = "network_failure"
	}
	t.statsd.Incr("datadog.tracer.api.errors", []string{"reason:" + reason}, 1)
}

// endpoint returns the value of the transport's traceURL field.
func (t *httpTransport) endpoint() string {
	url := t.traceURL
	return url
}
85 changes: 85 additions & 0 deletions ddtrace/tracer/transport_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ import (

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
traceinternal "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal"
"gopkg.in/DataDog/dd-trace-go.v1/internal"
"gopkg.in/DataDog/dd-trace-go.v1/internal/statsdtest"
)

// getTestSpan returns a Span with different fields set
Expand Down Expand Up @@ -241,6 +243,89 @@ func TestCustomTransport(t *testing.T) {
assert.Equal(hits, 1)
}

// ErrTransport is a RoundTripper stub whose RoundTrip always fails.
type ErrTransport struct{}

// RoundTrip ignores the request and returns a nil response together with a
// fixed error.
func (*ErrTransport) RoundTrip(*http.Request) (*http.Response, error) {
	return nil, fmt.Errorf("error in RoundTripper")
}

// ErrResponseTransport is a RoundTripper stub that always answers with a
// 400 status and no transport error.
type ErrResponseTransport struct{}

// RoundTrip ignores the request and returns a response whose only populated
// field is StatusCode (400).
func (*ErrResponseTransport) RoundTrip(*http.Request) (*http.Response, error) {
	resp := new(http.Response)
	resp.StatusCode = http.StatusBadRequest
	return resp, nil
}

// OkTransport is a RoundTripper stub that always answers with a 200 status
// and no transport error.
type OkTransport struct{}

// RoundTrip ignores the request and returns a response whose only populated
// field is StatusCode (200).
func (*OkTransport) RoundTrip(*http.Request) (*http.Response, error) {
	resp := new(http.Response)
	resp.StatusCode = http.StatusOK
	return resp, nil
}

func TestApiErrorsMetric(t *testing.T) {
t.Run("error", func(t *testing.T) {
assert := assert.New(t)
c := &http.Client{
Transport: &ErrTransport{},
}
var tg statsdtest.TestStatsdClient
trc := newTracer(WithHTTPClient(c), withStatsdClient(&tg))
traceinternal.SetGlobalTracer(trc)
defer trc.Stop()

p, err := encode(getTestTrace(1, 1))
assert.NoError(err)

// We're expecting an error
_, err = trc.config.transport.send(p)
assert.Error(err)
calls := statsdtest.FilterCallsByName(tg.IncrCalls(), "datadog.tracer.api.errors")
assert.Len(calls, 1)
call := calls[0]
assert.Equal([]string{"reason:network_failure"}, call.Tags())

})
t.Run("response with err code", func(t *testing.T) {
assert := assert.New(t)
c := &http.Client{
Transport: &ErrResponseTransport{},
}
var tg statsdtest.TestStatsdClient
trc := newTracer(WithHTTPClient(c), withStatsdClient(&tg))
traceinternal.SetGlobalTracer(trc)
defer trc.Stop()

p, err := encode(getTestTrace(1, 1))
assert.NoError(err)

_, err = trc.config.transport.send(p)
assert.Error(err)

calls := statsdtest.FilterCallsByName(tg.IncrCalls(), "datadog.tracer.api.errors")
assert.Len(calls, 1)
call := calls[0]
assert.Equal([]string{"reason:server_response_400"}, call.Tags())
})
t.Run("successful send - no metric", func(t *testing.T) {
assert := assert.New(t)
var tg statsdtest.TestStatsdClient
c := &http.Client{
Transport: &OkTransport{},
}
trc := newTracer(WithHTTPClient(c), withStatsdClient(&tg))
traceinternal.SetGlobalTracer(trc)
defer trc.Stop()

p, err := encode(getTestTrace(1, 1))
assert.NoError(err)

_, err = trc.config.transport.send(p)
assert.NoError(err)

calls := statsdtest.FilterCallsByName(tg.IncrCalls(), "datadog.tracer.api.errors")
assert.Len(calls, 0)
})
}

func TestWithHTTPClient(t *testing.T) {
// disable instrumentation telemetry to prevent flaky number of requests
t.Setenv("DD_INSTRUMENTATION_TELEMETRY_ENABLED", "false")
Expand Down
14 changes: 14 additions & 0 deletions internal/statsdtest/statsdtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ type TestStatsdCall struct {
rate float64
}

// Tags returns the tags stored on the call. The returned slice is the call's
// own slice, not a copy.
func (c *TestStatsdCall) Tags() []string {
	stored := c.tags
	return stored
}

func (tg *TestStatsdClient) addCount(name string, value int64) {
tg.mu.Lock()
defer tg.mu.Unlock()
Expand Down Expand Up @@ -221,6 +225,16 @@ func (tg *TestStatsdClient) CallsByName() map[string]int {
return counts
}

// FilterCallsByName returns, in their original order, the entries of calls
// whose name equals name. The result is nil when nothing matches.
func FilterCallsByName(calls []TestStatsdCall, name string) []TestStatsdCall {
	var filtered []TestStatsdCall
	for i := range calls {
		if calls[i].name != name {
			continue
		}
		filtered = append(filtered, calls[i])
	}
	return filtered
}

func (tg *TestStatsdClient) Counts() map[string]int64 {
tg.mu.RLock()
defer tg.mu.RUnlock()
Expand Down
Loading