From 8b4d92e76f0f59bf4393b3f8d6cdfdd1386154e3 Mon Sep 17 00:00:00 2001 From: Aleksandr Shamukov Date: Mon, 22 Nov 2021 23:01:37 +0200 Subject: [PATCH] Add status code label to server latency histogram (#51) --- .../grpc/prometheus/Configuration.java | 51 ++++++++++++++++--- .../grpc/prometheus/MonitoringServerCall.java | 5 +- .../grpc/prometheus/ServerMetrics.java | 41 +++++++++++---- ...oringServerInterceptorIntegrationTest.java | 29 +++++++++++ 4 files changed, 109 insertions(+), 17 deletions(-) diff --git a/src/main/java/me/dinowernli/grpc/prometheus/Configuration.java b/src/main/java/me/dinowernli/grpc/prometheus/Configuration.java index 0b28ac0..92fdec1 100644 --- a/src/main/java/me/dinowernli/grpc/prometheus/Configuration.java +++ b/src/main/java/me/dinowernli/grpc/prometheus/Configuration.java @@ -19,6 +19,7 @@ public class Configuration { private final CollectorRegistry collectorRegistry; private final double[] latencyBuckets; private final List labelHeaders; + private final boolean isAddCodeLabelToHistograms; /** Returns a {@link Configuration} for recording all cheap metrics about the rpcs. */ public static Configuration cheapMetricsOnly() { @@ -26,7 +27,8 @@ public static Configuration cheapMetricsOnly() { false /* isIncludeLatencyHistograms */, CollectorRegistry.defaultRegistry, DEFAULT_LATENCY_BUCKETS, - new ArrayList<>()); + new ArrayList<>(), + false /* isAddCodeLabelToHistograms */); } /** @@ -38,7 +40,8 @@ public static Configuration allMetrics() { true /* isIncludeLatencyHistograms */, CollectorRegistry.defaultRegistry, DEFAULT_LATENCY_BUCKETS, - new ArrayList<>()); + new ArrayList<>(), + false); } /** @@ -47,7 +50,11 @@ public static Configuration allMetrics() { */ public Configuration withCollectorRegistry(CollectorRegistry collectorRegistry) { return new Configuration( - isIncludeLatencyHistograms, collectorRegistry, latencyBuckets, labelHeaders); + isIncludeLatencyHistograms, + collectorRegistry, + latencyBuckets, + labelHeaders, + isAddCodeLabelToHistograms); } /** @@ -55,7 +62,12 @@ public Configuration withCollectorRegistry(CollectorRegistry collectorRegistry) * recorded with the specified set of buckets. */ public Configuration withLatencyBuckets(double[] buckets) { - return new Configuration(isIncludeLatencyHistograms, collectorRegistry, buckets, labelHeaders); + return new Configuration( + isIncludeLatencyHistograms, + collectorRegistry, + buckets, + labelHeaders, + isAddCodeLabelToHistograms); } /** @@ -77,7 +89,27 @@ public Configuration withLabelHeaders(List headers) { List newHeaders = new ArrayList<>(labelHeaders); newHeaders.addAll(headers); return new Configuration( - isIncludeLatencyHistograms, collectorRegistry, latencyBuckets, newHeaders); + isIncludeLatencyHistograms, + collectorRegistry, + latencyBuckets, + newHeaders, + isAddCodeLabelToHistograms); + } + + /** + * Returns a copy {@link Configuration} with the difference that status code label will be added + * to latency histogram. If latency histogram itself is disabled, this takes no effect. Warning: + * this will increase the number of histograms by a factor of actually happened codes (up to + * {@link io.grpc.Status.Code} values count), which could lead to additional local memory usage + * and load on prometheus (storage and memory usage, query-time complexity) + */ + public Configuration withCodeLabelInLatencyHistogram() { + return new Configuration( + isIncludeLatencyHistograms, + collectorRegistry, + latencyBuckets, + labelHeaders, + true /* isAddCodeLabelToHistograms */); } /** Returns whether or not latency histograms for calls should be included. */ @@ -100,6 +132,11 @@ public List getLabelHeaders() { return labelHeaders; } + /** Returns whether or not status code label should be added to latency histogram. */ + public boolean isAddCodeLabelToHistograms() { + return isAddCodeLabelToHistograms; + } + /** * Returns the sanitized version of the label headers, after turning all hyphens to underscores. */ @@ -111,10 +148,12 @@ private Configuration( boolean isIncludeLatencyHistograms, CollectorRegistry collectorRegistry, double[] latencyBuckets, - List labelHeaders) { + List labelHeaders, + boolean isAddCodeLabelToHistograms) { this.isIncludeLatencyHistograms = isIncludeLatencyHistograms; this.collectorRegistry = collectorRegistry; this.latencyBuckets = latencyBuckets; this.labelHeaders = labelHeaders; + this.isAddCodeLabelToHistograms = isAddCodeLabelToHistograms; } } diff --git a/src/main/java/me/dinowernli/grpc/prometheus/MonitoringServerCall.java b/src/main/java/me/dinowernli/grpc/prometheus/MonitoringServerCall.java index 3b47f00..2bff8b3 100644 --- a/src/main/java/me/dinowernli/grpc/prometheus/MonitoringServerCall.java +++ b/src/main/java/me/dinowernli/grpc/prometheus/MonitoringServerCall.java @@ -61,11 +61,12 @@ private void reportStartMetrics() { } private void reportEndMetrics(Status status) { - serverMetrics.recordServerHandled(status.getCode(), requestMetadata); + Status.Code code = status.getCode(); + serverMetrics.recordServerHandled(code, requestMetadata); if (configuration.isIncludeLatencyHistograms()) { double latencySec = (clock.millis() - startInstant.toEpochMilli()) / (double) MILLIS_PER_SECOND; - serverMetrics.recordLatency(latencySec, requestMetadata); + serverMetrics.recordLatency(latencySec, requestMetadata, code); } } } diff --git a/src/main/java/me/dinowernli/grpc/prometheus/ServerMetrics.java b/src/main/java/me/dinowernli/grpc/prometheus/ServerMetrics.java index a69f89f..067aac3 100644 --- a/src/main/java/me/dinowernli/grpc/prometheus/ServerMetrics.java +++ b/src/main/java/me/dinowernli/grpc/prometheus/ServerMetrics.java @@ -28,8 +28,10 @@ class ServerMetrics { private static final List defaultRequestLabels = Arrays.asList("grpc_type", "grpc_service", "grpc_method"); + private static final String STATUS_CODE_LABEL = "grpc_code"; + private static final List defaultResponseLabels = - Arrays.asList("grpc_type", "grpc_service", "grpc_method", "code", "grpc_code"); + Arrays.asList("grpc_type", "grpc_service", "grpc_method", "code", STATUS_CODE_LABEL); private static final Counter.Builder serverStartedBuilder = Counter.build() @@ -76,6 +78,7 @@ class ServerMetrics { private final Counter serverStreamMessagesReceived; private final Counter serverStreamMessagesSent; private final Optional serverHandledLatencySeconds; + private final boolean isAddCodeLabelToHistograms; private final GrpcMethod method; @@ -86,7 +89,8 @@ private ServerMetrics( Counter serverHandled, Counter serverStreamMessagesReceived, Counter serverStreamMessagesSent, - Optional serverHandledLatencySeconds) { + Optional serverHandledLatencySeconds, + boolean isAddCodeLabelToHistograms) { this.labelHeaderKeys = labelHeaderKeys; this.method = method; this.serverStarted = serverStarted; @@ -94,6 +98,7 @@ private ServerMetrics( this.serverStreamMessagesReceived = serverStreamMessagesReceived; this.serverStreamMessagesSent = serverStreamMessagesSent; this.serverHandledLatencySeconds = serverHandledLatencySeconds; + this.isAddCodeLabelToHistograms = isAddCodeLabelToHistograms; } public void recordCallStarted(Metadata metadata) { @@ -121,13 +126,18 @@ public void recordStreamMessageReceived(Metadata metadata) { * Only has any effect if monitoring is configured to include latency histograms. Otherwise, this * does nothing. */ - public void recordLatency(double latencySec, Metadata metadata) { + public void recordLatency(double latencySec, Metadata metadata, Code code) { if (!this.serverHandledLatencySeconds.isPresent()) { return; } - addLabels( - this.serverHandledLatencySeconds.get(), customLabels(metadata, labelHeaderKeys), method) - .observe(latencySec); + + final List allLabels = new ArrayList(); + allLabels.addAll(customLabels(metadata, labelHeaderKeys)); + if (isAddCodeLabelToHistograms) { + allLabels.add(code.toString()); + } + + addLabels(this.serverHandledLatencySeconds.get(), allLabels, method).observe(latencySec); } /** Knows how to produce {@link ServerMetrics} instances for individual methods. */ @@ -138,6 +148,7 @@ static class Factory { private final Counter serverStreamMessagesReceived; private final Counter serverStreamMessagesSent; private final Optional serverHandledLatencySeconds; + private final boolean isAddCodeLabelToHistograms; Factory(Configuration configuration) { CollectorRegistry registry = configuration.getCollectorRegistry(); @@ -160,15 +171,26 @@ static class Factory { .register(registry); if (configuration.isIncludeLatencyHistograms()) { + + List labels = new ArrayList(); + labels.addAll(defaultRequestLabels); + labels.addAll(configuration.getSanitizedLabelHeaders()); + + if (configuration.isAddCodeLabelToHistograms()) { + labels.add(STATUS_CODE_LABEL); + } + this.isAddCodeLabelToHistograms = configuration.isAddCodeLabelToHistograms(); + this.serverHandledLatencySeconds = Optional.of( serverHandledLatencySecondsBuilder .buckets(configuration.getLatencyBuckets()) - .labelNames( - asArray(defaultRequestLabels, configuration.getSanitizedLabelHeaders())) + .labelNames(labels.toArray(new String[0])) .register(registry)); + } else { this.serverHandledLatencySeconds = Optional.empty(); + this.isAddCodeLabelToHistograms = false; } } @@ -181,7 +203,8 @@ ServerMetrics createMetricsForMethod(GrpcMethod grpcMethod) { serverHandled, serverStreamMessagesReceived, serverStreamMessagesSent, - serverHandledLatencySeconds); + serverHandledLatencySeconds, + isAddCodeLabelToHistograms); } } } diff --git a/src/test/java/me/dinowernli/grpc/prometheus/integration/MonitoringServerInterceptorIntegrationTest.java b/src/test/java/me/dinowernli/grpc/prometheus/integration/MonitoringServerInterceptorIntegrationTest.java index 9367d73..7507311 100644 --- a/src/test/java/me/dinowernli/grpc/prometheus/integration/MonitoringServerInterceptorIntegrationTest.java +++ b/src/test/java/me/dinowernli/grpc/prometheus/integration/MonitoringServerInterceptorIntegrationTest.java @@ -248,6 +248,35 @@ public void overridesHistogramBuckets() throws Throwable { "grpc_server_handled_latency_seconds", "grpc_server_handled_latency_seconds_bucket")) .isEqualTo(expectedNum); + + MetricFamilySamples.Sample sample = + getSample( + findRecordedMetricOrThrow("grpc_server_handled_latency_seconds"), + "grpc_server_handled_latency_seconds_bucket"); + + assertThat(sample.labelNames).containsExactly("grpc_type", "grpc_service", "grpc_method", "le"); + } + + @Test + public void addsStatusCodeLabel() throws Throwable { + double[] buckets = new double[] {8.0, 9.0, 10.0}; + startGrpcServer(ALL_METRICS.withCodeLabelInLatencyHistogram().withLatencyBuckets(buckets)); + createGrpcBlockingStub().sayHello(REQUEST); + + MetricFamilySamples.Sample sample = + getSample( + findRecordedMetricOrThrow("grpc_server_handled_latency_seconds"), + "grpc_server_handled_latency_seconds_bucket"); + + assertThat(sample.labelNames) + .containsExactly("grpc_type", "grpc_service", "grpc_method", "grpc_code", "le"); + assertThat(sample.labelValues) + .containsExactly( + "UNARY", + HelloServiceImpl.SERVICE_NAME, + HelloServiceImpl.UNARY_METHOD_NAME, + "OK", + "8.0"); } @Test