Skip to content

Commit

Permalink
emit rate limited JFR events when RejectedExecutionHandlers run (#7076)
Browse files Browse the repository at this point in the history
  • Loading branch information
richardstartin authored Jun 4, 2024
1 parent 87dc120 commit 8a0a406
Show file tree
Hide file tree
Showing 8 changed files with 151 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package datadog.trace.bootstrap.instrumentation.jfr.backpressure;

import datadog.trace.api.Config;
import datadog.trace.bootstrap.instrumentation.api.TaskWrapper;

/**
 * Entry point used to record sampled JFR {@link BackpressureSampleEvent}s.
 *
 * <p>The shared instance is created on first call to {@link #getInstance()} from the global
 * {@link Config}. Whether a given call to {@link #process(Class, Object)} emits an event is
 * decided entirely by the underlying {@link BackpressureSampler}.
 */
public final class BackpressureProfiling {

  /** Decides, per call, whether an event should be recorded. */
  private final BackpressureSampler sampler;

  /** Builds the shared instance with a sampler derived from the given configuration. */
  private BackpressureProfiling(final Config config) {
    this(new BackpressureSampler(config));
  }

  /** Package-private constructor allowing a specific sampler to be supplied. */
  BackpressureProfiling(BackpressureSampler sampler) {
    this.sampler = sampler;
  }

  /**
   * Returns the shared instance, constructing it on first use.
   *
   * @return the process-wide {@code BackpressureProfiling}
   */
  public static BackpressureProfiling getInstance() {
    return Holder.INSTANCE;
  }

  /** Starts the underlying sampler. */
  public void start() {
    sampler.start();
  }

  /**
   * Records a backpressure sample event if the sampler accepts this occurrence.
   *
   * @param backpressureMechanism class of the mechanism that applied backpressure; stored as the
   *     event's policy
   * @param task the affected task; its type as reported by {@code
   *     TaskWrapper.getUnwrappedType} is stored on the event
   */
  public void process(Class<?> backpressureMechanism, Object task) {
    if (!sampler.sample()) {
      // not selected by the sampler: nothing to record
      return;
    }
    final Class<?> taskType = TaskWrapper.getUnwrappedType(task);
    final BackpressureSampleEvent event =
        new BackpressureSampleEvent(backpressureMechanism, taskType);
    event.commit();
  }

  /** Defers construction of the shared instance until {@link #getInstance()} is first called. */
  private static final class Holder {
    static final BackpressureProfiling INSTANCE = new BackpressureProfiling(Config.get());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package datadog.trace.bootstrap.instrumentation.jfr.backpressure;

import datadog.trace.bootstrap.instrumentation.jfr.ContextualEvent;
import jdk.jfr.Category;
import jdk.jfr.Description;
import jdk.jfr.Event;
import jdk.jfr.Label;
import jdk.jfr.Name;

/**
 * JFR event registered under the name {@code datadog.BackpressureSample} in the
 * {@code Datadog} category.
 *
 * <p>Each instance carries the class of the policy that applied backpressure, the class of
 * the affected task, and the span identifiers supplied through
 * {@link #setContext(long, long)}.
 */
@Name("datadog.BackpressureSample")
@Label("Backpressure Sample")
@Description("Datadog backpressure sample event.")
@Category("Datadog")
public class BackpressureSampleEvent extends Event implements ContextualEvent {
// Class of the mechanism/policy that applied backpressure.
@Label("Policy")
private final Class<?> policy;

// Class of the task that was subject to backpressure.
@Label("Task")
private final Class<?> task;

// Populated via setContext(...).
@Label("Local Root Span Id")
private long localRootSpanId;

// Populated via setContext(...).
@Label("Span Id")
private long spanId;

/**
 * Creates the event and captures the current context.
 *
 * @param policy class of the mechanism that applied backpressure
 * @param task class of the affected task
 */
public BackpressureSampleEvent(Class<?> policy, Class<?> task) {
this.policy = policy;
this.task = task;
// NOTE(review): captureContext() is not defined in this class; presumably inherited from
// ContextualEvent and expected to call setContext(...) — confirm.
captureContext();
}

/**
 * Stores the span identifiers associated with this event.
 *
 * @param localRootSpanId id of the local root span
 * @param spanId id of the span
 */
@Override
public void setContext(long localRootSpanId, long spanId) {
this.localRootSpanId = localRootSpanId;
this.spanId = spanId;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package datadog.trace.bootstrap.instrumentation.jfr.backpressure;

import datadog.trace.api.Config;
import datadog.trace.bootstrap.instrumentation.jfr.WindowSampler;
import java.time.Duration;
import java.time.temporal.ChronoUnit;

/** {@link WindowSampler} specialization that rate-limits {@link BackpressureSampleEvent}s. */
final class BackpressureSampler extends WindowSampler<BackpressureSampleEvent> {
  /*
   * The sampling window is fixed at half a second.
   * The adaptive sampling logic needs the window to be short relative to the recording
   * duration: a single window may exceed its budget, but over many windows the rate is
   * expected to average out to samplesPerWindow.
   */
  private static final Duration SAMPLING_WINDOW = Duration.of(500, ChronoUnit.MILLIS);

  /**
   * Splits the configured per-recording sample limit evenly across the sampling windows of one
   * recording (integer division).
   *
   * @param config configuration providing the sample limit and the profiling upload period
   * @return number of samples allowed per sampling window
   */
  protected static int getSamplesPerWindow(final Config config) {
    final int limitPerRecording = config.getProfilingBackPressureSampleLimit();
    final int windowsPerRecording =
        samplingWindowsPerRecording(config.getProfilingUploadPeriod(), SAMPLING_WINDOW);
    return limitPerRecording / windowsPerRecording;
  }

  /** Creates a sampler whose budget and lookback are derived from the given configuration. */
  BackpressureSampler(final Config config) {
    this(
        SAMPLING_WINDOW,
        getSamplesPerWindow(config),
        samplingWindowsPerRecording(config.getProfilingUploadPeriod(), SAMPLING_WINDOW));
  }

  /**
   * Creates a sampler with explicit parameters.
   *
   * @param windowDuration length of one sampling window
   * @param samplesPerWindow sample budget per window
   * @param lookback lookback value forwarded to {@link WindowSampler}
   */
  BackpressureSampler(Duration windowDuration, int samplesPerWindow, int lookback) {
    super(windowDuration, samplesPerWindow, lookback, BackpressureSampleEvent.class);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@
import com.datadog.profiling.controller.jfr.JFRAccess;
import com.datadog.profiling.controller.jfr.JfpUtils;
import com.datadog.profiling.controller.openjdk.events.AvailableProcessorCoresEvent;
import datadog.trace.api.Config;
import datadog.trace.api.Platform;
import datadog.trace.api.config.ProfilingConfig;
import datadog.trace.bootstrap.config.provider.ConfigProvider;
import datadog.trace.bootstrap.instrumentation.jfr.backpressure.BackpressureProfiling;
import datadog.trace.bootstrap.instrumentation.jfr.exceptions.ExceptionProfiling;
import datadog.trace.util.PidHelper;
import de.thetaphi.forbiddenapis.SuppressForbidden;
Expand Down Expand Up @@ -231,6 +233,10 @@ && isEventEnabled(recordingSettings, "jdk.NativeMethodSample")) {
ExceptionProfiling.getInstance().start();
}

if (Config.get().isProfilingBackPressureSamplingEnabled()) {
BackpressureProfiling.getInstance().start();
}

// Register periodic events
AvailableProcessorCoresEvent.register();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ public static void onEnter(@Advice.Argument(value = 0, readOnly = false) String[
+ "datadog.trace.bootstrap.instrumentation.java.concurrent.TPEHelper:build_time,"
+ "datadog.trace.bootstrap.instrumentation.jfr.exceptions.ExceptionCountEvent:build_time,"
+ "datadog.trace.bootstrap.instrumentation.jfr.exceptions.ExceptionSampleEvent:build_time,"
+ "datadog.trace.bootstrap.instrumentation.jfr.backpressure.BackpressureSampleEvent:build_time,"
+ "datadog.trace.bootstrap.instrumentation.jfr.directallocation.DirectAllocationTotalEvent:build_time,"
+ "datadog.trace.logging.LoggingSettingsDescription:build_time,"
+ "datadog.trace.logging.simplelogger.SLCompatFactory:build_time,"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import com.google.auto.service.AutoService;
import datadog.trace.agent.tooling.Instrumenter;
import datadog.trace.agent.tooling.InstrumenterModule;
import datadog.trace.api.Config;
import datadog.trace.bootstrap.InstrumentationContext;
import datadog.trace.bootstrap.instrumentation.java.concurrent.State;
import datadog.trace.bootstrap.instrumentation.java.concurrent.Wrapper;
import datadog.trace.bootstrap.instrumentation.jfr.backpressure.BackpressureProfiling;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
Expand Down Expand Up @@ -81,13 +83,19 @@ public static final class Reject {
// remove our wrapper before calling the handler (save wrapper, so we can cancel it later)
@Advice.OnMethodEnter(suppress = Throwable.class)
public static Wrapper<?> handle(
@Advice.Argument(readOnly = false, value = 0) Runnable runnable) {
@Advice.This Object zis, @Advice.Argument(readOnly = false, value = 0) Runnable runnable) {
Wrapper<?> wrapper = null;
if (runnable instanceof Wrapper) {
Wrapper<?> wrapper = (Wrapper<?>) runnable;
wrapper = (Wrapper<?>) runnable;
runnable = wrapper.unwrap();
return wrapper;
}
return null;
if (Config.get().isProfilingBackPressureSamplingEnabled()) {
// record this event before the handler executes, which will help
// explain why the task is running on the submitter thread for
// rejection policies which run on the caller (CallerRunsPolicy or user-provided)
BackpressureProfiling.getInstance().process(zis.getClass(), runnable);
}
return wrapper;
}

// must execute after in case the handler actually runs the runnable,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ public final class ProfilingConfig {
"profiling.exception.record.message";
public static final boolean PROFILING_EXCEPTION_RECORD_MESSAGE_DEFAULT = true;

// Key of the boolean setting that turns backpressure sampling on; off by default.
public static final String PROFILING_BACKPRESSURE_SAMPLING_ENABLED =
"profiling.backpressure.sampling.enabled";
public static final boolean PROFILING_BACKPRESSURE_SAMPLING_ENABLED_DEFAULT = false;
// Key of the integer setting that caps the number of backpressure samples; default 10,000.
public static final String PROFILING_BACKPRESSURE_SAMPLE_LIMIT =
"profiling.backpressure.sample.limit";
public static final int PROFILING_BACKPRESSURE_SAMPLE_LIMIT_DEFAULT = 10_000;

public static final String PROFILING_DIRECT_ALLOCATION_SAMPLE_LIMIT =
"profiling.direct.allocation.sample.limit";
public static final int PROFILING_DIRECT_ALLOCATION_SAMPLE_LIMIT_DEFAULT = 2_000;
Expand Down
20 changes: 20 additions & 0 deletions internal-api/src/main/java/datadog/trace/api/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,9 @@
import static datadog.trace.api.config.ProfilingConfig.PROFILING_API_KEY_FILE_VERY_OLD;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_API_KEY_OLD;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_API_KEY_VERY_OLD;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_BACKPRESSURE_SAMPLE_LIMIT;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_BACKPRESSURE_SAMPLE_LIMIT_DEFAULT;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_BACKPRESSURE_SAMPLING_ENABLED;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_BACKPRESSURE_SAMPLING_ENABLED_DEFAULT;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_DATADOG_PROFILER_ENABLED;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_DIRECT_ALLOCATION_SAMPLE_LIMIT;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_DIRECT_ALLOCATION_SAMPLE_LIMIT_DEFAULT;
Expand Down Expand Up @@ -713,6 +716,8 @@ static class HostNameHolder {
private final String profilingProxyUsername;
private final String profilingProxyPassword;
private final int profilingExceptionSampleLimit;
private final int profilingBackPressureSampleLimit;
private final boolean profilingBackPressureEnabled;
private final int profilingDirectAllocationSampleLimit;
private final int profilingExceptionHistogramTopItems;
private final int profilingExceptionHistogramMaxCollectionSize;
Expand Down Expand Up @@ -1543,6 +1548,13 @@ PROFILING_DATADOG_PROFILER_ENABLED, isDatadogProfilerSafeInCurrentEnvironment())
profilingExceptionSampleLimit =
configProvider.getInteger(
PROFILING_EXCEPTION_SAMPLE_LIMIT, PROFILING_EXCEPTION_SAMPLE_LIMIT_DEFAULT);
// Read the backpressure limit from its own key ("profiling.backpressure.sample.limit"),
// not from the exception sample limit key, so the two limits can be tuned independently.
profilingBackPressureSampleLimit =
    configProvider.getInteger(
        PROFILING_BACKPRESSURE_SAMPLE_LIMIT, PROFILING_BACKPRESSURE_SAMPLE_LIMIT_DEFAULT);
profilingBackPressureEnabled =
configProvider.getBoolean(
PROFILING_BACKPRESSURE_SAMPLING_ENABLED,
PROFILING_BACKPRESSURE_SAMPLING_ENABLED_DEFAULT);
profilingDirectAllocationSampleLimit =
configProvider.getInteger(
PROFILING_DIRECT_ALLOCATION_SAMPLE_LIMIT,
Expand Down Expand Up @@ -2678,6 +2690,14 @@ public int getProfilingDirectAllocationSampleLimit() {
return profilingDirectAllocationSampleLimit;
}

/**
 * Returns the configured backpressure sample limit.
 *
 * @return the value resolved from configuration when this {@code Config} was built
 */
public int getProfilingBackPressureSampleLimit() {
return profilingBackPressureSampleLimit;
}

/**
 * Reports whether backpressure sampling is enabled.
 *
 * @return {@code true} if the backpressure sampling setting resolved to enabled
 */
public boolean isProfilingBackPressureSamplingEnabled() {
return profilingBackPressureEnabled;
}

/**
 * Returns the value of the {@code profilingExceptionHistogramTopItems} setting.
 *
 * @return the configured value
 */
public int getProfilingExceptionHistogramTopItems() {
return profilingExceptionHistogramTopItems;
}
Expand Down

0 comments on commit 8a0a406

Please sign in to comment.