Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emit rate limited JFR events when RejectedExecutionHandlers run #7076

Merged
merged 1 commit into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package datadog.trace.bootstrap.instrumentation.jfr.backpressure;

import datadog.trace.api.Config;
import datadog.trace.bootstrap.instrumentation.api.TaskWrapper;

/**
 * Entry point used to record sampled {@link BackpressureSampleEvent}s.
 *
 * <p>A process-wide instance is available through {@link #getInstance()}; it is built from {@code
 * Config.get()} the first time the nested {@code Holder} class is initialized.
 */
public final class BackpressureProfiling {

  /** Decides, per call to {@link #process}, whether an event is emitted. */
  private final BackpressureSampler sampler;

  private BackpressureProfiling(final Config config) {
    this(new BackpressureSampler(config));
  }

  /** Package-private so that a specific sampler can be supplied directly. */
  BackpressureProfiling(BackpressureSampler sampler) {
    this.sampler = sampler;
  }

  /** @return the shared instance held by {@code Holder} */
  public static BackpressureProfiling getInstance() {
    return Holder.INSTANCE;
  }

  /** Starts the underlying sampler. */
  public void start() {
    sampler.start();
  }

  /**
   * Commits a {@link BackpressureSampleEvent} if the sampler accepts this occurrence.
   *
   * @param backpressureMechanism class recorded as the event's policy
   * @param task the task involved; its type is resolved via {@code TaskWrapper.getUnwrappedType}
   */
  public void process(Class<?> backpressureMechanism, Object task) {
    if (!sampler.sample()) {
      // not selected by the sampler: no event is created at all
      return;
    }
    final Class<?> taskType = TaskWrapper.getUnwrappedType(task);
    final BackpressureSampleEvent event = new BackpressureSampleEvent(backpressureMechanism, taskType);
    event.commit();
  }

  /** Holds the shared instance; initialized on first access to {@code INSTANCE}. */
  private static final class Holder {
    static final BackpressureProfiling INSTANCE = new BackpressureProfiling(Config.get());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package datadog.trace.bootstrap.instrumentation.jfr.backpressure;

import datadog.trace.bootstrap.instrumentation.jfr.ContextualEvent;
import jdk.jfr.Category;
import jdk.jfr.Description;
import jdk.jfr.Event;
import jdk.jfr.Label;
import jdk.jfr.Name;

/**
 * JFR event named {@code datadog.BackpressureSample}.
 *
 * <p>Carries the policy class and task class passed to the constructor, plus the span identifiers
 * delivered through {@link #setContext(long, long)}.
 */
@Name("datadog.BackpressureSample")
@Label("Backpressure Sample")
@Description("Datadog backpressure sample event.")
@Category("Datadog")
public class BackpressureSampleEvent extends Event implements ContextualEvent {

  @Label("Policy")
  private final Class<?> policy;

  @Label("Task")
  private final Class<?> task;

  @Label("Local Root Span Id")
  private long localRootSpanId;

  @Label("Span Id")
  private long spanId;

  /**
   * @param policy class stored in the "Policy" field
   * @param task class stored in the "Task" field
   */
  public BackpressureSampleEvent(final Class<?> policy, final Class<?> task) {
    this.policy = policy;
    this.task = task;
    // captureContext() is not declared in this class; presumably it comes from ContextualEvent
    // and calls back into setContext(...) - confirm against that interface.
    captureContext();
  }

  /** Stores the supplied span identifiers on this event. */
  @Override
  public void setContext(final long localRootSpanId, final long spanId) {
    this.localRootSpanId = localRootSpanId;
    this.spanId = spanId;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package datadog.trace.bootstrap.instrumentation.jfr.backpressure;

import datadog.trace.api.Config;
import datadog.trace.bootstrap.instrumentation.jfr.WindowSampler;
import java.time.Duration;
import java.time.temporal.ChronoUnit;

/** {@code WindowSampler} specialization for {@link BackpressureSampleEvent}. */
final class BackpressureSampler extends WindowSampler<BackpressureSampleEvent> {
  /*
   * The sampling window is fixed at 0.5 seconds.
   * AdaptiveSampler's logic expects the window to be short relative to (here) the recording
   * duration: a single window may overshoot, but over time the rate should average out to
   * samplesPerWindow.
   */
  private static final Duration SAMPLING_WINDOW = Duration.of(500, ChronoUnit.MILLIS);

  /**
   * Builds a sampler from configuration: the window is {@code SAMPLING_WINDOW}, the per-window
   * budget comes from {@link #getSamplesPerWindow(Config)}, and the lookback is the number of
   * sampling windows per recording for the configured upload period.
   */
  BackpressureSampler(final Config config) {
    this(
        SAMPLING_WINDOW,
        getSamplesPerWindow(config),
        samplingWindowsPerRecording(config.getProfilingUploadPeriod(), SAMPLING_WINDOW));
  }

  /** Passes the arguments straight to the superclass, together with the event class. */
  BackpressureSampler(Duration windowDuration, int samplesPerWindow, int lookback) {
    super(windowDuration, samplesPerWindow, lookback, BackpressureSampleEvent.class);
  }

  /**
   * @return the configured backpressure sample limit divided (integer division) by the number of
   *     sampling windows per recording
   */
  protected static int getSamplesPerWindow(final Config config) {
    final int sampleLimit = config.getProfilingBackPressureSampleLimit();
    final int windowsPerRecording =
        samplingWindowsPerRecording(config.getProfilingUploadPeriod(), SAMPLING_WINDOW);
    return sampleLimit / windowsPerRecording;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@
import com.datadog.profiling.controller.jfr.JFRAccess;
import com.datadog.profiling.controller.jfr.JfpUtils;
import com.datadog.profiling.controller.openjdk.events.AvailableProcessorCoresEvent;
import datadog.trace.api.Config;
import datadog.trace.api.Platform;
import datadog.trace.api.config.ProfilingConfig;
import datadog.trace.bootstrap.config.provider.ConfigProvider;
import datadog.trace.bootstrap.instrumentation.jfr.backpressure.BackpressureProfiling;
import datadog.trace.bootstrap.instrumentation.jfr.exceptions.ExceptionProfiling;
import datadog.trace.util.PidHelper;
import de.thetaphi.forbiddenapis.SuppressForbidden;
Expand Down Expand Up @@ -231,6 +233,10 @@ && isEventEnabled(recordingSettings, "jdk.NativeMethodSample")) {
ExceptionProfiling.getInstance().start();
}

if (Config.get().isProfilingBackPressureSamplingEnabled()) {
BackpressureProfiling.getInstance().start();
}

// Register periodic events
AvailableProcessorCoresEvent.register();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ public static void onEnter(@Advice.Argument(value = 0, readOnly = false) String[
+ "datadog.trace.bootstrap.instrumentation.java.concurrent.TPEHelper:build_time,"
+ "datadog.trace.bootstrap.instrumentation.jfr.exceptions.ExceptionCountEvent:build_time,"
+ "datadog.trace.bootstrap.instrumentation.jfr.exceptions.ExceptionSampleEvent:build_time,"
+ "datadog.trace.bootstrap.instrumentation.jfr.backpressure.BackpressureSampleEvent:build_time,"
+ "datadog.trace.bootstrap.instrumentation.jfr.directallocation.DirectAllocationTotalEvent:build_time,"
+ "datadog.trace.logging.LoggingSettingsDescription:build_time,"
+ "datadog.trace.logging.simplelogger.SLCompatFactory:build_time,"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import com.google.auto.service.AutoService;
import datadog.trace.agent.tooling.Instrumenter;
import datadog.trace.agent.tooling.InstrumenterModule;
import datadog.trace.api.Config;
import datadog.trace.bootstrap.InstrumentationContext;
import datadog.trace.bootstrap.instrumentation.java.concurrent.State;
import datadog.trace.bootstrap.instrumentation.java.concurrent.Wrapper;
import datadog.trace.bootstrap.instrumentation.jfr.backpressure.BackpressureProfiling;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
Expand Down Expand Up @@ -81,13 +83,19 @@ public static final class Reject {
// remove our wrapper before calling the handler (save wrapper, so we can cancel it later)
@Advice.OnMethodEnter(suppress = Throwable.class)
public static Wrapper<?> handle(
@Advice.Argument(readOnly = false, value = 0) Runnable runnable) {
@Advice.This Object zis, @Advice.Argument(readOnly = false, value = 0) Runnable runnable) {
Wrapper<?> wrapper = null;
if (runnable instanceof Wrapper) {
Wrapper<?> wrapper = (Wrapper<?>) runnable;
wrapper = (Wrapper<?>) runnable;
runnable = wrapper.unwrap();
return wrapper;
}
return null;
if (Config.get().isProfilingBackPressureSamplingEnabled()) {
// record this event before the handler executes, which will help
// explain why the task is running on the submitter thread for
// rejection policies which run on the caller (CallerRunsPolicy or user-provided)
BackpressureProfiling.getInstance().process(zis.getClass(), runnable);
}
return wrapper;
}

// must execute after in case the handler actually runs the runnable,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ public final class ProfilingConfig {
"profiling.exception.record.message";
public static final boolean PROFILING_EXCEPTION_RECORD_MESSAGE_DEFAULT = true;

// Config key for the boolean switch that turns backpressure sampling on; off by default.
public static final String PROFILING_BACKPRESSURE_SAMPLING_ENABLED =
"profiling.backpressure.sampling.enabled";
public static final boolean PROFILING_BACKPRESSURE_SAMPLING_ENABLED_DEFAULT = false;
// Config key for the backpressure sample limit, and the value used when the key is not set.
// NOTE(review): presumably a per-recording budget that the sampler spreads across its
// sampling windows - confirm against BackpressureSampler.
public static final String PROFILING_BACKPRESSURE_SAMPLE_LIMIT =
"profiling.backpressure.sample.limit";
public static final int PROFILING_BACKPRESSURE_SAMPLE_LIMIT_DEFAULT = 10_000;

public static final String PROFILING_DIRECT_ALLOCATION_SAMPLE_LIMIT =
"profiling.direct.allocation.sample.limit";
public static final int PROFILING_DIRECT_ALLOCATION_SAMPLE_LIMIT_DEFAULT = 2_000;
Expand Down
20 changes: 20 additions & 0 deletions internal-api/src/main/java/datadog/trace/api/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,9 @@
import static datadog.trace.api.config.ProfilingConfig.PROFILING_API_KEY_FILE_VERY_OLD;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_API_KEY_OLD;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_API_KEY_VERY_OLD;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_BACKPRESSURE_SAMPLE_LIMIT;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_BACKPRESSURE_SAMPLE_LIMIT_DEFAULT;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_BACKPRESSURE_SAMPLING_ENABLED;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_BACKPRESSURE_SAMPLING_ENABLED_DEFAULT;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_DATADOG_PROFILER_ENABLED;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_DIRECT_ALLOCATION_SAMPLE_LIMIT;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_DIRECT_ALLOCATION_SAMPLE_LIMIT_DEFAULT;
Expand Down Expand Up @@ -713,6 +716,8 @@ static class HostNameHolder {
private final String profilingProxyUsername;
private final String profilingProxyPassword;
private final int profilingExceptionSampleLimit;
private final int profilingBackPressureSampleLimit;
private final boolean profilingBackPressureEnabled;
private final int profilingDirectAllocationSampleLimit;
private final int profilingExceptionHistogramTopItems;
private final int profilingExceptionHistogramMaxCollectionSize;
Expand Down Expand Up @@ -1543,6 +1548,13 @@ PROFILING_DATADOG_PROFILER_ENABLED, isDatadogProfilerSafeInCurrentEnvironment())
profilingExceptionSampleLimit =
configProvider.getInteger(
PROFILING_EXCEPTION_SAMPLE_LIMIT, PROFILING_EXCEPTION_SAMPLE_LIMIT_DEFAULT);
// Read the backpressure-specific key. Looking up PROFILING_EXCEPTION_SAMPLE_LIMIT here would
// leave "profiling.backpressure.sample.limit" unread and let the exception sample limit
// setting silently drive the backpressure budget as well.
profilingBackPressureSampleLimit =
    configProvider.getInteger(
        PROFILING_BACKPRESSURE_SAMPLE_LIMIT, PROFILING_BACKPRESSURE_SAMPLE_LIMIT_DEFAULT);
profilingBackPressureEnabled =
configProvider.getBoolean(
PROFILING_BACKPRESSURE_SAMPLING_ENABLED,
PROFILING_BACKPRESSURE_SAMPLING_ENABLED_DEFAULT);
profilingDirectAllocationSampleLimit =
configProvider.getInteger(
PROFILING_DIRECT_ALLOCATION_SAMPLE_LIMIT,
Expand Down Expand Up @@ -2678,6 +2690,14 @@ public int getProfilingDirectAllocationSampleLimit() {
return profilingDirectAllocationSampleLimit;
}

/** @return the backpressure sample limit held by this configuration */
public int getProfilingBackPressureSampleLimit() {
  return this.profilingBackPressureSampleLimit;
}

/** @return {@code true} when backpressure sampling is enabled in this configuration */
public boolean isProfilingBackPressureSamplingEnabled() {
  return this.profilingBackPressureEnabled;
}

/** @return the exception histogram top-items setting held by this configuration */
public int getProfilingExceptionHistogramTopItems() {
  return this.profilingExceptionHistogramTopItems;
}
Expand Down
Loading