Skip to content

Commit

Permalink
Fix memory leak in Exception Replay (#7885)
Browse files Browse the repository at this point in the history
The weak map stateByThrowable keep the throwable as the key but this
exception is also strong referenced by snapshots stored inside the
ThrowableState in CapturedThrowable and in locals and extensions for
@exception.
Fixing by storing weak reference inside the CapturedThrowable and
clearing the other ref for @exception at commit time
  • Loading branch information
jpbempel committed Nov 5, 2024
1 parent 6eac0b8 commit ba91d0b
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import datadog.trace.bootstrap.debugger.util.Redaction;
import datadog.trace.bootstrap.debugger.util.TimeoutChecker;
import datadog.trace.bootstrap.debugger.util.WellKnownClasses;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
Expand Down Expand Up @@ -183,6 +184,10 @@ public ValueReferenceResolver withExtensions(Map<String, Object> extensions) {
return new CapturedContext(this, extensions);
}

public void removeExtension(String name) {
extensions.remove(name);
}

private void addExtension(String name, Object value) {
extensions.put(name, value);
}
Expand Down Expand Up @@ -642,7 +647,7 @@ public TimeoutChecker getTimeoutChecker() {
public static class CapturedThrowable {
private final String type;
private final String message;
private final transient Throwable throwable;
private final transient WeakReference<Throwable> throwable;

/*
* Need to exclude stacktrace from equals/hashCode computation.
Expand All @@ -663,7 +668,7 @@ public CapturedThrowable(
this.type = type;
this.message = message;
this.stacktrace = new ArrayList<>(stacktrace);
this.throwable = t;
this.throwable = new WeakReference<>(t);
}

public String getType() {
Expand All @@ -679,7 +684,7 @@ public List<CapturedStackFrame> getStacktrace() {
}

public Throwable getThrowable() {
return throwable;
return throwable.get();
}

private static List<CapturedStackFrame> captureFrames(StackTraceElement[] stackTrace) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,14 @@ boolean shouldCaptureException(String fingerprint, Clock clock) {

public void addSnapshot(Snapshot snapshot) {
Throwable throwable = snapshot.getCaptures().getReturn().getCapturedThrowable().getThrowable();
if (throwable == null) {
LOGGER.debug("Snapshot has no throwable: {}", snapshot.getId());
return;
}
throwable = ExceptionHelper.getInnerMostThrowable(throwable);
if (throwable == null) {
LOGGER.debug(
"Unable to find root cause of exception: {}",
snapshot.getCaptures().getReturn().getCapturedThrowable().getThrowable().toString());
throwable = snapshot.getCaptures().getReturn().getCapturedThrowable().getThrowable();
LOGGER.debug("Unable to find root cause of exception: {}", String.valueOf(throwable));
return;
}
ThrowableState state =
Expand All @@ -172,6 +175,10 @@ void updateLastCapture(String fingerprint, Clock clock) {
fingerprints.put(fingerprint, Instant.now(clock));
}

boolean hasExceptionStateTracked() {
return !snapshotsByThrowable.isEmpty();
}

public static class ThrowableState {
private final String exceptionId;
private List<Snapshot> snapshots;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import datadog.trace.bootstrap.debugger.ProbeId;
import datadog.trace.bootstrap.debugger.ProbeImplementation;
import datadog.trace.bootstrap.debugger.ProbeLocation;
import datadog.trace.bootstrap.debugger.el.ValueReferences;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -67,6 +68,10 @@ public void evaluate(
return;
}
Throwable throwable = context.getCapturedThrowable().getThrowable();
if (throwable == null) {
LOGGER.debug("Throwable cleared by GC");
return;
}
Throwable innerMostThrowable = getInnerMostThrowable(throwable);
String fingerprint =
Fingerprinter.fingerprint(innerMostThrowable, exceptionProbeManager.getClassNameFilter());
Expand Down Expand Up @@ -107,6 +112,9 @@ public void commit(
* - DebuggerContext.commit()
*/
snapshot.recordStackTrace(4);
// clear any strong ref to the exception before adding the snapshot to avoid leaking snapshots
// inside the stateByThrowable map
clearExceptionRefs(snapshot);
// add snapshot for later to wait for triggering point (ExceptionDebugger::handleException)
exceptionProbeManager.addSnapshot(snapshot);
LOGGER.debug(
Expand All @@ -117,6 +125,11 @@ public void commit(
}
}

private void clearExceptionRefs(Snapshot snapshot) {
snapshot.getCaptures().getReturn().getLocals().remove(ValueReferences.EXCEPTION_REF);
snapshot.getCaptures().getReturn().removeExtension(ValueReferences.EXCEPTION_EXTENSION_NAME);
}

@Override
public void buildLocation(InstrumentationResult result) {
String type = where.getTypeName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,14 @@ public void nestedException() {
expectedFrameIndex,
"com.datadog.debugger.exception.DefaultExceptionDebuggerTest",
"createTest1Exception");
// make sure we are not leaking references
exception = null; // release strong reference
System.gc();
// calling ExceptionProbeManager#hasExceptionStateTracked() will call WeakIdentityHashMap#size()
// through isEmpty() an will purge stale entries
assertWithTimeout(
() -> !exceptionDebugger.getExceptionProbeManager().hasExceptionStateTracked(),
Duration.ofSeconds(30));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ public void instrumentAndCaptureSnapshots() throws Exception {
Snapshot snapshot0 = listener.snapshots.get(0);
assertProbeId(probeIdsByMethodName, "processWithException", snapshot0.getProbe().getId());
assertEquals("oops", snapshot0.getCaptures().getReturn().getCapturedThrowable().getMessage());
assertTrue(snapshot0.getCaptures().getReturn().getLocals().containsKey("@exception"));
ProbeLocation location = snapshot0.getProbe().getLocation();
assertEquals(
location.getType() + "." + location.getMethod(), snapshot0.getStack().get(0).getFunction());
Expand Down Expand Up @@ -212,7 +211,6 @@ public void recursive() throws Exception {
assertProbeId(probeIdsByMethodName, "fiboException", snapshot0.getProbe().getId());
assertEquals(
"oops fibo", snapshot0.getCaptures().getReturn().getCapturedThrowable().getMessage());
assertTrue(snapshot0.getCaptures().getReturn().getLocals().containsKey("@exception"));
assertEquals("1", getValue(snapshot0.getCaptures().getReturn().getArguments().get("n")));
Snapshot snapshot1 = listener.snapshots.get(1);
assertEquals("2", getValue(snapshot1.getCaptures().getReturn().getArguments().get("n")));
Expand Down

0 comments on commit ba91d0b

Please sign in to comment.