From cdecb42d28a33c7f391c8fe7e075d9a07a52a6ac Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 1 Jul 2024 17:07:09 +0200 Subject: [PATCH] Add support for sending OOME events (#7253) --- .../monitor/DDAgentStatsDClient.java | 30 +- .../monitor/DDAgentStatsDClientTest.groovy | 41 ++ .../java/datadog/trace/bootstrap/Agent.java | 6 +- .../agent-crashtracking/build.gradle | 11 +- .../datadog/crashtracking/CrashUploader.java | 1 + .../datadog/crashtracking/OOMENotifier.java | 31 ++ .../crashtracking/ScriptInitializer.java | 233 +++++++++-- .../com/datadog/crashtracking/notify_oome.bat | 45 +++ .../com/datadog/crashtracking/notify_oome.sh | 45 +++ .../datadog/crashtracking/upload_crash.bat | 52 ++- .../com/datadog/crashtracking/upload_crash.sh | 55 ++- .../crashtracking/ScriptInitializerTest.java | 87 ++++- .../datadog/trace/agent/tooling/AgentCLI.java | 5 + .../datadog/trace/bootstrap/AgentJar.java | 10 + dd-smoke-tests/crashtracking/build.gradle | 30 ++ .../CrashtrackingTestApplication.java | 48 +++ .../smoketest/CrashtrackingSmokeTest.java | 367 ++++++++++++++++++ settings.gradle | 1 + 18 files changed, 1038 insertions(+), 60 deletions(-) create mode 100644 dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/OOMENotifier.java create mode 100644 dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/notify_oome.bat create mode 100644 dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/notify_oome.sh create mode 100644 dd-smoke-tests/crashtracking/build.gradle create mode 100644 dd-smoke-tests/crashtracking/src/main/java/datadog/smoketest/crashtracking/CrashtrackingTestApplication.java create mode 100644 dd-smoke-tests/crashtracking/src/test/java/datadog/smoketest/CrashtrackingSmokeTest.java diff --git a/communication/src/main/java/datadog/communication/monitor/DDAgentStatsDClient.java b/communication/src/main/java/datadog/communication/monitor/DDAgentStatsDClient.java index 
23a95175ad7..102ea098a4d 100644 --- a/communication/src/main/java/datadog/communication/monitor/DDAgentStatsDClient.java +++ b/communication/src/main/java/datadog/communication/monitor/DDAgentStatsDClient.java @@ -1,10 +1,14 @@ package datadog.communication.monitor; +import com.timgroup.statsd.Event; import com.timgroup.statsd.ServiceCheck; import datadog.trace.api.StatsDClient; import java.util.function.Function; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -final class DDAgentStatsDClient implements StatsDClient { +public final class DDAgentStatsDClient implements StatsDClient { + private static final Logger log = LoggerFactory.getLogger(DDAgentStatsDClient.class); private final DDAgentStatsDConnection connection; private final Function nameMapping; private final Function tagMapping; @@ -84,6 +88,30 @@ public void serviceCheck( connection.statsd.recordServiceCheckRun(serviceCheck); } + /** + * Record a statsd event + * + * @param type the type of event (error, warning, info, success - @see Event.AlertType) + * @param source the source of the event (e.g. java, myapp, CrashTracking, Telemetry, etc) + * @param eventName the name of the event (or title) + * @param message the message of the event + * @param tags the tags to attach to the event + */ + public void recordEvent( + String type, String source, String eventName, String message, String... 
tags) { + Event.AlertType alertType = Event.AlertType.valueOf(type.toUpperCase()); + log.debug( + "Recording event: {} - {} - {} - {} [{}]", alertType, source, eventName, message, tags); + Event.Builder eventBuilder = + Event.builder() + .withTitle(eventName) + .withText(message) + .withSourceTypeName(source) + .withDate(System.currentTimeMillis()) + .withAlertType(alertType); + connection.statsd.recordEvent(eventBuilder.build(), tagMapping.apply(tags)); + } + static ServiceCheck.Status serviceCheckStatus(final String status) { switch (status) { case "OK": diff --git a/communication/src/test/groovy/datadog/communication/monitor/DDAgentStatsDClientTest.groovy b/communication/src/test/groovy/datadog/communication/monitor/DDAgentStatsDClientTest.groovy index c084655af61..d079fbc836f 100644 --- a/communication/src/test/groovy/datadog/communication/monitor/DDAgentStatsDClientTest.groovy +++ b/communication/src/test/groovy/datadog/communication/monitor/DDAgentStatsDClientTest.groovy @@ -116,6 +116,47 @@ class DDAgentStatsDClientTest extends DDSpecification { // spotless:on } + def "single statsd client with event"() { + setup: + injectSysConfig(DOGSTATSD_START_DELAY, '0') + def server = new StatsDServer() + server.start() + + def client = statsDClientManager().statsDClient('127.0.0.1', server.socket.localPort, null, namespace, constantTags as String[], false) + + String[] tags = ["type:BufferPool", "jmx_domain:java.nio"] + + expect: + client.recordEvent(eventType, "test", "test.event", "test event", tags) + def event = server.waitForMessage() + event.startsWith("_e{10,10}:test.event|test event|d:") && event.contains("|t:$expectedType|s:test|#$expectedTags") + + cleanup: + client.close() + server.close() + + where: + // spotless:off + namespace | eventType | expectedType | constantTags | expectedTags + null | "INFO" | "info" | null | "jmx_domain:java.nio,type:BufferPool" + null | "INFO" | "info" | ["lang:java", "lang_version:1.8.0"] | 
"jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0" + "example" | "INFO" | "info" | null | "jmx_domain:java.nio,type:BufferPool" + "example" | "INFO" | "info" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0" + null | "WARNING" | "warning" | null | "jmx_domain:java.nio,type:BufferPool" + null | "WARNING" | "warning" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0" + "example" | "WARNING" | "warning" | null | "jmx_domain:java.nio,type:BufferPool" + "example" | "WARNING" | "warning" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0" + null | "ERROR" | "error" | null | "jmx_domain:java.nio,type:BufferPool" + null | "ERROR" | "error" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0" + "example" | "ERROR" | "error" | null | "jmx_domain:java.nio,type:BufferPool" + "example" | "ERROR" | "error" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0" + null | "SUCCESS" | "success" | null | "jmx_domain:java.nio,type:BufferPool" + null | "SUCCESS" | "success" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0" + "example" | "SUCCESS" | "success" | null | "jmx_domain:java.nio,type:BufferPool" + "example" | "SUCCESS" | "success" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0" + // spotless:on + } + def "multiple statsd clients"() { setup: injectSysConfig(DOGSTATSD_START_DELAY, '0') diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java index c003463f16c..b2a4b3b65ad 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java +++ 
b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java @@ -638,8 +638,8 @@ private static synchronized void startJmx() { if (jmxStarting.getAndSet(true)) { return; // another thread is already in startJmx } - // crash uploader initialization relies on JMX being available - initializeCrashUploader(); + // error tracking initialization relies on JMX being available + initializeErrorTracking(); if (jmxFetchEnabled) { startJmxFetch(); } @@ -870,7 +870,7 @@ private static void stopTelemetry() { } } - private static void initializeCrashUploader() { + private static void initializeErrorTracking() { if (Platform.isJ9()) { // TODO currently crash tracking is supported only for HotSpot based JVMs return; diff --git a/dd-java-agent/agent-crashtracking/build.gradle b/dd-java-agent/agent-crashtracking/build.gradle index 3b7f2ec8d8f..9b4947aeb75 100644 --- a/dd-java-agent/agent-crashtracking/build.gradle +++ b/dd-java-agent/agent-crashtracking/build.gradle @@ -1,11 +1,10 @@ apply from: "$rootDir/gradle/java.gradle" -// FIXME: Improve test coverage. -minimumBranchCoverage = 0.6 -// runtime dependent parts (eg. 
looking up values from the JVM args) are not easy to exercise in unit tests -// the minimum coverage is reduced to reflect that -// minimumInstructionCoverage = 0.9 -minimumInstructionCoverage = 0.7 +// The functionality is tested in dd-smoke-tests/crashtracking + +minimumBranchCoverage = 0.0 +minimumInstructionCoverage = 0.0 +excludedClassesCoverage += ['com.datadog.crashtracking.*'] tasks.withType(Test).configureEach { subTask -> dependsOn ':dd-java-agent:shadowJar' diff --git a/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/CrashUploader.java b/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/CrashUploader.java index 9de21846b86..51ca6f9d91d 100644 --- a/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/CrashUploader.java +++ b/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/CrashUploader.java @@ -91,6 +91,7 @@ public CrashUploader() { ConfigProvider configProvider = config.configProvider(); + System.out.println("===> telemetryUrl: " + telemetryUrl); telemetryClient = OkHttpUtils.buildHttpClient( config, diff --git a/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/OOMENotifier.java b/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/OOMENotifier.java new file mode 100644 index 00000000000..b660bb7586b --- /dev/null +++ b/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/OOMENotifier.java @@ -0,0 +1,31 @@ +package com.datadog.crashtracking; + +import static datadog.communication.monitor.DDAgentStatsDClientManager.statsDClientManager; + +import datadog.communication.monitor.DDAgentStatsDClient; +import de.thetaphi.forbiddenapis.SuppressForbidden; +import java.util.concurrent.locks.LockSupport; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class OOMENotifier { + private static final Logger log = LoggerFactory.getLogger(OOMENotifier.class); + + // This method is 
called via CLI so we don't need to be paranoid about the forbiddend APIs + @SuppressForbidden + public static void sendOomeEvent(String taglist) { + try (DDAgentStatsDClient client = + (DDAgentStatsDClient) + statsDClientManager().statsDClient(null, null, null, null, null, false)) { + String[] tags = taglist.split(","); + client.recordEvent( + "error", + "java", + "OutOfMemoryError", + "Java process encountered out of memory error", + tags); + log.info("OOME event sent"); + LockSupport.parkNanos(2_000_000_000L); // wait 2s to allow statsd client flushing the event + } + } +} diff --git a/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/ScriptInitializer.java b/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/ScriptInitializer.java index cd5e3512d0b..1ac6d99d908 100644 --- a/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/ScriptInitializer.java +++ b/dd-java-agent/agent-crashtracking/src/main/java/com/datadog/crashtracking/ScriptInitializer.java @@ -1,6 +1,7 @@ package com.datadog.crashtracking; import com.sun.management.HotSpotDiagnosticMXBean; +import datadog.trace.api.Config; import datadog.trace.api.Platform; import datadog.trace.util.PidHelper; import datadog.trace.util.Strings; @@ -11,33 +12,139 @@ import java.io.InputStreamReader; import java.lang.management.ManagementFactory; import java.nio.file.FileAlreadyExistsException; +import java.nio.file.FileVisitResult; +import java.nio.file.FileVisitor; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.nio.file.attribute.BasicFileAttributes; import java.nio.file.attribute.PosixFilePermissions; import java.util.Comparator; import java.util.Locale; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; public final class ScriptInitializer { private static final Logger log = LoggerFactory.getLogger(ScriptInitializer.class); + private static final Pattern oomeNotifierScriptPattern = + Pattern.compile("(.*?dd_oome_notifier[.](sh|bat))\\s+(%p)", Pattern.CASE_INSENSITIVE); + private static final String PID_PREFIX = "_pid"; + + private static class ScriptCleanupVisitor implements FileVisitor { + private static final Pattern PID_PATTERN = Pattern.compile(".*?" + PID_PREFIX + "(\\d+)"); + + private final Set pidSet = PidHelper.getJavaPids(); + + static void run(Path dir) { + try { + if (Files.exists(dir)) { + Files.walkFileTree(dir, new ScriptCleanupVisitor()); + } + } catch (IOException e) { + log.warn("Failed cleaning up process specific files in {}", dir, e); + } + } + + private ScriptCleanupVisitor() {} + + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) + throws IOException { + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + String fileName = file.getFileName().toString(); + Matcher matcher = PID_PATTERN.matcher(fileName); + if (matcher.find()) { + String pid = matcher.group(1); + if (pid != null && !pid.equals(PidHelper.getPid()) && !pidSet.contains(pid)) { + log.debug("Cleaning process specific file {}", file); + Files.delete(file); + } + } + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + log.debug("Failed to delete file {}", file, exc); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + return FileVisitResult.CONTINUE; + } + } + + public static void initialize() { + // this is HotSpot specific implementation (eg. 
will not work for IBM J9) + HotSpotDiagnosticMXBean diagBean = + ManagementFactory.getPlatformMXBean(HotSpotDiagnosticMXBean.class); + + initializeCrashUploader(diagBean); + initializeOOMENotifier(diagBean); + } + + private static void writeConfig(Path cfgPath, String... entries) { + log.debug("Writing config file: {}", cfgPath); + try (BufferedWriter bw = Files.newBufferedWriter(cfgPath)) { + for (int i = 0; i < entries.length; i += 2) { + bw.write(entries[i]); + bw.write("="); + bw.write(entries[i + 1]); + bw.newLine(); + } + Runtime.getRuntime() + .addShutdownHook( + new Thread( + () -> { + try { + log.debug("Deleting config file: {}", cfgPath); + Files.deleteIfExists(cfgPath); + } catch (IOException e) { + log.warn("Failed deleting config file: {}", cfgPath, e); + } + })); + log.debug("Config file written: {}", cfgPath); + } catch (IOException e) { + log.warn("Failed writing config file: {}", cfgPath, e); + try { + Files.deleteIfExists(cfgPath); + } catch (IOException ignored) { + // ignore + } + } + } + + private static String getBaseName(Path path) { + String filename = path.getFileName().toString(); + int dotIndex = filename.lastIndexOf('.'); + if (dotIndex == -1) { + return filename; + } + + return filename.substring(0, dotIndex); + } /** * If the value of `-XX:OnError` JVM argument is referring to `dd_crash_uploader.sh` or * `dd_crash_uploader.bat` and the script does not exist it will be created and prefilled with * code ensuring the error log upload will be triggered on JVM crash. */ - public static void initialize() { + private static void initializeCrashUploader(HotSpotDiagnosticMXBean diagBean) { try { - // this is HotSpot specific implementation (eg. 
will not work for IBM J9) - HotSpotDiagnosticMXBean diagBean = - ManagementFactory.getPlatformMXBean(HotSpotDiagnosticMXBean.class); String onErrorVal = diagBean.getVMOption("OnError").getValue(); - String onErrorFile = - Strings.replace(diagBean.getVMOption("ErrorFile").getValue(), "%p", PidHelper.getPid()); - initialize(onErrorVal, onErrorFile); + String onErrorFile = diagBean.getVMOption("ErrorFile").getValue(); + initializeCrashUploader(onErrorVal, onErrorFile); } catch (Throwable t) { log.warn( "Failed creating custom crash upload script. Crash tracking will not work properly.", t); @@ -45,7 +152,7 @@ public static void initialize() { } // @VisibleForTests - static void initialize(String onErrorVal, String onErrorFile) throws IOException { + static void initializeCrashUploader(String onErrorVal, String onErrorFile) throws IOException { if (onErrorVal == null || onErrorVal.isEmpty()) { log.debug("'-XX:OnError' argument was not provided. Crash tracking is disabled."); return; @@ -53,10 +160,22 @@ static void initialize(String onErrorVal, String onErrorFile) throws IOException if (onErrorFile == null || onErrorFile.isEmpty()) { onErrorFile = System.getProperty("user.dir") + "/hs_err_pid" + PidHelper.getPid() + ".log"; log.debug("No -XX:ErrorFile value, defaulting to {}", onErrorFile); + } else { + onErrorFile = Strings.replace(onErrorFile, "%p", PidHelper.getPid()); } - Path scriptPath = Paths.get(onErrorVal); - if (scriptPath.getFileName().toString().toLowerCase(Locale.ROOT).contains("dd_crash_uploader") - && Files.notExists(scriptPath)) { + + String agentJar = findAgentJar(); + if (agentJar == null) { + log.warn("Unable to locate the agent jar. 
Crash tracking will not work properly."); + return; + } + + Path scriptPath = Paths.get(onErrorVal.replace(" %p", "")); + if (scriptPath + .getFileName() + .toString() + .toLowerCase(Locale.ROOT) + .contains("dd_crash_uploader")) { try { Files.createDirectories( scriptPath.getParent(), @@ -64,27 +183,83 @@ static void initialize(String onErrorVal, String onErrorFile) throws IOException } catch (FileAlreadyExistsException ignored) { // can be safely ignored; if the folder exists we will just reuse it } - String agentJar = findAgentJar(); - if (agentJar == null) { - log.warn("Unable to locate the agent jar. Crash tracking will not work properly."); - return; - } - writeScript(onErrorFile, agentJar, scriptPath); + log.debug("Writing crash uploader script: {}", scriptPath); + writeScript(getCrashUploaderTemplate(), scriptPath, agentJar, onErrorFile); + } + Path cfgPath = + scriptPath.resolveSibling( + getBaseName(scriptPath) + PID_PREFIX + PidHelper.getPid() + ".cfg"); + writeConfig(cfgPath, "agent", agentJar, "hs_err", onErrorFile); + } + + private static void initializeOOMENotifier(HotSpotDiagnosticMXBean diagBean) { + try { + String onOutOfMemoryVal = diagBean.getVMOption("OnOutOfMemoryError").getValue(); + initializeOOMENotifier(onOutOfMemoryVal); + } catch (Throwable t) { + log.warn("Failed initializing OOME notifier. OOMEs will not be tracked.", t); + } + } + + // @VisibleForTests + static void initializeOOMENotifier(String onOutOfMemoryVal) throws IOException { + if (onOutOfMemoryVal == null || onOutOfMemoryVal.isEmpty()) { + log.info("'-XX:OnOutOfMemoryError' argument was not provided. OOME tracking is disabled."); + return; + } + Matcher m = oomeNotifierScriptPattern.matcher(onOutOfMemoryVal); + if (!m.find()) { + log.info( + "OOME notifier script value ({}) does not follow the expected format: /dd_ome_notifier.(sh|bat) %p. 
OOME tracking is disabled.", + onOutOfMemoryVal); + return; + } + + String tags = + Config.get().getMergedJmxTags().entrySet().stream() + .map(e -> e.getKey() + ":" + e.getValue()) + .collect(Collectors.joining(",")); + Path scriptPath = Paths.get(m.group(1)); + + // cleanup all stale process-specific generated files in the parent folder of the given OOME + // notifier script + ScriptCleanupVisitor.run(scriptPath.getParent()); + + try { + Files.createDirectories( + scriptPath.getParent(), + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxrwxrwx"))); + } catch (FileAlreadyExistsException ignored) { + // can be safely ignored; if the folder exists we will just reuse it + } + Files.copy(getOomeNotifierTemplate(), scriptPath, StandardCopyOption.REPLACE_EXISTING); + Files.setPosixFilePermissions(scriptPath, PosixFilePermissions.fromString("r-xr-xr-x")); + + String agentJar = findAgentJar(); + if (agentJar == null) { + log.warn("Unable to locate the agent jar. OOME notification will not work properly."); + return; } + Path cfgPath = + scriptPath.resolveSibling( + getBaseName(scriptPath) + PID_PREFIX + PidHelper.getPid() + ".cfg"); + writeConfig(cfgPath, "agent", agentJar, "tags", tags); } - private static void writeScript(String crashFile, String execClass, Path scriptPath) + private static void writeScript( + InputStream template, Path scriptPath, String execClass, String crashFile) throws IOException { - log.debug("Writing crash uploader script: {}", scriptPath); - try (BufferedReader br = new BufferedReader(new InputStreamReader(getScriptData()))) { + try (BufferedReader br = new BufferedReader(new InputStreamReader(template))) { try (BufferedWriter bw = Files.newBufferedWriter(scriptPath)) { br.lines() .map( - line -> - Strings.replace( - Strings.replace(line, "!AGENT_JAR!", execClass), - "!JAVA_ERROR_FILE!", - crashFile)) + line -> { + line = Strings.replace(line, "!AGENT_JAR!", execClass); + if (crashFile != null) { + line = 
Strings.replace(line, "!JAVA_ERROR_FILE!", crashFile); + } + return line; + }) .forEach(line -> writeLine(bw, line)); } } @@ -100,11 +275,16 @@ private static void writeLine(BufferedWriter bw, String line) { } } - private static InputStream getScriptData() { + private static InputStream getCrashUploaderTemplate() { String name = Platform.isWindows() ? "upload_crash.bat" : "upload_crash.sh"; return CrashUploader.class.getResourceAsStream(name); } + private static InputStream getOomeNotifierTemplate() { + String name = Platform.isWindows() ? "notify_oome.bat" : "notify_oome.sh"; + return OOMENotifier.class.getResourceAsStream(name); + } + private static String findAgentJar() throws IOException { String agentPath = null; String selfClass = @@ -130,6 +310,7 @@ private static String findAgentJar() throws IOException { .filter( p -> p.getFileName().toString().toLowerCase(Locale.ROOT).endsWith(".jar")) .findFirst() + .orElseThrow(() -> new IOException("Missing CLI jar")) .toString(); } } diff --git a/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/notify_oome.bat b/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/notify_oome.bat new file mode 100644 index 00000000000..00658a95e6f --- /dev/null +++ b/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/notify_oome.bat @@ -0,0 +1,45 @@ +@echo off +setlocal enabledelayedexpansion + +:: Check if PID is provided +if "%1"=="" ( + echo "Error: No PID provided" + exit /b 1 +) +set PID=%1 + +:: Get the directory of the script +set scriptDir=%~dp0 +:: Get the base name of the script +set scriptName=%~n0 +set configFile=%scriptDir%%scriptName%_pid%PID%.cfg + +:: Check if the configuration file exists +if not exist "%configFile%" ( + echo Error: Configuration file "%configFile%" not found + exit /b 1 +) + +:: Read the configuration file +:: The expected contents are +:: - agent: Path to the dd-java-agent.jar +:: - tags: Comma-separated list of tags 
to be sent with the OOME event; key:value pairs are supported +for /f "tokens=1,2 delims=: " %%a in (%configFile%.cfg) do ( + set %%a=%%b +) + +:: Debug: Print the loaded values (Optional) +echo Agent Jar: %agent% +echo Tags: %tags% +echo PID: %PID% + +:: Execute the Java command with the loaded values +java -Ddd.dogstatsd.start-delay=0 -jar "%agent%" sendOomeEvent "%tags%" +set RC=%ERRORLEVEL% +del "%configFile%" :: Clean up the configuration file +if %RC% EQU 0 ( + echo "OOME Event generated successfully" +) else ( + echo "Error: Failed to generate OOME event" + exit /b %RC% +) diff --git a/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/notify_oome.sh b/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/notify_oome.sh new file mode 100644 index 00000000000..2d0eace2d6e --- /dev/null +++ b/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/notify_oome.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +set +e # Disable exit on error + +# Check if PID is provided +if [ -z "$1" ]; then + echo "Error: No PID provided" + exit 1 +fi +HERE="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # Get the directory of the script +PID=$1 + +# Get the base name of the script +scriptName=$(basename "$0" .sh) +configFile="${HERE}/${scriptName}_pid${PID}.cfg" +if [ ! 
-f "$configFile" ]; then + echo "Error: Configuration file not found: $configFile" + exit 1 +fi + +# Read the configuration file +# The expected contents are: +# - agent: Path to the agent jar +# - tags: Comma-separated list of tags to be sent with the OOME event; key:value pairs are supported +declare -A config +while IFS="=" read -r key value; do + config["$key"]="$value" +done < "$configFile" + +# Debug: Print the loaded values (Optional) +echo "Agent Jar: ${config[agent]}" +echo "Tags: ${config[tags]}" +echo "PID: $PID" + +# Execute the Java command with the loaded values +java -Ddd.dogstatsd.start-delay=0 -jar "${config[agent]}" sendOomeEvent "${config[tags]}" +RC=$? +rm -f ${configFile} # Remove the configuration file + +if [ $RC -eq 0 ]; then + echo "OOME Event generated successfully" +else + echo "Error: Failed to generate OOME event" + exit $RC +fi diff --git a/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/upload_crash.bat b/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/upload_crash.bat index 09eab9e32b5..588430e683b 100644 --- a/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/upload_crash.bat +++ b/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/upload_crash.bat @@ -1,4 +1,52 @@ @echo off +setlocal enabledelayedexpansion -java -jar "!AGENT_JAR!" uploadCrash "!JAVA_ERROR_FILE!" -if %ERRORLEVEL% EQU 0 echo "Uploaded error file \"!JAVA_ERROR_FILE!\"" +:: Check if PID is provided +if "%1"=="" ( + echo "Error: No PID provided. Running in legacy mode." + java -jar "!AGENT_JAR!" uploadCrash "!JAVA_ERROR_FILE!" 
+ if %ERRORLEVEL% EQU 0 ( + echo "Uploaded error file \"!JAVA_ERROR_FILE!\"" + ) else ( + echo "Error: Failed to upload error file \"!JAVA_ERROR_FILE!\"" + exit /b %ERRORLEVEL% + ) + exit /b 0 +) +set PID=%1 + +:: Get the directory of the script +set scriptDir=%~dp0 +:: Get the base name of the script +set scriptName=%~n0 +set configFile=%scriptDir%%scriptName%_pid%PID%.cfg + +:: Check if the configuration file exists +if not exist "%configFile%" ( + echo Error: Configuration file "%configFile%" not found + exit /b 1 +) + +:: Read the configuration file +:: The expected contents are +:: - agent: Path to the dd-java-agent.jar +:: - hs_err: Path to the hs_err log file +for /f "tokens=1,2 delims=: " %%a in (%configFile%.cfg) do ( + set %%a=%%b +) + +:: Debug: Print the loaded values (Optional) +echo Agent Jar: %agent% +echo Error Log: %hs_err% +echo PID: %PID% + +:: Execute the Java command with the loaded values +java -jar "%agent%" uploadCrash "%hs_err%" +set RC=%ERRORLEVEL% +del "%configFile%" :: Clean up the configuration file +if %RC% EQU 0 ( + echo "Error file %hs_err% was uploaded successfully" +) else ( + echo "Error: Failed to upload error file %hs_err%" + exit /b %RC% +) diff --git a/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/upload_crash.sh b/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/upload_crash.sh index 31e1e05dbea..160609e259d 100644 --- a/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/upload_crash.sh +++ b/dd-java-agent/agent-crashtracking/src/main/resources/com/datadog/crashtracking/upload_crash.sh @@ -1,8 +1,53 @@ -#!/bin/bash +#!/usr/bin/env bash -set -euo pipefail +set +e # Disable exit on error -java -jar "!AGENT_JAR!" uploadCrash "!JAVA_ERROR_FILE!" -if [ $? -eq 0 ]; then - echo "Uploaded error file \"!JAVA_ERROR_FILE!\"" +# Check if PID is provided +if [ -z "$1" ]; then + echo "Warn: No PID provided. Running in legacy mode." 
+ java -jar "!AGENT_JAR!" uploadCrash "!JAVA_ERROR_FILE!" + if [ $? -eq 0 ]; then + echo "Error file !JAVA_ERROR_FILE! was uploaded successfully" + else + echo "Error: Failed to upload error file \"!JAVA_ERROR_FILE!\"" + exit 1 + fi + exit 0 +fi + +HERE="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # Get the directory of the script +PID=$1 + +# Get the base name of the script +scriptName=$(basename "$0" .sh) +configFile="${HERE}/${scriptName}_pid${PID}.cfg" +if [ ! -f "$configFile" ]; then + echo "Error: Configuration file not found: $configFile" + exit 1 +fi + +# Read the configuration file +# The expected contents are: +# - agent: Path to the agent jar +# - hs_err: Path to the hs_err log file +declare -A config +while IFS="=" read -r key value; do + config["$key"]="$value" +done < "$configFile" + +# Debug: Print the loaded values (Optional) +echo "Agent Jar: ${config[agent]}" +echo "Error Log: ${config[hs_err]}" +echo "PID: $PID" + +# Execute the Java command with the loaded values +java -jar "${config[agent]}" uploadCrash "${config[hs_err]}" +RC=$? 
+rm -f ${configFile} # Remove the configuration file + +if [ $RC -eq 0 ]; then + echo "Error file ${config[hs_err]} was uploaded successfully" +else + echo "Error: Failed to upload error file ${config[hs_err]}" + exit $RC fi diff --git a/dd-java-agent/agent-crashtracking/src/test/java/com/datadog/crashtracking/ScriptInitializerTest.java b/dd-java-agent/agent-crashtracking/src/test/java/com/datadog/crashtracking/ScriptInitializerTest.java index 04340608786..c3d6d6a37f0 100644 --- a/dd-java-agent/agent-crashtracking/src/test/java/com/datadog/crashtracking/ScriptInitializerTest.java +++ b/dd-java-agent/agent-crashtracking/src/test/java/com/datadog/crashtracking/ScriptInitializerTest.java @@ -12,10 +12,13 @@ import java.nio.file.attribute.PosixFilePermissions; import java.util.Comparator; import java.util.List; +import java.util.stream.Stream; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; public class ScriptInitializerTest { @@ -28,24 +31,36 @@ void setup() throws Exception { @AfterEach void teardown() throws Exception { - Files.walk(tempDir).sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); + try (Stream fileStream = Files.walk(tempDir)) { + fileStream.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); + } Files.deleteIfExists(tempDir); } @Test - void testSanity() { - assertDoesNotThrow(() -> ScriptInitializer.initialize(null, null)); + void testCrashUploaderSanity() { + assertDoesNotThrow(() -> ScriptInitializer.initializeCrashUploader(null, null)); assertDoesNotThrow( - () -> ScriptInitializer.initialize(tempDir.resolve("dummy.sh").toString(), null)); - assertDoesNotThrow(() -> ScriptInitializer.initialize(null, "hs_err.log")); + () -> + 
ScriptInitializer.initializeCrashUploader( + tempDir.resolve("dummy.sh").toString(), null)); + assertDoesNotThrow(() -> ScriptInitializer.initializeCrashUploader(null, "hs_err.log")); + } + + @Test + void testOomeNotifierSanity() { + assertDoesNotThrow(() -> ScriptInitializer.initializeOOMENotifier(null)); + assertDoesNotThrow( + () -> ScriptInitializer.initializeOOMENotifier(tempDir.resolve("dummy.sh").toString())); } @ParameterizedTest - @ValueSource(strings = {"dd_crash_uploader.sh", "dd_crash_uploader.bat"}) - void testInitializationSuccess(String target) throws IOException { + @MethodSource("crashTrackingScripts") + void testCrashUploaderInitializationSuccess(String target, String pidArg) + throws IOException, InterruptedException { Path file = tempDir.resolve(target); String hsErrFile = "/tmp/hs_err.log"; - ScriptInitializer.initialize(file.toString(), hsErrFile); + ScriptInitializer.initializeCrashUploader(file + pidArg, hsErrFile); assertTrue(Files.exists(file), "File " + file + " should have been created"); List lines = Files.readAllLines(file); assertFalse(lines.isEmpty(), "File " + file + " is expected to be non-empty"); @@ -53,20 +68,49 @@ void testInitializationSuccess(String target) throws IOException { assertTrue(lines.stream().anyMatch(l -> l.contains(hsErrFile))); } + private static Stream crashTrackingScripts() { + return Stream.of( + Arguments.of("dd_crash_uploader.sh", ""), + Arguments.of("dd_crash_uploader.bat", ""), + Arguments.of("dd_crash_uploader.sh", " %p"), + Arguments.of("dd_crash_uploader.bat", " %p")); + } + + @ParameterizedTest + @ValueSource(strings = {"dd_oome_notifier.sh", "dd_oome_notifier.bat"}) + void testOomeNotifierInitializationSuccess(String target) + throws IOException, InterruptedException { + Path file = tempDir.resolve(target); + ScriptInitializer.initializeOOMENotifier(file + " %p"); + assertTrue(Files.exists(file), "File " + file + " should have been created"); + List lines = Files.readAllLines(file); + 
assertFalse(lines.isEmpty(), "File " + file + " is expected to be non-empty"); + // sanity to check the placeholder was properly replaced + assertTrue(lines.stream().noneMatch(l -> l.contains("!TAGS!"))); + } + @Test - void testNoInitialization() throws IOException { + void testCrashUploaderNoInitialization() throws IOException { // the initializer needs a particular script name to kick-in Path file = tempDir.resolve("some_other_script.sh"); String hsErrFile = "/tmp/hs_err.log"; - ScriptInitializer.initialize(file.toString(), hsErrFile); + ScriptInitializer.initializeCrashUploader(file.toString(), hsErrFile); assertFalse(Files.exists(file), "File " + file + " should not have been created"); } @Test - void testInitializationExisting() throws IOException { - Path file = tempDir.resolve("dd_crash_uploader.sh"); + void testOomeNotifierNoInitialization() throws IOException { + // the initializer needs a particular script name to kick-in + Path file = tempDir.resolve("some_other_script.sh"); + ScriptInitializer.initializeOOMENotifier(file.toString()); + assertFalse(Files.exists(file), "File " + file + " should not have been created"); + } + + @Test + void testOomeNotifierInitializationExisting() throws IOException { + Path file = tempDir.resolve("dd_oome_notifier.sh"); Files.createFile(file); - ScriptInitializer.initialize(file.toString(), "/tmp/hs_err.log"); + ScriptInitializer.initializeOOMENotifier(file.toString()); assertTrue(Files.exists(file), "File " + file + " should not have been removed"); assertTrue( Files.readAllLines(file).isEmpty(), @@ -74,9 +118,9 @@ void testInitializationExisting() throws IOException { } @Test - void testNoErrFileSpec() throws IOException { + void testCrashUploaderNoErrFileSpec() throws IOException { Path file = tempDir.resolve("dd_crash_uploader.sh"); - ScriptInitializer.initialize(file.toString(), ""); + ScriptInitializer.initializeCrashUploader(file.toString(), ""); assertTrue(Files.exists(file), "File " + file + " should have been 
created"); // sanity to check the crash log file was properly replaced in the script List lines = Files.readAllLines(file); @@ -86,10 +130,19 @@ void testNoErrFileSpec() throws IOException { } @Test - void testInvalidFolder() throws IOException { + void testCrashUploaderInvalidFolder() throws IOException { Files.setPosixFilePermissions(tempDir, PosixFilePermissions.fromString("r-x------")); Path file = tempDir.resolve("dd_crash_uploader.sh"); assertThrows( - IOException.class, () -> ScriptInitializer.initialize(file.toString(), "/tmp/hs_err.log")); + IOException.class, + () -> ScriptInitializer.initializeCrashUploader(file.toString(), "/tmp/hs_err.log")); + } + + @Test + void testOomeInitializeInvalidFolder() throws IOException { + Files.setPosixFilePermissions(tempDir, PosixFilePermissions.fromString("r-x------")); + Path file = tempDir.resolve("dd_oome_notifier.sh"); + assertThrows( + IOException.class, () -> ScriptInitializer.initializeOOMENotifier(file.toString() + " %p")); } } diff --git a/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/AgentCLI.java b/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/AgentCLI.java index 7a8ad19b3e5..5d8c2adb6cb 100644 --- a/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/AgentCLI.java +++ b/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/AgentCLI.java @@ -1,6 +1,7 @@ package datadog.trace.agent.tooling; import com.datadog.crashtracking.CrashUploader; +import com.datadog.crashtracking.OOMENotifier; import datadog.trace.agent.tooling.bytebuddy.SharedTypePools; import datadog.trace.agent.tooling.bytebuddy.matcher.HierarchyMatchers; import datadog.trace.bootstrap.Agent; @@ -84,6 +85,10 @@ public static void uploadCrash(final String[] args) throws Exception { } } + public static void sendOomeEvent(String taglist) throws Exception { + OOMENotifier.sendOomeEvent(taglist); + } + public static void scanDependencies(final String[] args) throws 
Exception { Class depClass = Class.forName( diff --git a/dd-java-agent/src/main/java/datadog/trace/bootstrap/AgentJar.java b/dd-java-agent/src/main/java/datadog/trace/bootstrap/AgentJar.java index dfa2ec53bba..77895e2873b 100644 --- a/dd-java-agent/src/main/java/datadog/trace/bootstrap/AgentJar.java +++ b/dd-java-agent/src/main/java/datadog/trace/bootstrap/AgentJar.java @@ -27,6 +27,9 @@ public static void main(final String[] args) { case "uploadCrash": uploadCrash(args); break; + case "sendOomeEvent": + sendOomeEvent(args); + break; case "scanDependencies": scanDependencies(args); break; @@ -101,6 +104,13 @@ private static void uploadCrash(final String[] args) throws Exception { .invoke(null, new Object[] {Arrays.copyOfRange(args, 1, args.length)}); } + private static void sendOomeEvent(final String[] args) throws Exception { + if (args.length < 2) { /* args[1] is read below, so at least two arguments are required (same guard as scanDependencies) */ + throw new IllegalArgumentException("unexpected arguments"); + } + installAgentCLI().getMethod("sendOomeEvent", String.class).invoke(null, args[1]); + } + private static void scanDependencies(final String[] args) throws Exception { if (args.length < 2) { throw new IllegalArgumentException("missing path"); diff --git a/dd-smoke-tests/crashtracking/build.gradle b/dd-smoke-tests/crashtracking/build.gradle new file mode 100644 index 00000000000..b019fc5417a --- /dev/null +++ b/dd-smoke-tests/crashtracking/build.gradle @@ -0,0 +1,30 @@ +plugins { + id "com.github.johnrengelman.shadow" +} + +ext { + excludeJdk = ['IBM8'] +} + +apply from: "$rootDir/gradle/java.gradle" + +description = 'Crashtracking Integration Tests.' 
+ +jar { + manifest { + attributes('Main-Class': 'datadog.smoketest.crashtracking.CrashtrackingTestApplication') + } +} + +dependencies { + testImplementation project(':dd-smoke-tests') + testImplementation project(':dd-java-agent:agent-profiling:profiling-testing') + testImplementation libs.bundles.junit5 + testImplementation libs.bundles.mockito +} + +tasks.withType(Test).configureEach { + dependsOn "shadowJar" + jvmArgs "-Ddatadog.smoketest.app.shadowJar.path=${tasks.shadowJar.archiveFile.get()}" +} + diff --git a/dd-smoke-tests/crashtracking/src/main/java/datadog/smoketest/crashtracking/CrashtrackingTestApplication.java b/dd-smoke-tests/crashtracking/src/main/java/datadog/smoketest/crashtracking/CrashtrackingTestApplication.java new file mode 100644 index 00000000000..65b3305345e --- /dev/null +++ b/dd-smoke-tests/crashtracking/src/main/java/datadog/smoketest/crashtracking/CrashtrackingTestApplication.java @@ -0,0 +1,48 @@ +package datadog.smoketest.crashtracking; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; + +public class CrashtrackingTestApplication { + public static void main(String[] args) throws Exception { + if (args.length == 0) { + throw new RuntimeException("Expecting the control file as an argument"); + } + System.out.println("=== CrashtrackingTestApplication ==="); + CountDownLatch latch = new CountDownLatch(1); + + Thread t = + new Thread( + () -> { + Path scriptPath = Paths.get(args[0]); + while (!Files.exists(scriptPath)) { + System.out.println("Waiting for the script " + scriptPath + " to be created..."); + LockSupport.parkNanos(1_000_000_000L); + } + latch.countDown(); + }); + t.setDaemon(true); + t.start(); + + System.out.println("Waiting for initialization..."); + latch.await(5, TimeUnit.MINUTES); + + // let's provoke 
OOME + List buffer = new ArrayList<>(); + int size = 1; + while (size < 1024) { + buffer.add(new byte[size * 1024 * 1024]); + System.out.println("Allocated " + size + "MB"); + if (size < 256) { + size *= 2; + } + } + System.out.println(buffer.size()); + } +} diff --git a/dd-smoke-tests/crashtracking/src/test/java/datadog/smoketest/CrashtrackingSmokeTest.java b/dd-smoke-tests/crashtracking/src/test/java/datadog/smoketest/CrashtrackingSmokeTest.java new file mode 100644 index 00000000000..549895b57b8 --- /dev/null +++ b/dd-smoke-tests/crashtracking/src/test/java/datadog/smoketest/CrashtrackingSmokeTest.java @@ -0,0 +1,367 @@ +package datadog.smoketest; + +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeFalse; + +import datadog.trace.api.Platform; +import java.io.BufferedReader; +import java.io.File; +import java.io.InputStreamReader; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Comparator; +import java.util.stream.Stream; +import okhttp3.mockwebserver.Dispatcher; +import okhttp3.mockwebserver.MockResponse; +import okhttp3.mockwebserver.MockWebServer; +import okhttp3.mockwebserver.RecordedRequest; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class CrashtrackingSmokeTest { + private MockWebServer tracingServer; + + @BeforeAll + static void setupAll() { + // Only Hotspot based implementation are supported + assumeFalse(Platform.isJ9()); + } + + private Path tempDir; + + @BeforeEach + void setup() throws Exception { + tempDir = Files.createTempDirectory("dd-smoketest-"); + + tracingServer = new MockWebServer(); + tracingServer.setDispatcher( + new Dispatcher() { + @Override + public MockResponse dispatch(final RecordedRequest 
request) throws InterruptedException { + return new MockResponse().setResponseCode(200); + } + }); + // tracingServer.start(8126); + } + + @AfterEach + void teardown() throws Exception { + tracingServer.shutdown(); + + try (Stream fileStream = Files.walk(tempDir)) { + fileStream.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); + } + Files.deleteIfExists(tempDir); + } + + private static String javaPath() { + final String separator = FileSystems.getDefault().getSeparator(); + return System.getProperty("java.home") + separator + "bin" + separator + "java"; + } + + private static String appShadowJar() { + return System.getProperty("datadog.smoketest.app.shadowJar.path"); + } + + private static String agentShadowJar() { + return System.getProperty("datadog.smoketest.agent.shadowJar.path"); + } + + private static String getExtension() { + return Platform.isWindows() ? "bat" : "sh"; + } + + @Test + void testCrashTracking() throws Exception { + Path script = tempDir.resolve("dd_crash_uploader." 
+ getExtension()); + String onErrorValue = script + " %p"; + String errorFile = tempDir.resolve("hs_err.log").toString(); + + ProcessBuilder pb = + new ProcessBuilder( + Arrays.asList( + javaPath(), + "-javaagent:" + agentShadowJar(), + "-Xmx96m", + "-Xms96m", + "-XX:OnError=" + onErrorValue, + "-XX:ErrorFile=" + errorFile, + "-XX:+CrashOnOutOfMemoryError", // Use OOME to trigger crash + "-Ddd.dogstatsd.start-delay=0", // Minimize the delay to initialize JMX and create + // the scripts + "-Ddd.trace.enabled=false", + "-jar", + appShadowJar(), + script.toString())); + pb.environment().put("DD_TRACE_AGENT_PORT", String.valueOf(tracingServer.getPort())); + StringBuilder stdoutStr = new StringBuilder(); + StringBuilder stderrStr = new StringBuilder(); + + Process p = pb.start(); + Thread stdout = + new Thread( + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getInputStream()))) { + br.lines() + .forEach( + l -> { + System.out.println(l); + stdoutStr.append(l).append("\n"); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + Thread stderr = + new Thread( + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getErrorStream()))) { + br.lines() + .forEach( + l -> { + System.err.println(l); + stderrStr.append(l).append("\n"); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + stdout.setDaemon(true); + stderr.setDaemon(true); + stdout.start(); + stderr.start(); + + assertNotEquals(0, p.waitFor(), "Application should have crashed"); + + assertTrue(stdoutStr.toString().contains(" was uploaded successfully")); + assertTrue( + stderrStr + .toString() + .contains( + "com.datadog.crashtracking.CrashUploader - Successfully uploaded the crash files")); + } + + @Test + void testCrashTrackingLegacy() throws Exception { + Path script = tempDir.resolve("dd_crash_uploader." 
+ getExtension()); + String onErrorValue = script.toString(); + String errorFile = tempDir.resolve("hs_err.log").toString(); + + ProcessBuilder pb = + new ProcessBuilder( + Arrays.asList( + javaPath(), + "-javaagent:" + agentShadowJar(), + "-Xmx96m", + "-Xms96m", + "-XX:OnError=" + onErrorValue, + "-XX:ErrorFile=" + errorFile, + "-XX:+CrashOnOutOfMemoryError", // Use OOME to trigger crash + "-Ddd.dogstatsd.start-delay=0", // Minimize the delay to initialize JMX and create + // the scripts + "-Ddd.trace.enabled=false", + "-jar", + appShadowJar(), + script.toString())); + pb.environment().put("DD_TRACE_AGENT_PORT", String.valueOf(tracingServer.getPort())); + StringBuilder stdoutStr = new StringBuilder(); + StringBuilder stderrStr = new StringBuilder(); + + Process p = pb.start(); + Thread stdout = + new Thread( + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getInputStream()))) { + br.lines() + .forEach( + l -> { + System.out.println(l); + stdoutStr.append(l).append("\n"); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + Thread stderr = + new Thread( + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getErrorStream()))) { + br.lines() + .forEach( + l -> { + System.err.println(l); + stderrStr.append(l).append("\n"); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + stdout.setDaemon(true); + stderr.setDaemon(true); + stdout.start(); + stderr.start(); + + assertNotEquals(0, p.waitFor(), "Application should have crashed"); + + assertTrue(stdoutStr.toString().contains(" was uploaded successfully")); + assertTrue( + stderrStr + .toString() + .contains( + "com.datadog.crashtracking.CrashUploader - Successfully uploaded the crash files")); + } + + @Test + void testOomeTracking() throws Exception { + Path script = tempDir.resolve("dd_oome_notifier." 
+ getExtension()); + String onErrorValue = script + " %p"; + String errorFile = tempDir.resolve("hs_err_pid%p.log").toString(); + + ProcessBuilder pb = + new ProcessBuilder( + Arrays.asList( + javaPath(), + "-javaagent:" + agentShadowJar(), + "-XX:OnOutOfMemoryError=" + onErrorValue, + "-XX:ErrorFile=" + errorFile, + "-XX:+CrashOnOutOfMemoryError", // Use OOME to trigger crash + "-Ddd.dogstatsd.start-delay=0", // Minimize the delay to initialize JMX and create + // the scripts + "-Ddd.trace.enabled=false", + "-jar", + appShadowJar(), + script.toString())); + StringBuilder stdoutStr = new StringBuilder(); + StringBuilder stderrStr = new StringBuilder(); + + Process p = pb.start(); + Thread stdout = + new Thread( + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getInputStream()))) { + br.lines() + .forEach( + l -> { + System.out.println(l); + stdoutStr.append(l).append("\n"); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + Thread stderr = + new Thread( + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getErrorStream()))) { + br.lines() + .forEach( + l -> { + System.err.println(l); + stderrStr.append(l).append("\n"); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + stdout.setDaemon(true); + stderr.setDaemon(true); + stdout.start(); + stderr.start(); + + assertNotEquals(0, p.waitFor(), "Application should have crashed"); + + assertTrue( + stderrStr.toString().contains("com.datadog.crashtracking.OOMENotifier - OOME event sent")); + assertTrue(stdoutStr.toString().contains("OOME Event generated successfully")); + } + + @Test + void testCombineTracking() throws Exception { + Path errorScript = tempDir.resolve("dd_crash_uploader." + getExtension()); + Path oomeScript = tempDir.resolve("dd_oome_notifier." 
+ getExtension()); + String onErrorValue = errorScript + " %p"; + String onOomeValue = oomeScript + " %p"; + String errorFile = tempDir.resolve("hs_err.log").toString(); + + ProcessBuilder pb = + new ProcessBuilder( + Arrays.asList( + javaPath(), + "-javaagent:" + agentShadowJar(), + "-XX:OnOutOfMemoryError=" + onOomeValue, + "-XX:OnError=" + onErrorValue, + "-XX:ErrorFile=" + errorFile, + "-XX:+CrashOnOutOfMemoryError", // Use OOME to trigger crash + "-Ddd.dogstatsd.start-delay=0", // Minimize the delay to initialize JMX and create + // the scripts + "-Ddd.trace.enabled=false", + "-jar", + appShadowJar(), + oomeScript.toString())); + pb.environment().put("DD_TRACE_AGENT_PORT", String.valueOf(tracingServer.getPort())); + StringBuilder stdoutStr = new StringBuilder(); + StringBuilder stderrStr = new StringBuilder(); + + Process p = pb.start(); + Thread stdout = + new Thread( + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getInputStream()))) { + br.lines() + .forEach( + l -> { + System.out.println(l); + stdoutStr.append(l).append("\n"); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + Thread stderr = + new Thread( + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getErrorStream()))) { + br.lines() + .forEach( + l -> { + System.err.println(l); + stderrStr.append(l).append("\n"); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + stdout.setDaemon(true); + stderr.setDaemon(true); + stdout.start(); + stderr.start(); + + assertNotEquals(0, p.waitFor(), "Application should have crashed"); + + // Crash uploader did get triggered + assertTrue(stdoutStr.toString().contains(" was uploaded successfully")); + assertTrue( + stderrStr + .toString() + .contains( + "com.datadog.crashtracking.CrashUploader - Successfully uploaded the crash files")); + + // OOME notifier did get triggered + assertTrue( + 
stderrStr.toString().contains("com.datadog.crashtracking.OOMENotifier - OOME event sent")); + assertTrue(stdoutStr.toString().contains("OOME Event generated successfully")); + } +} diff --git a/settings.gradle b/settings.gradle index 9f733fb33b7..e906f7859bd 100644 --- a/settings.gradle +++ b/settings.gradle @@ -95,6 +95,7 @@ include ':dd-smoke-tests:armeria-grpc' include ':dd-smoke-tests:asm-standalone-billing' include ':dd-smoke-tests:backend-mock' include ':dd-smoke-tests:cli' +include ':dd-smoke-tests:crashtracking' include ':dd-smoke-tests:custom-systemloader' include ':dd-smoke-tests:dynamic-config' include ':dd-smoke-tests:field-injection'