Skip to content

Commit 5f3aeab

Browse files
authored
Crashtracking: send ping message (#9804)
* Crashtracking: send ping message
* concatenate better
* send more info to the telemetry
* Remove the possibility to send multiple files to crashtracking
* Fix tests
* Add file not found as ping message
* Put is_crash_ping in the payload
* Add more tags to ping
* Use better normalisation
* Improve smoke tests
1 parent: c553751 · commit: 5f3aeab

File tree

14 files changed, with 218 additions and 98 deletions.

dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/ConfigManager.java

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import datadog.trace.api.ProcessTags;
1010
import datadog.trace.api.WellKnownTags;
1111
import datadog.trace.util.PidHelper;
12+
import datadog.trace.util.RandomUtils;
1213
import java.io.BufferedReader;
1314
import java.io.BufferedWriter;
1415
import java.io.IOException;
@@ -31,8 +32,10 @@ public static class StoredConfig {
3132
final String tags;
3233
final String processTags;
3334
final String runtimeId;
35+
final String reportUUID;
3436

3537
StoredConfig(
38+
String reportUUID,
3639
String service,
3740
String env,
3841
String version,
@@ -45,6 +48,7 @@ public static class StoredConfig {
4548
this.tags = tags;
4649
this.processTags = processTags;
4750
this.runtimeId = runtimeId;
51+
this.reportUUID = reportUUID;
4852
}
4953

5054
public static class Builder {
@@ -54,13 +58,15 @@ public static class Builder {
5458
String tags;
5559
String processTags;
5660
String runtimeId;
61+
String reportUUID;
5762

5863
public Builder(Config config) {
5964
// get sane defaults
6065
this.service = config.getServiceName();
6166
this.env = config.getEnv();
6267
this.version = config.getVersion();
6368
this.runtimeId = config.getRuntimeId();
69+
this.reportUUID = RandomUtils.randomUUID().toString();
6470
}
6571

6672
public Builder service(String service) {
@@ -93,8 +99,14 @@ public Builder runtimeId(String runtimeId) {
9399
return this;
94100
}
95101

102+
// @VisibleForTesting
103+
Builder reportUUID(String reportUUID) {
104+
this.reportUUID = reportUUID;
105+
return this;
106+
}
107+
96108
public StoredConfig build() {
97-
return new StoredConfig(service, env, version, tags, processTags, runtimeId);
109+
return new StoredConfig(reportUUID, service, env, version, tags, processTags, runtimeId);
98110
}
99111
}
100112
}

dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashLogParser.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import datadog.crashtracking.parsers.HotspotCrashLogParser;
55

66
public final class CrashLogParser {
7-
public static CrashLog fromHotspotCrashLog(String logText) {
8-
return new HotspotCrashLogParser().parse(logText);
7+
public static CrashLog fromHotspotCrashLog(String uuid, String logText) {
8+
return new HotspotCrashLogParser().parse(uuid, logText);
99
}
1010
}

dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashUploader.java

Lines changed: 74 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,9 @@
66
import static datadog.trace.api.config.CrashTrackingConfig.CRASH_TRACKING_PROXY_USERNAME;
77
import static datadog.trace.api.config.CrashTrackingConfig.CRASH_TRACKING_UPLOAD_TIMEOUT;
88
import static datadog.trace.api.config.CrashTrackingConfig.CRASH_TRACKING_UPLOAD_TIMEOUT_DEFAULT;
9+
import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;
10+
import static datadog.trace.util.TraceUtils.normalizeServiceName;
11+
import static datadog.trace.util.TraceUtils.normalizeTagValue;
912

1013
import com.squareup.moshi.JsonWriter;
1114
import datadog.common.container.ContainerInfo;
@@ -27,7 +30,6 @@
2730
import java.time.Instant;
2831
import java.util.Arrays;
2932
import java.util.HashMap;
30-
import java.util.List;
3133
import java.util.Map;
3234
import java.util.Scanner;
3335
import java.util.concurrent.TimeUnit;
@@ -61,8 +63,6 @@ public final class CrashUploader {
6163

6264
private static final MediaType APPLICATION_JSON =
6365
MediaType.get("application/json; charset=utf-8");
64-
private static final MediaType APPLICATION_OCTET_STREAM =
65-
MediaType.parse("application/octet-stream");
6666

6767
private final Config config;
6868
private final ConfigManager.StoredConfig storedConfig;
@@ -114,13 +114,32 @@ public CrashUploader(@Nonnull final ConfigManager.StoredConfig storedConfig) {
114114
CRASH_TRACKING_UPLOAD_TIMEOUT, CRASH_TRACKING_UPLOAD_TIMEOUT_DEFAULT)));
115115
}
116116

117-
public void upload(@Nonnull List<Path> files) throws IOException {
118-
for (Path file : files) {
119-
uploadToLogs(file);
120-
uploadToTelemetry(file);
117+
public void notifyCrashStarted(String error) {
118+
// send a ping message to the telemetry to notify that the crash report started
119+
try (Buffer buf = new Buffer();
120+
JsonWriter writer = JsonWriter.of(buf)) {
121+
writer.beginObject();
122+
writer.name("crash_uuid").value(storedConfig.reportUUID);
123+
writer.name("kind").value("Crash ping");
124+
writer.name("current_schema_version").value("1.0");
125+
writer
126+
.name("message")
127+
.value(
128+
"Crashtracker crash ping: " + (error != null ? error : "crash processing started"));
129+
writer.endObject();
130+
handleCall(makeTelemetryRequest(makeTelemetryRequestBody(buf.readUtf8(), true)), "ping");
131+
132+
} catch (Throwable t) {
133+
log.error("Failed to send crash ping", t);
121134
}
122135
}
123136

137+
public void upload(@Nonnull Path file) throws IOException {
138+
String uuid = storedConfig.reportUUID;
139+
uploadToLogs(file);
140+
uploadToTelemetry(file, uuid);
141+
}
142+
124143
@SuppressForbidden
125144
boolean uploadToLogs(@Nonnull Path file) {
126145
try {
@@ -236,24 +255,23 @@ private String extractErrorStackTrace(String fileContent, boolean redact) {
236255
return "";
237256
}
238257

239-
private String extractErrorStackTrace(String fileContent) {
240-
return extractErrorStackTrace(fileContent, true);
241-
}
242-
243-
boolean uploadToTelemetry(@Nonnull Path file) {
258+
boolean uploadToTelemetry(@Nonnull Path file, String uuid) {
244259
try {
245260
String content = new String(Files.readAllBytes(file), Charset.defaultCharset());
246-
handleCall(makeTelemetryRequest(content));
247-
} catch (IOException e) {
248-
log.error("Failed to upload crash file: {}", file, e);
261+
CrashLog crashLog = CrashLogParser.fromHotspotCrashLog(uuid, content);
262+
if (crashLog == null) {
263+
log.error(SEND_TELEMETRY, "Failed to parse crash log with uuid {} ", uuid);
264+
return false;
265+
}
266+
handleCall(makeTelemetryRequest(makeTelemetryRequestBody(crashLog.toJson(), false)), "crash");
267+
} catch (Throwable t) {
268+
log.error("Failed to upload crash file: {}", file, t);
249269
return false;
250270
}
251271
return true;
252272
}
253273

254-
private Call makeTelemetryRequest(@Nonnull String content) throws IOException {
255-
final RequestBody requestBody = makeTelemetryRequestBody(content);
256-
274+
private Call makeTelemetryRequest(@Nonnull RequestBody requestBody) throws IOException {
257275
final Map<String, String> headers = new HashMap<>();
258276
// Set chunked transfer
259277
MediaType contentType = requestBody.contentType();
@@ -273,11 +291,9 @@ private Call makeTelemetryRequest(@Nonnull String content) throws IOException {
273291
.build());
274292
}
275293

276-
private RequestBody makeTelemetryRequestBody(@Nonnull String content) throws IOException {
277-
CrashLog crashLog = CrashLogParser.fromHotspotCrashLog(content);
278-
if (crashLog == null) {
279-
throw new IOException("Failed to parse crash log");
280-
}
294+
private RequestBody makeTelemetryRequestBody(@Nonnull String payload, boolean isPing)
295+
throws IOException {
296+
281297
try (Buffer buf = new Buffer()) {
282298
try (JsonWriter writer = JsonWriter.of(buf)) {
283299
writer.beginObject();
@@ -291,11 +307,17 @@ private RequestBody makeTelemetryRequestBody(@Nonnull String content) throws IOE
291307
writer.name("payload");
292308
writer.beginArray();
293309
writer.beginObject();
294-
writer.name("message").value(crashLog.toJson());
295-
writer.name("level").value("ERROR");
296-
writer.name("tags").value("severity:crash");
297-
writer.name("is_sensitive").value(true);
298-
writer.name("is_crash").value(true);
310+
writer.name("message").value(payload);
311+
if (isPing) {
312+
writer.name("level").value("DEBUG");
313+
writer.name("is_sensitive").value(false);
314+
writer.name("tags").value(tagsForPing(storedConfig.reportUUID));
315+
} else {
316+
writer.name("level").value("ERROR");
317+
writer.name("tags").value("severity:crash");
318+
writer.name("is_sensitive").value(true);
319+
writer.name("is_crash").value(true);
320+
}
299321
writer.endObject();
300322
writer.endArray();
301323
writer.name("application");
@@ -327,32 +349,47 @@ private RequestBody makeTelemetryRequestBody(@Nonnull String content) throws IOE
327349
}
328350
}
329351

330-
private void handleCall(final Call call) {
352+
private String tagsForPing(String uuid) {
353+
final StringBuilder tags = new StringBuilder("is_crash_ping:true");
354+
tags.append(",").append("language_name:jvm");
355+
tags.append(",").append("service:").append(normalizeServiceName(storedConfig.service));
356+
tags.append(",")
357+
.append("language_version:")
358+
.append(normalizeTagValue(SystemProperties.getOrDefault("java.version", "unknown")));
359+
tags.append(",").append("tracer_version:").append(normalizeTagValue(VersionInfo.VERSION));
360+
tags.append(",").append("uuid:").append(uuid);
361+
return (tags.toString());
362+
}
363+
364+
private void handleCall(final Call call, String kind) {
331365
try (Response response = call.execute()) {
332-
handleSuccess(call, response);
333-
} catch (IOException e) {
334-
handleFailure(e);
366+
handleSuccess(call, response, kind);
367+
} catch (Throwable t) {
368+
handleFailure(t, kind);
335369
}
336370
}
337371

338-
private void handleSuccess(final Call call, final Response response) throws IOException {
372+
private void handleSuccess(final Call call, final Response response, String kind)
373+
throws IOException {
339374
if (response.isSuccessful()) {
340375
log.info(
341-
"Successfully uploaded the crash files to {}, code = {} \"{}\"",
376+
"Successfully uploaded the crash {} to {}, code = {} \"{}\"",
377+
kind,
342378
call.request().url(),
343379
response.code(),
344380
response.message());
345381
} else {
346382
log.error(
347-
"Failed to upload crash files to {}, code = {} \"{}\", body = \"{}\"",
383+
"Failed to upload crash {} to {}, code = {} \"{}\", body = \"{}\"",
384+
kind,
348385
call.request().url(),
349386
response.code(),
350387
response.message(),
351388
response.body() != null ? response.body().string().trim() : "<null>");
352389
}
353390
}
354391

355-
private void handleFailure(final IOException exception) {
356-
log.error("Failed to upload crash files, got exception", exception);
392+
private void handleFailure(final Throwable exception, String kind) {
393+
log.error("Failed to upload crash {}, got exception", kind, exception);
357394
}
358395
}

dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/dto/CrashLog.java

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ public final class CrashLog {
1717
ADAPTER = moshi.adapter(CrashLog.class);
1818
}
1919

20-
public final String uuid = RandomUtils.randomUUID().toString();
20+
public final String uuid;
2121

2222
@Json(name = "data_schema_version")
2323
public final String dataSchemaVersion;
@@ -37,13 +37,15 @@ public final class CrashLog {
3737
public final int version = VERSION;
3838

3939
public CrashLog(
40+
String uuid,
4041
boolean incomplete,
4142
String timestamp,
4243
ErrorData error,
4344
Metadata metadata,
4445
OSInfo osInfo,
4546
ProcInfo procInfo,
4647
String dataSchemaVersion) {
48+
this.uuid = uuid != null ? uuid : RandomUtils.randomUUID().toString();
4749
this.incomplete = incomplete;
4850
this.timestamp = timestamp;
4951
this.error = error;
@@ -87,7 +89,7 @@ public int hashCode() {
8789
}
8890

8991
public boolean equalsForTest(Object o) {
90-
// for tests, we need to ignore UUID, OSInfo and Metadata part
92+
// for tests, we need to ignore OSInfo and Metadata part
9193
if (this == o) {
9294
return true;
9395
}
@@ -97,6 +99,7 @@ public boolean equalsForTest(Object o) {
9799
CrashLog crashLog = (CrashLog) o;
98100
return incomplete == crashLog.incomplete
99101
&& version == crashLog.version
102+
&& Objects.equals(uuid, crashLog.uuid)
100103
&& Objects.equals(timestamp, crashLog.timestamp)
101104
&& Objects.equals(error, crashLog.error)
102105
&& Objects.equals(procInfo, crashLog.procInfo)

dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/parsers/HotspotCrashLogParser.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ private StackFrame parseLine(String line) {
115115
return null;
116116
}
117117

118-
public CrashLog parse(String crashLog) {
118+
public CrashLog parse(String uuid, String crashLog) {
119119
String signal = null;
120120
String pid = null;
121121
List<StackFrame> frames = new ArrayList<>();
@@ -213,7 +213,7 @@ public CrashLog parse(String crashLog) {
213213
SystemProperties.get("os.name"),
214214
SemanticVersion.of(SystemProperties.get("os.version")));
215215
ProcInfo procInfo = pid != null ? new ProcInfo(pid) : null;
216-
return new CrashLog(false, datetime, error, metadata, osInfo, procInfo, "1.0");
216+
return new CrashLog(uuid, false, datetime, error, metadata, osInfo, procInfo, "1.0");
217217
}
218218

219219
static String dateTimeToISO(String datetime) {

0 commit comments

Comments
 (0)