Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added (initial) support for compressing spans #2477

Merged
merged 17 commits into from
Mar 3, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ endif::[]
===== Features
* Added support for setting service name and version for a transaction via the public api - {pull}2451[#2451]
* Added support for en-/disabling each public annotation on each own - {pull}2472[#2472]
* Added support for compressing spans - {pull}2477[#2477]

[float]
===== Performance improvements
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package co.elastic.apm.agent.configuration;

import co.elastic.apm.agent.configuration.converter.TimeDuration;
import co.elastic.apm.agent.configuration.converter.TimeDurationValueConverter;
import org.stagemonitor.configuration.ConfigurationOption;
import org.stagemonitor.configuration.ConfigurationOptionProvider;

public class SpanConfiguration extends ConfigurationOptionProvider {

public static final String HUGE_TRACES_CATEGORY = "Huge Traces";

private final ConfigurationOption<Boolean> spanCompressionEnabled = ConfigurationOption.booleanOption()
tobiasstadler marked this conversation as resolved.
Show resolved Hide resolved
.key("span_compression_enabled")
.configurationCategory(HUGE_TRACES_CATEGORY)
.tags("added[1.30.0]", "internal")
.description("Setting this option to true will enable span compression feature.\n" +
"Span compression reduces the collection, processing, and storage overhead, and removes clutter from the UI. " +
"The tradeoff is that some information such as DB statements of all the compressed spans will not be collected.")
.dynamic(true)
.buildWithDefault(false);

private final ConfigurationOption<TimeDuration> spanCompressionExactMatchMaxDuration = TimeDurationValueConverter.durationOption("ms")
.key("span_compression_exact_match_max_duration")
.configurationCategory(HUGE_TRACES_CATEGORY)
.tags("added[1.30.0]", "internal")
tobiasstadler marked this conversation as resolved.
Show resolved Hide resolved
.description("Consecutive spans that are exact match and that are under this threshold will be compressed into a single composite span. " +
"This option does not apply to composite spans. This reduces the collection, processing, and storage overhead, and removes clutter from the UI. " +
"The tradeoff is that the DB statements of all the compressed spans will not be collected.")
.dynamic(true)
.buildWithDefault(TimeDuration.of("50ms"));

private final ConfigurationOption<TimeDuration> spanCompressionSameKindMaxDuration = TimeDurationValueConverter.durationOption("ms")
tobiasstadler marked this conversation as resolved.
Show resolved Hide resolved
.key("span_compression_same_kind_max_duration")
.configurationCategory(HUGE_TRACES_CATEGORY)
.tags("added[1.30.0]", "internal")
.description("Consecutive spans to the same destination that are under this threshold will be compressed into a single composite span. " +
"This option does not apply to composite spans. This reduces the collection, processing, and storage overhead, and removes clutter from the UI. " +
"The tradeoff is that the DB statements of all the compressed spans will not be collected.")
.dynamic(true)
.buildWithDefault(TimeDuration.of("5ms"));

public boolean isSpanCompressionEnabled() {
return spanCompressionEnabled.get();
}

public TimeDuration getSpanCompressionExactMatchMaxDuration() {
return spanCompressionExactMatchMaxDuration.get();
}

public TimeDuration getSpanCompressionSameKindMaxDuration() {
return spanCompressionSameKindMaxDuration.get();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ public long getMillis() {
return durationMs;
}

public long getMicros() {
return 1000 * durationMs;
}

@Override
public String toString() {
return durationString;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -383,18 +383,20 @@ public void endSpan(Span span) {
span.decrementReferences();
return;
}
if (span.getDuration() < coreConfiguration.getSpanMinDuration().getMillis() * 1000) {
logger.debug("Span faster than span_min_duration. Request discarding {}", span);
span.requestDiscarding();
}
if (span.isDiscarded()) {
logger.debug("Discarding span {}", span);
Transaction transaction = span.getTransaction();
if (transaction != null) {
transaction.getSpanCount().getDropped().incrementAndGet();
if (!span.isComposite()) {
if (span.getDuration() < coreConfiguration.getSpanMinDuration().getMicros()) {
logger.debug("Span faster than span_min_duration. Request discarding {}", span);
span.requestDiscarding();
}
if (span.isDiscarded()) {
logger.debug("Discarding span {}", span);
Transaction transaction = span.getTransaction();
if (transaction != null) {
transaction.getSpanCount().getDropped().incrementAndGet();
}
span.decrementReferences();
return;
}
span.decrementReferences();
return;
}
reportSpan(span);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;

public abstract class AbstractSpan<T extends AbstractSpan<T>> implements Recyclable {
public static final int PRIO_USER_SUPPLIED = 1000;
Expand Down Expand Up @@ -105,6 +106,8 @@ public abstract class AbstractSpan<T extends AbstractSpan<T>> implements Recycla

private boolean hasCapturedExceptions;

protected final AtomicReference<Span> bufferedSpan = new AtomicReference<>();

public int getReferenceCount() {
return references.get();
}
Expand Down Expand Up @@ -458,6 +461,12 @@ public final void end(long epochMicros) {
childDurations.onSpanEnd(epochMicros);
beforeEnd(epochMicros);
this.finished = true;
Span buffered = bufferedSpan.get();
if (buffered != null) {
if (bufferedSpan.compareAndSet(buffered, null)) {
this.tracer.endSpan(buffered);
}
}
afterEnd();
} else {
logger.warn("End has already been called: {}", this);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package co.elastic.apm.agent.impl.transaction;

import co.elastic.apm.agent.objectpool.Recyclable;

import javax.annotation.Nullable;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

public class Composite implements Recyclable {

private final AtomicInteger count = new AtomicInteger(0);

private final AtomicLong sum = new AtomicLong(0L);

@Nullable
private String compressionStrategy;

public void init(long sum, String compressionStrategy) {
this.count.set(1);
this.sum.set(sum);
this.compressionStrategy = compressionStrategy;
}

public int getCount() {
return count.get();
}

public void increaseCount() {
count.incrementAndGet();
}

public long getSum() {
return sum.get();
}

public double getSumMs() {
return sum.get() / 1000.0;
}

public void increaseSum(long delta) {
this.sum.addAndGet(delta);
}

public String getCompressionStrategy() {
return compressionStrategy;
}

@Override
public void resetState() {
this.count.set(0);
this.sum.set(0L);
this.compressionStrategy = null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package co.elastic.apm.agent.impl.transaction;

import co.elastic.apm.agent.configuration.CoreConfiguration;
import co.elastic.apm.agent.configuration.SpanConfiguration;
import co.elastic.apm.agent.impl.ElasticApmTracer;
import co.elastic.apm.agent.impl.context.Db;
import co.elastic.apm.agent.impl.context.Destination;
Expand All @@ -29,9 +30,11 @@
import co.elastic.apm.agent.objectpool.Recyclable;
import co.elastic.apm.agent.sdk.logging.Logger;
import co.elastic.apm.agent.sdk.logging.LoggerFactory;
import co.elastic.apm.agent.util.StringBuilderUtils;

import javax.annotation.Nullable;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;

public class Span extends AbstractSpan<Span> implements Recyclable {
Expand Down Expand Up @@ -65,6 +68,7 @@ public class Span extends AbstractSpan<Span> implements Recyclable {
* Any other arbitrary data captured by the agent, optionally provided by the user
*/
private final SpanContext context = new SpanContext();
private final Composite composite = new Composite();
@Nullable
private Throwable stacktrace;
@Nullable
Expand Down Expand Up @@ -151,6 +155,14 @@ public SpanContext getContext() {
return context;
}

public boolean isComposite() {
return composite.getCount() > 0;
}

public Composite getComposite() {
return composite;
}

/**
* Keywords of specific relevance in the span's domain (eg: 'db', 'template', 'ext', etc)
*/
Expand Down Expand Up @@ -292,13 +304,123 @@ public void beforeEnd(long epochMicros) {

@Override
protected void afterEnd() {
this.tracer.endSpan(this);
if (tracer.getConfig(SpanConfiguration.class).isSpanCompressionEnabled()) {
Span buffered = parent.bufferedSpan.get();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like this may lead to multiple threads concurrently having access to the buffered span instance.
In my initial POC for span compression, I tried to avoid that by atomically getting and removing the buffered span or setting it to the current span if there's no buffered span.

https://github.com/felixbarny/apm-agent-java/blob/d15716baab62eaa8ff7b677da704d9c8d780d285/apm-agent-core/src/main/java/co/elastic/apm/agent/impl/transaction/AbstractSpan.java#L190-L219

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Multiple threads can concurrently have access, that's okay here. It is resolved at the update or report stage which you can fully reason about here (nice when it's concurrent). There are 3 possible atomic concurrent updates, and in each case either it succeeds (true) or fails (false). The cases are complete

buffer -> null
true: report buffer
false: do nothing

null -> this
true: do nothing
false: report this

buffer -> this
true: report buffer
false: report this

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, concurrent access is fine here and also covered by tests.

if (!isCompressionEligible()) {
if (buffered != null) {
if (parent.bufferedSpan.compareAndSet(buffered, null)) {
this.tracer.endSpan(buffered);
}
}
this.tracer.endSpan(this);
return;
}
if (buffered == null) {
if (!parent.bufferedSpan.compareAndSet(null, this)) {
this.tracer.endSpan(this);
jackshirazi marked this conversation as resolved.
Show resolved Hide resolved
}
return;
}
if (!buffered.tryToCompress(this)) {
if (!parent.bufferedSpan.compareAndSet(buffered, this)) {
tobiasstadler marked this conversation as resolved.
Show resolved Hide resolved
this.tracer.endSpan(buffered);
tobiasstadler marked this conversation as resolved.
Show resolved Hide resolved
this.tracer.endSpan(this);
jackshirazi marked this conversation as resolved.
Show resolved Hide resolved
} else {
this.tracer.endSpan(buffered);
}
} else if (isSampled()) {
Transaction transaction = getTransaction();
if (transaction != null) {
transaction.getSpanCount().getDropped().incrementAndGet();
}
decrementReferences();
}
} else {
this.tracer.endSpan(this);
}
}

private boolean isCompressionEligible() {
return isExit() && isDiscardable() && (getOutcome() == null || getOutcome() == Outcome.SUCCESS);
}

private boolean tryToCompress(Span sibling) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like this method could also be simplified when we can guarantee that we have exclusive access to both this span and the sibling. (see above linked POC)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@felixbarny I agree your solution was the more elegant one. I decided that in the high contention case (which is really when these alternatives matter) we want to avoid the backoff and retry loop, ie that under conflict it's better to drop the compression and allow the thread to proceed asap rather than maximally try to compress. So felt this solution was acceptable

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note the loops in the try are getting maxes after compression has succeeded, so should not really cause contention, but we may need to review

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for explaining, makes sense. I agree that the approach taken in this PR is probably the one that causes less contention. I find it a little harder to reason about and verify correctness as there's more that can happen concurrently. But from what I can tell, it seems like all the cases are handled properly.

boolean canBeCompressed = isComposite() ? tryToCompressComposite(sibling) : tryToCompressRegular(sibling);
if (!canBeCompressed) {
return false;
}

long newDuration = sibling.getTimestamp() + sibling.duration - getTimestamp();
synchronized (composite) {
if (newDuration > duration) {
duration = newDuration;
}
}

composite.increaseCount();
composite.increaseSum(sibling.duration);

return true;
}

private boolean tryToCompressRegular(Span sibling) {
if (!isSameKind(sibling)) {
return false;
}

long maxExactMatchDuration = tracer.getConfig(SpanConfiguration.class).getSpanCompressionExactMatchMaxDuration().getMicros();
long maxSameKindDuration = tracer.getConfig(SpanConfiguration.class).getSpanCompressionSameKindMaxDuration().getMicros();

boolean isAlreadyComposite;
synchronized (composite) {
isAlreadyComposite = isComposite();
if (!isAlreadyComposite) {
if (StringBuilderUtils.equals(name, sibling.name)) {
if (duration <= maxExactMatchDuration && sibling.duration <= maxExactMatchDuration) {
composite.init(duration, "exact_match");
return true;
}
return false;
}

if (duration <= maxSameKindDuration && sibling.duration <= maxSameKindDuration) {
composite.init(duration, "same_kind");
name.setLength(0);
name.append("Calls to ").append(context.getDestination().getService().getResource());
return true;
}
}
}

return isAlreadyComposite && tryToCompressComposite(sibling);
}

private boolean tryToCompressComposite(Span sibling) {
switch (composite.getCompressionStrategy()) {
case "exact_match":
long maxExactMatchDuration = tracer.getConfig(SpanConfiguration.class).getSpanCompressionExactMatchMaxDuration().getMicros();
return isSameKind(sibling) && StringBuilderUtils.equals(name, sibling.name) && sibling.duration <= maxExactMatchDuration;

case "same_kind":
long maxSameKindDuration = tracer.getConfig(SpanConfiguration.class).getSpanCompressionSameKindMaxDuration().getMicros();
return isSameKind(sibling) && sibling.duration <= maxSameKindDuration;
default:
}

return false;
}

private boolean isSameKind(Span other) {
return Objects.equals(type, other.type)
&& Objects.equals(subtype, other.subtype)
&& StringBuilderUtils.equals(context.getDestination().getService().getResource(), other.context.getDestination().getService().getResource());
}

@Override
public void resetState() {
super.resetState();
context.resetState();
composite.resetState();
stacktrace = null;
type = null;
subtype = null;
Expand Down
Loading