Skip to content

Commit

Permalink
estimate the start time by variance (#257)
Browse files Browse the repository at this point in the history
  • Loading branch information
kkdlau committed Jun 6, 2024
1 parent e273587 commit 277caee
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// Licensed under the MIT License.
package com.microsoft.gctoolkit.aggregator;

import com.microsoft.gctoolkit.event.jvm.JVMEvent;
import com.microsoft.gctoolkit.jvm.JavaVirtualMachine;
import com.microsoft.gctoolkit.online.statistics.WelfordVarianceCalculator;
import com.microsoft.gctoolkit.time.DateTimeStamp;

/**
Expand Down Expand Up @@ -90,6 +92,8 @@ public abstract class Aggregation {

// Time of the first event; null until it has been supplied.
private DateTimeStamp timeOfFirstEvent = null;
// Time of the termination event; starts out as DateTimeStamp.baseDate().
private DateTimeStamp timeOfTermination = DateTimeStamp.baseDate();
// Accumulates the time spans passed in by updateEventFrequency; estimatedStartTime reads its value.
private final WelfordVarianceCalculator varianceCalculator = new WelfordVarianceCalculator();
// Reference time stamp used by updateEventFrequency to compute time spans; null until the first event is seen.
private DateTimeStamp timeOfLastSeenEvent = null;

/**
* Constructor for the module SPI
Expand Down Expand Up @@ -127,24 +131,27 @@ public DateTimeStamp timeOfTerminationEvent() {
}

/**
* the 0.25 is a guess as to how far the first event should be from 0.000 seconds before the time of the
* first event will be considered to be the beginning of the log file.
* Estimates the start time of the log based on the available data.
* <p>
* If the first event does not have a timestamp, the method returns the time of the first event minus the variance of GC frequency.
* <p>
* If the timestamp is present and the timestamp of the first event is greater than the variance, the method returns the timestamp minus the variance.
* However, if the resulting timestamp is negative, the method returns the time of the first event instead, since a negative timestamp is not possible.
*
* todo: The better way to do this is to calculate the variance in GC frequency and if the gap between 0.000 and
* the first event exceeds the variance, then the first event would be considered the beginning of the log file.
* otherwise, the time of the first event should be the time of the first event - the variance.
*
* @return estimate of the start of the log using the data presented (most likely is 0.000s)
* @return The estimated start time of the log based on the available data
*/
public DateTimeStamp estimatedStartTime() {
if (timeOfFirstEvent.getTimeStamp() / timeOfTermination.getTimeStamp() > 0.25d) {
return timeOfFirstEvent;
double sd = Math.sqrt(varianceCalculator.getValue());
if (!timeOfFirstEvent.hasTimeStamp()) {
return timeOfFirstEvent.minus(sd);
}

if ( ! timeOfFirstEvent.hasDateStamp())
return new DateTimeStamp(0.0d);
else // this looks after adjusting the date stamp.
return timeOfFirstEvent.minus(timeOfFirstEvent.getTimeStamp());
final DateTimeStamp estimatedStartTime = timeOfFirstEvent.minus(sd);
if (!estimatedStartTime.hasTimeStamp()) {
return timeOfFirstEvent;
} else {
return estimatedStartTime;
}
}

/**
Expand Down Expand Up @@ -175,6 +182,16 @@ public Class<? extends Aggregator<?>> collates() {
return collates(getClass());
}

/**
 * Records the time span between the given event and the previously seen event in the variance
 * calculator.
 * <p>
 * The first event only establishes the reference point; every later event contributes the span since
 * the event before it and then becomes the new reference point, so the samples are the gaps between
 * consecutive events rather than the elapsed time since the first event.
 *
 * @param event the event whose time stamp is recorded
 */
public void updateEventFrequency(JVMEvent event) {
    final DateTimeStamp eventTime = event.getDateTimeStamp();
    if (timeOfLastSeenEvent != null) {
        varianceCalculator.update(eventTime.minus(timeOfLastSeenEvent));
    }
    // Always advance the reference point, otherwise every span would be measured from the first event.
    timeOfLastSeenEvent = eventTime;
}

/**
* Calculates the aggregator for this aggregation.
* @param clazz this Aggregation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ private void complete() {
* @param event an event to be processed
*/
public void receive(JVMEvent event) {
aggregation().updateEventFrequency(event);

if (event instanceof JVMTermination) {
aggregation().timeOfTerminationEvent(((JVMTermination) event).getTimeOfTerminationEvent());
aggregation().timeOfFirstEvent(((JVMTermination)event).getTimeOfFirstEvent());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.microsoft.gctoolkit.online.statistics;

/**
 * Signals that a statistic was requested before enough samples were supplied to compute it.
 */
public class NotEnoughSampleException extends ArithmeticException {

    /**
     * Creates the exception without a detail message.
     */
    public NotEnoughSampleException() {
    }

    /**
     * Creates the exception with the given detail message.
     *
     * @param message the detail message
     */
    public NotEnoughSampleException(String message) {
        super(message);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package com.microsoft.gctoolkit.online.statistics;

/**
 * Keeps the arithmetic mean of a sequence of samples, updated one sample at a time without storing
 * the samples themselves.
 */
public class OnlineMeanCalculator implements OnlineStatisticsCalculator {
    // long rather than int so the counter cannot wrap around on very long sample sequences
    private long numSamples = 0;
    private double mean = 0.0;

    /**
     * Folds one more sample into the running mean.
     *
     * @param sampleValue the sample to include
     */
    @Override
    public void update(double sampleValue) {
        numSamples++;
        // incremental form: move the mean towards the sample by 1/n of the distance
        mean += (sampleValue - mean) / numSamples;
    }

    /**
     * @return the mean of all samples supplied so far; 0.0 when none have been supplied
     */
    @Override
    public double getValue() {
        return mean;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.microsoft.gctoolkit.online.statistics;

/**
 * A statistic that is maintained incrementally: samples are supplied one at a time and the current
 * value of the statistic can be read at any point.
 */
public interface OnlineStatisticsCalculator {

    /**
     * Incorporates one more sample into the statistic.
     * <p>
     * A mean implementation, for instance, would adjust its running mean to account for the sample.
     *
     * @param sampleValue the sample to incorporate
     */
    void update(double sampleValue);

    /**
     * @return the current value of the statistic
     */
    double getValue();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.microsoft.gctoolkit.online.statistics;

/**
 * Computes the sample variance of a sequence of values one value at a time, keeping only a running
 * mean, a sample count and a running sum of squared deviations.
 */
public class WelfordVarianceCalculator implements OnlineStatisticsCalculator {
    private int numSamples = 0;
    private double m2 = 0.0;
    private final OnlineMeanCalculator onlineMeanCalculator = new OnlineMeanCalculator();

    /**
     * Folds one more sample into the running sum of squared deviations.
     *
     * @param sampleValue the sample to include
     */
    @Override
    public void update(double sampleValue) {
        // deviation from the mean as it stood before this sample
        final double deviationBefore = sampleValue - onlineMeanCalculator.getValue();

        onlineMeanCalculator.update(sampleValue);
        numSamples++;

        // deviation from the mean once this sample has been included
        final double deviationAfter = sampleValue - onlineMeanCalculator.getValue();

        m2 += deviationBefore * deviationAfter;
    }

    /**
     * @return the sample variance (sum of squared deviations divided by n - 1) of the samples so far
     * @throws NotEnoughSampleException if fewer than two samples have been supplied
     */
    @Override
    public double getValue() throws NotEnoughSampleException {
        if (numSamples >= 2) {
            return m2 / (numSamples - 1);
        }
        throw new NotEnoughSampleException("Variance requires at least 2 samples.");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package com.microsoft.gctoolkit.online.statistics;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Tests for {@link WelfordVarianceCalculator}: the insufficient-sample failure and the computed
 * variance for widely and narrowly spread inputs.
 */
class WelfordVarianceCalculatorTest {

    /** With no samples at all the variance is undefined and must be rejected. */
    @Test
    void noSamples() {
        WelfordVarianceCalculator calculator = new WelfordVarianceCalculator();
        assertThrows(NotEnoughSampleException.class, calculator::getValue);
    }

    /** A single sample is still not enough to compute a variance. */
    @Test
    void insufficientSamples() {
        WelfordVarianceCalculator calculator = new WelfordVarianceCalculator();
        calculator.update(1.23d);
        assertThrows(NotEnoughSampleException.class, calculator::getValue);
    }

    /** Three well separated samples. */
    @Test
    void getVariance() {
        WelfordVarianceCalculator calculator = new WelfordVarianceCalculator();
        calculator.update(1421.23);
        calculator.update(2897.34);
        calculator.update(3907.45);
        assertEquals(1563418.8054333332, calculator.getValue(), 0.0001d);
    }

    /** Three large samples that differ only in their fractional part. */
    @Test
    void getVarianceWithSmallDifference() {
        WelfordVarianceCalculator calculator = new WelfordVarianceCalculator();
        calculator.update(71899123.1273789);
        calculator.update(71899123.1378323);
        calculator.update(71899123.1478654);
        // The expected value is about 1.05e-4, so the tolerance has to be far smaller than that;
        // a tolerance of 1e-4 would also accept a variance close to zero.
        assertEquals(0.00010493893, calculator.getValue(), 1.0e-7d);
    }

}

0 comments on commit 277caee

Please sign in to comment.