Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add container.cpu.time metric #5806

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ public long GetHostCpuUsageInNanoseconds()
$"'{_procStat}' should contain whitespace separated values according to POSIX. We've failed trying to get {i}th value. File content: '{new string(stat)}'.");
}

stat = stat.Slice(next, stat.Length - next);
stat = stat.Slice(next);
}

return (long)(total / (double)_userHz * NanosecondsInSecond);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ public long GetHostCpuUsageInNanoseconds()
$"'{_procStat}' should contain whitespace separated values according to POSIX. We've failed trying to get {i}th value. File content: '{new string(stat)}'.");
}

stat = stat.Slice(next, stat.Length - next);
stat = stat.Slice(next);
}

return (long)(total / (double)_userHz * NanosecondsInSecond);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
Expand All @@ -14,6 +15,7 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
{
private const double One = 1.0;
private const long Hundred = 100L;
private const double NanosecondsInSecond = 1_000_000_000;

private readonly object _cpuLocker = new();
private readonly object _memoryLocker = new();
Expand Down Expand Up @@ -66,6 +68,7 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
var meter = meterFactory.Create(ResourceUtilizationInstruments.MeterName);
#pragma warning restore CA2000 // Dispose objects before losing scope

_ = meter.CreateObservableCounter(name: ResourceUtilizationInstruments.ContainerCpuTime, observeValues: GetCpuTime, unit: "s", description: "CPU time used by the container.");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuLimit, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: MemoryUtilization, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");
Expand Down Expand Up @@ -167,4 +170,13 @@ public Snapshot GetSnapshot()
userTimeSinceStart: TimeSpan.FromTicks((long)(cgroupTime / Hundred * _scaleRelativeToCpuRequestForTrackerApi)),
memoryUsageInBytes: memoryUsed);
}

/// <summary>
/// Produces the observations for the <see cref="ResourceUtilizationInstruments.ContainerCpuTime"/> counter.
/// </summary>
/// <returns>
/// Two measurements in seconds: the cgroup CPU usage tagged <c>cpu.mode=user</c>,
/// followed by the host CPU usage tagged <c>cpu.mode=system</c>.
/// </returns>
private IEnumerable<Measurement<double>> GetCpuTime()
{
    // Both parser readings are taken (host first) before the first measurement is produced.
    long hostNanoseconds = _parser.GetHostCpuUsageInNanoseconds();
    long cgroupNanoseconds = _parser.GetCgroupCpuUsageInNanoseconds();

    // The parser reports nanoseconds; the instrument is declared with unit "s".
    double userSeconds = cgroupNanoseconds / NanosecondsInSecond;
    double systemSeconds = hostNanoseconds / NanosecondsInSecond;

    yield return new Measurement<double>(userSeconds, [new KeyValuePair<string, object?>("cpu.mode", "user")]);
    yield return new Measurement<double>(systemSeconds, [new KeyValuePair<string, object?>("cpu.mode", "system")]);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Diagnostics.Metrics;
using System.Threading;
Expand All @@ -17,6 +18,7 @@ internal sealed class WindowsContainerSnapshotProvider : ISnapshotProvider
{
private const double One = 1.0d;
private const double Hundred = 100.0d;
private const double TicksPerSecondDouble = TimeSpan.TicksPerSecond;

private readonly Lazy<MEMORYSTATUSEX> _memoryStatus;

Expand Down Expand Up @@ -114,6 +116,7 @@ internal WindowsContainerSnapshotProvider(
#pragma warning restore CA2000 // Dispose objects before losing scope

// Container based metrics:
_ = meter.CreateObservableCounter(name: ResourceUtilizationInstruments.ContainerCpuTime, observeValues: GetCpuTime, unit: "s", description: "CPU time used by the container.");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: CpuPercentage);
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: () => MemoryPercentage(() => _processInfo.GetMemoryUsage()));

Expand Down Expand Up @@ -211,6 +214,15 @@ private double MemoryPercentage(Func<ulong> getMemoryUsage)
}
}

/// <summary>
/// Produces the observations for the <see cref="ResourceUtilizationInstruments.ContainerCpuTime"/> counter.
/// </summary>
/// <returns>
/// Two measurements in seconds: the job's total user time tagged <c>cpu.mode=user</c>,
/// followed by its total kernel time tagged <c>cpu.mode=system</c>.
/// </returns>
private IEnumerable<Measurement<double>> GetCpuTime()
{
    // The job handle stays open until the enumeration of both measurements has finished.
    using (var handle = _createJobHandleObject())
    {
        var accounting = handle.GetBasicAccountingInfo();

        // Accounting times are expressed in ticks; dividing by ticks-per-second yields seconds.
        double userSeconds = accounting.TotalUserTime / TicksPerSecondDouble;
        double kernelSeconds = accounting.TotalKernelTime / TicksPerSecondDouble;

        yield return new Measurement<double>(userSeconds, [new KeyValuePair<string, object?>("cpu.mode", "user")]);
        yield return new Measurement<double>(kernelSeconds, [new KeyValuePair<string, object?>("cpu.mode", "system")]);
    }
}

private double CpuPercentage()
{
var now = _timeProvider.GetUtcNow();
Expand Down
8 changes: 8 additions & 0 deletions src/Shared/Instruments/ResourceUtilizationInstruments.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ internal static class ResourceUtilizationInstruments
/// </summary>
public const string MeterName = "Microsoft.Extensions.Diagnostics.ResourceMonitoring";

/// <summary>
/// The name of an instrument to retrieve CPU time consumed by the specific container on all available CPU cores, measured in seconds.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableCounter{T}"/>.
/// The providers register it with unit <c>s</c> and report one measurement per CPU mode,
/// distinguished by the <c>cpu.mode</c> tag (<c>user</c> or <c>system</c>).
/// </remarks>
public const string ContainerCpuTime = "container.cpu.time";

/// <summary>
/// The name of an instrument to retrieve CPU limit consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Diagnostics.CodeAnalysis;
using System.Diagnostics.Metrics;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;
Expand Down Expand Up @@ -209,6 +210,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou

using var listener = new MeterListener();
var clock = new FakeTimeProvider(DateTimeOffset.UtcNow);
var cpuUserTime = 0.0d;
var cpuKernelTime = 0.0d;
var cpuFromGauge = 0.0d;
var cpuLimitFromGauge = 0.0d;
var cpuRequestFromGauge = 0.0d;
Expand All @@ -219,8 +222,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
object? meterScope = null;
listener.InstrumentPublished = (Instrument instrument, MeterListener meterListener)
=> OnInstrumentPublished(instrument, meterListener, meterScope);
listener.SetMeasurementEventCallback<double>((m, f, _, _)
=> OnMeasurementReceived(m, f, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
listener.SetMeasurementEventCallback<double>((m, f, tags, _)
=> OnMeasurementReceived(m, f, tags, ref cpuUserTime, ref cpuKernelTime, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
listener.Start();

using var host = FakeHost.CreateBuilder()
Expand All @@ -246,6 +249,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
Assert.Equal(0, utilization.CpuUsedPercentage);
Assert.Equal(100, utilization.MemoryUsedPercentage);
Assert.True(double.IsNaN(cpuFromGauge));
Assert.Equal(0.000102312, cpuUserTime);
Assert.Equal(0.8, cpuKernelTime);

// gauge multiplied by 100 because gauges are in range [0, 1], and utilization is in range [0, 100]
Assert.Equal(utilization.MemoryUsedPercentage, memoryFromGauge * 100);
Expand All @@ -264,6 +269,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
Assert.Equal(1, utilization.CpuUsedPercentage);
Assert.Equal(50, utilization.MemoryUsedPercentage);
Assert.Equal(0.5, cpuLimitFromGauge * 100);
Assert.Equal(0.000112312, cpuUserTime);
Assert.Equal(0.81, cpuKernelTime);
Assert.Equal(utilization.CpuUsedPercentage, cpuRequestFromGauge * 100);
Assert.Equal(utilization.MemoryUsedPercentage, memoryLimitFromGauge * 100);
Assert.Equal(utilization.CpuUsedPercentage, cpuFromGauge * 100);
Expand Down Expand Up @@ -292,6 +299,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou

using var listener = new MeterListener();
var clock = new FakeTimeProvider(DateTimeOffset.UtcNow);
var cpuUserTime = 0.0d;
var cpuKernelTime = 0.0d;
var cpuFromGauge = 0.0d;
var cpuLimitFromGauge = 0.0d;
var cpuRequestFromGauge = 0.0d;
Expand All @@ -302,8 +311,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
object? meterScope = null;
listener.InstrumentPublished = (Instrument instrument, MeterListener meterListener)
=> OnInstrumentPublished(instrument, meterListener, meterScope);
listener.SetMeasurementEventCallback<double>((m, f, _, _)
=> OnMeasurementReceived(m, f, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
listener.SetMeasurementEventCallback<double>((m, f, tags, _)
=> OnMeasurementReceived(m, f, tags, ref cpuUserTime, ref cpuKernelTime, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
listener.Start();

using var host = FakeHost.CreateBuilder()
Expand Down Expand Up @@ -351,6 +360,8 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
Assert.Equal(1, roundedCpuUsedPercentage);
Assert.Equal(50, utilization.MemoryUsedPercentage);
Assert.Equal(0.5, cpuLimitFromGauge * 100);
Assert.Equal(0.000112, cpuUserTime);
Assert.Equal(0.81, cpuKernelTime);
Assert.Equal(roundedCpuUsedPercentage, Math.Round(cpuRequestFromGauge * 100));
Assert.Equal(utilization.MemoryUsedPercentage, memoryLimitFromGauge * 100);
Assert.Equal(roundedCpuUsedPercentage, Math.Round(cpuFromGauge * 100));
Expand All @@ -369,6 +380,7 @@ private static void OnInstrumentPublished(Instrument instrument, MeterListener m
#pragma warning disable S1067 // Expressions should not be too complex
if (instrument.Name == ResourceUtilizationInstruments.ProcessCpuUtilization ||
instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization ||
instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime ||
instrument.Name == ResourceUtilizationInstruments.ContainerCpuRequestUtilization ||
instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization ||
instrument.Name == ResourceUtilizationInstruments.ContainerMemoryLimitUtilization)
Expand All @@ -378,10 +390,12 @@ private static void OnInstrumentPublished(Instrument instrument, MeterListener m
#pragma warning restore S1067 // Expressions should not be too complex
}

#pragma warning disable S107 // Methods should not have too many parameters
private static void OnMeasurementReceived(
Instrument instrument, double value,
ref double cpuFromGauge, ref double cpuLimitFromGauge, ref double cpuRequestFromGauge,
ref double memoryFromGauge, ref double memoryLimitFromGauge)
Instrument instrument, double value, ReadOnlySpan<KeyValuePair<string, object?>> tags,
ref double cpuUserTime, ref double cpuKernelTime, ref double cpuFromGauge, ref double cpuLimitFromGauge,
ref double cpuRequestFromGauge, ref double memoryFromGauge, ref double memoryLimitFromGauge)
#pragma warning restore S107 // Methods should not have too many parameters
{
if (instrument.Name == ResourceUtilizationInstruments.ProcessCpuUtilization)
{
Expand All @@ -391,6 +405,18 @@ private static void OnMeasurementReceived(
{
memoryFromGauge = value;
}
else if (instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime)
{
var tagsArray = tags.ToArray();
if (tagsArray.Contains(new KeyValuePair<string, object?>("cpu.mode", "user")))
{
cpuUserTime = value;
}
else if (tagsArray.Contains(new KeyValuePair<string, object?>("cpu.mode", "system")))
{
cpuKernelTime = value;
}
}
else if (instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization)
{
cpuLimitFromGauge = value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public void Provider_Registers_Instruments()
listener.Start();
listener.RecordObservableInstruments();

Assert.Equal(5, samples.Count);
Assert.Equal(7, samples.Count);

Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization);
Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization).value));
Expand All @@ -91,6 +91,9 @@ public void Provider_Registers_Instruments()

Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization);
Assert.Equal(0.5, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization).value);

Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - (50.0 / 1_000_000_000)) < 0.00001);
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - 0.8) < 0.00001);
}

[ConditionalFact]
Expand Down Expand Up @@ -144,7 +147,7 @@ public void Provider_Registers_Instruments_CgroupV2()
listener.Start();
listener.RecordObservableInstruments();

Assert.Equal(5, samples.Count);
Assert.Equal(7, samples.Count);

Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization);
Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization).value));
Expand All @@ -160,6 +163,9 @@ public void Provider_Registers_Instruments_CgroupV2()

Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization);
Assert.Equal(1, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization).value);

Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - (102312.0 / 1_000_000)) < 0.00001);
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - 0.8) < 0.00001);
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,66 @@ public void GetSnapshot_With_JobMemoryLimit_Set_To_Zero_ProducesCorrectSnapshot(
Assert.True(data.MemoryUsageInBytes > 0);
}

/// <summary>
/// Verifies that the <see cref="ResourceUtilizationInstruments.ContainerCpuTime"/> instrument reports both
/// the user and the kernel CPU time of the job object, converted from ticks to seconds.
/// </summary>
[Fact]
public void SnapshotProvider_EmitsCpuTimeMetric()
{
    // Accounting info served by the later reads: 2500 kernel ticks and 1500 user ticks.
    // The two values differ, so the assertions below can tell user time from kernel time.
    JOBOBJECT_BASIC_ACCOUNTING_INFORMATION updatedAccountingInfo = default;
    updatedAccountingInfo.TotalKernelTime = 2500;
    updatedAccountingInfo.TotalUserTime = 1500;

    // Each accounting state is served twice; any read beyond the fourth fails the test.
    _jobHandleMock.SetupSequence(j => j.GetBasicAccountingInfo())
        .Returns(_accountingInfo)
        .Returns(_accountingInfo)
        .Returns(updatedAccountingInfo)
        .Returns(updatedAccountingInfo)
        .Throws(new InvalidOperationException("We shouldn't hit here..."));

    _sysInfo.NumberOfProcessors = 2;

    var fakeClock = new FakeTimeProvider();
    using var meter = new Meter(nameof(SnapshotProvider_EmitsCpuTimeMetric));
    var meterFactoryMock = new Mock<IMeterFactory>();
    meterFactoryMock.Setup(x => x.Create(It.IsAny<MeterOptions>()))
        .Returns(meter);
    using var metricCollector = new MetricCollector<double>(meter, ResourceUtilizationInstruments.ContainerCpuTime, fakeClock);

    var options = new ResourceMonitoringOptions { CpuConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2) };

    // The provider registers its instruments on the meter handed out by the factory mock.
    var snapshotProvider = new WindowsContainerSnapshotProvider(
        _memoryInfoMock.Object,
        _systemInfoMock.Object,
        _processInfoMock.Object,
        _logger,
        meterFactoryMock.Object,
        () => _jobHandleMock.Object,
        fakeClock,
        options);

    // Step #0 - state in the beginning:
    metricCollector.RecordObservableInstruments();
    var snapshot = metricCollector.GetMeasurementSnapshot();
    Assert.Equal(2, snapshot.Count);
    Assert.Contains(_accountingInfo.TotalUserTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
    Assert.Contains(_accountingInfo.TotalKernelTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));

    // Step #1 - simulate 1 millisecond passing and collect metrics again:
    fakeClock.Advance(TimeSpan.FromMilliseconds(1));
    metricCollector.RecordObservableInstruments();
    snapshot = metricCollector.GetMeasurementSnapshot();
    Assert.Contains(updatedAccountingInfo.TotalUserTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
    Assert.Contains(updatedAccountingInfo.TotalKernelTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));

    // Step #2 - simulate 1 millisecond passing and collect metrics again:
    fakeClock.Advance(TimeSpan.FromMilliseconds(1));
    metricCollector.RecordObservableInstruments();
    snapshot = metricCollector.GetMeasurementSnapshot();

    // CPU time should be the same as before, as we're not simulating any CPU usage:
    Assert.Contains(updatedAccountingInfo.TotalUserTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
    Assert.Contains(updatedAccountingInfo.TotalKernelTime / (double)TimeSpan.TicksPerSecond, snapshot.Select(m => m.Value));
}

[Theory]
[InlineData(ResourceUtilizationInstruments.ProcessCpuUtilization, true)]
[InlineData(ResourceUtilizationInstruments.ProcessCpuUtilization, false)]
Expand Down
Loading