diff --git a/d2/src/main/java/com/linkedin/d2/balancer/D2ClientBuilder.java b/d2/src/main/java/com/linkedin/d2/balancer/D2ClientBuilder.java index 88dba60f21..39dbf3ef5b 100644 --- a/d2/src/main/java/com/linkedin/d2/balancer/D2ClientBuilder.java +++ b/d2/src/main/java/com/linkedin/d2/balancer/D2ClientBuilder.java @@ -52,6 +52,7 @@ import com.linkedin.d2.discovery.stores.zk.ZKPersistentConnection; import com.linkedin.d2.discovery.stores.zk.ZooKeeper; import com.linkedin.d2.jmx.XdsServerMetricsProvider; +import com.linkedin.d2.jmx.XdsClientOtelMetricsProvider; import com.linkedin.d2.jmx.JmxManager; import com.linkedin.d2.xds.XdsClientValidator; import com.linkedin.d2.jmx.NoOpJmxManager; @@ -237,6 +238,7 @@ public D2Client build() _config.xdsChannelLoadBalancingPolicyConfig, _config.subscribeToUriGlobCollection, _config._xdsServerMetricsProvider, + _config.xdsClientOtelMetricsProvider, _config.loadBalanceStreamException, _config.xdsInitialResourceVersionsEnabled, _config.disableDetectLiRawD2Client, @@ -856,6 +858,11 @@ public D2ClientBuilder setXdsServerMetricsProvider(XdsServerMetricsProvider xdsS return this; } + public D2ClientBuilder setXdsClientOtelMetricsProvider(XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider) { + _config.xdsClientOtelMetricsProvider = xdsClientOtelMetricsProvider; + return this; + } + public D2ClientBuilder setLoadBalanceStreamException(boolean loadBalanceStreamException) { _config.loadBalanceStreamException = loadBalanceStreamException; return this; diff --git a/d2/src/main/java/com/linkedin/d2/balancer/D2ClientConfig.java b/d2/src/main/java/com/linkedin/d2/balancer/D2ClientConfig.java index d0ffa9e826..5072b6dd9e 100644 --- a/d2/src/main/java/com/linkedin/d2/balancer/D2ClientConfig.java +++ b/d2/src/main/java/com/linkedin/d2/balancer/D2ClientConfig.java @@ -44,6 +44,8 @@ import com.linkedin.d2.jmx.JmxManager; import com.linkedin.d2.jmx.NoOpXdsServerMetricsProvider; import com.linkedin.d2.jmx.NoOpJmxManager; +import com.linkedin.d2.jmx.XdsClientOtelMetricsProvider; +import com.linkedin.d2.jmx.NoOpXdsClientOtelMetricsProvider; import com.linkedin.r2.transport.common.TransportClientFactory; import io.grpc.netty.shaded.io.netty.handler.ssl.SslContext; import java.time.Duration; @@ -181,6 +183,12 @@ public class D2ClientConfig public boolean subscribeToUriGlobCollection = false; public XdsServerMetricsProvider _xdsServerMetricsProvider = new NoOpXdsServerMetricsProvider(); + + /** + * Provider for OpenTelemetry metrics collection for XDS client operations. + * Defaults to no-op implementation; can be overridden to enable metric tracking. + */ + public XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider = new NoOpXdsClientOtelMetricsProvider(); public boolean loadBalanceStreamException = false; public boolean xdsInitialResourceVersionsEnabled = false; public Integer xdsStreamMaxRetryBackoffSeconds = null; @@ -293,6 +301,145 @@ public D2ClientConfig() D2CalleeInfoRecorder d2CalleeInfoRecorder, Boolean enableIndisDownstreamServicesFetcher, Duration indisDownstreamServicesFetchTimeout) + { + this(zkHosts, xdsServer, hostName, zkSessionTimeoutInMs, zkStartupTimeoutInMs, lbWaitTimeout, lbWaitUnit, + flagFile, basePath, fsBasePath, indisFsBasePath, componentFactory, clientFactories, lbWithFacilitiesFactory, + sslContext, grpcSslContext, sslParameters, isSSLEnabled, shutdownAsynchronously, isSymlinkAware, + clientServicesConfig, d2ServicePath, useNewEphemeralStoreWatcher, healthCheckOperations, executorService, + retry, restRetryEnabled, streamRetryEnabled, retryLimit, retryUpdateIntervalMs, retryAggregatedIntervalNum, + warmUp, warmUpTimeoutSeconds, indisWarmUpTimeoutSeconds, warmUpConcurrentRequests, + indisWarmUpConcurrentRequests, downstreamServicesFetcher, indisDownstreamServicesFetcher, + backupRequestsEnabled, backupRequestsStrategyStatsConsumer, + backupRequestsLatencyNotificationInterval, + backupRequestsLatencyNotificationIntervalUnit, + enableBackupRequestsClientAsync, + backupRequestsExecutorService, + emitter, + partitionAccessorRegistry, + zooKeeperDecorator, + enableSaveUriDataOnDisk, + loadBalancerStrategyFactories, + requestTimeoutHandlerEnabled, + sslSessionValidatorFactory, + zkConnection, + startUpExecutorService, + indisStartUpExecutorService, + jmxManager, + d2JmxManagerPrefix, + zookeeperReadWindowMs, + enableRelativeLoadBalancer, + deterministicSubsettingMetadataProvider, + canaryDistributionProvider, + enableClusterFailout, + failoutConfigProviderFactory, + failoutRedirectStrategy, + serviceDiscoveryEventEmitter, + dualReadStateManager, + xdsExecutorService, + xdsStreamReadyTimeout, + dualReadNewLbExecutor, + xdsChannelLoadBalancingPolicy, + xdsChannelLoadBalancingPolicyConfig, + subscribeToUriGlobCollection, + xdsServerMetricsProvider, + new NoOpXdsClientOtelMetricsProvider(), + loadBalanceStreamException, + xdsInitialResourceVersionsEnabled, + disableDetectLiRawD2Client, + isLiRawD2Client, + xdsStreamMaxRetryBackoffSeconds, + xdsChannelKeepAliveTimeMins, + xdsMinimumJavaVersion, + actionOnPrecheckFailure, + d2CalleeInfoRecorder, + enableIndisDownstreamServicesFetcher, + indisDownstreamServicesFetchTimeout); + } + + D2ClientConfig(String zkHosts, + String xdsServer, + String hostName, + long zkSessionTimeoutInMs, + long zkStartupTimeoutInMs, + long lbWaitTimeout, + TimeUnit lbWaitUnit, + String flagFile, + String basePath, + String fsBasePath, + String indisFsBasePath, + ComponentFactory componentFactory, + Map clientFactories, + LoadBalancerWithFacilitiesFactory lbWithFacilitiesFactory, + SSLContext sslContext, + SslContext grpcSslContext, + SSLParameters sslParameters, + boolean isSSLEnabled, + boolean shutdownAsynchronously, + boolean isSymlinkAware, + Map> clientServicesConfig, + String d2ServicePath, + boolean useNewEphemeralStoreWatcher, + HealthCheckOperations healthCheckOperations, + ScheduledExecutorService executorService, + boolean retry, + boolean restRetryEnabled, + boolean streamRetryEnabled, + int retryLimit, + long retryUpdateIntervalMs, + int retryAggregatedIntervalNum, + boolean warmUp, + int warmUpTimeoutSeconds, + int indisWarmUpTimeoutSeconds, + int warmUpConcurrentRequests, + int indisWarmUpConcurrentRequests, + DownstreamServicesFetcher downstreamServicesFetcher, + DownstreamServicesFetcher indisDownstreamServicesFetcher, + boolean backupRequestsEnabled, + BackupRequestsStrategyStatsConsumer backupRequestsStrategyStatsConsumer, + long backupRequestsLatencyNotificationInterval, + TimeUnit backupRequestsLatencyNotificationIntervalUnit, + boolean enableBackupRequestsClientAsync, + ScheduledExecutorService backupRequestsExecutorService, + EventEmitter emitter, + PartitionAccessorRegistry partitionAccessorRegistry, + Function zooKeeperDecorator, + boolean enableSaveUriDataOnDisk, + Map> loadBalancerStrategyFactories, + boolean requestTimeoutHandlerEnabled, + SslSessionValidatorFactory sslSessionValidatorFactory, + ZKPersistentConnection zkConnection, + ScheduledExecutorService startUpExecutorService, + ScheduledExecutorService indisStartUpExecutorService, + JmxManager jmxManager, + String d2JmxManagerPrefix, + int zookeeperReadWindowMs, + boolean enableRelativeLoadBalancer, + DeterministicSubsettingMetadataProvider deterministicSubsettingMetadataProvider, + CanaryDistributionProvider canaryDistributionProvider, + boolean enableClusterFailout, + FailoutConfigProviderFactory failoutConfigProviderFactory, + FailoutRedirectStrategy failoutRedirectStrategy, + ServiceDiscoveryEventEmitter serviceDiscoveryEventEmitter, + DualReadStateManager dualReadStateManager, + ScheduledExecutorService xdsExecutorService, + Long xdsStreamReadyTimeout, + ExecutorService dualReadNewLbExecutor, + String xdsChannelLoadBalancingPolicy, + Map xdsChannelLoadBalancingPolicyConfig, + boolean subscribeToUriGlobCollection, + XdsServerMetricsProvider xdsServerMetricsProvider, + XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider, + boolean loadBalanceStreamException, + boolean xdsInitialResourceVersionsEnabled, + boolean disableDetectLiRawD2Client, + boolean isLiRawD2Client, + Integer xdsStreamMaxRetryBackoffSeconds, + Long xdsChannelKeepAliveTimeMins, + String xdsMinimumJavaVersion, + XdsClientValidator.ActionOnPrecheckFailure actionOnPrecheckFailure, + D2CalleeInfoRecorder d2CalleeInfoRecorder, + Boolean enableIndisDownstreamServicesFetcher, + Duration indisDownstreamServicesFetchTimeout) { this.zkHosts = zkHosts; this.xdsServer = xdsServer; @@ -367,6 +514,7 @@ public D2ClientConfig() this.xdsChannelKeepAliveTimeMins = xdsChannelKeepAliveTimeMins; this.subscribeToUriGlobCollection = subscribeToUriGlobCollection; this._xdsServerMetricsProvider = xdsServerMetricsProvider; + this.xdsClientOtelMetricsProvider = xdsClientOtelMetricsProvider; this.loadBalanceStreamException = loadBalanceStreamException; this.xdsInitialResourceVersionsEnabled = xdsInitialResourceVersionsEnabled; this.disableDetectLiRawD2Client = disableDetectLiRawD2Client; diff --git a/d2/src/main/java/com/linkedin/d2/jmx/D2ClientJmxManager.java b/d2/src/main/java/com/linkedin/d2/jmx/D2ClientJmxManager.java index 7795d8c80f..1b4a678d48 100644 --- a/d2/src/main/java/com/linkedin/d2/jmx/D2ClientJmxManager.java +++ b/d2/src/main/java/com/linkedin/d2/jmx/D2ClientJmxManager.java @@ -305,7 +305,13 @@ public void registerXdsClientJmx(XdsClientJmx xdsClientJmx) { _log.warn("Setting XdsClientJmx for Non-XDS source type: {}", _discoverySourceType); } - final String jmxName = String.format("%s-XdsClientJmx", getGlobalPrefix(null)); + // Get the client name from global prefix + String clientName = getGlobalPrefix(null); + if(clientName != null && !clientName.isEmpty()) + { + xdsClientJmx.setClientName(clientName); + } + final String jmxName = String.format("%s-XdsClientJmx", clientName); _jmxManager.registerXdsClientJmxBean(jmxName, xdsClientJmx); } diff --git a/d2/src/main/java/com/linkedin/d2/jmx/NoOpXdsClientOtelMetricsProvider.java b/d2/src/main/java/com/linkedin/d2/jmx/NoOpXdsClientOtelMetricsProvider.java new file mode 100644 index 0000000000..12182a76a4 --- /dev/null +++ b/d2/src/main/java/com/linkedin/d2/jmx/NoOpXdsClientOtelMetricsProvider.java @@ -0,0 +1,96 @@ +package com.linkedin.d2.jmx; + +/** + * No-Op implementation of {@link XdsClientOtelMetricsProvider}. + * Used when OpenTelemetry metrics are disabled. + */ +public class NoOpXdsClientOtelMetricsProvider implements XdsClientOtelMetricsProvider { + + /** + * {@inheritDoc} + */ + @Override + public void recordConnectionLost(String clientName) { + // No-op + } + + /** + * {@inheritDoc} + */ + @Override + public void recordConnectionClosed(String clientName) { + // No-op + } + + /** + * {@inheritDoc} + */ + @Override + public void recordReconnection(String clientName) { + // No-op + } + + /** + * {@inheritDoc} + */ + @Override + public void recordRequestSent(String clientName) { + // No-op + } + + /** + * {@inheritDoc} + */ + @Override + public void recordResponseReceived(String clientName) { + // No-op + } + + /** + * {@inheritDoc} + */ + @Override + public void recordInitialResourceVersionSent(String clientName, int count) { + // No-op + } + + /** + * {@inheritDoc} + */ + @Override + public void recordResourceNotFound(String clientName) { + // No-op + } + + /** + * {@inheritDoc} + */ + @Override + public void recordResourceInvalid(String clientName) { + // No-op + } + + /** + * {@inheritDoc} + */ + @Override + public void recordServerLatency(String clientName, long latencyMs) { + // No-op + } + + /** + * {@inheritDoc} + */ + @Override + public void updateConnectionState(String clientName, boolean isConnected) { + // No-op + } + + /** + * {@inheritDoc} + */ + @Override + public void updateActiveInitialWaitTime(String clientName, long waitTimeMs) { + // No-op + } +} \ No newline at end of file diff --git a/d2/src/main/java/com/linkedin/d2/jmx/XdsClientJmx.java b/d2/src/main/java/com/linkedin/d2/jmx/XdsClientJmx.java index 0b0f643293..fece131dcb 100644 --- a/d2/src/main/java/com/linkedin/d2/jmx/XdsClientJmx.java +++ b/d2/src/main/java/com/linkedin/d2/jmx/XdsClientJmx.java @@ -20,11 +20,12 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; import javax.annotation.Nullable; - public class XdsClientJmx implements XdsClientJmxMBean { + private final String NO_VALUE = "-"; private final AtomicInteger _connectionLostCount = new AtomicInteger(); private final AtomicInteger _connectionClosedCount = new AtomicInteger(); @@ -37,18 +38,47 @@ public class XdsClientJmx implements XdsClientJmxMBean private final AtomicInteger _resourceNotFoundCount = new AtomicInteger(); private final AtomicInteger _resourceInvalidCount = new AtomicInteger(); private final XdsServerMetricsProvider _xdsServerMetricsProvider; + private final XdsClientOtelMetricsProvider _xdsClientOtelMetricsProvider; + private final AtomicReference _clientName = new AtomicReference<>(NO_VALUE); @Nullable private XdsClientImpl _xdsClient = null; @Deprecated public XdsClientJmx() { - this(new NoOpXdsServerMetricsProvider()); + this(new NoOpXdsServerMetricsProvider(), null); } public XdsClientJmx(XdsServerMetricsProvider xdsServerMetricsProvider) + { + this(xdsServerMetricsProvider, null); + } + + public XdsClientJmx(XdsServerMetricsProvider xdsServerMetricsProvider, + XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider) { _xdsServerMetricsProvider = xdsServerMetricsProvider == null ? new NoOpXdsServerMetricsProvider() : xdsServerMetricsProvider; + _xdsClientOtelMetricsProvider = xdsClientOtelMetricsProvider == null ? + new NoOpXdsClientOtelMetricsProvider() : xdsClientOtelMetricsProvider; + } + + /** + * Sets the client name for this XDS client instance. + * Used for identifying metrics associated with this client. + * + * @param clientName the name to identify this XDS client + */ + public void setClientName(String clientName) { + _clientName.compareAndSet(NO_VALUE, clientName); + } + + /** + * Gets the client name for this XDS client instance. + * + * @return the client name, or "-" if not set + */ + public String getClientName() { + return _clientName.get(); } public void setXdsClient(XdsClientImpl xdsClient) @@ -146,55 +176,66 @@ public int isDisconnected() @Override public long getActiveInitialWaitTimeMillis() { + long waitTime = -1; if (_xdsClient != null) { - return _xdsClient.getActiveInitialWaitTimeMillis(); + waitTime = _xdsClient.getActiveInitialWaitTimeMillis(); + _xdsClientOtelMetricsProvider.updateActiveInitialWaitTime(getClientName(), waitTime); } - return -1; + return waitTime; } public void incrementConnectionLostCount() { _connectionLostCount.incrementAndGet(); + _xdsClientOtelMetricsProvider.recordConnectionLost(getClientName()); } public void incrementConnectionClosedCount() { _connectionClosedCount.incrementAndGet(); + _xdsClientOtelMetricsProvider.recordConnectionClosed(getClientName()); } public void incrementReconnectionCount() { _reconnectionCount.incrementAndGet(); + _xdsClientOtelMetricsProvider.recordReconnection(getClientName()); } public void incrementRequestSentCount() { _resquestSentCount.incrementAndGet(); + _xdsClientOtelMetricsProvider.recordRequestSent(getClientName()); } public void addToIrvSentCount(int delta) { _irvSentCount.addAndGet(delta); + _xdsClientOtelMetricsProvider.recordInitialResourceVersionSent(getClientName(), delta); } public void incrementResponseReceivedCount() { _responseReceivedCount.incrementAndGet(); + _xdsClientOtelMetricsProvider.recordResponseReceived(getClientName()); } public void setIsConnected(boolean connected) { _isConnected.getAndSet(connected); + _xdsClientOtelMetricsProvider.updateConnectionState(getClientName(), connected); } public void incrementResourceNotFoundCount() { _resourceNotFoundCount.incrementAndGet(); + _xdsClientOtelMetricsProvider.recordResourceNotFound(getClientName()); } public void incrementResourceInvalidCount() { _resourceInvalidCount.incrementAndGet(); + _xdsClientOtelMetricsProvider.recordResourceInvalid(getClientName()); } } diff --git a/d2/src/main/java/com/linkedin/d2/jmx/XdsClientOtelMetricsProvider.java b/d2/src/main/java/com/linkedin/d2/jmx/XdsClientOtelMetricsProvider.java new file mode 100644 index 0000000000..5e57d5f5ef --- /dev/null +++ b/d2/src/main/java/com/linkedin/d2/jmx/XdsClientOtelMetricsProvider.java @@ -0,0 +1,88 @@ +package com.linkedin.d2.jmx; + +/** + * Interface for OpenTelemetry metrics collection for XDS Client. + */ +public interface XdsClientOtelMetricsProvider { + + /** + * Records a connection lost event in the OpenTelemetry counter. + * + * @param clientName the name of the XDS client + */ + void recordConnectionLost(String clientName); + + /** + * Records a connection closed event in the OpenTelemetry counter. + * + * @param clientName the name of the XDS client + */ + void recordConnectionClosed(String clientName); + + /** + * Records a reconnection event in the OpenTelemetry counter. + * + * @param clientName the name of the XDS client + */ + void recordReconnection(String clientName); + + /** + * Records a request sent event in the OpenTelemetry counter. + * + * @param clientName the name of the XDS client + */ + void recordRequestSent(String clientName); + + /** + * Records a response received event in the OpenTelemetry counter. + * + * @param clientName the name of the XDS client + */ + void recordResponseReceived(String clientName); + + /** + * Records initial resource version sent count in the OpenTelemetry counter. + * + * @param clientName the name of the XDS client + * @param count the count to add + */ + void recordInitialResourceVersionSent(String clientName, int count); + + /** + * Records a resource not found error in the OpenTelemetry counter. + * + * @param clientName the name of the XDS client + */ + void recordResourceNotFound(String clientName); + + /** + * Records a resource invalid error in the OpenTelemetry counter. + * + * @param clientName the name of the XDS client + */ + void recordResourceInvalid(String clientName); + + /** + * Records server latency in the OpenTelemetry histogram. + * + * @param clientName the name of the XDS client + * @param latencyMs the latency in milliseconds + */ + void recordServerLatency(String clientName, long latencyMs); + + /** + * Updates the connection state for a client. + * + * @param clientName the name of the XDS client + * @param isConnected whether the client is connected + */ + void updateConnectionState(String clientName, boolean isConnected); + + /** + * Updates the active initial wait time for a client. + * + * @param clientName the name of the XDS client + * @param waitTimeMs the wait time in milliseconds + */ + void updateActiveInitialWaitTime(String clientName, long waitTimeMs); +} diff --git a/d2/src/main/java/com/linkedin/d2/xds/XdsClientImpl.java b/d2/src/main/java/com/linkedin/d2/xds/XdsClientImpl.java index e4faa58326..795f744c11 100644 --- a/d2/src/main/java/com/linkedin/d2/xds/XdsClientImpl.java +++ b/d2/src/main/java/com/linkedin/d2/xds/XdsClientImpl.java @@ -23,8 +23,10 @@ import com.google.common.collect.Maps; import com.google.protobuf.util.Timestamps; import com.google.rpc.Code; +import com.linkedin.d2.jmx.NoOpXdsClientOtelMetricsProvider; import com.linkedin.d2.jmx.NoOpXdsServerMetricsProvider; import com.linkedin.d2.jmx.XdsClientJmx; +import com.linkedin.d2.jmx.XdsClientOtelMetricsProvider; import com.linkedin.d2.jmx.XdsServerMetricsProvider; import com.linkedin.d2.xds.GlobCollectionUtils.D2UriIdentifier; import com.linkedin.util.RateLimitedLogger; @@ -115,6 +117,7 @@ public class XdsClientImpl extends XdsClient private final XdsClientJmx _xdsClientJmx; private final XdsServerMetricsProvider _serverMetricsProvider; + private final XdsClientOtelMetricsProvider _xdsClientOtelMetricsProvider; private final boolean _initialResourceVersionsEnabled; private final String _minimumJavaVersion; private final XdsClientValidator.ActionOnPrecheckFailure _actionOnPrecheckFailure; @@ -200,6 +203,27 @@ public XdsClientImpl(Node node, Integer maxRetryBackoffSeconds, String minimumJavaVersion, XdsClientValidator.ActionOnPrecheckFailure actionOnPrecheckFailure) + { + this(node, managedChannel, executorService, readyTimeoutMillis, subscribeToUriGlobCollection, + serverMetricsProvider, null, irvSupport, maxRetryBackoffSeconds, XdsClientValidator.DEFAULT_MINIMUM_JAVA_VERSION, XdsClientValidator.DEFAULT_ACTION_ON_PRECHECK_FAILURE); + } + + /** + * Constructor for XdsClientImpl with OpenTelemetry metrics support. + * + * @param xdsClientOtelMetricsProvider provider for OpenTelemetry metrics collection, or null for no-op + */ + public XdsClientImpl(Node node, + ManagedChannel managedChannel, + ScheduledExecutorService executorService, + long readyTimeoutMillis, + boolean subscribeToUriGlobCollection, + XdsServerMetricsProvider serverMetricsProvider, + XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider, + boolean irvSupport, + Integer maxRetryBackoffSeconds, + String minimumJavaVersion, + XdsClientValidator.ActionOnPrecheckFailure actionOnPrecheckFailure) { _readyTimeoutMillis = readyTimeoutMillis; _node = node; @@ -212,8 +236,9 @@ public XdsClientImpl(Node node, _log.info("Glob collection support enabled"); } - _xdsClientJmx = new XdsClientJmx(serverMetricsProvider); + _xdsClientOtelMetricsProvider = xdsClientOtelMetricsProvider == null ? new NoOpXdsClientOtelMetricsProvider() : xdsClientOtelMetricsProvider; _serverMetricsProvider = serverMetricsProvider == null ? new NoOpXdsServerMetricsProvider() : serverMetricsProvider; + _xdsClientJmx = new XdsClientJmx(serverMetricsProvider, _xdsClientOtelMetricsProvider); _initialResourceVersionsEnabled = irvSupport; if (_initialResourceVersionsEnabled) { @@ -678,7 +703,7 @@ private void handleD2URICollectionResponse(DiscoveryResponseData data) || uriSubscriber.getData() == null // The URI was corrupted and there was no previous version of this URI ) { - uriSubscriber.onData(new D2URIUpdate(uri), _serverMetricsProvider); + uriSubscriber.onData(new D2URIUpdate(uri), _serverMetricsProvider, _xdsClientOtelMetricsProvider, _xdsClientJmx.getClientName()); } } @@ -786,6 +811,7 @@ private void processResourceChanges(ResourceType type, Map updates, ResourceType type) { + String clientName = _xdsClientJmx.getClientName(); Map subscribers = getResourceSubscriberMap(type); WildcardResourceSubscriber wildcardSubscriber = getWildcardResourceSubscriber(type); @@ -794,12 +820,12 @@ private void handleResourceUpdate(Map updates, ResourceSubscriber subscriber = subscribers.get(entry.getKey()); if (subscriber != null) { - subscriber.onData(entry.getValue(), _serverMetricsProvider); + subscriber.onData(entry.getValue(), _serverMetricsProvider, _xdsClientOtelMetricsProvider, clientName); } if (wildcardSubscriber != null) { - wildcardSubscriber.onData(entry.getKey(), entry.getValue(), _serverMetricsProvider); + wildcardSubscriber.onData(entry.getKey(), entry.getValue(), _serverMetricsProvider, _xdsClientOtelMetricsProvider, clientName); } } } @@ -995,7 +1021,7 @@ void addWatcher(ResourceWatcher watcher) } @VisibleForTesting - void onData(ResourceUpdate data, XdsServerMetricsProvider metricsProvider) + void onData(ResourceUpdate data, XdsServerMetricsProvider metricsProvider, XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider, String clientName) { SubscriberFetchState prev = _fetchState.getAndSet(FETCHED); if (!FETCHED.equals(prev)) @@ -1009,7 +1035,8 @@ void onData(ResourceUpdate data, XdsServerMetricsProvider metricsProvider) { // Even though the data is the same, the subscriber is waiting for init data after either startup // or a reconnection, so we need to track latency. - trackServerLatency(data, _data, metricsProvider, _subscribedAt.get(), _isIrvEnabled, prev); + // data updated, track xds server latency + trackServerLatency(data, _data, metricsProvider, _subscribedAt.get(), _isIrvEnabled, prev, xdsClientOtelMetricsProvider, clientName); } _log.debug("Received resource update data equal to the current data. Will not perform any update."); return; @@ -1018,7 +1045,7 @@ void onData(ResourceUpdate data, XdsServerMetricsProvider metricsProvider) // null value guard to avoid overwriting the property with null if (data != null && data.isValid()) { - trackServerLatency(data, _data, metricsProvider, _subscribedAt.get(), _isIrvEnabled, prev); + trackServerLatency(data, _data, metricsProvider, _subscribedAt.get(), _isIrvEnabled, prev, xdsClientOtelMetricsProvider, clientName); _data = data; } else @@ -1190,7 +1217,8 @@ void addWatcher(WildcardResourceWatcher watcher) } @VisibleForTesting - void onData(String resourceName, ResourceUpdate data, XdsServerMetricsProvider metricsProvider) + void onData(String resourceName, ResourceUpdate data, XdsServerMetricsProvider metricsProvider, + XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider, String clientName) { if (Objects.equals(_data.get(resourceName), data)) { @@ -1198,7 +1226,8 @@ void onData(String resourceName, ResourceUpdate data, XdsServerMetricsProvider m { // Even though the data is the same, the subscriber is waiting for init data after either startup // or a reconnection, so we need to track latency. - trackServerLatency(data, _data.get(resourceName), metricsProvider, _subscribedAt.get(), _isIrvEnabled, _fetchState.get()); + // we are passing OpenTelemetry provider for wildcard subscribers too + trackServerLatency(data, _data.get(resourceName), metricsProvider, _subscribedAt.get(), _isIrvEnabled, _fetchState.get(), xdsClientOtelMetricsProvider, clientName); } _log.debug("Received resource update data equal to the current data. Will not perform the update."); return; @@ -1206,7 +1235,7 @@ void onData(String resourceName, ResourceUpdate data, XdsServerMetricsProvider m // null value guard to avoid overwriting the property with null if (data != null && data.isValid()) { - trackServerLatency(data, _data.get(resourceName), metricsProvider, _subscribedAt.get(), _isIrvEnabled, _fetchState.get()); + trackServerLatency(data, _data.get(resourceName), metricsProvider, _subscribedAt.get(), _isIrvEnabled, _fetchState.get(), xdsClientOtelMetricsProvider, clientName); _data.put(resourceName, data); } else @@ -1344,7 +1373,8 @@ private boolean shouldSubscribeUriGlobCollection(ResourceType type) } private static void trackServerLatency(ResourceUpdate resourceUpdate, ResourceUpdate currentData, - XdsServerMetricsProvider metricsProvider, long subscribedAt, boolean isIrvEnabled, SubscriberFetchState fetchState) + XdsServerMetricsProvider metricsProvider, long subscribedAt, boolean isIrvEnabled, SubscriberFetchState fetchState, + XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider, String clientName) { long now = SystemClock.instance().currentTimeMillis(); if (resourceUpdate instanceof NodeUpdate) @@ -1355,7 +1385,7 @@ private static void trackServerLatency(ResourceUpdate resourceUpdate, ResourceUp return; } trackServerLatencyHelper(metricsProvider, now, nodeData.getStat().getMtime(), subscribedAt, - isIrvEnabled, fetchState); + isIrvEnabled, fetchState, xdsClientOtelMetricsProvider, clientName); } else if (resourceUpdate instanceof D2URIMapUpdate) { @@ -1370,9 +1400,9 @@ else if (resourceUpdate instanceof D2URIMapUpdate) Map.Entry::getKey, e -> e.getValue().leftValue()) // new data of updated uris ); - trackServerLatencyForUris(updatedUris, update, metricsProvider, now, subscribedAt, isIrvEnabled, fetchState); + trackServerLatencyForUris(updatedUris, update, metricsProvider, now, subscribedAt, isIrvEnabled, fetchState, xdsClientOtelMetricsProvider, clientName); trackServerLatencyForUris(rawDiff.entriesOnlyOnLeft(), update, metricsProvider, now, subscribedAt, - isIrvEnabled, fetchState); // newly added uris + isIrvEnabled, fetchState, xdsClientOtelMetricsProvider, clientName); // newly added uris } else if (resourceUpdate instanceof D2URIUpdate) { @@ -1382,7 +1412,7 @@ else if (resourceUpdate instanceof D2URIUpdate) { update.setIsStaleModifiedTime( trackServerLatencyHelper(metricsProvider, now, Timestamps.toMillis(uri.getModifiedTime()), subscribedAt, - isIrvEnabled, fetchState) + isIrvEnabled, fetchState, xdsClientOtelMetricsProvider, clientName) ); } } @@ -1390,11 +1420,11 @@ else if (resourceUpdate instanceof D2URIUpdate) private static void trackServerLatencyForUris(Map uriMap, D2URIMapUpdate update, XdsServerMetricsProvider metricsProvider, long end, long subscribedAt, boolean isIrvEnabled, - SubscriberFetchState fetchState) + SubscriberFetchState fetchState, XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider, String clientName) { uriMap.forEach((k, v) -> { boolean isStaleModifiedTime = trackServerLatencyHelper(metricsProvider, end, Timestamps.toMillis(v.getModifiedTime()), subscribedAt, - isIrvEnabled, fetchState); + isIrvEnabled, fetchState, xdsClientOtelMetricsProvider, clientName); update.setIsStaleModifiedTime(k, isStaleModifiedTime); } ); @@ -1409,7 +1439,8 @@ private static void trackServerLatencyForUris(Map uriMap, D // received with IRV, except the first fetch, so after skipping the first fetch we can track latency always based // on the resource modified time. private static boolean trackServerLatencyHelper(XdsServerMetricsProvider metricsProvider, - long end, long modifiedAt, long subscribedAt, boolean isIrvEnabled, SubscriberFetchState fetchState) + long end, long modifiedAt, long subscribedAt, boolean isIrvEnabled, SubscriberFetchState fetchState, + XdsClientOtelMetricsProvider xdsClientOtelMetricsProvider, String clientName) { long start; boolean isStaleModifiedAt; @@ -1423,7 +1454,12 @@ private static boolean trackServerLatencyHelper(XdsServerMetricsProvider metrics start = Math.max(modifiedAt, subscribedAt); isStaleModifiedAt = modifiedAt < subscribedAt; } - metricsProvider.trackLatency(end - start); + long latency = end - start; + metricsProvider.trackLatency(latency); + + // Record OpenTelemetry latency + xdsClientOtelMetricsProvider.recordServerLatency(clientName, latency); + return isStaleModifiedAt; } diff --git a/d2/src/main/java/com/linkedin/d2/xds/balancer/XdsLoadBalancerWithFacilitiesFactory.java b/d2/src/main/java/com/linkedin/d2/xds/balancer/XdsLoadBalancerWithFacilitiesFactory.java index 4c77c81433..cbe2c2d2a8 100644 --- a/d2/src/main/java/com/linkedin/d2/xds/balancer/XdsLoadBalancerWithFacilitiesFactory.java +++ b/d2/src/main/java/com/linkedin/d2/xds/balancer/XdsLoadBalancerWithFacilitiesFactory.java @@ -69,6 +69,7 @@ public LoadBalancerWithFacilities create(D2ClientConfig config) xdsStreamReadyTimeout, config.subscribeToUriGlobCollection, config._xdsServerMetricsProvider, + config.xdsClientOtelMetricsProvider, config.xdsInitialResourceVersionsEnabled, config.xdsStreamMaxRetryBackoffSeconds, config.xdsMinimumJavaVersion, diff --git a/d2/src/test/java/com/linkedin/d2/jmx/XdsClientOtelMetricsProviderTest.java b/d2/src/test/java/com/linkedin/d2/jmx/XdsClientOtelMetricsProviderTest.java new file mode 100644 index 0000000000..4f7cfbaf19 --- /dev/null +++ b/d2/src/test/java/com/linkedin/d2/jmx/XdsClientOtelMetricsProviderTest.java @@ -0,0 +1,318 @@ +package com.linkedin.d2.jmx; + +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +/** + * Test suite for {@XdsClientOtelMetricsProvider} interface. + * Uses a test implementation to verify that all methods are called with expected parameters. + */ +public class XdsClientOtelMetricsProviderTest { + + private TestMetricsProvider _testProvider; + + @BeforeMethod + public void setUp() { + _testProvider = new TestMetricsProvider(); + } + + @DataProvider(name = "simpleMethodProvider") + public Object[][] simpleMethodProvider() { + return new Object[][] { + {"recordConnectionLost"}, + {"recordConnectionClosed"}, + {"recordReconnection"}, + {"recordRequestSent"}, + {"recordResponseReceived"}, + {"recordResourceNotFound"}, + {"recordResourceInvalid"} + }; + } + + @Test(dataProvider = "simpleMethodProvider") + public void testSimpleRecordMethods(String methodName) { + String clientName = "test-client-" + methodName; + + // Call the appropriate method based on methodName + switch (methodName) { + case "recordConnectionLost": + _testProvider.recordConnectionLost(clientName); + break; + case "recordConnectionClosed": + _testProvider.recordConnectionClosed(clientName); + break; + case "recordReconnection": + _testProvider.recordReconnection(clientName); + break; + case "recordRequestSent": + _testProvider.recordRequestSent(clientName); + break; + case "recordResponseReceived": + _testProvider.recordResponseReceived(clientName); + break; + case "recordResourceNotFound": + _testProvider.recordResourceNotFound(clientName); + break; + case "recordResourceInvalid": + _testProvider.recordResourceInvalid(clientName); + break; + default: + throw new IllegalArgumentException("Unknown method: " + methodName); + } + + assertEquals(_testProvider.getCallCount(methodName), 1); + assertEquals(_testProvider.getLastClientName(methodName), clientName); + } + + @Test + public void testRecordInitialResourceVersionSent() { + String clientName = "test-client-6"; + int count = 42; + _testProvider.recordInitialResourceVersionSent(clientName, count); + + assertEquals(_testProvider.getCallCount("recordInitialResourceVersionSent"), 1); + assertEquals(_testProvider.getLastClientName("recordInitialResourceVersionSent"), clientName); + assertEquals(_testProvider.getLastCount("recordInitialResourceVersionSent").intValue(), count); + } + + @Test + public void testRecordServerLatency() { + String clientName = "test-client-9"; + long latencyMs = 150L; + _testProvider.recordServerLatency(clientName, latencyMs); + + assertEquals(_testProvider.getCallCount("recordServerLatency"), 1); + assertEquals(_testProvider.getLastClientName("recordServerLatency"), clientName); + assertEquals(_testProvider.getLastLatency("recordServerLatency").longValue(), latencyMs); + } + + @Test + public void testUpdateConnectionState() { + String clientName = "test-client-10"; + boolean isConnected = true; + _testProvider.updateConnectionState(clientName, isConnected); + + assertEquals(_testProvider.getCallCount("updateConnectionState"), 1); + assertEquals(_testProvider.getLastClientName("updateConnectionState"), clientName); + assertTrue(_testProvider.getLastConnectionState("updateConnectionState")); + } + + @Test + public void testUpdateActiveInitialWaitTime() { + String clientName = "test-client-11"; + long waitTimeMs = 5000L; + _testProvider.updateActiveInitialWaitTime(clientName, waitTimeMs); + + assertEquals(_testProvider.getCallCount("updateActiveInitialWaitTime"), 1); + assertEquals(_testProvider.getLastClientName("updateActiveInitialWaitTime"), clientName); + assertEquals(_testProvider.getLastWaitTime("updateActiveInitialWaitTime").longValue(), waitTimeMs); + } + + @Test + public void testMultipleCalls() { + String clientName = "test-client-multi"; + + _testProvider.recordConnectionLost(clientName); + _testProvider.recordConnectionLost(clientName); + _testProvider.recordConnectionLost(clientName); + + assertEquals(_testProvider.getCallCount("recordConnectionLost"), 3); + assertEquals(_testProvider.getLastClientName("recordConnectionLost"), clientName); + } + + @Test + public void testDifferentClientNames() { + _testProvider.recordRequestSent("client-A"); + _testProvider.recordRequestSent("client-B"); + _testProvider.recordRequestSent("client-C"); + + assertEquals(_testProvider.getCallCount("recordRequestSent"), 3); + assertEquals(_testProvider.getLastClientName("recordRequestSent"), "client-C"); + } + + @Test + public void testAllMethodsCalled() { + String clientName = "comprehensive-client"; + + _testProvider.recordConnectionLost(clientName); + _testProvider.recordConnectionClosed(clientName); + _testProvider.recordReconnection(clientName); + _testProvider.recordRequestSent(clientName); + _testProvider.recordResponseReceived(clientName); + _testProvider.recordInitialResourceVersionSent(clientName, 10); + _testProvider.recordResourceNotFound(clientName); + _testProvider.recordResourceInvalid(clientName); + _testProvider.recordServerLatency(clientName, 200L); + _testProvider.updateConnectionState(clientName, true); + _testProvider.updateActiveInitialWaitTime(clientName, 3000L); + + // Verify all methods were called exactly once + assertEquals(_testProvider.getCallCount("recordConnectionLost"), 1); + assertEquals(_testProvider.getCallCount("recordConnectionClosed"), 1); + assertEquals(_testProvider.getCallCount("recordReconnection"), 1); + assertEquals(_testProvider.getCallCount("recordRequestSent"), 1); + assertEquals(_testProvider.getCallCount("recordResponseReceived"), 1); + assertEquals(_testProvider.getCallCount("recordInitialResourceVersionSent"), 1); + assertEquals(_testProvider.getCallCount("recordResourceNotFound"), 1); + assertEquals(_testProvider.getCallCount("recordResourceInvalid"), 1); + assertEquals(_testProvider.getCallCount("recordServerLatency"), 1); + assertEquals(_testProvider.getCallCount("updateConnectionState"), 1); + assertEquals(_testProvider.getCallCount("updateActiveInitialWaitTime"), 1); + } + + /** + * Test implementation of XdsClientOtelMetricsProvider that tracks method calls + * and their parameters for verification purposes. + */ + private static class TestMetricsProvider implements XdsClientOtelMetricsProvider { + + private final List _calls = new ArrayList<>(); + + @Override + public void recordConnectionLost(String clientName) { + _calls.add(new MetricsInvocation("recordConnectionLost", clientName)); + } + + @Override + public void recordConnectionClosed(String clientName) { + _calls.add(new MetricsInvocation("recordConnectionClosed", clientName)); + } + + @Override + public void recordReconnection(String clientName) { + _calls.add(new MetricsInvocation("recordReconnection", clientName)); + } + + @Override + public void recordRequestSent(String clientName) { + _calls.add(new MetricsInvocation("recordRequestSent", clientName)); + } + + @Override + public void recordResponseReceived(String clientName) { + _calls.add(new MetricsInvocation("recordResponseReceived", clientName)); + } + + @Override + public void recordInitialResourceVersionSent(String clientName, int count) { + _calls.add(new MetricsInvocation("recordInitialResourceVersionSent", clientName, count)); + } + + @Override + public void recordResourceNotFound(String clientName) { + _calls.add(new MetricsInvocation("recordResourceNotFound", clientName)); + } + + @Override + public void recordResourceInvalid(String clientName) { + _calls.add(new MetricsInvocation("recordResourceInvalid", clientName)); + } + + @Override + public void recordServerLatency(String clientName, long latencyMs) { + _calls.add(new MetricsInvocation("recordServerLatency", clientName, latencyMs)); + } + + @Override + public void updateConnectionState(String clientName, boolean isConnected) { + _calls.add(new MetricsInvocation("updateConnectionState", clientName, isConnected)); + } + + @Override + public void updateActiveInitialWaitTime(String clientName, long waitTimeMs) { + _calls.add(new MetricsInvocation("updateActiveInitialWaitTime", clientName, waitTimeMs)); + } + + // Helper methods for verification + + public int getCallCount(String methodName) { + return (int) _calls.stream() + .filter(call -> call.methodName.equals(methodName)) + .count(); + } + + public String getLastClientName(String methodName) { + return _calls.stream() + .filter(call -> call.methodName.equals(methodName)) + .reduce((first, second) -> second) + .map(call -> call.clientName) + .orElse(null); + } + + public Integer getLastCount(String methodName) { + return _calls.stream() + .filter(call -> call.methodName.equals(methodName)) + .reduce((first, second) -> second) + .map(call -> call.count) + .orElse(null); + } + + public Long getLastLatency(String methodName) { + return _calls.stream() + .filter(call -> call.methodName.equals(methodName)) + .reduce((first, second) -> second) + .map(call -> call.latencyMs) + .orElse(null); + } + + public Boolean getLastConnectionState(String methodName) { + return _calls.stream() + .filter(call -> call.methodName.equals(methodName)) + .reduce((first, second) -> second) + .map(call -> call.isConnected) + .orElse(null); + } + + public Long getLastWaitTime(String methodName) { + return _calls.stream() + .filter(call -> call.methodName.equals(methodName)) + .reduce((first, second) -> second) + .map(call -> call.waitTimeMs) + .orElse(null); + } + + /** + * Inner class to represent a metrics invocation with its parameters + */ + private static class MetricsInvocation { + String methodName; + String clientName; + Integer count; + Long latencyMs; + Boolean isConnected; + Long waitTimeMs; + + MetricsInvocation(String methodName, String clientName) { + this.methodName = methodName; + this.clientName = clientName; + } + + MetricsInvocation(String methodName, String clientName, int count) { + this(methodName, clientName); + this.count = count; + } + + MetricsInvocation(String methodName, String clientName, long value) { + this(methodName, clientName); + // Determine if it's latency or wait time based on method name + if (methodName.equals("recordServerLatency")) { + this.latencyMs = value; + } else if (methodName.equals("updateActiveInitialWaitTime")) { + this.waitTimeMs = value; + } + } + + MetricsInvocation(String methodName, String clientName, boolean isConnected) { + this(methodName, clientName); + this.isConnected = isConnected; + } + } + } +} diff --git a/d2/src/test/java/com/linkedin/d2/xds/TestXdsClientImpl.java b/d2/src/test/java/com/linkedin/d2/xds/TestXdsClientImpl.java index 57c27f34aa..3c9c0673ab 100644 --- a/d2/src/test/java/com/linkedin/d2/xds/TestXdsClientImpl.java +++ b/d2/src/test/java/com/linkedin/d2/xds/TestXdsClientImpl.java @@ -8,6 +8,7 @@ import com.google.protobuf.ByteString; import com.google.protobuf.util.Timestamps; import com.linkedin.d2.jmx.XdsClientJmx; +import com.linkedin.d2.jmx.XdsClientOtelMetricsProvider; import com.linkedin.d2.jmx.XdsServerMetricsProvider; import com.linkedin.d2.xds.XdsClient.D2URIMapUpdate; import com.linkedin.d2.xds.XdsClient.ResourceType; @@ -336,9 +337,9 @@ public void testHandleD2NodeUpdateWithEmptyResponse() { fixture.watchAllResourceAndWatcherTypes(); fixture._xdsClientImpl.handleResponse(DISCOVERY_RESPONSE_WITH_EMPTY_NODE_RESPONSE); fixture.verifyAckSent(1); - verify(fixture._d2UriSubscriber, times(0)).onData(any(), any()); - verify(fixture._clusterSubscriber, times(0)).onData(any(), any()); - verify(fixture._uriMapWildcardSubscriber, times(0)).onData(any(), any(), any()); + verify(fixture._d2UriSubscriber, times(0)).onData(any(), any(), any(), any()); + verify(fixture._clusterSubscriber, times(0)).onData(any(), any(), any(), any()); + verify(fixture._uriMapWildcardSubscriber, times(0)).onData(any(), any(), any(), any(), any()); } @DataProvider(name = "badNodeUpdateTestCases") @@ -432,6 +433,7 @@ public void testHandleD2ClusterOrServiceNameResponse() { Assert.assertEquals(fixture._nameWildcardSubscriber.getData(SERVICE_RESOURCE_NAME), SERVICE_NAME_DATA_UPDATE); Assert.assertEquals(fixture._nameWildcardSubscriber.getData(SERVICE_RESOURCE_NAME_2), SERVICE_NAME_DATA_UPDATE_2); verifyZeroInteractions(fixture._serverMetricsProvider); // D2ClusterOrServiceName response does not track latency + verifyZeroInteractions(fixture._xdsClientOtelMetricsProvider); } @Test @@ -440,7 +442,7 @@ public void testHandleD2ClusterOrServiceNameEmptyResponse() { fixture.watchAllResourceAndWatcherTypes(); fixture._xdsClientImpl.handleResponse(RESPONSE_WITH_EMPTY_NAMES); fixture.verifyAckSent(1); - verify(fixture._nameWildcardSubscriber, times(0)).onData(any(), any(), any()); + verify(fixture._nameWildcardSubscriber, times(0)).onData(any(), any(), any(), any(), any()); } @Test @@ -541,8 +543,8 @@ public void testHandleD2URIMapUpdateWithEmptyResponse() { fixture._xdsClientImpl.handleResponse(DISCOVERY_RESPONSE_WITH_EMPTY_URI_MAP_RESPONSE); fixture.verifyAckSent(2); // onData is called only once. Empty response does not trigger onData calls. - verify(fixture._clusterSubscriber).onData(any(), any()); - verify(fixture._uriMapWildcardSubscriber).onData(any(), any(), any()); + verify(fixture._clusterSubscriber).onData(any(), any(), any(), any()); + verify(fixture._uriMapWildcardSubscriber).onData(any(), any(), any(), any(), any()); } @Test(dataProvider = "providerWatcherFlags") @@ -574,6 +576,7 @@ public void testHandleD2URIMapUpdateWithBadData(boolean toWatchIndividual, boole // bad data will not overwrite the original valid data Assert.assertEquals(fixture._clusterSubscriber.getData(), D2_URI_MAP_UPDATE_WITH_DATA1); verifyZeroInteractions(fixture._serverMetricsProvider); + verifyZeroInteractions(fixture._xdsClientOtelMetricsProvider); D2URIMapUpdate actualData = (D2URIMapUpdate) fixture._clusterSubscriber.getData(); Assert.assertFalse(actualData.isGlobCollectionEnabled()); Assert.assertTrue(actualData.getUpdatedUrisName().isEmpty()); @@ -685,6 +688,7 @@ public void testHandleD2URICollectionUpdateWithBadData(boolean toWatchIndividual verify(fixture._wildcardResourceWatcher, times(toWatchWildcard ? 1 : 0)).onChanged(any(), eq(D2_URI_MAP.emptyData())); verifyZeroInteractions(fixture._serverMetricsProvider); + verifyZeroInteractions(fixture._xdsClientOtelMetricsProvider); // current data is not null, bad data will not overwrite the original valid data and watchers won't be notified. fixture._clusterSubscriber.setData(D2_URI_MAP_GLOB_COLLECTION_UPDATE_WITH_DATA1); @@ -695,6 +699,7 @@ public void testHandleD2URICollectionUpdateWithBadData(boolean toWatchIndividual verify(fixture._wildcardResourceWatcher, times(0)).onChanged(any(), eq(D2_URI_MAP_GLOB_COLLECTION_UPDATE_WITH_DATA1)); verifyZeroInteractions(fixture._serverMetricsProvider); + verifyZeroInteractions(fixture._xdsClientOtelMetricsProvider); Assert.assertEquals(fixture._clusterSubscriber.getData(), D2_URI_MAP_GLOB_COLLECTION_UPDATE_WITH_DATA1); // Verify that bad data doesn't affect the updated and removed URIs D2URIMapUpdate actualData = (D2URIMapUpdate) fixture._clusterSubscriber.getData(); @@ -1121,8 +1126,8 @@ public void testHandleD2CalleesUpdateWithEmptyResponse() { fixture.watchAllResourceAndWatcherTypes(); fixture._xdsClientImpl.handleResponse(DISCOVERY_RESPONSE_WITH_EMPTY_CALLEES_RESPONSE); fixture.verifyAckSent(1); - verify(fixture._calleesSubscriber, times(0)).onData(any(), any()); - verify(fixture._calleesWildcardSubscriber, times(0)).onData(any(), any(), any()); + verify(fixture._calleesSubscriber, times(0)).onData(any(), any(), any(), any()); + verify(fixture._calleesWildcardSubscriber, times(0)).onData(any(), any(), any(), any(), any()); } @Test @@ -1262,6 +1267,8 @@ private static final class XdsClientImplFixture { @Mock XdsServerMetricsProvider _serverMetricsProvider; @Mock + XdsClientOtelMetricsProvider _xdsClientOtelMetricsProvider; + @Mock Clock _clock; @Captor @@ -1295,6 +1302,7 @@ private static final class XdsClientImplFixture { doNothing().when(_resourceWatcher).onChanged(any()); doNothing().when(_wildcardResourceWatcher).onChanged(any(), any()); doNothing().when(_serverMetricsProvider).trackLatency(anyLong()); + doNothing().when(_xdsClientOtelMetricsProvider).recordServerLatency(anyString(), anyLong()); for (ResourceSubscriber subscriber : Lists.newArrayList(_nodeSubscriber, _clusterSubscriber, _d2UriSubscriber, _calleesSubscriber)) { @@ -1311,7 +1319,7 @@ private static final class XdsClientImplFixture { _executorService = spy(Executors.newScheduledThreadPool(1)); _xdsClientImpl = spy(new XdsClientImpl(null, mock(ManagedChannel.class), _executorService, 0, useGlobCollections, - _serverMetricsProvider, useIRV)); + _serverMetricsProvider, _xdsClientOtelMetricsProvider, useIRV, null, null, null)); _xdsClientImpl._adsStream = _adsStream; doNothing().when(_xdsClientImpl).startRpcStreamLocal(); diff --git a/d2/src/test/java/com/linkedin/d2/xds/balancer/TestXdsLoadBalancerWithFacilitiesFactory.java b/d2/src/test/java/com/linkedin/d2/xds/balancer/TestXdsLoadBalancerWithFacilitiesFactory.java new file mode 100644 index 0000000000..d583704571 --- /dev/null +++ b/d2/src/test/java/com/linkedin/d2/xds/balancer/TestXdsLoadBalancerWithFacilitiesFactory.java @@ -0,0 +1,67 @@ +package com.linkedin.d2.xds.balancer; + +import com.linkedin.d2.balancer.D2ClientConfig; +import com.linkedin.d2.jmx.NoOpXdsClientOtelMetricsProvider; +import com.linkedin.d2.jmx.XdsClientOtelMetricsProvider; +import org.testng.annotations.Test; + +import static org.testng.Assert.*; + +/** + * Minimal test for {@link XdsLoadBalancerWithFacilitiesFactory}. + * Verifies that config.xdsClientOtelMetricsProvider can be configured. + */ +public class TestXdsLoadBalancerWithFacilitiesFactory { + + @Test + public void testFactoryIsIndisOnly() { + XdsLoadBalancerWithFacilitiesFactory factory = new XdsLoadBalancerWithFacilitiesFactory(); + assertTrue(factory.isIndisOnly()); + } + + @Test + public void testConfigWithDefaultMetricsProvider() { + // Verify the config field that the factory uses exists and has a default + D2ClientConfig config = new D2ClientConfig(); + assertNotNull(config.xdsClientOtelMetricsProvider); + assertTrue(config.xdsClientOtelMetricsProvider instanceof NoOpXdsClientOtelMetricsProvider); + } + + @Test + public void testConfigWithCustomMetricsProvider() { + // Verify the config field that the factory uses can be set to a custom provider + D2ClientConfig config = new D2ClientConfig(); + XdsClientOtelMetricsProvider customProvider = new TestMetricsProvider(); + config.xdsClientOtelMetricsProvider = customProvider; + + assertSame(config.xdsClientOtelMetricsProvider, customProvider); + } + + /** + * Minimal test metrics provider implementation. + */ + private static class TestMetricsProvider implements XdsClientOtelMetricsProvider { + @Override + public void recordConnectionLost(String clientName) {} + @Override + public void recordConnectionClosed(String clientName) {} + @Override + public void recordReconnection(String clientName) {} + @Override + public void recordRequestSent(String clientName) {} + @Override + public void recordResponseReceived(String clientName) {} + @Override + public void recordInitialResourceVersionSent(String clientName, int count) {} + @Override + public void recordResourceNotFound(String clientName) {} + @Override + public void recordResourceInvalid(String clientName) {} + @Override + public void recordServerLatency(String clientName, long latencyMs) {} + @Override + public void updateConnectionState(String clientName, boolean isConnected) {} + @Override + public void updateActiveInitialWaitTime(String clientName, long waitTimeMs) {} + } +} \ No newline at end of file