feat: revisit Otel metrics semantic convention migration logics (#1267)

wrn14897 · web-flow · commit 43e32aafc781 · 2025-10-14T22:06:31.000Z
Revisited because users can still switch to the new metric name using a feature gate. Follow-up to #1248.
diff --git a/.changeset/lucky-plums-sort.md b/.changeset/lucky-plums-sort.md
@@ -0,0 +1,7 @@
+---
+"@hyperdx/common-utils": patch
+"@hyperdx/api": patch
+"@hyperdx/app": patch
+---
+
+fix: handle metrics semantic convention upgrade (feature gate)
diff --git a/packages/api/src/clickhouse/__tests__/renderChartConfig.test.ts b/packages/api/src/clickhouse/__tests__/renderChartConfig.test.ts
@@ -1431,7 +1431,7 @@ describe('renderChartConfig', () => {
               aggFn: 'avg',
               metricName: 'k8s.pod.cpu.utilization',
               metricNameSql:
-                "if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.pod.cpu.usage', 'k8s.pod.cpu.utilization')",
+                "MetricName IN ('k8s.pod.cpu.utilization', 'k8s.pod.cpu.usage')",
               metricType: MetricsDataType.Gauge,
               valueExpression: 'Value',
             },
@@ -1452,10 +1452,10 @@ describe('renderChartConfig', () => {
       expect(res.length).toBeGreaterThan(0);
       expect(res).toMatchSnapshot();
 
-      // Verify the SQL contains the dynamic metric name condition
-      expect(query.sql).toContain('if(greaterOrEquals(ScopeVersion');
+      // Verify the SQL contains the IN-based metric name condition
       expect(query.sql).toContain('k8s.pod.cpu.usage');
       expect(query.sql).toContain('k8s.pod.cpu.utilization');
+      expect(query.sql).toMatch(/MetricName IN /);
     });
 
     it('should handle gauge metric with metricNameSql and groupBy', async () => {
@@ -1466,7 +1466,7 @@ describe('renderChartConfig', () => {
               aggFn: 'avg',
               metricName: 'k8s.pod.cpu.utilization',
               metricNameSql:
-                "if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.pod.cpu.usage', 'k8s.pod.cpu.utilization')",
+                "MetricName IN ('k8s.pod.cpu.utilization', 'k8s.pod.cpu.usage')",
               metricType: MetricsDataType.Gauge,
               valueExpression: 'Value',
             },
@@ -1516,9 +1516,9 @@ describe('renderChartConfig', () => {
       // Should only return data from old metric name (k8s.pod.cpu.utilization)
       expect(res).toMatchSnapshot();
 
-      // Verify the SQL uses simple string comparison
+      // Verify the SQL uses simple string comparison (not IN-based)
       expect(query.sql).toContain("MetricName = 'k8s.pod.cpu.utilization'");
-      expect(query.sql).not.toContain('if(greaterOrEquals(ScopeVersion');
+      expect(query.sql).not.toMatch(/MetricName IN /);
     });
   });
 });
diff --git a/packages/app/src/__tests__/otelSemanticConventions.test.ts b/packages/app/src/__tests__/otelSemanticConventions.test.ts
@@ -5,21 +5,21 @@ describe('otelSemanticConventions', () => {
     it('should return SQL for k8s.pod.cpu.utilization migration', () => {
       const result = getMetricNameSql('k8s.pod.cpu.utilization');
       expect(result).toBe(
-        "if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.pod.cpu.usage', 'k8s.pod.cpu.utilization')",
+        "MetricName IN ('k8s.pod.cpu.utilization', 'k8s.pod.cpu.usage')",
       );
     });
 
     it('should return SQL for k8s.node.cpu.utilization migration', () => {
       const result = getMetricNameSql('k8s.node.cpu.utilization');
       expect(result).toBe(
-        "if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.node.cpu.usage', 'k8s.node.cpu.utilization')",
+        "MetricName IN ('k8s.node.cpu.utilization', 'k8s.node.cpu.usage')",
       );
     });
 
     it('should return SQL for container.cpu.utilization migration', () => {
       const result = getMetricNameSql('container.cpu.utilization');
       expect(result).toBe(
-        "if(greaterOrEquals(ScopeVersion, '0.125.0'), 'container.cpu.usage', 'container.cpu.utilization')",
+        "MetricName IN ('container.cpu.utilization', 'container.cpu.usage')",
       );
     });
 
diff --git a/packages/app/src/otelSemanticConventions.ts b/packages/app/src/otelSemanticConventions.ts
@@ -2,38 +2,37 @@
  * OpenTelemetry Semantic Conventions utilities
  * Handles transformations between different versions of OTel semantic conventions
  */
+import SqlString from 'sqlstring';
 
 /**
  * Mapping of old metric names to new metric names based on semantic convention version
+ * The key is the old metric name for easy lookup
  */
 const METRIC_NAME_MIGRATIONS: Record<
   string,
   {
-    oldName: string;
     newName: string;
     versionThreshold: string;
   }
 > = {
   'k8s.pod.cpu.utilization': {
-    oldName: 'k8s.pod.cpu.utilization',
     newName: 'k8s.pod.cpu.usage',
     versionThreshold: '0.125.0',
   },
   'k8s.node.cpu.utilization': {
-    oldName: 'k8s.node.cpu.utilization',
     newName: 'k8s.node.cpu.usage',
     versionThreshold: '0.125.0',
   },
   'container.cpu.utilization': {
-    oldName: 'container.cpu.utilization',
     newName: 'container.cpu.usage',
     versionThreshold: '0.125.0',
   },
 };
 
 /**
- * Generates SQL expression to dynamically select metric name based on ScopeVersion
- * @param metricName - The metric name to check for migrations
+ * Generates SQL expression to coerce metric name to handle both old and new conventions
+ * Matches metrics using either the old or new naming convention
+ * @param metricName - The metric name to check for migrations (should be the old name)
  * @returns SQL expression if migration exists, undefined otherwise
  */
 export function getMetricNameSql(metricName: string): string | undefined {
@@ -43,5 +42,7 @@ export function getMetricNameSql(metricName: string): string | undefined {
     return undefined;
   }
 
-  return `if(greaterOrEquals(ScopeVersion, '${migration.versionThreshold}'), '${migration.newName}', '${migration.oldName}')`;
+  return SqlString.format('MetricName IN (?)', [
+    [metricName, migration.newName],
+  ]);
 }
diff --git a/packages/common-utils/src/__tests__/__snapshots__/renderChartConfig.test.ts.snap b/packages/common-utils/src/__tests__/__snapshots__/renderChartConfig.test.ts.snap
@@ -296,7 +296,7 @@ exports[`renderChartConfig k8s semantic convention migrations should generate SQ
                   cityHash64(ExplicitBounds) AS bounds_hash,
                   CAST(BucketCounts AS Array(Int64)) counts
               FROM default.otel_metrics_histogram
-              WHERE (TimeUnix >= toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 2 minute) - INTERVAL 2 minute AND TimeUnix <= toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 2 minute) + INTERVAL 2 minute) AND ((MetricName = if(greaterOrEquals(ScopeVersion, '0.125.0'), 'container.cpu.usage', 'container.cpu.utilization')))
+              WHERE (TimeUnix >= toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 2 minute) - INTERVAL 2 minute AND TimeUnix <= toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 2 minute) + INTERVAL 2 minute) AND ((MetricName IN ('container.cpu.utilization', 'container.cpu.usage')))
               ORDER BY attr_hash, TimeUnix ASC
             )
           )
@@ -380,7 +380,7 @@ exports[`renderChartConfig k8s semantic convention migrations should generate SQ
                   cityHash64(ExplicitBounds) AS bounds_hash,
                   CAST(BucketCounts AS Array(Int64)) counts
               FROM default.otel_metrics_histogram
-              WHERE (TimeUnix >= toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 1 minute) - INTERVAL 1 minute AND TimeUnix <= toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 1 minute) + INTERVAL 1 minute) AND ((MetricName = if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.pod.cpu.usage', 'k8s.pod.cpu.utilization')))
+              WHERE (TimeUnix >= toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 1 minute) - INTERVAL 1 minute AND TimeUnix <= toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 1 minute) + INTERVAL 1 minute) AND ((MetricName IN ('k8s.pod.cpu.utilization', 'k8s.pod.cpu.usage')))
               ORDER BY attr_hash, TimeUnix ASC
             )
           )
@@ -435,7 +435,7 @@ exports[`renderChartConfig k8s semantic convention migrations should generate SQ
                   ) AS Rate,
                   IF(AggregationTemporality = 1, Rate, Value) AS Sum
                 FROM default.otel_metrics_sum
-                WHERE (TimeUnix >= toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 5 minute) - INTERVAL 5 minute AND TimeUnix <= toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 5 minute) + INTERVAL 5 minute) AND ((MetricName = if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.node.cpu.usage', 'k8s.node.cpu.utilization')))),Bucketed AS (
+                WHERE (TimeUnix >= toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 5 minute) - INTERVAL 5 minute AND TimeUnix <= toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 5 minute) + INTERVAL 5 minute) AND ((MetricName IN ('k8s.node.cpu.utilization', 'k8s.node.cpu.usage')))),Bucketed AS (
             SELECT
               toStartOfInterval(toDateTime(TimeUnix), INTERVAL 5 minute) AS \`__hdx_time_bucket2\`,
               AttributesHash,
@@ -473,7 +473,7 @@ exports[`renderChartConfig k8s semantic convention migrations should generate SQ
               *,
               cityHash64(mapConcat(ScopeAttributes, ResourceAttributes, Attributes)) AS AttributesHash
             FROM default.otel_metrics_gauge
-            WHERE (TimeUnix >= fromUnixTimestamp64Milli(1739318400000) AND TimeUnix <= fromUnixTimestamp64Milli(1765670400000)) AND ((MetricName = if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.pod.cpu.usage', 'k8s.pod.cpu.utilization')))
+            WHERE (TimeUnix >= fromUnixTimestamp64Milli(1739318400000) AND TimeUnix <= fromUnixTimestamp64Milli(1765670400000)) AND ((MetricName IN ('k8s.pod.cpu.utilization', 'k8s.pod.cpu.usage')))
           ),Bucketed AS (
             SELECT
               toStartOfInterval(toDateTime(TimeUnix), INTERVAL 1 minute) AS \`__hdx_time_bucket2\`,
diff --git a/packages/common-utils/src/__tests__/renderChartConfig.test.ts b/packages/common-utils/src/__tests__/renderChartConfig.test.ts
@@ -434,7 +434,7 @@ describe('renderChartConfig', () => {
             valueExpression: 'Value',
             metricName: 'k8s.pod.cpu.utilization',
             metricNameSql:
-              "if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.pod.cpu.usage', 'k8s.pod.cpu.utilization')",
+              "MetricName IN ('k8s.pod.cpu.utilization', 'k8s.pod.cpu.usage')",
             metricType: MetricsDataType.Gauge,
           },
         ],
@@ -449,10 +449,10 @@ describe('renderChartConfig', () => {
       const generatedSql = await renderChartConfig(config, mockMetadata);
       const actual = parameterizedQueryToSql(generatedSql);
 
-      // Verify the SQL contains the dynamic metric name condition
-      expect(actual).toContain(
-        "MetricName = if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.pod.cpu.usage', 'k8s.pod.cpu.utilization')",
-      );
+      // Verify the SQL contains the IN-based metric name condition
+      expect(actual).toContain('k8s.pod.cpu.utilization');
+      expect(actual).toContain('k8s.pod.cpu.usage');
+      expect(actual).toMatch(/MetricName IN /);
       expect(actual).toMatchSnapshot();
     });
 
@@ -479,7 +479,7 @@ describe('renderChartConfig', () => {
             valueExpression: 'Value',
             metricName: 'k8s.node.cpu.utilization',
             metricNameSql:
-              "if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.node.cpu.usage', 'k8s.node.cpu.utilization')",
+              "MetricName IN ('k8s.node.cpu.utilization', 'k8s.node.cpu.usage')",
             metricType: MetricsDataType.Sum,
           },
         ],
@@ -494,9 +494,9 @@ describe('renderChartConfig', () => {
       const generatedSql = await renderChartConfig(config, mockMetadata);
       const actual = parameterizedQueryToSql(generatedSql);
 
-      expect(actual).toContain(
-        "MetricName = if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.node.cpu.usage', 'k8s.node.cpu.utilization')",
-      );
+      expect(actual).toContain('k8s.node.cpu.utilization');
+      expect(actual).toContain('k8s.node.cpu.usage');
+      expect(actual).toMatch(/MetricName IN /);
       expect(actual).toMatchSnapshot();
     });
 
@@ -522,7 +522,7 @@ describe('renderChartConfig', () => {
             valueExpression: 'Value',
             metricName: 'container.cpu.utilization',
             metricNameSql:
-              "if(greaterOrEquals(ScopeVersion, '0.125.0'), 'container.cpu.usage', 'container.cpu.utilization')",
+              "MetricName IN ('container.cpu.utilization', 'container.cpu.usage')",
             metricType: MetricsDataType.Histogram,
           },
         ],
@@ -537,9 +537,9 @@ describe('renderChartConfig', () => {
       const generatedSql = await renderChartConfig(config, mockMetadata);
       const actual = parameterizedQueryToSql(generatedSql);
 
-      expect(actual).toContain(
-        "MetricName = if(greaterOrEquals(ScopeVersion, '0.125.0'), 'container.cpu.usage', 'container.cpu.utilization')",
-      );
+      expect(actual).toContain('container.cpu.utilization');
+      expect(actual).toContain('container.cpu.usage');
+      expect(actual).toMatch(/MetricName IN /);
       expect(actual).toMatchSnapshot();
     });
 
@@ -565,7 +565,7 @@ describe('renderChartConfig', () => {
             valueExpression: 'Value',
             metricName: 'k8s.pod.cpu.utilization',
             metricNameSql:
-              "if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.pod.cpu.usage', 'k8s.pod.cpu.utilization')",
+              "MetricName IN ('k8s.pod.cpu.utilization', 'k8s.pod.cpu.usage')",
             metricType: MetricsDataType.Histogram,
           },
         ],
@@ -581,9 +581,9 @@ describe('renderChartConfig', () => {
       const generatedSql = await renderChartConfig(config, mockMetadata);
       const actual = parameterizedQueryToSql(generatedSql);
 
-      expect(actual).toContain(
-        "MetricName = if(greaterOrEquals(ScopeVersion, '0.125.0'), 'k8s.pod.cpu.usage', 'k8s.pod.cpu.utilization')",
-      );
+      expect(actual).toContain('k8s.pod.cpu.utilization');
+      expect(actual).toContain('k8s.pod.cpu.usage');
+      expect(actual).toMatch(/MetricName IN /);
       expect(actual).toMatchSnapshot();
     });
 
@@ -624,9 +624,9 @@ describe('renderChartConfig', () => {
       const generatedSql = await renderChartConfig(config, mockMetadata);
       const actual = parameterizedQueryToSql(generatedSql);
 
-      // Should use the simple string comparison for regular metrics
+      // Should use the simple string comparison for regular metrics (not IN-based)
       expect(actual).toContain("MetricName = 'some.regular.metric'");
-      expect(actual).not.toContain('if(greaterOrEquals(ScopeVersion');
+      expect(actual).not.toMatch(/MetricName IN /);
       expect(actual).toMatchSnapshot();
     });
   });
diff --git a/packages/common-utils/src/renderChartConfig.ts b/packages/common-utils/src/renderChartConfig.ts
@@ -8,16 +8,17 @@ import { CustomSchemaSQLSerializerV2, SearchQueryBuilder } from '@/queryParser';
 
 /**
  * Helper function to create a MetricName filter condition.
- * Uses metricNameSql if available (for dynamic SQL), otherwise falls back to metricName.
+ * Uses metricNameSql if available (a complete predicate matching both old and new metric names via IN),
+ * otherwise falls back to a simple equality check.
  */
 function createMetricNameFilter(
   metricName: string,
   metricNameSql?: string,
 ): string {
-  return SqlString.format(
-    'MetricName = ?',
-    metricNameSql ? SqlString.raw(metricNameSql) : [metricName],
-  );
+  if (metricNameSql) {
+    return metricNameSql;
+  }
+  return SqlString.format('MetricName = ?', [metricName]);
 }
 import {
   AggregateFunction,