From d94f04e7088616f80ca9f9a036a7a3a55b7b03f6 Mon Sep 17 00:00:00 2001 From: Juntao Wang Date: Thu, 6 Jul 2023 17:04:37 -0400 Subject: [PATCH] Add queries for server and model performance metrics --- backend/src/routes/api/prometheus/index.ts | 24 +++-- backend/src/utils/prometheusUtils.ts | 7 +- frontend/src/api/prometheus/serving.ts | 27 ++++-- .../prometheus/useQueryRangeResourceData.ts | 7 +- .../src/pages/modelServing/screens/const.ts | 24 ++++- .../metrics/GlobalModelMetricsWrapper.tsx | 28 +++--- .../screens/metrics/MetricsChart.tsx | 91 ++++++++++++++----- .../screens/metrics/ModelGraphs.tsx | 9 +- .../metrics/ModelServingMetricsContext.tsx | 5 +- .../screens/metrics/ServerGraphs.tsx | 39 ++++++-- .../modelServing/screens/metrics/types.ts | 4 +- .../useCurrentTimeframeBrowserStorage.ts | 16 ++++ .../modelServing/screens/metrics/utils.tsx | 57 ++++++++---- .../projects/ProjectModelMetricsWrapper.tsx | 28 +++--- .../projects/ProjectServerMetricsWrapper.tsx | 60 ++++++------ .../src/pages/modelServing/screens/types.ts | 5 + 16 files changed, 294 insertions(+), 137 deletions(-) create mode 100644 frontend/src/pages/modelServing/screens/metrics/useCurrentTimeframeBrowserStorage.ts diff --git a/backend/src/routes/api/prometheus/index.ts b/backend/src/routes/api/prometheus/index.ts index 2c62e3164f..6e0b3ccc9e 100644 --- a/backend/src/routes/api/prometheus/index.ts +++ b/backend/src/routes/api/prometheus/index.ts @@ -6,7 +6,7 @@ import { PrometheusQueryResponse, QueryType, } from '../../../types'; -import { callPrometheusThanos, callPrometheusServing } from '../../../utils/prometheusUtils'; +import { callPrometheusThanos } from '../../../utils/prometheusUtils'; import { createCustomError } from '../../../utils/requestUtils'; import { logRequestDetails } from '../../../utils/fileUtils'; @@ -36,7 +36,9 @@ module.exports = async (fastify: KubeFastifyInstance) => { ): Promise<{ code: number; response: PrometheusQueryResponse }> => { const { query } = request.body; - return callPrometheusThanos(fastify, request, query).catch(handleError); + return callPrometheusThanos(fastify, request, query).catch( + handleError, + ); }, ); @@ -46,12 +48,15 @@ module.exports = async (fastify: KubeFastifyInstance) => { request: OauthFastifyRequest<{ Body: { query: string }; }>, - ): Promise<{ code: number; response: PrometheusQueryResponse }> => { + ): Promise<{ code: number; response: PrometheusQueryRangeResponse }> => { const { query } = request.body; - return callPrometheusThanos(fastify, request, query, QueryType.QUERY_RANGE).catch( - handleError, - ); + return callPrometheusThanos( + fastify, + request, + query, + QueryType.QUERY_RANGE, + ).catch(handleError); }, ); @@ -65,7 +70,12 @@ module.exports = async (fastify: KubeFastifyInstance) => { logRequestDetails(fastify, request); const { query } = request.body; - return callPrometheusServing(fastify, request, query).catch(handleError); + return callPrometheusThanos( + fastify, + request, + query, + QueryType.QUERY_RANGE, + ).catch(handleError); }, ); }; diff --git a/backend/src/utils/prometheusUtils.ts b/backend/src/utils/prometheusUtils.ts index 5735a850cc..a51bbb1173 100644 --- a/backend/src/utils/prometheusUtils.ts +++ b/backend/src/utils/prometheusUtils.ts @@ -2,7 +2,6 @@ import { KubeFastifyInstance, OauthFastifyRequest, PrometheusQueryRangeResponse, - PrometheusQueryResponse, QueryType, } from '../types'; import { DEV_MODE } from './constants'; @@ -84,13 +83,13 @@ const generatePrometheusHostURL = ( return `https://${instanceName}.${namespace}.svc.cluster.local:${port}`; }; -export const callPrometheusThanos = ( +export const callPrometheusThanos = ( fastify: KubeFastifyInstance, request: OauthFastifyRequest, query: string, queryType: QueryType = QueryType.QUERY, -): Promise<{ code: number; response: PrometheusQueryResponse }> => - callPrometheus( +): Promise<{ code: number; response: T }> => + callPrometheus( fastify, request, query, diff --git a/frontend/src/api/prometheus/serving.ts b/frontend/src/api/prometheus/serving.ts index d9dd517ccc..3f51eb532f 100644 --- a/frontend/src/api/prometheus/serving.ts +++ b/frontend/src/api/prometheus/serving.ts @@ -16,7 +16,7 @@ import { import useBiasMetricsEnabled from '~/concepts/explainability/useBiasMetricsEnabled'; import { ResponsePredicate } from '~/api/prometheus/usePrometheusQueryRange'; import useRefreshInterval from '~/utilities/useRefreshInterval'; -import { RefreshIntervalValue } from '~/pages/modelServing/screens/const'; +import { QueryTimeframeStep, RefreshIntervalValue } from '~/pages/modelServing/screens/const'; import usePerformanceMetricsEnabled from '~/pages/modelServing/screens/metrics/usePerformanceMetricsEnabled'; import useQueryRangeResourceData from './useQueryRangeResourceData'; @@ -45,29 +45,33 @@ export const useModelServingMetrics = ( const trustyResponsePredicate = React.useCallback< ResponsePredicate - >((data) => data.result, []); + >((data) => data.result || [], []); const serverRequestCount = useQueryRangeResourceData( performanceMetricsEnabled && type === PerformanceMetricType.SERVER, queries[ServerMetricType.REQUEST_COUNT], end, timeframe, + QueryTimeframeStep[ServerMetricType.REQUEST_COUNT], defaultResponsePredicate, ); - const serverAverageResponseTime = useQueryRangeResourceData( - performanceMetricsEnabled && type === PerformanceMetricType.SERVER, - queries[ServerMetricType.AVG_RESPONSE_TIME], - end, - timeframe, - defaultResponsePredicate, - ); + const serverAverageResponseTime = + useQueryRangeResourceData( + performanceMetricsEnabled && type === PerformanceMetricType.SERVER, + queries[ServerMetricType.AVG_RESPONSE_TIME], + end, + timeframe, + QueryTimeframeStep[ServerMetricType.AVG_RESPONSE_TIME], + trustyResponsePredicate, + ); const serverCPUUtilization = useQueryRangeResourceData( performanceMetricsEnabled && type === PerformanceMetricType.SERVER, queries[ServerMetricType.CPU_UTILIZATION], end, timeframe, + QueryTimeframeStep[ServerMetricType.CPU_UTILIZATION], defaultResponsePredicate, ); @@ -76,6 +80,7 @@ export const useModelServingMetrics = ( queries[ServerMetricType.MEMORY_UTILIZATION], end, timeframe, + QueryTimeframeStep[ServerMetricType.MEMORY_UTILIZATION], defaultResponsePredicate, ); @@ -84,6 +89,7 @@ export const useModelServingMetrics = ( queries[ModelMetricType.REQUEST_COUNT_SUCCESS], end, timeframe, + QueryTimeframeStep[ModelMetricType.REQUEST_COUNT_SUCCESS], defaultResponsePredicate, ); @@ -92,6 +98,7 @@ export const useModelServingMetrics = ( queries[ModelMetricType.REQUEST_COUNT_FAILED], end, timeframe, + QueryTimeframeStep[ModelMetricType.REQUEST_COUNT_FAILED], defaultResponsePredicate, ); @@ -100,6 +107,7 @@ export const useModelServingMetrics = ( queries[ModelMetricType.TRUSTY_AI_SPD], end, timeframe, + QueryTimeframeStep[ModelMetricType.TRUSTY_AI_SPD], trustyResponsePredicate, '/api/prometheus/bias', ); @@ -109,6 +117,7 @@ export const useModelServingMetrics = ( queries[ModelMetricType.TRUSTY_AI_DIR], end, timeframe, + QueryTimeframeStep[ModelMetricType.TRUSTY_AI_DIR], trustyResponsePredicate, '/api/prometheus/bias', ); diff --git a/frontend/src/api/prometheus/useQueryRangeResourceData.ts b/frontend/src/api/prometheus/useQueryRangeResourceData.ts index 52f082fc4b..6917ddec9f 100644 --- a/frontend/src/api/prometheus/useQueryRangeResourceData.ts +++ b/frontend/src/api/prometheus/useQueryRangeResourceData.ts @@ -1,7 +1,7 @@ -import { TimeframeStep, TimeframeTimeRange } from '~/pages/modelServing/screens/const'; +import { TimeframeTimeRange } from '~/pages/modelServing/screens/const'; import { ContextResourceData, PrometheusQueryRangeResultValue } from '~/types'; import useRestructureContextResourceData from '~/utilities/useRestructureContextResourceData'; -import { TimeframeTitle } from '~/pages/modelServing/screens/types'; +import { TimeframeStepType, TimeframeTitle } from '~/pages/modelServing/screens/types'; import usePrometheusQueryRange, { ResponsePredicate } from './usePrometheusQueryRange'; const useQueryRangeResourceData = ( @@ -10,6 +10,7 @@ const useQueryRangeResourceData = ( query: string, end: number, timeframe: TimeframeTitle, + timeframeStep: TimeframeStepType, responsePredicate: ResponsePredicate, apiPath = '/api/prometheus/serving', ): ContextResourceData => @@ -20,7 +21,7 @@ const useQueryRangeResourceData = ( query, TimeframeTimeRange[timeframe], end, - TimeframeStep[timeframe], + timeframeStep[timeframe], responsePredicate, ), ); diff --git a/frontend/src/pages/modelServing/screens/const.ts b/frontend/src/pages/modelServing/screens/const.ts index c1f0ccf91a..c26c2cc89e 100644 --- a/frontend/src/pages/modelServing/screens/const.ts +++ b/frontend/src/pages/modelServing/screens/const.ts @@ -1,4 +1,6 @@ +import { ModelMetricType, ServerMetricType } from './metrics/ModelServingMetricsContext'; import { + QueryTimeframeStepType, RefreshIntervalTitle, RefreshIntervalValueType, ServingRuntimeSize, @@ -72,7 +74,7 @@ export const TimeframeTimeRange: TimeframeTimeType = { [TimeframeTitle.ONE_HOUR]: 60 * 60, [TimeframeTitle.ONE_DAY]: 24 * 60 * 60, [TimeframeTitle.ONE_WEEK]: 7 * 24 * 60 * 60, - [TimeframeTitle.ONE_MONTH]: 30 * 7 * 24 * 60 * 60, + [TimeframeTitle.ONE_MONTH]: 30 * 24 * 60 * 60, // [TimeframeTitle.UNLIMITED]: 0, }; @@ -84,7 +86,7 @@ export const TimeframeTimeRange: TimeframeTimeType = { * 24h * 60m * 60s => 86,400 seconds of space * 86,400 / (24 * 12) => 300 points of prometheus data */ -export const TimeframeStep: TimeframeStepType = { +const TimeframeStep: TimeframeStepType = { [TimeframeTitle.ONE_HOUR]: 12, [TimeframeTitle.ONE_DAY]: 24 * 12, [TimeframeTitle.ONE_WEEK]: 7 * 24 * 12, @@ -92,6 +94,24 @@ export const TimeframeStep: TimeframeStepType = { // [TimeframeTitle.UNLIMITED]: 30 * 7 * 24 * 12, // TODO: determine if we "zoom out" more }; +const TimeframeStepForRequestCountAndAverageTime = { + [TimeframeTitle.ONE_HOUR]: 5 * 60, + [TimeframeTitle.ONE_DAY]: 60 * 60, + [TimeframeTitle.ONE_WEEK]: 12 * 60 * 60, + [TimeframeTitle.ONE_MONTH]: 24 * 60 * 60, +}; + +export const QueryTimeframeStep: QueryTimeframeStepType = { + [ServerMetricType.REQUEST_COUNT]: TimeframeStepForRequestCountAndAverageTime, + [ServerMetricType.AVG_RESPONSE_TIME]: TimeframeStepForRequestCountAndAverageTime, + [ServerMetricType.CPU_UTILIZATION]: TimeframeStep, + [ServerMetricType.MEMORY_UTILIZATION]: TimeframeStep, + [ModelMetricType.REQUEST_COUNT_FAILED]: TimeframeStepForRequestCountAndAverageTime, + [ModelMetricType.REQUEST_COUNT_SUCCESS]: TimeframeStepForRequestCountAndAverageTime, + [ModelMetricType.TRUSTY_AI_DIR]: TimeframeStep, + [ModelMetricType.TRUSTY_AI_SPD]: TimeframeStep, +}; + export const RefreshIntervalValue: RefreshIntervalValueType = { [RefreshIntervalTitle.FIFTEEN_SECONDS]: 15 * 1000, [RefreshIntervalTitle.THIRTY_SECONDS]: 30 * 1000, diff --git a/frontend/src/pages/modelServing/screens/metrics/GlobalModelMetricsWrapper.tsx b/frontend/src/pages/modelServing/screens/metrics/GlobalModelMetricsWrapper.tsx index 3e28e964e5..70269e8928 100644 --- a/frontend/src/pages/modelServing/screens/metrics/GlobalModelMetricsWrapper.tsx +++ b/frontend/src/pages/modelServing/screens/metrics/GlobalModelMetricsWrapper.tsx @@ -5,23 +5,27 @@ import { InferenceServiceKind } from '~/k8sTypes'; import ModelMetricsPathWrapper from './ModelMetricsPathWrapper'; import { ModelServingMetricsProvider } from './ModelServingMetricsContext'; import { getModelMetricsQueries } from './utils'; +import useCurrentTimeframeBrowserStorage from './useCurrentTimeframeBrowserStorage'; export type GlobalModelMetricsOutletContextProps = { model: InferenceServiceKind; projectName: string; }; -const GlobalModelMetricsWrapper: React.FC = () => ( - - {(model, projectName) => { - const queries = getModelMetricsQueries(model); - return ( - - - - ); - }} - -); +const GlobalModelMetricsWrapper: React.FC = () => { + const [currentTimeframe] = useCurrentTimeframeBrowserStorage(); + return ( + + {(model, projectName) => { + const queries = getModelMetricsQueries(model, currentTimeframe); + return ( + + + + ); + }} + + ); +}; export default GlobalModelMetricsWrapper; diff --git a/frontend/src/pages/modelServing/screens/metrics/MetricsChart.tsx b/frontend/src/pages/modelServing/screens/metrics/MetricsChart.tsx index 7086409624..954b8c01d9 100644 --- a/frontend/src/pages/modelServing/screens/metrics/MetricsChart.tsx +++ b/frontend/src/pages/modelServing/screens/metrics/MetricsChart.tsx @@ -17,10 +17,13 @@ import { ChartArea, ChartAxis, ChartGroup, + ChartLegendTooltip, ChartLine, + ChartStack, ChartThemeColor, ChartThreshold, ChartVoronoiContainer, + createContainer, getResizeObserver, } from '@patternfly/react-charts'; import { CubesIcon } from '@patternfly/react-icons'; @@ -50,6 +53,7 @@ type MetricsChartProps = { domain?: DomainCalculator; toolbar?: React.ReactElement; type?: MetricsChartTypes; + isStack?: boolean; }; const MetricsChart: React.FC = ({ title, @@ -59,38 +63,52 @@ const MetricsChart: React.FC = ({ domain = defaultDomainCalculator, toolbar, type = MetricsChartTypes.AREA, + isStack = false, }) => { const bodyRef = React.useRef(null); const [chartWidth, setChartWidth] = React.useState(0); const { currentTimeframe, lastUpdateTime } = React.useContext(ModelServingMetricsContext); const metrics = useStableMetrics(unstableMetrics, title); + const CursorVoronoiContainer = React.useMemo(() => createContainer('voronoi', 'cursor'), []); + const [tooltipDisabled, setTooltipDisabled] = React.useState(false); + const [tooltipTitle, setTooltipTitle] = React.useState( + convertTimestamp(Date.now(), formatToShow(currentTimeframe)), + ); const { data: graphLines, maxYValue, minYValue, + maxXValue, + minXValue, } = React.useMemo( () => metrics.reduce( (acc, metric) => { const lineValues = createGraphMetricLine(metric); - const newMaxValue = Math.max(...lineValues.map((v) => v.y)); - const newMinValue = Math.min(...lineValues.map((v) => v.y)); + const newMaxYValue = Math.max(...lineValues.map((v) => v.y)); + const newMinYValue = Math.min(...lineValues.map((v) => v.y)); + const newMaxXValue = Math.max(...lineValues.map((v) => v.x)); + const newMinXValue = Math.min(...lineValues.map((v) => v.x)); return { - data: [...acc.data, lineValues], - maxYValue: Math.max(acc.maxYValue, newMaxValue), - minYValue: Math.min(acc.minYValue, newMinValue), + data: [...acc.data, { points: lineValues, name: metric.name }], + maxYValue: Math.max(acc.maxYValue, newMaxYValue), + minYValue: Math.min(acc.minYValue, newMinYValue), + maxXValue: Math.max(acc.maxXValue, newMaxXValue), + minXValue: Math.min(acc.minXValue, newMinXValue), }; }, - { data: [], maxYValue: 0, minYValue: 0 }, + { data: [], maxYValue: 0, minYValue: 0, maxXValue: 0, minXValue: Date.now() }, ), [metrics], ); const error = metrics.find((line) => line.metric.error)?.metric.error; const isAllLoaded = metrics.every((line) => line.metric.loaded); - const hasSomeData = graphLines.some((line) => line.length > 0); + const hasSomeData = graphLines.some((line) => line.points.length > 0); + + const ChartGroupWrapper = React.useMemo(() => (isStack ? ChartStack : ChartGroup), [isStack]); React.useEffect(() => { const ref = bodyRef.current; @@ -105,14 +123,49 @@ const MetricsChart: React.FC = ({ return () => observer(); }, []); + const handleCursorChange = React.useCallback( + (xValue: number) => { + if (!xValue) { + return; + } + setTooltipTitle(convertTimestamp(xValue, formatToShow(currentTimeframe))); + if (xValue < minXValue || xValue > maxXValue) { + setTooltipDisabled(true); + } else { + setTooltipDisabled(false); + } + }, + [minXValue, currentTimeframe, maxXValue], + ); + let legendProps: Partial> = {}; + let containerComponent; if (metrics.length > 1 && metrics.every(({ name }) => !!name)) { // We don't need a label if there is only one line & we need a name for every item (or it won't align) + const legendData = metrics.map(({ name }) => ({ name, childName: name })); legendProps = { - legendData: metrics.map(({ name }) => ({ name })), + legendData, legendOrientation: 'horizontal', legendPosition: 'bottom-left', }; + containerComponent = ( + (tooltipDisabled ? 'No data' : datum.y)} + labelComponent={} + onCursorChange={handleCursorChange} + mouseFollowTooltips + voronoiDimension="x" + voronoiPadding={50} + /> + ); + } else { + containerComponent = ( + `${datum.name}: ${datum.y}`} + constrainToVisibleArea + /> + ); } return ( @@ -130,12 +183,7 @@ const MetricsChart: React.FC = ({ {hasSomeData ? ( `${datum.name}: ${datum.y}`} - constrainToVisibleArea - /> - } + containerComponent={containerComponent} domain={domain(maxYValue, minYValue)} height={400} width={chartWidth} @@ -152,24 +200,25 @@ const MetricsChart: React.FC = ({ fixLabelOverlap /> - + {graphLines.map((line, i) => { switch (type) { case MetricsChartTypes.AREA: - return ; - break; + return ; case MetricsChartTypes.LINE: - return ; - break; + return ; default: return null; } })} - + {thresholds.map((t) => ( lines.points), + t.value, + )} style={t.color ? { data: { stroke: t.color } } : undefined} name={t.label} /> diff --git a/frontend/src/pages/modelServing/screens/metrics/ModelGraphs.tsx b/frontend/src/pages/modelServing/screens/metrics/ModelGraphs.tsx index a7426535c2..b9463ee192 100644 --- a/frontend/src/pages/modelServing/screens/metrics/ModelGraphs.tsx +++ b/frontend/src/pages/modelServing/screens/metrics/ModelGraphs.tsx @@ -5,14 +5,10 @@ import { ModelMetricType, ModelServingMetricsContext, } from '~/pages/modelServing/screens/metrics/ModelServingMetricsContext'; -import { TimeframeTitle } from '~/pages/modelServing/screens/types'; import { per100 } from './utils'; const ModelGraphs: React.FC = () => { - const { data, currentTimeframe } = React.useContext(ModelServingMetricsContext); - - const inHours = - currentTimeframe === TimeframeTitle.ONE_HOUR || currentTimeframe === TimeframeTitle.ONE_DAY; + const { data } = React.useContext(ModelServingMetricsContext); return ( @@ -27,9 +23,10 @@ const ModelGraphs: React.FC = () => { { name: 'Failed http requests (x100)', metric: data[ModelMetricType.REQUEST_COUNT_FAILED], + translatePoint: per100, }, ]} - title={`Http requests per ${inHours ? 'hour' : 'day'} (x100)`} + title="Http requests (x100)" /> diff --git a/frontend/src/pages/modelServing/screens/metrics/ModelServingMetricsContext.tsx b/frontend/src/pages/modelServing/screens/metrics/ModelServingMetricsContext.tsx index 1e6e898408..b887a5cd06 100644 --- a/frontend/src/pages/modelServing/screens/metrics/ModelServingMetricsContext.tsx +++ b/frontend/src/pages/modelServing/screens/metrics/ModelServingMetricsContext.tsx @@ -7,6 +7,7 @@ import { RefreshIntervalTitle, TimeframeTitle, } from '~/pages/modelServing/screens/types'; +import useCurrentTimeframeBrowserStorage from './useCurrentTimeframeBrowserStorage'; export enum ServerMetricType { AVG_RESPONSE_TIME = 'runtime_avg-response-time', @@ -68,9 +69,7 @@ export const ModelServingMetricsProvider: React.FC { - const [currentTimeframe, setCurrentTimeframe] = React.useState( - TimeframeTitle.ONE_DAY, - ); + const [currentTimeframe, setCurrentTimeframe] = useCurrentTimeframeBrowserStorage(); const [currentRefreshInterval, setCurrentRefreshInterval] = React.useState( RefreshIntervalTitle.FIVE_MINUTES, diff --git a/frontend/src/pages/modelServing/screens/metrics/ServerGraphs.tsx b/frontend/src/pages/modelServing/screens/metrics/ServerGraphs.tsx index deb05ddeb2..f336285570 100644 --- a/frontend/src/pages/modelServing/screens/metrics/ServerGraphs.tsx +++ b/frontend/src/pages/modelServing/screens/metrics/ServerGraphs.tsx @@ -5,14 +5,15 @@ import { ModelServingMetricsContext, ServerMetricType, } from '~/pages/modelServing/screens/metrics/ModelServingMetricsContext'; -import { TimeframeTitle } from '~/pages/modelServing/screens/types'; -import { per100 } from '~/pages/modelServing/screens/metrics/utils'; +import { + convertPrometheusNaNToZero, + per100, + toPercentage, +} from '~/pages/modelServing/screens/metrics/utils'; +import { NamedMetricChartLine } from './types'; const ServerGraphs: React.FC = () => { - const { data, currentTimeframe } = React.useContext(ModelServingMetricsContext); - - const inHours = - currentTimeframe === TimeframeTitle.ONE_HOUR || currentTimeframe === TimeframeTitle.ONE_DAY; + const { data } = React.useContext(ModelServingMetricsContext); return ( @@ -20,28 +21,46 @@ const ServerGraphs: React.FC = () => { ({ + name: line.metric.pod, + metric: { + ...data[ServerMetricType.AVG_RESPONSE_TIME], + data: convertPrometheusNaNToZero(line.values), + }, + }), + )} color="green" title="Average response time (ms)" + isStack /> ({ + y: [0, 100], + })} /> ({ + y: [0, 100], + })} /> diff --git a/frontend/src/pages/modelServing/screens/metrics/types.ts b/frontend/src/pages/modelServing/screens/metrics/types.ts index ce09a759fa..318057ae03 100644 --- a/frontend/src/pages/modelServing/screens/metrics/types.ts +++ b/frontend/src/pages/modelServing/screens/metrics/types.ts @@ -27,9 +27,11 @@ export type GraphMetricPoint = { export type GraphMetricLine = GraphMetricPoint[]; export type ProcessedMetrics = { - data: GraphMetricLine[]; + data: { points: GraphMetricLine; name: string }[]; maxYValue: number; minYValue: number; + maxXValue: number; + minXValue: number; }; export type MetricChartThreshold = { diff --git a/frontend/src/pages/modelServing/screens/metrics/useCurrentTimeframeBrowserStorage.ts b/frontend/src/pages/modelServing/screens/metrics/useCurrentTimeframeBrowserStorage.ts new file mode 100644 index 0000000000..8b08f78c10 --- /dev/null +++ b/frontend/src/pages/modelServing/screens/metrics/useCurrentTimeframeBrowserStorage.ts @@ -0,0 +1,16 @@ +import { useBrowserStorage } from '~/components/browserStorage'; +import { SetBrowserStorageHook } from '~/components/browserStorage/BrowserStorageContext'; +import { TimeframeTitle } from '~/pages/modelServing/screens/types'; + +const useCurrentTimeframeBrowserStorage = (): [ + TimeframeTitle, + SetBrowserStorageHook, +] => + useBrowserStorage( + 'odh.dashboard.metrics.current.timeframe', + TimeframeTitle.ONE_DAY, + false, + true, + ); + +export default useCurrentTimeframeBrowserStorage; diff --git a/frontend/src/pages/modelServing/screens/metrics/utils.tsx b/frontend/src/pages/modelServing/screens/metrics/utils.tsx index 60986805a8..4c298e65fc 100644 --- a/frontend/src/pages/modelServing/screens/metrics/utils.tsx +++ b/frontend/src/pages/modelServing/screens/metrics/utils.tsx @@ -4,7 +4,14 @@ import { BreadcrumbItem, SelectOptionObject } from '@patternfly/react-core'; import { Link } from 'react-router-dom'; import { RefreshIntervalTitle, TimeframeTitle } from '~/pages/modelServing/screens/types'; import { InferenceServiceKind, ServingRuntimeKind } from '~/k8sTypes'; -import { BreadcrumbItemType, DashboardConfig } from '~/types'; +import { BreadcrumbItemType, DashboardConfig, PrometheusQueryRangeResultValue } from '~/types'; +import { BaseMetricRequest, BaseMetricRequestInput, BiasMetricType } from '~/api'; +import { BiasMetricConfig } from '~/concepts/explainability/types'; +import { + BIAS_CHART_CONFIGS, + BIAS_THRESHOLD_COLOR, +} from '~/pages/modelServing/screens/metrics/const'; +import { QueryTimeframeStep } from '~/pages/modelServing/screens/const'; import { BiasSelectOption, DomainCalculator, @@ -14,13 +21,7 @@ import { MetricChartThreshold, NamedMetricChartLine, TranslatePoint, -} from '~/pages/modelServing/screens/metrics/types'; -import { BaseMetricRequest, BaseMetricRequestInput, BiasMetricType } from '~/api'; -import { BiasMetricConfig } from '~/concepts/explainability/types'; -import { - BIAS_CHART_CONFIGS, - BIAS_THRESHOLD_COLOR, -} from '~/pages/modelServing/screens/metrics/const'; +} from './types'; import { ModelMetricType, ServerMetricType } from './ModelServingMetricsContext'; export const isModelMetricsEnabled = ( @@ -35,26 +36,35 @@ export const isModelMetricsEnabled = ( export const getServerMetricsQueries = ( server: ServingRuntimeKind, + currentTimeframe: TimeframeTitle, ): Record => { const namespace = server.metadata.namespace; + const name = server.metadata.name; + const responseTimeStep = QueryTimeframeStep[ServerMetricType.AVG_RESPONSE_TIME][currentTimeframe]; return { - // TODO: Get new queries - [ServerMetricType.REQUEST_COUNT]: `TBD`, - [ServerMetricType.AVG_RESPONSE_TIME]: `rate(modelmesh_api_request_milliseconds_sum{exported_namespace="${namespace}"}[1m])/rate(modelmesh_api_request_milliseconds_count{exported_namespace="${namespace}"}[1m])`, - [ServerMetricType.CPU_UTILIZATION]: `TBD`, - [ServerMetricType.MEMORY_UTILIZATION]: `TBD`, + [ServerMetricType.REQUEST_COUNT]: `sum(increase(modelmesh_api_request_milliseconds_count{namespace="${namespace}",pod=~"modelmesh-serving-${name}-.*"}[${ + QueryTimeframeStep[ServerMetricType.REQUEST_COUNT][currentTimeframe] + }s]))`, + [ServerMetricType.AVG_RESPONSE_TIME]: `increase(modelmesh_api_request_milliseconds_sum{namespace="${namespace}",pod=~"modelmesh-serving-${name}-.*"}[${responseTimeStep}s])/increase(modelmesh_api_request_milliseconds_count{namespace="${namespace}",pod=~"modelmesh-serving-${name}-.*"}[${responseTimeStep}s])`, + [ServerMetricType.CPU_UTILIZATION]: `sum(pod:container_cpu_usage:sum{namespace="${namespace}", pod=~"modelmesh-serving-${name}-.*"})/sum(kube_pod_resource_limit{resource="cpu", pod=~"modelmesh-serving-${name}-.*", namespace="${namespace}"})`, + [ServerMetricType.MEMORY_UTILIZATION]: `sum(container_memory_working_set_bytes{namespace="${namespace}", pod=~"modelmesh-serving-${name}-.*"})/sum(kube_pod_resource_limit{resource="memory", pod=~"modelmesh-serving-${name}-.*", namespace="${namespace}"})`, }; }; export const getModelMetricsQueries = ( model: InferenceServiceKind, + currentTimeframe: TimeframeTitle, ): Record => { const namespace = model.metadata.namespace; const name = model.metadata.name; return { - [ModelMetricType.REQUEST_COUNT_SUCCESS]: `sum(haproxy_backend_http_responses_total{exported_namespace="${namespace}", route="${name}"})`, - [ModelMetricType.REQUEST_COUNT_FAILED]: `sum(haproxy_backend_http_responses_total{exported_namespace="${namespace}", route="${name}"})`, + [ModelMetricType.REQUEST_COUNT_SUCCESS]: `sum(increase(haproxy_backend_http_responses_total{exported_namespace="${namespace}", route="${name}", code="2xx"}[${ + QueryTimeframeStep[ModelMetricType.REQUEST_COUNT_SUCCESS][currentTimeframe] + }s]))`, + [ModelMetricType.REQUEST_COUNT_FAILED]: `sum(increase(haproxy_backend_http_responses_total{exported_namespace="${namespace}", route="${name}", code="4xx|5xx"}[${ + QueryTimeframeStep[ModelMetricType.REQUEST_COUNT_FAILED][currentTimeframe] + }s]))`, [ModelMetricType.TRUSTY_AI_SPD]: `trustyai_spd{model="${name}"}`, [ModelMetricType.TRUSTY_AI_DIR]: `trustyai_dir{model="${name}"}`, }; @@ -96,9 +106,10 @@ export const convertTimestamp = (timestamp: number, show?: 'date' | 'second'): s hour = hour ? hour : 12; const minuteString = minute < 10 ? '0' + minute : minute; const secondString = second < 10 ? '0' + second : second; - return `${show === 'date' ? `${day} ${month} ` : ''}${hour}:${minuteString}${ - show === 'second' ? `:${secondString}` : '' - } ${ampm}`; + if (show === 'date') { + return `${day} ${month}`; + } + return `${hour}:${minuteString}${show === 'second' ? `:${secondString}` : ''} ${ampm}`; }; export const getThresholdData = (data: GraphMetricLine[], threshold: number): GraphMetricLine => @@ -125,7 +136,12 @@ export const formatToShow = (timeframe: TimeframeTitle): 'date' | 'second' | und export const per100: TranslatePoint = (point) => ({ ...point, - y: point.y / 100, + y: Number((point.y / 100).toFixed(2)), +}); + +export const toPercentage: TranslatePoint = (point) => ({ + ...point, + y: point.y * 100, }); export const createGraphMetricLine = ({ @@ -315,3 +331,6 @@ export const convertConfigurationRequestType = ( export const getThresholdDefaultDelta = (metricType?: BiasMetricType) => metricType && BIAS_CHART_CONFIGS[metricType].defaultDelta; + +export const convertPrometheusNaNToZero = (data: PrometheusQueryRangeResultValue[]) => + data.map((value) => [value[0], isNaN(Number(value[1])) ? '0' : value[1]]); diff --git a/frontend/src/pages/modelServing/screens/projects/ProjectModelMetricsWrapper.tsx b/frontend/src/pages/modelServing/screens/projects/ProjectModelMetricsWrapper.tsx index 05900f7992..bc6ae5b43a 100644 --- a/frontend/src/pages/modelServing/screens/projects/ProjectModelMetricsWrapper.tsx +++ b/frontend/src/pages/modelServing/screens/projects/ProjectModelMetricsWrapper.tsx @@ -4,6 +4,7 @@ import { ModelServingMetricsProvider } from '~/pages/modelServing/screens/metric import { getModelMetricsQueries } from '~/pages/modelServing/screens/metrics/utils'; import { PerformanceMetricType } from '~/pages/modelServing/screens/types'; import { InferenceServiceKind, ProjectKind } from '~/k8sTypes'; +import useCurrentTimeframeBrowserStorage from '~/pages/modelServing/screens/metrics/useCurrentTimeframeBrowserStorage'; import ProjectModelMetricsPathWrapper from './ProjectModelMetricsPathWrapper'; export type ProjectModelMetricsOutletContextProps = { @@ -11,17 +12,20 @@ export type ProjectModelMetricsOutletContextProps = { currentProject: ProjectKind; }; -const ProjectModelMetricsWrapper: React.FC = () => ( - - {(model, currentProject) => { - const queries = getModelMetricsQueries(model); - return ( - - - - ); - }} - -); +const ProjectModelMetricsWrapper: React.FC = () => { + const [currentTimeframe] = useCurrentTimeframeBrowserStorage(); + return ( + + {(model, currentProject) => { + const queries = getModelMetricsQueries(model, currentTimeframe); + return ( + + + + ); + }} + + ); +}; export default ProjectModelMetricsWrapper; diff --git a/frontend/src/pages/modelServing/screens/projects/ProjectServerMetricsWrapper.tsx b/frontend/src/pages/modelServing/screens/projects/ProjectServerMetricsWrapper.tsx index 15f82883ff..e698a831f5 100644 --- a/frontend/src/pages/modelServing/screens/projects/ProjectServerMetricsWrapper.tsx +++ b/frontend/src/pages/modelServing/screens/projects/ProjectServerMetricsWrapper.tsx @@ -4,36 +4,40 @@ import { ModelServingMetricsProvider } from '~/pages/modelServing/screens/metric import { getServerMetricsQueries } from '~/pages/modelServing/screens/metrics/utils'; import { getProjectDisplayName } from '~/pages/projects/utils'; import { PerformanceMetricType } from '~/pages/modelServing/screens/types'; +import useCurrentTimeframeBrowserStorage from '~/pages/modelServing/screens/metrics/useCurrentTimeframeBrowserStorage'; import ProjectServerMetricsPathWrapper from './ProjectServerMetricsPathWrapper'; import { getModelServerDisplayName } from './utils'; -const ProjectServerMetricsWrapper: React.FC = () => ( - - {(servingRuntime, currentProject) => { - const queries = getServerMetricsQueries(servingRuntime); - const projectDisplayName = getProjectDisplayName(currentProject); - const serverName = getModelServerDisplayName(servingRuntime); - return ( - - - - ); - }} - -); +const ProjectServerMetricsWrapper: React.FC = () => { + const [currentTimeframe] = useCurrentTimeframeBrowserStorage(); + return ( + + {(servingRuntime, currentProject) => { + const queries = getServerMetricsQueries(servingRuntime, currentTimeframe); + const projectDisplayName = getProjectDisplayName(currentProject); + const serverName = getModelServerDisplayName(servingRuntime); + return ( + + + + ); + }} + + ); +}; export default ProjectServerMetricsWrapper; diff --git a/frontend/src/pages/modelServing/screens/types.ts b/frontend/src/pages/modelServing/screens/types.ts index c6c0e31411..25fcc8d570 100644 --- a/frontend/src/pages/modelServing/screens/types.ts +++ b/frontend/src/pages/modelServing/screens/types.ts @@ -1,5 +1,6 @@ import { EnvVariableDataEntry } from '~/pages/projects/types'; import { ContainerResources } from '~/types'; +import { ModelMetricType, ServerMetricType } from './metrics/ModelServingMetricsContext'; export enum PerformanceMetricType { SERVER = 'server', @@ -20,6 +21,10 @@ export type TimeframeTimeType = { export type TimeframeStepType = TimeframeTimeType; +export type QueryTimeframeStepType = { + [key in ServerMetricType | ModelMetricType]: TimeframeStepType; +}; + export enum RefreshIntervalTitle { FIFTEEN_SECONDS = '15 seconds', THIRTY_SECONDS = '30 seconds',