Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

WIP Merge 'f/mserving-metrics' into 'incubation' #1504

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions backend/src/routes/api/prometheus/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ import {
PrometheusQueryResponse,
QueryType,
} from '../../../types';
import { callPrometheusThanos, callPrometheusServing } from '../../../utils/prometheusUtils';
import { callPrometheusThanos } from '../../../utils/prometheusUtils';
import { createCustomError } from '../../../utils/requestUtils';
import { logRequestDetails } from '../../../utils/fileUtils';
import { THANOS_DEFAULT_OAUTH_PORT } from '../../../utils/constants';

const handleError = (e: createError.HttpError) => {
if (e?.code) {
Expand Down Expand Up @@ -36,7 +37,9 @@ module.exports = async (fastify: KubeFastifyInstance) => {
): Promise<{ code: number; response: PrometheusQueryResponse }> => {
const { query } = request.body;

return callPrometheusThanos(fastify, request, query).catch(handleError);
return callPrometheusThanos<PrometheusQueryResponse>(fastify, request, query).catch(
handleError,
);
},
);

Expand All @@ -46,12 +49,16 @@ module.exports = async (fastify: KubeFastifyInstance) => {
request: OauthFastifyRequest<{
Body: { query: string };
}>,
): Promise<{ code: number; response: PrometheusQueryResponse }> => {
): Promise<{ code: number; response: PrometheusQueryRangeResponse }> => {
const { query } = request.body;

return callPrometheusThanos(fastify, request, query, QueryType.QUERY_RANGE).catch(
handleError,
);
return callPrometheusThanos<PrometheusQueryRangeResponse>(
fastify,
request,
query,
QueryType.QUERY_RANGE,
THANOS_DEFAULT_OAUTH_PORT,
).catch(handleError);
},
);

Expand All @@ -65,7 +72,12 @@ module.exports = async (fastify: KubeFastifyInstance) => {
logRequestDetails(fastify, request);
const { query } = request.body;

return callPrometheusServing(fastify, request, query).catch(handleError);
return callPrometheusThanos<PrometheusQueryRangeResponse>(
fastify,
request,
query,
QueryType.QUERY_RANGE,
).catch(handleError);
},
);
};
3 changes: 3 additions & 0 deletions backend/src/utils/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,6 @@ export const DEFAULT_NOTEBOOK_SIZES: NotebookSize[] = [

export const imageUrlRegex =
/^([\w.\-_]+((?::\d+|)(?=\/[a-z0-9._-]+\/[a-z0-9._-]+))|)(?:\/|)([a-z0-9.\-_]+(?:\/[a-z0-9.\-_]+|))(?::([\w.\-_]{1,127})|)/;

export const THANOS_DEFAULT_RBAC_PORT = '9092';
export const THANOS_DEFAULT_OAUTH_PORT = '9091';
12 changes: 6 additions & 6 deletions backend/src/utils/prometheusUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@ import {
KubeFastifyInstance,
OauthFastifyRequest,
PrometheusQueryRangeResponse,
PrometheusQueryResponse,
QueryType,
} from '../types';
import { DEV_MODE } from './constants';
import { DEV_MODE, THANOS_DEFAULT_RBAC_PORT } from './constants';
import { getNamespaces } from './notebookUtils';
import { getDashboardConfig } from './resourceUtils';
import { createCustomError } from './requestUtils';
Expand Down Expand Up @@ -84,17 +83,18 @@ const generatePrometheusHostURL = (
return `https://${instanceName}.${namespace}.svc.cluster.local:${port}`;
};

export const callPrometheusThanos = (
export const callPrometheusThanos = <T>(
fastify: KubeFastifyInstance,
request: OauthFastifyRequest,
query: string,
queryType: QueryType = QueryType.QUERY,
): Promise<{ code: number; response: PrometheusQueryResponse }> =>
callPrometheus(
port = THANOS_DEFAULT_RBAC_PORT,
): Promise<{ code: number; response: T }> =>
callPrometheus<T>(
fastify,
request,
query,
generatePrometheusHostURL(fastify, 'thanos-querier', 'openshift-monitoring', '9092'),
generatePrometheusHostURL(fastify, 'thanos-querier', 'openshift-monitoring', port),
queryType,
);

Expand Down
27 changes: 18 additions & 9 deletions frontend/src/api/prometheus/serving.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import {
import useBiasMetricsEnabled from '~/concepts/explainability/useBiasMetricsEnabled';
import { ResponsePredicate } from '~/api/prometheus/usePrometheusQueryRange';
import useRefreshInterval from '~/utilities/useRefreshInterval';
import { RefreshIntervalValue } from '~/pages/modelServing/screens/const';
import { QueryTimeframeStep, RefreshIntervalValue } from '~/pages/modelServing/screens/const';
import usePerformanceMetricsEnabled from '~/pages/modelServing/screens/metrics/usePerformanceMetricsEnabled';
import useQueryRangeResourceData from './useQueryRangeResourceData';

Expand Down Expand Up @@ -45,29 +45,33 @@ export const useModelServingMetrics = (

const trustyResponsePredicate = React.useCallback<
ResponsePredicate<PrometheusQueryRangeResponseDataResult>
>((data) => data.result, []);
>((data) => data.result || [], []);

const serverRequestCount = useQueryRangeResourceData(
performanceMetricsEnabled && type === PerformanceMetricType.SERVER,
queries[ServerMetricType.REQUEST_COUNT],
end,
timeframe,
QueryTimeframeStep[ServerMetricType.REQUEST_COUNT],
defaultResponsePredicate,
);

const serverAverageResponseTime = useQueryRangeResourceData(
performanceMetricsEnabled && type === PerformanceMetricType.SERVER,
queries[ServerMetricType.AVG_RESPONSE_TIME],
end,
timeframe,
defaultResponsePredicate,
);
const serverAverageResponseTime =
useQueryRangeResourceData<PrometheusQueryRangeResponseDataResult>(
performanceMetricsEnabled && type === PerformanceMetricType.SERVER,
queries[ServerMetricType.AVG_RESPONSE_TIME],
end,
timeframe,
QueryTimeframeStep[ServerMetricType.AVG_RESPONSE_TIME],
trustyResponsePredicate,
);

const serverCPUUtilization = useQueryRangeResourceData(
performanceMetricsEnabled && type === PerformanceMetricType.SERVER,
queries[ServerMetricType.CPU_UTILIZATION],
end,
timeframe,
QueryTimeframeStep[ServerMetricType.CPU_UTILIZATION],
defaultResponsePredicate,
);

Expand All @@ -76,6 +80,7 @@ export const useModelServingMetrics = (
queries[ServerMetricType.MEMORY_UTILIZATION],
end,
timeframe,
QueryTimeframeStep[ServerMetricType.MEMORY_UTILIZATION],
defaultResponsePredicate,
);

Expand All @@ -84,6 +89,7 @@ export const useModelServingMetrics = (
queries[ModelMetricType.REQUEST_COUNT_SUCCESS],
end,
timeframe,
QueryTimeframeStep[ModelMetricType.REQUEST_COUNT_SUCCESS],
defaultResponsePredicate,
);

Expand All @@ -92,6 +98,7 @@ export const useModelServingMetrics = (
queries[ModelMetricType.REQUEST_COUNT_FAILED],
end,
timeframe,
QueryTimeframeStep[ModelMetricType.REQUEST_COUNT_FAILED],
defaultResponsePredicate,
);

Expand All @@ -100,6 +107,7 @@ export const useModelServingMetrics = (
queries[ModelMetricType.TRUSTY_AI_SPD],
end,
timeframe,
QueryTimeframeStep[ModelMetricType.TRUSTY_AI_SPD],
trustyResponsePredicate,
'/api/prometheus/bias',
);
Expand All @@ -109,6 +117,7 @@ export const useModelServingMetrics = (
queries[ModelMetricType.TRUSTY_AI_DIR],
end,
timeframe,
QueryTimeframeStep[ModelMetricType.TRUSTY_AI_DIR],
trustyResponsePredicate,
'/api/prometheus/bias',
);
Expand Down
7 changes: 4 additions & 3 deletions frontend/src/api/prometheus/useQueryRangeResourceData.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { TimeframeStep, TimeframeTimeRange } from '~/pages/modelServing/screens/const';
import { TimeframeTimeRange } from '~/pages/modelServing/screens/const';
import { ContextResourceData, PrometheusQueryRangeResultValue } from '~/types';
import useRestructureContextResourceData from '~/utilities/useRestructureContextResourceData';
import { TimeframeTitle } from '~/pages/modelServing/screens/types';
import { TimeframeStepType, TimeframeTitle } from '~/pages/modelServing/screens/types';
import usePrometheusQueryRange, { ResponsePredicate } from './usePrometheusQueryRange';

const useQueryRangeResourceData = <T = PrometheusQueryRangeResultValue>(
Expand All @@ -10,6 +10,7 @@ const useQueryRangeResourceData = <T = PrometheusQueryRangeResultValue>(
query: string,
end: number,
timeframe: TimeframeTitle,
timeframeStep: TimeframeStepType,
responsePredicate: ResponsePredicate<T>,
apiPath = '/api/prometheus/serving',
): ContextResourceData<T> =>
Expand All @@ -20,7 +21,7 @@ const useQueryRangeResourceData = <T = PrometheusQueryRangeResultValue>(
query,
TimeframeTimeRange[timeframe],
end,
TimeframeStep[timeframe],
timeframeStep[timeframe],
responsePredicate,
),
);
Expand Down
24 changes: 22 additions & 2 deletions frontend/src/pages/modelServing/screens/const.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import { ModelMetricType, ServerMetricType } from './metrics/ModelServingMetricsContext';
import {
QueryTimeframeStepType,
RefreshIntervalTitle,
RefreshIntervalValueType,
ServingRuntimeSize,
Expand Down Expand Up @@ -72,7 +74,7 @@ export const TimeframeTimeRange: TimeframeTimeType = {
[TimeframeTitle.ONE_HOUR]: 60 * 60,
[TimeframeTitle.ONE_DAY]: 24 * 60 * 60,
[TimeframeTitle.ONE_WEEK]: 7 * 24 * 60 * 60,
[TimeframeTitle.ONE_MONTH]: 30 * 7 * 24 * 60 * 60,
[TimeframeTitle.ONE_MONTH]: 30 * 24 * 60 * 60,
// [TimeframeTitle.UNLIMITED]: 0,
};

Expand All @@ -84,14 +86,32 @@ export const TimeframeTimeRange: TimeframeTimeType = {
* 24h * 60m * 60s => 86,400 seconds of space
* 86,400 / (24 * 12) => 300 points of prometheus data
*/
export const TimeframeStep: TimeframeStepType = {
const TimeframeStep: TimeframeStepType = {
[TimeframeTitle.ONE_HOUR]: 12,
[TimeframeTitle.ONE_DAY]: 24 * 12,
[TimeframeTitle.ONE_WEEK]: 7 * 24 * 12,
[TimeframeTitle.ONE_MONTH]: 30 * 24 * 12,
// [TimeframeTitle.UNLIMITED]: 30 * 7 * 24 * 12, // TODO: determine if we "zoom out" more
};

const TimeframeStepForRequestCountAndAverageTime = {
[TimeframeTitle.ONE_HOUR]: 5 * 60,
[TimeframeTitle.ONE_DAY]: 60 * 60,
[TimeframeTitle.ONE_WEEK]: 12 * 60 * 60,
[TimeframeTitle.ONE_MONTH]: 24 * 60 * 60,
};

export const QueryTimeframeStep: QueryTimeframeStepType = {
[ServerMetricType.REQUEST_COUNT]: TimeframeStepForRequestCountAndAverageTime,
[ServerMetricType.AVG_RESPONSE_TIME]: TimeframeStepForRequestCountAndAverageTime,
[ServerMetricType.CPU_UTILIZATION]: TimeframeStep,
[ServerMetricType.MEMORY_UTILIZATION]: TimeframeStep,
[ModelMetricType.REQUEST_COUNT_FAILED]: TimeframeStepForRequestCountAndAverageTime,
[ModelMetricType.REQUEST_COUNT_SUCCESS]: TimeframeStepForRequestCountAndAverageTime,
[ModelMetricType.TRUSTY_AI_DIR]: TimeframeStep,
[ModelMetricType.TRUSTY_AI_SPD]: TimeframeStep,
};

export const RefreshIntervalValue: RefreshIntervalValueType = {
[RefreshIntervalTitle.FIFTEEN_SECONDS]: 15 * 1000,
[RefreshIntervalTitle.THIRTY_SECONDS]: 30 * 1000,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,27 @@ import { InferenceServiceKind } from '~/k8sTypes';
import ModelMetricsPathWrapper from './ModelMetricsPathWrapper';
import { ModelServingMetricsProvider } from './ModelServingMetricsContext';
import { getModelMetricsQueries } from './utils';
import useCurrentTimeframeBrowserStorage from './useCurrentTimeframeBrowserStorage';

export type GlobalModelMetricsOutletContextProps = {
model: InferenceServiceKind;
projectName: string;
};

const GlobalModelMetricsWrapper: React.FC = () => (
<ModelMetricsPathWrapper>
{(model, projectName) => {
const queries = getModelMetricsQueries(model);
return (
<ModelServingMetricsProvider queries={queries} type={PerformanceMetricType.MODEL}>
<Outlet context={{ model, projectName }} />
</ModelServingMetricsProvider>
);
}}
</ModelMetricsPathWrapper>
);
const GlobalModelMetricsWrapper: React.FC = () => {
const [currentTimeframe] = useCurrentTimeframeBrowserStorage();
return (
<ModelMetricsPathWrapper>
{(model, projectName) => {
const queries = getModelMetricsQueries(model, currentTimeframe);
return (
<ModelServingMetricsProvider queries={queries} type={PerformanceMetricType.MODEL}>
<Outlet context={{ model, projectName }} />
</ModelServingMetricsProvider>
);
}}
</ModelMetricsPathWrapper>
);
};

export default GlobalModelMetricsWrapper;
Loading
Loading