From 84c2231ba1a4366ffe5226298da851763511c603 Mon Sep 17 00:00:00 2001 From: Gage Krumbach Date: Fri, 28 Jul 2023 08:27:31 -0500 Subject: [PATCH 1/2] added accelerator detection --- .../api/accelerators/acceleratorUtils.ts | 46 +++++++++++++++++++ backend/src/routes/api/accelerators/index.ts | 11 +++++ backend/src/routes/api/gpu/gpuUtils.ts | 3 ++ backend/src/routes/api/gpu/index.ts | 3 ++ backend/src/types.ts | 8 ++++ 5 files changed, 71 insertions(+) create mode 100644 backend/src/routes/api/accelerators/acceleratorUtils.ts create mode 100644 backend/src/routes/api/accelerators/index.ts diff --git a/backend/src/routes/api/accelerators/acceleratorUtils.ts b/backend/src/routes/api/accelerators/acceleratorUtils.ts new file mode 100644 index 0000000000..7ea0068e0e --- /dev/null +++ b/backend/src/routes/api/accelerators/acceleratorUtils.ts @@ -0,0 +1,46 @@ +import { AcceleratorInfo, KubeFastifyInstance } from "../../../types" + +const RESOURCE_TYPES = ["cpu", "memory", "pods", "ephemeral-storage", "hugepages-1Gi", "hugepages-2Mi"] + +const getIdentifiersFromResources = (resources: {[key: string]: string} = {}) => { + return Object.entries(resources) + .filter(([key,]) => !RESOURCE_TYPES.includes(key)) + .reduce<{[key: string]: number}>((identifiers, [key, value]) => { + identifiers[key] = isNaN(parseInt(value)) ? 
0 : parseInt(value) + return identifiers + }, {}) +} + +export const getAcceleratorNumbers = async (fastify: KubeFastifyInstance): Promise<AcceleratorInfo> => ( + fastify.kube.coreV1Api.listNode() + .then((res) => res.body.items.reduce<AcceleratorInfo>((info, node) => { + // reduce resources down to just the accelerators and their counts + const allocatable = getIdentifiersFromResources(node.status.allocatable) + const capacity = getIdentifiersFromResources(node.status.capacity) + + // update the max count for each accelerator + Object.entries(allocatable).forEach(([key, value]) => ( + info.available[key] = Math.max((info.available[key] || 0), value) + )) + + // update the total count for each accelerator + Object.entries(capacity).forEach(([key, value]) => ( + info.total[key] = (info.total[key] || 0) + value + )) + + + // update the allocated count for each accelerator + Object.entries(capacity).forEach(([key, value]) => ( + info.allocated[key] = (info.allocated[key] || 0) + value - (allocatable[key] || 0) + )) + + // if any accelerators are available, the cluster is configured + const configured = info.configured || Object.values(info.available).some((value) => value > 0) + + return {total: info.total, available: info.available, allocated: info.allocated, configured} + }, {configured: false, available: {}, total: {}, allocated: {}})) + .catch((e) => { + fastify.log.error(`Exception when listing cluster nodes: ${e}`); + return {configured: false, available: {}, total: {}, allocated: {}} + }) +) diff --git a/backend/src/routes/api/accelerators/index.ts b/backend/src/routes/api/accelerators/index.ts new file mode 100644 index 0000000000..16d651ad6d --- /dev/null +++ b/backend/src/routes/api/accelerators/index.ts @@ -0,0 +1,11 @@ +import { KubeFastifyInstance, OauthFastifyRequest } from '../../../types'; +import { getAcceleratorNumbers } from './acceleratorUtils'; +import { logRequestDetails } from '../../../utils/fileUtils'; + +export default async (fastify: KubeFastifyInstance): Promise<void> => { + 
fastify.get('/', async (request: OauthFastifyRequest) => { + logRequestDetails(fastify, request); + + return getAcceleratorNumbers(fastify); + }); +}; diff --git a/backend/src/routes/api/gpu/gpuUtils.ts b/backend/src/routes/api/gpu/gpuUtils.ts index e70cad3ece..fc2b74d0dd 100644 --- a/backend/src/routes/api/gpu/gpuUtils.ts +++ b/backend/src/routes/api/gpu/gpuUtils.ts @@ -16,6 +16,9 @@ const storage: { lastFetch: number; lastValue: GPUInfo } = { lastFetch: 0, }; +/** + * @deprecated - use getAcceleratorNumbers instead + */ export const getGPUNumber = async (fastify: KubeFastifyInstance): Promise<GPUInfo> => { if (storage.lastFetch >= Date.now() - 30_000) { fastify.log.info(`Returning cached gpu value (${JSON.stringify(storage)})`); diff --git a/backend/src/routes/api/gpu/index.ts b/backend/src/routes/api/gpu/index.ts index 5d91bb04c3..dc7068851c 100644 --- a/backend/src/routes/api/gpu/index.ts +++ b/backend/src/routes/api/gpu/index.ts @@ -2,6 +2,9 @@ import { KubeFastifyInstance, OauthFastifyRequest } from '../../../types'; import { getGPUNumber } from './gpuUtils'; import { logRequestDetails } from '../../../utils/fileUtils'; +/** + * @deprecated - use accelerators instead + */ export default async (fastify: KubeFastifyInstance): Promise<void> => { fastify.get('/', async (request: OauthFastifyRequest) => { logRequestDetails(fastify, request); diff --git a/backend/src/types.ts b/backend/src/types.ts index 3c85ad8afa..57ae0bd50e 100644 --- a/backend/src/types.ts +++ b/backend/src/types.ts @@ -748,6 +748,14 @@ export type GPUInfo = { available: number; autoscalers: gpuScale[]; }; + +export type AcceleratorInfo = { + configured: boolean; + available: {[key: string]: number}; + total: {[key: string]: number}; + allocated: {[key: string]: number}; +} + export type EnvironmentVariable = EitherNotBoth< { value: string | number }, { valueFrom: Record<string, unknown> } From ab07f22480a547c0708ddbc358670584557fde67 Mon Sep 17 00:00:00 2001 From: Gage Krumbach Date: Tue, 1 Aug 2023 16:48:20 -0500 Subject: 
[PATCH 2/2] added more resource types --- backend/src/routes/api/accelerators/acceleratorUtils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/routes/api/accelerators/acceleratorUtils.ts b/backend/src/routes/api/accelerators/acceleratorUtils.ts index 7ea0068e0e..09ae2ddba6 100644 --- a/backend/src/routes/api/accelerators/acceleratorUtils.ts +++ b/backend/src/routes/api/accelerators/acceleratorUtils.ts @@ -1,6 +1,6 @@ import { AcceleratorInfo, KubeFastifyInstance } from "../../../types" -const RESOURCE_TYPES = ["cpu", "memory", "pods", "ephemeral-storage", "hugepages-1Gi", "hugepages-2Mi"] +const RESOURCE_TYPES = ["cpu", "memory", "pods", "ephemeral-storage", "hugepages-1Gi", "hugepages-2Mi", "attachable-volumes-aws-ebs"] const getIdentifiersFromResources = (resources: {[key: string]: string} = {}) => { return Object.entries(resources)