diff --git a/backend/src/routes/api/nim-serving/index.ts b/backend/src/routes/api/nim-serving/index.ts index 5c14cf9463..770209277d 100644 --- a/backend/src/routes/api/nim-serving/index.ts +++ b/backend/src/routes/api/nim-serving/index.ts @@ -3,23 +3,38 @@ import { createCustomError } from '../../../utils/requestUtils'; import { logRequestDetails } from '../../../utils/fileUtils'; const secretNames = ['nvidia-nim-access', 'nvidia-nim-image-pull']; +const configMapName = 'nvidia-nim-images-data'; export default async (fastify: KubeFastifyInstance): Promise => { fastify.get( - '/:secretName', + '/:nimResource', async ( request: OauthFastifyRequest<{ - Params: { secretName: string }; + Params: { nimResource: string }; }>, ) => { logRequestDetails(fastify, request); - const { secretName } = request.params; - if (!secretNames.includes(secretName)) { - throw createCustomError('Not found', 'Secret not found', 404); - } + const { nimResource } = request.params; const { coreV1Api, namespace } = fastify.kube; - return coreV1Api.readNamespacedSecret(secretName, namespace); + if (secretNames.includes(nimResource)) { + try { + return await coreV1Api.readNamespacedSecret(nimResource, namespace); + } catch (e) { + fastify.log.error(`Failed to fetch secret ${nimResource}: ${e.message}`); + throw createCustomError('Not found', 'Secret not found', 404); + } + } + + if (nimResource === configMapName) { + try { + return await coreV1Api.readNamespacedConfigMap(configMapName, namespace); + } catch (e) { + fastify.log.error(`Failed to fetch configMap ${nimResource}: ${e.message}`); + throw createCustomError('Not found', 'ConfigMap not found', 404); + } + } + throw createCustomError('Not found', 'Resource not found', 404); }, ); }; diff --git a/frontend/src/api/k8s/servingRuntimes.ts b/frontend/src/api/k8s/servingRuntimes.ts index 7ecf91c4e1..ce17a6ca21 100644 --- a/frontend/src/api/k8s/servingRuntimes.ts +++ b/frontend/src/api/k8s/servingRuntimes.ts @@ -35,6 +35,7 @@ export const assembleServingRuntime = ( initialAcceleratorProfile?: AcceleratorProfileState, selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, isModelMesh?: boolean, + nimPVCName?: string, ): ServingRuntimeKind => { const { name: displayName, @@ -133,6 +134,15 @@ export const assembleServingRuntime = ( if (!volumeMounts.find((volumeMount) => volumeMount.mountPath === '/dev/shm')) { volumeMounts.push(getshmVolumeMount()); } + const updatedVolumeMounts = volumeMounts.map((volumeMount) => { + if (volumeMount.name === 'nim-pvc' && nimPVCName) { + return { + ...volumeMount, + name: nimPVCName, + }; + } + return volumeMount; + }); const updatedContainer = { ...container, @@ -145,7 +155,7 @@ export const assembleServingRuntime = ( ...containerWithoutResources, ...(isModelMesh ? { resources } : {}), affinity, - volumeMounts, + volumeMounts: updatedVolumeMounts, }; }, ); @@ -171,8 +181,33 @@ export const assembleServingRuntime = ( volumes.push(getshmVolume('2Gi')); } - updatedServingRuntime.spec.volumes = volumes; + if (nimPVCName) { + const updatedVolumes = volumes.map((volume) => { + if (volume.name === 'nim-pvc') { + return { + ...volume, + name: nimPVCName, + persistentVolumeClaim: { + claimName: nimPVCName, + }, + }; + } + return volume; + }); + + if (!updatedVolumes.find((volume) => volume.name === nimPVCName)) { + updatedVolumes.push({ + name: nimPVCName, + persistentVolumeClaim: { + claimName: nimPVCName, + }, + }); + } + updatedServingRuntime.spec.volumes = updatedVolumes; + } else { + updatedServingRuntime.spec.volumes = volumes; + } return updatedServingRuntime; }; @@ -242,6 +277,7 @@ export const updateServingRuntime = (options: { initialAcceleratorProfile?: AcceleratorProfileState; selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState; isModelMesh?: boolean; + nimPVCName?: string; }): Promise => { const { data, @@ -251,6 +287,7 @@ export const updateServingRuntime = (options: { initialAcceleratorProfile, selectedAcceleratorProfile, isModelMesh, + nimPVCName, } = options; const updatedServingRuntime = assembleServingRuntime( @@ -262,6 +299,7 @@ export const updateServingRuntime = (options: { initialAcceleratorProfile, selectedAcceleratorProfile, isModelMesh, + nimPVCName, ); return k8sUpdateResource( @@ -284,6 +322,7 @@ export const createServingRuntime = (options: { initialAcceleratorProfile?: AcceleratorProfileState; selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState; isModelMesh?: boolean; + nimPVCName?: string; }): Promise => { const { data, @@ -294,6 +333,7 @@ export const createServingRuntime = (options: { initialAcceleratorProfile, selectedAcceleratorProfile, isModelMesh, + nimPVCName, } = options; const assembledServingRuntime = assembleServingRuntime( data, @@ -304,6 +344,7 @@ export const createServingRuntime = (options: { initialAcceleratorProfile, selectedAcceleratorProfile, isModelMesh, + nimPVCName, ); return k8sCreateResource( diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx index e3d27a932d..14bd9876cc 100644 --- a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx @@ -3,6 +3,7 @@ import { Alert, AlertActionCloseButton, Form, + getUniqueId, Modal, Stack, StackItem, @@ -50,7 +51,6 @@ import { getServingRuntimeFromTemplate } from '~/pages/modelServing/customServin const NIM_SECRET_NAME = 'nvidia-nim-secrets'; const NIM_NGC_SECRET_NAME = 'ngc-secret'; -const NIM_PVC_NAME = 'nim-pvc'; const accessReviewResource: AccessReviewResourceAttributes = { group: 'rbac.authorization.k8s.io', @@ -95,6 +95,7 @@ const DeployNIMServiceModal: React.FC = ({ const isAuthorinoEnabled = useIsAreaAvailable(SupportedArea.K_SERVE_AUTH).status; const currentProjectName = projectContext?.currentProject.metadata.name; const namespace = currentProjectName || createDataInferenceService.project; + const nimPVCName = getUniqueId('nim-pvc'); const [translatedName] = translateDisplayNameForK8sAndReport(createDataInferenceService.name, { maxLength: 253, @@ -202,6 +203,7 @@ const DeployNIMServiceModal: React.FC = ({ projectContext?.currentProject, servingRuntimeName, true, + nimPVCName, ); const submitInferenceServiceResource = getSubmitInferenceServiceResourceFn( @@ -226,7 +228,7 @@ const DeployNIMServiceModal: React.FC = ({ submitInferenceServiceResource({ dryRun: false }), createNIMSecret(namespace, NIM_SECRET_NAME, false, false), createNIMSecret(namespace, NIM_NGC_SECRET_NAME, true, false), - createNIMPVC(namespace, NIM_PVC_NAME, pvcSize, false), + createNIMPVC(namespace, nimPVCName, pvcSize, false), ]), ) .then(() => onSuccess()) diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx index be19b0ef20..5215c8a892 100644 --- a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx @@ -32,7 +32,7 @@ const NIMModelListSection: React.FC = ({ useEffect(() => { const getModelNames = async () => { try { - const modelInfos = await fetchNIMModelNames(dashboardNamespace); + const modelInfos = await fetchNIMModelNames(); if (modelInfos && modelInfos.length > 0) { const fetchedOptions = modelInfos.flatMap((modelInfo) => modelInfo.tags.map((tag) => ({ diff --git a/frontend/src/pages/modelServing/screens/projects/__tests__/utils.spec.ts b/frontend/src/pages/modelServing/screens/projects/__tests__/utils.spec.ts index 9797d22418..9ef74de592 100644 --- a/frontend/src/pages/modelServing/screens/projects/__tests__/utils.spec.ts +++ b/frontend/src/pages/modelServing/screens/projects/__tests__/utils.spec.ts @@ -12,20 +12,24 @@ import { import { LabeledDataConnection, ServingPlatformStatuses } from '~/pages/modelServing/screens/types'; import { ServingRuntimePlatform } from '~/types'; import { mockInferenceServiceK8sResource } from '~/__mocks__/mockInferenceServiceK8sResource'; -import { createPvc, createSecret, getConfigMap } from '~/api'; +import { createPvc, createSecret } from '~/api'; import { PersistentVolumeClaimKind } from '~/k8sTypes'; -import { getNGCSecretType, getNIMData } from '~/pages/modelServing/screens/projects/nimUtils'; +import { + getNGCSecretType, + getNIMData, + getNIMResource, +} from '~/pages/modelServing/screens/projects/nimUtils'; jest.mock('~/api', () => ({ getSecret: jest.fn(), createSecret: jest.fn(), - getConfigMap: jest.fn(), createPvc: jest.fn(), })); jest.mock('~/pages/modelServing/screens/projects/nimUtils', () => ({ getNIMData: jest.fn(), getNGCSecretType: jest.fn(), + getNIMResource: jest.fn(), })); describe('filterOutConnectionsWithoutBucket', () => { @@ -312,7 +316,6 @@ describe('createNIMSecret', () => { }); }); describe('fetchNIMModelNames', () => { - const dashboardNamespace = 'test-namespace'; const NIM_CONFIGMAP_NAME = 'nvidia-nim-images-data'; const configMapMock = { @@ -341,11 +344,11 @@ describe('fetchNIMModelNames', () => { }); it('should return model infos when configMap has data', async () => { - (getConfigMap as jest.Mock).mockResolvedValueOnce(configMapMock); + (getNIMResource as jest.Mock).mockResolvedValueOnce(configMapMock); - const result = await fetchNIMModelNames(dashboardNamespace); + const result = await fetchNIMModelNames(); - expect(getConfigMap).toHaveBeenCalledWith(dashboardNamespace, NIM_CONFIGMAP_NAME); + expect(getNIMResource).toHaveBeenCalledWith(NIM_CONFIGMAP_NAME); expect(result).toEqual([ { name: 'model1', @@ -369,20 +372,20 @@ describe('fetchNIMModelNames', () => { }); it('should return undefined if configMap has no data', async () => { - (getConfigMap as jest.Mock).mockResolvedValueOnce({ data: {} }); + (getNIMResource as jest.Mock).mockResolvedValueOnce({ data: {} }); - const result = await fetchNIMModelNames(dashboardNamespace); + const result = await fetchNIMModelNames(); - expect(getConfigMap).toHaveBeenCalledWith(dashboardNamespace, NIM_CONFIGMAP_NAME); + expect(getNIMResource).toHaveBeenCalledWith(NIM_CONFIGMAP_NAME); expect(result).toBeUndefined(); }); it('should return undefined if configMap.data is not defined', async () => { - (getConfigMap as jest.Mock).mockResolvedValueOnce({ data: undefined }); + (getNIMResource as jest.Mock).mockResolvedValueOnce({ data: undefined }); - const result = await fetchNIMModelNames(dashboardNamespace); + const result = await fetchNIMModelNames(); - expect(getConfigMap).toHaveBeenCalledWith(dashboardNamespace, NIM_CONFIGMAP_NAME); + expect(getNIMResource).toHaveBeenCalledWith(NIM_CONFIGMAP_NAME); expect(result).toBeUndefined(); }); }); diff --git a/frontend/src/pages/modelServing/screens/projects/nimUtils.ts b/frontend/src/pages/modelServing/screens/projects/nimUtils.ts index 7051c903f0..a22869194e 100644 --- a/frontend/src/pages/modelServing/screens/projects/nimUtils.ts +++ b/frontend/src/pages/modelServing/screens/projects/nimUtils.ts @@ -9,9 +9,9 @@ const NIM_NGC_SECRET_NAME = 'nvidia-nim-image-pull'; export const getNGCSecretType = (isNGC: boolean): string => isNGC ? 'kubernetes.io/dockerconfigjson' : 'Opaque'; -const getNIMSecretData = async (secretName: string): Promise => { +export const getNIMResource = async (resourceName: string): Promise => { try { - const response = await fetch(`/api/nim-serving/${secretName}`, { + const response = await fetch(`/api/nim-serving/${resourceName}`, { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -21,16 +21,16 @@ const getNIMSecretData = async (secretName: string): Promise => { if (!response.ok) { throw new Error(`Error fetching secret: ${response.statusText}`); } - const secretData = await response.json(); - return secretData.body; + const resourceData = await response.json(); + return resourceData.body; } catch (error) { - throw new Error(`Failed to fetch secret: ${secretName}.`); + throw new Error(`Failed to fetch the resource: ${resourceName}.`); } }; export const getNIMData = async (isNGC: boolean): Promise | undefined> => { const nimSecretData: SecretKind = isNGC - ? await getNIMSecretData(NIM_NGC_SECRET_NAME) - : await getNIMSecretData(NIM_SECRET_NAME); + ? await getNIMResource(NIM_NGC_SECRET_NAME) + : await getNIMResource(NIM_SECRET_NAME); if (!nimSecretData.data) { throw new Error(`Error retrieving NIM ${isNGC ? 'NGC' : ''} secret data`); diff --git a/frontend/src/pages/modelServing/screens/projects/utils.ts b/frontend/src/pages/modelServing/screens/projects/utils.ts index c6ffa63a31..6cd0c9dd6b 100644 --- a/frontend/src/pages/modelServing/screens/projects/utils.ts +++ b/frontend/src/pages/modelServing/screens/projects/utils.ts @@ -40,7 +40,6 @@ import { createPvc, createSecret, createServingRuntime, - getConfigMap, updateInferenceService, updateServingRuntime, } from '~/api'; @@ -48,7 +47,11 @@ import { isDataConnectionAWS } from '~/pages/projects/screens/detail/data-connec import { removeLeadingSlash } from '~/utilities/string'; import { RegisteredModelDeployInfo } from '~/pages/modelRegistry/screens/RegisteredModels/useRegisteredModelDeployInfo'; import { AcceleratorProfileSelectFieldState } from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; -import { getNGCSecretType, getNIMData } from '~/pages/modelServing/screens/projects/nimUtils'; +import { + getNGCSecretType, + getNIMData, + getNIMResource, +} from '~/pages/modelServing/screens/projects/nimUtils'; const NIM_CONFIGMAP_NAME = 'nvidia-nim-images-data'; @@ -449,6 +452,7 @@ export const getSubmitServingRuntimeResourcesFn = ( currentProject?: ProjectKind, name?: string, isModelMesh?: boolean, + nimPVCName?: string, ): ((opts: { dryRun?: boolean }) => Promise) => { if (!servingRuntimeSelected) { return () => @@ -498,6 +502,7 @@ export const getSubmitServingRuntimeResourcesFn = ( selectedAcceleratorProfile: controlledState, initialAcceleratorProfile, isModelMesh, + nimPVCName, }), setUpTokenAuth( servingRuntimeData, @@ -524,6 +529,7 @@ export const getSubmitServingRuntimeResourcesFn = ( selectedAcceleratorProfile: controlledState, initialAcceleratorProfile, isModelMesh, + nimPVCName, }).then((servingRuntime) => setUpTokenAuth( servingRuntimeData, @@ -579,10 +585,8 @@ export interface ModelInfo { updatedDate: string; } -export const fetchNIMModelNames = async ( - dashboardNamespace: string, -): Promise => { - const configMap = await getConfigMap(dashboardNamespace, NIM_CONFIGMAP_NAME); +export const fetchNIMModelNames = async (): Promise => { + const configMap = await getNIMResource(NIM_CONFIGMAP_NAME); if (configMap.data && Object.keys(configMap.data).length > 0) { const modelInfos: ModelInfo[] = []; for (const [key, value] of Object.entries(configMap.data)) {