diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml
index 58f1ca6cb7..09f010af61 100644
--- a/script/app-mlperf-inference/_cm.yaml
+++ b/script/app-mlperf-inference/_cm.yaml
@@ -242,6 +242,11 @@ variations:
     docker:
       base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public
 
+  nvidia-original,r4.1_default,_gptj:
+    docker:
+      deps:
+        - tags: get,ml-model,gptj,_nvidia,_fp8
+
   nvidia-original:
     docker:
       interactive: True
@@ -309,8 +314,8 @@ variations:
         - "${{ GPTJ_CHECKPOINT_PATH }}:${{ GPTJ_CHECKPOINT_PATH }}"
       skip_run_cmd: 'no'
       shm_size: '32gb'
-      docker_os: ubuntu
-      docker_real_run: false
+      os: ubuntu
+      real_run: false
       run: true
       docker_input_mapping:
         imagenet_path: IMAGENET_PATH
@@ -333,11 +338,6 @@ variations:
     env:
       CM_MLPERF_IMPLEMENTATION: intel
 
-  intel-original,gptj_:
-    docker:
-      deps:
-        - tags: get,ml-model,gptj
-
   intel-original,gptj_,build-harness:
     docker:
       run: false
@@ -625,6 +625,9 @@ variations:
     alias: gptj_
 
   gptj_:
+    docker:
+      deps:
+        - tags: get,ml-model,gptj,raw
     env:
       CM_MLPERF_MODEL_EQUAL_ISSUE_MODE: 'yes'
     posthook_deps:
@@ -1272,6 +1275,7 @@ docker:
     - "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}"
     - "${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}:${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}"
     - "${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}:${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}"
+    - "${{ GPTJ_CHECKPOINT_PATH }}:${{ GPTJ_CHECKPOINT_PATH }}"
     - "${{ DLRM_DATA_PATH }}:/home/mlperf_inf_dlrmv2"
   skip_run_cmd: 'no'
   shm_size: '32gb'
diff --git a/script/build-mlperf-inference-server-nvidia/_cm.yaml b/script/build-mlperf-inference-server-nvidia/_cm.yaml
index dc2e2fc5b8..e6bea0a2d2 100644
--- a/script/build-mlperf-inference-server-nvidia/_cm.yaml
+++ b/script/build-mlperf-inference-server-nvidia/_cm.yaml
@@ -193,6 +193,7 @@ variations:
       nvidia-scratch-space:
         tags: _version.4_1
     deps:
+      - tags: get,generic,sys-util,_git-lfs
       - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0
         names:
           - pytorch
diff --git a/script/get-ml-model-gptj/customize.py b/script/get-ml-model-gptj/customize.py
index 4a22ff2ec5..4c52200930 100644
--- a/script/get-ml-model-gptj/customize.py
+++ b/script/get-ml-model-gptj/customize.py
@@ -62,6 +62,8 @@ def postprocess(i):
             env['CM_ML_MODEL_FILE_WITH_PATH'] = env['GPTJ_SAXML_INT8_CHECKPOINT_PATH']
         else:
             return {'return': 1, 'error': 'pax_gptj_checkpoint generation failed'}
+    elif env.get('CM_TMP_ML_MODEL_PROVIDER', '') == 'nvidia':
+        env['CM_ML_MODEL_FILE_WITH_PATH'] = os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], 'models', 'GPTJ-6B', 'fp8-quantized-ammo', 'GPTJ-FP8-quantized')
     else:
         env['CM_ML_MODEL_FILE_WITH_PATH'] = env['GPTJ_CHECKPOINT_PATH']
 
diff --git a/script/get-ml-model-gptj/run-nvidia.sh b/script/get-ml-model-gptj/run-nvidia.sh
index b115f7838a..3cd7dd0fa8 100644
--- a/script/get-ml-model-gptj/run-nvidia.sh
+++ b/script/get-ml-model-gptj/run-nvidia.sh
@@ -11,7 +11,7 @@ cd ${CM_TENSORRT_LLM_CHECKOUT_PATH}
 make -C docker build
 test $? -eq 0 || exit $?
 
-RUN_CMD="bash -c '${CM_PYTHON_BIN_WITH_PATH} scripts/build_wheel.py -a=${CM_GPU_ARCH} --clean --install --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized --model_dir=/mnt/models/GPTJ-6B/checkpoint-final --qformat=fp8 --kv_cache_dtype=fp8 '"
+RUN_CMD="bash -c 'python3 scripts/build_wheel.py -a=${CM_GPU_ARCH} --clean --install --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized --model_dir=/mnt/models/GPTJ-6B/checkpoint-final --qformat=fp8 --kv_cache_dtype=fp8 '"
 DOCKER_RUN_ARGS=" -v ${CM_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt"
 export DOCKER_RUN_ARGS="$DOCKER_RUN_ARGS"
 export RUN_CMD="$RUN_CMD"