Merge pull request #24 from arjunsuresh/mlperf-inference
Fixes for nvidia mlperf inference gptj
arjunsuresh authored May 24, 2024
2 parents b924dea + 37adb64 commit 2719254
Showing 4 changed files with 15 additions and 8 deletions.
18 changes: 11 additions & 7 deletions script/app-mlperf-inference/_cm.yaml
@@ -242,6 +242,11 @@ variations:
     docker:
       base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public
 
+  nvidia-original,r4.1_default,_gptj:
+    docker:
+      deps:
+        - tags: get,ml-model,gptj,_nvidia,_fp8
+
   nvidia-original:
     docker:
       interactive: True
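Context for this addition: in CM, a comma-separated variation key applies when all of its member variations are active, so this block adds the FP8-quantized GPT-J model as a container dependency only when the NVIDIA implementation, the r4.1 default base image, and the GPT-J model are selected together. A hedged usage sketch (assuming the `cm` CLI is installed; the exact script tags below are illustrative, not taken from this diff):

```bash
# Illustrative only: selecting the nvidia implementation together with a
# gptj model variation should now pull the get,ml-model,gptj,_nvidia,_fp8
# dependency inside the container. Tag names here are hypothetical.
cm run script --tags=app,mlperf,inference,_nvidia-original,_gptj-99 --quiet
```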
@@ -309,8 +314,8 @@ variations:
       - "${{ GPTJ_CHECKPOINT_PATH }}:${{ GPTJ_CHECKPOINT_PATH }}"
       skip_run_cmd: 'no'
       shm_size: '32gb'
-      docker_os: ubuntu
-      docker_real_run: false
+      os: ubuntu
+      real_run: false
       run: true
       docker_input_mapping:
         imagenet_path: IMAGENET_PATH
@@ -333,11 +338,6 @@ variations:
       env:
         CM_MLPERF_IMPLEMENTATION: intel
 
-  intel-original,gptj_:
-    docker:
-      deps:
-        - tags: get,ml-model,gptj
-
   intel-original,gptj_,build-harness:
     docker:
       run: false
@@ -625,6 +625,9 @@ variations:
       alias: gptj_
 
   gptj_:
+    docker:
+      deps:
+        - tags: get,ml-model,gptj,raw
     env:
       CM_MLPERF_MODEL_EQUAL_ISSUE_MODE: 'yes'
     posthook_deps:
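Paired with the removal of the `intel-original,gptj_` block above, this moves the raw GPT-J checkpoint dependency onto the implementation-agnostic `gptj_` variation, so every implementation's container resolves it, while the NVIDIA path additionally pulls the FP8-quantized variant added earlier. The dependency can also be exercised standalone (a sketch assuming the `cm` CLI is installed; the tags are copied verbatim from the dep entry above):

```bash
# Resolve the raw GPT-J checkpoint by itself; tags match the new dep.
cm run script --tags=get,ml-model,gptj,raw
```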
@@ -1272,6 +1275,7 @@ docker:
     - "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}"
     - "${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}:${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}"
     - "${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}:${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}"
+    - "${{ GPTJ_CHECKPOINT_PATH }}:${{ GPTJ_CHECKPOINT_PATH }}"
     - "${{ DLRM_DATA_PATH }}:/home/mlperf_inf_dlrmv2"
   skip_run_cmd: 'no'
   shm_size: '32gb'
1 change: 1 addition & 0 deletions script/build-mlperf-inference-server-nvidia/_cm.yaml
@@ -193,6 +193,7 @@ variations:
       nvidia-scratch-space:
         tags: _version.4_1
     deps:
+      - tags: get,generic,sys-util,_git-lfs
       - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0
         names:
           - pytorch
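The added `_git-lfs` system-utility dependency presumably exists because the model fetches in this flow are LFS-backed downloads. A minimal sketch of what such a dep guarantees on the host (assuming an apt-based system; the actual CM script may install it differently):

```bash
# Ensure git-lfs is available before any LFS-backed clone (apt hosts).
command -v git-lfs >/dev/null 2>&1 || sudo apt-get install -y git-lfs
git lfs install   # registers the LFS smudge/clean filters for the user
```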
2 changes: 2 additions & 0 deletions script/get-ml-model-gptj/customize.py
@@ -62,6 +62,8 @@ def postprocess(i):
             env['CM_ML_MODEL_FILE_WITH_PATH'] = env['GPTJ_SAXML_INT8_CHECKPOINT_PATH']
         else:
             return {'return': 1, 'error': 'pax_gptj_checkpoint generation failed'}
+    elif env.get('CM_TMP_ML_MODEL_PROVIDER', '') == 'nvidia':
+        env['CM_ML_MODEL_FILE_WITH_PATH'] = os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], 'models', 'GPTJ-6B', 'fp8-quantized-ammo', 'GPTJ-FP8-quantized')
     else:
         env['CM_ML_MODEL_FILE_WITH_PATH'] = env['GPTJ_CHECKPOINT_PATH']
 
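The new `elif` branch points the model path at the FP8 checkpoint that `run-nvidia.sh` (below) writes into the NVIDIA scratch space: the quantization container mounts that scratch path at `/mnt`, so its `/mnt/models/...` output lands exactly where this branch looks on the host. A quick sketch with a placeholder scratch path:

```bash
# Placeholder value; in practice CM_NVIDIA_MLPERF_SCRATCH_PATH is set by
# the surrounding CM flow (presumably via the nvidia-scratch-space dep).
export CM_NVIDIA_MLPERF_SCRATCH_PATH=/data/mlperf_scratch
# This is the host path the new elif branch resolves to:
echo "${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized"
```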
2 changes: 1 addition & 1 deletion script/get-ml-model-gptj/run-nvidia.sh
@@ -11,7 +11,7 @@ cd ${CM_TENSORRT_LLM_CHECKOUT_PATH}
 make -C docker build
 test $? -eq 0 || exit $?
 
-RUN_CMD="bash -c '${CM_PYTHON_BIN_WITH_PATH} scripts/build_wheel.py -a=${CM_GPU_ARCH} --clean --install --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized --model_dir=/mnt/models/GPTJ-6B/checkpoint-final --qformat=fp8 --kv_cache_dtype=fp8 '"
+RUN_CMD="bash -c 'python3 scripts/build_wheel.py -a=${CM_GPU_ARCH} --clean --install --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized --model_dir=/mnt/models/GPTJ-6B/checkpoint-final --qformat=fp8 --kv_cache_dtype=fp8 '"
 DOCKER_RUN_ARGS=" -v ${CM_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt"
 export DOCKER_RUN_ARGS="$DOCKER_RUN_ARGS"
 export RUN_CMD="$RUN_CMD"
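The fix itself: `RUN_CMD` executes inside the TensorRT-LLM container, where the host-side interpreter path stored in `${CM_PYTHON_BIN_WITH_PATH}` generally does not exist, so the command now calls plain `python3`, which resolves against the container image. A minimal demonstration of the distinction (assuming Docker and a stock Python image are available):

```bash
# python3 is looked up on the container's PATH, not the host's; a host
# path such as /home/user/.venv/bin/python3 would not exist in here.
docker run --rm python:3.10-slim which python3
# -> /usr/local/bin/python3
```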
