Merge pull request #21 from arjunsuresh/mlperf-inference
arjunsuresh authored May 22, 2024
2 parents 38941fc + 839d06f commit c35530e
Showing 12 changed files with 192 additions and 6 deletions.
33 changes: 30 additions & 3 deletions script/app-mlperf-inference-nvidia/_cm.yaml
@@ -311,6 +311,23 @@ post_deps:

# Variations to customize dependencies
variations:
# MLPerf inference version
v4.0:
group: version
default: true
env:
CM_MLPERF_INFERENCE_VERSION: "v4.0"
CM_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX: GPTJ-FP8-quantized
adr:
pytorch:
tags: _for-nvidia-mlperf-inference-v4.0
v3.1:
env:
CM_MLPERF_INFERENCE_VERSION: "v3.1"
CM_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX: GPTJ-07142023.pth
adr:
pytorch:
tags: _for-nvidia-mlperf-inference-v3.1
# Target devices
cpu:
group: device
@@ -369,6 +386,7 @@ variations:
- tags: get,generic-python-lib,_transformers
- tags: get,generic-python-lib,_safetensors
- tags: get,generic-python-lib,_onnx
- tags: get,generic-python-lib,_onnx-graphsurgeon

bert-99:
group: model
@@ -479,18 +497,25 @@ variations:
deps:
- tags: get,generic-python-lib,_package.datasets
- tags: get,generic-python-lib,_package.simplejson
- tags: get,generic-python-lib,_onnx
- tags: get,generic-python-lib,_transformers
- tags: get,generic-python-lib,_onnx-graphsurgeon
env:
CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://cloud.mlcommons.org/index.php/s/QAZ2oM94MkFtbQx/download"

gptj_,build:
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1
- tags: install,pytorch,from.src
names:
- pytorch
- tags: get,cmake
version_min: "3.25.0"

gptj_,build_engine:
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1
- tags: install,pytorch,from.src
names:
- pytorch
- tags: get,cmake
version_min: "3.25.0"

@@ -880,7 +905,9 @@ variations:

gptj_,run_harness:
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1
- tags: install,pytorch,from.src
names:
- pytorch
- tags: get,cmake
version_min: "3.25.0"
env:
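For context: as with other CM variations, the new version group is selected by appending its tag to the script's tag list. A minimal sketch, assuming typical base tags for this script (the exact base tags are not part of this diff):

```bash
# Sketch: pick the MLPerf inference version via the new variation group.
# _v4.0 is the default; _v3.1 keeps the old GPTJ-07142023.pth FP8 checkpoint.
cm run script --tags=reproduce,mlperf,inference,nvidia,_gptj-99,_v4.0 --quiet
cm run script --tags=reproduce,mlperf,inference,nvidia,_gptj-99,_v3.1 --quiet
```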
2 changes: 1 addition & 1 deletion script/app-mlperf-inference-nvidia/customize.py
@@ -156,7 +156,7 @@ def preprocess(i):
cmds.append("make download_data BENCHMARKS='gptj'")

fp32_model_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'GPTJ-6B', 'checkpoint-final')
fp8_model_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'GPTJ-6B', 'fp8-quantized-ammo', 'GPTJ-07142023.pth')
fp8_model_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'GPTJ-6B', 'fp8-quantized-ammo', env['CM_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX'])
vocab_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'bert', 'vocab.txt')

if not os.path.exists(os.path.dirname(fp32_model_path)):
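Together with the CM_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX values defined in _cm.yaml above, the FP8 checkpoint path now resolves per version:

```bash
# FP8 model paths after this change (suffixes from the _cm.yaml hunk above):
#   v3.1: ${MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-07142023.pth
#   v4.0: ${MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized
ls "${MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo"
```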
16 changes: 16 additions & 0 deletions script/authenticate-github-cli/_cm.yaml
@@ -0,0 +1,16 @@
alias: authenticate-github-cli
automation_alias: script
automation_uid: 5b4e0237da074764
cache: true
input_mapping:
with_token: CM_GH_AUTH_TOKEN
with-token: CM_GH_AUTH_TOKEN
tags:
- auth
- authenticate
- github
- gh
- cli
uid: 7b57673ac14a4337
deps:
- tags: get,gh,cli
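A plausible invocation of the new script (a sketch; per CM's input_mapping semantics, --with_token fills CM_GH_AUTH_TOKEN):

```bash
# Sketch: authenticate the GitHub CLI non-interactively through CM.
# GH_TOKEN is a placeholder for a real personal access token.
cm run script --tags=auth,github,cli --with_token="${GH_TOKEN}"
```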
27 changes: 27 additions & 0 deletions script/authenticate-github-cli/customize.py
@@ -0,0 +1,27 @@
from cmind import utils
import os

def preprocess(i):

os_info = i['os_info']

env = i['env']

meta = i['meta']

automation = i['automation']

cmd = "gh auth login"
if env.get('CM_GH_AUTH_TOKEN', '') != '':
cmd = f" echo {env['CM_GH_AUTH_TOKEN']} | {cmd} --with-token"

env['CM_RUN_CMD'] = cmd
quiet = (env.get('CM_QUIET', False) == 'yes')

return {'return':0}

def postprocess(i):

env = i['env']

return {'return':0}
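When CM_GH_AUTH_TOKEN is set, the CM_RUN_CMD assembled above reduces to the standard non-interactive gh login, which reads the token from stdin:

```bash
# Equivalent of the command built in preprocess() when a token is provided:
echo "${CM_GH_AUTH_TOKEN}" | gh auth login --with-token
```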
1 change: 1 addition & 0 deletions script/authenticate-github-cli/run.bat
@@ -0,0 +1 @@
rem native script
18 changes: 18 additions & 0 deletions script/authenticate-github-cli/run.sh
@@ -0,0 +1,18 @@
#!/bin/bash

#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}

#To export any variable
#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out

#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency

echo "Running: "
echo "${CM_RUN_CMD}"
echo ""

if [[ ${CM_FAKE_RUN} != "yes" ]]; then
eval "${CM_RUN_CMD}"
test $? -eq 0 || exit 1
fi

2 changes: 2 additions & 0 deletions script/build-mlperf-inference-server-nvidia/_cm.yaml
@@ -240,6 +240,8 @@ versions:
version: r4.0
nvidia-scratch-space:
tags: _version.4_1
env:
BUILD_TRTLLM: 1
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0
names:
63 changes: 63 additions & 0 deletions script/get-ml-model-gptj/_cm.json
@@ -171,6 +171,13 @@
}
]
},
"fp8": {
"env": {
"CM_ML_MODEL_INPUT_DATA_TYPES": "fp8",
"CM_ML_MODEL_WEIGHT_DATA_TYPES": "fp8"
},
"group": "precision"
},
"int4": {
"env": {
"CM_ML_MODEL_INPUT_DATA_TYPES": "int4",
@@ -193,6 +200,62 @@
"group": "model-provider",
"default": true
},
"nvidia": {
"default_variations": {
"framework": "pytorch"
},
"group": "model-provider",
"env": {
"CM_TMP_ML_MODEL_PROVIDER": "nvidia"
}
},
"pytorch,nvidia": {
"default_variations": {
"precision": "fp8"
},
"deps": [
{
"tags": "get,git,repo,_repo.https://github.com/NVIDIA/TensorRT-LLM.git,_sha.0ab9d17a59c284d2de36889832fe9fc7c8697604",
"extra_cache_tags": "tensorrt-llm",
"env": {
"CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_TENSORRT_LLM_CHECKOUT_PATH"
}
},
{
"tags": "get,cuda",
"names": [
"cuda"
]
},
{
"tags": "get,nvidia,scratch,space"
},
{
"tags": "get,cuda-devices"
},
{
"tags": "get,ml-model,gpt-j,_fp32,_pytorch",
"env": {
},
"force_new_env_keys": [
"GPTJ_CHECKPOINT_PATH"
]
},
{
"tags": "get,nvidia,inference,common-code",
"names": [
"nvidia-inference-common-code"
]
},
{
"tags": "get,python3",
"names": [
"python",
"python3"
]
}
]
},
"intel": {
"default_variations": {
"framework": "pytorch"
6 changes: 6 additions & 0 deletions script/get-ml-model-gptj/customize.py
@@ -21,6 +21,12 @@ def preprocess(i):
env['INT8_MODEL_DIR'] = os.getcwd()
else:
env['INT4_MODEL_DIR'] = os.getcwd()
elif env.get('CM_TMP_ML_MODEL_PROVIDER', '') == 'nvidia':
i['run_script_input']['script_name'] = 'run-nvidia'
gpu_arch = int(float(env['CM_CUDA_DEVICE_PROP_GPU_COMPUTE_CAPABILITY']) * 10)
env['CM_GPU_ARCH'] = gpu_arch
env['CM_TMP_REQUIRE_DOWNLOAD'] = 'no'

else:
is_saxml = env.get('CM_TMP_MODEL_SAXML','')
if is_saxml == "fp32":
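The CM_GPU_ARCH value passed to the TensorRT-LLM build in run-nvidia.sh below is simply the CUDA compute capability with the decimal point dropped:

```bash
# Same int(float(x) * 10) mapping as in customize.py: 8.0 -> 80, 8.9 -> 89, 9.0 -> 90.
python3 -c 'print(int(float("8.9") * 10))'   # prints 89
```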
22 changes: 22 additions & 0 deletions script/get-ml-model-gptj/run-nvidia.sh
@@ -0,0 +1,22 @@
#!/bin/bash

if [[ ! -e ${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/checkpoint-final ]]; then
cp -r ${GPTJ_CHECKPOINT_PATH} ${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/
test $? -eq 0 || exit $?
fi

echo "cd ${CM_TENSORRT_LLM_CHECKOUT_PATH}"
cd ${CM_TENSORRT_LLM_CHECKOUT_PATH}

make -C docker build
test $? -eq 0 || exit $?

RUN_CMD="bash -c '${CM_PYTHON_BIN_WITH_PATH} scripts/build_wheel.py -a=${CM_GPU_ARCH} --clean --install --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized --model_dir=/mnt/models/GPTJ-6B/checkpoint-final --qformat=fp8 --kv_cache_dtype=fp8 '"
DOCKER_RUN_ARGS=" -v ${CM_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt"
export DOCKER_RUN_ARGS="$DOCKER_RUN_ARGS"
export RUN_CMD="$RUN_CMD"
make -C docker run LOCAL_USER=1
test $? -eq 0 || exit $?

${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_INFERENCE_NVIDIA_CODE_PATH}/code/gptj/tensorrt/onnx_tune.py --fp8-scalers-path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized/rank0.safetensors --scaler 1.005 --index 15
test $? -eq 0 || exit $?
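After a successful run, the scratch space should hold both the fp32 checkpoint and the FP8 artifacts the harness consumes (layout inferred from the paths used in this script):

```bash
# Inferred layout (paths taken from run-nvidia.sh above):
#   models/GPTJ-6B/checkpoint-final/                            fp32 checkpoint
#   models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized/rank0.safetensors
ls -R "${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B"
```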
4 changes: 3 additions & 1 deletion script/install-llvm-src/_cm.json
@@ -299,7 +299,9 @@
"tags": "get,generic-python-lib,_custom-python,_package.setuptools",
"env": {
"CM_PYTHON_BIN_WITH_PATH": "<<<CM_CONDA_BIN_PATH>>>/python3"
}
},
"version_max": "69.9.999",
"version_max_usable": "58.2.0"
},
{
"tags": "get,generic-python-lib,_custom-python,_package.neural-compressor,_url.git+https://github.com/intel/neural-compressor.git@a2931eaa4052eec195be3c79a13f7bfa23e54473",
4 changes: 3 additions & 1 deletion script/install-pytorch-from-src/_cm.json
@@ -202,7 +202,9 @@
"conda-package",
"setuptools"
],
"tags": "get,generic,conda-package,_package.setuptools,_source.conda-forge"
"tags": "get,generic,conda-package,_package.setuptools,_source.conda-forge",
"version_max": "69.9.999",
"version_max_usable": "58.2.0"
},
{
"names": [
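Both pins look like a workaround for setuptools releases newer than 69.x breaking these source builds; my reading of CM's version resolution (an assumption, not stated in this diff) is that version_max_usable is what gets installed when the cap cannot otherwise be satisfied:

```bash
# Assumed effect when CM must install setuptools itself: versions above
# 69.9.999 are rejected and the usable fallback 58.2.0 is installed instead.
python3 -m pip install "setuptools==58.2.0"
```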
