Skip to content

Commit

Permalink
Xiaoxia/fp v1 (#871)
Browse files Browse the repository at this point in the history
* add FP6-benchmark

* update

* updatefile

* update tpsize

---------

Co-authored-by: Ubuntu <deepspeed@DS-A100-Largedisk.3xui22esprkudm2r30sfjzexrc.bx.internal.cloudapp.net>
  • Loading branch information
xiaoxiawu-microsoft and Ubuntu authored Mar 8, 2024
1 parent 6e9ada6 commit f415ec8
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 1 deletion.
10 changes: 10 additions & 0 deletions benchmarks/inference/mii/run_fp6.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Run the MII/FastGen inference benchmark with FP6 quantization enabled
# (--fp6 selects the 'wf6af16' quantization mode in src/server.py).
MODELS=(NousResearch/Llama-2-70b-hf)

# Quote the array expansion and the loop variable so model names are not
# subject to word splitting or glob expansion (shellcheck SC2068/SC2086).
for MODEL in "${MODELS[@]}"; do
    python ./run_benchmark.py --model "${MODEL}" --num_requests 128 --stream --backend fastgen --fp6 --tp_size 1
done
6 changes: 5 additions & 1 deletion benchmarks/inference/mii/src/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,17 @@ def start_fastgen_server(args: argparse.Namespace) -> None:
inference_config = RaggedInferenceEngineConfig(
tensor_parallel=tp_config, state_manager=mgr_config
)

if args.fp6:
quantization_mode = 'wf6af16'
else:
quantization_mode = None
mii.serve(
args.model,
deployment_name=args.deployment_name,
tensor_parallel=args.tp_size,
inference_engine_config=inference_config,
replica_num=args.num_replicas,
quantization_mode=quantization_mode
)


Expand Down
3 changes: 3 additions & 0 deletions benchmarks/inference/mii/src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ def parse_args(
parser.add_argument(
"--overwrite_results", action="store_true", help="Overwrite existing results"
)
parser.add_argument(
"--fp6", action="store_true", help="Enable FP6"
)

# Parse arguments
args = parser.parse_args()
Expand Down

0 comments on commit f415ec8

Please sign in to comment.