Commit 63a82c6

Enable max_model_len when initializing VLLM engine
1 parent d656e81 commit 63a82c6

7 files changed: 25 additions, 12 deletions
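
Each config now defines max_seq_len once at module level and reuses it in two places: OpenCompass's own max_seq_len field and the engine-side max_model_len inside model_kwargs, so the two limits cannot drift apart. The sketch below shows what the updated model_kwargs amount to once they reach vLLM, assuming the OpenCompass VLLM wrapper forwards them as keyword arguments to vllm.LLM; the model and prompt here are illustrative only:

from vllm import LLM, SamplingParams

max_seq_len = 2048  # shared constant, as in the updated configs

# model_kwargs from the config land here as keyword arguments to vllm.LLM
llm = LLM(
    model='Qwen/Qwen1.5-14B-Chat',
    tensor_parallel_size=2,     # shard the weights across 2 GPUs
    max_model_len=max_seq_len,  # cap the engine's context length at 2048 tokens
)

# generation_kwargs / max_out_len from the config map onto SamplingParams
params = SamplingParams(temperature=0, max_tokens=100)
outputs = llm.generate(['Write a haiku about evaluation.'], params)
print(outputs[0].outputs[0].text)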

configs/models/mistral/vllm_mixtral_8x7b_instruct_v0_1.py (+4 -2)

@@ -8,16 +8,18 @@
         dict(role='BOT', begin='', end='</s>', generate=True),
     ],
 )
+max_seq_len = 2048
 
 models = [
     dict(
         type=VLLM,
         abbr='mixtral-8x7b-instruct-v0.1-vllm',
         path='mistralai/Mixtral-8x7B-Instruct-v0.1',
-        model_kwargs=dict(tensor_parallel_size=2),
+        # more vllm model_kwargs: https://github.com/vllm-project/vllm/blob/main/vllm/engine/arg_utils.py
+        model_kwargs=dict(tensor_parallel_size=2, max_model_len=max_seq_len),
         meta_template=_meta_template,
         max_out_len=100,
-        max_seq_len=2048,
+        max_seq_len=max_seq_len,
         batch_size=32,
         generation_kwargs=dict(temperature=0),
         stop_words=['</s>'],

configs/models/qwen/vllm_qwen1_5_14b_chat.py (+4 -2)

@@ -7,16 +7,18 @@
         dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True),
     ],
 )
+max_seq_len = 2048
 
 models = [
     dict(
         type=VLLM,
         abbr='qwen1.5-14b-chat-vllm',
         path='Qwen/Qwen1.5-14B-Chat',
-        model_kwargs=dict(tensor_parallel_size=2),
+        # more vllm model_kwargs: https://github.com/vllm-project/vllm/blob/main/vllm/engine/arg_utils.py
+        model_kwargs=dict(tensor_parallel_size=2, max_model_len=max_seq_len),
         meta_template=_meta_template,
         max_out_len=100,
-        max_seq_len=2048,
+        max_seq_len=max_seq_len,
         batch_size=32,
         generation_kwargs=dict(temperature=0),
         stop_words=['<|im_end|>'],

configs/models/qwen/vllm_qwen1_5_72b.py (+4 -2)

@@ -1,14 +1,16 @@
 from opencompass.models import VLLM
 
+max_seq_len = 2048
 
 models = [
     dict(
         type=VLLM,
         abbr='qwen1.5-72b-vllm',
         path='Qwen/Qwen1.5-72B',
-        model_kwargs=dict(tensor_parallel_size=4),
+        # more vllm model_kwargs: https://github.com/vllm-project/vllm/blob/main/vllm/engine/arg_utils.py
+        model_kwargs=dict(tensor_parallel_size=4, max_model_len=max_seq_len),
         max_out_len=100,
-        max_seq_len=2048,
+        max_seq_len=max_seq_len,
         batch_size=32,
         generation_kwargs=dict(temperature=0),
         run_cfg=dict(num_gpus=4, num_procs=1),

configs/models/qwen/vllm_qwen1_5_72b_chat.py (+4 -2)

@@ -7,16 +7,18 @@
         dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True),
     ],
 )
+max_seq_len = 2048
 
 models = [
     dict(
         type=VLLM,
         abbr='qwen1.5-72b-chat-vllm',
         path='Qwen/Qwen1.5-72B-Chat',
-        model_kwargs=dict(tensor_parallel_size=4),
+        # more vllm model_kwargs: https://github.com/vllm-project/vllm/blob/main/vllm/engine/arg_utils.py
+        model_kwargs=dict(tensor_parallel_size=4, max_model_len=max_seq_len),
         meta_template=_meta_template,
         max_out_len=100,
-        max_seq_len=2048,
+        max_seq_len=max_seq_len,
         batch_size=32,
         generation_kwargs=dict(temperature=0),
         stop_words=['<|im_end|>'],

configs/models/qwen/vllm_qwen_14b_chat.py (+1)

@@ -13,6 +13,7 @@
         type=VLLM,
         abbr='qwen-14b-chat-vllm',
         path='Qwen/Qwen-14B-Chat',
+        # more vllm model_kwargs: https://github.com/vllm-project/vllm/blob/main/vllm/engine/arg_utils.py
         model_kwargs=dict(tensor_parallel_size=4),
         meta_template=_meta_template,
         max_out_len=100,

configs/models/qwen/vllm_qwen_72b_chat.py (+4 -2)

@@ -7,16 +7,18 @@
         dict(role='BOT', begin='\n<|im_start|>assistant\n', end='<|im_end|>', generate=True),
     ],
 )
+max_seq_len = 2048
 
 models = [
     dict(
         type=VLLM,
         abbr='qwen-72b-chat-vllm',
         path='Qwen/Qwen-72B-Chat',
-        model_kwargs=dict(tensor_parallel_size=4),
+        # more vllm model_kwargs: https://github.com/vllm-project/vllm/blob/main/vllm/engine/arg_utils.py
+        model_kwargs=dict(tensor_parallel_size=4, max_model_len=max_seq_len),
         meta_template=_meta_template,
         max_out_len=100,
-        max_seq_len=2048,
+        max_seq_len=max_seq_len,
         batch_size=32,
         generation_kwargs=dict(temperature=0),
         stop_words=['<|im_end|>'],

configs/models/wizardlm/vllm_wizardlm_70b_v1_0.py (+4 -2)

@@ -7,16 +7,18 @@
         dict(role='BOT', begin='ASSISTANT: ', end='</s>', generate=True),
     ],
 )
+max_seq_len = 2048
 
 models = [
     dict(
         type=VLLM,
         abbr='wizardlm-70b-v1.0-vllm',
         path='WizardLM/WizardLM-70B-V1.0',
-        model_kwargs=dict(tensor_parallel_size=4),
+        # more vllm model_kwargs: https://github.com/vllm-project/vllm/blob/main/vllm/engine/arg_utils.py
+        model_kwargs=dict(tensor_parallel_size=4, max_model_len=max_seq_len),
         meta_template=_meta_template,
         max_out_len=100,
-        max_seq_len=2048,
+        max_seq_len=max_seq_len,
         batch_size=32,
         generation_kwargs=dict(temperature=0),
         stop_words=['</s>'],
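
When max_model_len is left unset, vLLM derives its limit from the model's own config (Mixtral-8x7B and Qwen1.5 advertise 32K contexts), and the engine refuses to start if its KV cache cannot hold at least one sequence of that length. Pinning max_model_len to the 2048 tokens these evaluations actually use avoids that failure mode on tighter GPU memory budgets and keeps the engine's limit consistent with the max_seq_len that OpenCompass enforces on its side.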
