Commit

fixup some bug
Tlntin committed Nov 28, 2023
1 parent 7b42608 commit d49bab0
Showing 2 changed files with 22 additions and 22 deletions.
2 changes: 1 addition & 1 deletion qwen/build.py
@@ -18,7 +18,7 @@
from tensorrt_llm.builder import Builder
from tensorrt_llm.logger import logger
from tensorrt_llm.models import (
-fp8_quantize,
+# fp8_quantize,
smooth_quantize,
weight_only_groupwise_quantize,
weight_only_quantize,
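The only change in build.py is dropping the fp8_quantize import, presumably because the targeted TensorRT-LLM release no longer exports it on the code path this script uses. If the goal is to keep build.py working across releases that may or may not ship the symbol, a guarded import is one alternative; this is a sketch, not what the commit does:

    # Sketch only: tolerate tensorrt_llm.models builds that do not export
    # fp8_quantize, instead of commenting the import out.
    try:
        from tensorrt_llm.models import fp8_quantize
    except ImportError:
        fp8_quantize = None  # FP8 quantization path unavailable in this build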
42 changes: 21 additions & 21 deletions qwen/weight.py
@@ -11,7 +11,7 @@
from tensorrt_llm._utils import (
str_dtype_to_torch,
str_dtype_to_np,
-pad_vocab_size,
+# pad_vocab_size,
torch_to_numpy,
)
from tensorrt_llm.quantization import QuantMode
@@ -254,15 +254,15 @@ def sq_trick(x):
)

tensorrt_llm_qwen.layers[i].ln_1.weight.value = fromfile(
dir_path, "transformer.h." + str(i) + ".ln_1.weight.bin"
dir_path, "model.layers." + str(i) + ".ln_1.weight.bin"
)

dst = tensorrt_llm_qwen.layers[i].ln_2.weight
-dst.value = fromfile(dir_path, "transformer.h." + str(i) + ".ln_2.weight.bin")
+dst.value = fromfile(dir_path, "model.layers." + str(i) + ".ln_2.weight.bin")

t = fromfile(
dir_path,
"transformer.h." + str(i) + ".attention.qkv.weight." + suffix,
"model.layers." + str(i) + ".attention.qkv.weight." + suffix,
[hidden_size, c_attn_out_dim],
w_type,
)
@@ -275,7 +275,7 @@ def sq_trick(x):
tensorrt_llm_qwen.layers[i].attention.qkv,
tensorrt_llm_qwen.layers[i].ln_1.scale_to_int,
dir_path,
"transformer.h." + str(i) + ".attention.qkv.",
"model.layers." + str(i) + ".attention.qkv.",
[1, c_attn_out_dim],
quant_per_token_dyn,
quant_per_channel,
@@ -300,7 +300,7 @@ def sq_trick(x):
dst = tensorrt_llm_qwen.layers[i].attention.qkv.bias
t = fromfile(
dir_path,
"transformer.h."
"model.layers."
+ str(i)
+ ".attention.qkv.bias."
+ str(mapping.rank)
@@ -312,7 +312,7 @@ def sq_trick(x):
dst = tensorrt_llm_qwen.layers[i].attention.dense.weight
t = fromfile(
dir_path,
"transformer.h." + str(i) + ".attention.dense.weight." + suffix,
"model.layers." + str(i) + ".attention.dense.weight." + suffix,
[hidden_size // mapping.tp_size, hidden_size],
w_type,
)
@@ -327,15 +327,15 @@ def sq_trick(x):
tensorrt_llm_qwen.layers[i].attention.dense,
dense_scale,
dir_path,
"transformer.h." + str(i) + ".attention.dense.",
"model.layers." + str(i) + ".attention.dense.",
[1, hidden_size],
quant_per_token_dyn,
quant_per_channel,
)
set_smoother(
tensorrt_llm_qwen.layers[i].attention.dense,
dir_path,
"transformer.h." + str(i) + ".attention.dense",
"model.layers." + str(i) + ".attention.dense",
[1, hidden_size // mapping.tp_size],
mapping.rank,
)
@@ -357,7 +357,7 @@ def sq_trick(x):

t = fromfile(
dir_path,
"transformer.h." + str(i) + ".mlp.w1.weight." + suffix,
"model.layers." + str(i) + ".mlp.w1.weight." + suffix,
[hidden_size, inter_size // mapping.tp_size // 2],
w_type,
)
@@ -369,7 +369,7 @@ def sq_trick(x):
tensorrt_llm_qwen.layers[i].mlp.w1,
tensorrt_llm_qwen.layers[i].ln_2.scale_to_int,
dir_path,
"transformer.h." + str(i) + ".mlp.w1.",
"model.layers." + str(i) + ".mlp.w1.",
[1, inter_size // mapping.tp_size // 2],
quant_per_token_dyn,
quant_per_channel,
@@ -395,7 +395,7 @@ def sq_trick(x):

t = fromfile(
dir_path,
"transformer.h." + str(i) + ".mlp.w2.weight." + suffix,
"model.layers." + str(i) + ".mlp.w2.weight." + suffix,
[hidden_size, inter_size // mapping.tp_size // 2],
w_type,
)
@@ -407,7 +407,7 @@ def sq_trick(x):
tensorrt_llm_qwen.layers[i].mlp.w2,
tensorrt_llm_qwen.layers[i].ln_2.scale_to_int,
dir_path,
"transformer.h." + str(i) + ".mlp.w2.",
"model.layers." + str(i) + ".mlp.w2.",
[1, inter_size // mapping.tp_size // 2],
quant_per_token_dyn,
quant_per_channel,
@@ -433,7 +433,7 @@ def sq_trick(x):

t = fromfile(
dir_path,
"transformer.h." + str(i) + ".mlp.c_proj.weight." + suffix,
"model.layers." + str(i) + ".mlp.c_proj.weight." + suffix,
[inter_size // mapping.tp_size // 2, hidden_size],
w_type,
)
@@ -448,15 +448,15 @@ def sq_trick(x):
tensorrt_llm_qwen.layers[i].mlp.c_proj,
proj_scale,
dir_path,
"transformer.h." + str(i) + ".mlp.c_proj.",
"model.layers." + str(i) + ".mlp.c_proj.",
[1, hidden_size],
quant_per_token_dyn,
quant_per_channel,
)
set_smoother(
tensorrt_llm_qwen.layers[i].mlp.c_proj,
dir_path,
"transformer.h." + str(i) + ".mlp.c_proj",
"model.layers." + str(i) + ".mlp.c_proj",
[1, inter_size // mapping.tp_size // 2],
mapping.rank,
)
@@ -481,7 +481,7 @@ def sq_trick(x):
if use_int8_kv_cache:
t = fromfile(
dir_path,
"transformer.h." + str(i) + ".attention.qkv.scale_y_quant_orig.bin",
"model.layers." + str(i) + ".attention.qkv.scale_y_quant_orig.bin",
[1],
np.float32,
)
@@ -797,7 +797,7 @@ def preprocess_groupwise_weight_params(
1,
)
)
-
+torch_dtype = str_dtype_to_torch(dtype)
for layer in tqdm(layers_range, ncols=80, desc="loading attention weight..."):
prefix = f"transformer.h.{layer}.attn."
split_qkv_suf = []
@@ -840,7 +840,7 @@ def preprocess_groupwise_weight_params(
tensorrt_llm_qwen.layers[idx].attention.qkv.zero.value = th_zero.numpy()
tensorrt_llm_qwen.layers[idx].attention.qkv.scale.value = th_scale.to(torch_dtype).numpy()

-torch_dtype = str_dtype_to_torch(dtype)
+

for k, v in tqdm(model_params.items(), ncols=80, desc="loading other weight..."):
if isinstance(v, list):
@@ -1138,12 +1138,12 @@ def process_and_assign_weight(model_params, mPrefix, mOp, tp_dim=0):
if pad_vocab:
weight = model_params['lm_head.weight']
[vocab_size, k] = weight.shape
-new_weight = torch.zeros([pad_vocab_size, k])
+new_weight = torch.zeros([pad_vocab_size1, k])
new_weight[:vocab_size, :] = weight
new_weight = new_weight.T.contiguous()
amax = model_params['lm_head.weight_quantizer._amax'].reshape(
[vocab_size, k // group_size])
-new_amax = torch.ones([pad_vocab_size, k // group_size])
+new_amax = torch.ones([pad_vocab_size1, k // group_size])
new_amax[:vocab_size, :] = amax
new_amax = new_amax.T.contiguous()
new_scale = new_amax / 8
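Most of the weight.py hunks make one and the same substitution: the per-layer weight binaries produced by the conversion step are now looked up under a "model.layers.<i>." prefix instead of "transformer.h.<i>.". A minimal sketch of the resulting lookup pattern, with np.fromfile standing in for the script's fromfile helper and the file suffix and dtype treated as assumptions:

    import numpy as np

    def load_layer_bin(dir_path, layer, name, shape, dtype=np.float16):
        # After this commit the per-layer binaries are expected as
        # model.layers.<layer>.<name> rather than transformer.h.<layer>.<name>.
        path = f"{dir_path}/model.layers.{layer}.{name}"
        return np.fromfile(path, dtype=dtype).reshape(shape)

    # e.g. the fused QKV weight of layer 0 on a single rank (suffix assumed to be "0.bin"):
    # qkv_w = load_layer_bin(dir_path, 0, "attention.qkv.weight.0.bin",
    #                        [hidden_size, c_attn_out_dim])

The two hunks around preprocess_groupwise_weight_params simply move the torch_dtype = str_dtype_to_torch(dtype) assignment above the attention-weight loading loop that uses it. The final hunk lines up with the import change at the top of the file: pad_vocab_size is no longer imported from tensorrt_llm._utils, and the padded lm_head row count is read from pad_vocab_size1, a value computed elsewhere in the file and not shown in this diff. A hedged sketch of what such padding typically does, with the rounding multiple assumed rather than taken from the diff:

    import torch

    def pad_lm_head(weight: torch.Tensor, multiple: int = 64) -> torch.Tensor:
        # Assumption: pad_vocab_size1 rounds the vocabulary up to a multiple
        # (often 64, or the tensor-parallel size) so lm_head splits evenly across ranks.
        vocab_size, k = weight.shape
        pad_vocab_size1 = -(-vocab_size // multiple) * multiple  # ceil to the multiple
        new_weight = torch.zeros(pad_vocab_size1, k, dtype=weight.dtype)
        new_weight[:vocab_size, :] = weight
        return new_weight.T.contiguous()  # transposed, as the diff does for lm_head.weight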
