Update transformer.py -> Add intermediate_size
dtamayo-nlp committed May 10, 2024
1 parent 4bc6670 commit 6c6a46b
Showing 1 changed file with 13 additions and 7 deletions.
megatron/model/transformer.py: 13 additions & 7 deletions
@@ -102,13 +102,19 @@ def __init__(
        self.activation_type = neox_args.activation
        self.bias_gelu_fusion = neox_args.bias_gelu_fusion

-        # auto scale so geglu has equal parameters
-        ff_mult = int(4 * 2 / 3) if self.activation_type == "geglu" else 4
-        ff_dim = (
-            int(ff_mult * neox_args.hidden_size) * 2
-            if self.activation_type == "geglu"
-            else ff_mult * neox_args.hidden_size
-        )
+        if neox_args.intermediate_size:
+            ff_dim = neox_args.intermediate_size
+
+        else:
+            # auto scale so geglu has equal parameters
+            ff_mult = int(4 * 2 / 3) if self.activation_type == "geglu" else 4
+            ff_dim = (
+                int(ff_mult * neox_args.hidden_size) * 2
+                if self.activation_type == "geglu"
+                else int(ff_mult * neox_args.hidden_size)
+            )

        self.dense_h_to_4h = mpu.ColumnParallelLinear(
            neox_args=neox_args,
            input_size=neox_args.hidden_size,
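For context, the sizing rule this diff implements can be read as the following standalone sketch. The Args dataclass and compute_ff_dim function are hypothetical stand-ins for neox_args and the inline logic in the module's __init__; the numbers are illustrative, not taken from the commit.

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Args:  # hypothetical stand-in for neox_args
        hidden_size: int = 4096
        activation: str = "geglu"
        intermediate_size: Optional[int] = None

    def compute_ff_dim(args: Args) -> int:
        if args.intermediate_size:
            # New path added by this commit: an explicit intermediate_size
            # overrides the auto-scaling heuristic.
            return args.intermediate_size
        # Auto-scale so geglu has roughly the same parameter count as a
        # standard 4x MLP; note int(4 * 2 / 3) truncates 8/3 down to 2.
        ff_mult = int(4 * 2 / 3) if args.activation == "geglu" else 4
        if args.activation == "geglu":
            # geglu projects to value and gate halves, hence the final * 2.
            return int(ff_mult * args.hidden_size) * 2
        return int(ff_mult * args.hidden_size)

    print(compute_ff_dim(Args()))                        # geglu heuristic: 2 * 4096 * 2 = 16384
    print(compute_ff_dim(Args(activation="gelu")))       # standard 4x: 16384
    print(compute_ff_dim(Args(intermediate_size=11008))) # explicit override: 11008

The practical effect of the change: configurations can now pin the feed-forward width directly (for example, a Llama-style 11008) instead of always deriving it from hidden_size, while existing configs without intermediate_size fall through to the old heuristic unchanged.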
