Skip to content

Commit 0e3209a

Browse files
HollowMan6 and loadams authored
Fix extra_repr_str when weight is None / in zero-3 (#7254)
With the current code, `extra_repr_str` is left undefined if `self.weight` is None. In addition, under ZeRO-3 the shape is stored in `ds_shape`, so we also need to check for that case (although AutoTP does not currently support ZeRO-3).

```logs
  File "deepspeed/__init__.py", line 394, in tp_model_init
    model = TpTrainingManager(model=model, tp_size=tp_size, dtype=dtype).module
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "deepspeed/runtime/tensor_parallel/tp_manager.py", line 35, in __init__
    self._apply_policies(parser_dict)
  File "deepspeed/runtime/tensor_parallel/tp_manager.py", line 47, in _apply_policies
    self._apply_injection_policy(self.config, client_module)
  File "deepspeed/runtime/tensor_parallel/tp_manager.py", line 53, in _apply_injection_policy
    replace_transformer_layer(client_module, self.module, None, self.config, self.model_config)
  File "deepspeed/module_inject/replace_module.py", line 400, in replace_transformer_layer
    replaced_module = replace_module(model=model,
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "deepspeed/module_inject/replace_module.py", line 653, in replace_module
    replaced_module, _ = _replace_module(model, policy, state_dict=sd)
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "deepspeed/module_inject/replace_module.py", line 713, in _replace_module
    _, layer_id = _replace_module(child,
                  ^^^^^^^^^^^^^^^^^^^^^^
  File "deepspeed/module_inject/replace_module.py", line 713, in _replace_module
    _, layer_id = _replace_module(child,
                  ^^^^^^^^^^^^^^^^^^^^^^
  File "deepspeed/module_inject/replace_module.py", line 689, in _replace_module
    replaced_module = policies[child.__class__][0](child,
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "deepspeed/module_inject/replace_module.py", line 333, in replace_fn
    new_module = replace_wo_policy(child, _policy, prefix=prefix, state_dict=state_dict)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "deepspeed/module_inject/replace_module.py", line 316, in replace_wo_policy
    return _autotp._replace_module(module)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "deepspeed/module_inject/auto_tp.py", line 481, in _replace_module
    self._replace_module(child, name, class_name)
  File "deepspeed/module_inject/auto_tp.py", line 466, in _replace_module
    setattr(r_module, name, self.linear_policies[child.__class__](child, prev_name + '.' + name,
                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "deepspeed/module_inject/auto_tp.py", line 361, in _replace
    if 'Yuan' in str(self.module):
                 ^^^^^^^^^^^^^^^^
  File "torch/nn/modules/module.py", line 2940, in __repr__
    mod_str = repr(module)
              ^^^^^^^^^^^^
  File "torch/nn/modules/module.py", line 2940, in __repr__
    mod_str = repr(module)
              ^^^^^^^^^^^^
  File "torch/nn/modules/module.py", line 2934, in __repr__
    extra_repr = self.extra_repr()
                 ^^^^^^^^^^^^^^^^^
  File "deepspeed/module_inject/layers.py", line 267, in extra_repr
    out_features, in_features = self.weight.shape[-2:] if self.weight is not None else (None, None)
    ^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: not enough values to unpack (expected 2, got 1)
```

Signed-off-by: Hollow Man <[email protected]>
Co-authored-by: Logan Adams <[email protected]>
1 parent e290bf5 commit 0e3209a

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

deepspeed/module_inject/layers.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from torch.nn.parameter import Parameter
1111
from deepspeed.accelerator import get_accelerator
1212
from deepspeed.module_inject.tp_shard import get_shard_size, get_shard_size_list
13+
from deepspeed.runtime.zero.utils import is_zero_param
1314
from abc import ABC, abstractmethod
1415
from typing import Iterable, Any, Optional, List, Tuple
1516
from .fusedqkv_utils import shard_value_with_share_qk, shard_chunk_mlp, prepare_tp_fused_qkvw
@@ -262,12 +263,13 @@ def __deepcopy__(self, memo):
262263
return new_obj
263264

264265
def extra_repr(self):
266+
out_features, in_features = None, None
265267
if self.weight is not None:
266-
out_features, in_features = self.weight.shape[-2:] if self.weight is not None else (None, None)
267-
dtype = self.weight.dtype if self.weight is not None else None
268-
extra_repr_str = "in_features={}, out_features={}, bias={}, dtype={}".format(
269-
in_features, out_features, self.bias is not None, dtype)
270-
return extra_repr_str
268+
out_features, in_features = self.weight.ds_shape[-2:] if is_zero_param(
269+
self.weight) else self.weight.shape[-2:]
270+
dtype = self.weight.dtype if self.weight is not None else None
271+
return "in_features={}, out_features={}, bias={}, dtype={}".format(in_features, out_features, self.bias
272+
is not None, dtype)
271273

272274
def move(self, tensor):
273275
# TODO: consider the timing of deletion

0 commit comments

Comments
 (0)