From f3e4dbfbb59709138970e2b6f1c682d72cf7c031 Mon Sep 17 00:00:00 2001
From: Shane O'Brien
Date: Tue, 12 Nov 2024 09:27:57 +0000
Subject: [PATCH] Added OLMoModel Class and config.architecture detection, and
 temporary fake layernorm

---
 src/python/py/models/builder.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/python/py/models/builder.py b/src/python/py/models/builder.py
index 8e5dc006c..31d126e13 100644
--- a/src/python/py/models/builder.py
+++ b/src/python/py/models/builder.py
@@ -995,7 +995,13 @@ def make_layernorm(self, layer_id, layernorm, skip, simple, location):
         skip_input = self.layernorm_attrs["skip_input"]
 
         weight = f"model.layers.{layer_id}.{location}_layernorm.weight"
+        #ShaneTim
+        if layernorm.weight is None:
+            layernorm.weight = torch.ones(2048)
         self.make_external_tensor(layernorm.weight.detach().numpy().astype(self.to_numpy_dtype[self.io_dtype]) + self.layernorm_attrs["add_offset"], weight)
+        #ShaneTim
+        if layernorm.bias is None:
+            layernorm.bias = torch.ones(2048)
         bias = f"model.layers.{layer_id}.{location}_layernorm.bias"
         if not simple:
             self.make_external_tensor(layernorm.bias.detach().numpy().astype(self.to_numpy_dtype[self.io_dtype]), bias)
@@ -3040,6 +3046,10 @@ def make_layer(self, layer_id, layer):
         layer.self_attn = layer.self_attn if hasattr(layer, 'self_attn') else layer.self_attention
         super().make_layer(layer_id, layer)
 
+class OLMoModel(Model):
+    def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
+        super().__init__(config, io_dtype, onnx_dtype, ep, cache_dir, extra_options)
+
 def check_extra_options(kv_pairs):
     if "int4_op_types_to_quantize" in kv_pairs:
         op_types_to_quantize = ()
@@ -3144,6 +3154,8 @@ def create_model(model_name, input_path, output_dir, precision, execution_provid
         # Quantized ChatGLM model has ChatGLMForConditionalGeneration as architecture whereas HF model as the latter
         config.hidden_act = "swiglu"
         onnx_model = ChatGLMModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
+    elif config.architectures[0] == "OlmoForCausalLM":
+        onnx_model = OLMoModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
     else:
         raise NotImplementedError(f"The {hf_name} model is not currently supported.")