huggingface
diff --git a/‎optimum/exporters/neuron/model_configs/decoder_configs.py
+7 b/‎optimum/exporters/neuron/model_configs/decoder_configs.py
+7
diff --git a/‎optimum/neuron/models/granite/__init__.py
+14 b/‎optimum/neuron/models/granite/__init__.py
+14
diff --git a/‎optimum/neuron/models/granite/config.py
+32 b/‎optimum/neuron/models/granite/config.py
+32
@@ -17,6 +17,7 @@
 
 from optimum.exporters.tasks import TasksManager
 
+from ....neuron.models.granite.model import GraniteForSampling
 from ....neuron.models.qwen2.model import Qwen2ForSampling
 from ..config import TextNeuronDecoderConfig
 
@@ -63,3 +64,9 @@ class Qwen2NeuronConfig(TextNeuronDecoderConfig):
     NEURONX_CLASS = Qwen2ForSampling
     CONTINUOUS_BATCHING = True
     FUSE_QKV = False
+
+
+@register_in_tasks_manager("granite", "text-generation")
+class GraniteNeuronConfig(TextNeuronDecoderConfig):
+    NEURONX_CLASS = GraniteForSampling
+    CONTINUOUS_BATCHING = True
@@ -0,0 +1,14 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
@@ -0,0 +1,32 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from transformers import PretrainedConfig
+from transformers_neuronx.llama.config import LlamaConfig
+
+
+class GraniteConfig(LlamaConfig):
+    """The Granite model extends the TnX Llama configuration with Granite-specific parameters."""
+
+    def __init__(
+        self, config: PretrainedConfig, n_positions: int, batch_size: int, amp: str, tp_degree: int, **kwargs
+    ):
+        super().__init__(config, n_positions, batch_size, amp, tp_degree, **kwargs)
+        self.model_type = "granite"
+        # Parameters specific to Granite modeling, copied from the source config
+        self.attention_multiplier = config.attention_multiplier
+        self.embedding_multiplier = config.embedding_multiplier
+        self.logits_scaling = config.logits_scaling
+        self.residual_multiplier = config.residual_multiplier