feat(decoder): add support for granite models

dacorvo · dacorvo · commit 6f125af16720 · 2024-12-13T10:54:54.000Z
Using straight Llama modeling for now -> generated content does not look
so good, so maybe there are differences in RoPE?
diff --git a/optimum/exporters/neuron/model_configs/decoder_configs.py b/optimum/exporters/neuron/model_configs/decoder_configs.py
@@ -63,3 +63,9 @@ class Qwen2NeuronConfig(TextNeuronDecoderConfig):
     NEURONX_CLASS = Qwen2ForSampling
     CONTINUOUS_BATCHING = True
     FUSE_QKV = False
+
+
+@register_in_tasks_manager("granite", "text-generation")  # registers this config for ("granite", "text-generation")
+class GraniteNeuronConfig(TextNeuronDecoderConfig):  # Neuron decoder export config for granite models
+    NEURONX_CLASS = LlamaForSampling  # reuses Llama modeling for now. NOTE(review): commit message reports poor generations, possibly RoPE differences - confirm
+    CONTINUOUS_BATCHING = True  # same setting as Qwen2NeuronConfig above