We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent cb1a4d0 commit 6f125af — Copy full SHA for 6f125af
optimum/exporters/neuron/model_configs/decoder_configs.py
@@ -63,3 +63,9 @@ class Qwen2NeuronConfig(TextNeuronDecoderConfig):
63
NEURONX_CLASS = Qwen2ForSampling
64
CONTINUOUS_BATCHING = True
65
FUSE_QKV = False
66
+
67
+
68
+@register_in_tasks_manager("granite", "text-generation")
69
+class GraniteNeuronConfig(TextNeuronDecoderConfig):
70
+ NEURONX_CLASS = LlamaForSampling
71
+ CONTINUOUS_BATCHING = True
0 commit comments