diff --git a/kubeai/models/deepseek-r1-distill-llama-70b-gaudi.yaml b/kubeai/models/deepseek-r1-distill-llama-70b-gaudi.yaml
new file mode 100644
index 000000000..a694a4439
--- /dev/null
+++ b/kubeai/models/deepseek-r1-distill-llama-70b-gaudi.yaml
@@ -0,0 +1,25 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Source: models/templates/models.yaml
+apiVersion: kubeai.org/v1
+kind: Model
+metadata:
+  name: deepseek-r1-distill-llama-70b-gaudi
+spec:
+  features: [TextGeneration]
+  url: hf://deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+  cacheProfile: nfs
+  engine: VLLM
+  args:
+  - --tensor-parallel-size=8
+  env:
+    OMPI_MCA_btl_vader_single_copy_mechanism: none
+    PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
+    # vLLM startup takes too long for autoscaling, especially with Gaudi
+    VLLM_SKIP_WARMUP: "true"
+
+  # scale-from-zero avoids an idle instance occupying a node, but causes a long delay
+  minReplicas: 0
+  maxReplicas: 1
+  resourceProfile: gaudi-for-text-generation:8
diff --git a/kubeai/models/deepseek-r1-distill-llama-8b-gaudi.yaml b/kubeai/models/deepseek-r1-distill-llama-8b-gaudi.yaml
new file mode 100644
index 000000000..0266dc261
--- /dev/null
+++ b/kubeai/models/deepseek-r1-distill-llama-8b-gaudi.yaml
@@ -0,0 +1,26 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Source: models/templates/models.yaml
+apiVersion: kubeai.org/v1
+kind: Model
+metadata:
+  name: deepseek-r1-distill-llama-8b-gaudi
+spec:
+  features: [TextGeneration]
+  url: hf://deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+  cacheProfile: nfs
+  engine: VLLM
+  args:
+  - --tensor-parallel-size=1
+  - --block-size=128
+  - --max-num-seqs=256
+  - --max-seq-len-to-capture=2048
+  env:
+    OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+    # vLLM startup takes too long for autoscaling, especially with Gaudi
+    VLLM_SKIP_WARMUP: "true"
+  minReplicas: 1
+  maxReplicas: 4
+  targetRequests: 120
+  resourceProfile: gaudi-for-text-generation:1
diff --git a/kubeai/models/qwen2.5-72b-instruct-gaudi.yaml b/kubeai/models/qwen2.5-72b-instruct-gaudi.yaml
new file mode 100644
index 000000000..7079bfb4c
--- /dev/null
+++ b/kubeai/models/qwen2.5-72b-instruct-gaudi.yaml
@@ -0,0 +1,25 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Source: models/templates/models.yaml
+apiVersion: kubeai.org/v1
+kind: Model
+metadata:
+  name: qwen2.5-72b-instruct-gaudi
+spec:
+  features: [TextGeneration]
+  url: hf://Qwen/Qwen2.5-72B-Instruct
+  cacheProfile: nfs
+  engine: VLLM
+  args:
+  - --tensor-parallel-size=4
+  env:
+    OMPI_MCA_btl_vader_single_copy_mechanism: none
+    PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
+    # vLLM startup takes too long for autoscaling, especially with Gaudi
+    VLLM_SKIP_WARMUP: "true"
+
+  # scale-from-zero avoids an idle instance occupying half a node, but causes a long delay
+  minReplicas: 0
+  maxReplicas: 2
+  resourceProfile: gaudi-for-text-generation:4
diff --git a/kubeai/models/qwen2.5-7b-instruct-gaudi.yaml b/kubeai/models/qwen2.5-7b-instruct-gaudi.yaml
new file mode 100644
index 000000000..ec1772366
--- /dev/null
+++ b/kubeai/models/qwen2.5-7b-instruct-gaudi.yaml
@@ -0,0 +1,26 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Source: models/templates/models.yaml
+apiVersion: kubeai.org/v1
+kind: Model
+metadata:
+  name: qwen2.5-7b-instruct-gaudi
+spec:
+  features: [TextGeneration]
+  url: hf://Qwen/Qwen2.5-7B-Instruct
+  cacheProfile: nfs
+  engine: VLLM
+  args:
+  - --tensor-parallel-size=1
+  - --block-size=128
+  - --max-num-seqs=256
+  - --max-seq-len-to-capture=2048
+  env:
+    OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+    # vLLM startup takes too long for autoscaling, especially with Gaudi
+    VLLM_SKIP_WARMUP: "true"
+  minReplicas: 1
+  maxReplicas: 4
+  targetRequests: 120
+  resourceProfile: gaudi-for-text-generation:1
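
The `resourceProfile: gaudi-for-text-generation:<N>` and `cacheProfile: nfs` references above are not defined in this change; both must exist in the KubeAI Helm values for these Models to schedule. A minimal sketch of what those definitions might look like follows, assuming the Habana device plugin's `habana.ai/gaudi` resource name; the image name, node selector label, and storage class below are illustrative assumptions, not values taken from this repository.

# Sketch of the KubeAI Helm values these Models depend on (names are assumptions).
# The ":<N>" suffix on resourceProfile multiplies the profile's requests/limits,
# e.g. gaudi-for-text-generation:8 requests 8 Gaudi cards for the 70B model.
resourceProfiles:
  gaudi-for-text-generation:
    imageName: vllm-gaudi                # assumption: a vLLM image built for Gaudi/HPU
    requests:
      habana.ai/gaudi: "1"               # Habana device plugin resource, scaled by :<N>
    limits:
      habana.ai/gaudi: "1"
    nodeSelector:
      habana.ai/gaudi.present: "true"    # assumption: label advertised on Gaudi nodes

cacheProfiles:
  nfs:
    sharedFilesystem:
      storageClassName: nfs-csi          # assumption: an RWX-capable NFS storage class

With profiles like these in place, `minReplicas: 0` on the 70B and 72B Models releases a whole or half node while idle, at the cost of a cold vLLM start on the first request, while the 7B/8B Models keep one warm replica and scale out on `targetRequests`.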
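
Once applied, each Model is served under its `metadata.name` through KubeAI's OpenAI-compatible API. A throwaway pod along these lines can smoke-test one model end to end; it assumes the KubeAI service is reachable as `kubeai` in the same namespace, with the `/openai/v1` path from the KubeAI documentation.

# Hypothetical smoke-test pod; check its logs for a completion, then delete it.
apiVersion: v1
kind: Pod
metadata:
  name: gaudi-model-smoke-test
spec:
  restartPolicy: Never
  containers:
  - name: curl
    image: curlimages/curl:latest
    args:
    - -sS
    - http://kubeai/openai/v1/completions
    - -H
    - "Content-Type: application/json"
    - -d
    - '{"model": "qwen2.5-7b-instruct-gaudi", "prompt": "Hello", "max_tokens": 16}'

Note that the first request against a scale-from-zero model (the 70B/72B entries) blocks until a replica is scheduled and vLLM has loaded the weights, which is exactly the delay the `minReplicas` comments above trade off.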