From f9325782396a5c5f0b89ed8966d8e2e35d6577a3 Mon Sep 17 00:00:00 2001
From: Diego Lovison
Date: Tue, 15 Oct 2024 14:30:02 -0300
Subject: [PATCH] Automated test to verify AMD GPUs can be used in Data Science Pipelines

Replace the single NVIDIA-only GPU availability sample with separate NVIDIA
and AMD pipelines, each compiled to its own YAML, and rename the Robot suite
so both accelerator types reuse a shared keyword. The components now rely on
the PyTorch bundled in the runtime images instead of installing torch through
a custom pip index, and the pipeline run timeout is raised from 240 to 600
seconds.
---
 .../pytorch/pytorch_amd_gpu_availability.py   |  54 ++++++
 ...pytorch_amd_gpu_availability_compiled.yaml | 139 +++++++++++++++
 .../pytorch_nvidia_gpu_availability.py        |  54 ++++++
 ...orch_nvidia_gpu_availability_compiled.yaml | 139 +++++++++++++++
 .../pytorch_verify_gpu_availability.py        |  65 -------
 ...orch_verify_gpu_availability_compiled.yaml | 164 ------------------
 ...=> 1105__data-science-pipelines-gpu.robot} |  41 +++--
 7 files changed, 416 insertions(+), 240 deletions(-)
 create mode 100644 ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py
 create mode 100644 ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml
 create mode 100644 ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py
 create mode 100644 ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml
 delete mode 100644 ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_verify_gpu_availability.py
 delete mode 100644 ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_verify_gpu_availability_compiled.yaml
 rename ods_ci/tests/Tests/1100__data_science_pipelines/{1105__data-science-pipelines-gpu-nvidia.robot => 1105__data-science-pipelines-gpu.robot} (63%)

diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py
new file mode 100644
index 000000000..71f532de3
--- /dev/null
+++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py
@@ -0,0 +1,54 @@
+from kfp import compiler, dsl, kubernetes
+from kfp.dsl import PipelineTask
+
+# Runtime: Pytorch with ROCm and Python 3.9 (UBI 9)
+common_base_image = (
+    "quay.io/modh/runtime-images@sha256:76d2a5d9ca0bb35c4d9962a7ee6798672afd3a5c7d226d87ef65f952e7cb93cf"
+)
+
+
+def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_limit: int):
+    print(f"Adding GPU tolerations: {accelerator_type}({accelerator_limit})")
+    task.set_accelerator_type(accelerator=accelerator_type)
+    task.set_accelerator_limit(accelerator_limit)
+    kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule")
+
+
+@dsl.component(
+    base_image=common_base_image
+)
+def verify_gpu_availability(gpu_toleration: bool):
+    import torch
+
+    cuda_available = torch.cuda.is_available()
+    device_count = torch.cuda.device_count()
+    print("------------------------------")
+    print("GPU availability")
+    print("------------------------------")
+    print(f"cuda available: {cuda_available}")
+    print(f"device count: {device_count}")
+    if gpu_toleration:
+        assert torch.cuda.is_available()
+        assert torch.cuda.device_count() > 0
+        t = torch.tensor([5, 5, 5], dtype=torch.int64, device='cuda')
+    else:
+        assert not torch.cuda.is_available()
+        assert torch.cuda.device_count() == 0
+        t = torch.tensor([5, 5, 5], dtype=torch.int64)
+    print(f"tensor: {t}")
+    print("GPU availability test: PASS")
+
+
+@dsl.pipeline(
+    name="pytorch-amd-gpu-availability",
+    
description="Verifies pipeline tasks run on GPU nodes only when tolerations are added", +) +def pytorch_amd_gpu_availability(): + verify_gpu_availability(gpu_toleration=False).set_caching_options(False) + + task_with_toleration = verify_gpu_availability(gpu_toleration=True).set_caching_options(False) + add_gpu_toleration(task_with_toleration, "amd.com/gpu", 1) + + +if __name__ == "__main__": + compiler.Compiler().compile(pytorch_amd_gpu_availability, package_path=__file__.replace(".py", "_compiled.yaml")) diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml new file mode 100644 index 000000000..1c2b67ddc --- /dev/null +++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml @@ -0,0 +1,139 @@ +# PIPELINE DEFINITION +# Name: pytorch-amd-gpu-availability +# Description: Verifies pipeline tasks run on GPU nodes only when tolerations are added +components: + comp-verify-gpu-availability: + executorLabel: exec-verify-gpu-availability + inputDefinitions: + parameters: + gpu_toleration: + parameterType: BOOLEAN + comp-verify-gpu-availability-2: + executorLabel: exec-verify-gpu-availability-2 + inputDefinitions: + parameters: + gpu_toleration: + parameterType: BOOLEAN +deploymentSpec: + executors: + exec-verify-gpu-availability: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - verify_gpu_availability + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\ + \n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\ + \ print(\"------------------------------\")\n print(\"GPU availability\"\ + )\n print(\"------------------------------\")\n print(f\"cuda available:\ + \ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \ + \ if gpu_toleration:\n assert torch.cuda.is_available()\n \ + \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\ + \ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\ + \ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\ + \ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\ + \ availability test: PASS\")\n\n" + image: quay.io/modh/runtime-images@sha256:76d2a5d9ca0bb35c4d9962a7ee6798672afd3a5c7d226d87ef65f952e7cb93cf + exec-verify-gpu-availability-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - verify_gpu_availability + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\ + \n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\ + \ print(\"------------------------------\")\n print(\"GPU availability\"\ + )\n print(\"------------------------------\")\n print(f\"cuda available:\ + \ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \ + \ if gpu_toleration:\n assert torch.cuda.is_available()\n \ + \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\ + \ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\ + \ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\ + \ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\ + \ availability test: PASS\")\n\n" + image: quay.io/modh/runtime-images@sha256:76d2a5d9ca0bb35c4d9962a7ee6798672afd3a5c7d226d87ef65f952e7cb93cf + resources: + accelerator: + count: '1' + type: amd.com/gpu +pipelineInfo: + description: Verifies pipeline tasks run on GPU nodes only when tolerations are + added + name: pytorch-amd-gpu-availability +root: + dag: + tasks: + verify-gpu-availability: + cachingOptions: {} + componentRef: + name: comp-verify-gpu-availability + inputs: + parameters: + gpu_toleration: + runtimeValue: + constant: false + taskInfo: + name: verify-gpu-availability + verify-gpu-availability-2: + cachingOptions: {} + componentRef: + name: comp-verify-gpu-availability-2 + inputs: + parameters: + gpu_toleration: + runtimeValue: + constant: true + taskInfo: + name: verify-gpu-availability-2 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.9.0 +--- +platforms: + kubernetes: + deploymentSpec: + executors: + exec-verify-gpu-availability-2: + tolerations: + - effect: NoSchedule + key: amd.com/gpu + operator: Exists diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py new file mode 100644 index 000000000..575e859fc --- /dev/null +++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py @@ -0,0 +1,54 @@ +from kfp import compiler, dsl, kubernetes +from kfp.dsl import PipelineTask + +# Runtime: Pytorch with CUDA and Python 3.9 (UBI 9) +common_base_image = ( + "quay.io/modh/runtime-images@sha256:cee154f6db15de27929362f91baa128fc4f79b9c1930ab0f27561174d39aadfa" +) + + +def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_limit: int): + print(f"Adding GPU tolerations: {accelerator_type}({accelerator_limit})") + task.set_accelerator_type(accelerator=accelerator_type) + task.set_accelerator_limit(accelerator_limit) + kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule") + + +@dsl.component( + 
base_image=common_base_image +) +def verify_gpu_availability(gpu_toleration: bool): + import torch + + cuda_available = torch.cuda.is_available() + device_count = torch.cuda.device_count() + print("------------------------------") + print("GPU availability") + print("------------------------------") + print(f"cuda available: {cuda_available}") + print(f"device count: {device_count}") + if gpu_toleration: + assert torch.cuda.is_available() + assert torch.cuda.device_count() > 0 + t = torch.tensor([5, 5, 5], dtype=torch.int64, device='cuda') + else: + assert not torch.cuda.is_available() + assert torch.cuda.device_count() == 0 + t = torch.tensor([5, 5, 5], dtype=torch.int64) + print(f"tensor: {t}") + print("GPU availability test: PASS") + + +@dsl.pipeline( + name="pytorch-nvidia-gpu-availability", + description="Verifies pipeline tasks run on GPU nodes only when tolerations are added", +) +def pytorch_nvidia_gpu_availability(): + verify_gpu_availability(gpu_toleration=False).set_caching_options(False) + + task_with_toleration = verify_gpu_availability(gpu_toleration=True).set_caching_options(False) + add_gpu_toleration(task_with_toleration, "nvidia.com/gpu", 1) + + +if __name__ == "__main__": + compiler.Compiler().compile(pytorch_nvidia_gpu_availability, package_path=__file__.replace(".py", "_compiled.yaml")) diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml new file mode 100644 index 000000000..7c40c99f4 --- /dev/null +++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml @@ -0,0 +1,139 @@ +# PIPELINE DEFINITION +# Name: pytorch-nvidia-gpu-availability +# Description: Verifies pipeline tasks run on GPU nodes only when tolerations are added +components: + comp-verify-gpu-availability: + executorLabel: exec-verify-gpu-availability + inputDefinitions: + parameters: + gpu_toleration: + parameterType: BOOLEAN + comp-verify-gpu-availability-2: + executorLabel: exec-verify-gpu-availability-2 + inputDefinitions: + parameters: + gpu_toleration: + parameterType: BOOLEAN +deploymentSpec: + executors: + exec-verify-gpu-availability: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - verify_gpu_availability + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\ + \n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\ + \ print(\"------------------------------\")\n print(\"GPU availability\"\ + )\n print(\"------------------------------\")\n print(f\"cuda available:\ + \ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \ + \ if gpu_toleration:\n assert torch.cuda.is_available()\n \ + \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\ + \ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\ + \ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\ + \ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\ + \ availability test: PASS\")\n\n" + image: quay.io/modh/runtime-images@sha256:cee154f6db15de27929362f91baa128fc4f79b9c1930ab0f27561174d39aadfa + exec-verify-gpu-availability-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - verify_gpu_availability + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\ + \n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\ + \ print(\"------------------------------\")\n print(\"GPU availability\"\ + )\n print(\"------------------------------\")\n print(f\"cuda available:\ + \ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \ + \ if gpu_toleration:\n assert torch.cuda.is_available()\n \ + \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\ + \ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\ + \ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\ + \ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\ + \ availability test: PASS\")\n\n" + image: quay.io/modh/runtime-images@sha256:cee154f6db15de27929362f91baa128fc4f79b9c1930ab0f27561174d39aadfa + resources: + accelerator: + count: '1' + type: nvidia.com/gpu +pipelineInfo: + description: Verifies pipeline tasks run on GPU nodes only when tolerations are + added + name: pytorch-nvidia-gpu-availability +root: + dag: + tasks: + verify-gpu-availability: + 
cachingOptions: {} + componentRef: + name: comp-verify-gpu-availability + inputs: + parameters: + gpu_toleration: + runtimeValue: + constant: false + taskInfo: + name: verify-gpu-availability + verify-gpu-availability-2: + cachingOptions: {} + componentRef: + name: comp-verify-gpu-availability-2 + inputs: + parameters: + gpu_toleration: + runtimeValue: + constant: true + taskInfo: + name: verify-gpu-availability-2 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.9.0 +--- +platforms: + kubernetes: + deploymentSpec: + executors: + exec-verify-gpu-availability-2: + tolerations: + - effect: NoSchedule + key: nvidia.com/gpu + operator: Exists diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_verify_gpu_availability.py b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_verify_gpu_availability.py deleted file mode 100644 index eb824e0cd..000000000 --- a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_verify_gpu_availability.py +++ /dev/null @@ -1,65 +0,0 @@ -from kfp import compiler, dsl, kubernetes -from kfp.dsl import PipelineTask - -# Runtime: Pytorch with CUDA and Python 3.9 (UBI 9) -common_base_image = ( - "quay.io/modh/runtime-images@sha256:cee154f6db15de27929362f91baa128fc4f79b9c1930ab0f27561174d39aadfa" -) - - -def add_pip_index_configuration(task: PipelineTask): - kubernetes.use_config_map_as_env( - task, - config_map_name="ds-pipeline-custom-env-vars", - config_map_key_to_env={"pip_index_url": "PIP_INDEX_URL", "pip_trusted_host": "PIP_TRUSTED_HOST"}, - ) - - -def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_limit: int): - print("Adding GPU tolerations") - task.set_accelerator_type(accelerator=accelerator_type) - task.set_accelerator_limit(accelerator_limit) - kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule") - - -@dsl.component( - base_image=common_base_image, - packages_to_install=["torch"], - pip_index_urls=["$PIP_INDEX_URL"], - pip_trusted_hosts=["$PIP_TRUSTED_HOST"], -) -def verify_gpu_availability(gpu_toleration_added: bool): - import torch # noqa: PLC0415 - - cuda_available = torch.cuda.is_available() - device_count = torch.cuda.device_count() - print("------------------------------") - print("GPU availability") - print("------------------------------") - print("gpu_toleration_added:" + str(gpu_toleration_added)) - print("torch.cuda.is_available():" + str(cuda_available)) - print("torch.cuda.device_count():" + str(device_count)) - if gpu_toleration_added and not torch.cuda.is_available(): - print("GPU availability test: FAIL") - raise ValueError("GPU toleration was added but there is no GPU not available for this task") - if not gpu_toleration_added and torch.cuda.is_available(): - print("GPU availability test: FAIL") - raise ValueError("GPU toleration was not added but there is a GPU available for this task") - print("GPU availability test: PASS") - - -@dsl.pipeline( - name="pytorch-verify-gpu-availability", - description="Verifies pipeline tasks run on GPU nodes only when tolerations are added", -) -def pytorch_verify_gpu_availability(): - task_without_toleration = verify_gpu_availability(gpu_toleration_added=False).set_caching_options(False) - add_pip_index_configuration(task_without_toleration) - - task_with_toleration = verify_gpu_availability(gpu_toleration_added=True).set_caching_options(False) - add_pip_index_configuration(task_with_toleration) - add_gpu_toleration(task_with_toleration, 
"nvidia.com/gpu", 1) - - -if __name__ == "__main__": - compiler.Compiler().compile(pytorch_verify_gpu_availability, package_path=__file__.replace(".py", "_compiled.yaml")) diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_verify_gpu_availability_compiled.yaml b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_verify_gpu_availability_compiled.yaml deleted file mode 100644 index b2a004fcd..000000000 --- a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_verify_gpu_availability_compiled.yaml +++ /dev/null @@ -1,164 +0,0 @@ -# PIPELINE DEFINITION -# Name: pytorch-verify-gpu-availability -# Description: Verifies pipeline tasks run on GPU nodes only when tolerations are added -components: - comp-verify-gpu-availability: - executorLabel: exec-verify-gpu-availability - inputDefinitions: - parameters: - gpu_toleration_added: - parameterType: BOOLEAN - comp-verify-gpu-availability-2: - executorLabel: exec-verify-gpu-availability-2 - inputDefinitions: - parameters: - gpu_toleration_added: - parameterType: BOOLEAN -deploymentSpec: - executors: - exec-verify-gpu-availability: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - verify_gpu_availability - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location --index-url $PIP_INDEX_URL\ - \ --trusted-host $PIP_TRUSTED_HOST 'kfp==2.9.0' '--no-deps' 'typing-extensions>=3.7.4,<5;\ - \ python_version<\"3.9\"' && python3 -m pip install --quiet --no-warn-script-location\ - \ --index-url $PIP_INDEX_URL --trusted-host $PIP_TRUSTED_HOST 'torch' &&\ - \ \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef verify_gpu_availability(gpu_toleration_added: bool):\n import\ - \ torch # noqa: PLC0415\n\n cuda_available = torch.cuda.is_available()\n\ - \ device_count = torch.cuda.device_count()\n print(\"------------------------------\"\ - )\n print(\"GPU availability\")\n print(\"------------------------------\"\ - )\n print(\"gpu_toleration_added:\" + str(gpu_toleration_added))\n \ - \ print(\"torch.cuda.is_available():\" + str(cuda_available))\n print(\"\ - torch.cuda.device_count():\" + str(device_count))\n if gpu_toleration_added\ - \ and not torch.cuda.is_available():\n print(\"GPU availability test:\ - \ FAIL\")\n raise ValueError(\"GPU toleration was added but there\ - \ is no GPU not available for this task\")\n if not gpu_toleration_added\ - \ and torch.cuda.is_available():\n print(\"GPU availability test:\ - \ FAIL\")\n raise ValueError(\"GPU toleration was not added but there\ - \ is a GPU available for this task\")\n print(\"GPU availability test:\ - \ PASS\")\n\n" - image: quay.io/modh/runtime-images@sha256:cee154f6db15de27929362f91baa128fc4f79b9c1930ab0f27561174d39aadfa - exec-verify-gpu-availability-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - verify_gpu_availability - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location --index-url $PIP_INDEX_URL\ - \ --trusted-host $PIP_TRUSTED_HOST 'kfp==2.9.0' '--no-deps' 'typing-extensions>=3.7.4,<5;\ - \ python_version<\"3.9\"' && python3 -m pip install --quiet --no-warn-script-location\ - \ --index-url $PIP_INDEX_URL --trusted-host $PIP_TRUSTED_HOST 'torch' &&\ - \ \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef verify_gpu_availability(gpu_toleration_added: bool):\n import\ - \ torch # noqa: PLC0415\n\n cuda_available = torch.cuda.is_available()\n\ - \ device_count = torch.cuda.device_count()\n print(\"------------------------------\"\ - )\n print(\"GPU availability\")\n print(\"------------------------------\"\ - )\n print(\"gpu_toleration_added:\" + str(gpu_toleration_added))\n \ - \ print(\"torch.cuda.is_available():\" + str(cuda_available))\n print(\"\ - torch.cuda.device_count():\" + str(device_count))\n if gpu_toleration_added\ - \ and not torch.cuda.is_available():\n print(\"GPU availability test:\ - \ FAIL\")\n raise ValueError(\"GPU toleration was added but there\ - \ is no GPU not available for this task\")\n if not gpu_toleration_added\ - \ and torch.cuda.is_available():\n print(\"GPU availability test:\ - \ FAIL\")\n raise ValueError(\"GPU toleration was not added but there\ - \ is a GPU available for this task\")\n print(\"GPU availability test:\ - \ PASS\")\n\n" - image: quay.io/modh/runtime-images@sha256:cee154f6db15de27929362f91baa128fc4f79b9c1930ab0f27561174d39aadfa - resources: - accelerator: - count: '1' - type: nvidia.com/gpu -pipelineInfo: - description: Verifies pipeline tasks run on GPU nodes only when tolerations are - added - name: pytorch-verify-gpu-availability -root: - dag: - tasks: - verify-gpu-availability: - cachingOptions: {} - componentRef: - name: comp-verify-gpu-availability - inputs: - parameters: - gpu_toleration_added: - runtimeValue: - constant: false - taskInfo: - name: verify-gpu-availability - verify-gpu-availability-2: - cachingOptions: {} - componentRef: - name: comp-verify-gpu-availability-2 - inputs: - parameters: - gpu_toleration_added: - runtimeValue: - constant: true - taskInfo: - name: verify-gpu-availability-2 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.9.0 ---- -platforms: - kubernetes: - deploymentSpec: - executors: - exec-verify-gpu-availability: - configMapAsEnv: - - configMapName: ds-pipeline-custom-env-vars - keyToEnv: - - configMapKey: pip_index_url - envVar: PIP_INDEX_URL - - configMapKey: pip_trusted_host - envVar: PIP_TRUSTED_HOST - exec-verify-gpu-availability-2: - configMapAsEnv: - - configMapName: ds-pipeline-custom-env-vars - keyToEnv: - - configMapKey: pip_index_url - envVar: PIP_INDEX_URL - - configMapKey: pip_trusted_host - envVar: PIP_TRUSTED_HOST - tolerations: - - effect: NoSchedule - key: nvidia.com/gpu - operator: Exists diff --git a/ods_ci/tests/Tests/1100__data_science_pipelines/1105__data-science-pipelines-gpu-nvidia.robot b/ods_ci/tests/Tests/1100__data_science_pipelines/1105__data-science-pipelines-gpu.robot similarity index 63% rename from 
ods_ci/tests/Tests/1100__data_science_pipelines/1105__data-science-pipelines-gpu-nvidia.robot rename to ods_ci/tests/Tests/1100__data_science_pipelines/1105__data-science-pipelines-gpu.robot index 87526ac08..086e53afc 100644 --- a/ods_ci/tests/Tests/1100__data_science_pipelines/1105__data-science-pipelines-gpu-nvidia.robot +++ b/ods_ci/tests/Tests/1100__data_science_pipelines/1105__data-science-pipelines-gpu.robot @@ -4,46 +4,65 @@ Resource ../../Resources/RHOSi.resource Resource ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource Resource ../../Resources/CLI/DataSciencePipelines/DataSciencePipelinesBackend.resource Test Tags DataSciencePipelines-Backend -Suite Setup Dsp Nvidia Gpu Suite Setup -Suite Teardown Dsp Nvidia Gpu Suite Teardown +Suite Setup Dsp Gpu Suite Setup +Suite Teardown Dsp Gpu Suite Teardown *** Variables *** # robocop: off=line-too-long -${PROJECT}= dsp-gpu-nvidia -${PIPELINE_GPU_AVAILABILITY_FILEPATH}= tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_verify_gpu_availability_compiled.yaml +${PROJECT}= dsp-gpu +${PIPELINE_NVIDIA_GPU}= tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml +${PIPELINE_AMD_GPU}= tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml # robocop: on=line-too-long *** Test Cases *** # robocop: off=too-long-test-case -Verify Pipeline Tasks Run On GPU Nodes Only When Tolerations Are Added +Verify Pipeline Tasks Run On Nvidia GPU Nodes Only When Tolerations Are Added [Documentation] Runs a pipeline that tests GPU availability according to GPU tolerations in pipeline tasks: ... - One task should not have GPUs available, as we don't add the GPU tolerations ... - Another task should have GPUs available, as we add the GPU tolerations [Tags] Tier1 Resources-GPU NVIDIA-GPUs + Verify Pipeline Tasks Run On GPU Nodes Only When Tolerations Are Added ${PIPELINE_NVIDIA_GPU} + +# robocop: off=too-long-test-case +Verify Pipeline Tasks Run On AMD GPU Nodes Only When Tolerations Are Added + [Documentation] Runs a pipeline that tests GPU availability according to GPU tolerations in pipeline tasks: + ... - One task should not have GPUs available, as we don't add the GPU tolerations + ... - Another task should have GPUs available, as we add the GPU tolerations + [Tags] Tier1 Resources-GPU AMD-GPUs + + Verify Pipeline Tasks Run On GPU Nodes Only When Tolerations Are Added ${PIPELINE_AMD_GPU} + + +*** Keywords *** +# robocop: off=too-long-test-case +Verify Pipeline Tasks Run On GPU Nodes Only When Tolerations Are Added + [Documentation] Runs a pipeline that tests GPU availability according to GPU tolerations in pipeline tasks: + ... - One task should not have GPUs available, as we don't add the GPU tolerations + ... - Another task should have GPUs available, as we add the GPU tolerations + [Arguments] ${pipeline_package_path} + # robocop: off=unused-variable ${pipeline_id} ${pipeline_version_id} ${pipeline_run_id} ${experiment_id}= ... DataSciencePipelinesBackend.Import Pipeline And Create Run ... namespace=${PROJECT} username=${TEST_USER.USERNAME} password=${TEST_USER.PASSWORD} ... pipeline_name=pytorch-verify-gpu-availability ... pipeline_description=Verifies GPU availability in tasks when using tolerations - ... pipeline_package_path=${PIPELINE_GPU_AVAILABILITY_FILEPATH} + ... pipeline_package_path=${pipeline_package_path} ... 
pipeline_run_name=pytorch-verify-gpu-availability-run DataSciencePipelinesBackend.Wait For Run Completion And Verify Status ... namespace=${PROJECT} username=${TEST_USER.USERNAME} password=${TEST_USER.PASSWORD} - ... pipeline_run_id=${pipeline_run_id} pipeline_run_timeout=240 + ... pipeline_run_id=${pipeline_run_id} pipeline_run_timeout=600 ... pipeline_run_expected_status=SUCCEEDED [Teardown] DataSciencePipelinesBackend.Delete Pipeline And Related Resources ... namespace=${PROJECT} username=${TEST_USER.USERNAME} password=${TEST_USER.PASSWORD} ... pipeline_id=${pipeline_id} - -*** Keywords *** -Dsp Nvidia Gpu Suite Setup +Dsp Gpu Suite Setup [Documentation] Dsp Acceptance Suite Setup RHOSi Setup Projects.Create Data Science Project From CLI ${PROJECT} @@ -56,7 +75,7 @@ Dsp Nvidia Gpu Suite Setup ... dsp_version=v2 DataSciencePipelinesBackend.Wait Until Pipeline Server Is Deployed namespace=${PROJECT} -Dsp Nvidia Gpu Suite Teardown +Dsp Gpu Suite Teardown [Documentation] Dsp Acceptance Suite Teardown Projects.Delete Project Via CLI By Display Name ${PROJECT} RHOSi Teardown