cloud-service-provider/gcp/gke/terraform/README.md (new file, +83)
# OPEA applications Google Cloud GKE deployment guide

This guide shows how to deploy OPEA applications on Google Cloud Platform (GCP) Google Kubernetes Engine (GKE) using Terraform.

## Prerequisites

- Access to GCP GKE
- [Terraform](https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli), [GCP CLI](https://cloud.google.com/sdk/gcloud) and [Helm](https://helm.sh/docs/helm/helm_install/) installed on your local machine.

## Setup

The setup uses Terraform to create a GKE cluster with the following properties:

- 1-node GKE cluster with a 100 GB `hyperdisk-balanced` disk and a `c4-standard-32` node (32 vCPU), as configured in `opea-chatqna.tfvars`
- Node pool scaling up to 5 nodes (autoscaling is off by default; see the tfvars file)
- Persistent Volume (PV) backed by a GCS bucket through the GCS FUSE CSI driver, with a Persistent Volume Claim (PVC) `model-volume` for storing the model data
- `LoadBalancer` service type for external consumption
- Automatic kubeconfig update for `kubectl` access

Initialize the Terraform environment.

```bash
terraform init
```

## GKE cluster

By default, a 1-node cluster is created, which is suitable for running the OPEA application. See `variables.tf` and `opea-<application-name>.tfvars` if you want to tune the cluster properties, e.g., the number of nodes, instance types, or disk size.
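As an illustrative sketch (the values here are assumptions, not recommendations), a smaller autoscaling node pool could be declared in the tfvars file like this:

```hcl
cpu_pool = [{
  name : "cpu-pool"
  machine_type : "c4-standard-16" # hypothetical smaller machine type
  autoscaling : true              # let GKE scale the pool between min and max
  min_count : 1
  max_count : 3
  disk_size_gb : 100
  disk_type : "hyperdisk-balanced"
}]
```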

## Persistent Volume Claim

OPEA needs a volume in which to store the model. For that, Terraform creates a Google Cloud Storage (GCS) bucket and exposes it to the cluster through the GCS FUSE CSI driver as a Persistent Volume (PV), together with a matching Persistent Volume Claim (PVC) named `model-volume` in the application namespace. OPEA requires the `ReadWriteMany` access mode since multiple pods need access to the storage and they can run on different nodes; a GCS-backed volume satisfies this.
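For reference, the PVC that `main.tf` creates corresponds roughly to the following manifest (a sketch derived from the Terraform resources, not a file shipped with this guide):

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: model-volume
spec:
  storageClassName: dummy
  accessModes: ["ReadWriteMany"]
  volumeName: opea-model-pv   # PV backed by the GCS bucket via gcsfuse.csi.storage.gke.io
  resources:
    requests:
      storage: 50Gi
```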

## OPEA Applications

### ChatQnA

Use the commands below to create the GKE cluster.

```bash
terraform plan -var-file opea-chatqna.tfvars -out opea-chatqna.plan
terraform apply "opea-chatqna.plan"
```

Once the cluster is ready, the kubeconfig file to access the new cluster is updated automatically. By default, the file is `~/.kube/config`.

Now you should have access to the cluster via the `kubectl` command.

Deploy the ChatQnA application with Helm.

```bash
helm install -n chatqna --create-namespace chatqna oci://ghcr.io/opea-project/charts/chatqna --set service.type=LoadBalancer --set global.modelUsePVC=model-volume --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}
```
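The command above assumes the `HFTOKEN` environment variable already holds a Hugging Face API token; set it beforehand (the value below is a placeholder, not a real token):

```bash
# Export the token consumed via --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}
export HFTOKEN="hf_xxxxx" # placeholder, replace with your own token
echo "HFTOKEN is set (${#HFTOKEN} characters)"
```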

The PVC `model-volume` is created by Terraform as described [above](#persistent-volume-claim). Verify that it exists and is bound:

```bash
kubectl get pvc -n chatqna
```

After a while, the OPEA application should be running. You can check the status via `kubectl`.

```bash
kubectl get pod -n chatqna
```

You can now start using the OPEA application.

```bash
OPEA_SERVICE=$(kubectl get svc -n chatqna chatqna -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
curl http://${OPEA_SERVICE}:8888/v1/chatqna \
-H "Content-Type: application/json" \
-d '{"messages": "What is the revenue of Nike in 2023?"}'
```
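On GCP the load balancer address is an IP rather than a hostname. Composing the endpoint URL from it looks like this (the IP below is a placeholder standing in for the real `kubectl get svc` output):

```bash
OPEA_SERVICE="203.0.113.10" # placeholder; the real value comes from kubectl get svc
CHATQNA_URL="http://${OPEA_SERVICE}:8888/v1/chatqna"
echo "${CHATQNA_URL}"
```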

## Cleanup

Uninstall the application and destroy the cluster with the following commands.

```bash
helm uninstall -n chatqna chatqna
terraform destroy -var-file opea-chatqna.tfvars
```
cloud-service-provider/gcp/gke/terraform/custom-values.yaml (new file, +12)
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

tgi:
  podAnnotations:
    gke-gcsfuse/volumes: "true"
tei:
  podAnnotations:
    gke-gcsfuse/volumes: "true"
teirerank:
  podAnnotations:
    gke-gcsfuse/volumes: "true"
cloud-service-provider/gcp/gke/terraform/main.tf (new file, +157)
data "google_client_config" "default" {}
data "google_project" "current" { project_id = var.project_id }

provider "kubernetes" {
  host                   = "https://${module.gke.endpoint}"
  token                  = data.google_client_config.default.access_token
  cluster_ca_certificate = base64decode(module.gke.ca_certificate)
}

provider "helm" {
  kubernetes {
    host                   = "https://${module.gke.endpoint}"
    token                  = data.google_client_config.default.access_token
    cluster_ca_certificate = base64decode(module.gke.ca_certificate)
  }
}

resource "google_compute_firewall" "default" {
  #count   = var.firewall ? 1 : 0
  name    = "${var.cluster_name}-firewall"
  network = google_compute_network.default.name

  deny {
    protocol = "tcp"
    ports    = ["20-22", "3389"]
  }
  #target_tags   = ["${var.cluster_name}-firewall"]
  source_ranges = ["0.0.0.0/0"]
}

resource "google_compute_network" "default" {
  name                    = "standalone"
  auto_create_subnetworks = false
}

resource "google_compute_subnetwork" "default" {
  name          = "example-subnetwork"
  region        = var.region
  ip_cidr_range = "10.0.0.0/16"
  stack_type    = "IPV4_ONLY"

  network = google_compute_network.default.id

  secondary_ip_range {
    range_name    = "services-range"
    ip_cidr_range = "192.168.0.0/24"
  }

  secondary_ip_range {
    range_name    = "pod-ranges"
    ip_cidr_range = "172.16.0.0/12"
  }
}

module "gke" {
  source                   = "terraform-google-modules/kubernetes-engine/google"
  version                  = "34.0.0"
  project_id               = var.project_id
  name                     = var.cluster_name
  region                   = var.region
  kubernetes_version       = var.cluster_version
  network                  = google_compute_network.default.name
  subnetwork               = google_compute_subnetwork.default.name
  ip_range_pods            = google_compute_subnetwork.default.secondary_ip_range[1].range_name
  ip_range_services        = google_compute_subnetwork.default.secondary_ip_range[0].range_name
  gcs_fuse_csi_driver      = true
  deletion_protection      = false
  remove_default_node_pool = true
  node_pools               = var.cpu_pool

  node_pools_oauth_scopes = {
    all = [
      "https://www.googleapis.com/auth/cloud-platform",
      "https://www.googleapis.com/auth/logging.write",
      "https://www.googleapis.com/auth/monitoring",
      "https://www.googleapis.com/auth/service.management.readonly",
      "https://www.googleapis.com/auth/servicecontrol",
    ]
  }
}

resource "null_resource" "kubectl" {
  provisioner "local-exec" {
    command = "gcloud container clusters get-credentials ${var.cluster_name} --region ${var.region}"
  }
  depends_on = [module.gke]
}

resource "kubernetes_namespace" "opea_app" {
  metadata {
    name = var.namespace
  }
}

resource "kubernetes_service_account" "opea_gcs_sa" {
  metadata {
    name      = "opea-gcs-sa"
    namespace = var.namespace
  }
  depends_on = [kubernetes_namespace.opea_app]
}

resource "google_storage_bucket" "model" {
  name          = var.gcs_bucket_name
  location      = var.gcs_bucket_location
  force_destroy = true

  uniform_bucket_level_access = true
}

resource "google_storage_bucket_iam_binding" "opea_gcs_sa_binding" {
  bucket = google_storage_bucket.model.name
  role   = "roles/storage.objectUser"
  members = [
    # FIXME: we can't use the SA we created due to #532
    # "principal://iam.googleapis.com/projects/${data.google_project.current.number}/locations/global/workloadIdentityPools/${data.google_project.current.project_id}.svc.id.goog/subject/ns/${kubernetes_service_account.opea_gcs_sa.metadata[0].namespace}/sa/${kubernetes_service_account.opea_gcs_sa.metadata[0].name}",
    "principal://iam.googleapis.com/projects/${data.google_project.current.number}/locations/global/workloadIdentityPools/${data.google_project.current.project_id}.svc.id.goog/subject/ns/${kubernetes_service_account.opea_gcs_sa.metadata[0].namespace}/sa/default",
  ]
  depends_on = [kubernetes_service_account.opea_gcs_sa]
}

resource "kubernetes_persistent_volume_claim" "model" {
  metadata {
    name      = "model-volume"
    namespace = var.namespace
  }
  spec {
    storage_class_name = "dummy"
    access_modes       = ["ReadWriteMany"]
    resources {
      requests = {
        storage = "50Gi"
      }
    }
    volume_name = kubernetes_persistent_volume.model.metadata[0].name
  }
  depends_on = [null_resource.kubectl]
}

resource "kubernetes_persistent_volume" "model" {
  metadata {
    name = "opea-model-pv"
  }
  spec {
    capacity = {
      storage = "50Gi"
    }
    storage_class_name = "dummy"
    access_modes       = ["ReadWriteMany"]
    persistent_volume_source {
      csi {
        driver        = "gcsfuse.csi.storage.gke.io"
        volume_handle = google_storage_bucket.model.name
      }
    }
    mount_options = ["implicit-dirs", "uid=1000", "gid=1000"]
  }
  depends_on = [null_resource.kubectl]
}
cloud-service-provider/gcp/gke/terraform/opea-chatqna.tfvars (new file, +15)
#hf_token    = ""
project_id   = "service-mesh-296815"
region       = "europe-west4"
cluster_name = "opea"
app_name     = "chatqna"
namespace    = "chatqna"
cpu_pool = [{
  name : "cpu-pool"
  machine_type : "c4-standard-32"
  autoscaling : false
  min_count : 1
  max_count : 5
  disk_size_gb : 100
  disk_type : "hyperdisk-balanced"
}]
cloud-service-provider/gcp/gke/terraform/terraform.tf (new file, +11)
terraform {
  required_providers {
    google = {
      source = "hashicorp/google"
    }
    kubernetes = {
      source = "hashicorp/kubernetes"
    }
  }
  required_version = ">= 0.13"
}
cloud-service-provider/gcp/gke/terraform/variables.tf (new file, +109)
variable "hf_token" {
  description = "Hugging Face API token"
  type        = string
}

variable "project_id" {
  description = "Google Cloud PROJECT_ID"
  type        = string
}

variable "region" {
  description = "Google Cloud region"
  type        = string
  default     = "europe-west1"
}

variable "zone" {
  description = "Google Cloud zone suffix"
  type        = string
  default     = "a"
}

variable "cluster_name" {
  description = "GKE cluster name"
  type        = string
  default     = null
}

variable "cluster_version" {
  description = "GKE cluster version"
  type        = string
  default     = "1.31"
}

variable "firewall" {
  description = "Whether to create the GKE firewall rule"
  type        = bool
  default     = false
}

variable "firewall_ports" {
  description = "Ports for the GKE firewall rule"
  type        = list(string)
  default     = null
}

variable "namespace" {
  description = "OPEA application namespace"
  type        = string
  default     = "default"
}

variable "app_name" {
  description = "OPEA application name"
  type        = string
}

variable "cpu_pool" {
  description = "CPU node pool definition"
  type        = list(map(any))
}

variable "disk_size" {
  description = "Disk size in GiB for nodes"
  type        = number
  default     = 20
}

variable "capacity_type" {
  description = "Spot or on-demand capacity type for nodes"
  type        = string
  default     = "ON_DEMAND"
}

variable "min_size" {
  description = "Minimum number of nodes in the node pool"
  type        = number
  default     = 1
}

variable "max_size" {
  description = "Maximum number of nodes in the node pool"
  type        = number
  default     = 10
}

variable "desired_size" {
  description = "Desired number of nodes in the node pool"
  type        = number
  default     = 1
}

variable "compute_engine_service_account" {
  description = "Service account for managing the nodes"
  type        = string
  default     = null
}

variable "gcs_bucket_name" {
  description = "GCS bucket name for storing model data"
  type        = string
  default     = "opea-models"
}

variable "gcs_bucket_location" {
  description = "GCS bucket location"
  type        = string
  default     = "EU"
}