diff --git a/cloud-service-provider/gcp/gke/terraform/README.md b/cloud-service-provider/gcp/gke/terraform/README.md
new file mode 100644
index 000000000..89f53ce25
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/README.md
@@ -0,0 +1,83 @@
+# OPEA applications Google Cloud GKE deployment guide
+
+This guide shows how to deploy OPEA applications on Google Cloud Platform (GCP) Google Kubernetes Engine (GKE) using Terraform.
+
+## Prerequisites
+
+- Access to GCP GKE
+- [Terraform](https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli), [GCP CLI](https://cloud.google.com/sdk/gcloud) and [Helm](https://helm.sh/docs/helm/helm_install/) installed on your local machine.
+
+## Setup
+
+The setup uses Terraform to create a GKE cluster with the following properties:
+
+- 1-node GKE cluster with a `c4-standard-32` machine type (32 vCPU) and a 100 GB `hyperdisk-balanced` disk, as configured in `opea-chatqna.tfvars`
+- Optional node pool autoscaling (disabled in `opea-chatqna.tfvars`; bounds are set by the pool's `min_count`/`max_count`)
+- A Google Cloud Storage (GCS) bucket exposed through the GCS FUSE CSI driver as a Persistent Volume, with a Persistent Volume Claim (PVC) `model-volume` for storing the model data
+- `LoadBalancer` address type for the service for external consumption
+- Updates the kubeconfig file for `kubectl` access
+
+Initialize the Terraform environment.
+
+```bash
+terraform init
+```
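+
+Terraform's Google provider picks up Application Default Credentials from the `gcloud` CLI. If `terraform init` or later `terraform plan` complains about missing credentials, authenticate first (a minimal sketch; `<PROJECT_ID>` is a placeholder for your own project):
+
+```bash
+gcloud auth application-default login
+gcloud config set project <PROJECT_ID>
+```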
+
+## GKE cluster
+
+By default, a 1-node cluster is created, which is suitable for running the OPEA application. See `variables.tf` and `opea-chatqna.tfvars` if you want to tune the cluster properties, e.g., the number of nodes, machine types or disk size. Remember to set `project_id` in `opea-chatqna.tfvars` to your own Google Cloud project.
+
+## Persistent Volume Claim
+
+OPEA needs a volume to store the model. For that, a Kubernetes Persistent Volume Claim (PVC) is used. OPEA requires the `ReadWriteMany` access mode since multiple pods need access to the storage and they can be on different nodes. On GKE, this is provided by a GCS bucket mounted through the GCS FUSE CSI driver. Terraform creates the bucket, the Persistent Volume `opea-model-pv` and the PVC `model-volume` in the application namespace, so no separate manifest needs to be applied.
+
+## OPEA Applications
+
+### ChatQnA
+
+Use the commands below to create the GKE cluster.
+
+```bash
+terraform plan --var-file opea-chatqna.tfvars -out opea-chatqna.plan
+terraform apply "opea-chatqna.plan"
+```
+
+Once the cluster is ready, the kubeconfig file to access the new cluster is updated automatically. By default, the file is `~/.kube/config`.
+
+Now you should have access to the cluster via the `kubectl` command.
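+
+For example, listing the nodes should show the node pool defined in `opea-chatqna.tfvars` in `Ready` state:
+
+```bash
+kubectl get nodes
+```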
+
+Deploy the ChatQnA application with Helm. The `custom-values.yaml` file in this directory sets the `gke-gcsfuse/volumes` pod annotations that the GCS FUSE CSI driver requires to mount the model volume:
+
+```bash
+helm install -n chatqna --create-namespace chatqna oci://ghcr.io/opea-project/charts/chatqna -f custom-values.yaml --set service.type=LoadBalancer --set global.modelUsePVC=model-volume --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}
+```
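+
+`HFTOKEN` above is assumed to be a shell variable holding your Hugging Face Hub token, e.g.:
+
+```bash
+export HFTOKEN=<your-hugging-face-token>
+```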
+
+The PVC `model-volume` was already created by Terraform as described [above](#persistent-volume-claim); verify that it is bound:
+
+```bash
+kubectl get pvc model-volume -n chatqna
+```
+
+After a while, the OPEA application should be running. You can check the status via `kubectl`.
+
+```bash
+kubectl get pod -n chatqna
+```
+
+You can now start using the OPEA application. Note that on GKE the LoadBalancer service exposes an IP address rather than a hostname.
+
+```bash
+OPEA_SERVICE=$(kubectl get svc -n chatqna chatqna -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+curl http://${OPEA_SERVICE}:8888/v1/chatqna \
+  -H "Content-Type: application/json" \
+  -d '{"messages": "What is the revenue of Nike in 2023?"}'
+```
+
+## Cleanup
+
+Delete the cluster via the following commands.
+
+```bash
+helm uninstall -n chatqna chatqna
+terraform destroy -var-file opea-chatqna.tfvars
+```
diff --git a/cloud-service-provider/gcp/gke/terraform/custom-values.yaml b/cloud-service-provider/gcp/gke/terraform/custom-values.yaml
new file mode 100644
index 000000000..2b001b1d5
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/custom-values.yaml
@@ -0,0 +1,12 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:
+  podAnnotations:
+    gke-gcsfuse/volumes: "true"
+tei:
+  podAnnotations:
+    gke-gcsfuse/volumes: "true"
+teirerank:
+  podAnnotations:
+    gke-gcsfuse/volumes: "true"
diff --git a/cloud-service-provider/gcp/gke/terraform/main.tf b/cloud-service-provider/gcp/gke/terraform/main.tf
new file mode 100644
index 000000000..0493968e1
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/main.tf
@@ -0,0 +1,157 @@
+data "google_client_config" "default" {}
+data "google_project" "current" { project_id = var.project_id }
+
+provider "kubernetes" {
+  host                   = "https://${module.gke.endpoint}"
+  token                  = data.google_client_config.default.access_token
+  cluster_ca_certificate = base64decode(module.gke.ca_certificate)
+}
+
+provider "helm" {
+  kubernetes {
+    host                   = "https://${module.gke.endpoint}"
+    token                  = data.google_client_config.default.access_token
+    cluster_ca_certificate = base64decode(module.gke.ca_certificate)
+  }
+}
+
+# Block inbound SSH/FTP-range and RDP traffic to the cluster network.
+resource "google_compute_firewall" "default" {
+  #count   = var.firewall ? 1 : 0
+  name    = "${var.cluster_name}-firewall"
+  network = google_compute_network.default.name
+
+  deny {
+    protocol = "tcp"
+    ports    = ["20-22", "3389"]
+  }
+  #target_tags   = [ "${var.cluster_name}-firewall" ]
+  source_ranges = ["0.0.0.0/0"]
+}
+
+resource "google_compute_network" "default" {
+  name                    = "standalone"
+  auto_create_subnetworks = false
+}
+
+resource "google_compute_subnetwork" "default" {
+  name          = "example-subnetwork"
+  region        = var.region
+  ip_cidr_range = "10.0.0.0/16"
+  stack_type    = "IPV4_ONLY"
+
+  network = google_compute_network.default.id
+  secondary_ip_range {
+    range_name    = "services-range"
+    ip_cidr_range = "192.168.0.0/24"
+  }
+
+  secondary_ip_range {
+    range_name    = "pod-ranges"
+    ip_cidr_range = "172.16.0.0/12"
+  }
+}
+
+module "gke" {
+  source                   = "terraform-google-modules/kubernetes-engine/google"
+  version                  = "34.0.0"
+  project_id               = var.project_id
+  name                     = var.cluster_name
+  region                   = var.region
+  kubernetes_version       = var.cluster_version
+  network                  = google_compute_network.default.name
+  subnetwork               = google_compute_subnetwork.default.name
+  ip_range_pods            = google_compute_subnetwork.default.secondary_ip_range[1].range_name
+  ip_range_services        = google_compute_subnetwork.default.secondary_ip_range[0].range_name
+  gcs_fuse_csi_driver      = true
+  deletion_protection      = false
+  remove_default_node_pool = true
+  node_pools               = var.cpu_pool
+
+  node_pools_oauth_scopes = {
+    all = [
+      "https://www.googleapis.com/auth/cloud-platform",
+      "https://www.googleapis.com/auth/logging.write",
+      "https://www.googleapis.com/auth/monitoring",
+      "https://www.googleapis.com/auth/service.management.readonly",
+      "https://www.googleapis.com/auth/servicecontrol",
+    ]
+  }
+}
+
+# Update the local kubeconfig so kubectl can reach the new cluster.
+resource "null_resource" "kubectl" {
+  provisioner "local-exec" {
+    command = "gcloud container clusters get-credentials ${var.cluster_name} --region ${var.region}"
+  }
+  depends_on = [module.gke]
+}
+
+resource "kubernetes_namespace" "opea_app" {
+  metadata {
+    name = var.namespace
+  }
+}
+
+resource "kubernetes_service_account" "opea_gcs_sa" {
+  metadata {
+    name      = "opea-gcs-sa"
+    namespace = var.namespace
+  }
+  depends_on = [kubernetes_namespace.opea_app]
+}
+
+resource "google_storage_bucket" "model" {
+  name          = var.gcs_bucket_name
+  location      = var.gcs_bucket_location
+  force_destroy = true

+  uniform_bucket_level_access = true
+}
+
+# Grant the workload identity principal object access to the model bucket.
+resource "google_storage_bucket_iam_binding" "opea_gcs_sa_binding" {
+  bucket = google_storage_bucket.model.name
+  role   = "roles/storage.objectUser"
+  members = [
+    # FIXME: we can't use the SA we created due to #532
+    # "principal://iam.googleapis.com/projects/${data.google_project.current.number}/locations/global/workloadIdentityPools/${data.google_project.current.project_id}.svc.id.goog/subject/ns/${kubernetes_service_account.opea_gcs_sa.metadata[0].namespace}/sa/${kubernetes_service_account.opea_gcs_sa.metadata[0].name}",
+    "principal://iam.googleapis.com/projects/${data.google_project.current.number}/locations/global/workloadIdentityPools/${data.google_project.current.project_id}.svc.id.goog/subject/ns/${kubernetes_service_account.opea_gcs_sa.metadata[0].namespace}/sa/default",
+  ]
+  depends_on = [kubernetes_service_account.opea_gcs_sa]
+}
+
+resource "kubernetes_persistent_volume_claim" "model" {
+  metadata {
+    name      = "model-volume"
+    namespace = var.namespace
+  }
+  spec {
+    # The made-up storage class name prevents dynamic provisioning; the claim
+    # binds to the statically defined gcsfuse-backed PV below via volume_name.
+    storage_class_name = "dummy"
+    access_modes       = ["ReadWriteMany"]
+    resources {
+      requests = {
+        storage = "50Gi"
+      }
+    }
+    volume_name = kubernetes_persistent_volume.model.metadata[0].name
+  }
+  depends_on = [null_resource.kubectl]
+}
+
+resource "kubernetes_persistent_volume" "model" {
+  metadata {
+    name = "opea-model-pv"
+  }
+  spec {
+    capacity = {
+      storage = "50Gi"
+    }
+    storage_class_name = "dummy"
+    access_modes       = ["ReadWriteMany"]
+    persistent_volume_source {
+      csi {
+        driver        = "gcsfuse.csi.storage.gke.io"
+        volume_handle = google_storage_bucket.model.name
+      }
+    }
+    mount_options = ["implicit-dirs", "uid=1000", "gid=1000"]
+  }
+  depends_on = [null_resource.kubectl]
+}
diff --git a/cloud-service-provider/gcp/gke/terraform/opea-chatqna.tfvars b/cloud-service-provider/gcp/gke/terraform/opea-chatqna.tfvars
new file mode 100644
index 000000000..cb2828c57
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/opea-chatqna.tfvars
@@ -0,0 +1,15 @@
+#hf_token = ""
+project_id   = "service-mesh-296815" # replace with your own project ID
+region       = "europe-west4"
+cluster_name = "opea"
+app_name     = "chatqna"
+namespace    = "chatqna"
+cpu_pool = [{
+  name : "cpu-pool"
+  machine_type : "c4-standard-32"
+  autoscaling : false
+  min_count : 1
+  max_count : 5
+  disk_size_gb : 100
+  disk_type : "hyperdisk-balanced"
+}]
\ No newline at end of file
diff --git a/cloud-service-provider/gcp/gke/terraform/terraform.tf b/cloud-service-provider/gcp/gke/terraform/terraform.tf
new file mode 100644
index 000000000..bd24835d0
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/terraform.tf
@@ -0,0 +1,11 @@
+terraform {
+  required_providers {
+    google = {
+      source = "hashicorp/google"
+    }
+    kubernetes = {
+      source = "hashicorp/kubernetes"
+    }
+  }
+  required_version = ">= 0.13"
+}
\ No newline at end of file
diff --git a/cloud-service-provider/gcp/gke/terraform/variables.tf b/cloud-service-provider/gcp/gke/terraform/variables.tf
new file mode 100644
index 000000000..f8b7a63ed
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/variables.tf
@@ -0,0 +1,109 @@
+variable "hf_token" {
+  # Note: currently unused by main.tf; the README passes the token to Helm directly.
+  description = "Hugging Face API token"
+  type        = string
+}
+
+variable "project_id" {
+  description = "Google Cloud PROJECT_ID"
+  type        = string
+}
+
+variable "region" {
+  description = "Google Cloud region"
+  type        = string
+  default     = "europe-west1"
+}
+
+variable "zone" {
+  description = "Google Cloud zone"
+  type        = string
+  default     = "a"
+}
+
+variable "cluster_name" {
+  description = "GKE cluster name"
+  type        = string
+  default     = null
+}
+
+variable "cluster_version" {
+  description = "GKE cluster version"
+  type        = string
+  default     = "1.31"
+}
+
+variable "firewall" {
+  description = "GKE firewall"
+  type        = bool
+  default     = false
+}
+
+variable "firewall_ports" {
+  description = "GKE firewall ports"
+  type        = list(string)
+  default     = null
+}
+
+variable "namespace" {
+  description = "OPEA application namespace"
+  type        = string
+  default     = "default"
+}
+
+variable "app_name" {
+  description = "OPEA application name"
+  type        = string
+}
+
+variable "cpu_pool" {
+  description = "Node pool definitions passed to the GKE module's node_pools"
+  type        = list(map(any))
+}
+
+variable "disk_size" {
+  description = "Disk size in GiB for nodes"
+  type        = number
+  default     = 20
+}
+
+variable "capacity_type" {
+  description = "Spot or on-demand instances"
+  type        = string
+  default     = "ON_DEMAND"
+}
+
+variable "min_size" {
+  description = "Minimum node count"
+  type        = number
+  default     = 1
+}
+
+variable "max_size" {
+  description = "Maximum node count"
+  type        = number
+  default     = 10
+}
+
+variable "desired_size" {
+  description = "Desired node count"
+  type        = number
+  default     = 1
+}
+
+variable "compute_engine_service_account" {
+  description = "Service account for managing the nodes"
+  type        = string
+  default     = null
+}
+
+variable "gcs_bucket_name" {
+  # GCS bucket names are globally unique; override this default with your own.
+  description = "Bucket name for storing model data"
+  type        = string
+  default     = "opea-models"
+}
+
+variable "gcs_bucket_location" {
+  description = "Bucket location"
+  type        = string
+  default     = "EU"
+}
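+
+# --- Illustrative outputs (editor's sketch, not part of the original change) ---
+# Terraform loads every *.tf file in the directory, so these could live here or
+# in a separate outputs.tf. `module.gke.name` is assumed to be exposed by the
+# terraform-google-modules GKE module; `google_storage_bucket.model` is defined
+# in main.tf.
+output "gke_cluster_name" {
+  description = "Name of the created GKE cluster"
+  value       = module.gke.name
+}
+
+output "model_bucket" {
+  description = "GCS bucket that backs the model-volume PVC"
+  value       = google_storage_bucket.model.name
+}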