From 60b612187c876694720c7c24427a815053da74b5 Mon Sep 17 00:00:00 2001 From: Steven Dake Date: Thu, 22 Aug 2024 18:10:00 +0000 Subject: [PATCH] Add istiod service mesh The goal here is to connect a public cloud DNS entry to virtual machines in my home lab. The configuration is derived from the Istio documentation (multi-network, single cluster, automatic WorkloadEntry creation). I am not sure why `run-curl.sh` doesn't complete. The virtual machines are added to the mesh: ``` sdake@a40x2:~$ istioctl proxy-status NAME CLUSTER CDS LDS EDS RDS ECDS ISTIOD VERSION a40x2.vllm cluster1 SYNCED SYNCED SYNCED SYNCED istiod-67f89ccbd9-5slrc 1.23.0 istio-eastwestgateway-6cb57b485f-f76pv.istio-ingress cluster1 SYNCED SYNCED SYNCED istiod-67f89ccbd9-5slrc 1.23.0 istio-ingress-7bd5b47574-zs548.istio-ingress cluster1 SYNCED SYNCED SYNCED SYNCED istiod-67f89ccbd9-5slrc 1.23.0 ``` The a40x2.vllm proxy should map to the DNS name vllm.vllm. I have another node not currently shown here (a30x2). This configuration doesn't quite set up automatic WorkloadEntry creation. To do that, some environment variables need to be set. The workflow is: ``` bash generate-istio-manifests.sh kubectl apply -f istio-ns.yaml kubectl apply -f istio-base.yaml kubectl apply -f istio-istiod-multi.yaml kubectl apply -f istio-gateway-multi.yaml kubectl apply -f istio-gateway-eastwest-multi.yaml kubectl apply -f meta.yaml kubectl apply -f expose-istiod.yaml bash install-vm-files.sh bash run-curl.sh ``` This run-curl.sh operation should generate some output, but currently fails. I am not sure if the failure is a misconfiguration, related to a problem with the eastwest gateway and ingress gateway routing, or a defect in Istio. 
--- .../terraform/03_servicemesh/accesslogs.yaml | 10 ++ platform/terraform/03_servicemesh/bridge.yaml | 37 ++++++ .../03_servicemesh/expose-istiod.yaml | 59 ++++++++++ .../03_servicemesh/expose-services.yaml | 20 ++++ .../generate-istio-manifests.sh | 28 +++++ .../03_servicemesh/install-vm-files.sh | 28 +++++ .../terraform/03_servicemesh/istio-ns.yaml | 12 ++ platform/terraform/03_servicemesh/meta.yaml | 107 ++++++++++++++++++ .../precheck-check-jwt-enabled.sh | 7 ++ platform/terraform/03_servicemesh/run-curl.sh | 10 ++ .../terraform/03_servicemesh/values-base.yaml | 1 + .../values-gateway-eastwest-multi.yaml | 26 +++++ .../03_servicemesh/values-gateway-multi.yaml | 15 +++ .../03_servicemesh/values-istiod-multi.yaml | 14 +++ .../03_servicemesh/workloadgroup.yaml | 21 ++++ 15 files changed, 395 insertions(+) create mode 100644 platform/terraform/03_servicemesh/accesslogs.yaml create mode 100644 platform/terraform/03_servicemesh/bridge.yaml create mode 100644 platform/terraform/03_servicemesh/expose-istiod.yaml create mode 100644 platform/terraform/03_servicemesh/expose-services.yaml create mode 100644 platform/terraform/03_servicemesh/generate-istio-manifests.sh create mode 100644 platform/terraform/03_servicemesh/install-vm-files.sh create mode 100644 platform/terraform/03_servicemesh/istio-ns.yaml create mode 100644 platform/terraform/03_servicemesh/meta.yaml create mode 100644 platform/terraform/03_servicemesh/precheck-check-jwt-enabled.sh create mode 100644 platform/terraform/03_servicemesh/run-curl.sh create mode 100644 platform/terraform/03_servicemesh/values-base.yaml create mode 100644 platform/terraform/03_servicemesh/values-gateway-eastwest-multi.yaml create mode 100644 platform/terraform/03_servicemesh/values-gateway-multi.yaml create mode 100644 platform/terraform/03_servicemesh/values-istiod-multi.yaml create mode 100644 platform/terraform/03_servicemesh/workloadgroup.yaml diff --git a/platform/terraform/03_servicemesh/accesslogs.yaml 
b/platform/terraform/03_servicemesh/accesslogs.yaml new file mode 100644 index 0000000..423465f --- /dev/null +++ b/platform/terraform/03_servicemesh/accesslogs.yaml @@ -0,0 +1,10 @@ +apiVersion: telemetry.istio.io/v1 +kind: Telemetry +metadata: + name: mesh-default + namespace: istio-system +spec: + accessLogging: + - providers: + - name: envoy + diff --git a/platform/terraform/03_servicemesh/bridge.yaml b/platform/terraform/03_servicemesh/bridge.yaml new file mode 100644 index 0000000..0e62d95 --- /dev/null +++ b/platform/terraform/03_servicemesh/bridge.yaml @@ -0,0 +1,37 @@ +### +# +# I couldn't get this to work. The gist was to forward services that arrive on ingress-gateway +# to the eastwest-gateway. Just storing in git for now as a reference for later. + +apiVersion: networking.istio.io/v1 +kind: Gateway +metadata: + name: eastwest-gateway +spec: + selector: + istio: eastwestgateway + servers: + - port: + number: 8080 + name: http + protocol: HTTP + hosts: + - "*" +--- +apiVersion: networking.istio.io/v1 +kind: VirtualService +metadata: + name: forward-to-eastwest +spec: + hosts: + - "*" + gateways: + - ingress-gateway + http: + - match: + - port: 8080 + route: + - destination: + host: istio-eastwestgateway.istio-ingress.svc.cluster.local + port: + number: 8080 diff --git a/platform/terraform/03_servicemesh/expose-istiod.yaml b/platform/terraform/03_servicemesh/expose-istiod.yaml new file mode 100644 index 0000000..1681e1f --- /dev/null +++ b/platform/terraform/03_servicemesh/expose-istiod.yaml @@ -0,0 +1,59 @@ +### +# +# Provides a gateway to connect to istiod.istio-system from virtual machines. +# Direct from the samples directory in istio. 
+ +apiVersion: networking.istio.io/v1alpha3 +kind: Gateway +metadata: + name: istiod-gateway +spec: + selector: + istio: eastwestgateway + servers: + - port: + name: tls-istiod + number: 15012 + protocol: tls + tls: + mode: PASSTHROUGH + hosts: + - "*" + - port: + name: tls-istiodwebhook + number: 15017 + protocol: tls + tls: + mode: PASSTHROUGH + hosts: + - "*" +--- +apiVersion: networking.istio.io/v1alpha3 +kind: VirtualService +metadata: + name: istiod-vs +spec: + hosts: + - "*" + gateways: + - istiod-gateway + tls: + - match: + - port: 15012 + sniHosts: + - "*" + route: + - destination: + host: istiod.istio-system.svc.cluster.local + port: + number: 15012 + - match: + - port: 15017 + sniHosts: + - "*" + route: + - destination: + host: istiod.istio-system.svc.cluster.local + port: + number: 443 + diff --git a/platform/terraform/03_servicemesh/expose-services.yaml b/platform/terraform/03_servicemesh/expose-services.yaml new file mode 100644 index 0000000..5ae16eb --- /dev/null +++ b/platform/terraform/03_servicemesh/expose-services.yaml @@ -0,0 +1,20 @@ +### +# +# Declares an AUTO_PASSTHROUGH TLS server on port 15443 of the east-west gateway for all "*.local" hosts. + +apiVersion: networking.istio.io/v1alpha3 +kind: Gateway +metadata: + name: cross-network-gateway +spec: + selector: + istio: eastwestgateway + servers: + - port: + number: 15443 + name: tls + protocol: TLS + tls: + mode: AUTO_PASSTHROUGH + hosts: + - "*.local" diff --git a/platform/terraform/03_servicemesh/generate-istio-manifests.sh b/platform/terraform/03_servicemesh/generate-istio-manifests.sh new file mode 100644 index 0000000..56746af --- /dev/null +++ b/platform/terraform/03_servicemesh/generate-istio-manifests.sh @@ -0,0 +1,28 @@ +### +# +# This was tested with Istio 1.22.3 +# +# curl -LO https://github.com/istio/istio/releases/download/1.22.3/istio-1.22.3-linux-amd64.tar.gz + + +### +# +# ambient mode does not work with virtual machines +# ambient mode appears to require ztunnel +# to enable ambient mode, use `--set profile=ambient` with `istio-cni` and `istiod`. 
Add ztunnel. +# helm template ztunnel istio/ztunnel --namespace istio-system > istio-ztunnel.yaml + + +### +# +# Helm is currently recommended upstream. +# use `helm template` to create a record of manifests. + +#helm repo add istio https://istio-release.storage.googleapis.com/charts +#helm repo update +#helm template istio-cni istio/cni --namespace istio-system > istio-cni.yaml + +helm template istio-base istio/base --namespace istio-system --include-crds --values values-base.yaml > istio-base.yaml +helm template istiod istio/istiod --namespace istio-system --values values-istiod-multi.yaml > istio-istiod-multi.yaml +helm template istio-ingress istio/gateway --namespace istio-ingress --values values-gateway-multi.yaml > istio-gateway-multi.yaml +helm template istio-ingress istio/gateway --namespace istio-ingress --values values-gateway-eastwest-multi.yaml > istio-gateway-eastwest-multi.yaml diff --git a/platform/terraform/03_servicemesh/install-vm-files.sh b/platform/terraform/03_servicemesh/install-vm-files.sh new file mode 100644 index 0000000..c5767be --- /dev/null +++ b/platform/terraform/03_servicemesh/install-vm-files.sh @@ -0,0 +1,28 @@ +### +# +# Make sure the service istio-eastwestgateway has an assigned external ip. +# Multi-network, automatic workloadentry creation (hence --autoregister below): +# https://istio.io/latest/docs/setup/install/virtual-machine/ + +### +# +# This creates files to install on the virtual machine. + +mkdir -p "$(pwd)/vm-files" +ingress_ip=$(kubectl get svc istio-eastwestgateway -n istio-ingress -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +istioctl x workload entry configure -f workloadgroup.yaml -o "$(pwd)/vm-files" --clusterID "cluster1" --ingressIP "${ingress_ip}" --autoregister + +### +# +# This needs to be run on the virtual machine. 
+ +sudo systemctl stop istio +sudo rm -rf /etc/certs/* +sudo mkdir -p /etc/certs /etc/istio/config /var/lib/istio/envoy /var/run/secrets/tokens +sudo cp vm-files/mesh.yaml /etc/istio/config/mesh +sudo cp vm-files/root-cert.pem /etc/certs/root-cert.pem +sudo cp vm-files/cluster.env /var/lib/istio/envoy/cluster.env +sudo cp vm-files/istio-token /var/run/secrets/tokens/istio-token +sudo mkdir -p /etc/istio/proxy +sudo chown -R istio-proxy /var/lib/istio /etc/certs /etc/istio/proxy /etc/istio/config /var/run/secrets +sudo systemctl start istio diff --git a/platform/terraform/03_servicemesh/istio-ns.yaml b/platform/terraform/03_servicemesh/istio-ns.yaml new file mode 100644 index 0000000..bf204ab --- /dev/null +++ b/platform/terraform/03_servicemesh/istio-ns.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: istio-system + # labels: + # topology.istio.io/network: kubenetwork +--- +apiVersion: v1 +kind: Namespace +metadata: + name: istio-ingress diff --git a/platform/terraform/03_servicemesh/meta.yaml b/platform/terraform/03_servicemesh/meta.yaml new file mode 100644 index 0000000..204bba2 --- /dev/null +++ b/platform/terraform/03_servicemesh/meta.yaml @@ -0,0 +1,107 @@ +### +# +# The goal of this manifest is to +# 1. provide a workloadgroup for vllm serverless operations +# 2. provide a vllm.vllm dns resolved name that maps to the virtual machines +# 3. automatically healthcheck vllm.vllm on port 8000 +# 4. forward all traffic from ingressgateway to vllm.vllm. This last part doesn't appear to work. 
+ +--- +apiVersion: networking.istio.io/v1 +kind: WorkloadGroup +metadata: + name: vllm + namespace: vllm +spec: + metadata: + labels: + app: vllm + template: + ports: + http: 8000 + serviceAccount: default + network: vmnetwork + probe: + periodSeconds: 5 + initialDelaySeconds: 1 + httpGet: + port: 8000 + path: /health +--- +apiVersion: networking.istio.io/v1 +kind: ServiceEntry +metadata: + name: vllm + namespace: vllm + labels: + app: vllm +spec: + hosts: + - vllm.vllm.svc.cluster.local + location: MESH_INTERNAL + ports: + - name: http + number: 8000 + protocol: HTTP + resolution: STATIC + workloadSelector: + labels: + app: vllm +--- +apiVersion: networking.istio.io/v1 +kind: VirtualService +metadata: + name: vllm + namespace: vllm + labels: + app: vllm +spec: + hosts: + - "*" + gateways: + - gateway + http: + - match: + - uri: + prefix: /health + route: + - destination: + port: + number: 8000 + host: vllm.vllm.svc.cluster.local +--- +apiVersion: networking.istio.io/v1 +kind: Gateway +metadata: + name: gateway + namespace: vllm +spec: + selector: + istio: ingressgateway + servers: + - port: + number: 80 + name: http + protocol: HTTP + hosts: + - "*" +#--- +# I am not currently using this +# +#apiVersion: networking.istio.io/v1 +#kind: DestinationRule +#metadata: +# name: vllm-dr +# namespace: vllm +# labels: +# app: foo +#spec: +# host: vm1 +# trafficPolicy: +# loadBalancer: +# simple: ROUND_ROBIN + # subsets: + # - name: v1 + # labels: + # version: v1 +--- diff --git a/platform/terraform/03_servicemesh/precheck-check-jwt-enabled.sh b/platform/terraform/03_servicemesh/precheck-check-jwt-enabled.sh new file mode 100644 index 0000000..c7e67a0 --- /dev/null +++ b/platform/terraform/03_servicemesh/precheck-check-jwt-enabled.sh @@ -0,0 +1,7 @@ +### +# +# Check for third-party jwt token support +# +# https://istio.io/latest/docs/ops/best-practices/security/#configure-third-party-service-account-tokens + +kubectl get --raw /api/v1 | jq '.resources[] | select(.name | index("serviceaccounts/token"))' diff --git 
a/platform/terraform/03_servicemesh/run-curl.sh b/platform/terraform/03_servicemesh/run-curl.sh new file mode 100644 index 0000000..b554109 --- /dev/null +++ b/platform/terraform/03_servicemesh/run-curl.sh @@ -0,0 +1,10 @@ +ingress_ip=$(kubectl get svc -n istio-ingress istio-ingress -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +curl -v "http://${ingress_ip}:80/health" +#curl -v -H "Host: httpbin.example.com" http://${ingress_ip}:80/health +#curl -v -H "Host: vllm" http://${ingress_ip}:80 +#curl -v http://${ingress_ip}:80 +echo "This is the ingress gateway --------------------->" + +kubectl logs -n istio-ingress -l istio=ingressgateway +echo "This is the eastwest gateway --------------------->" +kubectl logs -n istio-ingress -l istio=eastwestgateway diff --git a/platform/terraform/03_servicemesh/values-base.yaml b/platform/terraform/03_servicemesh/values-base.yaml new file mode 100644 index 0000000..07718d8 --- /dev/null +++ b/platform/terraform/03_servicemesh/values-base.yaml @@ -0,0 +1 @@ +#revision: v1-22-3 diff --git a/platform/terraform/03_servicemesh/values-gateway-eastwest-multi.yaml b/platform/terraform/03_servicemesh/values-gateway-eastwest-multi.yaml new file mode 100644 index 0000000..b2db614 --- /dev/null +++ b/platform/terraform/03_servicemesh/values-gateway-eastwest-multi.yaml @@ -0,0 +1,26 @@ +# labels.topology.istio.io/network: vmnetwork +name: istio-eastwestgateway +labels: + istio: eastwestgateway + app: istio-eastwestgateway +service: + annotations: + oci.oraclecloud.com/load-balancer-type: "lb" + service.beta.kubernetes.io/oci-load-balancer-shape: "flexible" + service.beta.kubernetes.io/oci-load-balancer-shape-flex-min: "10" + service.beta.kubernetes.io/oci-load-balancer-shape-flex-max: "100" + topology.istio.io/network: kubenetwork + ports: + - name: status-port + port: 15021 + targetPort: 15021 + - name: tls + port: 15443 + targetPort: 15443 + - name: tls-istiod + port: 15012 + targetPort: 15012 + - name: tls-webhook + port: 15017 + targetPort: 
15017 +networkGateway: kubenetwork diff --git a/platform/terraform/03_servicemesh/values-gateway-multi.yaml b/platform/terraform/03_servicemesh/values-gateway-multi.yaml new file mode 100644 index 0000000..2d3860b --- /dev/null +++ b/platform/terraform/03_servicemesh/values-gateway-multi.yaml @@ -0,0 +1,15 @@ +labels: + istio: ingressgateway + app: istio-ingressgateway +service: + annotations: + oci.oraclecloud.com/load-balancer-type: "lb" + service.beta.kubernetes.io/oci-load-balancer-shape: "flexible" + service.beta.kubernetes.io/oci-load-balancer-shape-flex-min: "10" + service.beta.kubernetes.io/oci-load-balancer-shape-flex-max: "100" +pilot: + traceSampling: 100 +logLevel: debug +tracing: + enabled: true +# networkGateway is deliberately unset here: only the east-west gateway acts as a network gateway. diff --git a/platform/terraform/03_servicemesh/values-istiod-multi.yaml b/platform/terraform/03_servicemesh/values-istiod-multi.yaml new file mode 100644 index 0000000..aa7df7b --- /dev/null +++ b/platform/terraform/03_servicemesh/values-istiod-multi.yaml @@ -0,0 +1,14 @@ +global: + meshID: mesh1 + multiCluster: + enabled: true + clusterName: cluster1 + network: kubenetwork + meshConfig: + enableTracing: true + accessLogFile: /dev/stdout + accessLogEncoding: TEXT + accessLogFormat: "[%START_TIME%] \"%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%\" %RESPONSE_CODE% %RESPONSE_FLAGS% %RESPONSE_CODE_DETAILS% %CONNECTION_TERMINATION_DETAILS% \"%UPSTREAM_TRANSPORT_FAILURE_REASON%\" %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% \"%REQ(X-FORWARDED-FOR)%\" \"%REQ(USER-AGENT)%\" \"%REQ(X-REQUEST-ID)%\" \"%REQ(:AUTHORITY)%\" \"%UPSTREAM_HOST%\" %UPSTREAM_CLUSTER% %UPSTREAM_LOCAL_ADDRESS% %DOWNSTREAM_LOCAL_ADDRESS% %DOWNSTREAM_REMOTE_ADDRESS% %REQUESTED_SERVER_NAME% %ROUTE_NAME%\n" + traceSampling: 100 +tracing: + enabled: true diff --git a/platform/terraform/03_servicemesh/workloadgroup.yaml b/platform/terraform/03_servicemesh/workloadgroup.yaml new file mode 100644 index 0000000..0412e6c --- 
/dev/null +++ b/platform/terraform/03_servicemesh/workloadgroup.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: networking.istio.io/v1 +kind: WorkloadGroup +metadata: + name: vllm + namespace: vllm +spec: + metadata: + labels: + app: vllm + template: + ports: + http: 8000 + serviceAccount: default + network: vmnetwork + probe: + periodSeconds: 5 + initialDelaySeconds: 1 + httpGet: + port: 8000 + path: /health