Skip to content

Commit 3dd0bed

Browse files
ciarams87sjbermansalonichf5bjee19
authored
Implement Inference Extension (#4091)
Problem: As a cluster operator managing traffic for generative models, I want to route prompt traffic within my cluster based on generative model request criteria, so that I can build a system to host multiple generative models. Solution: Add Gateway API Inference Extension support. Ref: https://gateway-api-inference-extension.sigs.k8s.io/ --------- Co-authored-by: Saylor Berman <[email protected]> Co-authored-by: Saloni Choudhary <[email protected]> Co-authored-by: bjee19 <[email protected]>
1 parent d1300f7 commit 3dd0bed

File tree

82 files changed

+6080
-206
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+6080
-206
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ jobs:
443443
build-os: ${{ matrix.build-os }}
444444
production-release: ${{ inputs.is_production_release == true && (inputs.dry_run == false || inputs.dry_run == null) }}
445445
release_version: ${{ inputs.release_version }}
446+
enable-inference-extension: true
446447
secrets: inherit
447448
permissions:
448449
contents: write

.github/workflows/conformance.yml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ on:
1616
enable-experimental:
1717
required: true
1818
type: boolean
19+
enable-inference-extension:
20+
required: true
21+
type: boolean
1922
production-release:
2023
required: false
2124
type: boolean
@@ -32,6 +35,7 @@ defaults:
3235
env:
3336
PLUS_USAGE_ENDPOINT: ${{ secrets.JWT_PLUS_REPORTING_ENDPOINT }}
3437
ENABLE_EXPERIMENTAL: ${{ inputs.enable-experimental }}
38+
ENABLE_INFERENCE_EXTENSION: ${{ inputs.enable-inference-extension }}
3539

3640
permissions:
3741
contents: read
@@ -194,3 +198,24 @@ jobs:
194198
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
195199
run: gh release upload ${{ github.ref_name }} conformance-profile.yaml --clobber
196200
working-directory: ./tests
201+
202+
- name: Run inference conformance tests
203+
run: |
204+
make run-inference-conformance-tests CONFORMANCE_TAG=${{ github.sha }} NGF_VERSION=${{ github.ref_name }} CLUSTER_NAME=${{ github.run_id }}
205+
core_result=$(cat conformance-profile-inference.yaml | yq '.profiles[0].core.result')
206+
# Fail the job when the inference conformance profile reports a core failure.
if [ "${core_result}" == "failure" ]; then echo "Inference Conformance test failed, see above for details." && exit 2; fi
207+
working-directory: ./tests
208+
209+
- name: Upload profile to GitHub
210+
if: ${{ inputs.enable-experimental }} # add experimental flag to filter result upload
211+
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
212+
with:
213+
name: conformance-profile-inference-${{ inputs.image }}-${{ inputs.k8s-version }}-${{ steps.ngf-meta.outputs.version }}-${{ github.run_id }}
214+
path: ./tests/conformance-profile-inference.yaml
215+
216+
- name: Upload profile to release
217+
if: ${{ inputs.production-release && inputs.enable-experimental }}
218+
env:
219+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
220+
run: gh release upload ${{ github.ref_name }} conformance-profile-inference.yaml --clobber
221+
working-directory: ./tests

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
cover.html
1414
cmd-cover.html
1515
conformance-profile.yaml
16+
conformance-profile-inference.yaml
1617

1718
# Dependency directories (remove the comment below to include it)
1819
# vendor/

.nvmrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
22
1+
24

Makefile

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ TELEMETRY_ENDPOINT=# if empty, NGF will report telemetry in its logs at debug le
1515
TELEMETRY_ENDPOINT_INSECURE = false
1616

1717
ENABLE_EXPERIMENTAL ?= false
18+
ENABLE_INFERENCE_EXTENSION ?= false
1819

1920
# go build flags - should not be overridden by the user
2021
GO_LINKER_FlAGS_VARS = -X main.version=${VERSION} -X main.telemetryReportPeriod=${TELEMETRY_REPORT_PERIOD} -X main.telemetryEndpoint=${TELEMETRY_ENDPOINT} -X main.telemetryEndpointInsecure=${TELEMETRY_ENDPOINT_INSECURE}
@@ -33,7 +34,7 @@ GEN_CRD_API_REFERENCE_DOCS_VERSION = v0.3.0
3334
# renovate: datasource=go depName=sigs.k8s.io/controller-tools
3435
CONTROLLER_TOOLS_VERSION = v0.19.0
3536
# renovate: datasource=docker depName=node
36-
NODE_VERSION = 22
37+
NODE_VERSION = 24
3738
# renovate: datasource=docker depName=quay.io/helmpack/chart-testing
3839
CHART_TESTING_VERSION = v3.14.0
3940
# renovate: datasource=github-tags depName=dadav/helm-schema
@@ -139,6 +140,14 @@ install-gateway-crds: ## Install Gateway API CRDs
139140
uninstall-gateway-crds: ## Uninstall Gateway API CRDs
140141
kubectl kustomize $(SELF_DIR)config/crd/gateway-api/$(if $(filter true,$(ENABLE_EXPERIMENTAL)),experimental,standard) | kubectl delete -f -
141142

143+
.PHONY: install-inference-crds
144+
install-inference-crds: ## Install Gateway API Inference Extension CRDs
145+
kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl apply -f -
146+
147+
.PHONY: uninstall-inference-crds
148+
uninstall-inference-crds: ## Uninstall Gateway API Inference Extension CRDs
149+
kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl delete -f -
150+
142151
.PHONY: generate-manifests
143152
generate-manifests: ## Generate manifests using Helm.
144153
./scripts/generate-manifests.sh
@@ -229,10 +238,16 @@ install-ngf-local-build-with-plus: check-for-plus-usage-endpoint build-images-wi
229238

230239
.PHONY: helm-install-local
231240
helm-install-local: install-gateway-crds ## Helm install NGF on configured kind cluster with local images. To build, load, and install with helm run make install-ngf-local-build.
232-
helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PREFIX) --create-namespace --wait --set nginxGateway.image.pullPolicy=$(PULL_POLICY) --set nginx.service.type=$(NGINX_SERVICE_TYPE) --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=$(PULL_POLICY) --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway $(HELM_PARAMETERS)
241+
@if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \
242+
$(MAKE) install-inference-crds; \
243+
fi
244+
# Use the overridable PULL_POLICY / NGINX_SERVICE_TYPE knobs, matching helm-install-local-with-plus.
helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PREFIX) --create-namespace --wait --set nginxGateway.image.pullPolicy=$(PULL_POLICY) --set nginx.service.type=$(NGINX_SERVICE_TYPE) --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=$(PULL_POLICY) --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway $(HELM_PARAMETERS)
233245

234246
.PHONY: helm-install-local-with-plus
235247
helm-install-local-with-plus: check-for-plus-usage-endpoint install-gateway-crds ## Helm install NGF with NGINX Plus on configured kind cluster with local images. To build, load, and install with helm run make install-ngf-local-build-with-plus.
248+
@if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \
249+
$(MAKE) install-inference-crds; \
250+
fi
236251
kubectl create namespace nginx-gateway || true
237252
kubectl -n nginx-gateway create secret generic nplus-license --from-file $(PLUS_LICENSE_FILE) || true
238253
helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PLUS_PREFIX) --wait --set nginxGateway.image.pullPolicy=$(PULL_POLICY) --set nginx.service.type=$(NGINX_SERVICE_TYPE) --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=$(PULL_POLICY) --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway --set nginx.plus=true --set nginx.usage.endpoint=$(PLUS_USAGE_ENDPOINT) $(HELM_PARAMETERS)

build/Dockerfile.nginx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ RUN apk add --no-cache bash \
2323
&& ln -sf /dev/stderr /var/log/nginx/error.log
2424

2525
COPY build/entrypoint.sh /agent/entrypoint.sh
26-
COPY ${NJS_DIR}/httpmatches.js /usr/lib/nginx/modules/njs/httpmatches.js
26+
COPY ${NJS_DIR}/ /usr/lib/nginx/modules/njs/
2727
COPY ${NGINX_CONF_DIR}/nginx.conf /etc/nginx/nginx.conf
2828
COPY ${NGINX_CONF_DIR}/grpc-error-locations.conf /etc/nginx/grpc-error-locations.conf
2929
COPY ${NGINX_CONF_DIR}/grpc-error-pages.conf /etc/nginx/grpc-error-pages.conf

build/Dockerfile.nginxplus

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ RUN apk add --no-cache bash \
2929
&& ln -sf /dev/stderr /var/log/nginx/error.log
3030

3131
COPY build/entrypoint.sh /agent/entrypoint.sh
32-
COPY ${NJS_DIR}/httpmatches.js /usr/lib/nginx/modules/njs/httpmatches.js
32+
COPY ${NJS_DIR}/ /usr/lib/nginx/modules/njs/
3333
COPY ${NGINX_CONF_DIR}/nginx-plus.conf /etc/nginx/nginx.conf
3434
COPY ${NGINX_CONF_DIR}/grpc-error-locations.conf /etc/nginx/grpc-error-locations.conf
3535
COPY ${NGINX_CONF_DIR}/grpc-error-pages.conf /etc/nginx/grpc-error-pages.conf

charts/nginx-gateway-fabric/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri
245245
| `nginx.usage.resolver` | The nameserver used to resolve the NGINX Plus usage reporting endpoint. Used with NGINX Instance Manager. | string | `""` |
246246
| `nginx.usage.secretName` | The name of the Secret containing the JWT for NGINX Plus usage reporting. Must exist in the same namespace that the NGINX Gateway Fabric control plane is running in (default namespace: nginx-gateway). | string | `"nplus-license"` |
247247
| `nginx.usage.skipVerify` | Disable client verification of the NGINX Plus usage reporting server certificate. | bool | `false` |
248-
| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` |
248+
| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"gwAPIInferenceExtension":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` |
249249
| `nginxGateway.affinity` | The affinity of the NGINX Gateway Fabric control plane pod. | object | `{}` |
250250
| `nginxGateway.autoscaling` | Autoscaling configuration for the NGINX Gateway Fabric control plane. | object | `{"enable":false}` |
251251
| `nginxGateway.autoscaling.enable` | Enable or disable Horizontal Pod Autoscaler for the control plane. | bool | `false` |
@@ -257,6 +257,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri
257257
| `nginxGateway.gatewayClassName` | The name of the GatewayClass that will be created as part of this release. Every NGINX Gateway Fabric must have a unique corresponding GatewayClass resource. NGINX Gateway Fabric only processes resources that belong to its class - i.e. have the "gatewayClassName" field resource equal to the class. | string | `"nginx"` |
258258
| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | string | `"gateway.nginx.org/nginx-gateway-controller"` |
259259
| `nginxGateway.gwAPIExperimentalFeatures.enable` | Enable the experimental features of Gateway API which are supported by NGINX Gateway Fabric. Requires the Gateway APIs installed from the experimental channel. | bool | `false` |
260+
| `nginxGateway.gwAPIInferenceExtension.enable` | Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. | bool | `false` |
260261
| `nginxGateway.image` | The image configuration for the NGINX Gateway Fabric control plane. | object | `{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"}` |
261262
| `nginxGateway.image.repository` | The NGINX Gateway Fabric image to use | string | `"ghcr.io/nginx/nginx-gateway-fabric"` |
262263
| `nginxGateway.kind` | The kind of the NGINX Gateway Fabric installation - currently, only deployment is supported. | string | `"deployment"` |

charts/nginx-gateway-fabric/templates/clusterrole.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,22 @@ rules:
147147
{{- end }}
148148
verbs:
149149
- update
150+
{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }}
151+
- apiGroups:
152+
- inference.networking.k8s.io
153+
resources:
154+
- inferencepools
155+
verbs:
156+
- get
157+
- list
158+
- watch
159+
- apiGroups:
160+
- inference.networking.k8s.io
161+
resources:
162+
- inferencepools/status
163+
verbs:
164+
- update
165+
{{- end }}
150166
{{- if .Values.nginxGateway.leaderElection.enable }}
151167
- apiGroups:
152168
- coordination.k8s.io

charts/nginx-gateway-fabric/templates/deployment.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ spec:
100100
{{- if .Values.nginxGateway.gwAPIExperimentalFeatures.enable }}
101101
- --gateway-api-experimental-features
102102
{{- end }}
103+
{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }}
104+
- --gateway-api-inference-extension
105+
{{- end }}
103106
{{- if .Values.nginxGateway.snippetsFilters.enable }}
104107
- --snippets-filters
105108
{{- end }}

0 commit comments

Comments
 (0)