Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,12 @@ EKSCTL_VERSION=v0.191.0
EKS_CLUSTER_K8_VERSION=1.31
EKS_INSTANCE_TYPE=m5.2xlarge
EKS_INSTANCE_TYPE_ARM64=c6g.4xlarge
SPLUNK_ENTERPRISE_RELEASE_IMAGE=splunk/splunk:9.4.1
SPLUNK_ENTERPRISE_RELEASE_IMAGE=splunk/splunk:9.4.1
RELATED_IMAGE_SPLUNK_ENTERPRISE=splunk/splunk:9.2.3
RELATED_IMAGE_RAY_HEAD=667741767953.dkr.ecr.us-west-2.amazonaws.com/ml-platform/ray/ray-head:build-5
RELATED_IMAGE_RAY_WORKER=667741767953.dkr.ecr.us-west-2.amazonaws.com/ml-platform/ray/ray-worker-gpu:build-6
RELATED_IMAGE_WEAVIATE=semitechnologies/weaviate:stable-v1.28-007846a
RELATED_IMAGE_POST_INSTALL_HOOK=667741767953.dkr.ecr.us-west-2.amazonaws.com/vivek/ml-platform/saia/ai-helm-post-hook:0.0.5
CLUSTER_NAME=sok-ml-platform
MODEL_VERSION=v0.3.14-36-g1549f5a
RAY_VERSION=2.44.0
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ARG BASE_IMAGE=registry.access.redhat.com/ubi8/ubi-minimal@sha256
ARG BASE_IMAGE_VERSION=b2a1bec3dfbc7a14a1d84d98934dfe8fdde6eb822a211286601cf109cbccb075

# Build the manager binary
FROM golang:1.23.0 AS builder
FROM golang:1.24 AS builder

WORKDIR /workspace

Expand Down Expand Up @@ -90,6 +90,7 @@ COPY LICENSE /licenses/LICENSE-2.0.txt
COPY tools/k8_probes/livenessProbe.sh /tools/k8_probes/
COPY tools/k8_probes/readinessProbe.sh /tools/k8_probes/
COPY tools/k8_probes/startupProbe.sh /tools/k8_probes/
COPY applications.yaml /home/nonroot/applications.yaml

# Set the user
USER 1001
Expand Down
22 changes: 22 additions & 0 deletions PROJECT
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Code generated by tool. DO NOT EDIT.
# This file is used to track the info used to scaffold your project
# and allow the plugins properly work.
# More info: https://book.kubebuilder.io/reference/project-config.html
domain: splunk.com
layout:
- go.kubebuilder.io/v3
Expand Down Expand Up @@ -109,4 +113,22 @@ resources:
kind: LicenseManager
path: github.com/splunk/splunk-operator/api/v4
version: v4
- api:
crdVersion: v1
namespaced: true
controller: true
domain: splunk.com
group: enterprise
kind: AIPlatform
path: github.com/splunk/splunk-operator/api/v4
version: v4
- api:
crdVersion: v1
namespaced: true
controller: true
domain: splunk.com
group: enterprise
kind: AIAssistant
path: github.com/splunk/splunk-operator/api/v4
version: v4
version: "3"
191 changes: 191 additions & 0 deletions api/v4/aiplatform_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/*
Copyright 2024.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v4

import (
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// AIPlatform is the Schema for the aiplatforms API. It is a namespaced
// resource with a status subresource and the short name "spai".
// +k8s:openapi-gen=true
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:resource:path=aiplatforms,scope=Namespaced,shortName=spai
// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status"
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
type AIPlatform struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

// Spec is the desired state of the AIPlatform.
Spec AIPlatformSpec `json:"spec,omitempty"`
// Status is the observed state of the AIPlatform.
Status AIPlatformStatus `json:"status,omitempty"`
}

// AIPlatformSpec defines the desired state
type AIPlatformSpec struct {

Volume AiVolumeSpec `json:"volume,omitempty"`
// s3://bucket/artifacts
// s3://bucket/tasks - get rid of this

AppsVolume AiVolumeSpec `json:"appsVolume,omitempty"`
ArtifactsVolume AiVolumeSpec `json:"artifactsVolume,omitempty"`

Features:

Check failure on line 50 in api/v4/aiplatform_types.go

View workflow job for this annotation

GitHub Actions / check-formating

expected type, found ':'

Check failure on line 50 in api/v4/aiplatform_types.go

View workflow job for this annotation

GitHub Actions / check-formating

expected type, found ':'
// saia-spl
serviceAccountName string `json:"serviceAccountName,omitempty"`
// saia-sec
serviceAccountName string `json:"serviceAccountName,omitempty"`

HeadGroupSpec HeadGroupSpec `json:"headGroupSpec,omitempty"`
WorkerGroupSpec WorkerGroupSpec `json:"workerGroupSpec,omitempty"`
DefaultAcceleratorType string `json:"defaultAcceleratorType"`
// Which sidecars to inject
Sidecars SidecarConfig `json:"sidecars,omitempty"`

// cert-manager Certificate for mTLS
CertificateRef string `json:"certificateRef,omitempty"`

// Cluster domain (default: cluster.local)
// +kubebuilder:default=cluster.local
ClusterDomain string `json:"clusterDomain,omitempty"`

// SplunkConfiguration instance reference
SplunkConfiguration SplunkConfiguration `json:"splunkConfiguration,omitempty"`

Weaviate WeaviateSpec `json:"weaviate,omitempty"`
weaviateStorage PersistentVolumeClaim `json:"storage,omitempty"`
SchedulingSpec `json:",inline"` // inlines NodeSelector, Tolerations, Affinity
Ingress `json:",inline"`
}

type WeaviateSpec struct {

Check failure on line 78 in api/v4/aiplatform_types.go

View workflow job for this annotation

GitHub Actions / check-formating

expected '}', found 'type'

Check failure on line 78 in api/v4/aiplatform_types.go

View workflow job for this annotation

GitHub Actions / check-formating

expected '}', found 'type'
// +kubebuilder:validation:Minimum=1
Replicas *int32 `json:"replicas"`
//Image string `json:"image"`
Resources corev1.ResourceRequirements `json:"resources,omitempty"`
ServiceAccountName string `json:"serviceAccountName,omitempty"`
SchedulingSpec `json:",inline"` // inlines NodeSelector, Tolerations, Affinity
}

// HeadGroupSpec holds the service account, scheduling constraints and image
// registry for the head group.
type HeadGroupSpec struct {
// ServiceAccountName is the name of the service account to use.
ServiceAccountName string `json:"serviceAccountName,omitempty"`
SchedulingSpec `json:",inline"` // inlines NodeSelector, Tolerations, Affinity
// ImageRegistry is the image registry for Ray images.
ImageRegistry string `json:"imageRegistry,omitempty"`
}

// WorkerGroupSpec holds the service account, image registry, GPU tiers and
// scheduling constraints for the worker group.
type WorkerGroupSpec struct {
// ServiceAccountName is the name of the service account to use.
ServiceAccountName string `json:"serviceAccountName,omitempty"`
// ImageRegistry is the image registry to pull from.
ImageRegistry string `json:"imageRegistry,omitempty"`
// GPUConfigs lists the worker tiers; each entry is one GPUConfig.
GPUConfigs []GPUConfig `json:"gpuConfigs,omitempty"`
SchedulingSpec `json:",inline"` // inlines NodeSelector, Tolerations, Affinity
}

// GPUConfig defines one worker-tier with scheduling and accelerator settings.
type GPUConfig struct {
// Tier is the name of this worker tier.
Tier string `json:"tier"`
// MinReplicas is the minimum number of replicas for this tier.
MinReplicas int32 `json:"minReplicas"`
// MaxReplicas is the maximum number of replicas for this tier.
MaxReplicas int32 `json:"maxReplicas"`
// GPUsPerPod is the number of GPUs per pod.
GPUsPerPod int32 `json:"gpusPerPod"`
// Resources are the resource requirements for this tier.
Resources corev1.ResourceRequirements `json:"resources,omitempty"`
}

// SchedulingSpec exposes common pod-scheduling knobs. Other specs in this
// package embed it inline.
type SchedulingSpec struct {
// NodeSelector is a map of node label keys to values.
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
// Tolerations is the list of pod tolerations.
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
// Affinity holds the affinity rules; it is omitted when nil.
Affinity *corev1.Affinity `json:"affinity,omitempty"`
}

// NOTE(review): Token is a plain string in the spec. If it carries a
// credential, it would be stored and displayed with the object - confirm
// whether it should be supplied through SecretRef instead.

// SplunkConfiguration identifies a SplunkConfiguration instance by name and
// namespace and carries its secret reference, endpoint and token.
type SplunkConfiguration struct {
// Name of the SplunkConfiguration instance
// +kubebuilder:validation:Pattern=^[a-z0-9]([-a-z0-9]*[a-z0-9])?$
// +kubebuilder:validation:MaxLength=63
// +kubebuilder:validation:MinLength=1
CRName string `json:"crName,omitempty"`
// Namespace of the SplunkConfiguration instance
// +kubebuilder:validation:Pattern=^[a-z0-9]([-a-z0-9]*[a-z0-9])?$
// +kubebuilder:validation:MaxLength=63
// +kubebuilder:validation:MinLength=1
CRNamespace string `json:"crNamespace,omitempty"`
// Splunk secret reference
SecretRef corev1.SecretReference `json:"secretRef,omitempty"`
// Endpoint is the Splunk endpoint.
Endpoint string `json:"endpoint,omitempty"`
// Token is the Splunk token.
Token string `json:"token,omitempty"`
}

// ReplicasSpec sets min/max worker replicas
type ReplicasSpec struct {
// Min is the minimum number of worker replicas; a zero value is omitted from JSON.
Min int32 `json:"min,omitempty"`
// Max is the maximum number of worker replicas; a zero value is omitted from JSON.
Max int32 `json:"max,omitempty"`
}

// NOTE(review): the field name EphimeralStorage is misspelled ("Ephemeral"),
// and its JSON key "ephemeral-storage" is kebab-case while the other keys in
// this file are camelCase. Renaming either would change the API, so both are
// left as they are - confirm before this type ships.

// MachineClass configures CPU, memory, GPU per-worker
type MachineClass struct {
// ResourceRequirements are the resource requirements per worker.
ResourceRequirements corev1.ResourceRequirements `json:"resourceRequirements,omitempty"`
// GPU is the number of GPUs per worker.
GPU int32 `json:"gpu,omitempty"`
// EphimeralStorage is the ephemeral storage size as a quantity string.
EphimeralStorage string `json:"ephemeral-storage,omitempty"` // e.g. "100Gi"
}

// NOTE(review): every flag below combines +kubebuilder:default=true with a
// plain bool and omitempty. encoding/json omits a false bool under omitempty,
// so an explicit false is dropped whenever the object is marshalled and may
// then be defaulted back to true. Confirm whether these should be *bool.

// SidecarConfig toggles injection of sidecars
type SidecarConfig struct {
// Envoy toggles injection of the Envoy sidecar.
// +kubebuilder:default=true
Envoy bool `json:"envoy,omitempty"`
// FluentBit toggles injection of the Fluent Bit sidecar.
// +kubebuilder:default=true
FluentBit bool `json:"fluentBit,omitempty"`
// Otel toggles injection of the Otel sidecar.
// +kubebuilder:default=true
Otel bool `json:"otel,omitempty"`
// PrometheusOperator toggles the Prometheus Operator integration.
// +kubebuilder:default=true
PrometheusOperator bool `json:"prometheusOperator,omitempty"`
}

// NOTE(review): the Region comment says "if provided", but the region and
// secretRef tags have no omitempty, so both keys are always serialized -
// confirm whether they are meant to be optional.

// AiVolumeSpec describes a remote storage volume: its URI, an optional
// endpoint override, its region and the name of the secret used to access it.
type AiVolumeSpec struct {
// Remote volume URI in the format s3://bucketname/<path prefix>
Path string `json:"path"` // s3://bucketname/<path prefix> or gs://bucketname/<path prefix> or azure://containername/<path prefix>

// optional override endpoint (only really needed for S3-compatible like MinIO)
Endpoint string `json:"endpoint,omitempty"`

// Region of the remote storage volume where apps reside. Used for aws, if provided. Not used for minio and azure.
Region string `json:"region"`

// Secret object name
SecretRef string `json:"secretRef"`
}

// AIPlatformStatus defines observed state
type AIPlatformStatus struct {
// RayServiceName is the name of the Ray service.
RayServiceName string `json:"rayServiceName,omitempty"`
// VectorDbServiceName is the name of the vector database service.
VectorDbServiceName string `json:"vectorDbServiceName,omitempty"`
// RayServiceStatus is the status value reported for the Ray service.
RayServiceStatus rayv1.ServiceStatus `json:"rayServiceStatus,omitempty"`
// Conditions is the list of status conditions; the "Ready" print column
// reads the condition of type Ready from here.
Conditions []metav1.Condition `json:"conditions,omitempty"`
// ObservedGeneration is a generation number recorded in status.
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
}

// AIPlatformList contains a list of AIPlatform
// +kubebuilder:object:root=true
type AIPlatformList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
// Items is the list of AIPlatform objects.
Items []AIPlatform `json:"items"`
}

// init registers the AIPlatform and AIPlatformList types with SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		&AIPlatform{},
		&AIPlatformList{},
	)
}
111 changes: 111 additions & 0 deletions api/v4/aiservice_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
Copyright 2024.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v4

import (
cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// AIServiceSpec defines the desired state of AIService.
type AIServiceSpec struct {
	// Version is an optional version identifier for the service.
	Version string `json:"version,omitempty"`
	// TaskVolume describes a remote storage volume (see AiVolumeSpec).
	TaskVolume AiVolumeSpec `json:"taskVolume,omitempty"`
	// SplunkConfiguration is the SplunkConfiguration instance reference.
	SplunkConfiguration SplunkConfiguration `json:"splunkConfiguration,omitempty"`
	// VectorDbUrl is the vector database URL. It is always serialized because
	// its tag has no omitempty.
	VectorDbUrl string `json:"vectorDbUrl"`
	// AIPlatformUrl is the AI platform URL.
	AIPlatformUrl string `json:"aiPlatformUrl,omitempty"`
	// AIPlatformRef is an object reference to an AI platform.
	AIPlatformRef corev1.ObjectReference `json:"aiPlatformRef,omitempty"`
	// Replicas is the number of replicas.
	Replicas int32 `json:"replicas,omitempty"`
	// ServiceAccountName is the name of the service account to use.
	ServiceAccountName string `json:"serviceAccountName,omitempty"`

	// The `default:"80"` struct tag on Port is not read by controller-gen, so
	// the CRD default is declared with the kubebuilder marker on the field. The
	// tag itself is kept in case other code reads it.

	// Port specifies the default port for the service.
	// +kubebuilder:default=80
	Port int32 `json:"port,omitempty" default:"80"`
	// Env holds environment variables as name/value pairs.
	Env map[string]string `json:"env,omitempty"`
	// Tolerations is the list of pod tolerations.
	Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
	// Affinity is the node affinity configuration.
	Affinity corev1.Affinity `json:"affinity,omitempty"`
	// Resources are the Kubernetes resources (cpu, memory).
	Resources corev1.ResourceRequirements `json:"resources,omitempty"`
	// Metrics is the metrics configuration.
	Metrics MetricsConfig `json:"metrics,omitempty"`
	// MTLS is the mTLS configuration.
	MTLS MTLSConfig `json:"mtls,omitempty"`
	// ServiceTemplate is a template used to create Kubernetes services.
	ServiceTemplate corev1.Service `json:"serviceTemplate"`
}

// NOTE(review): earlier comments on this type named defaults ("/metrics" for
// the path, "metrics" for the port) and described the port as "name or
// number". Port is an int32, so it cannot hold a name, and no default marker
// is declared here - confirm where defaults are applied, if anywhere.

// MetricsConfig configures scraping of SAIA metrics.
type MetricsConfig struct {
// Enable scraping of SAIA metrics
Enabled bool `json:"enabled,omitempty"`
// Path is the path that serves metrics.
Path string `json:"path,omitempty"`
// Port is the metrics port number.
Port int32 `json:"port,omitempty"`
}

// MTLSConfig configures mTLS on the SAIA service: whether it is enabled, how
// the certificate is requested, and who terminates TLS.
type MTLSConfig struct {
// Enable or disable mTLS on the SAIA service
Enabled bool `json:"enabled"`
// If Enabled, how to request the cert
IssuerRef cmmeta.ObjectReference `json:"issuerRef,omitempty"`
// SecretName is the name of the secret associated with the certificate.
SecretName string `json:"secretName,omitempty"`
// DNSNames is the list of DNS names for the certificate.
DNSNames []string `json:"dnsNames,omitempty"`
// Termination lets users opt out of operator-managed TLS even when Enabled
// is true, e.g. when they run Istio and terminate TLS externally.
Termination string `json:"termination,omitempty"` // "operator" or "mesh"
}

// AIServiceStatus defines the observed state of AIService
type AIServiceStatus struct {
// SchemaJobId is the identifier of the schema job.
SchemaJobId string `json:"schemaJobId,omitempty"`
// VectorDbStatus is the reported status of the vector database.
VectorDbStatus string `json:"vectorDbStatus,omitempty"`
// PlatformStatus is the reported status of the platform.
PlatformStatus string `json:"platformStatus,omitempty"`
// Conditions is the list of status conditions; the "Ready" print column
// reads the condition of type Ready from here.
Conditions []metav1.Condition `json:"conditions,omitempty"`
// ObservedGeneration is a generation number recorded in status.
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
}

// AIService is the Schema for the aiservices API. It is a namespaced resource
// with a status subresource and the short name "saia".
// +k8s:openapi-gen=true
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:resource:path=aiservices,scope=Namespaced,shortName=saia
// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status"
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
type AIService struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

// Spec is the desired state of the AIService.
Spec AIServiceSpec `json:"spec,omitempty"`
// Status is the observed state of the AIService.
Status AIServiceStatus `json:"status,omitempty"`
}

//+kubebuilder:object:root=true

// AIServiceList contains a list of AIService
type AIServiceList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
// Items is the list of AIService objects.
Items []AIService `json:"items"`
}

// init registers the AIService and AIServiceList types with SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		&AIService{},
		&AIServiceList{},
	)
}
Loading
Loading