Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
cdabc55
Patch code up to 42722628
actions-user Dec 22, 2025
8d87f42
Patch code up to 8d9e05df
actions-user Dec 22, 2025
6f9d32d
Patch code up to e9ec2f4e
actions-user Dec 23, 2025
365f236
Patch code up to 7f9fa847
actions-user Dec 30, 2025
97e08a4
Patch code up to 32bd261b
actions-user Jan 2, 2026
b3facec
Patch code up to 91af06f1
actions-user Jan 7, 2026
31ffb5e
Patch code up to 838c329e
actions-user Jan 7, 2026
8a92039
Patch code up to 10b2bed9
actions-user Jan 7, 2026
17ea850
Patch code up to d4fefff8
actions-user Jan 7, 2026
5154b3d
Patch code up to e5bac6c4
actions-user Jan 8, 2026
bb3d2fa
Merge branch 'main' into internal_main
kkurzacz-intel Jan 8, 2026
f765819
Patch code up to acb3f622
actions-user Jan 12, 2026
8e6ac58
Patch code up to 81798fcf
actions-user Jan 12, 2026
adc6c1b
Patch code up to 0b544de7
actions-user Jan 13, 2026
1c46e5f
Patch code up to 1e443d96
actions-user Jan 13, 2026
f99e3d6
Patch code up to 5f554e39
actions-user Jan 14, 2026
5820802
Patch code up to f4dafb54
actions-user Jan 16, 2026
90a8124
Patch code up to 8552cca7
actions-user Jan 16, 2026
59e2b61
Merge branch 'main' into internal_main
kkurzacz-intel Jan 19, 2026
bb4ae54
Patch code up to 33c774dd
actions-user Jan 20, 2026
8139b53
Patch code up to 00b756c3
actions-user Jan 21, 2026
72fa7fc
Patch code up to 648df6fb
actions-user Jan 21, 2026
5414cca
Patch code up to a38938df
actions-user Jan 21, 2026
2639938
Patch code up to 615956dc
actions-user Jan 22, 2026
b1f549a
Patch code up to fb049433
actions-user Jan 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ metadata:
name: edp-ingestion-configmap
namespace: edp
data:
EMBEDDING_MODEL_NAME: {{ .Values.ingestion.config.embedding_model_name | quote }}
VECTOR_STORE: {{ .Values.ingestion.config.vector_store | quote }}
VECTOR_ALGORITHM: {{ .Values.ingestion.config.vector_algorithm | quote }}
VECTOR_DIMS: {{ .Values.ingestion.config.vector_dims | quote }}
Expand Down
3 changes: 3 additions & 0 deletions deployment/components/edp/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ edpOidcConfigUrl: "http://keycloak-http.auth.svc/realms/EnterpriseRAG/.well-know
edpOidcClientSecret: ""
bucketNameRegexFilter: '.*'
presignedUrlCredentialsSystemFallback: "false"
embedding_model_name: &embedding_model_name "BAAI/bge-base-en-v1.5"


minioApiDomain: &minioApiDomain "s3.erag.com"
minioBrowserDomain: &minioBrowserDomain "minio.erag.com"
Expand Down Expand Up @@ -895,6 +897,7 @@ ingestion:
tag: latest
config:
opeaLoggerLevel: "INFO" # "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
embedding_model_name: *embedding_model_name # e.g., "BAAI/bge-base-en-v1.5"
# Vector Algorithm configuration
vector_algorithm: "FLAT" # "FLAT", "HNSW"
vector_dims: "768" # Depends on model used in embedding. For example bge-base-en-v1.5=768, bge-large-en-v1.5=1024
Expand Down
2 changes: 2 additions & 0 deletions deployment/components/gmc/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ images:
tag: *tag
pullPolicy: Always
envfile: "src/comps/retrievers/impl/microservice/.env"
envs:
EMBEDDING_MODEL_NAME: *embedding_model_name
ingestion-usvc:
image: "erag-ingestion"
repository: *repo
Expand Down
1 change: 0 additions & 1 deletion deployment/pipelines/docsum/reference-cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ spec:
config:
endpoint: /v1/chat/completions
LLM_MODEL_SERVER: vllm
LLM_OPENAI_FORMAT_STREAMING: "True"
LLM_MODEL_SERVER_ENDPOINT: vllm-service-m
isDownstreamService: true
- name: DocSum
Expand Down
1 change: 0 additions & 1 deletion deployment/pipelines/docsum/reference-hpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ spec:
endpoint: /v1/chat/completions
LLM_MODEL_SERVER: vllm
LLM_MODEL_SERVER_ENDPOINT: vllm-gaudi-svc
LLM_OPENAI_FORMAT_STREAMING: "True"
isDownstreamService: true
- name: DocSum
data: $response
Expand Down
4 changes: 3 additions & 1 deletion deployment/roles/application/edp/templates/values.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ proxy:
alternateTagging: {{ use_alternate_tagging }}
{% endif %}

embedding_model_name: &embedding_model_name {{ embedding_model_name }}

{% set storage = lookup('env', 'edp_storage_type') or edp.storageType if edp.storageType is defined else "minio" %}
{% if storage == "minio" %}
edpAccessKey: {{ EDP_MINIO_ACCESS_KEY }}
Expand Down Expand Up @@ -136,10 +138,10 @@ ingestion:
tag: {{ tag }}
repository: {{ registry }}
config:
embedding_model_name: *embedding_model_name
{% if edp.hierarchical_indices.enabled is true %}
use_hierarchical_indices: "True"
{% endif %}
config:
vector_dims: {{ vector_databases.vector_dims }}
vector_datatype: {{ vector_databases.vector_datatype }}
{% if edp.late_chunking.enabled is true %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ images:
vector_store: {{ vector_databases.vector_store }}
{% endif %}
envs:
EMBEDDING_MODEL_NAME: *embedding_model_name
{% if edp.hierarchical_indices.enabled is true %}
USE_HIERARCHICAL_INDICES: "True"
K_SUMMARIES: {{ edp.hierarchical_indices.kSummaries }}
Expand Down
21 changes: 13 additions & 8 deletions deployment/terraform/ibm/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# IBM Cloud Terraform Configuration for Enterprise RAG (Intel Gaudi and Xeon)

This directory contains Terraform configuration files for deploying the Enterprise RAG solution on IBM Cloud using Intel Gaudi accelerators.
This directory contains Terraform configuration files for deploying the Enterprise RAG solution on IBM Cloud using Intel hardware.

## Complete Deployment Guide

Expand All @@ -20,13 +20,15 @@ The comprehensive deployment guide covers:
This deployment creates a complete Enterprise RAG infrastructure:

**Infrastructure Components:**
- Intel Gaudi3-powered VPC instance (`gx3d-160x1792x8gaudi3`)
- Intel-powered VPC instance:
- Gaudi - `gx3d-160x1792x8gaudi3`
- Xeon - `bx3d-128x640`
- VPC networking with subnet and security groups
- Optimized storage configuration for AI workloads
- Automated software installation and configuration

**AI Software Stack:**
- Intel Gaudi drivers and optimization libraries
- Intel Gaudi drivers (installed only for Gaudi/`hpu` deployments) and optimization libraries
- Large Language Model service (Intel optimized)
- Text embedding service for semantic search
- Document reranking for improved relevance
Expand Down Expand Up @@ -90,7 +92,7 @@ api_key = "YOUR_IBM_CLOUD_API_KEY"

# Basic Configuration
region = "us-south"
instance_name = "erag-gaudi"
instance_name = "YOUR_IBM_CLOUD_INSTANCE_NAME"
instance_zone = "us-south-2"
ssh_key_name = "your-ssh-key-name"
resource_group = "default"
Expand All @@ -103,19 +105,22 @@ ssh_user = "ubuntu"
hugging_face_token = "YOUR_HUGGING_FACE_TOKEN"

# Solution Version (optional)
solution_version = "release-2.0.0" # Options: "release-2.0.0", "release-1.5.0", "main"
solution_version = "release-2.0.1" # Options: "release-2.0.1", "release-1.5.0", "main"
```

## Optional Variables

```hcl
# Instance Configuration
instance_profile = "gx3d-160x1792x8gaudi3" # Intel Gaudi instance
instance_profile = "" # Intel instance
# Example Gaudi instance: gx3d-160x1792x8gaudi3
# Example Xeon instance: bx3d-128x640

boot_volume_size = 250 # GB

# Version Control
solution_version = "release-2.0.0" # Git tag or branch (default: "release-2.0.0")
# Examples: "release-2.0.0", "release-1.5.0", "main"
solution_version = "release-2.0.1" # Git tag or branch (default: "release-2.0.1")
# Examples: "release-2.0.1", "release-1.5.0", "main"

# Model Configuration (Intel Gaudi optimized)
deployment_type = "hpu"
Expand Down
6 changes: 3 additions & 3 deletions deployment/terraform/ibm/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ resource "null_resource" "run_install_system" {
}
inline = [
"chmod +x /tmp/run_install.sh",
"/tmp/run_install.sh --platform ibm --gaudi --stage system --tag ${var.solution_version}"
"/tmp/run_install.sh --platform ibm --stage system --tag ${var.solution_version}${var.deployment_type == "hpu" ? " --gaudi" : ""}"
]
}
}
Expand All @@ -326,7 +326,7 @@ resource "null_resource" "run_install_cluster" {
proxy_port = var.use_proxy ? var.proxy_port : null
}
inline = [
"/tmp/run_install.sh --platform ibm --gaudi --stage cluster --tag ${var.solution_version}"
"/tmp/run_install.sh --platform ibm --stage cluster --tag ${var.solution_version}${var.deployment_type == "hpu" ? " --gaudi" : ""}"
]
}
}
Expand All @@ -350,7 +350,7 @@ resource "null_resource" "run_install_application" {
proxy_port = var.use_proxy ? var.proxy_port : null
}
inline = [
"/tmp/run_install.sh --platform ibm --gaudi --stage application --tag ${var.solution_version}"
"/tmp/run_install.sh --platform ibm --stage application --tag ${var.solution_version}${var.deployment_type == "hpu" ? " --gaudi" : ""}"
]
}
}
18 changes: 14 additions & 4 deletions deployment/terraform/scripts/run_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,16 @@ configure_platform_defaults() {
;;
ibm)
log_info "Configuring IBM Cloud platform defaults"
STORAGE_DEVICE="/dev/nvme1n1"
STORAGE_MOUNT_POINT="/mnt/nvme1"
if [[ "$INSTALL_GAUDI_DRIVER" == "true" ]]; then
STORAGE_DEVICE="/dev/nvme1n1"
STORAGE_MOUNT_POINT="/mnt/nvme1"
else
STORAGE_DEVICE="/dev/vdb"
STORAGE_MOUNT_POINT="/mnt/vdb1"
fi
CONTAINERD_STORAGE_DIR="${STORAGE_MOUNT_POINT}/containerd"
LOCAL_PATH_STORAGE_DIR="${STORAGE_MOUNT_POINT}/local-path-provisioner"
ETCD_DATA_DIR="${STORAGE_MOUNT_POINT}/etcd"
INSTALL_GAUDI_DRIVER=true
;;
esac

Expand Down Expand Up @@ -535,7 +539,13 @@ create_partition_and_format_storage() {
sudo parted -s $device mkpart primary ext4 0% 100%
sudo partprobe $device

# Wait for partition to be available
# Wait for partition to be available (up to 10 seconds)
for i in {1..10}; do
if [[ -b "$partition_device" ]]; then
break
fi
sleep 1
done
if [[ ! -b "$partition_device" ]]; then
log_fatal "Partition device not found after creation: $partition_device"
fi
Expand Down
Loading