Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .tekton/lightspeed-rag-tool-pull-request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: |
event == "pull_request" &&
target_branch == "main" &&
(".tekton/lightspeed-rag-tool-pull-request.yaml".pathChanged() || "artifacts.lock.yaml".pathChanged() || "rpms.lock.yaml".pathChanged() || "requirements*".pathChanged() || "byok/***".pathChanged() || "embeddings_model/***".pathChanged())
(".tekton/lightspeed-rag-tool-pull-request.yaml".pathChanged() || "artifacts.lock.yaml".pathChanged() || "byok/***".pathChanged() || "embeddings_model/***".pathChanged())
creationTimestamp:
labels:
appstudio.openshift.io/application: rag-tool
Expand All @@ -31,6 +31,8 @@ spec:
value: 5d
- name: dockerfile
value: byok/Containerfile.tool
- name: path-context
value: .
- name: hermetic
value: "true"
- name: build-args
Expand All @@ -39,7 +41,7 @@ spec:
- name: build-source-image
value: "true"
- name: prefetch-input
value: '[{"type": "generic", "path": "."}, {"type": "rpm", "path": "."}, {"type": "pip", "path": ".", "allow_binary": "true", "requirements_files": ["requirements.cpu.txt"]}]'
value: '[{"type": "generic", "path": "."}, {"type": "rpm", "path": "byok"}, {"type": "pip", "path": "byok", "allow_binary": "true", "requirements_files": ["requirements.txt"]}]'
pipelineSpec:
description: |
This pipeline is ideal for building container images from a Containerfile while maintaining trust after pipeline customization.
Expand Down
6 changes: 4 additions & 2 deletions .tekton/lightspeed-rag-tool-push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: |
event == "push" &&
target_branch == "main" &&
(".tekton/lightspeed-rag-tool-push.yaml".pathChanged() || "artifacts.lock.yaml".pathChanged() || "rpms.lock.yaml".pathChanged() || "requirements*".pathChanged() || "byok/***".pathChanged() || "embeddings_model/***".pathChanged())
(".tekton/lightspeed-rag-tool-push.yaml".pathChanged() || "artifacts.lock.yaml".pathChanged() || "byok/***".pathChanged() || "embeddings_model/***".pathChanged())
creationTimestamp:
labels:
appstudio.openshift.io/application: rag-tool
Expand All @@ -28,6 +28,8 @@ spec:
value: quay.io/redhat-user-workloads/crt-nshift-lightspeed-tenant/lightspeed-rag-tool:{{revision}}
- name: dockerfile
value: byok/Containerfile.tool
- name: path-context
value: .
- name: hermetic
value: "true"
- name: build-args
Expand All @@ -36,7 +38,7 @@ spec:
- name: build-source-image
value: "true"
- name: prefetch-input
value: '[{"type": "generic", "path": "."}, {"type": "rpm", "path": "."}, {"type": "pip", "path": ".", "allow_binary": "true", "requirements_files": ["requirements.cpu.txt"]}]'
value: '[{"type": "generic", "path": "."}, {"type": "rpm", "path": "byok"}, {"type": "pip", "path": "byok", "allow_binary": "true", "requirements_files": ["requirements.txt"]}]'
pipelineSpec:
description: |
This pipeline is ideal for building container images from a Containerfile while maintaining trust after pipeline customization.
Expand Down
7 changes: 5 additions & 2 deletions byok/Containerfile.output
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
ARG BYOK_TOOL_IMAGE=registry.redhat.io/lightspeed-rag-tool-tech-preview/lightspeed-rag-tool-rhel9:latest
ARG UBI_BASE_IMAGE=registry.access.redhat.com/ubi9/ubi:latest
ARG LLAMA_STACK
FROM ${BYOK_TOOL_IMAGE} as tool
ARG UBI_BASE_IMAGE
ARG VECTOR_DB_INDEX=vector_db_index
ARG LLAMA_STACK

USER 0
WORKDIR /workdir

ENV VECTOR_DB_INDEX=$VECTOR_DB_INDEX
RUN python3.11 generate_embeddings_tool.py -i /markdown -emd embeddings_model \
-emn sentence-transformers/all-mpnet-base-v2 -o vector_db -id $VECTOR_DB_INDEX
ENV LLAMA_STACK=$LLAMA_STACK
RUN python3.12 generate_embeddings_tool.py -i /markdown -emd embeddings_model \
-emn sentence-transformers/all-mpnet-base-v2 -o vector_db -id $VECTOR_DB_INDEX $LLAMA_STACK

FROM ${UBI_BASE_IMAGE}
COPY --from=tool /workdir/vector_db /rag/vector_db
13 changes: 8 additions & 5 deletions byok/Containerfile.tool
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ ARG VECTOR_DB_INDEX=vector_db_index
ARG BYOK_TOOL_IMAGE
ARG UBI_BASE_IMAGE
ARG HERMETIC
RUN dnf install -y buildah python3.11 python3.11-pip && dnf clean all
ARG LLAMA_STACK
RUN dnf install -y buildah python3.12 python3.12-pip && dnf clean all

USER 0
WORKDIR /workdir

COPY requirements.cpu.txt .
RUN pip3.11 install --no-cache-dir --no-deps -r requirements.cpu.txt
COPY byok/requirements.txt .
RUN pip3.12 install --upgrade pip && pip3.12 install --no-cache-dir --no-deps -r requirements.txt

COPY embeddings_model ./embeddings_model
ENV HERMETIC=$HERMETIC
Expand All @@ -26,7 +27,7 @@ RUN cd embeddings_model; \
curl -L -O https://huggingface.co/sentence-transformers/all-mpnet-base-v2/resolve/9a3225965996d404b775526de6dbfe85d3368642/model.safetensors; \
fi \
fi
COPY byok/generate_embeddings_tool.py byok/Containerfile.output ./
COPY byok/generate_embeddings_tool.py byok/__init__.py byok/document_processor.py byok/metadata_processor.py byok/Containerfile.output ./

# this directory is checked by ecosystem-cert-preflight-checks task in Konflux
RUN mkdir /licenses
Expand All @@ -38,7 +39,7 @@ LABEL cpe="cpe:/a:redhat:openshift_lightspeed:1::el9"
LABEL description="Red Hat OpenShift Lightspeed BYO Knowledge Tools"
LABEL distribution-scope=private
LABEL io.k8s.description="Red Hat OpenShift Lightspeed BYO Knowledge Tools"
LABEL io.k8s.display-name="Openshift Lightspeed BYO Knowledge Tools"
LABEL io.k8s.display-name="OpenShift Lightspeed BYO Knowledge Tools"
LABEL io.openshift.tags="openshift,lightspeed,ai,assistant,rag"
LABEL name="openshift-lightspeed-tech-preview/lightspeed-rag-tool-rhel9"
LABEL release=0.0.1
Expand All @@ -54,8 +55,10 @@ ENV BYOK_TOOL_IMAGE=$BYOK_TOOL_IMAGE
ENV UBI_BASE_IMAGE=$UBI_BASE_IMAGE
ENV LOG_LEVEL=$LOG_LEVEL
ENV VECTOR_DB_INDEX=$VECTOR_DB_INDEX
ENV LLAMA_STACK=$LLAMA_STACK
CMD buildah --log-level $LOG_LEVEL build --build-arg BYOK_TOOL_IMAGE=$BYOK_TOOL_IMAGE \
--build-arg UBI_BASE_IMAGE=$UBI_BASE_IMAGE --env VECTOR_DB_INDEX=$VECTOR_DB_INDEX \
--env LLAMA_STACK=$LLAMA_STACK \
-t $OUT_IMAGE_TAG -f Containerfile.output \
-v /markdown:/markdown:Z . && rm -f /output/$OUT_IMAGE_TAG.tar && \
buildah push $OUT_IMAGE_TAG docker-archive:/output/$OUT_IMAGE_TAG.tar
120 changes: 120 additions & 0 deletions byok/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Half of all available CPU cores. The arithmetic is written as $$(( )) so
# it is expanded by the shell at recipe time, not by make at parse time.
# Override from the command line with NUM_WORKERS=<n>.
NUM_WORKERS ?= $$(( $(shell nproc --all) / 2))
# Directory for test reports/coverage output; honours a pre-set
# ARTIFACT_DIR (e.g. exported by CI), otherwise defaults to tests/test_results.
ARTIFACT_DIR := $(if $(ARTIFACT_DIR),$(ARTIFACT_DIR),tests/test_results)

# Define arguments for pgvector support
# NOTE(review): the password default is for local development only —
# never rely on it in any shared environment.
POSTGRES_USER ?= postgres
POSTGRES_PASSWORD ?= somesecret
POSTGRES_HOST ?= localhost
# Host port mapped to the pgvector container's internal 5432.
POSTGRES_PORT ?= 15432
POSTGRES_DATABASE ?= postgres


# Run the unit test suite under uv/pytest and write coverage + JUnit reports
# into $(ARTIFACT_DIR). The COVERAGE_FILE override keeps the raw coverage
# data file alongside the other artifacts instead of in the repo root.
# Fix: the phony declaration must name the actual target ("test-unit");
# it previously declared a non-existent "unit-test", so a stray file named
# test-unit would have silently shadowed this rule.
.PHONY: test-unit
test-unit: ## Run the unit tests
	@echo "Running unit tests..."
	@echo "Reports will be written to ${ARTIFACT_DIR}"
	COVERAGE_FILE="${ARTIFACT_DIR}/.coverage.unit" uv run pytest tests --cov=src/lightspeed_rag_content --cov-report term-missing --cov-report "json:${ARTIFACT_DIR}/coverage_unit.json" --junit-xml="${ARTIFACT_DIR}/junit_unit.xml" --cov-fail-under=60

# Bootstrap: install uv (the project's package manager) only if it is missing.
.PHONY: install-tools
install-tools: ## Install required utilities/tools
	@command -v uv > /dev/null || { echo >&2 "uv is not installed. Installing..."; pip3.12 install --upgrade pip uv; }

# Guard target: fails fast when uv.lock is out of sync with pyproject.toml.
.PHONY: uv-lock-check
uv-lock-check: ## Check that the uv.lock file is in a good shape
	uv lock --check

.PHONY: install-global
install-global: install-tools ## Install lightspeed-rag-content into file system.
	uv pip install --python 3.12 --system .

.PHONY: install-hooks
install-hooks: install-deps-test ## Install commit hooks
	uv pip install pre-commit

.PHONY: install-deps
install-deps: install-tools uv-lock-check ## Install all required dependencies, according to uv.lock
	uv sync
# Regenerate requirements.txt from the lock file — consumed by the hermetic
# container build (byok/Containerfile.tool prefetches pip deps from it).
	uv export --no-emit-workspace --no-dev --no-annotate --no-header --output-file requirements.txt

.PHONY: install-deps-test
install-deps-test: install-tools uv-lock-check ## Install all required dev dependencies, according to uv.lock
	uv sync --dev

.PHONY: update-deps
update-deps: ## Check pyproject.toml for changes, update the lock file if needed, then sync.
	uv lock --upgrade
	uv sync
	uv sync --dev
# Keep requirements.txt in lock-step with the refreshed uv.lock.
	uv export --no-emit-workspace --no-dev --no-annotate --no-header --output-file requirements.txt

.PHONY: check-types
check-types: ## Check types in the code.
	@echo "Running $@ target ..."
	uv run mypy --namespace-packages --explicit-package-bases --strict --disallow-untyped-calls --disallow-untyped-defs --disallow-incomplete-defs src scripts

.PHONY: check-format
check-format: ## Check that the code is properly formatted using Black and Ruff formatter.
	@echo "Running $@ target ..."
	uv run black --check scripts src tests
	uv run ruff check scripts src

# Runs the unit tests via unittest discovery and fails below 90% coverage.
# NOTE(review): test-unit enforces 60% via pytest while this enforces 90%
# via unittest — confirm the two thresholds are intentionally different.
.PHONY: check-coverage
check-coverage: ## Check the coverage of unit tests.
	@echo "Running $@ target ..."
	uv run coverage run --source=src/lightspeed_rag_content -m unittest discover tests --verbose && uv run coverage report -m --fail-under 90

# Fails unless radon's average cyclomatic-complexity grade for src/ is "A".
# `tee /dev/tty` shows the full report while the pipeline captures it;
# the grep/tr pair extracts the single-letter grade from the summary line.
.PHONY: check-code-metrics
check-code-metrics: ## Check the code using Radon.
	@echo "Running $@ target ..."
	@OUTPUT=$$(uv run radon cc -a A src/ | tee /dev/tty | tail -1) && \
	GRADE=$$(echo $$OUTPUT | grep -oP " [A-F] " | tr -d '[:space:]') && \
	if [ "$$GRADE" = "A" ]; then exit 0; else exit 1; fi

# Rewrites files in place (black + ruff --fix), then runs the pre-commit hooks.
.PHONY: format
format: ## Format the code into unified format
	uv run black scripts src tests
	uv run ruff check scripts src --fix
	uv run pre-commit run

# Individual linter shortcuts. These are commands, not files — declare them
# phony so a stray file named "black", "pylint" or "ruff" in the working
# tree cannot shadow them and make the target appear "up to date".
# The "##" descriptions also surface them in `make help`, consistent with
# every other target in this file.
.PHONY: black pylint ruff
black: ## Check formatting with Black (no changes written)
	uv tool run black --check .

pylint: ## Lint src/ with Pylint
	uv run pylint src

ruff: ## Lint src/ with Ruff
	uv run ruff check src

# Aggregate gate: runs all static checks plus coverage in sequence.
.PHONY: verify
verify: check-types check-format check-code-metrics check-coverage ## Verify the code using various linters

# Starts pgvector detached (-d); data persists under ./postgresql/data.
# NOTE(review): POSTGRES_USER/HOST/DATABASE are defined above but not passed
# to the container here — confirm whether they are consumed by the tests.
.PHONY: start-postgres
start-postgres: ## Start postgresql from the pgvector container image
	mkdir -pv ./postgresql/data ./output
	podman run -d --name pgvector --rm -e POSTGRES_PASSWORD=$(POSTGRES_PASSWORD) \
	-p $(POSTGRES_PORT):5432 \
	-v $(PWD)/postgresql/data:/var/lib/postgresql/data:Z pgvector/pgvector:pg16

# Same container, but in the foreground with every SQL statement logged
# to stderr — useful when debugging test interactions with the database.
.PHONY: start-postgres-debug
start-postgres-debug: ## Start postgresql from the pgvector container image with debugging enabled
	mkdir -pv ./postgresql/data ./output
	podman run --name pgvector --rm -e POSTGRES_PASSWORD=$(POSTGRES_PASSWORD) \
	-p $(POSTGRES_PORT):5432 \
	-v ./postgresql/data:/var/lib/postgresql/data:Z pgvector/pgvector:pg16 \
	postgres -c log_statement=all -c log_destination=stderr

# Re-fetches the plaintext OpenShift product docs for every version directory
# present under ocp-product-docs-plaintext/, then refreshes the runbooks.
# `set -e` makes the for-loop abort on the first failing fetch.
# Fix: declared phony — this is a command, not a file, and every other
# command target in this Makefile is already declared .PHONY.
.PHONY: update-docs
update-docs: ## Update the plaintext OCP docs in ocp-product-docs-plaintext/
	@set -e && for OCP_VERSION in $$(ls -1 ocp-product-docs-plaintext); do \
	scripts/get_ocp_plaintext_docs.sh $$OCP_VERSION; \
	done
	scripts/get_runbooks.sh

# Self-documenting help: lists every target that carries a "## description"
# on its rule line. The grep pattern intentionally allows a leading space
# and the awk FS splits each line at ":.*?## " into target and description.
.PHONY: help
help: ## Show this help screen
	@echo 'Usage: make <OPTIONS> ... <TARGETS>'
	@echo ''
	@echo 'Available targets are:'
	@echo ''
	@grep -E '^[ a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
	awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-25s\033[0m %s\n", $$1, $$2}'
	@echo ''
Empty file added byok/__init__.py
Empty file.
Loading