Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deployment/pipelines/chatqa/resources-model-hpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ modelConfigs:
VLLM_DECODE_BLOCK_BUCKET_MAX: "8256"
VLLM_ALLOW_LONG_MAX_MODEL_LEN: 1
VLLM_ENABLE_EXPERT_PARALLEL: "0"
extraCmdArgs: ["--block-size","128","--dtype","bfloat16","--max-model-len","33024","--gpu-memory-util","0.99","--max-num-seqs","64","--max-num-prefill-seqs","16","--num_scheduler_steps","16","--use-padding-aware-scheduling"]
extraCmdArgs: ["--block-size","128","--dtype","bfloat16","--gpu-memory-util","0.99","--max-num-seqs","64"]
tensor_parallel_size: "2"
modelChatTemplate: |
{%- if messages[0]['role'] == 'system' %}
Expand Down
133 changes: 116 additions & 17 deletions src/comps/llms/impl/model_server/vllm/docker/hpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,128 @@
# NOTE(review): this span is a PR diff rendering without +/- markers; it interleaves
# newly-added lines with lines the PR removes. Reviewer notes below flag the spots
# that look like removals or need confirmation before this is treated as a real file.
# Copyright (C) 2024-2025 Intel Corporation
# Copyright (C) 2025 Habana Labs, Ltd. an Intel Company
# SPDX-License-Identifier: Apache-2.0

# NOTE(review): diff artifact — this hard-coded FROM (old 1.22.1/ubuntu22.04 base)
# appears to be the line removed by the PR in favor of the parameterized ARG/FROM
# block below. If both lines remained, this would become an unused first stage.
FROM vault.habana.ai/gaudi-docker/1.22.1/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
# ------------------------------
# Base image parameters
# ------------------------------
# These ARGs precede FROM, so they are only in scope for the FROM line itself.
ARG DOCKER_URL=vault.habana.ai/gaudi-docker
ARG VERSION=1.22.2
ARG BASE_NAME=ubuntu24.04
ARG PT_VERSION=2.7.1
ARG REVISION=latest
ARG REPO_TYPE=habanalabs
# No default: expands empty, yielding "pytorch-installer-<PT_VERSION>"; set it
# (e.g. via --build-arg) to select an alternate torch image flavor.
ARG TORCH_TYPE_SUFFIX

FROM ${DOCKER_URL}/${VERSION}/${BASE_NAME}/${REPO_TYPE}/pytorch-${TORCH_TYPE_SUFFIX}installer-${PT_VERSION}:${REVISION}

# Use bash and strict mode in RUN
# (bash is also required by the process substitution <(...) used further below)
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]

# ------------------------------
# General environment
# ------------------------------
ENV OMPI_MCA_btl_vader_single_copy_mechanism=none
ENV HABANA_VISIBLE_DEVICES=all
ENV LANG=en_US.UTF-8

# create user and folders
# NOTE(review): creates uid-1000 "user" — but the runtime switch at the bottom is
# USER ${APP_USER} ("appuser"), for which no useradd is visible. Confirm whether
# the PR removes these lines or whether an appuser account still needs creating.
RUN useradd -u 1000 -m -s /bin/bash user
ENV PATH="$PATH:/home/user/.local/bin"
WORKDIR /home/user/
# Ensure `python` resolves to Python 3
RUN ln -sf /usr/bin/python3 /usr/bin/python

# Optional: avoid dash for /bin/sh scripts
RUN echo "dash dash/sh boolean false" | debconf-set-selections && \
DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash

# Build-time system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
git gettext moreutils jq ca-certificates && \
rm -rf /var/lib/apt/lists/*

# ------------------------------
# Create non-root user for runtime
# ------------------------------
# NOTE(review): despite the section title, no user account is created here —
# only HOME/PATH are set for ${APP_USER}. See the USER note at the bottom.
ARG APP_USER=appuser
ENV HOME=/home/${APP_USER}
ENV PATH="${HOME}/.local/bin:${PATH}"

# ------------------------------
# Prepare directories inside the image
# ------------------------------
# /opt/vllm-project and /opt/vllm-gaudi hold cloned sources.
# /opt/app/scripts will host app-level scripts copied from vllm-gaudi/.cd
RUN mkdir -p /opt/vllm-project /opt/vllm-gaudi /opt/app/scripts

# ------------------------------
# vLLM + vllm-gaudi clone & install
# ------------------------------
ENV VLLM_PATH=/opt/vllm-project
ENV VLLM_PATH2=/opt/vllm-gaudi
ARG VLLM_GAUDI_COMMIT=v0.11.2
# Leave empty to auto-resolve from vllm-gaudi marker
# NOTE(review): the default below is NON-empty, so the auto-resolve branch in the
# RUN step only ever runs when the build explicitly passes
# --build-arg VLLM_PROJECT_COMMIT= (empty). Confirm that is the intended default.
ARG VLLM_PROJECT_COMMIT=v0.11.2

ENV VLLM_TARGET_DEVICE="hpu"
# VLLM doesn't share any prebuilt HPU packages (https://docs.vllm.ai/en/latest/getting_started/installation/cpu.html#pre-built-wheels)
# So it is impossible to easily freeze the whole vllm environment with uv
RUN pip install --upgrade pip==25.0.1
RUN \
# Clone vllm-gaudi
git clone https://github.com/vllm-project/vllm-gaudi.git "${VLLM_PATH2}" && \
cd "${VLLM_PATH2}" && \
git checkout "${VLLM_GAUDI_COMMIT}" && \
# Fetch marker branch that contains last-good commit file
# (|| true: best-effort — a missing marker branch is caught by the non-empty
# check below instead of failing the fetch itself)
git fetch origin "vllm/last-good-commit-for-vllm-gaudi" || true && \
# Resolve vLLM commit (explicit or last-good) and trim whitespace
if [[ -z "${VLLM_PROJECT_COMMIT}" ]]; then \
VLLM_PROJECT_COMMIT="$(git show 'origin/vllm/last-good-commit-for-vllm-gaudi:VLLM_STABLE_COMMIT' 2>/dev/null | tr -d ' \t\r\n')"; \
echo "Using last-good vLLM commit: ${VLLM_PROJECT_COMMIT}"; \
else \
echo "Using explicit vLLM commit: ${VLLM_PROJECT_COMMIT}"; \
fi && \
[[ -n "${VLLM_PROJECT_COMMIT}" ]] || (echo "ERROR: could not resolve VLLM_PROJECT_COMMIT" && exit 1) && \
# Clone vllm
git clone https://github.com/vllm-project/vllm.git "${VLLM_PATH}" && \
cd "${VLLM_PATH}" && \
git fetch origin --tags || true && \
git checkout "${VLLM_PROJECT_COMMIT}" && \
# Install vLLM build dependencies excluding torch
pip install --no-cache-dir -r <(sed '/^torch/d' requirements/build.txt) && \
# Install vLLM itself (empty target to avoid device-specific builds)
VLLM_TARGET_DEVICE=empty pip install --no-cache-dir --no-build-isolation . && \
# Install vllm-gaudi plugin for HPU
cd "${VLLM_PATH2}" && \
VLLM_TARGET_DEVICE=hpu pip install -v --no-cache-dir . --no-build-isolation

# NOTE(review): diff artifact — the vllm-fork clone/install below is the OLD flow
# and appears to be among the lines this PR removes; it would conflict with the
# plugin-based install above if both were kept.
RUN git clone -b v0.9.0.1+Gaudi-1.22.0 --single-branch https://github.com/HabanaAI/vllm-fork.git
WORKDIR /home/user/vllm-fork
RUN pip install --upgrade pip && \
pip install -v -r requirements-hpu.txt
# ------------------------------
# Copy app-like assets from vllm-gaudi/.cd (inside the image)
# We DON'T use Docker COPY here, because these folders come from the repo we cloned during build.
# ------------------------------
RUN \
# Create target subfolders
mkdir -p /opt/app/scripts/templates \
/opt/app/scripts/entrypoints \
/opt/app/scripts/server \
/opt/app/scripts/benchmark && \
# Copy from vllm-gaudi's internal .cd folder into our app path
cp -r "${VLLM_PATH2}/.cd/templates/" /opt/app/scripts/templates/ && \
cp -r "${VLLM_PATH2}/.cd/entrypoints/" /opt/app/scripts/entrypoints/ && \
cp -r "${VLLM_PATH2}/.cd/server/" /opt/app/scripts/server/ && \
cp -r "${VLLM_PATH2}/.cd/benchmark/" /opt/app/scripts/benchmark/

# Make scripts importable
# NOTE(review): if PYTHONPATH is unset in the base image, the trailing ":" leaves
# an empty entry, which Python treats as the current directory — confirm intended.
ENV PYTHONPATH=/opt/app/scripts:${PYTHONPATH}
WORKDIR /opt/app/scripts

# NOTE(review): diff artifact — old "setup.py install" step, apparently removed by
# this PR (no setup.py is copied into /opt/app/scripts by the steps above).
RUN VLLM_TARGET_DEVICE=hpu python3 setup.py install
# ------------------------------
# Optional: install extra Python packages you need
# ------------------------------
# NOTE(review): unpinned versions (hadolint DL3013) — pin datasets/pandas for
# reproducible builds.
RUN pip install --no-cache-dir datasets pandas

# NOTE(review): diff artifact — old USER/WORKDIR pair, replaced by the
# ${APP_USER} switch just below.
USER user
WORKDIR /home/user/
# ------------------------------
# Switch to non-root user for runtime
# ------------------------------
# NOTE(review): ${APP_USER} ("appuser") is never created with useradd in this
# file — unless the base image provides it, USER will fail at build time. Verify.
USER ${APP_USER}
ENV PYTHONDONTWRITEBYTECODE=1

# ------------------------------
# Entrypoint
# If you have a specific server module in /opt/app/scripts (from .cd),
# adjust this to what you actually want to run. Otherwise, keep a shell.
# ------------------------------
# ENTRYPOINT ["python3", "-m", "entrypoints.entrypoint_main"]
CMD ["/bin/bash"]