diff --git a/.github/workflows/docker/compose/third_parties-compose.yaml b/.github/workflows/docker/compose/third_parties-compose.yaml
index 5e8a7eccd3..d04df36ddc 100644
--- a/.github/workflows/docker/compose/third_parties-compose.yaml
+++ b/.github/workflows/docker/compose/third_parties-compose.yaml
@@ -50,6 +50,11 @@ services:
       dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
       shm_size: '128g'
     image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
+  tgi-rocm:
+    build:
+      dockerfile: comps/third_parties/tgi/src/Dockerfile.amd_gpu
+      shm_size: '128g'
+    image: ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest}
   whisper:
     build:
       dockerfile: comps/third_parties/whisper/src/Dockerfile
diff --git a/comps/third_parties/tgi/README.md b/comps/third_parties/tgi/README.md
index 66bacec7fa..f97d7c5a85 100644
--- a/comps/third_parties/tgi/README.md
+++ b/comps/third_parties/tgi/README.md
@@ -28,3 +28,14 @@ Run tgi on gaudi.
 cd deployment/docker_compose
 docker compose -f compose.yaml up -d tgi-gaudi-server
 ```
+
+Run tgi on ROCm.
+
+```bash
+cd deployment/docker_compose
+# Since volume directories are created under root,
+# you need to create directories in advance and grant permissions.
+sudo mkdir -p data && sudo chmod -R 0777 data
+sudo mkdir -p out && sudo chmod -R 0777 out
+docker compose -f compose.yaml up -d tgi-rocm-server
+```
diff --git a/comps/third_parties/tgi/deployment/docker_compose/compose.yaml b/comps/third_parties/tgi/deployment/docker_compose/compose.yaml
index 8653c32c25..669e684483 100644
--- a/comps/third_parties/tgi/deployment/docker_compose/compose.yaml
+++ b/comps/third_parties/tgi/deployment/docker_compose/compose.yaml
@@ -61,6 +61,44 @@ services:
       retries: 100
     command: --model-id ${LLM_MODEL_ID}
 
+  tgi-rocm-server:
+    image: ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest}
+    container_name: tgi-rocm-server
+    user: user
+    ports:
+      - ${LLM_ENDPOINT_PORT:-8008}:80
+    volumes:
+      - "${HF_CACHE_DIR:-./data}:/data"
+      - "${TOKENIZER_CACHE_DIR:-./out}:/out"
+    shm_size: 32g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
+      host_ip: ${host_ip}
+      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
+      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-2048}
+      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS:-4096}
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri/:/dev/dri/
+    cap_add:
+      - SYS_PTRACE
+    group_add:
+      - video
+      - render
+    security_opt:
+      - seccomp=unconfined
+      - apparmor=unconfined
+    healthcheck:
+      test: [ "CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1" ]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    command: --model-id ${LLM_MODEL_ID} --num-shard 1
+    ipc: host
+
 networks:
   default:
     driver: bridge
diff --git a/comps/third_parties/tgi/src/Dockerfile.amd_gpu b/comps/third_parties/tgi/src/Dockerfile.amd_gpu
new file mode 100644
index 0000000000..fe87400b66
--- /dev/null
+++ b/comps/third_parties/tgi/src/Dockerfile.amd_gpu
@@ -0,0 +1,16 @@
+FROM ghcr.io/huggingface/text-generation-inference:3.0.0-rocm
+
+RUN groupadd --gid 2000 user \
+    && useradd --uid 2000 --gid user --shell /bin/bash --create-home user
+
+RUN usermod -a -G video,render user
+
+USER root
+
+RUN chown user:user /tgi-entrypoint.sh
+
+USER user
+
+RUN chmod +x /tgi-entrypoint.sh
+
+ENTRYPOINT ["/tgi-entrypoint.sh"]
diff --git a/tests/third_parties/test_third_parties_tgi_on_amd_gpu.sh b/tests/third_parties/test_third_parties_tgi_on_amd_gpu.sh
new file mode 100644
index 0000000000..0bda9edf8c
--- /dev/null
+++ b/tests/third_parties/test_third_parties_tgi_on_amd_gpu.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# Copyright (c) 2024 Advanced Micro Devices, Inc.
+
+set -x
+
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+export REGISTRY=${IMAGE_REPO}
+export TAG="comps"
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=${TAG}"
+
+WORKPATH=$(dirname "$PWD")
+WORKDIR=${WORKPATH}/../
+export host_ip=$(hostname -I | awk '{print $1}')
+LOG_PATH="$WORKPATH"
+service_name="tgi-rocm-server"
+docker_container_name="tgi-rocm-server"
+
+function build_container() {
+    cd $WORKPATH/comps/third_parties/tgi/src
+    docker build --no-cache -t ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest} \
+        -f Dockerfile.amd_gpu \
+        . \
+        --build-arg https_proxy=$https_proxy \
+        --build-arg http_proxy=$http_proxy
+    if [ $? -ne 0 ]; then
+        echo "tgi-rocm built fail"
+        exit 1
+    else
+        echo "tgi-rocm built successful"
+    fi
+}
+
+# Function to start Docker container
+start_container() {
+    export HF_CACHE_DIR=${model_cache:-./data}
+    export TOKENIZER_CACHE_DIR=${HF_CACHE_DIR}/out
+    export LLM_ENDPOINT_PORT=8008
+    export host_ip=${host_ip}
+    export HF_TOKEN=${HF_TOKEN}
+    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+    export MAX_INPUT_TOKENS=1024
+    export MAX_TOTAL_TOKENS=2048
+
+    cd $WORKPATH/comps/third_parties/tgi/deployment/docker_compose
+    sudo mkdir -p ${HF_CACHE_DIR} && sudo chmod -R 777 ${HF_CACHE_DIR}
+    sudo mkdir -p ${HF_CACHE_DIR}/out && sudo chmod -R 777 ${HF_CACHE_DIR}/out
+    docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log
+
+    # check whether service is fully ready
+    n=0
+    until [[ "$n" -ge 300 ]]; do
+        docker logs ${docker_container_name} &> ${LOG_PATH}/${docker_container_name}.log 2>&1
+        n=$((n+1))
+        if grep -q "Connected" ${LOG_PATH}/${docker_container_name}.log; then
+            break
+        fi
+        sleep 10s
+    done
+
+}
+
+# Function to test API endpoint
+function test_api_endpoint {
+    local endpoint="$1"
+    local expected_status="$2"
+
+    # Make the HTTP request
+    DATA=
+    if test "$1" = "generate"
+    then
+        local response=$(curl "http://${host_ip}:${LLM_ENDPOINT_PORT}/$endpoint" \
+            -H "Content-Type: application/json" \
+            -d '{"inputs":"What is a Deep Learning?","parameters":{"max_new_tokens":64,"do_sample": true}}' \
+            --write-out '%{http_code}' \
+            --silent \
+            --output /dev/null)
+    else
+        local response=$(curl "http://${host_ip}:${LLM_ENDPOINT_PORT}/$endpoint" \
+            --write-out '%{http_code}' \
+            --silent \
+            --output /dev/null)
+    fi
+
+    # Assert the response status code
+    if [[ "$response" -eq "$expected_status" ]]; then
+        echo "PASS: $endpoint returned expected status code: $expected_status"
+    else
+        echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)"
+        docker logs $service_name
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cd $WORKPATH/comps/third_parties/tgi/deployment/docker_compose
+    docker compose -f compose.yaml down --remove-orphans
+}
+
+# Main function
+main() {
+
+    stop_docker
+
+    build_container
+    start_container
+
+    # Sleep to allow the container to start up fully
+    sleep 10
+
+    # Test the /generate API
+    test_api_endpoint "generate" 200
+
+    stop_docker
+}
+
+# Call main function
+main