174 commits
523eca1
Build and upstream latest base image on push event (#1355)
chensuyue Mar 3, 2025
d6ec04d
Add timeout param for DocSum and FaqGen to deal with long context (#1…
XinyaoWa Mar 4, 2025
3d688d2
Megaservice / orchestrator metric testing + fixes (#1348)
eero-t Mar 4, 2025
d353f39
update image push machine (#1361)
chensuyue Mar 5, 2025
7b7be6a
Improve dataprep CI and fix pptx file ingesting bug (#1334)
lianhao Mar 7, 2025
9076845
Fix docker compose command in embedding BridgeTower readme (#1374)
dmsuehir Mar 7, 2025
65f8c32
Changes to checkin text2graph microservice (#1357)
intelsharath Mar 7, 2025
6d7ec3d
add text2cypher component (#1319)
jeanyu-habana Mar 10, 2025
3394259
Add Dockerfile for build ROCm vLLM Docker image (#1372)
chyundunovDatamonsters Mar 12, 2025
40d714b
Filter none test scripts in test matrix (#1386)
chensuyue Mar 12, 2025
3ded252
Use the latest HabanaAI/vllm-fork release tag to build vllm-gaudi ima…
chensuyue Mar 12, 2025
e26819b
[Bug: 1375] Fix Readme errors in dataprep component for all VectorDBs…
srajabos Mar 12, 2025
d6e4da2
bridgetower/clip: add missing dependency for aiohttp (#1371)
lianhao Mar 12, 2025
ba5988e
Refine Dataprep Redis with Async Issue (#1390)
letonghan Mar 13, 2025
cd36db7
Fix file list and index drop for opensearch (#1322)
jonminkin97 Mar 13, 2025
4bfe4db
Fix dependency issue in Retriever (#1393)
letonghan Mar 14, 2025
5aec639
[Telemetry] use existed env variable instead of introducing new one (…
louie-tsai Mar 14, 2025
3b80dce
Add GitHub Action to check and close stale issues and PRs (#1394)
XuehaoSun Mar 14, 2025
e6b259a
enable custom prompt for react_llama and react_langgraph (#1391)
minmin-intel Mar 15, 2025
2287d48
vLLM lvm integration (#1362)
Spycsh Mar 17, 2025
558a3df
[Bug: 1378] Added Multimodal support for Milvus for dataprep componen…
srajabos Mar 18, 2025
dd0579c
Fix Dockerfile.intel_gpu build issue (#1429)
Yongbozzz Mar 19, 2025
09a7ae1
Add third party into test matrix (#1430)
chensuyue Mar 19, 2025
5017419
Fix failing data prep redis multimodal tests that use the LVM (#1440)
dmsuehir Mar 21, 2025
cfd0aee
Add LLaMA Vision OH optimization (#1296)
Spycsh Mar 21, 2025
0ffb51f
Add redis-finance to dataprep (#1384)
minmin-intel Mar 21, 2025
a0d3563
Fix failing data prep milvus multimodal tests that use the LVM (#1441)
dmsuehir Mar 22, 2025
42c3629
Fix curl commands in the LLaVA server README (#1427)
dmsuehir Mar 22, 2025
eb412c2
Refine the README for llms/doc-summarization (#1437)
baravkareknath Mar 22, 2025
3446cc5
Added Mistral-Small-24B-Instruct-2501 and Mistral-Large-Instruct-2411…
XinyuYe-Intel Mar 24, 2025
810ff78
Enhance docker clean up in CI (#1442)
chensuyue Mar 24, 2025
9b3bea1
Fix default model cache adapt to the new test cluster (#1443)
chensuyue Mar 24, 2025
0e04d92
Fix: make Orchestrator metrics singleton (#1301)
eero-t Mar 24, 2025
86756a8
Use model cache for text2sql docker compose test. (#1436)
ZePan110 Mar 24, 2025
023bb73
Update default model cache for new CI cluster (#1445)
chensuyue Mar 25, 2025
4d99b0f
Fix Deprecation warnings in logs service logs (#1444)
ZePan110 Mar 25, 2025
fe53806
Return Mega/Micro Service Version number at runtime (#912)
louie-tsai Mar 26, 2025
11dbf97
fix function name issue (#1426)
letonghan Mar 26, 2025
b35a4db
Fix Retriever Async Issue (#1457)
letonghan Mar 26, 2025
9dd4fda
Use model cache for embeddings docker compose test. (#1452)
ZePan110 Mar 26, 2025
768634c
Fix issue with orphaned containers in the Github runtime program for …
ZePan110 Mar 26, 2025
d70792c
text generation, embedding and reranking with ovms (#1318)
dtrawins Mar 26, 2025
b75c9d6
Support parametrization of nginx port (#1456)
ZePan110 Mar 27, 2025
63be2d9
Use model cache for web_retriever docker compose tests. (#1461)
ZePan110 Mar 27, 2025
9644c58
Use model cache for rerankings docker compose tests. (#1459)
ZePan110 Mar 27, 2025
432c0d6
Use model cache for retrievers docker compose tests. (#1460)
ZePan110 Mar 27, 2025
5db85aa
Use model cache for llms docker compose test. (#1463)
ZePan110 Mar 27, 2025
c7cca69
Use model cache for agent and guardrails docker compose test. (#1462)
ZePan110 Mar 27, 2025
4fbcb66
Use model cache for third_parties docker compose test. (#1464)
ZePan110 Mar 27, 2025
85678be
Update TEI docker image to 1.6 (#1453)
xiguiw Mar 27, 2025
aecfb62
Use model cache for dataprep docker compose test. (#1450)
ZePan110 Mar 27, 2025
70ee027
Enlarge DocSum prompt buffer (#1471)
XinyaoWa Mar 27, 2025
56c12fb
Enhance test env clean up (#1469)
chensuyue Mar 27, 2025
8fde89e
remote endpoint support (#1399)
srinarayan-srikanthan Mar 27, 2025
cc555d6
Revert "Support parametrization of nginx port (#1456)" (#1473)
ZePan110 Mar 28, 2025
261bed5
add nginx src into example test trigger path (#1474)
chensuyue Mar 28, 2025
222ead3
Bump version of web search (#1451)
Spycsh Mar 28, 2025
09da4cb
Docker support for nebula (#1396)
siddhivelankar23 Mar 29, 2025
3d33f92
CICD update to adapt the new xeon test cluster (#1475)
chensuyue Mar 31, 2025
34cd04f
ignore false positive errors in reseting cache permissions (#1489)
dtrawins Mar 31, 2025
47884cd
Fix finetuning python regex syntax error (#1446)
eero-t Mar 31, 2025
ef9c264
Update nofile's hard limit to 262144 for opensearch (#1495)
ashahba Apr 1, 2025
fe6869f
unify service ports in compose and READMEs (#1506)
letonghan Apr 1, 2025
5001305
Remove langchain-huggingface from requirement. (#1505)
ZePan110 Apr 2, 2025
e9c2579
add model cache for example test (#1509)
chensuyue Apr 2, 2025
d3adee8
Fix Dataprep ingest PPT and async issues (#1504)
letonghan Apr 2, 2025
1c1e4a2
Integrate UI-TARS vLLM in lvm component (#1458)
Spycsh Apr 2, 2025
a905077
Fix model cache path and use Random to avoid ns conflict (#1500)
yongfengdu Apr 2, 2025
3d641e7
MultimodalQnA audio features completion (#1433)
mhbuehler Apr 2, 2025
226e539
[Bug: 1379] Added Multimodal support for Milvus for retriever compone…
srajabos Apr 2, 2025
a142ad3
VDMS langchain package update (#1317)
cwlacewe Apr 2, 2025
5619bb8
Enable Telemetry Tracing in Agent Comp and also add class name along …
louie-tsai Apr 2, 2025
d84dc7f
Fix CI workflow (#1518)
chensuyue Apr 3, 2025
b075b63
minor fix gpt-sovits service names (#1521)
Spycsh Apr 3, 2025
13c059e
remove concurrency in image build and push workflow (#1510)
chensuyue Apr 3, 2025
e9969bf
Add in entrypoint new download links for wav2lip and wav2lip_gan mode…
ctao456 Apr 3, 2025
5f6b447
Add xtune to finetuning (#1432)
jilongW Apr 3, 2025
84a4d03
Unset TEI_EMBEDDING_ENDPOINT when running multimodal redis retriever …
dmsuehir Apr 4, 2025
19eb989
Data Ingestion and Retrieval with custom index_name (#1439)
MSCetin37 Apr 4, 2025
ca32848
Update Gaudi Docker to v1.19.0 and PyTorch Installer 2.5.1 (#1513)
ashahba Apr 4, 2025
5fb2dab
Limit vllm and vllm-fork tags (#1529)
ZePan110 Apr 7, 2025
0593a69
feature: OpeaStore Class (#1493)
aMahanna Apr 8, 2025
47b9612
Refactor multimodal dependencies (#1527)
Spycsh Apr 8, 2025
3ef7d70
format react agent llama response to openai for openwebui display. (#…
lkk12014402 Apr 8, 2025
a78eac9
Add native LLM microservice using IPEX (#1337)
lvliang-intel Apr 8, 2025
8586f58
Support Phi-4-mini and Phi-4-multimodal-instruct in LLM text-generati…
XinyaoWa Apr 8, 2025
957ab03
Sync values yaml file for 1.3 release (#1524)
yongfengdu Apr 8, 2025
f63be4b
Hermes-2-Pro-Llama-3-8B model to be deprecated from Prediction Guard …
sharanshirodkar7 Apr 8, 2025
8964bbf
separate test script ports vs. ports in code/readme/compose (#1478)
rbrugaro Apr 8, 2025
e4b48ee
Fix GenAIExamples #1607 by adding timeout to the wav2lip request (#1540)
ctao456 Apr 9, 2025
1ff8a03
Fix xtune output location is wrong and update doc (#1533)
jilongW Apr 9, 2025
892187c
[pre-commit.ci] pre-commit autoupdate (#1531)
pre-commit-ci[bot] Apr 9, 2025
917a044
Update readme for supporting deepseek and phi4 (#1522)
XinyaoWa Apr 9, 2025
06f9e06
Struct2graph microservice for HybridRAG (#1502)
siddhivelankar23 Apr 9, 2025
56ebc44
Add tests + docs for BaseStatistics and generalize its code (#1107)
eero-t Apr 9, 2025
3ca0f0f
upgrade setuptools version adapt to the latest wheel version (#1545)
chensuyue Apr 9, 2025
5fe8a37
Misc apt and pip updates to Dockerfiles (#1542)
ashahba Apr 10, 2025
555f4af
Adaptation to vllm v0.8.3 build paths and limit vllm version (#1544)
ZePan110 Apr 10, 2025
26ca6d8
align ports host/docker and simply some healthcheck logics (#1499)
Spycsh Apr 10, 2025
e064994
Support health check in dataprep component (#1546)
letonghan Apr 10, 2025
92d29ff
Enhance CD workfow. (#1519)
ZePan110 Apr 10, 2025
33c2e37
Text to knowledge graph (text2kg) microservice implementation (#1472)
siddhivelankar23 Apr 11, 2025
08cf5fa
Upgrade Torch and it's dependencies to v2.5.x for video-llama (#1551)
ashahba Apr 11, 2025
1c9d82d
Fix image build issue (#1553)
chensuyue Apr 11, 2025
e13ef2e
Unified default port number for the same service in text2graph and te…
yao531441 Apr 11, 2025
87bdf5f
Update xtune file and change DDP paramter (#1552)
jilongW Apr 14, 2025
accf4f6
add N/A option (#1561)
NeoZhangJianyu Apr 14, 2025
f469532
Test latest gaudi docker container (#1477)
chensuyue Apr 14, 2025
0ce09cb
fix audioqna male voice setting (#1559)
Spycsh Apr 14, 2025
cd90bc5
added error handling for lvm (#1556)
okhleif-10 Apr 14, 2025
5c1ec76
enable mysql db for sql agent (#1431)
cheehook Apr 15, 2025
dbf5307
Enlarge DocSum prompt buffer (#1567)
XinyaoWa Apr 15, 2025
119653a
Update vLLM parameter max-seq-len-to-capture (#1565)
lvliang-intel Apr 15, 2025
89154bc
Remove Transformers versions from requirements.txt file (#1547)
ashahba Apr 16, 2025
bf9a88b
Remove index_names from files for dataprep-get request (#1569)
MSCetin37 Apr 16, 2025
c8f3347
Upgrade Optimum Habana version to fix security check issue (#1571)
lvliang-intel Apr 16, 2025
fea9244
Make llamaguard compatible with both TGI and vLLM (#1581)
lvliang-intel Apr 16, 2025
3f8bb7d
Fix Dockerfile error and add CI test for IPEX (#1585)
lvliang-intel Apr 17, 2025
ff69840
Reduce multilang tts docker image size (#1587)
Spycsh Apr 17, 2025
710c9ed
unset OPENAI_KEY in CI test (#1586)
rbrugaro Apr 17, 2025
4eb6099
Add AWS Credentials for CD test (#1588)
ZePan110 Apr 17, 2025
caf6838
new: `DataprepRequest` model (#1525)
aMahanna Apr 17, 2025
cf4d958
Fix the issue of reporting errors when there are no values files (#1597)
ZePan110 Apr 18, 2025
922b2f0
Feature: `OpeaArangoDataprep` & `OpeaArangoRetriever` (#1558)
aMahanna Apr 18, 2025
5e9d82c
fix typos (#1607)
daniel-de-leon-user293 Apr 18, 2025
bb588a4
Fix guardrails-bias-detection security issue (#1603)
ZePan110 Apr 20, 2025
f863a30
Fixed the metadata issue with milvus related to b64_img_str (#1606)
pallavijaini0525 Apr 20, 2025
053df0f
Update TGI docker image to 2.4.1 (#1598)
xiguiw Apr 20, 2025
09db4dd
bug: default value for StreamOptions (#1582)
danielfleischer Apr 20, 2025
7648e5f
Remove template_llava.jinja in command since its no longer included i…
XinyuYe-Intel Apr 20, 2025
59666ff
Upgrade Optimum Habana version to fix security check issue (#1604)
ZePan110 Apr 21, 2025
cd53bde
Fix OH version for Whisper on HPU (#1611)
Spycsh Apr 21, 2025
1e96df0
dynamic model switching (#1583)
srinarayan-srikanthan Apr 21, 2025
ed83fd2
Enhance CI test env clean up (#1612)
chensuyue Apr 21, 2025
46c3c87
Bump version into v1.3 (#1613)
chensuyue Apr 21, 2025
9ee14a9
Update base image build workflow (#1614)
chensuyue Apr 21, 2025
0e8dad6
Fix UT issue caused by bump version (#1615)
lvliang-intel Apr 22, 2025
f5cc757
Update base image version for opea/vllm-rocm image (#1610)
chyundunovDatamonsters Apr 23, 2025
4c93f02
fix pymilvus==2.5.6 (#1620)
chensuyue Apr 24, 2025
dcad6da
fix llm streamoptions issue (#1623)
letonghan Apr 24, 2025
b2abacb
fix vllm-openvino-arc issue (#1621)
Yongbozzz Apr 24, 2025
0222c92
Add exempt-issue-labels configuration to check stale issue and PR wor…
XuehaoSun Apr 24, 2025
e923fe2
Add Prompt Injection and Jailbreak detection microservice using Promp…
mitalipo Apr 25, 2025
00df9a6
Update guardrails table (#1625)
daniel-de-leon-user293 Apr 25, 2025
c490aa0
Delete NV related package in xtune (#1626)
jilongW Apr 28, 2025
533fa6e
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
fe5df92
Merge branch 'main' of https://github.com/opea-project/GenAIComps int…
chyundunovDatamonsters Apr 29, 2025
8b39e63
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
6574c18
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
a5a3053
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
6350015
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
1e1c462
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
043f0ed
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
b10a00c
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
287a551
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
9ae5bdc
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
2ef094b
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
9ea194d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 29, 2025
007a0ae
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
8b8cd28
Merge remote-tracking branch 'origin/feature/ROCm_TGI_Dockerfile' int…
chyundunovDatamonsters Apr 29, 2025
538c7e7
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
6cedf3a
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
bc4045b
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
ff21acc
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
f3404e9
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
262bd22
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
fd6b4c4
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
22086f7
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
fa7e573
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
92683f1
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
1d02d13
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
07d5208
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
db9d297
Merge branch 'main' into feature/ROCm_TGI_Dockerfile
chyundunovDatamonsters Apr 30, 2025
5b0cd22
Merge branch 'main' into feature/ROCm_TGI_Dockerfile
chyundunovDatamonsters May 5, 2025
5 changes: 5 additions & 0 deletions .github/workflows/docker/compose/third_parties-compose.yaml
@@ -50,6 +50,11 @@ services:
      dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
      shm_size: '128g'
    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
  tgi-rocm:
    build:
      dockerfile: comps/third_parties/tgi/src/Dockerfile.amd_gpu
      shm_size: '128g'
    image: ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest}
  whisper:
    build:
      dockerfile: comps/third_parties/whisper/src/Dockerfile
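To exercise this new CI build entry locally, the workflow compose file can be used directly; a minimal sketch, assuming it is run from the GenAIComps repository root with REGISTRY and TAG left at their defaults.

```bash
# Build only the new tgi-rocm image defined in the CI compose file
# (REGISTRY/TAG fall back to opea/latest).
docker compose -f .github/workflows/docker/compose/third_parties-compose.yaml build tgi-rocm
```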
11 changes: 11 additions & 0 deletions comps/third_parties/tgi/README.md
@@ -28,3 +28,14 @@ Run tgi on gaudi.
cd deployment/docker_compose
docker compose -f compose.yaml up -d tgi-gaudi-server
```

Run tgi on ROCm.

```bash
cd deployment/docker_compose
# The volume directories are created as root,
# so create them in advance and grant write permissions.
sudo mkdir -p data && sudo chmod -R 0777 data
sudo mkdir -p out && sudo chmod -R 0777 out
docker compose -f compose.yaml up -d tgi-rocm-server
```
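Once `tgi-rocm-server` is up, it can be smoke-tested the same way the CI script below does; a minimal sketch, assuming the default `LLM_ENDPOINT_PORT` of 8008 and that `host_ip` is set to the address of the machine running the container.

```bash
# Query the running TGI ROCm endpoint (assumes LLM_ENDPOINT_PORT=8008 and host_ip exported).
curl http://${host_ip}:8008/generate \
  -H "Content-Type: application/json" \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64}}'
```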
38 changes: 38 additions & 0 deletions comps/third_parties/tgi/deployment/docker_compose/compose.yaml
@@ -61,6 +61,44 @@ services:
retries: 100
command: --model-id ${LLM_MODEL_ID}

  tgi-rocm-server:
    image: ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest}
    container_name: tgi-rocm-server
    user: user
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
      - "${HF_CACHE_DIR:-./data}:/data"
      - "${TOKENIZER_CACHE_DIR:-./out}:/out"
    shm_size: 32g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-2048}
      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS:-4096}
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri/:/dev/dri/
    cap_add:
      - SYS_PTRACE
    group_add:
      - video
      - render
    security_opt:
      - seccomp=unconfined
      - apparmor=unconfined
    healthcheck:
      test: [ "CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1" ]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model-id ${LLM_MODEL_ID} --num-shard 1
    ipc: host

networks:
  default:
    driver: bridge
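The service above takes its model and cache settings from the environment; a minimal sketch of the variables to export before `docker compose up`, mirroring the values exported by the CI test script below (the model choice and port are assumptions carried over from that script, not requirements).

```bash
# Example environment for tgi-rocm-server; values mirror the CI test and can be changed.
export host_ip=$(hostname -I | awk '{print $1}')
export HF_TOKEN=<your HuggingFace token>
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export LLM_ENDPOINT_PORT=8008
export HF_CACHE_DIR=./data
export TOKENIZER_CACHE_DIR=./out
export MAX_INPUT_TOKENS=1024
export MAX_TOTAL_TOKENS=2048
```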
16 changes: 16 additions & 0 deletions comps/third_parties/tgi/src/Dockerfile.amd_gpu
@@ -0,0 +1,16 @@
FROM ghcr.io/huggingface/text-generation-inference:3.0.0-rocm

RUN groupadd --gid 2000 user \
&& useradd --uid 2000 --gid user --shell /bin/bash --create-home user

RUN usermod -a -G video,render user

USER root

RUN chown user:user /tgi-entrypoint.sh

USER user

RUN chmod +x /tgi-entrypoint.sh

ENTRYPOINT ["/tgi-entrypoint.sh"]
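To build this image outside of CI, the same docker build invocation used by the test script below can be run directly; a sketch assuming the repository root as the starting directory and the opea/latest defaults for REGISTRY and TAG.

```bash
# Build the unprivileged TGI ROCm image from the component's src directory.
cd comps/third_parties/tgi/src
docker build -t ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest} -f Dockerfile.amd_gpu .
```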
118 changes: 118 additions & 0 deletions tests/third_parties/test_third_parties_tgi_on_amd_gpu.sh
@@ -0,0 +1,118 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc.

set -x

IMAGE_REPO=${IMAGE_REPO:-"opea"}
export REGISTRY=${IMAGE_REPO}
export TAG="comps"
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=${TAG}"

WORKPATH=$(dirname "$PWD")
WORKDIR=${WORKPATH}/../
export host_ip=$(hostname -I | awk '{print $1}')
LOG_PATH="$WORKPATH"
service_name="tgi-rocm-server"
docker_container_name="tgi-rocm-server"

function build_container() {
    cd $WORKPATH/comps/third_parties/tgi/src
    docker build --no-cache -t ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest} \
        -f Dockerfile.amd_gpu \
        . \
        --build-arg https_proxy=$https_proxy \
        --build-arg http_proxy=$http_proxy
    if [ $? -ne 0 ]; then
        echo "tgi-rocm built fail"
        exit 1
    else
        echo "tgi-rocm built successful"
    fi
}

# Function to start Docker container
start_container() {
    export HF_CACHE_DIR=${model_cache:-./data}
    export TOKENIZER_CACHE_DIR=${HF_CACHE_DIR}/out
    export LLM_ENDPOINT_PORT=8008
    export host_ip=${host_ip}
    export HF_TOKEN=${HF_TOKEN}
    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export MAX_INPUT_TOKENS=1024
    export MAX_TOTAL_TOKENS=2048

    cd $WORKPATH/comps/third_parties/tgi/deployment/docker_compose
    # Create the cache directories and open permissions so the unprivileged container user can write to them.
    sudo mkdir -p ${HF_CACHE_DIR} && sudo chmod -R 777 ${HF_CACHE_DIR}
    sudo mkdir -p ${HF_CACHE_DIR}/out && sudo chmod -R 777 ${HF_CACHE_DIR}/out
    docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log

    # check whether service is fully ready
    n=0
    until [[ "$n" -ge 300 ]]; do
        docker logs ${docker_container_name} > ${LOG_PATH}/${docker_container_name}.log 2>&1
        n=$((n+1))
        if grep -q "Connected" ${LOG_PATH}/${docker_container_name}.log; then
            break
        fi
        sleep 10s
    done
}

# Function to test API endpoint
function test_api_endpoint {
    local endpoint="$1"
    local expected_status="$2"

    # Make the HTTP request
    if test "$endpoint" = "generate"
    then
        local response=$(curl "http://${host_ip}:${LLM_ENDPOINT_PORT}/$endpoint" \
            -H "Content-Type: application/json" \
            -d '{"inputs":"What is a Deep Learning?","parameters":{"max_new_tokens":64,"do_sample": true}}' \
            --write-out '%{http_code}' \
            --silent \
            --output /dev/null)
    else
        local response=$(curl "http://${host_ip}:${LLM_ENDPOINT_PORT}/$endpoint" \
            --write-out '%{http_code}' \
            --silent \
            --output /dev/null)
    fi

    # Assert the response status code
    if [[ "$response" -eq "$expected_status" ]]; then
        echo "PASS: $endpoint returned expected status code: $expected_status"
    else
        echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)"
        docker logs $service_name
        exit 1
    fi
}

function stop_docker() {
    cd $WORKPATH/../comps/third_parties/tgi/deployment/docker_compose
    docker compose -f compose.yaml down --remove-orphans
}

# Main function
main() {

    stop_docker

    build_container
    start_container

    # Sleep to allow the container to start up fully
    sleep 10

    # Test the /generate API
    test_api_endpoint "generate" 200

    stop_docker
}

# Call main function
main
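To run this test outside the CI matrix, it can be invoked directly; a sketch assuming an AMD GPU host with Docker Compose installed, HF_TOKEN available, and the script launched from the tests/ directory so that WORKPATH resolves to the repository root (model_cache is optional and defaults to ./data).

```bash
# Run the ROCm TGI compose test locally; launch from tests/ so WORKPATH resolves to the repo root.
export HF_TOKEN=<your HuggingFace token>
cd tests
bash third_parties/test_third_parties_tgi_on_amd_gpu.sh
```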