@@ -7,31 +7,64 @@ export WORKPATH=$(dirname "$PWD")
 export WORKDIR=$WORKPATH/../../
 echo "WORKDIR=${WORKDIR}"
 export IP_ADDRESS=$(hostname -I | awk '{print $1}')
+export HOST_IP=${IP_ADDRESS}
 LOG_PATH=$WORKPATH

-#### env vars for LLM endpoint #############
+# Proxy settings
+export NO_PROXY="${NO_PROXY},${HOST_IP}"
+export HTTP_PROXY="${http_proxy}"
+export HTTPS_PROXY="${https_proxy}"
+
+# VLLM configuration
 MODEL=meta-llama/Llama-3.3-70B-Instruct
-VLLM_IMAGE=opea/vllm-gaudi:latest
-VLLM_PORT=8086
-HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}
-VLLM_VOLUME=${HF_CACHE_DIR}
-#######################################
+export VLLM_PORT="${VLLM_PORT:-8086}"
+
+# export HF_CACHE_DIR="${HF_CACHE_DIR:-"./data"}"
+export HF_CACHE_DIR=${model_cache:-"./data2/huggingface"}
+export VLLM_VOLUME="${HF_CACHE_DIR:-"./data2/huggingface"}"
+export VLLM_IMAGE="${VLLM_IMAGE:-opea/vllm-gaudi:latest}"
+export LLM_MODEL_ID="${LLM_MODEL_ID:-meta-llama/Llama-3.3-70B-Instruct}"
+export LLM_MODEL=$LLM_MODEL_ID
+export LLM_ENDPOINT="http://${IP_ADDRESS}:${VLLM_PORT}"
+export MAX_LEN="${MAX_LEN:-16384}"
+export NUM_CARDS="${NUM_CARDS:-4}"
+
+# Recursion limits
+export RECURSION_LIMIT_WORKER="${RECURSION_LIMIT_WORKER:-12}"
+export RECURSION_LIMIT_SUPERVISOR="${RECURSION_LIMIT_SUPERVISOR:-10}"
+
+# Hugging Face API token
+export HUGGINGFACEHUB_API_TOKEN="${HF_TOKEN}"
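+# Assumption: HF_TOKEN is already exported by the caller; a valid token is
+# required to pull gated models such as meta-llama/Llama-3.3-70B-Instruct.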
+
+# LLM configuration
+export TEMPERATURE="${TEMPERATURE:-0.5}"
+export MAX_TOKENS="${MAX_TOKENS:-4096}"
+export MAX_INPUT_TOKENS="${MAX_INPUT_TOKENS:-2048}"
+export MAX_TOTAL_TOKENS="${MAX_TOTAL_TOKENS:-4096}"
+
+# Worker URLs
+export WORKER_FINQA_AGENT_URL="http://${IP_ADDRESS}:9095/v1/chat/completions"
+export WORKER_RESEARCH_AGENT_URL="http://${IP_ADDRESS}:9096/v1/chat/completions"
+
+# DocSum configuration
+export DOCSUM_COMPONENT_NAME="${DOCSUM_COMPONENT_NAME:-"OpeaDocSumvLLM"}"
+export DOCSUM_ENDPOINT="http://${IP_ADDRESS}:9000/v1/docsum"
+
+# Toolset and prompt paths
+export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/
+export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/
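+
+# Note: the ${VAR:-default} expansions above keep any value already exported in
+# the environment and fall back to the default otherwise, so ports, model IDs,
+# and cache paths can be overridden by exporting them before running this script.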

 #### env vars for dataprep #############
-export hOST_IP=${IP_ADDRESS}
 export DATAPREP_PORT="6007"
 export TEI_EMBEDDER_PORT="10221"
 export REDIS_URL_VECTOR="redis://${IP_ADDRESS}:6379"
 export REDIS_URL_KV="redis://${IP_ADDRESS}:6380"
-export LLM_MODEL=$MODEL
-export LLM_ENDPOINT="http://${IP_ADDRESS}:${VLLM_PORT}"
+
 export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE"
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export TEI_EMBEDDING_ENDPOINT="http://${IP_ADDRESS}:${TEI_EMBEDDER_PORT}"
 #######################################

-
-
 function get_genai_comps() {
     if [ ! -d "GenAIComps" ]; then
         git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
@@ -70,11 +103,10 @@ function build_vllm_docker_image() {
     fi
 }

-
 function start_vllm_service_70B() {
     echo "token is ${HF_TOKEN}"
     echo "start vllm gaudi service"
-    echo "**************MODEL is $MODEL**************"
+    echo "**************MODEL is $LLM_MODEL_ID**************"
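+    # VLLM_SKIP_WARMUP=true below skips the HPU graph warm-up pass so the server
+    # starts faster (first requests will be slower), and --tensor-parallel-size 4
+    # shards the 70B weights across four Gaudi cards.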
     docker run -d --runtime=habana --rm --name "vllm-gaudi-server" -e HABANA_VISIBLE_DEVICES=all -p $VLLM_PORT:8000 -v $VLLM_VOLUME:/data -e HF_TOKEN=$HF_TOKEN -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_HOME=/data -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e VLLM_SKIP_WARMUP=true --cap-add=sys_nice --ipc=host $VLLM_IMAGE --model ${MODEL} --max-seq-len-to-capture 16384 --tensor-parallel-size 4
     sleep 10s
     echo "Waiting vllm gaudi ready"
@@ -95,7 +127,6 @@ function start_vllm_service_70B() {
     echo "Service started successfully"
 }

-
 function stop_llm(){
     cid=$(docker ps -aq --filter "name=vllm-gaudi-server")
     echo "Stopping container $cid"
@@ -104,7 +135,17 @@ function stop_llm(){
 }

 function start_dataprep_and_agent(){
-    docker compose -f $WORKPATH/docker_compose/intel/hpu/gaudi/compose.yaml up -d tei-embedding-serving redis-vector-db redis-kv-store dataprep-redis-finance worker-finqa-agent worker-research-agent docsum-vllm-gaudi supervisor-react-agent agent-ui
+    docker compose -f $WORKPATH/docker_compose/intel/hpu/gaudi/compose.yaml up -d \
+        tei-embedding-serving \
+        redis-vector-db \
+        redis-kv-store \
+        dataprep-redis-finance \
+        worker-finqa-agent \
+        worker-research-agent \
+        docsum-vllm-gaudi \
+        supervisor-react-agent \
+        agent-ui
+
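+    # Assumption: one minute is enough for all services above to become ready;
+    # polling "docker compose ... ps" or the individual health endpoints would
+    # be a more robust alternative to the fixed sleep that follows.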
     sleep 1m
 }

@@ -219,7 +260,6 @@ function stop_agent_docker() {
     done
 }

-
 echo "workpath: $WORKPATH"
 echo "=================== Stop containers ===================="
 stop_llm
@@ -232,9 +272,9 @@ echo "=================== #1 Building docker images===================="
 build_vllm_docker_image
 build_dataprep_agent_images

-#### for local test
+### for local test
 # build_agent_image_local
-# echo "=================== #1 Building docker images completed===================="
+echo "=================== #1 Building docker images completed===================="

 echo "=================== #2 Start vllm endpoint===================="
 start_vllm_service_70B