174 commits
523eca1
Build and upstream latest base image on push event (#1355)
chensuyue Mar 3, 2025
d6ec04d
Add timeout param for DocSum and FaqGen to deal with long context (#1…
XinyaoWa Mar 4, 2025
3d688d2
Megaservice / orchestrator metric testing + fixes (#1348)
eero-t Mar 4, 2025
d353f39
update image push machine (#1361)
chensuyue Mar 5, 2025
7b7be6a
Improve dataprep CI and fix pptx file ingesting bug (#1334)
lianhao Mar 7, 2025
9076845
Fix docker compose command in embedding BridgeTower readme (#1374)
dmsuehir Mar 7, 2025
65f8c32
Changes to checkin text2graph microservice (#1357)
intelsharath Mar 7, 2025
6d7ec3d
add text2cypher component (#1319)
jeanyu-habana Mar 10, 2025
3394259
Add Dockerfile for build ROCm vLLM Docker image (#1372)
chyundunovDatamonsters Mar 12, 2025
40d714b
Filter none test scripts in test matrix (#1386)
chensuyue Mar 12, 2025
3ded252
Use the latest HabanaAI/vllm-fork release tag to build vllm-gaudi ima…
chensuyue Mar 12, 2025
e26819b
[Bug: 1375] Fix Readme errors in dataprep component for all VectorDBs…
srajabos Mar 12, 2025
d6e4da2
bridgetower/clip: add missing dependency for aiohttp (#1371)
lianhao Mar 12, 2025
ba5988e
Refine Dataprep Redis with Async Issue (#1390)
letonghan Mar 13, 2025
cd36db7
Fix file list and index drop for opensearch (#1322)
jonminkin97 Mar 13, 2025
4bfe4db
Fix dependency issue in Retriever (#1393)
letonghan Mar 14, 2025
5aec639
[Telemetry] use existed env variable instead of introducing new one (…
louie-tsai Mar 14, 2025
3b80dce
Add GitHub Action to check and close stale issues and PRs (#1394)
XuehaoSun Mar 14, 2025
e6b259a
enable custom prompt for react_llama and react_langgraph (#1391)
minmin-intel Mar 15, 2025
2287d48
vLLM lvm integration (#1362)
Spycsh Mar 17, 2025
558a3df
[Bug: 1378] Added Multimodal support for Milvus for dataprep componen…
srajabos Mar 18, 2025
dd0579c
Fix Dockerfile.intel_gpu build issue (#1429)
Yongbozzz Mar 19, 2025
09a7ae1
Add third party into test matrix (#1430)
chensuyue Mar 19, 2025
5017419
Fix failing data prep redis multimodal tests that use the LVM (#1440)
dmsuehir Mar 21, 2025
cfd0aee
Add LLaMA Vision OH optimization (#1296)
Spycsh Mar 21, 2025
0ffb51f
Add redis-finance to dataprep (#1384)
minmin-intel Mar 21, 2025
a0d3563
Fix failing data prep milvus multimodal tests that use the LVM (#1441)
dmsuehir Mar 22, 2025
42c3629
Fix curl commands in the LLaVA server README (#1427)
dmsuehir Mar 22, 2025
eb412c2
Refine the README for llms/doc-summarization (#1437)
baravkareknath Mar 22, 2025
3446cc5
Added Mistral-Small-24B-Instruct-2501 and Mistral-Large-Instruct-2411…
XinyuYe-Intel Mar 24, 2025
810ff78
Enhance docker clean up in CI (#1442)
chensuyue Mar 24, 2025
9b3bea1
Fix default model cache adapt to the new test cluster (#1443)
chensuyue Mar 24, 2025
0e04d92
Fix: make Orchestrator metrics singleton (#1301)
eero-t Mar 24, 2025
86756a8
Use model cache for text2sql docker compose test. (#1436)
ZePan110 Mar 24, 2025
023bb73
Update default model cache for new CI cluster (#1445)
chensuyue Mar 25, 2025
4d99b0f
Fix Deprecation warnings in logs service logs (#1444)
ZePan110 Mar 25, 2025
fe53806
Return Mega/Micro Service Version number at runtime (#912)
louie-tsai Mar 26, 2025
11dbf97
fix function name issue (#1426)
letonghan Mar 26, 2025
b35a4db
Fix Retriever Async Issue (#1457)
letonghan Mar 26, 2025
9dd4fda
Use model cache for embeddings docker compose test. (#1452)
ZePan110 Mar 26, 2025
768634c
Fix issue with orphaned containers in the Github runtime program for …
ZePan110 Mar 26, 2025
d70792c
text generation, embedding and reranking with ovms (#1318)
dtrawins Mar 26, 2025
b75c9d6
Support parametrization of nginx port (#1456)
ZePan110 Mar 27, 2025
63be2d9
Use model cache for web_retriever docker compose tests. (#1461)
ZePan110 Mar 27, 2025
9644c58
Use model cache for rerankings docker compose tests. (#1459)
ZePan110 Mar 27, 2025
432c0d6
Use model cache for retrievers docker compose tests. (#1460)
ZePan110 Mar 27, 2025
5db85aa
Use model cache for llms docker compose test. (#1463)
ZePan110 Mar 27, 2025
c7cca69
Use model cache for agent and guardrails docker compose test. (#1462)
ZePan110 Mar 27, 2025
4fbcb66
Use model cache for third_parties docker compose test. (#1464)
ZePan110 Mar 27, 2025
85678be
Update TEI docker image to 1.6 (#1453)
xiguiw Mar 27, 2025
aecfb62
Use model cache for dataprep docker compose test. (#1450)
ZePan110 Mar 27, 2025
70ee027
Enlarge DocSum prompt buffer (#1471)
XinyaoWa Mar 27, 2025
56c12fb
Enhance test env clean up (#1469)
chensuyue Mar 27, 2025
8fde89e
remote endpoint support (#1399)
srinarayan-srikanthan Mar 27, 2025
cc555d6
Revert "Support parametrization of nginx port (#1456)" (#1473)
ZePan110 Mar 28, 2025
261bed5
add nginx src into example test trigger path (#1474)
chensuyue Mar 28, 2025
222ead3
Bump version of web search (#1451)
Spycsh Mar 28, 2025
09da4cb
Docker support for nebula (#1396)
siddhivelankar23 Mar 29, 2025
3d33f92
CICD update to adapt the new xeon test cluster (#1475)
chensuyue Mar 31, 2025
34cd04f
ignore false positive errors in reseting cache permissions (#1489)
dtrawins Mar 31, 2025
47884cd
Fix finetuning python regex syntax error (#1446)
eero-t Mar 31, 2025
ef9c264
Update nofile's hard limit to 262144 for opensearch (#1495)
ashahba Apr 1, 2025
fe6869f
unify service ports in compose and READMEs (#1506)
letonghan Apr 1, 2025
5001305
Remove langchain-huggingface from requirement. (#1505)
ZePan110 Apr 2, 2025
e9c2579
add model cache for example test (#1509)
chensuyue Apr 2, 2025
d3adee8
Fix Dataprep ingest PPT and async issues (#1504)
letonghan Apr 2, 2025
1c1e4a2
Integrate UI-TARS vLLM in lvm component (#1458)
Spycsh Apr 2, 2025
a905077
Fix model cache path and use Random to avoid ns conflict (#1500)
yongfengdu Apr 2, 2025
3d641e7
MultimodalQnA audio features completion (#1433)
mhbuehler Apr 2, 2025
226e539
[Bug: 1379] Added Multimodal support for Milvus for retriever compone…
srajabos Apr 2, 2025
a142ad3
VDMS langchain package update (#1317)
cwlacewe Apr 2, 2025
5619bb8
Enable Telemetry Tracing in Agent Comp and also add class name along …
louie-tsai Apr 2, 2025
d84dc7f
Fix CI workflow (#1518)
chensuyue Apr 3, 2025
b075b63
minor fix gpt-sovits service names (#1521)
Spycsh Apr 3, 2025
13c059e
remove concurrency in image build and push workflow (#1510)
chensuyue Apr 3, 2025
e9969bf
Add in entrypoint new download links for wav2lip and wav2lip_gan mode…
ctao456 Apr 3, 2025
5f6b447
Add xtune to finetuning (#1432)
jilongW Apr 3, 2025
84a4d03
Unset TEI_EMBEDDING_ENDPOINT when running multimodal redis retriever …
dmsuehir Apr 4, 2025
19eb989
Data Ingestion and Retrieval with custom index_name (#1439)
MSCetin37 Apr 4, 2025
ca32848
Update Gaudi Docker to v1.19.0 and PyTorch Installer 2.5.1 (#1513)
ashahba Apr 4, 2025
5fb2dab
Limit vllm and vllm-fork tags (#1529)
ZePan110 Apr 7, 2025
0593a69
feature: OpeaStore Class (#1493)
aMahanna Apr 8, 2025
47b9612
Refactor multimodal dependencies (#1527)
Spycsh Apr 8, 2025
3ef7d70
format react agent llama response to openai for openwebui display. (#…
lkk12014402 Apr 8, 2025
a78eac9
Add native LLM microservice using IPEX (#1337)
lvliang-intel Apr 8, 2025
8586f58
Support Phi-4-mini and Phi-4-multimodal-instruct in LLM text-generati…
XinyaoWa Apr 8, 2025
957ab03
Sync values yaml file for 1.3 release (#1524)
yongfengdu Apr 8, 2025
f63be4b
Hermes-2-Pro-Llama-3-8B model to be deprecated from Prediction Guard …
sharanshirodkar7 Apr 8, 2025
8964bbf
separate test script ports vs. ports in code/readme/compose (#1478)
rbrugaro Apr 8, 2025
e4b48ee
Fix GenAIExamples #1607 by adding timeout to the wav2lip request (#1540)
ctao456 Apr 9, 2025
1ff8a03
Fix xtune output location is wrong and update doc (#1533)
jilongW Apr 9, 2025
892187c
[pre-commit.ci] pre-commit autoupdate (#1531)
pre-commit-ci[bot] Apr 9, 2025
917a044
Update readme for supporting deepseek and phi4 (#1522)
XinyaoWa Apr 9, 2025
06f9e06
Struct2graph microservice for HybridRAG (#1502)
siddhivelankar23 Apr 9, 2025
56ebc44
Add tests + docs for BaseStatistics and generalize its code (#1107)
eero-t Apr 9, 2025
3ca0f0f
upgrade setuptools version adapt to the latest wheel version (#1545)
chensuyue Apr 9, 2025
5fe8a37
Misc apt and pip updates to Dockerfiles (#1542)
ashahba Apr 10, 2025
555f4af
Adaptation to vllm v0.8.3 build paths and limit vllm version (#1544)
ZePan110 Apr 10, 2025
26ca6d8
align ports host/docker and simply some healthcheck logics (#1499)
Spycsh Apr 10, 2025
e064994
Support health check in dataprep component (#1546)
letonghan Apr 10, 2025
92d29ff
Enhance CD workfow. (#1519)
ZePan110 Apr 10, 2025
33c2e37
Text to knowledge graph (text2kg) microservice implementation (#1472)
siddhivelankar23 Apr 11, 2025
08cf5fa
Upgrade Torch and it's dependencies to v2.5.x for video-llama (#1551)
ashahba Apr 11, 2025
1c9d82d
Fix image build issue (#1553)
chensuyue Apr 11, 2025
e13ef2e
Unified default port number for the same service in text2graph and te…
yao531441 Apr 11, 2025
87bdf5f
Update xtune file and change DDP paramter (#1552)
jilongW Apr 14, 2025
accf4f6
add N/A option (#1561)
NeoZhangJianyu Apr 14, 2025
f469532
Test latest gaudi docker container (#1477)
chensuyue Apr 14, 2025
0ce09cb
fix audioqna male voice setting (#1559)
Spycsh Apr 14, 2025
cd90bc5
added error handling for lvm (#1556)
okhleif-10 Apr 14, 2025
5c1ec76
enable mysql db for sql agent (#1431)
cheehook Apr 15, 2025
dbf5307
Enlarge DocSum prompt buffer (#1567)
XinyaoWa Apr 15, 2025
119653a
Update vLLM parameter max-seq-len-to-capture (#1565)
lvliang-intel Apr 15, 2025
89154bc
Remove Transformers versions from requirements.txt file (#1547)
ashahba Apr 16, 2025
bf9a88b
Remove index_names from files for dataprep-get request (#1569)
MSCetin37 Apr 16, 2025
c8f3347
Upgrade Optimum Habana version to fix security check issue (#1571)
lvliang-intel Apr 16, 2025
fea9244
Make llamaguard compatible with both TGI and vLLM (#1581)
lvliang-intel Apr 16, 2025
3f8bb7d
Fix Dockerfile error and add CI test for IPEX (#1585)
lvliang-intel Apr 17, 2025
ff69840
Reduce multilang tts docker image size (#1587)
Spycsh Apr 17, 2025
710c9ed
unset OPENAI_KEY in CI test (#1586)
rbrugaro Apr 17, 2025
4eb6099
Add AWS Credentials for CD test (#1588)
ZePan110 Apr 17, 2025
caf6838
new: `DataprepRequest` model (#1525)
aMahanna Apr 17, 2025
cf4d958
Fix the issue of reporting errors when there are no values files (#1597)
ZePan110 Apr 18, 2025
922b2f0
Feature: `OpeaArangoDataprep` & `OpeaArangoRetriever` (#1558)
aMahanna Apr 18, 2025
5e9d82c
fix typos (#1607)
daniel-de-leon-user293 Apr 18, 2025
bb588a4
Fix guardrails-bias-detection security issue (#1603)
ZePan110 Apr 20, 2025
f863a30
Fixed the metadata issue with milvus related to b64_img_str (#1606)
pallavijaini0525 Apr 20, 2025
053df0f
Update TGI docker image to 2.4.1 (#1598)
xiguiw Apr 20, 2025
09db4dd
bug: default value for StreamOptions (#1582)
danielfleischer Apr 20, 2025
7648e5f
Remove template_llava.jinja in command since its no longer included i…
XinyuYe-Intel Apr 20, 2025
59666ff
Upgrade Optimum Habana version to fix security check issue (#1604)
ZePan110 Apr 21, 2025
cd53bde
Fix OH version for Whisper on HPU (#1611)
Spycsh Apr 21, 2025
1e96df0
dynamic model switching (#1583)
srinarayan-srikanthan Apr 21, 2025
ed83fd2
Enhance CI test env clean up (#1612)
chensuyue Apr 21, 2025
46c3c87
Bump version into v1.3 (#1613)
chensuyue Apr 21, 2025
9ee14a9
Update base image build workflow (#1614)
chensuyue Apr 21, 2025
0e8dad6
Fix UT issue caused by bump version (#1615)
lvliang-intel Apr 22, 2025
f5cc757
Update base image version for opea/vllm-rocm image (#1610)
chyundunovDatamonsters Apr 23, 2025
4c93f02
fix pymilvus==2.5.6 (#1620)
chensuyue Apr 24, 2025
dcad6da
fix llm streamoptions issue (#1623)
letonghan Apr 24, 2025
b2abacb
fix vllm-openvino-arc issue (#1621)
Yongbozzz Apr 24, 2025
0222c92
Add exempt-issue-labels configuration to check stale issue and PR wor…
XuehaoSun Apr 24, 2025
e923fe2
Add Prompt Injection and Jailbreak detection microservice using Promp…
mitalipo Apr 25, 2025
00df9a6
Update guardrails table (#1625)
daniel-de-leon-user293 Apr 25, 2025
c490aa0
Delete NV related package in xtune (#1626)
jilongW Apr 28, 2025
533fa6e
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
fe5df92
Merge branch 'main' of https://github.com/opea-project/GenAIComps int…
chyundunovDatamonsters Apr 29, 2025
8b39e63
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
6574c18
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
a5a3053
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
6350015
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
1e1c462
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
043f0ed
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
b10a00c
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
287a551
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
9ae5bdc
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
2ef094b
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
9ea194d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 29, 2025
007a0ae
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
8b8cd28
Merge remote-tracking branch 'origin/feature/ROCm_TGI_Dockerfile' int…
chyundunovDatamonsters Apr 29, 2025
538c7e7
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
6cedf3a
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
bc4045b
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
ff21acc
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
f3404e9
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
262bd22
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
fd6b4c4
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
22086f7
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
fa7e573
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
92683f1
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
1d02d13
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
07d5208
Adding a Dockerfile to build a TGI ROCm image with an unprivileged us…
chyundunovDatamonsters Apr 29, 2025
db9d297
Merge branch 'main' into feature/ROCm_TGI_Dockerfile
chyundunovDatamonsters Apr 30, 2025
5b0cd22
Merge branch 'main' into feature/ROCm_TGI_Dockerfile
chyundunovDatamonsters May 5, 2025
5 changes: 5 additions & 0 deletions .github/workflows/docker/compose/third_parties-compose.yaml
@@ -50,6 +50,11 @@ services:
      dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
      shm_size: '128g'
    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
  tgi-rocm:
    build:
      dockerfile: comps/third_parties/tgi/src/Dockerfile.amd_gpu
      shm_size: '128g'
    image: ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest}
  whisper:
    build:
      dockerfile: comps/third_parties/whisper/src/Dockerfile
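To exercise this new CI build entry locally, the workflow compose file can be used directly; a minimal sketch, assuming it is run from the GenAIComps repository root with REGISTRY and TAG left at their defaults.

```bash
# Build only the new tgi-rocm image defined in the CI compose file
# (REGISTRY/TAG fall back to opea/latest).
docker compose -f .github/workflows/docker/compose/third_parties-compose.yaml build tgi-rocm
```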
11 changes: 11 additions & 0 deletions comps/third_parties/tgi/README.md
@@ -28,3 +28,14 @@ Run tgi on gaudi.
cd deployment/docker_compose
docker compose -f compose.yaml up -d tgi-gaudi-server
```

Run tgi on ROCm.

```bash
cd deployment/docker_compose
# The volume directories are created as root,
# so create them in advance and grant write permissions.
sudo mkdir -p data && sudo chmod -R 0777 data
sudo mkdir -p out && sudo chmod -R 0777 out
docker compose -f compose.yaml up -d tgi-rocm-server
```
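Once `tgi-rocm-server` is up, it can be smoke-tested the same way the CI script below does; a minimal sketch, assuming the default `LLM_ENDPOINT_PORT` of 8008 and that `host_ip` is set to the address of the machine running the container.

```bash
# Query the running TGI ROCm endpoint (assumes LLM_ENDPOINT_PORT=8008 and host_ip exported).
curl http://${host_ip}:8008/generate \
  -H "Content-Type: application/json" \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64}}'
```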
38 changes: 38 additions & 0 deletions comps/third_parties/tgi/deployment/docker_compose/compose.yaml
@@ -61,6 +61,44 @@ services:
retries: 100
command: --model-id ${LLM_MODEL_ID}

  tgi-rocm-server:
    image: ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest}
    container_name: tgi-rocm-server
    user: user
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
      - "${HF_CACHE_DIR:-./data}:/data"
      - "${TOKENIZER_CACHE_DIR:-./out}:/out"
    shm_size: 32g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-2048}
      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS:-4096}
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri/:/dev/dri/
    cap_add:
      - SYS_PTRACE
    group_add:
      - video
      - render
    security_opt:
      - seccomp=unconfined
      - apparmor=unconfined
    healthcheck:
      test: [ "CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1" ]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model-id ${LLM_MODEL_ID} --num-shard 1
    ipc: host

networks:
  default:
    driver: bridge
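The service above takes its model and cache settings from the environment; a minimal sketch of the variables to export before `docker compose up`, mirroring the values exported by the CI test script below (the model choice and port are assumptions carried over from that script, not requirements).

```bash
# Example environment for tgi-rocm-server; values mirror the CI test and can be changed.
export host_ip=$(hostname -I | awk '{print $1}')
export HF_TOKEN=<your HuggingFace token>
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export LLM_ENDPOINT_PORT=8008
export HF_CACHE_DIR=./data
export TOKENIZER_CACHE_DIR=./out
export MAX_INPUT_TOKENS=1024
export MAX_TOTAL_TOKENS=2048
```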
16 changes: 16 additions & 0 deletions comps/third_parties/tgi/src/Dockerfile.amd_gpu
@@ -0,0 +1,16 @@
FROM ghcr.io/huggingface/text-generation-inference:3.0.0-rocm

RUN groupadd --gid 2000 user \
&& useradd --uid 2000 --gid user --shell /bin/bash --create-home user

RUN usermod -a -G video,render user

USER root

RUN chown user:user /tgi-entrypoint.sh

USER user

RUN chmod +x /tgi-entrypoint.sh

ENTRYPOINT ["/tgi-entrypoint.sh"]
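To build this image outside of CI, the same docker build invocation used by the test script below can be run directly; a sketch assuming the repository root as the starting directory and the opea/latest defaults for REGISTRY and TAG.

```bash
# Build the unprivileged TGI ROCm image from the component's src directory.
cd comps/third_parties/tgi/src
docker build -t ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest} -f Dockerfile.amd_gpu .
```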
118 changes: 118 additions & 0 deletions tests/third_parties/test_third_parties_tgi_on_amd_gpu.sh
@@ -0,0 +1,118 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc.

set -x

IMAGE_REPO=${IMAGE_REPO:-"opea"}
export REGISTRY=${IMAGE_REPO}
export TAG="comps"
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=${TAG}"

WORKPATH=$(dirname "$PWD")
WORKDIR=${WORKPATH}/../
export host_ip=$(hostname -I | awk '{print $1}')
LOG_PATH="$WORKPATH"
service_name="tgi-rocm-server"
docker_container_name="tgi-rocm-server"

function build_container() {
    cd $WORKPATH/comps/third_parties/tgi/src
    docker build --no-cache -t ${REGISTRY:-opea}/tgi-rocm:${TAG:-latest} \
        -f Dockerfile.amd_gpu \
        . \
        --build-arg https_proxy=$https_proxy \
        --build-arg http_proxy=$http_proxy
    if [ $? -ne 0 ]; then
        echo "tgi-rocm built fail"
        exit 1
    else
        echo "tgi-rocm built successful"
    fi
}

# Function to start Docker container
start_container() {
    export HF_CACHE_DIR=${model_cache:-./data}
    export TOKENIZER_CACHE_DIR=${HF_CACHE_DIR}/out
    export LLM_ENDPOINT_PORT=8008
    export host_ip=${host_ip}
    export HF_TOKEN=${HF_TOKEN}
    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export MAX_INPUT_TOKENS=1024
    export MAX_TOTAL_TOKENS=2048

    cd $WORKPATH/comps/third_parties/tgi/deployment/docker_compose
    # Create the cache directories and open permissions so the unprivileged container user can write to them.
    sudo mkdir -p ${HF_CACHE_DIR} && sudo chmod -R 777 ${HF_CACHE_DIR}
    sudo mkdir -p ${HF_CACHE_DIR}/out && sudo chmod -R 777 ${HF_CACHE_DIR}/out
    docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log

    # check whether service is fully ready
    n=0
    until [[ "$n" -ge 300 ]]; do
        docker logs ${docker_container_name} > ${LOG_PATH}/${docker_container_name}.log 2>&1
        n=$((n+1))
        if grep -q "Connected" ${LOG_PATH}/${docker_container_name}.log; then
            break
        fi
        sleep 10s
    done
}

# Function to test API endpoint
function test_api_endpoint {
    local endpoint="$1"
    local expected_status="$2"

    # Make the HTTP request
    if test "$endpoint" = "generate"
    then
        local response=$(curl "http://${host_ip}:${LLM_ENDPOINT_PORT}/$endpoint" \
            -H "Content-Type: application/json" \
            -d '{"inputs":"What is a Deep Learning?","parameters":{"max_new_tokens":64,"do_sample": true}}' \
            --write-out '%{http_code}' \
            --silent \
            --output /dev/null)
    else
        local response=$(curl "http://${host_ip}:${LLM_ENDPOINT_PORT}/$endpoint" \
            --write-out '%{http_code}' \
            --silent \
            --output /dev/null)
    fi

    # Assert the response status code
    if [[ "$response" -eq "$expected_status" ]]; then
        echo "PASS: $endpoint returned expected status code: $expected_status"
    else
        echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)"
        docker logs $service_name
        exit 1
    fi
}

function stop_docker() {
    cd $WORKPATH/../comps/third_parties/tgi/deployment/docker_compose
    docker compose -f compose.yaml down --remove-orphans
}

# Main function
main() {

    stop_docker

    build_container
    start_container

    # Sleep to allow the container to start up fully
    sleep 10

    # Test the /generate API
    test_api_endpoint "generate" 200

    stop_docker
}

# Call main function
main
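To run this test outside the CI matrix, it can be invoked directly; a sketch assuming an AMD GPU host with Docker Compose installed, HF_TOKEN available, and the script launched from the tests/ directory so that WORKPATH resolves to the repository root (model_cache is optional and defaults to ./data).

```bash
# Run the ROCm TGI compose test locally; launch from tests/ so WORKPATH resolves to the repo root.
export HF_TOKEN=<your HuggingFace token>
cd tests
bash third_parties/test_third_parties_tgi_on_amd_gpu.sh
```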