Skip to content
This repository was archived by the owner on Oct 15, 2025. It is now read-only.

Commit 350550c

Browse files
committed
add tool-calling quickstart
Signed-off-by: sallyom <[email protected]>
1 parent f750a8d commit 350550c

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed
Lines changed: 53 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,53 @@
1+
# To run this example:
2+
# ./llmd-installer.sh --values-file examples/tool-calling/llama3-2-tool-calling.yaml
3+
# Sample application settings: the base ConfigMap preset to reference and the model identity.
# NOTE(review): indentation was lost in this capture, so the nesting of the keys below is not
# visible here — confirm against the original file before copying.
sampleApplication:
4+
baseConfigMapRefName: basic-gpu-with-nixl-and-redis-lookup-preset
5+
model:
6+
# hf:// URI for the model artifact; modelName below carries the same repository id as a quoted string.
modelArtifactURI: hf://meta-llama/Llama-3.2-3B-Instruct
7+
modelName: "meta-llama/Llama-3.2-3B-Instruct"
8+
# Sets a limit of 1 for the nvidia.com/gpu resource.
# NOTE(review): with indentation lost, it is unclear whether resources (and the prefill/decode/epp
# sections that follow) nest under modelservice or under sampleApplication — confirm.
modelservice:
9+
resources:
10+
limits:
11+
nvidia.com/gpu: 1
12+
# Prefill section: one replica plus extra arguments that turn on tool calling.
# Presumably these are appended to the vLLM server command line — confirm against the chart.
prefill:
13+
replicas: 1
14+
# A flag and its value occupy consecutive list items:
# --tool-call-parser is followed by llama3_json, and --chat-template by the template path.
extraArgs:
15+
- --enable-auto-tool-choice
16+
- --tool-call-parser
17+
- llama3_json
18+
- --chat-template
19+
# See https://github.com/neuralmagic/vllm/tree/main/examples for other templates
20+
- /workspace/vllm/examples/tool_chat_template_llama3.2_json.jinja
21+
# Decode section: one replica, with the same extraArgs list as the prefill section
# (auto tool choice, llama3_json parser, Llama 3.2 JSON chat template).
decode:
22+
replicas: 1
23+
# A flag and its value occupy consecutive list items:
# --tool-call-parser is followed by llama3_json, and --chat-template by the template path.
extraArgs:
24+
- --enable-auto-tool-choice
25+
- --tool-call-parser
26+
- llama3_json
27+
- --chat-template
28+
# See https://github.com/neuralmagic/vllm/tree/main/examples for other templates
29+
- /workspace/vllm/examples/tool_chat_template_llama3.2_json.jinja
30+
# Environment-variable overrides for epp, given as name/value pairs.
# Every value is quoted so it stays a string rather than being typed as a YAML boolean or integer.
epp:
31+
defaultEnvVarsOverride:
32+
# Scorer toggles without a prefix (KV-cache, prefix, load, session), all set to "true".
- name: ENABLE_KVCACHE_AWARE_SCORER
33+
value: "true"
34+
- name: ENABLE_PREFIX_AWARE_SCORER
35+
value: "true"
36+
- name: ENABLE_LOAD_AWARE_SCORER
37+
value: "true"
38+
- name: ENABLE_SESSION_AWARE_SCORER
39+
value: "true"
40+
# PD_* settings; presumably "PD" means prefill/decode disaggregation — confirm.
- name: PD_ENABLED
41+
value: "true"
42+
# NOTE(review): the unit of this threshold (tokens vs. characters) is not shown here — confirm.
- name: PD_PROMPT_LEN_THRESHOLD
43+
value: "10"
44+
# The same four scorer toggles again, this time with a PREFILL_ prefix, all set to "true".
- name: PREFILL_ENABLE_KVCACHE_AWARE_SCORER
45+
value: "true"
46+
- name: PREFILL_ENABLE_LOAD_AWARE_SCORER
47+
value: "true"
48+
- name: PREFILL_ENABLE_PREFIX_AWARE_SCORER
49+
value: "true"
50+
- name: PREFILL_ENABLE_SESSION_AWARE_SCORER
51+
value: "true"
52+
# Turns the redis section on (an unquoted YAML boolean).
# Presumably this backs the "redis-lookup" preset named in baseConfigMapRefName — confirm.
redis:
53+
enabled: true

0 commit comments

Comments
 (0)