Skip to content

Commit 9c2a93a

Browse files
authored
Merge pull request #295 from dusty-nv/dev
Dev
2 parents 8db7262 + 72cc341 commit 9c2a93a

File tree

23 files changed

+961
-259
lines changed

23 files changed

+961
-259
lines changed

docs/portal/data/models/gemma3.json

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
"gemma-3": {
33
"name": "Google Gemma 3",
44
"header": "gemma-3-header",
5+
"hf_token": "${HF_TOKEN}",
56
"max_context_len": {"placeholder": 4096},
67
"prefill_chunk": {"placeholder": 4096},
78
"blacklist": ["mlc", "llama_cpp"],
8-
"tags": ["llm"],
9+
"tags": ["vlm"],
910
"links": {
1011
"google": {
1112
"name": "Google",
@@ -36,5 +37,17 @@
3637
"url": "hf.co/google/gemma-3-27b-it",
3738
"blacklist": ["mlc", "llama_cpp"],
3839
"tags": ["gemma-3", "agx-orin"]
40+
},
41+
"gemma-3-1b-.*-ollama-.*": {
42+
"url": "ollama.com/library/gemma3:1b"
43+
},
44+
"gemma-3-4b-.*-ollama-.*": {
45+
"url": "ollama.com/library/gemma3:4b"
46+
},
47+
"gemma-3-12b-.*-ollama-.*": {
48+
"url": "ollama.com/library/gemma3:12b"
49+
},
50+
"gemma-3-27b-.*-ollama-.*": {
51+
"url": "ollama.com/library/gemma3:27b"
3952
}
4053
}

docs/portal/dist/db.json

Lines changed: 171 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,26 @@
4646
"l4t-r36"
4747
]
4848
},
49-
"awq": {
50-
"name": "AWQ TinyChat",
51-
"tags": "api"
49+
"ollama": {
50+
"name": "ollama",
51+
"tags": "api",
52+
"links": {
53+
"ollama": {
54+
"name": "ollama",
55+
"url": "https://ollama.com/"
56+
}
57+
}
58+
},
59+
"ollama:jp6": {
60+
"name": "dustynv/ollama:main-r36.4.0",
61+
"docker_image": "dustynv/ollama:main-r36.4.0",
62+
"docker_options": "-it --rm -e OLLAMA_MODEL=${MODEL} -e OLLAMA_MODELS=/root/.ollama -e OLLAMA_HOST=${SERVER_HOST} -e OLLAMA_CONTEXT_LEN=${MAX_CONTEXT_LEN} -e OLLAMA_LOGS=/root/.ollama/ollama.log -v ${CACHE_DIR}/ollama:/root/.ollama",
63+
"server_host": "0.0.0.0:9000",
64+
"tags": [
65+
"container",
66+
"ollama",
67+
"l4t-r36"
68+
]
5269
},
5370
"vllm": {
5471
"name": "vLLM",
@@ -64,7 +81,7 @@
6481
"name": "dustynv/vllm:0.7.4-r36.4.0-cu128-24.04",
6582
"docker_image": "dustynv/vllm:0.7.4-r36.4.0-cu128-24.04",
6683
"docker_cmd": "vllm serve ${MODEL}",
67-
"docker_args": "--host=${SERVER_ADDR} --port=${SERVER_PORT} --dtype=auto --max-num-seqs=${MAX_BATCH_SIZE} --max-model-len=${MAX_CONTEXT_LEN} --gpu-memory-utilization=0.75",
84+
"docker_args": "--host=${SERVER_ADDR} --port=${SERVER_PORT} --dtype=auto --max-num-seqs=${MAX_BATCH_SIZE} --max-model-len=${MAX_CONTEXT_LEN} --gpu-memory-utilization=0.75 ${VLLM_QUANTIZATION}",
6885
"docker_options": "-it --rm",
6986
"server_host": "0.0.0.0:9000",
7087
"tags": [
@@ -73,6 +90,10 @@
7390
"l4t-r36"
7491
]
7592
},
93+
"awq": {
94+
"name": "AWQ TinyChat",
95+
"tags": "api"
96+
},
7697
"sudonim": {
7798
"docker_cmd": "sudonim serve",
7899
"docker_options": "-it --rm",
@@ -95,6 +116,20 @@
95116
"vllm"
96117
]
97118
},
119+
"fp8": {
120+
"name": "fp8 (vLLM)",
121+
"tags": [
122+
"quantization",
123+
"vllm"
124+
]
125+
},
126+
"bnb4": {
127+
"name": "bnb4 (vLLM)",
128+
"tags": [
129+
"quantization",
130+
"vllm"
131+
]
132+
},
98133
"q4f16_ft": {
99134
"name": "q4f16_ft (MLC)",
100135
"tags": [
@@ -2681,6 +2716,7 @@
26812716
"gemma-3": {
26822717
"name": "Google Gemma 3",
26832718
"header": "gemma-3-header",
2719+
"hf_token": "${HF_TOKEN}",
26842720
"max_context_len": {
26852721
"placeholder": 4096
26862722
},
@@ -2692,7 +2728,7 @@
26922728
"llama_cpp"
26932729
],
26942730
"tags": [
2695-
"llm"
2731+
"vlm"
26962732
],
26972733
"links": {
26982734
"google": {
@@ -2792,6 +2828,34 @@
27922828
"created_at": "2025-03-01 19:10:19+00:00",
27932829
"last_modified": "2025-03-12 08:30:59+00:00"
27942830
},
2831+
"gemma-3-1b-it-q4_k_m-ollama-jp6": {
2832+
"title": "Gemma 3 1B \u276f ollama q4_k_m \u276f JetPack 6.1+",
2833+
"quantization": "q4_k_m",
2834+
"tags": [
2835+
"gemma-3-1b-it",
2836+
"q4_k_m",
2837+
"ollama:jp6"
2838+
],
2839+
"url": "ollama.com/library/gemma3:1b"
2840+
},
2841+
"gemma-3-1b-it-bnb4-vllm-jp6": {
2842+
"title": "Gemma 3 1B \u276f vLLM bnb4 \u276f JetPack 6.1+",
2843+
"quantization": "bnb4",
2844+
"tags": [
2845+
"gemma-3-1b-it",
2846+
"bnb4",
2847+
"vllm:jp6"
2848+
]
2849+
},
2850+
"gemma-3-1b-it-fp8-vllm-jp6": {
2851+
"title": "Gemma 3 1B \u276f vLLM fp8 \u276f JetPack 6.1+",
2852+
"quantization": "fp8",
2853+
"tags": [
2854+
"gemma-3-1b-it",
2855+
"fp8",
2856+
"vllm:jp6"
2857+
]
2858+
},
27952859
"gemma-3-1b-it-fp16-vllm-jp6": {
27962860
"title": "Gemma 3 1B \u276f vLLM fp16 \u276f JetPack 6.1+",
27972861
"quantization": "fp16",
@@ -2801,13 +2865,32 @@
28012865
"vllm:jp6"
28022866
]
28032867
},
2804-
"gemma-3-1b-it-q4_0-ollama-jp6": {
2805-
"title": "Gemma 3 1B \u276f ollama q4_0 \u276f JetPack 6.1+",
2806-
"quantization": "q4_0",
2868+
"gemma-3-4b-it-q4_k_m-ollama-jp6": {
2869+
"title": "Gemma 3 4B \u276f ollama q4_k_m \u276f JetPack 6.1+",
2870+
"quantization": "q4_k_m",
28072871
"tags": [
2808-
"gemma-3-1b-it",
2809-
"q4_0",
2872+
"gemma-3-4b-it",
2873+
"q4_k_m",
28102874
"ollama:jp6"
2875+
],
2876+
"url": "ollama.com/library/gemma3:4b"
2877+
},
2878+
"gemma-3-4b-it-bnb4-vllm-jp6": {
2879+
"title": "Gemma 3 4B \u276f vLLM bnb4 \u276f JetPack 6.1+",
2880+
"quantization": "bnb4",
2881+
"tags": [
2882+
"gemma-3-4b-it",
2883+
"bnb4",
2884+
"vllm:jp6"
2885+
]
2886+
},
2887+
"gemma-3-4b-it-fp8-vllm-jp6": {
2888+
"title": "Gemma 3 4B \u276f vLLM fp8 \u276f JetPack 6.1+",
2889+
"quantization": "fp8",
2890+
"tags": [
2891+
"gemma-3-4b-it",
2892+
"fp8",
2893+
"vllm:jp6"
28112894
]
28122895
},
28132896
"gemma-3-4b-it-fp16-vllm-jp6": {
@@ -2819,13 +2902,32 @@
28192902
"vllm:jp6"
28202903
]
28212904
},
2822-
"gemma-3-4b-it-q4_0-ollama-jp6": {
2823-
"title": "Gemma 3 4B \u276f ollama q4_0 \u276f JetPack 6.1+",
2824-
"quantization": "q4_0",
2905+
"gemma-3-12b-it-q4_k_m-ollama-jp6": {
2906+
"title": "Gemma 3 12B \u276f ollama q4_k_m \u276f JetPack 6.1+",
2907+
"quantization": "q4_k_m",
28252908
"tags": [
2826-
"gemma-3-4b-it",
2827-
"q4_0",
2909+
"gemma-3-12b-it",
2910+
"q4_k_m",
28282911
"ollama:jp6"
2912+
],
2913+
"url": "ollama.com/library/gemma3:12b"
2914+
},
2915+
"gemma-3-12b-it-bnb4-vllm-jp6": {
2916+
"title": "Gemma 3 12B \u276f vLLM bnb4 \u276f JetPack 6.1+",
2917+
"quantization": "bnb4",
2918+
"tags": [
2919+
"gemma-3-12b-it",
2920+
"bnb4",
2921+
"vllm:jp6"
2922+
]
2923+
},
2924+
"gemma-3-12b-it-fp8-vllm-jp6": {
2925+
"title": "Gemma 3 12B \u276f vLLM fp8 \u276f JetPack 6.1+",
2926+
"quantization": "fp8",
2927+
"tags": [
2928+
"gemma-3-12b-it",
2929+
"fp8",
2930+
"vllm:jp6"
28292931
]
28302932
},
28312933
"gemma-3-12b-it-fp16-vllm-jp6": {
@@ -2837,31 +2939,41 @@
28372939
"vllm:jp6"
28382940
]
28392941
},
2840-
"gemma-3-12b-it-q4_0-ollama-jp6": {
2841-
"title": "Gemma 3 12B \u276f ollama q4_0 \u276f JetPack 6.1+",
2842-
"quantization": "q4_0",
2942+
"gemma-3-27b-it-q4_k_m-ollama-jp6": {
2943+
"title": "Gemma 3 27B \u276f ollama q4_k_m \u276f JetPack 6.1+",
2944+
"quantization": "q4_k_m",
28432945
"tags": [
2844-
"gemma-3-12b-it",
2845-
"q4_0",
2946+
"gemma-3-27b-it",
2947+
"q4_k_m",
28462948
"ollama:jp6"
2949+
],
2950+
"url": "ollama.com/library/gemma3:27b"
2951+
},
2952+
"gemma-3-27b-it-bnb4-vllm-jp6": {
2953+
"title": "Gemma 3 27B \u276f vLLM bnb4 \u276f JetPack 6.1+",
2954+
"quantization": "bnb4",
2955+
"tags": [
2956+
"gemma-3-27b-it",
2957+
"bnb4",
2958+
"vllm:jp6"
28472959
]
28482960
},
2849-
"gemma-3-27b-it-fp16-vllm-jp6": {
2850-
"title": "Gemma 3 27B \u276f vLLM fp16 \u276f JetPack 6.1+",
2851-
"quantization": "fp16",
2961+
"gemma-3-27b-it-fp8-vllm-jp6": {
2962+
"title": "Gemma 3 27B \u276f vLLM fp8 \u276f JetPack 6.1+",
2963+
"quantization": "fp8",
28522964
"tags": [
28532965
"gemma-3-27b-it",
2854-
"fp16",
2966+
"fp8",
28552967
"vllm:jp6"
28562968
]
28572969
},
2858-
"gemma-3-27b-it-q4_0-ollama-jp6": {
2859-
"title": "Gemma 3 27B \u276f ollama q4_0 \u276f JetPack 6.1+",
2860-
"quantization": "q4_0",
2970+
"gemma-3-27b-it-fp16-vllm-jp6": {
2971+
"title": "Gemma 3 27B \u276f vLLM fp16 \u276f JetPack 6.1+",
2972+
"quantization": "fp16",
28612973
"tags": [
28622974
"gemma-3-27b-it",
2863-
"q4_0",
2864-
"ollama:jp6"
2975+
"fp16",
2976+
"vllm:jp6"
28652977
]
28662978
},
28672979
"deepseek-r1-distill": {
@@ -5046,7 +5158,6 @@
50465158
"open_webui"
50475159
],
50485160
"child_order": [
5049-
"gemma-3",
50505161
"deepseek-r1-distill",
50515162
"qwen-2.5",
50525163
"llama-3",
@@ -5055,6 +5166,36 @@
50555166
"phi"
50565167
]
50575168
},
5169+
"vlm": {
5170+
"name": "Vision/Language Models (VLM)",
5171+
"tags": "models",
5172+
"refs": "resource",
5173+
"xref": false,
5174+
"max_batch_size": 1,
5175+
"max_context_len": null,
5176+
"prefill_chunk": null,
5177+
"chat_template": null,
5178+
"hf_token": null,
5179+
"property_order": [
5180+
"url",
5181+
"docker_image",
5182+
"quantization",
5183+
"max_batch_size",
5184+
"max_context_len",
5185+
"prefill_chunk",
5186+
"chat_template",
5187+
"hf_token",
5188+
"cache_dir",
5189+
"docker_run",
5190+
"docker_cmd",
5191+
"docker_options",
5192+
"server_host",
5193+
"auto_update"
5194+
],
5195+
"child_order": [
5196+
"gemma-3"
5197+
]
5198+
},
50585199
"max_batch_size": {
50595200
"name": "Max Batch Size",
50605201
"tags": "number",

0 commit comments

Comments
 (0)