|
46 | 46 | "l4t-r36"
|
47 | 47 | ]
|
48 | 48 | },
|
49 |
| - "awq": { |
50 |
| - "name": "AWQ TinyChat", |
51 |
| - "tags": "api" |
| 49 | + "ollama": { |
| 50 | + "name": "ollama", |
| 51 | + "tags": "api", |
| 52 | + "links": { |
| 53 | + "ollama": { |
| 54 | + "name": "ollama", |
| 55 | + "url": "https://ollama.com/" |
| 56 | + } |
| 57 | + } |
| 58 | + }, |
| 59 | + "ollama:jp6": { |
| 60 | + "name": "dustynv/ollama:main-r36.4.0", |
| 61 | + "docker_image": "dustynv/ollama:main-r36.4.0", |
| 62 | + "docker_options": "-it --rm -e OLLAMA_MODEL=${MODEL} -e OLLAMA_MODELS=/root/.ollama -e OLLAMA_HOST=${SERVER_HOST} -e OLLAMA_CONTEXT_LEN=${MAX_CONTEXT_LEN} -e OLLAMA_LOGS=/root/.ollama/ollama.log -v ${CACHE_DIR}/ollama:/root/.ollama", |
| 63 | + "server_host": "0.0.0.0:9000", |
| 64 | + "tags": [ |
| 65 | + "container", |
| 66 | + "ollama", |
| 67 | + "l4t-r36" |
| 68 | + ] |
52 | 69 | },
|
53 | 70 | "vllm": {
|
54 | 71 | "name": "vLLM",
|
|
64 | 81 | "name": "dustynv/vllm:0.7.4-r36.4.0-cu128-24.04",
|
65 | 82 | "docker_image": "dustynv/vllm:0.7.4-r36.4.0-cu128-24.04",
|
66 | 83 | "docker_cmd": "vllm serve ${MODEL}",
|
67 |
| - "docker_args": "--host=${SERVER_ADDR} --port=${SERVER_PORT} --dtype=auto --max-num-seqs=${MAX_BATCH_SIZE} --max-model-len=${MAX_CONTEXT_LEN} --gpu-memory-utilization=0.75", |
| 84 | + "docker_args": "--host=${SERVER_ADDR} --port=${SERVER_PORT} --dtype=auto --max-num-seqs=${MAX_BATCH_SIZE} --max-model-len=${MAX_CONTEXT_LEN} --gpu-memory-utilization=0.75 ${VLLM_QUANTIZATION}", |
68 | 85 | "docker_options": "-it --rm",
|
69 | 86 | "server_host": "0.0.0.0:9000",
|
70 | 87 | "tags": [
|
|
73 | 90 | "l4t-r36"
|
74 | 91 | ]
|
75 | 92 | },
|
| 93 | + "awq": { |
| 94 | + "name": "AWQ TinyChat", |
| 95 | + "tags": "api" |
| 96 | + }, |
76 | 97 | "sudonim": {
|
77 | 98 | "docker_cmd": "sudonim serve",
|
78 | 99 | "docker_options": "-it --rm",
|
|
95 | 116 | "vllm"
|
96 | 117 | ]
|
97 | 118 | },
|
| 119 | + "fp8": { |
| 120 | + "name": "fp8 (vLLM)", |
| 121 | + "tags": [ |
| 122 | + "quantization", |
| 123 | + "vllm" |
| 124 | + ] |
| 125 | + }, |
| 126 | + "bnb4": { |
| 127 | + "name": "bnb4 (vLLM)", |
| 128 | + "tags": [ |
| 129 | + "quantization", |
| 130 | + "vllm" |
| 131 | + ] |
| 132 | + }, |
98 | 133 | "q4f16_ft": {
|
99 | 134 | "name": "q4f16_ft (MLC)",
|
100 | 135 | "tags": [
|
|
2681 | 2716 | "gemma-3": {
|
2682 | 2717 | "name": "Google Gemma 3",
|
2683 | 2718 | "header": "gemma-3-header",
|
| 2719 | + "hf_token": "${HF_TOKEN}", |
2684 | 2720 | "max_context_len": {
|
2685 | 2721 | "placeholder": 4096
|
2686 | 2722 | },
|
|
2692 | 2728 | "llama_cpp"
|
2693 | 2729 | ],
|
2694 | 2730 | "tags": [
|
2695 |
| - "llm" |
| 2731 | + "vlm" |
2696 | 2732 | ],
|
2697 | 2733 | "links": {
|
2698 | 2734 | "google": {
|
|
2792 | 2828 | "created_at": "2025-03-01 19:10:19+00:00",
|
2793 | 2829 | "last_modified": "2025-03-12 08:30:59+00:00"
|
2794 | 2830 | },
|
| 2831 | + "gemma-3-1b-it-q4_k_m-ollama-jp6": { |
| 2832 | + "title": "Gemma 3 1B \u276f ollama q4_k_m \u276f JetPack 6.1+", |
| 2833 | + "quantization": "q4_k_m", |
| 2834 | + "tags": [ |
| 2835 | + "gemma-3-1b-it", |
| 2836 | + "q4_k_m", |
| 2837 | + "ollama:jp6" |
| 2838 | + ], |
| 2839 | + "url": "ollama.com/library/gemma3:1b" |
| 2840 | + }, |
| 2841 | + "gemma-3-1b-it-bnb4-vllm-jp6": { |
| 2842 | + "title": "Gemma 3 1B \u276f vLLM bnb4 \u276f JetPack 6.1+", |
| 2843 | + "quantization": "bnb4", |
| 2844 | + "tags": [ |
| 2845 | + "gemma-3-1b-it", |
| 2846 | + "bnb4", |
| 2847 | + "vllm:jp6" |
| 2848 | + ] |
| 2849 | + }, |
| 2850 | + "gemma-3-1b-it-fp8-vllm-jp6": { |
| 2851 | + "title": "Gemma 3 1B \u276f vLLM fp8 \u276f JetPack 6.1+", |
| 2852 | + "quantization": "fp8", |
| 2853 | + "tags": [ |
| 2854 | + "gemma-3-1b-it", |
| 2855 | + "fp8", |
| 2856 | + "vllm:jp6" |
| 2857 | + ] |
| 2858 | + }, |
2795 | 2859 | "gemma-3-1b-it-fp16-vllm-jp6": {
|
2796 | 2860 | "title": "Gemma 3 1B \u276f vLLM fp16 \u276f JetPack 6.1+",
|
2797 | 2861 | "quantization": "fp16",
|
|
2801 | 2865 | "vllm:jp6"
|
2802 | 2866 | ]
|
2803 | 2867 | },
|
2804 |
| - "gemma-3-1b-it-q4_0-ollama-jp6": { |
2805 |
| - "title": "Gemma 3 1B \u276f ollama q4_0 \u276f JetPack 6.1+", |
2806 |
| - "quantization": "q4_0", |
| 2868 | + "gemma-3-4b-it-q4_k_m-ollama-jp6": { |
| 2869 | + "title": "Gemma 3 4B \u276f ollama q4_k_m \u276f JetPack 6.1+", |
| 2870 | + "quantization": "q4_k_m", |
2807 | 2871 | "tags": [
|
2808 |
| - "gemma-3-1b-it", |
2809 |
| - "q4_0", |
| 2872 | + "gemma-3-4b-it", |
| 2873 | + "q4_k_m", |
2810 | 2874 | "ollama:jp6"
|
| 2875 | + ], |
| 2876 | + "url": "ollama.com/library/gemma3:4b" |
| 2877 | + }, |
| 2878 | + "gemma-3-4b-it-bnb4-vllm-jp6": { |
| 2879 | + "title": "Gemma 3 4B \u276f vLLM bnb4 \u276f JetPack 6.1+", |
| 2880 | + "quantization": "bnb4", |
| 2881 | + "tags": [ |
| 2882 | + "gemma-3-4b-it", |
| 2883 | + "bnb4", |
| 2884 | + "vllm:jp6" |
| 2885 | + ] |
| 2886 | + }, |
| 2887 | + "gemma-3-4b-it-fp8-vllm-jp6": { |
| 2888 | + "title": "Gemma 3 4B \u276f vLLM fp8 \u276f JetPack 6.1+", |
| 2889 | + "quantization": "fp8", |
| 2890 | + "tags": [ |
| 2891 | + "gemma-3-4b-it", |
| 2892 | + "fp8", |
| 2893 | + "vllm:jp6" |
2811 | 2894 | ]
|
2812 | 2895 | },
|
2813 | 2896 | "gemma-3-4b-it-fp16-vllm-jp6": {
|
|
2819 | 2902 | "vllm:jp6"
|
2820 | 2903 | ]
|
2821 | 2904 | },
|
2822 |
| - "gemma-3-4b-it-q4_0-ollama-jp6": { |
2823 |
| - "title": "Gemma 3 4B \u276f ollama q4_0 \u276f JetPack 6.1+", |
2824 |
| - "quantization": "q4_0", |
| 2905 | + "gemma-3-12b-it-q4_k_m-ollama-jp6": { |
| 2906 | + "title": "Gemma 3 12B \u276f ollama q4_k_m \u276f JetPack 6.1+", |
| 2907 | + "quantization": "q4_k_m", |
2825 | 2908 | "tags": [
|
2826 |
| - "gemma-3-4b-it", |
2827 |
| - "q4_0", |
| 2909 | + "gemma-3-12b-it", |
| 2910 | + "q4_k_m", |
2828 | 2911 | "ollama:jp6"
|
| 2912 | + ], |
| 2913 | + "url": "ollama.com/library/gemma3:12b" |
| 2914 | + }, |
| 2915 | + "gemma-3-12b-it-bnb4-vllm-jp6": { |
| 2916 | + "title": "Gemma 3 12B \u276f vLLM bnb4 \u276f JetPack 6.1+", |
| 2917 | + "quantization": "bnb4", |
| 2918 | + "tags": [ |
| 2919 | + "gemma-3-12b-it", |
| 2920 | + "bnb4", |
| 2921 | + "vllm:jp6" |
| 2922 | + ] |
| 2923 | + }, |
| 2924 | + "gemma-3-12b-it-fp8-vllm-jp6": { |
| 2925 | + "title": "Gemma 3 12B \u276f vLLM fp8 \u276f JetPack 6.1+", |
| 2926 | + "quantization": "fp8", |
| 2927 | + "tags": [ |
| 2928 | + "gemma-3-12b-it", |
| 2929 | + "fp8", |
| 2930 | + "vllm:jp6" |
2829 | 2931 | ]
|
2830 | 2932 | },
|
2831 | 2933 | "gemma-3-12b-it-fp16-vllm-jp6": {
|
|
2837 | 2939 | "vllm:jp6"
|
2838 | 2940 | ]
|
2839 | 2941 | },
|
2840 |
| - "gemma-3-12b-it-q4_0-ollama-jp6": { |
2841 |
| - "title": "Gemma 3 12B \u276f ollama q4_0 \u276f JetPack 6.1+", |
2842 |
| - "quantization": "q4_0", |
| 2942 | + "gemma-3-27b-it-q4_k_m-ollama-jp6": { |
| 2943 | + "title": "Gemma 3 27B \u276f ollama q4_k_m \u276f JetPack 6.1+", |
| 2944 | + "quantization": "q4_k_m", |
2843 | 2945 | "tags": [
|
2844 |
| - "gemma-3-12b-it", |
2845 |
| - "q4_0", |
| 2946 | + "gemma-3-27b-it", |
| 2947 | + "q4_k_m", |
2846 | 2948 | "ollama:jp6"
|
| 2949 | + ], |
| 2950 | + "url": "ollama.com/library/gemma3:27b" |
| 2951 | + }, |
| 2952 | + "gemma-3-27b-it-bnb4-vllm-jp6": { |
| 2953 | + "title": "Gemma 3 27B \u276f vLLM bnb4 \u276f JetPack 6.1+", |
| 2954 | + "quantization": "bnb4", |
| 2955 | + "tags": [ |
| 2956 | + "gemma-3-27b-it", |
| 2957 | + "bnb4", |
| 2958 | + "vllm:jp6" |
2847 | 2959 | ]
|
2848 | 2960 | },
|
2849 |
| - "gemma-3-27b-it-fp16-vllm-jp6": { |
2850 |
| - "title": "Gemma 3 27B \u276f vLLM fp16 \u276f JetPack 6.1+", |
2851 |
| - "quantization": "fp16", |
| 2961 | + "gemma-3-27b-it-fp8-vllm-jp6": { |
| 2962 | + "title": "Gemma 3 27B \u276f vLLM fp8 \u276f JetPack 6.1+", |
| 2963 | + "quantization": "fp8", |
2852 | 2964 | "tags": [
|
2853 | 2965 | "gemma-3-27b-it",
|
2854 |
| - "fp16", |
| 2966 | + "fp8", |
2855 | 2967 | "vllm:jp6"
|
2856 | 2968 | ]
|
2857 | 2969 | },
|
2858 |
| - "gemma-3-27b-it-q4_0-ollama-jp6": { |
2859 |
| - "title": "Gemma 3 27B \u276f ollama q4_0 \u276f JetPack 6.1+", |
2860 |
| - "quantization": "q4_0", |
| 2970 | + "gemma-3-27b-it-fp16-vllm-jp6": { |
| 2971 | + "title": "Gemma 3 27B \u276f vLLM fp16 \u276f JetPack 6.1+", |
| 2972 | + "quantization": "fp16", |
2861 | 2973 | "tags": [
|
2862 | 2974 | "gemma-3-27b-it",
|
2863 |
| - "q4_0", |
2864 |
| - "ollama:jp6" |
| 2975 | + "fp16", |
| 2976 | + "vllm:jp6" |
2865 | 2977 | ]
|
2866 | 2978 | },
|
2867 | 2979 | "deepseek-r1-distill": {
|
|
5046 | 5158 | "open_webui"
|
5047 | 5159 | ],
|
5048 | 5160 | "child_order": [
|
5049 |
| - "gemma-3", |
5050 | 5161 | "deepseek-r1-distill",
|
5051 | 5162 | "qwen-2.5",
|
5052 | 5163 | "llama-3",
|
|
5055 | 5166 | "phi"
|
5056 | 5167 | ]
|
5057 | 5168 | },
|
| 5169 | + "vlm": { |
| 5170 | + "name": "Vision/Language Models (VLM)", |
| 5171 | + "tags": "models", |
| 5172 | + "refs": "resource", |
| 5173 | + "xref": false, |
| 5174 | + "max_batch_size": 1, |
| 5175 | + "max_context_len": null, |
| 5176 | + "prefill_chunk": null, |
| 5177 | + "chat_template": null, |
| 5178 | + "hf_token": null, |
| 5179 | + "property_order": [ |
| 5180 | + "url", |
| 5181 | + "docker_image", |
| 5182 | + "quantization", |
| 5183 | + "max_batch_size", |
| 5184 | + "max_context_len", |
| 5185 | + "prefill_chunk", |
| 5186 | + "chat_template", |
| 5187 | + "hf_token", |
| 5188 | + "cache_dir", |
| 5189 | + "docker_run", |
| 5190 | + "docker_cmd", |
| 5191 | + "docker_options", |
| 5192 | + "server_host", |
| 5193 | + "auto_update" |
| 5194 | + ], |
| 5195 | + "child_order": [ |
| 5196 | + "gemma-3" |
| 5197 | + ] |
| 5198 | + }, |
5058 | 5199 | "max_batch_size": {
|
5059 | 5200 | "name": "Max Batch Size",
|
5060 | 5201 | "tags": "number",
|
|
0 commit comments