Skip to content

Commit ed368d7

Browse files
authored
[Model] Temporarily removing support for q0f32 models due to correctness issues (#745)
Certain q0f32 models are running into correctness issues after the TVM FFI refactor:
1. Qwen3-0.6B-q0f32-MLC
2. Qwen2.5-0.5B-Instruct-q0f32-MLC
3. Qwen2.5-Coder-0.5B-Instruct-q0f32-MLC
4. Qwen2-0.5B-Instruct-q0f32-MLC
5. Llama-3.2-1B-Instruct-q0f32-MLC

These models have been temporarily commented out in `config.ts` while the issues are being debugged. If you need to use one of these specific models, please use WebLLM v0.2.79 (https://www.npmjs.com/package/@mlc-ai/web-llm/v/0.2.79).
1 parent 0f35526 commit ed368d7

File tree

1 file changed

+71
-66
lines changed

1 file changed

+71
-66
lines changed

src/config.ts

Lines changed: 71 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -339,19 +339,20 @@ export const prebuiltAppConfig: AppConfig = {
339339
context_window_size: 4096,
340340
},
341341
},
342-
{
343-
model: "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q0f32-MLC",
344-
model_id: "Llama-3.2-1B-Instruct-q0f32-MLC",
345-
model_lib:
346-
modelLibURLPrefix +
347-
modelVersion +
348-
"/Llama-3.2-1B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
349-
vram_required_MB: 5106.26,
350-
low_resource_required: true,
351-
overrides: {
352-
context_window_size: 4096,
353-
},
354-
},
342+
// TODO: temporarily commenting out q0f32 models due to correctness issues
343+
// {
344+
// model: "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q0f32-MLC",
345+
// model_id: "Llama-3.2-1B-Instruct-q0f32-MLC",
346+
// model_lib:
347+
// modelLibURLPrefix +
348+
// modelVersion +
349+
// "/Llama-3.2-1B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
350+
// vram_required_MB: 5106.26,
351+
// low_resource_required: true,
352+
// overrides: {
353+
// context_window_size: 4096,
354+
// },
355+
// },
355356
{
356357
model: "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q0f16-MLC",
357358
model_id: "Llama-3.2-1B-Instruct-q0f16-MLC",
@@ -1093,19 +1094,20 @@ export const prebuiltAppConfig: AppConfig = {
10931094
context_window_size: 4096,
10941095
},
10951096
},
1096-
{
1097-
model: "https://huggingface.co/mlc-ai/Qwen3-0.6B-q0f32-MLC",
1098-
model_id: "Qwen3-0.6B-q0f32-MLC",
1099-
model_lib:
1100-
modelLibURLPrefix +
1101-
modelVersion +
1102-
"/Qwen3-0.6B-q0f32-ctx4k_cs1k-webgpu.wasm",
1103-
vram_required_MB: 3843.25,
1104-
low_resource_required: true,
1105-
overrides: {
1106-
context_window_size: 4096,
1107-
},
1108-
},
1097+
// TODO: temporarily commenting out q0f32 models due to correctness issues
1098+
// {
1099+
// model: "https://huggingface.co/mlc-ai/Qwen3-0.6B-q0f32-MLC",
1100+
// model_id: "Qwen3-0.6B-q0f32-MLC",
1101+
// model_lib:
1102+
// modelLibURLPrefix +
1103+
// modelVersion +
1104+
// "/Qwen3-0.6B-q0f32-ctx4k_cs1k-webgpu.wasm",
1105+
// vram_required_MB: 3843.25,
1106+
// low_resource_required: true,
1107+
// overrides: {
1108+
// context_window_size: 4096,
1109+
// },
1110+
// },
11091111
{
11101112
model: "https://huggingface.co/mlc-ai/Qwen3-1.7B-q4f16_1-MLC",
11111113
model_id: "Qwen3-1.7B-q4f16_1-MLC",
@@ -1224,19 +1226,20 @@ export const prebuiltAppConfig: AppConfig = {
12241226
context_window_size: 4096,
12251227
},
12261228
},
1227-
{
1228-
model: "https://huggingface.co/mlc-ai/Qwen2.5-0.5B-Instruct-q0f32-MLC",
1229-
model_id: "Qwen2.5-0.5B-Instruct-q0f32-MLC",
1230-
model_lib:
1231-
modelLibURLPrefix +
1232-
modelVersion +
1233-
"/Qwen2-0.5B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
1234-
low_resource_required: true,
1235-
vram_required_MB: 2654.75,
1236-
overrides: {
1237-
context_window_size: 4096,
1238-
},
1239-
},
1229+
// TODO: temporarily commenting out q0f32 models due to correctness issues
1230+
// {
1231+
// model: "https://huggingface.co/mlc-ai/Qwen2.5-0.5B-Instruct-q0f32-MLC",
1232+
// model_id: "Qwen2.5-0.5B-Instruct-q0f32-MLC",
1233+
// model_lib:
1234+
// modelLibURLPrefix +
1235+
// modelVersion +
1236+
// "/Qwen2-0.5B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
1237+
// low_resource_required: true,
1238+
// vram_required_MB: 2654.75,
1239+
// overrides: {
1240+
// context_window_size: 4096,
1241+
// },
1242+
// },
12401243
{
12411244
model: "https://huggingface.co/mlc-ai/Qwen2.5-1.5B-Instruct-q4f16_1-MLC",
12421245
model_id: "Qwen2.5-1.5B-Instruct-q4f16_1-MLC",
@@ -1358,20 +1361,21 @@ export const prebuiltAppConfig: AppConfig = {
13581361
context_window_size: 4096,
13591362
},
13601363
},
1361-
{
1362-
model:
1363-
"https://huggingface.co/mlc-ai/Qwen2.5-Coder-0.5B-Instruct-q0f32-MLC",
1364-
model_id: "Qwen2.5-Coder-0.5B-Instruct-q0f32-MLC",
1365-
model_lib:
1366-
modelLibURLPrefix +
1367-
modelVersion +
1368-
"/Qwen2-0.5B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
1369-
low_resource_required: true,
1370-
vram_required_MB: 2654.75,
1371-
overrides: {
1372-
context_window_size: 4096,
1373-
},
1374-
},
1364+
// TODO: temporarily commenting out q0f32 models due to correctness issues
1365+
// {
1366+
// model:
1367+
// "https://huggingface.co/mlc-ai/Qwen2.5-Coder-0.5B-Instruct-q0f32-MLC",
1368+
// model_id: "Qwen2.5-Coder-0.5B-Instruct-q0f32-MLC",
1369+
// model_lib:
1370+
// modelLibURLPrefix +
1371+
// modelVersion +
1372+
// "/Qwen2-0.5B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
1373+
// low_resource_required: true,
1374+
// vram_required_MB: 2654.75,
1375+
// overrides: {
1376+
// context_window_size: 4096,
1377+
// },
1378+
// },
13751379
{
13761380
model:
13771381
"https://huggingface.co/mlc-ai/Qwen2.5-Coder-1.5B-Instruct-q4f16_1-MLC",
@@ -1698,19 +1702,20 @@ export const prebuiltAppConfig: AppConfig = {
16981702
context_window_size: 4096,
16991703
},
17001704
},
1701-
{
1702-
model: "https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q0f32-MLC",
1703-
model_id: "Qwen2-0.5B-Instruct-q0f32-MLC",
1704-
model_lib:
1705-
modelLibURLPrefix +
1706-
modelVersion +
1707-
"/Qwen2-0.5B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
1708-
low_resource_required: true,
1709-
vram_required_MB: 2654.75,
1710-
overrides: {
1711-
context_window_size: 4096,
1712-
},
1713-
},
1705+
// TODO: temporarily commenting out q0f32 models due to correctness issues
1706+
// {
1707+
// model: "https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q0f32-MLC",
1708+
// model_id: "Qwen2-0.5B-Instruct-q0f32-MLC",
1709+
// model_lib:
1710+
// modelLibURLPrefix +
1711+
// modelVersion +
1712+
// "/Qwen2-0.5B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
1713+
// low_resource_required: true,
1714+
// vram_required_MB: 2654.75,
1715+
// overrides: {
1716+
// context_window_size: 4096,
1717+
// },
1718+
// },
17141719
{
17151720
model: "https://huggingface.co/mlc-ai/Qwen2-1.5B-Instruct-q4f16_1-MLC",
17161721
model_id: "Qwen2-1.5B-Instruct-q4f16_1-MLC",

0 commit comments

Comments
 (0)