From 94bd5078e6c6cf5acb5010043466cd071514fde3 Mon Sep 17 00:00:00 2001
From: JasonOA888
Date: Mon, 9 Mar 2026 15:42:09 +0800
Subject: [PATCH] feat(llm): add QMD_FORCE_CUDA env var to disable Vulkan
 offloading

On Windows VMs with para-virtualized GPUs, QMD may use Vulkan offloading
instead of pure CUDA mode even when CUDA is available. This adds
QMD_FORCE_CUDA env var to force CUDA and disable Vulkan:

    export QMD_FORCE_CUDA=1
    qmd query "test"

Fixes #278
---
 src/llm.ts | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/llm.ts b/src/llm.ts
index 100a1ec7..4a9d3628 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -545,10 +545,23 @@ export class LlamaCpp implements LLM {
    */
   private async ensureLlama(): Promise<void> {
     if (!this.llama) {
+      // QMD_FORCE_CPU=1 forces CPU-only mode (useful for older GPUs like Pascal)
+      // QMD_FORCE_CUDA=1 forces CUDA and disables Vulkan offloading
+      const forceCpu = process.env.QMD_FORCE_CPU === "1" || process.env.QMD_FORCE_CPU === "true";
+      const forceCuda = process.env.QMD_FORCE_CUDA === "1" || process.env.QMD_FORCE_CUDA === "true";
+
+      let gpuSetting: "auto" | "cuda" | false = "auto";
+      if (forceCpu) {
+        gpuSetting = false;
+      } else if (forceCuda) {
+        gpuSetting = "cuda";
+      }
+
       const llama = await getLlama({
         // attempt to build
-        build: "autoAttempt",
-        logLevel: LlamaLogLevel.error
+        build: "autoAttempt" as any,
+        logLevel: LlamaLogLevel.error,
+        gpu: gpuSetting,
       });
       if (llama.gpu === false) {