diff --git a/src/llm.ts b/src/llm.ts
index 100a1ec7..e0fc89e2 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -545,10 +545,13 @@ export class LlamaCpp implements LLM {
    */
   private async ensureLlama(): Promise<Llama> {
     if (!this.llama) {
+      // QMD_FORCE_CPU=1 (or "true") forces CPU-only mode (useful for older GPUs like Pascal)
+      const forceCpu = process.env.QMD_FORCE_CPU === "1" || process.env.QMD_FORCE_CPU === "true";
       const llama = await getLlama({
         // attempt to build
-        build: "autoAttempt",
-        logLevel: LlamaLogLevel.error
+        build: "autoAttempt" as any,
+        logLevel: LlamaLogLevel.error,
+        gpu: forceCpu ? false : "auto",
       });
 
       if (llama.gpu === false) {