From a4a1f24efeb7377eb3ead02c08cd9515951eafbe Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Sat, 18 Oct 2025 11:05:05 -0700 Subject: [PATCH 1/6] no custom or quantized ops on win cuda --- tools/cmake/preset/llm.cmake | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/cmake/preset/llm.cmake b/tools/cmake/preset/llm.cmake index 6cd2482f717..c35ba2f633a 100644 --- a/tools/cmake/preset/llm.cmake +++ b/tools/cmake/preset/llm.cmake @@ -26,9 +26,13 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") # Linux-specific code here elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL - "WIN32" + "WIN32" ) - # Windows or other OS-specific code here + # Windows-specific code: disable quantized and custom ops when building with CUDA + if(EXECUTORCH_BUILD_CUDA) + set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED OFF) + set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM OFF) + endif() elseif(CMAKE_SYSTEM_NAME STREQUAL "Android") # Android-specific code here else() From 8bbe392e6c81591385a2fd65ff5ba75810fb6cb6 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Sat, 18 Oct 2025 11:06:04 -0700 Subject: [PATCH 2/6] lint --- tools/cmake/preset/llm.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/cmake/preset/llm.cmake b/tools/cmake/preset/llm.cmake index c35ba2f633a..650443b69fd 100644 --- a/tools/cmake/preset/llm.cmake +++ b/tools/cmake/preset/llm.cmake @@ -26,9 +26,10 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") # Linux-specific code here elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL - "WIN32" + "WIN32" ) - # Windows-specific code: disable quantized and custom ops when building with CUDA + # Windows-specific code: disable quantized and custom ops when building with + # CUDA if(EXECUTORCH_BUILD_CUDA) set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED OFF) set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM OFF) From 505536d188268ee2f79062ca66200217eb3b8e32 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Sat, 18 Oct 2025 11:33:17 -0700 Subject: [PATCH 3/6] just force them off --- tools/cmake/preset/llm.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/cmake/preset/llm.cmake b/tools/cmake/preset/llm.cmake index 650443b69fd..eb642d716bc 100644 --- a/tools/cmake/preset/llm.cmake +++ b/tools/cmake/preset/llm.cmake @@ -31,8 +31,8 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL # Windows-specific code: disable quantized and custom ops when building with # CUDA if(EXECUTORCH_BUILD_CUDA) - set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED OFF) - set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM OFF) + set(EXECUTORCH_BUILD_KERNELS_QUANTIZED OFF) + set(EXECUTORCH_BUILD_KERNELS_LLM OFF) endif() elseif(CMAKE_SYSTEM_NAME STREQUAL "Android") # Android-specific code here From e140b92224d697569bb62ec613066fe1f0c5da24 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 5 Nov 2025 12:40:48 -0800 Subject: [PATCH 4/6] restructure --- tools/cmake/preset/llm.cmake | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/cmake/preset/llm.cmake b/tools/cmake/preset/llm.cmake index 32efbfb3744..99222cf658a 100644 --- a/tools/cmake/preset/llm.cmake +++ b/tools/cmake/preset/llm.cmake @@ -13,11 +13,20 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_MODULE ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) -set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM ON) + set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON) -set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON) + set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON) +# Turn on the quantized and LLM kernels unless I'm on windows cuda build which +# currently doesn't support this due to using msvc. +if(NOT (EXECUTORCH_BUILD_CUDA AND (CMAKE_SYSTEM_NAME STREQUAL "Windows" + OR CMAKE_SYSTEM_NAME STREQUAL "WIN32")) +) + set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON) + set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM ON) +endif() + if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") set_overridable_option(EXECUTORCH_BUILD_COREML ON) set_overridable_option(EXECUTORCH_BUILD_MPS ON) @@ -29,12 +38,7 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL "WIN32" ) - # Windows-specific code: disable quantized and custom ops when building with - # CUDA - if(EXECUTORCH_BUILD_CUDA) - set(EXECUTORCH_BUILD_KERNELS_QUANTIZED OFF) - set(EXECUTORCH_BUILD_KERNELS_LLM OFF) - endif() + elseif(CMAKE_SYSTEM_NAME STREQUAL "Android") # Android-specific code here else() From fb1f877a42741675834619f9d539593bf27eac76 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 5 Nov 2025 12:47:38 -0800 Subject: [PATCH 5/6] format --- tools/cmake/preset/llm.cmake | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/cmake/preset/llm.cmake b/tools/cmake/preset/llm.cmake index 99222cf658a..c5ea8bca6d4 100644 --- a/tools/cmake/preset/llm.cmake +++ b/tools/cmake/preset/llm.cmake @@ -13,9 +13,7 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_MODULE ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) - set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON) - set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON) # Turn on the quantized and LLM kernels unless I'm on windows cuda build which From 40bcbfcd459c388dd7728ed0763705e06b94df5e Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 5 Nov 2025 12:48:07 -0800 Subject: [PATCH 6/6] comment --- tools/cmake/preset/llm.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cmake/preset/llm.cmake b/tools/cmake/preset/llm.cmake index c5ea8bca6d4..231a25f0c1e 100644 --- a/tools/cmake/preset/llm.cmake +++ b/tools/cmake/preset/llm.cmake @@ -16,7 +16,7 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON) set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON) -# Turn on the quantized and LLM kernels unless I'm on windows cuda build which +# Turn on the quantized and LLM kernels unless on windows cuda build which # currently doesn't support this due to using msvc. if(NOT (EXECUTORCH_BUILD_CUDA AND (CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL "WIN32"))