From ca31e4cddfcaf4ee573ff1652cd14bbbd76b269d Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Fri, 7 Nov 2025 13:28:25 -0800
Subject: [PATCH 1/3] Fix windows build cannot find version.py

When I install executorch on windows I run into this error:

error: [Errno 2] No such file or directory: 'pip-out\\lib.win-amd64-cpython-312\\executorch\\version.py'

It turns out sometimes the dst directory (in this case
pip-out\lib.win-amd64-cpython-312\executorch) is not installed before we write
version.py into it. This PR adds a mkpath call to make sure it's always there.
---
 setup.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/setup.py b/setup.py
index 45b69b8b828..62eed183cae 100644
--- a/setup.py
+++ b/setup.py
@@ -635,6 +635,9 @@ def run(self):
             dst_root = self.get_package_dir(".")
         else:
             dst_root = os.path.join(self.build_lib, "executorch")
+        # On Windows the package directory might not exist yet when building from a
+        # clean tree. Ensure it is created before we attempt to write version.py.
+        self.mkpath(dst_root)
 
         # Create the version file.
         Version.write_to_python_file(os.path.join(dst_root, "version.py"))

From 2ebc280333e26bf4a2eeb377f63c6f9c60e1ae1f Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Fri, 7 Nov 2025 14:53:37 -0800
Subject: [PATCH 2/3] Exit if symlink is not enabled on Windows

---
 install_executorch.bat | 18 +++++++++++++++++-
 setup.py               |  3 ---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/install_executorch.bat b/install_executorch.bat
index e6d5c5db363..50fb6fd9b77 100644
--- a/install_executorch.bat
+++ b/install_executorch.bat
@@ -1,4 +1,5 @@
 @ECHO OFF
+setlocal EnableDelayedExpansion
 rem Copyright (c) Meta Platforms, Inc. and affiliates.
 rem All rights reserved.
 
@@ -7,9 +8,24 @@
 rem This batch file provides a basic functionality similar to the bash script.
 
 cd /d "%~dp0"
 
+rem Verify that Git checked out symlinks correctly. Without this the Python install
+rem will fail when attempting to copy files from src\executorch.
+where git >NUL 2>&1
+if not errorlevel 1 (
+    set "GIT_SYMLINKS="
+    for /f "usebackq delims=" %%i in (`git config --get core.symlinks 2^>nul`) do set "GIT_SYMLINKS=%%i"
+    if /I not "!GIT_SYMLINKS!"=="true" (
+        echo ExecuTorch requires Git symlink support on Windows.
+        echo Enable Developer Mode and run: git config --global core.symlinks true
+        echo Re-clone the repository after enabling symlinks, then rerun install_executorch.bat.
+        exit /b 1
+    )
+)
+
 rem Under windows, it's always python
 set PYTHON_EXECUTABLE=python
 
 "%PYTHON_EXECUTABLE%" install_executorch.py %*
-exit /b %ERRORLEVEL%
+set "EXIT_CODE=%ERRORLEVEL%"
+endlocal & exit /b %EXIT_CODE%
diff --git a/setup.py b/setup.py
index 62eed183cae..45b69b8b828 100644
--- a/setup.py
+++ b/setup.py
@@ -635,9 +635,6 @@ def run(self):
             dst_root = self.get_package_dir(".")
         else:
             dst_root = os.path.join(self.build_lib, "executorch")
-        # On Windows the package directory might not exist yet when building from a
-        # clean tree. Ensure it is created before we attempt to write version.py.
-        self.mkpath(dst_root)
 
         # Create the version file.
         Version.write_to_python_file(os.path.join(dst_root, "version.py"))

From 90bee21b612d9e6758f7c7a564c797ce79da7083 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Mon, 10 Nov 2025 16:56:07 -0800
Subject: [PATCH 3/3] Add Vulkan support for whisper runner

---
 examples/models/whisper/CMakeLists.txt | 25 ++++++++-----
 extension/asr/runner/runner.cpp        | 37 ++++++++++++++++---
 extension/llm/runner/util.h            | 50 +++++++++++++++++++++-----
 tools/cmake/preset/llm.cmake           |  4 +--
 4 files changed, 91 insertions(+), 25 deletions(-)

diff --git a/examples/models/whisper/CMakeLists.txt b/examples/models/whisper/CMakeLists.txt
index 70f5892baa7..870b0d06d91 100644
--- a/examples/models/whisper/CMakeLists.txt
+++ b/examples/models/whisper/CMakeLists.txt
@@ -16,6 +16,11 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 # Let files say "include <executorch/path/to/header.h>"
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
+# Options
+option(EXECUTORCH_BUILD_CUDA "Build with CUDA support" OFF)
+option(EXECUTORCH_BUILD_METAL "Build with Metal support" OFF)
+option(EXECUTORCH_BUILD_VULKAN "Build with Vulkan support" OFF)
+
 # Need this for gflags for some reason
 set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
 find_package(gflags REQUIRED)
@@ -26,15 +31,12 @@ find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
 set(_link_libraries executorch gflags)
 set(_srcs multimodal.cpp)
 
-list(
-  APPEND
-  _link_libraries
-  optimized_native_cpu_ops_lib
-  quantized_ops_lib
-  custom_ops
-  cpublas
-  eigen_blas
-)
+list(APPEND _link_libraries optimized_native_cpu_ops_lib cpublas eigen_blas)
+
+# Quantized ops and custom llm kernels are not compiled on MSVC
+if(NOT MSVC)
+  list(APPEND _link_libraries quantized_ops_lib custom_ops)
+endif()
 
 # XNNPACK
 if(TARGET xnnpack_backend)
@@ -78,6 +80,11 @@ if(EXECUTORCH_BUILD_METAL)
   executorch_target_link_options_shared_lib(metal_backend)
 endif()
 
+if(EXECUTORCH_BUILD_VULKAN)
+  list(APPEND _link_libraries vulkan_backend)
+  executorch_target_link_options_shared_lib(vulkan_backend)
+endif()
+
 # Add tokenizers
 list(APPEND _link_libraries tokenizers::tokenizers)
 
diff --git a/extension/asr/runner/runner.cpp b/extension/asr/runner/runner.cpp
index 6bbb44e4faa..66de2423d25 100644
--- a/extension/asr/runner/runner.cpp
+++ b/extension/asr/runner/runner.cpp
@@ -178,12 +178,28 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
 
   // Convert preprocessed_features to expected dtype if needed
   if (preprocessed_features->scalar_type() != expected_dtype) {
-    if (expected_dtype == ::executorch::aten::ScalarType::BFloat16) {
+    ET_LOG(
+        Info,
+        "Encoder input dtype mismatch: got %s, expected %s",
+        ::executorch::runtime::toString(preprocessed_features->scalar_type()),
+        ::executorch::runtime::toString(expected_dtype));
+    if ((expected_dtype != ::executorch::aten::ScalarType::BFloat16 &&
+         expected_dtype != ::executorch::aten::ScalarType::Half)) {
       ET_LOG(
-          Info,
-          "Converting audio features from %s to BFloat16. Before converting, first value = %f",
-          ::executorch::runtime::toString(preprocessed_features->scalar_type()),
-          preprocessed_features->mutable_data_ptr<float>()[0]);
+          Error,
+          "Unsupported expected dtype %s for encoder input",
+          ::executorch::runtime::toString(expected_dtype));
+      return ::executorch::runtime::Error::Internal;
+    }
+
+    ET_LOG(
+        Info,
+        "Converting audio features from %s to %s. Before converting, first value = %f",
+        ::executorch::runtime::toString(preprocessed_features->scalar_type()),
+        ::executorch::runtime::toString(expected_dtype),
+        preprocessed_features->mutable_data_ptr<float>()[0]);
+
+    if (expected_dtype == ::executorch::aten::ScalarType::BFloat16) {
       auto convert_result = ::executorch::extension::llm::convert_to_bfloat16(
           preprocessed_features);
       ET_CHECK_OK_OR_RETURN_ERROR(convert_result.error());
@@ -194,6 +210,17 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
           static_cast<float>(
               preprocessed_features
                   ->mutable_data_ptr<::executorch::aten::BFloat16>()[0]));
+    } else {
+      auto convert_result = ::executorch::extension::llm::convert_to_float16(
+          preprocessed_features);
+      ET_CHECK_OK_OR_RETURN_ERROR(convert_result.error());
+      preprocessed_features = convert_result.get();
+      ET_LOG(
+          Info,
+          "Conversion complete, first value = %f",
+          static_cast<float>(
+              preprocessed_features
+                  ->mutable_data_ptr<::executorch::aten::Half>()[0]));
     }
   }
 
diff --git a/extension/llm/runner/util.h b/extension/llm/runner/util.h
index e87d625f140..b9067b4a930 100644
--- a/extension/llm/runner/util.h
+++ b/extension/llm/runner/util.h
@@ -153,25 +153,59 @@ inline runtime::Result populate_start_pos_or_cache_position(
  * Helper function to convert a float tensor to bfloat16.
  * Creates a new tensor with bfloat16 dtype and copies/converts the data.
  */
+// Note: implementation moved to templated `convert_to_fp` below. Keep a
+// single implementation (the templated one) to avoid duplicate definitions.
+
+// Note: implementation moved to templated `convert_to_fp` below. Keep a
+// single implementation (the templated one) to avoid duplicate definitions.
+
+/**
+ * Generic helper to convert a float tensor to a target floating-point-like
+ * dtype (e.g., Half, BFloat16). This implements the core conversion loop
+ * and is used by the strongly-typed wrappers below so existing call sites
+ * remain unchanged.
+ */
+template <
+    typename TargetCppType,
+    ::executorch::aten::ScalarType TargetScalarType>
 inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
-convert_to_bfloat16(const ::executorch::extension::TensorPtr& src_tensor) {
+convert_to_fp(const ::executorch::extension::TensorPtr& src_tensor) {
   ET_CHECK_OR_RETURN_ERROR(
       src_tensor->scalar_type() == ::executorch::aten::ScalarType::Float,
       InvalidArgument,
-      "BFloat16 conversion only supported from Float source data");
+      "Conversion only supported from Float source data");
+
+  static_assert(
+      TargetScalarType == ::executorch::aten::ScalarType::Half ||
+          TargetScalarType == ::executorch::aten::ScalarType::BFloat16,
+      "convert_to_fp only supports Half or BFloat16 target dtypes");
 
   const auto num_elements = static_cast<size_t>(src_tensor->numel());
   const float* float_data = src_tensor->const_data_ptr<float>();
-  auto bf16_tensor = ::executorch::extension::empty_like(
-      src_tensor, ::executorch::aten::ScalarType::BFloat16);
-  auto* bf16_data =
-      bf16_tensor->mutable_data_ptr<::executorch::aten::BFloat16>();
+  auto out_tensor =
+      ::executorch::extension::empty_like(src_tensor, TargetScalarType);
+  auto* out_data = out_tensor->mutable_data_ptr<TargetCppType>();
 
   for (size_t i = 0; i < num_elements; ++i) {
-    bf16_data[i] = ::executorch::aten::BFloat16(float_data[i]);
+    out_data[i] = TargetCppType(float_data[i]);
   }
 
-  return bf16_tensor;
+  return out_tensor;
+}
+
+// Keep the original API names as thin wrappers for backward compatibility.
+inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
+convert_to_bfloat16(const ::executorch::extension::TensorPtr& src_tensor) {
+  return convert_to_fp<
+      ::executorch::aten::BFloat16,
+      ::executorch::aten::ScalarType::BFloat16>(src_tensor);
+}
+
+inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
+convert_to_float16(const ::executorch::extension::TensorPtr& src_tensor) {
+  return convert_to_fp<
+      ::executorch::aten::Half,
+      ::executorch::aten::ScalarType::Half>(src_tensor);
 }
 
 } // namespace llm
diff --git a/tools/cmake/preset/llm.cmake b/tools/cmake/preset/llm.cmake
index 231a25f0c1e..36aabecd6f8 100644
--- a/tools/cmake/preset/llm.cmake
+++ b/tools/cmake/preset/llm.cmake
@@ -18,9 +18,7 @@ set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON)
 
 # Turn on the quantized and LLM kernels unless on windows cuda build which
 # currently doesn't support this due to using msvc.
-if(NOT (EXECUTORCH_BUILD_CUDA AND (CMAKE_SYSTEM_NAME STREQUAL "Windows"
-        OR CMAKE_SYSTEM_NAME STREQUAL "WIN32"))
-)
+if(NOT MSVC)
   set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)
   set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM ON)
 endif()