25 changes: 16 additions & 9 deletions examples/models/whisper/CMakeLists.txt
@@ -16,6 +16,11 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 # Let files say "include <executorch/path/to/header.h>"
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
+# Options
+option(EXECUTORCH_BUILD_CUDA "Build with CUDA support" OFF)
+option(EXECUTORCH_BUILD_METAL "Build with Metal support" OFF)
+option(EXECUTORCH_BUILD_VULKAN "Build with Vulkan support" OFF)
+
 # Need this for gflags for some reason
 set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
 find_package(gflags REQUIRED)
@@ -26,15 +31,12 @@ find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
 set(_link_libraries executorch gflags)
 set(_srcs multimodal.cpp)
 
-list(
-  APPEND
-  _link_libraries
-  optimized_native_cpu_ops_lib
-  quantized_ops_lib
-  custom_ops
-  cpublas
-  eigen_blas
-)
+list(APPEND _link_libraries optimized_native_cpu_ops_lib cpublas eigen_blas)
+
+# Quantized ops and custom llm kernels are not compiled on MSVC
+if(NOT MSVC)
+  list(APPEND _link_libraries quantized_ops_lib custom_ops)
+endif()
 
 # XNNPACK
 if(TARGET xnnpack_backend)
@@ -78,6 +80,11 @@ if(EXECUTORCH_BUILD_METAL)
   executorch_target_link_options_shared_lib(metal_backend)
 endif()
 
+if(EXECUTORCH_BUILD_VULKAN)
+  list(APPEND _link_libraries vulkan_backend)
+  executorch_target_link_options_shared_lib(vulkan_backend)
+endif()
+
 # Add tokenizers
 list(APPEND _link_libraries tokenizers::tokenizers)
 
37 changes: 32 additions & 5 deletions extension/asr/runner/runner.cpp
@@ -178,12 +178,28 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
 
   // Convert preprocessed_features to expected dtype if needed
   if (preprocessed_features->scalar_type() != expected_dtype) {
-    if (expected_dtype == ::executorch::aten::ScalarType::BFloat16) {
-      ET_LOG(
-          Info,
-          "Converting audio features from %s to BFloat16. Before converting, first value = %f",
-          ::executorch::runtime::toString(preprocessed_features->scalar_type()),
-          preprocessed_features->mutable_data_ptr<float>()[0]);
+    ET_LOG(
+        Info,
+        "Encoder input dtype mismatch: got %s, expected %s",
+        ::executorch::runtime::toString(preprocessed_features->scalar_type()),
+        ::executorch::runtime::toString(expected_dtype));
+    if ((expected_dtype != ::executorch::aten::ScalarType::BFloat16 &&
+         expected_dtype != ::executorch::aten::ScalarType::Half)) {
+      ET_LOG(
+          Error,
+          "Unsupported expected dtype %s for encoder input",
+          ::executorch::runtime::toString(expected_dtype));
+      return ::executorch::runtime::Error::Internal;
+    }
+
+    ET_LOG(
+        Info,
+        "Converting audio features from %s to %s. Before converting, first value = %f",
+        ::executorch::runtime::toString(preprocessed_features->scalar_type()),
+        ::executorch::runtime::toString(expected_dtype),
+        preprocessed_features->mutable_data_ptr<float>()[0]);
+
+    if (expected_dtype == ::executorch::aten::ScalarType::BFloat16) {
       auto convert_result = ::executorch::extension::llm::convert_to_bfloat16(
           preprocessed_features);
       ET_CHECK_OK_OR_RETURN_ERROR(convert_result.error());
@@ -194,6 +210,17 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
           static_cast<float>(
               preprocessed_features
                   ->mutable_data_ptr<::executorch::aten::BFloat16>()[0]));
+    } else {
+      auto convert_result = ::executorch::extension::llm::convert_to_float16(
+          preprocessed_features);
+      ET_CHECK_OK_OR_RETURN_ERROR(convert_result.error());
+      preprocessed_features = convert_result.get();
+      ET_LOG(
+          Info,
+          "Conversion complete, first value = %f",
+          static_cast<float>(
+              preprocessed_features
+                  ->mutable_data_ptr<::executorch::aten::Half>()[0]));
     }
   }
 
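Taken together, the runner change reduces to a small, explicit policy: log the dtype mismatch, reject any target other than the two 16-bit float types, then dispatch to the matching converter. Below is a minimal caller-side sketch of that flow. It assumes the `<executorch/...>` include convention noted in the CMake file above and the conversion helpers added in extension/llm/runner/util.h (next file); the wrapper name `match_encoder_dtype` is illustrative, not part of the runner API.

#include <executorch/extension/llm/runner/util.h>

using ::executorch::aten::ScalarType;
using ::executorch::extension::TensorPtr;
using ::executorch::runtime::Error;
using ::executorch::runtime::Result;

// Hypothetical helper mirroring the dispatch in AsrRunner::transcribe():
// pass features through if the dtype already matches, otherwise convert.
Result<TensorPtr> match_encoder_dtype(TensorPtr features, ScalarType expected) {
  if (features->scalar_type() == expected) {
    return features; // nothing to convert
  }
  switch (expected) {
    case ScalarType::BFloat16:
      return ::executorch::extension::llm::convert_to_bfloat16(features);
    case ScalarType::Half:
      return ::executorch::extension::llm::convert_to_float16(features);
    default:
      // Same policy as the runner: only Half and BFloat16 are supported.
      return Error::Internal;
  }
}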
47 changes: 39 additions & 8 deletions extension/llm/runner/util.h
@@ -153,25 +153,56 @@ inline runtime::Result<TensorPtr> populate_start_pos_or_cache_position(
  * Helper function to convert a float tensor to bfloat16.
  * Creates a new tensor with bfloat16 dtype and copies/converts the data.
  */
+// Note: implementation moved to the templated `convert_to_fp` below. Keep a
+// single implementation (the templated one) to avoid duplicate definitions.
+
+/**
+ * Generic helper to convert a float tensor to a target floating-point
+ * dtype (e.g., Half, BFloat16). This implements the core conversion loop
+ * and is used by the strongly-typed wrappers below so existing call sites
+ * remain unchanged.
+ */
+template <
+    typename TargetCppType,
+    ::executorch::aten::ScalarType TargetScalarType>
 inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
-convert_to_bfloat16(const ::executorch::extension::TensorPtr& src_tensor) {
+convert_to_fp(const ::executorch::extension::TensorPtr& src_tensor) {
   ET_CHECK_OR_RETURN_ERROR(
       src_tensor->scalar_type() == ::executorch::aten::ScalarType::Float,
       InvalidArgument,
-      "BFloat16 conversion only supported from Float source data");
+      "Conversion only supported from Float source data");
+
+  static_assert(
+      TargetScalarType == ::executorch::aten::ScalarType::Half ||
+          TargetScalarType == ::executorch::aten::ScalarType::BFloat16,
+      "convert_to_fp only supports Half or BFloat16 target dtypes");
 
   const auto num_elements = static_cast<size_t>(src_tensor->numel());
   const float* float_data = src_tensor->const_data_ptr<float>();
 
-  auto bf16_tensor = ::executorch::extension::empty_like(
-      src_tensor, ::executorch::aten::ScalarType::BFloat16);
-  auto* bf16_data =
-      bf16_tensor->mutable_data_ptr<::executorch::aten::BFloat16>();
+  auto out_tensor =
+      ::executorch::extension::empty_like(src_tensor, TargetScalarType);
+  auto* out_data = out_tensor->mutable_data_ptr<TargetCppType>();
   for (size_t i = 0; i < num_elements; ++i) {
-    bf16_data[i] = ::executorch::aten::BFloat16(float_data[i]);
+    out_data[i] = TargetCppType(float_data[i]);
  }
 
-  return bf16_tensor;
+  return out_tensor;
 }
 
+// Keep the original API names as thin wrappers for backward compatibility.
+inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
+convert_to_bfloat16(const ::executorch::extension::TensorPtr& src_tensor) {
+  return convert_to_fp<
+      ::executorch::aten::BFloat16,
+      ::executorch::aten::ScalarType::BFloat16>(src_tensor);
+}
+
+inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
+convert_to_float16(const ::executorch::extension::TensorPtr& src_tensor) {
+  return convert_to_fp<
+      ::executorch::aten::Half,
+      ::executorch::aten::ScalarType::Half>(src_tensor);
+}
+
 } // namespace llm
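For reference on what the element-wise `TargetCppType(float_data[i])` cast above does numerically: a bfloat16 is the upper 16 bits of an IEEE-754 binary32, so float-to-bfloat16 drops the low 16 mantissa bits while keeping the full float32 exponent range. The self-contained sketch below illustrates that storage format using plain truncation; the actual `::executorch::aten::BFloat16` constructor may round to nearest-even instead, so treat this as an illustration of the format rather than the library's exact semantics.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Truncating float -> bfloat16: keep sign, 8 exponent bits, top 7 mantissa bits.
static std::uint16_t float_to_bf16_truncate(float f) {
  std::uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits)); // type-pun safely via memcpy
  return static_cast<std::uint16_t>(bits >> 16);
}

// bfloat16 -> float is exact: reattach 16 zero bits as the low mantissa.
static float bf16_to_float(std::uint16_t h) {
  std::uint32_t bits = static_cast<std::uint32_t>(h) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

int main() {
  float v = 1.2345f;
  std::uint16_t h = float_to_bf16_truncate(v);
  std::printf("%f -> 0x%04x -> %f\n", v, h, bf16_to_float(h));
  return 0;
}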
18 changes: 17 additions & 1 deletion install_executorch.bat
@@ -1,4 +1,5 @@
 @ECHO OFF
+setlocal EnableDelayedExpansion
 
 rem Copyright (c) Meta Platforms, Inc. and affiliates.
 rem All rights reserved.
@@ -7,9 +8,24 @@ rem This batch file provides basic functionality similar to the bash script.
 
 cd /d "%~dp0"
 
+rem Verify that Git checked out symlinks correctly. Without this the Python install
+rem will fail when attempting to copy files from src\executorch.
+where git >NUL 2>&1
+if not errorlevel 1 (
+    set "GIT_SYMLINKS="
+    for /f "usebackq delims=" %%i in (`git config --get core.symlinks 2^>nul`) do set "GIT_SYMLINKS=%%i"
+    if /I not "!GIT_SYMLINKS!"=="true" (
+        echo ExecuTorch requires Git symlink support on Windows.
+        echo Enable Developer Mode and run: git config --global core.symlinks true
+        echo Re-clone the repository after enabling symlinks, then rerun install_executorch.bat.
+        exit /b 1
+    )
+)
+
 rem Under windows, it's always python
 set PYTHON_EXECUTABLE=python
 
 "%PYTHON_EXECUTABLE%" install_executorch.py %*
 
-exit /b %ERRORLEVEL%
+set "EXIT_CODE=%ERRORLEVEL%"
+endlocal & exit /b %EXIT_CODE%
4 changes: 1 addition & 3 deletions tools/cmake/preset/llm.cmake
@@ -18,9 +18,7 @@ set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON)
 
 # Turn on the quantized and LLM kernels unless on windows cuda build which
 # currently doesn't support this due to using msvc.
-if(NOT (EXECUTORCH_BUILD_CUDA AND (CMAKE_SYSTEM_NAME STREQUAL "Windows"
-        OR CMAKE_SYSTEM_NAME STREQUAL "WIN32"))
-)
+if(NOT MSVC)
   set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)
   set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM ON)
 endif()