From ca31e4cddfcaf4ee573ff1652cd14bbbd76b269d Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Fri, 7 Nov 2025 13:28:25 -0800
Subject: [PATCH 1/3] Fix windows build cannot find version.py

When I install executorch on windows I run into this error:

error: [Errno 2] No such file or directory: 'pip-out\\lib.win-amd64-cpython-312\\executorch\\version.py'

It turns out sometimes the dst directory (in this case
pip-out\lib.win-amd64-cpython-312\executorch) is not installed before we write
version.py into it. This PR adds a mkpath call to make sure it's always there.
---
 setup.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/setup.py b/setup.py
index 45b69b8b828..62eed183cae 100644
--- a/setup.py
+++ b/setup.py
@@ -635,6 +635,9 @@ def run(self):
             dst_root = self.get_package_dir(".")
         else:
             dst_root = os.path.join(self.build_lib, "executorch")
+        # On Windows the package directory might not exist yet when building from a
+        # clean tree. Ensure it is created before we attempt to write version.py.
+        self.mkpath(dst_root)
 
         # Create the version file.
         Version.write_to_python_file(os.path.join(dst_root, "version.py"))

From 2ebc280333e26bf4a2eeb377f63c6f9c60e1ae1f Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Fri, 7 Nov 2025 14:53:37 -0800
Subject: [PATCH 2/3] Exit if symlink is not enabled on Windows

---
 install_executorch.bat | 18 +++++++++++++++++-
 setup.py               |  3 ---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/install_executorch.bat b/install_executorch.bat
index e6d5c5db363..50fb6fd9b77 100644
--- a/install_executorch.bat
+++ b/install_executorch.bat
@@ -1,4 +1,5 @@
 @ECHO OFF
+setlocal EnableDelayedExpansion
 rem Copyright (c) Meta Platforms, Inc. and affiliates.
 rem All rights reserved.
 
@@ -7,9 +8,24 @@
 rem This batch file provides a basic functionality similar to the bash script.
 
 cd /d "%~dp0"
 
+rem Verify that Git checked out symlinks correctly. Without this the Python install
+rem will fail when attempting to copy files from src\executorch.
+where git >NUL 2>&1
+if not errorlevel 1 (
+    set "GIT_SYMLINKS="
+    for /f "usebackq delims=" %%i in (`git config --get core.symlinks 2^>nul`) do set "GIT_SYMLINKS=%%i"
+    if /I not "!GIT_SYMLINKS!"=="true" (
+        echo ExecuTorch requires Git symlink support on Windows.
+        echo Enable Developer Mode and run: git config --global core.symlinks true
+        echo Re-clone the repository after enabling symlinks, then rerun install_executorch.bat.
+        exit /b 1
+    )
+)
+
 rem Under windows, it's always python
 set PYTHON_EXECUTABLE=python
 
 "%PYTHON_EXECUTABLE%" install_executorch.py %*
-exit /b %ERRORLEVEL%
+set "EXIT_CODE=%ERRORLEVEL%"
+endlocal & exit /b %EXIT_CODE%
diff --git a/setup.py b/setup.py
index 62eed183cae..45b69b8b828 100644
--- a/setup.py
+++ b/setup.py
@@ -635,9 +635,6 @@ def run(self):
             dst_root = self.get_package_dir(".")
         else:
             dst_root = os.path.join(self.build_lib, "executorch")
-        # On Windows the package directory might not exist yet when building from a
-        # clean tree. Ensure it is created before we attempt to write version.py.
-        self.mkpath(dst_root)
 
         # Create the version file.
         Version.write_to_python_file(os.path.join(dst_root, "version.py"))

From 90bee21b612d9e6758f7c7a564c797ce79da7083 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Mon, 10 Nov 2025 16:56:07 -0800
Subject: [PATCH 3/3] Add Vulkan support for whisper runner

---
 examples/models/whisper/CMakeLists.txt | 25 ++++++++-----
 extension/asr/runner/runner.cpp        | 37 ++++++++++++++++---
 extension/llm/runner/util.h            | 50 +++++++++++++++++++++-----
 tools/cmake/preset/llm.cmake           |  4 +--
 4 files changed, 91 insertions(+), 25 deletions(-)

diff --git a/examples/models/whisper/CMakeLists.txt b/examples/models/whisper/CMakeLists.txt
index 70f5892baa7..870b0d06d91 100644
--- a/examples/models/whisper/CMakeLists.txt
+++ b/examples/models/whisper/CMakeLists.txt
@@ -16,6 +16,11 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 # Let files say "include <executorch/path/to/header.h>"
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
+# Options
+option(EXECUTORCH_BUILD_CUDA "Build with CUDA support" OFF)
+option(EXECUTORCH_BUILD_METAL "Build with Metal support" OFF)
+option(EXECUTORCH_BUILD_VULKAN "Build with Vulkan support" OFF)
+
 # Need this for gflags for some reason
 set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
 find_package(gflags REQUIRED)
@@ -26,15 +31,12 @@ find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
 set(_link_libraries executorch gflags)
 set(_srcs multimodal.cpp)
 
-list(
-  APPEND
-  _link_libraries
-  optimized_native_cpu_ops_lib
-  quantized_ops_lib
-  custom_ops
-  cpublas
-  eigen_blas
-)
+list(APPEND _link_libraries optimized_native_cpu_ops_lib cpublas eigen_blas)
+
+# Quantized ops and custom llm kernels are not compiled on MSVC
+if(NOT MSVC)
+  list(APPEND _link_libraries quantized_ops_lib custom_ops)
+endif()
 
 # XNNPACK
 if(TARGET xnnpack_backend)
@@ -78,6 +80,11 @@ if(EXECUTORCH_BUILD_METAL)
   executorch_target_link_options_shared_lib(metal_backend)
 endif()
 
+if(EXECUTORCH_BUILD_VULKAN)
+  list(APPEND _link_libraries vulkan_backend)
+  executorch_target_link_options_shared_lib(vulkan_backend)
+endif()
+
 # Add tokenizers
 list(APPEND _link_libraries tokenizers::tokenizers)
 
diff --git a/extension/asr/runner/runner.cpp b/extension/asr/runner/runner.cpp
index 6bbb44e4faa..66de2423d25 100644
--- a/extension/asr/runner/runner.cpp
+++ b/extension/asr/runner/runner.cpp
@@ -178,12 +178,28 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
 
   // Convert preprocessed_features to expected dtype if needed
   if (preprocessed_features->scalar_type() != expected_dtype) {
-    if (expected_dtype == ::executorch::aten::ScalarType::BFloat16) {
+    ET_LOG(
+        Info,
+        "Encoder input dtype mismatch: got %s, expected %s",
+        ::executorch::runtime::toString(preprocessed_features->scalar_type()),
+        ::executorch::runtime::toString(expected_dtype));
+    if ((expected_dtype != ::executorch::aten::ScalarType::BFloat16 &&
+         expected_dtype != ::executorch::aten::ScalarType::Half)) {
       ET_LOG(
-          Info,
-          "Converting audio features from %s to BFloat16. Before converting, first value = %f",
-          ::executorch::runtime::toString(preprocessed_features->scalar_type()),
-          preprocessed_features->mutable_data_ptr<float>()[0]);
+          Error,
+          "Unsupported expected dtype %s for encoder input",
+          ::executorch::runtime::toString(expected_dtype));
+      return ::executorch::runtime::Error::Internal;
+    }
+
+    ET_LOG(
+        Info,
+        "Converting audio features from %s to %s. Before converting, first value = %f",
+        ::executorch::runtime::toString(preprocessed_features->scalar_type()),
+        ::executorch::runtime::toString(expected_dtype),
+        preprocessed_features->mutable_data_ptr<float>()[0]);
+
+    if (expected_dtype == ::executorch::aten::ScalarType::BFloat16) {
       auto convert_result = ::executorch::extension::llm::convert_to_bfloat16(
           preprocessed_features);
       ET_CHECK_OK_OR_RETURN_ERROR(convert_result.error());
@@ -194,6 +210,17 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
           static_cast<float>(
               preprocessed_features
                   ->mutable_data_ptr<::executorch::aten::BFloat16>()[0]));
+    } else {
+      auto convert_result = ::executorch::extension::llm::convert_to_float16(
+          preprocessed_features);
+      ET_CHECK_OK_OR_RETURN_ERROR(convert_result.error());
+      preprocessed_features = convert_result.get();
+      ET_LOG(
+          Info,
+          "Conversion complete, first value = %f",
+          static_cast<float>(
+              preprocessed_features
+                  ->mutable_data_ptr<::executorch::aten::Half>()[0]));
     }
   }
 
diff --git a/extension/llm/runner/util.h b/extension/llm/runner/util.h
index e87d625f140..b9067b4a930 100644
--- a/extension/llm/runner/util.h
+++ b/extension/llm/runner/util.h
@@ -153,25 +153,59 @@ inline runtime::Result populate_start_pos_or_cache_position(
  * Helper function to convert a float tensor to bfloat16.
  * Creates a new tensor with bfloat16 dtype and copies/converts the data.
  */
+// Note: implementation moved to templated `convert_to_fp` below. Keep a
+// single implementation (the templated one) to avoid duplicate definitions.
+
+// Note: implementation moved to templated `convert_to_fp` below. Keep a
+// single implementation (the templated one) to avoid duplicate definitions.
+
+/**
+ * Generic helper to convert a float tensor to a target floating-point-like
+ * dtype (e.g., Half, BFloat16). This implements the core conversion loop
+ * and is used by the strongly-typed wrappers below so existing call sites
+ * remain unchanged.
+ */
+template <
+    typename TargetCppType,
+    ::executorch::aten::ScalarType TargetScalarType>
 inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
-convert_to_bfloat16(const ::executorch::extension::TensorPtr& src_tensor) {
+convert_to_fp(const ::executorch::extension::TensorPtr& src_tensor) {
   ET_CHECK_OR_RETURN_ERROR(
       src_tensor->scalar_type() == ::executorch::aten::ScalarType::Float,
       InvalidArgument,
-      "BFloat16 conversion only supported from Float source data");
+      "Conversion only supported from Float source data");
+
+  static_assert(
+      TargetScalarType == ::executorch::aten::ScalarType::Half ||
+          TargetScalarType == ::executorch::aten::ScalarType::BFloat16,
+      "convert_to_fp only supports Half or BFloat16 target dtypes");
 
   const auto num_elements = static_cast<size_t>(src_tensor->numel());
   const float* float_data = src_tensor->const_data_ptr<float>();
-  auto bf16_tensor = ::executorch::extension::empty_like(
-      src_tensor, ::executorch::aten::ScalarType::BFloat16);
-  auto* bf16_data =
-      bf16_tensor->mutable_data_ptr<::executorch::aten::BFloat16>();
+  auto out_tensor =
+      ::executorch::extension::empty_like(src_tensor, TargetScalarType);
+  auto* out_data = out_tensor->mutable_data_ptr<TargetCppType>();
 
   for (size_t i = 0; i < num_elements; ++i) {
-    bf16_data[i] = ::executorch::aten::BFloat16(float_data[i]);
+    out_data[i] = TargetCppType(float_data[i]);
   }
 
-  return bf16_tensor;
+  return out_tensor;
+}
+
+// Keep the original API names as thin wrappers for backward compatibility.
+inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
+convert_to_bfloat16(const ::executorch::extension::TensorPtr& src_tensor) {
+  return convert_to_fp<
+      ::executorch::aten::BFloat16,
+      ::executorch::aten::ScalarType::BFloat16>(src_tensor);
+}
+
+inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
+convert_to_float16(const ::executorch::extension::TensorPtr& src_tensor) {
+  return convert_to_fp<
+      ::executorch::aten::Half,
+      ::executorch::aten::ScalarType::Half>(src_tensor);
 }
 
 } // namespace llm
diff --git a/tools/cmake/preset/llm.cmake b/tools/cmake/preset/llm.cmake
index 231a25f0c1e..36aabecd6f8 100644
--- a/tools/cmake/preset/llm.cmake
+++ b/tools/cmake/preset/llm.cmake
@@ -18,9 +18,7 @@ set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON)
 
 # Turn on the quantized and LLM kernels unless on windows cuda build which
 # currently doesn't support this due to using msvc.
-if(NOT (EXECUTORCH_BUILD_CUDA AND (CMAKE_SYSTEM_NAME STREQUAL "Windows"
-        OR CMAKE_SYSTEM_NAME STREQUAL "WIN32"))
-)
+if(NOT MSVC)
   set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)
   set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM ON)
 endif()