25 changes: 16 additions & 9 deletions examples/models/whisper/CMakeLists.txt
@@ -16,6 +16,11 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 # Let files say "include <executorch/path/to/header.h>"
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
+# Options
+option(EXECUTORCH_BUILD_CUDA "Build with CUDA support" OFF)
+option(EXECUTORCH_BUILD_METAL "Build with Metal support" OFF)
+option(EXECUTORCH_BUILD_VULKAN "Build with Vulkan support" OFF)
+
 # Need this for gflags for some reason
 set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
 find_package(gflags REQUIRED)
@@ -26,15 +31,12 @@ find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
 set(_link_libraries executorch gflags)
 set(_srcs multimodal.cpp)
 
-list(
-  APPEND
-  _link_libraries
-  optimized_native_cpu_ops_lib
-  quantized_ops_lib
-  custom_ops
-  cpublas
-  eigen_blas
-)
+list(APPEND _link_libraries optimized_native_cpu_ops_lib cpublas eigen_blas)
+
+# Quantized ops and custom llm kernels are not compiled on MSVC
+if(NOT MSVC)
+  list(APPEND _link_libraries quantized_ops_lib custom_ops)
+endif()
 
 # XNNPACK
 if(TARGET xnnpack_backend)
@@ -78,6 +80,11 @@ if(EXECUTORCH_BUILD_METAL)
   executorch_target_link_options_shared_lib(metal_backend)
 endif()
 
+if(EXECUTORCH_BUILD_VULKAN)
+  list(APPEND _link_libraries vulkan_backend)
+  executorch_target_link_options_shared_lib(vulkan_backend)
+endif()
+
 # Add tokenizers
 list(APPEND _link_libraries tokenizers::tokenizers)
 
37 changes: 32 additions & 5 deletions extension/asr/runner/runner.cpp
@@ -178,12 +178,28 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
 
   // Convert preprocessed_features to expected dtype if needed
   if (preprocessed_features->scalar_type() != expected_dtype) {
-    if (expected_dtype == ::executorch::aten::ScalarType::BFloat16) {
-      ET_LOG(
-          Info,
-          "Converting audio features from %s to BFloat16. Before converting, first value = %f",
-          ::executorch::runtime::toString(preprocessed_features->scalar_type()),
-          preprocessed_features->mutable_data_ptr<float>()[0]);
+    ET_LOG(
+        Info,
+        "Encoder input dtype mismatch: got %s, expected %s",
+        ::executorch::runtime::toString(preprocessed_features->scalar_type()),
+        ::executorch::runtime::toString(expected_dtype));
+    if ((expected_dtype != ::executorch::aten::ScalarType::BFloat16 &&
+         expected_dtype != ::executorch::aten::ScalarType::Half)) {
+      ET_LOG(
+          Error,
+          "Unsupported expected dtype %s for encoder input",
+          ::executorch::runtime::toString(expected_dtype));
+      return ::executorch::runtime::Error::Internal;
+    }
+
+    ET_LOG(
+        Info,
+        "Converting audio features from %s to %s. Before converting, first value = %f",
+        ::executorch::runtime::toString(preprocessed_features->scalar_type()),
+        ::executorch::runtime::toString(expected_dtype),
+        preprocessed_features->mutable_data_ptr<float>()[0]);
+
+    if (expected_dtype == ::executorch::aten::ScalarType::BFloat16) {
       auto convert_result = ::executorch::extension::llm::convert_to_bfloat16(
           preprocessed_features);
       ET_CHECK_OK_OR_RETURN_ERROR(convert_result.error());
@@ -194,6 +210,17 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
           static_cast<float>(
               preprocessed_features
                   ->mutable_data_ptr<::executorch::aten::BFloat16>()[0]));
+    } else {
+      auto convert_result = ::executorch::extension::llm::convert_to_float16(
+          preprocessed_features);
+      ET_CHECK_OK_OR_RETURN_ERROR(convert_result.error());
+      preprocessed_features = convert_result.get();
+      ET_LOG(
+          Info,
+          "Conversion complete, first value = %f",
+          static_cast<float>(
+              preprocessed_features
+                  ->mutable_data_ptr<::executorch::aten::Half>()[0]));
     }
   }
 
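Taken together, the runner change reduces to a small, explicit policy: log the dtype mismatch, reject any target other than the two 16-bit float types, then dispatch to the matching converter. Below is a minimal caller-side sketch of that flow. It assumes the `<executorch/...>` include convention noted in the CMake file above and the conversion helpers added in extension/llm/runner/util.h (next file); the wrapper name `match_encoder_dtype` is illustrative, not part of the runner API.

#include <executorch/extension/llm/runner/util.h>

using ::executorch::aten::ScalarType;
using ::executorch::extension::TensorPtr;
using ::executorch::runtime::Error;
using ::executorch::runtime::Result;

// Hypothetical helper mirroring the dispatch in AsrRunner::transcribe():
// pass features through if the dtype already matches, otherwise convert.
Result<TensorPtr> match_encoder_dtype(TensorPtr features, ScalarType expected) {
  if (features->scalar_type() == expected) {
    return features; // nothing to convert
  }
  switch (expected) {
    case ScalarType::BFloat16:
      return ::executorch::extension::llm::convert_to_bfloat16(features);
    case ScalarType::Half:
      return ::executorch::extension::llm::convert_to_float16(features);
    default:
      // Same policy as the runner: only Half and BFloat16 are supported.
      return Error::Internal;
  }
}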
47 changes: 39 additions & 8 deletions extension/llm/runner/util.h
@@ -153,25 +153,56 @@ inline runtime::Result<TensorPtr> populate_start_pos_or_cache_position(
  * Helper function to convert a float tensor to bfloat16.
  * Creates a new tensor with bfloat16 dtype and copies/converts the data.
  */
+// Note: implementation moved to the templated `convert_to_fp` below. Keep a
+// single implementation (the templated one) to avoid duplicate definitions.
+
+/**
+ * Generic helper to convert a float tensor to a target floating-point
+ * dtype (e.g., Half, BFloat16). This implements the core conversion loop
+ * and is used by the strongly-typed wrappers below so existing call sites
+ * remain unchanged.
+ */
+template <
+    typename TargetCppType,
+    ::executorch::aten::ScalarType TargetScalarType>
 inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
-convert_to_bfloat16(const ::executorch::extension::TensorPtr& src_tensor) {
+convert_to_fp(const ::executorch::extension::TensorPtr& src_tensor) {
   ET_CHECK_OR_RETURN_ERROR(
       src_tensor->scalar_type() == ::executorch::aten::ScalarType::Float,
       InvalidArgument,
-      "BFloat16 conversion only supported from Float source data");
+      "Conversion only supported from Float source data");
+
+  static_assert(
+      TargetScalarType == ::executorch::aten::ScalarType::Half ||
+          TargetScalarType == ::executorch::aten::ScalarType::BFloat16,
+      "convert_to_fp only supports Half or BFloat16 target dtypes");
 
   const auto num_elements = static_cast<size_t>(src_tensor->numel());
   const float* float_data = src_tensor->const_data_ptr<float>();
 
-  auto bf16_tensor = ::executorch::extension::empty_like(
-      src_tensor, ::executorch::aten::ScalarType::BFloat16);
-  auto* bf16_data =
-      bf16_tensor->mutable_data_ptr<::executorch::aten::BFloat16>();
+  auto out_tensor =
+      ::executorch::extension::empty_like(src_tensor, TargetScalarType);
+  auto* out_data = out_tensor->mutable_data_ptr<TargetCppType>();
   for (size_t i = 0; i < num_elements; ++i) {
-    bf16_data[i] = ::executorch::aten::BFloat16(float_data[i]);
+    out_data[i] = TargetCppType(float_data[i]);
  }
 
-  return bf16_tensor;
+  return out_tensor;
 }
 
+// Keep the original API names as thin wrappers for backward compatibility.
+inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
+convert_to_bfloat16(const ::executorch::extension::TensorPtr& src_tensor) {
+  return convert_to_fp<
+      ::executorch::aten::BFloat16,
+      ::executorch::aten::ScalarType::BFloat16>(src_tensor);
+}
+
+inline ::executorch::runtime::Result<::executorch::extension::TensorPtr>
+convert_to_float16(const ::executorch::extension::TensorPtr& src_tensor) {
+  return convert_to_fp<
+      ::executorch::aten::Half,
+      ::executorch::aten::ScalarType::Half>(src_tensor);
+}
+
 } // namespace llm
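For reference on what the element-wise `TargetCppType(float_data[i])` cast above does numerically: a bfloat16 is the upper 16 bits of an IEEE-754 binary32, so float-to-bfloat16 drops the low 16 mantissa bits while keeping the full float32 exponent range. The self-contained sketch below illustrates that storage format using plain truncation; the actual `::executorch::aten::BFloat16` constructor may round to nearest-even instead, so treat this as an illustration of the format rather than the library's exact semantics.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Truncating float -> bfloat16: keep sign, 8 exponent bits, top 7 mantissa bits.
static std::uint16_t float_to_bf16_truncate(float f) {
  std::uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits)); // type-pun safely via memcpy
  return static_cast<std::uint16_t>(bits >> 16);
}

// bfloat16 -> float is exact: reattach 16 zero bits as the low mantissa.
static float bf16_to_float(std::uint16_t h) {
  std::uint32_t bits = static_cast<std::uint32_t>(h) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

int main() {
  float v = 1.2345f;
  std::uint16_t h = float_to_bf16_truncate(v);
  std::printf("%f -> 0x%04x -> %f\n", v, h, bf16_to_float(h));
  return 0;
}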
18 changes: 17 additions & 1 deletion install_executorch.bat
@@ -1,4 +1,5 @@
 @ECHO OFF
+setlocal EnableDelayedExpansion
 
 rem Copyright (c) Meta Platforms, Inc. and affiliates.
 rem All rights reserved.
@@ -7,9 +8,24 @@ rem This batch file provides basic functionality similar to the bash script.
 
 cd /d "%~dp0"
 
+rem Verify that Git checked out symlinks correctly. Without this the Python install
+rem will fail when attempting to copy files from src\executorch.
+where git >NUL 2>&1
+if not errorlevel 1 (
+    set "GIT_SYMLINKS="
+    for /f "usebackq delims=" %%i in (`git config --get core.symlinks 2^>nul`) do set "GIT_SYMLINKS=%%i"
+    if /I not "!GIT_SYMLINKS!"=="true" (
+        echo ExecuTorch requires Git symlink support on Windows.
+        echo Enable Developer Mode and run: git config --global core.symlinks true
+        echo Re-clone the repository after enabling symlinks, then rerun install_executorch.bat.
+        exit /b 1
+    )
+)
+
 rem Under windows, it's always python
 set PYTHON_EXECUTABLE=python
 
 "%PYTHON_EXECUTABLE%" install_executorch.py %*
 
-exit /b %ERRORLEVEL%
+set "EXIT_CODE=%ERRORLEVEL%"
+endlocal & exit /b %EXIT_CODE%
4 changes: 1 addition & 3 deletions tools/cmake/preset/llm.cmake
@@ -18,9 +18,7 @@ set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON)
 
 # Turn on the quantized and LLM kernels unless on windows cuda build which
 # currently doesn't support this due to using msvc.
-if(NOT (EXECUTORCH_BUILD_CUDA AND (CMAKE_SYSTEM_NAME STREQUAL "Windows"
-        OR CMAKE_SYSTEM_NAME STREQUAL "WIN32"))
-)
+if(NOT MSVC)
   set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)
   set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM ON)
 endif()