From f913242115ec21dd775b020c4ff3de84a39593a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Mon, 20 Oct 2025 19:39:01 +0200 Subject: [PATCH 01/10] bump required cmake --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 063c728411..492a2d451e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ # torch_xpu_ops # -- Static archive library target -cmake_minimum_required(VERSION 3.13 FATAL_ERROR) +cmake_minimum_required(VERSION 3.27 FATAL_ERROR) set(PROJECT_NAME "torch-xpu-ops") set(PROJECT_VERSION "2.3.0") From 11534a7e28139eec24fdd8c2405c27b1fe3d39bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Tue, 21 Oct 2025 05:23:22 +0000 Subject: [PATCH 02/10] match torch version --- CMakeLists.txt | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 492a2d451e..dfa0a12fe7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,8 +20,6 @@ cmake_minimum_required(VERSION 3.27 FATAL_ERROR) -set(PROJECT_NAME "torch-xpu-ops") -set(PROJECT_VERSION "2.3.0") # Avoid SYCL compiler error if(NOT WIN32) string(APPEND CMAKE_CXX_FLAGS " -Wno-error") @@ -30,14 +28,12 @@ if(NOT WIN32) endif() endif() -cmake_policy(SET CMP0048 NEW) -project(${PROJECT_NAME} VERSION "${PROJECT_VERSION}" LANGUAGES C CXX) +project(torch-xpu-ops VERSION 2.10.0 LANGUAGES C CXX) set(TORCH_XPU_OPS_FOUND FALSE) - set(TORCH_XPU_OPS_ROOT ${PROJECT_SOURCE_DIR}) -list(APPEND CMAKE_MODULE_PATH ${TORCH_XPU_OPS_ROOT}/cmake/Modules) +list(APPEND CMAKE_MODULE_PATH ${TORCH_XPU_OPS_ROOT}/cmake/Modules) include(${TORCH_XPU_OPS_ROOT}/cmake/SYCL.cmake) include(${TORCH_XPU_OPS_ROOT}/cmake/ONEMKL.cmake) include(${TORCH_XPU_OPS_ROOT}/cmake/BuildFlags.cmake) From a055f23575b9ffe877971a5ace7c6ab13f7a7b86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Tue, 21 Oct 2025 06:03:14 +0000 Subject: [PATCH 03/10] Simplify install_xpu_headers macro --- src/ATen/CMakeLists.txt | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/ATen/CMakeLists.txt b/src/ATen/CMakeLists.txt index 493675e804..ebe3da5f3b 100644 --- a/src/ATen/CMakeLists.txt +++ b/src/ATen/CMakeLists.txt @@ -19,25 +19,25 @@ set(ATen_XPU_SYCL_SRCS ${ATen_XPU_SYCL_SRCS} PARENT_SCOPE) # ATen XPU headers -macro(install_xpu_headers glob_pattern dest_subdir) - file(GLOB headers ${glob_pattern}) +macro(install_xpu_headers subdir) + file(GLOB headers CONFIGURE_DEPENDS "${subdir}/*.h") if(headers) - install(FILES ${headers} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/${dest_subdir}") + install(FILES ${headers} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/ATen/${subdir}") endif() endmacro() -install_xpu_headers("xpu/*.h" "ATen/xpu") -install_xpu_headers("native/xpu/*.h" "ATen/native/xpu") -install_xpu_headers("native/xpu/sycl/*.h" "ATen/native/xpu/sycl") -install_xpu_headers("native/xpu/mkl/*.h" "ATen/native/xpu/mkl") -install_xpu_headers("native/nested/xpu/*.h" "ATen/native/nested/xpu") -install_xpu_headers("native/nested/xpu/sycl/*.h" "ATen/native/nested/xpu/sycl") -install_xpu_headers("native/quantized/*.h" "ATen/native/quantized/xpu") -install_xpu_headers("native/quantized/sycl/*.h" "ATen/native/quantized/xpu/sycl") -install_xpu_headers("native/sparse/xpu/*.h" "ATen/native/sparse/xpu") -install_xpu_headers("native/sparse/xpu/sycl/*.h" "ATen/native/sparse/xpu/sycl") -install_xpu_headers("native/transformers/*.h" "ATen/native/transformers/xpu") 
-install_xpu_headers("native/transformers/sycl/*.h" "ATen/native/transformers/xpu/sycl") +install_xpu_headers("xpu") +install_xpu_headers("native/xpu") +install_xpu_headers("native/xpu/sycl") +install_xpu_headers("native/xpu/mkl") +install_xpu_headers("native/nested/xpu") +install_xpu_headers("native/nested/xpu/sycl") +install_xpu_headers("native/quantized/xpu") +install_xpu_headers("native/quantized/xpu/sycl") +install_xpu_headers("native/sparse/xpu") +install_xpu_headers("native/sparse/xpu/sycl") +install_xpu_headers("native/transformers/xpu") +install_xpu_headers("native/transformers/xpu/sycl") if(xpu_ops_generated_headers) install(FILES ${xpu_ops_generated_headers} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/ops) From caab6f91611ea6293471768b507f5fefe5f26a20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Tue, 21 Oct 2025 07:08:41 +0000 Subject: [PATCH 04/10] Align filename in logs --- CMakeLists.txt | 3 ++- cmake/Modules/FindSYCL.cmake | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dfa0a12fe7..04e21cceff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,9 +31,10 @@ endif() project(torch-xpu-ops VERSION 2.10.0 LANGUAGES C CXX) set(TORCH_XPU_OPS_FOUND FALSE) -set(TORCH_XPU_OPS_ROOT ${PROJECT_SOURCE_DIR}) +set(TORCH_XPU_OPS_ROOT ${PROJECT_SOURCE_DIR}) list(APPEND CMAKE_MODULE_PATH ${TORCH_XPU_OPS_ROOT}/cmake/Modules) + include(${TORCH_XPU_OPS_ROOT}/cmake/SYCL.cmake) include(${TORCH_XPU_OPS_ROOT}/cmake/ONEMKL.cmake) include(${TORCH_XPU_OPS_ROOT}/cmake/BuildFlags.cmake) diff --git a/cmake/Modules/FindSYCL.cmake b/cmake/Modules/FindSYCL.cmake index 86457ba362..e78bcbdfcf 100644 --- a/cmake/Modules/FindSYCL.cmake +++ b/cmake/Modules/FindSYCL.cmake @@ -107,7 +107,7 @@ macro(SYCL_INCLUDE_DEPENDENCIES dependency_file) if(SYCL_DEPEND_REGENERATE) set(SYCL_DEPEND ${dependency_file}) - file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n") + file(WRITE ${dependency_file} "#FindSYCL.cmake generated file. 
Do not edit.\n") endif() endmacro() From 02a091b77ddf10d491edd2a834908111cfda73bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Tue, 21 Oct 2025 07:21:29 +0000 Subject: [PATCH 05/10] Extract common libs for win/linux --- cmake/Modules/FindONEMKL.cmake | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cmake/Modules/FindONEMKL.cmake b/cmake/Modules/FindONEMKL.cmake index 67e801f55f..bc0e5d2485 100644 --- a/cmake/Modules/FindONEMKL.cmake +++ b/cmake/Modules/FindONEMKL.cmake @@ -44,17 +44,18 @@ find_file( if((ONEMKL_INCLUDE_DIR STREQUAL "ONEMKL_INCLUDE_DIR-NOTFOUND") OR(ONEMKL_LIB_DIR STREQUAL "ONEMKL_LIB_DIR-NOTFOUND")) - message(WARNING "oneMKL sdk is incomplete!!") + message(WARNING "oneMKL SDK is incomplete!!") return() endif() +set(MKL_LIB_NAMES "mkl_sycl_blas" "mkl_sycl_dft" "mkl_sycl_lapack" + "mkl_intel_lp64" "mkl_core") + if(WIN32) - set(MKL_LIB_NAMES "mkl_sycl_blas" "mkl_sycl_dft" "mkl_sycl_lapack" - "mkl_intel_lp64" "mkl_intel_thread" "mkl_core") + list(APPEND MKL_LIB_NAMES "mkl_intel_thread") list(TRANSFORM MKL_LIB_NAMES APPEND "_dll.lib") else() - set(MKL_LIB_NAMES "mkl_sycl_blas" "mkl_sycl_dft" "mkl_sycl_lapack" - "mkl_intel_lp64" "mkl_gnu_thread" "mkl_core") + list(APPEND MKL_LIB_NAMES "mkl_gnu_thread") list(TRANSFORM MKL_LIB_NAMES PREPEND "lib") list(TRANSFORM MKL_LIB_NAMES APPEND ".so") endif() From b573bd35205949b162fa832845022c111db27321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Tue, 21 Oct 2025 07:34:45 +0000 Subject: [PATCH 06/10] Remove checks for old compilers (compared to 2025.2.1) --- src/BuildOnLinux.cmake | 2 +- src/BuildOnWindows.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/BuildOnLinux.cmake b/src/BuildOnLinux.cmake index 3cf18e008d..b87151c0c1 100644 --- a/src/BuildOnLinux.cmake +++ b/src/BuildOnLinux.cmake @@ -38,7 +38,7 @@ if(BUILD_SEPARATE_OPS) endforeach() # Working with the compilers which don't support device code compression, we have to split kernels # into multiple libraries to meet the bin size limitation. -elseif(BUILD_SPLIT_KERNEL_LIB OR __INTEL_LLVM_COMPILER LESS 20250004 OR ICX_DATE LESS 20241205) +elseif(BUILD_SPLIT_KERNEL_LIB) setup_common_libraries() # Split SYCL kernels into 4 libraries as categories 1) Unary+Binary 2) Reduce 3) Foreach 4) Others. set(ATen_XPU_SYCL_UNARY_BINARY_SRCS) diff --git a/src/BuildOnWindows.cmake b/src/BuildOnWindows.cmake index bf067c8e70..ae5c88da19 100644 --- a/src/BuildOnWindows.cmake +++ b/src/BuildOnWindows.cmake @@ -43,7 +43,7 @@ if(BUILD_SEPARATE_OPS) list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops_aten) # Working with the compilers which don't support device code compression, we have to split kernels # into multiple libraries to meet the bin size limitation. -elseif(BUILD_SPLIT_KERNEL_LIB OR __INTEL_LLVM_COMPILER LESS 20250004 OR ICX_DATE LESS 20241205) +elseif(BUILD_SPLIT_KERNEL_LIB) setup_common_libraries() # Split SYCL kernels into 2 libraries as categories 1) Unary+Binary 2) Others. 
set(ATen_XPU_SYCL_BINARY_SRCS) From 638184f724229d5cb4e7179b10097ebf296d9f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Tue, 21 Oct 2025 12:23:43 +0000 Subject: [PATCH 07/10] move MKL glob to MKL ifdef --- src/ATen/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ATen/CMakeLists.txt b/src/ATen/CMakeLists.txt index ebe3da5f3b..86e1a549ff 100644 --- a/src/ATen/CMakeLists.txt +++ b/src/ATen/CMakeLists.txt @@ -1,12 +1,12 @@ # ATen XPU sources file(GLOB xpu_cpp "xpu/*.cpp") -file(GLOB xpu_mkl "native/xpu/mkl/*.cpp") file(GLOB xpu_native_cpp "native/xpu/*.cpp" "native/sparse/*.cpp" "native/sparse/xpu/*.cpp" "native/nested/*.cpp" "native/nested/xpu/*.cpp" "native/transformers/*.cpp" "native/quantized/*.cpp") file(GLOB xpu_sycl "native/xpu/sycl/*.cpp" "native/sparse/xpu/sycl/*.cpp" "native/nested/xpu/sycl/*.cpp" "native/transformers/sycl/*.cpp" "native/quantized/sycl/*.cpp") list(APPEND ATen_XPU_CPP_SRCS ${xpu_cpp}) if(USE_ONEMKL_XPU) + file(GLOB xpu_mkl "native/xpu/mkl/*.cpp") list(APPEND ATen_XPU_MKL_SRCS ${xpu_mkl}) endif() list(APPEND ATen_XPU_NATIVE_CPP_SRCS ${xpu_native_cpp}) From c65abec23ac6e84432b2e3c9f64f648571c1b125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Tue, 21 Oct 2025 12:43:33 +0000 Subject: [PATCH 08/10] Remove BUILD_SPLIT_KERNEL_LIB --- CMakeLists.txt | 2 +- src/BuildOnLinux.cmake | 81 ---------------- src/BuildOnWindows.cmake | 200 --------------------------------------- 3 files changed, 1 insertion(+), 282 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 04e21cceff..8778d93338 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,7 +79,7 @@ set(BUILD_SEPARATE_OPS $ENV{BUILD_SEPARATE_OPS}) if(CMAKE_BUILD_TYPE MATCHES "(Debug|RelWithDebInfo)") set(BUILD_SEPARATE_OPS TRUE) endif() -set(BUILD_SPLIT_KERNEL_LIB $ENV{BUILD_SPLIT_KERNEL_LIB}) + add_subdirectory(${TORCH_XPU_OPS_ROOT}/src) set(TORCH_XPU_OPS_FOUND TRUE) diff --git a/src/BuildOnLinux.cmake b/src/BuildOnLinux.cmake index b87151c0c1..fd01b26a3f 100644 --- a/src/BuildOnLinux.cmake +++ b/src/BuildOnLinux.cmake @@ -36,87 +36,6 @@ if(BUILD_SEPARATE_OPS) # Decouple with PyTorch cmake definition. install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") endforeach() -# Working with the compilers which don't support device code compression, we have to split kernels -# into multiple libraries to meet the bin size limitation. -elseif(BUILD_SPLIT_KERNEL_LIB) - setup_common_libraries() - # Split SYCL kernels into 4 libraries as categories 1) Unary+Binary 2) Reduce 3) Foreach 4) Others. 
- set(ATen_XPU_SYCL_UNARY_BINARY_SRCS) - set(ATen_XPU_SYCL_REDUCE_SRCS) - set(ATen_XPU_SYCL_FOREACH_SRCS) - set(ATen_XPU_SYCL_OTHERS_SRCS) - - foreach(sycl_src ${ATen_XPU_SYCL_SRCS}) - string(REGEX MATCH "Binary" IS_BINARY ${sycl_src}) - string(REGEX MATCH "Unary" IS_UNARY ${sycl_src}) - string(REGEX MATCH "Pow" IS_POW ${sycl_src}) - string(REGEX MATCH "Copy" IS_COPY ${sycl_src}) - string(REGEX MATCH "Reduce" IS_REDUCE ${sycl_src}) - string(REGEX MATCH "Activation" IS_ACTIVATION ${sycl_src}) - string(REGEX MATCH "Foreach" IS_FOREACH ${sycl_src}) - - if(NOT IS_FOREACH STREQUAL "") - list(APPEND ATen_XPU_SYCL_FOREACH_SRCS ${sycl_src}) - elseif(NOT IS_REDUCE STREQUAL "") - list(APPEND ATen_XPU_SYCL_REDUCE_SRCS ${sycl_src}) - elseif(NOT IS_UNARY STREQUAL "" OR NOT IS_BINARY STREQUAL "") - list(APPEND ATen_XPU_SYCL_UNARY_BINARY_SRCS ${sycl_src}) - elseif(NOT IS_COPY STREQUAL "" OR NOT IS_POW STREQUAL "") - list(APPEND ATen_XPU_SYCL_UNARY_BINARY_SRCS ${sycl_src}) - elseif(NOT IS_ACTIVATION STREQUAL "") - list(APPEND ATen_XPU_SYCL_UNARY_BINARY_SRCS ${sycl_src}) - else() - list(APPEND ATen_XPU_SYCL_OTHERS_SRCS ${sycl_src}) - endif() - endforeach() - - # Unary binary kernel lib - set(sycl_unary_binary_lib torch_xpu_ops_sycl_unary_binary_kernels) - sycl_add_library( - ${sycl_unary_binary_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_UNARY_BINARY_SRCS}) - target_link_libraries(torch_xpu_ops PUBLIC ${sycl_unary_binary_lib}) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_unary_binary_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_unary_binary_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Reduce kernel lib - set(sycl_reduce_lib torch_xpu_ops_sycl_reduce_kernels) - sycl_add_library( - ${sycl_reduce_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_REDUCE_SRCS}) - target_link_libraries(torch_xpu_ops PUBLIC ${sycl_reduce_lib}) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_reduce_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_reduce_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Foreach kernel lib - set(sycl_foreach_lib torch_xpu_ops_sycl_foreach_kernels) - sycl_add_library( - ${sycl_foreach_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_FOREACH_SRCS}) - target_link_libraries(torch_xpu_ops PUBLIC ${sycl_foreach_lib}) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_foreach_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_foreach_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Other kernel lib - set(sycl_lib torch_xpu_ops_sycl_kernels) - sycl_add_library( - ${sycl_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_OTHERS_SRCS}) - target_link_libraries(torch_xpu_ops PUBLIC ${sycl_lib}) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") else() sycl_add_library( torch_xpu_ops diff --git a/src/BuildOnWindows.cmake b/src/BuildOnWindows.cmake index ae5c88da19..4005a2f3a6 100644 --- a/src/BuildOnWindows.cmake +++ b/src/BuildOnWindows.cmake @@ -39,206 +39,6 @@ if(BUILD_SEPARATE_OPS) # Decouple with PyTorch cmake definition. install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") endforeach() - list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops) - list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops_aten) -# Working with the compilers which don't support device code compression, we have to split kernels -# into multiple libraries to meet the bin size limitation. 
-elseif(BUILD_SPLIT_KERNEL_LIB) - setup_common_libraries() - # Split SYCL kernels into 2 libraries as categories 1) Unary+Binary 2) Others. - set(ATen_XPU_SYCL_BINARY_SRCS) - set(ATen_XPU_SYCL_UNARY_SRCS) - set(ATen_XPU_SYCL_REDUCE_SRCS) - set(ATen_XPU_SYCL_ACTIVATION_SRCS) - set(ATen_XPU_SYCL_FOREACH_SRCS) - set(ATen_XPU_SYCL_TENSOR_SRCS) - set(ATen_XPU_SYCL_NORM_LOSS_SRCS) - set(ATen_XPU_SYCL_POLY_SRCS) - set(ATen_XPU_SYCL_DISTRIBUTION_SRCS) - set(ATen_XPU_SYCL_OTHERS_SRCS) - foreach(sycl_src ${ATen_XPU_SYCL_SRCS}) - string(REGEX MATCH "Binary" IS_BINARY ${sycl_src}) - string(REGEX MATCH "Unary" IS_UNARY ${sycl_src}) - # Resolve cyclic dependences between - # torch_xpu_ops_sycl_unary_binary_kernels.dll and - # torch_xpu_ops_sycl_kernels.dll. Move definition and invoke of kernels - # into a same kernel library. Here we move elementwise kernel pow and copy - # into torch_xpu_ops_sycl_unary_binary_kernels.dll. - string(REGEX MATCH "Pow" IS_POW ${sycl_src}) - string(REGEX MATCH "Copy" IS_COPY ${sycl_src}) - string(REGEX MATCH "Activation" IS_ACTIVATION ${sycl_src}) - string(REGEX MATCH "Foreach" IS_FOREACH ${sycl_src}) - string(REGEX MATCH "Reduce" IS_REDUCE ${sycl_src}) - string(REGEX MATCH "Tensor" IS_TENSOR ${sycl_src}) - string(REGEX MATCH "Norm" IS_NORM ${sycl_src}) - string(REGEX MATCH "Loss" IS_LOSS ${sycl_src}) - string(REGEX MATCH "Polynomial" IS_POLY ${sycl_src}) - #Move resize kernel to Norm and Loss lib, to resolve symbol. - string(REGEX MATCH "Resize" IS_RESIZE ${sycl_src}) - string(REGEX MATCH "Distribution" IS_DISTRIBUTION ${sycl_src}) - - if(NOT IS_FOREACH STREQUAL "") - list(APPEND ATen_XPU_SYCL_FOREACH_SRCS ${sycl_src}) - elseif(NOT IS_BINARY STREQUAL "") - list(APPEND ATen_XPU_SYCL_BINARY_SRCS ${sycl_src}) - elseif(NOT IS_UNARY STREQUAL "" OR NOT IS_COPY STREQUAL "" OR NOT IS_POW STREQUAL "") - list(APPEND ATen_XPU_SYCL_UNARY_SRCS ${sycl_src}) - elseif(NOT IS_REDUCE STREQUAL "") - list(APPEND ATen_XPU_SYCL_REDUCE_SRCS ${sycl_src}) - elseif(NOT IS_ACTIVATION STREQUAL "") - list(APPEND ATen_XPU_SYCL_ACTIVATION_SRCS ${sycl_src}) - elseif(NOT IS_TENSOR STREQUAL "") - list(APPEND ATen_XPU_SYCL_TENSOR_SRCS ${sycl_src}) - elseif(NOT IS_DISTRIBUTION STREQUAL "") - list(APPEND ATen_XPU_SYCL_DISTRIBUTION_SRCS ${sycl_src}) - elseif(NOT IS_NORM STREQUAL "" OR NOT IS_LOSS STREQUAL "" OR NOT IS_RESIZE STREQUAL "") - list(APPEND ATen_XPU_SYCL_NORM_LOSS_SRCS ${sycl_src}) - elseif(NOT IS_POLY STREQUAL "") - list(APPEND ATen_XPU_SYCL_POLY_SRCS ${sycl_src}) - else() - list(APPEND ATen_XPU_SYCL_OTHERS_SRCS ${sycl_src}) - endif() - endforeach() - # Binary kernel lib - set(sycl_binary_lib torch_xpu_ops_sycl_binary_kernels) - sycl_add_library( - ${sycl_binary_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_BINARY_SRCS}) - target_compile_definitions(${sycl_binary_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) - target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_binary_lib}) - target_link_libraries(${sycl_binary_lib} PUBLIC torch_xpu) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_binary_lib}) - - # Decouple with PyTorch cmake definition. 
- install(TARGETS ${sycl_binary_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Unary kernel lib - set(sycl_unary_lib torch_xpu_ops_sycl_unary_kernels) - sycl_add_library( - ${sycl_unary_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_UNARY_SRCS}) - target_compile_definitions(${sycl_unary_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) - target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_unary_lib}) - target_link_libraries(${sycl_unary_lib} PUBLIC torch_xpu) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_unary_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_unary_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Reduce kernel lib - set(sycl_reduce_lib torch_xpu_ops_sycl_reduce_kernels) - sycl_add_library( - ${sycl_reduce_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_REDUCE_SRCS}) - target_compile_definitions(${sycl_reduce_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) - target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_reduce_lib}) - target_link_libraries(${sycl_reduce_lib} PUBLIC torch_xpu) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_reduce_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_reduce_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Activation kernel lib - set(sycl_activation_lib torch_xpu_ops_sycl_activation_kernels) - sycl_add_library( - ${sycl_activation_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_ACTIVATION_SRCS}) - target_compile_definitions(${sycl_activation_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) - target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_activation_lib}) - target_link_libraries(${sycl_activation_lib} PUBLIC torch_xpu) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_activation_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_activation_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Foreach kernel lib - set(sycl_foreach_lib torch_xpu_ops_sycl_foreach_kernels) - sycl_add_library( - ${sycl_foreach_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_FOREACH_SRCS}) - target_compile_definitions(${sycl_foreach_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) - target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_foreach_lib}) - target_link_libraries(${sycl_foreach_lib} PUBLIC torch_xpu) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_foreach_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_foreach_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Tensor kernel lib - set(sycl_tensor_lib torch_xpu_ops_sycl_tensor_kernels) - sycl_add_library( - ${sycl_tensor_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_TENSOR_SRCS}) - target_compile_definitions(${sycl_tensor_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) - target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_tensor_lib}) - target_link_libraries(${sycl_tensor_lib} PUBLIC torch_xpu) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_tensor_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_tensor_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Norm and Loss kernel lib - set(sycl_norm_loss_lib torch_xpu_ops_sycl_norm_loss_kernels) - sycl_add_library( - ${sycl_norm_loss_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_NORM_LOSS_SRCS}) - target_compile_definitions(${sycl_norm_loss_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) - target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_norm_loss_lib}) - target_link_libraries(${sycl_norm_loss_lib} PUBLIC torch_xpu) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_norm_loss_lib}) - - # Decouple with PyTorch cmake definition. 
- install(TARGETS ${sycl_norm_loss_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Polynomial kernel lib - set(sycl_poly_lib torch_xpu_ops_sycl_poly_kernels) - sycl_add_library( - ${sycl_poly_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_POLY_SRCS}) - target_compile_definitions(${sycl_poly_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) - target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_poly_lib}) - target_link_libraries(${sycl_poly_lib} PUBLIC torch_xpu) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_poly_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_poly_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Distribution kernel lib - set(sycl_dist_lib torch_xpu_ops_sycl_dist_kernels) - sycl_add_library( - ${sycl_dist_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_DISTRIBUTION_SRCS}) - target_compile_definitions(${sycl_dist_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) - target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_dist_lib}) - target_link_libraries(${sycl_dist_lib} PUBLIC torch_xpu) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_dist_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_dist_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - - # Other kernel lib - set(sycl_lib torch_xpu_ops_sycl_kernels) - sycl_add_library( - ${sycl_lib} - SHARED - SYCL_SOURCES ${ATen_XPU_SYCL_OTHERS_SRCS}) - target_compile_definitions(${sycl_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) - target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_lib}) - target_link_libraries(${sycl_lib} PUBLIC torch_xpu) - list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_lib}) - - # Decouple with PyTorch cmake definition. - install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") - list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops) list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops_aten) else() From 4c8ede205a5362556454027c2e9ddcb61b583f18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Thu, 23 Oct 2025 05:29:20 +0000 Subject: [PATCH 09/10] Move part of diff to second PR --- src/BuildOnLinux.cmake | 81 ++++++++++++++++ src/BuildOnWindows.cmake | 200 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 281 insertions(+) diff --git a/src/BuildOnLinux.cmake b/src/BuildOnLinux.cmake index fd01b26a3f..3cf18e008d 100644 --- a/src/BuildOnLinux.cmake +++ b/src/BuildOnLinux.cmake @@ -36,6 +36,87 @@ if(BUILD_SEPARATE_OPS) # Decouple with PyTorch cmake definition. install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") endforeach() +# Working with the compilers which don't support device code compression, we have to split kernels +# into multiple libraries to meet the bin size limitation. +elseif(BUILD_SPLIT_KERNEL_LIB OR __INTEL_LLVM_COMPILER LESS 20250004 OR ICX_DATE LESS 20241205) + setup_common_libraries() + # Split SYCL kernels into 4 libraries as categories 1) Unary+Binary 2) Reduce 3) Foreach 4) Others. 
+ set(ATen_XPU_SYCL_UNARY_BINARY_SRCS) + set(ATen_XPU_SYCL_REDUCE_SRCS) + set(ATen_XPU_SYCL_FOREACH_SRCS) + set(ATen_XPU_SYCL_OTHERS_SRCS) + + foreach(sycl_src ${ATen_XPU_SYCL_SRCS}) + string(REGEX MATCH "Binary" IS_BINARY ${sycl_src}) + string(REGEX MATCH "Unary" IS_UNARY ${sycl_src}) + string(REGEX MATCH "Pow" IS_POW ${sycl_src}) + string(REGEX MATCH "Copy" IS_COPY ${sycl_src}) + string(REGEX MATCH "Reduce" IS_REDUCE ${sycl_src}) + string(REGEX MATCH "Activation" IS_ACTIVATION ${sycl_src}) + string(REGEX MATCH "Foreach" IS_FOREACH ${sycl_src}) + + if(NOT IS_FOREACH STREQUAL "") + list(APPEND ATen_XPU_SYCL_FOREACH_SRCS ${sycl_src}) + elseif(NOT IS_REDUCE STREQUAL "") + list(APPEND ATen_XPU_SYCL_REDUCE_SRCS ${sycl_src}) + elseif(NOT IS_UNARY STREQUAL "" OR NOT IS_BINARY STREQUAL "") + list(APPEND ATen_XPU_SYCL_UNARY_BINARY_SRCS ${sycl_src}) + elseif(NOT IS_COPY STREQUAL "" OR NOT IS_POW STREQUAL "") + list(APPEND ATen_XPU_SYCL_UNARY_BINARY_SRCS ${sycl_src}) + elseif(NOT IS_ACTIVATION STREQUAL "") + list(APPEND ATen_XPU_SYCL_UNARY_BINARY_SRCS ${sycl_src}) + else() + list(APPEND ATen_XPU_SYCL_OTHERS_SRCS ${sycl_src}) + endif() + endforeach() + + # Unary binary kernel lib + set(sycl_unary_binary_lib torch_xpu_ops_sycl_unary_binary_kernels) + sycl_add_library( + ${sycl_unary_binary_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_UNARY_BINARY_SRCS}) + target_link_libraries(torch_xpu_ops PUBLIC ${sycl_unary_binary_lib}) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_unary_binary_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_unary_binary_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Reduce kernel lib + set(sycl_reduce_lib torch_xpu_ops_sycl_reduce_kernels) + sycl_add_library( + ${sycl_reduce_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_REDUCE_SRCS}) + target_link_libraries(torch_xpu_ops PUBLIC ${sycl_reduce_lib}) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_reduce_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_reduce_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Foreach kernel lib + set(sycl_foreach_lib torch_xpu_ops_sycl_foreach_kernels) + sycl_add_library( + ${sycl_foreach_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_FOREACH_SRCS}) + target_link_libraries(torch_xpu_ops PUBLIC ${sycl_foreach_lib}) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_foreach_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_foreach_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Other kernel lib + set(sycl_lib torch_xpu_ops_sycl_kernels) + sycl_add_library( + ${sycl_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_OTHERS_SRCS}) + target_link_libraries(torch_xpu_ops PUBLIC ${sycl_lib}) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") else() sycl_add_library( torch_xpu_ops diff --git a/src/BuildOnWindows.cmake b/src/BuildOnWindows.cmake index 4005a2f3a6..bf067c8e70 100644 --- a/src/BuildOnWindows.cmake +++ b/src/BuildOnWindows.cmake @@ -39,6 +39,206 @@ if(BUILD_SEPARATE_OPS) # Decouple with PyTorch cmake definition. install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") endforeach() + list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops) + list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops_aten) +# Working with the compilers which don't support device code compression, we have to split kernels +# into multiple libraries to meet the bin size limitation. 
+elseif(BUILD_SPLIT_KERNEL_LIB OR __INTEL_LLVM_COMPILER LESS 20250004 OR ICX_DATE LESS 20241205) + setup_common_libraries() + # Split SYCL kernels into 2 libraries as categories 1) Unary+Binary 2) Others. + set(ATen_XPU_SYCL_BINARY_SRCS) + set(ATen_XPU_SYCL_UNARY_SRCS) + set(ATen_XPU_SYCL_REDUCE_SRCS) + set(ATen_XPU_SYCL_ACTIVATION_SRCS) + set(ATen_XPU_SYCL_FOREACH_SRCS) + set(ATen_XPU_SYCL_TENSOR_SRCS) + set(ATen_XPU_SYCL_NORM_LOSS_SRCS) + set(ATen_XPU_SYCL_POLY_SRCS) + set(ATen_XPU_SYCL_DISTRIBUTION_SRCS) + set(ATen_XPU_SYCL_OTHERS_SRCS) + foreach(sycl_src ${ATen_XPU_SYCL_SRCS}) + string(REGEX MATCH "Binary" IS_BINARY ${sycl_src}) + string(REGEX MATCH "Unary" IS_UNARY ${sycl_src}) + # Resolve cyclic dependences between + # torch_xpu_ops_sycl_unary_binary_kernels.dll and + # torch_xpu_ops_sycl_kernels.dll. Move definition and invoke of kernels + # into a same kernel library. Here we move elementwise kernel pow and copy + # into torch_xpu_ops_sycl_unary_binary_kernels.dll. + string(REGEX MATCH "Pow" IS_POW ${sycl_src}) + string(REGEX MATCH "Copy" IS_COPY ${sycl_src}) + string(REGEX MATCH "Activation" IS_ACTIVATION ${sycl_src}) + string(REGEX MATCH "Foreach" IS_FOREACH ${sycl_src}) + string(REGEX MATCH "Reduce" IS_REDUCE ${sycl_src}) + string(REGEX MATCH "Tensor" IS_TENSOR ${sycl_src}) + string(REGEX MATCH "Norm" IS_NORM ${sycl_src}) + string(REGEX MATCH "Loss" IS_LOSS ${sycl_src}) + string(REGEX MATCH "Polynomial" IS_POLY ${sycl_src}) + #Move resize kernel to Norm and Loss lib, to resolve symbol. + string(REGEX MATCH "Resize" IS_RESIZE ${sycl_src}) + string(REGEX MATCH "Distribution" IS_DISTRIBUTION ${sycl_src}) + + if(NOT IS_FOREACH STREQUAL "") + list(APPEND ATen_XPU_SYCL_FOREACH_SRCS ${sycl_src}) + elseif(NOT IS_BINARY STREQUAL "") + list(APPEND ATen_XPU_SYCL_BINARY_SRCS ${sycl_src}) + elseif(NOT IS_UNARY STREQUAL "" OR NOT IS_COPY STREQUAL "" OR NOT IS_POW STREQUAL "") + list(APPEND ATen_XPU_SYCL_UNARY_SRCS ${sycl_src}) + elseif(NOT IS_REDUCE STREQUAL "") + list(APPEND ATen_XPU_SYCL_REDUCE_SRCS ${sycl_src}) + elseif(NOT IS_ACTIVATION STREQUAL "") + list(APPEND ATen_XPU_SYCL_ACTIVATION_SRCS ${sycl_src}) + elseif(NOT IS_TENSOR STREQUAL "") + list(APPEND ATen_XPU_SYCL_TENSOR_SRCS ${sycl_src}) + elseif(NOT IS_DISTRIBUTION STREQUAL "") + list(APPEND ATen_XPU_SYCL_DISTRIBUTION_SRCS ${sycl_src}) + elseif(NOT IS_NORM STREQUAL "" OR NOT IS_LOSS STREQUAL "" OR NOT IS_RESIZE STREQUAL "") + list(APPEND ATen_XPU_SYCL_NORM_LOSS_SRCS ${sycl_src}) + elseif(NOT IS_POLY STREQUAL "") + list(APPEND ATen_XPU_SYCL_POLY_SRCS ${sycl_src}) + else() + list(APPEND ATen_XPU_SYCL_OTHERS_SRCS ${sycl_src}) + endif() + endforeach() + # Binary kernel lib + set(sycl_binary_lib torch_xpu_ops_sycl_binary_kernels) + sycl_add_library( + ${sycl_binary_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_BINARY_SRCS}) + target_compile_definitions(${sycl_binary_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) + target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_binary_lib}) + target_link_libraries(${sycl_binary_lib} PUBLIC torch_xpu) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_binary_lib}) + + # Decouple with PyTorch cmake definition. 
+ install(TARGETS ${sycl_binary_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Unary kernel lib + set(sycl_unary_lib torch_xpu_ops_sycl_unary_kernels) + sycl_add_library( + ${sycl_unary_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_UNARY_SRCS}) + target_compile_definitions(${sycl_unary_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) + target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_unary_lib}) + target_link_libraries(${sycl_unary_lib} PUBLIC torch_xpu) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_unary_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_unary_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Reduce kernel lib + set(sycl_reduce_lib torch_xpu_ops_sycl_reduce_kernels) + sycl_add_library( + ${sycl_reduce_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_REDUCE_SRCS}) + target_compile_definitions(${sycl_reduce_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) + target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_reduce_lib}) + target_link_libraries(${sycl_reduce_lib} PUBLIC torch_xpu) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_reduce_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_reduce_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Activation kernel lib + set(sycl_activation_lib torch_xpu_ops_sycl_activation_kernels) + sycl_add_library( + ${sycl_activation_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_ACTIVATION_SRCS}) + target_compile_definitions(${sycl_activation_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) + target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_activation_lib}) + target_link_libraries(${sycl_activation_lib} PUBLIC torch_xpu) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_activation_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_activation_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Foreach kernel lib + set(sycl_foreach_lib torch_xpu_ops_sycl_foreach_kernels) + sycl_add_library( + ${sycl_foreach_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_FOREACH_SRCS}) + target_compile_definitions(${sycl_foreach_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) + target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_foreach_lib}) + target_link_libraries(${sycl_foreach_lib} PUBLIC torch_xpu) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_foreach_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_foreach_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Tensor kernel lib + set(sycl_tensor_lib torch_xpu_ops_sycl_tensor_kernels) + sycl_add_library( + ${sycl_tensor_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_TENSOR_SRCS}) + target_compile_definitions(${sycl_tensor_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) + target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_tensor_lib}) + target_link_libraries(${sycl_tensor_lib} PUBLIC torch_xpu) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_tensor_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_tensor_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Norm and Loss kernel lib + set(sycl_norm_loss_lib torch_xpu_ops_sycl_norm_loss_kernels) + sycl_add_library( + ${sycl_norm_loss_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_NORM_LOSS_SRCS}) + target_compile_definitions(${sycl_norm_loss_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) + target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_norm_loss_lib}) + target_link_libraries(${sycl_norm_loss_lib} PUBLIC torch_xpu) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_norm_loss_lib}) + + # Decouple with PyTorch cmake definition. 
+ install(TARGETS ${sycl_norm_loss_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Polynomial kernel lib + set(sycl_poly_lib torch_xpu_ops_sycl_poly_kernels) + sycl_add_library( + ${sycl_poly_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_POLY_SRCS}) + target_compile_definitions(${sycl_poly_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) + target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_poly_lib}) + target_link_libraries(${sycl_poly_lib} PUBLIC torch_xpu) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_poly_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_poly_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Distribution kernel lib + set(sycl_dist_lib torch_xpu_ops_sycl_dist_kernels) + sycl_add_library( + ${sycl_dist_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_DISTRIBUTION_SRCS}) + target_compile_definitions(${sycl_dist_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) + target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_dist_lib}) + target_link_libraries(${sycl_dist_lib} PUBLIC torch_xpu) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_dist_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_dist_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + + # Other kernel lib + set(sycl_lib torch_xpu_ops_sycl_kernels) + sycl_add_library( + ${sycl_lib} + SHARED + SYCL_SOURCES ${ATen_XPU_SYCL_OTHERS_SRCS}) + target_compile_definitions(${sycl_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB) + target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_lib}) + target_link_libraries(${sycl_lib} PUBLIC torch_xpu) + list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_lib}) + + # Decouple with PyTorch cmake definition. + install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}") + list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops) list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops_aten) else() From e7555e462b13639865f39574633544df03b1417d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Thu, 23 Oct 2025 05:31:41 +0000 Subject: [PATCH 10/10] Restore missing line --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8778d93338..04e21cceff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,7 +79,7 @@ set(BUILD_SEPARATE_OPS $ENV{BUILD_SEPARATE_OPS}) if(CMAKE_BUILD_TYPE MATCHES "(Debug|RelWithDebInfo)") set(BUILD_SEPARATE_OPS TRUE) endif() - +set(BUILD_SPLIT_KERNEL_LIB $ENV{BUILD_SPLIT_KERNEL_LIB}) add_subdirectory(${TORCH_XPU_OPS_ROOT}/src) set(TORCH_XPU_OPS_FOUND TRUE)
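
Note for reviewers: the standalone CMake sketch below is not part of any patch in this series; it only restates, under made-up names and paths, three idioms the series relies on, so their intent is easier to check at a glance. It covers the single-argument install_xpu_headers macro from PATCH 03 (one subdirectory drives both the header glob and the install destination), the shared MKL_LIB_NAMES list from PATCH 05 (list(TRANSFORM) turns the common names into platform-specific file names), and the string(REGEX MATCH) bucketing of SYCL sources that PATCH 08 removes from and PATCH 09 restores to BuildOnLinux.cmake and BuildOnWindows.cmake. The project name, the AT_INSTALL_INCLUDE_DIR value and the example source file names are placeholders, not values taken from torch-xpu-ops.

# --- Standalone illustration (not part of the patches above); all names are placeholders. ---
cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
project(xpu_ops_idioms LANGUAGES NONE)

set(AT_INSTALL_INCLUDE_DIR "include")   # placeholder install subdirectory

# PATCH 03 idiom: one subdir argument drives both the glob and the destination.
macro(install_xpu_headers subdir)
  file(GLOB headers CONFIGURE_DEPENDS "${subdir}/*.h")
  if(headers)
    install(FILES ${headers} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/ATen/${subdir}")
  endif()
endmacro()

install_xpu_headers("native/xpu")       # installs to include/ATen/native/xpu, if any headers exist

# PATCH 05 idiom: keep the common oneMKL libraries in one list, append only the
# platform-specific thread library, then rewrite the names into real file names.
set(MKL_LIB_NAMES "mkl_sycl_blas" "mkl_sycl_dft" "mkl_sycl_lapack"
                  "mkl_intel_lp64" "mkl_core")
if(WIN32)
  list(APPEND MKL_LIB_NAMES "mkl_intel_thread")
  list(TRANSFORM MKL_LIB_NAMES APPEND "_dll.lib")   # e.g. mkl_core_dll.lib
else()
  list(APPEND MKL_LIB_NAMES "mkl_gnu_thread")
  list(TRANSFORM MKL_LIB_NAMES PREPEND "lib")
  list(TRANSFORM MKL_LIB_NAMES APPEND ".so")        # e.g. libmkl_core.so
endif()
message(STATUS "MKL library file names: ${MKL_LIB_NAMES}")

# PATCH 08/09 context: BuildOnLinux.cmake and BuildOnWindows.cmake bucket SYCL
# sources into kernel libraries by matching substrings of the file name.
# A reduced version of that classification, with made-up file names:
set(example_srcs "UnaryKernels.cpp" "ReduceOps.cpp" "ForeachBinaryOps.cpp" "Sort.cpp")
foreach(src ${example_srcs})
  string(REGEX MATCH "Foreach" IS_FOREACH ${src})
  string(REGEX MATCH "Reduce" IS_REDUCE ${src})
  if(NOT IS_FOREACH STREQUAL "")
    list(APPEND foreach_srcs ${src})    # ForeachBinaryOps.cpp lands here (Foreach is checked first)
  elseif(NOT IS_REDUCE STREQUAL "")
    list(APPEND reduce_srcs ${src})     # ReduceOps.cpp lands here
  else()
    list(APPEND other_srcs ${src})      # UnaryKernels.cpp and Sort.cpp land here in this reduced version
  endif()
endforeach()
message(STATUS "foreach: ${foreach_srcs} | reduce: ${reduce_srcs} | other: ${other_srcs}")

Configuring this sketch with cmake -S . -B build prints the per-platform MKL file names and the bucketed example sources; it is only meant to make the intent of the refactors easier to review, not to be built as part of the project.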