intel · guangyey · Oct 23, 2025 · Oct 20, 2025 · Oct 21, 2025 · Oct 21, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -18,10 +18,8 @@
 #  torch_xpu_ops
 #  -- Static archive library target
 
-cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
 
-set(PROJECT_NAME "torch-xpu-ops")
-set(PROJECT_VERSION "2.3.0")
 # Avoid SYCL compiler error
 if(NOT WIN32)
   string(APPEND CMAKE_CXX_FLAGS " -Wno-error")
@@ -30,8 +28,7 @@ if(NOT WIN32)
   endif()
 endif()
 
-cmake_policy(SET CMP0048 NEW)
-project(${PROJECT_NAME} VERSION "${PROJECT_VERSION}" LANGUAGES C CXX)
+project(torch-xpu-ops VERSION 2.10.0 LANGUAGES C CXX)
 
 set(TORCH_XPU_OPS_FOUND FALSE)
 
@@ -82,7 +79,7 @@ set(BUILD_SEPARATE_OPS $ENV{BUILD_SEPARATE_OPS})
 if(CMAKE_BUILD_TYPE MATCHES "(Debug|RelWithDebInfo)")
   set(BUILD_SEPARATE_OPS TRUE)
 endif()
-set(BUILD_SPLIT_KERNEL_LIB $ENV{BUILD_SPLIT_KERNEL_LIB})
+
 add_subdirectory(${TORCH_XPU_OPS_ROOT}/src)
 
 set(TORCH_XPU_OPS_FOUND TRUE)
diff --git a/cmake/Modules/FindONEMKL.cmake b/cmake/Modules/FindONEMKL.cmake
@@ -44,17 +44,23 @@ find_file(
 
 if((ONEMKL_INCLUDE_DIR STREQUAL "ONEMKL_INCLUDE_DIR-NOTFOUND")
    OR(ONEMKL_LIB_DIR STREQUAL "ONEMKL_LIB_DIR-NOTFOUND"))
-  message(WARNING "oneMKL sdk is incomplete!!")
+  message(WARNING "oneMKL SDK is incomplete!!")
   return()
 endif()
 
+# Only the threading library differs per platform. It is listed between
+# mkl_intel_lp64 and mkl_core so the library order is lp64, thread, core.
 if(WIN32)
-  set(MKL_LIB_NAMES "mkl_sycl_blas" "mkl_sycl_dft" "mkl_sycl_lapack"
-                    "mkl_intel_lp64" "mkl_intel_thread" "mkl_core")
+  set(MKL_THREAD_LIB_NAME "mkl_intel_thread")
+else()
+  set(MKL_THREAD_LIB_NAME "mkl_gnu_thread")
+endif()
+set(MKL_LIB_NAMES "mkl_sycl_blas" "mkl_sycl_dft" "mkl_sycl_lapack"
+                  "mkl_intel_lp64" "${MKL_THREAD_LIB_NAME}" "mkl_core")
+
+if(WIN32)
   list(TRANSFORM MKL_LIB_NAMES APPEND "_dll.lib")
 else()
-  set(MKL_LIB_NAMES "mkl_sycl_blas" "mkl_sycl_dft" "mkl_sycl_lapack"
-                    "mkl_intel_lp64" "mkl_gnu_thread" "mkl_core")
   list(TRANSFORM MKL_LIB_NAMES PREPEND "lib")
   list(TRANSFORM MKL_LIB_NAMES APPEND ".so")
 endif()

diff --git a/cmake/Modules/FindSYCL.cmake b/cmake/Modules/FindSYCL.cmake
@@ -107,7 +107,7 @@ macro(SYCL_INCLUDE_DEPENDENCIES dependency_file)
 
   if(SYCL_DEPEND_REGENERATE)
     set(SYCL_DEPEND ${dependency_file})
-    file(WRITE ${dependency_file} "#FindCUDA.cmake generated file.  Do not edit.\n")
+    file(WRITE ${dependency_file} "#FindSYCL.cmake generated file.  Do not edit.\n")
   endif()
 endmacro()
 

diff --git a/src/ATen/CMakeLists.txt b/src/ATen/CMakeLists.txt
@@ -1,12 +1,12 @@
 # ATen XPU sources
 
 file(GLOB xpu_cpp "xpu/*.cpp")
-file(GLOB xpu_mkl "native/xpu/mkl/*.cpp")
 file(GLOB xpu_native_cpp "native/xpu/*.cpp" "native/sparse/*.cpp" "native/sparse/xpu/*.cpp" "native/nested/*.cpp" "native/nested/xpu/*.cpp" "native/transformers/*.cpp" "native/quantized/*.cpp")
 file(GLOB xpu_sycl "native/xpu/sycl/*.cpp" "native/sparse/xpu/sycl/*.cpp" "native/nested/xpu/sycl/*.cpp" "native/transformers/sycl/*.cpp" "native/quantized/sycl/*.cpp")
 
 list(APPEND ATen_XPU_CPP_SRCS ${xpu_cpp})
 if(USE_ONEMKL_XPU)
+  file(GLOB xpu_mkl "native/xpu/mkl/*.cpp")
   list(APPEND ATen_XPU_MKL_SRCS ${xpu_mkl})
 endif()
 list(APPEND ATen_XPU_NATIVE_CPP_SRCS ${xpu_native_cpp})
@@ -19,25 +19,37 @@ set(ATen_XPU_SYCL_SRCS ${ATen_XPU_SYCL_SRCS} PARENT_SCOPE)
 
 # ATen XPU headers
 
-macro(install_xpu_headers glob_pattern dest_subdir)
-  file(GLOB headers ${glob_pattern})
+# install_xpu_headers(<src_subdir> [<dest_subdir>])
+#
+# Install every header matching <src_subdir>/*.h into
+# ${AT_INSTALL_INCLUDE_DIR}/ATen/<dest_subdir>. <dest_subdir> defaults to
+# <src_subdir>; pass it when the installed layout differs from the source tree.
+function(install_xpu_headers src_subdir)
+  if(ARGC GREATER 1)
+    set(dest_subdir "${ARGV1}")
+  else()
+    set(dest_subdir "${src_subdir}")
+  endif()
+  file(GLOB headers CONFIGURE_DEPENDS "${src_subdir}/*.h")
   if(headers)
-    install(FILES ${headers} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/${dest_subdir}")
+    install(FILES ${headers} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/ATen/${dest_subdir}")
   endif()
-endmacro()
+endfunction()
 
-install_xpu_headers("xpu/*.h" "ATen/xpu")
-install_xpu_headers("native/xpu/*.h" "ATen/native/xpu")
-install_xpu_headers("native/xpu/sycl/*.h" "ATen/native/xpu/sycl")
-install_xpu_headers("native/xpu/mkl/*.h" "ATen/native/xpu/mkl")
-install_xpu_headers("native/nested/xpu/*.h" "ATen/native/nested/xpu")
-install_xpu_headers("native/nested/xpu/sycl/*.h" "ATen/native/nested/xpu/sycl")
-install_xpu_headers("native/quantized/*.h" "ATen/native/quantized/xpu")
-install_xpu_headers("native/quantized/sycl/*.h" "ATen/native/quantized/xpu/sycl")
-install_xpu_headers("native/sparse/xpu/*.h" "ATen/native/sparse/xpu")
-install_xpu_headers("native/sparse/xpu/sycl/*.h" "ATen/native/sparse/xpu/sycl")
-install_xpu_headers("native/transformers/*.h" "ATen/native/transformers/xpu")
-install_xpu_headers("native/transformers/sycl/*.h" "ATen/native/transformers/xpu/sycl")
+install_xpu_headers("xpu")
+install_xpu_headers("native/xpu")
+install_xpu_headers("native/xpu/sycl")
+install_xpu_headers("native/xpu/mkl")
+install_xpu_headers("native/nested/xpu")
+install_xpu_headers("native/nested/xpu/sycl")
+# quantized/ and transformers/ keep their sources outside an xpu/ directory but
+# are installed under .../xpu, so the destination is given explicitly.
+install_xpu_headers("native/quantized" "native/quantized/xpu")
+install_xpu_headers("native/quantized/sycl" "native/quantized/xpu/sycl")
+install_xpu_headers("native/sparse/xpu")
+install_xpu_headers("native/sparse/xpu/sycl")
+install_xpu_headers("native/transformers" "native/transformers/xpu")
+install_xpu_headers("native/transformers/sycl" "native/transformers/xpu/sycl")
 
 if(xpu_ops_generated_headers)
   install(FILES ${xpu_ops_generated_headers} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/ops)

diff --git a/src/BuildOnLinux.cmake b/src/BuildOnLinux.cmake
@@ -36,87 +36,6 @@ if(BUILD_SEPARATE_OPS)
     # Decouple with PyTorch cmake definition.
     install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
   endforeach()
-# Working with the compilers which don't support device code compression, we have to split kernels
-# into multiple libraries to meet the bin size limitation.
-elseif(BUILD_SPLIT_KERNEL_LIB OR __INTEL_LLVM_COMPILER LESS 20250004 OR ICX_DATE LESS 20241205)
-  setup_common_libraries()
-  # Split SYCL kernels into 4 libraries as categories 1) Unary+Binary 2) Reduce 3) Foreach 4) Others.
-  set(ATen_XPU_SYCL_UNARY_BINARY_SRCS)
-  set(ATen_XPU_SYCL_REDUCE_SRCS)
-  set(ATen_XPU_SYCL_FOREACH_SRCS)
-  set(ATen_XPU_SYCL_OTHERS_SRCS)
-
-  foreach(sycl_src ${ATen_XPU_SYCL_SRCS})
-    string(REGEX MATCH "Binary" IS_BINARY ${sycl_src})
-    string(REGEX MATCH "Unary" IS_UNARY ${sycl_src})
-    string(REGEX MATCH "Pow" IS_POW ${sycl_src})
-    string(REGEX MATCH "Copy" IS_COPY ${sycl_src})
-    string(REGEX MATCH "Reduce" IS_REDUCE ${sycl_src})
-    string(REGEX MATCH "Activation" IS_ACTIVATION ${sycl_src})
-    string(REGEX MATCH "Foreach" IS_FOREACH ${sycl_src})
-
-    if(NOT IS_FOREACH STREQUAL "")
-      list(APPEND ATen_XPU_SYCL_FOREACH_SRCS ${sycl_src})
-    elseif(NOT IS_REDUCE STREQUAL "")
-      list(APPEND ATen_XPU_SYCL_REDUCE_SRCS ${sycl_src})
-    elseif(NOT IS_UNARY STREQUAL "" OR NOT IS_BINARY STREQUAL "")
-      list(APPEND ATen_XPU_SYCL_UNARY_BINARY_SRCS ${sycl_src})
-    elseif(NOT IS_COPY STREQUAL "" OR NOT IS_POW STREQUAL "")
-      list(APPEND ATen_XPU_SYCL_UNARY_BINARY_SRCS ${sycl_src})
-    elseif(NOT IS_ACTIVATION STREQUAL "")
-      list(APPEND ATen_XPU_SYCL_UNARY_BINARY_SRCS ${sycl_src})
-    else()
-      list(APPEND ATen_XPU_SYCL_OTHERS_SRCS ${sycl_src})
-    endif()
-  endforeach()
-
-  # Unary binary kernel lib
-  set(sycl_unary_binary_lib torch_xpu_ops_sycl_unary_binary_kernels)
-  sycl_add_library(
-    ${sycl_unary_binary_lib}
-    SHARED
-    SYCL_SOURCES ${ATen_XPU_SYCL_UNARY_BINARY_SRCS})
-  target_link_libraries(torch_xpu_ops PUBLIC ${sycl_unary_binary_lib})
-  list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_unary_binary_lib})
-
-  # Decouple with PyTorch cmake definition.
-  install(TARGETS ${sycl_unary_binary_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-
-  # Reduce kernel lib
-  set(sycl_reduce_lib torch_xpu_ops_sycl_reduce_kernels)
-  sycl_add_library(
-    ${sycl_reduce_lib}
-    SHARED
-    SYCL_SOURCES ${ATen_XPU_SYCL_REDUCE_SRCS})
-  target_link_libraries(torch_xpu_ops PUBLIC ${sycl_reduce_lib})
-  list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_reduce_lib})
-
-  # Decouple with PyTorch cmake definition.
-  install(TARGETS ${sycl_reduce_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-
-  # Foreach kernel lib
-  set(sycl_foreach_lib torch_xpu_ops_sycl_foreach_kernels)
-  sycl_add_library(
-    ${sycl_foreach_lib}
-    SHARED
-    SYCL_SOURCES ${ATen_XPU_SYCL_FOREACH_SRCS})
-  target_link_libraries(torch_xpu_ops PUBLIC ${sycl_foreach_lib})
-  list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_foreach_lib})
-
-  # Decouple with PyTorch cmake definition.
-  install(TARGETS ${sycl_foreach_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
-
-  # Other kernel lib
-  set(sycl_lib torch_xpu_ops_sycl_kernels)
-  sycl_add_library(
-    ${sycl_lib}
-    SHARED
-    SYCL_SOURCES ${ATen_XPU_SYCL_OTHERS_SRCS})
-  target_link_libraries(torch_xpu_ops PUBLIC ${sycl_lib})
-  list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_lib})
-
-  # Decouple with PyTorch cmake definition.
-  install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
 else()
   sycl_add_library(
     torch_xpu_ops