Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions subprojects/packagefiles/sleef/fix-purecfma-scalar-x86.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
diff --git a/Configure.cmake b/Configure.cmake
index e23f577..e1a1be6 100644
--- a/Configure.cmake
+++ b/Configure.cmake
@@ -193,7 +193,9 @@ endif()
if(SLEEF_TARGET_PROCESSOR MATCHES "(x86|AMD64|amd64|^i.86$)")
set(SLEEF_ARCH_X86 ON CACHE INTERNAL "True for x86 architecture.")

- set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mavx2;-mfma")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately my cmake is pretty terrible: is there a way to set these flags for x86_64-v3 or newer?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was reading and it seems querying CPUIDs is a way to conditionally set the flags, also not just flags but the entire dispatch mechanism.
I'll try if I somehow managed to get this right, as this unconditional SIMD dispatching is very tightly integrated with SLEEF's FMA code generation

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, thanks! It'd be nice to enable better optimizations when people build for themselves but not critical.

+ # Removed PURECFMA_SCALAR flags for x86-64-v2 compatibility
+ # The PURECFMA code path uses FMA intrinsics that cause SIGILL on Sandy Bridge CPUs
+ # Original was: set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mavx2;-mfma")
elseif(SLEEF_TARGET_PROCESSOR MATCHES "aarch64|arm64")
set(SLEEF_ARCH_AARCH64 ON CACHE INTERNAL "True for Aarch64 architecture.")
# Aarch64 requires support for advsimdfma4
@@ -220,7 +222,11 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
endif()

set(COMPILER_SUPPORTS_PUREC_SCALAR 1)
-set(COMPILER_SUPPORTS_PURECFMA_SCALAR 1)
+# Disable PURECFMA_SCALAR on x86 for x86-64-v2 compatibility (Sandy Bridge support)
+# PURECFMA requires FMA instructions which are only available in x86-64-v3 (Haswell+)
+if(NOT SLEEF_ARCH_X86)
+ set(COMPILER_SUPPORTS_PURECFMA_SCALAR 1)
+endif()

# Compiler feature detection

diff --git a/src/quad/CMakeLists.txt b/src/quad/CMakeLists.txt
index 8e4e261..cc55002 100644
--- a/src/quad/CMakeLists.txt
+++ b/src/quad/CMakeLists.txt
@@ -398,9 +398,17 @@ set_target_properties(qmkdisp PROPERTIES ${COMMON_TARGET_PROPERTIES})

# Target qdispscalar.c

+# Set scalar dispatch backends based on PURECFMA support
+# On x86, PURECFMA is disabled for x86-64-v2 compatibility
+if(COMPILER_SUPPORTS_PURECFMA_SCALAR)
+ set(SCALAR_DISPATCH_BACKENDS "purec" "purecfma")
+else()
+ set(SCALAR_DISPATCH_BACKENDS "purec" "purec") # Use purec for both slots
+endif()
+
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/qdispscalar.c.body
- COMMAND $<TARGET_FILE:qmkdisp> 1 Sleef_quad double int32_t int64_t uint64_t purec purecfma > ${CMAKE_CURRENT_BINARY_DIR}/qdispscalar.c.body
+ COMMAND $<TARGET_FILE:qmkdisp> 1 Sleef_quad double int32_t int64_t uint64_t ${SCALAR_DISPATCH_BACKENDS} > ${CMAKE_CURRENT_BINARY_DIR}/qdispscalar.c.body
DEPENDS qmkdisp
)
sleef_concat_files(
diff --git a/src/quad/qdispscalar.c.org b/src/quad/qdispscalar.c.org
index c4c1292..48f309c 100644
--- a/src/quad/qdispscalar.c.org
+++ b/src/quad/qdispscalar.c.org
@@ -15,10 +15,14 @@

#include "qdispatcher.h"

+#ifdef ENABLE_PURECFMA
NOEXPORT Sleef_quad sleef_cpuid_QUADFMA_0;
static void tryFMA() { sleef_cpuid_QUADFMA_0 = Sleef_sinq1_u10purecfma(sleef_cpuid_QUADFMA_0); }

#define SUBST_IF_EXT1(funcExt1) if (cpuSupportsExt(tryFMA)) p = funcExt1;
+#else
+#define SUBST_IF_EXT1(funcExt1)
+#endif

//

1 change: 1 addition & 0 deletions subprojects/sleef.wrap
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ directory=sleef
url=https://github.com/shibatch/sleef.git
revision=3.9.0
patch_directory=sleef
diff_files=sleef/fix-purecfma-scalar-x86.patch

[provide]
sleef = sleef_dep
Expand Down