diff --git a/CMakeLists.txt b/CMakeLists.txt
index 820aa3230..e30ee333b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -157,9 +157,8 @@ set(FINUFFT_PRECISION_DEPENDENT_SOURCES
 # set linker flags for sanitizer
 set(FINUFFT_SANITIZER_FLAGS)
 if(FINUFFT_ENABLE_SANITIZERS)
-  set(FINUFFT_SANITIZER_FLAGS
-      -fsanitize=address -fsanitize=undefined -shared-libasan
-      -fsanitize=bounds-strict /fsanitize=address /RTC1)
+  set(FINUFFT_SANITIZER_FLAGS -fsanitize=address -fsanitize=undefined
+                              -fsanitize=bounds-strict /fsanitize=address /RTC1)
   filter_supported_compiler_flags(FINUFFT_SANITIZER_FLAGS
                                   FINUFFT_SANITIZER_FLAGS)
   set(FINUFFT_SANITIZER_FLAGS
diff --git a/cmake/CheckAVX.cpp b/cmake/CheckAVX.cpp
index b6ba2208b..553a5c35f 100644
--- a/cmake/CheckAVX.cpp
+++ b/cmake/CheckAVX.cpp
@@ -4,14 +4,43 @@
 
-bool is_sse_supported() {
+// Reports whether the CPU advertises SSE2: CPUID leaf 1, EDX bit 26.
+bool is_sse2_supported() {
   std::array<int, 4> cpui;
   __cpuid(cpui.data(), 1);
-  return (cpui[3] & (1 << 25)) != 0;
+  return (cpui[3] & (1 << 26)) != 0;
 }
 
-bool is_avx2_supported() {
+// Reports whether AVX is usable: the CPU must advertise it and the OS must
+// have enabled saving of the XMM and YMM register state.
+bool is_avx_supported() {
   std::array<int, 4> cpui;
   __cpuid(cpui.data(), 1);
-  return (cpui[2] & (1 << 5)) != 0;
+  // CPUID leaf 1, ECX bit 27 = OSXSAVE, bit 28 = AVX.
+  bool osUsesXSAVE_XRSTORE = (cpui[2] & (1 << 27)) != 0;
+  bool cpuAVXSupport = (cpui[2] & (1 << 28)) != 0;
+  if (osUsesXSAVE_XRSTORE && cpuAVXSupport) {
+    // Bits 1 and 2 of the feature mask (0x6) must both be set by the OS.
+    unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+    return (xcrFeatureMask & 0x6) == 0x6;
+  }
+  return false;
+}
+
+// Reports whether AVX2 is usable. AVX2 works on the YMM registers, so it is
+// only reported when AVX itself is usable (CPU flag plus OS state support).
+bool is_avx2_supported() {
+  if (!is_avx_supported()) {
+    return false;
+  }
+  std::array<int, 4> cpui;
+  // Leaf 0 returns the highest basic leaf in EAX; leaf 7 is only meaningful
+  // when the CPU implements it.
+  __cpuid(cpui.data(), 0);
+  if (cpui[0] < 7) {
+    return false;
+  }
+  // CPUID leaf 7, EBX bit 5 = AVX2.
+  __cpuid(cpui.data(), 7);
+  return (cpui[1] & (1 << 5)) != 0;
 }
 
 bool is_avx512_supported() {
@@ -25,10 +54,12 @@ int main() {
     std::cout << "AVX512";
   } else if (is_avx2_supported()) {
     std::cout << "AVX2";
-  } else if (is_sse_supported()) {
-    std::cout << "SSE";
+  } else if (is_avx_supported()) {
+    std::cout << "AVX";
+  } else if (is_sse2_supported()) {
+    std::cout << "SSE2";
   } else {
     std::cout << "NONE";
   }
   return 0;
-}
\ No newline at end of file
+}
diff --git a/cmake/utils.cmake b/cmake/utils.cmake
index b8bc4ca17..4a1e57956 100644
--- a/cmake/utils.cmake
+++ b/cmake/utils.cmake
@@ -41,6 +41,10 @@ function(check_arch_support)
       set(FINUFFT_ARCH_FLAGS
           "/arch:AVX512"
           CACHE STRING "" FORCE)
+    elseif(RUN_OUTPUT MATCHES "AVX2")
+      set(FINUFFT_ARCH_FLAGS
+          "/arch:AVX2"
+          CACHE STRING "" FORCE)
     elseif(RUN_OUTPUT MATCHES "AVX")
       set(FINUFFT_ARCH_FLAGS
           "/arch:AVX"