Detect BMI2 support separately instead of tying -mbmi2 to AVX512.

pybind_interface/avx2/CMakeLists.txt
  Non-Windows builds keep the existing -mavx2 -mfma -O3 -flto=auto options and
  additionally get -mbmi2 when /proc/cpuinfo on the build host contains "bmi2".
  grep is run directly by execute_process (no shell); with -s it exits non-zero
  and prints nothing when /proc/cpuinfo is absent, so such hosts skip -mbmi2.

tests/Makefile
  -mbmi2 moves out of AVX512_FLAGS into a new BMI2_FLAGS, which is set only
  when HAVE_BMI2 is "true" and is appended to CXXFLAGS. HAVE_SSE looks for
  sse4_2 so that it agrees with the -msse4 flag it switches on. The empty
  SSE_FILES/AVX2_FILES/AVX512_FILES initializers are removed and LOCAL_VARS,
  which print-vars echoes, lists more variables.

NOTE(review): the submitted version narrowed the CMake branch to elseif(LINUX),
which leaves other non-Windows hosts without the AVX2/FMA options; it is kept
as else() here. Confirm the narrowing was not intentional.
NOTE(review): leading whitespace inside the hunks was reconstructed (4 spaces
in CMake bodies and Makefile conditionals, tabs in recipes and in LOCAL_VARS
continuation lines). Confirm against the target files before applying. The
"index" lines are omitted because the post-image differs from the submitted one.

diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt
--- a/pybind_interface/avx2/CMakeLists.txt
+++ b/pybind_interface/avx2/CMakeLists.txt
@@ -20,8 +20,18 @@ if(WIN32)
     # Add /O2 to any configuration that is NOT Debug.
     # This prevents a conflict with /RTC1 in DEBUG builds.
     add_compile_options($<$<NOT:$<CONFIG:Debug>>:/O2>)
 else()
     add_compile_options(-mavx2 -mfma -O3 -flto=auto)
+    # Enable BMI2 only when the build host's CPU advertises it. grep is run
+    # directly (no shell needed); -s keeps it silent and non-zero when
+    # /proc/cpuinfo does not exist, so hosts without it skip the flag.
+    execute_process(
+        COMMAND grep -qs bmi2 /proc/cpuinfo
+        RESULT_VARIABLE _EXIT_CODE
+    )
+    if(_EXIT_CODE EQUAL 0)
+        add_compile_options("-mbmi2")
+    endif()
 endif()
 
 if(APPLE)
diff --git a/tests/Makefile b/tests/Makefile
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -13,33 +13,36 @@
 # limitations under the License.
 
 # Determine the hardware features available in this CPU.
-HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true")
 HAVE_AVX2 := $(shell grep -qs avx2 /proc/cpuinfo && echo "true")
 HAVE_AVX512 := $(shell grep -qs avx512f /proc/cpuinfo && echo "true")
+HAVE_BMI2 := $(shell grep -qs bmi2 /proc/cpuinfo && echo "true")
+# SSE_FLAGS is -msse4, so require sse4_2 rather than any flag containing "sse".
+HAVE_SSE := $(shell grep -qs sse4_2 /proc/cpuinfo && echo "true")
 
 # Default targets. Always built.
 BASIC_FILES := $(shell ls *.cc | egrep -v '_avx|_sse')
 
 # Additional flags and targets for non-CUDA cases.
-SSE_FILES =
-AVX2_FILES =
-AVX512_FILES =
-ifneq (,$(HAVE_SSE))
-    SSE_FLAGS ?= -msse4
-    SSE_FILES := $(wildcard *_sse_test.cc)
-endif
 ifneq (,$(HAVE_AVX2))
     AVX2_FLAGS ?= -mavx2 -mfma
     AVX2_FILES := $(wildcard *_avx_test.cc)
 endif
 ifneq (,$(HAVE_AVX512))
-    AVX512_FLAGS ?= -mavx512f -mbmi2
+    AVX512_FLAGS ?= -mavx512f
     AVX512_FILES := $(wildcard *_avx512_test.cc)
 endif
+# BMI2 is probed on its own; it is no longer implied by AVX512.
+ifneq (,$(HAVE_BMI2))
+    BMI2_FLAGS ?= -mbmi2
+endif
+ifneq (,$(HAVE_SSE))
+    SSE_FLAGS ?= -msse4
+    SSE_FILES := $(wildcard *_sse_test.cc)
+endif
 
 CXX_FILES := $(BASIC_FILES) $(SSE_FILES) $(AVX2_FILES) $(AVX512_FILES)
 CXX_TARGETS := $(CXX_FILES:%.cc=%.x)
-CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS)
+CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS) $(BMI2_FLAGS)
 
 CUDA_FILES := $(wildcard *cuda_test.cu)
 CUDA_TARGETS := $(CUDA_FILES:%cuda_test.cu=%cuda_test.x)
@@ -125,8 +128,18 @@ clean:
 	-rm -f ./*.x ./*.a ./*.so ./*.mod
 	rm -rf $(GTEST_DIR)/build
 
-LOCAL_VARS = HAVE_SSE HAVE_AVX2 HAVE_AVX512 SSE_FLAGS AVX2_FLAGS $\
-	AVX512_FLAGS CXXFLAGS CXX_TARGETS TEST_FLAGS
+# Variables echoed (sorted, as NAME=value) by the print-vars target below.
+LOCAL_VARS = BASIC_FILES CXX_FILES CXX_TARGETS CXXFLAGS $\
+	CUDA_FILES CUDA_TARGETS $\
+	CUSTATEVEC_FILES CUSTATEVEC_FLAGS $\
+	CUSTATEVECEX_FILES CUSTATEVECEX_FLAGS $\
+	HAVE_AVX2 AVX2_FILES AVX2_FLAGS $\
+	HAVE_AVX512 AVX512_FILES AVX512_FLAGS $\
+	HAVE_BMI2 BMI2_FLAGS $\
+	HAVE_SSE SSE_FILES SSE_FLAGS $\
+	HIP_FILES HIP_TARGETS $\
+	GMOCK_DIR GTEST_DIR $\
+	TESTFLAGS
 
 .PHONY: print-vars
 print-vars: ; @$(foreach n,$(sort $(LOCAL_VARS)),echo $n=$($n);)