Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pybind_interface/avx2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ if(WIN32)
# This prevents a conflict with /RTC1 in DEBUG builds.
add_compile_options($<$<NOT:$<CONFIG:Debug>>:/O2>)
else()
include(CheckCCompilerFlag)
check_c_compiler_flag("-mbmi2" COMPILER_HAS_BMI2)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That just checks whether the compiler accepts this flag; BMI2 might still not be supported by the CPU. I think this is okay for now. Perhaps we should have two AVX2 versions (with BMI2 and without) and select one at runtime, as we now detect SSE/AVX2/AVX512. We can add the second version later.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> That just checks whether the compiler accepts this flag; BMI2 might still not be supported by the CPU. I think this is okay for now. Perhaps we should have two AVX2 versions (with BMI2 and without) and select one at runtime, as we now detect SSE/AVX2/AVX512. We can add the second version later.

Yeah, I confess I used the compiler flags because I couldn't find a cmake construct to do it properly. Of course you're right, it's incorrect and lazy.

So I searched for a better way. I found some alternatives, but they are more involved and, IMHO, better left to a separate PR. The simplest solution for this PR seems to be to grep /proc/cpuinfo for the CPU feature, as is done in tests/Makefile. The latest push to this PR changes the CMakeLists.txt to do it that way instead of checking compiler flags.

@sergeisakov sorry to ask you to take yet another look …

if(COMPILER_HAS_BMI2)
add_compile_options("-mbmi2")
add_compile_definitions(HAVE_BMI2)
endif()

add_compile_options(-mavx2 -mfma -O3 -flto=auto)
endif()

Expand Down
34 changes: 22 additions & 12 deletions tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,34 @@
# limitations under the License.

# Determine the hardware features available in this CPU.
HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true")
HAVE_AVX2 := $(shell grep -qs avx2 /proc/cpuinfo && echo "true")
HAVE_AVX512 := $(shell grep -qs avx512f /proc/cpuinfo && echo "true")
HAVE_BMI2 := $(shell grep -qs bmi2 /proc/cpuinfo && echo "true")
HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true")

# Default targets. Always built.
BASIC_FILES := $(shell ls *.cc | egrep -v '_avx|_sse')

# Additional flags and targets for non-CUDA cases.
SSE_FILES =
AVX2_FILES =
AVX512_FILES =
ifneq (,$(HAVE_SSE))
SSE_FLAGS ?= -msse4
SSE_FILES := $(wildcard *_sse_test.cc)
endif
ifneq (,$(HAVE_AVX2))
AVX2_FLAGS ?= -mavx2 -mfma
AVX2_FILES := $(wildcard *_avx_test.cc)
endif
ifneq (,$(HAVE_AVX512))
AVX512_FLAGS ?= -mavx512f -mbmi2
AVX512_FLAGS ?= -mavx512f
AVX512_FILES := $(wildcard *_avx512_test.cc)
endif
ifneq (,$(HAVE_BMI2))
BMI2_FLAGS ?= -mbmi2
endif
ifneq (,$(HAVE_SSE))
SSE_FLAGS ?= -msse4
SSE_FILES := $(wildcard *_sse_test.cc)
endif

CXX_FILES := $(BASIC_FILES) $(SSE_FILES) $(AVX2_FILES) $(AVX512_FILES)
CXX_TARGETS := $(CXX_FILES:%.cc=%.x)
CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS)
CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS) $(BMI2_FLAGS)

CUDA_FILES := $(wildcard *cuda_test.cu)
CUDA_TARGETS := $(CUDA_FILES:%cuda_test.cu=%cuda_test.x)
Expand Down Expand Up @@ -125,8 +126,17 @@ clean:
-rm -f ./*.x ./*.a ./*.so ./*.mod
rm -rf $(GTEST_DIR)/build

LOCAL_VARS = HAVE_SSE HAVE_AVX2 HAVE_AVX512 SSE_FLAGS AVX2_FLAGS $\
AVX512_FLAGS CXXFLAGS CXX_TARGETS TEST_FLAGS
LOCAL_VARS = BASIC_FILES CXX_FILES CXX_TARGETS CXXFLAGS $\
CUDA_FILES CUDA_TARGETS $\
CUSTATEVEC_FILES CUSTATEVEC_FLAGS $\
CUSTATEVECEX_FILES CUSTATEVECEX_FLAGS $\
HAVE_AVX2 AVX2_FILES AVX2_FLAGS $\
HAVE_AVX512 AVX512_FILES AVX512_FLAGS $\
HAVE_BMI2 BMI2_FLAGS $\
HAVE_SSE SSE_FILES SSE_FLAGS $\
HIP_FILES HIP_TARGETS $\
GMOCK_DIR GTEST_DIR $\
TESTFLAGS

.PHONY: print-vars
print-vars: ; @$(foreach n,$(sort $(LOCAL_VARS)),echo $n=$($n);)
Loading