Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion pybind_interface/avx2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,15 @@ if(WIN32)
# Add /O2 to any configuration that is NOT Debug.
# This prevents a conflict with /RTC1 in DEBUG builds.
add_compile_options($<$<NOT:$<CONFIG:Debug>>:/O2>)
else()
elseif(LINUX)
add_compile_options(-mavx2 -mfma -O3 -flto=auto)
# Add -mbmi2 only when the machine running CMake lists "bmi2" in /proc/cpuinfo.
# The probe describes the build host, so it is skipped when cross-compiling:
# the host's CPU flags say nothing about the target.
if(NOT CMAKE_CROSSCOMPILING)
  # grep is invoked directly (no intermediate shell): -q suppresses output,
  # -s suppresses the error when /proc/cpuinfo is missing. The exit status is
  # 0 only when the flag is present; if grep itself cannot be started the
  # result variable holds an error string, which also fails the EQUAL test.
  execute_process(
    COMMAND grep -qs bmi2 /proc/cpuinfo
    RESULT_VARIABLE _EXIT_CODE
  )
  if(_EXIT_CODE EQUAL 0)
    add_compile_options("-mbmi2")
  endif()
endif()
endif()

if(APPLE)
Expand Down
34 changes: 22 additions & 12 deletions tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,34 @@
# limitations under the License.

# Determine the hardware features available in this CPU.
# Each HAVE_* variable is "true" when the corresponding flag name appears in
# /proc/cpuinfo and empty otherwise (grep -q: no output, -s: no error if the
# file is missing). Every feature is probed exactly once.
HAVE_AVX2 := $(shell grep -qs avx2 /proc/cpuinfo && echo "true")
HAVE_AVX512 := $(shell grep -qs avx512f /proc/cpuinfo && echo "true")
HAVE_BMI2 := $(shell grep -qs bmi2 /proc/cpuinfo && echo "true")
HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true")

# Default targets. Always built.
# Every *.cc in this directory except the ISA-specific tests, i.e. names
# containing "_avx" or "_sse". Uses `grep -E` because `egrep` is obsolescent
# and recent GNU grep prints a warning each time it is invoked.
BASIC_FILES := $(shell ls *.cc | grep -E -v '_avx|_sse')

# Additional flags and targets for non-CUDA cases.
# File lists default to empty so that CXX_FILES only picks up the ISA-specific
# tests whose instruction set was detected on this machine.
SSE_FILES =
AVX2_FILES =
AVX512_FILES =

# For each detected feature, set the compiler flags (overridable from the
# command line or environment thanks to ?=) and collect the matching tests.
ifneq (,$(HAVE_AVX2))
AVX2_FLAGS ?= -mavx2 -mfma
AVX2_FILES := $(wildcard *_avx_test.cc)
endif
ifneq (,$(HAVE_AVX512))
AVX512_FLAGS ?= -mavx512f
AVX512_FILES := $(wildcard *_avx512_test.cc)
endif
# BMI2 contributes a flag only; it has no dedicated test files. It is kept
# separate from AVX512_FLAGS so that -mbmi2 is emitted once.
ifneq (,$(HAVE_BMI2))
BMI2_FLAGS ?= -mbmi2
endif
ifneq (,$(HAVE_SSE))
SSE_FLAGS ?= -msse4
SSE_FILES := $(wildcard *_sse_test.cc)
endif

# All C++ test sources to build, and the .x executable produced from each.
CXX_FILES := $(BASIC_FILES) $(SSE_FILES) $(AVX2_FILES) $(AVX512_FILES)
CXX_TARGETS := $(CXX_FILES:%.cc=%.x)
# Append the per-ISA flags to any user-supplied CXXFLAGS exactly once; flags
# for undetected features are empty and therefore contribute nothing.
CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS) $(BMI2_FLAGS)

# CUDA test sources in this directory, and the .x executable built from each.
CUDA_FILES := $(wildcard *cuda_test.cu)
CUDA_TARGETS := $(patsubst %cuda_test.cu,%cuda_test.x,$(CUDA_FILES))
Expand Down Expand Up @@ -125,8 +126,17 @@ clean:
-rm -f ./*.x ./*.a ./*.so ./*.mod
rm -rf $(GTEST_DIR)/build

# Names of the variables reported by the print-vars target. The trailing `$\`
# continues the line without inserting extra text into the value.
# NOTE(review): confirm TESTFLAGS (not TEST_FLAGS) is the spelling used where
# the variable is defined.
LOCAL_VARS = BASIC_FILES CXX_FILES CXX_TARGETS CXXFLAGS $\
CUDA_FILES CUDA_TARGETS $\
CUSTATEVEC_FILES CUSTATEVEC_FLAGS $\
CUSTATEVECEX_FILES CUSTATEVECEX_FLAGS $\
HAVE_AVX2 AVX2_FILES AVX2_FLAGS $\
HAVE_AVX512 AVX512_FILES AVX512_FLAGS $\
HAVE_BMI2 BMI2_FLAGS $\
HAVE_SSE SSE_FILES SSE_FLAGS $\
HIP_FILES HIP_TARGETS $\
GMOCK_DIR GTEST_DIR $\
TESTFLAGS

# Debug helper: echo NAME=value for every variable listed in LOCAL_VARS,
# sorted by name. The foreach expands to one `echo ...;` command per variable.
.PHONY: print-vars
print-vars: ; @$(foreach var,$(sort $(LOCAL_VARS)),echo $(var)=$($(var));)
Loading