From 27c6398d4783f985a6fb23a40a848beab29b0a35 Mon Sep 17 00:00:00 2001 From: Kim Walisch Date: Wed, 19 Jun 2024 16:26:17 +0200 Subject: [PATCH] Refactor CPUID code --- include/cpu_supports_avx512_bmi2.hpp | 28 +++++++++---- include/cpu_supports_popcnt.hpp | 12 ++++-- include/cpuid.hpp | 62 ++++++++++++++-------------- 3 files changed, 59 insertions(+), 43 deletions(-) diff --git a/include/cpu_supports_avx512_bmi2.hpp b/include/cpu_supports_avx512_bmi2.hpp index 9b812d3c..d650b30c 100644 --- a/include/cpu_supports_avx512_bmi2.hpp +++ b/include/cpu_supports_avx512_bmi2.hpp @@ -17,6 +17,9 @@ #include #endif +// CPUID bits documentation: +// https://en.wikipedia.org/wiki/CPUID + // %ebx bit flags #define bit_BMI2 (1 << 8) #define bit_AVX512F (1 << 16) @@ -39,7 +42,7 @@ inline int get_xcr0() #if defined(_MSC_VER) xcr0 = (int) _xgetbv(0); #else - __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); + __asm__ __volatile__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); #endif return xcr0; @@ -47,19 +50,21 @@ inline int get_xcr0() inline bool run_cpuid_avx512_bmi2() { - int abcd[4]; + int eax = 1; + int ebx = 0; + int ecx = 0; + int edx = 0; - run_cpuid(1, 0, abcd); + run_cpuid(&eax, &ebx, &ecx, &edx); int osxsave_mask = (1 << 27); // Ensure OS supports extended processor state management - if ((abcd[2] & osxsave_mask) != osxsave_mask) + if ((ecx & osxsave_mask) != osxsave_mask) return false; int ymm_mask = XSTATE_SSE | XSTATE_YMM; int zmm_mask = XSTATE_SSE | XSTATE_YMM | XSTATE_ZMM; - int xcr0 = get_xcr0(); // Check AVX OS support @@ -70,14 +75,19 @@ inline bool run_cpuid_avx512_bmi2() if ((xcr0 & zmm_mask) != zmm_mask) return false; - run_cpuid(7, 0, abcd); + eax = 7; + ebx = 0; + ecx = 0; + edx = 0; + + run_cpuid(&eax, &ebx, &ecx, &edx); - if ((abcd[1] & bit_BMI2) != bit_BMI2) + if ((ebx & bit_BMI2) != bit_BMI2) return false; // AVX512F, AVX512VPOPCNTDQ - return ((abcd[1] & bit_AVX512F) == bit_AVX512F && - (abcd[2] & bit_AVX512_VPOPCNTDQ) == bit_AVX512_VPOPCNTDQ); + return ((ebx & bit_AVX512F) == bit_AVX512F && + (ecx & bit_AVX512_VPOPCNTDQ) == bit_AVX512_VPOPCNTDQ); } /// Initialized at startup diff --git a/include/cpu_supports_popcnt.hpp b/include/cpu_supports_popcnt.hpp index a7e5b007..83cef4fa 100644 --- a/include/cpu_supports_popcnt.hpp +++ b/include/cpu_supports_popcnt.hpp @@ -39,12 +39,16 @@ namespace { inline bool run_cpuid_supports_popcnt() { - int abcd[4]; - run_cpuid(1, 0, abcd); + int eax = 1; + int ebx = 0; + int ecx = 0; + int edx = 0; - // %ecx POPCNT bit flag + run_cpuid(&eax, &ebx, &ecx, &edx); + + // https://en.wikipedia.org/wiki/CPUID int bit_POPCNT = 1 << 23; - return (abcd[2] & bit_POPCNT) == bit_POPCNT; + return (ecx & bit_POPCNT) == bit_POPCNT; } /// Initialized at startup diff --git a/include/cpuid.hpp b/include/cpuid.hpp index 87e85b13..632c3dc5 100644 --- a/include/cpuid.hpp +++ b/include/cpuid.hpp @@ -17,40 +17,42 @@ namespace { -inline void run_cpuid(int eax, int ecx, int* abcd) +inline void run_cpuid(int* eax, int* ebx, int* ecx, int* edx) { #if defined(_MSC_VER) - __cpuidex(abcd, eax, ecx); -#else - int ebx = 0; - int edx = 0; - #if defined(__i386__) && \ + int abcd[4]; + __cpuidex(abcd, *eax, *ecx); + + *eax = abcd[0]; + *ebx = abcd[1]; + *ecx = abcd[2]; + *edx = abcd[3]; + +#elif defined(__i386__) && \ defined(__PIC__) - /* in case of PIC under 32-bit EBX cannot be clobbered */ - __asm__ __volatile__ ( - "movl %%ebx, %%edi;" - "cpuid;" - "xchgl %%ebx, %%edi;" - : "=D" (ebx), - "+a" (eax), - "+c" (ecx), - "=d" (edx) - : "memory" - ); - #else - __asm__ __volatile__ ( - "cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (eax), "2" (ecx) - : "memory" - ); - #endif - - abcd[0] = eax; - abcd[1] = ebx; - abcd[2] = ecx; - abcd[3] = edx; + + // in case of PIC under 32-bit EBX cannot be clobbered + __asm__ __volatile__ ( + "movl %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "=D" (*ebx), + "+a" (*eax), + "+c" (*ecx), + "=d" (*edx) + ); + +#else + + __asm__ __volatile__ ( + "cpuid" + : "+a" (*eax), + "+b" (*ebx), + "+c" (*ecx), + "=d" (*edx) + ); + #endif }