diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4d154e4..f9ea663 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -105,6 +105,7 @@ add_library(base64
     # library files
     lib/lib.c
     lib/codec_choose.c
+    lib/feature_level.c
     include/libbase64.h
 
     lib/tables/tables.c
diff --git a/Makefile b/Makefile
index bba3fde..79d91a1 100644
--- a/Makefile
+++ b/Makefile
@@ -15,6 +15,7 @@ OBJS = \
   lib/arch/avx/codec.o \
   lib/lib.o \
   lib/codec_choose.o \
+  lib/feature_level.o \
   lib/tables/tables.o
 
 HAVE_AVX512 = 0
diff --git a/lib/codec_choose.c b/lib/codec_choose.c
index 8b5938d..941117e 100644
--- a/lib/codec_choose.c
+++ b/lib/codec_choose.c
@@ -16,76 +16,6 @@
   #endif
 #endif
 
-#ifdef BASE64_X86
-#ifdef _MSC_VER
-	#include <intrin.h>
-	#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
-	{ \
-		int info[4]; \
-		__cpuidex(info, __level, __count); \
-		__eax = info[0]; \
-		__ebx = info[1]; \
-		__ecx = info[2]; \
-		__edx = info[3]; \
-	}
-	#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
-		__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
-#else
-	#include <cpuid.h>
-	#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
-		#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
-			static inline uint64_t _xgetbv (uint32_t index)
-			{
-				uint32_t eax, edx;
-				__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
-				return ((uint64_t)edx << 32) | eax;
-			}
-		#else
-			#error "Platform not supported"
-		#endif
-	#endif
-#endif
-
-#ifndef bit_AVX512vl
-#define bit_AVX512vl (1 << 31)
-#endif
-#ifndef bit_AVX512vbmi
-#define bit_AVX512vbmi (1 << 1)
-#endif
-#ifndef bit_AVX2
-#define bit_AVX2 (1 << 5)
-#endif
-#ifndef bit_SSSE3
-#define bit_SSSE3 (1 << 9)
-#endif
-#ifndef bit_SSE41
-#define bit_SSE41 (1 << 19)
-#endif
-#ifndef bit_SSE42
-#define bit_SSE42 (1 << 20)
-#endif
-#ifndef bit_AVX
-#define bit_AVX (1 << 28)
-#endif
-
-#define bit_XSAVE_XRSTORE (1 << 27)
-
-#ifndef _XCR_XFEATURE_ENABLED_MASK
-#define _XCR_XFEATURE_ENABLED_MASK 0
-#endif
-
-#define bit_XMM (1 << 1)
-#define bit_YMM (1 << 2)
-#define bit_OPMASK (1 << 5)
-#define bit_ZMM (1 << 6)
-#define bit_HIGH_ZMM (1 << 7)
-
-#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS (bit_XMM | bit_YMM)
-
-#define _AVX_512_ENABLED_BY_OS (bit_XMM | bit_YMM | bit_OPMASK | bit_ZMM | bit_HIGH_ZMM)
-
-#endif
-
 // Function declarations:
 #define BASE64_CODEC_FUNCS(arch) \
 	extern void base64_stream_encode_ ## arch BASE64_ENC_PARAMS; \
@@ -185,113 +115,47 @@ codec_choose_arm (struct codec *codec)
 #endif
 }
 
-static bool
-codec_choose_x86 (struct codec *codec)
-{
 #ifdef BASE64_X86_SIMD
 
-	unsigned int eax, ebx = 0, ecx = 0, edx;
-	unsigned int max_level;
-
-	#ifdef _MSC_VER
-	int info[4];
-	__cpuidex(info, 0, 0);
-	max_level = info[0];
-	#else
-	max_level = __get_cpuid_max(0, NULL);
-	#endif
+// Maps each X86_FEATURE_LEVEL_* value to the BASE64_FORCE_* flag that is
+// passed to codec_choose_forced() for that level.
+static const int cpu_feature_flags[X86_FEATURE_LEVEL_COUNT] = {
+	[X86_FEATURE_LEVEL_NONE] = 0,
+	[X86_FEATURE_LEVEL_SSSE3] = BASE64_FORCE_SSSE3,
+	[X86_FEATURE_LEVEL_SSE41] = BASE64_FORCE_SSE41,
+	[X86_FEATURE_LEVEL_SSE42] = BASE64_FORCE_SSE42,
+	[X86_FEATURE_LEVEL_AVX] = BASE64_FORCE_AVX,
+	[X86_FEATURE_LEVEL_AVX2] = BASE64_FORCE_AVX2,
+	[X86_FEATURE_LEVEL_AVX512] = BASE64_FORCE_AVX512
+};
 
-	#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
-	// Check for AVX/AVX2/AVX512 support:
-	// Checking for AVX requires 3 things:
-	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
-	//    (allowing saving YMM registers on context switch)
-	// 2) CPUID indicates support for AVX
-	// 3) XGETBV indicates the AVX registers will be saved and restored on
-	//    context switch
-	//
-	// Note that XGETBV is only available on 686 or later CPUs, so the
-	// instruction needs to be conditionally run.
-	if (max_level >= 1) {
-		__cpuid_count(1, 0, eax, ebx, ecx, edx);
-		if (ecx & bit_XSAVE_XRSTORE) {
-			uint64_t xcr_mask;
-			xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
-			if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) { // check multiple bits at once
-				#if HAVE_AVX512
-				if (max_level >= 7 && ((xcr_mask & _AVX_512_ENABLED_BY_OS) == _AVX_512_ENABLED_BY_OS)) {
-					__cpuid_count(7, 0, eax, ebx, ecx, edx);
-					if ((ebx & bit_AVX512vl) && (ecx & bit_AVX512vbmi)) {
-						codec->enc = base64_stream_encode_avx512;
-						codec->dec = base64_stream_decode_avx512;
-						return true;
-					}
-				}
-				#endif
-				#if HAVE_AVX2
-				if (max_level >= 7) {
-					__cpuid_count(7, 0, eax, ebx, ecx, edx);
-					if (ebx & bit_AVX2) {
-						codec->enc = base64_stream_encode_avx2;
-						codec->dec = base64_stream_decode_avx2;
-						return true;
-					}
-				}
-				#endif
-				#if HAVE_AVX
-				__cpuid_count(1, 0, eax, ebx, ecx, edx);
-				if (ecx & bit_AVX) {
-					codec->enc = base64_stream_encode_avx;
-					codec->dec = base64_stream_decode_avx;
-					return true;
-				}
-				#endif
-			}
-		}
-	}
-	#endif
-
-	#if HAVE_SSE42
-	// Check for SSE42 support:
-	if (max_level >= 1) {
-		__cpuid(1, eax, ebx, ecx, edx);
-		if (ecx & bit_SSE42) {
-			codec->enc = base64_stream_encode_sse42;
-			codec->dec = base64_stream_decode_sse42;
-			return true;
-		}
-	}
-	#endif
+#endif // BASE64_X86_SIMD
 
-	#if HAVE_SSE41
-	// Check for SSE41 support:
-	if (max_level >= 1) {
-		__cpuid(1, eax, ebx, ecx, edx);
-		if (ecx & bit_SSE41) {
-			codec->enc = base64_stream_encode_sse41;
-			codec->dec = base64_stream_decode_sse41;
-			return true;
-		}
-	}
-	#endif
+// Pick a codec from the runtime-detected x86 feature level. Returns false
+// when no SIMD level is available or SIMD support is compiled out, and true
+// after handing the matching flag to codec_choose_forced().
+static bool
+codec_choose_x86 (struct codec *codec)
+{
+#ifdef BASE64_X86_SIMD
+	int feature_level;
 
-	#if HAVE_SSSE3
-	// Check for SSSE3 support:
-	if (max_level >= 1) {
-		__cpuid(1, eax, ebx, ecx, edx);
-		if (ecx & bit_SSSE3) {
-			codec->enc = base64_stream_encode_ssse3;
-			codec->dec = base64_stream_decode_ssse3;
-			return true;
-		}
-	}
-	#endif
+	feature_level = x86_get_cpu_feature_level();
+
+	// X86_FEATURE_LEVEL_NONE maps to flag value 0, i.e. no SIMD codec is
+	// being requested. Return false instead of claiming a codec was chosen:
+	if (feature_level == X86_FEATURE_LEVEL_NONE) {
+		return false;
+	}
+
+	codec_choose_forced(codec, cpu_feature_flags[feature_level]);
+	return true;
 
 #else
 	(void)codec;
-#endif
 
 	return false;
+#endif // BASE64_X86_SIMD
 }
 
 void
diff --git a/lib/codecs.h b/lib/codecs.h
index f6af0e7..61469c6 100644
--- a/lib/codecs.h
+++ b/lib/codecs.h
@@ -55,3 +55,22 @@ struct codec
 };
 
 extern void codec_choose (struct codec *, int flags);
+
+// CPU feature levels reported by x86_get_cpu_feature_level(), ordered from
+// least to most capable. Declared on every architecture because
+// lib/feature_level.c also defines the function (returning
+// X86_FEATURE_LEVEL_NONE) for non-x86 builds.
+enum
+{
+	X86_FEATURE_LEVEL_NONE,
+	X86_FEATURE_LEVEL_SSSE3,
+	X86_FEATURE_LEVEL_SSE41,
+	X86_FEATURE_LEVEL_SSE42,
+	X86_FEATURE_LEVEL_AVX,
+	X86_FEATURE_LEVEL_AVX2,
+	X86_FEATURE_LEVEL_AVX512,
+
+	X86_FEATURE_LEVEL_COUNT
+};
+
+extern int x86_get_cpu_feature_level (void);
diff --git a/lib/feature_level.c b/lib/feature_level.c
new file mode 100644
index 0000000..47da0e6
--- /dev/null
+++ b/lib/feature_level.c
@@ -0,0 +1,187 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "../include/libbase64.h"
+#include "codecs.h"
+#include "config.h"
+#include "env.h"
+
+#if (__x86_64__ || __i386__ || _M_X86 || _M_X64)
+  #define BASE64_X86
+  #if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2 || HAVE_AVX512)
+    #define BASE64_X86_SIMD
+  #endif
+#endif
+
+#ifdef BASE64_X86
+#ifdef _MSC_VER
+	#include <intrin.h>
+	#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
+	{ \
+		int info[4]; \
+		__cpuidex(info, __level, __count); \
+		__eax = info[0]; \
+		__ebx = info[1]; \
+		__ecx = info[2]; \
+		__edx = info[3]; \
+	}
+	#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
+		__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
+#else
+	#include <cpuid.h>
+	#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
+		#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
+			static inline uint64_t _xgetbv (uint32_t index)
+			{
+				uint32_t eax, edx;
+				__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
+				return ((uint64_t)edx << 32) | eax;
+			}
+		#else
+			#error "Platform not supported"
+		#endif
+	#endif
+#endif
+
+#ifndef bit_AVX512vl
+#define bit_AVX512vl (1U << 31)
+#endif
+#ifndef bit_AVX512vbmi
+#define bit_AVX512vbmi (1 << 1)
+#endif
+#ifndef bit_AVX2
+#define bit_AVX2 (1 << 5)
+#endif
+#ifndef bit_SSSE3
+#define bit_SSSE3 (1 << 9)
+#endif
+#ifndef bit_SSE41
+#define bit_SSE41 (1 << 19)
+#endif
+#ifndef bit_SSE42
+#define bit_SSE42 (1 << 20)
+#endif
+#ifndef bit_AVX
+#define bit_AVX (1 << 28)
+#endif
+
+#define bit_XSAVE_XRSTORE (1 << 27)
+
+#ifndef _XCR_XFEATURE_ENABLED_MASK
+#define _XCR_XFEATURE_ENABLED_MASK 0
+#endif
+
+#define bit_XMM (1 << 1)
+#define bit_YMM (1 << 2)
+#define bit_OPMASK (1 << 5)
+#define bit_ZMM (1 << 6)
+#define bit_HIGH_ZMM (1 << 7)
+
+#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS (bit_XMM | bit_YMM)
+
+#define _AVX_512_ENABLED_BY_OS (bit_XMM | bit_YMM | bit_OPMASK | bit_ZMM | bit_HIGH_ZMM)
+
+#endif
+
+#ifdef BASE64_X86
+
+// Return the highest X86_FEATURE_LEVEL_* that is both compiled in (HAVE_*
+// macros) and reported by CPUID (plus XGETBV for the AVX family), or
+// X86_FEATURE_LEVEL_NONE when nothing matches.
+int x86_get_cpu_feature_level (void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	unsigned int max_level;
+
+#ifdef _MSC_VER
+	int info[4];
+	__cpuidex(info, 0, 0);
+	max_level = info[0];
+#else
+	max_level = __get_cpuid_max(0, NULL);
+#endif
+
+	if (max_level >= 1) {
+
+#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
+		// Check for AVX/AVX2/AVX512 support:
+		// Checking for AVX requires 3 things:
+		// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
+		//    (allowing saving YMM registers on context switch)
+		// 2) CPUID indicates support for AVX
+		// 3) XGETBV indicates the AVX registers will be saved and restored on
+		//    context switch
+		//
+		// Note that XGETBV is only available on 686 or later CPUs, so the
+		// instruction needs to be conditionally run.
+		__cpuid_count(1, 0, eax, ebx, ecx, edx);
+		if (ecx & bit_XSAVE_XRSTORE) {
+			uint64_t xcr_mask;
+			xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+			if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) { // check multiple bits at once
+#if HAVE_AVX512
+				if (max_level >= 7 && ((xcr_mask & _AVX_512_ENABLED_BY_OS) == _AVX_512_ENABLED_BY_OS)) {
+					__cpuid_count(7, 0, eax, ebx, ecx, edx);
+					if ((ebx & bit_AVX512vl) && (ecx & bit_AVX512vbmi)) {
+						return X86_FEATURE_LEVEL_AVX512;
+					}
+				}
+#endif // HAVE_AVX512
+
+#if HAVE_AVX2
+				if (max_level >= 7) {
+					__cpuid_count(7, 0, eax, ebx, ecx, edx);
+					if (ebx & bit_AVX2) {
+						return X86_FEATURE_LEVEL_AVX2;
+					}
+				}
+#endif // HAVE_AVX2
+
+#if HAVE_AVX
+				__cpuid_count(1, 0, eax, ebx, ecx, edx);
+				if (ecx & bit_AVX) {
+					return X86_FEATURE_LEVEL_AVX;
+				}
+#endif // HAVE_AVX
+			}
+		}
+#endif // HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
+
+		__cpuid(1, eax, ebx, ecx, edx);
+
+#if HAVE_SSE42
+		// Check for SSE42 support:
+		if (ecx & bit_SSE42) {
+			return X86_FEATURE_LEVEL_SSE42;
+		}
+#endif // HAVE_SSE42
+
+#if HAVE_SSE41
+		// Check for SSE41 support:
+		if (ecx & bit_SSE41) {
+			return X86_FEATURE_LEVEL_SSE41;
+		}
+#endif // HAVE_SSE41
+
+#if HAVE_SSSE3
+		// Check for SSSE3 support:
+		if (ecx & bit_SSSE3) {
+			return X86_FEATURE_LEVEL_SSSE3;
+		}
+#endif // HAVE_SSSE3
+	}
+
+	return X86_FEATURE_LEVEL_NONE;
+}
+#else
+
+// Non-x86 build: no x86 extensions can be present.
+int x86_get_cpu_feature_level (void)
+{
+	return X86_FEATURE_LEVEL_NONE;
+}
+
+#endif // BASE64_X86
diff --git a/test/Makefile b/test/Makefile
index 7ecb893..7dbe4cc 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -22,10 +22,13 @@ test: clean test_base64 benchmark
 valgrind: clean test_base64
 	valgrind --error-exitcode=2 ./test_base64
 
-test_base64: test_base64.c codec_supported.o ../lib/libbase64.o
+feature_level.o: ../lib/feature_level.c ../lib/config.h
+	$(CC) $(CFLAGS) -o $@ -c $<
+
+test_base64: test_base64.c codec_supported.o feature_level.o ../lib/libbase64.o
 	$(CC) $(CFLAGS) -o $@ $^
 
-benchmark: benchmark.c codec_supported.o ../lib/libbase64.o
+benchmark: benchmark.c codec_supported.o feature_level.o ../lib/libbase64.o
 	$(CC) $(CFLAGS) -o $@ $^ $(BENCH_LDFLAGS)
 
 ../%:
diff --git a/test/codec_supported.c b/test/codec_supported.c
index e90d820..5910d8b 100644
--- a/test/codec_supported.c
+++ b/test/codec_supported.c
@@ -3,6 +3,45 @@
 #include <string.h>
 
 #include "../include/libbase64.h"
+#include "../lib/codecs.h"
+
+// Returns 0 only when `codec` names an x86 SIMD extension that the running
+// CPU does not provide. Names that are not x86 extensions, and every name on
+// non-x86 builds, return 1 so that the decode probe in codec_supported()
+// still gets to decide.
+static int
+cpu_has_x86_extension (const char *codec)
+{
+#if (__x86_64__ || __i386__ || _M_X86 || _M_X64)
+	int feature_level;
+
+	feature_level = x86_get_cpu_feature_level();
+
+	if (strcmp(codec, "SSSE3") == 0) {
+		return feature_level >= X86_FEATURE_LEVEL_SSSE3;
+	}
+	if (strcmp(codec, "SSE41") == 0) {
+		return feature_level >= X86_FEATURE_LEVEL_SSE41;
+	}
+	if (strcmp(codec, "SSE42") == 0) {
+		return feature_level >= X86_FEATURE_LEVEL_SSE42;
+	}
+	if (strcmp(codec, "AVX") == 0) {
+		return feature_level >= X86_FEATURE_LEVEL_AVX;
+	}
+	if (strcmp(codec, "AVX2") == 0) {
+		return feature_level >= X86_FEATURE_LEVEL_AVX2;
+	}
+	if (strcmp(codec, "AVX512") == 0) {
+		return feature_level >= X86_FEATURE_LEVEL_AVX512;
+	}
+	// Not an x86 extension: nothing to rule out here.
+	return 1;
+#else
+	(void)codec;
+	return 1;
+#endif
+}
 
 static char *_codecs[] =
 { "AVX2"
@@ -25,6 +64,12 @@ codec_supported (size_t index)
 	if (index >= (sizeof(_codecs) / sizeof(_codecs[0])) - 1) {
 		return 0;
 	}
+
+	// Early out if CPU extension is not present:
+	if (!cpu_has_x86_extension(_codecs[index])) {
+		return 0;
+	}
+
 	// Check if given codec is supported by trying to decode a test string:
 	char *a = "aGVsbG8=";
 	char b[10];