Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ add_library(base64
# library files
lib/lib.c
lib/codec_choose.c
lib/feature_level.c
include/libbase64.h

lib/tables/tables.c
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ OBJS = \
lib/arch/avx/codec.o \
lib/lib.o \
lib/codec_choose.o \
lib/feature_level.o \
lib/tables/tables.o

HAVE_AVX512 = 0
Expand Down
186 changes: 19 additions & 167 deletions lib/codec_choose.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,76 +16,6 @@
#endif
#endif

#ifdef BASE64_X86
#ifdef _MSC_VER
#include <intrin.h>
// MSVC branch: provide __cpuid_count() and __cpuid() with the same call shape
// the non-MSVC branch uses, implemented on top of the __cpuidex() intrinsic.
//
// __cpuid_count(level, count, eax, ebx, ecx, edx)
//   Runs CPUID for leaf `level`, subleaf `count`, and stores the four result
//   registers into the lvalues eax/ebx/ecx/edx.
// __cpuid(level, eax, ebx, ecx, edx)
//   Same, with the subleaf fixed to 0.
//
// The body is wrapped in do { ... } while (0) so that an invocation followed
// by a semicolon is exactly one statement (safe in an unbraced if/else). The
// temporary has a name that is unlikely to match a caller's argument.
#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
	do { \
		int cpuid_info_[4]; \
		__cpuidex(cpuid_info_, (__level), (__count)); \
		(__eax) = cpuid_info_[0]; \
		(__ebx) = cpuid_info_[1]; \
		(__ecx) = cpuid_info_[2]; \
		(__edx) = cpuid_info_[3]; \
	} while (0)
#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
	__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
#else
#include <cpuid.h>
#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
// Execute XGETBV for the extended control register selected by `index`
// (passed in ECX) and return the 64-bit result, which the instruction
// delivers split across EDX (upper 32 bits) and EAX (lower 32 bits).
static inline uint64_t _xgetbv (uint32_t index)
{
	uint32_t lo, hi;
	uint64_t value;

	__asm__ __volatile__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(index));

	// Reassemble EDX:EAX into a single 64-bit value.
	value = hi;
	value <<= 32;
	value |= lo;
	return value;
}
#else
#error "Platform not supported"
#endif
#endif
#endif

// CPUID feature bits. Each one is defined only if the toolchain headers have
// not already supplied it.

// Tested against CPUID leaf 7 (subleaf 0) results: AVX512vl and AVX2 in EBX,
// AVX512vbmi in ECX.
// bit_AVX512vl uses an unsigned literal: (1 << 31) would shift into the sign
// bit of a 32-bit int, which is undefined behaviour in C.
#ifndef bit_AVX512vl
#define bit_AVX512vl (1U << 31)
#endif
#ifndef bit_AVX512vbmi
#define bit_AVX512vbmi (1 << 1)
#endif
#ifndef bit_AVX2
#define bit_AVX2 (1 << 5)
#endif

// Tested against ECX of CPUID leaf 1.
#ifndef bit_SSSE3
#define bit_SSSE3 (1 << 9)
#endif
#ifndef bit_SSE41
#define bit_SSE41 (1 << 19)
#endif
#ifndef bit_SSE42
#define bit_SSE42 (1 << 20)
#endif
#ifndef bit_AVX
#define bit_AVX (1 << 28)
#endif

// Bit 27 of ECX from CPUID leaf 1; checked before XGETBV is executed.
#define bit_XSAVE_XRSTORE 0x08000000

// Register index handed to _xgetbv(); defined here only if the toolchain
// headers have not already supplied it.
#ifndef _XCR_XFEATURE_ENABLED_MASK
#define _XCR_XFEATURE_ENABLED_MASK 0
#endif

// Individual bits of the value read back through _xgetbv().
#define bit_XMM 0x02
#define bit_YMM 0x04
#define bit_OPMASK 0x20
#define bit_ZMM 0x40
#define bit_HIGH_ZMM 0x80

// Bits that must all be set before the AVX/AVX2 codecs are considered.
#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS (bit_XMM | bit_YMM)

// Bits that must all be set before the AVX512 codec is considered: the AVX
// set plus the opmask and both ZMM bits.
#define _AVX_512_ENABLED_BY_OS (_XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS | bit_OPMASK | bit_ZMM | bit_HIGH_ZMM)

#endif

// Function declarations:
#define BASE64_CODEC_FUNCS(arch) \
extern void base64_stream_encode_ ## arch BASE64_ENC_PARAMS; \
Expand Down Expand Up @@ -185,113 +115,35 @@ codec_choose_arm (struct codec *codec)
#endif
}

static bool
codec_choose_x86 (struct codec *codec)
{
#ifdef BASE64_X86_SIMD

unsigned int eax, ebx = 0, ecx = 0, edx;
unsigned int max_level;

#ifdef _MSC_VER
int info[4];
__cpuidex(info, 0, 0);
max_level = info[0];
#else
max_level = __get_cpuid_max(0, NULL);
#endif
// Lookup table from an X86_FEATURE_LEVEL_* index to the corresponding
// BASE64_FORCE_* flag value; X86_FEATURE_LEVEL_NONE maps to 0. The entry for
// the level returned by x86_get_cpu_feature_level() is what gets passed to
// codec_choose_forced().
static const int cpu_feature_flags[X86_FEATURE_LEVEL_COUNT] = {
[X86_FEATURE_LEVEL_NONE] = 0,
[X86_FEATURE_LEVEL_SSSE3] = BASE64_FORCE_SSSE3,
[X86_FEATURE_LEVEL_SSE41] = BASE64_FORCE_SSE41,
[X86_FEATURE_LEVEL_SSE42] = BASE64_FORCE_SSE42,
[X86_FEATURE_LEVEL_AVX] = BASE64_FORCE_AVX,
[X86_FEATURE_LEVEL_AVX2] = BASE64_FORCE_AVX2,
[X86_FEATURE_LEVEL_AVX512] = BASE64_FORCE_AVX512
};

#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
// Check for AVX/AVX2/AVX512 support:
// Checking for AVX requires 3 things:
// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
// (allowing saving YMM registers on context switch)
// 2) CPUID indicates support for AVX
// 3) XGETBV indicates the AVX registers will be saved and restored on
// context switch
//
// Note that XGETBV is only available on 686 or later CPUs, so the
// instruction needs to be conditionally run.
if (max_level >= 1) {
__cpuid_count(1, 0, eax, ebx, ecx, edx);
if (ecx & bit_XSAVE_XRSTORE) {
uint64_t xcr_mask;
xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) { // check multiple bits at once
#if HAVE_AVX512
if (max_level >= 7 && ((xcr_mask & _AVX_512_ENABLED_BY_OS) == _AVX_512_ENABLED_BY_OS)) {
__cpuid_count(7, 0, eax, ebx, ecx, edx);
if ((ebx & bit_AVX512vl) && (ecx & bit_AVX512vbmi)) {
codec->enc = base64_stream_encode_avx512;
codec->dec = base64_stream_decode_avx512;
return true;
}
}
#endif
#if HAVE_AVX2
if (max_level >= 7) {
__cpuid_count(7, 0, eax, ebx, ecx, edx);
if (ebx & bit_AVX2) {
codec->enc = base64_stream_encode_avx2;
codec->dec = base64_stream_decode_avx2;
return true;
}
}
#endif
#if HAVE_AVX
__cpuid_count(1, 0, eax, ebx, ecx, edx);
if (ecx & bit_AVX) {
codec->enc = base64_stream_encode_avx;
codec->dec = base64_stream_decode_avx;
return true;
}
#endif
}
}
}
#endif

#if HAVE_SSE42
// Check for SSE42 support:
if (max_level >= 1) {
__cpuid(1, eax, ebx, ecx, edx);
if (ecx & bit_SSE42) {
codec->enc = base64_stream_encode_sse42;
codec->dec = base64_stream_decode_sse42;
return true;
}
}
#endif
#endif // BASE64_X86

#if HAVE_SSE41
// Check for SSE41 support:
if (max_level >= 1) {
__cpuid(1, eax, ebx, ecx, edx);
if (ecx & bit_SSE41) {
codec->enc = base64_stream_encode_sse41;
codec->dec = base64_stream_decode_sse41;
return true;
}
}
#endif
static bool
codec_choose_x86 (struct codec *codec)
{
#ifdef BASE64_X86_SIMD
int feature_level;

#if HAVE_SSSE3
// Check for SSSE3 support:
if (max_level >= 1) {
__cpuid(1, eax, ebx, ecx, edx);
if (ecx & bit_SSSE3) {
codec->enc = base64_stream_encode_ssse3;
codec->dec = base64_stream_decode_ssse3;
return true;
}
}
#endif
feature_level = x86_get_cpu_feature_level();
codec_choose_forced(codec, cpu_feature_flags[feature_level]);
return true;

#else
(void)codec;
#endif

return false;
#endif // BASE64_X86_SIMD
}

void
Expand Down
17 changes: 17 additions & 0 deletions lib/codecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,20 @@ struct codec
};

extern void codec_choose (struct codec *, int flags);

#if (__x86_64__ || __i386__ || _M_X86 || _M_X64)
// x86 SIMD feature levels. The values are consecutive integers starting at
// zero, so a level can be used directly as an array index.
enum
{
	X86_FEATURE_LEVEL_NONE   = 0,
	X86_FEATURE_LEVEL_SSSE3  = 1,
	X86_FEATURE_LEVEL_SSE41  = 2,
	X86_FEATURE_LEVEL_SSE42  = 3,
	X86_FEATURE_LEVEL_AVX    = 4,
	X86_FEATURE_LEVEL_AVX2   = 5,
	X86_FEATURE_LEVEL_AVX512 = 6,

	// Number of levels listed above (one past the last level); suitable as
	// the length of an array indexed by level.
	X86_FEATURE_LEVEL_COUNT
};

// Reports the x86 CPU feature level as an int.
// NOTE(review): presumably the result is one of the X86_FEATURE_LEVEL_*
// values declared above and always below X86_FEATURE_LEVEL_COUNT; the
// definition is not visible here, so confirm before indexing with it.
extern int x86_get_cpu_feature_level (void);
#endif // (__x86_64__ || __i386__ || _M_X86 || _M_X64)
Loading