Skip to content

Commit 4621276

Browse files
[0.8] [MOD-7049] Change bm dimensions (#503)
[MOD-7049] Change bm dimensions (#502) * Change bm dimensions to cover: 1. high dimensions 2. low dimensions 3. dimensions common for all types 4. no residual dimension 5. all residual possibilities for each type. Small fix: add $MODE to ubuntu22 & 20 script * Update tests/benchmark/spaces_benchmarks/bm_spaces.h Co-authored-by: GuyAv46 <47632673+GuyAv46@users.noreply.github.com> * fix comment * fix comment --------- Co-authored-by: GuyAv46 <47632673+GuyAv46@users.noreply.github.com> (cherry picked from commit dbb9d24) Co-authored-by: meiravgri <109056284+meiravgri@users.noreply.github.com>
1 parent 1347f5c commit 4621276

File tree

3 files changed

+41
-28
lines changed

3 files changed

+41
-28
lines changed

.install/ubuntu_20.04.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ $MODE apt update
1010
$MODE apt-get install -yqq wget gcc-11 g++-11 make clang-format valgrind python3-pip lcov git
1111
$MODE update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 60 --slave /usr/bin/g++ g++ /usr/bin/g++-11
1212
# align gcov version with gcc version
13-
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-11 60
13+
$MODE update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-11 60
1414
source install_cmake.sh $MODE

.install/ubuntu_22.04.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ $MODE apt-get update -qq || true
77
$MODE apt-get install -yqq gcc-12 g++-12 git wget build-essential valgrind lcov
88
$MODE update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 60 --slave /usr/bin/g++ g++ /usr/bin/g++-12
99
# align gcov version with gcc version
10-
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-12 60
10+
$MODE update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-12 60
1111
source install_cmake.sh $MODE

tests/benchmark/spaces_benchmarks/bm_spaces.h

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -41,47 +41,60 @@
4141
} \
4242
}
4343

44-
// dim_opt: number of elements in 512 bits.
45-
// i.e. 512/sizeof(type): FP32 = 16, FP64 = 8, BF16 = 32 ...
46-
// The is the number of elements calculated in each distance function loop,
47-
// regardless of the arch optimization type.
48-
// Run each function for {1, 4, 16} iterations.
49-
#define EXACT_512BIT_PARAMS(dim_opt) RangeMultiplier(4)->Range(dim_opt, 400)
50-
51-
// elem_per_128_bits: dim_opt / 4. FP32 = 4, FP64 = 2, BF16 = 8...
52-
// Dimensions to test 128 bit chunks.
53-
// Run each function at least one full 512 bits iteration + 1/2/3 iterations of 128 bit chunks.
54-
#define EXACT_128BIT_PARAMS(elem_per_128_bits) \
55-
DenseRange(128 + elem_per_128_bits, 128 + 3 * elem_per_128_bits, elem_per_128_bits)
56-
57-
// Run each function at least one full 512 bits iteration + (1 * elements : elem_per_128_bits *
58-
// elements) FP32 = residual = 1,2,3, FP64 = residual = 1, BF16 = residual = 1,2,3,4,5,6,7...
59-
#define RESIDUAL_PARAMS(elem_per_128_bits) DenseRange(128 + 1, 128 + elem_per_128_bits - 1, 1)
60-
6144
#define INITIALIZE_BM(bm_class, type_prefix, arch, metric, bm_name, arch_supported) \
6245
BENCHMARK_DISTANCE_F(bm_class, type_prefix, arch, metric, bm_name, arch_supported) \
6346
BENCHMARK_REGISTER_F(bm_class, type_prefix##_##arch##_##metric##_##bm_name) \
6447
->ArgName("Dimension") \
6548
->Unit(benchmark::kNanosecond)
6649

50+
/**
51+
* A number that is
52+
* 1. divisible by 32 to ensure that we have at least one full 512 bits iteration in all types
53+
* 2. higher than the minimum dimension required to choose all possible optimizations.
54+
* (currently it's 500 for IP with AVX512_FP16)
55+
*/
56+
static constexpr size_t min_no_res_th_dim = 512;
57+
58+
/**
59+
* @param dim_opt: Number of elements in 512 bits.
60+
*/
61+
62+
/**
63+
* @param dim_opt is also the smallest dimension that satisfies:
64+
* dim % num_elements_in_512_bits == 0.
65+
* We use it to start this set of BM from the smallest dimension that satisfies the above condition.
66+
* RangeMultiplier(val)->Range(start, end) generates powers of `val` in the range [start, end],
67+
* plus `start` and `end` themselves, even when they are not powers of `val`.
68+
*/
6769
#define INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, metric, dim_opt, arch_supported) \
6870
INITIALIZE_BM(bm_class, type_prefix, arch, metric, 512_bit_chunks, arch_supported) \
69-
->EXACT_512BIT_PARAMS(dim_opt)
70-
71-
#define INITIALIZE_EXACT_128BIT_BM(bm_class, type_prefix, arch, metric, dim_opt, arch_supported) \
72-
INITIALIZE_BM(bm_class, type_prefix, arch, metric, 128_bit_chunks, arch_supported) \
73-
->EXACT_128BIT_PARAMS(dim_opt / 4)
71+
->RangeMultiplier(4) \
72+
->Range(dim_opt, 1024)
7473

74+
/** for `start` = min_no_res_th_dim (defined above) we run bm for all dimensions
75+
* in the following range: (start + 1, start + 2, start + 3, ... start + dim_opt - 1)
76+
* to test all possible residual cases.
77+
*/
78+
static constexpr size_t start = min_no_res_th_dim;
7579
#define INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, metric, dim_opt, arch_supported) \
7680
INITIALIZE_BM(bm_class, type_prefix, arch, metric, residual, arch_supported) \
77-
->RESIDUAL_PARAMS(dim_opt / 4)
81+
->DenseRange(start + 1, start + dim_opt - 1, 1)
7882

83+
/** Test high dim
84+
* This range satisfies at least one full 512 bits iteration in all types.
85+
*/
7986
#define INITIALIZE_HIGH_DIM(bm_class, type_prefix, arch, metric, arch_supported) \
8087
INITIALIZE_BM(bm_class, type_prefix, arch, metric, high_dim, arch_supported) \
8188
->DenseRange(900, 1000, 15)
8289

83-
// Naive algorithms
90+
/** Test low dim
91+
* This range satisfies at least one full 512-bit iteration in all types (160).
92+
*/
93+
#define INITIALIZE_LOW_DIM(bm_class, type_prefix, arch, metric, arch_supported) \
94+
INITIALIZE_BM(bm_class, type_prefix, arch, metric, low_dim, arch_supported) \
95+
->DenseRange(100, 200, 15)
8496

97+
/* Naive algorithms */
8598
#define BENCHMARK_DEFINE_NAIVE(bm_class, type_prefix, metric) \
8699
BENCHMARK_DEFINE_F(bm_class, type_prefix##_NAIVE_##metric) \
87100
(benchmark::State & st) { \
@@ -102,13 +115,13 @@
102115

103116
#define INITIALIZE_BENCHMARKS_SET_L2(bm_class, type_prefix, arch, dim_opt, arch_supported) \
104117
INITIALIZE_HIGH_DIM(bm_class, type_prefix, arch, L2, arch_supported); \
105-
INITIALIZE_EXACT_128BIT_BM(bm_class, type_prefix, arch, L2, dim_opt, arch_supported); \
118+
INITIALIZE_LOW_DIM(bm_class, type_prefix, arch, L2, arch_supported); \
106119
INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, L2, dim_opt, arch_supported); \
107120
INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, L2, dim_opt, arch_supported);
108121

109122
#define INITIALIZE_BENCHMARKS_SET_IP(bm_class, type_prefix, arch, dim_opt, arch_supported) \
110123
INITIALIZE_HIGH_DIM(bm_class, type_prefix, arch, IP, arch_supported); \
111-
INITIALIZE_EXACT_128BIT_BM(bm_class, type_prefix, arch, IP, dim_opt, arch_supported); \
124+
INITIALIZE_LOW_DIM(bm_class, type_prefix, arch, IP, arch_supported); \
112125
INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, IP, dim_opt, arch_supported); \
113126
INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, IP, dim_opt, arch_supported);
114127

0 commit comments

Comments
 (0)