Skip to content

Commit 2fb0e56

Browse files
committed
Try to fix ubuntu fail
1 parent f9ba138 commit 2fb0e56

File tree

1 file changed

+65
-6
lines changed

1 file changed

+65
-6
lines changed

pyfastutil/src/utils/simd/BitonicSort.cpp

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -371,10 +371,39 @@ namespace simd {
371371
* Merge sorted blocks with SIMD optimization, or fallback
372372
* The data must be aligned; the vector's AlignedAllocator<T, 64> is expected to guarantee this.
373373
*/
374-
template <typename T, typename = std::enable_if_t<std::is_same_v<T, int> || std::is_same_v<T, long long>>>
375-
__forceinline void mergeSortedBlocks(std::vector<T, AlignedAllocator<T, 64>> &data, const size_t &blockSize) {
374+
/**
 * Bottom-up merge of consecutive, already-sorted blocks of `data`.
 *
 * Starting with blocks of `blockSize` elements, every pass merges neighbouring
 * block pairs via doSingleMerge and doubles the block width, ping-ponging
 * between `data` and a scratch vector of the same size. When the last pass
 * wrote into the scratch vector, the result is copied back into `data`.
 *
 * @param data      vector to merge in place; expected to consist of sorted
 *                  runs of `blockSize` elements (the last run may be shorter)
 * @param blockSize width of the pre-sorted runs; 0 is treated as 1
 */
__forceinline void mergeSortedBlocks(std::vector<int, AlignedAllocator<int, 64>> &data, const size_t &blockSize) {
    const size_t total = data.size();

    // A zero width would never grow (0 * 2 == 0) and the loops below would
    // spin forever; a single element is the smallest meaningful sorted block.
    const size_t firstWidth = std::max<size_t>(blockSize, 1);

    // The whole vector already is one block: no pass would run, so skip the
    // scratch allocation entirely.
    if (firstWidth >= total) {
        return;
    }

    auto temp = std::vector<int, AlignedAllocator<int, 64>>(total);

    // true  -> this pass reads `data` and writes `temp`
    // false -> this pass reads `temp` and writes `data`
    bool cycle = true;
    for (size_t size = firstWidth; size < total; size *= 2) {
        // Prefetch the element buffers themselves (not the vector objects).
        prefetchL1(data.data());
        prefetchL1(temp.data());

        auto &source = cycle ? data : temp;
        auto &target = cycle ? temp : data;
        for (size_t left = 0; left < total; left += 2 * size) {
            // Inclusive bounds, clamped to the last valid index for the tail.
            const size_t mid = std::min(left + size - 1, total - 1);
            const size_t right = std::min(left + 2 * size - 1, total - 1);
            doSingleMerge(left, mid, right, source, target);
        }

        cycle = !cycle;
    }

    // After an odd number of passes the merged result lives in `temp`;
    // copy the final result back (argument order kept from the original call).
    if (!cycle) {
        simdMemCpyAligned(temp.data(), data.data(), temp.size());
    }
}
403+
404+
__forceinline void mergeSortedBlocks(std::vector<long long, AlignedAllocator<long long, 64>> &data, const size_t &blockSize) {
405+
const size_t total = data.size();
406+
auto temp = std::vector<long long, AlignedAllocator<long long, 64>>(total);
378407

379408
bool cycle = true;
380409
size_t mid;
@@ -405,11 +434,41 @@ namespace simd {
405434
/**
406435
* Merge sorted blocks with SIMD optimization reversed, or fallback
407436
*/
408-
template <typename T, typename = std::enable_if_t<std::is_same_v<T, int> || std::is_same_v<T, long long>>>
409437
/**
 * Bottom-up merge of consecutive, already-sorted blocks of `data`, using the
 * reversed merge step (doSingleMergeReversed).
 *
 * Starting with blocks of `blockSize` elements, every pass merges neighbouring
 * block pairs and doubles the block width, ping-ponging between `data` and a
 * scratch vector of the same size. When the last pass wrote into the scratch
 * vector, the result is copied back into `data`.
 *
 * @param data      vector to merge in place; expected to consist of runs of
 *                  `blockSize` elements already ordered the way
 *                  doSingleMergeReversed expects (the last run may be shorter)
 * @param blockSize width of the pre-sorted runs; 0 is treated as 1
 */
__forceinline void
mergeSortedBlocksReversed(std::vector<int, AlignedAllocator<int, 64>> &data, const size_t &blockSize) {
    const size_t total = data.size();

    // A zero width would never grow (0 * 2 == 0) and the loops below would
    // spin forever; a single element is the smallest meaningful sorted block.
    const size_t firstWidth = std::max<size_t>(blockSize, 1);

    // The whole vector already is one block: no pass would run, so skip the
    // scratch allocation entirely.
    if (firstWidth >= total) {
        return;
    }

    auto temp = std::vector<int, AlignedAllocator<int, 64>>(total);

    // true  -> this pass reads `data` and writes `temp`
    // false -> this pass reads `temp` and writes `data`
    bool cycle = true;
    for (size_t size = firstWidth; size < total; size *= 2) {
        // Prefetch the element buffers themselves (not the vector objects).
        prefetchL1(data.data());
        prefetchL1(temp.data());

        auto &source = cycle ? data : temp;
        auto &target = cycle ? temp : data;
        for (size_t left = 0; left < total; left += 2 * size) {
            // Inclusive bounds, clamped to the last valid index for the tail.
            const size_t mid = std::min(left + size - 1, total - 1);
            const size_t right = std::min(left + 2 * size - 1, total - 1);
            doSingleMergeReversed(left, mid, right, source, target);
        }

        cycle = !cycle;
    }

    // After an odd number of passes the merged result lives in `temp`;
    // copy the final result back (argument order kept from the original call).
    if (!cycle) {
        simdMemCpyAligned(temp.data(), data.data(), temp.size());
    }
}
467+
468+
__forceinline void
469+
mergeSortedBlocksReversed(std::vector<long long, AlignedAllocator<long long, 64>> &data, const size_t &blockSize) {
411470
const size_t total = data.size();
412-
auto temp = std::vector<T, AlignedAllocator<T, 64>>(total);
471+
auto temp = std::vector<long long, AlignedAllocator<long long, 64>>(total);
413472

414473
bool cycle = true;
415474
size_t mid;

0 commit comments

Comments
 (0)