Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CXX := g++
CXXFLAGS := -std=c++17 -O3 -fPIC -fopenmp
CXXFLAGS := -std=c++17 -O3 -fPIC -march=native -fopenmp
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

增加 -march=native,讓編譯器啟用編譯主機 CPU 所支援的指令集(例如 AVX2、FMA)


# Python / pybind11 include flags
PYBIND11_INCLUDES := $(shell python3 -m pybind11 --includes)
Expand Down
29 changes: 26 additions & 3 deletions include/zenann/SimdUtils.h
Original file line number Diff line number Diff line change
@@ -1,16 +1,39 @@
#pragma once
#include <cstddef>
#include <immintrin.h>

namespace zenann {
inline float l2_naive(const float* a,
const float* b,
size_t dim) {
/// Squared Euclidean (L2) distance between two float arrays.
///
/// Computes the sum of (a[i] - b[i])^2 for i in [0, dim). No square root is
/// taken. When the translation unit is compiled with AVX2 and FMA enabled,
/// elements are processed eight at a time with 256-bit intrinsics and any
/// remaining tail is handled by a scalar loop; otherwise the whole range is
/// handled by the scalar loop (the pre-SIMD behaviour).
///
/// @param a   First input; at least `dim` floats are read from it.
/// @param b   Second input; at least `dim` floats are read from it.
/// @param dim Number of elements to compare.
/// @return    Sum of squared element-wise differences; 0 when `dim` is 0.
inline float l2_simd(const float* __restrict a,
                     const float* __restrict b,
                     size_t dim) {
// _mm256_fmadd_ps is an FMA instruction, not part of AVX2 itself, so the fast
// path must also require FMA; GCC/Clang signal it with __FMA__. MSVC has no
// __FMA__ macro but accepts FMA intrinsics when __AVX2__ (/arch:AVX2) is set.
#if defined(__AVX2__) && (defined(__FMA__) || defined(_MSC_VER))
    constexpr size_t step = 8;  // 8 x 32-bit floats per 256-bit register
    __m256 acc = _mm256_setzero_ps();
    size_t i = 0;
    // Unaligned loads: callers are not required to align their buffers.
    for (; i + step <= dim; i += step) {
        const __m256 va = _mm256_loadu_ps(a + i);
        const __m256 vb = _mm256_loadu_ps(b + i);
        const __m256 diff = _mm256_sub_ps(va, vb);
        acc = _mm256_fmadd_ps(diff, diff, acc);  // acc += diff * diff
    }

    // Horizontal reduction of the eight partial sums.
    float buf[step];
    _mm256_storeu_ps(buf, acc);
    float d = 0.f;
    for (size_t j = 0; j < step; ++j) {
        d += buf[j];
    }

    // Scalar tail for the last (dim % 8) elements.
    for (; i < dim; ++i) {
        const float diff = a[i] - b[i];
        d += diff * diff;
    }
    return d;
#else
    // Portable fallback when AVX2/FMA are not enabled at compile time.
    float d = 0.f;
    for (size_t i = 0; i < dim; ++i) {
        const float diff = a[i] - b[i];
        d += diff * diff;
    }
    return d;
#endif
}

}
4 changes: 2 additions & 2 deletions src/IVFFlatIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ SearchResult IVFFlatIndex::search(const Vector& query, size_t k) const {
// Calculate distance from query to all centroids (parallelized)
#pragma omp parallel for schedule(static)
for (size_t c = 0; c < nlist_; ++c) {
float d = l2_naive(query.data(), centroids_[c].data(), dimension_);
float d = l2_simd(query.data(), centroids_[c].data(), dimension_);
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

將既有 l2_naive 替換成新的 l2_simd

cdist[c] = {d, c};
}

Expand All @@ -77,7 +77,7 @@ SearchResult IVFFlatIndex::search(const Vector& query, size_t k) const {

// Search within this cluster's inverted list
for (size_t id : lists_[c]) {
float dist = l2_naive(query.data(), data[id].data(), dimension_);
float dist = l2_simd(query.data(), data[id].data(), dimension_);

if (local.size() < k) {
local.emplace_back(dist, id);
Expand Down