Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 38 additions & 16 deletions src/geometry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,31 @@

namespace Clockwork::geometry {

// clang-format off
// Offset arrangement is AVX2-specific (due to punpck-ordering).
constexpr std::array<u8, 64> AVX2_OFFSETS{{
0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, //
0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, //
0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, //
0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, //
0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, //
0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, //
0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, //
0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377, //
}};
// clang-format on

constexpr std::array<u8, 64> AVX512_OFFSETS{{
0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, //
0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, //
0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, //
0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, //
0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, //
0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, //
0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, //
0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377, //
}};

const std::array<u8x64, 64> SUPERPIECE_INVERSE_RAYS_AVX2_TABLE = []() {
template<std::array<u8, 64> OFFSETS, u8 RAY_OFFSET>
consteval std::array<u8x64, 64> calc_superpiece_inverse_rays_table() {
// clang-format off
constexpr u8 NONE = 0x80;
constexpr std::array<u8, 256> BASE{{
Expand Down Expand Up @@ -46,15 +55,25 @@ const std::array<u8x64, 64> SUPERPIECE_INVERSE_RAYS_AVX2_TABLE = []() {
u8 esq = internal::expand_sq(Square{sq});
std::array<u8, 64> b;
for (usize i = 0; i < 64; i++) {
u8 value = BASE[AVX2_OFFSETS[i] - esq];
u8 value = BASE[OFFSETS[i] - esq];
value = value != NONE ? (value + RAY_OFFSET) % 64 : NONE;
b[i] = value;
}
table[sq] = u8x64{b};
}
return table;
}();
}


const std::array<u8x64, 64> PIECE_MOVES_AVX2_TABLE = []() {
const std::array<u8x64, 64> SUPERPIECE_INVERSE_RAYS_AVX2_TABLE =
calc_superpiece_inverse_rays_table<AVX2_OFFSETS, 0>();
const std::array<u8x64, 64> SUPERPIECE_INVERSE_RAYS_AVX512_TABLE =
calc_superpiece_inverse_rays_table<AVX512_OFFSETS, 0>();
const std::array<u8x64, 64> SUPERPIECE_INVERSE_RAYS_FLIPPED_AVX512_TABLE =
calc_superpiece_inverse_rays_table<AVX512_OFFSETS, 32>();

template<std::array<u8, 64> OFFSETS>
consteval std::array<u8x64, 64> calc_piece_moves_table() {
// clang-format off
constexpr u8 K = 1 << static_cast<i32>(PieceType::King);
constexpr u8 Q = 1 << static_cast<i32>(PieceType::Queen);
Expand Down Expand Up @@ -92,11 +111,14 @@ const std::array<u8x64, 64> PIECE_MOVES_AVX2_TABLE = []() {
u8 esq = internal::expand_sq(Square{sq});
std::array<u8, 64> b;
for (usize i = 0; i < 64; i++) {
b[i] = BASE[AVX2_OFFSETS[i] - esq];
b[i] = BASE[OFFSETS[i] - esq];
}
table[sq] = u8x64{b};
}
return table;
}();
}

const std::array<u8x64, 64> PIECE_MOVES_AVX2_TABLE = calc_piece_moves_table<AVX2_OFFSETS>();
const std::array<u8x64, 64> PIECE_MOVES_AVX512_TABLE = calc_piece_moves_table<AVX512_OFFSETS>();

} // namespace Clockwork::geometry
36 changes: 35 additions & 1 deletion src/geometry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,18 @@ forceinline u8x64 superpiece_inverse_rays_avx2(Square sq) {
return SUPERPIECE_INVERSE_RAYS_AVX2_TABLE[sq.raw];
}

extern const std::array<u8x64, 64> SUPERPIECE_INVERSE_RAYS_AVX512_TABLE;

forceinline u8x64 superpiece_inverse_rays_avx512(Square sq) {
return SUPERPIECE_INVERSE_RAYS_AVX512_TABLE[sq.raw];
}

extern const std::array<u8x64, 64> SUPERPIECE_INVERSE_RAYS_FLIPPED_AVX512_TABLE;

forceinline u8x64 superpiece_inverse_rays_flipped_avx512(Square sq) {
return SUPERPIECE_INVERSE_RAYS_FLIPPED_AVX512_TABLE[sq.raw];
}

extern const std::array<u8x64, 64> PIECE_MOVES_AVX2_TABLE;

forceinline m8x64 piece_moves_avx2(bool color, PieceType ptype, Square sq) {
Expand All @@ -134,6 +146,16 @@ forceinline m8x64 piece_moves_avx2(bool color, PieceType ptype, Square sq) {
return table.test(bit);
}

extern const std::array<u8x64, 64> PIECE_MOVES_AVX512_TABLE;

forceinline m8x64 piece_moves_avx512(bool color, PieceType ptype, Square sq) {
assert(ptype != PieceType::None);
i32 index = ptype == PieceType::Pawn ? color : static_cast<i32>(ptype);
u8x64 bit = u8x64::splat(static_cast<u8>(1 << index));
u8x64 table = PIECE_MOVES_AVX512_TABLE[sq.raw];
return table.test(bit);
}

forceinline u8x64 slider_broadcast(u8x64 x) {
#if LPS_AVX512
constexpr u8 NONE = 0xFF;
Expand Down Expand Up @@ -172,7 +194,19 @@ forceinline u8x64 slider_broadcast(u8x64 x) {
}

forceinline u8x64 lane_broadcast(u8x64 x) {
#if LPS_AVX2
#if LPS_AVX512
u8x64 EXPAND_IDX{{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, //
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, //
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, //
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, //
0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, //
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, //
0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, //
}};
return EXPAND_IDX.swizzle(u8x64{_mm512_sad_epu8(x.raw, _mm512_setzero_si512())});
#elif LPS_AVX2
u8x64 y;
y.raw[0].raw = _mm256_sad_epu8(x.raw[0].raw, _mm256_setzero_si256());
y.raw[1].raw = _mm256_sad_epu8(x.raw[1].raw, _mm256_setzero_si256());
Expand Down
33 changes: 33 additions & 0 deletions src/movegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,10 +443,21 @@ bool MoveGen::is_hside_castling_legal(Bitboard empty, Bitboard danger) const {
}

void MoveGen::write(MoveList& moves, Square dest, PieceMask piecemask, MoveFlags mf) {
#if LPS_AVX512
moves.unsafe_append([&](Move* data) {
u16x16 vec =
u16x16::splat(Move{Square{0}, dest, mf}.raw)
| u16x16{_mm256_cvtepu8_epi16(m_position.piece_list_sq(m_active_color).to_vector().raw)};
vec = m16x16{piecemask.value()}.compress(vec);
std::memcpy(data, &vec, sizeof(vec));
return piecemask.popcount();
});
#else
for (PieceId id : piecemask) {
Square src = m_position.piece_list_sq(m_active_color)[id];
moves.push_back(Move{src, dest, mf});
}
#endif
}

void MoveGen::write(MoveList& moves,
Expand All @@ -460,10 +471,32 @@ void MoveGen::write(MoveList& moves,
}

void MoveGen::write_pawn(MoveList& moves, Bitboard src_bb, i32 shift, MoveFlags mf) {
#if LPS_AVX512
u16x32 base = []() consteval {
std::array<Move, 32> base;
for (usize i = 0; i < 32; i++) {
Square src{static_cast<u8>(i)};
Square dest{static_cast<u8>(i)};
base[i] = Move{src, dest, static_cast<MoveFlags>(0)};
}
return std::bit_cast<u16x32>(base);
}();
for (int i : {0, 32}) {
moves.unsafe_append([&](Move* data) {
m16x32 mask{static_cast<u32>(src_bb.value() >> i)};
u16x32 vec =
u16x32::splat(static_cast<u16>(i + ((i + shift) << 6) + static_cast<u16>(mf))) + base;
vec = mask.compress(vec);
std::memcpy(data, &vec, sizeof(vec));
return static_cast<usize>(std::popcount(mask.raw));
});
}
#else
for (Square src : src_bb) {
Square dest{static_cast<u8>(src.raw + shift)};
moves.push_back(Move{src, dest, mf});
}
#endif
}

bool MoveGen::is_ep_clearance_pinned(PieceMask ep_attackers_mask) const {
Expand Down
89 changes: 71 additions & 18 deletions src/position.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,22 +175,44 @@ void Position::incrementally_move_piece(
dst_slider_ids = dst_raymask.mask(geometry::flip_rays(dst_slider_ids)); // flip rays
dst_slider_ids |= dst_raymask.mask(u8x64::splat(0x20)); // pack information for efficiency

#if LPS_AVX512
u8x64 src_inv_perm = geometry::superpiece_inverse_rays_avx512(from);
u8x64 dst_inv_perm = geometry::superpiece_inverse_rays_avx512(to);
#else
u8x64 src_inv_perm = geometry::superpiece_inverse_rays_avx2(from);
u8x64 dst_inv_perm = geometry::superpiece_inverse_rays_avx2(to);
#endif

// Transform into board layout
src_slider_ids = src_inv_perm.swizzle(src_slider_ids);
dst_slider_ids = dst_inv_perm.swizzle(dst_slider_ids);

// Recover color information
u8x64 src_col = src_slider_ids.test(u8x64::splat(0x10)).to_vector();
u8x64 dst_col = dst_slider_ids.test(u8x64::splat(0x10)).to_vector();
m8x64 src_col = src_slider_ids.test(u8x64::splat(0x10));
m8x64 dst_col = dst_slider_ids.test(u8x64::splat(0x10));
// Recover ray mask information
m8x64 ret = dst_slider_ids.test(u8x64::splat(0x20));

src_slider_ids &= u8x64::splat(0x0F);
dst_slider_ids &= u8x64::splat(0x0F);

#if LPS_AVX512
u16x64 src_slider_ids2 = std::bit_cast<u16x64>(
std::array{_mm512_cvtepu8_epi16(_mm512_castsi512_si256(src_slider_ids.raw)),
_mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(src_slider_ids.raw, 1))});
u16x64 dst_slider_ids2 = std::bit_cast<u16x64>(
std::array{_mm512_cvtepu8_epi16(_mm512_castsi512_si256(dst_slider_ids.raw)),
_mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(dst_slider_ids.raw, 1))});

u16x64 src_at = m16x64{src_slider_ids.nonzeros().raw}.mask(u16x64::splat(1) << src_slider_ids2);
u16x64 dst_at = m16x64{dst_slider_ids.nonzeros().raw}.mask(u16x64::splat(1) << dst_slider_ids2);

m16x64 src_color{src_col.raw};
m16x64 dst_color{dst_col.raw};

m_attack_table[0].raw ^= (~src_color).mask(src_at) ^ (~dst_color).mask(dst_at);
m_attack_table[1].raw ^= src_color.mask(src_at) ^ dst_color.mask(dst_at);
#else
// AVX2 doesn't have a variable word shift, so were're doing it this way.
// Index zero is invalid here (the king is never a slider), so 0 converts to 0.
static const u8x16 BITS_LO{{0x00, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, //
Expand All @@ -202,10 +224,10 @@ void Position::incrementally_move_piece(
u8x64 dst_at_lo = dst_slider_ids.swizzle(BITS_LO);
u8x64 dst_at_hi = dst_slider_ids.swizzle(BITS_HI);

u8x64 src_color0 = src_col.zip_low_128lanes(src_col);
u8x64 src_color1 = src_col.zip_high_128lanes(src_col);
u8x64 dst_color0 = dst_col.zip_low_128lanes(dst_col);
u8x64 dst_color1 = dst_col.zip_high_128lanes(dst_col);
u8x64 src_color0 = src_col.to_vector().zip_low_128lanes(src_col.to_vector());
u8x64 src_color1 = src_col.to_vector().zip_high_128lanes(src_col.to_vector());
u8x64 dst_color0 = dst_col.to_vector().zip_low_128lanes(dst_col.to_vector());
u8x64 dst_color1 = dst_col.to_vector().zip_high_128lanes(dst_col.to_vector());

u16x64 src_color = std::bit_cast<u16x64>(std::array<u8x64, 2>{src_color0, src_color1});
u16x64 dst_color = std::bit_cast<u16x64>(std::array<u8x64, 2>{dst_color0, dst_color1});
Expand All @@ -220,6 +242,7 @@ void Position::incrementally_move_piece(

m_attack_table[0].raw ^= src_at.andnot(src_color) ^ dst_at.andnot(dst_color);
m_attack_table[1].raw ^= (src_at & src_color) ^ (dst_at & dst_color);
#endif

add_attacks(color, p.id(), to, p.ptype(), ret);
}
Expand All @@ -242,18 +265,34 @@ m8x64 Position::toggle_rays(Square sq) {
slider_ids = raymask.mask(geometry::flip_rays(slider_ids)); // flip rays
slider_ids |= raymask.mask(u8x64::splat(0x20)); // pack information for efficiency

#if LPS_AVX512
u8x64 inv_perm = geometry::superpiece_inverse_rays_avx512(sq);
#else
u8x64 inv_perm = geometry::superpiece_inverse_rays_avx2(sq);
#endif

// Transform into board layout
slider_ids = inv_perm.swizzle(slider_ids);

// Recover color information
u8x64 col = slider_ids.test(u8x64::splat(0x10)).to_vector();
m8x64 col = slider_ids.test(u8x64::splat(0x10));
// Recover ray mask information
m8x64 ret = slider_ids.test(u8x64::splat(0x20));

slider_ids &= u8x64::splat(0x0F);

#if LPS_AVX512
u16x64 slider_ids2 = std::bit_cast<u16x64>(
std::array{_mm512_cvtepu8_epi16(_mm512_castsi512_si256(slider_ids.raw)),
_mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(slider_ids.raw, 1))});

u16x64 at = m16x64{slider_ids.nonzeros().raw}.mask(u16x64::splat(1) << slider_ids2);

m16x64 color{col.raw};

m_attack_table[0].raw ^= (~color).mask(at);
m_attack_table[1].raw ^= color.mask(at);
#else
// AVX2 doesn't have a variable word shift, so were're doing it this way.
// Index zero is invalid here (the king is never a slider), so 0 converts to 0.
static const u8x16 BITS_LO{{0x00, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, //
Expand All @@ -263,8 +302,8 @@ m8x64 Position::toggle_rays(Square sq) {
u8x64 at_lo = slider_ids.swizzle(BITS_LO);
u8x64 at_hi = slider_ids.swizzle(BITS_HI);

u8x64 color0 = col.zip_low_128lanes(col);
u8x64 color1 = col.zip_high_128lanes(col);
u8x64 color0 = col.to_vector().zip_low_128lanes(col.to_vector());
u8x64 color1 = col.to_vector().zip_high_128lanes(col.to_vector());
u16x64 color = std::bit_cast<u16x64>(std::array<u8x64, 2>{color0, color1});

u8x64 at0 = at_lo.zip_low_128lanes(at_hi);
Expand All @@ -273,6 +312,7 @@ m8x64 Position::toggle_rays(Square sq) {

m_attack_table[0].raw ^= at.andnot(color);
m_attack_table[1].raw ^= at & color;
#endif

return ret;
}
Expand All @@ -293,7 +333,11 @@ void Position::add_attacks(bool color, PieceId id, Square sq, PieceType ptype) {
u8x64 ray_places = ray_coords.swizzle(m_board.to_vector());
m8x64 raymask = geometry::superpiece_attacks(ray_places, ray_valid);

u8x64 inv_perm = geometry::superpiece_inverse_rays_avx2(sq);
#if LPS_AVX512
u8x64 inv_perm = geometry::superpiece_inverse_rays_avx512(sq);
#else
u8x64 inv_perm = geometry::superpiece_inverse_rays_avx2(sq);
#endif
m8x64 boardmask = inv_perm.swizzle(raymask);

add_attacks(color, id, sq, ptype, boardmask);
Expand All @@ -303,14 +347,22 @@ void Position::add_attacks(bool color, PieceId id, Square sq, PieceType ptype) {
}

void Position::add_attacks(bool color, PieceId id, Square sq, PieceType ptype, m8x64 mask) {
u8x64 moves = (mask & geometry::piece_moves_avx2(color, ptype, sq)).to_vector();
u16x64 bit = u16x64::splat(id.to_piece_mask().value());

u8x64 m0 = moves.zip_low_128lanes(moves);
u8x64 m1 = moves.zip_high_128lanes(moves);
#if LPS_AVX512
m8x64 moves = mask & geometry::piece_moves_avx512(color, ptype, sq);

m_attack_table[color].raw |= m16x64{moves.raw}.mask(bit);
#else
m8x64 moves = mask & geometry::piece_moves_avx2(color, ptype, sq);
u8x64 moves_vec = moves.to_vector();

u8x64 m0 = moves_vec.zip_low_128lanes(moves_vec);
u8x64 m1 = moves_vec.zip_high_128lanes(moves_vec);
u16x64 m = std::bit_cast<u16x64>(std::array<u8x64, 2>{m0, m1});

u16x64 bit = u16x64::splat(id.to_piece_mask().value());
m_attack_table[color].raw |= m & bit;
#endif
}

template<bool UPDATE_PSQT>
Expand Down Expand Up @@ -523,13 +575,14 @@ std::tuple<Wordboard, Bitboard> Position::calc_pin_mask() const {

// Does this ray have a pinner?
#if LPS_AVX512
m8x64 no_pinner_mask{
std::bit_cast<vm8x64>(std::bit_cast<u64x8>(pinner.to_vector()).zeros().to_vector())
.to_bits()};
const m8x16 has_attacker_vecmask = u8x16{_mm_set1_epi64x(pinner.raw)}.nonzeros();
const m8x64 pinned = m8x64{static_cast<u64>(
_mm_cvtsi128_si64(has_attacker_vecmask.mask(u8x16{_mm_set1_epi64x(maybe_pinned.raw)}).raw))};
#else
m8x64 no_pinner_mask = std::bit_cast<m8x64>(std::bit_cast<m64x8>(pinner).to_vector().zeros());
m8x64 pinned = maybe_pinned.andnot(no_pinner_mask);
#endif
m8x64 pinned = maybe_pinned.andnot(no_pinner_mask);


u8x64 nonmasked_pinned_ids =
geometry::lane_broadcast(pinned.mask(ray_places & u8x64::splat(0xF)));
Expand Down
6 changes: 6 additions & 0 deletions src/util/static_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ class StaticVector {
return res;
}

template<typename F>
void unsafe_append(F&& f) {
usize sz = f(end());
m_len += sz;
}

void append(const StaticVector& other) {
assert(m_len + other.m_len <= cap);
std::uninitialized_copy(other.begin(), other.end(), end());
Expand Down
Loading