Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ set(srcs
src/util/bit.hpp
src/util/parse.hpp
src/util/pretty.hpp
src/util/large_pages.hpp
src/util/static_vector.hpp
src/util/types.hpp
src/util/vec/sse2.hpp
Expand Down
5 changes: 3 additions & 2 deletions src/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "tm.hpp"
#include "tuned.hpp"
#include "uci.hpp"
#include "util/large_pages.hpp"
#include "util/log2.hpp"
#include "util/types.hpp"
#include <algorithm>
Expand Down Expand Up @@ -118,9 +119,9 @@ void Searcher::initialize(size_t thread_count) {
started_barrier = std::make_unique<std::barrier<>>(1 + thread_count);

if (thread_count > 0) {
m_workers.push_back(std::make_unique<Worker>(*this, ThreadType::MAIN));
m_workers.push_back(make_unique_huge_page<Worker>(*this, ThreadType::MAIN));
for (size_t i = 1; i < thread_count; i++) {
m_workers.push_back(std::make_unique<Worker>(*this, ThreadType::SECONDARY));
m_workers.push_back(make_unique_huge_page<Worker>(*this, ThreadType::SECONDARY));
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ class Searcher {
}

private:
std::vector<std::unique_ptr<Worker>> m_workers;
std::vector<unique_ptr_huge_page<Worker>> m_workers;
};

class alignas(128) Worker {
Expand Down
7 changes: 1 addition & 6 deletions src/tt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,6 @@ TT::TT(size_t mb) :
resize(mb);
}

TT::~TT() {
aligned_free(m_clusters);
}

std::optional<TTData> TT::probe(const Position& pos, i32 ply) const {
size_t idx = mulhi64(pos.get_hash_key(), m_size);
const auto cluster = this->m_clusters[idx].load();
Expand Down Expand Up @@ -161,13 +157,12 @@ void TT::store(const Position& pos,
}

void TT::resize(size_t mb) {
aligned_free(m_clusters);

size_t bytes = mb * 1024 * 1024;
size_t entries = bytes / sizeof(TTClusterMemory);

m_size = entries;
m_clusters = static_cast<TTClusterMemory*>(aligned_alloc(TT_ALIGNMENT, bytes));
m_clusters = make_unique_for_overwrite_huge_page<TTClusterMemory[]>(m_size);
clear();
}

Expand Down
8 changes: 4 additions & 4 deletions src/tt.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include "position.hpp"
#include "util/large_pages.hpp"
#include <array>
#include <atomic>
#include <bit>
Expand Down Expand Up @@ -90,7 +91,6 @@ class TT {
static constexpr u8 AGE_MASK = 0x1F;

TT(size_t mb = DEFAULT_SIZE_MB);
~TT();

std::optional<TTData> probe(const Position& position, i32 ply) const;
void store(const Position& position,
Expand All @@ -107,9 +107,9 @@ class TT {
i32 hashfull() const;

private:
TTClusterMemory* m_clusters;
size_t m_size;
u8 m_age;
unique_ptr_huge_page<TTClusterMemory[]> m_clusters;
size_t m_size;
u8 m_age;
};

} // namespace Clockwork
156 changes: 156 additions & 0 deletions src/util/large_pages.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#pragma once

#include <cstddef>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <memory>
#include <mutex>
#include <new>
#include <type_traits>
#include <utility>

#ifdef __linux__
#include <sys/mman.h>
#elif defined(_WIN32)
#include <windows.h>
#endif

/// Owning smart pointer type returned by the `make_unique*_huge_page` factories.
///
/// The deleter is a type-erased `std::function` that receives a pointer to the
/// element type: `T*` for a single object, `E*` for an array `E[]`.
/// (`std::remove_all_extents_t<T>` is `T` itself when `T` is not an array, so one
/// spelling covers both cases.)
template<typename T>
using unique_ptr_huge_page =
  std::unique_ptr<T, std::function<void(std::remove_all_extents_t<T>*)>>;

/// Allocates raw storage of at least `size` bytes, requesting huge/large pages
/// from the operating system where that is supported.
///
/// No object is constructed in the returned storage. Release it with
/// `deallocate_huge_page`.
///
/// - Windows: tries to enable `SeLockMemoryPrivilege` and allocate with
///   `MEM_LARGE_PAGES`; if any step fails, falls back to a regular `VirtualAlloc`.
/// - Linux: allocates 2MB-aligned memory rounded up to a multiple of 2MB and
///   advises the kernel with `MADV_HUGEPAGE` (best effort).
/// - Other platforms: same aligned allocation, without the advice.
///
/// @param size Number of bytes requested.
/// @return Pointer to the storage, or `nullptr` if the allocation failed or if
///         rounding `size` up to the page size would overflow `std::size_t`.
template<typename T>
T* allocate_huge_page(std::size_t size) {
    [[maybe_unused]] constexpr std::size_t huge_page_size = 2 * 1024 * 1024;  // 2MB pages
    constexpr std::size_t                  max_size       = static_cast<std::size_t>(-1);

#if defined(_WIN32) && !defined(__linux__)
    // Regular (small page) allocation, used whenever large pages are unavailable.
    const auto allocate_regular = [size]() {
        return static_cast<T*>(
          VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE));
    };

    // Get the current process token
    HANDLE hToken;
    if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken)) {
        return allocate_regular();
    }

    // Look up and enable the SeLockMemoryPrivilege
    bool privilege_enabled = false;
    LUID luid;
    if (LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid)) {
        TOKEN_PRIVILEGES tp;
        tp.PrivilegeCount           = 1;
        tp.Privileges[0].Luid       = luid;
        tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;

        // Even if AdjustTokenPrivileges returns success, must check GetLastError for
        // ERROR_NOT_ALL_ASSIGNED. It is read immediately, before any other API call.
        privilege_enabled = AdjustTokenPrivileges(hToken, FALSE, &tp, sizeof(tp), nullptr, nullptr)
                         && GetLastError() != ERROR_NOT_ALL_ASSIGNED;
    }
    CloseHandle(hToken);

    if (!privilege_enabled) {
        return allocate_regular();
    }

    // Get the large page minimum size (typically 2MB on x64 Windows). It is zero when the
    // processor does not support large pages; dividing by it would then be a division by zero.
    const SIZE_T largePageMinimum = GetLargePageMinimum();
    if (largePageMinimum == 0 || size > max_size - (largePageMinimum - 1)) {
        return allocate_regular();
    }
    const SIZE_T roundedSize =
      ((size + largePageMinimum - 1) / largePageMinimum) * largePageMinimum;

    // Allocate with MEM_LARGE_PAGES
    T* data = static_cast<T*>(VirtualAlloc(
      nullptr, roundedSize, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE));

    return data ? data : allocate_regular();
#else
    // Rounding up must not wrap around: a wrapped size would silently produce a
    // buffer far smaller than the caller asked for.
    if (size > max_size - (huge_page_size - 1)) {
        return nullptr;
    }
    size = ((size + huge_page_size - 1) / huge_page_size) * huge_page_size;

    T* data = static_cast<T*>(std::aligned_alloc(huge_page_size, size));
#ifdef __linux__
    if (data) {
        // Advisory only; the result is deliberately ignored.
        madvise(data, size, MADV_HUGEPAGE);
    }
#endif
    return data;
#endif
}

/// Releases storage obtained from `allocate_huge_page`.
///
/// Uses `VirtualFree(..., MEM_RELEASE)` on Windows and `std::free` everywhere
/// else. No destructor is run; callers destroy the object(s) first.
///
/// @param ptr Start of the storage to release.
template<typename T>
void deallocate_huge_page(T* ptr) {
#if defined(_WIN32) && !defined(__linux__)
    VirtualFree(ptr, 0, MEM_RELEASE);
#else
    // Linux and the generic fallback both allocate with std::aligned_alloc.
    std::free(ptr);
#endif
}

/// Constructs a single `T` from `args` in storage obtained from
/// `allocate_huge_page` and returns an owning pointer to it.
///
/// The returned pointer's deleter destroys the object and then releases the
/// storage with `deallocate_huge_page`.
///
/// @param args Arguments forwarded to `T`'s constructor.
/// @return Owning pointer to the newly constructed object (never null).
/// @throws std::bad_alloc if the storage cannot be allocated; anything thrown
///         by `T`'s constructor is propagated after the storage is released.
template<class T, class... Args>
    requires(!std::is_array_v<T>)
unique_ptr_huge_page<T> make_unique_huge_page(Args&&... args) {
    T* data = allocate_huge_page<T>(sizeof(T));
    if (data == nullptr) {
        // Like std::make_unique: report failure instead of constructing at a null address.
        throw std::bad_alloc{};
    }

    // Owns the raw storage until ownership is handed over, so a throwing
    // constructor does not leak it.
    auto release_storage = [](T* ptr) { deallocate_huge_page(ptr); };
    std::unique_ptr<T, decltype(release_storage)> storage(data, release_storage);

    std::construct_at(data, std::forward<Args>(args)...);

    unique_ptr_huge_page<T> result(data, [](T* ptr) {
        std::destroy_at(ptr);
        deallocate_huge_page(ptr);
    });
    storage.release();
    return result;
}

/// Creates an array of `n` value-initialized elements in storage obtained from
/// `allocate_huge_page` and returns an owning pointer to it.
///
/// The returned pointer's deleter destroys all `n` elements and then releases
/// the storage with `deallocate_huge_page`.
///
/// @param n Number of elements.
/// @return Owning pointer to the first element. It may be null only when
///         `n == 0` and the allocator returned no storage.
/// @throws std::bad_array_new_length if `n * sizeof(element)` overflows;
///         std::bad_alloc if the storage cannot be allocated; anything thrown
///         by an element constructor is propagated after the storage is released.
template<class T>
    requires std::is_unbounded_array_v<T>
unique_ptr_huge_page<T> make_unique_huge_page(std::size_t n) {
    using E = std::remove_all_extents_t<T>;

    // The byte count must fit in std::size_t, otherwise the multiplication wraps
    // and the buffer would be smaller than n elements.
    if (n > static_cast<std::size_t>(-1) / sizeof(E)) {
        throw std::bad_array_new_length{};
    }

    E* data = allocate_huge_page<E>(n * sizeof(E));
    if (data == nullptr && n != 0) {
        throw std::bad_alloc{};
    }

    // Owns the raw storage until ownership is handed over, so a throwing element
    // constructor does not leak it.
    auto release_storage = [](E* ptr) { deallocate_huge_page(ptr); };
    std::unique_ptr<E, decltype(release_storage)> storage(data, release_storage);

    std::uninitialized_value_construct_n(data, n);

    unique_ptr_huge_page<T> result(data, [n](E* ptr) {
        std::destroy_n(ptr, n);
        deallocate_huge_page(ptr);
    });
    storage.release();
    return result;
}

template<class T, class... Args>
requires std::is_bounded_array_v<T>
void make_unique_huge_page(Args&&...) = delete;

/// Default-initializes a single `T` in storage obtained from
/// `allocate_huge_page` and returns an owning pointer to it.
///
/// Default-initialization leaves trivially constructible members
/// indeterminate; the caller is expected to overwrite them.
/// The returned pointer's deleter destroys the object and then releases the
/// storage with `deallocate_huge_page`.
///
/// @return Owning pointer to the new object (never null).
/// @throws std::bad_alloc if the storage cannot be allocated; anything thrown
///         by `T`'s default constructor is propagated after the storage is released.
template<class T>
    requires(!std::is_array_v<T>)
unique_ptr_huge_page<T> make_unique_for_overwrite_huge_page() {
    T* data = allocate_huge_page<T>(sizeof(T));
    if (data == nullptr) {
        // Report failure instead of constructing at a null address.
        throw std::bad_alloc{};
    }

    // Owns the raw storage until ownership is handed over, so a throwing
    // constructor does not leak it.
    auto release_storage = [](T* ptr) { deallocate_huge_page(ptr); };
    std::unique_ptr<T, decltype(release_storage)> storage(data, release_storage);

    // Global placement new, so a class-specific operator new cannot interfere.
    ::new (static_cast<void*>(data)) T;

    unique_ptr_huge_page<T> result(data, [](T* ptr) {
        std::destroy_at(ptr);
        deallocate_huge_page(ptr);
    });
    storage.release();
    return result;
}

/// Creates an array of `n` default-initialized elements in storage obtained
/// from `allocate_huge_page` and returns an owning pointer to it.
///
/// Default-initialization leaves trivially constructible elements
/// indeterminate; the caller is expected to overwrite them.
/// The returned pointer's deleter destroys all `n` elements and then releases
/// the storage with `deallocate_huge_page`.
///
/// @param n Number of elements.
/// @return Owning pointer to the first element. It may be null only when
///         `n == 0` and the allocator returned no storage.
/// @throws std::bad_array_new_length if `n * sizeof(element)` overflows;
///         std::bad_alloc if the storage cannot be allocated; anything thrown
///         by an element constructor is propagated after the storage is released.
template<class T>
    requires std::is_unbounded_array_v<T>
unique_ptr_huge_page<T> make_unique_for_overwrite_huge_page(std::size_t n) {
    using E = std::remove_all_extents_t<T>;

    // The byte count must fit in std::size_t, otherwise the multiplication wraps
    // and the buffer would be smaller than n elements.
    if (n > static_cast<std::size_t>(-1) / sizeof(E)) {
        throw std::bad_array_new_length{};
    }

    E* data = allocate_huge_page<E>(n * sizeof(E));
    if (data == nullptr && n != 0) {
        throw std::bad_alloc{};
    }

    // Owns the raw storage until ownership is handed over, so a throwing element
    // constructor does not leak it.
    auto release_storage = [](E* ptr) { deallocate_huge_page(ptr); };
    std::unique_ptr<E, decltype(release_storage)> storage(data, release_storage);

    std::uninitialized_default_construct_n(data, n);

    unique_ptr_huge_page<T> result(data, [n](E* ptr) {
        std::destroy_n(ptr, n);
        deallocate_huge_page(ptr);
    });
    storage.release();
    return result;
}

template<class T, class... Args>
requires std::is_bounded_array_v<T>
void make_unique_for_overwrite_huge_page(Args&&...) = delete;
Loading