diff --git a/include/exec/detail/numa.hpp b/include/exec/detail/numa.hpp index 26b42f5ac..d1edf64fd 100644 --- a/include/exec/detail/numa.hpp +++ b/include/exec/detail/numa.hpp @@ -16,232 +16,64 @@ */ #pragma once +#include "../../stdexec/__detail/__any.hpp" #include "../../stdexec/__detail/__config.hpp" #include "../scope.hpp" // IWYU pragma: keep -#include // IWYU pragma: keep #include -#include -#include // IWYU pragma: keep #include -#include -#include - -// Work around a bug in the NVHPC compilers prior to version 24.3 -#if STDEXEC_NVHPC() && STDEXEC_NVHPC_VERSION < 24'03 -# define STDEXEC_NUMA_VTABLE_INLINE -#else -# define STDEXEC_NUMA_VTABLE_INLINE inline -#endif +#include // IWYU pragma: keep namespace experimental::execution { namespace _numa { - using _small_t = void *[1]; - - template - using _is_small = STDEXEC::__mbool; - - union _storage - { - _storage() noexcept = default; - - template Ty> - explicit _storage(Ty &&value) - : ptr{new STDEXEC::__decay_t{static_cast(value)}} - {} - - template Ty> - requires(_is_small>::value) - explicit _storage(Ty &&value) noexcept(STDEXEC::__nothrow_decay_copyable) - : buf{} + // NOLINTBEGIN(modernize-use-override) + template + struct _ipolicy + : STDEXEC::__any::__interface_base<_ipolicy, + Base, + STDEXEC::__any::__extends, + sizeof(void *)> + { + using _ipolicy::__interface_base::__interface_base; + + [[nodiscard]] + constexpr virtual auto num_nodes() const noexcept -> std::size_t { - ::new (static_cast(buf)) STDEXEC::__decay_t{static_cast(value)}; + return STDEXEC::__any::__value(*this).num_nodes(); } - void *ptr{}; - char buf[sizeof(_small_t)]; - }; - - struct _vtable - { - auto (*move)(_storage *, _storage *) noexcept -> void; - auto (*copy)(_storage *, _storage const *) -> void; - auto (*destroy)(_storage *) noexcept -> void; - auto (*num_nodes)(_storage const *) noexcept -> std::size_t; - auto (*num_cpus)(_storage const *, int) noexcept -> std::size_t; - auto (*bind_to_node)(_storage const *, int) noexcept -> 
int; - auto (*thread_index_to_node)(_storage const *, std::size_t) noexcept -> int; - }; - - template - struct _vtable_for - { - // move - static auto _move(_storage *self, _storage *other) noexcept -> void + [[nodiscard]] + constexpr virtual auto num_cpus(int node) const noexcept -> std::size_t { - if constexpr (!_is_small::value) - { - self->ptr = std::exchange(other->ptr, nullptr); - } - else - { - ::new (static_cast(self->buf)) - T{static_cast(*reinterpret_cast(other->buf))}; - } + return STDEXEC::__any::__value(*this).num_cpus(node); } - // copy - static auto _copy(_storage *self, _storage const *other) noexcept -> void + // NOLINTNEXTLINE(modernize-use-nodiscard) + constexpr virtual auto bind_to_node(int node) const noexcept -> int { - if constexpr (!_is_small::value) - { - self->ptr = new T{*static_cast(other->ptr)}; - } - else - { - ::new (static_cast(self->buf)) T{*reinterpret_cast(other->buf)}; - } + return STDEXEC::__any::__value(*this).bind_to_node(node); } - // destroy - static auto _destroy(_storage *self) noexcept -> void + [[nodiscard]] + constexpr virtual auto thread_index_to_node(std::size_t index) const noexcept -> int { - if constexpr (!_is_small::value) - { - delete static_cast(self->ptr); - } - else - { - std::destroy_at(reinterpret_cast(self->buf)); - } - } - - // num_nodes - static auto _num_nodes(_storage const *self) noexcept -> std::size_t - { - if constexpr (!_is_small::value) - { - return static_cast(self->ptr)->num_nodes(); - } - else - { - return reinterpret_cast(self->buf)->num_nodes(); - } - } - - // num_cpus - static auto _num_cpus(_storage const *self, int node) noexcept -> std::size_t - { - if constexpr (!_is_small::value) - { - return static_cast(self->ptr)->num_cpus(node); - } - else - { - return reinterpret_cast(self->buf)->num_cpus(node); - } - } - - // bind_to_node - static auto _bind_to_node(_storage const *self, int node) noexcept -> int - { - if constexpr (!_is_small::value) - { - return 
static_cast(self->ptr)->bind_to_node(node); - } - else - { - return reinterpret_cast(self->buf)->bind_to_node(node); - } - } - - // thread_index_to_node - static auto _thread_index_to_node(_storage const *self, std::size_t index) noexcept -> int - { - if constexpr (!_is_small::value) - { - return static_cast(self->ptr)->thread_index_to_node(index); - } - else - { - return reinterpret_cast(self->buf)->thread_index_to_node(index); - } + return STDEXEC::__any::__value(*this).thread_index_to_node(index); } }; - - template - STDEXEC_NUMA_VTABLE_INLINE constexpr _vtable _vtable_for_v = { - .move = _vtable_for::_move, - .copy = _vtable_for::_copy, - .destroy = _vtable_for::_destroy, - .num_nodes = _vtable_for::_num_nodes, - .num_cpus = _vtable_for::_num_cpus, - .bind_to_node = _vtable_for::_bind_to_node, - .thread_index_to_node = _vtable_for::_thread_index_to_node}; + // NOLINTEND(modernize-use-override) } // namespace _numa - struct numa_policy + struct numa_policy final : STDEXEC::__any::__any { - private: - _numa::_vtable const *vtable_; - _numa::_storage storage_; - - public: - template NumaPolicy> - numa_policy(NumaPolicy &&policy) - : vtable_(&_numa::_vtable_for_v>) - , storage_(static_cast(policy)) - {} - - numa_policy(numa_policy &&other) noexcept - : vtable_(other.vtable_) - , storage_{} - { - vtable_->move(&storage_, &other.storage_); - } - - numa_policy(numa_policy const &other) - : vtable_(other.vtable_) - , storage_{} - { - vtable_->copy(&storage_, &other.storage_); - } - - ~numa_policy() - { - vtable_->destroy(&storage_); - } - - [[nodiscard]] - auto num_nodes() const noexcept -> std::size_t - { - return vtable_->num_nodes(&storage_); - } - - [[nodiscard]] - auto num_cpus(int node) const noexcept -> std::size_t - { - return vtable_->num_cpus(&storage_, node); - } - - auto bind_to_node(int node) const noexcept -> int - { // NOLINT(modernize-use-nodiscard) - return vtable_->bind_to_node(&storage_, node); - } - - [[nodiscard]] - auto 
thread_index_to_node(std::size_t index) const noexcept -> int - { - return vtable_->thread_index_to_node(&storage_, index); - } + using numa_policy::__any::__any; }; struct no_numa_policy { [[nodiscard]] - auto num_nodes() const noexcept -> std::size_t + constexpr auto num_nodes() const noexcept -> std::size_t { return 1; } @@ -252,13 +84,13 @@ namespace experimental::execution return std::thread::hardware_concurrency(); } - auto bind_to_node(int) const noexcept -> int - { // NOLINT(modernize-use-nodiscard) + constexpr auto bind_to_node(int) const noexcept -> int // NOLINT(modernize-use-nodiscard) + { return 0; } [[nodiscard]] - auto thread_index_to_node(std::size_t) const noexcept -> int + constexpr auto thread_index_to_node(std::size_t) const noexcept -> int { return 0; } @@ -312,17 +144,19 @@ namespace experimental::execution struct default_numa_policy { + [[nodiscard]] std::size_t num_nodes() const noexcept { return _node_to_thread_index::get().size(); } + [[nodiscard]] std::size_t num_cpus(int node) const noexcept { return exec::_get_numa_num_cpus(node); } - int bind_to_node(int node) const noexcept + int bind_to_node(int node) const noexcept // NOLINT(modernize-use-nodiscard) { struct ::bitmask *nodes = ::numa_allocate_nodemask(); if (!nodes) @@ -335,6 +169,7 @@ namespace experimental::execution return 0; } + [[nodiscard]] int thread_index_to_node(std::size_t idx) const noexcept { auto const &node_to_thread_index = _node_to_thread_index::get(); @@ -370,8 +205,6 @@ namespace experimental::execution : node_(other.node_) {} - int node_; - void *do_allocate(std::size_t n) { return ::numa_alloc_onnode(n, node_); @@ -393,6 +226,12 @@ namespace experimental::execution } friend bool operator==(numa_allocator const &, numa_allocator const &) noexcept = default; + + private: + template + friend struct numa_allocator; + + int node_; }; class nodemask @@ -406,7 +245,6 @@ namespace experimental::execution public: nodemask() noexcept - : mask_{} { 
 ::copy_bitmask_to_nodemask(::numa_no_nodes_ptr, &mask_); } @@ -453,13 +291,14 @@ namespace experimental::execution } private: - ::nodemask_t mask_; + ::nodemask_t mask_{}; }; } // namespace experimental::execution namespace exec = experimental::execution; #else + namespace experimental::execution { using default_numa_policy = no_numa_policy; @@ -470,32 +309,13 @@ namespace experimental::execution } template - struct numa_allocator + struct numa_allocator : std::allocator { - using pointer = T *; - using const_pointer = T const *; - using value_type = T; + constexpr numa_allocator(int) noexcept {} - explicit numa_allocator(int) noexcept {} - - template - explicit numa_allocator(numa_allocator const &) noexcept + template U> + numa_allocator(numa_allocator const &) noexcept {} - - auto allocate(std::size_t n) -> T * - { - std::allocator alloc{}; - return alloc.allocate(n); - } - - void deallocate(T *p, std::size_t n) - { - std::allocator alloc{}; - alloc.deallocate(p, n); - } - - friend auto - operator==(numa_allocator const &, numa_allocator const &) noexcept -> bool = default; }; class nodemask @@ -538,4 +358,4 @@ namespace experimental::execution namespace exec = experimental::execution; -#endif \ No newline at end of file +#endif diff --git a/include/stdexec/__detail/__any.hpp b/include/stdexec/__detail/__any.hpp index 0ffe8f3ae..3598458ba 100644 --- a/include/stdexec/__detail/__any.hpp +++ b/include/stdexec/__detail/__any.hpp @@ -35,6 +35,8 @@ STDEXEC_PRAGMA_PUSH() STDEXEC_PRAGMA_IGNORE_GNU("-Wredundant-consteval-if") +// NOLINTBEGIN(modernize-use-override) + namespace STDEXEC::__any { @@ -182,7 +184,7 @@ namespace STDEXEC::__any class _BaseInterfaces = __extends<>, size_t _BufferSize = __default_buffer_size, size_t _BufferAlignment = alignof(std::max_align_t)> - struct interface; + struct __interface_base; ////////////////////////////////////////////////////////////////////////////////////////// // __interface_cast @@ -394,7 +396,7 @@ namespace STDEXEC::__any 
private: template