From 2d5cc3a5e815320448c4b12639601649abf20ebf Mon Sep 17 00:00:00 2001 From: "Ryan M. Richard" Date: Thu, 13 Nov 2025 15:54:41 -0600 Subject: [PATCH 01/18] most of eigenbuffer move is done --- CMakeLists.txt | 13 +- include/tensorwrapper/buffer/mdbuffer.hpp | 64 ++ .../tensorwrapper/forward_declarations.hpp | 36 + include/tensorwrapper/shape/smooth_view.hpp | 2 +- .../tensorwrapper/symmetry/permutation.hpp | 30 + .../tensorwrapper/types/mdbuffer_traits.hpp | 52 ++ include/tensorwrapper/types/shape_traits.hpp | 36 + .../backends/eigen/eigen_tensor.hpp | 194 ++++++ .../backends/eigen/eigen_tensor_impl.cpp | 248 +++++++ .../backends/eigen/eigen_tensor_impl.hpp | 176 +++++ .../buffer/detail_/addition_visitor.hpp | 38 ++ .../buffer/detail_/eigen_dispatch.hpp | 60 ++ .../buffer/detail_/mdbuffer_pimpl.hpp | 53 ++ src/tensorwrapper/buffer/mdbuffer.cpp | 56 ++ src/tensorwrapper/shape/smooth_view.cpp | 5 +- .../backends/eigen/eigen_tensor_impl.cpp | 627 ++++++++++++++++++ .../buffer/detail_/addition_visitor.cpp | 38 ++ .../tensorwrapper/symmetry/permutation.cpp | 22 + 18 files changed, 1745 insertions(+), 5 deletions(-) create mode 100644 include/tensorwrapper/buffer/mdbuffer.hpp create mode 100644 include/tensorwrapper/forward_declarations.hpp create mode 100644 include/tensorwrapper/types/mdbuffer_traits.hpp create mode 100644 include/tensorwrapper/types/shape_traits.hpp create mode 100644 src/tensorwrapper/backends/eigen/eigen_tensor.hpp create mode 100644 src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp create mode 100644 src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp create mode 100644 src/tensorwrapper/buffer/detail_/addition_visitor.hpp create mode 100644 src/tensorwrapper/buffer/detail_/eigen_dispatch.hpp create mode 100644 src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp create mode 100644 src/tensorwrapper/buffer/mdbuffer.cpp create mode 100644 tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp create mode 100644 
tests/cxx/unit_tests/tensorwrapper/buffer/detail_/addition_visitor.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2ade5882..2395d44f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,7 @@ include(cmake/get_nwx_cmake.cmake) #Sets the version to whatever git thinks it is include(get_version_from_git) get_version_from_git(tensorwrapper_version "${CMAKE_CURRENT_LIST_DIR}") +set(CMAKE_CXX_STANDARD 20) project(tensorwrapper VERSION "${tensorwrapper_version}" LANGUAGES CXX) include(nwx_versions) @@ -83,7 +84,17 @@ cmaize_find_or_build_optional_dependency( CMAKE_ARGS BUILD_TESTING=OFF ENABLE_EIGEN_SUPPORT=ON ) -set(DEPENDENCIES utilities parallelzone Boost::boost eigen sigma) + +cmaize_find_or_build_dependency( + WeaklyTypedFloat + NAME WeaklyTypedFloat + URL https://www.github.com/NWChemEx/weaklytypedfloat + VERSION master + BUILD_TARGET wtf + FIND_TARGET wtf::wtf +) + +set(DEPENDENCIES utilities parallelzone Boost::boost eigen sigma wtf) if("${ENABLE_CUTENSOR}") include(cmake/FindcuTENSOR.cmake) diff --git a/include/tensorwrapper/buffer/mdbuffer.hpp b/include/tensorwrapper/buffer/mdbuffer.hpp new file mode 100644 index 00000000..72f5c765 --- /dev/null +++ b/include/tensorwrapper/buffer/mdbuffer.hpp @@ -0,0 +1,64 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include + +namespace tensorwrapper::buffer { + +/** @brief A multidimensional (MD) buffer. 
+ * + * This class is a dense multidimensional buffer of floating-point values. + */ +class MDBuffer { +private: + using traits_type = types::ClassTraits; + +public: + /// Add types to public API + ///@{ + using buffer_type = typename traits_type::buffer_type; + using pimpl_type = typename traits_type::pimpl_type; + using pimpl_pointer = typename traits_type::pimpl_pointer; + using rank_type = typename traits_type::rank_type; + using shape_type = typename traits_type::shape_type; + ///@} + + MDBuffer() noexcept; + + template + MDBuffer(shape_type shape, std::vector elements) { + MDBuffer(std::move(shape), buffer_type(std::move(elements))); + } + + MDBuffer(shape_type shape, buffer_type buffer); + + rank_type rank() const; + +private: + explicit MDBuffer(pimpl_pointer pimpl) noexcept; + + bool has_pimpl_() const noexcept; + + void assert_pimpl_() const; + + pimpl_type& pimpl_(); + const pimpl_type& pimpl_() const; + + pimpl_pointer m_pimpl_; +}; + +} // namespace tensorwrapper::buffer diff --git a/include/tensorwrapper/forward_declarations.hpp b/include/tensorwrapper/forward_declarations.hpp new file mode 100644 index 00000000..16c51064 --- /dev/null +++ b/include/tensorwrapper/forward_declarations.hpp @@ -0,0 +1,36 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +namespace tensorwrapper { + +namespace buffer { +namespace detail_ { +class MDBufferPIMPL; +} + +class MDBuffer; + +} // namespace buffer + +namespace shape { + +class Smooth; + +} // namespace shape + +} // namespace tensorwrapper diff --git a/include/tensorwrapper/shape/smooth_view.hpp b/include/tensorwrapper/shape/smooth_view.hpp index f26d412e..b65cb7c3 100644 --- a/include/tensorwrapper/shape/smooth_view.hpp +++ b/include/tensorwrapper/shape/smooth_view.hpp @@ -198,7 +198,7 @@ class SmoothView { * * @throw None No throw guarantee. */ - bool operator==(const SmoothView& rhs) const noexcept; + bool operator==(const SmoothView& rhs) const noexcept; /** @brief Is *this different from @p rhs? * diff --git a/include/tensorwrapper/symmetry/permutation.hpp b/include/tensorwrapper/symmetry/permutation.hpp index e749402b..ca5b349f 100644 --- a/include/tensorwrapper/symmetry/permutation.hpp +++ b/include/tensorwrapper/symmetry/permutation.hpp @@ -161,6 +161,36 @@ class Permutation : public Operation { */ mode_index_type size() const noexcept { return m_cycles_.size(); } + /** @brief Permutes the objects in @p input according to *this. + * + * @tparam T The type of a container-like object. It must support size(), + * and operator[]. + * + * @param[in] input The object to permute. + * + * @return A copy of @p input with its elements permuted according to + * *this. + * + * @throw std::runtime_error if the size of @p input does not match the + * rank of *this. Strong throw guarantee. 
+ */ + template + T apply(T input) const { + if(input.size() != m_rank_) + throw std::runtime_error( + "Input size does not match permutation rank"); + for(const auto& cycle : m_cycles_) { + if(cycle.size() < 2) continue; + T buffer = input; + for(std::size_t i = 0; i < cycle.size(); ++i) { + auto from = cycle[i]; + auto to = cycle[(i + 1) % cycle.size()]; + input[to] = buffer[from]; + } + } + return input; + } + // ------------------------------------------------------------------------- // -- Utility methods // ------------------------------------------------------------------------- diff --git a/include/tensorwrapper/types/mdbuffer_traits.hpp b/include/tensorwrapper/types/mdbuffer_traits.hpp new file mode 100644 index 00000000..27c74421 --- /dev/null +++ b/include/tensorwrapper/types/mdbuffer_traits.hpp @@ -0,0 +1,52 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once +#include +#include +#include +#include + +namespace tensorwrapper::types { + +struct MDBufferTraitsCommon { + using value_type = wtf::fp::Float; + using const_reference = wtf::fp::FloatView; + using buffer_type = wtf::buffer::FloatBuffer; + using const_buffer_view = wtf::buffer::BufferView; + using shape_type = shape::Smooth; + using rank_type = typename shape_type::rank_type; + using pimpl_type = tensorwrapper::buffer::detail_::MDBufferPIMPL; + using pimpl_pointer = std::unique_ptr; +}; + +template<> +struct ClassTraits + : public MDBufferTraitsCommon { + using reference = wtf::fp::FloatView; + + using buffer_view = wtf::buffer::BufferView; + using const_buffer_view = wtf::buffer::BufferView; +}; + +template<> +struct ClassTraits + : public MDBufferTraitsCommon { + using reference = wtf::fp::FloatView; + using buffer_view = wtf::buffer::BufferView; +}; + +} // namespace tensorwrapper::types diff --git a/include/tensorwrapper/types/shape_traits.hpp b/include/tensorwrapper/types/shape_traits.hpp new file mode 100644 index 00000000..00d7c6dd --- /dev/null +++ b/include/tensorwrapper/types/shape_traits.hpp @@ -0,0 +1,36 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once +#include +#include +#include + +namespace tensorwrapper::types { + +struct ShapeTraitsCommon { + using size_type = std::size_t; + using rank_type = unsigned short; +}; + +template<> +struct ClassTraits : public ShapeTraitsCommon {}; + +template<> +struct ClassTraits + : public ShapeTraitsCommon {}; + +} // namespace tensorwrapper::types diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor.hpp b/src/tensorwrapper/backends/eigen/eigen_tensor.hpp new file mode 100644 index 00000000..adf6ec43 --- /dev/null +++ b/src/tensorwrapper/backends/eigen/eigen_tensor.hpp @@ -0,0 +1,194 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include +#include +#include +#include +#include + +namespace tensorwrapper::backends::eigen { + +/** @brief API for interacting with Eigen's tensor object. + * + * @tparam FloatType The floating-point type stored in the tensor. + * + * This class defines the API for interacting with Eigen's tensor objects. + * Unfortunately, Eigen's tensor objects are templated on the rank of the + * tensor (and some other stuff) which makes it hard to deal with them + * generically. This class gets the templating down to just the floating-point + * type. + * + * N.b. this class wraps Eigen::TensorMap objects, not Eigen::Tensor objects + * so as to avoid needing to transfer the data to Eigen. 
Idea is these classes + * can be made on-demand since they just wrap pointers. + */ +template +class EigenTensor { +private: + /// Type of *this + using my_type = EigenTensor; + +public: + /// Type of an element in *this + using value_type = FloatType; + + /// Type of a reference to an element in *this + using reference = value_type&; + + /// Type of a read-only reference to an element in *this + using const_reference = const value_type&; + + /// Type used to express the shape of *this + using shape_type = shape::Smooth; + + /// Type of a view acting like a read-only shape_type + using const_shape_reference = shape::SmoothView; + + /// Type Eigen uses to express tensor rank + using eigen_rank_type = unsigned int; + + /// Type used to express sizes and extents + using size_type = std::size_t; + + /// Type used toe express multi-dimensional indices + using index_vector = std::vector; + + /// Type used to express strings + using string_type = std::string; + + /// Type used to specify a permutation + using permutation_type = symmetry::Permutation; + + /// Type of a read-only reference to an object of permutation_type + using const_permutation_reference = const permutation_type&; + + /** @brief Retrieves the rank of the wrapped tensor. + * + * @return The rank of the wrapped tensor. 
+ */ + eigen_rank_type rank() const noexcept { return rank_(); } + + size_type size() const noexcept { return size_(); } + + size_type extent(eigen_rank_type i) const { + assert(i < rank()); + return extent_(i); + } + + const_reference get_elem(index_vector index) const { + assert(index.size() == rank()); + return get_elem_(std::move(index)); + } + + void set_elem(index_vector index, value_type new_value) { + assert(index.size() == rank()); + set_elem_(index, new_value); + } + + void fill(value_type value) { fill_(std::move(value)); } + + string_type to_string() const { return to_string_(); } + + std::ostream& add_to_stream(std::ostream& os) const { + return add_to_stream_(os); + } + + void addition_assignment(const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const EigenTensor& lhs, const EigenTensor& rhs) { + return addition_assignment_(lhs_permute, rhs_permute, lhs, rhs); + } + + void subtraction_assignment(const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const EigenTensor& lhs, + const EigenTensor& rhs) { + return subtraction_assignment_(lhs_permute, rhs_permute, lhs, rhs); + } + + void hadamard_assignment(const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const EigenTensor& lhs, const EigenTensor& rhs) { + return hadamard_assignment_(lhs_permute, rhs_permute, lhs, rhs); + } + + void permute_assignment(const_permutation_reference rhs_permute, + const EigenTensor& rhs) { + return permute_assignment_(rhs_permute, rhs); + } + + void scalar_multiplication(const_permutation_reference rhs_permute, + FloatType scalar, const EigenTensor& rhs) { + return scalar_multiplication_(rhs_permute, scalar, rhs); + } + + // void contraction_assignment(label_type this_labels, label_type + // lhs_labels, + // label_type rhs_labels, + // const_shape_reference result_shape, + // const_pimpl_reference lhs, + // const_pimpl_reference rhs) { + // 
contraction_assignment_(std::move(this_labels), + // std::move(lhs_labels), + // std::move(rhs_labels), result_shape, lhs, + // rhs); + // } + +protected: + EigenTensor() noexcept = default; + + virtual eigen_rank_type rank_() const noexcept = 0; + virtual size_type size_() const = 0; + virtual size_type extent_(eigen_rank_type i) const = 0; + virtual const_reference get_elem_(index_vector index) const = 0; + virtual void set_elem_(index_vector index, value_type new_value) = 0; + virtual void fill_(value_type value) = 0; + virtual string_type to_string_() const = 0; + virtual std::ostream& add_to_stream_(std::ostream& os) const = 0; + + virtual void addition_assignment_(const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const EigenTensor& lhs, + const EigenTensor& rhs) = 0; + + virtual void subtraction_assignment_( + const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, const EigenTensor& lhs, + const EigenTensor& rhs) = 0; + + virtual void hadamard_assignment_(const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const EigenTensor& lhs, + const EigenTensor& rhs) = 0; + + virtual void permute_assignment_(const_permutation_reference rhs_permute, + const EigenTensor& rhs) = 0; + + virtual void scalar_multiplication_(const_permutation_reference rhs_permute, + FloatType scalar, + const EigenTensor& rhs) = 0; + + // virtual void contraction_assignment_(label_type this_labels, + // label_type lhs_labels, + // label_type rhs_labels, + // const_shape_reference result_shape, + // const_pimpl_reference lhs, + // const_pimpl_reference rhs) = 0; +}; + +} // namespace tensorwrapper::backends::eigen diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp new file mode 100644 index 00000000..53072b01 --- /dev/null +++ b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -0,0 +1,248 @@ +/* + * Copyright 2025 
NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// #include "../contraction_planner.hpp" +#include "eigen_tensor_impl.hpp" +#include +#include + +#ifdef ENABLE_CUTENSOR +#include "eigen_tensor.cuh" +#endif + +namespace tensorwrapper::backends::eigen { + +std::vector to_eigen_permutation(const symmetry::Permutation& perm) { + std::vector eigen_perm(perm.rank()); + std::iota(eigen_perm.begin(), eigen_perm.end(), 0); + return perm.apply(std::move(eigen_perm)); +} + +#define TPARAMS template +#define EIGEN_TENSOR EigenTensorImpl + +TPARAMS +auto EIGEN_TENSOR::get_elem_(index_vector index) const -> const_reference { + return unwrap_vector_(std::move(index), std::make_index_sequence()); +} + +TPARAMS +void EIGEN_TENSOR::set_elem_(index_vector index, value_type new_value) { + unwrap_vector_(std::move(index), std::make_index_sequence()) = + new_value; +} + +TPARAMS +void EIGEN_TENSOR::fill_(value_type value) { + std::fill(m_tensor_.data(), m_tensor_.data() + m_tensor_.size(), value); +} + +TPARAMS +auto EIGEN_TENSOR::to_string_() const -> string_type { + std::stringstream ss; + add_to_stream_(ss); + return ss.str(); +} + +TPARAMS +std::ostream& EIGEN_TENSOR::add_to_stream_(std::ostream& os) const { + os << std::fixed << std::setprecision(16); + return os << m_tensor_.format(Eigen::TensorIOFormat::Numpy()); +} + +TPARAMS +void EIGEN_TENSOR::addition_assignment_(const_permutation_reference lhs_permute, + const_permutation_reference 
rhs_permute, + const base_type& lhs, + const base_type& rhs) { + auto lambda = [](auto&& lhs, auto&& rhs) { return lhs + rhs; }; + element_wise_op_(lambda, lhs_permute, rhs_permute, lhs, rhs); +} + +TPARAMS +void EIGEN_TENSOR::subtraction_assignment_( + const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, const base_type& lhs, + const base_type& rhs) { + auto lambda = [](auto&& lhs, auto&& rhs) { return lhs - rhs; }; + element_wise_op_(lambda, lhs_permute, rhs_permute, lhs, rhs); +} + +TPARAMS +void EIGEN_TENSOR::hadamard_assignment_(const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const base_type& lhs, + const base_type& rhs) { + auto lambda = [](auto&& lhs, auto&& rhs) { return lhs * rhs; }; + element_wise_op_(lambda, lhs_permute, rhs_permute, lhs, rhs); +} + +TPARAMS +void EIGEN_TENSOR::permute_assignment_(const_permutation_reference rhs_permute, + const base_type& rhs) { + const auto* rhs_down = dynamic_cast(&rhs); + + if constexpr(Rank <= 1) { + m_tensor_ = rhs_down->m_tensor_; + return; + } else { + auto eigen_rhs_permute = to_eigen_permutation(rhs_permute); + auto rhs_shuffled = rhs_down->m_tensor_.shuffle(eigen_rhs_permute); + m_tensor_ = rhs_shuffled; + } +} + +TPARAMS +void EIGEN_TENSOR::scalar_multiplication_( + const_permutation_reference rhs_permute, FloatType scalar, + const base_type& rhs) { + const auto* rhs_down = dynamic_cast(&rhs); + + if constexpr(Rank <= 1) { + m_tensor_ = rhs_down->m_tensor_ * scalar; + return; + } else { + auto eigen_rhs_permute = to_eigen_permutation(rhs_permute); + auto rhs_shuffled = rhs_down->m_tensor_.shuffle(eigen_rhs_permute); + m_tensor_ = rhs_shuffled * scalar; + } +} + +TPARAMS +template +void EIGEN_TENSOR::element_wise_op_(OperationType op, + const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const base_type& lhs, + const base_type& rhs) { + const auto* lhs_down = dynamic_cast(&lhs); + const auto* rhs_down = 
dynamic_cast(&rhs); + + if constexpr(Rank <= 1) { + m_tensor_ = op(lhs_down->m_tensor_, rhs_down->m_tensor_); + return; + } else { + auto eigen_lhs_permute = to_eigen_permutation(lhs_permute); + auto eigen_rhs_permute = to_eigen_permutation(rhs_permute); + auto lhs_shuffled = lhs_down->m_tensor_.shuffle(eigen_lhs_permute); + auto rhs_shuffled = rhs_down->m_tensor_.shuffle(eigen_rhs_permute); + m_tensor_ = op(lhs_shuffled, rhs_shuffled); + } +} + +// template +// auto matrix_size(TensorType&& t, std::size_t row_ranks) { +// std::size_t nrows = 1; +// for(std::size_t i = 0; i < row_ranks; ++i) nrows *= t.extent(i); + +// std::size_t ncols = 1; +// const auto rank = t.rank(); +// for(std::size_t i = row_ranks; i < rank; ++i) ncols *= t.extent(i); +// return std::make_pair(nrows, ncols); +// } + +// TPARAMS +// void EIGEN_TENSOR::contraction_assignment_(label_type olabels, +// label_type llabels, +// label_type rlabels, +// const_shape_reference +// result_shape, +// const_pimpl_reference lhs, +// const_pimpl_reference rhs) { +// ContractionPlanner plan(olabels, llabels, rlabels); + +// #ifdef ENABLE_CUTENSOR +// // Prepare m_tensor_ +// m_tensor_ = allocate_from_shape_(result_shape.as_smooth(), +// std::make_index_sequence()); +// m_tensor_.setZero(); + +// // Dispatch to cuTENSOR +// cutensor_contraction(olabels, llabels, rlabels, +// result_shape, lhs, +// rhs, m_tensor_); +// #else +// auto lt = lhs.clone(); +// auto rt = rhs.clone(); +// lt->permute_assignment(plan.lhs_permutation(), llabels, lhs); +// rt->permute_assignment(plan.rhs_permutation(), rlabels, rhs); + +// const auto [lrows, lcols] = matrix_size(*lt, plan.lhs_free().size()); +// const auto [rrows, rcols] = matrix_size(*rt, +// plan.rhs_dummy().size()); + +// // Work out the types of the matrix amd a map +// constexpr auto e_dyn = ::Eigen::Dynamic; +// constexpr auto e_row_major = ::Eigen::RowMajor; +// using matrix_t = ::Eigen::Matrix; using map_t = ::Eigen::Map; + +// eigen::data_type buffer(lrows, 
rcols); + +// map_t lmatrix(lt->get_mutable_data(), lrows, lcols); +// map_t rmatrix(rt->get_mutable_data(), rrows, rcols); +// map_t omatrix(buffer.data(), lrows, rcols); +// omatrix = lmatrix * rmatrix; + +// auto mlabels = plan.result_matrix_labels(); +// auto oshape = result_shape(olabels); + +// // oshapes is the final shape, permute it to shape omatrix is +// currently in auto temp_shape = result_shape.clone(); +// temp_shape->permute_assignment(mlabels, oshape); +// auto mshape = temp_shape->as_smooth(); + +// auto m_to_o = olabels.permutation(mlabels); // N.b. Eigen def is +// inverse us + +// std::array out_size; +// std::array m_to_o_array; +// for(std::size_t i = 0; i < Rank; ++i) { +// out_size[i] = mshape.extent(i); +// m_to_o_array[i] = m_to_o[i]; +// } + +// auto tensor = buffer.reshape(out_size); +// if constexpr(Rank > 0) { +// m_tensor_ = tensor.shuffle(m_to_o_array); +// } else { +// m_tensor_ = tensor; +// } +// #endif +// mark_for_rehash_(); +// } + +#undef EIGEN_TENSOR +#undef TPARAMS + +#define DEFINE_EIGEN_TENSOR(TYPE) \ + template class EigenTensorImpl; \ + template class EigenTensorImpl; \ + template class EigenTensorImpl; \ + template class EigenTensorImpl; \ + template class EigenTensorImpl; \ + template class EigenTensorImpl; \ + template class EigenTensorImpl; \ + template class EigenTensorImpl; \ + template class EigenTensorImpl; \ + template class EigenTensorImpl; \ + template class EigenTensorImpl + +TW_APPLY_FLOATING_POINT_TYPES(DEFINE_EIGEN_TENSOR); + +#undef DEFINE_EIGEN_TENSOR + +} // namespace tensorwrapper::backends::eigen diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp new file mode 100644 index 00000000..6b13039c --- /dev/null +++ b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp @@ -0,0 +1,176 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "eigen_tensor.hpp" +#include +#include +#include +#include +#include +#include + +namespace tensorwrapper::backends::eigen { + +/// Implements EigenTensor by wrapping eigen::TensorMap +template +class EigenTensorImpl : public EigenTensor { +private: + /// Type of *this + using my_type = EigenTensorImpl; + + /// Type *this inherits from + using base_type = EigenTensor; + +public: + using eigen_tensor_type = Eigen::Tensor; + using eigen_data_type = Eigen::TensorMap; + using eigen_reference = eigen_data_type&; + using const_eigen_reference = const eigen_data_type&; + + ///@{ + using typename base_type::const_permutation_reference; + using typename base_type::const_reference; + using typename base_type::const_shape_reference; + using typename base_type::eigen_rank_type; + using typename base_type::index_vector; + using typename base_type::reference; + using typename base_type::size_type; + using typename base_type::string_type; + using typename base_type::value_type; + ///@} + + EigenTensorImpl(std::span data, const_shape_reference shape) : + m_tensor_( + make_from_shape_(data, shape, std::make_index_sequence())) {} + + EigenTensorImpl permute(const_permutation_reference perm) const; + +protected: + /// Implement rank by returning template parameter + eigen_rank_type rank_() const noexcept override { return Rank; } + + /// Calls Eigen's size() method to implement size() + size_type size_() const noexcept override { return m_tensor_.size(); } + + /// Calls Eigen's dimension(i) method 
to implement extent(i) + size_type extent_(eigen_rank_type i) const override { + return m_tensor_.dimension(i); + } + + /// Unwraps index vector into Eigen's operator() to get element + const_reference get_elem_(index_vector index) const override; + + /// Unwraps index vector into Eigen's operator() to set element + void set_elem_(index_vector index, value_type new_value) override; + + /// Calls std::fill to set the values + void fill_(value_type value) override; + + /// Calls add_to_stream_ on a stringstream to implement to_string + string_type to_string_() const override; + + /// Relies on Eigen's operator<< to add to stream + std::ostream& add_to_stream_(std::ostream& os) const override; + + void addition_assignment_(const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const base_type& lhs, + const base_type& rhs) override; + + void subtraction_assignment_(const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const base_type& lhs, + const base_type& rhs) override; + + void hadamard_assignment_(const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const base_type& lhs, + const base_type& rhs) override; + + void permute_assignment_(const_permutation_reference rhs_permute, + const base_type& rhs) override; + + void scalar_multiplication_(const_permutation_reference rhs_permute, + FloatType scalar, + const base_type& rhs) override; + + // void contraction_assignment_(label_type this_labels, label_type + // lhs_labels, + // label_type rhs_labels, + // const_shape_reference result_shape, + // const_pimpl_reference lhs, + // const_pimpl_reference rhs) override; + + // void permute_assignment_(label_type this_labels, label_type rhs_labels, + // const_pimpl_reference rhs) override; + + // void scalar_multiplication_(label_type this_labels, label_type + // rhs_labels, + // FloatType scalar, + // const_pimpl_reference rhs) override; + +private: + // Code factorization for 
implementing element-wise operations + template + void element_wise_op_(OperationType op, + const_permutation_reference lhs_permute, + const_permutation_reference rhs_permute, + const base_type& lhs, const base_type& rhs); + + // Handles TMP needed to create an Eigen TensorMap from a Smooth object + template + auto make_from_shape_(std::span data, + const_shape_reference shape, + std::index_sequence) { + return eigen_data_type(data.data(), shape.extent(I)...); + } + + // Gets an element from the Eigen Tensor by unwrapping a std::vector + template + reference unwrap_vector_(index_vector index, std::index_sequence) { + return m_tensor_(tensorwrapper::detail_::to_long(index.at(I))...); + } + + // Same as mutable version, but result is read-only + template + const_reference unwrap_vector_(index_vector index, + std::index_sequence) const { + return m_tensor_(tensorwrapper::detail_::to_long(index.at(I))...); + } + + // The Eigen tensor *this wraps + eigen_data_type m_tensor_; +}; + +#define DECLARE_EIGEN_TENSOR(TYPE) \ + extern template class EigenTensorImpl; \ + extern template class EigenTensorImpl; \ + extern template class EigenTensorImpl; \ + extern template class EigenTensorImpl; \ + extern template class EigenTensorImpl; \ + extern template class EigenTensorImpl; \ + extern template class EigenTensorImpl; \ + extern template class EigenTensorImpl; \ + extern template class EigenTensorImpl; \ + extern template class EigenTensorImpl; \ + extern template class EigenTensorImpl + +TW_APPLY_FLOATING_POINT_TYPES(DECLARE_EIGEN_TENSOR); + +#undef DECLARE_EIGEN_TENSOR + +} // namespace tensorwrapper::backends::eigen diff --git a/src/tensorwrapper/buffer/detail_/addition_visitor.hpp b/src/tensorwrapper/buffer/detail_/addition_visitor.hpp new file mode 100644 index 00000000..4e021e8a --- /dev/null +++ b/src/tensorwrapper/buffer/detail_/addition_visitor.hpp @@ -0,0 +1,38 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include + +namespace tensorwrapper::buffer::detail_ { + +/** @brief Dispatches to the appropriate backend based on the FP type. + * + * + * + */ +class AdditionVisitor { +public: + // AdditionVisitor(shape, permutation, shape, permutation) + template + void operator()(std::span lhs, std::span rhs) { + // auto lhs_wrapped = backends::eigen::wrap_span(lhs); + // auto rhs_wrapped = backends::eigen::wrap_span(rhs); + for(std::size_t i = 0; i < lhs.size(); ++i) lhs[i] += rhs[i]; + } +}; + +} // namespace tensorwrapper::buffer::detail_ diff --git a/src/tensorwrapper/buffer/detail_/eigen_dispatch.hpp b/src/tensorwrapper/buffer/detail_/eigen_dispatch.hpp new file mode 100644 index 00000000..90c26509 --- /dev/null +++ b/src/tensorwrapper/buffer/detail_/eigen_dispatch.hpp @@ -0,0 +1,60 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once +#include +#include + +namespace tensorwrapper::buffer::detail_ { + +constexpr std::size_t MaxEigenRank = 8; + +template +using eigen_tensor_type = eigen::Tensor; + +template +using eigen_tensor_map = eigen::TensorMap>; + +template +auto wrap_tensor(std::span s, const shape::Smooth& shape) { + using tensor_type = eigen::Tensor; + using map_type = eigen::TensorMap; + + if constexpr(Rank > MaxEigenRank) { + static_assert( + Rank <= MaxEigenRank, + "Eigen tensors of rank > MaxEigenRank are not supported."); + } else { + if(shape.rank() == Rank) return variant_type(map_type(s)); + } +} + +template +auto eigen_dispatch_impl(VisitorType&& visitor, + eigen::TensorMap>& A, + Args&&... args) { + return visitor(A, std::forward(args)...); +} + +template +auto eigen_tensor_dispatch(std::span s, shape::Smooth shape, + Args&&... args) { + using tensor_type = eigen::Tensor; +} + +} // namespace tensorwrapper::buffer::detail_ diff --git a/src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp b/src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp new file mode 100644 index 00000000..6f410098 --- /dev/null +++ b/src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp @@ -0,0 +1,53 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once +#include +#include + +namespace tensorwrapper::buffer::detail_ { + +class MDBufferPIMPL { +public: + using parent_type = tensorwrapper::buffer::MDBuffer; + using traits_type = tensorwrapper::types::ClassTraits; + + /// Add types to public API + ///@{ + using value_type = typename traits_type::value_type; + using rank_type = typename traits_type::rank_type; + using buffer_type = typename traits_type::buffer_type; + using shape_type = typename traits_type::shape_type; + ///@} + + MDBufferPIMPL(shape_type shape, buffer_type buffer) noexcept : + m_shape_(std::move(shape)), m_buffer_(std::move(buffer)) {} + + auto& shape() noexcept { return m_shape_; } + + const auto& shape() const noexcept { return m_shape_; } + + auto& buffer() noexcept { return m_buffer_; } + + const auto& buffer() const noexcept { return m_buffer_; } + +private: + shape_type m_shape_; + + buffer_type m_buffer_; +}; + +} // namespace tensorwrapper::buffer::detail_ diff --git a/src/tensorwrapper/buffer/mdbuffer.cpp b/src/tensorwrapper/buffer/mdbuffer.cpp new file mode 100644 index 00000000..fe92be9c --- /dev/null +++ b/src/tensorwrapper/buffer/mdbuffer.cpp @@ -0,0 +1,56 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "detail_/addition_visitor.hpp" +#include "detail_/mdbuffer_pimpl.hpp" +#include +#include + +namespace tensorwrapper::buffer { + +MDBuffer::MDBuffer() noexcept : m_pimpl_(nullptr) {} + +MDBuffer::MDBuffer(shape_type shape, buffer_type buffer) : + MDBuffer(std::make_unique(std::move(shape), + std::move(buffer))) {} + +MDBuffer::MDBuffer(pimpl_pointer pimpl) noexcept : m_pimpl_(std::move(pimpl)) {} + +auto MDBuffer::rank() const -> rank_type { + assert_pimpl_(); + return m_pimpl_->shape().rank(); +} + +bool MDBuffer::has_pimpl_() const noexcept { return m_pimpl_ != nullptr; } + +void MDBuffer::assert_pimpl_() const { + if(!has_pimpl_()) { + throw std::runtime_error( + "MDBuffer has no PIMPL. Was it default constructed?"); + } +} + +auto MDBuffer::pimpl_() -> pimpl_type& { + assert_pimpl_(); + return *m_pimpl_; +} + +auto MDBuffer::pimpl_() const -> const pimpl_type& { + assert_pimpl_(); + return *m_pimpl_; +} + +} // namespace tensorwrapper::buffer diff --git a/src/tensorwrapper/shape/smooth_view.cpp b/src/tensorwrapper/shape/smooth_view.cpp index 78aa5aa6..ccdadc3a 100644 --- a/src/tensorwrapper/shape/smooth_view.cpp +++ b/src/tensorwrapper/shape/smooth_view.cpp @@ -70,11 +70,10 @@ void SMOOTH_VIEW::swap(SmoothView& rhs) noexcept { } TPARAMS -bool SMOOTH_VIEW::operator==( - const SmoothView& rhs) const noexcept { +bool SMOOTH_VIEW::operator==(const SmoothView& rhs) const noexcept { if(has_pimpl_() != rhs.has_pimpl_()) return false; if(!has_pimpl_()) return true; - return m_pimpl_->as_const()->are_equal(*rhs.m_pimpl_); + return m_pimpl_->are_equal(*rhs.m_pimpl_); } TPARAMS diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp new file mode 100644 index 00000000..d6dbb9de --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -0,0 +1,627 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../../testing/testing.hpp" +#include + +using namespace tensorwrapper; +using namespace tensorwrapper::backends::eigen; + +TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { + using scalar_type = EigenTensorImpl; + using vector_type = EigenTensorImpl; + using matrix_type = EigenTensorImpl; + using tensor3_type = EigenTensorImpl; + using tensor4_type = EigenTensorImpl; + using permutation_type = typename scalar_type::permutation_type; + + std::vector data(16); + for(std::size_t i = 0; i < data.size(); ++i) + data[i] = static_cast(i); + + std::span data_span(data.data(), data.size()); + + using shape_type = scalar_type::shape_type; + + shape_type scalar_shape({}); + shape_type vector_shape({16}); + shape_type matrix_shape({4, 4}); + shape_type tensor3_shape({2, 2, 4}); + shape_type tensor4_shape({2, 2, 2, 2}); + + scalar_type scalar(data_span, scalar_shape); + vector_type vector(data_span, vector_shape); + matrix_type matrix(data_span, matrix_shape); + tensor3_type tensor3(data_span, tensor3_shape); + tensor4_type tensor4(data_span, tensor4_shape); + + using pair_index = std::pair; + using triple_index = std::tuple; + using quad_index = + std::tuple; + + std::vector matrix_indices; + for(std::size_t i = 0; i < 4; ++i) + for(std::size_t j = 0; j < 4; ++j) matrix_indices.emplace_back(i, j); + + std::vector tensor3_indices; + for(std::size_t i = 0; i < 2; ++i) + for(std::size_t j = 0; j < 2; ++j) 
+ for(std::size_t k = 0; k < 4; ++k) + tensor3_indices.emplace_back(i, j, k); + + std::vector tensor4_indices; + for(std::size_t i = 0; i < 2; ++i) + for(std::size_t j = 0; j < 2; ++j) + for(std::size_t k = 0; k < 2; ++k) + for(std::size_t l = 0; l < 2; ++l) + tensor4_indices.emplace_back(i, j, k, l); + + SECTION("rank") { + REQUIRE(scalar.rank() == 0); + REQUIRE(vector.rank() == 1); + REQUIRE(matrix.rank() == 2); + REQUIRE(tensor3.rank() == 3); + REQUIRE(tensor4.rank() == 4); + } + + SECTION("size") { + REQUIRE(scalar.size() == 1); + REQUIRE(vector.size() == 16); + REQUIRE(matrix.size() == 16); + REQUIRE(tensor3.size() == 16); + REQUIRE(tensor4.size() == 16); + } + + SECTION("extent") { + REQUIRE(vector.extent(0) == 16); + + REQUIRE(matrix.extent(0) == 4); + REQUIRE(matrix.extent(1) == 4); + + REQUIRE(tensor3.extent(0) == 2); + REQUIRE(tensor3.extent(1) == 2); + REQUIRE(tensor3.extent(2) == 4); + + REQUIRE(tensor4.extent(0) == 2); + REQUIRE(tensor4.extent(1) == 2); + REQUIRE(tensor4.extent(2) == 2); + REQUIRE(tensor4.extent(3) == 2); + } + + SECTION("get_elem") { + REQUIRE(scalar.get_elem({}) == data[0]); + + REQUIRE(vector.get_elem({0}) == data[0]); + REQUIRE(vector.get_elem({15}) == data[15]); + + REQUIRE(matrix.get_elem({0, 0}) == data[0]); + REQUIRE(matrix.get_elem({3, 3}) == data[15]); + + REQUIRE(tensor3.get_elem({0, 0, 0}) == data[0]); + REQUIRE(tensor3.get_elem({1, 1, 3}) == data[15]); + + REQUIRE(tensor4.get_elem({0, 0, 0, 0}) == data[0]); + REQUIRE(tensor4.get_elem({1, 1, 1, 1}) == data[15]); + } + + SECTION("set_elem") { + TestType corr(42); + scalar.set_elem({}, 42); + REQUIRE(scalar.get_elem({}) == corr); + + vector.set_elem({5}, 42); + REQUIRE(vector.get_elem({5}) == corr); + + matrix.set_elem({2, 2}, 42); + REQUIRE(matrix.get_elem({2, 2}) == corr); + + tensor3.set_elem({1, 0, 3}, 42); + REQUIRE(tensor3.get_elem({1, 0, 3}) == corr); + + tensor4.set_elem({0, 1, 1, 0}, 42); + REQUIRE(tensor4.get_elem({0, 1, 1, 0}) == corr); + } + + SECTION("fill") { + 
TestType corr(7); + SECTION("scalar") { + scalar.fill(corr); + REQUIRE(scalar.get_elem({}) == corr); + } + + SECTION("vector") { + vector.fill(corr); + for(std::size_t i = 0; i < vector.size(); ++i) + REQUIRE(vector.get_elem({i}) == corr); + } + + SECTION("matrix") { + matrix.fill(corr); + for(std::size_t i = 0; i < matrix.extent(0); ++i) + for(std::size_t j = 0; j < matrix.extent(1); ++j) + REQUIRE(matrix.get_elem({i, j}) == corr); + } + + SECTION("rank 3 tensor") { + tensor3.fill(corr); + for(std::size_t i = 0; i < tensor3.extent(0); ++i) + for(std::size_t j = 0; j < tensor3.extent(1); ++j) + for(std::size_t k = 0; k < tensor3.extent(2); ++k) + REQUIRE(tensor3.get_elem({i, j, k}) == corr); + } + + SECTION("rank 4 tensor") { + tensor4.fill(corr); + for(std::size_t i = 0; i < tensor4.extent(0); ++i) + for(std::size_t j = 0; j < tensor4.extent(1); ++j) + for(std::size_t k = 0; k < tensor4.extent(2); ++k) + for(std::size_t l = 0; l < tensor4.extent(3); ++l) + REQUIRE(tensor4.get_elem({i, j, k, l}) == corr); + } + } + + SECTION("addition_assignment") { + std::vector result_data(16, TestType{0}); + std::span result_data_span(result_data.data(), + result_data.size()); + + auto lambda = [](TestType a, TestType b) { return a + b; }; + + SECTION("scalar") { + permutation_type i(0); + scalar_type result(result_data_span, scalar_shape); + result.addition_assignment(i, i, scalar, scalar); + REQUIRE(result.get_elem({}) == lambda(data[0], data[0])); + } + + SECTION("vector") { + permutation_type i(1); + vector_type result(result_data_span, vector_shape); + result.addition_assignment(i, i, vector, vector); + for(std::size_t i = 0; i < result.size(); ++i) + REQUIRE(result.get_elem({i}) == lambda(data[i], data[i])); + } + + SECTION("matrix") { + permutation_type i(2); + matrix_type result(result_data_span, matrix_shape); + result.addition_assignment(i, i, matrix, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + lambda(data[i * 4 + j], data[i * 4 
+ j])); + + permutation_type p{{1, 0}}; + result.addition_assignment(p, i, matrix, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + lambda(data[j * 4 + i], data[i * 4 + j])); + + result.addition_assignment(p, p, matrix, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + lambda(data[j * 4 + i], data[j * 4 + i])); + } + + SECTION("rank 3 tensor") { + permutation_type i(3); + tensor3_type result(result_data_span, tensor3_shape); + result.addition_assignment(i, i, tensor3, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE( + result.get_elem({i, j, k}) == + lambda(data[i * 8 + j * 4 + k], data[i * 8 + j * 4 + k])); + + permutation_type p({1, 0, 2}); + result.addition_assignment(p, i, tensor3, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE( + result.get_elem({i, j, k}) == + lambda(data[j * 8 + i * 4 + k], data[i * 8 + j * 4 + k])); + + result.addition_assignment(p, p, tensor3, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE( + result.get_elem({i, j, k}) == + lambda(data[j * 8 + i * 4 + k], data[j * 8 + i * 4 + k])); + } + + SECTION("rank 4 tensor") { + permutation_type i(4); + tensor4_type result(result_data_span, tensor4_shape); + result.addition_assignment(i, i, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[i * 8 + j * 4 + k * 2 + l], + data[i * 8 + j * 4 + k * 2 + l])); + + permutation_type p({1, 0, 2, 3}); + result.addition_assignment(p, i, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[j * 8 + i * 4 + k * 2 + l], + data[i * 8 + j * 4 + k * 2 + l])); + + permutation_type p1({0, 1, 3, 2}); + result.addition_assignment(p1, i, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[i * 8 + j * 4 + l * 2 + k], + data[i * 8 + j * 4 + k * 2 + l])); + + 
permutation_type p2({2, 3, 0, 1}); + result.addition_assignment(p2, p, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[k * 8 + l * 4 + i * 2 + j], + data[j * 8 + i * 4 + k * 2 + l])); + } + } + + SECTION("subtraction_assignment") { + std::vector result_data(16, TestType{0}); + std::span result_data_span(result_data.data(), + result_data.size()); + + auto lambda = [](TestType a, TestType b) { return a - b; }; + + SECTION("scalar") { + permutation_type i(0); + scalar_type result(result_data_span, scalar_shape); + result.subtraction_assignment(i, i, scalar, scalar); + REQUIRE(result.get_elem({}) == lambda(data[0], data[0])); + } + + SECTION("vector") { + permutation_type i(1); + vector_type result(result_data_span, vector_shape); + result.subtraction_assignment(i, i, vector, vector); + for(std::size_t i = 0; i < result.size(); ++i) + REQUIRE(result.get_elem({i}) == lambda(data[i], data[i])); + } + + SECTION("matrix") { + permutation_type i(2); + matrix_type result(result_data_span, matrix_shape); + result.subtraction_assignment(i, i, matrix, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + lambda(data[i * 4 + j], data[i * 4 + j])); + + permutation_type p{{1, 0}}; + result.subtraction_assignment(p, i, matrix, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + lambda(data[j * 4 + i], data[i * 4 + j])); + + result.subtraction_assignment(p, p, matrix, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + lambda(data[j * 4 + i], data[j * 4 + i])); + } + + SECTION("rank 3 tensor") { + permutation_type i(3); + tensor3_type result(result_data_span, tensor3_shape); + result.subtraction_assignment(i, i, tensor3, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE( + result.get_elem({i, j, k}) == + lambda(data[i * 8 + j * 4 + k], data[i * 8 + j * 4 + k])); + + permutation_type p({1, 0, 2}); + 
result.subtraction_assignment(p, i, tensor3, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE( + result.get_elem({i, j, k}) == + lambda(data[j * 8 + i * 4 + k], data[i * 8 + j * 4 + k])); + + result.subtraction_assignment(p, p, tensor3, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE( + result.get_elem({i, j, k}) == + lambda(data[j * 8 + i * 4 + k], data[j * 8 + i * 4 + k])); + } + + SECTION("rank 4 tensor") { + permutation_type i(4); + tensor4_type result(result_data_span, tensor4_shape); + result.subtraction_assignment(i, i, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[i * 8 + j * 4 + k * 2 + l], + data[i * 8 + j * 4 + k * 2 + l])); + + permutation_type p({1, 0, 2, 3}); + result.subtraction_assignment(p, i, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[j * 8 + i * 4 + k * 2 + l], + data[i * 8 + j * 4 + k * 2 + l])); + + permutation_type p1({0, 1, 3, 2}); + result.subtraction_assignment(p1, i, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[i * 8 + j * 4 + l * 2 + k], + data[i * 8 + j * 4 + k * 2 + l])); + + permutation_type p2({2, 3, 0, 1}); + result.subtraction_assignment(p2, p, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[k * 8 + l * 4 + i * 2 + j], + data[j * 8 + i * 4 + k * 2 + l])); + } + } + + SECTION("hadamard_assignment") { + std::vector result_data(16, TestType{0}); + std::span result_data_span(result_data.data(), + result_data.size()); + + auto lambda = [](TestType a, TestType b) { return a * b; }; + + SECTION("scalar") { + permutation_type i(0); + scalar_type result(result_data_span, scalar_shape); + result.hadamard_assignment(i, i, scalar, scalar); + REQUIRE(result.get_elem({}) == lambda(data[0], data[0])); + } + + SECTION("vector") { + 
permutation_type i(1); + vector_type result(result_data_span, vector_shape); + result.hadamard_assignment(i, i, vector, vector); + for(std::size_t i = 0; i < result.size(); ++i) + REQUIRE(result.get_elem({i}) == lambda(data[i], data[i])); + } + + SECTION("matrix") { + permutation_type i(2); + matrix_type result(result_data_span, matrix_shape); + result.hadamard_assignment(i, i, matrix, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + lambda(data[i * 4 + j], data[i * 4 + j])); + + permutation_type p{{1, 0}}; + result.hadamard_assignment(p, i, matrix, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + lambda(data[j * 4 + i], data[i * 4 + j])); + + result.hadamard_assignment(p, p, matrix, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + lambda(data[j * 4 + i], data[j * 4 + i])); + } + + SECTION("rank 3 tensor") { + permutation_type i(3); + tensor3_type result(result_data_span, tensor3_shape); + result.hadamard_assignment(i, i, tensor3, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE( + result.get_elem({i, j, k}) == + lambda(data[i * 8 + j * 4 + k], data[i * 8 + j * 4 + k])); + + permutation_type p({1, 0, 2}); + result.hadamard_assignment(p, i, tensor3, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE( + result.get_elem({i, j, k}) == + lambda(data[j * 8 + i * 4 + k], data[i * 8 + j * 4 + k])); + + result.hadamard_assignment(p, p, tensor3, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE( + result.get_elem({i, j, k}) == + lambda(data[j * 8 + i * 4 + k], data[j * 8 + i * 4 + k])); + } + + SECTION("rank 4 tensor") { + permutation_type i(4); + tensor4_type result(result_data_span, tensor4_shape); + result.hadamard_assignment(i, i, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[i * 8 + j * 4 + k * 2 + l], + data[i * 8 + j * 4 + k * 2 + l])); + + permutation_type p({1, 
0, 2, 3}); + result.hadamard_assignment(p, i, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[j * 8 + i * 4 + k * 2 + l], + data[i * 8 + j * 4 + k * 2 + l])); + + permutation_type p1({0, 1, 3, 2}); + result.hadamard_assignment(p1, i, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[i * 8 + j * 4 + l * 2 + k], + data[i * 8 + j * 4 + k * 2 + l])); + + permutation_type p2({2, 3, 0, 1}); + result.hadamard_assignment(p2, p, tensor4, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + lambda(data[k * 8 + l * 4 + i * 2 + j], + data[j * 8 + i * 4 + k * 2 + l])); + } + } + + SECTION("permute_assignment") { + std::vector result_data(16, TestType{0}); + std::span result_data_span(result_data.data(), + result_data.size()); + + SECTION("scalar") { + permutation_type identity(0); + scalar_type result(result_data_span, scalar_shape); + result.permute_assignment(identity, scalar); + REQUIRE(result.get_elem({}) == data[0]); + } + + SECTION("vector") { + permutation_type identity(1); + vector_type result(result_data_span, vector_shape); + result.permute_assignment(identity, vector); + for(std::size_t i = 0; i < result.size(); ++i) + REQUIRE(result.get_elem({i}) == data[i]); + } + + SECTION("matrix") { + permutation_type identity(2); + matrix_type result(result_data_span, matrix_shape); + result.permute_assignment(identity, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == data[i * 4 + j]); + + permutation_type p{{1, 0}}; + result.permute_assignment(p, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == data[j * 4 + i]); + } + + SECTION("rank 3 tensor") { + permutation_type identity(3); + tensor3_type result(result_data_span, tensor3_shape); + result.permute_assignment(identity, tensor3); + for(auto [i, j, k] : tensor3_indices) + 
REQUIRE(result.get_elem({i, j, k}) == data[i * 8 + j * 4 + k]); + + permutation_type p({1, 0, 2}); + result.permute_assignment(p, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE(result.get_elem({i, j, k}) == data[j * 8 + i * 4 + k]); + } + + SECTION("rank 4 tensor") { + permutation_type identity(4); + tensor4_type result(result_data_span, tensor4_shape); + result.permute_assignment(identity, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + data[i * 8 + j * 4 + k * 2 + l]); + + permutation_type p({1, 0, 2, 3}); + result.permute_assignment(p, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + data[j * 8 + i * 4 + k * 2 + l]); + + permutation_type p1({0, 1, 3, 2}); + result.permute_assignment(p1, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + data[i * 8 + j * 4 + l * 2 + k]); + + permutation_type p2({2, 3, 0, 1}); + result.permute_assignment(p2, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + data[k * 8 + l * 4 + i * 2 + j]); + + permutation_type p3({3, 2, 1, 0}); + result.permute_assignment(p3, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + data[l * 8 + k * 4 + j * 2 + i]); + } + } + + SECTION("scalar_multiplication") { + std::vector result_data(16, TestType{0}); + std::span result_data_span(result_data.data(), + result_data.size()); + + TestType scalar_value(3); + + SECTION("scalar") { + permutation_type i(0); + scalar_type result(result_data_span, scalar_shape); + result.scalar_multiplication(i, scalar_value, scalar); + REQUIRE(result.get_elem({}) == data[0] * scalar_value); + } + + SECTION("vector") { + permutation_type identity(1); + vector_type result(result_data_span, vector_shape); + result.scalar_multiplication(identity, scalar_value, vector); + for(std::size_t i = 0; i < result.size(); ++i) + 
REQUIRE(result.get_elem({i}) == data[i] * scalar_value); + } + + SECTION("matrix") { + permutation_type identity(2); + matrix_type result(result_data_span, matrix_shape); + result.scalar_multiplication(identity, scalar_value, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + data[i * 4 + j] * scalar_value); + + permutation_type p{{1, 0}}; + result.scalar_multiplication(p, scalar_value, matrix); + for(auto [i, j] : matrix_indices) + REQUIRE(result.get_elem({i, j}) == + data[j * 4 + i] * scalar_value); + } + + SECTION("rank 3 tensor") { + permutation_type identity(3); + tensor3_type result(result_data_span, tensor3_shape); + result.scalar_multiplication(identity, scalar_value, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE(result.get_elem({i, j, k}) == + data[i * 8 + j * 4 + k] * scalar_value); + + permutation_type p({1, 0, 2}); + result.scalar_multiplication(p, scalar_value, tensor3); + for(auto [i, j, k] : tensor3_indices) + REQUIRE(result.get_elem({i, j, k}) == + data[j * 8 + i * 4 + k] * scalar_value); + } + + SECTION("rank 4 tensor") { + permutation_type identity(4); + tensor4_type result(result_data_span, tensor4_shape); + result.scalar_multiplication(identity, scalar_value, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + data[i * 8 + j * 4 + k * 2 + l] * scalar_value); + + permutation_type p({1, 0, 2, 3}); + result.scalar_multiplication(p, scalar_value, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + data[j * 8 + i * 4 + k * 2 + l] * scalar_value); + + permutation_type p1({0, 1, 3, 2}); + result.scalar_multiplication(p1, scalar_value, tensor4); + for(auto [i, j, k, l] : tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + data[i * 8 + j * 4 + l * 2 + k] * scalar_value); + + permutation_type p2({2, 3, 0, 1}); + result.scalar_multiplication(p2, scalar_value, tensor4); + for(auto [i, j, k, l] : 
tensor4_indices) + REQUIRE(result.get_elem({i, j, k, l}) == + data[k * 8 + l * 4 + i * 2 + j] * scalar_value); + } + } +} diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/addition_visitor.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/addition_visitor.cpp new file mode 100644 index 00000000..d7b46618 --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/addition_visitor.cpp @@ -0,0 +1,38 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// #include +// #include + +// using namespace tensorwrapper; + +// TEMPLATE_LIST_TEST_CASE("AdditionVisitor", "[buffer][detail_]", +// types::floating_point_types) { +// using VisitorType = buffer::detail_::AdditionVisitor; + +// VisitorType visitor; + +// SECTION("vectors") { +// std::vector lhs{1.0, 2.0, 3.0}; +// std::vector rhs{4.0, 5.0, 6.0}; + +// visitor(std::span(lhs), std::span(rhs)); + +// REQUIRE(lhs[0] == Approx(5.0).epsilon(1e-10)); +// REQUIRE(lhs[1] == Approx(7.0).epsilon(1e-10)); +// REQUIRE(lhs[2] == Approx(9.0).epsilon(1e-10)); +// } +// } diff --git a/tests/cxx/unit_tests/tensorwrapper/symmetry/permutation.cpp b/tests/cxx/unit_tests/tensorwrapper/symmetry/permutation.cpp index a99489e4..e28d4ce2 100644 --- a/tests/cxx/unit_tests/tensorwrapper/symmetry/permutation.cpp +++ b/tests/cxx/unit_tests/tensorwrapper/symmetry/permutation.cpp @@ -58,6 +58,12 @@ TEST_CASE("Permutation") { REQUIRE(p5.size() == mode_index_type(0)); REQUIRE(p5.rank() == mode_index_type(5)); + // One cycle and a fix-point via one-line + Permutation one_line({1, 0, 2}); + REQUIRE(one_line.size() == mode_index_type(1)); + REQUIRE(one_line.rank() == mode_index_type(3)); + REQUIRE(one_line.at(0) == c01); + // Two cycles via one-line Permutation p01_23{1, 0, 3, 2}; REQUIRE(p01_23.size() == mode_index_type(2)); @@ -139,6 +145,22 @@ TEST_CASE("Permutation") { REQUIRE(two_cycles.size() == 2); } + SECTION("apply") { + REQUIRE(defaulted.apply(cycle_type{}) == cycle_type{}); + + // One cycle (0 1) + REQUIRE(one_cycle.apply(cycle_type{0, 1}) == cycle_type{1, 0}); + REQUIRE(one_cycle.apply(cycle_type{1, 2}) == cycle_type{2, 1}); + + // Two cycles (1 3 2)(4 5) + REQUIRE(two_cycles.apply(cycle_type{0, 1, 2, 3, 4, 5}) == + cycle_type{0, 2, 3, 1, 5, 4}); + + Permutation one_line({1, 0, 2}); + REQUIRE(one_line.apply(cycle_type{0, 1, 2}) == cycle_type{1, 0, 2}); + REQUIRE(one_line.apply(cycle_type{2, 3, 4}) == cycle_type{3, 2, 4}); + } + SECTION("swap") { Permutation 
copy_defaulted(defaulted); Permutation copy_one_cycle(one_cycle); From 1fbb769ce3635e916dcc2708b80fd0f6d4604bf0 Mon Sep 17 00:00:00 2001 From: "Ryan M. Richard" Date: Fri, 14 Nov 2025 13:16:51 -0600 Subject: [PATCH 02/18] backup --- .../backends/cutensor/cuda_tensor.cpp | 46 + .../cutensor/cuda_tensor.cu} | 117 ++- .../cutensor/cuda_tensor.cuh} | 12 +- .../backends/cutensor/cuda_tensor.hpp | 70 ++ .../cutensor}/cutensor_traits.cuh | 4 +- .../backends/eigen/eigen_tensor.hpp | 102 +-- .../backends/eigen/eigen_tensor_impl.cpp | 315 ++++--- .../backends/eigen/eigen_tensor_impl.hpp | 49 +- .../buffer/detail_/eigen_tensor.cpp | 16 +- .../backends/eigen/eigen_tensor_impl.cpp | 827 ++++++++---------- .../backends/testing/addition_assignment.hpp | 72 ++ .../backends/testing/elementwise_op.hpp | 315 +++++++ 12 files changed, 1194 insertions(+), 751 deletions(-) create mode 100644 src/tensorwrapper/backends/cutensor/cuda_tensor.cpp rename src/tensorwrapper/{buffer/detail_/eigen_tensor.cu => backends/cutensor/cuda_tensor.cu} (74%) rename src/tensorwrapper/{buffer/detail_/eigen_tensor.cuh => backends/cutensor/cuda_tensor.cuh} (78%) create mode 100644 src/tensorwrapper/backends/cutensor/cuda_tensor.hpp rename src/tensorwrapper/{buffer/detail_ => backends/cutensor}/cutensor_traits.cuh (92%) create mode 100644 tests/cxx/unit_tests/tensorwrapper/backends/testing/addition_assignment.hpp create mode 100644 tests/cxx/unit_tests/tensorwrapper/backends/testing/elementwise_op.hpp diff --git a/src/tensorwrapper/backends/cutensor/cuda_tensor.cpp b/src/tensorwrapper/backends/cutensor/cuda_tensor.cpp new file mode 100644 index 00000000..2df8446b --- /dev/null +++ b/src/tensorwrapper/backends/cutensor/cuda_tensor.cpp @@ -0,0 +1,46 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "cuda_tensor.hpp" + +#ifdef ENABLE_CUTENSOR +#include "eigen_tensor.cuh" +#endif + +namespace tensorwrapper::backends::cutensor { + +#define TPARAMS template +#define CUDA_TENSOR CUDATensor + +TPARAMS +void CUDA_TENSOR::contraction_assignment(label_type this_label, + label_type lhs_label, + label_type rhs_label, + const_my_reference lhs, + const_my_reference rhs) { +#ifdef ENABLE_CUTENSOR + cutensor_contraction(this_label, lhs_label, rhs_label, lhs, rhs, + *this); +#else + throw std::runtime_error( + "cuTENSOR backend not enabled. Recompile with -DENABLE_CUTENSOR."); +#endif +} + +#undef CUDA_TENSOR +#undef TPARAMS + +} // namespace tensorwrapper::backends::cutensor diff --git a/src/tensorwrapper/buffer/detail_/eigen_tensor.cu b/src/tensorwrapper/backends/cutensor/cuda_tensor.cu similarity index 74% rename from src/tensorwrapper/buffer/detail_/eigen_tensor.cu rename to src/tensorwrapper/backends/cutensor/cuda_tensor.cu index dd6efc1a..b3e12329 100644 --- a/src/tensorwrapper/buffer/detail_/eigen_tensor.cu +++ b/src/tensorwrapper/backends/cutensor/cuda_tensor.cu @@ -14,12 +14,16 @@ * limitations under the License. 
*/ #ifdef ENABLE_CUTENSOR +#include "cuda_tensor.cuh" #include "cutensor_traits.cuh" -#include "eigen_tensor.cuh" #include #include -namespace tensorwrapper::buffer::detail_ { +namespace tensorwrapper::backends::cutensor { + +// Some common typedefs +using mode_vector_t = std::vector; +using int64_vector_t = std::vector; // Handle cuda errors #define HANDLE_CUDA_ERROR(x) \ @@ -38,12 +42,7 @@ namespace tensorwrapper::buffer::detail_ { if(err != CUTENSOR_STATUS_SUCCESS) { \ printf("Error: %s\n", cutensorGetErrorString(err)); \ exit(-1); \ - } \ - }; - -// Some common typedefs -using mode_vector_t = std::vector; -using int64_vector_t = std::vector; + } // Convert a label into a vector of modes template @@ -53,16 +52,6 @@ mode_vector_t label_to_modes(const LabelType& label) { return mode; } -// Query extent information from an input -template -int64_vector_t get_extents(const InfoType& info) { - int64_vector_t extent; - for(std::size_t i = 0; i < info.rank(); ++i) { - extent.push_back((int64_t)info.extent(i)); - } - return extent; -} - // Compute strides in row major int64_vector_t get_strides(std::size_t N, const int64_vector_t& extent) { int64_vector_t strides; @@ -74,41 +63,56 @@ int64_vector_t get_strides(std::size_t N, const int64_vector_t& extent) { return strides; } +// Query extent information from an input +template +int64_vector_t get_extents(const InfoType& info) { + int64_vector_t extent; + for(std::size_t i = 0; i < info.rank(); ++i) { + extent.push_back((int64_t)info.extent(i)); + } + return extent; +} + // Perform tensor contraction with cuTENSOR template void cutensor_contraction(typename TensorType::label_type c_label, typename TensorType::label_type a_label, typename TensorType::label_type b_label, - typename TensorType::const_shape_reference c_shape, - typename TensorType::const_pimpl_reference A, - typename TensorType::const_pimpl_reference B, - typename TensorType::eigen_reference C) { - using element_t = typename TensorType::element_type; - using 
eigen_data_t = typename TensorType::eigen_data_type; + const TensorType& A, const TensorType& B, + TensorType& C) { + using element_t = typename TensorType::value_type; + + const auto a_rank = A.rank(); + const auto b_rank = B.rank(); + const auto c_rank = C.rank(); + + const auto& a_shape = A.shape(); + const auto& b_shape = B.shape(); + const auto& c_shape = C.shape(); // GEMM alpha and beta (hardcoded for now) element_t alpha = 1.0; element_t beta = 0.0; + // The extents of each tensor + int64_vector_t a_extents = get_extents(a_shape); + int64_vector_t b_extents = get_extents(b_shape); + int64_vector_t c_extents = get_extents(c_shape); + // The modes of the tensors mode_vector_t a_modes = label_to_modes(a_label); mode_vector_t b_modes = label_to_modes(b_label); mode_vector_t c_modes = label_to_modes(c_label); - // The extents of each tensor - int64_vector_t a_extents = get_extents(A); - int64_vector_t b_extents = get_extents(B); - int64_vector_t c_extents = get_extents(c_shape.as_smooth()); - // The strides of each tensor - int64_vector_t a_strides = get_strides(A.rank(), a_extents); - int64_vector_t b_strides = get_strides(B.rank(), b_extents); - int64_vector_t c_strides = get_strides(c_shape.rank(), c_extents); + int64_vector_t a_strides = get_strides(a_rank, a_extents); + int64_vector_t b_strides = get_strides(b_rank, b_extents); + int64_vector_t c_strides = get_strides(c_rank, c_extents); // The size of each tensor std::size_t a_size = sizeof(element_t) * A.size(); std::size_t b_size = sizeof(element_t) * B.size(); - std::size_t c_size = sizeof(element_t) * c_shape.size(); + std::size_t c_size = sizeof(element_t) * C.size(); // Allocate on device void *A_d, *B_d, *C_d; @@ -118,9 +122,9 @@ void cutensor_contraction(typename TensorType::label_type c_label, // Copy to data to device HANDLE_CUDA_ERROR( - cudaMemcpy(A_d, A.get_immutable_data(), a_size, cudaMemcpyHostToDevice)); + cudaMemcpy(A_d, A.data(), a_size, cudaMemcpyHostToDevice)); HANDLE_CUDA_ERROR( - 
cudaMemcpy(B_d, B.get_immutable_data(), b_size, cudaMemcpyHostToDevice)); + cudaMemcpy(B_d, B.data(), b_size, cudaMemcpyHostToDevice)); HANDLE_CUDA_ERROR( cudaMemcpy(C_d, C.data(), c_size, cudaMemcpyHostToDevice)); @@ -141,17 +145,17 @@ void cutensor_contraction(typename TensorType::label_type c_label, // Create Tensor Descriptors cutensorTensorDescriptor_t descA; HANDLE_CUTENSOR_ERROR(cutensorCreateTensorDescriptor( - handle, &descA, A.rank(), a_extents.data(), a_strides.data(), + handle, &descA, a_rank, a_extents.data(), a_strides.data(), traits.cutensorDataType, kAlignment)); cutensorTensorDescriptor_t descB; HANDLE_CUTENSOR_ERROR(cutensorCreateTensorDescriptor( - handle, &descB, B.rank(), b_extents.data(), b_strides.data(), + handle, &descB, b_rank, b_extents.data(), b_strides.data(), traits.cutensorDataType, kAlignment)); cutensorTensorDescriptor_t descC; HANDLE_CUTENSOR_ERROR(cutensorCreateTensorDescriptor( - handle, &descC, c_shape.rank(), c_extents.data(), c_strides.data(), + handle, &descC, c_rank, c_extents.data(), c_strides.data(), traits.cutensorDataType, kAlignment)); // Create Contraction Descriptor @@ -232,34 +236,17 @@ void cutensor_contraction(typename TensorType::label_type c_label, #undef HANDLE_CUDA_ERROR // Template instantiations -#define FUNCTION_INSTANTIATE(TYPE, RANK) \ - template void cutensor_contraction>( \ - typename EigenTensor::label_type, \ - typename EigenTensor::label_type, \ - typename EigenTensor::label_type, \ - typename EigenTensor::const_shape_reference, \ - typename EigenTensor::const_pimpl_reference, \ - typename EigenTensor::const_pimpl_reference, \ - typename EigenTensor::eigen_reference) - -#define DEFINE_CUTENSOR_CONTRACTION(TYPE) \ - FUNCTION_INSTANTIATE(TYPE, 0); \ - FUNCTION_INSTANTIATE(TYPE, 1); \ - FUNCTION_INSTANTIATE(TYPE, 2); \ - FUNCTION_INSTANTIATE(TYPE, 3); \ - FUNCTION_INSTANTIATE(TYPE, 4); \ - FUNCTION_INSTANTIATE(TYPE, 5); \ - FUNCTION_INSTANTIATE(TYPE, 6); \ - FUNCTION_INSTANTIATE(TYPE, 7); \ - 
FUNCTION_INSTANTIATE(TYPE, 8); \ - FUNCTION_INSTANTIATE(TYPE, 9); \ - FUNCTION_INSTANTIATE(TYPE, 10) - -TW_APPLY_FLOATING_POINT_TYPES(DEFINE_CUTENSOR_CONTRACTION); - -#undef DEFINE_CUTENSOR_CONTRACTION +#define FUNCTION_INSTANTIATE(TYPE) \ + template void cutensor_contraction>( \ + typename CUDATensor::label_type, \ + typename CUDATensor::label_type, \ + typename CUDATensor::label_type, const CUDATensor&, \ + const CUDATensor&, CUDATensor&) + +TW_APPLY_FLOATING_POINT_TYPES(FUNCTION_INSTANTIATE); + #undef FUNCTION_INSTANTIATE -} // namespace tensorwrapper::buffer::detail_ +} // namespace tensorwrapper::backends::cutensor #endif diff --git a/src/tensorwrapper/buffer/detail_/eigen_tensor.cuh b/src/tensorwrapper/backends/cutensor/cuda_tensor.cuh similarity index 78% rename from src/tensorwrapper/buffer/detail_/eigen_tensor.cuh rename to src/tensorwrapper/backends/cutensor/cuda_tensor.cuh index bc7d4e0b..b820afca 100644 --- a/src/tensorwrapper/buffer/detail_/eigen_tensor.cuh +++ b/src/tensorwrapper/backends/cutensor/cuda_tensor.cuh @@ -15,9 +15,9 @@ */ #pragma once #ifdef ENABLE_CUTENSOR -#include "eigen_tensor.hpp" +#include "cuda_tensor.hpp" -namespace tensorwrapper::buffer::detail_ { +namespace tensorwrapper::backends::cutensor { /** @brief Performs a tensor contraction on GPU * @@ -36,11 +36,9 @@ template void cutensor_contraction(typename TensorType::label_type c_label, typename TensorType::label_type a_label, typename TensorType::label_type b_label, - typename TensorType::const_shape_reference c_shape, - typename TensorType::const_pimpl_reference A, - typename TensorType::const_pimpl_reference B, - typename TensorType::eigen_reference C); + const TensorType& A, const TensorType& B, + TensorType& C); -} // namespace tensorwrapper::buffer::detail_ +} // namespace tensorwrapper::backends::cutensor #endif diff --git a/src/tensorwrapper/backends/cutensor/cuda_tensor.hpp b/src/tensorwrapper/backends/cutensor/cuda_tensor.hpp new file mode 100644 index 00000000..0e9dff46 
--- /dev/null +++ b/src/tensorwrapper/backends/cutensor/cuda_tensor.hpp @@ -0,0 +1,70 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include +#include +#include + +namespace tensorwrapper::backends::cutensor { + +/** @brief Wraps using cuTENSOR + * + * @tparam FloatType Floating point type used for the tensor's elements + * + * N.b. The name of this class is chosen to avoid conflict with cuTENSOR. + */ +template +class CUDATensor { +private: + /// Type of *this + using my_type = CUDATensor; + + /// Read-only reference to an object of type my_type + using const_my_reference = const my_type&; + +public: + using value_type = FloatType; + using span_type = std::span; + using shape_type = shape::Smooth; + using const_shape_view = shape::SmoothView; + using label_type = dsl::DummyIndices; + using size_type = std::size_t; + + CUDATensor(span_type data, const_shape_view shape) : + m_data_(data), m_shape_(shape) {} + + void contraction_assignment(label_type this_labels, label_type lhs_labels, + label_type rhs_labels, const_my_reference lhs, + const_my_reference rhs); + + size_type rank() const noexcept { return m_shape_.rank(); } + + size_type size() const noexcept { return m_shape_.size(); } + + auto shape() const noexcept { return m_shape_; } + + auto data() noexcept { return m_data_.data(); } + + auto data() const noexcept { return m_data_.data(); } + +private: + span_type m_data_; + + 
const_shape_view m_shape_; +}; + +} // namespace tensorwrapper::backends::cutensor diff --git a/src/tensorwrapper/buffer/detail_/cutensor_traits.cuh b/src/tensorwrapper/backends/cutensor/cutensor_traits.cuh similarity index 92% rename from src/tensorwrapper/buffer/detail_/cutensor_traits.cuh rename to src/tensorwrapper/backends/cutensor/cutensor_traits.cuh index c098c1c5..4846bc54 100644 --- a/src/tensorwrapper/buffer/detail_/cutensor_traits.cuh +++ b/src/tensorwrapper/backends/cutensor/cutensor_traits.cuh @@ -18,7 +18,7 @@ #include #include -namespace tensorwrapper::buffer::detail_ { +namespace tensorwrapper::backends::cutensor { // Traits for cuTENSOR based on the floating point type template @@ -36,6 +36,6 @@ struct cutensor_traits { cutensorComputeDescriptor_t descCompute = CUTENSOR_COMPUTE_DESC_64F; }; -} // namespace tensorwrapper::buffer::detail_ +} // namespace tensorwrapper::backends::cutensor #endif diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor.hpp b/src/tensorwrapper/backends/eigen/eigen_tensor.hpp index adf6ec43..1fb3e4bd 100644 --- a/src/tensorwrapper/backends/eigen/eigen_tensor.hpp +++ b/src/tensorwrapper/backends/eigen/eigen_tensor.hpp @@ -17,8 +17,8 @@ #pragma once #include #include +#include #include -#include #include namespace tensorwrapper::backends::eigen { @@ -71,11 +71,10 @@ class EigenTensor { /// Type used to express strings using string_type = std::string; - /// Type used to specify a permutation - using permutation_type = symmetry::Permutation; + /// Type of a label + using label_type = dsl::DummyIndices; - /// Type of a read-only reference to an object of permutation_type - using const_permutation_reference = const permutation_type&; + virtual ~EigenTensor() noexcept = default; /** @brief Retrieves the rank of the wrapped tensor. * @@ -83,6 +82,12 @@ class EigenTensor { */ eigen_rank_type rank() const noexcept { return rank_(); } + /** @brief The total number of elements in *this. 
+ * + * @return The total number of elements in *this. + * + * @throw None No throw guarantee. + */ size_type size() const noexcept { return size_(); } size_type extent(eigen_rank_type i) const { @@ -108,47 +113,41 @@ class EigenTensor { return add_to_stream_(os); } - void addition_assignment(const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, - const EigenTensor& lhs, const EigenTensor& rhs) { - return addition_assignment_(lhs_permute, rhs_permute, lhs, rhs); + void addition_assignment(label_type this_label, label_type lhs_label, + label_type rhs_label, const EigenTensor& lhs, + const EigenTensor& rhs) { + return addition_assignment_(this_label, lhs_label, rhs_label, lhs, rhs); } - void subtraction_assignment(const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, - const EigenTensor& lhs, + void subtraction_assignment(label_type this_label, label_type lhs_label, + label_type rhs_label, const EigenTensor& lhs, const EigenTensor& rhs) { - return subtraction_assignment_(lhs_permute, rhs_permute, lhs, rhs); + return subtraction_assignment_(this_label, lhs_label, rhs_label, lhs, + rhs); } - void hadamard_assignment(const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, - const EigenTensor& lhs, const EigenTensor& rhs) { - return hadamard_assignment_(lhs_permute, rhs_permute, lhs, rhs); + void hadamard_assignment(label_type this_label, label_type lhs_label, + label_type rhs_label, const EigenTensor& lhs, + const EigenTensor& rhs) { + return hadamard_assignment_(this_label, lhs_label, rhs_label, lhs, rhs); } - void permute_assignment(const_permutation_reference rhs_permute, + void contraction_assignment(label_type this_label, label_type lhs_label, + label_type rhs_label, const EigenTensor& lhs, + const EigenTensor& rhs) { + contraction_assignment_(this_label, lhs_label, rhs_label, lhs, rhs); + } + + void permute_assignment(label_type this_label, label_type rhs_label, const 
EigenTensor& rhs) { - return permute_assignment_(rhs_permute, rhs); + return permute_assignment_(this_label, rhs_label, rhs); } - void scalar_multiplication(const_permutation_reference rhs_permute, + void scalar_multiplication(label_type this_label, label_type rhs_label, FloatType scalar, const EigenTensor& rhs) { - return scalar_multiplication_(rhs_permute, scalar, rhs); + return scalar_multiplication_(this_label, rhs_label, scalar, rhs); } - // void contraction_assignment(label_type this_labels, label_type - // lhs_labels, - // label_type rhs_labels, - // const_shape_reference result_shape, - // const_pimpl_reference lhs, - // const_pimpl_reference rhs) { - // contraction_assignment_(std::move(this_labels), - // std::move(lhs_labels), - // std::move(rhs_labels), result_shape, lhs, - // rhs); - // } - protected: EigenTensor() noexcept = default; @@ -161,34 +160,37 @@ class EigenTensor { virtual string_type to_string_() const = 0; virtual std::ostream& add_to_stream_(std::ostream& os) const = 0; - virtual void addition_assignment_(const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, + virtual void addition_assignment_(label_type this_label, + label_type lhs_label, + label_type rhs_label, const EigenTensor& lhs, const EigenTensor& rhs) = 0; - virtual void subtraction_assignment_( - const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, const EigenTensor& lhs, - const EigenTensor& rhs) = 0; + virtual void subtraction_assignment_(label_type this_label, + label_type lhs_label, + label_type rhs_label, + const EigenTensor& lhs, + const EigenTensor& rhs) = 0; - virtual void hadamard_assignment_(const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, + virtual void hadamard_assignment_(label_type this_label, + label_type lhs_label, + label_type rhs_label, const EigenTensor& lhs, const EigenTensor& rhs) = 0; - virtual void permute_assignment_(const_permutation_reference rhs_permute, + 
virtual void permute_assignment_(label_type this_label, + label_type rhs_label, const EigenTensor& rhs) = 0; - virtual void scalar_multiplication_(const_permutation_reference rhs_permute, - FloatType scalar, + virtual void scalar_multiplication_(label_type this_label, + label_type rhs_label, FloatType scalar, const EigenTensor& rhs) = 0; - // virtual void contraction_assignment_(label_type this_labels, - // label_type lhs_labels, - // label_type rhs_labels, - // const_shape_reference result_shape, - // const_pimpl_reference lhs, - // const_pimpl_reference rhs) = 0; + virtual void contraction_assignment_(label_type this_label, + label_type lhs_label, + label_type rhs_label, + const EigenTensor& lhs, + const EigenTensor& rhs) = 0; }; } // namespace tensorwrapper::backends::eigen diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp index 53072b01..e7aa1eff 100644 --- a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp +++ b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -19,18 +19,8 @@ #include #include -#ifdef ENABLE_CUTENSOR -#include "eigen_tensor.cuh" -#endif - namespace tensorwrapper::backends::eigen { -std::vector to_eigen_permutation(const symmetry::Permutation& perm) { - std::vector eigen_perm(perm.rank()); - std::iota(eigen_perm.begin(), eigen_perm.end(), 0); - return perm.apply(std::move(eigen_perm)); -} - #define TPARAMS template #define EIGEN_TENSOR EigenTensorImpl @@ -64,34 +54,38 @@ std::ostream& EIGEN_TENSOR::add_to_stream_(std::ostream& os) const { } TPARAMS -void EIGEN_TENSOR::addition_assignment_(const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, +void EIGEN_TENSOR::addition_assignment_(label_type this_label, + label_type lhs_label, + label_type rhs_label, const base_type& lhs, const base_type& rhs) { auto lambda = [](auto&& lhs, auto&& rhs) { return lhs + rhs; }; - element_wise_op_(lambda, lhs_permute, rhs_permute, lhs, rhs); + 
element_wise_op_(lambda, this_label, lhs_label, rhs_label, lhs, rhs); } TPARAMS -void EIGEN_TENSOR::subtraction_assignment_( - const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, const base_type& lhs, - const base_type& rhs) { +void EIGEN_TENSOR::subtraction_assignment_(label_type this_label, + label_type lhs_label, + label_type rhs_label, + const base_type& lhs, + const base_type& rhs) { auto lambda = [](auto&& lhs, auto&& rhs) { return lhs - rhs; }; - element_wise_op_(lambda, lhs_permute, rhs_permute, lhs, rhs); + element_wise_op_(lambda, this_label, lhs_label, rhs_label, lhs, rhs); } TPARAMS -void EIGEN_TENSOR::hadamard_assignment_(const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, +void EIGEN_TENSOR::hadamard_assignment_(label_type this_label, + label_type lhs_label, + label_type rhs_label, const base_type& lhs, const base_type& rhs) { auto lambda = [](auto&& lhs, auto&& rhs) { return lhs * rhs; }; - element_wise_op_(lambda, lhs_permute, rhs_permute, lhs, rhs); + element_wise_op_(lambda, this_label, lhs_label, rhs_label, lhs, rhs); } TPARAMS -void EIGEN_TENSOR::permute_assignment_(const_permutation_reference rhs_permute, +void EIGEN_TENSOR::permute_assignment_(label_type this_label, + label_type rhs_label, const base_type& rhs) { const auto* rhs_down = dynamic_cast(&rhs); @@ -99,135 +93,214 @@ void EIGEN_TENSOR::permute_assignment_(const_permutation_reference rhs_permute, m_tensor_ = rhs_down->m_tensor_; return; } else { - auto eigen_rhs_permute = to_eigen_permutation(rhs_permute); - auto rhs_shuffled = rhs_down->m_tensor_.shuffle(eigen_rhs_permute); - m_tensor_ = rhs_shuffled; + if(this_label != rhs_label) { // We need to permute rhs first + // Eigen adopts the opposite definition of permutation from us. 
+ auto r_to_l = this_label.permutation(rhs_label); + // Eigen wants int objects + std::vector r_to_l2(r_to_l.begin(), r_to_l.end()); + m_tensor_ = rhs_down->m_tensor_.shuffle(r_to_l2); + } else { + m_tensor_ = rhs_down->m_tensor_; + } } } TPARAMS -void EIGEN_TENSOR::scalar_multiplication_( - const_permutation_reference rhs_permute, FloatType scalar, - const base_type& rhs) { +void EIGEN_TENSOR::scalar_multiplication_(label_type this_label, + label_type rhs_label, + FloatType scalar, + const base_type& rhs) { const auto* rhs_down = dynamic_cast(&rhs); if constexpr(Rank <= 1) { m_tensor_ = rhs_down->m_tensor_ * scalar; return; } else { - auto eigen_rhs_permute = to_eigen_permutation(rhs_permute); - auto rhs_shuffled = rhs_down->m_tensor_.shuffle(eigen_rhs_permute); - m_tensor_ = rhs_shuffled * scalar; + if(this_label != rhs_label) { // We need to permute rhs first + auto r_to_l = rhs_label.permutation(this_label); + // Eigen wants int objects + std::vector r_to_l2(r_to_l.begin(), r_to_l.end()); + m_tensor_ = rhs_down->m_tensor_.shuffle(r_to_l2) * scalar; + } else { + m_tensor_ = rhs_down->m_tensor_ * scalar; + } } } TPARAMS template -void EIGEN_TENSOR::element_wise_op_(OperationType op, - const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, +void EIGEN_TENSOR::element_wise_op_(OperationType op, label_type this_label, + label_type lhs_label, label_type rhs_label, const base_type& lhs, const base_type& rhs) { const auto* lhs_down = dynamic_cast(&lhs); const auto* rhs_down = dynamic_cast(&rhs); + // Whose indices match whose? 
+ bool this_matches_lhs = (this_label == lhs_label); + bool this_matches_rhs = (this_label == rhs_label); + bool lhs_matches_rhs = (lhs_label == rhs_label); + + // The three possible permutations we may need to apply + auto get_permutation = [](auto&& lhs_, auto&& rhs_) { + auto l_to_r = lhs_.permutation(rhs_); + return std::vector(l_to_r.begin(), l_to_r.end()); + }; + auto r_to_l = get_permutation(rhs_label, lhs_label); + auto l_to_r = get_permutation(lhs_label, rhs_label); + auto this_to_r = get_permutation(this_label, rhs_label); + + auto& lhs_eigen = lhs_down->m_tensor_; + auto& rhs_eigen = rhs_down->m_tensor_; + if constexpr(Rank <= 1) { - m_tensor_ = op(lhs_down->m_tensor_, rhs_down->m_tensor_); + m_tensor_ = op(lhs_eigen, rhs_eigen); return; } else { - auto eigen_lhs_permute = to_eigen_permutation(lhs_permute); - auto eigen_rhs_permute = to_eigen_permutation(rhs_permute); - auto lhs_shuffled = lhs_down->m_tensor_.shuffle(eigen_lhs_permute); - auto rhs_shuffled = rhs_down->m_tensor_.shuffle(eigen_rhs_permute); - m_tensor_ = op(lhs_shuffled, rhs_shuffled); + if(this_matches_lhs && this_matches_rhs) { // No permutations + m_tensor_ = op(lhs_eigen, rhs_eigen); + } else if(this_matches_lhs) { // RHS needs permuted + m_tensor_ = op(lhs_eigen, rhs_eigen.shuffle(r_to_l)); + } else if(this_matches_rhs) { // LHS needs permuted + m_tensor_ = op(lhs_eigen.shuffle(l_to_r), rhs_eigen); + } else if(lhs_matches_rhs) { // This needs permuted + m_tensor_ = op(lhs_eigen, rhs_eigen).shuffle(this_to_r); + } else { // Everything needs permuted + auto lhs_shuffled = lhs_eigen.shuffle(l_to_r); + m_tensor_ = op(lhs_shuffled, rhs_eigen).shuffle(this_to_r); + } } } -// template -// auto matrix_size(TensorType&& t, std::size_t row_ranks) { -// std::size_t nrows = 1; -// for(std::size_t i = 0; i < row_ranks; ++i) nrows *= t.extent(i); - -// std::size_t ncols = 1; -// const auto rank = t.rank(); -// for(std::size_t i = row_ranks; i < rank; ++i) ncols *= t.extent(i); -// return 
std::make_pair(nrows, ncols); -// } - -// TPARAMS -// void EIGEN_TENSOR::contraction_assignment_(label_type olabels, -// label_type llabels, -// label_type rlabels, -// const_shape_reference -// result_shape, -// const_pimpl_reference lhs, -// const_pimpl_reference rhs) { -// ContractionPlanner plan(olabels, llabels, rlabels); - -// #ifdef ENABLE_CUTENSOR -// // Prepare m_tensor_ -// m_tensor_ = allocate_from_shape_(result_shape.as_smooth(), -// std::make_index_sequence()); -// m_tensor_.setZero(); - -// // Dispatch to cuTENSOR -// cutensor_contraction(olabels, llabels, rlabels, -// result_shape, lhs, -// rhs, m_tensor_); -// #else -// auto lt = lhs.clone(); -// auto rt = rhs.clone(); -// lt->permute_assignment(plan.lhs_permutation(), llabels, lhs); -// rt->permute_assignment(plan.rhs_permutation(), rlabels, rhs); - -// const auto [lrows, lcols] = matrix_size(*lt, plan.lhs_free().size()); -// const auto [rrows, rcols] = matrix_size(*rt, -// plan.rhs_dummy().size()); - -// // Work out the types of the matrix amd a map -// constexpr auto e_dyn = ::Eigen::Dynamic; -// constexpr auto e_row_major = ::Eigen::RowMajor; -// using matrix_t = ::Eigen::Matrix; using map_t = ::Eigen::Map; - -// eigen::data_type buffer(lrows, rcols); - -// map_t lmatrix(lt->get_mutable_data(), lrows, lcols); -// map_t rmatrix(rt->get_mutable_data(), rrows, rcols); -// map_t omatrix(buffer.data(), lrows, rcols); -// omatrix = lmatrix * rmatrix; - -// auto mlabels = plan.result_matrix_labels(); -// auto oshape = result_shape(olabels); - -// // oshapes is the final shape, permute it to shape omatrix is -// currently in auto temp_shape = result_shape.clone(); -// temp_shape->permute_assignment(mlabels, oshape); -// auto mshape = temp_shape->as_smooth(); - -// auto m_to_o = olabels.permutation(mlabels); // N.b. 
Eigen def is -// inverse us - -// std::array out_size; -// std::array m_to_o_array; -// for(std::size_t i = 0; i < Rank; ++i) { -// out_size[i] = mshape.extent(i); -// m_to_o_array[i] = m_to_o[i]; -// } - -// auto tensor = buffer.reshape(out_size); -// if constexpr(Rank > 0) { -// m_tensor_ = tensor.shuffle(m_to_o_array); -// } else { -// m_tensor_ = tensor; -// } -// #endif -// mark_for_rehash_(); -// } +template +auto matrix_size(TensorType&& t, std::size_t row_ranks) { + std::size_t nrows = 1; + for(std::size_t i = 0; i < row_ranks; ++i) nrows *= t.extent(i); + + std::size_t ncols = 1; + const auto rank = t.rank(); + for(std::size_t i = row_ranks; i < rank; ++i) ncols *= t.extent(i); + return std::make_pair(nrows, ncols); +} + +TPARAMS +void EIGEN_TENSOR::contraction_assignment_(label_type this_label, + label_type lhs_label, + label_type rhs_label, + const base_type& lhs, + const base_type& rhs) { + // ContractionPlanner plan(this_labels, lhs_labels, rhs_labels); + + // auto lhs_permutation = plan.lhs_permutation(); + // auto rhs_permutation = plan.rhs_permutation(); + + // std::vector new_lhs_buffer(lhs.size()); + // std::vector new_rhs_buffer(rhs.size()); + // std::span new_lhs_span(new_lhs_buffer.data(), + // new_lhs_buffer.size()); + // std::span new_rhs_span(new_rhs_buffer.data(), + // new_rhs_buffer.size()); + + // auto new_lhs_shape = lhs_permutation.apply(lhs.shape()); + // auto new_rhs_shape = rhs_permutation.apply(rhs.shape()); + + // auto new_lhs_tensor = + // make_eigen_tensor(new_lhs_span, new_lhs_shape); + + // auto new_rhs_tensor = + // make_eigen_tensor(new_rhs_span, new_rhs_shape); + + // new_lhs_tensor.permute_assignment(lhs_permutation, lhs); + // new_rhs_tensor.permute_assignment(rhs_permutation, rhs); + + // const auto [lrows, lcols] = matrix_size(*lt, plan.lhs_free().size()); + // const auto [rrows, rcols] = matrix_size(*rt, + // plan.rhs_dummy().size()); + + // // Work out the types of the matrix amd a map + // constexpr auto e_dyn = 
::Eigen::Dynamic; + // constexpr auto e_row_major = ::Eigen::RowMajor; + // using matrix_t = ::Eigen::Matrix; using map_t = ::Eigen::Map; + + // map_t lmatrix(new_lhs_buffer.data(), lrows, lcols); + // map_t rmatrix(new_rhs_buffer.data(), rrows, rcols); + // map_t omatrix(m_tensor_.data(), lrows, rcols); + + // omatrix = lmatrix * rmatrix; + + // // auto mlabels = plan.result_matrix_labels(); + // // auto oshape = result_shape(olabels); + + // // oshapes is the final shape, permute it to shape omatrix is + // currently in + + // auto temp_shape = result_shape.clone(); + // temp_shape->permute_assignment(mlabels, oshape); + // auto mshape = temp_shape->as_smooth(); + + // auto m_to_o = olabels.permutation(mlabels); // N.b. Eigen def is + // inverse us + + // std::array out_size; + // std::array m_to_o_array; + // for(std::size_t i = 0; i < Rank; ++i) { + // out_size[i] = mshape.extent(i); + // m_to_o_array[i] = m_to_o[i]; + // } + + // auto tensor = buffer.reshape(out_size); + // if constexpr(Rank > 0) { + // m_tensor_ = tensor.shuffle(m_to_o_array); + // } else { + // m_tensor_ = tensor; + // } +} #undef EIGEN_TENSOR #undef TPARAMS +template +std::unique_ptr> make_eigen_tensor( + std::span data, shape::SmoothView shape) { + switch(shape.rank()) { + case 0: + return std::make_unique>(data, shape); + case 1: + return std::make_unique>(data, shape); + case 2: + return std::make_unique>(data, shape); + case 3: + return std::make_unique>(data, shape); + case 4: + return std::make_unique>(data, shape); + case 5: + return std::make_unique>(data, shape); + case 6: + return std::make_unique>(data, shape); + case 7: + return std::make_unique>(data, shape); + case 8: + return std::make_unique>(data, shape); + case 9: + return std::make_unique>(data, shape); + case 10: + return std::make_unique>(data, + shape); + default: + throw std::runtime_error( + "EigenTensor backend only supports ranks 0 through 10."); + } +} + +#define DEFINE_MAKE_EIGEN_TENSOR(TYPE) \ + template 
std::unique_ptr> make_eigen_tensor( \ + std::span data, shape::SmoothView shape); + +TW_APPLY_FLOATING_POINT_TYPES(DEFINE_MAKE_EIGEN_TENSOR); + +#undef DEFINE_MAKE_EIGEN_TENSOR + #define DEFINE_EIGEN_TENSOR(TYPE) \ template class EigenTensorImpl; \ template class EigenTensorImpl; \ diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp index 6b13039c..e7b6ea79 100644 --- a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp +++ b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp @@ -42,11 +42,11 @@ class EigenTensorImpl : public EigenTensor { using const_eigen_reference = const eigen_data_type&; ///@{ - using typename base_type::const_permutation_reference; using typename base_type::const_reference; using typename base_type::const_shape_reference; using typename base_type::eigen_rank_type; using typename base_type::index_vector; + using typename base_type::label_type; using typename base_type::reference; using typename base_type::size_type; using typename base_type::string_type; @@ -57,7 +57,7 @@ class EigenTensorImpl : public EigenTensor { m_tensor_( make_from_shape_(data, shape, std::make_index_sequence())) {} - EigenTensorImpl permute(const_permutation_reference perm) const; + EigenTensorImpl permute(label_type perm) const; protected: /// Implement rank by returning template parameter @@ -86,49 +86,34 @@ class EigenTensorImpl : public EigenTensor { /// Relies on Eigen's operator<< to add to stream std::ostream& add_to_stream_(std::ostream& os) const override; - void addition_assignment_(const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, - const base_type& lhs, + void addition_assignment_(label_type this_label, label_type lhs_label, + label_type rhs_label, const base_type& lhs, const base_type& rhs) override; - void subtraction_assignment_(const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, - const base_type& lhs, + void 
subtraction_assignment_(label_type this_label, label_type lhs_label, + label_type rhs_label, const base_type& lhs, const base_type& rhs) override; - void hadamard_assignment_(const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, - const base_type& lhs, + void hadamard_assignment_(label_type this_label, label_type lhs_label, + label_type rhs_label, const base_type& lhs, const base_type& rhs) override; - void permute_assignment_(const_permutation_reference rhs_permute, + void permute_assignment_(label_type this_label, label_type rhs_label, const base_type& rhs) override; - void scalar_multiplication_(const_permutation_reference rhs_permute, + void scalar_multiplication_(label_type this_label, label_type rhs_label, FloatType scalar, const base_type& rhs) override; - // void contraction_assignment_(label_type this_labels, label_type - // lhs_labels, - // label_type rhs_labels, - // const_shape_reference result_shape, - // const_pimpl_reference lhs, - // const_pimpl_reference rhs) override; - - // void permute_assignment_(label_type this_labels, label_type rhs_labels, - // const_pimpl_reference rhs) override; - - // void scalar_multiplication_(label_type this_labels, label_type - // rhs_labels, - // FloatType scalar, - // const_pimpl_reference rhs) override; + void contraction_assignment_(label_type this_labels, label_type lhs_labels, + label_type rhs_labels, const base_type& lhs, + const base_type& rhs) override; private: // Code factorization for implementing element-wise operations template - void element_wise_op_(OperationType op, - const_permutation_reference lhs_permute, - const_permutation_reference rhs_permute, + void element_wise_op_(OperationType op, label_type this_label, + label_type lhs_label, label_type rhs_label, const base_type& lhs, const base_type& rhs); // Handles TMP needed to create an Eigen TensorMap from a Smooth object @@ -156,6 +141,10 @@ class EigenTensorImpl : public EigenTensor { eigen_data_type m_tensor_; }; 
+template +std::unique_ptr> make_eigen_tensor( + std::span data, shape::SmoothView shape); + #define DECLARE_EIGEN_TENSOR(TYPE) \ extern template class EigenTensorImpl; \ extern template class EigenTensorImpl; \ diff --git a/src/tensorwrapper/buffer/detail_/eigen_tensor.cpp b/src/tensorwrapper/buffer/detail_/eigen_tensor.cpp index dde5a7e0..84096fb5 100644 --- a/src/tensorwrapper/buffer/detail_/eigen_tensor.cpp +++ b/src/tensorwrapper/buffer/detail_/eigen_tensor.cpp @@ -18,10 +18,6 @@ #include "../contraction_planner.hpp" #include "eigen_tensor.hpp" -#ifdef ENABLE_CUTENSOR -#include "eigen_tensor.cuh" -#endif - namespace tensorwrapper::buffer::detail_ { #define TPARAMS template @@ -100,16 +96,6 @@ void EIGEN_TENSOR::contraction_assignment_(label_type olabels, const_pimpl_reference rhs) { ContractionPlanner plan(olabels, llabels, rlabels); -#ifdef ENABLE_CUTENSOR - // Prepare m_tensor_ - m_tensor_ = allocate_from_shape_(result_shape.as_smooth(), - std::make_index_sequence()); - m_tensor_.setZero(); - - // Dispatch to cuTENSOR - cutensor_contraction(olabels, llabels, rlabels, result_shape, lhs, - rhs, m_tensor_); -#else auto lt = lhs.clone(); auto rt = rhs.clone(); lt->permute_assignment(plan.lhs_permutation(), llabels, lhs); @@ -154,7 +140,7 @@ void EIGEN_TENSOR::contraction_assignment_(label_type olabels, } else { m_tensor_ = tensor; } -#endif + mark_for_rehash_(); } diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp index d6dbb9de..e46cae2c 100644 --- a/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp +++ b/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -15,18 +15,18 @@ */ #include "../../testing/testing.hpp" +#include "../testing/addition_assignment.hpp" #include using namespace tensorwrapper; using namespace tensorwrapper::backends::eigen; TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", 
types::floating_point_types) { - using scalar_type = EigenTensorImpl; - using vector_type = EigenTensorImpl; - using matrix_type = EigenTensorImpl; - using tensor3_type = EigenTensorImpl; - using tensor4_type = EigenTensorImpl; - using permutation_type = typename scalar_type::permutation_type; + using scalar_type = EigenTensorImpl; + using vector_type = EigenTensorImpl; + using matrix_type = EigenTensorImpl; + using tensor3_type = EigenTensorImpl; + using tensor4_type = EigenTensorImpl; std::vector data(16); for(std::size_t i = 0; i < data.size(); ++i) @@ -48,28 +48,6 @@ TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { tensor3_type tensor3(data_span, tensor3_shape); tensor4_type tensor4(data_span, tensor4_shape); - using pair_index = std::pair; - using triple_index = std::tuple; - using quad_index = - std::tuple; - - std::vector matrix_indices; - for(std::size_t i = 0; i < 4; ++i) - for(std::size_t j = 0; j < 4; ++j) matrix_indices.emplace_back(i, j); - - std::vector tensor3_indices; - for(std::size_t i = 0; i < 2; ++i) - for(std::size_t j = 0; j < 2; ++j) - for(std::size_t k = 0; k < 4; ++k) - tensor3_indices.emplace_back(i, j, k); - - std::vector tensor4_indices; - for(std::size_t i = 0; i < 2; ++i) - for(std::size_t j = 0; j < 2; ++j) - for(std::size_t k = 0; k < 2; ++k) - for(std::size_t l = 0; l < 2; ++l) - tensor4_indices.emplace_back(i, j, k, l); - SECTION("rank") { REQUIRE(scalar.rank() == 0); REQUIRE(vector.rank() == 1); @@ -175,453 +153,380 @@ TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { } SECTION("addition_assignment") { - std::vector result_data(16, TestType{0}); - std::span result_data_span(result_data.data(), - result_data.size()); - - auto lambda = [](TestType a, TestType b) { return a + b; }; - - SECTION("scalar") { - permutation_type i(0); - scalar_type result(result_data_span, scalar_shape); - result.addition_assignment(i, i, scalar, scalar); - REQUIRE(result.get_elem({}) == 
lambda(data[0], data[0])); - } - - SECTION("vector") { - permutation_type i(1); - vector_type result(result_data_span, vector_shape); - result.addition_assignment(i, i, vector, vector); - for(std::size_t i = 0; i < result.size(); ++i) - REQUIRE(result.get_elem({i}) == lambda(data[i], data[i])); - } - - SECTION("matrix") { - permutation_type i(2); - matrix_type result(result_data_span, matrix_shape); - result.addition_assignment(i, i, matrix, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - lambda(data[i * 4 + j], data[i * 4 + j])); - - permutation_type p{{1, 0}}; - result.addition_assignment(p, i, matrix, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - lambda(data[j * 4 + i], data[i * 4 + j])); - - result.addition_assignment(p, p, matrix, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - lambda(data[j * 4 + i], data[j * 4 + i])); - } - - SECTION("rank 3 tensor") { - permutation_type i(3); - tensor3_type result(result_data_span, tensor3_shape); - result.addition_assignment(i, i, tensor3, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE( - result.get_elem({i, j, k}) == - lambda(data[i * 8 + j * 4 + k], data[i * 8 + j * 4 + k])); - - permutation_type p({1, 0, 2}); - result.addition_assignment(p, i, tensor3, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE( - result.get_elem({i, j, k}) == - lambda(data[j * 8 + i * 4 + k], data[i * 8 + j * 4 + k])); - - result.addition_assignment(p, p, tensor3, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE( - result.get_elem({i, j, k}) == - lambda(data[j * 8 + i * 4 + k], data[j * 8 + i * 4 + k])); - } - - SECTION("rank 4 tensor") { - permutation_type i(4); - tensor4_type result(result_data_span, tensor4_shape); - result.addition_assignment(i, i, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[i * 8 + j * 4 + k * 2 + l], - 
data[i * 8 + j * 4 + k * 2 + l])); - - permutation_type p({1, 0, 2, 3}); - result.addition_assignment(p, i, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[j * 8 + i * 4 + k * 2 + l], - data[i * 8 + j * 4 + k * 2 + l])); - - permutation_type p1({0, 1, 3, 2}); - result.addition_assignment(p1, i, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[i * 8 + j * 4 + l * 2 + k], - data[i * 8 + j * 4 + k * 2 + l])); - - permutation_type p2({2, 3, 0, 1}); - result.addition_assignment(p2, p, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[k * 8 + l * 4 + i * 2 + j], - data[j * 8 + i * 4 + k * 2 + l])); - } - } - - SECTION("subtraction_assignment") { - std::vector result_data(16, TestType{0}); - std::span result_data_span(result_data.data(), - result_data.size()); - - auto lambda = [](TestType a, TestType b) { return a - b; }; - - SECTION("scalar") { - permutation_type i(0); - scalar_type result(result_data_span, scalar_shape); - result.subtraction_assignment(i, i, scalar, scalar); - REQUIRE(result.get_elem({}) == lambda(data[0], data[0])); - } - - SECTION("vector") { - permutation_type i(1); - vector_type result(result_data_span, vector_shape); - result.subtraction_assignment(i, i, vector, vector); - for(std::size_t i = 0; i < result.size(); ++i) - REQUIRE(result.get_elem({i}) == lambda(data[i], data[i])); - } - - SECTION("matrix") { - permutation_type i(2); - matrix_type result(result_data_span, matrix_shape); - result.subtraction_assignment(i, i, matrix, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - lambda(data[i * 4 + j], data[i * 4 + j])); - - permutation_type p{{1, 0}}; - result.subtraction_assignment(p, i, matrix, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - lambda(data[j * 4 + i], 
data[i * 4 + j])); - - result.subtraction_assignment(p, p, matrix, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - lambda(data[j * 4 + i], data[j * 4 + i])); - } - - SECTION("rank 3 tensor") { - permutation_type i(3); - tensor3_type result(result_data_span, tensor3_shape); - result.subtraction_assignment(i, i, tensor3, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE( - result.get_elem({i, j, k}) == - lambda(data[i * 8 + j * 4 + k], data[i * 8 + j * 4 + k])); - - permutation_type p({1, 0, 2}); - result.subtraction_assignment(p, i, tensor3, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE( - result.get_elem({i, j, k}) == - lambda(data[j * 8 + i * 4 + k], data[i * 8 + j * 4 + k])); - - result.subtraction_assignment(p, p, tensor3, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE( - result.get_elem({i, j, k}) == - lambda(data[j * 8 + i * 4 + k], data[j * 8 + i * 4 + k])); - } - - SECTION("rank 4 tensor") { - permutation_type i(4); - tensor4_type result(result_data_span, tensor4_shape); - result.subtraction_assignment(i, i, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[i * 8 + j * 4 + k * 2 + l], - data[i * 8 + j * 4 + k * 2 + l])); - - permutation_type p({1, 0, 2, 3}); - result.subtraction_assignment(p, i, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[j * 8 + i * 4 + k * 2 + l], - data[i * 8 + j * 4 + k * 2 + l])); - - permutation_type p1({0, 1, 3, 2}); - result.subtraction_assignment(p1, i, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[i * 8 + j * 4 + l * 2 + k], - data[i * 8 + j * 4 + k * 2 + l])); - - permutation_type p2({2, 3, 0, 1}); - result.subtraction_assignment(p2, p, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - 
lambda(data[k * 8 + l * 4 + i * 2 + j], - data[j * 8 + i * 4 + k * 2 + l])); - } - } - - SECTION("hadamard_assignment") { - std::vector result_data(16, TestType{0}); - std::span result_data_span(result_data.data(), - result_data.size()); - - auto lambda = [](TestType a, TestType b) { return a * b; }; - - SECTION("scalar") { - permutation_type i(0); - scalar_type result(result_data_span, scalar_shape); - result.hadamard_assignment(i, i, scalar, scalar); - REQUIRE(result.get_elem({}) == lambda(data[0], data[0])); - } - - SECTION("vector") { - permutation_type i(1); - vector_type result(result_data_span, vector_shape); - result.hadamard_assignment(i, i, vector, vector); - for(std::size_t i = 0; i < result.size(); ++i) - REQUIRE(result.get_elem({i}) == lambda(data[i], data[i])); - } - - SECTION("matrix") { - permutation_type i(2); - matrix_type result(result_data_span, matrix_shape); - result.hadamard_assignment(i, i, matrix, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - lambda(data[i * 4 + j], data[i * 4 + j])); - - permutation_type p{{1, 0}}; - result.hadamard_assignment(p, i, matrix, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - lambda(data[j * 4 + i], data[i * 4 + j])); - - result.hadamard_assignment(p, p, matrix, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - lambda(data[j * 4 + i], data[j * 4 + i])); - } - - SECTION("rank 3 tensor") { - permutation_type i(3); - tensor3_type result(result_data_span, tensor3_shape); - result.hadamard_assignment(i, i, tensor3, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE( - result.get_elem({i, j, k}) == - lambda(data[i * 8 + j * 4 + k], data[i * 8 + j * 4 + k])); - - permutation_type p({1, 0, 2}); - result.hadamard_assignment(p, i, tensor3, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE( - result.get_elem({i, j, k}) == - lambda(data[j * 8 + i * 4 + k], data[i * 8 + j * 4 + k])); - - 
result.hadamard_assignment(p, p, tensor3, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE( - result.get_elem({i, j, k}) == - lambda(data[j * 8 + i * 4 + k], data[j * 8 + i * 4 + k])); - } - - SECTION("rank 4 tensor") { - permutation_type i(4); - tensor4_type result(result_data_span, tensor4_shape); - result.hadamard_assignment(i, i, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[i * 8 + j * 4 + k * 2 + l], - data[i * 8 + j * 4 + k * 2 + l])); - - permutation_type p({1, 0, 2, 3}); - result.hadamard_assignment(p, i, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[j * 8 + i * 4 + k * 2 + l], - data[i * 8 + j * 4 + k * 2 + l])); - - permutation_type p1({0, 1, 3, 2}); - result.hadamard_assignment(p1, i, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[i * 8 + j * 4 + l * 2 + k], - data[i * 8 + j * 4 + k * 2 + l])); - - permutation_type p2({2, 3, 0, 1}); - result.hadamard_assignment(p2, p, tensor4, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - lambda(data[k * 8 + l * 4 + i * 2 + j], - data[j * 8 + i * 4 + k * 2 + l])); - } - } - - SECTION("permute_assignment") { - std::vector result_data(16, TestType{0}); - std::span result_data_span(result_data.data(), - result_data.size()); - SECTION("scalar") { - permutation_type identity(0); - scalar_type result(result_data_span, scalar_shape); - result.permute_assignment(identity, scalar); - REQUIRE(result.get_elem({}) == data[0]); + testing::scalar_addition_assignment(); } SECTION("vector") { - permutation_type identity(1); - vector_type result(result_data_span, vector_shape); - result.permute_assignment(identity, vector); - for(std::size_t i = 0; i < result.size(); ++i) - REQUIRE(result.get_elem({i}) == data[i]); + testing::vector_addition_assignment(); } 
SECTION("matrix") { - permutation_type identity(2); - matrix_type result(result_data_span, matrix_shape); - result.permute_assignment(identity, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == data[i * 4 + j]); - - permutation_type p{{1, 0}}; - result.permute_assignment(p, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == data[j * 4 + i]); + testing::matrix_addition_assignment(); } SECTION("rank 3 tensor") { - permutation_type identity(3); - tensor3_type result(result_data_span, tensor3_shape); - result.permute_assignment(identity, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE(result.get_elem({i, j, k}) == data[i * 8 + j * 4 + k]); - - permutation_type p({1, 0, 2}); - result.permute_assignment(p, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE(result.get_elem({i, j, k}) == data[j * 8 + i * 4 + k]); + testing::tensor3_addition_assignment(); } SECTION("rank 4 tensor") { - permutation_type identity(4); - tensor4_type result(result_data_span, tensor4_shape); - result.permute_assignment(identity, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - data[i * 8 + j * 4 + k * 2 + l]); - - permutation_type p({1, 0, 2, 3}); - result.permute_assignment(p, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - data[j * 8 + i * 4 + k * 2 + l]); - - permutation_type p1({0, 1, 3, 2}); - result.permute_assignment(p1, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - data[i * 8 + j * 4 + l * 2 + k]); - - permutation_type p2({2, 3, 0, 1}); - result.permute_assignment(p2, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - data[k * 8 + l * 4 + i * 2 + j]); - - permutation_type p3({3, 2, 1, 0}); - result.permute_assignment(p3, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, 
l}) == - data[l * 8 + k * 4 + j * 2 + i]); + testing::tensor4_addition_assignment(); } } - SECTION("scalar_multiplication") { - std::vector result_data(16, TestType{0}); - std::span result_data_span(result_data.data(), - result_data.size()); - - TestType scalar_value(3); - - SECTION("scalar") { - permutation_type i(0); - scalar_type result(result_data_span, scalar_shape); - result.scalar_multiplication(i, scalar_value, scalar); - REQUIRE(result.get_elem({}) == data[0] * scalar_value); - } - - SECTION("vector") { - permutation_type identity(1); - vector_type result(result_data_span, vector_shape); - result.scalar_multiplication(identity, scalar_value, vector); - for(std::size_t i = 0; i < result.size(); ++i) - REQUIRE(result.get_elem({i}) == data[i] * scalar_value); - } - - SECTION("matrix") { - permutation_type identity(2); - matrix_type result(result_data_span, matrix_shape); - result.scalar_multiplication(identity, scalar_value, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - data[i * 4 + j] * scalar_value); - - permutation_type p{{1, 0}}; - result.scalar_multiplication(p, scalar_value, matrix); - for(auto [i, j] : matrix_indices) - REQUIRE(result.get_elem({i, j}) == - data[j * 4 + i] * scalar_value); - } - - SECTION("rank 3 tensor") { - permutation_type identity(3); - tensor3_type result(result_data_span, tensor3_shape); - result.scalar_multiplication(identity, scalar_value, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE(result.get_elem({i, j, k}) == - data[i * 8 + j * 4 + k] * scalar_value); - - permutation_type p({1, 0, 2}); - result.scalar_multiplication(p, scalar_value, tensor3); - for(auto [i, j, k] : tensor3_indices) - REQUIRE(result.get_elem({i, j, k}) == - data[j * 8 + i * 4 + k] * scalar_value); - } - - SECTION("rank 4 tensor") { - permutation_type identity(4); - tensor4_type result(result_data_span, tensor4_shape); - result.scalar_multiplication(identity, scalar_value, tensor4); - for(auto [i, j, k, l] : 
tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - data[i * 8 + j * 4 + k * 2 + l] * scalar_value); - - permutation_type p({1, 0, 2, 3}); - result.scalar_multiplication(p, scalar_value, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - data[j * 8 + i * 4 + k * 2 + l] * scalar_value); - - permutation_type p1({0, 1, 3, 2}); - result.scalar_multiplication(p1, scalar_value, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - data[i * 8 + j * 4 + l * 2 + k] * scalar_value); - - permutation_type p2({2, 3, 0, 1}); - result.scalar_multiplication(p2, scalar_value, tensor4); - for(auto [i, j, k, l] : tensor4_indices) - REQUIRE(result.get_elem({i, j, k, l}) == - data[k * 8 + l * 4 + i * 2 + j] * scalar_value); - } - } + // SECTION("subtraction_assignment") { + // std::vector result_data(16, TestType{0}); + // std::span result_data_span(result_data.data(), + // result_data.size()); + + // auto lambda = [](TestType a, TestType b) { return a - b; }; + + // SECTION("scalar") { + // permutation_type i(0); + // scalar_type result(result_data_span, scalar_shape); + // result.subtraction_assignment(i, i, scalar, scalar); + // REQUIRE(result.get_elem({}) == lambda(data[0], data[0])); + // } + + // SECTION("vector") { + // permutation_type i(1); + // vector_type result(result_data_span, vector_shape); + // result.subtraction_assignment(i, i, vector, vector); + // for(std::size_t i = 0; i < result.size(); ++i) + // REQUIRE(result.get_elem({i}) == lambda(data[i], data[i])); + // } + + // SECTION("matrix") { + // permutation_type i(2); + // matrix_type result(result_data_span, matrix_shape); + // result.subtraction_assignment(i, i, matrix, matrix); + // for(auto [i, j] : matrix_indices) + // REQUIRE(result.get_elem({i, j}) == + // lambda(data[i * 4 + j], data[i * 4 + j])); + + // permutation_type p{{1, 0}}; + // result.subtraction_assignment(p, i, matrix, matrix); + // for(auto [i, j] 
: matrix_indices) + // REQUIRE(result.get_elem({i, j}) == + // lambda(data[j * 4 + i], data[i * 4 + j])); + + // result.subtraction_assignment(p, p, matrix, matrix); + // for(auto [i, j] : matrix_indices) + // REQUIRE(result.get_elem({i, j}) == + // lambda(data[j * 4 + i], data[j * 4 + i])); + // } + + // SECTION("rank 3 tensor") { + // permutation_type i(3); + // tensor3_type result(result_data_span, tensor3_shape); + // result.subtraction_assignment(i, i, tensor3, tensor3); + // for(auto [i, j, k] : tensor3_indices) + // REQUIRE( + // result.get_elem({i, j, k}) == + // lambda(data[i * 8 + j * 4 + k], data[i * 8 + j * 4 + k])); + + // permutation_type p({1, 0, 2}); + // result.subtraction_assignment(p, i, tensor3, tensor3); + // for(auto [i, j, k] : tensor3_indices) + // REQUIRE( + // result.get_elem({i, j, k}) == + // lambda(data[j * 8 + i * 4 + k], data[i * 8 + j * 4 + k])); + + // result.subtraction_assignment(p, p, tensor3, tensor3); + // for(auto [i, j, k] : tensor3_indices) + // REQUIRE( + // result.get_elem({i, j, k}) == + // lambda(data[j * 8 + i * 4 + k], data[j * 8 + i * 4 + k])); + // } + + // SECTION("rank 4 tensor") { + // permutation_type i(4); + // tensor4_type result(result_data_span, tensor4_shape); + // result.subtraction_assignment(i, i, tensor4, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // lambda(data[i * 8 + j * 4 + k * 2 + l], + // data[i * 8 + j * 4 + k * 2 + l])); + + // permutation_type p({1, 0, 2, 3}); + // result.subtraction_assignment(p, i, tensor4, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // lambda(data[j * 8 + i * 4 + k * 2 + l], + // data[i * 8 + j * 4 + k * 2 + l])); + + // permutation_type p1({0, 1, 3, 2}); + // result.subtraction_assignment(p1, i, tensor4, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // lambda(data[i * 8 + j * 4 + l * 2 + k], + // 
data[i * 8 + j * 4 + k * 2 + l])); + + // permutation_type p2({2, 3, 0, 1}); + // result.subtraction_assignment(p2, p, tensor4, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // lambda(data[k * 8 + l * 4 + i * 2 + j], + // data[j * 8 + i * 4 + k * 2 + l])); + // } + // } + + // SECTION("hadamard_assignment") { + // std::vector result_data(16, TestType{0}); + // std::span result_data_span(result_data.data(), + // result_data.size()); + + // auto lambda = [](TestType a, TestType b) { return a * b; }; + + // SECTION("scalar") { + // permutation_type i(0); + // scalar_type result(result_data_span, scalar_shape); + // result.hadamard_assignment(i, i, scalar, scalar); + // REQUIRE(result.get_elem({}) == lambda(data[0], data[0])); + // } + + // SECTION("vector") { + // permutation_type i(1); + // vector_type result(result_data_span, vector_shape); + // result.hadamard_assignment(i, i, vector, vector); + // for(std::size_t i = 0; i < result.size(); ++i) + // REQUIRE(result.get_elem({i}) == lambda(data[i], data[i])); + // } + + // SECTION("matrix") { + // permutation_type i(2); + // matrix_type result(result_data_span, matrix_shape); + // result.hadamard_assignment(i, i, matrix, matrix); + // for(auto [i, j] : matrix_indices) + // REQUIRE(result.get_elem({i, j}) == + // lambda(data[i * 4 + j], data[i * 4 + j])); + + // permutation_type p{{1, 0}}; + // result.hadamard_assignment(p, i, matrix, matrix); + // for(auto [i, j] : matrix_indices) + // REQUIRE(result.get_elem({i, j}) == + // lambda(data[j * 4 + i], data[i * 4 + j])); + + // result.hadamard_assignment(p, p, matrix, matrix); + // for(auto [i, j] : matrix_indices) + // REQUIRE(result.get_elem({i, j}) == + // lambda(data[j * 4 + i], data[j * 4 + i])); + // } + + // SECTION("rank 3 tensor") { + // permutation_type i(3); + // tensor3_type result(result_data_span, tensor3_shape); + // result.hadamard_assignment(i, i, tensor3, tensor3); + // for(auto [i, j, k] : 
tensor3_indices) + // REQUIRE( + // result.get_elem({i, j, k}) == + // lambda(data[i * 8 + j * 4 + k], data[i * 8 + j * 4 + k])); + + // permutation_type p({1, 0, 2}); + // result.hadamard_assignment(p, i, tensor3, tensor3); + // for(auto [i, j, k] : tensor3_indices) + // REQUIRE( + // result.get_elem({i, j, k}) == + // lambda(data[j * 8 + i * 4 + k], data[i * 8 + j * 4 + k])); + + // result.hadamard_assignment(p, p, tensor3, tensor3); + // for(auto [i, j, k] : tensor3_indices) + // REQUIRE( + // result.get_elem({i, j, k}) == + // lambda(data[j * 8 + i * 4 + k], data[j * 8 + i * 4 + k])); + // } + + // SECTION("rank 4 tensor") { + // permutation_type i(4); + // tensor4_type result(result_data_span, tensor4_shape); + // result.hadamard_assignment(i, i, tensor4, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // lambda(data[i * 8 + j * 4 + k * 2 + l], + // data[i * 8 + j * 4 + k * 2 + l])); + + // permutation_type p({1, 0, 2, 3}); + // result.hadamard_assignment(p, i, tensor4, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // lambda(data[j * 8 + i * 4 + k * 2 + l], + // data[i * 8 + j * 4 + k * 2 + l])); + + // permutation_type p1({0, 1, 3, 2}); + // result.hadamard_assignment(p1, i, tensor4, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // lambda(data[i * 8 + j * 4 + l * 2 + k], + // data[i * 8 + j * 4 + k * 2 + l])); + + // permutation_type p2({2, 3, 0, 1}); + // result.hadamard_assignment(p2, p, tensor4, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // lambda(data[k * 8 + l * 4 + i * 2 + j], + // data[j * 8 + i * 4 + k * 2 + l])); + // } + // } + + // SECTION("permute_assignment") { + // std::vector result_data(16, TestType{0}); + // std::span result_data_span(result_data.data(), + // result_data.size()); + + // SECTION("scalar") { + // 
permutation_type identity(0); + // scalar_type result(result_data_span, scalar_shape); + // result.permute_assignment(identity, scalar); + // REQUIRE(result.get_elem({}) == data[0]); + // } + + // SECTION("vector") { + // permutation_type identity(1); + // vector_type result(result_data_span, vector_shape); + // result.permute_assignment(identity, vector); + // for(std::size_t i = 0; i < result.size(); ++i) + // REQUIRE(result.get_elem({i}) == data[i]); + // } + + // SECTION("matrix") { + // permutation_type identity(2); + // matrix_type result(result_data_span, matrix_shape); + // result.permute_assignment(identity, matrix); + // for(auto [i, j] : matrix_indices) + // REQUIRE(result.get_elem({i, j}) == data[i * 4 + j]); + + // permutation_type p{{1, 0}}; + // result.permute_assignment(p, matrix); + // for(auto [i, j] : matrix_indices) + // REQUIRE(result.get_elem({i, j}) == data[j * 4 + i]); + // } + + // SECTION("rank 3 tensor") { + // permutation_type identity(3); + // tensor3_type result(result_data_span, tensor3_shape); + // result.permute_assignment(identity, tensor3); + // for(auto [i, j, k] : tensor3_indices) + // REQUIRE(result.get_elem({i, j, k}) == data[i * 8 + j * 4 + + // k]); + + // permutation_type p({1, 0, 2}); + // result.permute_assignment(p, tensor3); + // for(auto [i, j, k] : tensor3_indices) + // REQUIRE(result.get_elem({i, j, k}) == data[j * 8 + i * 4 + + // k]); + // } + + // SECTION("rank 4 tensor") { + // permutation_type identity(4); + // tensor4_type result(result_data_span, tensor4_shape); + // result.permute_assignment(identity, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // data[i * 8 + j * 4 + k * 2 + l]); + + // permutation_type p({1, 0, 2, 3}); + // result.permute_assignment(p, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // data[j * 8 + i * 4 + k * 2 + l]); + + // permutation_type p1({0, 1, 3, 2}); + // 
result.permute_assignment(p1, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // data[i * 8 + j * 4 + l * 2 + k]); + + // permutation_type p2({2, 3, 0, 1}); + // result.permute_assignment(p2, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // data[k * 8 + l * 4 + i * 2 + j]); + + // permutation_type p3({3, 2, 1, 0}); + // result.permute_assignment(p3, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // data[l * 8 + k * 4 + j * 2 + i]); + // } + // } + + // SECTION("scalar_multiplication") { + // std::vector result_data(16, TestType{0}); + // std::span result_data_span(result_data.data(), + // result_data.size()); + + // TestType scalar_value(3); + + // SECTION("scalar") { + // permutation_type i(0); + // scalar_type result(result_data_span, scalar_shape); + // result.scalar_multiplication(i, scalar_value, scalar); + // REQUIRE(result.get_elem({}) == data[0] * scalar_value); + // } + + // SECTION("vector") { + // permutation_type identity(1); + // vector_type result(result_data_span, vector_shape); + // result.scalar_multiplication(identity, scalar_value, vector); + // for(std::size_t i = 0; i < result.size(); ++i) + // REQUIRE(result.get_elem({i}) == data[i] * scalar_value); + // } + + // SECTION("matrix") { + // permutation_type identity(2); + // matrix_type result(result_data_span, matrix_shape); + // result.scalar_multiplication(identity, scalar_value, matrix); + // for(auto [i, j] : matrix_indices) + // REQUIRE(result.get_elem({i, j}) == + // data[i * 4 + j] * scalar_value); + + // permutation_type p{{1, 0}}; + // result.scalar_multiplication(p, scalar_value, matrix); + // for(auto [i, j] : matrix_indices) + // REQUIRE(result.get_elem({i, j}) == + // data[j * 4 + i] * scalar_value); + // } + + // SECTION("rank 3 tensor") { + // permutation_type identity(3); + // tensor3_type 
result(result_data_span, tensor3_shape); + // result.scalar_multiplication(identity, scalar_value, tensor3); + // for(auto [i, j, k] : tensor3_indices) + // REQUIRE(result.get_elem({i, j, k}) == + // data[i * 8 + j * 4 + k] * scalar_value); + + // permutation_type p({1, 0, 2}); + // result.scalar_multiplication(p, scalar_value, tensor3); + // for(auto [i, j, k] : tensor3_indices) + // REQUIRE(result.get_elem({i, j, k}) == + // data[j * 8 + i * 4 + k] * scalar_value); + // } + + // SECTION("rank 4 tensor") { + // permutation_type identity(4); + // tensor4_type result(result_data_span, tensor4_shape); + // result.scalar_multiplication(identity, scalar_value, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // data[i * 8 + j * 4 + k * 2 + l] * scalar_value); + + // permutation_type p({1, 0, 2, 3}); + // result.scalar_multiplication(p, scalar_value, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // data[j * 8 + i * 4 + k * 2 + l] * scalar_value); + + // permutation_type p1({0, 1, 3, 2}); + // result.scalar_multiplication(p1, scalar_value, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // data[i * 8 + j * 4 + l * 2 + k] * scalar_value); + + // permutation_type p2({2, 3, 0, 1}); + // result.scalar_multiplication(p2, scalar_value, tensor4); + // for(auto [i, j, k, l] : tensor4_indices) + // REQUIRE(result.get_elem({i, j, k, l}) == + // data[k * 8 + l * 4 + i * 2 + j] * scalar_value); + // } + // } } diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/testing/addition_assignment.hpp b/tests/cxx/unit_tests/tensorwrapper/backends/testing/addition_assignment.hpp new file mode 100644 index 00000000..9e17d587 --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/addition_assignment.hpp @@ -0,0 +1,72 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "elementwise_op.hpp" + +namespace tensorwrapper::testing { + +template +void scalar_addition_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.addition_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a + b; }; + scalar_binary_assignment(the_op, corr_op); +} + +template +void vector_addition_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.addition_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a + b; }; + vector_binary_assignment(the_op, corr_op); +} + +template +void matrix_addition_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.addition_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a + b; }; + matrix_binary_assignment(the_op, corr_op); +} + +template +void tensor3_addition_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.addition_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a + b; }; + tensor3_binary_assignment(the_op, corr_op); +} + +template +void tensor4_addition_assignment() { + auto the_op 
= [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.addition_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a + b; }; + tensor4_binary_assignment(the_op, corr_op); +} + +} // namespace tensorwrapper::testing diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/testing/elementwise_op.hpp b/tests/cxx/unit_tests/tensorwrapper/backends/testing/elementwise_op.hpp new file mode 100644 index 00000000..b19ee125 --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/elementwise_op.hpp @@ -0,0 +1,315 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once +#include +#include + +namespace tensorwrapper::testing { + +template +void scalar_binary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { + using value_type = typename TestType::value_type; + using shape_type = typename TestType::shape_type; + using label_type = typename TestType::label_type; + + std::vector result_data(1, value_type{0}); + std::span result_span(result_data.data(), result_data.size()); + + std::vector s0_data(1, value_type{3}); + std::span s0_span(s0_data.data(), s0_data.size()); + + std::vector s1_data(1, value_type{5}); + std::span s1_span(s1_data.data(), s1_data.size()); + + TestType result(result_span, shape_type({})); + TestType s0(s0_span, shape_type({})); + TestType s1(s1_span, shape_type({})); + + label_type out(""); + label_type lhs(""); + label_type rhs(""); + the_op(out, lhs, rhs, result, s0, s1); + REQUIRE(result.get_elem({}) == corr_op(s0_data[0], s1_data[0])); +} + +template +void vector_binary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { + using value_type = typename TestType::value_type; + using shape_type = typename TestType::shape_type; + using label_type = typename TestType::label_type; + + std::vector result_data(4, value_type{0}); + std::span result_span(result_data.data(), result_data.size()); + + std::vector s0_data{value_type{1}, value_type{2}, value_type{3}, + value_type{4}}; + std::span s0_span(s0_data.data(), s0_data.size()); + + std::vector s1_data{value_type{5}, value_type{6}, value_type{7}, + value_type{8}}; + std::span s1_span(s1_data.data(), s1_data.size()); + + TestType result(result_span, shape_type({4})); + TestType s0(s0_span, shape_type({4})); + TestType s1(s1_span, shape_type({4})); + + label_type out("i"); + label_type lhs("i"); + label_type rhs("i"); + + the_op(out, lhs, rhs, result, s0, s1); + for(std::size_t i = 0; i < 4; ++i) { + REQUIRE(result.get_elem({i}) == corr_op(s0_data[i], s1_data[i])); + } +} + +template +void matrix_binary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { + using 
value_type = typename TestType::value_type; + using shape_type = typename TestType::shape_type; + using label_type = typename TestType::label_type; + + std::vector result_data(16, value_type{0}); + std::span result_span(result_data.data(), result_data.size()); + + std::vector s0_data(16); + std::vector s1_data(16); + for(std::size_t i = 0; i < s0_data.size(); ++i) { + s0_data[i] = static_cast(i); + s1_data[i] = static_cast(i * 2); + } + + std::span s0_span(s0_data.data(), s0_data.size()); + std::span s1_span(s1_data.data(), s1_data.size()); + + TestType result(result_span, shape_type({4, 4})); + TestType s0(s0_span, shape_type({4, 4})); + TestType s1(s1_span, shape_type({4, 4})); + + label_type ij("i,j"); + label_type ji("j,i"); + + SECTION("No permutation") { + the_op(ij, ij, ij, result, s0, s1); + for(std::size_t i = 0; i < 4; ++i) { + for(std::size_t j = 0; j < 4; ++j) { + std::size_t idx = i * 4 + j; + auto corr = corr_op(s0_data[idx], s1_data[idx]); + REQUIRE(result.get_elem({i, j}) == corr); + } + } + } + + SECTION("Permute lhs") { + the_op(ij, ji, ij, result, s0, s1); + for(std::size_t i = 0; i < 4; ++i) { + for(std::size_t j = 0; j < 4; ++j) { + std::size_t lhs_idx = j * 4 + i; + std::size_t rhs_idx = i * 4 + j; + auto corr = corr_op(s0_data[lhs_idx], s1_data[rhs_idx]); + REQUIRE(result.get_elem({i, j}) == corr); + } + } + } + + SECTION("Permute rhs") { + the_op(ij, ij, ji, result, s0, s1); + for(std::size_t i = 0; i < 4; ++i) { + for(std::size_t j = 0; j < 4; ++j) { + std::size_t lhs_idx = i * 4 + j; + std::size_t rhs_idx = j * 4 + i; + auto corr = corr_op(s0_data[lhs_idx], s1_data[rhs_idx]); + REQUIRE(result.get_elem({i, j}) == corr); + } + } + } + + SECTION("Permute result") { + the_op(ji, ij, ij, result, s0, s1); + for(std::size_t i = 0; i < 4; ++i) { + for(std::size_t j = 0; j < 4; ++j) { + std::size_t lhs_idx = i * 4 + j; + std::size_t rhs_idx = i * 4 + j; + auto corr = corr_op(s0_data[lhs_idx], s1_data[rhs_idx]); + REQUIRE(result.get_elem({j, i}) == 
corr); + } + } + } +} + +template +void tensor3_binary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { + using value_type = typename TestType::value_type; + using shape_type = typename TestType::shape_type; + using label_type = typename TestType::label_type; + + const auto n_elements = 8; + std::vector result_data(n_elements, value_type{0}); + std::span result_span(result_data.data(), result_data.size()); + + std::vector t0_data(n_elements); + std::vector t1_data(n_elements); + for(std::size_t i = 0; i < n_elements; ++i) { + t0_data[i] = static_cast(i); + t1_data[i] = static_cast(i * 2); + } + + std::span t0_span(t0_data.data(), t0_data.size()); + std::span t1_span(t1_data.data(), t1_data.size()); + + using rank3_index = std::array; + std::vector tensor3_indices; + for(std::size_t i = 0; i < 2; ++i) { + for(std::size_t j = 0; j < 2; ++j) { + for(std::size_t k = 0; k < 2; ++k) + tensor3_indices.push_back(rank3_index{i, j, k}); + } + } + + TestType result(result_span, shape_type({2, 2, 2})); + TestType t0(t0_span, shape_type({2, 2, 2})); + TestType t1(t1_span, shape_type({2, 2, 2})); + + label_type ijk("i,j,k"); + label_type jik("j,i,k"); + + SECTION("No permutation") { + the_op(ijk, ijk, ijk, result, t0, t1); + for(auto [i, j, k] : tensor3_indices) { + std::size_t lhs_idx = i * 4 + j * 2 + k; + std::size_t rhs_idx = i * 4 + j * 2 + k; + auto corr = corr_op(t0_data[lhs_idx], t1_data[rhs_idx]); + REQUIRE(result.get_elem({i, j, k}) == corr); + } + } + + SECTION("Permute lhs") { + the_op(ijk, jik, ijk, result, t0, t1); + for(auto [i, j, k] : tensor3_indices) { + std::size_t lhs_idx = j * 4 + i * 2 + k; + std::size_t rhs_idx = i * 4 + j * 2 + k; + auto corr = corr_op(t0_data[lhs_idx], t1_data[rhs_idx]); + REQUIRE(result.get_elem({i, j, k}) == corr); + } + } + + SECTION("Permute rhs") { + the_op(ijk, ijk, jik, result, t0, t1); + for(auto [i, j, k] : tensor3_indices) { + std::size_t lhs_idx = i * 4 + j * 2 + k; + std::size_t rhs_idx = j * 4 + i * 2 + k; + auto corr = 
corr_op(t0_data[lhs_idx], t1_data[rhs_idx]); + REQUIRE(result.get_elem({i, j, k}) == corr); + } + } + + SECTION("Permute result") { + the_op(jik, ijk, ijk, result, t0, t1); + for(auto [i, j, k] : tensor3_indices) { + std::size_t lhs_idx = i * 4 + j * 2 + k; + std::size_t rhs_idx = i * 4 + j * 2 + k; + auto corr = corr_op(t0_data[lhs_idx], t1_data[rhs_idx]); + REQUIRE(result.get_elem({j, i, k}) == corr); + } + } +} + +template +void tensor4_binary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { + using value_type = typename TestType::value_type; + using shape_type = typename TestType::shape_type; + using label_type = typename TestType::label_type; + + const auto n_elements = 16; + std::vector result_data(n_elements, value_type{0}); + std::span result_span(result_data.data(), result_data.size()); + + std::vector t0_data(n_elements); + std::vector t1_data(n_elements); + for(std::size_t i = 0; i < n_elements; ++i) { + t0_data[i] = static_cast(i); + t1_data[i] = static_cast(i * 2); + } + + std::span t0_span(t0_data.data(), t0_data.size()); + std::span t1_span(t1_data.data(), t1_data.size()); + + using rank4_index = std::array; + std::vector tensor4_indices; + for(std::size_t i = 0; i < 2; ++i) { + for(std::size_t j = 0; j < 2; ++j) { + for(std::size_t k = 0; k < 2; ++k) { + for(std::size_t l = 0; l < 2; ++l) { + tensor4_indices.emplace_back(rank4_index{i, j, k, l}); + } + } + } + } + + TestType result(result_span, shape_type({2, 2, 2, 2})); + TestType t0(t0_span, shape_type({2, 2, 2, 2})); + TestType t1(t1_span, shape_type({2, 2, 2, 2})); + + label_type ijkl("i,j,k,l"); + label_type jilk("j,i,l,k"); + + const auto stride0 = 8; + const auto stride1 = 4; + const auto stride2 = 2; + + SECTION("No permutation") { + the_op(ijkl, ijkl, ijkl, result, t0, t1); + for(auto [i, j, k, l] : tensor4_indices) { + std::size_t lhs_idx = i * stride0 + j * stride1 + k * stride2 + l; + std::size_t rhs_idx = i * stride0 + j * stride1 + k * stride2 + l; + auto corr = corr_op(t0_data[lhs_idx], 
t1_data[rhs_idx]); + REQUIRE(result.get_elem({i, j, k, l}) == corr); + } + } + + SECTION("Permute lhs") { + the_op(ijkl, jilk, ijkl, result, t0, t1); + for(auto [i, j, k, l] : tensor4_indices) { + std::size_t lhs_idx = j * stride0 + i * stride1 + l * stride2 + k; + std::size_t rhs_idx = i * stride0 + j * stride1 + k * stride2 + l; + auto corr = corr_op(t0_data[lhs_idx], t1_data[rhs_idx]); + REQUIRE(result.get_elem({i, j, k, l}) == corr); + } + } + + SECTION("Permute rhs") { + the_op(ijkl, ijkl, jilk, result, t0, t1); + for(auto [i, j, k, l] : tensor4_indices) { + std::size_t lhs_idx = i * stride0 + j * stride1 + k * stride2 + l; + std::size_t rhs_idx = j * stride0 + i * stride1 + l * stride2 + k; + auto corr = corr_op(t0_data[lhs_idx], t1_data[rhs_idx]); + REQUIRE(result.get_elem({i, j, k, l}) == corr); + } + } + + SECTION("Permute result") { + the_op(jilk, ijkl, ijkl, result, t0, t1); + for(auto [i, j, k, l] : tensor4_indices) { + std::size_t lhs_idx = i * stride0 + j * stride1 + k * stride2 + l; + std::size_t rhs_idx = i * stride0 + j * stride1 + k * stride2 + l; + auto corr = corr_op(t0_data[lhs_idx], t1_data[rhs_idx]); + REQUIRE(result.get_elem({j, i, l, k}) == corr); + } + } +} + +} // namespace tensorwrapper::testing From 78123a062e410b43ee0a7542ea169fa188379c09 Mon Sep 17 00:00:00 2001 From: "Ryan M. 
Richard" Date: Fri, 14 Nov 2025 15:59:54 -0600 Subject: [PATCH 03/18] backup for the day --- .../backends/eigen/eigen_tensor_impl.cpp | 265 ++++-------------- .../backends/testing/hadamard_assignment.hpp | 72 +++++ .../backends/testing/permute_assignment.hpp | 31 ++ .../testing/subtraction_assignment.hpp | 72 +++++ .../backends/testing/unary_op.hpp | 44 +++ 5 files changed, 276 insertions(+), 208 deletions(-) create mode 100644 tests/cxx/unit_tests/tensorwrapper/backends/testing/hadamard_assignment.hpp create mode 100644 tests/cxx/unit_tests/tensorwrapper/backends/testing/permute_assignment.hpp create mode 100644 tests/cxx/unit_tests/tensorwrapper/backends/testing/subtraction_assignment.hpp create mode 100644 tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp index e46cae2c..7eaea8b6 100644 --- a/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp +++ b/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -16,6 +16,9 @@ #include "../../testing/testing.hpp" #include "../testing/addition_assignment.hpp" +#include "../testing/hadamard_assignment.hpp" +#include "../testing/permute_assignment.hpp" +#include "../testing/subtraction_assignment.hpp" #include using namespace tensorwrapper; @@ -174,224 +177,70 @@ TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { } } - // SECTION("subtraction_assignment") { - // std::vector result_data(16, TestType{0}); - // std::span result_data_span(result_data.data(), - // result_data.size()); - - // auto lambda = [](TestType a, TestType b) { return a - b; }; - - // SECTION("scalar") { - // permutation_type i(0); - // scalar_type result(result_data_span, scalar_shape); - // result.subtraction_assignment(i, i, scalar, scalar); - // REQUIRE(result.get_elem({}) == lambda(data[0], data[0])); - // } - - // 
SECTION("vector") { - // permutation_type i(1); - // vector_type result(result_data_span, vector_shape); - // result.subtraction_assignment(i, i, vector, vector); - // for(std::size_t i = 0; i < result.size(); ++i) - // REQUIRE(result.get_elem({i}) == lambda(data[i], data[i])); - // } - - // SECTION("matrix") { - // permutation_type i(2); - // matrix_type result(result_data_span, matrix_shape); - // result.subtraction_assignment(i, i, matrix, matrix); - // for(auto [i, j] : matrix_indices) - // REQUIRE(result.get_elem({i, j}) == - // lambda(data[i * 4 + j], data[i * 4 + j])); - - // permutation_type p{{1, 0}}; - // result.subtraction_assignment(p, i, matrix, matrix); - // for(auto [i, j] : matrix_indices) - // REQUIRE(result.get_elem({i, j}) == - // lambda(data[j * 4 + i], data[i * 4 + j])); - - // result.subtraction_assignment(p, p, matrix, matrix); - // for(auto [i, j] : matrix_indices) - // REQUIRE(result.get_elem({i, j}) == - // lambda(data[j * 4 + i], data[j * 4 + i])); - // } - - // SECTION("rank 3 tensor") { - // permutation_type i(3); - // tensor3_type result(result_data_span, tensor3_shape); - // result.subtraction_assignment(i, i, tensor3, tensor3); - // for(auto [i, j, k] : tensor3_indices) - // REQUIRE( - // result.get_elem({i, j, k}) == - // lambda(data[i * 8 + j * 4 + k], data[i * 8 + j * 4 + k])); - - // permutation_type p({1, 0, 2}); - // result.subtraction_assignment(p, i, tensor3, tensor3); - // for(auto [i, j, k] : tensor3_indices) - // REQUIRE( - // result.get_elem({i, j, k}) == - // lambda(data[j * 8 + i * 4 + k], data[i * 8 + j * 4 + k])); - - // result.subtraction_assignment(p, p, tensor3, tensor3); - // for(auto [i, j, k] : tensor3_indices) - // REQUIRE( - // result.get_elem({i, j, k}) == - // lambda(data[j * 8 + i * 4 + k], data[j * 8 + i * 4 + k])); - // } - - // SECTION("rank 4 tensor") { - // permutation_type i(4); - // tensor4_type result(result_data_span, tensor4_shape); - // result.subtraction_assignment(i, i, tensor4, tensor4); - // 
for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // lambda(data[i * 8 + j * 4 + k * 2 + l], - // data[i * 8 + j * 4 + k * 2 + l])); - - // permutation_type p({1, 0, 2, 3}); - // result.subtraction_assignment(p, i, tensor4, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // lambda(data[j * 8 + i * 4 + k * 2 + l], - // data[i * 8 + j * 4 + k * 2 + l])); - - // permutation_type p1({0, 1, 3, 2}); - // result.subtraction_assignment(p1, i, tensor4, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // lambda(data[i * 8 + j * 4 + l * 2 + k], - // data[i * 8 + j * 4 + k * 2 + l])); - - // permutation_type p2({2, 3, 0, 1}); - // result.subtraction_assignment(p2, p, tensor4, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // lambda(data[k * 8 + l * 4 + i * 2 + j], - // data[j * 8 + i * 4 + k * 2 + l])); - // } - // } - - // SECTION("hadamard_assignment") { - // std::vector result_data(16, TestType{0}); - // std::span result_data_span(result_data.data(), - // result_data.size()); - - // auto lambda = [](TestType a, TestType b) { return a * b; }; - - // SECTION("scalar") { - // permutation_type i(0); - // scalar_type result(result_data_span, scalar_shape); - // result.hadamard_assignment(i, i, scalar, scalar); - // REQUIRE(result.get_elem({}) == lambda(data[0], data[0])); - // } - - // SECTION("vector") { - // permutation_type i(1); - // vector_type result(result_data_span, vector_shape); - // result.hadamard_assignment(i, i, vector, vector); - // for(std::size_t i = 0; i < result.size(); ++i) - // REQUIRE(result.get_elem({i}) == lambda(data[i], data[i])); - // } - - // SECTION("matrix") { - // permutation_type i(2); - // matrix_type result(result_data_span, matrix_shape); - // result.hadamard_assignment(i, i, matrix, matrix); - // for(auto [i, j] : matrix_indices) - // 
REQUIRE(result.get_elem({i, j}) == - // lambda(data[i * 4 + j], data[i * 4 + j])); - - // permutation_type p{{1, 0}}; - // result.hadamard_assignment(p, i, matrix, matrix); - // for(auto [i, j] : matrix_indices) - // REQUIRE(result.get_elem({i, j}) == - // lambda(data[j * 4 + i], data[i * 4 + j])); - - // result.hadamard_assignment(p, p, matrix, matrix); - // for(auto [i, j] : matrix_indices) - // REQUIRE(result.get_elem({i, j}) == - // lambda(data[j * 4 + i], data[j * 4 + i])); - // } + SECTION("subtraction_assignment") { + SECTION("scalar") { + testing::scalar_subtraction_assignment(); + } - // SECTION("rank 3 tensor") { - // permutation_type i(3); - // tensor3_type result(result_data_span, tensor3_shape); - // result.hadamard_assignment(i, i, tensor3, tensor3); - // for(auto [i, j, k] : tensor3_indices) - // REQUIRE( - // result.get_elem({i, j, k}) == - // lambda(data[i * 8 + j * 4 + k], data[i * 8 + j * 4 + k])); + SECTION("vector") { + testing::vector_subtraction_assignment(); + } - // permutation_type p({1, 0, 2}); - // result.hadamard_assignment(p, i, tensor3, tensor3); - // for(auto [i, j, k] : tensor3_indices) - // REQUIRE( - // result.get_elem({i, j, k}) == - // lambda(data[j * 8 + i * 4 + k], data[i * 8 + j * 4 + k])); + SECTION("matrix") { + testing::matrix_subtraction_assignment(); + } - // result.hadamard_assignment(p, p, tensor3, tensor3); - // for(auto [i, j, k] : tensor3_indices) - // REQUIRE( - // result.get_elem({i, j, k}) == - // lambda(data[j * 8 + i * 4 + k], data[j * 8 + i * 4 + k])); - // } + SECTION("rank 3 tensor") { + testing::tensor3_subtraction_assignment(); + } - // SECTION("rank 4 tensor") { - // permutation_type i(4); - // tensor4_type result(result_data_span, tensor4_shape); - // result.hadamard_assignment(i, i, tensor4, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // lambda(data[i * 8 + j * 4 + k * 2 + l], - // data[i * 8 + j * 4 + k * 2 + l])); + SECTION("rank 4 tensor") { 
+ testing::tensor4_subtraction_assignment(); + } + } - // permutation_type p({1, 0, 2, 3}); - // result.hadamard_assignment(p, i, tensor4, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // lambda(data[j * 8 + i * 4 + k * 2 + l], - // data[i * 8 + j * 4 + k * 2 + l])); + SECTION("hadamard_assignment") { + SECTION("scalar") { + testing::scalar_hadamard_assignment(); + } - // permutation_type p1({0, 1, 3, 2}); - // result.hadamard_assignment(p1, i, tensor4, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // lambda(data[i * 8 + j * 4 + l * 2 + k], - // data[i * 8 + j * 4 + k * 2 + l])); + SECTION("vector") { + testing::vector_hadamard_assignment(); + } - // permutation_type p2({2, 3, 0, 1}); - // result.hadamard_assignment(p2, p, tensor4, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // lambda(data[k * 8 + l * 4 + i * 2 + j], - // data[j * 8 + i * 4 + k * 2 + l])); - // } - // } + SECTION("matrix") { + testing::matrix_hadamard_assignment(); + } - // SECTION("permute_assignment") { - // std::vector result_data(16, TestType{0}); - // std::span result_data_span(result_data.data(), - // result_data.size()); + SECTION("rank 3 tensor") { + testing::tensor3_hadamard_assignment(); + } - // SECTION("scalar") { - // permutation_type identity(0); - // scalar_type result(result_data_span, scalar_shape); - // result.permute_assignment(identity, scalar); - // REQUIRE(result.get_elem({}) == data[0]); - // } + SECTION("rank 4 tensor") { + testing::tensor4_hadamard_assignment(); + } + } + SECTION("permute_assignment") { + SECTION("scalar") { testing::scalar_permute_assignment(); } + } // SECTION("vector") { - // permutation_type identity(1); + // using label_type = scalar_type::label_type; + // label_type result_idx("i"); + // label_type rhs_idx("i"); // vector_type result(result_data_span, vector_shape); - // 
result.permute_assignment(identity, vector); + // result.permute_assignment(result_idx, rhs_idx, vector); // for(std::size_t i = 0; i < result.size(); ++i) // REQUIRE(result.get_elem({i}) == data[i]); // } // SECTION("matrix") { - // permutation_type identity(2); + // using label_type = scalar_type::label_type; + // label_type ij("i,j"); + // label_type ji("j,i"); // matrix_type result(result_data_span, matrix_shape); - // result.permute_assignment(identity, matrix); + // result.permute_assignment(ij, ij, matrix); + // for(auto [i, j] : matrix_indices) // REQUIRE(result.get_elem({i, j}) == data[i * 4 + j]); @@ -406,14 +255,14 @@ TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { // tensor3_type result(result_data_span, tensor3_shape); // result.permute_assignment(identity, tensor3); // for(auto [i, j, k] : tensor3_indices) - // REQUIRE(result.get_elem({i, j, k}) == data[i * 8 + j * 4 + - // k]); + // REQUIRE(result.get_elem({i, j, k}) == data[i * 8 + j * 4 + // + k]); // permutation_type p({1, 0, 2}); // result.permute_assignment(p, tensor3); // for(auto [i, j, k] : tensor3_indices) - // REQUIRE(result.get_elem({i, j, k}) == data[j * 8 + i * 4 + - // k]); + // REQUIRE(result.get_elem({i, j, k}) == data[j * 8 + i * 4 + // + k]); // } // SECTION("rank 4 tensor") { @@ -490,8 +339,8 @@ TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { // SECTION("rank 3 tensor") { // permutation_type identity(3); // tensor3_type result(result_data_span, tensor3_shape); - // result.scalar_multiplication(identity, scalar_value, tensor3); - // for(auto [i, j, k] : tensor3_indices) + // result.scalar_multiplication(identity, scalar_value, + // tensor3); for(auto [i, j, k] : tensor3_indices) // REQUIRE(result.get_elem({i, j, k}) == // data[i * 8 + j * 4 + k] * scalar_value); @@ -505,8 +354,8 @@ TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { // SECTION("rank 4 tensor") { // permutation_type identity(4); // 
tensor4_type result(result_data_span, tensor4_shape); - // result.scalar_multiplication(identity, scalar_value, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) + // result.scalar_multiplication(identity, scalar_value, + // tensor4); for(auto [i, j, k, l] : tensor4_indices) // REQUIRE(result.get_elem({i, j, k, l}) == // data[i * 8 + j * 4 + k * 2 + l] * scalar_value); diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/testing/hadamard_assignment.hpp b/tests/cxx/unit_tests/tensorwrapper/backends/testing/hadamard_assignment.hpp new file mode 100644 index 00000000..b8e44285 --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/hadamard_assignment.hpp @@ -0,0 +1,72 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once +#include "elementwise_op.hpp" + +namespace tensorwrapper::testing { + +template +void scalar_hadamard_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.hadamard_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a * b; }; + scalar_binary_assignment(the_op, corr_op); +} + +template +void vector_hadamard_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.hadamard_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a * b; }; + vector_binary_assignment(the_op, corr_op); +} + +template +void matrix_hadamard_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.hadamard_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a * b; }; + matrix_binary_assignment(the_op, corr_op); +} + +template +void tensor3_hadamard_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.hadamard_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a * b; }; + tensor3_binary_assignment(the_op, corr_op); +} + +template +void tensor4_hadamard_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.hadamard_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a * b; }; + tensor4_binary_assignment(the_op, corr_op); +} + +} // namespace tensorwrapper::testing diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/testing/permute_assignment.hpp b/tests/cxx/unit_tests/tensorwrapper/backends/testing/permute_assignment.hpp new file mode 100644 index 00000000..6e7f154f --- 
/dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/permute_assignment.hpp @@ -0,0 +1,31 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "unary_op.hpp" + +namespace tensorwrapper::testing { + +template +void scalar_permute_assignment() { + auto the_op = [](auto&& out_idx, auto&& rhs_idx, auto&& result, auto&& t0) { + result.permute_assignment(out_idx, rhs_idx, t0); + }; + auto corr_op = [](auto a) { return a; }; + scalar_unary_assignment(the_op, corr_op); +} + +} // namespace tensorwrapper::testing diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/testing/subtraction_assignment.hpp b/tests/cxx/unit_tests/tensorwrapper/backends/testing/subtraction_assignment.hpp new file mode 100644 index 00000000..baf54910 --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/subtraction_assignment.hpp @@ -0,0 +1,72 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "elementwise_op.hpp" + +namespace tensorwrapper::testing { + +template +void scalar_subtraction_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.subtraction_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a - b; }; + scalar_binary_assignment(the_op, corr_op); +} + +template +void vector_subtraction_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.subtraction_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a - b; }; + vector_binary_assignment(the_op, corr_op); +} + +template +void matrix_subtraction_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.subtraction_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a - b; }; + matrix_binary_assignment(the_op, corr_op); +} + +template +void tensor3_subtraction_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.subtraction_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a - b; }; + tensor3_binary_assignment(the_op, corr_op); +} + +template +void tensor4_subtraction_assignment() { + auto the_op = [](auto&& out_idx, auto&& lhs_idx, auto&& rhs_idx, + auto&& result, auto&& t0, auto&& t1) { + result.subtraction_assignment(out_idx, lhs_idx, rhs_idx, t0, t1); + }; + auto corr_op = [](auto a, auto b) { return a - b; }; + tensor4_binary_assignment(the_op, corr_op); +} + +} // namespace tensorwrapper::testing diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp 
b/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp new file mode 100644 index 00000000..37df5ba1 --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp @@ -0,0 +1,44 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include +#include + +namespace tensorwrapper::testing { + +template +void scalar_unary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { + using value_type = typename TestType::value_type; + using shape_type = typename TestType::shape_type; + using label_type = typename TestType::label_type; + + std::vector result_data(1, value_type{0}); + std::span result_span(result_data.data(), result_data.size()); + + std::vector s0_data(1, value_type{3}); + std::span s0_span(s0_data.data(), s0_data.size()); + + TestType result(result_span, shape_type({})); + TestType s0(s0_span, shape_type({})); + + label_type out(""); + label_type rhs(""); + the_op(out, rhs, result, s0); + REQUIRE(result.get_elem({}) == corr_op(s0_data[0])); +} + +} // namespace tensorwrapper::testing From 1d077ffd6b424a5af657c10a32e3c50722d79242 Mon Sep 17 00:00:00 2001 From: "Ryan M. 
Richard" Date: Sun, 16 Nov 2025 22:30:19 -0600 Subject: [PATCH 04/18] adds scalar multiplication and permutation --- .../backends/eigen/eigen_tensor_impl.cpp | 181 +++-------------- .../backends/testing/permute_assignment.hpp | 36 ++++ .../testing/scalar_multiplication.hpp | 83 ++++++++ .../backends/testing/unary_op.hpp | 186 ++++++++++++++++++ 4 files changed, 332 insertions(+), 154 deletions(-) create mode 100644 tests/cxx/unit_tests/tensorwrapper/backends/testing/scalar_multiplication.hpp diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp index 7eaea8b6..d31a3c95 100644 --- a/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp +++ b/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -18,6 +18,7 @@ #include "../testing/addition_assignment.hpp" #include "../testing/hadamard_assignment.hpp" #include "../testing/permute_assignment.hpp" +#include "../testing/scalar_multiplication.hpp" #include "../testing/subtraction_assignment.hpp" #include @@ -223,159 +224,31 @@ TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { SECTION("permute_assignment") { SECTION("scalar") { testing::scalar_permute_assignment(); } + SECTION("vector") { testing::vector_permute_assignment(); } + SECTION("matrix") { testing::matrix_permute_assignment(); } + SECTION("rank 3 tensor") { + testing::tensor3_permute_assignment(); + } + SECTION("rank 4 tensor") { + testing::tensor4_permute_assignment(); + } + } + + SECTION("scalar_multiplication") { + SECTION("scalar") { + testing::scalar_scalar_multiplication(); + } + SECTION("vector") { + testing::vector_scalar_multiplication(); + } + SECTION("matrix") { + testing::matrix_scalar_multiplication(); + } + SECTION("rank 3 tensor") { + testing::tensor3_scalar_multiplication(); + } + SECTION("rank 4 tensor") { + testing::tensor4_scalar_multiplication(); + } } - // SECTION("vector") 
{ - // using label_type = scalar_type::label_type; - // label_type result_idx("i"); - // label_type rhs_idx("i"); - // vector_type result(result_data_span, vector_shape); - // result.permute_assignment(result_idx, rhs_idx, vector); - // for(std::size_t i = 0; i < result.size(); ++i) - // REQUIRE(result.get_elem({i}) == data[i]); - // } - - // SECTION("matrix") { - // using label_type = scalar_type::label_type; - // label_type ij("i,j"); - // label_type ji("j,i"); - // matrix_type result(result_data_span, matrix_shape); - // result.permute_assignment(ij, ij, matrix); - - // for(auto [i, j] : matrix_indices) - // REQUIRE(result.get_elem({i, j}) == data[i * 4 + j]); - - // permutation_type p{{1, 0}}; - // result.permute_assignment(p, matrix); - // for(auto [i, j] : matrix_indices) - // REQUIRE(result.get_elem({i, j}) == data[j * 4 + i]); - // } - - // SECTION("rank 3 tensor") { - // permutation_type identity(3); - // tensor3_type result(result_data_span, tensor3_shape); - // result.permute_assignment(identity, tensor3); - // for(auto [i, j, k] : tensor3_indices) - // REQUIRE(result.get_elem({i, j, k}) == data[i * 8 + j * 4 - // + k]); - - // permutation_type p({1, 0, 2}); - // result.permute_assignment(p, tensor3); - // for(auto [i, j, k] : tensor3_indices) - // REQUIRE(result.get_elem({i, j, k}) == data[j * 8 + i * 4 - // + k]); - // } - - // SECTION("rank 4 tensor") { - // permutation_type identity(4); - // tensor4_type result(result_data_span, tensor4_shape); - // result.permute_assignment(identity, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // data[i * 8 + j * 4 + k * 2 + l]); - - // permutation_type p({1, 0, 2, 3}); - // result.permute_assignment(p, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // data[j * 8 + i * 4 + k * 2 + l]); - - // permutation_type p1({0, 1, 3, 2}); - // result.permute_assignment(p1, tensor4); - // for(auto [i, j, k, l] : 
tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // data[i * 8 + j * 4 + l * 2 + k]); - - // permutation_type p2({2, 3, 0, 1}); - // result.permute_assignment(p2, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // data[k * 8 + l * 4 + i * 2 + j]); - - // permutation_type p3({3, 2, 1, 0}); - // result.permute_assignment(p3, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // data[l * 8 + k * 4 + j * 2 + i]); - // } - // } - - // SECTION("scalar_multiplication") { - // std::vector result_data(16, TestType{0}); - // std::span result_data_span(result_data.data(), - // result_data.size()); - - // TestType scalar_value(3); - - // SECTION("scalar") { - // permutation_type i(0); - // scalar_type result(result_data_span, scalar_shape); - // result.scalar_multiplication(i, scalar_value, scalar); - // REQUIRE(result.get_elem({}) == data[0] * scalar_value); - // } - - // SECTION("vector") { - // permutation_type identity(1); - // vector_type result(result_data_span, vector_shape); - // result.scalar_multiplication(identity, scalar_value, vector); - // for(std::size_t i = 0; i < result.size(); ++i) - // REQUIRE(result.get_elem({i}) == data[i] * scalar_value); - // } - - // SECTION("matrix") { - // permutation_type identity(2); - // matrix_type result(result_data_span, matrix_shape); - // result.scalar_multiplication(identity, scalar_value, matrix); - // for(auto [i, j] : matrix_indices) - // REQUIRE(result.get_elem({i, j}) == - // data[i * 4 + j] * scalar_value); - - // permutation_type p{{1, 0}}; - // result.scalar_multiplication(p, scalar_value, matrix); - // for(auto [i, j] : matrix_indices) - // REQUIRE(result.get_elem({i, j}) == - // data[j * 4 + i] * scalar_value); - // } - - // SECTION("rank 3 tensor") { - // permutation_type identity(3); - // tensor3_type result(result_data_span, tensor3_shape); - // result.scalar_multiplication(identity, 
scalar_value, - // tensor3); for(auto [i, j, k] : tensor3_indices) - // REQUIRE(result.get_elem({i, j, k}) == - // data[i * 8 + j * 4 + k] * scalar_value); - - // permutation_type p({1, 0, 2}); - // result.scalar_multiplication(p, scalar_value, tensor3); - // for(auto [i, j, k] : tensor3_indices) - // REQUIRE(result.get_elem({i, j, k}) == - // data[j * 8 + i * 4 + k] * scalar_value); - // } - - // SECTION("rank 4 tensor") { - // permutation_type identity(4); - // tensor4_type result(result_data_span, tensor4_shape); - // result.scalar_multiplication(identity, scalar_value, - // tensor4); for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // data[i * 8 + j * 4 + k * 2 + l] * scalar_value); - - // permutation_type p({1, 0, 2, 3}); - // result.scalar_multiplication(p, scalar_value, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // data[j * 8 + i * 4 + k * 2 + l] * scalar_value); - - // permutation_type p1({0, 1, 3, 2}); - // result.scalar_multiplication(p1, scalar_value, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // data[i * 8 + j * 4 + l * 2 + k] * scalar_value); - - // permutation_type p2({2, 3, 0, 1}); - // result.scalar_multiplication(p2, scalar_value, tensor4); - // for(auto [i, j, k, l] : tensor4_indices) - // REQUIRE(result.get_elem({i, j, k, l}) == - // data[k * 8 + l * 4 + i * 2 + j] * scalar_value); - // } - // } } diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/testing/permute_assignment.hpp b/tests/cxx/unit_tests/tensorwrapper/backends/testing/permute_assignment.hpp index 6e7f154f..3d2c4bf5 100644 --- a/tests/cxx/unit_tests/tensorwrapper/backends/testing/permute_assignment.hpp +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/permute_assignment.hpp @@ -28,4 +28,40 @@ void scalar_permute_assignment() { scalar_unary_assignment(the_op, corr_op); } +template +void 
vector_permute_assignment() { + auto the_op = [](auto&& out_idx, auto&& rhs_idx, auto&& result, auto&& t0) { + result.permute_assignment(out_idx, rhs_idx, t0); + }; + auto corr_op = [](auto a) { return a; }; + vector_unary_assignment(the_op, corr_op); +} + +template +void matrix_permute_assignment() { + auto the_op = [](auto&& out_idx, auto&& rhs_idx, auto&& result, auto&& t0) { + result.permute_assignment(out_idx, rhs_idx, t0); + }; + auto corr_op = [](auto a) { return a; }; + matrix_unary_assignment(the_op, corr_op); +} + +template +void tensor3_permute_assignment() { + auto the_op = [](auto&& out_idx, auto&& rhs_idx, auto&& result, auto&& t0) { + result.permute_assignment(out_idx, rhs_idx, t0); + }; + auto corr_op = [](auto a) { return a; }; + tensor3_unary_assignment(the_op, corr_op); +} + +template +void tensor4_permute_assignment() { + auto the_op = [](auto&& out_idx, auto&& rhs_idx, auto&& result, auto&& t0) { + result.permute_assignment(out_idx, rhs_idx, t0); + }; + auto corr_op = [](auto a) { return a; }; + tensor4_unary_assignment(the_op, corr_op); +} + } // namespace tensorwrapper::testing diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/testing/scalar_multiplication.hpp b/tests/cxx/unit_tests/tensorwrapper/backends/testing/scalar_multiplication.hpp new file mode 100644 index 00000000..e700fc1c --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/scalar_multiplication.hpp @@ -0,0 +1,83 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "unary_op.hpp" + +namespace tensorwrapper::testing { + +template +void scalar_scalar_multiplication() { + using value_type = typename TestType::value_type; + value_type scalar{42.0}; + auto the_op = [=](auto&& out_idx, auto&& rhs_idx, auto&& result, + auto&& t0) { + result.scalar_multiplication(out_idx, rhs_idx, scalar, t0); + }; + + auto corr_op = [=](auto a) { return a * scalar; }; + scalar_unary_assignment(the_op, corr_op); +} + +template +void vector_scalar_multiplication() { + using value_type = typename TestType::value_type; + value_type scalar{42.0}; + auto the_op = [=](auto&& out_idx, auto&& rhs_idx, auto&& result, + auto&& t0) { + result.scalar_multiplication(out_idx, rhs_idx, scalar, t0); + }; + auto corr_op = [=](auto a) { return scalar * a; }; + vector_unary_assignment(the_op, corr_op); +} + +template +void matrix_scalar_multiplication() { + using value_type = typename TestType::value_type; + value_type scalar{42.0}; + auto the_op = [=](auto&& out_idx, auto&& rhs_idx, auto&& result, + auto&& t0) { + result.scalar_multiplication(out_idx, rhs_idx, scalar, t0); + }; + auto corr_op = [=](auto a) { return a * scalar; }; + matrix_unary_assignment(the_op, corr_op); +} + +template +void tensor3_scalar_multiplication() { + using value_type = typename TestType::value_type; + value_type scalar{42.0}; + auto the_op = [=](auto&& out_idx, auto&& rhs_idx, auto&& result, + auto&& t0) { + result.scalar_multiplication(out_idx, rhs_idx, scalar, t0); + }; + auto corr_op = [=](auto a) { return scalar * a; }; + tensor3_unary_assignment(the_op, corr_op); +} + +template +void tensor4_scalar_multiplication() { + using value_type = typename TestType::value_type; + value_type scalar{42.0}; + auto the_op = [=](auto&& out_idx, auto&& rhs_idx, auto&& result, + auto&& t0) { + result.scalar_multiplication(out_idx, rhs_idx, scalar, t0); + }; + auto 
corr_op = [=](auto a) { return a * scalar; }; + tensor4_unary_assignment(the_op, corr_op); +} + +} // namespace tensorwrapper::testing diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp b/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp index 37df5ba1..af541d05 100644 --- a/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp @@ -41,4 +41,190 @@ void scalar_unary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { REQUIRE(result.get_elem({}) == corr_op(s0_data[0])); } +template +void vector_unary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { + using value_type = typename TestType::value_type; + using shape_type = typename TestType::shape_type; + using label_type = typename TestType::label_type; + + const auto n_elements = 4; + std::vector result_data(n_elements, value_type{0}); + std::span result_span(result_data.data(), result_data.size()); + + std::vector s0_data(n_elements, value_type{3}); + std::span s0_span(s0_data.data(), s0_data.size()); + + TestType result(result_span, shape_type({4})); + TestType s0(s0_span, shape_type({4})); + + label_type out("i"); + label_type rhs("i"); + the_op(out, rhs, result, s0); + for(std::size_t i = 0; i < n_elements; ++i) + REQUIRE(result.get_elem({i}) == corr_op(s0_data[i])); +} + +template +void matrix_unary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { + using value_type = typename TestType::value_type; + using shape_type = typename TestType::shape_type; + using label_type = typename TestType::label_type; + + const auto n_elements = 16; + std::vector result_data(n_elements, value_type{0}); + std::span result_span(result_data.data(), result_data.size()); + + std::vector s0_data(n_elements, value_type{0}); + for(std::size_t i = 0; i < n_elements; ++i) + s0_data[i] = static_cast(i); + + std::span s0_span(s0_data.data(), s0_data.size()); + + TestType result(result_span, shape_type({4, 4})); + TestType s0(s0_span, 
shape_type({4, 4})); + + label_type ij("i,j"); + label_type ji("j,i"); + + SECTION("No permutation") { + the_op(ij, ij, result, s0); + for(std::size_t i = 0; i < 4; ++i) + for(std::size_t j = 0; j < 4; ++j) { + std::size_t idx = i * 4 + j; + REQUIRE(result.get_elem({i, j}) == corr_op(s0_data[idx])); + } + } + + SECTION("Permute rhs") { + the_op(ij, ji, result, s0); + for(std::size_t i = 0; i < 4; ++i) + for(std::size_t j = 0; j < 4; ++j) { + std::size_t idx = j * 4 + i; + REQUIRE(result.get_elem({i, j}) == corr_op(s0_data[idx])); + } + } + + SECTION("Permute result") { + the_op(ji, ij, result, s0); + for(std::size_t i = 0; i < 4; ++i) + for(std::size_t j = 0; j < 4; ++j) { + std::size_t idx = i * 4 + j; + REQUIRE(result.get_elem({j, i}) == corr_op(s0_data[idx])); + } + } +} + +template +void tensor3_unary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { + using value_type = typename TestType::value_type; + using shape_type = typename TestType::shape_type; + using label_type = typename TestType::label_type; + + const auto n_elements = 8; + std::vector result_data(n_elements, value_type{0}); + std::span result_span(result_data.data(), result_data.size()); + + std::vector s0_data(n_elements, value_type{0}); + for(std::size_t i = 0; i < n_elements; ++i) + s0_data[i] = static_cast(i); + + std::span s0_span(s0_data.data(), s0_data.size()); + + TestType result(result_span, shape_type({2, 2, 2})); + TestType s0(s0_span, shape_type({2, 2, 2})); + + label_type ijk("i,j,k"); + label_type jik("j,i,k"); + + using rank3_index = std::array; + std::vector tensor3_indices; + for(std::size_t i = 0; i < 2; ++i) { + for(std::size_t j = 0; j < 2; ++j) { + for(std::size_t k = 0; k < 2; ++k) + tensor3_indices.push_back(rank3_index{i, j, k}); + } + } + + SECTION("No permutation") { + the_op(ijk, ijk, result, s0); + for(const auto [i, j, k] : tensor3_indices) { + std::size_t idx = i * 4 + j * 2 + k; + REQUIRE(result.get_elem({i, j, k}) == corr_op(s0_data[idx])); + } + } + + SECTION("Permute 
rhs") { + the_op(ijk, jik, result, s0); + for(const auto [i, j, k] : tensor3_indices) { + std::size_t idx = j * 4 + i * 2 + k; + REQUIRE(result.get_elem({i, j, k}) == corr_op(s0_data[idx])); + } + } + + SECTION("Permute result") { + the_op(jik, ijk, result, s0); + for(const auto [i, j, k] : tensor3_indices) { + std::size_t idx = i * 4 + j * 2 + k; + REQUIRE(result.get_elem({j, i, k}) == corr_op(s0_data[idx])); + } + } +} + +template +void tensor4_unary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { + using value_type = typename TestType::value_type; + using shape_type = typename TestType::shape_type; + using label_type = typename TestType::label_type; + + const auto n_elements = 16; + std::vector result_data(n_elements, value_type{0}); + std::span result_span(result_data.data(), result_data.size()); + + std::vector s0_data(n_elements, value_type{0}); + for(std::size_t i = 0; i < n_elements; ++i) + s0_data[i] = static_cast(i); + + std::span s0_span(s0_data.data(), s0_data.size()); + + TestType result(result_span, shape_type({2, 2, 2, 2})); + TestType s0(s0_span, shape_type({2, 2, 2, 2})); + + label_type ijkl("i,j,k,l"); + label_type jikl("j,i,k,l"); + + using rank4_index = std::array; + std::vector tensor4_indices; + for(std::size_t i = 0; i < 2; ++i) { + for(std::size_t j = 0; j < 2; ++j) { + for(std::size_t k = 0; k < 2; ++k) + for(std::size_t l = 0; l < 2; ++l) + tensor4_indices.push_back(rank4_index{i, j, k, l}); + } + } + + SECTION("No permutation") { + the_op(ijkl, ijkl, result, s0); + for(const auto [i, j, k, l] : tensor4_indices) { + std::size_t idx = i * 8 + j * 4 + k * 2 + l; + REQUIRE(result.get_elem({i, j, k, l}) == corr_op(s0_data[idx])); + } + } + + SECTION("Permute rhs") { + the_op(ijkl, jikl, result, s0); + for(const auto [i, j, k, l] : tensor4_indices) { + std::size_t idx = j * 8 + i * 4 + k * 2 + l; + REQUIRE(result.get_elem({i, j, k, l}) == corr_op(s0_data[idx])); + } + } + + SECTION("Permute result") { + the_op(jikl, ijkl, result, s0); + for(const 
auto [i, j, k, l] : tensor4_indices) { + std::size_t idx = i * 8 + j * 4 + k * 2 + l; + REQUIRE(result.get_elem({j, i, k, l}) == corr_op(s0_data[idx])); + } + } +} + } // namespace tensorwrapper::testing From 5a9aeee483f6b3d30d93d29ca5f80355d78c53f8 Mon Sep 17 00:00:00 2001 From: "Ryan M. Richard" Date: Mon, 17 Nov 2025 14:56:03 -0600 Subject: [PATCH 05/18] contraction works, cutensor quasitested --- .../backends/cutensor/cuda_tensor.cpp | 3 + .../backends/cutensor/cuda_tensor.hpp | 3 + .../backends/eigen/eigen_tensor.hpp | 35 ++- .../backends/eigen/eigen_tensor_impl.cpp | 109 ++++----- .../backends/eigen/eigen_tensor_impl.hpp | 20 +- .../backends/cutensor/cuda_tensor.cpp | 100 ++++++++ .../backends/eigen/eigen_tensor_impl.cpp | 139 +++++++++++ .../testing/contraction_assignment.hpp | 217 ++++++++++++++++++ .../backends/testing/unary_op.hpp | 16 -- 9 files changed, 559 insertions(+), 83 deletions(-) create mode 100644 tests/cxx/unit_tests/tensorwrapper/backends/cutensor/cuda_tensor.cpp create mode 100644 tests/cxx/unit_tests/tensorwrapper/backends/testing/contraction_assignment.hpp diff --git a/src/tensorwrapper/backends/cutensor/cuda_tensor.cpp b/src/tensorwrapper/backends/cutensor/cuda_tensor.cpp index 2df8446b..2cbfdb8a 100644 --- a/src/tensorwrapper/backends/cutensor/cuda_tensor.cpp +++ b/src/tensorwrapper/backends/cutensor/cuda_tensor.cpp @@ -43,4 +43,7 @@ void CUDA_TENSOR::contraction_assignment(label_type this_label, #undef CUDA_TENSOR #undef TPARAMS + template class CUDATensor; + template class CUDATensor; + } // namespace tensorwrapper::backends::cutensor diff --git a/src/tensorwrapper/backends/cutensor/cuda_tensor.hpp b/src/tensorwrapper/backends/cutensor/cuda_tensor.hpp index 0e9dff46..d0ef8eb9 100644 --- a/src/tensorwrapper/backends/cutensor/cuda_tensor.hpp +++ b/src/tensorwrapper/backends/cutensor/cuda_tensor.hpp @@ -67,4 +67,7 @@ class CUDATensor { const_shape_view m_shape_; }; +extern template class CUDATensor; +extern template class CUDATensor; + } 
// namespace tensorwrapper::backends::cutensor diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor.hpp b/src/tensorwrapper/backends/eigen/eigen_tensor.hpp index 1fb3e4bd..8c7c2760 100644 --- a/src/tensorwrapper/backends/eigen/eigen_tensor.hpp +++ b/src/tensorwrapper/backends/eigen/eigen_tensor.hpp @@ -15,9 +15,12 @@ */ #pragma once +#include #include +#include #include #include +#include #include #include @@ -44,6 +47,9 @@ class EigenTensor { using my_type = EigenTensor; public: + /// Pointer to an object of my_type + using eigen_tensor_pointer = std::unique_ptr; + /// Type of an element in *this using value_type = FloatType; @@ -53,6 +59,12 @@ class EigenTensor { /// Type of a read-only reference to an element in *this using const_reference = const value_type&; + /// Type of a span to raw memory + using span_type = std::span; + + /// Type of a read-only span to raw memory + using const_span_type = std::span; + /// Type used to express the shape of *this using shape_type = shape::Smooth; @@ -74,8 +86,17 @@ class EigenTensor { /// Type of a label using label_type = dsl::DummyIndices; + /// Type returned by permuted_copy + using permuted_copy_return_type = + std::pair, eigen_tensor_pointer>; + virtual ~EigenTensor() noexcept = default; + permuted_copy_return_type permuted_copy(label_type perm, + label_type this_label) const { + return permuted_copy_(perm, this_label); + } + /** @brief Retrieves the rank of the wrapped tensor. * * @return The rank of the wrapped tensor. 
@@ -105,6 +126,10 @@ class EigenTensor { set_elem_(index, new_value); } + span_type data() noexcept { return data_(); } + + const_span_type data() const noexcept { return data_(); } + void fill(value_type value) { fill_(std::move(value)); } string_type to_string() const { return to_string_(); } @@ -149,8 +174,9 @@ class EigenTensor { } protected: - EigenTensor() noexcept = default; - + explicit EigenTensor() noexcept = default; + virtual permuted_copy_return_type permuted_copy_( + label_type perm, label_type this_label) const = 0; virtual eigen_rank_type rank_() const noexcept = 0; virtual size_type size_() const = 0; virtual size_type extent_(eigen_rank_type i) const = 0; @@ -159,12 +185,13 @@ class EigenTensor { virtual void fill_(value_type value) = 0; virtual string_type to_string_() const = 0; virtual std::ostream& add_to_stream_(std::ostream& os) const = 0; - + virtual span_type data_() noexcept = 0; + virtual const_span_type data_() const noexcept = 0; virtual void addition_assignment_(label_type this_label, label_type lhs_label, label_type rhs_label, const EigenTensor& lhs, - const EigenTensor& rhs) = 0; + const EigenTensor& rhs) = 0; virtual void subtraction_assignment_(label_type this_label, label_type lhs_label, diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp index e7aa1eff..53eb677e 100644 --- a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp +++ b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -// #include "../contraction_planner.hpp" +#include "../../buffer/contraction_planner.hpp" #include "eigen_tensor_impl.hpp" #include #include @@ -24,6 +24,32 @@ namespace tensorwrapper::backends::eigen { #define TPARAMS template #define EIGEN_TENSOR EigenTensorImpl +TPARAMS +EIGEN_TENSOR::EigenTensorImpl(std::span data, + const_shape_reference shape) : + m_tensor_(make_from_shape_(data, shape, std::make_index_sequence())) {} + +TPARAMS +auto EIGEN_TENSOR::permuted_copy_(label_type out, label_type in) const + -> permuted_copy_return_type { + using value_type = FloatType; + std::vector buffer(this->size(), value_type{0}); + std::span buffer_span(buffer.data(), buffer.size()); + + // Make a shape::Smooth object for tensor + std::vector old_shape_vec(this->rank()); + for(std::size_t i = 0; i < old_shape_vec.size(); ++i) { + old_shape_vec[i] = this->extent(i); + } + shape_type old_shape(old_shape_vec.begin(), old_shape_vec.end()); + shape_type new_shape(old_shape); + new_shape(out) = old_shape(in); + auto pnew_tensor = + std::make_unique(buffer_span, new_shape); + pnew_tensor->permute_assignment(out, in, *this); + return std::make_pair(std::move(buffer), std::move(pnew_tensor)); +} + TPARAMS auto EIGEN_TENSOR::get_elem_(index_vector index) const -> const_reference { return unwrap_vector_(std::move(index), std::make_index_sequence()); @@ -189,72 +215,39 @@ void EIGEN_TENSOR::contraction_assignment_(label_type this_label, label_type rhs_label, const base_type& lhs, const base_type& rhs) { - // ContractionPlanner plan(this_labels, lhs_labels, rhs_labels); - - // auto lhs_permutation = plan.lhs_permutation(); - // auto rhs_permutation = plan.rhs_permutation(); - - // std::vector new_lhs_buffer(lhs.size()); - // std::vector new_rhs_buffer(rhs.size()); - // std::span new_lhs_span(new_lhs_buffer.data(), - // new_lhs_buffer.size()); - // std::span new_rhs_span(new_rhs_buffer.data(), - // new_rhs_buffer.size()); - - // auto new_lhs_shape = lhs_permutation.apply(lhs.shape()); 
- // auto new_rhs_shape = rhs_permutation.apply(rhs.shape()); - - // auto new_lhs_tensor = - // make_eigen_tensor(new_lhs_span, new_lhs_shape); - - // auto new_rhs_tensor = - // make_eigen_tensor(new_rhs_span, new_rhs_shape); - - // new_lhs_tensor.permute_assignment(lhs_permutation, lhs); - // new_rhs_tensor.permute_assignment(rhs_permutation, rhs); - - // const auto [lrows, lcols] = matrix_size(*lt, plan.lhs_free().size()); - // const auto [rrows, rcols] = matrix_size(*rt, - // plan.rhs_dummy().size()); + buffer::ContractionPlanner plan(this_label, lhs_label, rhs_label); - // // Work out the types of the matrix amd a map - // constexpr auto e_dyn = ::Eigen::Dynamic; - // constexpr auto e_row_major = ::Eigen::RowMajor; - // using matrix_t = ::Eigen::Matrix; using map_t = ::Eigen::Map; + // Transpose, Transpose part of TTGT + auto&& [new_lhs_buffer, pnew_lhs_tensor] = + lhs.permuted_copy(plan.lhs_permutation(), lhs_label); - // map_t lmatrix(new_lhs_buffer.data(), lrows, lcols); - // map_t rmatrix(new_rhs_buffer.data(), rrows, rcols); - // map_t omatrix(m_tensor_.data(), lrows, rcols); + auto&& [new_rhs_buffer, pnew_rhs_tensor] = + rhs.permuted_copy(plan.rhs_permutation(), rhs_label); - // omatrix = lmatrix * rmatrix; + // Gemm part of TTGT + auto olabels = plan.result_matrix_labels(); - // // auto mlabels = plan.result_matrix_labels(); - // // auto oshape = result_shape(olabels); + auto&& [out_buffer, pout_tensor] = this->permuted_copy(olabels, this_label); - // // oshapes is the final shape, permute it to shape omatrix is - // currently in + const auto [lrows, lcols] = + matrix_size(*pnew_lhs_tensor, plan.lhs_free().size()); + const auto [rrows, rcols] = + matrix_size(*pnew_rhs_tensor, plan.rhs_dummy().size()); - // auto temp_shape = result_shape.clone(); - // temp_shape->permute_assignment(mlabels, oshape); - // auto mshape = temp_shape->as_smooth(); + // Work out the types of the matrix amd a map + constexpr auto e_dyn = ::Eigen::Dynamic; + constexpr auto 
e_row_major = ::Eigen::RowMajor; + using matrix_t = ::Eigen::Matrix; + using map_t = ::Eigen::Map; - // auto m_to_o = olabels.permutation(mlabels); // N.b. Eigen def is - // inverse us + map_t lmatrix(new_lhs_buffer.data(), lrows, lcols); + map_t rmatrix(new_rhs_buffer.data(), rrows, rcols); + map_t omatrix(out_buffer.data(), lrows, rcols); - // std::array out_size; - // std::array m_to_o_array; - // for(std::size_t i = 0; i < Rank; ++i) { - // out_size[i] = mshape.extent(i); - // m_to_o_array[i] = m_to_o[i]; - // } + omatrix = lmatrix * rmatrix; - // auto tensor = buffer.reshape(out_size); - // if constexpr(Rank > 0) { - // m_tensor_ = tensor.shuffle(m_to_o_array); - // } else { - // m_tensor_ = tensor; - // } + // The last transpose part of TTGT + this->permute_assignment(this_label, olabels, *pout_tensor); } #undef EIGEN_TENSOR diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp index e7b6ea79..f3d0b60c 100644 --- a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp +++ b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.hpp @@ -44,22 +44,25 @@ class EigenTensorImpl : public EigenTensor { ///@{ using typename base_type::const_reference; using typename base_type::const_shape_reference; + using typename base_type::const_span_type; using typename base_type::eigen_rank_type; using typename base_type::index_vector; using typename base_type::label_type; + using typename base_type::permuted_copy_return_type; using typename base_type::reference; + using typename base_type::shape_type; using typename base_type::size_type; + using typename base_type::span_type; using typename base_type::string_type; using typename base_type::value_type; ///@} - EigenTensorImpl(std::span data, const_shape_reference shape) : - m_tensor_( - make_from_shape_(data, shape, std::make_index_sequence())) {} - - EigenTensorImpl permute(label_type perm) const; + EigenTensorImpl(std::span data, const_shape_reference shape); 
protected: + permuted_copy_return_type permuted_copy_( + label_type perm, label_type this_label) const override; + /// Implement rank by returning template parameter eigen_rank_type rank_() const noexcept override { return Rank; } @@ -77,6 +80,13 @@ class EigenTensorImpl : public EigenTensor { /// Unwraps index vector into Eigen's operator() to set element void set_elem_(index_vector index, value_type new_value) override; + virtual span_type data_() noexcept override { + return {m_tensor_.data(), size_type(m_tensor_.size())}; + } + virtual const_span_type data_() const noexcept override { + return {m_tensor_.data(), size_type(m_tensor_.size())}; + } + /// Calls std::fill to set the values void fill_(value_type value) override; diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/cutensor/cuda_tensor.cpp b/tests/cxx/unit_tests/tensorwrapper/backends/cutensor/cuda_tensor.cpp new file mode 100644 index 00000000..47fb1579 --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/backends/cutensor/cuda_tensor.cpp @@ -0,0 +1,100 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../../testing/testing.hpp" +#include "../testing/contraction_assignment.hpp" +#include + +using namespace tensorwrapper; +using namespace tensorwrapper::backends::cutensor; + +using supported_fp_types = std::tuple; + +TEMPLATE_LIST_TEST_CASE("CUDATensor", "", supported_fp_types) { + using tensor_type = CUDATensor; + using shape_type = typename tensor_type::shape_type; + using label_type = typename tensor_type::label_type; + + std::vector data(16); + for(std::size_t i = 0; i < data.size(); ++i) + data[i] = static_cast(i); + + std::span data_span(data.data(), data.size()); + + shape_type scalar_shape({}); + shape_type vector_shape({16}); + shape_type matrix_shape({4, 4}); + shape_type tensor3_shape({2, 2, 4}); + shape_type tensor4_shape({2, 2, 2, 2}); + + tensor_type scalar(data_span, scalar_shape); + tensor_type vector(data_span, vector_shape); + tensor_type matrix(data_span, matrix_shape); + tensor_type tensor3(data_span, tensor3_shape); + tensor_type tensor4(data_span, tensor4_shape); + + SECTION("rank") { + REQUIRE(scalar.rank() == 0); + REQUIRE(vector.rank() == 1); + REQUIRE(matrix.rank() == 2); + REQUIRE(tensor3.rank() == 3); + REQUIRE(tensor4.rank() == 4); + } + + SECTION("size") { + REQUIRE(scalar.size() == 1); + REQUIRE(vector.size() == 16); + REQUIRE(matrix.size() == 16); + REQUIRE(tensor3.size() == 16); + REQUIRE(tensor4.size() == 16); + } + + SECTION("shape") { + REQUIRE(scalar.shape() == scalar_shape); + REQUIRE(vector.shape() == vector_shape); + REQUIRE(matrix.shape() == matrix_shape); + REQUIRE(tensor3.shape() == tensor3_shape); + REQUIRE(tensor4.shape() == tensor4_shape); + } + + SECTION("data()") { + REQUIRE(scalar.data() == data.data()); + REQUIRE(vector.data() == data.data()); + REQUIRE(matrix.data() == data.data()); + REQUIRE(tensor3.data() == data.data()); + REQUIRE(tensor4.data() == data.data()); + } + + SECTION("data() const") { + REQUIRE(std::as_const(scalar).data() == data.data()); + REQUIRE(std::as_const(vector).data() == 
data.data()); + REQUIRE(std::as_const(matrix).data() == data.data()); + REQUIRE(std::as_const(tensor3).data() == data.data()); + REQUIRE(std::as_const(tensor4).data() == data.data()); + } + + SECTION("contraction_assignment") { +#ifdef ENABLE_CUTESNSOR + testing::contraction_assignment(); +#else + label_type label(""); + REQUIRE_THROWS_AS( + scalar.contraction_assignment(label, label, label, scalar, scalar), + + std::runtime_error); +#endif + } +} diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp index d31a3c95..7d1ec863 100644 --- a/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp +++ b/tests/cxx/unit_tests/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -16,6 +16,7 @@ #include "../../testing/testing.hpp" #include "../testing/addition_assignment.hpp" +#include "../testing/contraction_assignment.hpp" #include "../testing/hadamard_assignment.hpp" #include "../testing/permute_assignment.hpp" #include "../testing/scalar_multiplication.hpp" @@ -52,6 +53,123 @@ TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { tensor3_type tensor3(data_span, tensor3_shape); tensor4_type tensor4(data_span, tensor4_shape); + SECTION("permuted_copy") { + using label_type = typename scalar_type::label_type; + SECTION("scalar") { + label_type label(""); + auto&& [scalar_buffer, pscalar] = + scalar.permuted_copy(label, label); + REQUIRE(scalar_buffer.size() == 1); + REQUIRE(pscalar->get_elem({}) == data[0]); + REQUIRE(scalar_buffer.data() != data.data()); // Ensure a copy + } + + SECTION("vector") { + label_type i("i"); + auto&& [vector_buffer, pvector] = vector.permuted_copy(i, i); + REQUIRE(vector_buffer.size() == 16); + for(std::size_t idx = 0; idx < 16; ++idx) { + REQUIRE(pvector->get_elem({idx}) == data[idx]); + } + REQUIRE(vector_buffer.data() != data.data()); // Ensure a copy + } + + SECTION("matrix") { + label_type 
ij("i,j"); + label_type ji("j,i"); + SECTION("no permutation") { + auto&& [matrix_buffer, pmatrix] = matrix.permuted_copy(ij, ij); + REQUIRE(matrix_buffer.size() == 16); + for(std::size_t idx = 0; idx < 4; ++idx) { + for(std::size_t jdx = 0; jdx < 4; ++jdx) { + REQUIRE(pmatrix->get_elem({idx, jdx}) == + data[idx * 4 + jdx]); + } + } + REQUIRE(matrix_buffer.data() != data.data()); // Ensure a copy + } + SECTION("Permutation") { + auto&& [matrix_buffer, pmatrix] = matrix.permuted_copy(ji, ij); + REQUIRE(matrix_buffer.size() == 16); + for(std::size_t idx = 0; idx < 4; ++idx) { + for(std::size_t jdx = 0; jdx < 4; ++jdx) { + REQUIRE(pmatrix->get_elem({jdx, idx}) == + data[idx * 4 + jdx]); + } + } + REQUIRE(matrix_buffer.data() != data.data()); // Ensure a copy + } + } + + SECTION("Rank 3 tensor") { + label_type ijk("i,j,k"); + label_type jik("j,i,k"); + SECTION("no permutation") { + auto&& [t_buffer, pt] = tensor3.permuted_copy(ijk, ijk); + REQUIRE(t_buffer.size() == 16); + for(std::size_t idx = 0; idx < 2; ++idx) { + for(std::size_t jdx = 0; jdx < 2; ++jdx) { + for(std::size_t kdx = 0; kdx < 4; ++kdx) { + REQUIRE(pt->get_elem({idx, jdx, kdx}) == + data[idx * 8 + jdx * 4 + kdx]); + } + } + } + REQUIRE(t_buffer.data() != data.data()); // Ensure a copy + } + SECTION("Permutation") { + auto&& [t_buffer, pt] = tensor3.permuted_copy(jik, ijk); + REQUIRE(t_buffer.size() == 16); + for(std::size_t idx = 0; idx < 2; ++idx) { + for(std::size_t jdx = 0; jdx < 2; ++jdx) { + for(std::size_t kdx = 0; kdx < 4; ++kdx) { + REQUIRE(pt->get_elem({jdx, idx, kdx}) == + data[idx * 8 + jdx * 4 + kdx]); + } + } + } + REQUIRE(t_buffer.data() != data.data()); // Ensure a copy + } + } + + SECTION("Rank 4 tensor") { + label_type ijkl("i,j,k,l"); + label_type jikl("j,i,k,l"); + SECTION("no permutation") { + auto&& [t_buffer, pt] = tensor4.permuted_copy(ijkl, ijkl); + REQUIRE(t_buffer.size() == 16); + for(std::size_t idx = 0; idx < 2; ++idx) { + for(std::size_t jdx = 0; jdx < 2; ++jdx) { + 
for(std::size_t kdx = 0; kdx < 2; ++kdx) { + for(std::size_t ldx = 0; ldx < 2; ++ldx) { + REQUIRE( + pt->get_elem({idx, jdx, kdx, ldx}) == + data[idx * 8 + jdx * 4 + kdx * 2 + ldx]); + } + } + } + } + REQUIRE(t_buffer.data() != data.data()); // Ensure a copy + } + SECTION("Permutation") { + auto&& [t_buffer, pt] = tensor4.permuted_copy(jikl, ijkl); + REQUIRE(t_buffer.size() == 16); + for(std::size_t idx = 0; idx < 2; ++idx) { + for(std::size_t jdx = 0; jdx < 2; ++jdx) { + for(std::size_t kdx = 0; kdx < 2; ++kdx) { + for(std::size_t ldx = 0; ldx < 2; ++ldx) { + REQUIRE( + pt->get_elem({jdx, idx, kdx, ldx}) == + data[idx * 8 + jdx * 4 + kdx * 2 + ldx]); + } + } + } + } + REQUIRE(t_buffer.data() != data.data()); // Ensure a copy + } + } + } + SECTION("rank") { REQUIRE(scalar.rank() == 0); REQUIRE(vector.rank() == 1); @@ -118,6 +236,22 @@ TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { REQUIRE(tensor4.get_elem({0, 1, 1, 0}) == corr); } + SECTION("data()") { + REQUIRE(scalar.data().data() == data.data()); + REQUIRE(vector.data().data() == data.data()); + REQUIRE(matrix.data().data() == data.data()); + REQUIRE(tensor3.data().data() == data.data()); + REQUIRE(tensor4.data().data() == data.data()); + } + + SECTION("data() const") { + REQUIRE(std::as_const(scalar).data().data() == data.data()); + REQUIRE(std::as_const(vector).data().data() == data.data()); + REQUIRE(std::as_const(matrix).data().data() == data.data()); + REQUIRE(std::as_const(tensor3).data().data() == data.data()); + REQUIRE(std::as_const(tensor4).data().data() == data.data()); + } + SECTION("fill") { TestType corr(7); SECTION("scalar") { @@ -251,4 +385,9 @@ TEMPLATE_LIST_TEST_CASE("EigenTensorImpl", "", types::floating_point_types) { testing::tensor4_scalar_multiplication(); } } + + SECTION("contraction_assignment") { + testing::contraction_assignment_tests< + scalar_type, vector_type, matrix_type, tensor3_type, tensor4_type>(); + } } diff --git 
a/tests/cxx/unit_tests/tensorwrapper/backends/testing/contraction_assignment.hpp b/tests/cxx/unit_tests/tensorwrapper/backends/testing/contraction_assignment.hpp new file mode 100644 index 00000000..b6837d0e --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/contraction_assignment.hpp @@ -0,0 +1,217 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include +#include + +namespace tensorwrapper::testing { + +template +void contraction_assignment_tests() { + using scalar_value_type = typename ScalarType::value_type; + using vector_value_type = typename VectorType::value_type; + using matrix_value_type = typename MatrixType::value_type; + using tensor3_value_type = typename Tensor3Type::value_type; + using tensor4_value_type = typename Tensor4Type::value_type; + + using shape_type = typename ScalarType::shape_type; + using label_type = typename ScalarType::label_type; + + std::vector scalar_data(1, scalar_value_type(42.0)); + std::vector vector_data(2, vector_value_type(0.0)); + std::vector matrix_data(4, matrix_value_type(0.0)); + std::vector tensor3_data(8, tensor3_value_type(0.0)); + std::vector tensor4_data(16, tensor4_value_type(0.0)); + + for(std::size_t i = 0; i < vector_data.size(); ++i) + vector_data[i] = scalar_value_type(i + 1.0); + + for(std::size_t i = 0; i < matrix_data.size(); ++i) + matrix_data[i] = scalar_value_type(i + 1.0); + + for(std::size_t i = 0; i < 
tensor3_data.size(); ++i) + tensor3_data[i] = scalar_value_type(i + 1.0); + + std::span vector_data_span(vector_data.data(), + vector_data.size()); + std::span matrix_data_span(matrix_data.data(), + matrix_data.size()); + std::span tensor3_data_span(tensor3_data.data(), + tensor3_data.size()); + std::span tensor4_data_span(tensor4_data.data(), + tensor4_data.size()); + + shape_type scalar_shape{}; + shape_type vector_shape{2}; + shape_type matrix_shape{2, 2}; + shape_type tensor3_shape{2, 2, 2}; + shape_type tensor4_shape{2, 2, 2, 2}; + + ScalarType scalar(scalar_data, scalar_shape); + VectorType vector(vector_data_span, vector_shape); + MatrixType matrix(matrix_data_span, matrix_shape); + Tensor3Type tensor3(tensor3_data_span, tensor3_shape); + Tensor4Type tensor4(tensor4_data, shape_type{2, 2, 2, 2}); + + SECTION("scalar,scalar->") { + label_type o(""); + label_type l(""); + label_type r(""); + scalar.contraction_assignment(o, l, r, scalar, scalar); + + REQUIRE(scalar.get_elem({}) == scalar_value_type(42.0 * 42.0)); + } + + SECTION("i,i->") { + label_type o(""); + label_type l("i"); + label_type r("i"); + scalar.contraction_assignment(o, l, r, vector, vector); + REQUIRE(scalar.get_elem({}) == vector_value_type(5.0)); + } + + SECTION("i,ij->j") { + label_type o("j"); + label_type l("i"); + label_type r("i,j"); + vector.contraction_assignment(o, l, r, vector, matrix); + REQUIRE(vector.get_elem({0}) == vector_value_type(7.0)); + REQUIRE(vector.get_elem({1}) == vector_value_type(10.0)); + } + + SECTION("ij,ji->") { + label_type o(""); + label_type l("i,j"); + label_type r("j,i"); + scalar.contraction_assignment(o, l, r, matrix, matrix); + + REQUIRE(scalar.get_elem({}) == matrix_value_type(29.0)); + } + + SECTION("ij,jk->ik") { + label_type o("i,k"); + label_type l("i,j"); + label_type r("j,k"); + matrix.contraction_assignment(o, l, r, matrix, matrix); + + REQUIRE(matrix.get_elem({0, 0}) == matrix_value_type(7.0)); + REQUIRE(matrix.get_elem({0, 1}) == 
matrix_value_type(10.0)); + REQUIRE(matrix.get_elem({1, 0}) == matrix_value_type(15.0)); + REQUIRE(matrix.get_elem({1, 1}) == matrix_value_type(22.0)); + } + + SECTION("ijk,ijk->") { + label_type o(""); + label_type l("i,j,k"); + label_type r("i,j,k"); + scalar.contraction_assignment(o, l, r, tensor3, tensor3); + + REQUIRE(scalar.get_elem({}) == scalar_value_type(204.0)); + } + + SECTION("ijk,jik->") { + label_type o(""); + label_type l("i,j,k"); + label_type r("j,i,k"); + scalar.contraction_assignment(o, l, r, tensor3, tensor3); + + REQUIRE(scalar.get_elem({}) == scalar_value_type(196.0)); + } + + SECTION("ijk,jkl->il") { + label_type o("i,l"); + label_type l("i,j,k"); + label_type r("j,k,l"); + matrix.contraction_assignment(o, l, r, tensor3, tensor3); + + REQUIRE(matrix.get_elem({0, 0}) == matrix_value_type(50.0)); + REQUIRE(matrix.get_elem({0, 1}) == matrix_value_type(60.0)); + REQUIRE(matrix.get_elem({1, 0}) == matrix_value_type(114.0)); + REQUIRE(matrix.get_elem({1, 1}) == matrix_value_type(140.0)); + } + + SECTION("ijk,jlk->il") { + label_type o("i,l"); + label_type l("i,j,k"); + label_type r("j,l,k"); + matrix.contraction_assignment(o, l, r, tensor3, tensor3); + + REQUIRE(matrix.get_elem({0, 0}) == matrix_value_type(44.0)); + REQUIRE(matrix.get_elem({0, 1}) == matrix_value_type(64.0)); + REQUIRE(matrix.get_elem({1, 0}) == matrix_value_type(100.0)); + REQUIRE(matrix.get_elem({1, 1}) == matrix_value_type(152.0)); + } + + SECTION("ijk,jlk->li") { + label_type o("l,i"); + label_type l("i,j,k"); + label_type r("j,l,k"); + matrix.contraction_assignment(o, l, r, tensor3, tensor3); + + REQUIRE(matrix.get_elem({0, 0}) == matrix_value_type(44.0)); + REQUIRE(matrix.get_elem({0, 1}) == matrix_value_type(100.0)); + REQUIRE(matrix.get_elem({1, 0}) == matrix_value_type(64.0)); + REQUIRE(matrix.get_elem({1, 1}) == matrix_value_type(152.0)); + } + + // SECTION("ijk,ljm->iklm") { + + SECTION("ijk,ljm->iklm") { + label_type o("i,k,l,m"); + label_type l("i,j,k"); + label_type 
r("l,j,m"); + tensor4.contraction_assignment(o, l, r, tensor3, tensor3); + + REQUIRE(tensor4.get_elem({0, 0, 0, 0}) == tensor4_value_type(10.0)); + REQUIRE(tensor4.get_elem({0, 0, 0, 1}) == tensor4_value_type(14.0)); + REQUIRE(tensor4.get_elem({0, 0, 1, 0}) == tensor4_value_type(26.0)); + REQUIRE(tensor4.get_elem({0, 0, 1, 1}) == tensor4_value_type(30.0)); + REQUIRE(tensor4.get_elem({0, 1, 0, 0}) == tensor4_value_type(14.0)); + REQUIRE(tensor4.get_elem({0, 1, 0, 1}) == tensor4_value_type(20.0)); + REQUIRE(tensor4.get_elem({0, 1, 1, 0}) == tensor4_value_type(38.0)); + REQUIRE(tensor4.get_elem({0, 1, 1, 1}) == tensor4_value_type(44.0)); + REQUIRE(tensor4.get_elem({1, 0, 0, 0}) == tensor4_value_type(26.0)); + REQUIRE(tensor4.get_elem({1, 0, 0, 1}) == tensor4_value_type(38.0)); + REQUIRE(tensor4.get_elem({1, 0, 1, 0}) == tensor4_value_type(74.0)); + REQUIRE(tensor4.get_elem({1, 0, 1, 1}) == tensor4_value_type(86.0)); + REQUIRE(tensor4.get_elem({1, 1, 0, 0}) == tensor4_value_type(30.0)); + REQUIRE(tensor4.get_elem({1, 1, 0, 1}) == tensor4_value_type(44.0)); + REQUIRE(tensor4.get_elem({1, 1, 1, 0}) == tensor4_value_type(86.0)); + REQUIRE(tensor4.get_elem({1, 1, 1, 1}) == tensor4_value_type(100.0)); + } + + // SECTION("ij,jkl->ikl") { + + SECTION("ij,jkl->ikl") { + label_type o("i,k,l"); + label_type l("i,j"); + label_type r("j,k,l"); + tensor3.contraction_assignment(o, l, r, matrix, tensor3); + + REQUIRE(tensor3.get_elem({0, 0, 0}) == tensor3_value_type(11.0)); + REQUIRE(tensor3.get_elem({0, 0, 1}) == tensor3_value_type(14.0)); + REQUIRE(tensor3.get_elem({0, 1, 0}) == tensor3_value_type(17.0)); + REQUIRE(tensor3.get_elem({0, 1, 1}) == tensor3_value_type(20.0)); + REQUIRE(tensor3.get_elem({1, 0, 0}) == tensor3_value_type(23.0)); + REQUIRE(tensor3.get_elem({1, 0, 1}) == tensor3_value_type(30.0)); + REQUIRE(tensor3.get_elem({1, 1, 0}) == tensor3_value_type(37.0)); + REQUIRE(tensor3.get_elem({1, 1, 1}) == tensor3_value_type(44.0)); + } +} +} // namespace 
tensorwrapper::testing diff --git a/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp b/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp index af541d05..6c8f8a05 100644 --- a/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp +++ b/tests/cxx/unit_tests/tensorwrapper/backends/testing/unary_op.hpp @@ -160,14 +160,6 @@ void tensor3_unary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { REQUIRE(result.get_elem({i, j, k}) == corr_op(s0_data[idx])); } } - - SECTION("Permute result") { - the_op(jik, ijk, result, s0); - for(const auto [i, j, k] : tensor3_indices) { - std::size_t idx = i * 4 + j * 2 + k; - REQUIRE(result.get_elem({j, i, k}) == corr_op(s0_data[idx])); - } - } } template @@ -217,14 +209,6 @@ void tensor4_unary_assignment(Fxn1&& the_op, Fxn2&& corr_op) { REQUIRE(result.get_elem({i, j, k, l}) == corr_op(s0_data[idx])); } } - - SECTION("Permute result") { - the_op(jikl, ijkl, result, s0); - for(const auto [i, j, k, l] : tensor4_indices) { - std::size_t idx = i * 8 + j * 4 + k * 2 + l; - REQUIRE(result.get_elem({j, i, k, l}) == corr_op(s0_data[idx])); - } - } } } // namespace tensorwrapper::testing From 1b64e819f3377fb85e83aabdca7e8ec2dac9966e Mon Sep 17 00:00:00 2001 From: "Jonathan M. 
Waldrop" Date: Wed, 19 Nov 2025 11:51:19 -0600 Subject: [PATCH 06/18] Update compiler versions in pull_request.yaml --- .github/workflows/pull_request.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index 8bf142a7..5f8a5b10 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -32,10 +32,10 @@ jobs: test_library: uses: NWChemEx/.github/.github/workflows/test_nwx_library.yaml@master with: - compilers: '["gcc-11", "clang-14"]' + compilers: '["gcc-13", "clang-14"]' test_library_with_sigma: uses: NWChemEx/.github/.github/workflows/test_nwx_library.yaml@master with: - compilers: '["gcc-11", "clang-14"]' + compilers: '["gcc-13", "clang-14"]' repo_toolchain: ".github/enable_sigma.cmake" From ea9bd000e76a2f73833255ad11469afadac06297 Mon Sep 17 00:00:00 2001 From: "Jonathan M. Waldrop" Date: Wed, 19 Nov 2025 15:53:01 -0600 Subject: [PATCH 07/18] Update compilers in pull_request workflow --- .github/workflows/pull_request.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index 5f8a5b10..a9527041 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -32,10 +32,10 @@ jobs: test_library: uses: NWChemEx/.github/.github/workflows/test_nwx_library.yaml@master with: - compilers: '["gcc-13", "clang-14"]' + compilers: '["gcc-14", "clang-19"]' test_library_with_sigma: uses: NWChemEx/.github/.github/workflows/test_nwx_library.yaml@master with: - compilers: '["gcc-13", "clang-14"]' + compilers: '["gcc-14", "clang-19"]' repo_toolchain: ".github/enable_sigma.cmake" From 4dd089c7738bef75043252dc16a861d68609d503 Mon Sep 17 00:00:00 2001 From: "Ryan M. 
Richard" Date: Thu, 20 Nov 2025 10:43:17 -0600 Subject: [PATCH 08/18] compiles (FWIW) --- include/tensorwrapper/buffer/mdbuffer.hpp | 134 +++++++++++++++--- .../tensorwrapper/concepts/floating_point.hpp | 26 ++++ .../tensorwrapper/forward_declarations.hpp | 2 + .../tensorwrapper/types/mdbuffer_traits.hpp | 4 +- .../buffer/detail_/mdbuffer_pimpl.hpp | 53 ------- src/tensorwrapper/buffer/mdbuffer.cpp | 115 ++++++++++++--- 6 files changed, 241 insertions(+), 93 deletions(-) create mode 100644 include/tensorwrapper/concepts/floating_point.hpp delete mode 100644 src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp diff --git a/include/tensorwrapper/buffer/mdbuffer.hpp b/include/tensorwrapper/buffer/mdbuffer.hpp index 72f5c765..82d9ba36 100644 --- a/include/tensorwrapper/buffer/mdbuffer.hpp +++ b/include/tensorwrapper/buffer/mdbuffer.hpp @@ -15,6 +15,8 @@ */ #pragma once +#include +#include #include namespace tensorwrapper::buffer { @@ -23,42 +25,138 @@ namespace tensorwrapper::buffer { * * This class is a dense multidimensional buffer of floating-point values. 
*/ -class MDBuffer { +class MDBuffer : public Replicated { private: - using traits_type = types::ClassTraits; + /// Type *this derives from + using my_base_type = Replicated; + using traits_type = types::ClassTraits; + using my_type = MDBuffer; public: /// Add types to public API ///@{ - using buffer_type = typename traits_type::buffer_type; - using pimpl_type = typename traits_type::pimpl_type; - using pimpl_pointer = typename traits_type::pimpl_pointer; - using rank_type = typename traits_type::rank_type; - using shape_type = typename traits_type::shape_type; + using value_type = typename traits_type::value_type; + using reference = typename traits_type::reference; + using const_reference = typename traits_type::const_reference; + using buffer_type = typename traits_type::buffer_type; + using buffer_view = typename traits_type::buffer_view; + using const_buffer_view = typename traits_type::const_buffer_view; + using pimpl_type = typename traits_type::pimpl_type; + using pimpl_pointer = typename traits_type::pimpl_pointer; + using rank_type = typename traits_type::rank_type; + using shape_type = typename traits_type::shape_type; + using const_shape_view = typename traits_type::const_shape_view; + using size_type = typename traits_type::size_type; ///@} + using index_vector = std::vector; + using typename my_base_type::label_type; + using string_type = std::string; + using hash_type = std::size_t; + MDBuffer() noexcept; - template - MDBuffer(shape_type shape, std::vector elements) { - MDBuffer(std::move(shape), buffer_type(std::move(elements))); + template + MDBuffer(std::vector elements, const_shape_view shape) : + MDBuffer(buffer_type(std::move(elements)), + std::make_unique(shape), nullptr) {} + + template + MDBuffer(std::vector elements, layout_pointer playout = nullptr, + allocator_base_pointer pallocator = nullptr) { + MDBuffer(buffer_type(std::move(elements)), std::move(playout), + std::move(pallocator)); } - MDBuffer(shape_type shape, buffer_type buffer); + 
MDBuffer(buffer_type buffer, layout_pointer playout = nullptr, + allocator_base_pointer pallocator = nullptr); + + MDBuffer(const MDBuffer& other) = default; + MDBuffer(MDBuffer&& other) noexcept = default; + + MDBuffer& operator=(const MDBuffer& other) = default; + MDBuffer& operator=(MDBuffer&& other) noexcept = default; + + ~MDBuffer() override = default; + + // ------------------------------------------------------------------------- + // -- State Accessors + // ------------------------------------------------------------------------- + + size_type size() const noexcept; + + const_reference get_elem(index_vector index) const; + + void set_elem(index_vector index, value_type new_value); + + buffer_view get_mutable_data(); + + const_buffer_view get_immutable_data() const; + + // ------------------------------------------------------------------------- + // -- Utility Methods + // ------------------------------------------------------------------------- + + bool operator==(const my_type& rhs) const noexcept; - rank_type rank() const; +protected: + const_shape_view shape_() const; + + buffer_base_pointer clone_() const override; + + bool are_equal_(const_buffer_base_reference rhs) const noexcept override; + + dsl_reference addition_assignment_(label_type this_labels, + const_labeled_reference lhs, + const_labeled_reference rhs) override; + dsl_reference subtraction_assignment_(label_type this_labels, + const_labeled_reference lhs, + const_labeled_reference rhs) override; + dsl_reference multiplication_assignment_( + label_type this_labels, const_labeled_reference lhs, + const_labeled_reference rhs) override; + + dsl_reference permute_assignment_(label_type this_labels, + const_labeled_reference rhs) override; + + dsl_reference scalar_multiplication_(label_type this_labels, double scalar, + const_labeled_reference rhs) override; + + string_type to_string_() const override; + + std::ostream& add_to_stream_(std::ostream& os) const override; + + // Returns the hash for 
the current state of *this, computing first if + // needed. + hash_type get_hash_() const { + if(m_recalculate_hash_ or !m_hash_caching_) update_hash_(); + return m_hash_; + } private: - explicit MDBuffer(pimpl_pointer pimpl) noexcept; + size_type coordinate_to_ordinal_(index_vector index) const; + + // Computes the hash for the current state of *this + void update_hash_() const; + + // Designates that the state may have changed and to recalculate the hash. + // This function is really just for readability and clarity. + void mark_for_rehash_() const { m_recalculate_hash_ = true; } + + // Designates that state changes are not trackable and we should recalculate + // the hash each time. + void turn_off_hash_caching_() const { m_hash_caching_ = false; } - bool has_pimpl_() const noexcept; + // Tracks whether the hash needs to be redetermined + mutable bool m_recalculate_hash_ = true; - void assert_pimpl_() const; + // Tracks whether hash caching has been turned off + mutable bool m_hash_caching_ = true; - pimpl_type& pimpl_(); - const pimpl_type& pimpl_() const; + // Holds the computed hash value for this instance's state + mutable hash_type m_hash_ = 0; - pimpl_pointer m_pimpl_; + buffer_type m_buffer_; }; } // namespace tensorwrapper::buffer diff --git a/include/tensorwrapper/concepts/floating_point.hpp b/include/tensorwrapper/concepts/floating_point.hpp new file mode 100644 index 00000000..d95588d0 --- /dev/null +++ b/include/tensorwrapper/concepts/floating_point.hpp @@ -0,0 +1,26 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include + +namespace tensorwrapper::concepts { + +using wtf::concepts::ConstFloatingPoint; +using wtf::concepts::FloatingPoint; +using wtf::concepts::UnmodifiedFloatingPoint; + +} // namespace tensorwrapper::concepts diff --git a/include/tensorwrapper/forward_declarations.hpp b/include/tensorwrapper/forward_declarations.hpp index 16c51064..e030b3a9 100644 --- a/include/tensorwrapper/forward_declarations.hpp +++ b/include/tensorwrapper/forward_declarations.hpp @@ -28,6 +28,8 @@ class MDBuffer; } // namespace buffer namespace shape { +template +class SmoothView; class Smooth; diff --git a/include/tensorwrapper/types/mdbuffer_traits.hpp b/include/tensorwrapper/types/mdbuffer_traits.hpp index 27c74421..aa60a608 100644 --- a/include/tensorwrapper/types/mdbuffer_traits.hpp +++ b/include/tensorwrapper/types/mdbuffer_traits.hpp @@ -28,7 +28,9 @@ struct MDBufferTraitsCommon { using buffer_type = wtf::buffer::FloatBuffer; using const_buffer_view = wtf::buffer::BufferView; using shape_type = shape::Smooth; - using rank_type = typename shape_type::rank_type; + using const_shape_view = shape::SmoothView; + using rank_type = typename ClassTraits::rank_type; + using size_type = typename ClassTraits::size_type; using pimpl_type = tensorwrapper::buffer::detail_::MDBufferPIMPL; using pimpl_pointer = std::unique_ptr; }; diff --git a/src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp b/src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp deleted file mode 100644 index 6f410098..00000000 --- a/src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp 
+++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2025 NWChemEx-Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include -#include - -namespace tensorwrapper::buffer::detail_ { - -class MDBufferPIMPL { -public: - using parent_type = tensorwrapper::buffer::MDBuffer; - using traits_type = tensorwrapper::types::ClassTraits; - - /// Add types to public API - ///@{ - using value_type = typename traits_type::value_type; - using rank_type = typename traits_type::rank_type; - using buffer_type = typename traits_type::buffer_type; - using shape_type = typename traits_type::shape_type; - ///@} - - MDBufferPIMPL(shape_type shape, buffer_type buffer) noexcept : - m_shape_(std::move(shape)), m_buffer_(std::move(buffer)) {} - - auto& shape() noexcept { return m_shape_; } - - const auto& shape() const noexcept { return m_shape_; } - - auto& buffer() noexcept { return m_buffer_; } - - const auto& buffer() const noexcept { return m_buffer_; } - -private: - shape_type m_shape_; - - buffer_type m_buffer_; -}; - -} // namespace tensorwrapper::buffer::detail_ diff --git a/src/tensorwrapper/buffer/mdbuffer.cpp b/src/tensorwrapper/buffer/mdbuffer.cpp index fe92be9c..d427d4df 100644 --- a/src/tensorwrapper/buffer/mdbuffer.cpp +++ b/src/tensorwrapper/buffer/mdbuffer.cpp @@ -15,42 +15,115 @@ */ #include "detail_/addition_visitor.hpp" -#include "detail_/mdbuffer_pimpl.hpp" +#include "detail_/hash_utilities.hpp" #include #include namespace 
tensorwrapper::buffer { -MDBuffer::MDBuffer() noexcept : m_pimpl_(nullptr) {} +MDBuffer::MDBuffer() noexcept = default; -MDBuffer::MDBuffer(shape_type shape, buffer_type buffer) : - MDBuffer(std::make_unique(std::move(shape), - std::move(buffer))) {} +MDBuffer::MDBuffer(buffer_type buffer, layout_pointer playout, + allocator_base_pointer pallocator) : + my_base_type(std::move(playout), std::move(pallocator)), + m_buffer_(std::move(buffer)) {} -MDBuffer::MDBuffer(pimpl_pointer pimpl) noexcept : m_pimpl_(std::move(pimpl)) {} +// ----------------------------------------------------------------------------- +// -- State Accessor +// ----------------------------------------------------------------------------- -auto MDBuffer::rank() const -> rank_type { - assert_pimpl_(); - return m_pimpl_->shape().rank(); +auto MDBuffer::size() const noexcept -> size_type { return m_buffer_.size(); } + +auto MDBuffer::get_elem(index_vector index) const -> const_reference { + auto ordinal_index = coordinate_to_ordinal_(index); + return m_buffer_.at(ordinal_index); } -bool MDBuffer::has_pimpl_() const noexcept { return m_pimpl_ != nullptr; } +void MDBuffer::set_elem(index_vector index, value_type new_value) { + auto ordinal_index = coordinate_to_ordinal_(index); + mark_for_rehash_(); + m_buffer_.at(ordinal_index) = new_value; +} -void MDBuffer::assert_pimpl_() const { - if(!has_pimpl_()) { - throw std::runtime_error( - "MDBuffer has no PIMPL. 
Was it default constructed?"); - } +auto MDBuffer::get_mutable_data() -> buffer_view { + mark_for_rehash_(); + return m_buffer_; +} + +auto MDBuffer::get_immutable_data() const -> const_buffer_view { + return m_buffer_; +} + +// ----------------------------------------------------------------------------- +// -- Utility Methods +// ----------------------------------------------------------------------------- + +bool MDBuffer::operator==(const my_type& rhs) const noexcept { + if(!my_base_type::operator==(rhs)) return false; + return get_hash_() == rhs.get_hash_(); +} + +// ----------------------------------------------------------------------------- +// -- Protected Methods +// ----------------------------------------------------------------------------- + +auto MDBuffer::shape_() const -> const_shape_view { + return this->layout().shape().as_smooth(); } -auto MDBuffer::pimpl_() -> pimpl_type& { - assert_pimpl_(); - return *m_pimpl_; +auto MDBuffer::clone_() const -> buffer_base_pointer { + return std::make_unique(*this); +} + +bool MDBuffer::are_equal_(const_buffer_base_reference rhs) const noexcept { + return my_base_type::template are_equal_impl_(rhs); +} + +auto MDBuffer::addition_assignment_(label_type this_labels, + const_labeled_reference lhs, + const_labeled_reference rhs) + -> dsl_reference {} + +auto MDBuffer::subtraction_assignment_(label_type this_labels, + const_labeled_reference lhs, + const_labeled_reference rhs) + -> dsl_reference {} +auto MDBuffer::multiplication_assignment_(label_type this_labels, + const_labeled_reference lhs, + const_labeled_reference rhs) + -> dsl_reference {} + +auto MDBuffer::permute_assignment_(label_type this_labels, + const_labeled_reference rhs) + -> dsl_reference {} + +auto MDBuffer::scalar_multiplication_(label_type this_labels, double scalar, + const_labeled_reference rhs) + -> dsl_reference {} + +auto MDBuffer::to_string_() const -> string_type {} + +std::ostream& MDBuffer::add_to_stream_(std::ostream& os) const {} + 
+// ----------------------------------------------------------------------------- +// -- Private Methods +// ----------------------------------------------------------------------------- + +auto MDBuffer::coordinate_to_ordinal_(index_vector index) const -> size_type { + using size_type = typename decltype(index)::size_type; + size_type ordinal = 0; + size_type stride = 1; + for(rank_type i = shape_().rank(); i-- > 0;) { + ordinal += index[i] * stride; + stride *= shape_().extent(i); + } + return ordinal; } -auto MDBuffer::pimpl_() const -> const pimpl_type& { - assert_pimpl_(); - return *m_pimpl_; +void MDBuffer::update_hash_() const { + // for(auto i = 0; i < m_buffer_.size(); ++i) + // hash_utilities::hash_input(m_hash_, m_tensor_.data()[i]); + m_recalculate_hash_ = false; } } // namespace tensorwrapper::buffer From 07298b797ff76cac8e457205a24f320d5d615b33 Mon Sep 17 00:00:00 2001 From: "Ryan M. Richard" Date: Fri, 21 Nov 2025 09:52:56 -0600 Subject: [PATCH 09/18] adds more mdbuffer tests --- include/tensorwrapper/buffer/mdbuffer.hpp | 22 ++- include/tensorwrapper/shape/smooth.hpp | 2 + .../tensorwrapper/types/floating_point.hpp | 9 +- .../buffer/detail_/hash_utilities.hpp | 17 +++ src/tensorwrapper/buffer/mdbuffer.cpp | 37 +++-- .../tensorwrapper/buffer/mdbuffer.cpp | 127 ++++++++++++++++++ 6 files changed, 185 insertions(+), 29 deletions(-) create mode 100644 tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp diff --git a/include/tensorwrapper/buffer/mdbuffer.hpp b/include/tensorwrapper/buffer/mdbuffer.hpp index 82d9ba36..8db7271f 100644 --- a/include/tensorwrapper/buffer/mdbuffer.hpp +++ b/include/tensorwrapper/buffer/mdbuffer.hpp @@ -17,6 +17,7 @@ #pragma once #include #include +#include #include namespace tensorwrapper::buffer { @@ -57,19 +58,10 @@ class MDBuffer : public Replicated { MDBuffer() noexcept; template - MDBuffer(std::vector elements, const_shape_view shape) : - MDBuffer(buffer_type(std::move(elements)), - std::make_unique(shape), nullptr) {} 
+ MDBuffer(std::vector elements, shape_type shape) : + MDBuffer(buffer_type(std::move(elements)), std::move(shape)) {} - template - MDBuffer(std::vector elements, layout_pointer playout = nullptr, - allocator_base_pointer pallocator = nullptr) { - MDBuffer(buffer_type(std::move(elements)), std::move(playout), - std::move(pallocator)); - } - - MDBuffer(buffer_type buffer, layout_pointer playout = nullptr, - allocator_base_pointer pallocator = nullptr); + MDBuffer(buffer_type buffer, shape_type shape); MDBuffer(const MDBuffer& other) = default; MDBuffer(MDBuffer&& other) noexcept = default; @@ -83,6 +75,8 @@ class MDBuffer : public Replicated { // -- State Accessors // ------------------------------------------------------------------------- + const_shape_view shape() const; + size_type size() const noexcept; const_reference get_elem(index_vector index) const; @@ -100,8 +94,6 @@ class MDBuffer : public Replicated { bool operator==(const my_type& rhs) const noexcept; protected: - const_shape_view shape_() const; - buffer_base_pointer clone_() const override; bool are_equal_(const_buffer_base_reference rhs) const noexcept override; @@ -156,6 +148,8 @@ class MDBuffer : public Replicated { // Holds the computed hash value for this instance's state mutable hash_type m_hash_ = 0; + shape_type m_shape_; + buffer_type m_buffer_; }; diff --git a/include/tensorwrapper/shape/smooth.hpp b/include/tensorwrapper/shape/smooth.hpp index 32d167de..fd6cc86e 100644 --- a/include/tensorwrapper/shape/smooth.hpp +++ b/include/tensorwrapper/shape/smooth.hpp @@ -39,6 +39,8 @@ class Smooth : public ShapeBase { // -- Ctors, assignment, and dtor // ------------------------------------------------------------------------- + Smooth() noexcept = default; + /** @brief Constructs *this with a statically specified number of extents. 
* * This ctor is used to create a Smooth object by explicitly providing diff --git a/include/tensorwrapper/types/floating_point.hpp b/include/tensorwrapper/types/floating_point.hpp index d34ba1f1..46bf8464 100644 --- a/include/tensorwrapper/types/floating_point.hpp +++ b/include/tensorwrapper/types/floating_point.hpp @@ -15,7 +15,9 @@ */ #pragma once +#include #include +#include #ifdef ENABLE_SIGMA #include #endif @@ -46,6 +48,10 @@ T fabs(T value) { MACRO_IN(double); \ MACRO_IN(types::ufloat); \ MACRO_IN(types::udouble) +} // namespace tensorwrapper::types + +WTF_REGISTER_FP_TYPE(tensorwrapper::types::ufloat); +WTF_REGISTER_FP_TYPE(tensorwrapper::types::udouble); #else using ufloat = float; @@ -65,6 +71,5 @@ T fabs(T value) { MACRO_IN(float); \ MACRO_IN(double) -#endif - } // namespace tensorwrapper::types +#endif diff --git a/src/tensorwrapper/buffer/detail_/hash_utilities.hpp b/src/tensorwrapper/buffer/detail_/hash_utilities.hpp index 021b291e..a9c35cdb 100644 --- a/src/tensorwrapper/buffer/detail_/hash_utilities.hpp +++ b/src/tensorwrapper/buffer/detail_/hash_utilities.hpp @@ -68,4 +68,21 @@ void hash_input(hash_type& seed, const sigma::Uncertain& value) { #endif +class HashVisitor { +public: + HashVisitor(hash_type seed = 0) : m_seed_(seed) {} + + hash_type get_hash() const { return m_seed_; } + + template + void operator()(std::span data) { + for(std::size_t i = 0; i < data.size(); ++i) { + hash_input(m_seed_, data[i]); + } + } + +private: + hash_type m_seed_; +}; + } // namespace tensorwrapper::buffer::detail_::hash_utilities diff --git a/src/tensorwrapper/buffer/mdbuffer.cpp b/src/tensorwrapper/buffer/mdbuffer.cpp index d427d4df..f291ceca 100644 --- a/src/tensorwrapper/buffer/mdbuffer.cpp +++ b/src/tensorwrapper/buffer/mdbuffer.cpp @@ -21,17 +21,29 @@ namespace tensorwrapper::buffer { +using fp_types = types::floating_point_types; + MDBuffer::MDBuffer() noexcept = default; -MDBuffer::MDBuffer(buffer_type buffer, layout_pointer playout, - 
allocator_base_pointer pallocator) : - my_base_type(std::move(playout), std::move(pallocator)), - m_buffer_(std::move(buffer)) {} +MDBuffer::MDBuffer(buffer_type buffer, shape_type shape) : + my_base_type(std::make_unique(shape), nullptr), + m_shape_(std::move(shape)), + m_buffer_() { + if(buffer.size() == shape.size()) { + m_buffer_ = std::move(buffer); + } else { + throw std::invalid_argument( + "The size of the provided buffer does not match the size " + "implied by the provided shape."); + } +} // ----------------------------------------------------------------------------- // -- State Accessor // ----------------------------------------------------------------------------- +auto MDBuffer::shape() const -> const_shape_view { return m_shape_; } + auto MDBuffer::size() const noexcept -> size_type { return m_buffer_.size(); } auto MDBuffer::get_elem(index_vector index) const -> const_reference { @@ -67,10 +79,6 @@ bool MDBuffer::operator==(const my_type& rhs) const noexcept { // -- Protected Methods // ----------------------------------------------------------------------------- -auto MDBuffer::shape_() const -> const_shape_view { - return this->layout().shape().as_smooth(); -} - auto MDBuffer::clone_() const -> buffer_base_pointer { return std::make_unique(*this); } @@ -103,7 +111,7 @@ auto MDBuffer::scalar_multiplication_(label_type this_labels, double scalar, auto MDBuffer::to_string_() const -> string_type {} -std::ostream& MDBuffer::add_to_stream_(std::ostream& os) const {} +std::ostream& MDBuffer::add_to_stream_(std::ostream& os) const { return os; } // ----------------------------------------------------------------------------- // -- Private Methods @@ -113,16 +121,19 @@ auto MDBuffer::coordinate_to_ordinal_(index_vector index) const -> size_type { using size_type = typename decltype(index)::size_type; size_type ordinal = 0; size_type stride = 1; - for(rank_type i = shape_().rank(); i-- > 0;) { + for(rank_type i = shape().rank(); i-- > 0;) { ordinal += 
index[i] * stride; - stride *= shape_().extent(i); + stride *= shape().extent(i); } return ordinal; } void MDBuffer::update_hash_() const { - // for(auto i = 0; i < m_buffer_.size(); ++i) - // hash_utilities::hash_input(m_hash_, m_tensor_.data()[i]); + buffer::detail_::hash_utilities::HashVisitor visitor; + if(m_buffer_.size()) { + wtf::buffer::visit_contiguous_buffer(visitor, m_buffer_); + m_hash_ = visitor.get_hash(); + } m_recalculate_hash_ = false; } diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp new file mode 100644 index 00000000..5b613924 --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp @@ -0,0 +1,127 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../testing/testing.hpp" +#include +#include + +using namespace tensorwrapper; + +TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { + using buffer::MDBuffer; + using buffer_type = MDBuffer::buffer_type; + using shape_type = typename MDBuffer::shape_type; + + TestType one(1.0), two(2.0), three(3.0), four(4.0); + std::vector data = {one, two, three, four}; + + shape_type scalar_shape({}); + shape_type vector_shape({4}); + shape_type matrix_shape({2, 2}); + + MDBuffer defaulted; + MDBuffer scalar(std::vector{one}, scalar_shape); + MDBuffer vector(data, vector_shape); + MDBuffer matrix(data, matrix_shape); + + SECTION("Ctors and assignment") { + SECTION("Default ctor") { REQUIRE(defaulted.size() == 0); } + + SECTION("vector ctor") {} + + SECTION("FloatBuffer ctor") { + buffer_type buf(data); + REQUIRE_THROWS_AS(MDBuffer(buf, scalar_shape), + std::invalid_argument); + } + } + + SECTION("shape") { + REQUIRE(defaulted.shape() == shape_type()); + REQUIRE(scalar.shape() == scalar_shape); + REQUIRE(vector.shape() == vector_shape); + REQUIRE(matrix.shape() == matrix_shape); + } + + SECTION("size") { + REQUIRE(defaulted.size() == 0); + REQUIRE(scalar.size() == 1); + REQUIRE(vector.size() == 4); + REQUIRE(matrix.size() == 4); + } + + SECTION("get_elem") { + REQUIRE_THROWS_AS(defaulted.get_elem({}), std::out_of_range); + + REQUIRE(scalar.get_elem({}) == one); + + REQUIRE(vector.get_elem({0}) == one); + REQUIRE(vector.get_elem({1}) == two); + REQUIRE(vector.get_elem({2}) == three); + REQUIRE(vector.get_elem({3}) == four); + + REQUIRE(matrix.get_elem({0, 0}) == one); + REQUIRE(matrix.get_elem({0, 1}) == two); + REQUIRE(matrix.get_elem({1, 0}) == three); + REQUIRE(matrix.get_elem({1, 1}) == four); + } + + SECTION("set_elem") { + REQUIRE_THROWS_AS(defaulted.set_elem({}, one), std::out_of_range); + + REQUIRE(scalar.get_elem({}) != two); + scalar.set_elem({}, two); + REQUIRE(scalar.get_elem({}) == two); + + REQUIRE(vector.get_elem({2}) != four); + 
vector.set_elem({2}, four); + REQUIRE(vector.get_elem({2}) == four); + + REQUIRE(matrix.get_elem({1, 0}) != one); + matrix.set_elem({1, 0}, one); + REQUIRE(matrix.get_elem({1, 0}) == one); + } + + SECTION("operator==") { + // Same object + REQUIRE(defaulted == defaulted); + + MDBuffer scalar_copy(std::vector{one}, scalar_shape); + REQUIRE(scalar == scalar_copy); + + MDBuffer vector_copy(data, vector_shape); + REQUIRE(vector == vector_copy); + + MDBuffer matrix_copy(data, matrix_shape); + REQUIRE(matrix == matrix_copy); + + // Different ranks + REQUIRE_FALSE(scalar == vector); + REQUIRE_FALSE(vector == matrix); + REQUIRE_FALSE(scalar == matrix); + + // Different shapes + shape_type matrix_shape2({4, 1}); + REQUIRE_FALSE(scalar == MDBuffer(data, matrix_shape2)); + + // Different values + std::vector diff_data = {two, three, four, one}; + MDBuffer scalar_diff(std::vector{two}, scalar_shape); + REQUIRE_FALSE(scalar == scalar_diff); + REQUIRE_FALSE(vector == MDBuffer(diff_data, vector_shape)); + REQUIRE_FALSE(matrix == MDBuffer(diff_data, matrix_shape)); + } +} From 84c4437722c09f605af884e8754fec704f8da769 Mon Sep 17 00:00:00 2001 From: "Ryan M. Richard" Date: Fri, 21 Nov 2025 09:53:54 -0600 Subject: [PATCH 10/18] add missing header --- include/tensorwrapper/types/floating_point.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/tensorwrapper/types/floating_point.hpp b/include/tensorwrapper/types/floating_point.hpp index d34ba1f1..fb37346a 100644 --- a/include/tensorwrapper/types/floating_point.hpp +++ b/include/tensorwrapper/types/floating_point.hpp @@ -15,6 +15,7 @@ */ #pragma once +#include #include #ifdef ENABLE_SIGMA #include From 6dd590160f9c92d27f0422fca00c281f8a0263e4 Mon Sep 17 00:00:00 2001 From: "Ryan M. 
Richard" Date: Sat, 22 Nov 2025 09:59:23 -0600 Subject: [PATCH 11/18] backup --- include/tensorwrapper/buffer/mdbuffer.hpp | 223 ++++++++++++++++-- .../backends/eigen/eigen_tensor_impl.cpp | 6 +- .../buffer/detail_/addition_visitor.hpp | 106 ++++++++- src/tensorwrapper/buffer/mdbuffer.cpp | 74 +++++- .../tensorwrapper/buffer/mdbuffer.cpp | 157 +++++++++++- 5 files changed, 533 insertions(+), 33 deletions(-) diff --git a/include/tensorwrapper/buffer/mdbuffer.hpp b/include/tensorwrapper/buffer/mdbuffer.hpp index 8db7271f..ab5e1cc0 100644 --- a/include/tensorwrapper/buffer/mdbuffer.hpp +++ b/include/tensorwrapper/buffer/mdbuffer.hpp @@ -30,11 +30,15 @@ class MDBuffer : public Replicated { private: /// Type *this derives from using my_base_type = Replicated; - using traits_type = types::ClassTraits; - using my_type = MDBuffer; + + /// Type defining the types for the public API of *this + using traits_type = types::ClassTraits; + + /// Type of *this + using my_type = MDBuffer; public: - /// Add types to public API + /// Add types from traits_type to public API ///@{ using value_type = typename traits_type::value_type; using reference = typename traits_type::reference; @@ -53,49 +57,215 @@ class MDBuffer : public Replicated { using index_vector = std::vector; using typename my_base_type::label_type; using string_type = std::string; - using hash_type = std::size_t; + // ------------------------------------------------------------------------- + // -- Ctors, assignment, and dtor + // ------------------------------------------------------------------------- + + /** @brief Creates an empty multi-dimensional buffer. + * + * The resulting buffer will have a shape of rank 0, but a size of 0. Thus + * the buffer can NOT be used to store any elements (including treating + * *this as a scalar). The resulting buffer can be assigned to or moved + * to to populate it. + * + * @throw None No throw guarantee. 
+ */ MDBuffer() noexcept; + /** @brief Treats allocated memory like a multi-dimensional buffer. + * + * @tparam T The type of the elements in the buffer. Must satisfy the + * FloatingPoint concept. + * + * This ctor will use @p element to create a buffer_type object and then + * pass that along with @p shape to the main ctor. + * + * @param[in] elements The elements to be used as the backing store. + * @param[in] shape The shape of *this. + * + * @throw std::invalid_argument if the size of @p elements does not match + * the size implied by @p shape. Strong throw + * guarantee. + * @throw std::bad_alloc if there is a problem allocating memory for the + * internal state. Strong throw guarantee. + */ template MDBuffer(std::vector elements, shape_type shape) : MDBuffer(buffer_type(std::move(elements)), std::move(shape)) {} + /** @brief The main ctor. + * + * This ctor will create *this using @p buffer as the backing store and + * @p shape to describe the geometry of the multidimensional array. + * + * All other ctors (aside from copy and move) delegate to this one. + * + * @param[in] buffer The buffer to be used as the backing store. + * @param[in] shape The shape of *this. + * + * @throw std::invalid_argument if the size of @p buffer does not match + * the size implied by @p shape. Strong throw + * guarantee. + * @throw std::bad_alloc if there is a problem allocating memory for the + * internal state. Strong throw guarantee. + */ MDBuffer(buffer_type buffer, shape_type shape); - MDBuffer(const MDBuffer& other) = default; + /** @brief Initializes *this to a deep copy of @p other. + * + * This ctor will initialize *this to be a deep copy of @p other. + * + * @param[in] other The MDBuffer to copy. + * + * @throw std::bad_alloc if there is a problem allocating memory for the + * internal state. Strong throw guarantee. + */ + MDBuffer(const MDBuffer& other) = default; + + /** @brief Move ctor. + * + * This ctor will initialize *this by taking the state from @p other. 
+ * After this ctor is called @p other is left in a valid but unspecified + * state. + * + * @param[in,out] other The MDBuffer to move from. + * + * @throw None No throw guarantee. + */ MDBuffer(MDBuffer&& other) noexcept = default; - MDBuffer& operator=(const MDBuffer& other) = default; + /** @brief Copy assignment. + * + * This operator will make *this a deep copy of @p other. + * + * @param[in] other The MDBuffer to copy. + * + * @return *this after the assignment. + * + * @throw std::bad_alloc if there is a problem allocating memory for the + * internal state. Strong throw guarantee. + */ + MDBuffer& operator=(const MDBuffer& other) = default; + + /** @brief Move assignment. + * + * This operator will make *this take the state from @p other. After + * this operator is called @p other is left in a valid but unspecified + * state. + * + * @param[in,out] other The MDBuffer to move from. + * + * @return *this after the assignment. + * + * @throw None No throw guarantee. + */ MDBuffer& operator=(MDBuffer&& other) noexcept = default; + /** @brief Defaulted dtor. + * + * @throw None No throw guarantee. + */ ~MDBuffer() override = default; // ------------------------------------------------------------------------- // -- State Accessors // ------------------------------------------------------------------------- + /** @brief Returns (a view of) the shape of *this. + * + * The shape of *this describes the geometry of the underlying + * multidimensional array. + * + * @return A view of the shape of *this. + * + * @throw std::bad_alloc if there is a problem allocating memory for the + * returned view. Strong throw guarantee. + */ const_shape_view shape() const; + /** @brief The total number of elements in *this. + * + * The total number of elements is the product of the extents of each + * mode of *this. + * + * @return The total number of elements in *this. + * + * @throw None No throw guarantee. 
+ */ size_type size() const noexcept; + /** @brief Returns the element with the offsets specified by @p index. + * + * This method will retrieve a const reference to the element at the + * offsets specified by @p index. The length of @p index must be equal + * to the rank of *this and each entry in @p index must be less than the + * extent of the corresponding mode of *this. + * + * This method can only be used to retrieve elements from *this. To modify + * elements use set_elem(). + * + * @param[in] index The offsets into each mode of *this for the desired + * element. + * + * @return A const reference to the element at the specified offsets. + */ const_reference get_elem(index_vector index) const; + /** @brief Sets the specified element to @p new_value. + * + * This method will set the element at the offsets specified by @p index. + * The length of @p index must be equal to the rank of *this and each + * entry in @p index must be less than the extent of the corresponding + * mode of *this. + * + * @param[in] index The offsets into each mode of *this for the desired + * element. + * @param[in] new_value The new value for the specified element. + * + * @throw std::out_of_range if any entry in @p index is invalid. Strong + * throw guarantee. + */ void set_elem(index_vector index, value_type new_value); - buffer_view get_mutable_data(); + /** @brief Returns a view of the data. + * + * This method is deprecated. Use set_slice instead. + */ + [[deprecated]] buffer_view get_mutable_data(); - const_buffer_view get_immutable_data() const; + /** @brief Returns a read-only view of the data. + * + * This method is deprecated. Use get_slice instead. + */ + [[deprecated]] const_buffer_view get_immutable_data() const; // ------------------------------------------------------------------------- // -- Utility Methods // ------------------------------------------------------------------------- + /** @brief Compares two MDBuffer objects for exact equality. 
+ * + * Two MDBuffer objects are exactly equal if they have the same shape and + * if all of their corresponding elements are bitwise identical. + * In practice, the implementation stores a hash of the elements in the + * tensor and compares the hashes for equality rather than checking each + * element individually. + * + * @param[in] rhs The MDBuffer to compare against. + * + * @return True if *this and @p rhs are exactly equal and false otherwise. + * + * @throw None No throw guarantee. + */ bool operator==(const my_type& rhs) const noexcept; protected: + /// Makes a deep polymorphic copy of *this buffer_base_pointer clone_() const override; + /// Implements are_equal by checking that rhs is an MDBuffer and then + /// calling operator== bool are_equal_(const_buffer_base_reference rhs) const noexcept override; dsl_reference addition_assignment_(label_type this_labels, @@ -114,42 +284,53 @@ class MDBuffer : public Replicated { dsl_reference scalar_multiplication_(label_type this_labels, double scalar, const_labeled_reference rhs) override; + /// Calls add_to_stream_ on a stringstream to implement string_type to_string_() const override; + /// Uses Eigen's printing capabilities to add to stream std::ostream& add_to_stream_(std::ostream& os) const override; - // Returns the hash for the current state of *this, computing first if - // needed. +private: + /// Type for storing the hash of *this + using hash_type = std::size_t; + + /// Logic for validating that an index is within the bounds of the shape + void check_index_(const index_vector& index) const; + + /// Converts a coordinate index to a linear (ordinal) index + size_type coordinate_to_ordinal_(index_vector index) const; + + /// Returns the hash for the current state of *this, computing first if + /// needed. 
hash_type get_hash_() const { if(m_recalculate_hash_ or !m_hash_caching_) update_hash_(); return m_hash_; } -private: - size_type coordinate_to_ordinal_(index_vector index) const; - - // Computes the hash for the current state of *this + /// Computes the hash for the current state of *this void update_hash_() const; - // Designates that the state may have changed and to recalculate the hash. - // This function is really just for readability and clarity. + /// Designates that the state may have changed and to recalculate the hash. + /// This function is really just for readability and clarity. void mark_for_rehash_() const { m_recalculate_hash_ = true; } - // Designates that state changes are not trackable and we should recalculate - // the hash each time. + /// Designates that state changes are not trackable and we should + /// recalculate the hash each time. void turn_off_hash_caching_() const { m_hash_caching_ = false; } - // Tracks whether the hash needs to be redetermined + /// Tracks whether the hash needs to be redetermined mutable bool m_recalculate_hash_ = true; - // Tracks whether hash caching has been turned off + /// Tracks whether hash caching has been turned off mutable bool m_hash_caching_ = true; - // Holds the computed hash value for this instance's state + /// Holds the computed hash value for this instance's state mutable hash_type m_hash_ = 0; + /// How the hyper-rectangular array is shaped shape_type m_shape_; + /// The flat buffer holding the elements of *this buffer_type m_buffer_; }; diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp index 53eb677e..28d13020 100644 --- a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp +++ b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -76,7 +76,11 @@ auto EIGEN_TENSOR::to_string_() const -> string_type { TPARAMS std::ostream& EIGEN_TENSOR::add_to_stream_(std::ostream& os) const { os << std::fixed << 
std::setprecision(16); - return os << m_tensor_.format(Eigen::TensorIOFormat::Numpy()); + if constexpr(Rank > 0) { + return os << m_tensor_.format(Eigen::TensorIOFormat::Numpy()); + } else { + return os << m_tensor_; + } } TPARAMS diff --git a/src/tensorwrapper/buffer/detail_/addition_visitor.hpp b/src/tensorwrapper/buffer/detail_/addition_visitor.hpp index 4e021e8a..0f1d99ca 100644 --- a/src/tensorwrapper/buffer/detail_/addition_visitor.hpp +++ b/src/tensorwrapper/buffer/detail_/addition_visitor.hpp @@ -16,6 +16,9 @@ #pragma once #include +#include +#include +#include namespace tensorwrapper::buffer::detail_ { @@ -24,14 +27,105 @@ namespace tensorwrapper::buffer::detail_ { * * */ -class AdditionVisitor { +class BinaryOperationVisitor { public: - // AdditionVisitor(shape, permutation, shape, permutation) + using buffer_type = wtf::buffer::FloatBuffer; + using string_type = std::string; + using label_type = dsl::DummyIndices; + using shape_type = shape::Smooth; + using const_shape_view = shape::SmoothView; + + BinaryOperationVisitor(buffer_type& this_buffer, label_type this_labels, + shape_type this_shape, label_type lhs_labels, + shape_type lhs_shape, label_type rhs_labels, + shape_type rhs_shape) : + m_pthis_buffer_(&this_buffer), + m_this_labels_(std::move(this_labels)), + m_this_shape_(std::move(this_shape)), + m_lhs_labels_(std::move(lhs_labels)), + m_lhs_shape_(std::move(lhs_shape)), + m_rhs_labels_(std::move(rhs_labels)), + m_rhs_shape_(std::move(rhs_shape)) {} + + const auto& this_shape() const { return m_this_shape_; } + const auto& lhs_shape() const { return m_lhs_shape_; } + const auto& rhs_shape() const { return m_rhs_shape_; } + + const auto& this_labels() const { return m_this_labels_; } + const auto& lhs_labels() const { return m_lhs_labels_; } + const auto& rhs_labels() const { return m_rhs_labels_; } + template - void operator()(std::span lhs, std::span rhs) { - // auto lhs_wrapped = backends::eigen::wrap_span(lhs); - // auto rhs_wrapped = 
backends::eigen::wrap_span(rhs); - for(std::size_t i = 0; i < lhs.size(); ++i) lhs[i] += rhs[i]; + requires(!std::is_same_v) + void operator()(std::span, std::span) { + throw std::runtime_error( + "BinaryOperationVisitor: Mixed types not supported"); + } + +protected: + template + auto make_eigen_tensor_(std::span data, const_shape_view shape) { + return backends::eigen::make_eigen_tensor(data, shape); + } + + template + auto make_this_eigen_tensor_() { + if(m_pthis_buffer_->size() != m_this_shape_.size()) { + std::vector temp_buffer(m_this_shape_.size()); + *m_pthis_buffer_ = buffer_type(std::move(temp_buffer)); + } + auto this_span = + wtf::buffer::contiguous_buffer_cast(*m_pthis_buffer_); + return backends::eigen::make_eigen_tensor(this_span, m_this_shape_); + } + + template + auto make_lhs_eigen_tensor_(std::span data) { + /// XXX: Ideally we would not need to const_cast here, but we didn't + /// code EigenTensor correctly... + + auto* pdata = const_cast(data.data()); + std::span non_const_data(pdata, data.size()); + return backends::eigen::make_eigen_tensor(non_const_data, m_lhs_shape_); + } + + template + auto make_rhs_eigen_tensor_(std::span data) { + /// XXX: Ideally we would not need to const_cast here, but we didn't + /// code EigenTensor correctly... 
+ + auto* pdata = const_cast(data.data()); + std::span non_const_data(pdata, data.size()); + return backends::eigen::make_eigen_tensor(non_const_data, m_rhs_shape_); + } + +private: + buffer_type* m_pthis_buffer_; + label_type m_this_labels_; + shape_type m_this_shape_; + + label_type m_lhs_labels_; + shape_type m_lhs_shape_; + + label_type m_rhs_labels_; + shape_type m_rhs_shape_; +}; + +class AdditionVisitor : public BinaryOperationVisitor { +public: + using BinaryOperationVisitor::BinaryOperationVisitor; + + // AdditionVisitor(shape, permutation, shape, permutation) + template + void operator()(std::span lhs, + std::span rhs) { + using clean_t = std::decay_t; + auto pthis = this->make_this_eigen_tensor_(); + auto plhs = this->make_lhs_eigen_tensor_(lhs); + auto prhs = this->make_rhs_eigen_tensor_(rhs); + + pthis->addition_assignment(this_labels(), lhs_labels(), rhs_labels(), + *plhs, *prhs); } }; diff --git a/src/tensorwrapper/buffer/mdbuffer.cpp b/src/tensorwrapper/buffer/mdbuffer.cpp index f291ceca..78b869d5 100644 --- a/src/tensorwrapper/buffer/mdbuffer.cpp +++ b/src/tensorwrapper/buffer/mdbuffer.cpp @@ -14,12 +14,24 @@ * limitations under the License. 
*/ +#include "../backends/eigen/eigen_tensor_impl.hpp" #include "detail_/addition_visitor.hpp" #include "detail_/hash_utilities.hpp" #include #include namespace tensorwrapper::buffer { +namespace { + +template +const MDBuffer& downcast(T&& object) { + auto* pobject = dynamic_cast(&object); + if(pobject == nullptr) { + throw std::invalid_argument("The provided buffer must be an MDBuffer."); + } + return *pobject; +} +} // namespace using fp_types = types::floating_point_types; @@ -90,7 +102,29 @@ bool MDBuffer::are_equal_(const_buffer_base_reference rhs) const noexcept { auto MDBuffer::addition_assignment_(label_type this_labels, const_labeled_reference lhs, const_labeled_reference rhs) - -> dsl_reference {} + -> dsl_reference { + const auto& lhs_down = downcast(lhs.object()); + const auto& rhs_down = downcast(rhs.object()); + const auto& lhs_labels = lhs.labels(); + const auto& rhs_labels = rhs.labels(); + const auto& lhs_shape = lhs_down.m_shape_; + const auto& rhs_shape = rhs_down.m_shape_; + + auto labeled_lhs_shape = lhs_shape(lhs_labels); + auto labeled_rhs_shape = rhs_shape(rhs_labels); + + m_shape_.addition_assignment(this_labels, labeled_lhs_shape, + labeled_rhs_shape); + + detail_::AdditionVisitor visitor(m_buffer_, this_labels, m_shape_, + lhs.labels(), lhs_shape, rhs.labels(), + rhs_shape); + + wtf::buffer::visit_contiguous_buffer(visitor, lhs_down.m_buffer_, + rhs_down.m_buffer_); + mark_for_rehash_(); + return *this; +} auto MDBuffer::subtraction_assignment_(label_type this_labels, const_labeled_reference lhs, @@ -109,15 +143,49 @@ auto MDBuffer::scalar_multiplication_(label_type this_labels, double scalar, const_labeled_reference rhs) -> dsl_reference {} -auto MDBuffer::to_string_() const -> string_type {} +auto MDBuffer::to_string_() const -> string_type { + std::stringstream ss; + add_to_stream_(ss); + return ss.str(); +} -std::ostream& MDBuffer::add_to_stream_(std::ostream& os) const { return os; } +std::ostream& 
MDBuffer::add_to_stream_(std::ostream& os) const { + /// XXX: EigenTensor should handle aliasing a const buffer correctly. That's + /// a lot of work, just to get this to work though... + + if(m_buffer_.size() == 0) return os; + auto lambda = [&](auto&& span) { + using clean_type = std::decay_t::value_type; + auto data_ptr = const_cast(span.data()); + std::span data_span(data_ptr, span.size()); + auto ptensor = backends::eigen::make_eigen_tensor(data_span, m_shape_); + ptensor->add_to_stream(os); + }; + wtf::buffer::visit_contiguous_buffer(lambda, m_buffer_); + return os; +} // ----------------------------------------------------------------------------- // -- Private Methods // ----------------------------------------------------------------------------- +void MDBuffer::check_index_(const index_vector& index) const { + if(index.size() != m_shape_.rank()) { + throw std::out_of_range( + "The length of the provided index does not match the rank of " + "*this."); + } + for(rank_type i = 0; i < m_shape_.rank(); ++i) { + if(index[i] >= m_shape_.extent(i)) { + throw std::out_of_range( + "An index provided is out of bounds for the corresponding " + "dimension."); + } + } +} + auto MDBuffer::coordinate_to_ordinal_(index_vector index) const -> size_type { + check_index_(index); using size_type = typename decltype(index)::size_type; size_type ordinal = 0; size_type stride = 1; diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp index 5b613924..6937839f 100644 --- a/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp +++ b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp @@ -24,6 +24,7 @@ TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { using buffer::MDBuffer; using buffer_type = MDBuffer::buffer_type; using shape_type = typename MDBuffer::shape_type; + using label_type = typename MDBuffer::label_type; TestType one(1.0), two(2.0), three(3.0), four(4.0); std::vector data = 
{one, two, three, four}; @@ -38,15 +39,127 @@ TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { MDBuffer matrix(data, matrix_shape); SECTION("Ctors and assignment") { - SECTION("Default ctor") { REQUIRE(defaulted.size() == 0); } + SECTION("Default ctor") { + REQUIRE(defaulted.size() == 0); + REQUIRE(defaulted.shape() == shape_type()); + } + + SECTION("vector ctor") { + REQUIRE(scalar.size() == 1); + REQUIRE(scalar.shape() == scalar_shape); + REQUIRE(scalar.get_elem({}) == one); + + REQUIRE(vector.size() == 4); + REQUIRE(vector.shape() == vector_shape); + REQUIRE(vector.get_elem({0}) == one); + REQUIRE(vector.get_elem({1}) == two); + REQUIRE(vector.get_elem({2}) == three); + REQUIRE(vector.get_elem({3}) == four); - SECTION("vector ctor") {} + REQUIRE(matrix.size() == 4); + REQUIRE(matrix.shape() == matrix_shape); + REQUIRE(matrix.get_elem({0, 0}) == one); + REQUIRE(matrix.get_elem({0, 1}) == two); + REQUIRE(matrix.get_elem({1, 0}) == three); + REQUIRE(matrix.get_elem({1, 1}) == four); + + REQUIRE_THROWS_AS(MDBuffer(data, scalar_shape), + std::invalid_argument); + } SECTION("FloatBuffer ctor") { buffer_type buf(data); + + MDBuffer vector_buf(buf, vector_shape); + REQUIRE(vector_buf == vector); + + MDBuffer matrix_buf(buf, matrix_shape); + REQUIRE(matrix_buf == matrix); + REQUIRE_THROWS_AS(MDBuffer(buf, scalar_shape), std::invalid_argument); } + + SECTION("Copy ctor") { + MDBuffer defaulted_copy(defaulted); + REQUIRE(defaulted_copy == defaulted); + + MDBuffer scalar_copy(scalar); + REQUIRE(scalar_copy == scalar); + + MDBuffer vector_copy(vector); + REQUIRE(vector_copy == vector); + + MDBuffer matrix_copy(matrix); + REQUIRE(matrix_copy == matrix); + } + + SECTION("Move ctor") { + MDBuffer defaulted_temp(defaulted); + MDBuffer defaulted_move(std::move(defaulted_temp)); + REQUIRE(defaulted_move == defaulted); + + MDBuffer scalar_temp(scalar); + MDBuffer scalar_move(std::move(scalar_temp)); + REQUIRE(scalar_move == scalar); + + MDBuffer 
vector_temp(vector); + MDBuffer vector_move(std::move(vector_temp)); + REQUIRE(vector_move == vector); + + MDBuffer matrix_temp(matrix); + MDBuffer matrix_move(std::move(matrix_temp)); + REQUIRE(matrix_move == matrix); + } + + SECTION("Copy assignment") { + MDBuffer defaulted_copy; + auto pdefaulted_copy = &(defaulted_copy = defaulted); + REQUIRE(defaulted_copy == defaulted); + REQUIRE(pdefaulted_copy == &defaulted_copy); + + MDBuffer scalar_copy; + auto pscalar_copy = &(scalar_copy = scalar); + REQUIRE(scalar_copy == scalar); + REQUIRE(pscalar_copy == &scalar_copy); + + MDBuffer vector_copy; + auto pvector_copy = &(vector_copy = vector); + REQUIRE(vector_copy == vector); + REQUIRE(pvector_copy == &vector_copy); + + MDBuffer matrix_copy; + auto pmatrix_copy = &(matrix_copy = matrix); + REQUIRE(matrix_copy == matrix); + REQUIRE(pmatrix_copy == &matrix_copy); + } + + SECTION("Move assignment") { + MDBuffer defaulted_temp(defaulted); + MDBuffer defaulted_move; + auto pdefaulted_move = + &(defaulted_move = std::move(defaulted_temp)); + REQUIRE(defaulted_move == defaulted); + REQUIRE(pdefaulted_move == &defaulted_move); + + MDBuffer scalar_temp(scalar); + MDBuffer scalar_move; + auto pscalar_move = &(scalar_move = std::move(scalar_temp)); + REQUIRE(scalar_move == scalar); + REQUIRE(pscalar_move == &scalar_move); + + MDBuffer vector_temp(vector); + MDBuffer vector_move; + auto pvector_move = &(vector_move = std::move(vector_temp)); + REQUIRE(vector_move == vector); + REQUIRE(pvector_move == &vector_move); + + MDBuffer matrix_temp(matrix); + MDBuffer matrix_move; + auto pmatrix_move = &(matrix_move = std::move(matrix_temp)); + REQUIRE(matrix_move == matrix); + REQUIRE(pmatrix_move == &matrix_move); + } } SECTION("shape") { @@ -67,16 +180,19 @@ TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { REQUIRE_THROWS_AS(defaulted.get_elem({}), std::out_of_range); REQUIRE(scalar.get_elem({}) == one); + REQUIRE_THROWS_AS(scalar.get_elem({0}), std::out_of_range); 
REQUIRE(vector.get_elem({0}) == one); REQUIRE(vector.get_elem({1}) == two); REQUIRE(vector.get_elem({2}) == three); REQUIRE(vector.get_elem({3}) == four); + REQUIRE_THROWS_AS(vector.get_elem({4}), std::out_of_range); REQUIRE(matrix.get_elem({0, 0}) == one); REQUIRE(matrix.get_elem({0, 1}) == two); REQUIRE(matrix.get_elem({1, 0}) == three); REQUIRE(matrix.get_elem({1, 1}) == four); + REQUIRE_THROWS_AS(matrix.get_elem({2, 0}), std::out_of_range); } SECTION("set_elem") { @@ -124,4 +240,41 @@ TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { REQUIRE_FALSE(vector == MDBuffer(diff_data, vector_shape)); REQUIRE_FALSE(matrix == MDBuffer(diff_data, matrix_shape)); } + + SECTION("addition_assignment_") { + SECTION("scalar") { + label_type labels(""); + MDBuffer result; + result.addition_assignment(labels, scalar(labels), scalar(labels)); + REQUIRE(result.shape() == scalar_shape); + REQUIRE(result.get_elem({}) == TestType(2.0)); + } + } + + SECTION("to_string") { + REQUIRE(defaulted.to_string().empty()); + REQUIRE_FALSE(scalar.to_string().empty()); + REQUIRE_FALSE(vector.to_string().empty()); + REQUIRE_FALSE(matrix.to_string().empty()); + } + + SECTION("add_to_stream") { + std::stringstream ss; + SECTION("defaulted") { + defaulted.add_to_stream(ss); + REQUIRE(ss.str().empty()); + } + SECTION("scalar") { + scalar.add_to_stream(ss); + REQUIRE_FALSE(ss.str().empty()); + } + SECTION("vector") { + vector.add_to_stream(ss); + REQUIRE_FALSE(ss.str().empty()); + } + SECTION("matrix") { + matrix.add_to_stream(ss); + REQUIRE_FALSE(ss.str().empty()); + } + } } From 0819e551ae53313906cbbef4623aaa86430dcbf5 Mon Sep 17 00:00:00 2001 From: "Ryan M. 
Richard" Date: Thu, 13 Nov 2025 15:54:41 -0600 Subject: [PATCH 12/18] most of eigenbuffer move is done --- src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp index 53eb677e..46a42037 100644 --- a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp +++ b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -253,7 +253,6 @@ void EIGEN_TENSOR::contraction_assignment_(label_type this_label, #undef EIGEN_TENSOR #undef TPARAMS -template std::unique_ptr> make_eigen_tensor( std::span data, shape::SmoothView shape) { switch(shape.rank()) { From 90c826d1a79008c375e9ce8e5b6c83efde2b0ca5 Mon Sep 17 00:00:00 2001 From: "Ryan M. Richard" Date: Thu, 20 Nov 2025 10:43:17 -0600 Subject: [PATCH 13/18] compiles (FWIW) --- include/tensorwrapper/buffer/mdbuffer.hpp | 134 +++++++++++++++--- .../tensorwrapper/concepts/floating_point.hpp | 26 ++++ .../tensorwrapper/forward_declarations.hpp | 2 + .../tensorwrapper/types/mdbuffer_traits.hpp | 4 +- .../buffer/detail_/mdbuffer_pimpl.hpp | 53 ------- src/tensorwrapper/buffer/mdbuffer.cpp | 115 ++++++++++++--- 6 files changed, 241 insertions(+), 93 deletions(-) create mode 100644 include/tensorwrapper/concepts/floating_point.hpp delete mode 100644 src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp diff --git a/include/tensorwrapper/buffer/mdbuffer.hpp b/include/tensorwrapper/buffer/mdbuffer.hpp index 72f5c765..82d9ba36 100644 --- a/include/tensorwrapper/buffer/mdbuffer.hpp +++ b/include/tensorwrapper/buffer/mdbuffer.hpp @@ -15,6 +15,8 @@ */ #pragma once +#include +#include #include namespace tensorwrapper::buffer { @@ -23,42 +25,138 @@ namespace tensorwrapper::buffer { * * This class is a dense multidimensional buffer of floating-point values. 
*/ -class MDBuffer { +class MDBuffer : public Replicated { private: - using traits_type = types::ClassTraits; + /// Type *this derives from + using my_base_type = Replicated; + using traits_type = types::ClassTraits; + using my_type = MDBuffer; public: /// Add types to public API ///@{ - using buffer_type = typename traits_type::buffer_type; - using pimpl_type = typename traits_type::pimpl_type; - using pimpl_pointer = typename traits_type::pimpl_pointer; - using rank_type = typename traits_type::rank_type; - using shape_type = typename traits_type::shape_type; + using value_type = typename traits_type::value_type; + using reference = typename traits_type::reference; + using const_reference = typename traits_type::const_reference; + using buffer_type = typename traits_type::buffer_type; + using buffer_view = typename traits_type::buffer_view; + using const_buffer_view = typename traits_type::const_buffer_view; + using pimpl_type = typename traits_type::pimpl_type; + using pimpl_pointer = typename traits_type::pimpl_pointer; + using rank_type = typename traits_type::rank_type; + using shape_type = typename traits_type::shape_type; + using const_shape_view = typename traits_type::const_shape_view; + using size_type = typename traits_type::size_type; ///@} + using index_vector = std::vector; + using typename my_base_type::label_type; + using string_type = std::string; + using hash_type = std::size_t; + MDBuffer() noexcept; - template - MDBuffer(shape_type shape, std::vector elements) { - MDBuffer(std::move(shape), buffer_type(std::move(elements))); + template + MDBuffer(std::vector elements, const_shape_view shape) : + MDBuffer(buffer_type(std::move(elements)), + std::make_unique(shape), nullptr) {} + + template + MDBuffer(std::vector elements, layout_pointer playout = nullptr, + allocator_base_pointer pallocator = nullptr) { + MDBuffer(buffer_type(std::move(elements)), std::move(playout), + std::move(pallocator)); } - MDBuffer(shape_type shape, buffer_type buffer); + 
MDBuffer(buffer_type buffer, layout_pointer playout = nullptr, + allocator_base_pointer pallocator = nullptr); + + MDBuffer(const MDBuffer& other) = default; + MDBuffer(MDBuffer&& other) noexcept = default; + + MDBuffer& operator=(const MDBuffer& other) = default; + MDBuffer& operator=(MDBuffer&& other) noexcept = default; + + ~MDBuffer() override = default; + + // ------------------------------------------------------------------------- + // -- State Accessors + // ------------------------------------------------------------------------- + + size_type size() const noexcept; + + const_reference get_elem(index_vector index) const; + + void set_elem(index_vector index, value_type new_value); + + buffer_view get_mutable_data(); + + const_buffer_view get_immutable_data() const; + + // ------------------------------------------------------------------------- + // -- Utility Methods + // ------------------------------------------------------------------------- + + bool operator==(const my_type& rhs) const noexcept; - rank_type rank() const; +protected: + const_shape_view shape_() const; + + buffer_base_pointer clone_() const override; + + bool are_equal_(const_buffer_base_reference rhs) const noexcept override; + + dsl_reference addition_assignment_(label_type this_labels, + const_labeled_reference lhs, + const_labeled_reference rhs) override; + dsl_reference subtraction_assignment_(label_type this_labels, + const_labeled_reference lhs, + const_labeled_reference rhs) override; + dsl_reference multiplication_assignment_( + label_type this_labels, const_labeled_reference lhs, + const_labeled_reference rhs) override; + + dsl_reference permute_assignment_(label_type this_labels, + const_labeled_reference rhs) override; + + dsl_reference scalar_multiplication_(label_type this_labels, double scalar, + const_labeled_reference rhs) override; + + string_type to_string_() const override; + + std::ostream& add_to_stream_(std::ostream& os) const override; + + // Returns the hash for 
the current state of *this, computing first if + // needed. + hash_type get_hash_() const { + if(m_recalculate_hash_ or !m_hash_caching_) update_hash_(); + return m_hash_; + } private: - explicit MDBuffer(pimpl_pointer pimpl) noexcept; + size_type coordinate_to_ordinal_(index_vector index) const; + + // Computes the hash for the current state of *this + void update_hash_() const; + + // Designates that the state may have changed and to recalculate the hash. + // This function is really just for readability and clarity. + void mark_for_rehash_() const { m_recalculate_hash_ = true; } + + // Designates that state changes are not trackable and we should recalculate + // the hash each time. + void turn_off_hash_caching_() const { m_hash_caching_ = false; } - bool has_pimpl_() const noexcept; + // Tracks whether the hash needs to be redetermined + mutable bool m_recalculate_hash_ = true; - void assert_pimpl_() const; + // Tracks whether hash caching has been turned off + mutable bool m_hash_caching_ = true; - pimpl_type& pimpl_(); - const pimpl_type& pimpl_() const; + // Holds the computed hash value for this instance's state + mutable hash_type m_hash_ = 0; - pimpl_pointer m_pimpl_; + buffer_type m_buffer_; }; } // namespace tensorwrapper::buffer diff --git a/include/tensorwrapper/concepts/floating_point.hpp b/include/tensorwrapper/concepts/floating_point.hpp new file mode 100644 index 00000000..d95588d0 --- /dev/null +++ b/include/tensorwrapper/concepts/floating_point.hpp @@ -0,0 +1,26 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include + +namespace tensorwrapper::concepts { + +using wtf::concepts::ConstFloatingPoint; +using wtf::concepts::FloatingPoint; +using wtf::concepts::UnmodifiedFloatingPoint; + +} // namespace tensorwrapper::concepts diff --git a/include/tensorwrapper/forward_declarations.hpp b/include/tensorwrapper/forward_declarations.hpp index 16c51064..e030b3a9 100644 --- a/include/tensorwrapper/forward_declarations.hpp +++ b/include/tensorwrapper/forward_declarations.hpp @@ -28,6 +28,8 @@ class MDBuffer; } // namespace buffer namespace shape { +template +class SmoothView; class Smooth; diff --git a/include/tensorwrapper/types/mdbuffer_traits.hpp b/include/tensorwrapper/types/mdbuffer_traits.hpp index 27c74421..aa60a608 100644 --- a/include/tensorwrapper/types/mdbuffer_traits.hpp +++ b/include/tensorwrapper/types/mdbuffer_traits.hpp @@ -28,7 +28,9 @@ struct MDBufferTraitsCommon { using buffer_type = wtf::buffer::FloatBuffer; using const_buffer_view = wtf::buffer::BufferView; using shape_type = shape::Smooth; - using rank_type = typename shape_type::rank_type; + using const_shape_view = shape::SmoothView; + using rank_type = typename ClassTraits::rank_type; + using size_type = typename ClassTraits::size_type; using pimpl_type = tensorwrapper::buffer::detail_::MDBufferPIMPL; using pimpl_pointer = std::unique_ptr; }; diff --git a/src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp b/src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp deleted file mode 100644 index 6f410098..00000000 --- a/src/tensorwrapper/buffer/detail_/mdbuffer_pimpl.hpp 
+++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2025 NWChemEx-Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include -#include - -namespace tensorwrapper::buffer::detail_ { - -class MDBufferPIMPL { -public: - using parent_type = tensorwrapper::buffer::MDBuffer; - using traits_type = tensorwrapper::types::ClassTraits; - - /// Add types to public API - ///@{ - using value_type = typename traits_type::value_type; - using rank_type = typename traits_type::rank_type; - using buffer_type = typename traits_type::buffer_type; - using shape_type = typename traits_type::shape_type; - ///@} - - MDBufferPIMPL(shape_type shape, buffer_type buffer) noexcept : - m_shape_(std::move(shape)), m_buffer_(std::move(buffer)) {} - - auto& shape() noexcept { return m_shape_; } - - const auto& shape() const noexcept { return m_shape_; } - - auto& buffer() noexcept { return m_buffer_; } - - const auto& buffer() const noexcept { return m_buffer_; } - -private: - shape_type m_shape_; - - buffer_type m_buffer_; -}; - -} // namespace tensorwrapper::buffer::detail_ diff --git a/src/tensorwrapper/buffer/mdbuffer.cpp b/src/tensorwrapper/buffer/mdbuffer.cpp index fe92be9c..d427d4df 100644 --- a/src/tensorwrapper/buffer/mdbuffer.cpp +++ b/src/tensorwrapper/buffer/mdbuffer.cpp @@ -15,42 +15,115 @@ */ #include "detail_/addition_visitor.hpp" -#include "detail_/mdbuffer_pimpl.hpp" +#include "detail_/hash_utilities.hpp" #include #include namespace 
tensorwrapper::buffer { -MDBuffer::MDBuffer() noexcept : m_pimpl_(nullptr) {} +MDBuffer::MDBuffer() noexcept = default; -MDBuffer::MDBuffer(shape_type shape, buffer_type buffer) : - MDBuffer(std::make_unique(std::move(shape), - std::move(buffer))) {} +MDBuffer::MDBuffer(buffer_type buffer, layout_pointer playout, + allocator_base_pointer pallocator) : + my_base_type(std::move(playout), std::move(pallocator)), + m_buffer_(std::move(buffer)) {} -MDBuffer::MDBuffer(pimpl_pointer pimpl) noexcept : m_pimpl_(std::move(pimpl)) {} +// ----------------------------------------------------------------------------- +// -- State Accessor +// ----------------------------------------------------------------------------- -auto MDBuffer::rank() const -> rank_type { - assert_pimpl_(); - return m_pimpl_->shape().rank(); +auto MDBuffer::size() const noexcept -> size_type { return m_buffer_.size(); } + +auto MDBuffer::get_elem(index_vector index) const -> const_reference { + auto ordinal_index = coordinate_to_ordinal_(index); + return m_buffer_.at(ordinal_index); } -bool MDBuffer::has_pimpl_() const noexcept { return m_pimpl_ != nullptr; } +void MDBuffer::set_elem(index_vector index, value_type new_value) { + auto ordinal_index = coordinate_to_ordinal_(index); + mark_for_rehash_(); + m_buffer_.at(ordinal_index) = new_value; +} -void MDBuffer::assert_pimpl_() const { - if(!has_pimpl_()) { - throw std::runtime_error( - "MDBuffer has no PIMPL. 
Was it default constructed?"); - } +auto MDBuffer::get_mutable_data() -> buffer_view { + mark_for_rehash_(); + return m_buffer_; +} + +auto MDBuffer::get_immutable_data() const -> const_buffer_view { + return m_buffer_; +} + +// ----------------------------------------------------------------------------- +// -- Utility Methods +// ----------------------------------------------------------------------------- + +bool MDBuffer::operator==(const my_type& rhs) const noexcept { + if(!my_base_type::operator==(rhs)) return false; + return get_hash_() == rhs.get_hash_(); +} + +// ----------------------------------------------------------------------------- +// -- Protected Methods +// ----------------------------------------------------------------------------- + +auto MDBuffer::shape_() const -> const_shape_view { + return this->layout().shape().as_smooth(); } -auto MDBuffer::pimpl_() -> pimpl_type& { - assert_pimpl_(); - return *m_pimpl_; +auto MDBuffer::clone_() const -> buffer_base_pointer { + return std::make_unique(*this); +} + +bool MDBuffer::are_equal_(const_buffer_base_reference rhs) const noexcept { + return my_base_type::template are_equal_impl_(rhs); +} + +auto MDBuffer::addition_assignment_(label_type this_labels, + const_labeled_reference lhs, + const_labeled_reference rhs) + -> dsl_reference {} + +auto MDBuffer::subtraction_assignment_(label_type this_labels, + const_labeled_reference lhs, + const_labeled_reference rhs) + -> dsl_reference {} +auto MDBuffer::multiplication_assignment_(label_type this_labels, + const_labeled_reference lhs, + const_labeled_reference rhs) + -> dsl_reference {} + +auto MDBuffer::permute_assignment_(label_type this_labels, + const_labeled_reference rhs) + -> dsl_reference {} + +auto MDBuffer::scalar_multiplication_(label_type this_labels, double scalar, + const_labeled_reference rhs) + -> dsl_reference {} + +auto MDBuffer::to_string_() const -> string_type {} + +std::ostream& MDBuffer::add_to_stream_(std::ostream& os) const {} + 
+// ----------------------------------------------------------------------------- +// -- Private Methods +// ----------------------------------------------------------------------------- + +auto MDBuffer::coordinate_to_ordinal_(index_vector index) const -> size_type { + using size_type = typename decltype(index)::size_type; + size_type ordinal = 0; + size_type stride = 1; + for(rank_type i = shape_().rank(); i-- > 0;) { + ordinal += index[i] * stride; + stride *= shape_().extent(i); + } + return ordinal; } -auto MDBuffer::pimpl_() const -> const pimpl_type& { - assert_pimpl_(); - return *m_pimpl_; +void MDBuffer::update_hash_() const { + // for(auto i = 0; i < m_buffer_.size(); ++i) + // hash_utilities::hash_input(m_hash_, m_tensor_.data()[i]); + m_recalculate_hash_ = false; } } // namespace tensorwrapper::buffer From b29640e8d563373e3be5f52701115a3f062719cd Mon Sep 17 00:00:00 2001 From: "Ryan M. Richard" Date: Fri, 21 Nov 2025 09:52:56 -0600 Subject: [PATCH 14/18] adds more mdbuffer tests --- include/tensorwrapper/buffer/mdbuffer.hpp | 22 ++- include/tensorwrapper/shape/smooth.hpp | 2 + .../tensorwrapper/types/floating_point.hpp | 8 +- .../buffer/detail_/hash_utilities.hpp | 17 +++ src/tensorwrapper/buffer/mdbuffer.cpp | 37 +++-- .../tensorwrapper/buffer/mdbuffer.cpp | 127 ++++++++++++++++++ 6 files changed, 184 insertions(+), 29 deletions(-) create mode 100644 tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp diff --git a/include/tensorwrapper/buffer/mdbuffer.hpp b/include/tensorwrapper/buffer/mdbuffer.hpp index 82d9ba36..8db7271f 100644 --- a/include/tensorwrapper/buffer/mdbuffer.hpp +++ b/include/tensorwrapper/buffer/mdbuffer.hpp @@ -17,6 +17,7 @@ #pragma once #include #include +#include #include namespace tensorwrapper::buffer { @@ -57,19 +58,10 @@ class MDBuffer : public Replicated { MDBuffer() noexcept; template - MDBuffer(std::vector elements, const_shape_view shape) : - MDBuffer(buffer_type(std::move(elements)), - std::make_unique(shape), nullptr) {} 
+ MDBuffer(std::vector elements, shape_type shape) : + MDBuffer(buffer_type(std::move(elements)), std::move(shape)) {} - template - MDBuffer(std::vector elements, layout_pointer playout = nullptr, - allocator_base_pointer pallocator = nullptr) { - MDBuffer(buffer_type(std::move(elements)), std::move(playout), - std::move(pallocator)); - } - - MDBuffer(buffer_type buffer, layout_pointer playout = nullptr, - allocator_base_pointer pallocator = nullptr); + MDBuffer(buffer_type buffer, shape_type shape); MDBuffer(const MDBuffer& other) = default; MDBuffer(MDBuffer&& other) noexcept = default; @@ -83,6 +75,8 @@ class MDBuffer : public Replicated { // -- State Accessors // ------------------------------------------------------------------------- + const_shape_view shape() const; + size_type size() const noexcept; const_reference get_elem(index_vector index) const; @@ -100,8 +94,6 @@ class MDBuffer : public Replicated { bool operator==(const my_type& rhs) const noexcept; protected: - const_shape_view shape_() const; - buffer_base_pointer clone_() const override; bool are_equal_(const_buffer_base_reference rhs) const noexcept override; @@ -156,6 +148,8 @@ class MDBuffer : public Replicated { // Holds the computed hash value for this instance's state mutable hash_type m_hash_ = 0; + shape_type m_shape_; + buffer_type m_buffer_; }; diff --git a/include/tensorwrapper/shape/smooth.hpp b/include/tensorwrapper/shape/smooth.hpp index 32d167de..fd6cc86e 100644 --- a/include/tensorwrapper/shape/smooth.hpp +++ b/include/tensorwrapper/shape/smooth.hpp @@ -39,6 +39,8 @@ class Smooth : public ShapeBase { // -- Ctors, assignment, and dtor // ------------------------------------------------------------------------- + Smooth() noexcept = default; + /** @brief Constructs *this with a statically specified number of extents. 
* * This ctor is used to create a Smooth object by explicitly providing diff --git a/include/tensorwrapper/types/floating_point.hpp b/include/tensorwrapper/types/floating_point.hpp index fb37346a..46bf8464 100644 --- a/include/tensorwrapper/types/floating_point.hpp +++ b/include/tensorwrapper/types/floating_point.hpp @@ -17,6 +17,7 @@ #pragma once #include #include +#include #ifdef ENABLE_SIGMA #include #endif @@ -47,6 +48,10 @@ T fabs(T value) { MACRO_IN(double); \ MACRO_IN(types::ufloat); \ MACRO_IN(types::udouble) +} // namespace tensorwrapper::types + +WTF_REGISTER_FP_TYPE(tensorwrapper::types::ufloat); +WTF_REGISTER_FP_TYPE(tensorwrapper::types::udouble); #else using ufloat = float; @@ -66,6 +71,5 @@ T fabs(T value) { MACRO_IN(float); \ MACRO_IN(double) -#endif - } // namespace tensorwrapper::types +#endif diff --git a/src/tensorwrapper/buffer/detail_/hash_utilities.hpp b/src/tensorwrapper/buffer/detail_/hash_utilities.hpp index 021b291e..a9c35cdb 100644 --- a/src/tensorwrapper/buffer/detail_/hash_utilities.hpp +++ b/src/tensorwrapper/buffer/detail_/hash_utilities.hpp @@ -68,4 +68,21 @@ void hash_input(hash_type& seed, const sigma::Uncertain& value) { #endif +class HashVisitor { +public: + HashVisitor(hash_type seed = 0) : m_seed_(seed) {} + + hash_type get_hash() const { return m_seed_; } + + template + void operator()(std::span data) { + for(std::size_t i = 0; i < data.size(); ++i) { + hash_input(m_seed_, data[i]); + } + } + +private: + hash_type m_seed_; +}; + } // namespace tensorwrapper::buffer::detail_::hash_utilities diff --git a/src/tensorwrapper/buffer/mdbuffer.cpp b/src/tensorwrapper/buffer/mdbuffer.cpp index d427d4df..f291ceca 100644 --- a/src/tensorwrapper/buffer/mdbuffer.cpp +++ b/src/tensorwrapper/buffer/mdbuffer.cpp @@ -21,17 +21,29 @@ namespace tensorwrapper::buffer { +using fp_types = types::floating_point_types; + MDBuffer::MDBuffer() noexcept = default; -MDBuffer::MDBuffer(buffer_type buffer, layout_pointer playout, - allocator_base_pointer 
pallocator) : - my_base_type(std::move(playout), std::move(pallocator)), - m_buffer_(std::move(buffer)) {} +MDBuffer::MDBuffer(buffer_type buffer, shape_type shape) : + my_base_type(std::make_unique(shape), nullptr), + m_shape_(std::move(shape)), + m_buffer_() { + if(buffer.size() == shape.size()) { + m_buffer_ = std::move(buffer); + } else { + throw std::invalid_argument( + "The size of the provided buffer does not match the size " + "implied by the provided shape."); + } +} // ----------------------------------------------------------------------------- // -- State Accessor // ----------------------------------------------------------------------------- +auto MDBuffer::shape() const -> const_shape_view { return m_shape_; } + auto MDBuffer::size() const noexcept -> size_type { return m_buffer_.size(); } auto MDBuffer::get_elem(index_vector index) const -> const_reference { @@ -67,10 +79,6 @@ bool MDBuffer::operator==(const my_type& rhs) const noexcept { // -- Protected Methods // ----------------------------------------------------------------------------- -auto MDBuffer::shape_() const -> const_shape_view { - return this->layout().shape().as_smooth(); -} - auto MDBuffer::clone_() const -> buffer_base_pointer { return std::make_unique(*this); } @@ -103,7 +111,7 @@ auto MDBuffer::scalar_multiplication_(label_type this_labels, double scalar, auto MDBuffer::to_string_() const -> string_type {} -std::ostream& MDBuffer::add_to_stream_(std::ostream& os) const {} +std::ostream& MDBuffer::add_to_stream_(std::ostream& os) const { return os; } // ----------------------------------------------------------------------------- // -- Private Methods @@ -113,16 +121,19 @@ auto MDBuffer::coordinate_to_ordinal_(index_vector index) const -> size_type { using size_type = typename decltype(index)::size_type; size_type ordinal = 0; size_type stride = 1; - for(rank_type i = shape_().rank(); i-- > 0;) { + for(rank_type i = shape().rank(); i-- > 0;) { ordinal += index[i] * stride; - 
stride *= shape_().extent(i); + stride *= shape().extent(i); } return ordinal; } void MDBuffer::update_hash_() const { - // for(auto i = 0; i < m_buffer_.size(); ++i) - // hash_utilities::hash_input(m_hash_, m_tensor_.data()[i]); + buffer::detail_::hash_utilities::HashVisitor visitor; + if(m_buffer_.size()) { + wtf::buffer::visit_contiguous_buffer(visitor, m_buffer_); + m_hash_ = visitor.get_hash(); + } m_recalculate_hash_ = false; } diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp new file mode 100644 index 00000000..5b613924 --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp @@ -0,0 +1,127 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../testing/testing.hpp" +#include +#include + +using namespace tensorwrapper; + +TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { + using buffer::MDBuffer; + using buffer_type = MDBuffer::buffer_type; + using shape_type = typename MDBuffer::shape_type; + + TestType one(1.0), two(2.0), three(3.0), four(4.0); + std::vector data = {one, two, three, four}; + + shape_type scalar_shape({}); + shape_type vector_shape({4}); + shape_type matrix_shape({2, 2}); + + MDBuffer defaulted; + MDBuffer scalar(std::vector{one}, scalar_shape); + MDBuffer vector(data, vector_shape); + MDBuffer matrix(data, matrix_shape); + + SECTION("Ctors and assignment") { + SECTION("Default ctor") { REQUIRE(defaulted.size() == 0); } + + SECTION("vector ctor") {} + + SECTION("FloatBuffer ctor") { + buffer_type buf(data); + REQUIRE_THROWS_AS(MDBuffer(buf, scalar_shape), + std::invalid_argument); + } + } + + SECTION("shape") { + REQUIRE(defaulted.shape() == shape_type()); + REQUIRE(scalar.shape() == scalar_shape); + REQUIRE(vector.shape() == vector_shape); + REQUIRE(matrix.shape() == matrix_shape); + } + + SECTION("size") { + REQUIRE(defaulted.size() == 0); + REQUIRE(scalar.size() == 1); + REQUIRE(vector.size() == 4); + REQUIRE(matrix.size() == 4); + } + + SECTION("get_elem") { + REQUIRE_THROWS_AS(defaulted.get_elem({}), std::out_of_range); + + REQUIRE(scalar.get_elem({}) == one); + + REQUIRE(vector.get_elem({0}) == one); + REQUIRE(vector.get_elem({1}) == two); + REQUIRE(vector.get_elem({2}) == three); + REQUIRE(vector.get_elem({3}) == four); + + REQUIRE(matrix.get_elem({0, 0}) == one); + REQUIRE(matrix.get_elem({0, 1}) == two); + REQUIRE(matrix.get_elem({1, 0}) == three); + REQUIRE(matrix.get_elem({1, 1}) == four); + } + + SECTION("set_elem") { + REQUIRE_THROWS_AS(defaulted.set_elem({}, one), std::out_of_range); + + REQUIRE(scalar.get_elem({}) != two); + scalar.set_elem({}, two); + REQUIRE(scalar.get_elem({}) == two); + + REQUIRE(vector.get_elem({2}) != four); + 
vector.set_elem({2}, four); + REQUIRE(vector.get_elem({2}) == four); + + REQUIRE(matrix.get_elem({1, 0}) != one); + matrix.set_elem({1, 0}, one); + REQUIRE(matrix.get_elem({1, 0}) == one); + } + + SECTION("operator==") { + // Same object + REQUIRE(defaulted == defaulted); + + MDBuffer scalar_copy(std::vector{one}, scalar_shape); + REQUIRE(scalar == scalar_copy); + + MDBuffer vector_copy(data, vector_shape); + REQUIRE(vector == vector_copy); + + MDBuffer matrix_copy(data, matrix_shape); + REQUIRE(matrix == matrix_copy); + + // Different ranks + REQUIRE_FALSE(scalar == vector); + REQUIRE_FALSE(vector == matrix); + REQUIRE_FALSE(scalar == matrix); + + // Different shapes + shape_type matrix_shape2({4, 1}); + REQUIRE_FALSE(scalar == MDBuffer(data, matrix_shape2)); + + // Different values + std::vector diff_data = {two, three, four, one}; + MDBuffer scalar_diff(std::vector{two}, scalar_shape); + REQUIRE_FALSE(scalar == scalar_diff); + REQUIRE_FALSE(vector == MDBuffer(diff_data, vector_shape)); + REQUIRE_FALSE(matrix == MDBuffer(diff_data, matrix_shape)); + } +} From 15b8c111d6c0ffea2bab1b70e0234dcfa0ca277a Mon Sep 17 00:00:00 2001 From: "Ryan M. 
Richard" Date: Sat, 22 Nov 2025 09:59:23 -0600 Subject: [PATCH 15/18] backup --- include/tensorwrapper/buffer/mdbuffer.hpp | 223 ++++++++++++++++-- .../backends/eigen/eigen_tensor_impl.cpp | 6 +- .../buffer/detail_/addition_visitor.hpp | 106 ++++++++- src/tensorwrapper/buffer/mdbuffer.cpp | 74 +++++- .../tensorwrapper/buffer/mdbuffer.cpp | 157 +++++++++++- 5 files changed, 533 insertions(+), 33 deletions(-) diff --git a/include/tensorwrapper/buffer/mdbuffer.hpp b/include/tensorwrapper/buffer/mdbuffer.hpp index 8db7271f..ab5e1cc0 100644 --- a/include/tensorwrapper/buffer/mdbuffer.hpp +++ b/include/tensorwrapper/buffer/mdbuffer.hpp @@ -30,11 +30,15 @@ class MDBuffer : public Replicated { private: /// Type *this derives from using my_base_type = Replicated; - using traits_type = types::ClassTraits; - using my_type = MDBuffer; + + /// Type defining the types for the public API of *this + using traits_type = types::ClassTraits; + + /// Type of *this + using my_type = MDBuffer; public: - /// Add types to public API + /// Add types from traits_type to public API ///@{ using value_type = typename traits_type::value_type; using reference = typename traits_type::reference; @@ -53,49 +57,215 @@ class MDBuffer : public Replicated { using index_vector = std::vector; using typename my_base_type::label_type; using string_type = std::string; - using hash_type = std::size_t; + // ------------------------------------------------------------------------- + // -- Ctors, assignment, and dtor + // ------------------------------------------------------------------------- + + /** @brief Creates an empty multi-dimensional buffer. + * + * The resulting buffer will have a shape of rank 0, but a size of 0. Thus + * the buffer can NOT be used to store any elements (including treating + * *this as a scalar). The resulting buffer can be assigned to or moved + * to to populate it. + * + * @throw None No throw guarantee. 
+ */ MDBuffer() noexcept; + /** @brief Treats allocated memory like a multi-dimensional buffer. + * + * @tparam T The type of the elements in the buffer. Must satisfy the + * FloatingPoint concept. + * + * This ctor will use @p element to create a buffer_type object and then + * pass that along with @p shape to the main ctor. + * + * @param[in] elements The elements to be used as the backing store. + * @param[in] shape The shape of *this. + * + * @throw std::invalid_argument if the size of @p elements does not match + * the size implied by @p shape. Strong throw + * guarantee. + * @throw std::bad_alloc if there is a problem allocating memory for the + * internal state. Strong throw guarantee. + */ template MDBuffer(std::vector elements, shape_type shape) : MDBuffer(buffer_type(std::move(elements)), std::move(shape)) {} + /** @brief The main ctor. + * + * This ctor will create *this using @p buffer as the backing store and + * @p shape to describe the geometry of the multidimensional array. + * + * All other ctors (aside from copy and move) delegate to this one. + * + * @param[in] buffer The buffer to be used as the backing store. + * @param[in] shape The shape of *this. + * + * @throw std::invalid_argument if the size of @p buffer does not match + * the size implied by @p shape. Strong throw + * guarantee. + * @throw std::bad_alloc if there is a problem allocating memory for the + * internal state. Strong throw guarantee. + */ MDBuffer(buffer_type buffer, shape_type shape); - MDBuffer(const MDBuffer& other) = default; + /** @brief Initializes *this to a deep copy of @p other. + * + * This ctor will initialize *this to be a deep copy of @p other. + * + * @param[in] other The MDBuffer to copy. + * + * @throw std::bad_alloc if there is a problem allocating memory for the + * internal state. Strong throw guarantee. + */ + MDBuffer(const MDBuffer& other) = default; + + /** @brief Move ctor. + * + * This ctor will initialize *this by taking the state from @p other. 
+ * After this ctor is called @p other is left in a valid but unspecified + * state. + * + * @param[in,out] other The MDBuffer to move from. + * + * @throw None No throw guarantee. + */ MDBuffer(MDBuffer&& other) noexcept = default; - MDBuffer& operator=(const MDBuffer& other) = default; + /** @brief Copy assignment. + * + * This operator will make *this a deep copy of @p other. + * + * @param[in] other The MDBuffer to copy. + * + * @return *this after the assignment. + * + * @throw std::bad_alloc if there is a problem allocating memory for the + * internal state. Strong throw guarantee. + */ + MDBuffer& operator=(const MDBuffer& other) = default; + + /** @brief Move assignment. + * + * This operator will make *this take the state from @p other. After + * this operator is called @p other is left in a valid but unspecified + * state. + * + * @param[in,out] other The MDBuffer to move from. + * + * @return *this after the assignment. + * + * @throw None No throw guarantee. + */ MDBuffer& operator=(MDBuffer&& other) noexcept = default; + /** @brief Defaulted dtor. + * + * @throw None No throw guarantee. + */ ~MDBuffer() override = default; // ------------------------------------------------------------------------- // -- State Accessors // ------------------------------------------------------------------------- + /** @brief Returns (a view of) the shape of *this. + * + * The shape of *this describes the geometry of the underlying + * multidimensional array. + * + * @return A view of the shape of *this. + * + * @throw std::bad_alloc if there is a problem allocating memory for the + * returned view. Strong throw guarantee. + */ const_shape_view shape() const; + /** @brief The total number of elements in *this. + * + * The total number of elements is the product of the extents of each + * mode of *this. + * + * @return The total number of elements in *this. + * + * @throw None No throw guarantee. 
+ */ size_type size() const noexcept; + /** @brief Returns the element with the offsets specified by @p index. + * + * This method will retrieve a const reference to the element at the + * offsets specified by @p index. The length of @p index must be equal + * to the rank of *this and each entry in @p index must be less than the + * extent of the corresponding mode of *this. + * + * This method can only be used to retrieve elements from *this. To modify + * elements use set_elem(). + * + * @param[in] index The offsets into each mode of *this for the desired + * element. + * + * @return A const reference to the element at the specified offsets. + */ const_reference get_elem(index_vector index) const; + /** @brief Sets the specified element to @p new_value. + * + * This method will set the element at the offsets specified by @p index. + * The length of @p index must be equal to the rank of *this and each + * entry in @p index must be less than the extent of the corresponding + * mode of *this. + * + * @param[in] index The offsets into each mode of *this for the desired + * element. + * @param[in] new_value The new value for the specified element. + * + * @throw std::out_of_range if any entry in @p index is invalid. Strong + * throw guarantee. + */ void set_elem(index_vector index, value_type new_value); - buffer_view get_mutable_data(); + /** @brief Returns a view of the data. + * + * This method is deprecated. Use set_slice instead. + */ + [[deprecated]] buffer_view get_mutable_data(); - const_buffer_view get_immutable_data() const; + /** @brief Returns a read-only view of the data. + * + * This method is deprecated. Use get_slice instead. + */ + [[deprecated]] const_buffer_view get_immutable_data() const; // ------------------------------------------------------------------------- // -- Utility Methods // ------------------------------------------------------------------------- + /** @brief Compares two MDBuffer objects for exact equality. 
+ * + * Two MDBuffer objects are exactly equal if they have the same shape and + * if all of their corresponding elements are bitwise identical. + * In practice, the implementation stores a hash of the elements in the + * tensor and compares the hashes for equality rather than checking each + * element individually. + * + * @param[in] rhs The MDBuffer to compare against. + * + * @return True if *this and @p rhs are exactly equal and false otherwise. + * + * @throw None No throw guarantee. + */ bool operator==(const my_type& rhs) const noexcept; protected: + /// Makes a deep polymorphic copy of *this buffer_base_pointer clone_() const override; + /// Implements are_equal by checking that rhs is an MDBuffer and then + /// calling operator== bool are_equal_(const_buffer_base_reference rhs) const noexcept override; dsl_reference addition_assignment_(label_type this_labels, @@ -114,42 +284,53 @@ class MDBuffer : public Replicated { dsl_reference scalar_multiplication_(label_type this_labels, double scalar, const_labeled_reference rhs) override; + /// Calls add_to_stream_ on a stringstream to implement string_type to_string_() const override; + /// Uses Eigen's printing capabilities to add to stream std::ostream& add_to_stream_(std::ostream& os) const override; - // Returns the hash for the current state of *this, computing first if - // needed. +private: + /// Type for storing the hash of *this + using hash_type = std::size_t; + + /// Logic for validating that an index is within the bounds of the shape + void check_index_(const index_vector& index) const; + + /// Converts a coordinate index to a linear (ordinal) index + size_type coordinate_to_ordinal_(index_vector index) const; + + /// Returns the hash for the current state of *this, computing first if + /// needed. 
hash_type get_hash_() const { if(m_recalculate_hash_ or !m_hash_caching_) update_hash_(); return m_hash_; } -private: - size_type coordinate_to_ordinal_(index_vector index) const; - - // Computes the hash for the current state of *this + /// Computes the hash for the current state of *this void update_hash_() const; - // Designates that the state may have changed and to recalculate the hash. - // This function is really just for readability and clarity. + /// Designates that the state may have changed and to recalculate the hash. + /// This function is really just for readability and clarity. void mark_for_rehash_() const { m_recalculate_hash_ = true; } - // Designates that state changes are not trackable and we should recalculate - // the hash each time. + /// Designates that state changes are not trackable and we should + /// recalculate the hash each time. void turn_off_hash_caching_() const { m_hash_caching_ = false; } - // Tracks whether the hash needs to be redetermined + /// Tracks whether the hash needs to be redetermined mutable bool m_recalculate_hash_ = true; - // Tracks whether hash caching has been turned off + /// Tracks whether hash caching has been turned off mutable bool m_hash_caching_ = true; - // Holds the computed hash value for this instance's state + /// Holds the computed hash value for this instance's state mutable hash_type m_hash_ = 0; + /// How the hyper-rectangular array is shaped shape_type m_shape_; + /// The flat buffer holding the elements of *this buffer_type m_buffer_; }; diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp index 46a42037..5e853a79 100644 --- a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp +++ b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -76,7 +76,11 @@ auto EIGEN_TENSOR::to_string_() const -> string_type { TPARAMS std::ostream& EIGEN_TENSOR::add_to_stream_(std::ostream& os) const { os << std::fixed << 
std::setprecision(16); - return os << m_tensor_.format(Eigen::TensorIOFormat::Numpy()); + if constexpr(Rank > 0) { + return os << m_tensor_.format(Eigen::TensorIOFormat::Numpy()); + } else { + return os << m_tensor_; + } } TPARAMS diff --git a/src/tensorwrapper/buffer/detail_/addition_visitor.hpp b/src/tensorwrapper/buffer/detail_/addition_visitor.hpp index 4e021e8a..0f1d99ca 100644 --- a/src/tensorwrapper/buffer/detail_/addition_visitor.hpp +++ b/src/tensorwrapper/buffer/detail_/addition_visitor.hpp @@ -16,6 +16,9 @@ #pragma once #include +#include +#include +#include namespace tensorwrapper::buffer::detail_ { @@ -24,14 +27,105 @@ namespace tensorwrapper::buffer::detail_ { * * */ -class AdditionVisitor { +class BinaryOperationVisitor { public: - // AdditionVisitor(shape, permutation, shape, permutation) + using buffer_type = wtf::buffer::FloatBuffer; + using string_type = std::string; + using label_type = dsl::DummyIndices; + using shape_type = shape::Smooth; + using const_shape_view = shape::SmoothView; + + BinaryOperationVisitor(buffer_type& this_buffer, label_type this_labels, + shape_type this_shape, label_type lhs_labels, + shape_type lhs_shape, label_type rhs_labels, + shape_type rhs_shape) : + m_pthis_buffer_(&this_buffer), + m_this_labels_(std::move(this_labels)), + m_this_shape_(std::move(this_shape)), + m_lhs_labels_(std::move(lhs_labels)), + m_lhs_shape_(std::move(lhs_shape)), + m_rhs_labels_(std::move(rhs_labels)), + m_rhs_shape_(std::move(rhs_shape)) {} + + const auto& this_shape() const { return m_this_shape_; } + const auto& lhs_shape() const { return m_lhs_shape_; } + const auto& rhs_shape() const { return m_rhs_shape_; } + + const auto& this_labels() const { return m_this_labels_; } + const auto& lhs_labels() const { return m_lhs_labels_; } + const auto& rhs_labels() const { return m_rhs_labels_; } + template - void operator()(std::span lhs, std::span rhs) { - // auto lhs_wrapped = backends::eigen::wrap_span(lhs); - // auto rhs_wrapped = 
backends::eigen::wrap_span(rhs); - for(std::size_t i = 0; i < lhs.size(); ++i) lhs[i] += rhs[i]; + requires(!std::is_same_v) + void operator()(std::span, std::span) { + throw std::runtime_error( + "BinaryOperationVisitor: Mixed types not supported"); + } + +protected: + template + auto make_eigen_tensor_(std::span data, const_shape_view shape) { + return backends::eigen::make_eigen_tensor(data, shape); + } + + template + auto make_this_eigen_tensor_() { + if(m_pthis_buffer_->size() != m_this_shape_.size()) { + std::vector temp_buffer(m_this_shape_.size()); + *m_pthis_buffer_ = buffer_type(std::move(temp_buffer)); + } + auto this_span = + wtf::buffer::contiguous_buffer_cast(*m_pthis_buffer_); + return backends::eigen::make_eigen_tensor(this_span, m_this_shape_); + } + + template + auto make_lhs_eigen_tensor_(std::span data) { + /// XXX: Ideally we would not need to const_cast here, but we didn't + /// code EigenTensor correctly... + + auto* pdata = const_cast(data.data()); + std::span non_const_data(pdata, data.size()); + return backends::eigen::make_eigen_tensor(non_const_data, m_lhs_shape_); + } + + template + auto make_rhs_eigen_tensor_(std::span data) { + /// XXX: Ideally we would not need to const_cast here, but we didn't + /// code EigenTensor correctly... 
+ + auto* pdata = const_cast(data.data()); + std::span non_const_data(pdata, data.size()); + return backends::eigen::make_eigen_tensor(non_const_data, m_rhs_shape_); + } + +private: + buffer_type* m_pthis_buffer_; + label_type m_this_labels_; + shape_type m_this_shape_; + + label_type m_lhs_labels_; + shape_type m_lhs_shape_; + + label_type m_rhs_labels_; + shape_type m_rhs_shape_; +}; + +class AdditionVisitor : public BinaryOperationVisitor { +public: + using BinaryOperationVisitor::BinaryOperationVisitor; + + // AdditionVisitor(shape, permutation, shape, permutation) + template + void operator()(std::span lhs, + std::span rhs) { + using clean_t = std::decay_t; + auto pthis = this->make_this_eigen_tensor_(); + auto plhs = this->make_lhs_eigen_tensor_(lhs); + auto prhs = this->make_rhs_eigen_tensor_(rhs); + + pthis->addition_assignment(this_labels(), lhs_labels(), rhs_labels(), + *plhs, *prhs); } }; diff --git a/src/tensorwrapper/buffer/mdbuffer.cpp b/src/tensorwrapper/buffer/mdbuffer.cpp index f291ceca..78b869d5 100644 --- a/src/tensorwrapper/buffer/mdbuffer.cpp +++ b/src/tensorwrapper/buffer/mdbuffer.cpp @@ -14,12 +14,24 @@ * limitations under the License. 
*/ +#include "../backends/eigen/eigen_tensor_impl.hpp" #include "detail_/addition_visitor.hpp" #include "detail_/hash_utilities.hpp" #include #include namespace tensorwrapper::buffer { +namespace { + +template +const MDBuffer& downcast(T&& object) { + auto* pobject = dynamic_cast(&object); + if(pobject == nullptr) { + throw std::invalid_argument("The provided buffer must be an MDBuffer."); + } + return *pobject; +} +} // namespace using fp_types = types::floating_point_types; @@ -90,7 +102,29 @@ bool MDBuffer::are_equal_(const_buffer_base_reference rhs) const noexcept { auto MDBuffer::addition_assignment_(label_type this_labels, const_labeled_reference lhs, const_labeled_reference rhs) - -> dsl_reference {} + -> dsl_reference { + const auto& lhs_down = downcast(lhs.object()); + const auto& rhs_down = downcast(rhs.object()); + const auto& lhs_labels = lhs.labels(); + const auto& rhs_labels = rhs.labels(); + const auto& lhs_shape = lhs_down.m_shape_; + const auto& rhs_shape = rhs_down.m_shape_; + + auto labeled_lhs_shape = lhs_shape(lhs_labels); + auto labeled_rhs_shape = rhs_shape(rhs_labels); + + m_shape_.addition_assignment(this_labels, labeled_lhs_shape, + labeled_rhs_shape); + + detail_::AdditionVisitor visitor(m_buffer_, this_labels, m_shape_, + lhs.labels(), lhs_shape, rhs.labels(), + rhs_shape); + + wtf::buffer::visit_contiguous_buffer(visitor, lhs_down.m_buffer_, + rhs_down.m_buffer_); + mark_for_rehash_(); + return *this; +} auto MDBuffer::subtraction_assignment_(label_type this_labels, const_labeled_reference lhs, @@ -109,15 +143,49 @@ auto MDBuffer::scalar_multiplication_(label_type this_labels, double scalar, const_labeled_reference rhs) -> dsl_reference {} -auto MDBuffer::to_string_() const -> string_type {} +auto MDBuffer::to_string_() const -> string_type { + std::stringstream ss; + add_to_stream_(ss); + return ss.str(); +} -std::ostream& MDBuffer::add_to_stream_(std::ostream& os) const { return os; } +std::ostream& 
MDBuffer::add_to_stream_(std::ostream& os) const { + /// XXX: EigenTensor should handle aliasing a const buffer correctly. That's + /// a lot of work, just to get this to work though... + + if(m_buffer_.size() == 0) return os; + auto lambda = [&](auto&& span) { + using clean_type = std::decay_t::value_type; + auto data_ptr = const_cast(span.data()); + std::span data_span(data_ptr, span.size()); + auto ptensor = backends::eigen::make_eigen_tensor(data_span, m_shape_); + ptensor->add_to_stream(os); + }; + wtf::buffer::visit_contiguous_buffer(lambda, m_buffer_); + return os; +} // ----------------------------------------------------------------------------- // -- Private Methods // ----------------------------------------------------------------------------- +void MDBuffer::check_index_(const index_vector& index) const { + if(index.size() != m_shape_.rank()) { + throw std::out_of_range( + "The length of the provided index does not match the rank of " + "*this."); + } + for(rank_type i = 0; i < m_shape_.rank(); ++i) { + if(index[i] >= m_shape_.extent(i)) { + throw std::out_of_range( + "An index provided is out of bounds for the corresponding " + "dimension."); + } + } +} + auto MDBuffer::coordinate_to_ordinal_(index_vector index) const -> size_type { + check_index_(index); using size_type = typename decltype(index)::size_type; size_type ordinal = 0; size_type stride = 1; diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp index 5b613924..6937839f 100644 --- a/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp +++ b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp @@ -24,6 +24,7 @@ TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { using buffer::MDBuffer; using buffer_type = MDBuffer::buffer_type; using shape_type = typename MDBuffer::shape_type; + using label_type = typename MDBuffer::label_type; TestType one(1.0), two(2.0), three(3.0), four(4.0); std::vector data = 
{one, two, three, four}; @@ -38,15 +39,127 @@ TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { MDBuffer matrix(data, matrix_shape); SECTION("Ctors and assignment") { - SECTION("Default ctor") { REQUIRE(defaulted.size() == 0); } + SECTION("Default ctor") { + REQUIRE(defaulted.size() == 0); + REQUIRE(defaulted.shape() == shape_type()); + } + + SECTION("vector ctor") { + REQUIRE(scalar.size() == 1); + REQUIRE(scalar.shape() == scalar_shape); + REQUIRE(scalar.get_elem({}) == one); + + REQUIRE(vector.size() == 4); + REQUIRE(vector.shape() == vector_shape); + REQUIRE(vector.get_elem({0}) == one); + REQUIRE(vector.get_elem({1}) == two); + REQUIRE(vector.get_elem({2}) == three); + REQUIRE(vector.get_elem({3}) == four); - SECTION("vector ctor") {} + REQUIRE(matrix.size() == 4); + REQUIRE(matrix.shape() == matrix_shape); + REQUIRE(matrix.get_elem({0, 0}) == one); + REQUIRE(matrix.get_elem({0, 1}) == two); + REQUIRE(matrix.get_elem({1, 0}) == three); + REQUIRE(matrix.get_elem({1, 1}) == four); + + REQUIRE_THROWS_AS(MDBuffer(data, scalar_shape), + std::invalid_argument); + } SECTION("FloatBuffer ctor") { buffer_type buf(data); + + MDBuffer vector_buf(buf, vector_shape); + REQUIRE(vector_buf == vector); + + MDBuffer matrix_buf(buf, matrix_shape); + REQUIRE(matrix_buf == matrix); + REQUIRE_THROWS_AS(MDBuffer(buf, scalar_shape), std::invalid_argument); } + + SECTION("Copy ctor") { + MDBuffer defaulted_copy(defaulted); + REQUIRE(defaulted_copy == defaulted); + + MDBuffer scalar_copy(scalar); + REQUIRE(scalar_copy == scalar); + + MDBuffer vector_copy(vector); + REQUIRE(vector_copy == vector); + + MDBuffer matrix_copy(matrix); + REQUIRE(matrix_copy == matrix); + } + + SECTION("Move ctor") { + MDBuffer defaulted_temp(defaulted); + MDBuffer defaulted_move(std::move(defaulted_temp)); + REQUIRE(defaulted_move == defaulted); + + MDBuffer scalar_temp(scalar); + MDBuffer scalar_move(std::move(scalar_temp)); + REQUIRE(scalar_move == scalar); + + MDBuffer 
vector_temp(vector); + MDBuffer vector_move(std::move(vector_temp)); + REQUIRE(vector_move == vector); + + MDBuffer matrix_temp(matrix); + MDBuffer matrix_move(std::move(matrix_temp)); + REQUIRE(matrix_move == matrix); + } + + SECTION("Copy assignment") { + MDBuffer defaulted_copy; + auto pdefaulted_copy = &(defaulted_copy = defaulted); + REQUIRE(defaulted_copy == defaulted); + REQUIRE(pdefaulted_copy == &defaulted_copy); + + MDBuffer scalar_copy; + auto pscalar_copy = &(scalar_copy = scalar); + REQUIRE(scalar_copy == scalar); + REQUIRE(pscalar_copy == &scalar_copy); + + MDBuffer vector_copy; + auto pvector_copy = &(vector_copy = vector); + REQUIRE(vector_copy == vector); + REQUIRE(pvector_copy == &vector_copy); + + MDBuffer matrix_copy; + auto pmatrix_copy = &(matrix_copy = matrix); + REQUIRE(matrix_copy == matrix); + REQUIRE(pmatrix_copy == &matrix_copy); + } + + SECTION("Move assignment") { + MDBuffer defaulted_temp(defaulted); + MDBuffer defaulted_move; + auto pdefaulted_move = + &(defaulted_move = std::move(defaulted_temp)); + REQUIRE(defaulted_move == defaulted); + REQUIRE(pdefaulted_move == &defaulted_move); + + MDBuffer scalar_temp(scalar); + MDBuffer scalar_move; + auto pscalar_move = &(scalar_move = std::move(scalar_temp)); + REQUIRE(scalar_move == scalar); + REQUIRE(pscalar_move == &scalar_move); + + MDBuffer vector_temp(vector); + MDBuffer vector_move; + auto pvector_move = &(vector_move = std::move(vector_temp)); + REQUIRE(vector_move == vector); + REQUIRE(pvector_move == &vector_move); + + MDBuffer matrix_temp(matrix); + MDBuffer matrix_move; + auto pmatrix_move = &(matrix_move = std::move(matrix_temp)); + REQUIRE(matrix_move == matrix); + REQUIRE(pmatrix_move == &matrix_move); + } } SECTION("shape") { @@ -67,16 +180,19 @@ TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { REQUIRE_THROWS_AS(defaulted.get_elem({}), std::out_of_range); REQUIRE(scalar.get_elem({}) == one); + REQUIRE_THROWS_AS(scalar.get_elem({0}), std::out_of_range); 
REQUIRE(vector.get_elem({0}) == one); REQUIRE(vector.get_elem({1}) == two); REQUIRE(vector.get_elem({2}) == three); REQUIRE(vector.get_elem({3}) == four); + REQUIRE_THROWS_AS(vector.get_elem({4}), std::out_of_range); REQUIRE(matrix.get_elem({0, 0}) == one); REQUIRE(matrix.get_elem({0, 1}) == two); REQUIRE(matrix.get_elem({1, 0}) == three); REQUIRE(matrix.get_elem({1, 1}) == four); + REQUIRE_THROWS_AS(matrix.get_elem({2, 0}), std::out_of_range); } SECTION("set_elem") { @@ -124,4 +240,41 @@ TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { REQUIRE_FALSE(vector == MDBuffer(diff_data, vector_shape)); REQUIRE_FALSE(matrix == MDBuffer(diff_data, matrix_shape)); } + + SECTION("addition_assignment_") { + SECTION("scalar") { + label_type labels(""); + MDBuffer result; + result.addition_assignment(labels, scalar(labels), scalar(labels)); + REQUIRE(result.shape() == scalar_shape); + REQUIRE(result.get_elem({}) == TestType(2.0)); + } + } + + SECTION("to_string") { + REQUIRE(defaulted.to_string().empty()); + REQUIRE_FALSE(scalar.to_string().empty()); + REQUIRE_FALSE(vector.to_string().empty()); + REQUIRE_FALSE(matrix.to_string().empty()); + } + + SECTION("add_to_stream") { + std::stringstream ss; + SECTION("defaulted") { + defaulted.add_to_stream(ss); + REQUIRE(ss.str().empty()); + } + SECTION("scalar") { + scalar.add_to_stream(ss); + REQUIRE_FALSE(ss.str().empty()); + } + SECTION("vector") { + vector.add_to_stream(ss); + REQUIRE_FALSE(ss.str().empty()); + } + SECTION("matrix") { + matrix.add_to_stream(ss); + REQUIRE_FALSE(ss.str().empty()); + } + } } From 600606a7f08efbff813d98777eaf7fff0312641a Mon Sep 17 00:00:00 2001 From: "Ryan M. 
Richard" Date: Mon, 1 Dec 2025 21:47:58 -0600 Subject: [PATCH 16/18] addition works --- .../backends/eigen/eigen_tensor_impl.cpp | 1 + ...sitor.hpp => binary_operation_visitor.hpp} | 44 +++++--- src/tensorwrapper/buffer/mdbuffer.cpp | 2 +- .../buffer/detail_/addition_visitor.cpp | 38 ------- .../detail_/binary_operation_visitor.cpp | 104 ++++++++++++++++++ .../tensorwrapper/buffer/mdbuffer.cpp | 31 ++++++ 6 files changed, 167 insertions(+), 53 deletions(-) rename src/tensorwrapper/buffer/detail_/{addition_visitor.hpp => binary_operation_visitor.hpp} (75%) delete mode 100644 tests/cxx/unit_tests/tensorwrapper/buffer/detail_/addition_visitor.cpp create mode 100644 tests/cxx/unit_tests/tensorwrapper/buffer/detail_/binary_operation_visitor.cpp diff --git a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp index 5e853a79..28d13020 100644 --- a/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp +++ b/src/tensorwrapper/backends/eigen/eigen_tensor_impl.cpp @@ -257,6 +257,7 @@ void EIGEN_TENSOR::contraction_assignment_(label_type this_label, #undef EIGEN_TENSOR #undef TPARAMS +template std::unique_ptr> make_eigen_tensor( std::span data, shape::SmoothView shape) { switch(shape.rank()) { diff --git a/src/tensorwrapper/buffer/detail_/addition_visitor.hpp b/src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp similarity index 75% rename from src/tensorwrapper/buffer/detail_/addition_visitor.hpp rename to src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp index 0f1d99ca..36aa07ae 100644 --- a/src/tensorwrapper/buffer/detail_/addition_visitor.hpp +++ b/src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp @@ -15,24 +15,38 @@ */ #pragma once +#include "../../backends/eigen/eigen_tensor_impl.hpp" #include #include +#include #include #include +#include namespace tensorwrapper::buffer::detail_ { /** @brief Dispatches to the appropriate backend based on the FP type. 
* - * + * This visitor is intended to be used with WTF's buffer visitation mechanism. + * This base class implements the logic common to all binary operations and + * lets the derived classes implement the operation-specific logic. * */ class BinaryOperationVisitor { public: - using buffer_type = wtf::buffer::FloatBuffer; - using string_type = std::string; - using label_type = dsl::DummyIndices; - using shape_type = shape::Smooth; + /// Type of the WTF buffer + using buffer_type = wtf::buffer::FloatBuffer; + + /// Type that the labels use for representing indices + using string_type = std::string; + + /// Type of a set of labels + using label_type = dsl::DummyIndices; + + /// Type describing the shape of the tensors + using shape_type = shape::Smooth; + + /// Type describing a read-only view acting like shape_type using const_shape_view = shape::SmoothView; BinaryOperationVisitor(buffer_type& this_buffer, label_type this_labels, @@ -57,7 +71,7 @@ class BinaryOperationVisitor { template requires(!std::is_same_v) - void operator()(std::span, std::span) { + void operator()(std::span, std::span) const { throw std::runtime_error( "BinaryOperationVisitor: Mixed types not supported"); } @@ -80,22 +94,24 @@ class BinaryOperationVisitor { } template - auto make_lhs_eigen_tensor_(std::span data) { + auto make_lhs_eigen_tensor_(std::span data) { /// XXX: Ideally we would not need to const_cast here, but we didn't /// code EigenTensor correctly... - auto* pdata = const_cast(data.data()); - std::span non_const_data(pdata, data.size()); + using clean_type = std::decay_t; + auto* pdata = const_cast(data.data()); + std::span non_const_data(pdata, data.size()); return backends::eigen::make_eigen_tensor(non_const_data, m_lhs_shape_); } template - auto make_rhs_eigen_tensor_(std::span data) { + auto make_rhs_eigen_tensor_(std::span data) { /// XXX: Ideally we would not need to const_cast here, but we didn't /// code EigenTensor correctly... 
- auto* pdata = const_cast(data.data()); - std::span non_const_data(pdata, data.size()); + using clean_type = std::decay_t; + auto* pdata = const_cast(data.data()); + std::span non_const_data(pdata, data.size()); return backends::eigen::make_eigen_tensor(non_const_data, m_rhs_shape_); } @@ -114,11 +130,11 @@ class BinaryOperationVisitor { class AdditionVisitor : public BinaryOperationVisitor { public: using BinaryOperationVisitor::BinaryOperationVisitor; + using BinaryOperationVisitor::operator(); // AdditionVisitor(shape, permutation, shape, permutation) template - void operator()(std::span lhs, - std::span rhs) { + void operator()(std::span lhs, std::span rhs) { using clean_t = std::decay_t; auto pthis = this->make_this_eigen_tensor_(); auto plhs = this->make_lhs_eigen_tensor_(lhs); diff --git a/src/tensorwrapper/buffer/mdbuffer.cpp b/src/tensorwrapper/buffer/mdbuffer.cpp index 78b869d5..fb0185df 100644 --- a/src/tensorwrapper/buffer/mdbuffer.cpp +++ b/src/tensorwrapper/buffer/mdbuffer.cpp @@ -15,7 +15,7 @@ */ #include "../backends/eigen/eigen_tensor_impl.hpp" -#include "detail_/addition_visitor.hpp" +#include "detail_/binary_operation_visitor.hpp" #include "detail_/hash_utilities.hpp" #include #include diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/addition_visitor.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/addition_visitor.cpp deleted file mode 100644 index d7b46618..00000000 --- a/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/addition_visitor.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2025 NWChemEx-Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// #include -// #include - -// using namespace tensorwrapper; - -// TEMPLATE_LIST_TEST_CASE("AdditionVisitor", "[buffer][detail_]", -// types::floating_point_types) { -// using VisitorType = buffer::detail_::AdditionVisitor; - -// VisitorType visitor; - -// SECTION("vectors") { -// std::vector lhs{1.0, 2.0, 3.0}; -// std::vector rhs{4.0, 5.0, 6.0}; - -// visitor(std::span(lhs), std::span(rhs)); - -// REQUIRE(lhs[0] == Approx(5.0).epsilon(1e-10)); -// REQUIRE(lhs[1] == Approx(7.0).epsilon(1e-10)); -// REQUIRE(lhs[2] == Approx(9.0).epsilon(1e-10)); -// } -// } diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/binary_operation_visitor.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/binary_operation_visitor.cpp new file mode 100644 index 00000000..fd8e3b5a --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/binary_operation_visitor.cpp @@ -0,0 +1,104 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../../testing/testing.hpp" +#include +#include +using namespace tensorwrapper; + +/* Testing notes: + * + * In testing the derived classes we assume that the backends have been + * exhaustively tested. Therefore, we simply ensure that each overload works + * correctly and that the correct backend is dispatched to. + */ +TEMPLATE_LIST_TEST_CASE("BinaryOperationVisitor", "[buffer][detail_]", + types::floating_point_types) { + using VisitorType = buffer::detail_::BinaryOperationVisitor; + using buffer_type = typename VisitorType::buffer_type; + using label_type = typename VisitorType::label_type; + using shape_type = typename VisitorType::shape_type; + + buffer_type this_buffer(std::vector(6, TestType(0.0))); + + label_type this_labels("i,j"); + shape_type this_shape({2, 3}); + + label_type lhs_labels("i,k"); + shape_type lhs_shape({2, 4}); + + label_type rhs_labels("k,j"); + shape_type rhs_shape({4, 3}); + + VisitorType visitor(this_buffer, this_labels, this_shape, lhs_labels, + lhs_shape, rhs_labels, rhs_shape); + + REQUIRE(visitor.this_shape() == this_shape); + REQUIRE(visitor.lhs_shape() == lhs_shape); + REQUIRE(visitor.rhs_shape() == rhs_shape); + + REQUIRE(visitor.this_labels() == this_labels); + REQUIRE(visitor.lhs_labels() == lhs_labels); + REQUIRE(visitor.rhs_labels() == rhs_labels); + + std::span dspan; + std::span fspan; + REQUIRE_THROWS_AS(visitor(dspan, fspan), std::runtime_error); +} + +TEMPLATE_LIST_TEST_CASE("AdditionVisitor", "[buffer][detail_]", + types::floating_point_types) { + using VisitorType = buffer::detail_::AdditionVisitor; + using buffer_type = typename VisitorType::buffer_type; + using label_type = typename VisitorType::label_type; + using shape_type = typename VisitorType::shape_type; + + TestType one{1.0}, two{2.0}, three{3.0}, four{4.0}; + std::vector this_data{one, two, three, four}; + std::vector lhs_data{four, three, two, one}; + std::vector rhs_data{one, one, one, one}; + shape_type shape({4}); + label_type 
labels("i"); + + std::span lhs_span(lhs_data.data(), lhs_data.size()); + std::span clhs_span(lhs_data.data(), lhs_data.size()); + std::span rhs_span(rhs_data.data(), rhs_data.size()); + std::span crhs_span(rhs_data.data(), rhs_data.size()); + + SECTION("existing buffer") { + buffer_type this_buffer(this_data); + VisitorType visitor(this_buffer, labels, shape, labels, shape, labels, + shape); + + visitor(lhs_span, rhs_span); + REQUIRE(this_buffer.at(0) == TestType(5.0)); + REQUIRE(this_buffer.at(1) == TestType(4.0)); + REQUIRE(this_buffer.at(2) == TestType(3.0)); + REQUIRE(this_buffer.at(3) == TestType(2.0)); + } + + SECTION("non-existing buffer") { + buffer_type empty_buffer; + VisitorType visitor(empty_buffer, labels, shape, labels, shape, labels, + shape); + + visitor(clhs_span, crhs_span); + REQUIRE(empty_buffer.at(0) == TestType(5.0)); + REQUIRE(empty_buffer.at(1) == TestType(4.0)); + REQUIRE(empty_buffer.at(2) == TestType(3.0)); + REQUIRE(empty_buffer.at(3) == TestType(2.0)); + } +} diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp index 6937839f..a01b92b6 100644 --- a/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp +++ b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp @@ -20,6 +20,15 @@ using namespace tensorwrapper; +/* Testing notes: + * + * The various operations (addition_assignment, etc.) are not exhaustively + * tested here. These operations are implemented via visitors that dispatch to + * various backends. The visitors themselves are tested in their own unit tests. + * Here we assume the visitors work and spot check a couple of operations for + * to help catch any integration issues. 
+ */ + TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { using buffer::MDBuffer; using buffer_type = MDBuffer::buffer_type; @@ -249,6 +258,28 @@ TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { REQUIRE(result.shape() == scalar_shape); REQUIRE(result.get_elem({}) == TestType(2.0)); } + + SECTION("vector") { + label_type labels("i"); + MDBuffer result; + result.addition_assignment(labels, vector(labels), vector(labels)); + REQUIRE(result.shape() == vector_shape); + REQUIRE(result.get_elem({0}) == TestType(2.0)); + REQUIRE(result.get_elem({1}) == TestType(4.0)); + REQUIRE(result.get_elem({2}) == TestType(6.0)); + REQUIRE(result.get_elem({3}) == TestType(8.0)); + } + + SECTION("matrix") { + label_type labels("i,j"); + MDBuffer result; + result.addition_assignment(labels, matrix(labels), matrix(labels)); + REQUIRE(result.shape() == matrix_shape); + REQUIRE(result.get_elem({0, 0}) == TestType(2.0)); + REQUIRE(result.get_elem({0, 1}) == TestType(4.0)); + REQUIRE(result.get_elem({1, 0}) == TestType(6.0)); + REQUIRE(result.get_elem({1, 1}) == TestType(8.0)); + } } SECTION("to_string") { From a632830e2e6da56919a2dfbe862f1986d8b69826 Mon Sep 17 00:00:00 2001 From: "Ryan M. 
Richard" Date: Tue, 2 Dec 2025 10:38:27 -0600 Subject: [PATCH 17/18] everything but multiplication works --- .../detail_/binary_operation_visitor.hpp | 98 +++++------- .../detail_/unary_operation_visitor.hpp | 146 +++++++++++++++++ src/tensorwrapper/buffer/mdbuffer.cpp | 63 +++++++- .../detail_/binary_operation_visitor.cpp | 44 +++++ .../detail_/unary_operation_visitor.cpp | 151 ++++++++++++++++++ .../tensorwrapper/buffer/mdbuffer.cpp | 106 ++++++++++++ 6 files changed, 549 insertions(+), 59 deletions(-) create mode 100644 src/tensorwrapper/buffer/detail_/unary_operation_visitor.hpp create mode 100644 tests/cxx/unit_tests/tensorwrapper/buffer/detail_/unary_operation_visitor.cpp diff --git a/src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp b/src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp index 36aa07ae..82bb3be3 100644 --- a/src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp +++ b/src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp @@ -16,6 +16,7 @@ #pragma once #include "../../backends/eigen/eigen_tensor_impl.hpp" +#include "unary_operation_visitor.hpp" #include #include #include @@ -31,42 +32,40 @@ namespace tensorwrapper::buffer::detail_ { * This base class implements the logic common to all binary operations and * lets the derived classes implement the operation-specific logic. * + * @note This class derives from UnaryOperationVisitor to reuse some of its + * functionality. This inheritance is private because it does not make + * sense to use a BinaryOperationVisitor as a UnaryOperationVisitor. 
*/ -class BinaryOperationVisitor { -public: - /// Type of the WTF buffer - using buffer_type = wtf::buffer::FloatBuffer; - - /// Type that the labels use for representing indices - using string_type = std::string; - - /// Type of a set of labels - using label_type = dsl::DummyIndices; - - /// Type describing the shape of the tensors - using shape_type = shape::Smooth; +class BinaryOperationVisitor : private UnaryOperationVisitor { +private: + using base_class = UnaryOperationVisitor; - /// Type describing a read-only view acting like shape_type - using const_shape_view = shape::SmoothView; +public: + /// Pull in types from the base class + ///@{ + using typename base_class::buffer_type; + using typename base_class::const_shape_view; + using typename base_class::label_type; + using typename base_class::shape_type; + using typename base_class::string_type; + ///@} BinaryOperationVisitor(buffer_type& this_buffer, label_type this_labels, shape_type this_shape, label_type lhs_labels, shape_type lhs_shape, label_type rhs_labels, shape_type rhs_shape) : - m_pthis_buffer_(&this_buffer), - m_this_labels_(std::move(this_labels)), - m_this_shape_(std::move(this_shape)), - m_lhs_labels_(std::move(lhs_labels)), - m_lhs_shape_(std::move(lhs_shape)), + UnaryOperationVisitor(this_buffer, this_labels, this_shape, lhs_labels, + lhs_shape), m_rhs_labels_(std::move(rhs_labels)), m_rhs_shape_(std::move(rhs_shape)) {} - const auto& this_shape() const { return m_this_shape_; } - const auto& lhs_shape() const { return m_lhs_shape_; } + using base_class::this_labels; + using base_class::this_shape; + + const auto& lhs_shape() const { return other_shape(); } const auto& rhs_shape() const { return m_rhs_shape_; } - const auto& this_labels() const { return m_this_labels_; } - const auto& lhs_labels() const { return m_lhs_labels_; } + const auto& lhs_labels() const { return other_labels(); } const auto& rhs_labels() const { return m_rhs_labels_; } template @@ -77,31 +76,9 @@ class 
BinaryOperationVisitor { } protected: - template - auto make_eigen_tensor_(std::span data, const_shape_view shape) { - return backends::eigen::make_eigen_tensor(data, shape); - } - - template - auto make_this_eigen_tensor_() { - if(m_pthis_buffer_->size() != m_this_shape_.size()) { - std::vector temp_buffer(m_this_shape_.size()); - *m_pthis_buffer_ = buffer_type(std::move(temp_buffer)); - } - auto this_span = - wtf::buffer::contiguous_buffer_cast(*m_pthis_buffer_); - return backends::eigen::make_eigen_tensor(this_span, m_this_shape_); - } - template auto make_lhs_eigen_tensor_(std::span data) { - /// XXX: Ideally we would not need to const_cast here, but we didn't - /// code EigenTensor correctly... - - using clean_type = std::decay_t; - auto* pdata = const_cast(data.data()); - std::span non_const_data(pdata, data.size()); - return backends::eigen::make_eigen_tensor(non_const_data, m_lhs_shape_); + return base_class::make_other_eigen_tensor_(data); } template @@ -116,23 +93,16 @@ class BinaryOperationVisitor { } private: - buffer_type* m_pthis_buffer_; - label_type m_this_labels_; - shape_type m_this_shape_; - - label_type m_lhs_labels_; - shape_type m_lhs_shape_; - label_type m_rhs_labels_; shape_type m_rhs_shape_; }; +/// Visitor that calls addition_assignment class AdditionVisitor : public BinaryOperationVisitor { public: using BinaryOperationVisitor::BinaryOperationVisitor; using BinaryOperationVisitor::operator(); - // AdditionVisitor(shape, permutation, shape, permutation) template void operator()(std::span lhs, std::span rhs) { using clean_t = std::decay_t; @@ -145,4 +115,22 @@ class AdditionVisitor : public BinaryOperationVisitor { } }; +/// Visitor that calls subtraction_assignment +class SubtractionVisitor : public BinaryOperationVisitor { +public: + using BinaryOperationVisitor::BinaryOperationVisitor; + using BinaryOperationVisitor::operator(); + + template + void operator()(std::span lhs, std::span rhs) { + using clean_t = std::decay_t; + auto pthis = 
this->make_this_eigen_tensor_(); + auto plhs = this->make_lhs_eigen_tensor_(lhs); + auto prhs = this->make_rhs_eigen_tensor_(rhs); + + pthis->subtraction_assignment(this_labels(), lhs_labels(), rhs_labels(), + *plhs, *prhs); + } +}; + } // namespace tensorwrapper::buffer::detail_ diff --git a/src/tensorwrapper/buffer/detail_/unary_operation_visitor.hpp b/src/tensorwrapper/buffer/detail_/unary_operation_visitor.hpp new file mode 100644 index 00000000..4a99c003 --- /dev/null +++ b/src/tensorwrapper/buffer/detail_/unary_operation_visitor.hpp @@ -0,0 +1,146 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "../../backends/eigen/eigen_tensor_impl.hpp" +#include +#include +#include +#include +#include +#include + +namespace tensorwrapper::buffer::detail_ { + +/** @brief Dispatches to the appropriate backend based on the FP type. + * + * This visitor is intended to be used with WTF's buffer visitation mechanism. + * This base class implements the logic common to all unary operations and + * lets the derived classes implement the operation-specific logic. 
+ * + */ +class UnaryOperationVisitor { +public: + /// Type of the WTF buffer + using buffer_type = wtf::buffer::FloatBuffer; + + /// Type that the labels use for representing indices + using string_type = std::string; + + /// Type of a set of labels + using label_type = dsl::DummyIndices; + + /// Type describing the shape of the tensors + using shape_type = shape::Smooth; + + /// Type describing a read-only view acting like shape_type + using const_shape_view = shape::SmoothView; + + UnaryOperationVisitor(buffer_type& this_buffer, label_type this_labels, + shape_type this_shape, label_type other_labels, + shape_type other_shape) : + m_pthis_buffer_(&this_buffer), + m_this_labels_(std::move(this_labels)), + m_this_shape_(std::move(this_shape)), + m_other_labels_(std::move(other_labels)), + m_other_shape_(std::move(other_shape)) {} + + const auto& this_shape() const { return m_this_shape_; } + const auto& other_shape() const { return m_other_shape_; } + + const auto& this_labels() const { return m_this_labels_; } + const auto& other_labels() const { return m_other_labels_; } + +protected: + template + auto make_eigen_tensor_(std::span data, const_shape_view shape) { + return backends::eigen::make_eigen_tensor(data, shape); + } + + template + auto make_this_eigen_tensor_() { + if(m_pthis_buffer_->size() != m_this_shape_.size()) { + std::vector temp_buffer(m_this_shape_.size()); + *m_pthis_buffer_ = buffer_type(std::move(temp_buffer)); + } + auto this_span = + wtf::buffer::contiguous_buffer_cast(*m_pthis_buffer_); + return backends::eigen::make_eigen_tensor(this_span, m_this_shape_); + } + + template + auto make_other_eigen_tensor_(std::span data) { + /// XXX: Ideally we would not need to const_cast here, but we didn't + /// code EigenTensor correctly... 
+ + using clean_type = std::decay_t; + auto* pdata = const_cast(data.data()); + std::span non_const_data(pdata, data.size()); + return backends::eigen::make_eigen_tensor(non_const_data, + m_other_shape_); + } + +private: + buffer_type* m_pthis_buffer_; + label_type m_this_labels_; + shape_type m_this_shape_; + + label_type m_other_labels_; + shape_type m_other_shape_; +}; + +class PermuteVisitor : public UnaryOperationVisitor { +public: + using UnaryOperationVisitor::UnaryOperationVisitor; + + template + void operator()(std::span other) { + using clean_t = std::decay_t; + auto pthis = this->make_this_eigen_tensor_(); + auto pother = this->make_other_eigen_tensor_(other); + + pthis->permute_assignment(this->this_labels(), other_labels(), *pother); + } +}; + +class ScalarMultiplicationVisitor : public UnaryOperationVisitor { +public: + using scalar_type = wtf::fp::Float; + ScalarMultiplicationVisitor(buffer_type& this_buffer, + label_type this_labels, shape_type this_shape, + label_type other_labels, shape_type other_shape, + scalar_type scalar) : + UnaryOperationVisitor(this_buffer, this_labels, this_shape, other_labels, + other_shape), + m_scalar_(scalar) {} + + template + void operator()(std::span other) { + using clean_t = std::decay_t; + auto pthis = this->make_this_eigen_tensor_(); + auto pother = this->make_other_eigen_tensor_(other); + + // TODO: Change when public API changes to support other FP types + auto scalar = wtf::fp::float_cast(m_scalar_); + pthis->scalar_multiplication(this->this_labels(), other_labels(), + scalar, *pother); + } + +private: + scalar_type m_scalar_; +}; + +} // namespace tensorwrapper::buffer::detail_ diff --git a/src/tensorwrapper/buffer/mdbuffer.cpp b/src/tensorwrapper/buffer/mdbuffer.cpp index fb0185df..dc016b47 100644 --- a/src/tensorwrapper/buffer/mdbuffer.cpp +++ b/src/tensorwrapper/buffer/mdbuffer.cpp @@ -129,19 +129,74 @@ auto MDBuffer::addition_assignment_(label_type this_labels, auto 
MDBuffer::subtraction_assignment_(label_type this_labels, const_labeled_reference lhs, const_labeled_reference rhs) - -> dsl_reference {} + -> dsl_reference { + const auto& lhs_down = downcast(lhs.object()); + const auto& rhs_down = downcast(rhs.object()); + const auto& lhs_labels = lhs.labels(); + const auto& rhs_labels = rhs.labels(); + const auto& lhs_shape = lhs_down.m_shape_; + const auto& rhs_shape = rhs_down.m_shape_; + + auto labeled_lhs_shape = lhs_shape(lhs_labels); + auto labeled_rhs_shape = rhs_shape(rhs_labels); + + m_shape_.subtraction_assignment(this_labels, labeled_lhs_shape, + labeled_rhs_shape); + + detail_::SubtractionVisitor visitor(m_buffer_, this_labels, m_shape_, + lhs.labels(), lhs_shape, rhs.labels(), + rhs_shape); + + wtf::buffer::visit_contiguous_buffer(visitor, lhs_down.m_buffer_, + rhs_down.m_buffer_); + mark_for_rehash_(); + return *this; +} + auto MDBuffer::multiplication_assignment_(label_type this_labels, const_labeled_reference lhs, const_labeled_reference rhs) - -> dsl_reference {} + -> dsl_reference { + throw std::runtime_error("multiplication NYI"); +} auto MDBuffer::permute_assignment_(label_type this_labels, const_labeled_reference rhs) - -> dsl_reference {} + -> dsl_reference { + const auto& rhs_down = downcast(rhs.object()); + const auto& rhs_labels = rhs.labels(); + const auto& rhs_shape = rhs_down.m_shape_; + + auto labeled_rhs_shape = rhs_shape(rhs_labels); + + m_shape_.permute_assignment(this_labels, labeled_rhs_shape); + + detail_::PermuteVisitor visitor(m_buffer_, this_labels, m_shape_, + rhs.labels(), rhs_shape); + + wtf::buffer::visit_contiguous_buffer(visitor, rhs_down.m_buffer_); + mark_for_rehash_(); + return *this; +} auto MDBuffer::scalar_multiplication_(label_type this_labels, double scalar, const_labeled_reference rhs) - -> dsl_reference {} + -> dsl_reference { + const auto& rhs_down = downcast(rhs.object()); + const auto& rhs_labels = rhs.labels(); + const auto& rhs_shape = rhs_down.m_shape_; + + auto 
labeled_rhs_shape = rhs_shape(rhs_labels); + + m_shape_.permute_assignment(this_labels, labeled_rhs_shape); + + detail_::ScalarMultiplicationVisitor visitor( + m_buffer_, this_labels, m_shape_, rhs.labels(), rhs_shape, scalar); + + wtf::buffer::visit_contiguous_buffer(visitor, rhs_down.m_buffer_); + mark_for_rehash_(); + return *this; +} auto MDBuffer::to_string_() const -> string_type { std::stringstream ss; diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/binary_operation_visitor.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/binary_operation_visitor.cpp index fd8e3b5a..bbcc0599 100644 --- a/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/binary_operation_visitor.cpp +++ b/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/binary_operation_visitor.cpp @@ -102,3 +102,47 @@ TEMPLATE_LIST_TEST_CASE("AdditionVisitor", "[buffer][detail_]", REQUIRE(empty_buffer.at(3) == TestType(2.0)); } } + +TEMPLATE_LIST_TEST_CASE("SubtractionVisitor", "[buffer][detail_]", + types::floating_point_types) { + using VisitorType = buffer::detail_::SubtractionVisitor; + using buffer_type = typename VisitorType::buffer_type; + using label_type = typename VisitorType::label_type; + using shape_type = typename VisitorType::shape_type; + + TestType one{1.0}, two{2.0}, three{3.0}, four{4.0}; + std::vector this_data{one, two, three, four}; + std::vector lhs_data{four, three, two, one}; + std::vector rhs_data{one, one, one, one}; + shape_type shape({4}); + label_type labels("i"); + + std::span lhs_span(lhs_data.data(), lhs_data.size()); + std::span clhs_span(lhs_data.data(), lhs_data.size()); + std::span rhs_span(rhs_data.data(), rhs_data.size()); + std::span crhs_span(rhs_data.data(), rhs_data.size()); + + SECTION("existing buffer") { + buffer_type this_buffer(this_data); + VisitorType visitor(this_buffer, labels, shape, labels, shape, labels, + shape); + + visitor(lhs_span, rhs_span); + REQUIRE(this_buffer.at(0) == TestType(3.0)); + REQUIRE(this_buffer.at(1) == 
TestType(2.0)); + REQUIRE(this_buffer.at(2) == TestType(1.0)); + REQUIRE(this_buffer.at(3) == TestType(0.0)); + } + + SECTION("non-existing buffer") { + buffer_type empty_buffer; + VisitorType visitor(empty_buffer, labels, shape, labels, shape, labels, + shape); + + visitor(clhs_span, crhs_span); + REQUIRE(empty_buffer.at(0) == TestType(3.0)); + REQUIRE(empty_buffer.at(1) == TestType(2.0)); + REQUIRE(empty_buffer.at(2) == TestType(1.0)); + REQUIRE(empty_buffer.at(3) == TestType(0.0)); + } +} diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/unary_operation_visitor.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/unary_operation_visitor.cpp new file mode 100644 index 00000000..83d9b675 --- /dev/null +++ b/tests/cxx/unit_tests/tensorwrapper/buffer/detail_/unary_operation_visitor.cpp @@ -0,0 +1,151 @@ +/* + * Copyright 2025 NWChemEx-Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../../testing/testing.hpp" +#include +#include +using namespace tensorwrapper; + +/* Testing notes: + * + * In testing the derived classes we assume that the backends have been + * exhaustively tested. Therefore, we simply ensure that each overload works + * correctly and that the correct backend is dispatched to. 
+ */ +TEMPLATE_LIST_TEST_CASE("UnaryOperationVisitor", "[buffer][detail_]", + types::floating_point_types) { + using VisitorType = buffer::detail_::UnaryOperationVisitor; + using buffer_type = typename VisitorType::buffer_type; + using label_type = typename VisitorType::label_type; + using shape_type = typename VisitorType::shape_type; + + buffer_type this_buffer(std::vector(6, TestType(0.0))); + + label_type this_labels("i,j"); + shape_type this_shape({2, 3}); + + label_type other_labels("i,k"); + shape_type other_shape({2, 4}); + + VisitorType visitor(this_buffer, this_labels, this_shape, other_labels, + other_shape); + + REQUIRE(visitor.this_shape() == this_shape); + REQUIRE(visitor.other_shape() == other_shape); + + REQUIRE(visitor.this_labels() == this_labels); + REQUIRE(visitor.other_labels() == other_labels); +} + +TEMPLATE_LIST_TEST_CASE("PermuteVisitor", "[buffer][detail_]", + types::floating_point_types) { + using VisitorType = buffer::detail_::PermuteVisitor; + using buffer_type = typename VisitorType::buffer_type; + using label_type = typename VisitorType::label_type; + using shape_type = typename VisitorType::shape_type; + + label_type this_labels("i,j"); + shape_type this_shape({2, 3}); + + label_type other_labels("j,i"); + shape_type other_shape({3, 2}); + + std::vector other_data = {TestType(1.0), TestType(2.0), + TestType(3.0), TestType(4.0), + TestType(5.0), TestType(6.0)}; + std::span other_span(other_data.data(), other_data.size()); + std::span cother_span(other_data.data(), other_data.size()); + + SECTION("Buffer is allocated") { + buffer_type this_buffer(std::vector(6, TestType(0.0))); + VisitorType visitor(this_buffer, this_labels, this_shape, other_labels, + other_shape); + visitor(other_span); + + REQUIRE(this_buffer.at(0) == TestType(1.0)); + REQUIRE(this_buffer.at(1) == TestType(3.0)); + REQUIRE(this_buffer.at(2) == TestType(5.0)); + REQUIRE(this_buffer.at(3) == TestType(2.0)); + REQUIRE(this_buffer.at(4) == TestType(4.0)); + 
REQUIRE(this_buffer.at(5) == TestType(6.0)); + } + + SECTION("Buffer is not allocated") { + buffer_type this_buffer; + VisitorType visitor(this_buffer, this_labels, this_shape, other_labels, + other_shape); + visitor(cother_span); + + REQUIRE(this_buffer.at(0) == TestType(1.0)); + REQUIRE(this_buffer.at(1) == TestType(3.0)); + REQUIRE(this_buffer.at(2) == TestType(5.0)); + REQUIRE(this_buffer.at(3) == TestType(2.0)); + REQUIRE(this_buffer.at(4) == TestType(4.0)); + REQUIRE(this_buffer.at(5) == TestType(6.0)); + } +} + +TEMPLATE_LIST_TEST_CASE("ScalarMultiplicationVisitor", "[buffer][detail_]", + types::floating_point_types) { + using VisitorType = buffer::detail_::ScalarMultiplicationVisitor; + using buffer_type = typename VisitorType::buffer_type; + using label_type = typename VisitorType::label_type; + using shape_type = typename VisitorType::shape_type; + + label_type this_labels("i,j"); + shape_type this_shape({2, 3}); + + label_type other_labels("j,i"); + shape_type other_shape({3, 2}); + + std::vector other_data = {TestType(1.0), TestType(2.0), + TestType(3.0), TestType(4.0), + TestType(5.0), TestType(6.0)}; + std::span other_span(other_data.data(), other_data.size()); + std::span cother_span(other_data.data(), other_data.size()); + + // TODO: when public API of MDBuffer supports other FP types, test them here + double scalar_{2.0}; + TestType scalar(scalar_); + + SECTION("Buffer is allocated") { + buffer_type this_buffer(std::vector(6, TestType(0.0))); + VisitorType visitor(this_buffer, this_labels, this_shape, other_labels, + other_shape, scalar_); + visitor(other_span); + + REQUIRE(this_buffer.at(0) == TestType(1.0) * scalar); + REQUIRE(this_buffer.at(1) == TestType(3.0) * scalar); + REQUIRE(this_buffer.at(2) == TestType(5.0) * scalar); + REQUIRE(this_buffer.at(3) == TestType(2.0) * scalar); + REQUIRE(this_buffer.at(4) == TestType(4.0) * scalar); + REQUIRE(this_buffer.at(5) == TestType(6.0) * scalar); + } + + SECTION("Buffer is not allocated") { + 
buffer_type this_buffer; + VisitorType visitor(this_buffer, this_labels, this_shape, other_labels, + other_shape, scalar_); + visitor(cother_span); + + REQUIRE(this_buffer.at(0) == TestType(1.0) * scalar); + REQUIRE(this_buffer.at(1) == TestType(3.0) * scalar); + REQUIRE(this_buffer.at(2) == TestType(5.0) * scalar); + REQUIRE(this_buffer.at(3) == TestType(2.0) * scalar); + REQUIRE(this_buffer.at(4) == TestType(4.0) * scalar); + REQUIRE(this_buffer.at(5) == TestType(6.0) * scalar); + } +} diff --git a/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp index a01b92b6..11dd8081 100644 --- a/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp +++ b/tests/cxx/unit_tests/tensorwrapper/buffer/mdbuffer.cpp @@ -282,6 +282,112 @@ TEMPLATE_LIST_TEST_CASE("MDBuffer", "", types::floating_point_types) { } } + SECTION("subtraction_assignment_") { + SECTION("scalar") { + label_type labels(""); + MDBuffer result; + result.subtraction_assignment(labels, scalar(labels), + scalar(labels)); + REQUIRE(result.shape() == scalar_shape); + REQUIRE(result.get_elem({}) == TestType(0.0)); + } + + SECTION("vector") { + label_type labels("i"); + MDBuffer result; + result.subtraction_assignment(labels, vector(labels), + vector(labels)); + REQUIRE(result.shape() == vector_shape); + REQUIRE(result.get_elem({0}) == TestType(0.0)); + REQUIRE(result.get_elem({1}) == TestType(0.0)); + REQUIRE(result.get_elem({2}) == TestType(0.0)); + REQUIRE(result.get_elem({3}) == TestType(0.0)); + } + + SECTION("matrix") { + label_type labels("i,j"); + MDBuffer result; + result.subtraction_assignment(labels, matrix(labels), + matrix(labels)); + REQUIRE(result.shape() == matrix_shape); + REQUIRE(result.get_elem({0, 0}) == TestType(0.0)); + REQUIRE(result.get_elem({0, 1}) == TestType(0.0)); + REQUIRE(result.get_elem({1, 0}) == TestType(0.0)); + REQUIRE(result.get_elem({1, 1}) == TestType(0.0)); + } + } + + SECTION("scalar_multiplication_") { + // TODO: 
Test with other scalar types when public API supports it + using scalar_type = double; + scalar_type scalar_value_{2.0}; + TestType scalar_value(scalar_value_); + SECTION("scalar") { + label_type labels(""); + MDBuffer result; + result.scalar_multiplication(labels, scalar_value_, scalar(labels)); + REQUIRE(result.shape() == scalar_shape); + REQUIRE(result.get_elem({}) == TestType(1.0) * scalar_value); + } + + SECTION("vector") { + label_type labels("i"); + MDBuffer result; + result.scalar_multiplication(labels, scalar_value_, vector(labels)); + REQUIRE(result.shape() == vector_shape); + REQUIRE(result.get_elem({0}) == TestType(1.0) * scalar_value); + REQUIRE(result.get_elem({1}) == TestType(2.0) * scalar_value); + REQUIRE(result.get_elem({2}) == TestType(3.0) * scalar_value); + REQUIRE(result.get_elem({3}) == TestType(4.0) * scalar_value); + } + + SECTION("matrix") { + label_type rhs_labels("i,j"); + label_type lhs_labels("j,i"); + MDBuffer result; + result.scalar_multiplication(lhs_labels, scalar_value_, + matrix(rhs_labels)); + REQUIRE(result.shape() == matrix_shape); + REQUIRE(result.get_elem({0, 0}) == TestType(1.0) * scalar_value); + REQUIRE(result.get_elem({0, 1}) == TestType(3.0) * scalar_value); + REQUIRE(result.get_elem({1, 0}) == TestType(2.0) * scalar_value); + REQUIRE(result.get_elem({1, 1}) == TestType(4.0) * scalar_value); + } + } + + SECTION("permute_assignment_") { + SECTION("scalar") { + label_type labels(""); + MDBuffer result; + result.permute_assignment(labels, scalar(labels)); + REQUIRE(result.shape() == scalar_shape); + REQUIRE(result.get_elem({}) == TestType(1.0)); + } + + SECTION("vector") { + label_type labels("i"); + MDBuffer result; + result.permute_assignment(labels, vector(labels)); + REQUIRE(result.shape() == vector_shape); + REQUIRE(result.get_elem({0}) == TestType(1.0)); + REQUIRE(result.get_elem({1}) == TestType(2.0)); + REQUIRE(result.get_elem({2}) == TestType(3.0)); + REQUIRE(result.get_elem({3}) == TestType(4.0)); + } + + 
SECTION("matrix") { + label_type rhs_labels("i,j"); + label_type lhs_labels("j,i"); + MDBuffer result; + result.permute_assignment(lhs_labels, matrix(rhs_labels)); + REQUIRE(result.shape() == matrix_shape); + REQUIRE(result.get_elem({0, 0}) == TestType(1.0)); + REQUIRE(result.get_elem({0, 1}) == TestType(3.0)); + REQUIRE(result.get_elem({1, 0}) == TestType(2.0)); + REQUIRE(result.get_elem({1, 1}) == TestType(4.0)); + } + } + SECTION("to_string") { REQUIRE(defaulted.to_string().empty()); REQUIRE_FALSE(scalar.to_string().empty()); From cbf51fb600e5eca7c6a95f292237e454658fbd21 Mon Sep 17 00:00:00 2001 From: "Ryan M. Richard" Date: Thu, 4 Dec 2025 09:11:00 -0600 Subject: [PATCH 18/18] fix gcc error --- src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp b/src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp index 82bb3be3..25363fd1 100644 --- a/src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp +++ b/src/tensorwrapper/buffer/detail_/binary_operation_visitor.hpp @@ -76,6 +76,8 @@ class BinaryOperationVisitor : private UnaryOperationVisitor { } protected: + using base_class::make_this_eigen_tensor_; + template auto make_lhs_eigen_tensor_(std::span data) { return base_class::make_other_eigen_tensor_(data);