fixup

PierreMarchand20 · PierreMarchand20 · commit 5ca493b89c41 · 2025-07-30T17:30:12.000+02:00
diff --git a/example/define_custom_local_operator.py b/example/define_custom_local_operator.py
@@ -6,28 +6,28 @@ class CustomLocalOperator(Htool.VirtualLocalOperator):
     def __init__(
         self,
         generator: Htool.VirtualGenerator,
-        target_offset: int,
-        target_size: int,
-        target_permutation,
-        source_offset: int,
-        source_size: int,
-        source_permutation,
+        target_cluster: Htool.Cluster,
+        global_source_cluster: Htool.Cluster,
+        local_source_offset: int,
+        local_source_size: int,
     ) -> None:
         super().__init__(
-            target_offset,
-            target_size,
-            source_offset,
-            source_size,
+            target_cluster,
+            global_source_cluster,
+            local_source_offset,
+            local_source_size,
         )
-        self.data = np.zeros((target_size, source_size))
+        target_offset = target_cluster.get_offset()
+        target_size = target_cluster.get_size()
+        self.data = np.zeros((target_size, local_source_size))
         generator.build_submatrix(
-            target_permutation[
+            target_cluster.get_permutation()[
                 target_offset : target_offset
                 + target_size
             ],
-            source_permutation[
-                source_offset : source_offset
-                + source_size
+            global_source_cluster.get_permutation()[
+                local_source_offset : local_source_offset
+                + local_source_size
             ],
             self.data,
         )
@@ -38,20 +38,20 @@ def add_vector_product(
         # Beware, inplace operation needed for output to keep the underlying data
         output *= beta
         if trans == "N":
-            output += alpha * self.data.dot(input)
+            output += alpha * self.data.dot(input[self.local_source_offset : self.local_source_offset+ self.local_source_size])
         elif trans == "T":
-            output += alpha * np.transpose(self.data).dot(input)
+            output += alpha * np.transpose(self.data).dot(input[self.local_source_offset : self.local_source_offset+ self.local_source_size])
         elif trans == "C":
-            output += alpha * np.vdot(np.transpose(self.data), input)
+            output += alpha * np.conj(np.transpose(self.data)).dot(input[self.local_source_offset : self.local_source_offset + self.local_source_size])
 
     def add_matrix_product_row_major(
         self, trans, alpha, input: np.array, beta, output: np.array
     ) -> None:
         output *= beta
         if trans == "N":
-            output += alpha * self.data @ input
+            output += alpha * self.data @ input[self.local_source_offset : self.local_source_offset+ self.local_source_size,:]
         elif trans == "T":
-            output += alpha * np.transpose(self.data) @ input
+            output += alpha * np.transpose(self.data) @ input[self.local_source_offset : self.local_source_offset+ self.local_source_size,:]
         elif trans == "C":
-            output += alpha * np.matrix.H(self.data) @ input
+            output += alpha * np.conj(np.transpose(self.data)) @ input[self.local_source_offset : self.local_source_offset + self.local_source_size, :]
         output = np.asfortranarray(output)
diff --git a/example/use_local_hmatrix_compression.py b/example/use_local_hmatrix_compression.py
@@ -76,24 +76,20 @@
 if local_source_cluster.get_offset() > 0:
     local_operator_1 = CustomLocalOperator(
         generator,
-        local_target_cluster.get_offset(),
-        local_target_cluster.get_size(),
-        local_target_cluster.get_permutation(),
+        local_target_cluster,
+        source_cluster,
         0,
         local_source_cluster.get_offset(),
-        source_cluster.get_permutation(),
     )
 
 local_operator_2 = None
 if source_cluster.get_size()-local_source_cluster.get_size()-local_source_cluster.get_offset() > 0:
     local_operator_2 = CustomLocalOperator(
         generator,
-        local_target_cluster.get_offset(),
-        local_target_cluster.get_size(),
-        local_target_cluster.get_permutation(),
+        local_target_cluster,
+        source_cluster,
         local_source_cluster.get_size()+local_source_cluster.get_offset(),
         source_cluster.get_size()-local_source_cluster.get_size()-local_source_cluster.get_offset(),
-        source_cluster.get_permutation(),
     )
 
 if local_operator_1:
@@ -104,14 +100,13 @@
 # Test matrix vector product
 np.random.seed(0)
 x = np.random.rand(source_size)
-x = np.ones(source_size)
 y_1 = distributed_operator * x
 y_2 = generator.mat_vec(x)
 print(mpi4py.MPI.COMM_WORLD.rank, np.linalg.norm(y_1 - y_2) / np.linalg.norm(y_2))
 
 
 # Test matrix matrix product
-X = np.asfortranarray(np.random.rand(source_size, 2))
+X = np.asfortranarray(np.random.rand(source_size, 5))
 Y_1 = distributed_operator @ X
 Y_2 = generator.mat_mat(X)
 print(mpi4py.MPI.COMM_WORLD.rank, np.linalg.norm(Y_1 - Y_2) / np.linalg.norm(Y_2))
diff --git a/lib/htool b/lib/htool
@@ -1 +1 @@
-Subproject commit 7cba5bcadd8d64a525516afc53cdda022cdcab86
+Subproject commit cc29a071c02927462adc24f5aa795cdb640a81d4
diff --git a/src/htool/distributed_operator/distributed_operator.hpp b/src/htool/distributed_operator/distributed_operator.hpp
@@ -28,7 +28,7 @@ void declare_distributed_operator(py::module &m, const std::string &class_name)
             }
             py::array_t<CoefficientPrecision, py::array::f_style> result(std::array<long int, 1>{self.get_target_partition().get_global_size()});
             std::fill_n(result.mutable_data(), self.get_target_partition().get_global_size(), CoefficientPrecision(0));
-            htool::add_distributed_operator_vector_product_global_to_global<CoefficientPrecision>('N', 1, self, input.data(), 0, result.mutable_data(), nullptr);
+            htool::add_distributed_operator_vector_product_global_to_global<CoefficientPrecision>('N', CoefficientPrecision(1), self, input.data(), CoefficientPrecision(0), result.mutable_data(), nullptr);
 
             return result;
         },
@@ -49,6 +49,11 @@ void declare_distributed_operator(py::module &m, const std::string &class_name)
             std::array<long int, 2> shape{self.get_target_partition().get_global_size(), mu};
             py::array_t<CoefficientPrecision, py::array::f_style> result(shape);
             std::fill_n(result.mutable_data(), self.get_target_partition().get_global_size() * mu, CoefficientPrecision(0));
+            if (mu == 1) {
+                // Single column: use the global-to-global vector product directly.
+                htool::add_distributed_operator_vector_product_global_to_global<CoefficientPrecision>('N', CoefficientPrecision(1), self, input.data(), CoefficientPrecision(0), result.mutable_data(), nullptr);
+                return result;
+            }
             MatrixView<const CoefficientPrecision> input_view(self.get_source_partition().get_global_size(), mu, input.data());
             MatrixView<CoefficientPrecision> output_view(self.get_target_partition().get_global_size(), mu, result.mutable_data());
             CoefficientPrecision *work = nullptr;
diff --git a/src/htool/local_operator/virtual_local_operator.hpp b/src/htool/local_operator/virtual_local_operator.hpp
@@ -4,36 +4,38 @@
 #include <htool/distributed_operator/interfaces/virtual_local_operator.hpp>
 #include <pybind11/pybind11.h>
 
-template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
+template <typename CoefficientPrecision, typename CoordinatePrecision = htool::underlying_type<CoefficientPrecision>>
 class VirtualLocalOperatorPython : public htool::VirtualLocalOperator<CoefficientPrecision> {
-    int m_target_offset;
-    int m_target_size;
-    int m_source_offset;
-    int m_source_size;
 
   public:
-    VirtualLocalOperatorPython(int target_offset, int target_size, int source_offset, int source_size) : m_target_offset(target_offset), m_target_size(target_size), m_source_offset(source_offset), m_source_size(source_size) {}
+    int m_target_offset;
+    int m_target_size;
+    int m_global_source_offset;
+    int m_global_source_size;
+    int m_local_source_offset;
+    int m_local_source_size;
+    VirtualLocalOperatorPython(const Cluster<CoordinatePrecision> &target_cluster, const Cluster<CoordinatePrecision> &global_source_cluster, int local_source_offset, int local_source_size) : m_target_offset(target_cluster.get_offset()), m_target_size(target_cluster.get_size()), m_global_source_offset(global_source_cluster.get_offset()), m_global_source_size(global_source_cluster.get_size()), m_local_source_offset(local_source_offset), m_local_source_size(local_source_size) {}
 
     int get_target_offset() const override { return m_target_offset; }
-    int get_source_offset() const override { return m_source_offset; }
+    int get_source_offset() const override { return m_local_source_offset; }
     int get_target_size() const override { return m_target_size; }
-    int get_source_size() const override { return m_source_size; }
+    int get_source_size() const override { return m_local_source_size; }
 
     void add_vector_product(char trans, CoefficientPrecision alpha, const CoefficientPrecision *const in, CoefficientPrecision beta, CoefficientPrecision *const out) const override {
-        py::array_t<CoefficientPrecision> input(std::array<long int, 1>{m_source_size}, in, py::capsule(in));
+        py::array_t<CoefficientPrecision> input(std::array<long int, 1>{m_global_source_size}, in, py::capsule(in));
         py::array_t<CoefficientPrecision> output(std::array<long int, 1>{m_target_size}, out, py::capsule(out));
 
         local_add_vector_product(trans, alpha, input, beta, output);
     }
     void add_matrix_product_row_major(char trans, CoefficientPrecision alpha, const CoefficientPrecision *const in, CoefficientPrecision beta, CoefficientPrecision *const out, int mu) const override {
-        py::array_t<CoefficientPrecision, py::array::c_style> input(std::array<long int, 2>{m_source_size, mu}, in, py::capsule(in));
+        py::array_t<CoefficientPrecision, py::array::c_style> input(std::array<long int, 2>{m_global_source_size, mu}, in, py::capsule(in));
         py::array_t<CoefficientPrecision, py::array::c_style> output(std::array<long int, 2>{m_target_size, mu}, out, py::capsule(out));
 
         local_add_matrix_product_row_major(trans, alpha, input, beta, output);
     }
 
     virtual void sub_matrix_product_to_local(const CoefficientPrecision *const in, CoefficientPrecision *const out, int mu, int offset, int size) const override {
-        std::vector<CoefficientPrecision> temp(m_source_size * mu, 0);
+        std::vector<CoefficientPrecision> temp(m_global_source_size * mu, 0);
         std::copy_n(in, size * mu, temp.data() + offset * mu);
         add_matrix_product_row_major('N', 1, temp.data(), 0, out, mu);
     };
@@ -75,16 +77,22 @@ class PyVirtualLocalOperator : public VirtualLocalOperatorPython<CoefficientPrec
     }
 };
 
-template <typename CoefficientPrecision>
+template <typename CoefficientPrecision, typename CoordinatePrecision = htool::underlying_type<CoefficientPrecision>>
 void declare_virtual_local_operator(py::module &m, const std::string &className, const std::string &base_class_name) {
     using BaseClass = htool::VirtualLocalOperator<CoefficientPrecision>;
     py::class_<BaseClass>(m, (base_class_name).c_str());
 
     using Class = VirtualLocalOperatorPython<CoefficientPrecision>;
     py::class_<Class, PyVirtualLocalOperator<CoefficientPrecision>, BaseClass> py_class(m, className.c_str());
-    py_class.def(py::init<int, int, int, int>());
+    py_class.def(py::init<const Cluster<CoordinatePrecision> &, const Cluster<CoordinatePrecision> &, int, int>());
     py_class.def("local_add_vector_product", &Class::add_vector_product, py::arg("trans"), py::arg("alpha"), py::arg("in").noconvert(true), py::arg("beta"), py::arg("out").noconvert(true));
     py_class.def("local_add_matrix_product_row_major", &Class::add_matrix_product_row_major);
+    py_class.def_readonly("target_offset", &Class::m_target_offset);
+    py_class.def_readonly("target_size", &Class::m_target_size);
+    py_class.def_readonly("global_source_offset", &Class::m_global_source_offset);
+    py_class.def_readonly("global_source_size", &Class::m_global_source_size);
+    py_class.def_readonly("local_source_offset", &Class::m_local_source_offset);
+    py_class.def_readonly("local_source_size", &Class::m_local_source_size);
 }
 
 #endif