Skip to content

Commit 5ca493b

Browse files
fixup
1 parent d4a3fa3 commit 5ca493b

File tree

5 files changed

+54
-46
lines changed

5 files changed

+54
-46
lines changed

example/define_custom_local_operator.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,28 @@ class CustomLocalOperator(Htool.VirtualLocalOperator):
66
def __init__(
77
self,
88
generator: Htool.VirtualGenerator,
9-
target_offset: int,
10-
target_size: int,
11-
target_permutation,
12-
source_offset: int,
13-
source_size: int,
14-
source_permutation,
9+
target_cluster: Htool.Cluster,
10+
global_source_cluster: Htool.Cluster,
11+
local_source_offset: int,
12+
local_source_size:int,
1513
) -> None:
1614
super().__init__(
17-
target_offset,
18-
target_size,
19-
source_offset,
20-
source_size,
15+
target_cluster,
16+
global_source_cluster,
17+
local_source_offset,
18+
local_source_size,
2119
)
22-
self.data = np.zeros((target_size, source_size))
20+
target_offset = target_cluster.get_offset()
21+
target_size = target_cluster.get_size()
22+
self.data = np.zeros((target_size, local_source_size))
2323
generator.build_submatrix(
24-
target_permutation[
24+
target_cluster.get_permutation()[
2525
target_offset : target_offset
2626
+ target_size
2727
],
28-
source_permutation[
29-
source_offset : source_offset
30-
+ source_size
28+
global_source_cluster.get_permutation()[
29+
local_source_offset : local_source_offset
30+
+ local_source_size
3131
],
3232
self.data,
3333
)
@@ -38,20 +38,20 @@ def add_vector_product(
3838
# Beware, inplace operation needed for output to keep the underlying data
3939
output *= beta
4040
if trans == "N":
41-
output += alpha * self.data.dot(input)
41+
output += alpha * self.data.dot(input[self.local_source_offset : self.local_source_offset+ self.local_source_size])
4242
elif trans == "T":
43-
output += alpha * np.transpose(self.data).dot(input)
43+
output += alpha * np.transpose(self.data).dot(input[self.local_source_offset : self.local_source_offset+ self.local_source_size])
4444
elif trans == "C":
45-
output += alpha * np.vdot(np.transpose(self.data), input)
45+
output += alpha * np.vdot(np.transpose(self.data), input[self.local_source_offset : self.local_source_offset+ self.local_source_size])
4646

4747
def add_matrix_product_row_major(
4848
self, trans, alpha, input: np.array, beta, output: np.array
4949
) -> None:
5050
output *= beta
5151
if trans == "N":
52-
output += alpha * self.data @ input
52+
output += alpha * self.data @ input[self.local_source_offset : self.local_source_offset+ self.local_source_size,:]
5353
elif trans == "T":
54-
output += alpha * np.transpose(self.data) @ input
54+
output += alpha * np.transpose(self.data) @ input[self.local_source_offset : self.local_source_offset+ self.local_source_size,:]
5555
elif trans == "C":
56-
output += alpha * np.matrix.H(self.data) @ input
56+
output += alpha * np.matrix.H(self.data) @ input[self.local_source_offset : self.local_source_offset+ self.local_source_size,:]
5757
output = np.asfortranarray(output)

example/use_local_hmatrix_compression.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,24 +76,20 @@
7676
if local_source_cluster.get_offset() > 0:
7777
local_operator_1 = CustomLocalOperator(
7878
generator,
79-
local_target_cluster.get_offset(),
80-
local_target_cluster.get_size(),
81-
local_target_cluster.get_permutation(),
79+
local_target_cluster,
80+
source_cluster,
8281
0,
8382
local_source_cluster.get_offset(),
84-
source_cluster.get_permutation(),
8583
)
8684

8785
local_operator_2 = None
8886
if source_cluster.get_size()-local_source_cluster.get_size()-local_source_cluster.get_offset() > 0:
8987
local_operator_2 = CustomLocalOperator(
9088
generator,
91-
local_target_cluster.get_offset(),
92-
local_target_cluster.get_size(),
93-
local_target_cluster.get_permutation(),
89+
local_target_cluster,
90+
source_cluster,
9491
local_source_cluster.get_size()+local_source_cluster.get_offset(),
9592
source_cluster.get_size()-local_source_cluster.get_size()-local_source_cluster.get_offset(),
96-
source_cluster.get_permutation(),
9793
)
9894

9995
if local_operator_1:
@@ -104,14 +100,13 @@
104100
# Test matrix vector product
105101
np.random.seed(0)
106102
x = np.random.rand(source_size)
107-
x = np.ones(source_size)
108103
y_1 = distributed_operator * x
109104
y_2 = generator.mat_vec(x)
110105
print(mpi4py.MPI.COMM_WORLD.rank, np.linalg.norm(y_1 - y_2) / np.linalg.norm(y_2))
111106

112107

113108
# Test matrix matrix product
114-
X = np.asfortranarray(np.random.rand(source_size, 2))
109+
X = np.asfortranarray(np.random.rand(source_size, 5))
115110
Y_1 = distributed_operator @ X
116111
Y_2 = generator.mat_mat(X)
117112
print(mpi4py.MPI.COMM_WORLD.rank, np.linalg.norm(Y_1 - Y_2) / np.linalg.norm(Y_2))

lib/htool

src/htool/distributed_operator/distributed_operator.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ void declare_distributed_operator(py::module &m, const std::string &class_name)
2828
}
2929
py::array_t<CoefficientPrecision, py::array::f_style> result(std::array<long int, 1>{self.get_target_partition().get_global_size()});
3030
std::fill_n(result.mutable_data(), self.get_target_partition().get_global_size(), CoefficientPrecision(0));
31-
htool::add_distributed_operator_vector_product_global_to_global<CoefficientPrecision>('N', 1, self, input.data(), 0, result.mutable_data(), nullptr);
31+
htool::add_distributed_operator_vector_product_global_to_global<CoefficientPrecision>('N', CoefficientPrecision(1), self, input.data(), CoefficientPrecision(0), result.mutable_data(), nullptr);
3232

3333
return result;
3434
},
@@ -49,6 +49,11 @@ void declare_distributed_operator(py::module &m, const std::string &class_name)
4949
std::array<long int, 2> shape{self.get_target_partition().get_global_size(), mu};
5050
py::array_t<CoefficientPrecision, py::array::f_style> result(shape);
5151
std::fill_n(result.mutable_data(), self.get_target_partition().get_global_size() * mu, CoefficientPrecision(0));
52+
if (mu == 1) {
53+
std::cout << "ICI?\n";
54+
htool::add_distributed_operator_vector_product_global_to_global<CoefficientPrecision>('N', CoefficientPrecision(1), self, input.data(), CoefficientPrecision(0), result.mutable_data(), nullptr);
55+
return result;
56+
}
5257
MatrixView<const CoefficientPrecision> input_view(self.get_source_partition().get_global_size(), mu, input.data());
5358
MatrixView<CoefficientPrecision> output_view(self.get_target_partition().get_global_size(), mu, result.mutable_data());
5459
CoefficientPrecision *work = nullptr;

src/htool/local_operator/virtual_local_operator.hpp

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,36 +4,38 @@
44
#include <htool/distributed_operator/interfaces/virtual_local_operator.hpp>
55
#include <pybind11/pybind11.h>
66

7-
template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
7+
template <typename CoefficientPrecision, typename CoordinatePrecision = htool::underlying_type<CoefficientPrecision>>
88
class VirtualLocalOperatorPython : public htool::VirtualLocalOperator<CoefficientPrecision> {
9-
int m_target_offset;
10-
int m_target_size;
11-
int m_source_offset;
12-
int m_source_size;
139

1410
public:
15-
VirtualLocalOperatorPython(int target_offset, int target_size, int source_offset, int source_size) : m_target_offset(target_offset), m_target_size(target_size), m_source_offset(source_offset), m_source_size(source_size) {}
11+
int m_target_offset;
12+
int m_target_size;
13+
int m_global_source_offset;
14+
int m_global_source_size;
15+
int m_local_source_offset;
16+
int m_local_source_size;
17+
VirtualLocalOperatorPython(const Cluster<CoordinatePrecision> &target_cluster, const Cluster<CoordinatePrecision> &global_source_cluster, int local_source_offset, int local_source_size) : m_target_offset(target_cluster.get_offset()), m_target_size(target_cluster.get_size()), m_global_source_offset(global_source_cluster.get_offset()), m_global_source_size(global_source_cluster.get_size()), m_local_source_offset(local_source_offset), m_local_source_size(local_source_size) {}
1618

1719
int get_target_offset() const override { return m_target_offset; }
18-
int get_source_offset() const override { return m_source_offset; }
20+
int get_source_offset() const override { return m_local_source_offset; }
1921
int get_target_size() const override { return m_target_size; }
20-
int get_source_size() const override { return m_source_size; }
22+
int get_source_size() const override { return m_local_source_size; }
2123

2224
void add_vector_product(char trans, CoefficientPrecision alpha, const CoefficientPrecision *const in, CoefficientPrecision beta, CoefficientPrecision *const out) const override {
23-
py::array_t<CoefficientPrecision> input(std::array<long int, 1>{m_source_size}, in, py::capsule(in));
25+
py::array_t<CoefficientPrecision> input(std::array<long int, 1>{m_global_source_size}, in, py::capsule(in));
2426
py::array_t<CoefficientPrecision> output(std::array<long int, 1>{m_target_size}, out, py::capsule(out));
2527

2628
local_add_vector_product(trans, alpha, input, beta, output);
2729
}
2830
void add_matrix_product_row_major(char trans, CoefficientPrecision alpha, const CoefficientPrecision *const in, CoefficientPrecision beta, CoefficientPrecision *const out, int mu) const override {
29-
py::array_t<CoefficientPrecision, py::array::c_style> input(std::array<long int, 2>{m_source_size, mu}, in, py::capsule(in));
31+
py::array_t<CoefficientPrecision, py::array::c_style> input(std::array<long int, 2>{m_global_source_size, mu}, in, py::capsule(in));
3032
py::array_t<CoefficientPrecision, py::array::c_style> output(std::array<long int, 2>{m_target_size, mu}, out, py::capsule(out));
3133

3234
local_add_matrix_product_row_major(trans, alpha, input, beta, output);
3335
}
3436

3537
virtual void sub_matrix_product_to_local(const CoefficientPrecision *const in, CoefficientPrecision *const out, int mu, int offset, int size) const override {
36-
std::vector<CoefficientPrecision> temp(m_source_size * mu, 0);
38+
std::vector<CoefficientPrecision> temp(m_global_source_size * mu, 0);
3739
std::copy_n(in, size * mu, temp.data() + offset * mu);
3840
add_matrix_product_row_major('N', 1, temp.data(), 0, out, mu);
3941
};
@@ -75,16 +77,22 @@ class PyVirtualLocalOperator : public VirtualLocalOperatorPython<CoefficientPrec
7577
}
7678
};
7779

78-
template <typename CoefficientPrecision>
80+
template <typename CoefficientPrecision, typename CoordinatePrecision = htool::underlying_type<CoefficientPrecision>>
7981
void declare_virtual_local_operator(py::module &m, const std::string &className, const std::string &base_class_name) {
8082
using BaseClass = htool::VirtualLocalOperator<CoefficientPrecision>;
8183
py::class_<BaseClass>(m, (base_class_name).c_str());
8284

8385
using Class = VirtualLocalOperatorPython<CoefficientPrecision>;
8486
py::class_<Class, PyVirtualLocalOperator<CoefficientPrecision>, BaseClass> py_class(m, className.c_str());
85-
py_class.def(py::init<int, int, int, int>());
87+
py_class.def(py::init<const Cluster<CoordinatePrecision> &, const Cluster<CoordinatePrecision> &, int, int>());
8688
py_class.def("local_add_vector_product", &Class::add_vector_product, py::arg("trans"), py::arg("alpha"), py::arg("in").noconvert(true), py::arg("beta"), py::arg("out").noconvert(true));
8789
py_class.def("local_add_matrix_product_row_major", &Class::add_matrix_product_row_major);
90+
py_class.def_readonly("target_offset", &Class::m_target_offset);
91+
py_class.def_readonly("target_size", &Class::m_target_size);
92+
py_class.def_readonly("global_source_offset", &Class::m_global_source_offset);
93+
py_class.def_readonly("global_source_size", &Class::m_global_source_size);
94+
py_class.def_readonly("local_source_offset", &Class::m_local_source_offset);
95+
py_class.def_readonly("local_source_size", &Class::m_local_source_size);
8896
}
8997

9098
#endif

0 commit comments

Comments
 (0)