|
9 | 9 | import numpy as np |
10 | 10 | import pandas as pd |
11 | 11 |
|
12 | | -from ...intermediate import Intermediate |
13 | 12 | from ...data_array import DataArray |
14 | | -from .common import CorrelationMethod, kendalltau, nanrankdata |
| 13 | +from ...intermediate import Intermediate |
| 14 | +from .common import CorrelationMethod, kendalltau, nanrankdata, corrcoef |
15 | 15 |
|
16 | 16 |
|
17 | 17 | def _calc_univariate( |
@@ -74,17 +74,17 @@ def _calc_univariate( |
def _pearson_1xn(x: da.Array, data: da.Array) -> da.Array:
    """Compute the Pearson correlation between ``x`` and each column of ``data``.

    For every column ``j`` of ``data``, rows where either ``x`` or
    ``data[:, j]`` is NaN are dropped before the correlation is computed
    (pairwise deletion).

    Parameters
    ----------
    x
        The reference column. It is concatenated to ``data`` along axis 1,
        so it must be 2-D with the same number of rows as ``data``
        (presumably shape ``(n, 1)`` -- confirm against callers).
    data
        2-D array whose columns are each correlated against ``x``.

    Returns
    -------
    da.Array
        The per-column correlations stacked in column order, one entry per
        column of ``data``.
    """
    _, ncols = data.shape

    # Append x as the LAST column so one NaN mask covers both x and data.
    fused = da.concatenate([data, x], axis=1)
    # The mask must be built from `fused`, not `data`: `mask[:, -1]` has to
    # describe x. Masking `data` alone would make column -1 the last data
    # column, leaving NaNs in x unfiltered.
    mask = ~da.isnan(fused)
    x_valid = mask[:, -1]

    corrs = []
    for j in range(ncols):
        # Keep only the rows where both x and column j are present.
        xy = fused[:, [-1, j]][x_valid & mask[:, j]]
        # da.corrcoef cannot be used here: the boolean filter leaves xy with
        # an unknown number of rows. `corrcoef` (imported from .common) is
        # assumed to return a delayed scalar -- confirm against .common.
        # Builtin `float` replaces the `np.float` alias removed in NumPy 1.24.
        corr = da.from_delayed(corrcoef(xy), dtype=float, shape=())
        corrs.append(corr)

    return da.stack(corrs)
|
0 commit comments