From 8a3de9c91d2ccfd62f4752efd16ab829f576a8e5 Mon Sep 17 00:00:00 2001 From: Aaron Meyer <2065146+aarmey@users.noreply.github.com> Date: Tue, 1 Jul 2025 20:28:29 -0700 Subject: [PATCH 1/2] Fix edge cases of csr_array A csr_array can have non-sparse zeros potentially. --- parafac2/normalize.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/parafac2/normalize.py b/parafac2/normalize.py index 8908965..7ae258f 100644 --- a/parafac2/normalize.py +++ b/parafac2/normalize.py @@ -79,6 +79,9 @@ def get_deviance(data: csr_array) -> np.ndarray: where y=0 or n-y=0, and ensures the final deviance values are non-negative before taking the square root. """ + # check that there are no duplicate entries in the sparse matrix + data.sum_duplicates() + data.eliminate_zeros() # counts per gene pi_j = data.sum(axis=0) @@ -104,7 +107,7 @@ def get_deviance(data: csr_array) -> np.ndarray: # Term 1: y * log(y / mu) = xlogy(y, y) - xlogy(y, mu) # xlogy handles y=0 case correctly returning 0. row, col = data.nonzero() - mu_ij_nn = n_i_col[row, 0] * pi_j[0, col] + mu_ij_nn = n_i[row] * pi_j[col] term1 = data.data * np.log(data.data / mu_ij_nn) # Term 2: (n-y) * log((n-y) / (n-mu)) = xlogy(n-y, n-y) - xlogy(n-y, n-mu) From d429fd2c5fa5547e55323bfd85edc2c49a982dd6 Mon Sep 17 00:00:00 2001 From: Aaron Meyer <2065146+aarmey@users.noreply.github.com> Date: Tue, 1 Jul 2025 20:30:19 -0700 Subject: [PATCH 2/2] Update parafac2/normalize.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- parafac2/normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parafac2/normalize.py b/parafac2/normalize.py index 7ae258f..8ac5041 100644 --- a/parafac2/normalize.py +++ b/parafac2/normalize.py @@ -79,7 +79,7 @@ def get_deviance(data: csr_array) -> np.ndarray: where y=0 or n-y=0, and ensures the final deviance values are non-negative before taking the square root. """ - # check that there are no duplicate entries in the sparse matrix + # merge duplicate entries in the sparse matrix by summing their values data.sum_duplicates() data.eliminate_zeros()