From 8a3de9c91d2ccfd62f4752efd16ab829f576a8e5 Mon Sep 17 00:00:00 2001
From: Aaron Meyer <2065146+aarmey@users.noreply.github.com>
Date: Tue, 1 Jul 2025 20:28:29 -0700
Subject: [PATCH 1/2] Fix edge cases of csr_array

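A csr_array may contain explicitly stored zeros and duplicate entries, so
canonicalize it (sum_duplicates, eliminate_zeros) before computing deviance.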
---
 parafac2/normalize.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/parafac2/normalize.py b/parafac2/normalize.py
index 8908965..7ae258f 100644
--- a/parafac2/normalize.py
+++ b/parafac2/normalize.py
@@ -79,6 +79,9 @@ def get_deviance(data: csr_array) -> np.ndarray:
     where y=0 or n-y=0, and ensures the final deviance values are non-negative
     before taking the square root.
     """
+    # check that there are no duplicate entries in the sparse matrix
+    data.sum_duplicates()
+    data.eliminate_zeros()
 
     # counts per gene
     pi_j = data.sum(axis=0)
@@ -104,7 +107,7 @@ def get_deviance(data: csr_array) -> np.ndarray:
     # Term 1: y * log(y / mu) = xlogy(y, y) - xlogy(y, mu)
     # xlogy handles y=0 case correctly returning 0.
     row, col = data.nonzero()
-    mu_ij_nn = n_i_col[row, 0] * pi_j[0, col]
+    mu_ij_nn = n_i[row] * pi_j[col]
     term1 = data.data * np.log(data.data / mu_ij_nn)
 
     # Term 2: (n-y) * log((n-y) / (n-mu)) = xlogy(n-y, n-y) - xlogy(n-y, n-mu)

From d429fd2c5fa5547e55323bfd85edc2c49a982dd6 Mon Sep 17 00:00:00 2001
From: Aaron Meyer <2065146+aarmey@users.noreply.github.com>
Date: Tue, 1 Jul 2025 20:30:19 -0700
Subject: [PATCH 2/2] Clarify sum_duplicates comment in parafac2/normalize.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 parafac2/normalize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parafac2/normalize.py b/parafac2/normalize.py
index 7ae258f..8ac5041 100644
--- a/parafac2/normalize.py
+++ b/parafac2/normalize.py
@@ -79,7 +79,7 @@ def get_deviance(data: csr_array) -> np.ndarray:
     where y=0 or n-y=0, and ensures the final deviance values are non-negative
     before taking the square root.
     """
-    # check that there are no duplicate entries in the sparse matrix
+    # merge duplicate entries in the sparse matrix by summing their values
     data.sum_duplicates()
     data.eliminate_zeros()