From 8388c35cb2708173928fe03d63e8e164d54c0e98 Mon Sep 17 00:00:00 2001
From: Aaron Meyer <ameyer@ucla.edu>
Date: Wed, 28 May 2025 07:08:56 -0700
Subject: [PATCH 1/3] Perform parafac on the GPU

---
 parafac2/parafac2.py            | 55 ++++-----------------------------
 parafac2/tests/test_parafac2.py | 16 +++-------
 parafac2/utils.py               | 53 +++++++++++++++++++++++++++++--
 3 files changed, 62 insertions(+), 62 deletions(-)

diff --git a/parafac2/parafac2.py b/parafac2/parafac2.py
index 3089cb4..93a32ca 100644
--- a/parafac2/parafac2.py
+++ b/parafac2/parafac2.py
@@ -3,16 +3,15 @@
 from copy import deepcopy
 
 import anndata
+import cupy as cp
 import numpy as np
 from scipy.sparse.linalg import norm
 from sklearn.utils.extmath import randomized_svd
-from tensorly.cp_tensor import cp_normalize
-from tensorly.tenalg.core_tenalg import unfolding_dot_khatri_rao
 from tqdm import tqdm
 
-from .SECSI import SECSI
 from .utils import (
     anndata_to_list,
+    parafac,
     project_data,
     reconstruction_error,
     standardize_pf2,
@@ -59,54 +58,12 @@ def parafac2_init(
     return factors, norm_tensor
 
 
-def parafac(
-    tensor: np.ndarray,
-    factors: list[np.ndarray],
-    n_iter_max: int = 3,
-) -> list[np.ndarray]:
-    """Decomposes a tensor into a set of factor matrices using the PARAFAC2 algorithm.
-    PARAFAC2 decomposes a tensor into a set of factor matrices, where one factor
-    matrix can vary across slices in one mode. This function implements the core
-    PARAFAC2 algorithm using alternating least squares.
-    Args:
-        tensor (numpy.ndarray): The tensor to decompose.
-        factors (list of numpy.ndarray): Initial guess for the factor matrices.
-            The length of the list must be equal to the number of modes in the tensor.
-            Each element of the list is a numpy.ndarray of shape (size_mode_i, rank),
-            where size_mode_i is the size of the tensor along mode i, and rank is
-            the desired rank of the decomposition.
-        n_iter_max (int, optional): Maximum number of iterations for the algorithm.
-            Defaults to 3.
-    Returns:
-        list of numpy.ndarray: A list containing the factor matrices.
-            The order of the factor matrices corresponds to the modes of the input
-            tensor. The first factor matrix is scaled by the weights.
-    """
-    rank = factors[0].shape[1]
-
-    for _ in range(n_iter_max):
-        for mode in range(tensor.ndim):
-            pinv = np.ones((rank, rank))
-            for i, factor in enumerate(factors):
-                if i != mode:
-                    pinv *= factor.T @ factor
-
-            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)
-            factors[mode] = np.linalg.solve(pinv.T, mttkrp.T).T
-
-    weights, factors = cp_normalize((None, factors))
-    factors[0] *= weights[None, :]
-
-    return factors
-
-
 def parafac2_nd(
     X_in: anndata.AnnData,
     rank: int,
     n_iter_max: int = 100,
     tol: float = 1e-6,
     random_state: int | None = None,
-    SECSI_solver=False,
     callback: Callable[[int, float, list, list], None] | None = None,
 ) -> tuple[tuple, float]:
     r"""The same interface as regular PARAFAC2."""
@@ -133,10 +90,6 @@ def parafac2_nd(
     err = reconstruction_error(factors, projections, projected_X, norm_tensor)
     errs = [err]
 
-    if SECSI_solver:
-        SECSerror, factorOuts = SECSI(projected_X, rank, verbose=False)
-        factors = factorOuts[np.argmin(SECSerror)].factors
-
     print("")
     tq = tqdm(range(n_iter_max), disable=(not verbose), delay=1.0)
     for iteration in tq:
@@ -171,11 +124,15 @@ def parafac2_nd(
 
         factors_old = deepcopy(factors)
 
+        factors = [cp.array(f) for f in factors]
+
         factors = parafac(
             projected_X,
             factors,
         )
 
+        factors = [cp.asnumpy(f) for f in factors]
+
         delta = errs[-2] - errs[-1]
         tq.set_postfix(
             error=errs[-1], R2X=1.0 - errs[-1], Δ=delta, jump=jump, refresh=False
diff --git a/parafac2/tests/test_parafac2.py b/parafac2/tests/test_parafac2.py
index 6f09cc7..16677d4 100644
--- a/parafac2/tests/test_parafac2.py
+++ b/parafac2/tests/test_parafac2.py
@@ -59,9 +59,8 @@ def test_init_reprod(sparse: bool):
         cp.testing.assert_array_equal(proj1[ii], proj2[ii])
 
 
-@pytest.mark.parametrize("SECSI_solver", [False, True])
 @pytest.mark.parametrize("sparse", [False, True])
-def test_parafac2(sparse: bool, SECSI_solver: bool):
+def test_parafac2(sparse: bool):
     """Test for equivalence to TensorLy's PARAFAC2."""
     pf2shape = [(250, 1000)] * 8
     X: list[np.ndarray] = random_parafac2(pf2shape, rank=3, full=True, random_state=2)  # type: ignore
@@ -71,18 +70,14 @@ def test_parafac2(sparse: bool, SECSI_solver: bool):
 
     options = {"tol": 1e-9, "n_iter_max": 200}
 
-    (w1, f1, p1), e1 = parafac2_nd(
-        X_ann, rank=3, random_state=1, SECSI_solver=SECSI_solver, **options
-    )
+    (w1, f1, p1), e1 = parafac2_nd(X_ann, rank=3, random_state=1, **options)
 
     # Test that the model still matches the data
     err = _parafac2_reconstruction_error(X, (w1, f1, p1)) ** 2
     np.testing.assert_allclose(1.0 - err / norm_tensor, e1, rtol=1e-5)
 
     # Test reproducibility
-    (w2, f2, p2), e2 = parafac2_nd(
-        X_ann, rank=3, random_state=3, SECSI_solver=SECSI_solver, **options
-    )
+    (w2, f2, p2), e2 = parafac2_nd(X_ann, rank=3, random_state=3, **options)
     # Compare to TensorLy
     wT, fT, pT = parafac2(
         X,
@@ -129,9 +124,8 @@ def test_pf2_r2x():
     np.testing.assert_allclose(err, errCMF, rtol=1e-8)
 
 
-@pytest.mark.parametrize("SECSI_solver", [False, True])
 @pytest.mark.parametrize("sparse", [False, True])
-def test_performance(sparse: bool, SECSI_solver: bool):
+def test_performance(sparse: bool):
     """Test for equivalence to TensorLy's PARAFAC2."""
     # 5000 by 2000 by 300 is roughly the lupus data
     pf2shape = [(5_00, 2_00)] * 30
@@ -139,7 +133,7 @@ def test_performance(sparse: bool, SECSI_solver: bool):
 
     X = pf2_to_anndata(X, sparse=sparse)
 
-    (w1, f1, p1), e1 = parafac2_nd(X, rank=9, SECSI_solver=SECSI_solver)
+    (w1, f1, p1), e1 = parafac2_nd(X, rank=9)
 
 
 def test_pf2_proj_centering():
diff --git a/parafac2/utils.py b/parafac2/utils.py
index 519ecc5..43c8467 100644
--- a/parafac2/utils.py
+++ b/parafac2/utils.py
@@ -1,9 +1,56 @@
 import anndata
 import cupy as cp
 import numpy as np
+import tensorly as tl
 from cupyx.scipy import sparse as cupy_sparse
 from scipy.optimize import linear_sum_assignment
 from tensorly.cp_tensor import cp_flip_sign, cp_normalize
+from tensorly.tenalg.core_tenalg import unfolding_dot_khatri_rao
+
+
+def parafac(
+    tensor: cp.ndarray,
+    factors: list[cp.ndarray],
+    n_iter_max: int = 3,
+) -> list[cp.ndarray]:
+    """Decomposes a tensor into a set of factor matrices using the PARAFAC2 algorithm.
+    PARAFAC2 decomposes a tensor into a set of factor matrices, where one factor
+    matrix can vary across slices in one mode. This function implements the core
+    PARAFAC2 algorithm using alternating least squares.
+    Args:
+        tensor (numpy.ndarray): The tensor to decompose.
+        factors (list of numpy.ndarray): Initial guess for the factor matrices.
+            The length of the list must be equal to the number of modes in the tensor.
+            Each element of the list is a numpy.ndarray of shape (size_mode_i, rank),
+            where size_mode_i is the size of the tensor along mode i, and rank is
+            the desired rank of the decomposition.
+        n_iter_max (int, optional): Maximum number of iterations for the algorithm.
+            Defaults to 3.
+    Returns:
+        list of numpy.ndarray: A list containing the factor matrices.
+            The order of the factor matrices corresponds to the modes of the input
+            tensor. The first factor matrix is scaled by the weights.
+    """
+    rank = factors[0].shape[1]
+
+    tl.set_backend("cupy")
+
+    for _ in range(n_iter_max):
+        for mode in range(tensor.ndim):
+            pinv = cp.ones((rank, rank))
+            for i, factor in enumerate(factors):
+                if i != mode:
+                    pinv *= factor.T @ factor
+
+            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)
+            factors[mode] = cp.linalg.solve(pinv.T, mttkrp.T).T
+
+    weights, factors = cp_normalize((None, factors))
+    factors[0] *= weights[None, :]
+
+    tl.set_backend("numpy")
+
+    return factors
 
 
 def anndata_to_list(X_in: anndata.AnnData) -> list[cp.ndarray | cupy_sparse.csr_matrix]:
@@ -44,7 +91,7 @@ def project_data(
         centering = cp.outer(cp.sum(proj, axis=0), means)
         projected_X[i, :, :] = proj.T @ mat - centering
 
-    return projections, cp.asnumpy(projected_X)
+    return projections, projected_X
 
 
 def reconstruction_error(
@@ -63,7 +110,9 @@ def reconstruction_error(
         B_i = (proj @ B) * A[i]
 
         # trace of the multiplication products
-        norm_sq_err -= 2.0 * np.trace(A[i][:, np.newaxis] * B.T @ projected_X[i] @ C)
+        norm_sq_err -= 2.0 * np.trace(
+            A[i][:, np.newaxis] * B.T @ cp.asnumpy(projected_X[i]) @ C
+        )
         norm_sq_err += ((B_i.T @ B_i) * CtC).sum()
 
     return norm_sq_err

From 5d5629ad21401ea77ddab0cc70f6cb40a2a16234 Mon Sep 17 00:00:00 2001
From: Aaron Meyer <ameyer@ucla.edu>
Date: Wed, 28 May 2025 08:36:54 -0700
Subject: [PATCH 2/3] Fix deviance

---
 parafac2/normalize.py | 6 ++++--
 parafac2/parafac2.py  | 8 +++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/parafac2/normalize.py b/parafac2/normalize.py
index eb5f33b..95b432d 100644
--- a/parafac2/normalize.py
+++ b/parafac2/normalize.py
@@ -2,6 +2,7 @@
 import numpy as np
 from scipy.sparse import csr_array, issparse
 from scipy.special import xlogy
+from sklearn.preprocessing import scale
 
 
 def prepare_dataset(
@@ -120,6 +121,7 @@ def get_deviance(data: csr_array) -> np.ndarray:
     deviance = np.maximum(deviance, 0.0)  # Ensure non-negative before sqrt
 
     # Calculate signed square root residuals: sign(y - mu) * sqrt(D)
-    signs = np.sign(y_ij - mu_ij)
+    residuals = np.sqrt(deviance) * np.sign(y_ij - mu_ij)
 
-    return signs * np.sqrt(deviance)
+    # z-score
+    return scale(residuals)
diff --git a/parafac2/parafac2.py b/parafac2/parafac2.py
index 93a32ca..105e449 100644
--- a/parafac2/parafac2.py
+++ b/parafac2/parafac2.py
@@ -6,6 +6,7 @@
 import cupy as cp
 import numpy as np
 from scipy.sparse.linalg import norm
+from sklearn.utils import resample
 from sklearn.utils.extmath import randomized_svd
 from tqdm import tqdm
 
@@ -52,7 +53,12 @@ def parafac2_init(
     else:
         norm_tensor = float(norm(X_in.X) ** 2.0 - 2 * np.sum(lmult))
 
-    _, _, C = randomized_svd(X_in.X, rank, random_state=random_state)
+    if X_in.shape[0] > X_in.shape[1] * 10:
+        X_svd = resample(X_in.X, n_samples=X_in.shape[1] * 10)
+    else:
+        X_svd = X_in.X
+
+    _, _, C = randomized_svd(X_svd, rank, random_state=random_state)  # type: ignore
 
     factors = [np.ones((n_cond, rank)), np.eye(rank), C.T]
     return factors, norm_tensor

From 45419cddfdaeb9dd36cbe5312274280c8357312d Mon Sep 17 00:00:00 2001
From: Aaron Meyer <ameyer@ucla.edu>
Date: Fri, 30 May 2025 20:57:28 -0700
Subject: [PATCH 3/3] Adjustments

---
 parafac2/parafac2.py | 4 +++-
 pyproject.toml       | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/parafac2/parafac2.py b/parafac2/parafac2.py
index 105e449..e51d21a 100644
--- a/parafac2/parafac2.py
+++ b/parafac2/parafac2.py
@@ -54,7 +54,9 @@ def parafac2_init(
         norm_tensor = float(norm(X_in.X) ** 2.0 - 2 * np.sum(lmult))
 
     if X_in.shape[0] > X_in.shape[1] * 10:
-        X_svd = resample(X_in.X, n_samples=X_in.shape[1] * 10)
+        X_svd = resample(
+            X_in.X, n_samples=X_in.shape[1] * 10, random_state=random_state
+        )
     else:
         X_svd = X_in.X
 
diff --git a/pyproject.toml b/pyproject.toml
index 99f64c1..ee814a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ dependencies = [
 managed = true
 dev-dependencies = [
     "pytest>=8.3",
-    "pytest-cov>=6.0",
+    "pytest-cov>=6.1",
     "pyright>=1.1.380",
 ]