Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions parafac2/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
from scipy.sparse import csr_array, issparse
from scipy.special import xlogy
from sklearn.preprocessing import scale


def prepare_dataset(
Expand Down Expand Up @@ -120,6 +121,7 @@
deviance = np.maximum(deviance, 0.0) # Ensure non-negative before sqrt

# Calculate signed square root residuals: sign(y - mu) * sqrt(D)
signs = np.sign(y_ij - mu_ij)
residuals = np.sqrt(deviance) * np.sign(y_ij - mu_ij)

Check warning on line 124 in parafac2/normalize.py

View check run for this annotation

Codecov / codecov/patch

parafac2/normalize.py#L124

Added line #L124 was not covered by tests

return signs * np.sqrt(deviance)
# z-score
return scale(residuals)

Check warning on line 127 in parafac2/normalize.py

View check run for this annotation

Codecov / codecov/patch

parafac2/normalize.py#L127

Added line #L127 was not covered by tests
65 changes: 15 additions & 50 deletions parafac2/parafac2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
from copy import deepcopy

import anndata
import cupy as cp
import numpy as np
from scipy.sparse.linalg import norm
from sklearn.utils import resample
from sklearn.utils.extmath import randomized_svd
from tensorly.cp_tensor import cp_normalize
from tensorly.tenalg.core_tenalg import unfolding_dot_khatri_rao
from tqdm import tqdm

from .SECSI import SECSI
from .utils import (
anndata_to_list,
parafac,
project_data,
reconstruction_error,
standardize_pf2,
Expand Down Expand Up @@ -53,60 +53,25 @@ def parafac2_init(
else:
norm_tensor = float(norm(X_in.X) ** 2.0 - 2 * np.sum(lmult))

_, _, C = randomized_svd(X_in.X, rank, random_state=random_state)
if X_in.shape[0] > X_in.shape[1] * 10:
X_svd = resample(
X_in.X, n_samples=X_in.shape[1] * 10, random_state=random_state
)
else:
X_svd = X_in.X

_, _, C = randomized_svd(X_svd, rank, random_state=random_state) # type: ignore

factors = [np.ones((n_cond, rank)), np.eye(rank), C.T]
return factors, norm_tensor


def parafac(
tensor: np.ndarray,
factors: list[np.ndarray],
n_iter_max: int = 3,
) -> list[np.ndarray]:
"""Decomposes a tensor into a set of factor matrices using the PARAFAC2 algorithm.
PARAFAC2 decomposes a tensor into a set of factor matrices, where one factor
matrix can vary across slices in one mode. This function implements the core
PARAFAC2 algorithm using alternating least squares.
Args:
tensor (numpy.ndarray): The tensor to decompose.
factors (list of numpy.ndarray): Initial guess for the factor matrices.
The length of the list must be equal to the number of modes in the tensor.
Each element of the list is a numpy.ndarray of shape (size_mode_i, rank),
where size_mode_i is the size of the tensor along mode i, and rank is
the desired rank of the decomposition.
n_iter_max (int, optional): Maximum number of iterations for the algorithm.
Defaults to 3.
Returns:
list of numpy.ndarray: A list containing the factor matrices.
The order of the factor matrices corresponds to the modes of the input
tensor. The first factor matrix is scaled by the weights.
"""
rank = factors[0].shape[1]

for _ in range(n_iter_max):
for mode in range(tensor.ndim):
pinv = np.ones((rank, rank))
for i, factor in enumerate(factors):
if i != mode:
pinv *= factor.T @ factor

mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)
factors[mode] = np.linalg.solve(pinv.T, mttkrp.T).T

weights, factors = cp_normalize((None, factors))
factors[0] *= weights[None, :]

return factors


def parafac2_nd(
X_in: anndata.AnnData,
rank: int,
n_iter_max: int = 100,
tol: float = 1e-6,
random_state: int | None = None,
SECSI_solver=False,
callback: Callable[[int, float, list, list], None] | None = None,
) -> tuple[tuple, float]:
r"""The same interface as regular PARAFAC2."""
Expand All @@ -133,10 +98,6 @@ def parafac2_nd(
err = reconstruction_error(factors, projections, projected_X, norm_tensor)
errs = [err]

if SECSI_solver:
SECSerror, factorOuts = SECSI(projected_X, rank, verbose=False)
factors = factorOuts[np.argmin(SECSerror)].factors

print("")
tq = tqdm(range(n_iter_max), disable=(not verbose), delay=1.0)
for iteration in tq:
Expand Down Expand Up @@ -171,11 +132,15 @@ def parafac2_nd(

factors_old = deepcopy(factors)

factors = [cp.array(f) for f in factors]

factors = parafac(
projected_X,
factors,
)

factors = [cp.asnumpy(f) for f in factors]

delta = errs[-2] - errs[-1]
tq.set_postfix(
error=errs[-1], R2X=1.0 - errs[-1], Δ=delta, jump=jump, refresh=False
Expand Down
16 changes: 5 additions & 11 deletions parafac2/tests/test_parafac2.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,8 @@ def test_init_reprod(sparse: bool):
cp.testing.assert_array_equal(proj1[ii], proj2[ii])


@pytest.mark.parametrize("SECSI_solver", [False, True])
@pytest.mark.parametrize("sparse", [False, True])
def test_parafac2(sparse: bool, SECSI_solver: bool):
def test_parafac2(sparse: bool):
"""Test for equivalence to TensorLy's PARAFAC2."""
pf2shape = [(250, 1000)] * 8
X: list[np.ndarray] = random_parafac2(pf2shape, rank=3, full=True, random_state=2) # type: ignore
Expand All @@ -71,18 +70,14 @@ def test_parafac2(sparse: bool, SECSI_solver: bool):

options = {"tol": 1e-9, "n_iter_max": 200}

(w1, f1, p1), e1 = parafac2_nd(
X_ann, rank=3, random_state=1, SECSI_solver=SECSI_solver, **options
)
(w1, f1, p1), e1 = parafac2_nd(X_ann, rank=3, random_state=1, **options)

# Test that the model still matches the data
err = _parafac2_reconstruction_error(X, (w1, f1, p1)) ** 2
np.testing.assert_allclose(1.0 - err / norm_tensor, e1, rtol=1e-5)

# Test reproducibility
(w2, f2, p2), e2 = parafac2_nd(
X_ann, rank=3, random_state=3, SECSI_solver=SECSI_solver, **options
)
(w2, f2, p2), e2 = parafac2_nd(X_ann, rank=3, random_state=3, **options)
# Compare to TensorLy
wT, fT, pT = parafac2(
X,
Expand Down Expand Up @@ -129,17 +124,16 @@ def test_pf2_r2x():
np.testing.assert_allclose(err, errCMF, rtol=1e-8)


@pytest.mark.parametrize("SECSI_solver", [False, True])
@pytest.mark.parametrize("sparse", [False, True])
def test_performance(sparse: bool, SECSI_solver: bool):
def test_performance(sparse: bool):
"""Test for equivalence to TensorLy's PARAFAC2."""
# 5000 by 2000 by 300 is roughly the lupus data
pf2shape = [(5_00, 2_00)] * 30
X = random_parafac2(pf2shape, rank=12, full=True, random_state=2)

X = pf2_to_anndata(X, sparse=sparse)

(w1, f1, p1), e1 = parafac2_nd(X, rank=9, SECSI_solver=SECSI_solver)
(w1, f1, p1), e1 = parafac2_nd(X, rank=9)


def test_pf2_proj_centering():
Expand Down
53 changes: 51 additions & 2 deletions parafac2/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,56 @@
import anndata
import cupy as cp
import numpy as np
import tensorly as tl
from cupyx.scipy import sparse as cupy_sparse
from scipy.optimize import linear_sum_assignment
from tensorly.cp_tensor import cp_flip_sign, cp_normalize
from tensorly.tenalg.core_tenalg import unfolding_dot_khatri_rao


def parafac(
tensor: cp.ndarray,
factors: list[cp.ndarray],
n_iter_max: int = 3,
) -> list[cp.ndarray]:
"""Decomposes a tensor into a set of factor matrices using the PARAFAC2 algorithm.
PARAFAC2 decomposes a tensor into a set of factor matrices, where one factor
matrix can vary across slices in one mode. This function implements the core
PARAFAC2 algorithm using alternating least squares.
Args:
tensor (numpy.ndarray): The tensor to decompose.
factors (list of numpy.ndarray): Initial guess for the factor matrices.
The length of the list must be equal to the number of modes in the tensor.
Each element of the list is a numpy.ndarray of shape (size_mode_i, rank),
where size_mode_i is the size of the tensor along mode i, and rank is
the desired rank of the decomposition.
n_iter_max (int, optional): Maximum number of iterations for the algorithm.
Defaults to 3.
Returns:
list of numpy.ndarray: A list containing the factor matrices.
The order of the factor matrices corresponds to the modes of the input
tensor. The first factor matrix is scaled by the weights.
"""
rank = factors[0].shape[1]

tl.set_backend("cupy")

for _ in range(n_iter_max):
for mode in range(tensor.ndim):
pinv = cp.ones((rank, rank))
for i, factor in enumerate(factors):
if i != mode:
pinv *= factor.T @ factor

mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)
factors[mode] = cp.linalg.solve(pinv.T, mttkrp.T).T

weights, factors = cp_normalize((None, factors))
factors[0] *= weights[None, :]

tl.set_backend("numpy")

return factors


def anndata_to_list(X_in: anndata.AnnData) -> list[cp.ndarray | cupy_sparse.csr_matrix]:
Expand Down Expand Up @@ -44,7 +91,7 @@ def project_data(
centering = cp.outer(cp.sum(proj, axis=0), means)
projected_X[i, :, :] = proj.T @ mat - centering

return projections, cp.asnumpy(projected_X)
return projections, projected_X


def reconstruction_error(
Expand All @@ -63,7 +110,9 @@ def reconstruction_error(
B_i = (proj @ B) * A[i]

# trace of the multiplication products
norm_sq_err -= 2.0 * np.trace(A[i][:, np.newaxis] * B.T @ projected_X[i] @ C)
norm_sq_err -= 2.0 * np.trace(
A[i][:, np.newaxis] * B.T @ cp.asnumpy(projected_X[i]) @ C
)
norm_sq_err += ((B_i.T @ B_i) * CtC).sum()

return norm_sq_err
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ dependencies = [
managed = true
dev-dependencies = [
"pytest>=8.3",
"pytest-cov>=6.0",
"pytest-cov>=6.1",
"pyright>=1.1.380",
]

Expand Down
Loading