From 42af51f7380900623bc8cca14e27b9cc27a993c7 Mon Sep 17 00:00:00 2001 From: Alan Morris Date: Wed, 18 Feb 2026 14:41:26 -0700 Subject: [PATCH 1/2] Fix #2495 - Fix PPCA noise variance dilution and null-space leakage in MorphologicalDeviationScore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PPCA noise variance divides by (d - q) instead of (rank - q), where d includes ~5950 structurally zero dimensions from the null space (e.g. for 2048 particles). This dilutes the estimate ~34× (scales linearly with particle count). The Woodbury-based precision matrix then applies this diluted 1/o^2 to all d dimensions including the null space, so most of the score is coming from insignificant and noise dimensions. - Correct denominator to (rank - q) — average only over estimable eigenvalues - Project into the rank-dimensional subspace instead of building a d × d precision matrix, eliminating null-space contributions entirely --- .../EarlyStop/MorphologicalDeviationScore.cpp | 134 +++++++----------- .../EarlyStop/MorphologicalDeviationScore.h | 30 +--- 2 files changed, 59 insertions(+), 105 deletions(-) diff --git a/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.cpp b/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.cpp index d8c2a875c0..925a5ac287 100644 --- a/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.cpp +++ b/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.cpp @@ -1,4 +1,5 @@ #include "MorphologicalDeviationScore.h" + #include namespace shapeworks { @@ -8,11 +9,9 @@ MorphologicalDeviationScore::MorphologicalDeviationScore() = default; bool MorphologicalDeviationScore::SetControlShapes(const Eigen::MatrixXd& X) { try { bool ppca_status = FitPPCA(X); - precision_matrix_ = ComputePrecisionMatrix(); is_fitted_ = ppca_status; } catch (std::exception& e) { - SW_ERROR("Exception in setting control shapes for early stopping {}", - e.what()); + SW_ERROR("Exception in setting control shapes for early stopping {}", e.what()); return false; } return is_fitted_; @@ -27,108 +26,79 @@ bool MorphologicalDeviationScore::FitPPCA(const Eigen::MatrixXd& X) { Eigen::MatrixXd X_c = X.rowwise() - mean_; // (n x d) try { - Eigen::JacobiSVD svd( - X_c, Eigen::ComputeThinU | Eigen::ComputeThinV); + Eigen::JacobiSVD svd(X_c, Eigen::ComputeThinU | Eigen::ComputeThinV); Eigen::VectorXd s = svd.singularValues(); // (r,) Eigen::MatrixXd V = svd.matrixV(); // (d x r) Eigen::VectorXd eigvals = (s.array().square()) / (n - 1); - // save_matrix(X, std::string(DEBUG_FILES_PTH) + "/X.npy"); - // save_matrix(X_c, std::string(DEBUG_FILES_PTH) + "/X_c.npy"); - // save_matrix(V, std::string(DEBUG_FILES_PTH) + "/V.npy"); - // save_vector(eigvals, std::string(DEBUG_FILES_PTH) + "/eigvals.npy"); - Eigen::VectorXd cumsum = eigvals; for (int i = 1; i < eigvals.size(); ++i) cumsum[i] += cumsum[i - 1]; Eigen::VectorXd cumvar = cumsum / eigvals.sum(); - n_components_ = 0; - while (n_components_ < cumvar.size() && cumvar[n_components_] <= retained_variance_ratio_) - ++n_components_; - int q = n_components_; - principal_components_variance_ = eigvals.head(q); // (q,) - components_ = V.leftCols(q); // (d x q) - // save_matrix(components_, std::string(DEBUG_FILES_PTH) + "/components.npy"); - // save_vector(principal_components_variance_, - // std::string(DEBUG_FILES_PTH) + "/eigvals.npy"); - noise_variance_ = eigvals.tail(eigvals.size() - q).sum() / double(d - q); - return true; + int q = 0; + while (q < cumvar.size() && cumvar[q] <= retained_variance_ratio_) ++q; - } catch (std::exception& e) { - SW_ERROR( - "SVD computation failed for MorphologicalDeviationScore: {}", - e.what()); - return false; - } -} + if (q == 0) { + SW_ERROR("PPCA: no components retained at {}% variance", retained_variance_ratio_ * 100); + return false; + } -//--------------------------------------------------------------------------- -Eigen::MatrixXd MorphologicalDeviationScore::ComputeCovarianceMatrix() { - const int d = components_.rows(); - const int q = components_.cols(); + // Determine effective rank: trim structurally zero eigenvalues + const double eps = 1e-10; + int rank = eigvals.size(); + while (rank > q && eigvals[rank - 1] < eps) { + --rank; + } - if (q == 0) { - return noise_variance_ * Eigen::MatrixXd::Identity(d, d); - } + if (rank <= q) { + SW_ERROR("PPCA: effective rank ({}) <= retained components ({}), cannot estimate noise variance", rank, q); + return false; + } - Eigen::MatrixXd diag_lambda = principal_components_variance_.asDiagonal(); // (q x q) - Eigen::MatrixXd cov = - components_ * - (diag_lambda - noise_variance_ * Eigen::MatrixXd::Identity(q, q)) * - components_.transpose() + - noise_variance_ * Eigen::MatrixXd::Identity(d, d); // (d x d) - return cov; -} + // Noise variance: average of excluded-but-real eigenvalues + // Uses (rank - q) denominator instead of (d - q) to avoid dilution + // by structurally zero dimensions beyond the data rank + noise_variance_ = eigvals.segment(q, rank - q).sum() / double(rank - q); -//--------------------------------------------------------------------------- -Eigen::MatrixXd MorphologicalDeviationScore::ComputePrecisionMatrix() { - try { - const int d = components_.rows(); - const int q = components_.cols(); + // Store full rank-dimensional basis for projection-based scoring + all_components_ = V.leftCols(rank); // (d x rank) - Eigen::MatrixXd A_inv = (1 / noise_variance_) * Eigen::MatrixXd::Identity(d, d); + // Build per-dimension precision weights + precision_weights_.resize(rank); + for (int i = 0; i < q; ++i) { + precision_weights_[i] = 1.0 / eigvals[i]; + } + for (int i = q; i < rank; ++i) { + precision_weights_[i] = 1.0 / noise_variance_; + } - Eigen::MatrixXd U = components_; - Eigen::MatrixXd C_inv = principal_components_variance_.cwiseInverse().asDiagonal(); - Eigen::MatrixXd V = components_.transpose(); - // Woodbury matrix identity - // (A + U C V)^(-1) - // = A^(-1) - // - A^(-1) * U * (C^(-1) + V * A^(-1) * U)^(-1) * V * A^(-1) + SW_LOG("PPCA fit: n={}, d={}, rank={}, q={}, noise_var={:.6f}", n, d, rank, q, noise_variance_); - Eigen::MatrixXd precision = A_inv - A_inv * U * (C_inv + V * A_inv * U).inverse() * V * A_inv; - // save_matrix(precision, std::string(DEBUG_FILES_PTH)+"/precision.npy"); + return true; - return precision; } catch (std::exception& e) { - SW_ERROR( - "Failed to compute precision matrix in the early stopping score function: {}", - e.what()); - return Eigen::MatrixXd(); + SW_ERROR("SVD computation failed for MorphologicalDeviationScore: {}", e.what()); + return false; } } //--------------------------------------------------------------------------- -Eigen::VectorXd MorphologicalDeviationScore::GetMorphoDevScore( - const Eigen::MatrixXd& X) { +Eigen::VectorXd MorphologicalDeviationScore::GetMorphoDevScore(const Eigen::MatrixXd& X) { try { - if (!is_fitted_) { - throw std::runtime_error( - "PPCA model is not fitted on control shapes."); + throw std::runtime_error("PPCA model is not fitted on control shapes."); } - const int n = X.rows(); - // Eigen::MatrixXd X_c = X.rowwise() - mean_; - // Eigen::VectorXd dist(n); - // for (int i = 0; i < n; ++i) { - // Eigen::RowVectorXd xi = X_c.row(i); - // dist(i) = std::sqrt( - // std::max(0.0, (xi * precision_matrix_ * xi.transpose())(0))); - // } - - Eigen::MatrixXd X_bar = X.rowwise() - mean_; - Eigen::MatrixXd Y = X_bar * precision_matrix_; // (n_samples × n_features) - Eigen::VectorXd sq_mahal = (X_bar.array() * Y.array()).rowwise().sum(); // quadratic forms - Eigen::VectorXd mahalanobis = sq_mahal.array().sqrt(); // final distances + + // Project into the rank-dimensional subspace, eliminating + // structurally zero dimensions that cannot be estimated from data + Eigen::MatrixXd X_bar = X.rowwise() - mean_; // (n x d) + Eigen::MatrixXd Z = X_bar * all_components_; // (n x rank) + + // Apply per-dimension precision weights: 1/lambda_i for retained PCs, + // 1/noise_variance for excluded-but-real PCs + Eigen::MatrixXd Z_weighted = Z.array().rowwise() * precision_weights_.transpose().array(); // (n x rank) + + Eigen::VectorXd sq_mahal = (Z.array() * Z_weighted.array()).rowwise().sum(); // (n,) + Eigen::VectorXd mahalanobis = sq_mahal.array().max(0.0).sqrt(); return mahalanobis; } catch (std::exception& e) { @@ -140,4 +110,4 @@ Eigen::VectorXd MorphologicalDeviationScore::GetMorphoDevScore( } } -} // namespace shapeworks \ No newline at end of file +} // namespace shapeworks diff --git a/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.h b/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.h index 0759237d99..dd0dd06fd8 100644 --- a/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.h +++ b/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.h @@ -1,7 +1,6 @@ #pragma once #include -// #include "cnpy.h" namespace shapeworks { class MorphologicalDeviationScore { @@ -14,32 +13,17 @@ class MorphologicalDeviationScore { Eigen::VectorXd GetMorphoDevScore(const Eigen::MatrixXd& X); // (n,) private: - /// Flag to ensure control shapes are set and PCA model is in place bool is_fitted_ = false; // Fitted model parameters - Eigen::RowVectorXd mean_; // (1 x d) - Eigen::MatrixXd components_; // (d x q) - Eigen::VectorXd principal_components_variance_; // (q,) - int n_components_ = 0; + Eigen::RowVectorXd mean_; // (1 x d) double noise_variance_ = 0.0; double retained_variance_ratio_ = 0.95; - // Derived matrices - Eigen::MatrixXd precision_matrix_; // (d x d) - // Helper functions + + // Full-rank basis and per-dimension precision weights for scoring + Eigen::MatrixXd all_components_; // (d x rank) — all non-zero eigenvectors + Eigen::VectorXd precision_weights_; // (rank,) — 1/lambda_i or 1/noise_variance + bool FitPPCA(const Eigen::MatrixXd& X); - Eigen::MatrixXd ComputeCovarianceMatrix(); - Eigen::MatrixXd ComputePrecisionMatrix(); -// inline static void save_vector(const Eigen::VectorXd& v, -// const std::string& fname) { -// cnpy::npy_save(fname, v.data(), {(size_t)v.size()}, "w"); -// }; -// inline static void save_matrix(const Eigen::MatrixXd m, -// const std::string& fname) { -// Eigen::Matrix -// X_rm = m; -// cnpy::npy_save(fname, X_rm.data(), {(size_t)m.rows(), (size_t)m.cols()}, -// "w"); -// }; }; -} // namespace shapeworks \ No newline at end of file +} // namespace shapeworks From 90142b692be080ad4154b891d88a2c44f9b208da Mon Sep 17 00:00:00 2001 From: Alan Morris Date: Wed, 18 Feb 2026 14:44:17 -0700 Subject: [PATCH 2/2] Add back comment --- Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.h b/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.h index dd0dd06fd8..a5316c07fe 100644 --- a/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.h +++ b/Libs/Optimize/Function/EarlyStop/MorphologicalDeviationScore.h @@ -13,6 +13,7 @@ class MorphologicalDeviationScore { Eigen::VectorXd GetMorphoDevScore(const Eigen::MatrixXd& X); // (n,) private: + /// Flag to ensure control shapes are set and PCA model is in place bool is_fitted_ = false; // Fitted model parameters