From a19ed167b208b04740b603f52d499c937474c174 Mon Sep 17 00:00:00 2001
From: MrBones1102 <jamespopoli@gmail.com>
Date: Tue, 14 Apr 2026 11:10:04 -0700
Subject: [PATCH 01/10] update dependencies

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6572865..7053fd9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ dependencies = [
     "networkx>=3.3",
     "gseapy~=1.1.6",
     "statsmodels>=0.14.5",
-    "scipy<=1.15.3",
+    "scipy<=1.16",
 ]
 
 

From a886f8facae28d6b2609bb80a1d49929e818f431 Mon Sep 17 00:00:00 2001
From: MrBones1102 <jamespopoli@gmail.com>
Date: Tue, 14 Apr 2026 11:10:22 -0700
Subject: [PATCH 02/10] drop-in pcr replacement

---
 pf2/predict.py | 185 ++++++++++++++++++++++++++-----------------------
 1 file changed, 100 insertions(+), 85 deletions(-)

diff --git a/pf2/predict.py b/pf2/predict.py
index 6b8ca48..0800ec9 100644
--- a/pf2/predict.py
+++ b/pf2/predict.py
@@ -1,177 +1,180 @@
 import pandas as pd
 import numpy as np
-from sklearn.cross_decomposition import PLSRegression
+from sklearn.decomposition import PCA
+from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import accuracy_score, roc_auc_score
 from sklearn.model_selection import StratifiedGroupKFold, StratifiedKFold
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
 import anndata
+
 SKF = StratifiedKFold(n_splits=10)
 SGKF = StratifiedGroupKFold(n_splits=10)
 
 
-def run_plsr(
+def run_pca_lr(
     data: pd.DataFrame,
     labels: pd.DataFrame,
     proba: bool = False,
     n_components: int = 1
-) -> tuple[pd.Series, PLSRegression]:
+) -> tuple[pd.Series, Pipeline]:
     """
-    Predicts labels via PLSR cross-validation.
+    Predicts labels via PCA + logistic regression cross-validation.
 
     Args:
         data (pd.DataFrame): data to predict
-        labels (pd.Series): classification labels
-        proba (bool, default:False): return probability of prediction
-        n_components (int, default:1): number of PLSR components to use
+        labels (pd.DataFrame): classification labels with binary_outcome and patient_id
+        proba (bool, default:False): return probability of positive class
+        n_components (int, default:1): number of PCA components to use
 
     Returns:
-        predicted (pd.Series): predicted mortality for patients; if proba is
-            True, returns probabilities of mortality
-        plsr (PLSRegression): fitted PLSR model
+        predicted (pd.Series): predicted mortality (or probabilities if proba=True)
+        pipeline (Pipeline): fitted PCA + LR pipeline
     """
     if not isinstance(data, pd.DataFrame):
         data = pd.DataFrame(data)
 
-    plsr = PLSRegression(n_components=n_components, scale=True, max_iter=int(1e5))
+    pipeline = Pipeline([
+        ("scaler", StandardScaler()),
+        ("pca", PCA(n_components=n_components)),
+        ("lr", LogisticRegression(max_iter=int(1e5))),
+    ])
 
-    probabilities = pd.Series(0, dtype=float, index=data.index)
+    probabilities = pd.Series(0.0, dtype=float, index=data.index)
     for train_index, test_index in SGKF.split(
         data,
         labels.loc[:, "binary_outcome"],
-        labels.loc[:, "patient_id"]
+        labels.loc[:, "patient_id"],
     ):
-        train_group_data = data.iloc[train_index, :]
+        train_data = data.iloc[train_index]
         train_labels = labels.iloc[train_index].loc[:, "binary_outcome"]
-        test_group_data = data.iloc[test_index]
-        plsr.fit(train_group_data, train_labels)
-        probabilities.iloc[test_index] = plsr.predict(test_group_data)
+        test_data = data.iloc[test_index]
+        pipeline.fit(train_data, train_labels)
+        probabilities.iloc[test_index] = pipeline.predict_proba(test_data)[:, 1]
 
-    plsr.fit(data, labels.loc[:, "binary_outcome"])
-    plsr.coef_ = pd.Series(plsr.coef_.squeeze(), index=data.columns)
+    pipeline.fit(data, labels.loc[:, "binary_outcome"])
 
     if proba:
-        return probabilities, plsr
-
+        return probabilities, pipeline
     else:
-        predicted = probabilities.round().astype(int)
-        return predicted, plsr
+        predicted = (probabilities > 0.5).astype(int)
+        return predicted, pipeline
+
 
-    
 def predict_mortality_all(
-    X: anndata.AnnData, data: pd.DataFrame, proba: bool = False, n_components=1, bulk=False
+    X: anndata.AnnData,
+    data: pd.DataFrame,
+    proba: bool = False,
+    n_components=1,
+    bulk=False,
 ):
     """
     Predicts mortality via cross-validation without breaking up by status.
 
     Parameters:
-        data (pd.DataFrame): data to predict
-        meta (pd.DataFrame): patient meta-data
+        X (anndata.AnnData): factorization results
+        data (pd.DataFrame): patient meta-data
         proba (bool, default:False): return probability of prediction
+        n_components (int, default:1): number of PCA components
+        bulk (bool, default:False): use bulk features instead of Pf2 components
 
     Returns:
         if proba:
-            probabilities (pd.Series): predicted probability of mortality for
-                patients
+            probabilities (pd.Series): predicted probability of mortality
             labels (pd.Series): classification targets
         else:
             accuracy (float): prediction accuracy
             labels (pd.Series): classification targets
-            model: fitted PLSR models
+            model: fitted pipeline
     """
     if not isinstance(data, pd.DataFrame):
         data = pd.DataFrame(data)
 
     cond_fact_meta_df = data[data["patient_category"] != "Non-Pneumonia Control"]
-    
+
     labels = cond_fact_meta_df.loc[:, ["binary_outcome", "patient_id"]]
     predictions = pd.Series(index=cond_fact_meta_df.index)
-    
+
+    n_cmps = X.uns["Pf2_A"].shape[1]
     if bulk is False:
-        predictions[:], all_plsr = run_plsr(
-            cond_fact_meta_df[[f"Cmp. {i}" for i in np.arange(1, X.uns["Pf2_A"].shape[1] + 1)]], 
+        predictions[:], model = run_pca_lr(
+            cond_fact_meta_df[[f"Cmp. {i}" for i in np.arange(1, n_cmps + 1)]],
             labels, proba=proba, n_components=n_components
         )
     else:
-        predictions[:], all_plsr = run_plsr(
+        predictions[:], model = run_pca_lr(
             cond_fact_meta_df.iloc[:, :-3],
             labels, proba=proba, n_components=n_components
         )
-        
+
     if proba:
         return predictions, labels.loc[:, "binary_outcome"]
-
     else:
         predicted = predictions.round().astype(int)
         return (
             accuracy_score(labels.loc[:, "binary_outcome"], predicted),
             labels.loc[:, "binary_outcome"],
-            all_plsr
+            model
         )
-    
+
 
 def predict_mortality(
     X: anndata.AnnData,
     data: pd.DataFrame,
     n_components: int = 1,
     proba: bool = False
-) -> tuple[pd.Series, pd.Series, tuple[PLSRegression, PLSRegression]]:
+) -> tuple[pd.Series, pd.Series, tuple[Pipeline, Pipeline]]:
     """
-    Predicts mortality via cross-validation.
+    Predicts mortality via cross-validation, separately for C19 and nC19.
 
     Parameters:
         X (anndata.AnnData): factorization results
         data (pd.DataFrame): patient meta-data
-        n_components (int, default:1): number of PLS components to use
+        n_components (int, default:1): number of PCA components
         proba (bool, default:False): return probability of prediction
 
     Returns:
-        predictions (pd.Series): if proba, probabilities of mortality for each
-            sample; else, predicted mortality outcome
+        predictions (pd.Series): if proba, probabilities; else predicted outcome
         labels (pd.Series): classification targets
-        models (tuple[COVID, Non-COVID]): fitted PLSR models
+        models (tuple[C19, nC19]): fitted pipelines
     """
     if not isinstance(data, pd.DataFrame):
         data = pd.DataFrame(data)
 
     cond_fact_meta_df = data.loc[
-        data.loc[:, "patient_category"] != "Non-Pneumonia Control",
-        :
+        data.loc[:, "patient_category"] != "Non-Pneumonia Control", :
     ]
-    
+
     labels = cond_fact_meta_df.loc[
-         :,
-         ["binary_outcome", "patient_id"]
-     ].astype(int)
-
-    covid_data = cond_fact_meta_df.loc[cond_fact_meta_df.loc[:, "patient_category"] == "COVID-19", :]
-    covid_labels = labels.loc[
-        cond_fact_meta_df.loc[:, "patient_category"] == "COVID-19",
-        :
-    ]
-    nc_data = cond_fact_meta_df.loc[cond_fact_meta_df.loc[:, "patient_category"] != "COVID-19", :]
-    nc_labels = labels.loc[
-        cond_fact_meta_df.loc[:, "patient_category"] != "COVID-19",
-        :
-    ]
+        :, ["binary_outcome", "patient_id"]
+    ].astype(int)
+
+    c19_mask = cond_fact_meta_df.loc[:, "patient_category"] == "COVID-19"
+    n_cmps = X.uns["Pf2_A"].shape[1]
+    cmps = [f"Cmp. {i}" for i in np.arange(1, n_cmps + 1)]
+
+    covid_data = cond_fact_meta_df.loc[c19_mask, :]
+    covid_labels = labels.loc[c19_mask, :]
+    nc_data = cond_fact_meta_df.loc[~c19_mask, :]
+    nc_labels = labels.loc[~c19_mask, :]
 
     predictions = pd.Series(index=cond_fact_meta_df.index)
-    predictions.loc[cond_fact_meta_df.loc[:, "patient_category"] == "COVID-19"], c_plsr = run_plsr(
-        covid_data[[f"Cmp. {i}" for i in np.arange(1, X.uns["Pf2_A"].shape[1] + 1)]],
-        covid_labels, proba=proba, n_components=n_components
+    predictions.loc[c19_mask], c_model = run_pca_lr(
+        covid_data[cmps], covid_labels, proba=proba, n_components=n_components
     )
-    predictions.loc[cond_fact_meta_df.loc[:, "patient_category"] != "COVID-19"], nc_plsr = run_plsr(
-        nc_data[[f"Cmp. {i}" for i in np.arange(1, X.uns["Pf2_A"].shape[1] + 1)]],
-        nc_labels, proba=proba, n_components=n_components
+    predictions.loc[~c19_mask], nc_model = run_pca_lr(
+        nc_data[cmps], nc_labels, proba=proba, n_components=n_components
     )
 
     return (
         predictions,
         labels.loc[:, "binary_outcome"].squeeze(),
-        (c_plsr, nc_plsr)
+        (c_model, nc_model)
     )
 
 
 def plsr_acc_proba(X, patient_factor_matrix, n_components=1, roc_auc=True):
-    """Runs PLSR and obtains average prediction accuracy"""
+    """Runs PCA+LR and obtains average prediction accuracy"""
 
     acc_df = pd.DataFrame(columns=["Overall", "C19", "nC19"])
 
@@ -179,20 +182,32 @@ def plsr_acc_proba(X, patient_factor_matrix, n_components=1, roc_auc=True):
         X, patient_factor_matrix, n_components=n_components, proba=True
     )
 
+    c19_mask = patient_factor_matrix.loc[:, "patient_category"] == "COVID-19"
+    nc19_mask = patient_factor_matrix.loc[:, "patient_category"] != "COVID-19"
+
     if roc_auc:
-        score = roc_auc_score
+        covid_acc = roc_auc_score(
+            labels_all.loc[c19_mask].to_numpy().astype(int),
+            probabilities_all.loc[c19_mask],
+        )
+        nc_acc = roc_auc_score(
+            labels_all.loc[nc19_mask].to_numpy().astype(int),
+            probabilities_all.loc[nc19_mask],
+        )
+        acc = roc_auc_score(labels_all.to_numpy().astype(int), probabilities_all)
     else:
-        score = accuracy_score
-        
-    covid_acc = score(
-        labels_all.loc[patient_factor_matrix.loc[:, "patient_category"] == "COVID-19"].to_numpy().astype(int),
-        probabilities_all.loc[patient_factor_matrix.loc[:, "patient_category"] == "COVID-19"].round().astype(int),
-    )
-    nc_acc = score(
-        labels_all.loc[patient_factor_matrix.loc[:, "patient_category"] != "COVID-19"].to_numpy().astype(int),
-        probabilities_all.loc[patient_factor_matrix.loc[:, "patient_category"] != "COVID-19"].round().astype(int),
-    )
-    acc = score(labels_all.to_numpy().astype(int), probabilities_all.round().astype(int))
+        covid_acc = accuracy_score(
+            labels_all.loc[c19_mask].to_numpy().astype(int),
+            probabilities_all.loc[c19_mask].round().astype(int),
+        )
+        nc_acc = accuracy_score(
+            labels_all.loc[nc19_mask].to_numpy().astype(int),
+            probabilities_all.loc[nc19_mask].round().astype(int),
+        )
+        acc = accuracy_score(
+            labels_all.to_numpy().astype(int),
+            probabilities_all.round().astype(int),
+        )
 
     acc_df.loc[0, :] = [acc, covid_acc, nc_acc]
 
@@ -200,10 +215,10 @@ def plsr_acc_proba(X, patient_factor_matrix, n_components=1, roc_auc=True):
 
 
 def plsr_acc(X, patient_factor_matrix, n_components=1):
-    """Runs PLSR and obtains average prediction accuracy for C19 and nC19"""
+    """Runs PCA+LR and returns labels and fitted models for C19 and nC19"""
 
-    _, labels, [c19_plsr, nc19_plsr] = predict_mortality(X,
+    _, labels, [c19_model, nc19_model] = predict_mortality(X,
         patient_factor_matrix, n_components=n_components,
     )
 
-    return labels, [c19_plsr, nc19_plsr]
+    return labels, [c19_model, nc19_model]

From 58e452e5cbf9049ba7fcf40f42c8a2614da64d92 Mon Sep 17 00:00:00 2001
From: MrBones1102 <jamespopoli@gmail.com>
Date: Tue, 14 Apr 2026 14:58:27 -0700
Subject: [PATCH 03/10] rename funcs

---
 pf2/predict.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pf2/predict.py b/pf2/predict.py
index 0800ec9..f29d461 100644
--- a/pf2/predict.py
+++ b/pf2/predict.py
@@ -173,7 +173,7 @@ def predict_mortality(
     )
 
 
-def plsr_acc_proba(X, patient_factor_matrix, n_components=1, roc_auc=True):
+def pca_acc_proba(X, patient_factor_matrix, n_components=1, roc_auc=True):
     """Runs PCA+LR and obtains average prediction accuracy"""
 
     acc_df = pd.DataFrame(columns=["Overall", "C19", "nC19"])
@@ -214,7 +214,7 @@ def plsr_acc_proba(X, patient_factor_matrix, n_components=1, roc_auc=True):
     return acc_df
 
 
-def plsr_acc(X, patient_factor_matrix, n_components=1):
+def pca_acc(X, patient_factor_matrix, n_components=1):
     """Runs PCA+LR and returns labels and fitted models for C19 and nC19"""
 
     _, labels, [c19_model, nc19_model] = predict_mortality(X,

From c846f0c09b2b449c67569e59cf51851f8396b7fa Mon Sep 17 00:00:00 2001
From: MrBones1102 <jamespopoli@gmail.com>
Date: Tue, 14 Apr 2026 14:59:04 -0700
Subject: [PATCH 04/10] refactor FigureA3b_c

---
 pf2/figures/figureA3b_c.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/pf2/figures/figureA3b_c.py b/pf2/figures/figureA3b_c.py
index 48517e7..49ec15d 100644
--- a/pf2/figures/figureA3b_c.py
+++ b/pf2/figures/figureA3b_c.py
@@ -6,7 +6,7 @@
 import anndata
 import seaborn as sns
 from ..data_import import condition_factors_meta, add_obs, combine_cell_types
-from ..predict import predict_mortality_all, plsr_acc_proba
+from ..predict import predict_mortality_all, pca_acc_proba
 from .common import subplotLabel, getSetup
 from sklearn.metrics import RocCurveDisplay
 
@@ -25,19 +25,19 @@ def makeFigure():
 
     roc_auc = [False, True]
     for i in range(2):
-        plsr_acc_df = pd.DataFrame([])
-        for j in range(3):
-            df = plsr_acc_proba(
+        pca_acc_df = pd.DataFrame([])
+        for j in range(5):
+            df = pca_acc_proba(
                 X, cond_fact_meta_df, n_components=j + 1, roc_auc=roc_auc[i]
             )
             df["Component"] = j + 1
-            plsr_acc_df = pd.concat([plsr_acc_df, df], axis=0)
+            pca_acc_df = pd.concat([pca_acc_df, df], axis=0)
 
-        plsr_acc_df = plsr_acc_df.melt(
+        pca_acc_df = pca_acc_df.melt(
             id_vars="Component", var_name="Category", value_name="Accuracy"
         )
         sns.barplot(
-            data=plsr_acc_df, x="Component", y="Accuracy", hue="Category", ax=ax[i],
+            data=pca_acc_df, x="Component", y="Accuracy", hue="Category", ax=ax[i],
             hue_order=["C19", "nC19", "Overall"]
         )
         if roc_auc[i] is True:
@@ -45,16 +45,22 @@ def makeFigure():
         else:
             ax[i].set(ylim=[0, 1], ylabel="Prediction Accuracy")
 
+    # Find the top performing PCA models based on best total accuracy within a component
+    top_acc_idx = pca_acc_df.groupby("Component")["Accuracy"].idxmax().tolist()
+    pca_acc_df_top = pca_acc_df.loc[top_acc_idx].sort_values("Accuracy", ascending=False)
+    top_performing = pca_acc_df_top["Component"].tolist()
+
+    # Get the top 2 performing PCA models and plot the ROC curve for each
     for i in range(2):
-        plot_plsr_auc_roc(X, cond_fact_meta_df, n_components=i + 1, ax=ax[i + 2])
-        ax[i + 2].set(title=f"PLSR {i + 1} Components")
+        plot_pca_auc_roc(X, cond_fact_meta_df, n_components=top_performing[i], ax=ax[i + 2])
+        ax[i + 2].set(title=f"PCA {top_performing[i]} Components")
 
     return f
 
 
 
-def plot_plsr_auc_roc(X, patient_factor_matrix, n_components, ax):
-    """Runs PLSR and plots ROC AUC based on actual and prediction labels"""
+def plot_pca_auc_roc(X, patient_factor_matrix, n_components, ax):
+    """Runs PCA and plots ROC AUC based on actual and prediction labels"""
     probabilities_all, labels_all = predict_mortality_all(X, 
         patient_factor_matrix, n_components=n_components, proba=True)
 

From ef126e955fdde4a7aebedc39fe79091894eb8bde Mon Sep 17 00:00:00 2001
From: MrBones1102 <jamespopoli@gmail.com>
Date: Tue, 14 Apr 2026 15:37:59 -0700
Subject: [PATCH 05/10] refactor Figure3Ad_g, add configurable PC

---
 pf2/figures/figureA3d_g.py | 91 +++++++++++++++++---------------------
 1 file changed, 40 insertions(+), 51 deletions(-)

diff --git a/pf2/figures/figureA3d_g.py b/pf2/figures/figureA3d_g.py
index 53edc65..7d49360 100644
--- a/pf2/figures/figureA3d_g.py
+++ b/pf2/figures/figureA3d_g.py
@@ -2,14 +2,14 @@
 Figure A3d_g
 """
 
+import anndata
 import numpy as np
 import pandas as pd
-import anndata
 import seaborn as sns
 from ..data_import import condition_factors_meta
-from ..predict import plsr_acc
+from ..predict import pca_acc
 from .common import subplotLabel, getSetup
-import seaborn as sns
+
 
 def makeFigure():
     """Get a list of the axis objects and create a figure."""
@@ -19,54 +19,47 @@ def makeFigure():
     X = anndata.read_h5ad("/opt/northwest_bal/full_fitted.h5ad")
 
     cond_fact_meta_df = condition_factors_meta(X)
-    
-    labels, plsr_results_both = plsr_acc(X, cond_fact_meta_df, n_components=1)
 
-    plot_plsr_loadings(plsr_results_both, ax[0], ax[1])
+    labels, pca_results_both = pca_acc(X, cond_fact_meta_df, n_components=3)
+
+    plot_pca_loadings(pca_results_both, ax[0], ax[1])
     ax[0].set(xlim=[-0.35, 0.35])
     ax[1].set(xlim=[-0.35, 0.35])
 
-    plot_plsr_scores(plsr_results_both, cond_fact_meta_df, labels, ax[2], ax[3])
+    plot_pca_scores(pca_results_both, cond_fact_meta_df, labels, ax[2], ax[3])
     ax[2].set(xlim=[-7, 7])
     ax[3].set(xlim=[-9.5, 9.5])
 
     return f
 
 
-def plot_plsr_loadings(plsr_results, ax1, ax2):
-    """Runs PLSR and plots ROC AUC based on actual and prediction labels"""
+def plot_pca_loadings(pca_results, ax1, ax2):
+    """Plots PCA component loadings for C19 and nC19 models."""
+    TARGET_PC = 3
+    TARGET_STR = f"PC {TARGET_PC}"
     ax = [ax1, ax2]
     type_of_data = ["C19", "nC19"]
 
     for i in range(2):
-        x_load = plsr_results[i].x_loadings_[:, 0]
+        pca = pca_results[i].named_steps["pca"]
+
+        x_load = pca.components_[TARGET_PC - 1, :]
         if i == 1:
-            x_load =-1*x_load
-        df_xload = pd.DataFrame(data=x_load, columns=["PLSR 1"])
+            x_load = -1 * x_load
+        df_xload = pd.DataFrame(data=x_load, columns=[TARGET_STR])
         df_xload["Component"] = np.arange(df_xload.shape[0]) + 1
-        print(df_xload.sort_values(by="PLSR 1"))
-        y_load = plsr_results[i].y_loadings_[0, 0]
-        if i == 1:
-            y_load =-1*y_load
-        df_yload = pd.DataFrame(data=[[y_load]], columns=["PLSR 1"])
-        sns.swarmplot(
-            data=df_xload,
-            x="PLSR 1",
-            ax=ax[i],
-            color="k",
-        )
-        sns.swarmplot(
-            data=df_yload,
-            x="PLSR 1",
-            ax=ax[i],
-            color="r",
-            
+        print(df_xload.sort_values(by=TARGET_STR))
+
+        sns.swarmplot(data=df_xload, x=TARGET_STR, ax=ax[i], color="k")
+        ax[i].set(
+            xlabel=TARGET_STR, ylabel="Pf2 Components", title=f"{type_of_data[i]}-loadings"
         )
-        ax[i].set(xlabel="PLSR 1", ylabel="Pf2 Components", title=f"{type_of_data[i]}-loadings")
 
 
-def plot_plsr_scores(plsr_results, cond_fact_meta_df, labels, ax1, ax2):
-    """Runs PLSR and plots ROC AUC based on actual and prediction labels"""
+def plot_pca_scores(pca_results, cond_fact_meta_df, labels, ax1, ax2):
+    """Plots PCA scores for C19 and nC19 patients colored by mortality outcome."""
+    TARGET_PC = 3
+    TARGET_STR = f"PC {TARGET_PC}"
     ax = [ax1, ax2]
     type_of_data = ["C19", "nC19"]
 
@@ -74,32 +67,28 @@ def plot_plsr_scores(plsr_results, cond_fact_meta_df, labels, ax1, ax2):
         cond_fact_meta_df.loc[:, "patient_category"] != "Non-Pneumonia Control", :
     ]
 
+    cmp_cols = [c for c in cond_fact_meta_df.columns if c.startswith("Cmp.")]
+    c19_mask = cond_fact_meta_df.loc[:, "patient_category"] == "COVID-19"
+
     for i in range(2):
-        if i == 0:
-            score_labels = labels.loc[
-                cond_fact_meta_df.loc[:, "patient_category"] == "COVID-19"
-            ]
-        else:
-            score_labels = labels.loc[
-                cond_fact_meta_df.loc[:, "patient_category"] != "COVID-19"
-            ]
+        mask = c19_mask if i == 0 else ~c19_mask
+        score_labels = labels.loc[mask]
+        subset_data = cond_fact_meta_df.loc[mask, cmp_cols]
 
-        pal = sns.color_palette()
-        if i == 0: 
-            numb1=0; numb2=2
-        else:
-            numb1=1; numb2=3
-        
-        x_scores = plsr_results[i].x_scores_[:, 0]
+        x_scores = pca_results[i][:-1].transform(subset_data)[:, TARGET_PC - 1]
         if i == 1:
-            x_scores =-1*x_scores
-        df_xscores = pd.DataFrame(data=x_scores, columns=["PLSR 1"])
+            x_scores = -1 * x_scores
+
+        pal = sns.color_palette()
+        numb1, numb2 = (0, 2) if i == 0 else (1, 3)
+
+        df_xscores = pd.DataFrame(data=x_scores, columns=[TARGET_STR])
         sns.swarmplot(
             data=df_xscores,
-            x="PLSR 1",
+            x=TARGET_STR,
             ax=ax[i],
             hue=score_labels.to_numpy(),
             palette=[pal[numb1], pal[numb2]],
             hue_order=[1, 0],
         )
-        ax[i].set(xlabel="PLSR 1", ylabel="Samples", title=f"{type_of_data[i]}-scores")
\ No newline at end of file
+        ax[i].set(xlabel=TARGET_STR, ylabel="Samples", title=f"{type_of_data[i]}-scores")

From 9175ae6b0d7f42526b634f24bf8421e667fd1426 Mon Sep 17 00:00:00 2001
From: MrBones1102 <jamespopoli@gmail.com>
Date: Wed, 15 Apr 2026 07:33:39 -0700
Subject: [PATCH 06/10] predict mortality defaults to 3 pca

---
 pf2/predict.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pf2/predict.py b/pf2/predict.py
index f29d461..9dd3a0b 100644
--- a/pf2/predict.py
+++ b/pf2/predict.py
@@ -121,7 +121,7 @@ def predict_mortality_all(
 def predict_mortality(
     X: anndata.AnnData,
     data: pd.DataFrame,
-    n_components: int = 1,
+    n_components: int = 3,
     proba: bool = False
 ) -> tuple[pd.Series, pd.Series, tuple[Pipeline, Pipeline]]:
     """

From b039b3c440a4674976bc98a6a17e6218c18d5df4 Mon Sep 17 00:00:00 2001
From: MrBones1102 <jamespopoli@gmail.com>
Date: Thu, 30 Apr 2026 14:39:36 -0700
Subject: [PATCH 07/10] 2d plot of scores

---
 pf2/figures/figureA3d_g.py | 48 +++++++++++++++++++++++++++++++++++---
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/pf2/figures/figureA3d_g.py b/pf2/figures/figureA3d_g.py
index 7d49360..26d6cd6 100644
--- a/pf2/figures/figureA3d_g.py
+++ b/pf2/figures/figureA3d_g.py
@@ -13,7 +13,7 @@
 
 def makeFigure():
     """Get a list of the axis objects and create a figure."""
-    ax, f = getSetup((3, 7), (4, 1))
+    ax, f = getSetup((6, 8), (3, 2))
     subplotLabel(ax)
 
     X = anndata.read_h5ad("/opt/northwest_bal/full_fitted.h5ad")
@@ -27,8 +27,8 @@ def makeFigure():
     ax[1].set(xlim=[-0.35, 0.35])
 
     plot_pca_scores(pca_results_both, cond_fact_meta_df, labels, ax[2], ax[3])
-    ax[2].set(xlim=[-7, 7])
-    ax[3].set(xlim=[-9.5, 9.5])
+
+    plot_pca_scores_2d(pca_results_both, cond_fact_meta_df, labels, ax[4], ax[5])
 
     return f
 
@@ -92,3 +92,45 @@ def plot_pca_scores(pca_results, cond_fact_meta_df, labels, ax1, ax2):
             hue_order=[1, 0],
         )
         ax[i].set(xlabel=TARGET_STR, ylabel="Samples", title=f"{type_of_data[i]}-scores")
+
+def plot_pca_scores_2d(pca_results, cond_fact_meta_df, labels, ax1, ax2):
+    """Plots PCA scores of PC1 vs PC2 and PC1 vs PC3 for C19 and nC19 patients colored by mortality outcome."""
+    ax = [ax1, ax2]
+    pc_pairs = [(0, 1), (0, 2)]
+
+    cond_fact_meta_df = cond_fact_meta_df.loc[
+        cond_fact_meta_df.loc[:, "patient_category"] != "Non-Pneumonia Control", :
+    ]
+
+    cmp_cols = [c for c in cond_fact_meta_df.columns if c.startswith("Cmp.")]
+    c19_mask = cond_fact_meta_df.loc[:, "patient_category"] == "COVID-19"
+
+    for i, (pc_x, pc_y) in enumerate(pc_pairs):
+        score_labels = labels.loc[c19_mask]
+        subset_data = cond_fact_meta_df.loc[c19_mask, cmp_cols]
+
+        scores = pca_results[0][:-1].transform(subset_data)[:, :3]
+
+        pal = sns.color_palette()
+        df_scores = pd.DataFrame(
+            {
+                f"PC {pc_x + 1}": scores[:, pc_x],
+                f"PC {pc_y + 1}": scores[:, pc_y],
+                "Outcome": score_labels.to_numpy(),
+            }
+        )
+        sns.scatterplot(
+            data=df_scores,
+            x=f"PC {pc_x + 1}",
+            y=f"PC {pc_y + 1}",
+            hue="Outcome",
+            palette=[pal[0], pal[2]],
+            hue_order=[1, 0],
+            ax=ax[i],
+        )
+        ax[i].set(
+            xlabel=f"PC {pc_x + 1}",
+            ylabel=f"PC {pc_y + 1}",
+            title="C19-scores 2D",
+        )
+

From dda5433ec66759eb968a83d2ca96532c0a5d2fca Mon Sep 17 00:00:00 2001
From: MrBones1102 <jamespopoli@gmail.com>
Date: Thu, 30 Apr 2026 14:39:59 -0700
Subject: [PATCH 08/10] add anova test of 5-component PCA model

---
 pf2/figures/figureA3b_c.py | 64 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/pf2/figures/figureA3b_c.py b/pf2/figures/figureA3b_c.py
index 49ec15d..fb1bf85 100644
--- a/pf2/figures/figureA3b_c.py
+++ b/pf2/figures/figureA3b_c.py
@@ -2,9 +2,14 @@
 Figure A3b_c
 """
 
+import numpy as np
 import pandas as pd
 import anndata
 import seaborn as sns
+from statsmodels.stats.anova import anova_lm
+from statsmodels.formula.api import ols
+from sklearn.decomposition import PCA
+from sklearn.preprocessing import StandardScaler
 from ..data_import import condition_factors_meta, add_obs, combine_cell_types
 from ..predict import predict_mortality_all, pca_acc_proba
 from .common import subplotLabel, getSetup
@@ -13,7 +18,7 @@
 
 def makeFigure():
     """Get a list of the axis objects and create a figure."""
-    ax, f = getSetup((6, 6), (2, 2))
+    ax, f = getSetup((9, 9), (3, 2))
     subplotLabel(ax)
 
     X = anndata.read_h5ad("/opt/northwest_bal/full_fitted.h5ad")
@@ -55,6 +60,10 @@ def makeFigure():
         plot_pca_auc_roc(X, cond_fact_meta_df, n_components=top_performing[i], ax=ax[i + 2])
         ax[i + 2].set(title=f"PCA {top_performing[i]} Components")
 
+    anova_df = pc_anova_analysis(cond_fact_meta_df)
+    plot_pc_anova_heatmap(anova_df, ax=ax[4])
+    ax[5].set_visible(False)
+
     return f
 
 
@@ -78,4 +87,55 @@ def plot_pca_auc_roc(X, patient_factor_matrix, n_components, ax):
     )
     RocCurveDisplay.from_predictions(
         labels_all.to_numpy().astype(int), probabilities_all, plot_chance_level=True, ax=ax, name="Overall"
-    )
\ No newline at end of file
+    )
+
+
+def pc_anova_analysis(cond_fact_meta_df, n_components=5):
+    """ANOVA for each PC against C19 outcome, nC19 outcome, and C19 vs nC19."""
+    feature_cols = [c for c in cond_fact_meta_df.columns if c.startswith("Cmp.")]
+    df = cond_fact_meta_df[
+        cond_fact_meta_df["patient_category"] != "Non-Pneumonia Control"
+    ].copy()
+
+    pca_scores = PCA(n_components=n_components).fit_transform(
+        StandardScaler().fit_transform(df[feature_cols])
+    )
+    pc_df = pd.DataFrame(
+        pca_scores,
+        columns=[f"PC{i+1}" for i in range(n_components)],
+        index=df.index,
+    )
+    pc_df["binary_outcome"] = df["binary_outcome"]
+    pc_df["patient_category"] = df["patient_category"]
+    pc_df["is_c19"] = (pc_df["patient_category"] == "COVID-19").astype(int)
+
+    def _p(formula, data, term):
+        return anova_lm(ols(formula, data=data).fit(), typ=2).loc[term, "PR(>F)"]
+
+    results = {}
+    for i in range(n_components):
+        pc = f"PC{i+1}"
+        c19 = pc_df[pc_df["patient_category"] == "COVID-19"].dropna(
+            subset=["binary_outcome"]
+        )
+        nc19 = pc_df[pc_df["patient_category"] != "COVID-19"].dropna(
+            subset=["binary_outcome"]
+        )
+        results[pc] = {
+            "C19 Outcome": _p(f"{pc} ~ C(binary_outcome)", c19, "C(binary_outcome)"),
+            "nC19 Outcome": _p(f"{pc} ~ C(binary_outcome)", nc19, "C(binary_outcome)"),
+            "C19 vs nC19": _p(f"{pc} ~ C(is_c19)", pc_df, "C(is_c19)"),
+        }
+
+    return pd.DataFrame(results).T
+
+
+def plot_pc_anova_heatmap(anova_df, ax):
+    """Heatmap of -log10(p-values) from per-PC ANOVA associations."""
+    neg_log_p = -np.log10(anova_df.astype(float))
+    sns.heatmap(
+        neg_log_p, ax=ax, cmap="YlOrRd", annot=True, fmt=".2f",
+        linewidths=0.5, cbar_kws={"label": "-log10(p-value)"},
+    )
+    ax.axhline(y=0, color="black", linewidth=0.5)
+    ax.set(title="PC Associations (ANOVA)", xlabel="", ylabel="")
\ No newline at end of file

From ce854a1184a59af9453900b15247169d62282bee Mon Sep 17 00:00:00 2001
From: MrBones1102 <jamespopoli@gmail.com>
Date: Fri, 1 May 2026 11:23:40 -0700
Subject: [PATCH 09/10] Add SVM to 2D plots

Co-authored-by: Copilot <copilot@github.com>
---
 pf2/figures/figureA3d_g.py | 80 +++++++++++++++++++++++++++-----------
 1 file changed, 58 insertions(+), 22 deletions(-)

diff --git a/pf2/figures/figureA3d_g.py b/pf2/figures/figureA3d_g.py
index 26d6cd6..76600d8 100644
--- a/pf2/figures/figureA3d_g.py
+++ b/pf2/figures/figureA3d_g.py
@@ -6,10 +6,24 @@
 import numpy as np
 import pandas as pd
 import seaborn as sns
+from sklearn.model_selection import cross_val_score
+from sklearn.svm import SVC
 from ..data_import import condition_factors_meta
 from ..predict import pca_acc
 from .common import subplotLabel, getSetup
 
+# Make modifying the target PCs easier while exploring
+C19_TARGET = 3
+nC19_TARGET = 4
+C19_TARGET_STR = f"PC {C19_TARGET}"
+nC19_TARGET_STR = f"PC {nC19_TARGET}"
+target_pcs = {C19_TARGET_STR: C19_TARGET, nC19_TARGET_STR: nC19_TARGET}
+
+def get_nth_key(target_dict, n):
+    for i, key in enumerate(target_dict):
+        if i == n:
+            return key
+    raise IndexError("Index out of range")
 
 def makeFigure():
     """Get a list of the axis objects and create a figure."""
@@ -20,7 +34,7 @@ def makeFigure():
 
     cond_fact_meta_df = condition_factors_meta(X)
 
-    labels, pca_results_both = pca_acc(X, cond_fact_meta_df, n_components=3)
+    labels, pca_results_both = pca_acc(X, cond_fact_meta_df, n_components=5)
 
     plot_pca_loadings(pca_results_both, ax[0], ax[1])
     ax[0].set(xlim=[-0.35, 0.35])
@@ -35,31 +49,27 @@ def makeFigure():
 
 def plot_pca_loadings(pca_results, ax1, ax2):
     """Plots PCA component loadings for C19 and nC19 models."""
-    TARGET_PC = 3
-    TARGET_STR = f"PC {TARGET_PC}"
     ax = [ax1, ax2]
     type_of_data = ["C19", "nC19"]
 
     for i in range(2):
         pca = pca_results[i].named_steps["pca"]
 
-        x_load = pca.components_[TARGET_PC - 1, :]
+        x_load = pca.components_[target_pcs[get_nth_key(target_pcs, i)] - 1, :]
         if i == 1:
             x_load = -1 * x_load
-        df_xload = pd.DataFrame(data=x_load, columns=[TARGET_STR])
+        df_xload = pd.DataFrame(data=x_load, columns=[get_nth_key(target_pcs, i)])
         df_xload["Component"] = np.arange(df_xload.shape[0]) + 1
-        print(df_xload.sort_values(by=TARGET_STR))
+        print(df_xload.sort_values(by=get_nth_key(target_pcs, i)))
 
-        sns.swarmplot(data=df_xload, x=TARGET_STR, ax=ax[i], color="k")
+        sns.swarmplot(data=df_xload, x=get_nth_key(target_pcs, i), ax=ax[i], color="k")
         ax[i].set(
-            xlabel=TARGET_STR, ylabel="Pf2 Components", title=f"{type_of_data[i]}-loadings"
+            xlabel=get_nth_key(target_pcs, i), ylabel="Pf2 Components", title=f"{type_of_data[i]}-loadings"
         )
 
 
 def plot_pca_scores(pca_results, cond_fact_meta_df, labels, ax1, ax2):
     """Plots PCA scores for C19 and nC19 patients colored by mortality outcome."""
-    TARGET_PC = 3
-    TARGET_STR = f"PC {TARGET_PC}"
     ax = [ax1, ax2]
     type_of_data = ["C19", "nC19"]
 
@@ -75,28 +85,28 @@ def plot_pca_scores(pca_results, cond_fact_meta_df, labels, ax1, ax2):
         score_labels = labels.loc[mask]
         subset_data = cond_fact_meta_df.loc[mask, cmp_cols]
 
-        x_scores = pca_results[i][:-1].transform(subset_data)[:, TARGET_PC - 1]
+        x_scores = pca_results[i][:-1].transform(subset_data)[:, target_pcs[get_nth_key(target_pcs, i)] - 1]
         if i == 1:
             x_scores = -1 * x_scores
 
         pal = sns.color_palette()
         numb1, numb2 = (0, 2) if i == 0 else (1, 3)
 
-        df_xscores = pd.DataFrame(data=x_scores, columns=[TARGET_STR])
+        df_xscores = pd.DataFrame(data=x_scores, columns=[get_nth_key(target_pcs, i)])
         sns.swarmplot(
             data=df_xscores,
-            x=TARGET_STR,
+            x=get_nth_key(target_pcs, i),
             ax=ax[i],
             hue=score_labels.to_numpy(),
             palette=[pal[numb1], pal[numb2]],
             hue_order=[1, 0],
         )
-        ax[i].set(xlabel=TARGET_STR, ylabel="Samples", title=f"{type_of_data[i]}-scores")
+        ax[i].set(xlabel=get_nth_key(target_pcs, i), ylabel="Samples", title=f"{type_of_data[i]}-scores")
 
 def plot_pca_scores_2d(pca_results, cond_fact_meta_df, labels, ax1, ax2):
-    """Plots PCA scores of PC1 vs PC2 and PC1 vs PC3 for C19 and nC19 patients colored by mortality outcome."""
+    """Plots PCA scores 2D with RBF-SVM decision boundaries and cross-validated AUC."""
     ax = [ax1, ax2]
-    pc_pairs = [(0, 1), (0, 2)]
+    pc_pairs = [(1, 2), (0, 2)]
 
     cond_fact_meta_df = cond_fact_meta_df.loc[
         cond_fact_meta_df.loc[:, "patient_category"] != "Non-Pneumonia Control", :
@@ -105,13 +115,39 @@ def plot_pca_scores_2d(pca_results, cond_fact_meta_df, labels, ax1, ax2):
     cmp_cols = [c for c in cond_fact_meta_df.columns if c.startswith("Cmp.")]
     c19_mask = cond_fact_meta_df.loc[:, "patient_category"] == "COVID-19"
 
-    for i, (pc_x, pc_y) in enumerate(pc_pairs):
-        score_labels = labels.loc[c19_mask]
-        subset_data = cond_fact_meta_df.loc[c19_mask, cmp_cols]
+    score_labels = labels.loc[c19_mask]
+    subset_data = cond_fact_meta_df.loc[c19_mask, cmp_cols]
+    scores = pca_results[0][:-1].transform(subset_data)[:, :3]
+    y_svm = score_labels.to_numpy().astype(int)
 
-        scores = pca_results[0][:-1].transform(subset_data)[:, :3]
+    pal = sns.color_palette()
+
+    for i, (pc_x, pc_y) in enumerate(pc_pairs):
+        X_svm = np.column_stack([scores[:, pc_x], scores[:, pc_y]])
+
+        svm = SVC(kernel="rbf", probability=True)
+        svm.fit(X_svm, y_svm)
+        cv_auc = cross_val_score(
+            SVC(kernel="rbf", probability=True),
+            X_svm, y_svm, cv=5, scoring="roc_auc",
+        ).mean()
+
+        # Decision boundary drawn before scatter so points sit on top
+        x_pad = (X_svm[:, 0].max() - X_svm[:, 0].min()) * 0.1
+        y_pad = (X_svm[:, 1].max() - X_svm[:, 1].min()) * 0.1
+        xx, yy = np.meshgrid(
+            np.linspace(X_svm[:, 0].min() - x_pad, X_svm[:, 0].max() + x_pad, 300),
+            np.linspace(X_svm[:, 1].min() - y_pad, X_svm[:, 1].max() + y_pad, 300),
+        )
+        Z = svm.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
+        # Z<0 → class 0 (Lived, pal[2]); Z>0 → class 1 (Died, pal[0])
+        ax[i].contourf(
+            xx, yy, Z, levels=[-1000, 0, 1000], alpha=0.15, colors=[pal[2], pal[0]]
+        )
+        ax[i].contour(
+            xx, yy, Z, levels=[0], colors="k", linewidths=1.5, linestyles="--"
+        )
 
-        pal = sns.color_palette()
         df_scores = pd.DataFrame(
             {
                 f"PC {pc_x + 1}": scores[:, pc_x],
@@ -131,6 +167,6 @@ def plot_pca_scores_2d(pca_results, cond_fact_meta_df, labels, ax1, ax2):
         ax[i].set(
             xlabel=f"PC {pc_x + 1}",
             ylabel=f"PC {pc_y + 1}",
-            title="C19-scores 2D",
+            title=f"C19-scores 2D (CV AUC={cv_auc:.2f})",
         )
 

From 36edabb942d423b41e5eeb1ee72ac122333471dc Mon Sep 17 00:00:00 2001
From: MrBones1102 <jamespopoli@gmail.com>
Date: Wed, 6 May 2026 11:29:20 -0700
Subject: [PATCH 10/10] peform SVM for full 5-component model

Co-authored-by: Copilot <copilot@github.com>
---
 pf2/figures/figureA3d_g.py | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/pf2/figures/figureA3d_g.py b/pf2/figures/figureA3d_g.py
index 76600d8..8005743 100644
--- a/pf2/figures/figureA3d_g.py
+++ b/pf2/figures/figureA3d_g.py
@@ -19,6 +19,7 @@
 nC19_TARGET_STR = f"PC {nC19_TARGET}"
 target_pcs = {C19_TARGET_STR: C19_TARGET, nC19_TARGET_STR: nC19_TARGET}
 
+# Helper function to get keys by index since dicts don't support direct indexing
 def get_nth_key(target_dict, n):
     for i, key in enumerate(target_dict):
         if i == n:
@@ -106,7 +107,7 @@ def plot_pca_scores(pca_results, cond_fact_meta_df, labels, ax1, ax2):
 def plot_pca_scores_2d(pca_results, cond_fact_meta_df, labels, ax1, ax2):
     """Plots PCA scores 2D with RBF-SVM decision boundaries and cross-validated AUC."""
     ax = [ax1, ax2]
-    pc_pairs = [(1, 2), (0, 2)]
+    pc_pairs = [(1, 2)]  # PC2 vs. PC3
 
     cond_fact_meta_df = cond_fact_meta_df.loc[
         cond_fact_meta_df.loc[:, "patient_category"] != "Non-Pneumonia Control", :
@@ -117,29 +118,32 @@ def plot_pca_scores_2d(pca_results, cond_fact_meta_df, labels, ax1, ax2):
 
     score_labels = labels.loc[c19_mask]
     subset_data = cond_fact_meta_df.loc[c19_mask, cmp_cols]
-    scores = pca_results[0][:-1].transform(subset_data)[:, :3]
+    scores = pca_results[0][:-1].transform(subset_data)[:, :5]
     y_svm = score_labels.to_numpy().astype(int)
 
+    # Perform SVM on 5-component model for total accuracy reporting
+    cv_auc_5d = cross_val_score(
+        SVC(kernel="rbf", probability=True),
+        scores, y_svm, cv=5, scoring="roc_auc",
+    ).mean()
+
     pal = sns.color_palette()
 
+    # Plot just pairs of PCs with SVM decision boundaries for visualization
     for i, (pc_x, pc_y) in enumerate(pc_pairs):
-        X_svm = np.column_stack([scores[:, pc_x], scores[:, pc_y]])
+        X_2d = np.column_stack([scores[:, pc_x], scores[:, pc_y]])
 
-        svm = SVC(kernel="rbf", probability=True)
-        svm.fit(X_svm, y_svm)
-        cv_auc = cross_val_score(
-            SVC(kernel="rbf", probability=True),
-            X_svm, y_svm, cv=5, scoring="roc_auc",
-        ).mean()
+        svm_2d = SVC(kernel="rbf", probability=True)
+        svm_2d.fit(X_2d, y_svm)
 
         # Decision boundary drawn before scatter so points sit on top
-        x_pad = (X_svm[:, 0].max() - X_svm[:, 0].min()) * 0.1
-        y_pad = (X_svm[:, 1].max() - X_svm[:, 1].min()) * 0.1
+        x_pad = (X_2d[:, 0].max() - X_2d[:, 0].min()) * 0.1
+        y_pad = (X_2d[:, 1].max() - X_2d[:, 1].min()) * 0.1
         xx, yy = np.meshgrid(
-            np.linspace(X_svm[:, 0].min() - x_pad, X_svm[:, 0].max() + x_pad, 300),
-            np.linspace(X_svm[:, 1].min() - y_pad, X_svm[:, 1].max() + y_pad, 300),
+            np.linspace(X_2d[:, 0].min() - x_pad, X_2d[:, 0].max() + x_pad, 300),
+            np.linspace(X_2d[:, 1].min() - y_pad, X_2d[:, 1].max() + y_pad, 300),
         )
-        Z = svm.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
+        Z = svm_2d.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
         # Z<0 → class 0 (Lived, pal[2]); Z>0 → class 1 (Died, pal[0])
         ax[i].contourf(
             xx, yy, Z, levels=[-1000, 0, 1000], alpha=0.15, colors=[pal[2], pal[0]]
@@ -167,6 +171,6 @@ def plot_pca_scores_2d(pca_results, cond_fact_meta_df, labels, ax1, ax2):
         ax[i].set(
             xlabel=f"PC {pc_x + 1}",
             ylabel=f"PC {pc_y + 1}",
-            title=f"C19-scores 2D (CV AUC={cv_auc:.2f})",
+            title=f"C19-scores 2D (5-PC SVM CV AUC={cv_auc_5d:.2f})",
         )