From 7e2d983b26fdc675daf94380a6c5a8a57700fc86 Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Fri, 3 Jul 2026 21:18:12 +0530 Subject: [PATCH 1/4] add return_components support to R-learner --- causalml/inference/meta/rlearner.py | 95 ++++++++++++++++++++++++----- tests/test_meta_learners.py | 79 +++++++++++++++++++++++- 2 files changed, 157 insertions(+), 17 deletions(-) diff --git a/causalml/inference/meta/rlearner.py b/causalml/inference/meta/rlearner.py index 3120038f..2fa7c078 100644 --- a/causalml/inference/meta/rlearner.py +++ b/causalml/inference/meta/rlearner.py @@ -149,6 +149,9 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): yhat = cross_val_predict( self.model_mu, X, y_np, cv=self.cv, n_jobs=self.cv_n_jobs ) + # Fit the nuisance outcome model on the full data so it can be + # reused by predict(return_components=True). + self.model_mu.fit(X, y_np) for group in self.t_groups: mask = (treatment_np == group) | (treatment_np == self.control_name) @@ -186,21 +189,49 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): ) return self - def predict(self, X, p=None): + def predict( + self, + X, + p=None, + return_components=False, + return_ci=False, + ): """Predict treatment effects. Args: X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix. - A pl.LazyFrame is collected once at the start of this method. + p (np.ndarray, pd.Series, pl.Series, or dict, optional): propensity scores. + return_components (bool): whether to return nuisance components. + return_ci (bool): included for API consistency with other meta-learners. Returns: - (numpy.ndarray): Predictions of treatment effects. + numpy.ndarray or tuple """ + + if return_ci and return_components: + raise ValueError("return_ci and return_components cannot both be True.") + X = collect_if_lazy(X) + + if p is None: + p = { + group: self.propensity_model[group].predict(X) + for group in self.t_groups + } + else: + p = self._format_p(p, self.t_groups) + + yhat = self.model_mu.predict(X) + te = np.zeros((n_rows(X), self.t_groups.shape[0])) + for i, group in enumerate(self.t_groups): te[:, i] = self.models_tau[group].predict(X) - return te + + if not return_components: + return te + + return te, yhat, p def fit_predict( self, @@ -212,6 +243,7 @@ def fit_predict( return_ci=False, n_bootstraps=1000, bootstrap_size=10000, + return_components=False, verbose=True, ): """Fit the R learner and predict treatment effects. @@ -228,13 +260,28 @@ def fit_predict( return_ci (bool): whether to return confidence intervals n_bootstraps (int): number of bootstrap iterations bootstrap_size (int): number of samples per bootstrap + return_components (bool, optional): whether to return the nuisance + outcome prediction (yhat) and propensity estimates (p) in addition + to treatment effects. verbose (bool): whether to output progress logs Returns: (numpy.ndarray): Predictions of treatment effects. """ + if return_ci and return_components: + raise ValueError("return_ci and return_components cannot both be True.") X = collect_if_lazy(X) self.fit(X, treatment, y, p, sample_weight, verbose=verbose) - te = self.predict(X) + + if p is None: + p = self.propensity + else: + p = self._format_p(p, self.t_groups) + + te = self.predict( + X, + p=p, + return_components=return_components, + ) if not return_ci: return te @@ -495,6 +542,7 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): yhat = cross_val_predict( self.model_mu, X, y_np, cv=self.cv, method="predict_proba", n_jobs=-1 )[:, 1] + self.model_mu.fit(X, y_np) for group in self.t_groups: mask = (treatment_np == group) | (treatment_np == self.control_name) @@ -532,21 +580,37 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): ) return self - def predict(self, X, p=None): - """Predict treatment effects. - - Args: - X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix. - A pl.LazyFrame is collected once at the start of this method. + def predict( + self, + X, + p=None, + return_components=False, + return_ci=False, + ): + if return_ci and return_components: + raise ValueError("return_ci and return_components cannot both be True.") - Returns: - (numpy.ndarray): Predictions of treatment effects. - """ X = collect_if_lazy(X) + + if p is None: + p = { + group: self.propensity_model[group].predict(X) + for group in self.t_groups + } + else: + p = self._format_p(p, self.t_groups) + + yhat = self.model_mu.predict_proba(X)[:, 1] + te = np.zeros((n_rows(X), self.t_groups.shape[0])) + for i, group in enumerate(self.t_groups): te[:, i] = self.models_tau[group].predict(X) - return te + + if not return_components: + return te + + return te, yhat, p class XGBRRegressor(BaseRRegressor): @@ -691,6 +755,7 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): if verbose: logger.info("generating out-of-fold CV outcome estimates") yhat = cross_val_predict(self.model_mu, X, y_np, cv=self.cv, n_jobs=-1) + self.model_mu.fit(X, y_np) for group in self.t_groups: treatment_mask = (treatment_np == group) | ( diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index 2efb77b0..ada7b8a4 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -1657,15 +1657,90 @@ def _assert_plain_fit_predict(result, name): assert hasattr(rl, attr) and isinstance(getattr(rl, attr), dict) assert set(getattr(rl, attr).keys()) == set(rl.t_groups) - # R-learner: predict(X, p=...) returns CATE only (no return_components path). + # R-learner: predict(return_components=True) returns + # (te, yhat, propensity) where yhat and propensity are + # the nuisance components used by the R-learner. te = rl.predict(X=X, p=p_scores) _assert_te(te, name, "predict()") + out_pc = rl.predict(X=X, p=p_scores, return_components=True) + assert isinstance(out_pc, tuple) and len(out_pc) == 3 + + te2, yhat, p = out_pc + + np.testing.assert_array_equal( + te, + te2, + err_msg=f"{name}: predict inconsistency", + ) + + assert isinstance(yhat, np.ndarray) + assert yhat.shape == (n,) + assert np.all(np.isfinite(yhat)) + + assert isinstance(p, dict) + assert set(p.keys()) == set(rl.t_groups) + + for g in rl.t_groups: + assert isinstance(p[g], np.ndarray) + assert p[g].shape == (n,) + assert np.all(np.isfinite(p[g])) + fp_plain_r = rl.fit_predict( - X=X, treatment=treatment, y=y, p=p_scores, verbose=False + X=X, + treatment=treatment, + y=y, + p=p_scores, + verbose=False, ) + _assert_plain_fit_predict(fp_plain_r, name) _assert_te(fp_plain_r, name, "fit_predict()") + + fp_components = rl.fit_predict( + X=X, + treatment=treatment, + y=y, + p=p_scores, + return_components=True, + verbose=False, + ) + + assert isinstance(fp_components, tuple) + assert len(fp_components) == 3 + + te_fp, yhat_fp, p_fp = fp_components + + _assert_te(te_fp, name, "fit_predict(return_components=True)") + + assert isinstance(yhat_fp, np.ndarray) + assert yhat_fp.shape == (n,) + assert np.all(np.isfinite(yhat_fp)) + + assert isinstance(p_fp, dict) + assert set(p_fp.keys()) == set(rl.t_groups) + + for g in rl.t_groups: + assert isinstance(p_fp[g], np.ndarray) + assert p_fp[g].shape == (n,) + assert np.all(np.isfinite(p_fp[g])) + with pytest.raises(ValueError): + rl.fit_predict( + X=X, + treatment=treatment, + y=y, + p=p_scores, + return_ci=True, + return_components=True, + verbose=False, + ) + with pytest.raises(ValueError): + rl.predict( + X=X, + p=p_scores, + return_ci=True, + return_components=True, + ) _assert_ci_triple( rl.fit_predict( X=X, From 030f9d8d485bdd20322a2bcedd0dd25d782377de Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Fri, 3 Jul 2026 21:38:10 +0530 Subject: [PATCH 2/4] preserve propensity estimates --- causalml/inference/meta/rlearner.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/causalml/inference/meta/rlearner.py b/causalml/inference/meta/rlearner.py index 2fa7c078..9a63024c 100644 --- a/causalml/inference/meta/rlearner.py +++ b/causalml/inference/meta/rlearner.py @@ -120,6 +120,7 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): p = self.propensity else: p = self._format_p(p, self.t_groups) + self.propensity = p self._classes = {group: i for i, group in enumerate(self.t_groups)} @@ -214,10 +215,7 @@ def predict( X = collect_if_lazy(X) if p is None: - p = { - group: self.propensity_model[group].predict(X) - for group in self.t_groups - } + p = self.propensity else: p = self._format_p(p, self.t_groups) @@ -522,6 +520,7 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): p = self.propensity else: p = self._format_p(p, self.t_groups) + self.propensity = p self._classes = {group: i for i, group in enumerate(self.t_groups)} @@ -593,10 +592,7 @@ def predict( X = collect_if_lazy(X) if p is None: - p = { - group: self.propensity_model[group].predict(X) - for group in self.t_groups - } + p = self.propensity else: p = self._format_p(p, self.t_groups) @@ -714,6 +710,7 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): p = self.propensity else: p = self._format_p(p, self.t_groups) + self.propensity = p self._classes = {group: i for i, group in enumerate(self.t_groups)} From ff5f42db71e296188d3ebeedb77e6e1c87baf2fb Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Sat, 4 Jul 2026 14:10:22 +0530 Subject: [PATCH 3/4] added tests --- causalml/inference/meta/rlearner.py | 29 +++++----- tests/test_meta_learners.py | 83 ++++++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 20 deletions(-) diff --git a/causalml/inference/meta/rlearner.py b/causalml/inference/meta/rlearner.py index 9a63024c..dcfadd9c 100644 --- a/causalml/inference/meta/rlearner.py +++ b/causalml/inference/meta/rlearner.py @@ -120,7 +120,6 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): p = self.propensity else: p = self._format_p(p, self.t_groups) - self.propensity = p self._classes = {group: i for i, group in enumerate(self.t_groups)} @@ -195,7 +194,6 @@ def predict( X, p=None, return_components=False, - return_ci=False, ): """Predict treatment effects. @@ -203,19 +201,22 @@ def predict( X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix. p (np.ndarray, pd.Series, pl.Series, or dict, optional): propensity scores. return_components (bool): whether to return nuisance components. - return_ci (bool): included for API consistency with other meta-learners. Returns: numpy.ndarray or tuple """ - if return_ci and return_components: - raise ValueError("return_ci and return_components cannot both be True.") - X = collect_if_lazy(X) if p is None: - p = self.propensity + if not hasattr(self, "propensity_model"): + raise ValueError( + "No propensity model is available. Please provide `p` or fit the learner with p=None." + ) + p = { + group: self.propensity_model[group].predict(X) + for group in self.t_groups + } else: p = self._format_p(p, self.t_groups) @@ -520,7 +521,6 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): p = self.propensity else: p = self._format_p(p, self.t_groups) - self.propensity = p self._classes = {group: i for i, group in enumerate(self.t_groups)} @@ -584,15 +584,19 @@ def predict( X, p=None, return_components=False, - return_ci=False, ): - if return_ci and return_components: - raise ValueError("return_ci and return_components cannot both be True.") X = collect_if_lazy(X) if p is None: - p = self.propensity + if not hasattr(self, "propensity_model"): + raise ValueError( + "No propensity model is available. Please provide `p` or fit the learner with p=None." + ) + p = { + group: self.propensity_model[group].predict(X) + for group in self.t_groups + } else: p = self._format_p(p, self.t_groups) @@ -710,7 +714,6 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True): p = self.propensity else: p = self._format_p(p, self.t_groups) - self.propensity = p self._classes = {group: i for i, group in enumerate(self.t_groups)} diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index ada7b8a4..2daa7b5a 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -681,6 +681,54 @@ def test_BaseRLearner_without_p(generate_regression_data): assert auuc["cate_p"] > 0.5 +def test_BaseRLearner_predict_return_components_different_size( + generate_regression_data, +): + y, X, treatment, tau, b, e = generate_regression_data() + + learner = BaseRLearner(learner=LinearRegression()) + + learner.fit( + X=X[:200], + treatment=treatment[:200], + y=y[:200], + verbose=False, + ) + + te, yhat, p = learner.predict( + X=X[200:300], + return_components=True, + ) + + assert te.shape == (100, len(learner.t_groups)) + assert yhat.shape == (100,) + + for g in learner.t_groups: + assert p[g].shape == (100,) + + +def test_BaseRLearner_predict_without_propensity_model_raises( + generate_regression_data, +): + y, X, treatment, tau, b, e = generate_regression_data() + + learner = BaseRLearner(learner=LinearRegression()) + + learner.fit( + X=X, + treatment=treatment, + y=y, + p=e, + verbose=False, + ) + + with pytest.raises(ValueError): + learner.predict( + X=X, + return_components=True, + ) + + def test_BaseRRegressor_without_p(generate_regression_data): y, X, treatment, tau, b, e = generate_regression_data() @@ -916,6 +964,20 @@ def test_BaseRClassifier(generate_classification_data): tau_pred = uplift_model.predict(X=df_test[x_names].values) + te, yhat, p = uplift_model.predict( + X=df_test[x_names].values, + return_components=True, + ) + + assert te.shape == tau_pred.shape + assert yhat.shape == (len(df_test),) + + assert isinstance(p, dict) + assert set(p.keys()) == set(uplift_model.t_groups) + + for g in uplift_model.t_groups: + assert p[g].shape == (len(df_test),) + auuc_metrics = pd.DataFrame( { "tau_pred": tau_pred.flatten(), @@ -996,6 +1058,20 @@ def test_XGBRegressor_with_sample_weights(generate_regression_data): uplift_model = XGBRRegressor() uplift_model.fit(X=X, p=e, treatment=treatment, y=y, sample_weight=weights) tau_pred = uplift_model.predict(X=X) + + te, yhat, p = uplift_model.predict( + X=X, + return_components=True, + ) + + assert te.shape == tau_pred.shape + assert yhat.shape == (X.shape[0],) + + assert isinstance(p, dict) + + for g in uplift_model.t_groups: + assert p[g].shape == (X.shape[0],) + assert len(tau_pred) == len(weights) @@ -1734,13 +1810,6 @@ def _assert_plain_fit_predict(result, name): return_components=True, verbose=False, ) - with pytest.raises(ValueError): - rl.predict( - X=X, - p=p_scores, - return_ci=True, - return_components=True, - ) _assert_ci_triple( rl.fit_predict( X=X, From d75f0a3867c70a1d04d72928207d434188d6ca44 Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Sat, 4 Jul 2026 14:23:27 +0530 Subject: [PATCH 4/4] fixing the CI errors --- causalml/dataset/synthetic.py | 12 ++++++++++-- tests/test_meta_learners.py | 17 ++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/causalml/dataset/synthetic.py b/causalml/dataset/synthetic.py index f58b30c3..5d42fc7c 100644 --- a/causalml/dataset/synthetic.py +++ b/causalml/dataset/synthetic.py @@ -409,11 +409,19 @@ def get_synthetic_preds_holdout( else: learner = base_learner(model()) learner.fit(X=X_train, p=p_hat_train, treatment=w_train, y=y_train) + preds_dict_train["{} Learner ({})".format(label_l, label_m)] = ( - learner.predict(X=X_train).flatten() + learner.predict( + X=X_train, + p=p_hat_train, + ).flatten() ) + preds_dict_valid["{} Learner ({})".format(label_l, label_m)] = ( - learner.predict(X=X_val).flatten() + learner.predict( + X=X_val, + p=p_hat_val, + ).flatten() ) return preds_dict_train, preds_dict_valid diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index 2daa7b5a..1bd495d3 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -962,10 +962,14 @@ def test_BaseRClassifier(generate_classification_data): y=df_train[CONVERSION].values, ) - tau_pred = uplift_model.predict(X=df_test[x_names].values) + tau_pred = uplift_model.predict( + X=df_test[x_names].values, + p=df_test["propensity_score"].values, + ) te, yhat, p = uplift_model.predict( X=df_test[x_names].values, + p=df_test["propensity_score"].values, return_components=True, ) @@ -1026,7 +1030,10 @@ def test_BaseRClassifier_with_sample_weights(generate_classification_data): sample_weight=df_train["sample_weights"], ) - tau_pred = uplift_model.predict(X=df_test[x_names].values) + tau_pred = uplift_model.predict( + X=df_test[x_names].values, + p=df_test["propensity_score"].values, + ) auuc_metrics = pd.DataFrame( { @@ -1057,10 +1064,14 @@ def test_XGBRegressor_with_sample_weights(generate_regression_data): # when sample_weight is passed uplift_model = XGBRRegressor() uplift_model.fit(X=X, p=e, treatment=treatment, y=y, sample_weight=weights) - tau_pred = uplift_model.predict(X=X) + tau_pred = uplift_model.predict( + X=X, + p=e, + ) te, yhat, p = uplift_model.predict( X=X, + p=e, return_components=True, )