Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions causalml/dataset/synthetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,11 +409,19 @@ def get_synthetic_preds_holdout(
else:
learner = base_learner(model())
learner.fit(X=X_train, p=p_hat_train, treatment=w_train, y=y_train)

preds_dict_train["{} Learner ({})".format(label_l, label_m)] = (
learner.predict(X=X_train).flatten()
learner.predict(
X=X_train,
p=p_hat_train,
).flatten()
)

preds_dict_valid["{} Learner ({})".format(label_l, label_m)] = (
learner.predict(X=X_val).flatten()
learner.predict(
X=X_val,
p=p_hat_val,
).flatten()
)

return preds_dict_train, preds_dict_valid
Expand Down
95 changes: 80 additions & 15 deletions causalml/inference/meta/rlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
yhat = cross_val_predict(
self.model_mu, X, y_np, cv=self.cv, n_jobs=self.cv_n_jobs
)
# Fit the nuisance outcome model on the full data so it can be
# reused by predict(return_components=True).
self.model_mu.fit(X, y_np)

for group in self.t_groups:
mask = (treatment_np == group) | (treatment_np == self.control_name)
Expand Down Expand Up @@ -186,21 +189,48 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
)
return self

def predict(
    self,
    X,
    p=None,
    return_components=False,
):
    """Predict treatment effects.

    Args:
        X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix.
            A pl.LazyFrame is collected once at the start of this method.
        p (np.ndarray, pd.Series, pl.Series, or dict, optional): propensity scores.
            Only consulted when ``return_components=True``; a plain
            ``predict(X)`` call never needs it.
        return_components (bool): whether to also return the nuisance
            components (outcome prediction and propensity scores).

    Returns:
        (numpy.ndarray): Predictions of treatment effects, one column per
            entry of ``self.t_groups``. When ``return_components=True`` a
            tuple ``(te, yhat, p)`` is returned instead, where ``yhat`` is
            ``self.model_mu.predict(X)`` and ``p`` is the propensity
            mapping (keyed by treatment group when estimated here;
            otherwise whatever ``self._format_p`` returns).

    Raises:
        ValueError: if ``return_components=True``, ``p`` is None and the
            learner has no fitted ``propensity_model`` to fall back on.
    """
    X = collect_if_lazy(X)

    te = np.zeros((n_rows(X), self.t_groups.shape[0]))
    for i, group in enumerate(self.t_groups):
        te[:, i] = self.models_tau[group].predict(X)

    # Nuisance components are resolved lazily: a plain predict(X) must keep
    # working for learners fit with user-supplied propensity scores (which
    # have no propensity model), and should not pay for an extra outcome
    # model prediction it never returns.
    if not return_components:
        return te

    if p is None:
        if not hasattr(self, "propensity_model"):
            raise ValueError(
                "No propensity model is available. Please provide `p` or fit the learner with p=None."
            )
        p = {
            group: self.propensity_model[group].predict(X)
            for group in self.t_groups
        }
    else:
        p = self._format_p(p, self.t_groups)

    yhat = self.model_mu.predict(X)

    return te, yhat, p

def fit_predict(
self,
Expand All @@ -212,6 +242,7 @@ def fit_predict(
return_ci=False,
n_bootstraps=1000,
bootstrap_size=10000,
return_components=False,
verbose=True,
):
"""Fit the R learner and predict treatment effects.
Expand All @@ -228,13 +259,28 @@ def fit_predict(
return_ci (bool): whether to return confidence intervals
n_bootstraps (int): number of bootstrap iterations
bootstrap_size (int): number of samples per bootstrap
return_components (bool, optional): whether to return the nuisance
outcome prediction (yhat) and propensity estimates (p) in addition
to treatment effects.
verbose (bool): whether to output progress logs
Returns:
(numpy.ndarray): Predictions of treatment effects.
"""
if return_ci and return_components:
raise ValueError("return_ci and return_components cannot both be True.")
X = collect_if_lazy(X)
self.fit(X, treatment, y, p, sample_weight, verbose=verbose)
te = self.predict(X)

if p is None:
p = self.propensity
else:
p = self._format_p(p, self.t_groups)

te = self.predict(
X,
p=p,
return_components=return_components,
)

if not return_ci:
return te
Expand Down Expand Up @@ -495,6 +541,7 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
yhat = cross_val_predict(
self.model_mu, X, y_np, cv=self.cv, method="predict_proba", n_jobs=-1
)[:, 1]
self.model_mu.fit(X, y_np)

for group in self.t_groups:
mask = (treatment_np == group) | (treatment_np == self.control_name)
Expand Down Expand Up @@ -532,21 +579,38 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
)
return self

def predict(
    self,
    X,
    p=None,
    return_components=False,
):
    """Predict treatment effects.

    Args:
        X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix.
            A pl.LazyFrame is collected once at the start of this method.
        p (np.ndarray, pd.Series, pl.Series, or dict, optional): propensity scores.
            Only consulted when ``return_components=True``; a plain
            ``predict(X)`` call never needs it.
        return_components (bool): whether to also return the nuisance
            components (outcome probability and propensity scores).

    Returns:
        (numpy.ndarray): Predictions of treatment effects, one column per
            entry of ``self.t_groups``. When ``return_components=True`` a
            tuple ``(te, yhat, p)`` is returned instead, where ``yhat`` is
            column 1 of ``self.model_mu.predict_proba(X)`` (presumably the
            positive-class probability) and ``p`` is the propensity
            mapping (keyed by treatment group when estimated here;
            otherwise whatever ``self._format_p`` returns).

    Raises:
        ValueError: if ``return_components=True``, ``p`` is None and the
            learner has no fitted ``propensity_model`` to fall back on.
    """
    X = collect_if_lazy(X)

    te = np.zeros((n_rows(X), self.t_groups.shape[0]))
    for i, group in enumerate(self.t_groups):
        te[:, i] = self.models_tau[group].predict(X)

    # Nuisance components are resolved lazily: a plain predict(X) must keep
    # working for learners fit with user-supplied propensity scores (which
    # have no propensity model), and should not pay for an extra
    # predict_proba call it never returns.
    if not return_components:
        return te

    if p is None:
        if not hasattr(self, "propensity_model"):
            raise ValueError(
                "No propensity model is available. Please provide `p` or fit the learner with p=None."
            )
        p = {
            group: self.propensity_model[group].predict(X)
            for group in self.t_groups
        }
    else:
        p = self._format_p(p, self.t_groups)

    yhat = self.model_mu.predict_proba(X)[:, 1]

    return te, yhat, p


class XGBRRegressor(BaseRRegressor):
Expand Down Expand Up @@ -691,6 +755,7 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
if verbose:
logger.info("generating out-of-fold CV outcome estimates")
yhat = cross_val_predict(self.model_mu, X, y_np, cv=self.cv, n_jobs=-1)
self.model_mu.fit(X, y_np)

for group in self.t_groups:
treatment_mask = (treatment_np == group) | (
Expand Down
165 changes: 160 additions & 5 deletions tests/test_meta_learners.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,54 @@ def test_BaseRLearner_without_p(generate_regression_data):
assert auuc["cate_p"] > 0.5


def test_BaseRLearner_predict_return_components_different_size(
    generate_regression_data,
):
    """Components must be sized by the prediction input, not the training input.

    The learner is fit on the first 200 rows (no propensity scores passed)
    and then asked for components on the 100 rows that follow.
    """
    y, X, treatment, tau, b, e = generate_regression_data()

    fit_rows = slice(None, 200)
    predict_rows = slice(200, 300)
    n_predict = 100

    r_learner = BaseRLearner(learner=LinearRegression())
    r_learner.fit(
        X=X[fit_rows],
        treatment=treatment[fit_rows],
        y=y[fit_rows],
        verbose=False,
    )

    components = r_learner.predict(X=X[predict_rows], return_components=True)
    cate, outcome_hat, propensity = components

    assert cate.shape == (n_predict, len(r_learner.t_groups))
    assert outcome_hat.shape == (n_predict,)

    # One propensity vector per treatment group, each as long as the input.
    for group in r_learner.t_groups:
        assert propensity[group].shape == (n_predict,)

def test_BaseRLearner_predict_without_propensity_model_raises(
    generate_regression_data,
):
    """Requesting components without any propensity source must fail loudly.

    The learner is fit with explicit propensity scores, then asked for
    components without `p`; a ValueError is expected.
    """
    y, X, treatment, tau, b, e = generate_regression_data()

    r_learner = BaseRLearner(learner=LinearRegression())
    # NOTE(review): presumably fitting with p=e leaves no propensity model on
    # the learner, which is what triggers the error below - confirm in fit().
    r_learner.fit(X=X, treatment=treatment, y=y, p=e, verbose=False)

    with pytest.raises(ValueError):
        r_learner.predict(X=X, return_components=True)


def test_BaseRRegressor_without_p(generate_regression_data):
y, X, treatment, tau, b, e = generate_regression_data()

Expand Down Expand Up @@ -914,7 +962,25 @@ def test_BaseRClassifier(generate_classification_data):
y=df_train[CONVERSION].values,
)

tau_pred = uplift_model.predict(X=df_test[x_names].values)
tau_pred = uplift_model.predict(
X=df_test[x_names].values,
p=df_test["propensity_score"].values,
)

te, yhat, p = uplift_model.predict(
X=df_test[x_names].values,
p=df_test["propensity_score"].values,
return_components=True,
)

assert te.shape == tau_pred.shape
assert yhat.shape == (len(df_test),)

assert isinstance(p, dict)
assert set(p.keys()) == set(uplift_model.t_groups)

for g in uplift_model.t_groups:
assert p[g].shape == (len(df_test),)

auuc_metrics = pd.DataFrame(
{
Expand Down Expand Up @@ -964,7 +1030,10 @@ def test_BaseRClassifier_with_sample_weights(generate_classification_data):
sample_weight=df_train["sample_weights"],
)

tau_pred = uplift_model.predict(X=df_test[x_names].values)
tau_pred = uplift_model.predict(
X=df_test[x_names].values,
p=df_test["propensity_score"].values,
)

auuc_metrics = pd.DataFrame(
{
Expand Down Expand Up @@ -995,7 +1064,25 @@ def test_XGBRegressor_with_sample_weights(generate_regression_data):
# when sample_weight is passed
uplift_model = XGBRRegressor()
uplift_model.fit(X=X, p=e, treatment=treatment, y=y, sample_weight=weights)
tau_pred = uplift_model.predict(X=X)
tau_pred = uplift_model.predict(
X=X,
p=e,
)

te, yhat, p = uplift_model.predict(
X=X,
p=e,
return_components=True,
)

assert te.shape == tau_pred.shape
assert yhat.shape == (X.shape[0],)

assert isinstance(p, dict)

for g in uplift_model.t_groups:
assert p[g].shape == (X.shape[0],)

assert len(tau_pred) == len(weights)


Expand Down Expand Up @@ -1657,15 +1744,83 @@ def _assert_plain_fit_predict(result, name):
assert hasattr(rl, attr) and isinstance(getattr(rl, attr), dict)
assert set(getattr(rl, attr).keys()) == set(rl.t_groups)

# R-learner: predict(X, p=...) returns CATE only (no return_components path).
# R-learner: predict(return_components=True) returns
# (te, yhat, propensity) where yhat and propensity are
# the nuisance components used by the R-learner.
te = rl.predict(X=X, p=p_scores)
_assert_te(te, name, "predict()")

out_pc = rl.predict(X=X, p=p_scores, return_components=True)
assert isinstance(out_pc, tuple) and len(out_pc) == 3

te2, yhat, p = out_pc

np.testing.assert_array_equal(
te,
te2,
err_msg=f"{name}: predict inconsistency",
)

assert isinstance(yhat, np.ndarray)
assert yhat.shape == (n,)
assert np.all(np.isfinite(yhat))

assert isinstance(p, dict)
assert set(p.keys()) == set(rl.t_groups)

for g in rl.t_groups:
assert isinstance(p[g], np.ndarray)
assert p[g].shape == (n,)
assert np.all(np.isfinite(p[g]))

fp_plain_r = rl.fit_predict(
X=X, treatment=treatment, y=y, p=p_scores, verbose=False
X=X,
treatment=treatment,
y=y,
p=p_scores,
verbose=False,
)

_assert_plain_fit_predict(fp_plain_r, name)
_assert_te(fp_plain_r, name, "fit_predict()")

fp_components = rl.fit_predict(
X=X,
treatment=treatment,
y=y,
p=p_scores,
return_components=True,
verbose=False,
)

assert isinstance(fp_components, tuple)
assert len(fp_components) == 3

te_fp, yhat_fp, p_fp = fp_components

_assert_te(te_fp, name, "fit_predict(return_components=True)")

assert isinstance(yhat_fp, np.ndarray)
assert yhat_fp.shape == (n,)
assert np.all(np.isfinite(yhat_fp))

assert isinstance(p_fp, dict)
assert set(p_fp.keys()) == set(rl.t_groups)

for g in rl.t_groups:
assert isinstance(p_fp[g], np.ndarray)
assert p_fp[g].shape == (n,)
assert np.all(np.isfinite(p_fp[g]))
with pytest.raises(ValueError):
rl.fit_predict(
X=X,
treatment=treatment,
y=y,
p=p_scores,
return_ci=True,
return_components=True,
verbose=False,
)
_assert_ci_triple(
rl.fit_predict(
X=X,
Expand Down