Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 46 additions & 23 deletions causalml/inference/meta/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@
from tqdm import tqdm

from causalml.inference.meta.explainer import Explainer
from causalml.inference.meta.utils import check_p_conditions, convert_pd_to_np
from causalml.inference.meta.utils import (
check_p_conditions,
filter_mask,
filter_index,
n_rows,
to_numpy,
)
from causalml.propensity import compute_propensity_score

logger = logging.getLogger("causalml")
Expand All @@ -30,8 +36,9 @@ def _fit_bootstrap_clone(learner_template, X, treatment, y, p, seed, bootstrap_s
A fitted clone of learner_template trained on a bootstrap sample.
"""
rng = np.random.RandomState(seed)
idxs = rng.choice(np.arange(X.shape[0]), size=bootstrap_size)
X_b = X[idxs]
idxs = rng.choice(np.arange(n_rows(X)), size=bootstrap_size)

X_b = filter_index(X, idxs)
treatment_b = treatment[idxs]
y_b = y[idxs]
p_b = {group: _p[idxs] for group, _p in p.items()} if p is not None else None
Expand Down Expand Up @@ -102,12 +109,27 @@ def estimate_ate(
pass

def bootstrap(self, X, treatment, y, p=None, size=10000, rng=None):
"""Runs a single bootstrap. Fits on bootstrapped sample, then predicts on whole population."""
"""Runs a single bootstrap. Fits on bootstrapped sample, then predicts on whole population.

Args:
X (np.matrix, np.array, pd.DataFrame, or pl.DataFrame): a feature matrix.
Resampled natively via :func:`filter_index`, so X stays in its
original format (numpy/pandas/polars) throughout.
treatment (np.array): a treatment vector (numpy)
y (np.array): an outcome vector (numpy)
p (dict, optional): a dict of {treatment group: propensity scores (numpy)}
size (int, optional): number of samples to draw with replacement
rng (np.random.Generator, optional): random number generator for
deterministic resampling
Returns:
(numpy.ndarray): Predictions of treatment effects on the full X
from a model trained on the resampled subset.
"""
if rng is not None:
idxs = rng.choice(np.arange(0, X.shape[0]), size=size)
idxs = rng.choice(np.arange(0, n_rows(X)), size=size)
else:
idxs = np.random.choice(np.arange(0, X.shape[0]), size=size)
X_b = X[idxs]
idxs = np.random.choice(np.arange(0, n_rows(X)), size=size)
X_b = filter_index(X, idxs)

if p is not None:
p_b = {group: _p[idxs] for group, _p in p.items()}
Expand Down Expand Up @@ -171,21 +193,19 @@ def _format_p(p, t_groups):
"""Format propensity scores into a dictionary of {treatment group: propensity scores}.

Args:
p (np.ndarray, pd.Series, or dict): propensity scores
p (np.ndarray, pd.Series, pl.Series, or dict): propensity scores
t_groups (list): treatment group names.

Returns:
dict of {treatment group: propensity scores}
dict of {treatment group: propensity scores (numpy.ndarray)}
"""
check_p_conditions(p, t_groups)

if isinstance(p, (np.ndarray, pd.Series)):
if isinstance(p, dict):
p = {treatment_name: to_numpy(_p) for treatment_name, _p in p.items()}
else:
treatment_name = t_groups[0]
p = {treatment_name: convert_pd_to_np(p)}
elif isinstance(p, dict):
p = {
treatment_name: convert_pd_to_np(_p) for treatment_name, _p in p.items()
}
p = {treatment_name: to_numpy(p)}

return p

Expand All @@ -199,19 +219,22 @@ def _set_propensity_models(self, X, treatment, y):
PropensityModel (i.e. ElasticNetPropensityModel).

Args:
X (np.matrix or np.array or pd.Dataframe): a feature matrix
treatment (np.array or pd.Series): a treatment vector
y (np.array or pd.Series): an outcome vector
X (np.matrix, np.array, pd.DataFrame, or pl.DataFrame): a feature matrix.
Kept in its native format; scikit-learn >= 1.6 accepts pandas
and Polars DataFrames natively, so no conversion is performed.
treatment (np.array, pd.Series, or pl.Series): a treatment vector
y (np.array, pd.Series, or pl.Series): an outcome vector
"""
logger.info("Generating propensity score")
treatment_np = to_numpy(treatment)
p = dict()
p_model = dict()
for group in self.t_groups:
mask = (treatment == group) | (treatment == self.control_name)
treatment_filt = treatment[mask]
X_filt = X[mask]
w_filt = (treatment_filt == group).astype(int)
w = (treatment == group).astype(int)
mask = (treatment_np == group) | (treatment_np == self.control_name)
treatment_filt_np = treatment_np[mask]
X_filt = filter_mask(X, mask)
w_filt = (treatment_filt_np == group).astype(int)
w = (treatment_np == group).astype(int)
propensity_model = self.model_p if hasattr(self, "model_p") else None
p[group], p_model[group] = compute_propensity_score(
X=X_filt,
Expand Down
Loading