From f8a7aed4e2c7ddaf91bdfc58cbb0ea9ba35e922e Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Fri, 3 Jul 2026 22:59:50 +0530 Subject: [PATCH 1/2] add CatBoost support to Explainer auto importance --- causalml/inference/meta/explainer.py | 36 ++++++++++++++++++++++------ tests/test_meta_learners.py | 29 ++++++++++++++++++++++ 2 files changed, 58 insertions(+), 7 deletions(-) diff --git a/causalml/inference/meta/explainer.py b/causalml/inference/meta/explainer.py index b92fee86..456426dd 100644 --- a/causalml/inference/meta/explainer.py +++ b/causalml/inference/meta/explainer.py @@ -47,7 +47,7 @@ def __init__( X (np.matrix): a feature matrix tau (np.array): a treatment effect vector (estimated/actual) classes (dict): a mapping of treatment names to indices (used for indexing tau array) - model_tau (sklearn/lightgbm/xgboost model object): a model object + model_tau (sklearn/lightgbm/xgboost/catboost model object): a model object features (np.array): list/array of feature names. If None, an enumerated list will be used. normalize (bool): normalize by sum of importances if method=auto (defaults to True) test_size (float/int): if float, represents the proportion of the dataset to include in the test split. @@ -79,12 +79,33 @@ def __init__( self.create_feature_names() self.build_new_tau_models() + def _get_feature_importances(self, model): + """ + Returns feature importances for supported tree-based estimators. + + Supports: + - feature_importances_ (scikit-learn, LightGBM, XGBoost) + - get_feature_importance() (CatBoost) + """ + if hasattr(model, "feature_importances_"): + return model.feature_importances_ + + if hasattr(model, "get_feature_importance"): + return model.get_feature_importance() + + raise AttributeError( + "model_tau must expose feature importances via " + "`feature_importances_` or `get_feature_importance()` " + "(after fitting)." + ) + def check_conditions(self): """ Checks for multiple conditions: - method is valid - X, tau, and classes are specified - - model_tau has feature_importances_ attribute after fitting + - model_tau exposes feature importances via feature_importances_ or + get_feature_importance() after fitting """ assert self.method in VALID_METHODS, "Current supported methods: {}".format( ", ".join(VALID_METHODS) @@ -97,10 +118,9 @@ def check_conditions(self): model_test = deepcopy(self.model_tau) model_test.fit( [[0], [1]], [0, 1] - ) # Fit w/ dummy data to check for feature_importances_ below - assert hasattr( - model_test, "feature_importances_" - ), "model_tau must have the feature_importances_ method (after fitting)" + ) # Fit w/ dummy data to ensure feature importances are available + + self._get_feature_importances(model_test) def create_feature_names(self): """ @@ -157,7 +177,9 @@ def default_importance(self): if self.r_learners is not None: self.models_tau = deepcopy(self.r_learners) for group, idx in self.classes.items(): - importance_dict[group] = self.models_tau[group].feature_importances_ + importance_dict[group] = self._get_feature_importances( + self.models_tau[group] + ) if self.normalize: importance_dict[group] = ( importance_dict[group] / importance_dict[group].sum() diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index 2efb77b0..ad0dcd7b 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -36,6 +36,7 @@ from causalml.inference.meta import BaseDRLearner from causalml.inference.meta import BaseDRRegressor from causalml.inference.meta import BaseDRClassifier +from causalml.inference.meta.explainer import Explainer from causalml.metrics import ape, auuc_score from .const import RANDOM_SEED, N_SAMPLE, ERROR_THRESHOLD, CONTROL_NAME, CONVERSION @@ -640,6 +641,34 @@ def test_BaseRRegressor(generate_regression_data): assert auuc["cate_p"] > 0.5 +def test_explainer_auto_importance_catboost(generate_regression_data): + catboost = pytest.importorskip("catboost") + + y, X, treatment, tau, b, e = generate_regression_data() + + model_tau = catboost.CatBoostRegressor( + iterations=10, + verbose=False, + random_seed=RANDOM_SEED, + ) + + explainer = Explainer( + method="auto", + control_name=CONTROL_NAME, + X=X, + tau=tau, + classes={CONTROL_NAME: 0}, + model_tau=model_tau, + ) + + importance = explainer.get_importance() + + assert len(importance) == 1 + assert CONTROL_NAME in importance + assert isinstance(importance[CONTROL_NAME], pd.Series) + assert len(importance[CONTROL_NAME]) == X.shape[1] + + def test_BaseRLearner_without_p(generate_regression_data): y, X, treatment, tau, b, e = generate_regression_data() From c90cb23f9268d291aa7ef705021d0d8c91257617 Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Sat, 4 Jul 2026 13:50:21 +0530 Subject: [PATCH 2/2] reframing the PR --- causalml/inference/meta/explainer.py | 31 +++++----------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/causalml/inference/meta/explainer.py b/causalml/inference/meta/explainer.py index 456426dd..b156b027 100644 --- a/causalml/inference/meta/explainer.py +++ b/causalml/inference/meta/explainer.py @@ -79,33 +79,12 @@ def __init__( self.create_feature_names() self.build_new_tau_models() - def _get_feature_importances(self, model): - """ - Returns feature importances for supported tree-based estimators. - - Supports: - - feature_importances_ (scikit-learn, LightGBM, XGBoost) - - get_feature_importance() (CatBoost) - """ - if hasattr(model, "feature_importances_"): - return model.feature_importances_ - - if hasattr(model, "get_feature_importance"): - return model.get_feature_importance() - - raise AttributeError( - "model_tau must expose feature importances via " - "`feature_importances_` or `get_feature_importance()` " - "(after fitting)." - ) - def check_conditions(self): """ Checks for multiple conditions: - method is valid - X, tau, and classes are specified - - model_tau exposes feature importances via feature_importances_ or - get_feature_importance() after fitting + - model_tau has feature_importances_ after fitting """ assert self.method in VALID_METHODS, "Current supported methods: {}".format( ", ".join(VALID_METHODS) @@ -120,7 +99,9 @@ def check_conditions(self): [[0], [1]], [0, 1] ) # Fit w/ dummy data to ensure feature importances are available - self._get_feature_importances(model_test) + assert hasattr( + model_test, "feature_importances_" + ), "model_tau must have the feature_importances_ method (after fitting)" def create_feature_names(self): """ @@ -177,9 +158,7 @@ def default_importance(self): if self.r_learners is not None: self.models_tau = deepcopy(self.r_learners) for group, idx in self.classes.items(): - importance_dict[group] = self._get_feature_importances( - self.models_tau[group] - ) + importance_dict[group] = self.models_tau[group].feature_importances_ if self.normalize: importance_dict[group] = ( importance_dict[group] / importance_dict[group].sum()