
Commit f957191

Merge pull request #169 from DoubleML/store-models
Store estimated models for nuisance parameters
2 parents 38a5b02 + 9bb6ee5 commit f957191

File tree: 8 files changed (+169 −54 lines)


R/double_ml.R

Lines changed: 46 additions & 1 deletion
@@ -201,6 +201,17 @@ DoubleML = R6Class("DoubleML",
       }
     },

+    #' @field models (`array()`) \cr
+    #' The fitted nuisance models after calling
+    #' `fit(store_models=TRUE)`.
+    models = function(value) {
+      if (missing(value)) {
+        return(private$models_)
+      } else {
+        stop("can't set field models")
+      }
+    },
+
     #' @field pval (`numeric()`) \cr
     #' p-values for the causal parameter(s) after calling `fit()`.
     pval = function(value) {

@@ -359,12 +370,21 @@ DoubleML = R6Class("DoubleML",
     #' Indicates whether the predictions for the nuisance functions should be
     #' stored in field `predictions`. Default is `FALSE`.
     #'
+    #'
+    #' @param store_models (`logical(1)`) \cr
+    #' Indicates whether the fitted models for the nuisance functions should be
+    #' stored in field `models` if you want to analyze the models or extract
+    #' information like variable importance. Default is `FALSE`.
+    #'
     #' @return self
-    fit = function(store_predictions = FALSE) {
+    fit = function(store_predictions = FALSE, store_models = FALSE) {

       if (store_predictions) {
         private$initialize_predictions()
       }
+      if (store_models) {
+        private$initialize_models()
+      }

       # TODO: insert check for tuned params
       for (i_rep in 1:self$n_rep) {

@@ -384,6 +404,9 @@ DoubleML = R6Class("DoubleML",
         if (store_predictions) {
           private$store_predictions(res$preds)
         }
+        if (store_models) {
+          private$store_models(res$models)
+        }

         # estimate the causal parameter
         private$all_coef_[private$i_treat, private$i_rep] = private$est_causal_pars()

@@ -1139,6 +1162,7 @@ DoubleML = R6Class("DoubleML",
     psi_a_ = NULL,
     psi_b_ = NULL,
     predictions_ = NULL,
+    models_ = NULL,
     pval_ = NULL,
     score_ = NULL,
     se_ = NULL,

@@ -1415,6 +1439,19 @@ DoubleML = R6Class("DoubleML",
         },
         simplify = F)
     },
+    initialize_models = function() {
+      private$models_ = sapply(self$params_names(),
+        function(x) {
+          sapply(self$data$d_cols,
+            function(x) {
+              lapply(
+                seq(self$n_rep),
+                function(x) vector("list", length = self$n_folds))
+            },
+            simplify = F)
+        },
+        simplify = F)
+    },
     store_predictions = function(preds) {
       for (learner in self$params_names()) {
         if (!is.null(preds[[learner]])) {

@@ -1424,6 +1461,14 @@ DoubleML = R6Class("DoubleML",
         }
       }
     },
+    store_models = function(models) {
+      for (learner in self$params_names()) {
+        if (!is.null(models[[learner]])) {
+          private$models_[[learner]][[self$data$treat_col]][[
+            private$i_rep]] = models[[learner]]
+        }
+      }
+    },
     # Comment from python: The private properties with __ always deliver the
     # single treatment, single (cross-fitting) sample subselection
     # The slicing is based on the two properties self._i_treat,
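
Taken together, these changes add a store_models flag to fit() and a read-only models field that mirrors the layout built in initialize_models(): learner -> treatment column -> repetition -> fold. A minimal usage sketch follows; the data generator, learner choices, and constructor arguments are assumptions based on the DoubleML/mlr3 APIs rather than part of this commit, and older releases may name the PLR outcome learner ml_g instead of ml_l.

library(DoubleML)
library(mlr3)
library(mlr3learners)

# Sketch only: object construction is assumed, not taken from this diff.
set.seed(1234)
dml_data = make_plr_CCDDHNR2018(n_obs = 500)
ml_l = lrn("regr.ranger", importance = "impurity")
ml_m = lrn("regr.ranger", importance = "impurity")
dml_plr = DoubleMLPLR$new(dml_data, ml_l = ml_l, ml_m = ml_m)

# Keep the fitted nuisance learners alongside the predictions.
dml_plr$fit(store_models = TRUE)

# models is nested as learner -> treatment column -> repetition -> fold,
# matching initialize_models()/store_models() above.
fold1_model = dml_plr$models$ml_l[["d"]][[1]][[1]]

# If the stored per-fold object is a trained mlr3 ranger learner,
# variable importance could be read with fold1_model$importance().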

R/double_ml_iivm.R

Lines changed: 17 additions & 9 deletions
@@ -321,7 +321,7 @@ DoubleMLIIVM = R6Class("DoubleMLIIVM",
         fold_specific_params = private$fold_specific_params)

       if (self$subgroups$always_takers == FALSE) {
-        r0_hat = rep(0, self$data$n_obs)
+        r0_hat = list(preds = rep(0, self$data$n_obs), models = NULL)
       } else {
         r0_hat = dml_cv_predict(self$learner$ml_r,
           c(self$data$x_cols, self$data$other_treat_cols),

@@ -336,7 +336,7 @@ DoubleMLIIVM = R6Class("DoubleMLIIVM",
       }

       if (self$subgroups$never_takers == FALSE) {
-        r1_hat = rep(1, self$data$n_obs)
+        r1_hat = list(preds = rep(1, self$data$n_obs), models = NULL)
       } else {
         r1_hat = dml_cv_predict(self$learner$ml_r,
           c(self$data$x_cols, self$data$other_treat_cols),

@@ -356,14 +356,22 @@ DoubleMLIIVM = R6Class("DoubleMLIIVM",
       y = self$data$data_model[[self$data$y_col]]

       res = private$score_elements(
-        y, z, d, g0_hat, g1_hat, m_hat, r0_hat,
-        r1_hat, smpls)
+        y, z, d,
+        g0_hat$preds, g1_hat$preds, m_hat$preds,
+        r0_hat$preds, r1_hat$preds,
+        smpls)
       res$preds = list(
-        "ml_g0" = g0_hat,
-        "ml_g1" = g1_hat,
-        "ml_m" = m_hat,
-        "ml_r0" = r0_hat,
-        "ml_r1" = r1_hat)
+        "ml_g0" = g0_hat$preds,
+        "ml_g1" = g1_hat$preds,
+        "ml_m" = m_hat$preds,
+        "ml_r0" = r0_hat$preds,
+        "ml_r1" = r1_hat$preds)
+      res$models = list(
+        "ml_g0" = g0_hat$models,
+        "ml_g1" = g1_hat$models,
+        "ml_m" = m_hat$models,
+        "ml_r0" = r0_hat$models,
+        "ml_r1" = r1_hat$models)
       return(res)
     },
     score_elements = function(y = y, z = z, d = d,
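
The same pattern recurs in every model class: each nuisance estimate is now a small list carrying a preds vector and a models slot, so score_elements() keeps working on plain prediction vectors while the fitted learners are passed through in res$models. A minimal, self-contained sketch of that convention (illustrative values only, not package code):

# Every nuisance object follows list(preds = ..., models = ...).
n_obs = 100
# No always-takers: predictions are hard-coded and no model is fitted.
r0_hat = list(preds = rep(0, n_obs), models = NULL)

# score_elements() consumes only the prediction vectors ...
stopifnot(is.numeric(r0_hat$preds), length(r0_hat$preds) == n_obs)

# ... while preds and models are collected per learner for later storage.
res = list(
  preds  = list("ml_r0" = r0_hat$preds),
  models = list("ml_r0" = r0_hat$models))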

R/double_ml_irm.R

Lines changed: 12 additions & 5 deletions
@@ -242,7 +242,7 @@ DoubleMLIRM = R6Class("DoubleMLIRM",
         task_type = private$task_type$ml_g,
         fold_specific_params = private$fold_specific_params)

-      g1_hat = NULL
+      g1_hat = list(preds = NULL, models = NULL)
       if ((is.character(self$score) && self$score == "ATE") || is.function(self$score)) {
         g1_hat = dml_cv_predict(self$learner$ml_g,
           c(self$data$x_cols, self$data$other_treat_cols),

@@ -259,11 +259,18 @@ DoubleMLIRM = R6Class("DoubleMLIRM",
       d = self$data$data_model[[self$data$treat_col]]
       y = self$data$data_model[[self$data$y_col]]

-      res = private$score_elements(y, d, g0_hat, g1_hat, m_hat, smpls)
+      res = private$score_elements(
+        y, d,
+        g0_hat$preds, g1_hat$preds, m_hat$preds,
+        smpls)
       res$preds = list(
-        "ml_g0" = g0_hat,
-        "ml_g1" = g1_hat,
-        "ml_m" = m_hat)
+        "ml_g0" = g0_hat$preds,
+        "ml_g1" = g1_hat$preds,
+        "ml_m" = m_hat$preds)
+      res$models = list(
+        "ml_g0" = g0_hat$models,
+        "ml_g1" = g1_hat$models,
+        "ml_m" = m_hat$models)
       return(res)
     },
     score_elements = function(y, d, g0_hat, g1_hat, m_hat, smpls) {

R/double_ml_pliv.R

Lines changed: 40 additions & 27 deletions
@@ -496,7 +496,7 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
           fold_specific_params = private$fold_specific_params)
         z = self$data$data_model[[self$data$z_cols]]
       } else {
-        m_hat = do.call(
+        xx = do.call(
           cbind,
           lapply(
             self$data$z_cols,

@@ -510,19 +510,21 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
               est_params = self$get_params(paste0("ml_m_", x)),
               return_train_preds = FALSE,
               task_type = private$task_type$ml_m,
-              fold_specific_params = private$fold_specific_params)
+              fold_specific_params = private$fold_specific_params)$preds
           }))
+        # TODO: Export of fitted models not implemented for this case
+        m_hat = list(preds = xx, models = NULL)
         z = self$data$data_model[, self$data$z_cols, with = FALSE]
       }

       d = self$data$data_model[[self$data$treat_col]]
       y = self$data$data_model[[self$data$y_col]]

-      g_hat = NULL
+      g_hat = list(preds = NULL, models = NULL)
       if (exists("ml_g", where = private$learner_)) {
         # get an initial estimate for theta using the partialling out score
-        psi_a = -(d - r_hat) * (z - m_hat)
-        psi_b = (z - m_hat) * (y - l_hat)
+        psi_a = -(d - r_hat$preds) * (z - m_hat$preds)
+        psi_b = (z - m_hat$preds) * (y - l_hat$preds)
         theta_initial = -mean(psi_b, na.rm = TRUE) / mean(psi_a, na.rm = TRUE)

         data_aux = data.table(self$data$data_model,

@@ -540,12 +542,19 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
           fold_specific_params = private$fold_specific_params)
       }

-      res = private$score_elements(y, z, d, l_hat, m_hat, r_hat, g_hat, smpls)
+      res = private$score_elements(
+        y, z, d, l_hat$preds, m_hat$preds,
+        r_hat$preds, g_hat$preds, smpls)
       res$preds = list(
-        "ml_l" = l_hat,
-        "ml_m" = m_hat,
-        "ml_r" = r_hat,
-        "ml_g" = g_hat)
+        "ml_l" = l_hat$preds,
+        "ml_m" = m_hat$preds,
+        "ml_r" = r_hat$preds,
+        "ml_g" = g_hat$preds)
+      res$models = list(
+        "ml_l" = l_hat$models,
+        "ml_m" = m_hat$models,
+        "ml_r" = r_hat$models,
+        "ml_g" = g_hat$models)
       return(res)
     },
     score_elements = function(y, z, d, l_hat, m_hat, r_hat, g_hat, smpls) {

@@ -615,7 +624,7 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
         task_type = private$task_type$ml_l,
         fold_specific_params = private$fold_specific_params)

-      m_hat_list = dml_cv_predict(self$learner$ml_m,
+      m_hat = dml_cv_predict(self$learner$ml_m,
         c(
           self$data$x_cols,
           self$data$other_treat_cols,

@@ -628,8 +637,7 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
         return_train_preds = TRUE,
         task_type = private$task_type$ml_m,
         fold_specific_params = private$fold_specific_params)
-      m_hat = m_hat_list$preds
-      data_aux_list = lapply(m_hat_list$train_preds, function(x) {
+      data_aux_list = lapply(m_hat$train_preds, function(x) {
        setnafill(data.table(self$data$data_model, "m_hat_on_train" = x),
          fill = -9999.99) # mlr3 does not allow NA's (values are not used)
      })

@@ -650,13 +658,13 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
       d = self$data$data_model[[self$data$treat_col]]
       y = self$data$data_model[[self$data$y_col]]

-      u_hat = y - l_hat
-      w_hat = d - m_hat_tilde
+      u_hat = y - l_hat$preds
+      w_hat = d - m_hat_tilde$preds

       if (is.character(self$score)) {
         if (self$score == "partialling out") {
-          psi_a = -w_hat * (m_hat - m_hat_tilde)
-          psi_b = (m_hat - m_hat_tilde) * u_hat
+          psi_a = -w_hat * (m_hat$preds - m_hat_tilde$preds)
+          psi_b = (m_hat$preds - m_hat_tilde$preds) * u_hat
         }
         res = list(
           psi_a = psi_a,

@@ -665,12 +673,16 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
         stop(paste(
           "Callable score not implemented for DoubleMLPLIV",
           "with partialX=TRUE and partialZ=TRUE."))
-        # res = self$score(y, d, g_hat, m_hat, m_hat_tilde)
+        # res = self$score(y, d, g_hat$preds, m_hat$preds, m_hat_tilde$preds)
       }
       res$preds = list(
-        "ml_l" = l_hat,
-        "ml_m" = m_hat,
-        "ml_r" = m_hat_tilde)
+        "ml_l" = l_hat$preds,
+        "ml_m" = m_hat$preds,
+        "ml_r" = m_hat_tilde$preds)
+      res$models = list(
+        "ml_l" = l_hat$models,
+        "ml_m" = m_hat$models,
+        "ml_r" = m_hat_tilde$models)
       return(res)
     },

@@ -697,17 +709,18 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",

       if (is.character(self$score)) {
         if (self$score == "partialling out") {
-          psi_a = -r_hat * d
-          psi_b = r_hat * y
+          psi_a = -r_hat$preds * d
+          psi_b = r_hat$preds * y
         }
         res = list(psi_a = psi_a, psi_b = psi_b)
       } else if (is.function(self$score)) {
         stop(paste(
           "Callable score not implemented for DoubleMLPLIV",
           "with partialX=FALSE and partialZ=TRUE."))
-        # res = self$score(y, z, d, r_hat)
+        # res = self$score(y, z, d, r_hat$preds)
       }
-      res$preds = list("ml_r" = r_hat)
+      res$preds = list("ml_r" = r_hat$preds)
+      res$models = list("ml_r" = r_hat$models)
       return(res)
     },

@@ -819,8 +832,8 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
       y = self$data$data_model[[self$data$y_col]]
       z = self$data$data_model[[self$data$z_cols]]

-      psi_a = -(d - r_hat) * (z - m_hat)
-      psi_b = (z - m_hat) * (y - l_hat)
+      psi_a = -(d - r_hat$preds) * (z - m_hat$preds)
+      psi_b = (z - m_hat$preds) * (y - l_hat$preds)
       theta_initial = -mean(psi_b, na.rm = TRUE) / mean(psi_a, na.rm = TRUE)

       data_aux = data.table(self$data$data_model,

R/double_ml_plr.R

Lines changed: 15 additions & 9 deletions
@@ -406,11 +406,11 @@ DoubleMLPLR = R6Class("DoubleMLPLR",
       d = self$data$data_model[[self$data$treat_col]]
       y = self$data$data_model[[self$data$y_col]]

-      g_hat = NULL
+      g_hat = list(preds = NULL, models = NULL)
       if (exists("ml_g", where = private$learner_)) {
         # get an initial estimate for theta using the partialling out score
-        psi_a = -(d - m_hat) * (d - m_hat)
-        psi_b = (d - m_hat) * (y - l_hat)
+        psi_a = -(d - m_hat$preds) * (d - m_hat$preds)
+        psi_b = (d - m_hat$preds) * (y - l_hat$preds)
         theta_initial = -mean(psi_b, na.rm = TRUE) / mean(psi_a, na.rm = TRUE)

         data_aux = data.table(self$data$data_model,

@@ -428,11 +428,17 @@ DoubleMLPLR = R6Class("DoubleMLPLR",
         fold_specific_params = private$fold_specific_params)
       }

-      res = private$score_elements(y, d, l_hat, m_hat, g_hat, smpls)
+      res = private$score_elements(
+        y, d, l_hat$preds, m_hat$preds, g_hat$preds,
+        smpls)
       res$preds = list(
-        "ml_l" = l_hat,
-        "ml_m" = m_hat,
-        "ml_g" = g_hat)
+        "ml_l" = l_hat$preds,
+        "ml_m" = m_hat$preds,
+        "ml_g" = g_hat$preds)
+      res$models = list(
+        "ml_l" = l_hat$models,
+        "ml_m" = m_hat$models,
+        "ml_g" = g_hat$models)
       return(res)
     },
     score_elements = function(y, d, l_hat, m_hat, g_hat, smpls) {

@@ -519,8 +525,8 @@ DoubleMLPLR = R6Class("DoubleMLPLR",
       d = self$data$data_model[[self$data$treat_col]]
       y = self$data$data_model[[self$data$y_col]]

-      psi_a = -(d - m_hat) * (d - m_hat)
-      psi_b = (d - m_hat) * (y - l_hat)
+      psi_a = -(d - m_hat$preds) * (d - m_hat$preds)
+      psi_b = (d - m_hat$preds) * (y - l_hat$preds)
       theta_initial = -mean(psi_b, na.rm = TRUE) / mean(psi_a, na.rm = TRUE)

       data_aux = data.table(self$data$data_model,
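
With one fitted learner stored per fold and repetition, the models can be post-processed after fit(). A hedged sketch, reusing the hypothetical dml_plr object from the first example and assuming the stored per-fold objects are trained mlr3 ranger learners with impurity importance enabled:

# Average the variable importance of ml_m across all repetitions and folds.
models_ml_m = dml_plr$models$ml_m[["d"]]        # list: repetitions -> folds
fold_models = unlist(models_ml_m, recursive = FALSE)
importances = lapply(fold_models, function(fitted) fitted$importance())
imp_avg = Reduce(`+`, importances) / length(importances)
sort(imp_avg, decreasing = TRUE)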
