Merge pull request #160 from DoubleML/m-fix-bugs

MalteKurz · web-flow · commit 0537cbee3705 · 2022-05-20T09:29:46.000+02:00
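A couple of bug fixes and minor improvements: fix the `mlr3tunin::trm` namespace typo in the default tune settings, handle `measure = NULL` when tuning, pass the `ml_m` (not `ml_g`) measure when tuning `ml_m` in DoubleMLPLIV, pass `z` into the PLIV score computation instead of re-reading it, drop the redundant `task_type` pre-initialization in IIVM/IRM/PLR, assert `task_type` in the helper functions, and set `params_r` on `ml_r` in the PLIV test helper.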
diff --git a/R/double_ml.R b/R/double_ml.R
@@ -771,7 +771,7 @@ DoubleML = R6Class("DoubleML",
       n_folds_tune = 5,
       rsmp_tune = mlr3::rsmp("cv", folds = 5),
       measure = NULL,
-      terminator = mlr3tunin::trm("evals", n_evals = 20),
+      terminator = mlr3tuning::trm("evals", n_evals = 20),
       algorithm = mlr3tuning::tnr("grid_search"),
       resolution = 5),
     tune_on_folds = FALSE) {
@@ -1311,7 +1311,7 @@ DoubleML = R6Class("DoubleML",
         tune_settings$rsmp_tune = rsmp("cv", folds = tune_settings$n_folds_tune)
       }
 
-      if (test_names(names(tune_settings), must.include = "measure")) {
+      if (test_names(names(tune_settings), must.include = "measure") && !is.null(tune_settings$measure)) {
         assert_list(tune_settings$measure)
         if (!test_names(names(tune_settings$measure),
           subset.of = valid_learner)) {
@@ -1327,13 +1327,12 @@ DoubleML = R6Class("DoubleML",
             check_class(tune_settings$measure[[i_msr]], "Measure"))
         }
       } else {
-        tune_settings$measure = rep(list(NA), length(valid_learner))
+        tune_settings$measure = rep(list(NULL), length(valid_learner))
         names(tune_settings$measure) = valid_learner
       }
 
-      for (i_msr in seq_len(length(tune_settings$measure))) {
-        if (!test_class(tune_settings$measure[[i_msr]], "Measure")) {
-          this_learner = names(tune_settings$measure)[i_msr]
+      for (this_learner in valid_learner) {
+        if (!test_class(tune_settings$measure[[this_learner]], "Measure")) {
           tune_settings$measure[[this_learner]] = set_default_measure(
             tune_settings$measure[[this_learner]],
             private$task_type[[this_learner]])
diff --git a/R/double_ml_iivm.R b/R/double_ml_iivm.R
@@ -245,10 +245,6 @@ DoubleMLIIVM = R6Class("DoubleMLIIVM",
 
       private$check_data(self$data)
       private$check_score(self$score)
-      private$task_type = list(
-        "ml_g" = NULL,
-        "ml_m" = NULL,
-        "ml_r" = NULL)
       ml_g = private$assert_learner(ml_g, "ml_g", Regr = TRUE, Classif = TRUE)
       ml_m = private$assert_learner(ml_m, "ml_m", Regr = FALSE, Classif = TRUE)
       ml_r = private$assert_learner(ml_r, "ml_r", Regr = FALSE, Classif = TRUE)
diff --git a/R/double_ml_irm.R b/R/double_ml_irm.R
@@ -189,9 +189,6 @@ DoubleMLIRM = R6Class("DoubleMLIRM",
 
       private$check_data(self$data)
       private$check_score(self$score)
-      private$task_type = list(
-        "ml_g" = NULL,
-        "ml_m" = NULL)
       ml_g = private$assert_learner(ml_g, "ml_g", Regr = TRUE, Classif = TRUE)
       ml_m = private$assert_learner(ml_m, "ml_m", Regr = FALSE, Classif = TRUE)
 
diff --git a/R/double_ml_pliv.R b/R/double_ml_pliv.R
@@ -296,6 +296,7 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
           return_train_preds = FALSE,
           task_type = private$task_type$ml_m,
           fold_specific_params = private$fold_specific_params)
+        z = self$data$data_model[[self$data$z_cols]]
       } else {
         m_hat = do.call(
           cbind,
@@ -313,6 +314,7 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
                 task_type = private$task_type$ml_m,
                 fold_specific_params = private$fold_specific_params)
             }))
+        z = self$data$data_model[, self$data$z_cols, with = FALSE]
       }
 
       d = self$data$data_model[[self$data$treat_col]]
@@ -328,14 +330,21 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
     score_elements = function(y, z, d, g_hat, m_hat, r_hat, smpls) {
       u_hat = y - g_hat
       w_hat = d - r_hat
+      v_hat = z - m_hat
 
       if (self$data$n_instr == 1) {
-        z = self$data$data_model[[self$data$z_cols]]
-        v_hat = z - m_hat
+        if (is.character(self$score)) {
+          if (self$score == "partialling out") {
+            psi_a = -w_hat * v_hat
+            psi_b = v_hat * u_hat
+          }
+          psis = list(
+            psi_a = psi_a,
+            psi_b = psi_b)
+        } else if (is.function(self$score)) {
+          psis = self$score(y, z, d, g_hat, m_hat, r_hat, smpls)
+        }
       } else {
-        z = self$data$data_model[, self$data$z_cols, with = FALSE]
-        v_hat = z - m_hat
-
         stopifnot(self$apply_cross_fitting)
 
         # Projection: r_hat from projection on m_hat
@@ -349,29 +358,20 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
         r_r_tilde = resample(task_r_tilde, ml_r_tilde, resampling_r_tilde,
           store_models = TRUE)
         r_hat_tilde = as.data.table(r_r_tilde$prediction())$response
-      }
-      if (is.character(self$score)) {
-        if (self$data$n_instr == 1) {
-          if (self$score == "partialling out") {
-            psi_a = -w_hat * v_hat
-            psi_b = v_hat * u_hat
-          }
-        } else {
+
+        if (is.character(self$score)) {
           if (self$score == "partialling out") {
             psi_a = -w_hat * r_hat_tilde
             psi_b = r_hat_tilde * u_hat
           }
-        }
-        psis = list(
-          psi_a = psi_a,
-          psi_b = psi_b)
-      } else if (is.function(self$score)) {
-        if (self$data$n_instr > 1) {
+          psis = list(
+            psi_a = psi_a,
+            psi_b = psi_b)
+        } else if (is.function(self$score)) {
           stop(paste(
             "Callable score not implemented for DoubleMLPLIV with",
             "partialX=TRUE and partialZ=FALSE with several instruments."))
         }
-        psis = self$score(y, z, d, g_hat, m_hat, r_hat, smpls)
       }
       return(psis)
     },
@@ -608,7 +608,7 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
         self$data$treat_col, data_tune_list,
         nuisance_id = "nuis_m",
         param_set$ml_m, tune_settings,
-        tune_settings$measure$ml_g,
+        tune_settings$measure$ml_m,
         private$task_type$ml_m)
 
       m_params = tuning_result_m$params
diff --git a/R/double_ml_plr.R b/R/double_ml_plr.R
@@ -147,9 +147,6 @@ DoubleMLPLR = R6Class("DoubleMLPLR",
 
       private$check_data(self$data)
       private$check_score(self$score)
-      private$task_type = list(
-        "ml_g" = NULL,
-        "ml_m" = NULL)
       ml_g = private$assert_learner(ml_g, "ml_g", Regr = TRUE, Classif = FALSE)
       ml_m = private$assert_learner(ml_m, "ml_m", Regr = TRUE, Classif = TRUE)
 
diff --git a/R/helper.R b/R/helper.R
@@ -5,7 +5,9 @@ dml_cv_predict = function(learner, X_cols, y_col,
   return_train_preds = FALSE, task_type = NULL,
   fold_specific_params = FALSE) {
 
-  # TODO: Asserts
+  valid_task_type = c("regr", "classif")
+  assertChoice(task_type, valid_task_type)
+  # TODO: extend asserts
 
   if (fold_specific_params) {
     stopifnot(length(smpls$train_ids) == length(smpls$test_ids))
@@ -122,6 +124,7 @@ dml_cv_predict = function(learner, X_cols, y_col,
 
 dml_tune = function(learner, X_cols, y_col, data_tune_list,
   nuisance_id, param_set, tune_settings, measure, task_type) {
+
   task_tune = lapply(data_tune_list, function(x) {
     initiate_task(
       id = nuisance_id,
@@ -130,6 +133,9 @@ dml_tune = function(learner, X_cols, y_col, data_tune_list,
       select_cols = X_cols,
       task_type = task_type)
   })
+  valid_task_type = c("regr", "classif")
+  assertChoice(task_type, valid_task_type)
+
   ml_learner = initiate_learner(learner, task_type, params = learner$param_set$values)
   tuning_instance = lapply(task_tune, function(x) {
     TuningInstanceSingleCrit$new(
@@ -154,6 +160,10 @@ dml_tune = function(learner, X_cols, y_col, data_tune_list,
 
 extract_prediction = function(obj_resampling, task_type, n_obs,
   return_train_preds = FALSE) {
+
+  valid_task_type = c("regr", "classif")
+  assertChoice(task_type, valid_task_type)
+
   if (compareVersion(as.character(packageVersion("mlr3")), "0.11.0") < 0) {
     ind_name = "row_id"
   } else {
@@ -204,6 +214,10 @@ extract_prediction = function(obj_resampling, task_type, n_obs,
 }
 
 initiate_learner = function(learner, task_type, params, return_train_preds = FALSE) {
+
+  valid_task_type = c("regr", "classif")
+  assertChoice(task_type, valid_task_type)
+
   ml_learner = learner$clone()
 
   if (!is.null(params)) {
@@ -225,6 +239,9 @@ initiate_learner = function(learner, task_type, params, return_train_preds = FAL
 
 # Function to initialize task (regression or classification)
 initiate_task = function(id, data, target, select_cols, task_type) {
+  valid_task_type = c("regr", "classif")
+  assertChoice(task_type, valid_task_type)
+
   if (!is.null(select_cols)) {
     indx = (names(data) %in% c(select_cols, target))
     data = data[, indx, with = FALSE]
@@ -277,7 +294,10 @@ get_cond_samples = function(smpls, D) {
 }
 
 set_default_measure = function(measure_in = NA, task_type) {
-  if (is.na(measure_in)) {
+  valid_task_type = c("regr", "classif")
+  assertChoice(task_type, valid_task_type)
+
+  if (is.null(measure_in)) {
     if (task_type == "regr") {
       measure = msr("regr.mse")
     } else if (task_type == "classif") {
diff --git a/man/DoubleML.Rd b/man/DoubleML.Rd
diff --git a/tests/testthat/helper-13-dml_pliv_partial_x.R b/tests/testthat/helper-13-dml_pliv_partial_x.R
@@ -123,7 +123,7 @@ fit_nuisance_pliv_partial_x = function(data, y, d, z,
   data_r = data[, r_indx, drop = FALSE]
   task_r = mlr3::TaskRegr$new(id = paste0("nuis_r_", d), backend = data_r, target = d)
   if (!is.null(params_r)) {
-    ml_g$param_set$values = params_r
+    ml_r$param_set$values = params_r
   }
 
   resampling_r = mlr3::rsmp("custom")

Original file line number	Diff line number	Diff line change
`@@ -123,7 +123,7 @@ fit_nuisance_pliv_partial_x = function(data, y, d, z,`
`123`	`123`	`data_r = data[, r_indx, drop = FALSE]`
`124`	`124`	`task_r = mlr3::TaskRegr$new(id = paste0("nuis_r_", d), backend = data_r, target = d)`
`125`	`125`	`if (!is.null(params_r)) {`
`126`		`- ml_g$param_set$values = params_r`
	`126`	`+ ml_r$param_set$values = params_r`
`127`	`127`	`}`
`128`	`128`
`129`	`129`	`resampling_r = mlr3::rsmp("custom")`