#' @title Generates data from a partially linear IV regression model with
#' multiway cluster sample used in Chiang et al. (2021).
#'
#' @description
#' Generates data from a partially linear IV regression model with multiway
#' cluster sample used in Chiang et al. (2021). The data generating process
#' is defined as
#'
#' \eqn{Z_{ij} = X_{ij}' \xi_0 + V_{ij},}
#'
#' \eqn{D_{ij} = Z_{ij}' \pi_{10} + X_{ij}' \pi_{20} + v_{ij},}
#'
#' \eqn{Y_{ij} = D_{ij} \theta + X_{ij}' \zeta_0 + \varepsilon_{ij},}
#'
#' with
#'
#' \eqn{X_{ij} = (1 - \omega_1^X - \omega_2^X) \alpha_{ij}^X
#' + \omega_1^X \alpha_{i}^X + \omega_2^X \alpha_{j}^X,}
#'
#' \eqn{\varepsilon_{ij} = (1 - \omega_1^\varepsilon - \omega_2^\varepsilon) \alpha_{ij}^\varepsilon
#' + \omega_1^\varepsilon \alpha_{i}^\varepsilon + \omega_2^\varepsilon \alpha_{j}^\varepsilon,}
#'
#' \eqn{v_{ij} = (1 - \omega_1^v - \omega_2^v) \alpha_{ij}^v
#' + \omega_1^v \alpha_{i}^v + \omega_2^v \alpha_{j}^v,}
#'
#' \eqn{V_{ij} = (1 - \omega_1^V - \omega_2^V) \alpha_{ij}^V
#' + \omega_1^V \alpha_{i}^V + \omega_2^V \alpha_{j}^V,}
#'
#' and \eqn{\alpha_{ij}^X, \alpha_{i}^X, \alpha_{j}^X \sim \mathcal{N}(0, \Sigma)}
#' where \eqn{\Sigma} is a \eqn{p_x \times p_x} matrix with entries
#' \eqn{\Sigma_{kj} = s_X^{|j-k|}}.
#'
#' Further
#'
#' \eqn{\left(\begin{array}{c} \alpha_{ij}^\varepsilon \\ \alpha_{ij}^v \end{array}\right),
#' \left(\begin{array}{c} \alpha_{i}^\varepsilon \\ \alpha_{i}^v \end{array}\right),
#' \left(\begin{array}{c} \alpha_{j}^\varepsilon \\ \alpha_{j}^v \end{array}\right)
#' \sim \mathcal{N}\left(0, \left(\begin{array}{cc} 1 & s_{\varepsilon v} \\
#' s_{\varepsilon v} & 1 \end{array}\right) \right)}
#'
#' and \eqn{\alpha_{ij}^V, \alpha_{i}^V, \alpha_{j}^V \sim \mathcal{N}(0, 1)}.
#'
#' @references Chiang, H. D., Kato, K., Ma, Y. and Sasaki, Y. (2021),
#' Multiway Cluster Robust Double/Debiased Machine Learning,
#' Journal of Business & Economic Statistics,
#' \doi{10.1080/07350015.2021.1895815}, https://arxiv.org/abs/1909.03489.
#'
#' @param N (`integer(1)`) \cr
#' The number of observations (first dimension).
#'
#' @param M (`integer(1)`) \cr
#' The number of observations (second dimension).
#'
#' @param dim_X (`integer(1)`) \cr
#' The number of covariates.
#'
#' @param theta (`numeric(1)`) \cr
#' The value of the causal parameter.
#'
#' @param return_type (`character(1)`) \cr
#' If `"DoubleMLClusterData"`, returns a `DoubleMLClusterData` object.
#' If `"data.frame"`, returns a `data.frame()`.
#' If `"data.table"`, returns a `data.table()`.
#' If `"matrix"`, a named `list()` with entries `X`, `y`, `d`, `z` and
#' `cluster_vars` is returned.
#' Every entry in the list is a `matrix()` object. Default is `"DoubleMLClusterData"`.
#'
#' @param ... \cr
#' Additional keyword arguments to set non-default values for the parameters
#' \eqn{\pi_{10}=1.0},
#' \eqn{\omega_X = \omega_{\varepsilon} = \omega_V = \omega_v = (0.25, 0.25)},
#' \eqn{s_X = s_{\varepsilon v} = 0.25}, or the \eqn{p_x}-vectors
#' \eqn{\zeta_0 = \pi_{20} = \xi_0} with default entries
#' \eqn{(\zeta_0)_j = 0.5^j}.
#'
#' @return A data object according to the choice of `return_type`.
#'
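#' @examples
#' # Minimal usage sketch: simulate a small toy sample with non-default
#' # dimensions and inspect the generated outcome, treatment, instrument and
#' # cluster variables (requires the mvtnorm package for rmvnorm()).
#' \donttest{
#' set.seed(3141)
#' df = make_pliv_multiway_cluster_CKMS2021(
#'   N = 10, M = 10, dim_X = 5,
#'   return_type = "data.frame")
#' head(df[, c("Y", "D", "Z", "cluster_var_i", "cluster_var_j")])
#' }
#'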
#' @export
make_pliv_multiway_cluster_CKMS2021 = function(N = 25, M = 25, dim_X = 100,
  theta = 1.0,
  return_type = "DoubleMLClusterData",
  ...) {

  assert_choice(
    return_type,
    c("data.table", "matrix", "data.frame", "DoubleMLClusterData"))

  kwargs = list(...)
  pi_10 = if ("pi_10" %in% names(kwargs)) kwargs$pi_10 else 1.0
  zeta_0 = if ("zeta_0" %in% names(kwargs)) kwargs$zeta_0 else 0.5^(1:dim_X)
  pi_20 = if ("pi_20" %in% names(kwargs)) kwargs$pi_20 else 0.5^(1:dim_X)
  xi_0 = if ("xi_0" %in% names(kwargs)) kwargs$xi_0 else 0.5^(1:dim_X)

  omega_X = if ("omega_X" %in% names(kwargs)) kwargs$omega_X else c(0.25, 0.25)
  omega_epsilon = if ("omega_epsilon" %in% names(kwargs)) kwargs$omega_epsilon else c(0.25, 0.25)
  omega_v = if ("omega_v" %in% names(kwargs)) kwargs$omega_v else c(0.25, 0.25)
  omega_V = if ("omega_V" %in% names(kwargs)) kwargs$omega_V else c(0.25, 0.25)

  s_X = if ("s_X" %in% names(kwargs)) kwargs$s_X else 0.25
  s_epsilon_v = if ("s_epsilon_v" %in% names(kwargs)) kwargs$s_epsilon_v else 0.25

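  # Sample the components of the instrument error V: alpha_V varies at the
  # (i, j) level, alpha_V_i only across i and alpha_V_j only across j.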
  alpha_V = rnorm(N * M)
  alpha_V_i = rep(rnorm(N), each = M)
  alpha_V_j = rep(rnorm(M), times = N)

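  # Sample the components of (epsilon, v) jointly with correlation s_epsilon_v,
  # again separately at the (i, j), i and j levels.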
  cov_mat = matrix(c(1, s_epsilon_v, s_epsilon_v, 1), nrow = 2)
  alpha_eps_v = rmvnorm(n = N * M, mean = rep(0, 2), sigma = cov_mat)
  alpha_eps = alpha_eps_v[, 1]
  alpha_v = alpha_eps_v[, 2]

  alpha_eps_v_i = rmvnorm(n = N, mean = rep(0, 2), sigma = cov_mat)
  alpha_eps_i = rep(alpha_eps_v_i[, 1], each = M)
  alpha_v_i = rep(alpha_eps_v_i[, 2], each = M)

  alpha_eps_v_j = rmvnorm(n = M, mean = rep(0, 2), sigma = cov_mat)
  alpha_eps_j = rep(alpha_eps_v_j[, 1], times = N)
  alpha_v_j = rep(alpha_eps_v_j[, 2], times = N)

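  # Sample the covariate components from N(0, Sigma), where Sigma is the
  # Toeplitz matrix with entries s_X^|j - k|; the i-level draws are repeated
  # over j and the j-level draws over i, matching the row order (i slow, j fast).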
  cov_mat = toeplitz(s_X^(0:(dim_X - 1)))
  alpha_X = rmvnorm(n = N * M, mean = rep(0, dim_X), sigma = cov_mat)
  xx = rmvnorm(N, mean = rep(0, dim_X), sigma = cov_mat)
  alpha_X_i = xx[rep(seq_len(N), each = M), ]
  xx = rmvnorm(M, mean = rep(0, dim_X), sigma = cov_mat)
  alpha_X_j = xx[rep(seq_len(M), times = N), ]

  # generate variables
  x = (1 - omega_X[1] - omega_X[2]) * alpha_X +
    omega_X[1] * alpha_X_i + omega_X[2] * alpha_X_j

  eps = (1 - omega_epsilon[1] - omega_epsilon[2]) * alpha_eps +
    omega_epsilon[1] * alpha_eps_i + omega_epsilon[2] * alpha_eps_j

  v = (1 - omega_v[1] - omega_v[2]) * alpha_v +
    omega_v[1] * alpha_v_i + omega_v[2] * alpha_v_j

  V = (1 - omega_V[1] - omega_V[2]) * alpha_V +
    omega_V[1] * alpha_V_i + omega_V[2] * alpha_V_j

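  # Structural equations:
  # Z = X'xi_0 + V, D = Z pi_10 + X'pi_20 + v, Y = D theta + X'zeta_0 + eps.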
  z = x %*% xi_0 + V
  d = z * pi_10 + x %*% pi_20 + v
  y = d * theta + x %*% zeta_0 + eps

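  # Cluster identifiers: cluster_var_i indexes the first (slowly varying)
  # dimension and cluster_var_j the second, matching the row order above.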
  cluster_vars = expand.grid(seq(M), seq(N))[, c(2, 1)]

  colnames(x) = paste0("X", 1:dim_X)
  colnames(y) = "Y"
  colnames(d) = "D"
  colnames(z) = "Z"
  colnames(cluster_vars) = c("cluster_var_i", "cluster_var_j")

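  # Assemble the requested return format; for "DoubleMLClusterData" the two
  # cluster variables are passed via cluster_cols.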
  if (return_type == "matrix") {
    return(list(
      "X" = x, "y" = y, "d" = d,
      "cluster_vars" = cluster_vars, "z" = z))
  } else {
    if (return_type == "data.frame") {
      data = data.frame(x, y, d, cluster_vars, z)
      return(data)
    } else if (return_type == "data.table") {
      data = data.table(x, y, d, cluster_vars, z)
      return(data)
    } else if (return_type == "DoubleMLClusterData") {
      dt = data.table(x, y, d, cluster_vars, z)
      data = DoubleMLClusterData$new(dt,
        y_col = "Y", d_cols = "D",
        x_cols = colnames(x),
        z_cols = "Z",
        cluster_cols = c(
          "cluster_var_i",
          "cluster_var_j"))
      return(data)
    }
  }
}