3535# ' This implementation includes two options for the penalty: (1) MCP, and (2) L1 (or Lasso). This option
3636# ' is controlled by the \code{gamma} argument.
3737# '
38- # ' @param data Data as \code{\link[sparsebnUtils]{sparsebnData}}. Must be numeric and contain no missing values.
39- # ' @param betas Initial guess for the algorithm. Represents the weighted adjacency matrix
40- # ' of a DAG where the algorithm will begin searching for an optimal structure.
41- # ' @param lambdas (optional) Numeric vector containing a grid of lambda values (i.e. regularization
38+ # ' @param data Data as \code{\link[sparsebnUtils]{sparsebnData}} object. Must be numeric and contain no missing values.
39+ # ' @param lambdas Numeric vector containing a grid of lambda values (i.e. regularization
4240# ' parameters) to use in the solution path. If missing, a default grid of values will be
4341# ' used based on a decreasing log-scale (see also \link{generate.lambdas}).
4442# ' @param lambdas.length Integer number of values to include in the solution path. If \code{lambdas}
4543# ' has also been specified, this value will be ignored. Note also that the final
4644# ' solution path may contain fewer estimates (see
4745# ' \code{alpha}).
46+ # ' @param whitelist A two-column matrix of edges that are guaranteed to be in each
47+ # ' estimate (a "white list"). Each row in this matrix corresponds
48+ # ' to an edge that is to be whitelisted. These edges can be
49+ # ' specified by node name (as a \code{character} matrix), or by
50+ # ' index (as a \code{numeric} matrix).
51+ # ' @param blacklist A two-column matrix of edges that are guaranteed to be absent
52+ # ' from each estimate (a "black list"). See argument
53+ # ' "\code{whitelist}" above for more details.
4854# ' @param gamma Value of concavity parameter. If \code{gamma > 0}, then the MCP will be used
4955# ' with \code{gamma} as the concavity parameter. If \code{gamma < 0}, then the L1 penalty
5056# ' will be used and this value is otherwise ignored.
5157# ' @param error.tol Error tolerance for the algorithm, used to test for convergence.
5258# ' @param max.iters Maximum number of iterations for each internal sweep.
5359# ' @param alpha Threshold parameter used to terminate the algorithm whenever the number of edges in the
5460# ' current DAG estimate is \code{> alpha * ncol(data)}.
61+ # ' @param betas Initial guess for the algorithm. Represents the weighted adjacency matrix
62+ # ' of a DAG where the algorithm will begin searching for an optimal structure.
63+ # ' @param sigmas Numeric vector of known values of conditional variances for each node in the network. If this is
64+ # ' set by the user, these parameters will not be computed and the input will
65+ # ' be used as the "true" values of the variances in the algorithm. Note that setting
66+ # ' this to be all ones (i.e. \code{sigmas[j] = 1} for all \code{j}) is
67+ # ' equivalent to using the least-squares loss.
5568# ' @param verbose \code{TRUE / FALSE} whether or not to print out progress and summary reports.
5669# '
5770# ' @return A \code{\link[sparsebnUtils]{sparsebnPath}} object.
7790# '
7891# ' @export
7992ccdr.run <- function (data ,
80- betas ,
8193 lambdas = NULL ,
8294 lambdas.length = NULL ,
95+ whitelist = NULL ,
96+ blacklist = NULL ,
8397 gamma = 2.0 ,
8498 error.tol = 1e-4 ,
8599 max.iters = NULL ,
86100 alpha = 10 ,
101+ betas ,
102+ sigmas = NULL ,
87103 verbose = FALSE
88104){
89105 # ## Check data format
@@ -94,12 +110,24 @@ ccdr.run <- function(data,
94110 data_matrix <- data $ data
95111 ivn_list <- data $ ivn
96112
113+ # ## If ivn_list contains character names, convert to indices
114+ if (" character" %in% sparsebnUtils :: list_classes(ivn_list )){
115+ ivn_list <- lapply(ivn_list , function (x ){
116+ idx <- match(x , names(data_matrix ))
117+ if (length(idx ) == 0 ) NULL # return NULL if no match (=> observational)
118+ else idx
119+ })
120+ }
121+
97122 # ## Call the CCDr algorithm
98123 ccdr_call(data = data_matrix ,
99124 ivn = ivn_list ,
100125 betas = betas ,
126+ sigmas = sigmas ,
101127 lambdas = lambdas ,
102128 lambdas.length = lambdas.length ,
129+ whitelist = whitelist ,
130+ blacklist = blacklist ,
103131 gamma = gamma ,
104132 error.tol = error.tol ,
105133 rlam = NULL ,
@@ -120,15 +148,19 @@ MAX_CCS_ARRAY_SIZE <- function() 10000
120148ccdr_call <- function (data ,
121149 ivn = NULL ,
122150 betas ,
151+ sigmas ,
123152 lambdas ,
124153 lambdas.length ,
154+ whitelist = NULL ,
155+ blacklist = NULL ,
125156 gamma ,
126157 error.tol ,
127158 rlam ,
128159 max.iters ,
129160 alpha ,
130161 verbose = FALSE
131162){
163+ node_names <- names(data )
132164# ### Allow users to input a data.frame, but kindly warn them about doing this
133165# if(is.data.frame(data)){
134166# warning(sparsebnUtils::alg_input_data_frame())
@@ -171,6 +203,11 @@ ccdr_call <- function(data,
171203 nj [j ] <- sum(! sapply(lapply(ivn , is.element , j ), any )) # # optimize for sorted column?
172204 }
173205
206+ # ## Set default for sigmas (negative values => ignore initial value and update as usual)
207+ if (is.null(sigmas )){
208+ sigmas <- rep(- 1 . , pp )
209+ }
210+
174211 # ## Use default values for lambda if not specified
175212 if (is.null(lambdas )){
176213 if (is.null(lambdas.length )){
@@ -227,6 +264,23 @@ ccdr_call <- function(data,
227264 max.iters <- sparsebnUtils :: default_max_iters(pp )
228265 }
229266
267+ # ## White/black lists
268+ # Be careful about handling various NULL cases
269+ if (! is.null(whitelist )) whitelist <- bwlist_check(whitelist , node_names )
270+ if (! is.null(blacklist )) blacklist <- bwlist_check(blacklist , node_names )
271+
272+ if (! is.null(whitelist ) && ! is.null(blacklist )){
273+ if (length(intersect(whitelist , blacklist )) > 0 ){
274+ badinput <- vapply(intersect(whitelist , blacklist ), function (x ) sprintf(" \t [%s]\n " , paste(x , collapse = " ," )), FUN.VALUE = " vector" )
275+ badinput <- paste(badinput , collapse = " " )
276+ msg <- sprintf(" Duplicate entries found in blacklist and whitelist: \n %s" , badinput )
277+ stop(msg )
278+ }
279+ }
280+
281+ weights <- bwlist_to_weights(blacklist , whitelist , nnode = pp )
282+
283+ # ## Pre-process correlation data
230284 t1.cor <- proc.time()[3 ]
231285 # cors <- cor(data)
232286 # cors <- cors[upper.tri(cors, diag = TRUE)]
@@ -241,7 +295,9 @@ ccdr_call <- function(data,
241295 as.integer(nj ),
242296 as.integer(indexj ),
243297 betas ,
298+ as.numeric(sigmas ),
244299 as.numeric(lambdas ),
300+ as.integer(weights ),
245301 as.numeric(gamma ),
246302 as.numeric(error.tol ),
247303 as.integer(max.iters ),
@@ -260,7 +316,7 @@ ccdr_call <- function(data,
260316 names(fit [[k ]]$ edges ) <- names(data )
261317
262318 # ## Add node names to output
263- fit [[k ]] <- append(fit [[k ]], list (names( data ) ), after = 1 ) # insert node names into second slot
319+ fit [[k ]] <- append(fit [[k ]], list (node_names ), after = 1 ) # insert node names into second slot
264320 names(fit [[k ]])[2 ] <- " nodes"
265321 }
266322
@@ -276,7 +332,9 @@ ccdr_gridR <- function(cors,
276332 nj = NULL ,
277333 indexj = NULL ,
278334 betas ,
335+ sigmas ,
279336 lambdas ,
337+ weights ,
280338 gamma ,
281339 eps ,
282340 maxIters ,
@@ -307,7 +365,9 @@ ccdr_gridR <- function(cors,
307365 nj ,
308366 indexj ,
309367 betas ,
368+ sigmas ,
310369 lambdas [i ],
370+ weights ,
311371 gamma = gamma ,
312372 eps = eps ,
313373 maxIters = maxIters ,
@@ -345,39 +405,46 @@ ccdr_singleR <- function(cors,
345405 nj = NULL ,
346406 indexj = NULL ,
347407 betas ,
408+ sigmas ,
348409 lambda ,
410+ weights ,
349411 gamma ,
350412 eps ,
351413 maxIters ,
352414 alpha , # 2-9-15: No longer necessary in ccdr_singleR, but needed since the C++ call asks for it
353415 verbose = FALSE
354416){
355417
356- if (is.null(indexj )) indexj <- rep(0L , pp + 1 )
418+ # ## Check dimension parameters
419+ if (! is.integer(pp ) || ! is.integer(nn )) stop(" Both pp and nn must be integers!" )
420+ if (pp < = 0 || nn < = 0 ) stop(" Both pp and nn must be positive!" )
421+
422+ # ## These variables, if NULL, need to be initialized before checking anything
423+ if (is.null(indexj )) indexj <- rep(0L , pp + 1 ) # initialize indexj
424+ if (is.null(nj )) nj <- as.integer(rep(nn , pp )) # initialize nj
425+
357426 # ## Check indexj
358427 if (! is.vector(indexj )) stop(" Index vector for cors is not a vector." )
359428 if (length(indexj ) > pp + 1 ) stop(sprintf(" Index vector for cors is too long, expected to be no greater than %d, the number of columns of data." , pp ))
360429 if (! is.integer(indexj )) stop(" Index vector for cors has non-integer component(s)." )
430+ if (any(is.na(indexj ) | is.null(indexj ))) stop(" Index vector cannot have missing or NULL values." )
361431 if (any(indexj < 0 | indexj > pp + 1 )) stop(sprintf(" Index vector for cors has out-of-range component(s), expected to be between 0 and %d." , pp ))
362432
363- if (is.null(nj )) nj <- as.integer(rep(nn , pp ))
364433 # ## Check nj
365434 if (! is.vector(nj )) stop(" Intervention times vector is not a vector." )
366- if (length(nj ) != pp ) stop(sprintf(" Length of intervention times vector is %d, expected %d% to match the number of columns of data" , length(nj ), pp ))
435+ if (length(nj ) != pp ) stop(sprintf(" Length of intervention times vector is %d, expected to match the number of columns of data = %d " , length(nj ), pp ))
367436 if (! is.integer(nj )) stop(" Intervention times vector has non-integer component(s)." )
437+ if (any(is.na(nj ) | is.null(nj ))) stop(" Intervention times vector cannot have missing or NULL values." )
368438 if (any(nj < 0 | nj > nn )) stop(sprintf(" Intervention times vector has out-of-range component(s), expected to be between 0 and %d." , nn ))
369439
370- # ## add a weight a_j to penalty on beta_{ij}
371- # ## since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
372- aj <- nj / nn
373-
374440 # ## Check cors
441+ # ## This check must come after the checks for indexj, nj since these values are used to check cors
375442 if (! is.numeric(cors )) stop(" cors must be a numeric vector!" )
376443 if (length(cors ) != length(unique(indexj ))* pp * (pp + 1 )/ 2 ) stop(paste0(" cors has incorrect length: Expected length = " , length(unique(indexj ))* pp * (pp + 1 )/ 2 , " input length = " , length(cors )))
377444
378- # ## Check dimension parameters
379- if ( ! is.integer( pp ) || ! is.integer( nn )) stop( " Both pp and nn must be integers! " )
380- if ( pp < = 0 || nn < = 0 ) stop( " Both pp and nn must be positive! " )
445+ # ## add a weight a_j to penalty on beta_{ij}
446+ # ## since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
447+ aj <- nj / nn
381448
382449 # ## Check betas
383450 if (sparsebnUtils :: check_if_matrix(betas )){ # if the input is a matrix, convert to SBM object
@@ -387,10 +454,25 @@ ccdr_singleR <- function(cors,
387454 stop(" Incompatible data passed for betas parameter: Should be either matrix or list in SparseBlockMatrixR format." )
388455 }
389456
457+ # ## Check sigmas
458+ if (! is.numeric(sigmas )) stop(" sigmas must be numeric!" )
459+ if (length(sigmas ) != pp ) stop(sprintf(" sigmas must have length = %d!" , pp ))
460+ if (any(sigmas < 0 )){
461+ # -1 is a sentinel value for updating sigmas via the CD updates
462+ if (any(sigmas != - 1 . )){
463+ stop(" sigmas must be > 0!" )
464+ }
465+ }
466+
390467 # ## Check lambda
391468 if (! is.numeric(lambda )) stop(" lambda must be numeric!" )
392469 if (lambda < 0 ) stop(" lambda must be >= 0!" )
393470
471+ # ## Check weights
472+ if (length(weights ) != pp * pp ) stop(sprintf(" weights must have length p^2 = %d!" , pp * pp ))
473+ if (! is.numeric(weights )) stop(" weights must be numeric!" )
474+ if (weights < - 1 || weights > 1 ) stop(" weights out of bounds!" )
475+
394476 # ## Check gamma
395477 if (! is.numeric(gamma )) stop(" gamma must be numeric!" )
396478 if (gamma < 0 && gamma != - 1 ) stop(" gamma must be >= 0 (MCP) or = -1 (Lasso)!" )
@@ -412,10 +494,12 @@ ccdr_singleR <- function(cors,
412494 t1.ccdr <- proc.time()[3 ]
413495 ccdr.out <- singleCCDr(cors ,
414496 betas ,
497+ sigmas ,
415498 nj ,
416499 indexj ,
417500 aj ,
418501 lambda ,
502+ weights ,
419503 c(gamma , eps , maxIters , alpha ),
420504 verbose = verbose )
421505 t2.ccdr <- proc.time()[3 ]
0 commit comments