@@ -92,11 +92,14 @@ ccdr.run <- function(data,
9292 # ## Check data format
9393 if (! sparsebnUtils :: is.sparsebnData(data )) stop(sparsebnUtils :: input_not_sparsebnData(data ))
9494
95- # ## Extract the data (CCDr only works on observational data, so ignore the intervention part)
95+ # ## Extract the data and ivn
96+ # ## CCDr now works on both observational data and interventional data, and a mixture of both
9697 data_matrix <- data $ data
98+ ivn_list <- data $ ivn
9799
98100 # ## Call the CCDr algorithm
99101 ccdr_call(data = data_matrix ,
102+ ivn = ivn_list ,
100103 betas = betas ,
101104 lambdas = lambdas ,
102105 lambdas.length = lambdas.length ,
@@ -115,6 +118,7 @@ ccdr.run <- function(data,
115118# this is handled internally by ccdr_gridR and ccdr_singleR.
116119#
117120ccdr_call <- function (data ,
121+ ivn = NULL ,
118122 betas ,
119123 lambdas ,
120124 lambdas.length ,
@@ -149,6 +153,20 @@ ccdr_call <- function(data,
149153 nn <- as.integer(nrow(data ))
150154 pp <- as.integer(ncol(data ))
151155
156+ if (is.null(ivn )) ivn <- vector(" list" , nn ) # to pass testthat for observational data cases
157+ # ## Check ivn
158+ if (! check_if_ivn_list(ivn )) stop(" ivn must be a list of NULLs or vectors!" )
159+ if (! check_ivn_size(ivn , data )) stop(sprintf(" Length of ivn is %d, expected to match the number of rows in data: %d." , length(ivn ), nn ))
160+ check_ivn_label(ivn , data )
161+ # ## if(!check_ivn_label(ivn, data)) stop("Intervention labels are incorrect.")
162+
163+ # ## use a vector nj to count how many times each node is under intervention
164+ # ## refer to nj as "intervention times vector"
165+ nj <- rep(0 , pp )
166+ for (j in 1 : pp ) { # # include 0 here or not?
167+ nj [j ] <- sum(! sapply(lapply(ivn , is.element , j ), any )) # # optimize for sorted column?
168+ }
169+
152170 # ## Use default values for lambda if not specified
153171 if (is.null(lambdas )){
154172 if (is.null(lambdas.length )){
@@ -188,6 +206,7 @@ ccdr_call <- function(data,
188206# }
189207
190208 # ## By default, set the initial guess for betas to be all zeroes
209+
191210 if (missing(betas )){
192211 betas <- matrix (0 , nrow = pp , ncol = pp )
193212 # betas <- SparseBlockMatrixR(betas) # 2015-03-26: Deprecated and replaced with .init_sbm below
@@ -197,7 +216,6 @@ ccdr_call <- function(data,
197216 # Still need to set start = 0, though.
198217 betas $ start <- 0
199218 } # Type-checking for betas happens in ccdr_singleR
200-
201219 # This parameter can be set by the user, but in order to prevent the algorithm from taking too long to run
202220 # it is a good idea to keep the threshold used by default which is O(sqrt(pp))
203221 if (is.null(max.iters )){
@@ -207,12 +225,16 @@ ccdr_call <- function(data,
207225 t1.cor <- proc.time()[3 ]
208226 # cors <- cor(data)
209227 # cors <- cors[upper.tri(cors, diag = TRUE)]
210- cors <- sparsebnUtils :: cor_vector(data )
228+ corlist <- sparsebnUtils :: cor_vector_ivn(data , ivn )
229+ cors <- corlist $ cors
230+ indexj <- corlist $ indexj
211231 t2.cor <- proc.time()[3 ]
212232
213233 fit <- ccdr_gridR(cors ,
214234 as.integer(pp ),
215235 as.integer(nn ),
236+ as.integer(nj ),
237+ as.integer(indexj ),
216238 betas ,
217239 as.numeric(lambdas ),
218240 as.numeric(gamma ),
@@ -245,6 +267,8 @@ ccdr_call <- function(data,
245267# Main subroutine for running the CCDr algorithm on a grid of lambda values.
246268ccdr_gridR <- function (cors ,
247269 pp , nn ,
270+ nj = NULL ,
271+ indexj = NULL ,
248272 betas ,
249273 lambdas ,
250274 gamma ,
@@ -261,13 +285,21 @@ ccdr_gridR <- function(cors,
261285 # ## nlam is now set automatically
262286 nlam <- length(lambdas )
263287
288+ # ## Check indexj
289+ if (is.null(indexj )) indexj <- rep(0L , pp + 1 )
290+ # ## Check nj
291+ if (is.null(nj )) nj <- as.integer(rep(nn , pp ))
292+
264293 ccdr.out <- list ()
265294 for (i in 1 : nlam ){
295+
266296 if (verbose ) message(" Working on lambda = " , round(lambdas [i ], 5 ), " [" , i , " /" , nlam , " ]" )
267297
268298 t1.ccdr <- proc.time()[3 ]
269299 ccdr.out [[i ]] <- ccdr_singleR(cors ,
270300 pp , nn ,
301+ nj ,
302+ indexj ,
271303 betas ,
272304 lambdas [i ],
273305 gamma = gamma ,
@@ -304,6 +336,8 @@ ccdr_gridR <- function(cors,
304336# called. Type-checking is strongly enforced here.
305337ccdr_singleR <- function (cors ,
306338 pp , nn ,
339+ nj = NULL ,
340+ indexj = NULL ,
307341 betas ,
308342 lambda ,
309343 gamma ,
@@ -313,9 +347,27 @@ ccdr_singleR <- function(cors,
313347 verbose = FALSE
314348){
315349
350+ if (is.null(indexj )) indexj <- rep(0L , pp + 1 )
351+ # ## Check indexj
352+ if (! is.vector(indexj )) stop(" Index vector for cors is not a vector." )
353+ if (length(indexj ) > pp + 1 ) stop(sprintf(" Index vector for cors is too long, expected to be no greater than %d, the number of columns of data." , pp ))
354+ if (! is.integer(indexj )) stop(" Index vector for cors has non-integer component(s)." )
355+ if (any(indexj < 0 | indexj > pp + 1 )) stop(sprintf(" Index vector for cors has out-of-range component(s), expected to be between 0 and %d." , pp ))
356+
357+ if (is.null(nj )) nj <- as.integer(rep(nn , pp ))
358+ # ## Check nj
359+ if (! is.vector(nj )) stop(" Intervention times vector is not a vector." )
360+ if (length(nj ) != pp ) stop(sprintf(" Length of intervention times vector is %d, expected %d% to match the number of columns of data" , length(nj ), pp ))
361+ if (! is.integer(nj )) stop(" Intervention times vector has non-integer component(s)." )
362+ if (any(nj < 0 | nj > nn )) stop(sprintf(" Intervention times vector has out-of-range component(s), expected to be between 0 and %d." , nn ))
363+
364+ # ## add a weight a_j to penalty on beta_{ij}
365+ # ## since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
366+ aj <- nj / nn
367+
316368 # ## Check cors
317369 if (! is.numeric(cors )) stop(" cors must be a numeric vector!" )
318- if (length(cors ) != pp * (pp + 1 )/ 2 ) stop(paste0(" cors has incorrect length: Expected length = " , pp * (pp + 1 )/ 2 , " input length = " , length(cors )))
370+ if (length(cors ) != length(unique( indexj )) * pp * (pp + 1 )/ 2 ) stop(paste0(" cors has incorrect length: Expected length = " , length(unique( indexj )) * pp * (pp + 1 )/ 2 , " input length = " , length(cors )))
319371
320372 # ## Check dimension parameters
321373 if (! is.integer(pp ) || ! is.integer(nn )) stop(" Both pp and nn must be integers!" )
@@ -354,7 +406,9 @@ ccdr_singleR <- function(cors,
354406 t1.ccdr <- proc.time()[3 ]
355407 ccdr.out <- singleCCDr(cors ,
356408 betas ,
357- nn ,
409+ nj ,
410+ indexj ,
411+ aj ,
358412 lambda ,
359413 c(gamma , eps , maxIters , alpha ),
360414 verbose = verbose )
0 commit comments