itsrainingdata
diff --git a/‎DESCRIPTION‎
Lines changed: 9 additions & 6 deletions b/‎DESCRIPTION‎
Lines changed: 9 additions & 6 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 8 additions & 1 deletion b/‎NAMESPACE‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎NEWS.md‎
Lines changed: 11 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎R/ccdrAlgorithm-functions.R‎
Lines changed: 9 additions & 1 deletion b/‎R/ccdrAlgorithm-functions.R‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎R/ccdrAlgorithm-main.R‎
Lines changed: 16 additions & 10 deletions b/‎R/ccdrAlgorithm-main.R‎
Lines changed: 16 additions & 10 deletions
diff --git a/‎R/ccdrAlgorithm-messages.R‎
Lines changed: 27 additions & 0 deletions b/‎R/ccdrAlgorithm-messages.R‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎R/ccdrAlgorithm-mvn.R‎
Lines changed: 171 additions & 0 deletions b/‎R/ccdrAlgorithm-mvn.R‎
Lines changed: 171 additions & 0 deletions
diff --git a/‎R/s3-SparseBlockMatrixR.R‎
Lines changed: 1 addition & 1 deletion b/‎R/s3-SparseBlockMatrixR.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/s3-generics.R‎
Lines changed: 1 addition & 1 deletion b/‎R/s3-generics.R‎
Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 Package: ccdrAlgorithm
 Title: CCDr Algorithm for Learning Sparse Gaussian Bayesian Networks
-Version: 0.0.2
-Date: 2016-11-19
+Version: 0.0.3
+Date: 2017-03-09
 Authors@R: c(
     person("Bryon", "Aragam", email = "sparsebn@gmail.com", role = c("aut", "cre")),
     person("Dacheng", "Zhang", role = c("aut"))
@@ -11,13 +11,16 @@ Description: Implementation of the CCDr (Concave penalized Coordinate Descent wi
 Depends:
     R (>= 3.2.3)
 Imports:
-    sparsebnUtils (>= 0.0.2),
-    Rcpp (>= 0.11.0)
+    sparsebnUtils (>= 0.0.4),
+    Rcpp (>= 0.11.0),
+    stats,
+    utils
 LinkingTo: Rcpp
 Suggests:
     testthat,
-    graph
+    graph,
+    igraph
 URL: https://github.com/itsrainingdata/ccdrAlgorithm
 BugReports: https://github.com/itsrainingdata/ccdrAlgorithm/issues
 License: GPL (>= 2)
-RoxygenNote: 5.0.1
+RoxygenNote: 6.0.1
@@ -3,14 +3,21 @@
 S3method(edgeList,SparseBlockMatrixR)
 S3method(sparse,SparseBlockMatrixR)
 export(ccdr.run)
+export(generate_mvn_data)
 importFrom(Rcpp,sourceCpp)
+importFrom(sparsebnUtils,as.sparse)
 importFrom(sparsebnUtils,edgeList)
 importFrom(sparsebnUtils,get.adjacency.matrix)
+importFrom(sparsebnUtils,is.edgeList)
+importFrom(sparsebnUtils,is.sparse)
 importFrom(sparsebnUtils,is.zero)
 importFrom(sparsebnUtils,num.edges)
 importFrom(sparsebnUtils,num.nodes)
 importFrom(sparsebnUtils,reIndexC)
 importFrom(sparsebnUtils,reIndexR)
 importFrom(sparsebnUtils,sparse)
 importFrom(sparsebnUtils,to_graphNEL)
-useDynLib(ccdrAlgorithm)
+importFrom(sparsebnUtils,to_igraph)
+importFrom(stats,rnorm)
+importFrom(utils,tail)
+useDynLib(ccdrAlgorithm, .registration = TRUE)
@@ -1,3 +1,14 @@
+# ccdrAlgorithm 0.0.3
+
+## Features
+
+* New `generate_mvn_data()` method to generate multivariate normal data from a DAG.
+
+## Notes
+
+* Added an error when the dataset contains more than 10,000 columns: running CCDr on larger datasets requires increasing this limit and building the package from source. The CCDr algorithm has been safely tested on datasets with up to 8,000 variables.
+* By default, `ccdr.run()` includes the node names in the `sparsebnPath` output.
+
 # ccdrAlgorithm 0.0.2
 
 ## Features
 
@@ -1,3 +1,11 @@
+#
+#  ccdrAlgorithm-functions.R
+#  ccdrAlgorithm
+#
+#  Created by Dacheng Zhang on 9/3/16.
+#  Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.
+#
+
 ## returns TRUE if ivn_list is a list of vectors or NULL elements,
 check_if_ivn_list <- function(ivn) {
     ## check if it is a list
@@ -24,7 +32,7 @@ check_vector_label <- function(vec, pp) {
     ## e.g.: c(NA, 1L, NA, 3L, NA, 5L)
     ## However, c(1L, NA, 3L, 4, NA) returns all FALSE
     ## check if labels are integers
-    if(any(is.na(vec)) || !is.integer(vec)) {
+    if(any(is.na(vec)) || !is.numeric(vec)) {
         stop("Non-integer label(s) found in one or more components in ivn.")
         return(FALSE)
     }
 
@@ -1,9 +1,9 @@
 #
-#  ccdr-main-R.R
+#  ccdrAlgorithm-main.R
 #  ccdrAlgorithm
 #
 #  Created by Bryon Aragam (local) on 1/22/16.
-#  Copyright (c) 2014-2016 Bryon Aragam. All rights reserved.
+#  Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.
 #
 
 #
@@ -17,7 +17,7 @@
 #
 
 ###--- These two lines are necessary to import the auto-generated Rcpp methods in RcppExports.R---###
-#' @useDynLib ccdrAlgorithm
+#' @useDynLib ccdrAlgorithm, .registration = TRUE
 #' @importFrom Rcpp sourceCpp
 NULL
 
@@ -58,25 +58,22 @@ NULL
 #'
 #' @examples
 #'
-#' \dontrun{
-#'
 #' ### Generate some random data
 #' dat <- matrix(rnorm(1000), nrow = 20)
-#' dat <- sparsebnData(dat, type = "continuous")
+#' dat <- sparsebnUtils::sparsebnData(dat, type = "continuous")
 #'
 #' # Run with default settings
-#' ccdr.run(data = dat)
+#' ccdr.run(data = dat, lambdas.length = 20)
 #'
 #' ### Optional: Adjust settings
-#' pp <- ncol(dat)
+#' pp <- ncol(dat$data)
 #'
 #' # Initialize algorithm with a random initial value
 #' init.betas <- matrix(0, nrow = pp, ncol = pp)
 #' init.betas[1,2] <- init.betas[1,3] <- init.betas[4,2] <- 1
 #'
 #' # Run with adjusted settings
-#' ccdr.run(data = dat, betas = init.betas, lambdas.length = 10, alpha = 10, verbose = TRUE)
-#' }
+#' ccdr.run(data = dat, betas = init.betas, lambdas.length = 20, alpha = 10, verbose = TRUE)
 #'
 #' @export
 ccdr.run <- function(data,
@@ -111,6 +108,9 @@ ccdr.run <- function(data,
               verbose = verbose)
 } # END CCDR.RUN
 
+### Maximum number of nodes (columns of the data) allowed; ccdr_call stops with an error when ncol(data) exceeds this value
+MAX_CCS_ARRAY_SIZE <- function() 10000 # NOTE(review): presumably mirrors _MAX_CCS_ARRAY_SIZE_ in the compiled source -- confirm they are kept in sync
+
 # ccdr_call
 #
 #   Handles most of the bookkeeping for CCDr. Sets default values and prepares arguments for
@@ -153,6 +153,10 @@ ccdr_call <- function(data,
     nn <- as.integer(nrow(data))
     pp <- as.integer(ncol(data))
 
+    if(pp > MAX_CCS_ARRAY_SIZE()){
+        stop(max_nodes_warning(pp))
+    }
+
     if(is.null(ivn)) ivn <- vector("list", nn) # to pass testthat for observational data cases
     ### Check ivn
     if(!check_if_ivn_list(ivn)) stop("ivn must be a list of NULLs or vectors!")
@@ -216,6 +220,7 @@ ccdr_call <- function(data,
         #   Still need to set start = 0, though.
         betas$start <- 0
     } # Type-checking for betas happens in ccdr_singleR
+
     # This parameter can be set by the user, but in order to prevent the algorithm from taking too long to run
     #  it is a good idea to keep the threshold used by default which is O(sqrt(pp))
     if(is.null(max.iters)){
@@ -252,6 +257,7 @@ ccdr_call <- function(data,
         ### Coerce sbm output to edgeList
         names(fit[[k]])[1] <- "edges" # rename 'sbm' slot to 'edges': After the next line, this slot will no longer be an SBM object
         fit[[k]]$edges <- sparsebnUtils::as.edgeList(fit[[k]]$edges) # Before coercion, li$edges is actually an SBM object
+        names(fit[[k]]$edges) <- names(data)
 
         ### Add node names to output
         fit[[k]] <- append(fit[[k]], list(names(data)), after = 1) # insert node names into second slot
 
@@ -0,0 +1,27 @@
+#
+#  ccdrAlgorithm-messages.R
+#  ccdrAlgorithm
+#
+#  Created by Bryon Aragam (local) on 11/20/16.
+#  Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.
+#
+
+#
+# PACKAGE CCDRALGORITHM: Messages
+#
+#   CONTENTS:
+#       max_nodes_warning
+#
+
+### These warnings are all internal to this package and hence
+###  do not need to be exported
+
+### Dataset has more columns (nodes) than MAX_CCS_ARRAY_SIZE() allows.
+###
+### numnode = number of columns in the offending dataset
+###
+### Never returns: raises an error whose message tells the user how to
+###  re-build the package with a larger limit.
+max_nodes_warning <- function(numnode){
+    ### Assemble the template with paste() instead of a multi-line string
+    ###  literal, so that no newlines or source indentation end up inside
+    ###  the message shown to the user
+    msg <- paste("This dataset contains more than %d variables -- in order to",
+                 "run CCDr on this dataset, please download the source, increase",
+                 "_MAX_CCS_ARRAY_SIZE_ to at least %d, and re-build the package",
+                 "from source. If you have any trouble, please contact the",
+                 "maintainer.")
+    stop(sprintf(msg, MAX_CCS_ARRAY_SIZE(), numnode))
+}
@@ -0,0 +1,171 @@
+#
+#  ccdrAlgorithm-mvn.R
+#  ccdrAlgorithm
+#
+#  Created by Bryon Aragam (local) on 1/15/17.
+#  Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.
+#
+
+#' Generate data from a DAG
+#'
+#' Given a Gaussian DAG, generate data from the underlying distribution.
+#' Equivalently, generate data from a multivariate normal distribution given
+#' one of its SEM. Can generate both observational and intervention data.
+#'
+#' If \code{ivn = NULL}, then \code{n} observational samples are drawn. For each
+#' component of \code{ivn} that is not \code{NULL}, interventional samples will
+#' be drawn with the values of each node specified in the component.
+#'
+#' @param graph DAG in \code{\link{edgeList}} format.
+#' @param params Vector of parameters. Last p elements correspond to variances (p = number of nodes in \code{graph}), initial elements correspond to edge weights.
+#' @param n Number of samples to draw (at least 1).
+#' @param ivn List of interventions (see \code{\link[sparsebnUtils]{sparsebnData}}). Must be a \code{list} with exactly \code{n} components.
+#' @param ivn.rand If \code{TRUE}, random N(0,1) values will be drawn for each intervention. Otherwise, these values need to be supplied manually in \code{ivn}.
+#'
+#' @return A numeric matrix with one row per sample and one column per node;
+#' the columns are named and ordered according to \code{names(graph)}.
+#'
+#' @examples
+#'
+#' ### Generate observational data
+#' gr <- sparsebnUtils::random.graph(5, 5) # use sparsebnUtils package to generate a random graph
+#' gr.params <- runif(10) # there are 5 coefficients + 5 variances
+#' data.obs <- ccdrAlgorithm::generate_mvn_data(graph = gr,
+#'                                              n = 100,
+#'                                              params = gr.params)
+#'
+#' ### Generate experimental data
+#' ivn <- as.list(c(rep("V1", 50), rep("V2", 50))) # 50 interventions on V1, 50 interventions on V2
+#' data.ivn <- ccdrAlgorithm::generate_mvn_data(graph = gr,
+#'                                              n = 100,
+#'                                              params = gr.params,
+#'                                              ivn = ivn)
+#'
+#' @export
+generate_mvn_data <- function(graph, params, n = 1, ivn = NULL, ivn.rand = TRUE){
+    ### This function requires the 'igraph' package to be installed
+    if (!requireNamespace("igraph", quietly = TRUE)) {
+        stop("The igraph package is required for the method 'generate_mvn_data'. Please install it using install.packages(\"igraph\").", call. = FALSE)
+    }
+
+    stopifnot(sparsebnUtils::is.edgeList(graph))
+    stopifnot(is.numeric(params))
+    stopifnot(length(params) == sparsebnUtils::num.edges(graph) + sparsebnUtils::num.nodes(graph))
+
+    ### At least one sample must be requested, otherwise there is nothing
+    ###  to bind into a matrix below
+    stopifnot(is.numeric(n), length(n) == 1, !is.na(n), n >= 1)
+
+    if(is.null(names(graph))){
+        stop("Input 'graph' requires node names!")
+    }
+
+    if(!is.null(ivn)){
+        stopifnot(is.list(ivn))
+        stopifnot(length(ivn) == n)
+
+        ### Generate random intervention values
+        if(ivn.rand){
+            ### vapply names the result after the node names in each component
+            ###  and, unlike sapply, returns a numeric vector even when a
+            ###  component is NULL
+            ivn <- lapply(ivn, function(targets) vapply(targets, function(z) rnorm(n = 1, mean = 0, sd = 1), numeric(1))) # assume standard normal
+            # ivn <- lapply(ivn, function(x) sapply(x, function(x) 1)) # debugging
+        }
+    }
+
+    ### Need this to ensure the output has the same order as the input
+    ###  after things get shuffled around
+    original_node_order <- names(graph)
+
+    ### Get topological sort
+    ### Note that the check for the igraph pkg occurs in sparsebnUtils::to_igraph
+    topsort <- names(igraph::topo_sort(sparsebnUtils::to_igraph(graph)))
+
+    nnode <- length(original_node_order)
+    vars <- utils::tail(params, nnode) # parameters associated with variances
+    names(vars) <- original_node_order
+    ### head() returns an empty vector when the graph has no edges, whereas
+    ###  params[1:0] would wrongly pick up the first variance
+    coefs <- utils::head(params, length(params) - nnode) # parameters associated with edge weights
+    sp <- sparsebnUtils::as.sparse(graph)
+    sp$vals <- coefs # previous line leaves NAs for values in sparse object; need to fill these in
+    edgelist <- sparse_to_edgeWeightList(sp, original_node_order)
+    nodes <- names(edgelist) # same as original_node_order (assigned inside sparse_to_edgeWeightList)
+
+    ### The old way, efficient for obs data only
+    # x <- replicate(n, generate_mvn_vector(edgelist, nodes, topsort, vars))
+    # x <- t(x)[, original_node_order]
+
+    x <- vector("list", length = n)
+    for(i in seq_along(x)){
+        x[[i]] <- generate_mvn_vector(edgelist, nodes, topsort, vars, ivn = ivn[[i]])
+    }
+    x <- do.call("rbind", x)
+
+    ### Permute columns back to original ordering; drop = FALSE keeps the
+    ###  result a matrix when there is a single sample or a single node
+    x <- x[, original_node_order, drop = FALSE]
+    x
+}
+
+### Draw a single sample from the DAG: simulate one Gaussian noise term per
+###  entry of 'vars' (mean 0, variance given by that entry), then pass the
+###  noise on to gen_dag_vector_R as its 'seed'
+generate_mvn_vector <- function(edgelist, nodes, topsort, vars = NULL, ivn = NULL){
+    draw_noise <- function(v) rnorm(n = 1, mean = 0, sd = sqrt(v))
+    noise <- sapply(vars, draw_noise) # drawn up front, before the graph is traversed
+    gen_dag_vector_R(edgelist, nodes, topsort, seed = noise, ivn = ivn)
+}
+
+# Generate one sample from a DAG by visiting the nodes in the order given by 'topsort'
+# edgelist = graph information: list indexed by node name, each element holding $parents (names) and $weights
+# nodes = names of nodes in graph (used to name the output vector)
+# topsort = topological sort (vector of node names)
+# seed = random noise (Gaussian); bias term (binary); indexed by node name
+# ivn = named vector of intervention values (do(child = x))
+# Returns a numeric vector named by 'nodes' with one value per node
+gen_dag_vector_R <- function(edgelist, nodes, topsort, seed, ivn = NULL){
+    nnode <- length(edgelist)
+    x <- numeric(nnode)
+    names(x) <- nodes
+    ivnnames <- names(ivn)
+
+    for(j in seq_along(topsort)){
+        child <- topsort[j]
+
+        if(child %in% ivnnames){
+            ### If node is intervened on, fix value according to input in 'ivn'
+            x[child] <- ivn[child] # NOTE(review): assumes ivn is an atomic named vector; a list here would coerce x to a list -- confirm callers
+        } else{
+            ### If no intervention, use DAG to determine value from parents
+            parents <- edgelist[[child]]$parents
+            weights <- edgelist[[child]]$weights
+            nparents <- length(parents)
+            if(nparents > 0){
+                ### Iterate over parents and add associated effects
+                for(i in seq_along(parents)){
+                    this.par <- parents[i]
+                    x[child] <- x[child] + weights[i] * x[this.par]
+                    # x[child] <- x[child] + weights[i] * x[index[i]] # equivalent to above line
+                }
+            }
+
+            ### Add noise: This is a crucial step. If nothing is added here, the
+            ###            output will be all zeroes since the root node(s) will
+            ###            have x[child] = 0 at this point.
+            ###
+            ### Gaussian model: This is random error ~ N(0, vars[child])
+            ### Logistic model: This is a (deterministic) bias term
+            x[child] <- x[child] + seed[child]
+        }
+
+    }
+
+    x
+}
+
+### Convert a sparse object into a list with one entry per node, named by
+###  'nodes'. Each entry records the node's parents by name ($parents) and by
+###  position ($index), together with the matching edge weights ($weights).
+###  Entry k of x$rows / x$cols / x$vals is read as an edge from node
+###  x$rows[[k]] into node x$cols[[k]] with weight x$vals[[k]].
+sparse_to_edgeWeightList <- function(x, nodes){
+    stopifnot(sparsebnUtils::is.sparse(x))
+    stopifnot(x$dim[1] == x$dim[2]) # only square structures are accepted
+
+    ### Every node starts off with no parents
+    empty_entry <- list(parents = character(0), index = integer(0), weights = numeric(0))
+    out <- lapply(seq_len(x$dim[1]), function(k) empty_entry)
+    names(out) <- nodes
+
+    ### Append one edge at a time to the entry of its child node
+    ###  (grows the vectors on every step, so this is slow for large graphs)
+    for(k in seq_along(x$cols)){
+        to <- x$cols[[k]]
+        from <- x$rows[[k]]
+        entry <- out[[to]]
+        out[[nodes[to]]] <- list(parents = c(entry$parents, nodes[from]),
+                                 index = c(entry$index, from),
+                                 weights = c(entry$weights, x$vals[[k]]))
+    }
+
+    out
+}
@@ -3,7 +3,7 @@
 #  ccdrAlgorithm
 #
 #  Created by Bryon Aragam (local) on 1/22/16.
-#  Copyright (c) 2014-2016 Bryon Aragam. All rights reserved.
+#  Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.
 #
 
 #------------------------------------------------------------------------------#
 
@@ -3,7 +3,7 @@
 #  ccdrAlgorithm
 #
 #  Created by Bryon Aragam (local) on 1/22/16.
-#  Copyright (c) 2014-2016 Bryon Aragam. All rights reserved.
+#  Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.
 #
 
 #
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@`
`3`	`3`	`# ccdrAlgorithm`
`4`	`4`	`#`
`5`	`5`	`# Created by Bryon Aragam (local) on 1/22/16.`
`6`		`-# Copyright (c) 2014-2016 Bryon Aragam. All rights reserved.`
	`6`	`+# Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.`
`7`	`7`	`#`
`8`	`8`
`9`	`9`	`#------------------------------------------------------------------------------#`