Skip to content

Commit 9605e66

Browse files
Merge branch 'feature-bwlist_weights' into dev
2 parents e3b70dd + aaabd6a commit 9605e66

File tree

11 files changed

+295
-62
lines changed

11 files changed

+295
-62
lines changed

R/RcppExports.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
33

4-
gridCCDr <- function(cors, init_betas, nj, indexj, aj, lambdas, params, verbose) {
5-
.Call(`_ccdrAlgorithm_gridCCDr`, cors, init_betas, nj, indexj, aj, lambdas, params, verbose)
4+
gridCCDr <- function(cors, init_betas, nj, indexj, aj, lambdas, weights, params, verbose) {
5+
.Call(`_ccdrAlgorithm_gridCCDr`, cors, init_betas, nj, indexj, aj, lambdas, weights, params, verbose)
66
}
77

8-
singleCCDr <- function(cors, init_betas, nj, indexj, aj, lambda, params, verbose) {
9-
.Call(`_ccdrAlgorithm_singleCCDr`, cors, init_betas, nj, indexj, aj, lambda, params, verbose)
8+
singleCCDr <- function(cors, init_betas, nj, indexj, aj, lambda, weights, params, verbose) {
9+
.Call(`_ccdrAlgorithm_singleCCDr`, cors, init_betas, nj, indexj, aj, lambda, weights, params, verbose)
1010
}
1111

R/ccdrAlgorithm-bwlist.R

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#
2+
# ccdrAlgorithm-bwlist.R
3+
# ccdrAlgorithm
4+
#
5+
# Created by Bryon Aragam (local) on 8/11/17.
6+
# Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.
7+
#
8+
9+
#
10+
# PACKAGE CCDRALGORITHM: Helper methods for black/white lists
11+
#
12+
# CONTENTS:
13+
# names_to_indices
14+
# rows_to_list
15+
# bwlist_check
16+
# bwlist_to_weights
17+
#
18+
19+
### Just a wrapper for match with a better name
20+
names_to_indices <- function(v, names){
21+
match(v, names)
22+
} # END NAMES_TO_INDICES
23+
24+
### Returns a list whose components are the rows of a matrix
25+
rows_to_list <- function(m){
26+
lapply(1:nrow(m), function(j) m[j,])
27+
} # END ROWS_TO_LIST
28+
29+
### Check correctness of input and transform from matrix to list of indices
30+
bwlist_check <- function(bwlist, names){
31+
## Consistency checks
32+
if(!is.matrix(bwlist) || ncol(bwlist) != 2){
33+
stop("Input must be a matrix with exactly 2 columns!")
34+
}
35+
36+
if(any(is.na(bwlist))){
37+
stop("Input cannot have missing values!")
38+
}
39+
40+
### Convert characters names to indices
41+
if(is.character(bwlist)){
42+
bwlist <- as.vector(bwlist)
43+
bwlist <- names_to_indices(bwlist, names)
44+
bwlist <- matrix(bwlist, ncol = 2)
45+
}
46+
47+
storage.mode(bwlist) <- "integer" # This is important in ccdr_call to check overlap between blacklist and whitelist, fails if numerics are mixed with ints
48+
rows_to_list(bwlist)
49+
} # END BWLIST_CHECK
50+
51+
### Convert b/w lists to weight matrix of {-1,0,1}
52+
# -1 = black listed (guaranteed to be absent / zero)
53+
# 0 = white listed (guaranteed to be present / nonzero)
54+
# 1 = gray listed (may or may not be final model)
55+
bwlist_to_weights <- function(black, white, nnode){
56+
weights <- matrix(1L, ncol = nnode, nrow = nnode)
57+
58+
if(!is.null(white)){
59+
for(k in 1:length(white)){
60+
weights[white[[k]][1], white[[k]][2]] <- 0L
61+
}
62+
}
63+
64+
if(!is.null(black)){
65+
for(k in 1:length(black)){
66+
weights[black[[k]][1], black[[k]][2]] <- -1L
67+
}
68+
}
69+
70+
weights
71+
} # END BWLIST_TO_WEIGHTS

R/ccdrAlgorithm-main.R

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,8 @@ NULL
3535
#' This implementation includes two options for the penalty: (1) MCP, and (2) L1 (or Lasso). This option
3636
#' is controlled by the \code{gamma} argument.
3737
#'
38-
#' @param data Data as \code{\link[sparsebnUtils]{sparsebnData}}. Must be numeric and contain no missing values.
39-
#' @param betas Initial guess for the algorithm. Represents the weighted adjacency matrix
40-
#' of a DAG where the algorithm will begin searching for an optimal structure.
41-
#' @param lambdas (optional) Numeric vector containing a grid of lambda values (i.e. regularization
38+
#' @param data Data as \code{\link[sparsebnUtils]{sparsebnData}} object. Must be numeric and contain no missing values.
39+
#' @param lambdas Numeric vector containing a grid of lambda values (i.e. regularization
4240
#' parameters) to use in the solution path. If missing, a default grid of values will be
4341
#' used based on a decreasing log-scale (see also \link{generate.lambdas}).
4442
#' @param lambdas.length Integer number of values to include in the solution path. If \code{lambdas}
@@ -48,10 +46,20 @@ NULL
4846
#' @param gamma Value of concavity parameter. If \code{gamma > 0}, then the MCP will be used
4947
#' with \code{gamma} as the concavity parameter. If \code{gamma < 0}, then the L1 penalty
5048
#' will be used and this value is otherwise ignored.
49+
#' @param whitelist A two-column matrix of edges that are guaranteed to be in each
50+
#' estimate (a "white list"). Each row in this matrix corresponds
51+
#' to an edge that is to be whitelisted. These edges can be
52+
#' specified by node name (as a \code{character} matrix), or by
53+
#' index (as a \code{numeric} matrix).
54+
#' @param blacklist A two-column matrix of edges that are guaranteed to be absent
55+
#' from each estimate (a "black list"). See argument
56+
#' "\code{whitelist}" above for more details.
5157
#' @param error.tol Error tolerance for the algorithm, used to test for convergence.
5258
#' @param max.iters Maximum number of iterations for each internal sweep.
5359
#' @param alpha Threshold parameter used to terminate the algorithm whenever the number of edges in the
5460
#' current DAG estimate is \code{> alpha * ncol(data)}.
61+
#' @param betas Initial guess for the algorithm. Represents the weighted adjacency matrix
62+
#' of a DAG where the algorithm will begin searching for an optimal structure.
5563
#' @param verbose \code{TRUE / FALSE} whether or not to print out progress and summary reports.
5664
#'
5765
#' @return A \code{\link[sparsebnUtils]{sparsebnPath}} object.
@@ -77,13 +85,15 @@ NULL
7785
#'
7886
#' @export
7987
ccdr.run <- function(data,
80-
betas,
8188
lambdas = NULL,
8289
lambdas.length = NULL,
90+
whitelist = NULL,
91+
blacklist = NULL,
8392
gamma = 2.0,
8493
error.tol = 1e-4,
8594
max.iters = NULL,
8695
alpha = 10,
96+
betas,
8797
verbose = FALSE
8898
){
8999
### Check data format
@@ -100,6 +110,8 @@ ccdr.run <- function(data,
100110
betas = betas,
101111
lambdas = lambdas,
102112
lambdas.length = lambdas.length,
113+
whitelist = whitelist,
114+
blacklist = blacklist,
103115
gamma = gamma,
104116
error.tol = error.tol,
105117
rlam = NULL,
@@ -122,13 +134,16 @@ ccdr_call <- function(data,
122134
betas,
123135
lambdas,
124136
lambdas.length,
137+
whitelist = NULL,
138+
blacklist = NULL,
125139
gamma,
126140
error.tol,
127141
rlam,
128142
max.iters,
129143
alpha,
130144
verbose = FALSE
131145
){
146+
node_names <- names(data)
132147
# ### Allow users to input a data.frame, but kindly warn them about doing this
133148
# if(is.data.frame(data)){
134149
# warning(sparsebnUtils::alg_input_data_frame())
@@ -227,6 +242,23 @@ ccdr_call <- function(data,
227242
max.iters <- sparsebnUtils::default_max_iters(pp)
228243
}
229244

245+
### White/black lists
246+
# Be careful about handling various NULL cases
247+
if(!is.null(whitelist)) whitelist <- bwlist_check(whitelist, node_names)
248+
if(!is.null(blacklist)) blacklist <- bwlist_check(blacklist, node_names)
249+
250+
if(!is.null(whitelist) && !is.null(blacklist)){
251+
if(length(intersect(whitelist, blacklist)) > 0){
252+
badinput <- vapply(intersect(whitelist, blacklist), function(x) sprintf("\t[%s]\n", paste(x, collapse = ",")), FUN.VALUE = "vector")
253+
badinput <- paste(badinput, collapse = "")
254+
msg <- sprintf("Duplicate entries found in blacklist and whitelist: \n%s", badinput)
255+
stop(msg)
256+
}
257+
}
258+
259+
weights <- bwlist_to_weights(blacklist, whitelist, nnode = pp)
260+
261+
### Pre-process correlation data
230262
t1.cor <- proc.time()[3]
231263
# cors <- cor(data)
232264
# cors <- cors[upper.tri(cors, diag = TRUE)]
@@ -242,6 +274,7 @@ ccdr_call <- function(data,
242274
as.integer(indexj),
243275
betas,
244276
as.numeric(lambdas),
277+
as.integer(weights),
245278
as.numeric(gamma),
246279
as.numeric(error.tol),
247280
as.integer(max.iters),
@@ -260,7 +293,7 @@ ccdr_call <- function(data,
260293
names(fit[[k]]$edges) <- names(data)
261294

262295
### Add node names to output
263-
fit[[k]] <- append(fit[[k]], list(names(data)), after = 1) # insert node names into second slot
296+
fit[[k]] <- append(fit[[k]], list(node_names), after = 1) # insert node names into second slot
264297
names(fit[[k]])[2] <- "nodes"
265298
}
266299

@@ -277,6 +310,7 @@ ccdr_gridR <- function(cors,
277310
indexj = NULL,
278311
betas,
279312
lambdas,
313+
weights,
280314
gamma,
281315
eps,
282316
maxIters,
@@ -308,6 +342,7 @@ ccdr_gridR <- function(cors,
308342
indexj,
309343
betas,
310344
lambdas[i],
345+
weights,
311346
gamma = gamma,
312347
eps = eps,
313348
maxIters = maxIters,
@@ -346,6 +381,7 @@ ccdr_singleR <- function(cors,
346381
indexj = NULL,
347382
betas,
348383
lambda,
384+
weights,
349385
gamma,
350386
eps,
351387
maxIters,
@@ -391,6 +427,11 @@ ccdr_singleR <- function(cors,
391427
if(!is.numeric(lambda)) stop("lambda must be numeric!")
392428
if(lambda < 0) stop("lambda must be >= 0!")
393429

430+
### Check weights
431+
if(length(weights) != pp*pp) stop("weights must have length p^2!")
432+
if(!is.numeric(weights)) stop("weights must be numeric!")
433+
if(weights < -1 || weights > 1) stop("weights out of bounds!")
434+
394435
### Check gamma
395436
if(!is.numeric(gamma)) stop("gamma must be numeric!")
396437
if(gamma < 0 && gamma != -1) stop("gamma must be >= 0 (MCP) or = -1 (Lasso)!")
@@ -416,6 +457,7 @@ ccdr_singleR <- function(cors,
416457
indexj,
417458
aj,
418459
lambda,
460+
weights,
419461
c(gamma, eps, maxIters, alpha),
420462
verbose = verbose)
421463
t2.ccdr <- proc.time()[3]

man/ccdr.run.Rd

Lines changed: 18 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/RcppExports.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
using namespace Rcpp;
77

88
// gridCCDr
9-
List gridCCDr(NumericVector cors, List init_betas, IntegerVector nj, IntegerVector indexj, NumericVector aj, NumericVector lambdas, NumericVector params, int verbose);
10-
RcppExport SEXP _ccdrAlgorithm_gridCCDr(SEXP corsSEXP, SEXP init_betasSEXP, SEXP njSEXP, SEXP indexjSEXP, SEXP ajSEXP, SEXP lambdasSEXP, SEXP paramsSEXP, SEXP verboseSEXP) {
9+
List gridCCDr(NumericVector cors, List init_betas, IntegerVector nj, IntegerVector indexj, NumericVector aj, NumericVector lambdas, IntegerVector weights, NumericVector params, int verbose);
10+
RcppExport SEXP _ccdrAlgorithm_gridCCDr(SEXP corsSEXP, SEXP init_betasSEXP, SEXP njSEXP, SEXP indexjSEXP, SEXP ajSEXP, SEXP lambdasSEXP, SEXP weightsSEXP, SEXP paramsSEXP, SEXP verboseSEXP) {
1111
BEGIN_RCPP
1212
Rcpp::RObject rcpp_result_gen;
1313
Rcpp::RNGScope rcpp_rngScope_gen;
@@ -17,15 +17,16 @@ BEGIN_RCPP
1717
Rcpp::traits::input_parameter< IntegerVector >::type indexj(indexjSEXP);
1818
Rcpp::traits::input_parameter< NumericVector >::type aj(ajSEXP);
1919
Rcpp::traits::input_parameter< NumericVector >::type lambdas(lambdasSEXP);
20+
Rcpp::traits::input_parameter< IntegerVector >::type weights(weightsSEXP);
2021
Rcpp::traits::input_parameter< NumericVector >::type params(paramsSEXP);
2122
Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP);
22-
rcpp_result_gen = Rcpp::wrap(gridCCDr(cors, init_betas, nj, indexj, aj, lambdas, params, verbose));
23+
rcpp_result_gen = Rcpp::wrap(gridCCDr(cors, init_betas, nj, indexj, aj, lambdas, weights, params, verbose));
2324
return rcpp_result_gen;
2425
END_RCPP
2526
}
2627
// singleCCDr
27-
List singleCCDr(NumericVector cors, List init_betas, IntegerVector nj, IntegerVector indexj, NumericVector aj, double lambda, NumericVector params, int verbose);
28-
RcppExport SEXP _ccdrAlgorithm_singleCCDr(SEXP corsSEXP, SEXP init_betasSEXP, SEXP njSEXP, SEXP indexjSEXP, SEXP ajSEXP, SEXP lambdaSEXP, SEXP paramsSEXP, SEXP verboseSEXP) {
28+
List singleCCDr(NumericVector cors, List init_betas, IntegerVector nj, IntegerVector indexj, NumericVector aj, double lambda, IntegerVector weights, NumericVector params, int verbose);
29+
RcppExport SEXP _ccdrAlgorithm_singleCCDr(SEXP corsSEXP, SEXP init_betasSEXP, SEXP njSEXP, SEXP indexjSEXP, SEXP ajSEXP, SEXP lambdaSEXP, SEXP weightsSEXP, SEXP paramsSEXP, SEXP verboseSEXP) {
2930
BEGIN_RCPP
3031
Rcpp::RObject rcpp_result_gen;
3132
Rcpp::RNGScope rcpp_rngScope_gen;
@@ -35,16 +36,17 @@ BEGIN_RCPP
3536
Rcpp::traits::input_parameter< IntegerVector >::type indexj(indexjSEXP);
3637
Rcpp::traits::input_parameter< NumericVector >::type aj(ajSEXP);
3738
Rcpp::traits::input_parameter< double >::type lambda(lambdaSEXP);
39+
Rcpp::traits::input_parameter< IntegerVector >::type weights(weightsSEXP);
3840
Rcpp::traits::input_parameter< NumericVector >::type params(paramsSEXP);
3941
Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP);
40-
rcpp_result_gen = Rcpp::wrap(singleCCDr(cors, init_betas, nj, indexj, aj, lambda, params, verbose));
42+
rcpp_result_gen = Rcpp::wrap(singleCCDr(cors, init_betas, nj, indexj, aj, lambda, weights, params, verbose));
4143
return rcpp_result_gen;
4244
END_RCPP
4345
}
4446

4547
static const R_CallMethodDef CallEntries[] = {
46-
{"_ccdrAlgorithm_gridCCDr", (DL_FUNC) &_ccdrAlgorithm_gridCCDr, 8},
47-
{"_ccdrAlgorithm_singleCCDr", (DL_FUNC) &_ccdrAlgorithm_singleCCDr, 8},
48+
{"_ccdrAlgorithm_gridCCDr", (DL_FUNC) &_ccdrAlgorithm_gridCCDr, 9},
49+
{"_ccdrAlgorithm_singleCCDr", (DL_FUNC) &_ccdrAlgorithm_singleCCDr, 9},
4850
{NULL, NULL, 0}
4951
};
5052

0 commit comments

Comments
 (0)