Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(autoplot,gg_auct)
S3method(autoplot,gg_beta_uvarpro)
S3method(autoplot,gg_beta_varpro)
S3method(autoplot,gg_brier)
S3method(autoplot,gg_error)
Expand All @@ -13,6 +14,7 @@ S3method(autoplot,gg_partialpro)
S3method(autoplot,gg_rfsrc)
S3method(autoplot,gg_rhf)
S3method(autoplot,gg_roc)
S3method(autoplot,gg_sdependent)
S3method(autoplot,gg_survival)
S3method(autoplot,gg_udependent)
S3method(autoplot,gg_variable)
Expand All @@ -21,6 +23,8 @@ S3method(autoplot,gg_vimp)
S3method(calc_roc,randomForest)
S3method(calc_roc,rfsrc)
S3method(gg_auct,rhf)
S3method(gg_beta_uvarpro,default)
S3method(gg_beta_uvarpro,uvarpro)
S3method(gg_beta_varpro,varpro)
S3method(gg_brier,rfsrc)
S3method(gg_error,randomForest)
Expand All @@ -34,13 +38,16 @@ S3method(gg_rhf,rhf)
S3method(gg_roc,default)
S3method(gg_roc,randomForest)
S3method(gg_roc,rfsrc)
S3method(gg_sdependent,default)
S3method(gg_sdependent,uvarpro)
S3method(gg_survival,default)
S3method(gg_survival,rfsrc)
S3method(gg_variable,randomForest)
S3method(gg_variable,rfsrc)
S3method(gg_vimp,randomForest)
S3method(gg_vimp,rfsrc)
S3method(plot,gg_auct)
S3method(plot,gg_beta_uvarpro)
S3method(plot,gg_beta_varpro)
S3method(plot,gg_brier)
S3method(plot,gg_error)
Expand All @@ -53,12 +60,14 @@ S3method(plot,gg_partialpro)
S3method(plot,gg_rfsrc)
S3method(plot,gg_rhf)
S3method(plot,gg_roc)
S3method(plot,gg_sdependent)
S3method(plot,gg_survival)
S3method(plot,gg_udependent)
S3method(plot,gg_variable)
S3method(plot,gg_varpro)
S3method(plot,gg_vimp)
S3method(print,gg_auct)
S3method(print,gg_beta_uvarpro)
S3method(print,gg_beta_varpro)
S3method(print,gg_brier)
S3method(print,gg_error)
Expand All @@ -71,6 +80,7 @@ S3method(print,gg_partialpro)
S3method(print,gg_rfsrc)
S3method(print,gg_rhf)
S3method(print,gg_roc)
S3method(print,gg_sdependent)
S3method(print,gg_survival)
S3method(print,gg_udependent)
S3method(print,gg_variable)
Expand All @@ -81,6 +91,7 @@ S3method(print,summary.gg_beta_varpro)
S3method(print,summary.gg_ivarpro)
S3method(print,summary.gg_udependent)
S3method(summary,gg_auct)
S3method(summary,gg_beta_uvarpro)
S3method(summary,gg_beta_varpro)
S3method(summary,gg_brier)
S3method(summary,gg_error)
Expand All @@ -93,6 +104,7 @@ S3method(summary,gg_partialpro)
S3method(summary,gg_rfsrc)
S3method(summary,gg_rhf)
S3method(summary,gg_roc)
S3method(summary,gg_sdependent)
S3method(summary,gg_survival)
S3method(summary,gg_udependent)
S3method(summary,gg_variable)
Expand All @@ -101,6 +113,7 @@ S3method(summary,gg_vimp)
export(calc_auc)
export(calc_roc)
export(gg_auct)
export(gg_beta_uvarpro)
export(gg_beta_varpro)
export(gg_brier)
export(gg_error)
Expand All @@ -113,6 +126,7 @@ export(gg_partialpro)
export(gg_rfsrc)
export(gg_rhf)
export(gg_roc)
export(gg_sdependent)
export(gg_survival)
export(gg_udependent)
export(gg_variable)
Expand All @@ -139,6 +153,7 @@ importFrom(ggplot2,geom_jitter)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,geom_ribbon)
importFrom(ggplot2,geom_segment)
importFrom(ggplot2,geom_vline)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,labs)
Expand Down
19 changes: 19 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,25 @@ Version: 3.1.0.9000
ggRandomForests v4.0.0 (development)
====================================
* Development version 3.1.0.9000, opened after the v3.1.0 CRAN release.
* `gg_beta_uvarpro()` / `plot.gg_beta_uvarpro()`: tidy wrapper and default
horizontal bar chart for `varPro::get.beta.entropy()` — the unsupervised
analogue of `gg_beta_varpro()`. From a `uvarpro()` fit it aggregates the
per-region lasso coefficients into `beta_mean = colMeans(|beta|)` (one row
per variable, most-important first) and flags variables above a selection
cutoff (default `mean(beta_mean)`). An optional `beta_fit` argument accepts
a precomputed `get.beta.entropy()` matrix so the expensive cross-validated
lasso runs once. Provenance records `source`, `family = "unsupv"`,
`cutoff`, `n_var`, and `n_released_regions`. Follows the `get.beta.entropy`
+ `sdependent` "lasso importance" workflow from the `varPro::uvarpro()`
help (iowa-housing example).
* `gg_sdependent()` / `plot.gg_sdependent()`: tidy wrapper and ranked
lollipop for `varPro::sdependent()`'s signal-variable detection. From a
`uvarpro()` fit (or a precomputed `beta_fit` entropy matrix) it returns one
row per candidate variable — `imp_score`, graph `degree`, and a `signal`
flag (membership in `sdependent()$signal.vars`) — ranked by `imp_score`.
Complements `gg_udependent()` (the dependency graph) with the "which
variables are signal" ranking. `print` / `summary` / `autoplot` companions
included.
* `gg_auct()` / `plot.gg_auct()`: tidy wrapper and plot for time-varying
AUC from `randomForestRHF::auct.rhf()` (RHF Phase 2). Returns a long
frame `time / auc / se / lower / upper / marker` with an `iauc`
Expand Down
219 changes: 219 additions & 0 deletions R/gg_beta_uvarpro.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
##=============================================================================
#' Per-variable lasso-beta importance from an unsupervised varPro fit
#'
#' Tidy wrapper around [varPro::get.beta.entropy()] for a `uvarpro` object.
#' Where [gg_beta_varpro()] refines the *supervised* release-rule contrast,
#' `gg_beta_uvarpro()` does the unsupervised analogue: `uvarpro()` builds
#' entropy regions with no response, and `get.beta.entropy()` fits a
#' cross-validated lasso within each region to ask how strongly every other
#' variable explains the released variable. Averaging the absolute lasso
#' coefficients per variable gives one number per variable: an unsupervised,
#' lasso-flavoured importance.
#'
#' @details
#' `get.beta.entropy(o)` returns a (released-variable x variable) numeric
#' matrix of absolute lasso coefficients. The column mean (`na.rm = TRUE`) is
#' the per-variable importance reported here, matching the canonical
#' `sort(colMeans(beta, na.rm = TRUE), decreasing = TRUE)` idiom in the
#' `varPro::uvarpro()` help ("iowa housing - illustrates lasso importance").
#'
#' Because `get.beta.entropy()` is expensive (a cross-validated `glmnet` per
#' region), the `beta_fit` argument accepts a pre-computed matrix so you can
#' iterate on the cutoff without re-fitting. The pairing mirrors the
#' `beta_fit` argument of [gg_beta_varpro()].
#'
#' @param object A `uvarpro` object from [varPro::uvarpro()].
#' @param ... Forwarded to [varPro::get.beta.entropy()] when
#' `beta_fit = NULL` (e.g. `pre.filter`, `second.stage`, `use.cv`).
#' Ignored, with a warning, when `beta_fit` is supplied.
#' @param cutoff Selection threshold on `beta_mean`. `NULL` (default) uses
#' `mean(beta_mean)`; a scalar sets it explicitly. Variables at or above the
#' cutoff are flagged `selected`.
#' @param beta_fit Optional pre-computed [varPro::get.beta.entropy()] matrix
#' for `object`. When supplied, must be a numeric matrix with column names
#' (the variables); `...` is then ignored.
#'
#' @return A `gg_beta_uvarpro` object (a `data.frame`), one row per variable,
#' most-important first, with columns:
#' \describe{
#' \item{`variable`}{factor; levels reversed so the most-important
#' variable lands at the top after `coord_flip()` (the `gg_vimp`
#' convention).}
#' \item{`beta_mean`}{`mean(|lasso beta|)` over the released regions
#' (`colMeans(beta, na.rm = TRUE)`).}
#' \item{`n_released`}{number of regions contributing a non-`NA`
#' coefficient for the variable.}
#' \item{`selected`}{logical; `beta_mean >= cutoff`.}
#' }
#' The `provenance` attribute records `source`, `family` (`"unsupv"`),
#' `cutoff`, `n_var`, `n_released_regions`, and `precomputed`.
#'
#' @seealso [gg_beta_varpro()] (supervised analogue), [gg_udependent()],
#' [varPro::get.beta.entropy()], [varPro::uvarpro()].
#'
#' @examples
#' \donttest{
#' if (requireNamespace("varPro", quietly = TRUE)) {
#' set.seed(1)
#' o <- varPro::uvarpro(mtcars, ntree = 50)
#' gg <- gg_beta_uvarpro(o)
#' plot(gg)
#' }
#' }
#'
#' @export
gg_beta_uvarpro <- function(object, ..., cutoff = NULL, beta_fit = NULL) {
UseMethod("gg_beta_uvarpro", object)
}

#' @export
gg_beta_uvarpro.default <- function(object, ..., cutoff = NULL,
beta_fit = NULL) {
stop("gg_beta_uvarpro: expected a 'uvarpro' object from varPro::uvarpro(); ",
"got an object of class ", paste(class(object), collapse = "/"), ".",
call. = FALSE)
}

#' @export
gg_beta_uvarpro.uvarpro <- function(object, ..., cutoff = NULL,
beta_fit = NULL) {
if (!inherits(object, "uvarpro")) {
stop("gg_beta_uvarpro: expected a 'uvarpro' object from varPro::uvarpro().",
call. = FALSE)
}

# Resolve the beta matrix (cache path)
if (is.null(beta_fit)) {
b <- varPro::get.beta.entropy(object, ...)
} else {
.validate_beta_uvarpro(beta_fit)
if (length(list(...)) > 0L) {
warning("gg_beta_uvarpro: arguments in '...' ignored because beta_fit is supplied.",
call. = FALSE)
}
b <- beta_fit
}

# Empty fast-path: no regions / no variables survived
if (is.null(b) || !is.matrix(b) || nrow(b) == 0L || ncol(b) == 0L) {
return(.gg_beta_uvarpro_empty(object, beta_fit, cutoff))
}

beta_mean_v <- colMeans(b, na.rm = TRUE)
n_released_v <- colSums(!is.na(b))

# Most-important first; reverse the factor levels so coord_flip() puts the
# top variable at the top (matches gg_vimp / gg_beta_varpro).
ord_names <- names(sort(beta_mean_v, decreasing = TRUE))

resolved_cutoff <- if (is.null(cutoff)) {
mean(beta_mean_v, na.rm = TRUE)
} else {
as.numeric(cutoff)
}

out <- data.frame(
variable = factor(ord_names, levels = rev(ord_names)),
beta_mean = unname(beta_mean_v[ord_names]),
n_released = as.integer(unname(n_released_v[ord_names])),
stringsAsFactors = FALSE
)
out$selected <- out$beta_mean >= resolved_cutoff
rownames(out) <- NULL

class(out) <- c("gg_beta_uvarpro", "data.frame")
attr(out, "provenance") <- list(
source = "varPro::get.beta.entropy",
family = "unsupv",
ntree = if (!is.null(object$ntree)) as.integer(object$ntree) else NA_integer_,
cutoff = stats::setNames(resolved_cutoff, "unsupv"),
cutoff_default = is.null(cutoff),
n_var = ncol(b),
n_released_regions = nrow(b),
precomputed = !is.null(beta_fit),
xvar.names = colnames(b)
)
out
}

#' @noRd
.validate_beta_uvarpro <- function(beta_fit) {
if (!is.matrix(beta_fit) || !is.numeric(beta_fit)) {
stop("gg_beta_uvarpro: beta_fit does not look like a ",
"varPro::get.beta.entropy() result. Expected a numeric matrix.",
call. = FALSE)
}
if (ncol(beta_fit) > 0L && is.null(colnames(beta_fit))) {
stop("gg_beta_uvarpro: beta_fit must have column names (the variables). ",
"varPro::get.beta.entropy() returns a named matrix.",
call. = FALSE)
}
invisible(NULL)
}

#' @rdname print.gg
#' @export
print.gg_beta_uvarpro <- function(x, ...) {
prov <- attr(x, "provenance")
precomputed <- isTRUE(if (!is.null(prov)) prov$precomputed else FALSE)
n_regions <- if (!is.null(prov)) prov$n_released_regions %||% NA_integer_ else NA_integer_
n_sel <- sum(x$selected, na.rm = TRUE)
cutoff <- if (!is.null(prov)) prov$cutoff %||% NA_real_ else NA_real_
cutoff_val <- if (length(cutoff) >= 1L) cutoff[[1]] else NA_real_
cutoff_default <- isTRUE(if (!is.null(prov)) prov$cutoff_default else FALSE)
cat(.gg_header(x, "gg_beta_uvarpro"),
sprintf(" | cutoff: %.4g%s", cutoff_val,
if (cutoff_default) " (default)" else ""),
sprintf(" | precomputed: %s", precomputed),
"\n",
sprintf(" %d of %d variables selected over %s released region(s)\n",
n_sel, nrow(x),
if (is.na(n_regions)) "NA" else format(n_regions)),
sep = "")
invisible(x)
}

#' @rdname summary.gg
#' @export
summary.gg_beta_uvarpro <- function(object, ...) {
v <- sort(stats::setNames(object$beta_mean, as.character(object$variable)),
decreasing = TRUE)
top <- utils::head(v, 5L)
body <- c(
sprintf("variables: %d (selected: %d)",
nrow(object), sum(object$selected, na.rm = TRUE)),
"top variables by mean |lasso beta|:",
sprintf(" %-14s %.4g", names(top), unname(top))
)
.summary_skel(object, "gg_beta_uvarpro", body)
}

#' @importFrom ggplot2 autoplot
#' @export
autoplot.gg_beta_uvarpro <- function(object, ...) {
plot.gg_beta_uvarpro(object, ...)
}

#' @noRd
.gg_beta_uvarpro_empty <- function(object, beta_fit, cutoff) {
out <- data.frame(
variable = factor(character(0)),
beta_mean = numeric(0),
n_released = integer(0),
selected = logical(0),
stringsAsFactors = FALSE
)
class(out) <- c("gg_beta_uvarpro", "data.frame")
attr(out, "provenance") <- list(
source = "varPro::get.beta.entropy",
family = "unsupv",
ntree = if (!is.null(object$ntree)) as.integer(object$ntree) else NA_integer_,
cutoff = stats::setNames(if (is.null(cutoff)) NA_real_ else as.numeric(cutoff), "unsupv"),
cutoff_default = is.null(cutoff),
n_var = 0L,
n_released_regions = 0L,
precomputed = !is.null(beta_fit),
xvar.names = character(0)
)
out
}
Loading
Loading