diff --git a/NAMESPACE b/NAMESPACE index 1d625987..b0b8e5a3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -245,6 +245,10 @@ export(time_calc) export(ungroup) export(var_sparse_auc) importFrom(dplyr,"%>%") +importFrom(dplyr,across) +importFrom(dplyr,any_of) +importFrom(dplyr,bind_rows) +importFrom(dplyr,case_when) importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) @@ -252,10 +256,16 @@ importFrom(dplyr,group_vars) importFrom(dplyr,inner_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate) +importFrom(dplyr,n) importFrom(dplyr,right_join) +importFrom(dplyr,select) importFrom(dplyr,ungroup) importFrom(lifecycle,deprecated) importFrom(nlme,getGroups) importFrom(rlang,.data) +importFrom(rlang,syms) importFrom(stats,formula) importFrom(stats,model.frame) +importFrom(tidyr,fill) +importFrom(tidyr,unnest) +importFrom(utils,combn) diff --git a/R/class-PKNCAdata.R b/R/class-PKNCAdata.R index 495de041..68d14095 100644 --- a/R/class-PKNCAdata.R +++ b/R/class-PKNCAdata.R @@ -170,34 +170,8 @@ PKNCAdata.default <- function(data.conc, data.dose, ..., # Insert the unit conversion table if (missing(units)) { - # What unit types are recognized? - possible_units <- - setdiff( - grep(x = names(formals(pknca_units_table)), pattern = "_", invert = TRUE, value = TRUE), - "conversions" - ) - possible_units_pref <- paste0(possible_units, "_pref") - # Accumulate available units - conc_units_values <- ret$conc$units - conc_units_cols <- ret$conc$columns[names(ret$conc$columns) %in% possible_units] - - unit_args <- conc_units_values - for (nm in names(conc_units_cols)) { - unit_args[[nm]] <- unique(stats::na.omit(ret$conc$data[[conc_units_cols[[nm]]]])) - } - - if (!identical(ret$dose, NA)) { - unit_args <- append(unit_args, ret$dose$units) - dose_units_cols <- ret$dose$columns[names(ret$dose$columns) %in% possible_units] - for (nm in names(dose_units_cols)) { - unit_args[[nm]] <- unique(stats::na.omit(ret$dose$data[[dose_units_cols[[nm]]]])) - } - } - # If there are any units to set, set them here - if (length(unit_args) > 0) { - unit_args <- lapply(X = unit_args, FUN = drop_attributes) - ret$units <- do.call(pknca_units_table, args = unit_args) - } + # Use the new automatic units table builder + ret$units <- pknca_units_table(ret) } else { stopifnot("`units` must be a data.frame"=is.data.frame(units)) stopifnot( diff --git a/R/unit-support.R b/R/unit-support.R index 74982d4a..695262bf 100644 --- a/R/unit-support.R +++ b/R/unit-support.R @@ -14,6 +14,7 @@ #' by to get a standardized value. This argument overrides any preferred unit #' conversions from `concu_pref`, `doseu_pref`, `amountu_pref`, or #' `timeu_pref`. +#' @param ... Additional arguments (not used) #' @returns A unit conversion table with columns for "PPTESTCD" and "PPORRESU" #' if `conversions` is not given, and adding "PPSTRESU" and #' "conversion_factor" if `conversions` is given. @@ -50,10 +51,19 @@ #' concu = "ng/mL", doseu = "mg/kg", timeu = "hr", amountu = "mg", #' timeu_pref = "day" #' ) +#' +#' @export +pknca_units_table <- function(concu, ...) { + UseMethod("pknca_units_table") +} + +##' Default method for pknca_units_table +#' +#' @rdname pknca_units_table #' @export -pknca_units_table <- function(concu, doseu, amountu, timeu, +pknca_units_table.default <- function(concu, doseu, amountu, timeu, concu_pref = NULL, doseu_pref = NULL, amountu_pref = NULL, timeu_pref = NULL, - conversions = data.frame()) { + conversions = data.frame(), ...) { checkmate::assert_data_frame(conversions) if (nrow(conversions) > 0) { checkmate::assert_names( @@ -113,7 +123,7 @@ pknca_units_table <- function(concu, doseu, amountu, timeu, conversions <- conversions_pref } - extra_cols <- setdiff(ret$PPTESTCD, names(PKNCA::get.interval.cols())) + extra_cols <- setdiff(ret$PPTESTCD, names(get.interval.cols())) if (length(extra_cols) > 0) { stop("Please report a bug. Unknown NCA parameters have units defined: ", paste(extra_cols, collapse=", ")) # nocov } @@ -160,6 +170,91 @@ pknca_units_table <- function(concu, doseu, amountu, timeu, ret } +##' Method for PKNCAdata objects +#' +#' @rdname pknca_units_table +#' @importFrom dplyr across any_of bind_rows case_when filter group_by mutate n select ungroup group_vars +#' @importFrom tidyr fill unnest +#' @importFrom rlang syms +#' @export +pknca_units_table.PKNCAdata <- function(concu, ..., conversions = data.frame()) { + + # concu is the PKNCAdata object + o_conc <- as_PKNCAconc(concu) + o_dose <- as_PKNCAdose(concu) + + # PKNCAdose can optionally be no present, being unit undefining + if (is.null(o_dose) || all(is.na(o_dose))) o_dose <- o_conc + + # If needed, ensure that the PKNCA objects have the required unit columns + o_conc <- ensure_column_unit_exists(o_conc, c("concu", "timeu", "amountu")) + o_dose <- ensure_column_unit_exists(o_dose, c("doseu")) + + # Extract relevant columns from o_conc and o_dose + group_dose_cols <- dplyr::group_vars(o_dose) + group_conc_cols <- dplyr::group_vars(o_conc) + concu_col <- o_conc$columns$concu + amountu_col <- o_conc$columns$amountu + timeu_col <- o_conc$columns$timeu + doseu_col <- o_dose$columns$doseu + all_unit_cols <- c(concu_col, amountu_col, timeu_col, doseu_col) + + # Join dose units with concentration group columns and units + d_concu <- o_conc$data %>% + dplyr::select(dplyr::any_of(c(group_conc_cols, concu_col, amountu_col, timeu_col))) %>% + unique() + d_doseu <- o_dose$data %>% + dplyr::select(dplyr::any_of(c(group_dose_cols, doseu_col))) %>% + unique() + + groups_units_tbl <- merge(d_concu, d_doseu, all.x = TRUE) %>% + dplyr::mutate(dplyr::across(dplyr::everything(), ~ as.character(.))) %>% + dplyr::group_by(!!!rlang::syms(group_conc_cols)) %>% + tidyr::fill(!!!rlang::syms(all_unit_cols), .direction = "downup") %>% + dplyr::ungroup() %>% + unique() + + # Check that at least for each concentration group units are uniform + mismatching_units_groups <- groups_units_tbl %>% + dplyr::add_count(!!!rlang::syms(group_conc_cols), name = "n") %>% + dplyr::filter(n > 1) %>% + dplyr::select(-n) + if (nrow(mismatching_units_groups) > 0) { + stop( + "Units should be uniform at least across concentration groups. ", + "Review the units for the next group(s):\n", + paste(utils::capture.output(print(mismatching_units_groups)), collapse = "\n") + ) + } + + # Check that at least one unit column is not NA + units.are.all.na <- all(is.na(groups_units_tbl[,all_unit_cols])) + if (units.are.all.na) return(NULL) + + groups_units_tbl %>% + select_minimal_grouping_cols(all_unit_cols) %>% + unique() %>% + dplyr::rowwise() %>% + dplyr::mutate( + pknca_units_tbl = list( + pknca_units_table( + concu = !!rlang::sym(concu_col), + doseu = !!rlang::sym(doseu_col), + amountu = !!rlang::sym(amountu_col), + timeu = !!rlang::sym(timeu_col), + concu_pref = o_conc$units$concu_pref[1], + doseu_pref = o_dose$units$doseu_pref[1], + amountu_pref = o_conc$units$amountu_pref[1], + timeu_pref = o_conc$units$timeu_pref[1], + conversions = conversions + ) + ) + ) %>% + tidyr::unnest(cols = c(pknca_units_tbl)) %>% + dplyr::select(-dplyr::any_of(all_unit_cols)) %>% + as.data.frame() +} + pknca_units_table_unitless <- function() { rbind( data.frame( @@ -464,3 +559,92 @@ pknca_unit_conversion <- function(result, units, allow_partial_missing_units = F ret } + + +#' Ensure Unit Columns Exist in PKNCA Object +#' +#' Checks if specified unit columns exist in a PKNCA object (either PKNCAconc or PKNCAdose). +#' If the columns do not exist, it creates them and assigns default values (NA or existing units). +#' +#' @param pknca_obj A PKNCA object (either PKNCAconc or PKNCAdose). +#' @param unit_name A character vector of unit column names to ensure (concu, amountu, timeu...). +#' @returns The updated PKNCA object with ensured unit columns. +#' +#' @details +#' The function performs the following steps: +#' 1. Checks if the specified unit columns exist in the PKNCA object. +#' 2. If a column does not exist, it creates the column and assigns default values. +#' 3. If not default values are provided, it assigns NA to the new column. +#' @keywords Internal +ensure_column_unit_exists <- function(pknca_obj, unit_name) { + for (unit in unit_name) { + if (is.null(pknca_obj$columns[[unit]])) { + unit_colname <- make.unique(c(names(pknca_obj$data), unit))[ncol(pknca_obj$data) + 1] + pknca_obj$columns[[unit]] <- unit_colname + if (!is.null(pknca_obj$units[[unit]])) { + pknca_obj$data[[unit_colname]] <- pknca_obj$units[[unit]] + } else { + pknca_obj$data[[unit_colname]] <- NA_character_ + } + } + } + pknca_obj +} + +#' Find Minimal Grouping Columns for Strata Reconstruction +#' +#' This function identifies the smallest set of columns in a data frame whose unique combinations +#' can reconstruct the grouping structure defined by the specified strata columns. +#' It removes duplicate, constant, and redundant columns, then searches for the minimal combination +#' that uniquely identifies each stratum. +#' +#' @param df A data frame. +#' @param strata_cols Column names in df whose unique combination defines the strata. +#' @returns A data frame containing the strata columns and their minimal set of grouping columns. +#' @importFrom dplyr mutate select any_of +#' @importFrom rlang syms +#' @importFrom utils combn +#' @keywords Internal +select_minimal_grouping_cols <- function(df, strata_cols) { + # If there is no strata_cols specified, simply return the original df + if (length(strata_cols) == 0) return(df) + + # Obtain the comb_vals values of the target column(s) + strata_vals <- df %>% + mutate(strata_cols_comb = paste(!!!rlang::syms(strata_cols), sep = "_")) %>% + .[["strata_cols_comb"]] + + # If the target column(s) only has one level, there are no relevant columns + if (length(unique(strata_vals)) == 1) { + return(df[strata_cols]) + } + + candidate_cols <- setdiff(names(df), strata_cols) + # 1. Remove columns that are duplicates in levels terms + candidate_levels <- lapply( + df[candidate_cols], function(x) as.numeric(factor(x, levels = unique(x))) + ) + candidate_cols <- candidate_cols[!duplicated(candidate_levels)] + + # 2. Remove columns with only 1 level + candidate_n_levels <- sapply(df[candidate_cols], function(x) length(unique(x))) + candidate_cols <- candidate_cols[candidate_n_levels > 1] + + # 3. Check combinations of columns to find minimal key combination to level group strata_cols + for (n in seq_len(length(candidate_cols))) { + all_candidate_combs <- combn(candidate_cols, n, simplify = FALSE) + for (comb in all_candidate_combs) { + comb_vals <- apply(df[, comb, drop = FALSE], 1, paste, collapse = "_") + if (all(tapply(strata_vals, comb_vals, FUN = \(x) length(unique(x)) == 1))) { + return(df[c(comb, strata_cols)]) + } + } + } + df[strata_cols] +} + +# Add globalVariables for NSE/dplyr/rlang/tidyr usage +utils::globalVariables(c( + "group_vars", "select", "any_of", "across", "everything", "add_count", "syms", "rowwise", "unnest", "pull", + "n", "pknca_units_tbl", "PPORRESU", "PPTESTCD", "PPSTRESU", "conversion_factor", "strata_cols_comb" +)) diff --git a/man/ensure_column_unit_exists.Rd b/man/ensure_column_unit_exists.Rd new file mode 100644 index 00000000..5c630851 --- /dev/null +++ b/man/ensure_column_unit_exists.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/unit-support.R +\name{ensure_column_unit_exists} +\alias{ensure_column_unit_exists} +\title{Ensure Unit Columns Exist in PKNCA Object} +\usage{ +ensure_column_unit_exists(pknca_obj, unit_name) +} +\arguments{ +\item{pknca_obj}{A PKNCA object (either PKNCAconc or PKNCAdose).} + +\item{unit_name}{A character vector of unit column names to ensure (concu, amountu, timeu...).} +} +\value{ +The updated PKNCA object with ensured unit columns. +} +\description{ +Checks if specified unit columns exist in a PKNCA object (either PKNCAconc or PKNCAdose). +If the columns do not exist, it creates them and assigns default values (NA or existing units). +} +\details{ +The function performs the following steps: +\enumerate{ +\item Checks if the specified unit columns exist in the PKNCA object. +\item If a column does not exist, it creates the column and assigns default values. +\item If not default values are provided, it assigns NA to the new column. +} +} +\keyword{Internal} diff --git a/man/pknca_units_table.Rd b/man/pknca_units_table.Rd index 73339027..cd5a0867 100644 --- a/man/pknca_units_table.Rd +++ b/man/pknca_units_table.Rd @@ -2,9 +2,13 @@ % Please edit documentation in R/unit-support.R \name{pknca_units_table} \alias{pknca_units_table} +\alias{pknca_units_table.default} +\alias{pknca_units_table.PKNCAdata} \title{Create a unit assignment and conversion table} \usage{ -pknca_units_table( +pknca_units_table(concu, ...) + +\method{pknca_units_table}{default}( concu, doseu, amountu, @@ -13,13 +17,18 @@ pknca_units_table( doseu_pref = NULL, amountu_pref = NULL, timeu_pref = NULL, - conversions = data.frame() + conversions = data.frame(), + ... ) + +\method{pknca_units_table}{PKNCAdata}(concu, ..., conversions = data.frame()) } \arguments{ \item{concu, doseu, amountu, timeu}{Units for concentration, dose, amount, and time in the source data} +\item{...}{Additional arguments (not used)} + \item{concu_pref, doseu_pref, amountu_pref, timeu_pref}{Preferred units for reporting; \code{conversions} will be automatically.} @@ -72,6 +81,7 @@ pknca_units_table( concu = "ng/mL", doseu = "mg/kg", timeu = "hr", amountu = "mg", timeu_pref = "day" ) + } \seealso{ The \code{units} argument for \code{\link[=PKNCAdata]{PKNCAdata()}} diff --git a/man/select_minimal_grouping_cols.Rd b/man/select_minimal_grouping_cols.Rd new file mode 100644 index 00000000..d080bd42 --- /dev/null +++ b/man/select_minimal_grouping_cols.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/unit-support.R +\name{select_minimal_grouping_cols} +\alias{select_minimal_grouping_cols} +\title{Find Minimal Grouping Columns for Strata Reconstruction} +\usage{ +select_minimal_grouping_cols(df, strata_cols) +} +\arguments{ +\item{df}{A data frame.} + +\item{strata_cols}{Column names in df whose unique combination defines the strata.} +} +\value{ +A data frame containing the strata columns and their minimal set of grouping columns. +} +\description{ +This function identifies the smallest set of columns in a data frame whose unique combinations +can reconstruct the grouping structure defined by the specified strata columns. +It removes duplicate, constant, and redundant columns, then searches for the minimal combination +that uniquely identifies each stratum. +} +\keyword{Internal} diff --git a/tests/testthat/test-class-PKNCAdata.R b/tests/testthat/test-class-PKNCAdata.R index a401b373..2bcb03be 100644 --- a/tests/testthat/test-class-PKNCAdata.R +++ b/tests/testthat/test-class-PKNCAdata.R @@ -395,7 +395,7 @@ test_that("PKNCAdata units (#336)", { o_dose <- PKNCAdose(data = d_dose, dose~time, doseu = "doseu_x") expect_error( PKNCAdata(o_conc, o_dose), - regexp = "Only one unit may be provided at a time: A, C" + regexp = "Units should be uniform at least across concentration groups" ) }) diff --git a/tests/testthat/test-unit-support.R b/tests/testthat/test-unit-support.R index f9d0c198..b79a66df 100644 --- a/tests/testthat/test-unit-support.R +++ b/tests/testthat/test-unit-support.R @@ -330,3 +330,131 @@ test_that("pknca_unit_conversion", { fixed = TRUE ) }) + +# Tests for pknca_units_table for PKNCAdata +test_that("pknca_units_table for PKNCAdata", { + + # Subset the data to only include USUBJID 8 (2 analytes, A & B) + d_conc <- data.frame( + subject = 1, + time = rep(1:10, times = 4), + conc = rep(c(0:5, 4:1), times = 4), + analyte = rep(c("A", "B"), each = 20), + specimen = rep(c("blood", "urine"), each = 10, times = 4), + dose = 100, + treatment = rep(c("drug1", "drug2"), each = 10 * 4) + ) + d_dose <- unique(d_conc[d_conc$time %in% c(0, 5), c("dose", "time", "subject", "treatment")]) + + # Creates a stratified units table when PKNCAconc has a unit-stratifying group column + for (strat_var in c("specimen", "analyte")) { + d_conc$concu_col <- ifelse(d_conc[[strat_var]] == d_conc[[strat_var]][1], "ng/mL", "ug/mL") + + o_conc <- PKNCAconc(d_conc, conc ~ time | treatment + specimen + subject / analyte, concu = "concu_col") + o_dose <- PKNCAdose(d_dose, dose ~ time | treatment + subject) + o_data <- PKNCAdata(o_conc, o_dose) + units_table <- expect_no_error(pknca_units_table(o_data)) + + expect_equal( + units_table[units_table$PPTESTCD == "cmax", c(strat_var, "PPORRESU")], + data.frame( + specimen = c(unique(d_conc[[strat_var]])[1], unique(d_conc[[strat_var]])[2]), + PPORRESU = c("ng/mL", "ug/mL") + ), ignore_attr = TRUE + ) + } + + # Creates a stratified units table when PKNCAconc has two unit-stratifying group columns + d_conc$concu_col <- ifelse(d_conc$analyte == "A", "ng/mL", "ug/mL") + d_conc$concu_col <- ifelse(d_conc$specimen == "blood", d_conc$concu_col, "pg/mL") + o_conc <- PKNCAconc(d_conc, conc ~ time | treatment + specimen + subject / analyte, concu = "concu_col") + o_dose <- PKNCAdose(d_dose, dose ~ time | treatment + subject) + o_data <- PKNCAdata(o_conc, o_dose) + units_table <- expect_no_error(pknca_units_table(o_data)) + + expect_equal( + units_table[units_table$PPTESTCD == "cmax",], + data.frame( + specimen = c("blood", "urine", "blood", "urine"), + analyte = rep(c("A", "B"), each = 2), + PPORRESU = c("ng/mL", "pg/mL", "ug/mL", "pg/mL"), + PPTESTCD = "cmax" + ), ignore_attr = TRUE + ) + + # Creates a stratified units table when PKNCAdose has a unit-stratifying group column + d_dose$doseu_col <- ifelse(d_dose$treatment == d_dose$treatment[1], "mg", "ug") + o_conc <- PKNCAconc(d_conc, conc ~ time | treatment + specimen + subject / analyte) + o_dose <- PKNCAdose(d_dose, dose ~ time | treatment + subject, doseu = "doseu_col") + o_data <- PKNCAdata(o_conc, o_dose) + units_table <- expect_no_error(pknca_units_table(o_data)) + + expect_equal( + units_table[units_table$PPTESTCD == "totdose",], + data.frame( + treatment = c("drug1", "drug2"), + PPORRESU = c("mg", "ug"), + PPTESTCD = "totdose" + ), ignore_attr = TRUE + ) + + # Creates an uniform units table when units are not defined as columns + o_conc <- PKNCAconc( + d_conc, conc ~ time | treatment + specimen + subject / analyte, + concu = "ng/mL", timeu = "h" + ) + o_dose <- PKNCAdose( + d_dose, dose ~ time | treatment + subject, + doseu = "mg" + ) + o_data <- PKNCAdata(o_conc, o_dose) + units_table <- expect_no_error(pknca_units_table(o_data)) + expect_equal( + units_table[units_table$PPTESTCD == "cmax.dn",], + data.frame( + PPORRESU = c("(ng/mL)/mg"), + PPTESTCD = c("cmax.dn") + ), ignore_attr = TRUE + ) + + # Returns NULL when no units are defined anywhere + o_conc <- PKNCAconc(d_conc, conc ~ time | treatment + specimen + subject / analyte) + o_dose <- PKNCAdose(d_dose, dose ~ time | treatment + subject) + o_data <- PKNCAdata(o_conc, o_dose) + units_table <- expect_no_error(pknca_units_table(o_data)) + + expect_true(is.null(units_table)) + + # Errors when units are not uniform within a concentration group + d_conc$concu_col <- "ng/mL" + d_conc$concu_col[1] <- "pg/L" # Introduce inconsistency + o_conc <- PKNCAconc(d_conc, conc ~ time | treatment + specimen + subject / analyte, concu = "concu_col") + o_dose <- PKNCAdose(d_dose, dose ~ time | treatment + subject) + expect_error( + PKNCAdata(o_conc, o_dose), + regexp = "Units should be uniform at least across concentration groups.*" + ) +}) + +test_that("select_level_grouping_cols", { + # Make a dataset where a variable `d` depends on `a` & `b` + data <- data.frame( + a = rep(letters[c(1, 2, 3)], each = 4), + b = rep(letters[c(1, 2)], each = 3), + c = letters[1] + ) + data$d <- paste0(data$a, data$b) + + # Returns the minimal grouping_columns (a, b) for one target columns + result <- select_minimal_grouping_cols(data, "d") + expect_equal(result, data[c("a", "b", "d")]) + + # Returns the original data if all grouping columns are needed + result <- select_minimal_grouping_cols(data, c("c", "d")) + expect_equal(result, data) + + # Returns just the strata columns if no stratification groups are found + data[, "a"] <- 10 + result <- select_minimal_grouping_cols(data, "d") + expect_equal(result, data["d"]) +})