humanpred · Gero1999 · Jun 7, 2025 · Jun 7, 2025 · Jun 7, 2025 · Jun 7, 2025
diff --git a/NAMESPACE b/NAMESPACE
@@ -245,17 +245,27 @@ export(time_calc)
 export(ungroup)
 export(var_sparse_auc)
 importFrom(dplyr,"%>%")
+importFrom(dplyr,across)
+importFrom(dplyr,any_of)
+importFrom(dplyr,bind_rows)
+importFrom(dplyr,case_when)
 importFrom(dplyr,filter)
 importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_vars)
 importFrom(dplyr,inner_join)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
+importFrom(dplyr,n)
 importFrom(dplyr,right_join)
+importFrom(dplyr,select)
 importFrom(dplyr,ungroup)
 importFrom(lifecycle,deprecated)
 importFrom(nlme,getGroups)
 importFrom(rlang,.data)
+importFrom(rlang,syms)
 importFrom(stats,formula)
 importFrom(stats,model.frame)
+importFrom(tidyr,fill)
+importFrom(tidyr,unnest)
+importFrom(utils,combn)
diff --git a/R/class-PKNCAdata.R b/R/class-PKNCAdata.R
@@ -170,34 +170,8 @@ PKNCAdata.default <- function(data.conc, data.dose, ...,
 
   # Insert the unit conversion table
   if (missing(units)) {
-    # What unit types are recognized?
-    possible_units <-
-      setdiff(
-        grep(x = names(formals(pknca_units_table)), pattern = "_", invert = TRUE, value = TRUE),
-        "conversions"
-      )
-    possible_units_pref <- paste0(possible_units, "_pref")
-    # Accumulate available units
-    conc_units_values <- ret$conc$units
-    conc_units_cols <- ret$conc$columns[names(ret$conc$columns) %in% possible_units]
-
-    unit_args <- conc_units_values
-    for (nm in names(conc_units_cols)) {
-      unit_args[[nm]] <- unique(stats::na.omit(ret$conc$data[[conc_units_cols[[nm]]]]))
-    }
-
-    if (!identical(ret$dose, NA)) {
-      unit_args <- append(unit_args, ret$dose$units)
-      dose_units_cols <- ret$dose$columns[names(ret$dose$columns) %in% possible_units]
-      for (nm in names(dose_units_cols)) {
-        unit_args[[nm]] <- unique(stats::na.omit(ret$dose$data[[dose_units_cols[[nm]]]]))
-      }
-    }
-    # If there are any units to set, set them here
-    if (length(unit_args) > 0) {
-      unit_args <- lapply(X = unit_args, FUN = drop_attributes)
-      ret$units <- do.call(pknca_units_table, args = unit_args)
-    }
+    # Use the new automatic units table builder
+    ret$units <- pknca_units_table(ret)
   } else {
     stopifnot("`units` must be a data.frame"=is.data.frame(units))
     stopifnot(

diff --git a/R/unit-support.R b/R/unit-support.R
@@ -14,6 +14,7 @@
 #'   by to get a standardized value.  This argument overrides any preferred unit
 #'   conversions from `concu_pref`, `doseu_pref`, `amountu_pref`, or
 #'   `timeu_pref`.
+#' @param ... Additional arguments (not used)
 #' @returns A unit conversion table with columns for "PPTESTCD" and "PPORRESU"
 #'   if `conversions` is not given, and adding "PPSTRESU" and
 #'   "conversion_factor" if `conversions` is given.
@@ -50,10 +51,19 @@
 #'   concu = "ng/mL", doseu = "mg/kg", timeu = "hr", amountu = "mg",
 #'   timeu_pref = "day"
 #' )
+#'
+#' @export
+pknca_units_table <- function(concu, ...) {
+  UseMethod("pknca_units_table")
+}
+
+##' Default method for pknca_units_table
+#'
+#' @rdname pknca_units_table
 #' @export
-pknca_units_table <- function(concu, doseu, amountu, timeu,
+pknca_units_table.default <- function(concu, doseu, amountu, timeu,
                               concu_pref = NULL, doseu_pref = NULL, amountu_pref = NULL, timeu_pref = NULL,
-                              conversions = data.frame()) {
+                              conversions = data.frame(), ...) {
   checkmate::assert_data_frame(conversions)
   if (nrow(conversions) > 0) {
     checkmate::assert_names(
@@ -113,7 +123,7 @@ pknca_units_table <- function(concu, doseu, amountu, timeu,
     conversions <- conversions_pref
   }
 
-  extra_cols <- setdiff(ret$PPTESTCD, names(PKNCA::get.interval.cols()))
+  extra_cols <- setdiff(ret$PPTESTCD, names(get.interval.cols()))
   if (length(extra_cols) > 0) {
     stop("Please report a bug.  Unknown NCA parameters have units defined: ", paste(extra_cols, collapse=", ")) # nocov
   }
@@ -160,6 +170,91 @@ pknca_units_table <- function(concu, doseu, amountu, timeu,
   ret
 }
 
+##' Method for PKNCAdata objects
+#'
+#' @rdname pknca_units_table
+#' @importFrom dplyr across any_of bind_rows case_when filter group_by mutate n select ungroup group_vars
+#' @importFrom tidyr fill unnest
+#' @importFrom rlang syms
+#' @export
+pknca_units_table.PKNCAdata <- function(concu, ..., conversions = data.frame()) {
+
+  # concu is the PKNCAdata object
+  o_conc <- as_PKNCAconc(concu)
+  o_dose <- as_PKNCAdose(concu)
+
+  # PKNCAdose can optionally be no present, being unit undefining
+  if (is.null(o_dose) || all(is.na(o_dose))) o_dose <- o_conc
+
+  # If needed, ensure that the PKNCA objects have the required unit columns
+  o_conc <- ensure_column_unit_exists(o_conc, c("concu", "timeu", "amountu"))
+  o_dose <- ensure_column_unit_exists(o_dose, c("doseu"))
+
+  # Extract relevant columns from o_conc and o_dose
+  group_dose_cols <- dplyr::group_vars(o_dose)
+  group_conc_cols <- dplyr::group_vars(o_conc)
+  concu_col <- o_conc$columns$concu
+  amountu_col <- o_conc$columns$amountu
+  timeu_col <- o_conc$columns$timeu
+  doseu_col <- o_dose$columns$doseu
+  all_unit_cols <- c(concu_col, amountu_col, timeu_col, doseu_col)
+
+  # Join dose units with concentration group columns and units
+  d_concu <- o_conc$data %>%
+    dplyr::select(dplyr::any_of(c(group_conc_cols, concu_col, amountu_col, timeu_col))) %>%
+    unique()
+  d_doseu <- o_dose$data %>%
+    dplyr::select(dplyr::any_of(c(group_dose_cols, doseu_col))) %>%
+    unique()
+
+  groups_units_tbl <- merge(d_concu, d_doseu, all.x = TRUE) %>%
+    dplyr::mutate(dplyr::across(dplyr::everything(), ~ as.character(.))) %>%
+    dplyr::group_by(!!!rlang::syms(group_conc_cols)) %>%
+    tidyr::fill(!!!rlang::syms(all_unit_cols), .direction = "downup") %>%
+    dplyr::ungroup() %>%
+    unique()
+
+  # Check that at least for each concentration group units are uniform
+  mismatching_units_groups <- groups_units_tbl %>%
+    dplyr::add_count(!!!rlang::syms(group_conc_cols), name = "n") %>%
+    dplyr::filter(n > 1) %>%
+    dplyr::select(-n)
+  if (nrow(mismatching_units_groups) > 0) {
+    stop(
+      "Units should be uniform at least across concentration groups. ",
+      "Review the units for the next group(s):\n",
+      paste(utils::capture.output(print(mismatching_units_groups)), collapse = "\n")
+    )
+  }
+
+  # Check that at least one unit column is not NA
+  units.are.all.na <- all(is.na(groups_units_tbl[,all_unit_cols]))
+  if (units.are.all.na) return(NULL)
+
+  groups_units_tbl %>%
+    select_minimal_grouping_cols(all_unit_cols) %>%
+    unique() %>%
+    dplyr::rowwise() %>%
+    dplyr::mutate(
+      pknca_units_tbl = list(
+        pknca_units_table(
+          concu = !!rlang::sym(concu_col),
+          doseu = !!rlang::sym(doseu_col),
+          amountu = !!rlang::sym(amountu_col),
+          timeu = !!rlang::sym(timeu_col),
+          concu_pref = o_conc$units$concu_pref[1],
+          doseu_pref = o_dose$units$doseu_pref[1],
+          amountu_pref = o_conc$units$amountu_pref[1],
+          timeu_pref = o_conc$units$timeu_pref[1],
+          conversions = conversions
+        )
+      )
+    ) %>%
+    tidyr::unnest(cols = c(pknca_units_tbl)) %>%
+    dplyr::select(-dplyr::any_of(all_unit_cols)) %>%
+    as.data.frame()
+}
+
 pknca_units_table_unitless <- function() {
   rbind(
     data.frame(
@@ -464,3 +559,92 @@ pknca_unit_conversion <- function(result, units, allow_partial_missing_units = F
   ret
 }
 
+
+
+#' Ensure Unit Columns Exist in PKNCA Object
+#'
+#' Checks if specified unit columns exist in a PKNCA object (either PKNCAconc or PKNCAdose).
+#' If the columns do not exist, it creates them and assigns default values (NA or existing units).
+#'
+#' @param pknca_obj A PKNCA object (either PKNCAconc or PKNCAdose).
+#' @param unit_name A character vector of unit column names to ensure (concu, amountu, timeu...).
+#' @returns The updated PKNCA object with ensured unit columns.
+#'
+#' @details
+#' The function performs the following steps:
+#' 1. Checks if the specified unit columns exist in the PKNCA object.
+#' 2. If a column does not exist, it creates the column and assigns default values.
+#' 3. If not default values are provided, it assigns NA to the new column.
+#' @keywords Internal
+ensure_column_unit_exists <- function(pknca_obj, unit_name) {
+  for (unit in unit_name) {
+    if (is.null(pknca_obj$columns[[unit]])) {
+      unit_colname <- make.unique(c(names(pknca_obj$data), unit))[ncol(pknca_obj$data) + 1]
+      pknca_obj$columns[[unit]] <- unit_colname
+      if (!is.null(pknca_obj$units[[unit]])) {
+        pknca_obj$data[[unit_colname]] <- pknca_obj$units[[unit]]
+      } else {
+        pknca_obj$data[[unit_colname]] <- NA_character_
+      }
+    }
+  }
+  pknca_obj
+}
+
+#' Find Minimal Grouping Columns for Strata Reconstruction
+#'
+#' This function identifies the smallest set of columns in a data frame whose unique combinations
+#' can reconstruct the grouping structure defined by the specified strata columns.
+#' It removes duplicate, constant, and redundant columns, then searches for the minimal combination
+#' that uniquely identifies each stratum.
+#'
+#' @param df A data frame.
+#' @param strata_cols Column names in df whose unique combination defines the strata.
+#' @returns A data frame containing the strata columns and their minimal set of grouping columns.
+#' @importFrom dplyr mutate select any_of
+#' @importFrom rlang syms
+#' @importFrom utils combn
+#' @keywords Internal
+select_minimal_grouping_cols <- function(df, strata_cols) {
+  # If there is no strata_cols specified, simply return the original df
+  if (length(strata_cols) == 0) return(df)
+
+  # Obtain the comb_vals values of the target column(s)
+  strata_vals <- df %>%
+    mutate(strata_cols_comb = paste(!!!rlang::syms(strata_cols), sep = "_")) %>%
+    .[["strata_cols_comb"]]
+
+  # If the target column(s) only has one level, there are no relevant columns
+  if (length(unique(strata_vals)) == 1) {
+    return(df[strata_cols])
+  }
+
+  candidate_cols <- setdiff(names(df), strata_cols)
+  # 1. Remove columns that are duplicates in levels terms
+  candidate_levels <- lapply(
+    df[candidate_cols], function(x) as.numeric(factor(x, levels = unique(x)))
+  )
+  candidate_cols <- candidate_cols[!duplicated(candidate_levels)]
+
+  # 2. Remove columns with only 1 level
+  candidate_n_levels <- sapply(df[candidate_cols], function(x) length(unique(x)))
+  candidate_cols <- candidate_cols[candidate_n_levels > 1]
+
+  # 3. Check combinations of columns to find minimal key combination to level group strata_cols
+  for (n in seq_len(length(candidate_cols))) {
+    all_candidate_combs <- combn(candidate_cols, n, simplify = FALSE)
+    for (comb in all_candidate_combs) {
+      comb_vals <- apply(df[, comb, drop = FALSE], 1, paste, collapse = "_")
+      if (all(tapply(strata_vals, comb_vals, FUN = \(x) length(unique(x)) == 1))) {
+        return(df[c(comb, strata_cols)])
+      }
+    }
+  }
+  df[strata_cols]
+}
+
+# Add globalVariables for NSE/dplyr/rlang/tidyr usage
+utils::globalVariables(c(
+  "group_vars", "select", "any_of", "across", "everything", "add_count", "syms", "rowwise", "unnest", "pull",
+  "n", "pknca_units_tbl", "PPORRESU", "PPTESTCD", "PPSTRESU", "conversion_factor", "strata_cols_comb"
+))
diff --git a/man/ensure_column_unit_exists.Rd b/man/ensure_column_unit_exists.Rd
diff --git a/man/pknca_units_table.Rd b/man/pknca_units_table.Rd
diff --git a/man/select_minimal_grouping_cols.Rd b/man/select_minimal_grouping_cols.Rd
diff --git a/tests/testthat/test-class-PKNCAdata.R b/tests/testthat/test-class-PKNCAdata.R
@@ -395,7 +395,7 @@ test_that("PKNCAdata units (#336)", {
   o_dose <- PKNCAdose(data = d_dose, dose~time, doseu = "doseu_x")
   expect_error(
     PKNCAdata(o_conc, o_dose),
-    regexp = "Only one unit may be provided at a time: A, C"
+    regexp = "Units should be uniform at least across concentration groups"
   )
 })