-
Notifications
You must be signed in to change notification settings - Fork 29
Allow non-unique units columns (PKNCAconc & PKNCAdose) #435
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
cc4d3c2
eae9cfb
5fa7fd5
5786c22
ebb0cab
9ded111
4b70ac2
518347f
41d8bb5
c4c6c69
72e6d55
64285e2
3b0aa18
bcfe407
02bff3d
1c71730
04b393c
957091b
3c7e6fc
efb9118
1d1983d
877a26e
67a5501
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| #' by to get a standardized value. This argument overrides any preferred unit | ||
| #' conversions from `concu_pref`, `doseu_pref`, `amountu_pref`, or | ||
| #' `timeu_pref`. | ||
| #' @param ... Additional arguments (not used) | ||
| #' @returns A unit conversion table with columns for "PPTESTCD" and "PPORRESU" | ||
| #' if `conversions` is not given, and adding "PPSTRESU" and | ||
| #' "conversion_factor" if `conversions` is given. | ||
|
|
@@ -50,10 +51,19 @@ | |
| #' concu = "ng/mL", doseu = "mg/kg", timeu = "hr", amountu = "mg", | ||
| #' timeu_pref = "day" | ||
| #' ) | ||
| #' | ||
| #' @export | ||
| pknca_units_table <- function(concu, ...) { | ||
| UseMethod("pknca_units_table") | ||
| } | ||
|
|
||
| ##' Default method for pknca_units_table | ||
| #' | ||
| #' @rdname pknca_units_table | ||
| #' @export | ||
| pknca_units_table <- function(concu, doseu, amountu, timeu, | ||
| pknca_units_table.default <- function(concu, doseu, amountu, timeu, | ||
| concu_pref = NULL, doseu_pref = NULL, amountu_pref = NULL, timeu_pref = NULL, | ||
| conversions = data.frame()) { | ||
| conversions = data.frame(), ...) { | ||
| checkmate::assert_data_frame(conversions) | ||
| if (nrow(conversions) > 0) { | ||
| checkmate::assert_names( | ||
|
|
@@ -113,7 +123,7 @@ pknca_units_table <- function(concu, doseu, amountu, timeu, | |
| conversions <- conversions_pref | ||
| } | ||
|
|
||
| extra_cols <- setdiff(ret$PPTESTCD, names(PKNCA::get.interval.cols())) | ||
| extra_cols <- setdiff(ret$PPTESTCD, names(get.interval.cols())) | ||
| if (length(extra_cols) > 0) { | ||
| stop("Please report a bug. Unknown NCA parameters have units defined: ", paste(extra_cols, collapse=", ")) # nocov | ||
| } | ||
|
|
@@ -160,6 +170,91 @@ pknca_units_table <- function(concu, doseu, amountu, timeu, | |
| ret | ||
| } | ||
|
|
||
| ##' Method for PKNCAdata objects | ||
| #' | ||
| #' @rdname pknca_units_table | ||
| #' @importFrom dplyr across any_of bind_rows case_when filter group_by mutate n select ungroup group_vars | ||
| #' @importFrom tidyr fill unnest | ||
| #' @importFrom rlang syms | ||
| #' @export | ||
| pknca_units_table.PKNCAdata <- function(concu, ..., conversions = data.frame()) { | ||
|
|
||
| # concu is the PKNCAdata object | ||
| o_conc <- as_PKNCAconc(concu) | ||
| o_dose <- as_PKNCAdose(concu) | ||
|
|
||
| # PKNCAdose can optionally be no present, being unit undefining | ||
| if (is.null(o_dose) || all(is.na(o_dose))) o_dose <- o_conc | ||
|
|
||
| # If needed, ensure that the PKNCA objects have the required unit columns | ||
| o_conc <- ensure_column_unit_exists(o_conc, c("concu", "timeu", "amountu")) | ||
| o_dose <- ensure_column_unit_exists(o_dose, c("doseu")) | ||
|
|
||
| # Extract relevant columns from o_conc and o_dose | ||
| group_dose_cols <- dplyr::group_vars(o_dose) | ||
| group_conc_cols <- dplyr::group_vars(o_conc) | ||
| concu_col <- o_conc$columns$concu | ||
| amountu_col <- o_conc$columns$amountu | ||
| timeu_col <- o_conc$columns$timeu | ||
| doseu_col <- o_dose$columns$doseu | ||
| all_unit_cols <- c(concu_col, amountu_col, timeu_col, doseu_col) | ||
|
|
||
| # Join dose units with concentration group columns and units | ||
| d_concu <- o_conc$data %>% | ||
| dplyr::select(dplyr::any_of(c(group_conc_cols, concu_col, amountu_col, timeu_col))) %>% | ||
| unique() | ||
| d_doseu <- o_dose$data %>% | ||
| dplyr::select(dplyr::any_of(c(group_dose_cols, doseu_col))) %>% | ||
| unique() | ||
|
|
||
| groups_units_tbl <- merge(d_concu, d_doseu, all.x = TRUE) %>% | ||
| dplyr::mutate(dplyr::across(dplyr::everything(), ~ as.character(.))) %>% | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to convert everything to character? I think that it should all be character from the original input. If it is not (perhaps it's a factor), then we should likely handle factor conversion elsewhere. This seems like it is possibly hiding a data problem from the user (possibly causing a hard-to-trace bug in the future) and should be corrected in source data rather than here. |
||
| dplyr::group_by(!!!rlang::syms(group_conc_cols)) %>% | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use |
||
| tidyr::fill(!!!rlang::syms(all_unit_cols), .direction = "downup") %>% | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is |
||
| dplyr::ungroup() %>% | ||
| unique() | ||
|
|
||
| # Check that at least for each concentration group units are uniform | ||
| mismatching_units_groups <- groups_units_tbl %>% | ||
| dplyr::add_count(!!!rlang::syms(group_conc_cols), name = "n") %>% | ||
| dplyr::filter(n > 1) %>% | ||
| dplyr::select(-n) | ||
| if (nrow(mismatching_units_groups) > 0) { | ||
| stop( | ||
| "Units should be uniform at least across concentration groups. ", | ||
| "Review the units for the next group(s):\n", | ||
| paste(utils::capture.output(print(mismatching_units_groups)), collapse = "\n") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than |
||
| ) | ||
| } | ||
|
|
||
| # Check that at least one unit column is not NA | ||
| units.are.all.na <- all(is.na(groups_units_tbl[,all_unit_cols])) | ||
| if (units.are.all.na) return(NULL) | ||
|
|
||
| groups_units_tbl %>% | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please convert this to a loop for readability (it will only likely be called a few times, so it should not be a performance issue). |
||
| select_minimal_grouping_cols(all_unit_cols) %>% | ||
| unique() %>% | ||
| dplyr::rowwise() %>% | ||
| dplyr::mutate( | ||
| pknca_units_tbl = list( | ||
| pknca_units_table( | ||
| concu = !!rlang::sym(concu_col), | ||
| doseu = !!rlang::sym(doseu_col), | ||
| amountu = !!rlang::sym(amountu_col), | ||
| timeu = !!rlang::sym(timeu_col), | ||
| concu_pref = o_conc$units$concu_pref[1], | ||
| doseu_pref = o_dose$units$doseu_pref[1], | ||
| amountu_pref = o_conc$units$amountu_pref[1], | ||
| timeu_pref = o_conc$units$timeu_pref[1], | ||
| conversions = conversions | ||
| ) | ||
| ) | ||
| ) %>% | ||
| tidyr::unnest(cols = c(pknca_units_tbl)) %>% | ||
| dplyr::select(-dplyr::any_of(all_unit_cols)) %>% | ||
| as.data.frame() | ||
| } | ||
|
|
||
| pknca_units_table_unitless <- function() { | ||
| rbind( | ||
| data.frame( | ||
|
|
@@ -464,3 +559,92 @@ pknca_unit_conversion <- function(result, units, allow_partial_missing_units = F | |
| ret | ||
| } | ||
|
|
||
|
|
||
|
|
||
| #' Ensure Unit Columns Exist in PKNCA Object | ||
| #' | ||
| #' Checks if specified unit columns exist in a PKNCA object (either PKNCAconc or PKNCAdose). | ||
| #' If the columns do not exist, it creates them and assigns default values (NA or existing units). | ||
| #' | ||
| #' @param pknca_obj A PKNCA object (either PKNCAconc or PKNCAdose). | ||
| #' @param unit_name A character vector of unit column names to ensure (concu, amountu, timeu...). | ||
| #' @returns The updated PKNCA object with ensured unit columns. | ||
| #' | ||
| #' @details | ||
| #' The function performs the following steps: | ||
| #' 1. Checks if the specified unit columns exist in the PKNCA object. | ||
| #' 2. If a column does not exist, it creates the column and assigns default values. | ||
| #' 3. If not default values are provided, it assigns NA to the new column. | ||
| #' @keywords Internal | ||
| ensure_column_unit_exists <- function(pknca_obj, unit_name) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that this is already handled as part of the |
||
| for (unit in unit_name) { | ||
| if (is.null(pknca_obj$columns[[unit]])) { | ||
| unit_colname <- make.unique(c(names(pknca_obj$data), unit))[ncol(pknca_obj$data) + 1] | ||
| pknca_obj$columns[[unit]] <- unit_colname | ||
| if (!is.null(pknca_obj$units[[unit]])) { | ||
| pknca_obj$data[[unit_colname]] <- pknca_obj$units[[unit]] | ||
| } else { | ||
| pknca_obj$data[[unit_colname]] <- NA_character_ | ||
| } | ||
| } | ||
| } | ||
| pknca_obj | ||
| } | ||
|
|
||
| #' Find Minimal Grouping Columns for Strata Reconstruction | ||
| #' | ||
| #' This function identifies the smallest set of columns in a data frame whose unique combinations | ||
| #' can reconstruct the grouping structure defined by the specified strata columns. | ||
| #' It removes duplicate, constant, and redundant columns, then searches for the minimal combination | ||
| #' that uniquely identifies each stratum. | ||
| #' | ||
| #' @param df A data frame. | ||
| #' @param strata_cols Column names in df whose unique combination defines the strata. | ||
| #' @returns A data frame containing the strata columns and their minimal set of grouping columns. | ||
| #' @importFrom dplyr mutate select any_of | ||
| #' @importFrom rlang syms | ||
| #' @importFrom utils combn | ||
| #' @keywords Internal | ||
| select_minimal_grouping_cols <- function(df, strata_cols) { | ||
| # If there is no strata_cols specified, simply return the original df | ||
| if (length(strata_cols) == 0) return(df) | ||
|
|
||
| # Obtain the comb_vals values of the target column(s) | ||
| strata_vals <- df %>% | ||
| mutate(strata_cols_comb = paste(!!!rlang::syms(strata_cols), sep = "_")) %>% | ||
| .[["strata_cols_comb"]] | ||
|
|
||
| # If the target column(s) only has one level, there are no relevant columns | ||
| if (length(unique(strata_vals)) == 1) { | ||
| return(df[strata_cols]) | ||
| } | ||
|
|
||
| candidate_cols <- setdiff(names(df), strata_cols) | ||
| # 1. Remove columns that are duplicates in levels terms | ||
| candidate_levels <- lapply( | ||
| df[candidate_cols], function(x) as.numeric(factor(x, levels = unique(x))) | ||
| ) | ||
| candidate_cols <- candidate_cols[!duplicated(candidate_levels)] | ||
|
|
||
| # 2. Remove columns with only 1 level | ||
| candidate_n_levels <- sapply(df[candidate_cols], function(x) length(unique(x))) | ||
| candidate_cols <- candidate_cols[candidate_n_levels > 1] | ||
|
|
||
| # 3. Check combinations of columns to find minimal key combination to level group strata_cols | ||
| for (n in seq_len(length(candidate_cols))) { | ||
| all_candidate_combs <- combn(candidate_cols, n, simplify = FALSE) | ||
| for (comb in all_candidate_combs) { | ||
| comb_vals <- apply(df[, comb, drop = FALSE], 1, paste, collapse = "_") | ||
| if (all(tapply(strata_vals, comb_vals, FUN = \(x) length(unique(x)) == 1))) { | ||
| return(df[c(comb, strata_cols)]) | ||
| } | ||
| } | ||
| } | ||
| df[strata_cols] | ||
| } | ||
|
|
||
| # Add globalVariables for NSE/dplyr/rlang/tidyr usage | ||
| utils::globalVariables(c( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use double colon notation or |
||
| "group_vars", "select", "any_of", "across", "everything", "add_count", "syms", "rowwise", "unnest", "pull", | ||
| "n", "pknca_units_tbl", "PPORRESU", "PPTESTCD", "PPSTRESU", "conversion_factor", "strata_cols_comb" | ||
| )) | ||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use base R for readability: