Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: rtauargus
Type: Package
Title: Using Tau-Argus from R
Language: fr
Version: 1.3.1
Version: 1.3.2
Depends: R (>= 3.5.0)
Imports:
purrr (>= 0.2),
Expand Down Expand Up @@ -67,7 +67,7 @@ Description: Protects tables by calling the Tau-Argus software from R.
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
VignetteBuilder: knitr
URL: https://inseefrlab.github.io/rtauargus,
https://github.com/inseefrlab/rtauargus,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export(from_5_to_3)
export(grp_tab_in_cluster)
export(grp_tab_names)
export(identify_hrc)
export(identify_hrc_with_eq)
export(import)
export(length_tabs)
export(micro_arb)
Expand Down Expand Up @@ -59,6 +60,7 @@ importFrom(dplyr,rowwise)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(dplyr,ungroup)
importFrom(dplyr,where)
importFrom(igraph,graph_from_data_frame)
importFrom(igraph,which_mutual)
importFrom(lifecycle,badge)
Expand Down
14 changes: 11 additions & 3 deletions R/analyse_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#' creating a structured output for further processing.
#'
#' @param df_metadata A dataframe containing metadata in wide format.
#' @param df_eq_indicator A datadframe containing the indicators equations if needed.
#' @param verbose Logical. If `TRUE`, returns a detailed list of intermediate results
#' from each processing step. If `FALSE`, returns only the cluster assignments. Defaults to `FALSE`.
#'
Expand All @@ -28,7 +29,7 @@
#' @details The function performs the following steps:
#' \itemize{
#' \item Converts the metadata from wide format to long format using \code{wide_to_long}.
#' \item Identifies hierarchical relationships and renames variables with \code{identify_hrc}.
#' \item Identifies hierarchical relationships and renames variables with \code{identify_hrc} or \code{identify_hrc_with_eq}.
#' \item Splits hierarchical relationships into clusters using \code{split_in_clusters}.
#' \item Creates edges to describe the relationships via \code{create_edges}.
#' \item Generates translation tables for regrouping with \code{grp_tab_names}.
Expand All @@ -53,7 +54,7 @@
#' `r lifecycle::badge("experimental")`
#'
#' @export
analyse_metadata <- function(df_metadata,verbose = FALSE){
analyse_metadata <- function(df_metadata,df_eq_indicator = NULL,verbose = FALSE){
# check that the input is in the right format: right column names
check_column_names <- function(df) {
# Expected fixed column names
Expand Down Expand Up @@ -102,7 +103,14 @@ analyse_metadata <- function(df_metadata,verbose = FALSE){

# start of the actual analysis
df_metadata_long <- wide_to_long(df_metadata)
list_hrc_identified <- identify_hrc(df_metadata_long)
if(is.null(df_eq_indicator)){
list_hrc_identified <- identify_hrc(df_metadata_long)
}else{
warning("For the variables part of equations specified in df_eq_indicator,
the hrc_indicator column will be ignored.")
list_hrc_identified <- identify_hrc_with_eq(df_metadata_long,df_eq_indicator)
}

list_split <- split_in_clusters(list_hrc_identified)
list_desc_links <- create_edges(list_split)
list_translation_tables <- grp_tab_names(list_desc_links)
Expand Down
23 changes: 20 additions & 3 deletions R/create_edges.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,26 +57,43 @@ create_edges <- function(list_split){
small_tibble$spanning}))
})
list_desc_links <- nested_crois %>% map(function(big_tibble) {
# Condition for clusters that only have one table
# Only compare the spanning variables for clusters that have more than 1 table
if(length(big_tibble$table_name) > 1) {
spannings_nom_tab <- combn(big_tibble$table_name, 2, FUN = list)

tab_to_keep <- map(spannings_nom_tab, function(crois) {
# Extract the first table (Ta) corresponding to the first table of the
# pair of tables we are studying
Ta <- big_tibble %>% filter(table_name == crois[[1]])
# Get the 'spanning' column for Ta
crois_Ta <- Ta$spanning[[1]]

# Extract the first table (Tb) corresponding to the second table of the
# pair of tables we are studying
Tb <- big_tibble %>% filter(table_name == crois[[2]])
# Get the 'spanning' column for Tb
crois_Tb <- Tb$spanning[[1]]

# Create an empty data frame to store origin → destination relationships
df_origin_dest <- data.frame(from = character(), to = character(), stringsAsFactors = FALSE)

# Check if all elements of Ta are included in Tb
if(all(crois_Ta %in% crois_Tb)) {
# Add a row indicating that Ta can go to Tb
df_origin_dest <- rbind(df_origin_dest,
data.frame(from = crois[[1]], to = crois[[2]],stringsAsFactors = FALSE))
data.frame(from = crois[[1]], to = crois[[2]], stringsAsFactors = FALSE))
}

# Check if all elements of Tb are included in Ta
if(all(crois_Tb %in% crois_Ta)) {
# Add a row indicating that Tb can go to Ta
df_origin_dest <- rbind(df_origin_dest,
data.frame(from = crois[[2]], to = crois[[1]], stringsAsFactors = FALSE))
}

# Return the data frame containing possible links
return(df_origin_dest)
})

# Filter non empty tables and combine them
tab_to_keep_compact <- tab_to_keep %>%
purrr::discard(is.null) %>%
Expand Down
26 changes: 19 additions & 7 deletions R/format_template.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,23 +78,35 @@ filter_on_marginal_of_spanning_var <- function(data, criteria, subset_keys) {
)
# Create filter expressions for all other keys with !=
other_keys <- setdiff(names(criteria), subset_keys)
filter_expr_not_in <- purrr::map2(
other_keys,
criteria[other_keys],
~ rlang::expr(!!rlang::sym(.x) != !!.y)
)
if(all(other_keys == "")){
# treat the eventuality of all spanning variables being crossed on their
# non total values
not_totals_all_spannings <- purrr::map2(
subset_keys,
criteria[subset_keys],
~ rlang::expr(!!rlang::sym(.x) != !!.y)
)
return(data %>% filter(!!!not_totals_all_spannings))
} else {
filter_expr_not_in <- purrr::map2(
other_keys,
criteria[other_keys],
~ rlang::expr(!!rlang::sym(.x) != !!.y)
)
}

# Combine the two sets of expressions
combined_filter_expr <- c(filter_expr_in, filter_expr_not_in)
# Apply the combined filter
data %>% filter(!!!combined_filter_expr) %>% select(-!!subset_keys)
return(data %>% filter(!!!combined_filter_expr) %>% select(-!!subset_keys))
}

#' Determines the tables described in a template gathering all the published cells
#'
#' @param data template gathering all the published cells
#' @param indicator_column name of the column in which the indicators are
#' @param spanning_var_tot a named list of the spanning variables and their totals
#' @param field_columns vecotr of all the columns that are fields (ex: year of collect)
#' @param field_columns vector of all the columns that are fields (ex: year of collect)
#'
#' @return named list of a dataframe describing the tables (metadata) and a list of
#' the modalities of each hierarchical variable (modalities)
Expand Down
3 changes: 2 additions & 1 deletion R/globals.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ utils::globalVariables(
c("table_name", "data","cluster","field","hrc_field","indicator","hrc_indicator",
"n_unique","column","unique_modalities","from.eg","to.eg","from","to","mutual_full",
"Group","table_eg","spanning","hrc_spanning","spanning_old","tab_inclus",
"starts_with","spanning_name","hrc_spanning_name",
"starts_with","spanning_name","hrc_spanning_name","eq_indicator","rhs","total","term_number",
"eq_name","unit","var","n_total","total_alt","group",
".")
)
11 changes: 9 additions & 2 deletions R/grp_tab_in_cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
grp_tab_in_cluster <- function(list_split, list_translation_tables) {
# Nest each cluster of tables by `table_name`
nested_crois <- list_split %>%
purrr::map(function(tab) { tab %>% dplyr::group_by(table_name) %>% tidyr::nest() })
purrr::map(function(tab) {tab %>% dplyr::group_by(table_name) %>% tidyr::nest() })

# Process each cluster using inclusion relationships
purrr::map2(list_translation_tables, nested_crois, function(tab_to_keep, big_tibble) {
Expand All @@ -66,7 +66,7 @@ grp_tab_in_cluster <- function(list_split, list_translation_tables) {
# Process the cluster
big_tibble_eg <- big_tibble %>%
mutate(
spanning = map(data, function(small_tibble) { small_tibble$spanning })
spanning = map(data, function(small_tibble) {small_tibble$spanning})
) %>%
dplyr::left_join(tab_to_keep[[2]], by = c("table_name" = "Original")) %>%
mutate(table_eg = ifelse(is.na(Group), table_name, Group)) %>%
Expand All @@ -84,6 +84,13 @@ grp_tab_in_cluster <- function(list_split, list_translation_tables) {
)
})
) %>% dplyr::rename(table_name = table_eg)
# adding the tables that are not included in each other in the cluster
if(length(big_tibble$table_name) != length(tab_to_keep$passage_nom_tab$Original)){
tables_no_inclusion <- big_tibble %>%
filter(table_name %in% setdiff(big_tibble$table_name,tab_to_keep$passage_nom_tab$Original)) %>%
mutate(spanning = map(data, function(small_tibble) {small_tibble$spanning}))
big_tibble_eg <- bind_rows(tables_no_inclusion,big_tibble_eg)
}

return(big_tibble_eg)
} else {
Expand Down
2 changes: 1 addition & 1 deletion R/identify_hrc.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ identify_hrc <- function(df_metadata_long){
table_name = df_spannings$table_name
) %>% unique()
df_spannings <- df_spannings %>% select(-spanning_old)
if(all(is.na(df_spannings$hrc_indicator))){ # condition pour les hiérarchies sur les indicateurs
if(all(is.na(df_spannings$hrc_indicator))){ # condition for hierarchies on indicators
df_indicators <- df_spannings
return(list(df_indicators,df_variable_info))
} else {
Expand Down
Loading