From 15d6263754bdcaac4378cbbe51d9cd3beba982e2 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 10:18:11 +0000 Subject: [PATCH 01/20] Standardise stochastics --- R/stochastic_files.R | 88 ++++++ scripts/process_stoch_202310gavi.R | 362 +++++++++++++++++++++++ scripts/process_stoch_202409malaria.R | 37 +++ tests/testthat/test_stochastic_process.R | 46 ++- 4 files changed, 506 insertions(+), 27 deletions(-) create mode 100644 R/stochastic_files.R create mode 100644 scripts/process_stoch_202310gavi.R create mode 100644 scripts/process_stoch_202409malaria.R diff --git a/R/stochastic_files.R b/R/stochastic_files.R new file mode 100644 index 0000000..f2b83e0 --- /dev/null +++ b/R/stochastic_files.R @@ -0,0 +1,88 @@ +##' Convert a modelling group's stochastic files into an intermediate +##' format including all the original data, separated by country +##' and scenario, and in PQ format for later processing. As much as +##' possible, we'll try and detect the input formats. +##' +##' @export +##' @title Standardise stochastic data files +##' @import data.table +##' @import arrow +##' @param group The modelling group. +##' @param in_path The folder or network path where the original files are found. +##' @param out_path The folder or network path to write output files to. +##' @param scenarios A vector of strings giving each scenario name. +##' @param files This can either be a vector of strings of the same length to +##' the vector of scenarios, in which case each entry should match the scenario, +##' providing the filename for the original uploads for that scenario. Most +##' groups provide files numbered between 1 and 200 for their stochastic runs; +##' replace that number with `:index` in the filename to match. 
Alternatively, +##' files can be a single entry containing the string `:scenario`; in this case, +##' files must exist that match each entry in the `scenarios` parameter, and +##' the same file string can be used to match all of them (perhaps additionaly +##' wiht `:index`). +##' @params index This is usually a vector of ints, `1:200` to match the +##' range of stochastic files uploaded per scenario. A few groups upload one +##' large file containing everything, in which case `:index` shouldn't occur +##' in the `files` parameter, and should be omitted. +##' @params rubella_fix Historically Rubella uploads used the burden outcome +##' `rubella_deaths_congenital` and `rubella_cases_congenital` instead of +##' the simpler `deaths` and `cases`, and additionally provided a +##' `rubella_infections` field. `rubella_fix` needs to be TRUE to standardise +##' these to the simpler names, + +stone_stochastic_standardise <- function( + group, in_path, out_path, scenarios, files, index = 1, + rubella_fix = TRUE, missing_run_id_fix = TRUE) { + + dir.create(out_path, showWarnings = FALSE, recursive = TRUE) + if ((length(files) == 1) && (grepl(":scenario", files))) { + files <- rep(files, length(scenarios)) + for (j in seq_along(scenarios)) { + files[j] <- gsub(":scenario", scenarios[j], files[j]) + } + } + + for (i in seq_along(scenarios)) { + message(scenarios[i]) + all_data <- as.data.frame(data.table::rbindlist(lapply(index, function(j) { + cat("\r", j) + file <- gsub(":index", j, files[i]) + d <- read.csv(file.path(in_path, file)) + d$country_name <- NULL + + # Fixes needed to standardise Rubella + + names(d)[names(d) == "rubella_deaths_congenital"] <- "deaths" + names(d)[names(d) == "rubella_cases_congenital"] <- "cases" + d$rubella_infections <- NULL + + # Detect where run_id is missing, but in filenames + + if (missing_run_id_fix) { + if ((!"run_id" %in% names(d)) && (length(index) == 200)) d$run_id <- j + } + + # Round to integer, as per guidance. 
(Not using as.integer, as that + # has limits on how large numbers can be, so we are just truncating + # digits here) + + d$dalys <- round(d$dalys) + d$deaths <- round(d$deaths) + d$cases <- round(d$cases) + d$yll <- round(d$yll) + d$cohort_size <- round(d$cohort_size) + + d + }), use.names = TRUE)) + + countries <- sort(unique(all_data$country)) + for (country in countries) { + cat("\r", country) + d <- all_data[all_data$country == country, ] + d <- d[order(d$run_id, d$year, d$age), ] + file <- sprintf("%s_%s_%s.pq", group, scenarios[i], country) + arrow::write_parquet(d, file.path(out_path, file)) + } + cat("Completed\n") + } +} diff --git a/scripts/process_stoch_202310gavi.R b/scripts/process_stoch_202310gavi.R new file mode 100644 index 0000000..0c4915b --- /dev/null +++ b/scripts/process_stoch_202310gavi.R @@ -0,0 +1,362 @@ +base_in_path <- "//wpia-hn2.hpc.dide.ic.ac.uk/vimc_stochastics_dropbox/latest/202310gavi" +base_out_path <- "//wpia-hn2.hpc.dide.ic.ac.uk/vimc_stochastics/202310gavi" + + +############### +# Cholera + +scenarios = c("cholera-no-vaccination", "cholera-ocv1-default", "cholera-ocv1-ocv2-default") + +stoner::stone_stochastic_standardise( + group = "IVI-Kim", + in_path = file.path(base_in_path, "Cholera-IVI-Kim"), + out_path = file.path(base_out_path, "Cholera-IVI-Kim"), + scenarios = scenarios, + files = c("stoch_Cholera_novacc_20250704T130601 Jong-Hoon Kim.csv.xz", + "stoch_Cholera_campaign_default_ocv1_20250704T130601 Jong-Hoon Kim.csv.xz", + "stoch_Cholera_campaign_default_ocv12_20250704T130601 Jong-Hoon Kim.csv.xz") + ) + +stoner::stone_stochastic_standardise( + group = "JHU-Lee", + in_path = file.path(base_in_path, "Cholera-JHU-Lee"), + out_path = file.path(base_out_path, "Cholera-JHU-Lee"), + scenarios = scenarios, + files = c("stochastic-burden-template.202310gavi-4.Cholera_standard_template.529.no-vaccination Christina Alam.csv.xz", + "stochastic-burden-template.202310gavi-4.Cholera_standard_template.529.ocv1-default_one Christina 
Alam.csv.xz", + "stochastic-burden-template.202310gavi-4.Cholera_standard_template.529.ocv1-ocv2-default_two Christina Alam.csv.xz") +) + + +############### +# COVID + + scenarios = c("covid-no-vaccination", "covid-no-vaccination_severe", + "covid-primary-bluesky", "covid-primary-bluesky_severe", + "covid-primary-booster-bluesky", "covid-primary-booster-bluesky_severe", + "covid-primary-booster-default", "covid-primary-booster-default_severe", + "covid-primary-default", "covid-primary-default_severe") + +stoner::stone_stochastic_standardise( + group = "IC-Ghani", + in_path = file.path(base_in_path, "COVID-IC-Ghani", "wes-modified"), + out_path = file.path(base_ou6t_path, "COVID_IC-Ghani"), + scenarios = scenarios, + files = c("covid-no-vaccination Gemma Gilani_:index.csv.xz", + "covid-no-vaccination_severe Gemma Gilani_:index.csv.xz", + "covid-primary-bluesky Daniela Olivera_:index.csv.xz", + "covid-primary-bluesky_severe Daniela Olivera_:index.csv.xz", + "covid-primary-booster-bluesky Daniela Olivera_:index.csv.xz", + "covid-primary-booster-bluesky_severe Daniela Olivera_:index.csv.xz", + "covid-primary-booster-default Gemma Gilani_:index.csv.xz", + "covid-primary-booster-default_severe Gemma Gilani_:index.csv.xz", + "covid-primary-default Gemma Gilani_:index.csv.xz", + "covid-primary-default_severe Gemma Gilani_:index.csv.xz"), + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "LSHTM-Liu", + in_path = file.path(base_in_path, "COVID-LSHTM-Liu"), + out_path = file.path(base_out_path, "COVID_LSHTM-Liu"), + scenarios = scenarios, + files = "stochastic_burden_est_covid-LSHTM-Liu_:scenario Yang Liu.csv.xz") + + +############### +# HepB + + scenarios <- scenarios = c("hepb-hepb3-bd-bluesky", "hepb-hepb3-bd-default", + "hepb-hepb3-bd-ia2030", "hepb-hepb3-bluesky", + "hepb-hepb3-default", "hepb-hepb3-ia2030", + "hepb-no-vaccination") + +stoner::stone_stochastic_standardise( + group = "Li", + in_path = file.path(base_in_path, "HepB-Li"), + out_path = 
file.path(base_out_path, "HepB_Li"), + scenarios = scenarios, + files = ":scenario:index.csv.xz", + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "IC-Hallett", + in_path = file.path(base_in_path, "HepB-IC-Hallett"), + out_path = file.path(base_out_path, "HepB_IC-Hallett"), + scenarios = scenarios, + files = c("stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_bd_bluesky_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_bd_default_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_bd_ia2030_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_bluesky_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_default_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_ia2030_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_no_vaccination_:index.csv.xz"), + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "Burnet-Scott", + in_path = file.path(base_in_path, "HepB-Burnet-Scott"), + out_path = file.path(base_out_path, "HepB_Burnet-Scott"), + scenarios = scenarios, + files = c("stochastic_burden_hepb_hepb3_bd_bluesky_:index.csv.xz", + "stochastic_burden_hepb_hepb3_bd_default_:index.csv.xz", + "stochastic_burden_hepb_hepb3_bd_ia2030_:index.csv.xz", + "stochastic_burden_hepb_hepb3_bluesky_:index.csv.xz", + "stochastic_burden_hepb_hepb3_default_:index.csv.xz", + "stochastic_burden_hepb_hepb3_ia2030_:index.csv.xz", + "stochastic_burden_hepb_no_vaccination_:index.csv.xz"), + index = 1:117) + + +############### +# HPV + + scenarios = c("hpv-no-vaccination", "hpv-campaign-default", + "hpv-campaign-bluesky", "hpv-campaign-routine-bluesky", + "hpv-campaign-routine-default", "hpv-campaign-default_transition_hpv_1d", + "hpv-campaign-routine-default_transition_hpv_1d", + "hpv-campaign-ia2030", "hpv-campaign-routine-ia2030") + +stoner::stone_stochastic_standardise( + group = "Harvard-Sweet", + in_path = file.path(base_in_path, "HPV-Harvard-Kim"), + out_path = 
file.path(base_out_path, "HPV_Harvard-Kim"), + scenarios = scenarios, + files = c("stochastic-burden-est.novacc_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-default_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-bluesky_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-routine-bluesky_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-routine-default_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-default_transition_hpv_1d_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-routine-default_transition_hpv_1d_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-ia2030_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-routine-ia2030_run_:index Allison Portnoy.csv.xz"), + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "LSHTM-Jit", + in_path = file.path(base_in_path, "HPV-LSHTM-Jit"), + out_path = file.path(base_out_path, "HPV_LSHTM-Jit"), + scenarios = scenarios, + files = c("stochastic-burden-novaccination_all_202310gavi-7_hpv-no-vaccination Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-default Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-bluesky Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-routine-bluesky Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-routine-default Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-default_transition_hpv_1d Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-routine-default_transition_hpv_1d Kaja Abbas.csv.xz", + 
"stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-ia2030 Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-routine-ia2030 Kaja Abbas.csv.xz")) + + +############### +# Measles + + scenarios = c("measles-no-vaccination", "measles-mcv1-bluesky", "measles-mcv1-default", + "measles-mcv1-ia2030", "measles-mcv1-mcv2-bluesky", "measles-mcv1-mcv2-campaign-bluesky", + "measles-mcv1-mcv2-campaign-default", "measles-mcv1-mcv2-campaign-default_under5sia", + "measles-mcv1-mcv2-campaign-default_update", "measles-mcv1-mcv2-campaign-ia2030", + "measles-mcv1-mcv2-default", "measles-mcv1-mcv2-ia2030") + +stoner::stone_stochastic_standardise( + group = "PSU-Ferrari", + in_path = file.path(base_in_path, "Measles-PSU-Ferrari"), + out_path = file.path(base_out_path, "Measles_PSU-Ferrari"), + scenarios = scenarios, + files = c("no-vaccination.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-bluesky.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-default.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-ia2030.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-mcv2-bluesky.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-mcv2-campaign-bluesky.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-mcv2-campaign-default.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "measles-mcv1-mcv2-campaign-default_under5sia_PSU_Ferrari_Stochastic_Runs_Revised_05102025.csv.xz", + "measles-mcv1-mcv2-campaign-default_update_PSU_Ferrari_Stochastic_Runs_Revised_05102025.csv.xz", + "mcv1-mcv2-campaign-ia2030.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-mcv2-default.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-mcv2-ia2030.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz")) + +stoner::stone_stochastic_standardise( + group = "LSHTM-Jit", + in_path = file.path(base_in_path, "Measles-LSHTM-Jit"), + out_path = file.path(base_out_path, "Measles_LSHTM-Jit"), + scenarios = scenarios, + files 
= c("stochastic_burden_estimate_measles-LSHTM-Jit-no-vaccination Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-bluesky Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-default Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-ia2030 Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-bluesky Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-campaign-bluesky Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-campaign-default.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-campaign-default_under5sia.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-campaign-default_update.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-campaign-ia2030 Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-default Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-ia2030 Han Fu.csv.xz")) + + +############### +# MenA + + scenarios = c("mena-no-vaccination", "mena-campaign-default", "mena-campaign-routine-default", + "mena-campaign-bluesky", "mena-campaign-routine-bluesky", + "mena-campaign-ia2030", "mena-campaign-routine-ia2030") + +# NB campaign-default same as campaign-ia2030 +stoner::stone_stochastic_standardise( + group = "Cambridge-Trotter", + in_path = file.path(base_in_path, "MenA-Cambridge-Trotter"), + out_path = file.path(base_out_path, "MenA_Cambridge-Trotter"), + scenarios = scenarios, + files = c( + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-novaccination (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-campaign-default (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-routine-default (:index) Andromachi Karachaliou.csv.xz", + 
"stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-campaign-bluesky (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-routine-bluesky (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-campaign-default (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-routine-ia2030 (:index) Andromachi Karachaliou.csv.xz"), + index = 1:26) + + +############### +# Rubella + + scenarios = c("rubella-no-vaccination", "rubella-campaign-default", + "rubella-campaign-bluesky", "rubella-campaign-rcv1-rcv2-bluesky", + "rubella-campaign-rcv1-bluesky", "rubella-campaign-rcv1-rcv2-default", + "rubella-campaign-rcv1-default", "rubella-campaign-ia2030", + "rubella-campaign-rcv1-rcv2-ia2030", "rubella-campaign-rcv1-ia2030") + +stoner::stone_stochastic_standardise( + group = "UGA-Winter", + in_path = file.path(base_in_path, "Rubella-UGA"), + out_path = file.path(base_out_path, "Rubella_UGA-Winter"), + scenarios = scenarios, + files = c("stochastic_burden_est-rubella-no-vaccination_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-default_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-bluesky_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-rcv2-bluesky_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-bluesky_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-rcv2-default_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-default_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-ia2030_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-rcv2-ia2030_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-ia2030_:index Amy Winter.csv.xz"), + index = 1:12) + +stoner::stone_stochastic_standardise( + group = 
"UKHSA-Vynnycky", + in_path = file.path(base_in_path, "Rubella-UKHSA-Vynnycky"), + out_path = file.path(base_out_path, "Rubella_UKHSA-Vynnycky"), + scenarios = scenarios, + files = c("imp.c:indexs401.csv.xz", + "stochastic_burden_est_rubella_Vynnycky_rubella-campaign-default_:index.csv.xz", + "stochastic_burden_est_rubella_Vynnycky_rubella-campaign-bluesky_:index.csv.xz", + "Vynnycky-camp-rcv1-rcv2-bluesky_country:index.csv.xz", + "Vynnycky-camp-rcv1-bluesky_country:index.csv.xz", + "stochastic_burden_est_rubella_Vynnycky_rubella-campaign-rcv1-rcv2-default_:index.csv.xz", + "stochastic_burden_est_rubella_Vynnycky_rubella-campaign-rcv1-default_:index.csv.xz", + "stochastic_burden_est_rubella_Vynnycky_rubella-campaign-ia2030_:index.csv.xz", + "Vynnycky-camp-rcv1-rcv2-ia2030_country:index.csv.xz", + "Vynnycky-camp-rcv1-ia2030_country:index.csv.xz"), + index = 1:117) + + +############### +# Typhoid + + scenarios = c("typhoid-no-vaccination", + "typhoid-campaign-default", "typhoid-campaign-routine-default", + "typhoid-campaign-bluesky", "typhoid-campaign-routine-bluesky") + +stoner::stone_stochastic_standardise( + group = "IVI-Kim", + in_path = file.path(base_in_path, "Typhoid-IVI-Kim"), + out_path = file.path(base_out_path, "Typhoid_IVI-Kim"), + scenarios = scenarios, + file = c("stoch_Typhoid_novacc_20240314T233526 Jong-Hoon Kim.csv.xz", + "stoch_Typhoid_campaign_default_20240314T233526 Jong-Hoon Kim.csv.xz", + "stoch_Typhoid_campaign_routine_default_20240314T233526 Jong-Hoon Kim.csv.xz", + "stoch_Typhoid_campaign_bluesky_20240314T233526 Jong-Hoon Kim.csv.xz", + "stoch_Typhoid_campaign_routine_bluesky_20240314T233526 Jong-Hoon Kim.csv.xz")) + +stoner::stone_stochastic_standardise( + group = "Yale-Pitzer", + in_path = file.path(base_in_path, "Typhoid-Yale-Pitzer"), + out_path = file.path(base_out_path, "Typhoid_Yale-Pitzer"), + scenarios = scenarios, + files = ":scenario_stochastic_output2023_Yale Virginia Pitzer.csv.xz") + + +############### +# YF + +scenarios = 
c("yf-no-vaccination", "yf-routine-bluesky", + "yf-routine-campaign-bluesky", "yf-routine-campaign-default", + "yf-routine-campaign-ia2030", "yf-routine-default", + "yf-routine-ia2030") + +stoner::stone_stochastic_standardise( + group = "IC-Garske", + in_path = file.path(base_in_path, "YF-IC-Garske"), + out_path = file.path(base_out_path, "YF_IC-Garske"), + scenarios = scenarios, + files = "burden_results_stochastic_202310gavi-3_:scenario Keith Fraser.csv.xz") + +stoner::stone_stochastic_standardise( + group = "UND-Perkins", + in_path = file.path(base_in_path, "YF-UND-Perkins"), + out_path = file.path(base_out_path, "YF_UND-Perkins"), + scenarios = scenarios, + files = c("stochastic_burden_est_YF_UND-Perkins_yf-no_vaccination_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_bluesky_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_campaign_bluesky_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_campaign_default_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_campaign_ia2030_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_default_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_ia2030_:index.csv.xz"), + index = 1:200) + + +############### +# Malaria R2 + +scenarios = c("malaria-no-vaccination", + "malaria-rts3-default", "malaria-rts3-rts4-default", + "malaria-rts3-bluesky", "malaria-rts3-rts4-bluesky"), + +stoner::stone_stochastic_standardise( + group = "UAC-Kakai", + in_path = file.path(base_in_path, "Malaria-UAC-Kakai", "round2"), + out_path = file.path(base_out_path, "Malaria_UAC-Kakai"), + scenarios = scenarios, + files = c("Stochastic_Burden_Estimates_Glele_Kakai_No_Vaccine_:index.csv.xz", + "Stochastic_Burden_Estimates_Glele_Kakai_Default_rts3_:index.csv.xz", + "Stochastic_Burden_Estimates_Glele_Kakai_Default_rts3_4_:index.csv.xz", + "stochastic_Burden_Estimates_Glele_Kakai_Blue_Sky_rts3_:index.csv.xz", + 
"Stochastic_Burden_Estimates_Glele_Kakai_Blue_Sky_rts34_:index.csv.xz"), + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "IC-Okell", + in_path = file.path(base_in_path, "Malaria-IC_Okell"), + out_path = file.path(base_out_path, "Malaria_IC-Okell"), + scenarios = scenarios, + files = c("stochastic-burden-est.202310gavi-7.Malaria_IC-Okell_no-vaccination_draw_:index Lydia Haile.csv.xz", + "stochastic-burden-est.202310gavi-7.Malaria_IC-Okell_malaria-rts3-default_draw_:index Lydia Haile.csv.xz", + "stochastic-burden-est.202310gavi-7.Malaria_IC-Okell_malaria-rts3-rts4-default_draw_:index Lydia Haile.csv.xz", + "stochastic-burden-est.202310gavi-7.Malaria_IC-Okell_malaria-rts3-bluesky_draw_:index Lydia Haile.csv.xz", + "stochastic-burden-est.202310gavi-7.Malaria_IC-Okell_malaria-rts3-rts4-bluesky_draw_:index Lydia Haile.csv.xz"), + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "TKI-Penny", + in_path = file.path(base_in_path, "Malaria-TKI-Penny"), + out_path = file.path(base_out_path, "Malaria_TKI-Penny"), + scenarios = scenarios, + files = c("stochastic_burden_est_malaria_:index_novaccine Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_rtss_d3_default Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_rtss_d4_default Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_rtss_d3_bluesky Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_rtss_d4_bluesky Josephine Malinga.csv.xz"), + index = 1:31) diff --git a/scripts/process_stoch_202409malaria.R b/scripts/process_stoch_202409malaria.R new file mode 100644 index 0000000..c150f99 --- /dev/null +++ b/scripts/process_stoch_202409malaria.R @@ -0,0 +1,37 @@ +base_in_path <- "//wpia-hn2.hpc.dide.ic.ac.uk/vimc_stochastics_dropbox/latest/202409malaria-1" +base_out_path <- "//wpia-hn2.hpc.dide.ic.ac.uk/vimc_stochastics/202409malaria" + +scenarios = c("malaria_no_vaccination", "malaria-r3-r4-default", "malaria-rts3-rts4-default") + 
+stoner::stone_stochastic_standardise( + group = "IC-Okell", + in_path = file.path(base_in_path, "Malaria-IC-Okell"), + out_path = file.path(base_out_path, "Malaria_IC-Okell"), + scenarios = scenarios, + files = c("stochastic-burden-est-no-vaccination_:index Lydia Haile.csv.xz", + "stochastic-burden-est-malaria-r3-r4-default_:index Lydia Haile.csv.xz", + "stochastic-burden-est-malaria-rts3-rts4-default_:index Lydia Haile.csv.xz"), + index = 1:200 +) + +stoner::stone_stochastic_standardise( + group = "UAC-Kakai", + in_path = file.path(base_in_path, "Malaria-UAC-Kakai"), + out_path = file.path(base_out_path, "Malaria_UAC-Kakai"), + scenarios = scenarios, + files = c("Stochastic_Burden_Estimates_Glele_Kakai_No_Vaccine_:index.csv.xz", + "Stochastic_Burden_Estimates_Glele_Kakai_Default_r34_:index.csv.xz", + "Stochastic_Burden_Estimates_Glele_Kakai_Default_rts34_:index.csv.xz"), + index = 1:200 +) + +stoner::stone_stochastic_standardise( + group = "TKI-Penny", + in_path = file.path(base_in_path, "Malaria-TKI-Penny"), + out_path = file.path(base_out_path, "Malaria_TKI-Penny"), + scenarios = scenarios, + files = c("stochastic_burden_est_malaria_:index_novaccine Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_r21_d4_default Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_rtss_d4_default Josephine Malinga.csv.xz"), + index = 1:31 +) diff --git a/tests/testthat/test_stochastic_process.R b/tests/testthat/test_stochastic_process.R index ce2da5c..4a4d9dd 100644 --- a/tests/testthat/test_stochastic_process.R +++ b/tests/testthat/test_stochastic_process.R @@ -159,21 +159,18 @@ test_that("Bad arguments", { expect_error(stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz", - "", 1, 1, ".", outcomes = list(deaths = c("deaths", "deaths")), - bypass_cert_check = TRUE), + "", 1, 1, ".", deaths = c("deaths", "deaths"), bypass_cert_check = TRUE), "Duplicated outcome in deaths") 
expect_error(stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz", - "", 1, 1, ".", - outcomes = list(deaths = "deaths", cases = "cases", dalys = "piles_dalys"), + "", 1, 1, ".", deaths = "deaths", cases = "cases", dalys = "piles_dalys", bypass_cert_check = TRUE), "Outcomes not found, dalys \\('piles_dalys'\\)") expect_error(stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz", - "", 1, 1, ".", - outcomes = list(deaths = "deaths", cases = "cases", dalys = "dalys"), + "", 1, 1, ".", deaths = "deaths", cases = "cases", dalys = "dalys", runid_from_file = TRUE, bypass_cert_check = TRUE), "Must have index_start and index_end as 1..200 to imply run_id") @@ -367,7 +364,7 @@ stochastic_runner <- function(same_countries = TRUE, upload = FALSE, allow_new_database = TRUE, bypass_cert_check = TRUE, - dalys_recipe = NULL, + dalys_df = NULL, cert = "", pre_aggregation_path = NULL, lines = Inf, @@ -378,9 +375,9 @@ stochastic_runner <- function(same_countries = TRUE, res <- random_stoch_data(test, same_countries, simple_outcomes, single_file_per_scenario, include_run_id, - include_disease, !is.null(dalys_recipe)) + include_disease, !is.null(dalys_df)) - if (is.data.frame(dalys_recipe)) { + if (is.data.frame(dalys_df)) { fake_lifetable_db(test$con) } @@ -396,21 +393,17 @@ stochastic_runner <- function(same_countries = TRUE, index_end <- 200 } + deaths <- "deaths" + cases <- "cases" + dalys <- "dalys" if (!simple_outcomes) { - outcomes <- list( - deaths = c("deaths_acute", "deaths_chronic"), - cases = c("cases_acute", "cases_chronic"), - dalys = c("dalys_men", "dalys_pneumo")) - } else { - outcomes <- list( - deaths = "deaths", - cases = "cases", - dalys = "dalys") - + deaths <- c("deaths_acute", "deaths_chronic") + cases <- c("cases_acute", "cases_chronic") + dalys <- c("dalys_men", "dalys_pneumo") } - if (!is.null(dalys_recipe)) { - outcomes$dalys <- NULL + if (!is.null(dalys_df)) 
{ + dalys <- dalys_df } stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", @@ -418,8 +411,7 @@ stochastic_runner <- function(same_countries = TRUE, cert = cert, index_start, index_end, test$path, pre_aggregation_path, - outcomes, - dalys_recipe, + deaths, cases, dalys, runid_from_file = !include_run_id, allow_missing_disease = !include_disease, upload_to_annex = upload, annex = test$con, @@ -656,13 +648,13 @@ fake_lifetable_db <- function(con) { } test_that("Stochastic - with DALYs", { - dalys_recipe <- data_frame( + dalys_df <- data_frame( outcome = c("cases_acute", "deaths_chronic"), proportion = c(0.1, 0.2), average_duration = c(20, 1000), disability_weight = c(0.4, 0.6)) - result <- stochastic_runner(upload = FALSE, dalys_recipe = dalys_recipe, + result <- stochastic_runner(upload = FALSE, dalys_df = dalys_df, simple_outcomes = FALSE) lt <- stoner_life_table(result$test$con, "nevis-1", 2000, 2100, TRUE) @@ -829,10 +821,10 @@ test_that("Stochastic - with DALYs", { # Hurrah. We can *finally* test DALYs. 
out <- tempfile(fileext = ".qs") - dat <- stoner_dalys_for_db(con, dalys_recipe, + dat <- stoner_dalys_for_db(con, dalys_df, burden_estimate_set_id = new_bes, output_file = out) - dat2 <- stoner_dalys_for_db(con, dalys_recipe, + dat2 <- stoner_dalys_for_db(con, dalys_df, "LAP-elf", "flu", "nevis-1", "pies", output_file = out) From a828f85842607404d0eb1a0b0ff615ae2d1e055f Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 12:38:53 +0000 Subject: [PATCH 02/20] Add test and document --- DESCRIPTION | 4 +- NAMESPACE | 7 ++- R/stochastic_files.R | 25 ++++++---- R/stochastic_graphs.R | 64 ++++++++++++++++++++++++++ R/stochastic_process.R | 2 +- R/stochastic_upload.R | 1 - man/stone_stochastic_graph.Rd | 44 ++++++++++++++++++ man/stone_stochastic_process.Rd | 5 -- man/stone_stochastic_standardise.Rd | 60 ++++++++++++++++++++++++ tests/testthat/test_stochastic_files.R | 62 +++++++++++++++++++++++++ 10 files changed, 255 insertions(+), 19 deletions(-) create mode 100644 R/stochastic_graphs.R create mode 100644 man/stone_stochastic_graph.Rd create mode 100644 man/stone_stochastic_standardise.Rd create mode 100644 tests/testthat/test_stochastic_files.R diff --git a/DESCRIPTION b/DESCRIPTION index 2dbe8ad..0c4615c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: stoner Title: Support for Building VIMC Montagu Touchstones, using Dettl -Version: 0.1.17 +Version: 0.1.18 Authors@R: c(person("Wes", "Hinsley",role = c("aut", "cre", "cst", "dnc", "elg", "itr", "sng", "ard"), email = "w.hinsley@imperial.ac.uk"), @@ -28,7 +28,7 @@ Imports: utils, withr Language: en-GB -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.3 Roxygen: list(markdown = TRUE) Suggests: knitr, diff --git a/NAMESPACE b/NAMESPACE index d3fedf2..6aac553 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,15 +4,20 @@ export(stone_dump) export(stone_extract) export(stone_load) export(stone_stochastic_cert_verify) +export(stone_stochastic_graph) export(stone_stochastic_process) 
+export(stone_stochastic_standardise) export(stone_stochastic_upload) export(stone_test_extract) export(stone_test_transform) export(stone_transform) export(stoner_calculate_dalys) export(stoner_dalys_for_db) -import(data.table) +import(arrow) +import(dplyr) import(readr) +importFrom(data.table,as.data.table) +importFrom(data.table,rbindlist) importFrom(magrittr,"%>%") importFrom(utils,capture.output) importFrom(utils,read.csv) diff --git a/R/stochastic_files.R b/R/stochastic_files.R index f2b83e0..c910d36 100644 --- a/R/stochastic_files.R +++ b/R/stochastic_files.R @@ -5,7 +5,7 @@ ##' ##' @export ##' @title Standardise stochastic data files -##' @import data.table +##' @importFrom data.table rbindlist ##' @import arrow ##' @param group The modelling group. ##' @param in_path The folder or network path where the original files are found. @@ -20,15 +20,21 @@ ##' files must exist that match each entry in the `scenarios` parameter, and ##' the same file string can be used to match all of them (perhaps additionaly ##' wiht `:index`). -##' @params index This is usually a vector of ints, `1:200` to match the +##' @param index This is usually a vector of ints, `1:200` to match the ##' range of stochastic files uploaded per scenario. A few groups upload one ##' large file containing everything, in which case `:index` shouldn't occur ##' in the `files` parameter, and should be omitted. -##' @params rubella_fix Historically Rubella uploads used the burden outcome +##' @param rubella_fix Historically Rubella uploads used the burden outcome ##' `rubella_deaths_congenital` and `rubella_cases_congenital` instead of ##' the simpler `deaths` and `cases`, and additionally provided a ##' `rubella_infections` field. `rubella_fix` needs to be TRUE to standardise -##' these to the simpler names, +##' these to the simpler names. 
Processing Rubella stochastic files without +##' this set to TRUE will fail - so while we should always do this, keeping +##' the parameter makes it more clear in the code what we're doing and why. +##' @param missing_run_id_fix Some groups in the past have omitted run_id +##' from the files, but included them in the filenames. This fix inserts +##' that into the files if the index parameter indicates we have 200 runs to +##' process. stone_stochastic_standardise <- function( group, in_path, out_path, scenarios, files, index = 1, @@ -51,10 +57,11 @@ stone_stochastic_standardise <- function( d$country_name <- NULL # Fixes needed to standardise Rubella - - names(d)[names(d) == "rubella_deaths_congenital"] <- "deaths" - names(d)[names(d) == "rubella_cases_congenital"] <- "cases" - d$rubella_infections <- NULL + if (rubella_fix) { + names(d)[names(d) == "rubella_deaths_congenital"] <- "deaths" + names(d)[names(d) == "rubella_cases_congenital"] <- "cases" + d$rubella_infections <- NULL + } # Detect where run_id is missing, but in filenames @@ -83,6 +90,6 @@ stone_stochastic_standardise <- function( file <- sprintf("%s_%s_%s.pq", group, scenarios[i], country) arrow::write_parquet(d, file.path(out_path, file)) } - cat("Completed\n") + cat("\r Completed\n\n") } } diff --git a/R/stochastic_graphs.R b/R/stochastic_graphs.R new file mode 100644 index 0000000..70b7b9d --- /dev/null +++ b/R/stochastic_graphs.R @@ -0,0 +1,64 @@ +##' Draw a stochastic plot showing all the different runs, with the mean, +##' median, 5% and 95% quantiles shown. +##' +##' @export +##' @title Stochastic plot +##' @import dplyr +##' @import arrow +##' @param base The folder in which the standardised stochastic files are found. +##' @param touchstone The touchstone name (for the graph title) +##' @param disease The disease, used for building the filename and graph title. +##' @param group The modelling group, used in the filename and graph title. +##' @param country The country to plot. 
+##' @param scenario The scenario to plot. +##' @param ages A vector of one or more ages to be selected and aggregated, or +##' if left as NULL, then all ages are used and aggregated. +##' @param by_cohort If TRUE, then age is subtracted from year to convert it to +##' year of birth before aggregating. +##' @param log If TRUE, then use a logged y-axis. + +stone_stochastic_graph <- function(base, touchstone, disease, group, country, + scenario, outcome, ages = NULL, + by_cohort = FALSE, log = FALSE) { + + pq <- sprintf("%s/%s/%s_%s/%s_%s_%s.pq", base, touchstone, disease, + group, group, scenario, country) + + title <- sprintf("%s, %s, %s\n%s, %s\n %s", touchstone, disease, group, scenario, country, age_string) + log <- if (log) "y" else "n" + d <- arrow::read_parquet(pq) + if (!is.null(ages)) { + d <- d[d$age %in% ages, ] + } + if (by_cohort) { + d$year <- d$year - d$age + } + d <- d[, c("run_id", "year", "age", outcome)] + d <- d %>% group_by(run_id, year) %>% + summarise( + !!outcome := sum(.data[[outcome]], na.rm = TRUE), + .groups = "drop") + + miny <- max(1, min(d[[outcome]])) + maxy <- max(d[[outcome]]) + plot(ylab = outcome, xlab = if (by_cohort) "Birth Cohort" else "year", + x = d$year[d$run_id == 1], y = d[[outcome]][d$run_id == 1], type="l", + col = "#b0b0b0", ylim = c(miny, maxy), main = title, log = log) + for (i in 2:200) { + lines(x = d$year[d$run_id == i], y = d[[outcome]][d$run_id == i], + col = "#b0b0b0") + } + + avgs <- d %>% group_by(year) %>% + summarise( + mean = mean(.data[[outcome]]), + median = median(.data[[outcome]]), + q05 = quantile(.data[[outcome]], 0.05), + q95 = quantile(.data[[outcome]], 0.95), + .groups = "drop" + ) + lines(x = avgs$year, y = avgs$mean, col = "#ff4040", lwd = 2) + lines(x = avgs$year, y = avgs$median, col = "#00ff00", lwd = 2) + lines(x = avgs$year, y = avgs$q05, col = "#202020", lwd = 2) + lines(x = avgs$year, y = avgs$q95, col = "#202020", lwd = 2) +} diff --git a/R/stochastic_process.R b/R/stochastic_process.R 
index f71af58..6be8ebf 100644 --- a/R/stochastic_process.R +++ b/R/stochastic_process.R @@ -5,7 +5,7 @@ ##' ##' @export ##' @title Process stochastic data -##' @import data.table +##' @importFrom data.table as.data.table ##' @import readr ##' @importFrom utils write.csv ##' @param con DBI connection to production. Used for verifying certificate diff --git a/R/stochastic_upload.R b/R/stochastic_upload.R index a20f59a..8c00eb0 100644 --- a/R/stochastic_upload.R +++ b/R/stochastic_upload.R @@ -2,7 +2,6 @@ ##' the annex database. ##' @export ##' @title Upload stochastic data to annex -##' @import data.table ##' @import readr ##' @importFrom utils read.csv ##' @param file A qs or csv file generated by stone_stochastic_process diff --git a/man/stone_stochastic_graph.Rd b/man/stone_stochastic_graph.Rd new file mode 100644 index 0000000..0c81c0e --- /dev/null +++ b/man/stone_stochastic_graph.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stochastic_graphs.R +\name{stone_stochastic_graph} +\alias{stone_stochastic_graph} +\title{Stochastic plot} +\usage{ +stone_stochastic_graph( + base, + touchstone, + disease, + group, + country, + scenario, + outcome, + ages = NULL, + by_cohort = FALSE, + log = FALSE +) +} +\arguments{ +\item{base}{The folder in which the standardised stochastic files are found.} + +\item{touchstone}{The touchstone name (for the graph title)} + +\item{disease}{The disease, used for building the filename and graph title.} + +\item{group}{The modelling group, used in the filename and graph title.} + +\item{country}{The country to plot.} + +\item{scenario}{The scenario to plot.} + +\item{ages}{A vector of one or more ages to be selected and aggregated, or +if left as NULL, then all ages are used and aggregated.} + +\item{by_cohort}{If TRUE, then age is subtracted from year to convert it to +year of birth before aggregating.} + +\item{log}{If TRUE, then use a logged y-axis.} +} +\description{ +Draw a 
stochastic plot showing all the different runs, with the mean, +median, 5\% and 95\% quantiles shown. +} diff --git a/man/stone_stochastic_process.Rd b/man/stone_stochastic_process.Rd index 4ee4542..5ade88a 100644 --- a/man/stone_stochastic_process.Rd +++ b/man/stone_stochastic_process.Rd @@ -112,11 +112,6 @@ doing the full run.} If file exists it will be appended to, otherwise file will be created.} \item{silent}{TRUE to silence console logs.} - -\item{yll}{Added in 2023, the years of life lost indicator is more -helpful especially for covid burden analysis. Usually leaving as "yll" -is enough, but if it is the sum of other outcomes, provide these as a -string vector.} } \description{ Convert a modelling group's stochastic files into the summary format, diff --git a/man/stone_stochastic_standardise.Rd b/man/stone_stochastic_standardise.Rd new file mode 100644 index 0000000..343275f --- /dev/null +++ b/man/stone_stochastic_standardise.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stochastic_files.R +\name{stone_stochastic_standardise} +\alias{stone_stochastic_standardise} +\title{Standardise stochastic data files} +\usage{ +stone_stochastic_standardise( + group, + in_path, + out_path, + scenarios, + files, + index = 1, + rubella_fix = TRUE, + missing_run_id_fix = TRUE +) +} +\arguments{ +\item{group}{The modelling group.} + +\item{in_path}{The folder or network path where the original files are found.} + +\item{out_path}{The folder or network path to write output files to.} + +\item{scenarios}{A vector of strings giving each scenario name.} + +\item{files}{This can either be a vector of strings of the same length to +the vector of scenarios, in which case each entry should match the scenario, +providing the filename for the original uploads for that scenario. Most +groups provide files numbered between 1 and 200 for their stochastic runs; +replace that number with \verb{:index} in the filename to match. 
Alternatively,
+files can be a single entry containing the string \verb{:scenario}; in this case,
+files must exist that match each entry in the \code{scenarios} parameter, and
+the same file string can be used to match all of them (perhaps additionally
+with \verb{:index}).}

+\item{index}{This is usually a vector of ints, \code{1:200} to match the
+range of stochastic files uploaded per scenario. A few groups upload one
+large file containing everything, in which case \verb{:index} shouldn't occur
+in the \code{files} parameter, and should be omitted.}
+
+\item{rubella_fix}{Historically Rubella uploads used the burden outcome
+\code{rubella_deaths_congenital} and \code{rubella_cases_congenital} instead of
+the simpler \code{deaths} and \code{cases}, and additionally provided a
+\code{rubella_infections} field. \code{rubella_fix} needs to be TRUE to standardise
+these to the simpler names. Processing Rubella stochastic files without
+this set to TRUE will fail - so while we should always do this, keeping
+the parameter makes it more clear in the code what we're doing and why.}
+
+\item{missing_run_id_fix}{Some groups in the past have omitted run_id
+from the files, but included them in the filenames. This fix inserts
+that into the files if the index parameter indicates we have 200 runs to
+process.}
+}
+\description{
+Convert a modelling group's stochastic files into an intermediate
+format including all the original data, separated by country
+and scenario, and in PQ format for later processing. As much as
+possible, we'll try and detect the input formats. 
+} diff --git a/tests/testthat/test_stochastic_files.R b/tests/testthat/test_stochastic_files.R new file mode 100644 index 0000000..0e3738c --- /dev/null +++ b/tests/testthat/test_stochastic_files.R @@ -0,0 +1,62 @@ +context("stochastic_files") + +test_that("Standardise works with :scenario and :index", { + fake <- data.frame( + disease = "elf-piles", + country = "LAP", + year = rep(2000:2005, each = 5), + age = rep(0:5, 5), + cases = 1:30, + cohort_size = c(100, 200, 300, 400, 500, 600), + deaths = 31:60, + dalys = 61:90, + yll = 91:120, + run_id = 1) + fake2 <- fake + fake2$run_id <- 2 + fake <- rbind(fake, fake2) + + tmpin <- tempdir() + tmpout <- tempdir() + + # Clean-up folder if necessary... + + for (f in c("north_pole_optimistic_LAP.pq", + "north_pole_fatalistic_LAP.pq")) { + if (file.exists(file.path(tmpout, f))) { + file.remove(file.path(tmpout, f)) + } + } + + if (file.exists(file.path(tmpout, "north_pole_optimistic_LAP.pq"))) + tmpfile <- tempfile(tmpdir = tmpin) + write.csv(fake, paste0(tmpfile, "_optimistic_1"), row.names = FALSE) + write.csv(fake, paste0(tmpfile, "_fatalistic_1"), row.names = FALSE) + + fake$country <- "POL" + write.csv(fake, paste0(tmpfile, "_optimistic_2"), row.names = FALSE) + write.csv(fake, paste0(tmpfile, "_fatalistic_2"), row.names = FALSE) + + stone_stochastic_standardise( + group = "north_pole", + in_path = tmpin, + out_path = tmpout, + scenarios = c("optimistic", "fatalistic"), + files = paste0(basename(tmpfile), "_:scenario_:index"), + index = 1:2 + ) + + files <- list.files(path = tmpout) + expect_true("north_pole_optimistic_LAP.pq" %in% files) + expect_true("north_pole_optimistic_POL.pq" %in% files) + expect_true("north_pole_fatalistic_LAP.pq" %in% files) + expect_true("north_pole_fatalistic_POL.pq" %in% files) + + + pq <- arrow::read_parquet(file.path(tmpout, "north_pole_optimistic_LAP.pq")) + expect_true(unique(pq$country) == "LAP") + expect_true(all.equal(sort(unique(pq$run_id)), 1:2)) + 
expect_true(all.equal(sort(unique(pq$age)), 0:5)) + expect_true(all.equal(sort(unique(pq$year)), 2000:2005)) +}) + From d82ddab597561c52dc9f765b6de31296c28ebcd6 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 13:29:19 +0000 Subject: [PATCH 03/20] Update workflows --- .github/workflows/R-CMD-check.yaml | 77 +++++++++------------------- .github/workflows/pkgdown.yaml | 60 +++++++++++----------- .github/workflows/test-coverage.yaml | 70 +++++++++++-------------- 3 files changed, 83 insertions(+), 124 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 583fbf0..cacf0f1 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,14 +1,10 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - - main - - master + branches: [main, master] pull_request: - branches: - - main - - master + branches: [main, master] name: R-CMD-check @@ -22,61 +18,34 @@ jobs: fail-fast: false matrix: config: - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + + # Shorter timeout to prevent mac builders hanging for 6 hours! 
+ timeout-minutes: 30 env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} + extra-packages: any::rcmdcheck + needs: check - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main + - uses: r-lib/actions/check-r-package@v2 with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + upload-snapshots: true diff --git a/.github/workflows/pkgdown.yaml 
b/.github/workflows/pkgdown.yaml index 803da57..fb6645f 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -1,48 +1,46 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - - main - - master + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: name: pkgdown jobs: pkgdown: - runs-on: macOS-latest + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-pandoc@master - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true - - name: Cache R packages - uses: actions/cache@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + extra-packages: any::pkgdown, local::. + needs: website - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - install.packages("pkgdown") + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0} - - name: Install package - run: R CMD INSTALL . 
- - - name: Deploy package - run: | - git config --local user.email "actions@github.com" - git config --local user.name "GitHub Actions" - Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 453ff10..d3f65be 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -1,58 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - - main - - master + branches: [main, master] pull_request: - branches: - - main - - master + branches: [main, master] name: test-coverage jobs: test-coverage: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - ORDERLYWEB_TEST_TOKEN: ${{ secrets.ORDERLYWEB_TEST_TOKEN }} - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v1 - - uses: r-lib/actions/setup-pandoc@v1 + steps: + - uses: actions/checkout@v4 - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true - - name: Cache R packages - uses: actions/cache@v2 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + extra-packages: any::covr + needs: coverage - - name: 
Install system dependencies - if: runner.os == 'Linux' + - name: Test coverage run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - sudo apt-get install libcurl4-openssl-dev + covr::codecov( + quiet = FALSE, + clean = FALSE, + install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + ) + shell: Rscript {0} - - name: Install dependencies + - name: Show testthat output + if: always() run: | - install.packages(c("remotes")) - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("covr") - shell: Rscript {0} + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash - - name: Test coverage - run: covr::codecov() - shell: Rscript {0} + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package From f14f29575738c9ec4ce9e8d81b8ef58060639c54 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 13:30:52 +0000 Subject: [PATCH 04/20] Update more workflows --- .github/workflows/test-coverage.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index d3f65be..07badcc 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -44,7 +44,7 @@ jobs: - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package From 29fef6b0a1822ebb59e2aa854011008dc53c2a9e Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 13:34:43 +0000 Subject: [PATCH 05/20] Fix qs dependency --- .github/workflows/R-CMD-check.yaml | 6 ++++++ .github/workflows/pkgdown.yaml | 6 ++++++ .github/workflows/test-coverage.yaml | 6 ++++++ 3 files 
changed, 18 insertions(+) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index cacf0f1..bf41f63 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -41,6 +41,12 @@ jobs: http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true + - name: Install system build tools + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y build-essential libcurl4-openssl-dev libssl-dev libxml2-dev + - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::rcmdcheck diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index fb6645f..b8d93f4 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -28,6 +28,12 @@ jobs: with: use-public-rspm: true + - name: Install system build tools + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y build-essential libcurl4-openssl-dev libssl-dev libxml2-dev + - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::pkgdown, local::. 
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 07badcc..758480a 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -21,6 +21,12 @@ jobs: with: use-public-rspm: true + - name: Install system build tools + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y build-essential libcurl4-openssl-dev libssl-dev libxml2-dev + - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::covr From 9fc8131471e365b3b8fdaa765e9df5f88dd0b50d Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 13:41:33 +0000 Subject: [PATCH 06/20] Try again --- .github/workflows/R-CMD-check.yaml | 1 + .github/workflows/pkgdown.yaml | 1 + .github/workflows/test-coverage.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index bf41f63..ff73174 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -29,6 +29,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes + R_CRAN_MIRROR: https://cloud.r-project.org steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index b8d93f4..1c8c996 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -19,6 +19,7 @@ jobs: group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_CRAN_MIRROR: https://cloud.r-project.org steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 758480a..4b4ea12 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -13,6 +13,7 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_CRAN_MIRROR: https://cloud.r-project.org steps: - uses: actions/checkout@v4 From 
8ff3c9450d3b8aca4c2a440e195cc7ac6014bc86 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 13:54:14 +0000 Subject: [PATCH 07/20] qs deprecated --- NAMESPACE | 1 + R/dalys.R | 3 ++- R/stochastic_process.R | 11 ++++++----- R/stochastic_upload.R | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 6aac553..32f5206 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(stoner_calculate_dalys) export(stoner_dalys_for_db) import(arrow) import(dplyr) +import(qs2) import(readr) importFrom(data.table,as.data.table) importFrom(data.table,rbindlist) diff --git a/R/dalys.R b/R/dalys.R index 4edf2ea..bddaeb3 100644 --- a/R/dalys.R +++ b/R/dalys.R @@ -8,6 +8,7 @@ ##' ##' @export ##' @title Calculating DALYs for stochastic or central estimates +##' @import qs2 ##' @param con DBI connection to a Montagu database. Used for retrieving ##' demographic data for life expectancy. ##' @param touchstone The touchstone (including version); the demographic @@ -360,7 +361,7 @@ stoner_dalys_for_db <- function(con, dalys_params, modelling_group = NULL, # Optionally write a CSV file out. if (!is.null(output_file)) { - qs::qsave(as.data.frame(data_dalys$data), output_file) + qs2::qs_save(as.data.frame(data_dalys$data), output_file) } data_dalys diff --git a/R/stochastic_process.R b/R/stochastic_process.R index 6be8ebf..3a43455 100644 --- a/R/stochastic_process.R +++ b/R/stochastic_process.R @@ -7,6 +7,7 @@ ##' @title Process stochastic data ##' @importFrom data.table as.data.table ##' @import readr +##' @import qs2 ##' @importFrom utils write.csv ##' @param con DBI connection to production. 
Used for verifying certificate ##' against expected properties @@ -438,7 +439,7 @@ write_pre_aggregated_to_disk <- function(data, touchpoint, touchpoint$disease, country)) data <- as.data.frame(data) - qs::qsave(data[data$country == country, ], path) + qs2::qs_save(data[data$country == country, ], path) }, "Saved %s size %s", path, prettyunits::pretty_bytes(file.size(path))) } invisible(TRUE) @@ -448,7 +449,7 @@ write_pre_aggregated_to_disk <- function(data, touchpoint, write_output_to_disk <- function(output, out_path, modelling_group, disease) { all_u5_cal_file <- file.path(out_path, sprintf("%s_%s_calendar_u5.qs", modelling_group, disease)) - timed(qs::qsave(x = as.data.frame(output$u5_calendar_year), + timed(qs2::qs_save(x = as.data.frame(output$u5_calendar_year), file = all_u5_cal_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) @@ -456,21 +457,21 @@ write_output_to_disk <- function(output, out_path, modelling_group, disease) { all_cal_file <- file.path(out_path, sprintf("%s_%s_calendar.qs", modelling_group, disease)) - timed(qs::qsave(x = as.data.frame(output$all_calendar_year), + timed(qs2::qs_save(x = as.data.frame(output$all_calendar_year), file = all_cal_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) all_u5_coh_file <- file.path(out_path, sprintf("%s_%s_cohort_u5.qs", modelling_group, disease)) - timed(qs::qsave(x = as.data.frame(output$u5_cohort), + timed(qs2::qs_save(x = as.data.frame(output$u5_cohort), file = all_u5_coh_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) all_coh_file <- file.path(out_path, sprintf("%s_%s_cohort.qs", modelling_group, disease)) - timed(qs::qsave(x = as.data.frame(output$all_cohort), + timed(qs2::qs_save(x = as.data.frame(output$all_cohort), file = all_coh_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) diff --git 
a/R/stochastic_upload.R b/R/stochastic_upload.R index 8c00eb0..05d7f57 100644 --- a/R/stochastic_upload.R +++ b/R/stochastic_upload.R @@ -194,7 +194,7 @@ read_stochastic_qs <- function(file, is_cohort) { ## Read data message(sprintf("Reading %s", file)) - data <- qs::qread(file) + data <- qs2::qs_read(file) if (anyNA(match(expected_cols, colnames(data)))) { stop("Columns in qs file not as expected") } From b37faec474b753a748bbad45ec7cffdf5ef921cf Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 13:56:15 +0000 Subject: [PATCH 08/20] Update DESCRIPTION for packages --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0c4615c..7be52b7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,11 +16,12 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Imports: + arrow, DBI, data.table, jsonlite, lgr, - qs, + qs2, dplyr, magrittr, prettyunits, From 192e3447daa02a689b9109d59e8fbaae4ee73c31 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 14:05:54 +0000 Subject: [PATCH 09/20] Fix more imports --- NAMESPACE | 5 +++++ R/prune.R | 9 +++++---- R/stochastic_graphs.R | 2 ++ R/stochastic_process.R | 4 ++-- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 32f5206..7a14856 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,7 +19,12 @@ import(qs2) import(readr) importFrom(data.table,as.data.table) importFrom(data.table,rbindlist) +importFrom(graphics,lines) importFrom(magrittr,"%>%") +importFrom(stats,median) +importFrom(stats,quantile) +importFrom(testthat,expect_equal) +importFrom(testthat,expect_true) importFrom(utils,capture.output) importFrom(utils,read.csv) importFrom(utils,write.csv) diff --git a/R/prune.R b/R/prune.R index 59077df..ca03fc4 100644 --- a/R/prune.R +++ b/R/prune.R @@ -1,4 +1,5 @@ ##' @importFrom utils capture.output +##' @importFrom testthat expect_true expect_equal extract_prune <- function(e, path, con) { @@ -142,10 
+143,10 @@ test_extract_prune <- function(e) { # bes <- e$burden_estimate_set - expect_true("id" %in% names(bes)) - expect_true("responsibility" %in% names(bes))# - expect_equal(0, sum(is.na(bes$id))) - expect_equal(0, sum(is.na(bes$responsibility))) + testthat::expect_true("id" %in% names(bes)) + testthat::expect_true("responsibility" %in% names(bes))# + testthat::expect_equal(0, sum(is.na(bes$id))) + testthat::expect_equal(0, sum(is.na(bes$responsibility))) } diff --git a/R/stochastic_graphs.R b/R/stochastic_graphs.R index 70b7b9d..a4de81f 100644 --- a/R/stochastic_graphs.R +++ b/R/stochastic_graphs.R @@ -5,6 +5,8 @@ ##' @title Stochastic plot ##' @import dplyr ##' @import arrow +##' @importFrom graphics lines +##' @importFrom stats quantile median ##' @param base The folder in which the standardised stochastic files are found. ##' @param touchstone The touchstone name (for the graph title) ##' @param disease The disease, used for building the filename and graph title. diff --git a/R/stochastic_process.R b/R/stochastic_process.R index 3a43455..4fa2646 100644 --- a/R/stochastic_process.R +++ b/R/stochastic_process.R @@ -317,9 +317,9 @@ aggregate_data <- function(scenario_data) { scen_coh <- agg_and_sort(scenario_data) scen_u5_coh <- scen_u5_coh %>% - dplyr::rename(cohort = year) + dplyr::rename(cohort = "year") scen_coh <- scen_coh %>% - dplyr::rename(cohort = year) + dplyr::rename(cohort = "year") list( u5_calendar_year = scen_u5_cal, From 05fc181611a0bc8c0eff5ba5ad6a453b83af396d Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 16:18:38 +0000 Subject: [PATCH 10/20] Fix tests locally --- R/stochastic_process.R | 10 ++++---- tests/testthat/test_stochastic_files.R | 1 - tests/testthat/test_stochastic_process.R | 32 +++++++++++++----------- tests/testthat/test_stochastic_upload.R | 2 +- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/R/stochastic_process.R b/R/stochastic_process.R index 4fa2646..6a84508 100644 --- 
a/R/stochastic_process.R +++ b/R/stochastic_process.R @@ -34,7 +34,7 @@ ##' @param out_path Path to writing output files into ##' @param pre_aggregation_path Path to dir to write out pre age-disaggregated ##' data into. If NULL then this is skipped. -##' @param outcomes A list of names vectors, where the name is the burden +##' @param outcomes A list of named vectors, where the name is the burden ##' outcome, and the elements of the list are the column names in the ##' stochastic files that should be summed to compute that outcome. The ##' default is to expect outcomes `deaths`, `cases`, `dalys`, and `yll`, @@ -449,7 +449,7 @@ write_pre_aggregated_to_disk <- function(data, touchpoint, write_output_to_disk <- function(output, out_path, modelling_group, disease) { all_u5_cal_file <- file.path(out_path, sprintf("%s_%s_calendar_u5.qs", modelling_group, disease)) - timed(qs2::qs_save(x = as.data.frame(output$u5_calendar_year), + timed(qs2::qs_save(as.data.frame(output$u5_calendar_year), file = all_u5_cal_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) @@ -457,21 +457,21 @@ write_output_to_disk <- function(output, out_path, modelling_group, disease) { all_cal_file <- file.path(out_path, sprintf("%s_%s_calendar.qs", modelling_group, disease)) - timed(qs2::qs_save(x = as.data.frame(output$all_calendar_year), + timed(qs2::qs_save(as.data.frame(output$all_calendar_year), file = all_cal_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) all_u5_coh_file <- file.path(out_path, sprintf("%s_%s_cohort_u5.qs", modelling_group, disease)) - timed(qs2::qs_save(x = as.data.frame(output$u5_cohort), + timed(qs2::qs_save(as.data.frame(output$u5_cohort), file = all_u5_coh_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) all_coh_file <- file.path(out_path, sprintf("%s_%s_cohort.qs", modelling_group, disease)) - timed(qs2::qs_save(x = 
as.data.frame(output$all_cohort), + timed(qs2::qs_save(as.data.frame(output$all_cohort), file = all_coh_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) diff --git a/tests/testthat/test_stochastic_files.R b/tests/testthat/test_stochastic_files.R index 0e3738c..9d0a0d8 100644 --- a/tests/testthat/test_stochastic_files.R +++ b/tests/testthat/test_stochastic_files.R @@ -28,7 +28,6 @@ test_that("Standardise works with :scenario and :index", { } } - if (file.exists(file.path(tmpout, "north_pole_optimistic_LAP.pq"))) tmpfile <- tempfile(tmpdir = tmpin) write.csv(fake, paste0(tmpfile, "_optimistic_1"), row.names = FALSE) write.csv(fake, paste0(tmpfile, "_fatalistic_1"), row.names = FALSE) diff --git a/tests/testthat/test_stochastic_process.R b/tests/testthat/test_stochastic_process.R index 4a4d9dd..67e31a9 100644 --- a/tests/testthat/test_stochastic_process.R +++ b/tests/testthat/test_stochastic_process.R @@ -159,18 +159,18 @@ test_that("Bad arguments", { expect_error(stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz", - "", 1, 1, ".", deaths = c("deaths", "deaths"), bypass_cert_check = TRUE), + "", 1, 1, ".", outcomes = list(deaths = c("deaths", "deaths")), bypass_cert_check = TRUE), "Duplicated outcome in deaths") expect_error(stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz", - "", 1, 1, ".", deaths = "deaths", cases = "cases", dalys = "piles_dalys", + "", 1, 1, ".", outcomes = list(deaths = "deaths", cases = "cases", dalys = "piles_dalys"), bypass_cert_check = TRUE), "Outcomes not found, dalys \\('piles_dalys'\\)") expect_error(stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz", - "", 1, 1, ".", deaths = "deaths", cases = "cases", dalys = "dalys", + "", 1, 1, ".", outcomes = list(deaths = "deaths", cases = "cases", dalys = "dalys"), runid_from_file = TRUE, 
bypass_cert_check = TRUE), "Must have index_start and index_end as 1..200 to imply run_id") @@ -392,7 +392,6 @@ stochastic_runner <- function(same_countries = TRUE, index_start <- 1 index_end <- 200 } - deaths <- "deaths" cases <- "cases" dalys <- "dalys" @@ -402,8 +401,10 @@ stochastic_runner <- function(same_countries = TRUE, dalys <- c("dalys_men", "dalys_pneumo") } - if (!is.null(dalys_df)) { - dalys <- dalys_df + if (is.data.frame(dalys_df)) { + outcomes <- list(deaths = deaths, cases = cases) + } else { + outcomes <- list(deaths = deaths, cases = cases, dalys = dalys) } stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", @@ -411,7 +412,8 @@ stochastic_runner <- function(same_countries = TRUE, cert = cert, index_start, index_end, test$path, pre_aggregation_path, - deaths, cases, dalys, + outcomes = outcomes, + dalys_recipe = dalys_df, runid_from_file = !include_run_id, allow_missing_disease = !include_disease, upload_to_annex = upload, annex = test$con, @@ -425,10 +427,10 @@ stochastic_runner <- function(same_countries = TRUE, test = test, raw = res$raw, data = res$data, - cal = qs::qread(file.path(test$path, "LAP-elf_flu_calendar.qs")), - cal_u5 = qs::qread(file.path(test$path, "LAP-elf_flu_calendar_u5.qs")), - coh = qs::qread(file.path(test$path, "LAP-elf_flu_cohort.qs")), - coh_u5 = qs::qread(file.path(test$path, "LAP-elf_flu_cohort_u5.qs")) + cal = qs2::qs_read(file.path(test$path, "LAP-elf_flu_calendar.qs")), + cal_u5 = qs2::qs_read(file.path(test$path, "LAP-elf_flu_calendar_u5.qs")), + coh = qs2::qs_read(file.path(test$path, "LAP-elf_flu_cohort.qs")), + coh_u5 = qs2::qs_read(file.path(test$path, "LAP-elf_flu_cohort_u5.qs")) ) } @@ -544,7 +546,7 @@ test_that("Stochastic - with upload", { result$cal$deaths_pies <- round(result$cal$deaths_pies / 2) new_qs_file <- tempfile(fileext = ".qs") - qs::qsave(x = result$cal, file = new_qs_file) + qs2::qs_save(result$cal, file = new_qs_file) stone_stochastic_upload(new_qs_file, result$test$con, 
result$test$con, "LAP-elf", "flu", "nevis-1", is_cohort = FALSE, @@ -828,7 +830,7 @@ test_that("Stochastic - with DALYs", { "LAP-elf", "flu", "nevis-1", "pies", output_file = out) - df <- qs::qread(out) + df <- qs2::qs_read(out) expect_identical(dat, dat2) expect_equal(dat$data$dalys, df$dalys) @@ -853,7 +855,7 @@ test_that("preaggregated data can be saved to disk", { expect_setequal(files, c("LAP-elf_flu_4_pre_aggregation.qs", "LAP-elf_flu_716_pre_aggregation.qs")) - country_4 <- qs::qread(file.path(t, "LAP-elf_flu_4_pre_aggregation.qs")) + country_4 <- qs2::qs_read(file.path(t, "LAP-elf_flu_4_pre_aggregation.qs")) expect_setequal(colnames(country_4), c("country", "year", "run_id", "age", "deaths_pies", "cases_pies", "dalys_pies", "deaths_hot_chocolate", @@ -861,7 +863,7 @@ test_that("preaggregated data can be saved to disk", { "deaths_holly", "cases_holly", "dalys_holly")) expect_true(all(country_4$country == 4)) - country_716 <- qs::qread(file.path(t, "LAP-elf_flu_716_pre_aggregation.qs")) + country_716 <- qs2::qs_read(file.path(t, "LAP-elf_flu_716_pre_aggregation.qs")) expect_setequal(colnames(country_716), c("country", "year", "run_id", "age", "deaths_pies", "cases_pies", "dalys_pies", "deaths_hot_chocolate", diff --git a/tests/testthat/test_stochastic_upload.R b/tests/testthat/test_stochastic_upload.R index 352c259..b513190 100644 --- a/tests/testthat/test_stochastic_upload.R +++ b/tests/testthat/test_stochastic_upload.R @@ -14,7 +14,7 @@ test_that("Bad arguments", { "file(.*)exists(.*)") new_file <- tempfile(fileext = ".qs") - qs::qsave(x = mtcars, file = new_file) + qs2::qs_save(mtcars, file = new_file) expect_error(stone_stochastic_upload(new_file, test$con, test$con, "Rudolph"), "Unknown modelling group: Rudolph") From fe7afdb88c5455c1e10670f04eb6ff03b5d7bcbd Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 18 Feb 2026 23:48:15 +0000 Subject: [PATCH 11/20] depend on testthat --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/DESCRIPTION b/DESCRIPTION index 7be52b7..d0e6a92 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Imports: magrittr, prettyunits, readr, + testthat, utils, withr Language: en-GB @@ -35,6 +36,5 @@ Suggests: knitr, rcmdcheck, rmarkdown, - RPostgres, - testthat + RPostgres VignetteBuilder: knitr From ecaf320b181b49066d13905940c4e637d3c62e56 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Thu, 19 Feb 2026 11:07:46 +0000 Subject: [PATCH 12/20] Fix missing doc --- R/stochastic_graphs.R | 6 ++++-- man/stone_stochastic_graph.Rd | 3 +++ man/stone_stochastic_process.Rd | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/R/stochastic_graphs.R b/R/stochastic_graphs.R index a4de81f..d09a65b 100644 --- a/R/stochastic_graphs.R +++ b/R/stochastic_graphs.R @@ -13,6 +13,8 @@ ##' @param group The modelling group, used in the filename and graph title. ##' @param country The country to plot. ##' @param scenario The scenario to plot. +##' @param outcome The outcome to plot, for example `deaths`, `cases`, `dalys` or +##' since 2023, `yll`. ##' @param ages A vector of one or more ages to be selected and aggregated, or ##' if left as NULL, then all ages are used and aggregated. 
##' @param by_cohort If TRUE, then age is subtracted from year to convert it to @@ -26,8 +28,8 @@ stone_stochastic_graph <- function(base, touchstone, disease, group, country, pq <- sprintf("%s/%s/%s_%s/%s_%s_%s.pq", base, touchstone, disease, group, group, scenario, country) - title <- sprintf("%s, %s, %s\n%s, %s\n %s", touchstone, disease, group, scenario, country, age_string) - log <- if (log) "y" else "n" + title <- sprintf("%s, %s, %s\n%s, %s\n", touchstone, disease, group, scenario, country) + log <- if (log) "y" else "" d <- arrow::read_parquet(pq) if (!is.null(ages)) { d <- d[d$age %in% ages, ] diff --git a/man/stone_stochastic_graph.Rd b/man/stone_stochastic_graph.Rd index 0c81c0e..9fa15c9 100644 --- a/man/stone_stochastic_graph.Rd +++ b/man/stone_stochastic_graph.Rd @@ -30,6 +30,9 @@ stone_stochastic_graph( \item{scenario}{The scenario to plot.} +\item{outcome}{The outcome to plot, for example \code{deaths}, \code{cases}, \code{dalys} or +since 2023, \code{yll}.} + \item{ages}{A vector of one or more ages to be selected and aggregated, or if left as NULL, then all ages are used and aggregated.} diff --git a/man/stone_stochastic_process.Rd b/man/stone_stochastic_process.Rd index 5ade88a..e226d1f 100644 --- a/man/stone_stochastic_process.Rd +++ b/man/stone_stochastic_process.Rd @@ -69,7 +69,7 @@ in each case.} \item{pre_aggregation_path}{Path to dir to write out pre age-disaggregated data into. If NULL then this is skipped.} -\item{outcomes}{A list of names vectors, where the name is the burden +\item{outcomes}{A list of named vectors, where the name is the burden outcome, and the elements of the list are the column names in the stochastic files that should be summed to compute that outcome. 
The default is to expect outcomes \code{deaths}, \code{cases}, \code{dalys}, and \code{yll}, From 1eea7c4304cf008ed2d17479eedb98a0266ff488 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Thu, 19 Feb 2026 11:27:20 +0000 Subject: [PATCH 13/20] More import/doc fix --- DESCRIPTION | 1 + NAMESPACE | 1 + R/stochastic_graphs.R | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d0e6a92..53bd765 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Imports: magrittr, prettyunits, readr, + rlang, testthat, utils, withr diff --git a/NAMESPACE b/NAMESPACE index 7a14856..26f808d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ importFrom(data.table,as.data.table) importFrom(data.table,rbindlist) importFrom(graphics,lines) importFrom(magrittr,"%>%") +importFrom(rlang,":=") importFrom(stats,median) importFrom(stats,quantile) importFrom(testthat,expect_equal) diff --git a/R/stochastic_graphs.R b/R/stochastic_graphs.R index d09a65b..f90544b 100644 --- a/R/stochastic_graphs.R +++ b/R/stochastic_graphs.R @@ -4,6 +4,7 @@ ##' @export ##' @title Stochastic plot ##' @import dplyr +##' @importFrom rlang := ##' @import arrow ##' @importFrom graphics lines ##' @importFrom stats quantile median @@ -38,7 +39,7 @@ stone_stochastic_graph <- function(base, touchstone, disease, group, country, d$year <- d$year - d$age } d <- d[, c("run_id", "year", "age", outcome)] - d <- d %>% group_by(run_id, year) %>% + d <- d %>% group_by(.data$run_id, .data$year) %>% summarise( !!outcome := sum(.data[[outcome]], na.rm = TRUE), .groups = "drop") From 22096f74dd24a4be0688ba1e78ae7ee4df3ae27b Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Thu, 19 Feb 2026 11:43:25 +0000 Subject: [PATCH 14/20] Fix last note --- R/stochastic_graphs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/stochastic_graphs.R b/R/stochastic_graphs.R index f90544b..6f7d9ab 100644 --- a/R/stochastic_graphs.R +++ b/R/stochastic_graphs.R @@ -54,7 +54,7 @@ 
stone_stochastic_graph <- function(base, touchstone, disease, group, country, col = "#b0b0b0") } - avgs <- d %>% group_by(year) %>% + avgs <- d %>% group_by(.data$year) %>% summarise( mean = mean(.data[[outcome]]), median = median(.data[[outcome]]), From 02fc7dc57e48a71a0bd3481e7dbe0878076b9aa6 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Thu, 19 Feb 2026 12:07:55 +0000 Subject: [PATCH 15/20] Updated msgs for latest testthat --- tests/testthat/test_touchstone.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_touchstone.R b/tests/testthat/test_touchstone.R index b457e08..f91e840 100644 --- a/tests/testthat/test_touchstone.R +++ b/tests/testthat/test_touchstone.R @@ -149,7 +149,7 @@ test_that("touchstone CSV invalid", { create_touchstone_csv(test$path, "nevis", 1) mess_with(test$path, "touchstone.csv", "haggis", 1, "yummy") expect_error(do_test(test), - "Correct columns in touchstone.csv not equal to (.*)", + "Expected Correct columns in touchstone.csv to equal (.*)", class = "expectation_failure") }) @@ -158,7 +158,7 @@ test_that("touchstone_name CSV invalid", { create_touchstone_name_csv(test$path, "nevis", 1) mess_with(test$path, "touchstone_name.csv", "pie_balm", 1, "clogg_banting") expect_error(do_test(test), - "Correct columns in touchstone_name.csv not equal to (.*)", + "Expected Correct columns in touchstone_name.csv to equal (.*)", class = "expectation_failure") }) From eb36ab4cb124a2810673e55d75a3ccaf932cd74d Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Thu, 19 Feb 2026 12:24:06 +0000 Subject: [PATCH 16/20] Remove MacOS/Win from CI - we never used to --- .github/workflows/R-CMD-check.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index ff73174..35a4649 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -18,8 +18,6 @@ jobs: fail-fast: false matrix: config: - - {os: macos-latest, r: 
'release'} - - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} From 3b9a9559648ce0d424fce800dbeb95dabff635e4 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Thu, 19 Feb 2026 16:57:15 +0000 Subject: [PATCH 17/20] Tests on stochastic_graphs --- R/stochastic_graphs.R | 51 +++++++++++------- tests/testthat/test_stochastic_files.R | 43 +++++++++++++-- tests/testthat/test_stochastic_graphs.R | 69 +++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 21 deletions(-) create mode 100644 tests/testthat/test_stochastic_graphs.R diff --git a/R/stochastic_graphs.R b/R/stochastic_graphs.R index 6f7d9ab..eee7184 100644 --- a/R/stochastic_graphs.R +++ b/R/stochastic_graphs.R @@ -26,32 +26,24 @@ stone_stochastic_graph <- function(base, touchstone, disease, group, country, scenario, outcome, ages = NULL, by_cohort = FALSE, log = FALSE) { - pq <- sprintf("%s/%s/%s_%s/%s_%s_%s.pq", base, touchstone, disease, - group, group, scenario, country) + d <- prepare_graph_data(base, touchstone, disease, group, country, + scenario, outcome, ages, by_cohort) - title <- sprintf("%s, %s, %s\n%s, %s\n", touchstone, disease, group, scenario, country) - log <- if (log) "y" else "" - d <- arrow::read_parquet(pq) - if (!is.null(ages)) { - d <- d[d$age %in% ages, ] - } - if (by_cohort) { - d$year <- d$year - d$age - } - d <- d[, c("run_id", "year", "age", outcome)] - d <- d %>% group_by(.data$run_id, .data$year) %>% - summarise( - !!outcome := sum(.data[[outcome]], na.rm = TRUE), - .groups = "drop") + title <- sprintf("%s, %s, %s\n%s, %s\n", touchstone, disease, group, + scenario, country) + runs <- max(d$run_id) miny <- max(1, min(d[[outcome]])) maxy <- max(d[[outcome]]) + log <- if (log) "y" else "" + plot(ylab = outcome, xlab = if (by_cohort) "Birth Cohort" else "year", x = d$year[d$run_id == 1], y = d[[outcome]][d$run_id == 1], type="l", col = "#b0b0b0", ylim = c(miny, maxy), main = title, log = log) - for 
(i in 2:200) { + + for (i in 2:runs) { lines(x = d$year[d$run_id == i], y = d[[outcome]][d$run_id == i], - col = "#b0b0b0") + col = "#b0b0b0") } avgs <- d %>% group_by(.data$year) %>% @@ -67,3 +59,26 @@ stone_stochastic_graph <- function(base, touchstone, disease, group, country, lines(x = avgs$year, y = avgs$q05, col = "#202020", lwd = 2) lines(x = avgs$year, y = avgs$q95, col = "#202020", lwd = 2) } + + +prepare_graph_data <- function(base, touchstone, disease, group, country, + scenario, outcome, ages, by_cohort) { + + pq <- sprintf("%s/%s/%s_%s/%s_%s_%s.pq", base, touchstone, disease, + group, group, scenario, country) + + d <- arrow::read_parquet(pq) + if (!is.null(ages)) { + d <- d[d$age %in% ages, ] + } + if (by_cohort) { + d$year <- d$year - d$age + } + d <- d[, c("run_id", "year", "age", outcome)] + d <- d %>% group_by(.data$run_id, .data$year) %>% + summarise( + !!outcome := sum(.data[[outcome]], na.rm = TRUE), + .groups = "drop") + d +} + diff --git a/tests/testthat/test_stochastic_files.R b/tests/testthat/test_stochastic_files.R index 9d0a0d8..153b3fb 100644 --- a/tests/testthat/test_stochastic_files.R +++ b/tests/testthat/test_stochastic_files.R @@ -1,7 +1,7 @@ context("stochastic_files") -test_that("Standardise works with :scenario and :index", { - fake <- data.frame( +fake_data <- function() { + data.frame( disease = "elf-piles", country = "LAP", year = rep(2000:2005, each = 5), @@ -12,6 +12,10 @@ test_that("Standardise works with :scenario and :index", { dalys = 61:90, yll = 91:120, run_id = 1) +} + +test_that("Standardise works with :scenario and :index", { + fake <- fake_data() fake2 <- fake fake2$run_id <- 2 fake <- rbind(fake, fake2) @@ -51,7 +55,6 @@ test_that("Standardise works with :scenario and :index", { expect_true("north_pole_fatalistic_LAP.pq" %in% files) expect_true("north_pole_fatalistic_POL.pq" %in% files) - pq <- arrow::read_parquet(file.path(tmpout, "north_pole_optimistic_LAP.pq")) expect_true(unique(pq$country) == "LAP") 
expect_true(all.equal(sort(unique(pq$run_id)), 1:2)) @@ -59,3 +62,37 @@ test_that("Standardise works with :scenario and :index", { expect_true(all.equal(sort(unique(pq$year)), 2000:2005)) }) + +test_that("Standardise works with missing run_id column", { + fake <- fake_data() + fake$run_id <- NULL + + tmpin <- tempdir() + tmpout <- tempdir() + tmpfile <- tempfile(tmpdir = tmpin) + for (i in 1:200) { + write.csv(fake, paste0(tmpfile, sprintf("_opt_%d", i)), row.names = FALSE) + write.csv(fake, paste0(tmpfile, sprintf("_fat_%d", i)), row.names = FALSE) + } + + stone_stochastic_standardise( + group = "north_pole", + in_path = tmpin, + out_path = tmpout, + scenarios = c("opt", "fat"), + files = paste0(basename(tmpfile), "_:scenario_:index"), + index = 1:200 + ) + + files <- list.files(path = tmpout) + expect_true("north_pole_fat_LAP.pq" %in% files) + expect_true("north_pole_opt_LAP.pq" %in% files) + + pq <- arrow::read_parquet(file.path(tmpout, "north_pole_fat_LAP.pq")) + tab <- table(pq$run_id) + expect_all_equal(as.integer(tab), 30) + expect_equal(length(tab), 200) + expect_true(all.equal(names(tab), as.character(1:200))) +}) + + diff --git a/tests/testthat/test_stochastic_graphs.R b/tests/testthat/test_stochastic_graphs.R new file mode 100644 index 0000000..243d3d3 --- /dev/null +++ b/tests/testthat/test_stochastic_graphs.R @@ -0,0 +1,69 @@ +context("stochastic_graphs") + +# Not a great amount of testing we can do here, without analysing +# the plot somehow. 
+ +test_that("stochastic_graph data transforms", { + + base <- tempdir() + touchstone <- "t" + disease <- "d" + group <- "elf" + scenario <- "opt" + country <- "LAP" + folder <- file.path(base, touchstone, paste0(disease, "_", group)) + filename <- paste0(group,"_", scenario, "_", country, ".pq") + dir.create(folder, showWarnings = FALSE, recursive = TRUE) + + data <- data.frame(year = rep(2000:2004, each = 5), + age = rep(10:14, 5), run_id = 1) + data$deaths <- seq_len(nrow(data)) + orig <- data + for (i in 2:5) { + d <- orig + d$run_id <- i + d$deaths <- d$deaths * i + data <- rbind(data, d) + } + f <- file.path(folder, filename) + if (file.exists(f)) file.remove(f) + arrow::write_parquet(data, file.path(folder, filename)) + + # Aggregate all ages, not by cohort. Should have 1 point per year. + + res <- prepare_graph_data(base, touchstone, disease, group, country, + scenario, "deaths", NULL, FALSE) + + expect_equal(nrow(res), 25) # 5 runs, 5 years + expect_equal(res$deaths[res$run_id == 1 & res$year == 2002], + sum(data$deaths[data$year == 2002 & data$run_id == 1])) + + # Select ages + + res <- prepare_graph_data(base, touchstone, disease, group, country, + scenario, "deaths", c(10, 12, 14), FALSE) + + expect_equal(nrow(res), 25) # 5 runs, 5 years + expect_equal(res$deaths[res$run_id == 1 & res$year == 2003], + sum(data$deaths[data$year == 2003 & data$run_id == 1 & + data$age %in% c(10, 12, 14)])) + + # By cohort + + res <- prepare_graph_data(base, touchstone, disease, group, country, + scenario, "deaths", c(10, 12, 14), TRUE) + + expect_equal(min(res$year), min(data$year) - max(data$age)) + expect_equal(max(res$year), max(data$year) - min(data$age)) + + # Test graph - we can't really, but just check it doesn't crash. 
+ + expect_no_error(stone_stochastic_graph( + base, touchstone, disease, group, country, + scenario, "deaths")) + + expect_no_error(stone_stochastic_graph( + base, touchstone, disease, group, country, + scenario, "deaths", log = TRUE)) + +}) From ed3742f962879459a1853db69982db4ee619d331 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Fri, 20 Feb 2026 11:29:26 +0000 Subject: [PATCH 18/20] Remove qs, use arrow/pq --- DESCRIPTION | 1 - NAMESPACE | 2 +- R/dalys.R | 8 +++---- R/stochastic_process.R | 30 ++++++++++++------------ R/stochastic_upload.R | 14 +++++------ man/stone_stochastic_upload.Rd | 2 +- man/stoner_dalys_for_db.Rd | 2 +- tests/testthat/test_stochastic_process.R | 26 ++++++++++---------- tests/testthat/test_stochastic_upload.R | 12 +++++----- 9 files changed, 48 insertions(+), 49 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 53bd765..97033ec 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,7 +21,6 @@ Imports: data.table, jsonlite, lgr, - qs2, dplyr, magrittr, prettyunits, diff --git a/NAMESPACE b/NAMESPACE index 26f808d..ed12b3e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,8 +15,8 @@ export(stoner_calculate_dalys) export(stoner_dalys_for_db) import(arrow) import(dplyr) -import(qs2) import(readr) +importFrom(arrow,write_parquet) importFrom(data.table,as.data.table) importFrom(data.table,rbindlist) importFrom(graphics,lines) diff --git a/R/dalys.R b/R/dalys.R index bddaeb3..53d4282 100644 --- a/R/dalys.R +++ b/R/dalys.R @@ -8,7 +8,7 @@ ##' ##' @export ##' @title Calculating DALYs for stochastic or central estimates -##' @import qs2 +##' @importFrom arrow write_parquet ##' @param con DBI connection to a Montagu database. Used for retrieving ##' demographic data for life expectancy. ##' @param touchstone The touchstone (including version); the demographic @@ -233,7 +233,7 @@ stoner_life_table <- function(con, touchstone, year_min, year_max, ##' estimate set can be specified here, to extend that estimate set with ##' DALYs. 
Otherwise, leave as NULL to look up by the other four ##' fields. -##' @param output_file If provided, then write the output CSV with the +##' @param output_file If provided, then write an output pq file with the ##' additional DALYs field to this filename. The file will be ready to be ##' uploaded to Montagu. ##' @param life_table If provided, then re-use the life table provided by a @@ -358,10 +358,10 @@ stoner_dalys_for_db <- function(con, dalys_params, modelling_group = NULL, data_dalys$data <- data_dalys$data[, c("disease", "year", "age", "country", "country_name", "cohort_size", cols)] - # Optionally write a CSV file out. + # Optionally write a parquet file out. if (!is.null(output_file)) { - qs2::qs_save(as.data.frame(data_dalys$data), output_file) + arrow::write_parquet(as.data.frame(data_dalys$data), output_file) } data_dalys diff --git a/R/stochastic_process.R b/R/stochastic_process.R index 6a84508..dcd1b45 100644 --- a/R/stochastic_process.R +++ b/R/stochastic_process.R @@ -6,8 +6,8 @@ ##' @export ##' @title Process stochastic data ##' @importFrom data.table as.data.table +##' @importFrom arrow write_parquet ##' @import readr -##' @import qs2 ##' @importFrom utils write.csv ##' @param con DBI connection to production. 
Used for verifying certificate ##' against expected properties @@ -434,12 +434,12 @@ write_pre_aggregated_to_disk <- function(data, touchpoint, for (country in countries) { timed({ path <- file.path(pre_aggregation_path, - sprintf("%s_%s_%s_pre_aggregation.qs", + sprintf("%s_%s_%s_pre_aggregation.pq", touchpoint$modelling_group, touchpoint$disease, country)) data <- as.data.frame(data) - qs2::qs_save(data[data$country == country, ], path) + arrow::write_parquet(data[data$country == country, ], path) }, "Saved %s size %s", path, prettyunits::pretty_bytes(file.size(path))) } invisible(TRUE) @@ -447,32 +447,32 @@ write_pre_aggregated_to_disk <- function(data, touchpoint, write_output_to_disk <- function(output, out_path, modelling_group, disease) { - all_u5_cal_file <- file.path(out_path, sprintf("%s_%s_calendar_u5.qs", + all_u5_cal_file <- file.path(out_path, sprintf("%s_%s_calendar_u5.pq", modelling_group, disease)) - timed(qs2::qs_save(as.data.frame(output$u5_calendar_year), - file = all_u5_cal_file), + timed(arrow::write_parquet(as.data.frame(output$u5_calendar_year), + all_u5_cal_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) - all_cal_file <- file.path(out_path, sprintf("%s_%s_calendar.qs", + all_cal_file <- file.path(out_path, sprintf("%s_%s_calendar.pq", modelling_group, disease)) - timed(qs2::qs_save(as.data.frame(output$all_calendar_year), - file = all_cal_file), + timed(arrow::write_parquet(as.data.frame(output$all_calendar_year), + all_cal_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) - all_u5_coh_file <- file.path(out_path, sprintf("%s_%s_cohort_u5.qs", + all_u5_coh_file <- file.path(out_path, sprintf("%s_%s_cohort_u5.pq", modelling_group, disease)) - timed(qs2::qs_save(as.data.frame(output$u5_cohort), - file = all_u5_coh_file), + timed(arrow::write_parquet(as.data.frame(output$u5_cohort), + all_u5_coh_file), "Saved %s size %s", all_u5_cal_file, 
prettyunits::pretty_bytes(file.size(all_u5_cal_file))) - all_coh_file <- file.path(out_path, sprintf("%s_%s_cohort.qs", + all_coh_file <- file.path(out_path, sprintf("%s_%s_cohort.pq", modelling_group, disease)) - timed(qs2::qs_save(as.data.frame(output$all_cohort), - file = all_coh_file), + timed(arrow::write_parquet(as.data.frame(output$all_cohort), + all_coh_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) list( diff --git a/R/stochastic_upload.R b/R/stochastic_upload.R index 05d7f57..12a27b0 100644 --- a/R/stochastic_upload.R +++ b/R/stochastic_upload.R @@ -4,7 +4,7 @@ ##' @title Upload stochastic data to annex ##' @import readr ##' @importFrom utils read.csv -##' @param file A qs or csv file generated by stone_stochastic_process +##' @param file A pq or csv file generated by stone_stochastic_process ##' @param con DBI connection to production (for argument validation) ##' @param annex DBI connection to annex, to receive stochastic uploads. 
##' @param modelling_group The modelling group id @@ -153,10 +153,10 @@ read_processed_stochastic_data <- function(file, is_cohort) { if (type == "csv") { read_stochastic_csv(file, is_cohort) - } else if (type == "qs") { - read_stochastic_qs(file, is_cohort) + } else if (type == "pq") { + read_stochastic_pq(file, is_cohort) } else { - stop(paste0("Can only read csv or qs format stochastic data, got ", type)) + stop(paste0("Can only read csv or pq format stochastic data, got ", type)) } } @@ -185,7 +185,7 @@ read_stochastic_csv <- function(file, is_cohort) { readr::read_csv(file, col_types = col_types, progress = FALSE) } -read_stochastic_qs <- function(file, is_cohort) { +read_stochastic_pq <- function(file, is_cohort) { if (is_cohort) { expected_cols <- c("cohort", "country", "run_id") } else { @@ -194,9 +194,9 @@ read_stochastic_qs <- function(file, is_cohort) { ## Read data message(sprintf("Reading %s", file)) - data <- qs2::qs_read(file) + data <- arrow::read_parquet(file) if (anyNA(match(expected_cols, colnames(data)))) { - stop("Columns in qs file not as expected") + stop("Columns in pq file not as expected") } data } diff --git a/man/stone_stochastic_upload.Rd b/man/stone_stochastic_upload.Rd index 09e3735..50d5485 100644 --- a/man/stone_stochastic_upload.Rd +++ b/man/stone_stochastic_upload.Rd @@ -18,7 +18,7 @@ stone_stochastic_upload( ) } \arguments{ -\item{file}{A qs or csv file generated by stone_stochastic_process} +\item{file}{A pq or csv file generated by stone_stochastic_process} \item{con}{DBI connection to production (for argument validation)} diff --git a/man/stoner_dalys_for_db.Rd b/man/stoner_dalys_for_db.Rd index 1cca33b..ae2acb6 100644 --- a/man/stoner_dalys_for_db.Rd +++ b/man/stoner_dalys_for_db.Rd @@ -52,7 +52,7 @@ estimate set can be specified here, to extend that estimate set with DALYs. 
Otherwise, leave as NULL to look up by the other four fields.} -\item{output_file}{If provided, then write the output CSV with the +\item{output_file}{If provided, then write an output pq file with the additional DALYs field to this filename. The file will be ready to be uploaded to Montagu.} diff --git a/tests/testthat/test_stochastic_process.R b/tests/testthat/test_stochastic_process.R index 67e31a9..b64dce1 100644 --- a/tests/testthat/test_stochastic_process.R +++ b/tests/testthat/test_stochastic_process.R @@ -427,10 +427,10 @@ stochastic_runner <- function(same_countries = TRUE, test = test, raw = res$raw, data = res$data, - cal = qs2::qs_read(file.path(test$path, "LAP-elf_flu_calendar.qs")), - cal_u5 = qs2::qs_read(file.path(test$path, "LAP-elf_flu_calendar_u5.qs")), - coh = qs2::qs_read(file.path(test$path, "LAP-elf_flu_cohort.qs")), - coh_u5 = qs2::qs_read(file.path(test$path, "LAP-elf_flu_cohort_u5.qs")) + cal = arrow::read_parquet(file.path(test$path, "LAP-elf_flu_calendar.pq")), + cal_u5 = arrow::read_parquet(file.path(test$path, "LAP-elf_flu_calendar_u5.pq")), + coh = arrow::read_parquet(file.path(test$path, "LAP-elf_flu_cohort.pq")), + coh_u5 = arrow::read_parquet(file.path(test$path, "LAP-elf_flu_cohort_u5.pq")) ) } @@ -545,10 +545,10 @@ test_that("Stochastic - with upload", { result$cal$deaths_pies <- round(result$cal$deaths_pies / 2) - new_qs_file <- tempfile(fileext = ".qs") - qs2::qs_save(result$cal, file = new_qs_file) + new_pq_file <- tempfile(fileext = ".pq") + arrow::write_parquet(result$cal, new_pq_file) - stone_stochastic_upload(new_qs_file, result$test$con, result$test$con, + stone_stochastic_upload(new_pq_file, result$test$con, result$test$con, "LAP-elf", "flu", "nevis-1", is_cohort = FALSE, is_under5 = FALSE, allow_new_database = FALSE, testing = TRUE) @@ -822,7 +822,7 @@ test_that("Stochastic - with DALYs", { # Hurrah. We can *finally* test DALYs. 
- out <- tempfile(fileext = ".qs") + out <- tempfile(fileext = ".pq") dat <- stoner_dalys_for_db(con, dalys_df, burden_estimate_set_id = new_bes, output_file = out) @@ -830,7 +830,7 @@ test_that("Stochastic - with DALYs", { "LAP-elf", "flu", "nevis-1", "pies", output_file = out) - df <- qs2::qs_read(out) + df <- arrow::read_parquet(out) expect_identical(dat, dat2) expect_equal(dat$data$dalys, df$dalys) @@ -852,10 +852,10 @@ test_that("preaggregated data can be saved to disk", { files <- list.files(t) expect_length(files, 2) ## 2 countries, 1 modelling group, 1 disease = 2 files - expect_setequal(files, c("LAP-elf_flu_4_pre_aggregation.qs", - "LAP-elf_flu_716_pre_aggregation.qs")) + expect_setequal(files, c("LAP-elf_flu_4_pre_aggregation.pq", + "LAP-elf_flu_716_pre_aggregation.pq")) - country_4 <- qs2::qs_read(file.path(t, "LAP-elf_flu_4_pre_aggregation.qs")) + country_4 <- arrow::read_parquet(file.path(t, "LAP-elf_flu_4_pre_aggregation.pq")) expect_setequal(colnames(country_4), c("country", "year", "run_id", "age", "deaths_pies", "cases_pies", "dalys_pies", "deaths_hot_chocolate", @@ -863,7 +863,7 @@ test_that("preaggregated data can be saved to disk", { "deaths_holly", "cases_holly", "dalys_holly")) expect_true(all(country_4$country == 4)) - country_716 <- qs2::qs_read(file.path(t, "LAP-elf_flu_716_pre_aggregation.qs")) + country_716 <- arrow::read_parquet(file.path(t, "LAP-elf_flu_716_pre_aggregation.pq")) expect_setequal(colnames(country_716), c("country", "year", "run_id", "age", "deaths_pies", "cases_pies", "dalys_pies", "deaths_hot_chocolate", diff --git a/tests/testthat/test_stochastic_upload.R b/tests/testthat/test_stochastic_upload.R index b513190..a313086 100644 --- a/tests/testthat/test_stochastic_upload.R +++ b/tests/testthat/test_stochastic_upload.R @@ -10,11 +10,11 @@ test_that("Bad arguments", { do_test(test) expect_error(stoner::stone_stochastic_upload( - file.path(tempfile(), "non_existent_file.qs")), + file.path(tempfile(), 
"non_existent_file.pq")), "file(.*)exists(.*)") - new_file <- tempfile(fileext = ".qs") - qs2::qs_save(mtcars, file = new_file) + new_file <- tempfile(fileext = ".pq") + arrow::save_parquet(mtcars, new_file) expect_error(stone_stochastic_upload(new_file, test$con, test$con, "Rudolph"), "Unknown modelling group: Rudolph") @@ -27,7 +27,7 @@ test_that("Bad arguments", { expect_error(stone_stochastic_upload(new_file, test$con, test$con, "LAP-elf", "flu", "nevis-1", FALSE, FALSE, TRUE), - "Columns in qs file not as expected") + "Columns in pq file not as expected") }) test_that("stochastic_upload with csv file returns useful error", { @@ -45,7 +45,7 @@ test_that("stochastic_upload with csv file returns useful error", { "Columns in csv file not as expected") }) -test_that("stochastic_upload errors if file not csv or qs", { +test_that("stochastic_upload errors if file not csv or pq", { test <- new_test() standard_disease_touchstones(test) standard_responsibility_support(test) @@ -57,5 +57,5 @@ test_that("stochastic_upload errors if file not csv or qs", { expect_error( stone_stochastic_upload(new_file, test$con, test$con, "LAP-elf", "flu", "nevis-1", FALSE, FALSE, TRUE), - "Can only read csv or qs format stochastic data, got rds") + "Can only read csv or pq format stochastic data, got rds") }) From 0f14d7e2cb9f6c592d231fe70b051f6781ee223b Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Fri, 20 Feb 2026 11:51:57 +0000 Subject: [PATCH 19/20] Fix tests --- tests/testthat/test_stochastic_process.R | 6 +++--- tests/testthat/test_stochastic_upload.R | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_stochastic_process.R b/tests/testthat/test_stochastic_process.R index b64dce1..b19e1a5 100644 --- a/tests/testthat/test_stochastic_process.R +++ b/tests/testthat/test_stochastic_process.R @@ -370,7 +370,7 @@ stochastic_runner <- function(same_countries = TRUE, lines = Inf, log_file = NULL, silent = TRUE) { - + browser() test <- new_test() res <- 
random_stoch_data(test, same_countries, simple_outcomes, @@ -576,7 +576,7 @@ test_that("stochastic_upload can upload csv file", { "Overwriting table with id 1") data <- DBI::dbGetQuery(result$test$con, "SELECT * FROM stochastic_1") - expect_equal(data, result$cal_u5) + expect_equal(data, as.data.frame(result$cal_u5)) cohort_csv <- tempfile(fileext = ".csv") write_csv(x = result$coh, file = cohort_csv) @@ -588,7 +588,7 @@ test_that("stochastic_upload can upload csv file", { "Overwriting table with id 4") data <- DBI::dbGetQuery(result$test$con, "SELECT * FROM stochastic_4") - expect_equal(data, result$coh) + expect_equal(data, as.data.frame(result$coh)) }) ############################################################################## diff --git a/tests/testthat/test_stochastic_upload.R b/tests/testthat/test_stochastic_upload.R index a313086..3c52416 100644 --- a/tests/testthat/test_stochastic_upload.R +++ b/tests/testthat/test_stochastic_upload.R @@ -14,7 +14,7 @@ test_that("Bad arguments", { "file(.*)exists(.*)") new_file <- tempfile(fileext = ".pq") - arrow::save_parquet(mtcars, new_file) + arrow::write_parquet(mtcars, new_file) expect_error(stone_stochastic_upload(new_file, test$con, test$con, "Rudolph"), "Unknown modelling group: Rudolph") From 54be05ec15ffbe74c19cbbe30cdaf47562522ee7 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Fri, 20 Feb 2026 11:55:12 +0000 Subject: [PATCH 20/20] Remove debug --- tests/testthat/test_stochastic_process.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test_stochastic_process.R b/tests/testthat/test_stochastic_process.R index b19e1a5..46a8303 100644 --- a/tests/testthat/test_stochastic_process.R +++ b/tests/testthat/test_stochastic_process.R @@ -370,7 +370,6 @@ stochastic_runner <- function(same_countries = TRUE, lines = Inf, log_file = NULL, silent = TRUE) { - browser() test <- new_test() res <- random_stoch_data(test, same_countries, simple_outcomes,