diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 583fbf0..35a4649 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,14 +1,10 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - - main - - master + branches: [main, master] pull_request: - branches: - - main - - master + branches: [main, master] name: R-CMD-check @@ -22,61 +18,39 @@ jobs: fail-fast: false matrix: config: - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + + # Shorter timeout to prevent mac builders hanging for 6 hours! + timeout-minutes: 30 env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + R_CRAN_MIRROR: https://cloud.r-project.org steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v1 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v1 + - uses: actions/checkout@v4 - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} + - uses: r-lib/actions/setup-pandoc@v2 - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 + - uses: r-lib/actions/setup-r@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true - - name: Install system dependencies + - name: Install system build tools if: runner.os == 'Linux' run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') + sudo apt-get update + sudo apt-get install -y build-essential libcurl4-openssl-dev libssl-dev libxml2-dev - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main + - uses: r-lib/actions/check-r-package@v2 with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + upload-snapshots: true diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 803da57..1c8c996 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -1,48 +1,53 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - - main - - master + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: name: pkgdown jobs: pkgdown: - runs-on: macOS-latest + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_CRAN_MIRROR: https://cloud.r-project.org steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-pandoc@master + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true - - name: Query dependencies + - name: Install system build tools + if: runner.os == 'Linux' run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} + sudo apt-get update + sudo apt-get install -y build-essential libcurl4-openssl-dev libssl-dev libxml2-dev - - name: Cache R packages - uses: actions/cache@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + extra-packages: any::pkgdown, local::. + needs: website - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - install.packages("pkgdown") + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0} - - name: Install package - run: R CMD INSTALL . - - - name: Deploy package - run: | - git config --local user.email "actions@github.com" - git config --local user.name "GitHub Actions" - Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 453ff10..4b4ea12 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -1,58 +1,57 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - - main - - master + branches: [main, master] pull_request: - branches: - - main - - master + branches: [main, master] name: test-coverage jobs: test-coverage: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - ORDERLYWEB_TEST_TOKEN: ${{ secrets.ORDERLYWEB_TEST_TOKEN }} - steps: - - uses: actions/checkout@v2 + R_CRAN_MIRROR: https://cloud.r-project.org - - uses: r-lib/actions/setup-r@v1 + steps: + - uses: actions/checkout@v4 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true - - name: Query dependencies + - name: Install system build tools + if: runner.os == 'Linux' run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} + sudo apt-get update + sudo apt-get install -y build-essential libcurl4-openssl-dev libssl-dev libxml2-dev - - name: Cache R packages - uses: actions/cache@v2 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + extra-packages: any::covr + needs: coverage - - name: Install system dependencies - if: runner.os == 'Linux' + - name: Test coverage run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - sudo apt-get install libcurl4-openssl-dev + covr::codecov( + quiet = FALSE, + clean = FALSE, + install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + ) + shell: Rscript {0} - - name: Install dependencies + - name: Show testthat output + if: always() run: | - install.packages(c("remotes")) - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("covr") - shell: Rscript {0} + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash - - name: Test coverage - run: covr::codecov() - shell: Rscript {0} + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/DESCRIPTION b/DESCRIPTION index 2dbe8ad..97033ec 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: stoner Title: Support for Building VIMC Montagu Touchstones, using Dettl -Version: 0.1.17 +Version: 0.1.18 Authors@R: c(person("Wes", "Hinsley",role = c("aut", "cre", "cst", "dnc", "elg", "itr", "sng", "ard"), email = "w.hinsley@imperial.ac.uk"), @@ -16,24 +16,25 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Imports: + arrow, DBI, data.table, jsonlite, lgr, - qs, dplyr, magrittr, prettyunits, readr, + rlang, + testthat, utils, withr Language: en-GB -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.3 Roxygen: list(markdown = TRUE) Suggests: knitr, rcmdcheck, rmarkdown, - RPostgres, - testthat + RPostgres VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index d3fedf2..ed12b3e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,16 +4,28 @@ export(stone_dump) export(stone_extract) export(stone_load) export(stone_stochastic_cert_verify) +export(stone_stochastic_graph) export(stone_stochastic_process) +export(stone_stochastic_standardise) export(stone_stochastic_upload) export(stone_test_extract) export(stone_test_transform) export(stone_transform) export(stoner_calculate_dalys) export(stoner_dalys_for_db) -import(data.table) +import(arrow) +import(dplyr) import(readr) +importFrom(arrow,write_parquet) +importFrom(data.table,as.data.table) +importFrom(data.table,rbindlist) +importFrom(graphics,lines) importFrom(magrittr,"%>%") +importFrom(rlang,":=") +importFrom(stats,median) +importFrom(stats,quantile) +importFrom(testthat,expect_equal) +importFrom(testthat,expect_true) importFrom(utils,capture.output) importFrom(utils,read.csv) importFrom(utils,write.csv) diff --git a/R/dalys.R b/R/dalys.R index 4edf2ea..53d4282 100644 --- a/R/dalys.R +++ b/R/dalys.R @@ -8,6 +8,7 @@ ##' ##' @export ##' @title Calculating DALYs for stochastic or central estimates +##' @importFrom arrow write_parquet ##' @param con DBI connection to a Montagu database. Used for retrieving ##' demographic data for life expectancy. ##' @param touchstone The touchstone (including version); the demographic @@ -232,7 +233,7 @@ stoner_life_table <- function(con, touchstone, year_min, year_max, ##' estimate set can be specified here, to extend that estimate set with ##' DALYs. Otherwise, leave as NULL to look up by the other four ##' fields. -##' @param output_file If provided, then write the output CSV with the +##' @param output_file If provided, then write an output pq file with the ##' additional DALYs field to this filename. The file will be ready to be ##' uploaded to Montagu. ##' @param life_table If provided, then re-use the life table provided by a @@ -357,10 +358,10 @@ stoner_dalys_for_db <- function(con, dalys_params, modelling_group = NULL, data_dalys$data <- data_dalys$data[, c("disease", "year", "age", "country", "country_name", "cohort_size", cols)] - # Optionally write a CSV file out. + # Optionally write a parquet file out. if (!is.null(output_file)) { - qs::qsave(as.data.frame(data_dalys$data), output_file) + arrow::write_parquet(as.data.frame(data_dalys$data), output_file) } data_dalys diff --git a/R/prune.R b/R/prune.R index 59077df..ca03fc4 100644 --- a/R/prune.R +++ b/R/prune.R @@ -1,4 +1,5 @@ ##' @importFrom utils capture.output +##' @importFrom testthat expect_true expect_equal extract_prune <- function(e, path, con) { @@ -142,10 +143,10 @@ test_extract_prune <- function(e) { # bes <- e$burden_estimate_set - expect_true("id" %in% names(bes)) - expect_true("responsibility" %in% names(bes))# - expect_equal(0, sum(is.na(bes$id))) - expect_equal(0, sum(is.na(bes$responsibility))) + testthat::expect_true("id" %in% names(bes)) + testthat::expect_true("responsibility" %in% names(bes))# + testthat::expect_equal(0, sum(is.na(bes$id))) + testthat::expect_equal(0, sum(is.na(bes$responsibility))) } diff --git a/R/stochastic_files.R b/R/stochastic_files.R new file mode 100644 index 0000000..c910d36 --- /dev/null +++ b/R/stochastic_files.R @@ -0,0 +1,95 @@ +##' Convert a modelling group's stochastic files into an intermediate +##' format including all the original data, separated by country +##' and scenario, and in PQ format for later processing. As much as +##' possible, we'll try and detect the input formats. +##' +##' @export +##' @title Standardise stochastic data files +##' @importFrom data.table rbindlist +##' @import arrow +##' @param group The modelling group. +##' @param in_path The folder or network path where the original files are found. +##' @param out_path The folder or network path to write output files to. +##' @param scenarios A vector of strings giving each scenario name. +##' @param files This can either be a vector of strings of the same length to +##' the vector of scenarios, in which case each entry should match the scenario, +##' providing the filename for the original uploads for that scenario. Most +##' groups provide files numbered between 1 and 200 for their stochastic runs; +##' replace that number with `:index` in the filename to match. Alternatively, +##' files can be a single entry containing the string `:scenario`; in this case, +##' files must exist that match each entry in the `scenarios` parameter, and +##' the same file string can be used to match all of them (perhaps additionaly +##' wiht `:index`). +##' @param index This is usually a vector of ints, `1:200` to match the +##' range of stochastic files uploaded per scenario. A few groups upload one +##' large file containing everything, in which case `:index` shouldn't occur +##' in the `files` parameter, and should be omitted. +##' @param rubella_fix Historically Rubella uploads used the burden outcome +##' `rubella_deaths_congenital` and `rubella_cases_congenital` instead of +##' the simpler `deaths` and `cases`, and additionally provided a +##' `rubella_infections` field. `rubella_fix` needs to be TRUE to standardise +##' these to the simpler names. Processing Rubella stochastic files without +##' this set to TRUE will fail - so while we should always do this, keeping +##' the parameter makes it more clear in the code what we're doing and why. +##' @param missing_run_id_fix Some groups in the past have omitted run_id +##' from the files, but included them in the filenames. This fix inserts +##' that into the files if the index parameter indicates we have 200 runs to +##' process. + +stone_stochastic_standardise <- function( + group, in_path, out_path, scenarios, files, index = 1, + rubella_fix = TRUE, missing_run_id_fix = TRUE) { + + dir.create(out_path, showWarnings = FALSE, recursive = TRUE) + if ((length(files) == 1) && (grepl(":scenario", files))) { + files <- rep(files, length(scenarios)) + for (j in seq_along(scenarios)) { + files[j] <- gsub(":scenario", scenarios[j], files[j]) + } + } + + for (i in seq_along(scenarios)) { + message(scenarios[i]) + all_data <- as.data.frame(data.table::rbindlist(lapply(index, function(j) { + cat("\r", j) + file <- gsub(":index", j, files[i]) + d <- read.csv(file.path(in_path, file)) + d$country_name <- NULL + + # Fixes needed to standardise Rubella + if (rubella_fix) { + names(d)[names(d) == "rubella_deaths_congenital"] <- "deaths" + names(d)[names(d) == "rubella_cases_congenital"] <- "cases" + d$rubella_infections <- NULL + } + + # Detect where run_id is missing, but in filenames + + if (missing_run_id_fix) { + if ((!"run_id" %in% names(d)) && (length(index) == 200)) d$run_id <- j + } + + # Round to integer, as per guidance. (Not using as.integer, as that + # has limits on how large numbers can be, so we are just truncating + # digits here) + + d$dalys <- round(d$dalys) + d$deaths <- round(d$deaths) + d$cases <- round(d$cases) + d$yll <- round(d$yll) + d$cohort_size <- round(d$cohort_size) + + d + }), use.names = TRUE)) + + countries <- sort(unique(all_data$country)) + for (country in countries) { + cat("\r", country) + d <- all_data[all_data$country == country, ] + d <- d[order(d$run_id, d$year, d$age), ] + file <- sprintf("%s_%s_%s.pq", group, scenarios[i], country) + arrow::write_parquet(d, file.path(out_path, file)) + } + cat("\r Completed\n\n") + } +} diff --git a/R/stochastic_graphs.R b/R/stochastic_graphs.R new file mode 100644 index 0000000..eee7184 --- /dev/null +++ b/R/stochastic_graphs.R @@ -0,0 +1,84 @@ +##' Draw a stochastic plot showing all the different runs, with the mean, +##' median, 5% and 95% quantiles shown. +##' +##' @export +##' @title Stochastic plot +##' @import dplyr +##' @importFrom rlang := +##' @import arrow +##' @importFrom graphics lines +##' @importFrom stats quantile median +##' @param base The folder in which the standardised stochastic files are found. +##' @param touchstone The touchstone name (for the graph title) +##' @param disease The disease, used for building the filename and graph title. +##' @param group The modelling group, used in the filename and graph title. +##' @param country The country to plot. +##' @param scenario The scenario to plot. +##' @param outcome The outcome to plot, for example `deaths`, `cases`, `dalys` or +##' since 2023, `yll`. +##' @param ages A vector of one or more ages to be selected and aggregated, or +##' if left as NULL, then all ages are used and aggregated. +##' @param by_cohort If TRUE, then age is subtracted from year to convert it to +##' year of birth before aggregating. +##' @param log If TRUE, then use a logged y-axis. + +stone_stochastic_graph <- function(base, touchstone, disease, group, country, + scenario, outcome, ages = NULL, + by_cohort = FALSE, log = FALSE) { + + d <- prepare_graph_data(base, touchstone, disease, group, country, + scenario, outcome, ages, by_cohort) + + title <- sprintf("%s, %s, %s\n%s, %s\n", touchstone, disease, group, + scenario, country) + + runs <- max(d$run_id) + miny <- max(1, min(d[[outcome]])) + maxy <- max(d[[outcome]]) + log <- if (log) "y" else "" + + plot(ylab = outcome, xlab = if (by_cohort) "Birth Cohort" else "year", + x = d$year[d$run_id == 1], y = d[[outcome]][d$run_id == 1], type="l", + col = "#b0b0b0", ylim = c(miny, maxy), main = title, log = log) + + for (i in 2:runs) { + lines(x = d$year[d$run_id == i], y = d[[outcome]][d$run_id == i], + col = "#b0b0b0") + } + + avgs <- d %>% group_by(.data$year) %>% + summarise( + mean = mean(.data[[outcome]]), + median = median(.data[[outcome]]), + q05 = quantile(.data[[outcome]], 0.05), + q95 = quantile(.data[[outcome]], 0.95), + .groups = "drop" + ) + lines(x = avgs$year, y = avgs$mean, col = "#ff4040", lwd = 2) + lines(x = avgs$year, y = avgs$median, col = "#00ff00", lwd = 2) + lines(x = avgs$year, y = avgs$q05, col = "#202020", lwd = 2) + lines(x = avgs$year, y = avgs$q95, col = "#202020", lwd = 2) +} + + +prepare_graph_data <- function(base, touchstone, disease, group, country, + scenario, outcome, ages, by_cohort) { + + pq <- sprintf("%s/%s/%s_%s/%s_%s_%s.pq", base, touchstone, disease, + group, group, scenario, country) + + d <- arrow::read_parquet(pq) + if (!is.null(ages)) { + d <- d[d$age %in% ages, ] + } + if (by_cohort) { + d$year <- d$year - d$age + } + d <- d[, c("run_id", "year", "age", outcome)] + d <- d %>% group_by(.data$run_id, .data$year) %>% + summarise( + !!outcome := sum(.data[[outcome]], na.rm = TRUE), + .groups = "drop") + d +} + diff --git a/R/stochastic_process.R b/R/stochastic_process.R index f71af58..dcd1b45 100644 --- a/R/stochastic_process.R +++ b/R/stochastic_process.R @@ -5,7 +5,8 @@ ##' ##' @export ##' @title Process stochastic data -##' @import data.table +##' @importFrom data.table as.data.table +##' @importFrom arrow write_parquet ##' @import readr ##' @importFrom utils write.csv ##' @param con DBI connection to production. Used for verifying certificate @@ -33,7 +34,7 @@ ##' @param out_path Path to writing output files into ##' @param pre_aggregation_path Path to dir to write out pre age-disaggregated ##' data into. If NULL then this is skipped. -##' @param outcomes A list of names vectors, where the name is the burden +##' @param outcomes A list of named vectors, where the name is the burden ##' outcome, and the elements of the list are the column names in the ##' stochastic files that should be summed to compute that outcome. The ##' default is to expect outcomes `deaths`, `cases`, `dalys`, and `yll`, @@ -316,9 +317,9 @@ aggregate_data <- function(scenario_data) { scen_coh <- agg_and_sort(scenario_data) scen_u5_coh <- scen_u5_coh %>% - dplyr::rename(cohort = year) + dplyr::rename(cohort = "year") scen_coh <- scen_coh %>% - dplyr::rename(cohort = year) + dplyr::rename(cohort = "year") list( u5_calendar_year = scen_u5_cal, @@ -433,12 +434,12 @@ write_pre_aggregated_to_disk <- function(data, touchpoint, for (country in countries) { timed({ path <- file.path(pre_aggregation_path, - sprintf("%s_%s_%s_pre_aggregation.qs", + sprintf("%s_%s_%s_pre_aggregation.pq", touchpoint$modelling_group, touchpoint$disease, country)) data <- as.data.frame(data) - qs::qsave(data[data$country == country, ], path) + arrow::write_parquet(data[data$country == country, ], path) }, "Saved %s size %s", path, prettyunits::pretty_bytes(file.size(path))) } invisible(TRUE) @@ -446,32 +447,32 @@ write_pre_aggregated_to_disk <- function(data, touchpoint, write_output_to_disk <- function(output, out_path, modelling_group, disease) { - all_u5_cal_file <- file.path(out_path, sprintf("%s_%s_calendar_u5.qs", + all_u5_cal_file <- file.path(out_path, sprintf("%s_%s_calendar_u5.pq", modelling_group, disease)) - timed(qs::qsave(x = as.data.frame(output$u5_calendar_year), - file = all_u5_cal_file), + timed(arrow::write_parquet(as.data.frame(output$u5_calendar_year), + all_u5_cal_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) - all_cal_file <- file.path(out_path, sprintf("%s_%s_calendar.qs", + all_cal_file <- file.path(out_path, sprintf("%s_%s_calendar.pq", modelling_group, disease)) - timed(qs::qsave(x = as.data.frame(output$all_calendar_year), - file = all_cal_file), + timed(arrow::write_parquet(as.data.frame(output$all_calendar_year), + all_cal_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) - all_u5_coh_file <- file.path(out_path, sprintf("%s_%s_cohort_u5.qs", + all_u5_coh_file <- file.path(out_path, sprintf("%s_%s_cohort_u5.pq", modelling_group, disease)) - timed(qs::qsave(x = as.data.frame(output$u5_cohort), - file = all_u5_coh_file), + timed(arrow::write_parquet(as.data.frame(output$u5_cohort), + all_u5_coh_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) - all_coh_file <- file.path(out_path, sprintf("%s_%s_cohort.qs", + all_coh_file <- file.path(out_path, sprintf("%s_%s_cohort.pq", modelling_group, disease)) - timed(qs::qsave(x = as.data.frame(output$all_cohort), - file = all_coh_file), + timed(arrow::write_parquet(as.data.frame(output$all_cohort), + all_coh_file), "Saved %s size %s", all_u5_cal_file, prettyunits::pretty_bytes(file.size(all_u5_cal_file))) list( diff --git a/R/stochastic_upload.R b/R/stochastic_upload.R index a20f59a..12a27b0 100644 --- a/R/stochastic_upload.R +++ b/R/stochastic_upload.R @@ -2,10 +2,9 @@ ##' the annex database. ##' @export ##' @title Upload stochastic data to annex -##' @import data.table ##' @import readr ##' @importFrom utils read.csv -##' @param file A qs or csv file generated by stone_stochastic_process +##' @param file A pq or csv file generated by stone_stochastic_process ##' @param con DBI connection to production (for argument validation) ##' @param annex DBI connection to annex, to receive stochastic uploads. ##' @param modelling_group The modelling group id @@ -154,10 +153,10 @@ read_processed_stochastic_data <- function(file, is_cohort) { if (type == "csv") { read_stochastic_csv(file, is_cohort) - } else if (type == "qs") { - read_stochastic_qs(file, is_cohort) + } else if (type == "pq") { + read_stochastic_pq(file, is_cohort) } else { - stop(paste0("Can only read csv or qs format stochastic data, got ", type)) + stop(paste0("Can only read csv or pq format stochastic data, got ", type)) } } @@ -186,7 +185,7 @@ read_stochastic_csv <- function(file, is_cohort) { readr::read_csv(file, col_types = col_types, progress = FALSE) } -read_stochastic_qs <- function(file, is_cohort) { +read_stochastic_pq <- function(file, is_cohort) { if (is_cohort) { expected_cols <- c("cohort", "country", "run_id") } else { @@ -195,9 +194,9 @@ read_stochastic_qs <- function(file, is_cohort) { ## Read data message(sprintf("Reading %s", file)) - data <- qs::qread(file) + data <- arrow::read_parquet(file) if (anyNA(match(expected_cols, colnames(data)))) { - stop("Columns in qs file not as expected") + stop("Columns in pq file not as expected") } data } diff --git a/man/stone_stochastic_graph.Rd b/man/stone_stochastic_graph.Rd new file mode 100644 index 0000000..9fa15c9 --- /dev/null +++ b/man/stone_stochastic_graph.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stochastic_graphs.R +\name{stone_stochastic_graph} +\alias{stone_stochastic_graph} +\title{Stochastic plot} +\usage{ +stone_stochastic_graph( + base, + touchstone, + disease, + group, + country, + scenario, + outcome, + ages = NULL, + by_cohort = FALSE, + log = FALSE +) +} +\arguments{ +\item{base}{The folder in which the standardised stochastic files are found.} + +\item{touchstone}{The touchstone name (for the graph title)} + +\item{disease}{The disease, used for building the filename and graph title.} + +\item{group}{The modelling group, used in the filename and graph title.} + +\item{country}{The country to plot.} + +\item{scenario}{The scenario to plot.} + +\item{outcome}{The outcome to plot, for example \code{deaths}, \code{cases}, \code{dalys} or +since 2023, \code{yll}.} + +\item{ages}{A vector of one or more ages to be selected and aggregated, or +if left as NULL, then all ages are used and aggregated.} + +\item{by_cohort}{If TRUE, then age is subtracted from year to convert it to +year of birth before aggregating.} + +\item{log}{If TRUE, then use a logged y-axis.} +} +\description{ +Draw a stochastic plot showing all the different runs, with the mean, +median, 5\% and 95\% quantiles shown. +} diff --git a/man/stone_stochastic_process.Rd b/man/stone_stochastic_process.Rd index 4ee4542..e226d1f 100644 --- a/man/stone_stochastic_process.Rd +++ b/man/stone_stochastic_process.Rd @@ -69,7 +69,7 @@ in each case.} \item{pre_aggregation_path}{Path to dir to write out pre age-disaggregated data into. If NULL then this is skipped.} -\item{outcomes}{A list of names vectors, where the name is the burden +\item{outcomes}{A list of named vectors, where the name is the burden outcome, and the elements of the list are the column names in the stochastic files that should be summed to compute that outcome. The default is to expect outcomes \code{deaths}, \code{cases}, \code{dalys}, and \code{yll}, @@ -112,11 +112,6 @@ doing the full run.} If file exists it will be appended to, otherwise file will be created.} \item{silent}{TRUE to silence console logs.} - -\item{yll}{Added in 2023, the years of life lost indicator is more -helpful especially for covid burden analysis. Usually leaving as "yll" -is enough, but if it is the sum of other outcomes, provide these as a -string vector.} } \description{ Convert a modelling group's stochastic files into the summary format, diff --git a/man/stone_stochastic_standardise.Rd b/man/stone_stochastic_standardise.Rd new file mode 100644 index 0000000..343275f --- /dev/null +++ b/man/stone_stochastic_standardise.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stochastic_files.R +\name{stone_stochastic_standardise} +\alias{stone_stochastic_standardise} +\title{Standardise stochastic data files} +\usage{ +stone_stochastic_standardise( + group, + in_path, + out_path, + scenarios, + files, + index = 1, + rubella_fix = TRUE, + missing_run_id_fix = TRUE +) +} +\arguments{ +\item{group}{The modelling group.} + +\item{in_path}{The folder or network path where the original files are found.} + +\item{out_path}{The folder or network path to write output files to.} + +\item{scenarios}{A vector of strings giving each scenario name.} + +\item{files}{This can either be a vector of strings of the same length to +the vector of scenarios, in which case each entry should match the scenario, +providing the filename for the original uploads for that scenario. Most +groups provide files numbered between 1 and 200 for their stochastic runs; +replace that number with \verb{:index} in the filename to match. Alternatively, +files can be a single entry containing the string \verb{:scenario}; in this case, +files must exist that match each entry in the \code{scenarios} parameter, and +the same file string can be used to match all of them (perhaps additionaly +wiht \verb{:index}).} + +\item{index}{This is usually a vector of ints, \code{1:200} to match the +range of stochastic files uploaded per scenario. A few groups upload one +large file containing everything, in which case \verb{:index} shouldn't occur +in the \code{files} parameter, and should be omitted.} + +\item{rubella_fix}{Historically Rubella uploads used the burden outcome +\code{rubella_deaths_congenital} and \code{rubella_cases_congenital} instead of +the simpler \code{deaths} and \code{cases}, and additionally provided a +\code{rubella_infections} field. \code{rubella_fix} needs to be TRUE to standardise +these to the simpler names. Processing Rubella stochastic files without +this set to TRUE will fail - so while we should always do this, keeping +the parameter makes it more clear in the code what we're doing and why.} + +\item{missing_run_id_fix}{Some groups in the past have omitted run_id +from the files, but included them in the filenames. This fix inserts +that into the files if the index parameter indicates we have 200 runs to +process.} +} +\description{ +Convert a modelling group's stochastic files into an intermediate +format including all the original data, separated by country +and scenario, and in PQ format for later processing. As much as +possible, we'll try and detect the input formats. +} diff --git a/man/stone_stochastic_upload.Rd b/man/stone_stochastic_upload.Rd index 09e3735..50d5485 100644 --- a/man/stone_stochastic_upload.Rd +++ b/man/stone_stochastic_upload.Rd @@ -18,7 +18,7 @@ stone_stochastic_upload( ) } \arguments{ -\item{file}{A qs or csv file generated by stone_stochastic_process} +\item{file}{A pq or csv file generated by stone_stochastic_process} \item{con}{DBI connection to production (for argument validation)} diff --git a/man/stoner_dalys_for_db.Rd b/man/stoner_dalys_for_db.Rd index 1cca33b..ae2acb6 100644 --- a/man/stoner_dalys_for_db.Rd +++ b/man/stoner_dalys_for_db.Rd @@ -52,7 +52,7 @@ estimate set can be specified here, to extend that estimate set with DALYs. Otherwise, leave as NULL to look up by the other four fields.} -\item{output_file}{If provided, then write the output CSV with the +\item{output_file}{If provided, then write an output pq file with the additional DALYs field to this filename. The file will be ready to be uploaded to Montagu.} diff --git a/scripts/process_stoch_202310gavi.R b/scripts/process_stoch_202310gavi.R new file mode 100644 index 0000000..0c4915b --- /dev/null +++ b/scripts/process_stoch_202310gavi.R @@ -0,0 +1,362 @@ +base_in_path <- "//wpia-hn2.hpc.dide.ic.ac.uk/vimc_stochastics_dropbox/latest/202310gavi" +base_out_path <- "//wpia-hn2.hpc.dide.ic.ac.uk/vimc_stochastics/202310gavi" + + +############### +# Cholera + +scenarios = c("cholera-no-vaccination", "cholera-ocv1-default", "cholera-ocv1-ocv2-default") + +stoner::stone_stochastic_standardise( + group = "IVI-Kim", + in_path = file.path(base_in_path, "Cholera-IVI-Kim"), + out_path = file.path(base_out_path, "Cholera-IVI-Kim"), + scenarios = scenarios, + files = c("stoch_Cholera_novacc_20250704T130601 Jong-Hoon Kim.csv.xz", + "stoch_Cholera_campaign_default_ocv1_20250704T130601 Jong-Hoon Kim.csv.xz", + "stoch_Cholera_campaign_default_ocv12_20250704T130601 Jong-Hoon Kim.csv.xz") + ) + +stoner::stone_stochastic_standardise( + group = "JHU-Lee", + in_path = file.path(base_in_path, "Cholera-JHU-Lee"), + out_path = file.path(base_out_path, "Cholera-JHU-Lee"), + scenarios = scenarios, + files = c("stochastic-burden-template.202310gavi-4.Cholera_standard_template.529.no-vaccination Christina Alam.csv.xz", + "stochastic-burden-template.202310gavi-4.Cholera_standard_template.529.ocv1-default_one Christina Alam.csv.xz", + "stochastic-burden-template.202310gavi-4.Cholera_standard_template.529.ocv1-ocv2-default_two Christina Alam.csv.xz") +) + + +############### +# COVID + + scenarios = c("covid-no-vaccination", "covid-no-vaccination_severe", + "covid-primary-bluesky", "covid-primary-bluesky_severe", + "covid-primary-booster-bluesky", "covid-primary-booster-bluesky_severe", + "covid-primary-booster-default", "covid-primary-booster-default_severe", + "covid-primary-default", "covid-primary-default_severe") + +stoner::stone_stochastic_standardise( + group = "IC-Ghani", + in_path = file.path(base_in_path, "COVID-IC-Ghani", "wes-modified"), + out_path = file.path(base_ou6t_path, "COVID_IC-Ghani"), + scenarios = scenarios, + files = c("covid-no-vaccination Gemma Gilani_:index.csv.xz", + "covid-no-vaccination_severe Gemma Gilani_:index.csv.xz", + "covid-primary-bluesky Daniela Olivera_:index.csv.xz", + "covid-primary-bluesky_severe Daniela Olivera_:index.csv.xz", + "covid-primary-booster-bluesky Daniela Olivera_:index.csv.xz", + "covid-primary-booster-bluesky_severe Daniela Olivera_:index.csv.xz", + "covid-primary-booster-default Gemma Gilani_:index.csv.xz", + "covid-primary-booster-default_severe Gemma Gilani_:index.csv.xz", + "covid-primary-default Gemma Gilani_:index.csv.xz", + "covid-primary-default_severe Gemma Gilani_:index.csv.xz"), + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "LSHTM-Liu", + in_path = file.path(base_in_path, "COVID-LSHTM-Liu"), + out_path = file.path(base_out_path, "COVID_LSHTM-Liu"), + scenarios = scenarios, + files = "stochastic_burden_est_covid-LSHTM-Liu_:scenario Yang Liu.csv.xz") + + +############### +# HepB + + scenarios <- scenarios = c("hepb-hepb3-bd-bluesky", "hepb-hepb3-bd-default", + "hepb-hepb3-bd-ia2030", "hepb-hepb3-bluesky", + "hepb-hepb3-default", "hepb-hepb3-ia2030", + "hepb-no-vaccination") + +stoner::stone_stochastic_standardise( + group = "Li", + in_path = file.path(base_in_path, "HepB-Li"), + out_path = file.path(base_out_path, "HepB_Li"), + scenarios = scenarios, + files = ":scenario:index.csv.xz", + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "IC-Hallett", + in_path = file.path(base_in_path, "HepB-IC-Hallett"), + out_path = file.path(base_out_path, "HepB_IC-Hallett"), + scenarios = scenarios, + files = c("stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_bd_bluesky_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_bd_default_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_bd_ia2030_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_bluesky_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_default_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_hepb3_ia2030_:index.csv.xz", + "stochastic_burden_est_HepB-IC-Hallett_hepb_no_vaccination_:index.csv.xz"), + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "Burnet-Scott", + in_path = file.path(base_in_path, "HepB-Burnet-Scott"), + out_path = file.path(base_out_path, "HepB_Burnet-Scott"), + scenarios = scenarios, + files = c("stochastic_burden_hepb_hepb3_bd_bluesky_:index.csv.xz", + "stochastic_burden_hepb_hepb3_bd_default_:index.csv.xz", + "stochastic_burden_hepb_hepb3_bd_ia2030_:index.csv.xz", + "stochastic_burden_hepb_hepb3_bluesky_:index.csv.xz", + "stochastic_burden_hepb_hepb3_default_:index.csv.xz", + "stochastic_burden_hepb_hepb3_ia2030_:index.csv.xz", + "stochastic_burden_hepb_no_vaccination_:index.csv.xz"), + index = 1:117) + + +############### +# HPV + + scenarios = c("hpv-no-vaccination", "hpv-campaign-default", + "hpv-campaign-bluesky", "hpv-campaign-routine-bluesky", + "hpv-campaign-routine-default", "hpv-campaign-default_transition_hpv_1d", + "hpv-campaign-routine-default_transition_hpv_1d", + "hpv-campaign-ia2030", "hpv-campaign-routine-ia2030") + +stoner::stone_stochastic_standardise( + group = "Harvard-Sweet", + in_path = file.path(base_in_path, "HPV-Harvard-Kim"), + out_path = file.path(base_out_path, "HPV_Harvard-Kim"), + scenarios = scenarios, + files = c("stochastic-burden-est.novacc_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-default_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-bluesky_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-routine-bluesky_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-routine-default_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-default_transition_hpv_1d_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-routine-default_transition_hpv_1d_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-ia2030_run_:index Allison Portnoy.csv.xz", + "stochastic-burden-est.coverage_202310gavi-4_hpv-campaign-routine-ia2030_run_:index Allison Portnoy.csv.xz"), + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "LSHTM-Jit", + in_path = file.path(base_in_path, "HPV-LSHTM-Jit"), + out_path = file.path(base_out_path, "HPV_LSHTM-Jit"), + scenarios = scenarios, + files = c("stochastic-burden-novaccination_all_202310gavi-7_hpv-no-vaccination Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-default Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-bluesky Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-routine-bluesky Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-routine-default Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-default_transition_hpv_1d Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-routine-default_transition_hpv_1d Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-ia2030 Kaja Abbas.csv.xz", + "stochastic-burden-vaccination_all_202310gavi-7_hpv-campaign-routine-ia2030 Kaja Abbas.csv.xz")) + + +############### +# Measles + + scenarios = c("measles-no-vaccination", "measles-mcv1-bluesky", "measles-mcv1-default", + "measles-mcv1-ia2030", "measles-mcv1-mcv2-bluesky", "measles-mcv1-mcv2-campaign-bluesky", + "measles-mcv1-mcv2-campaign-default", "measles-mcv1-mcv2-campaign-default_under5sia", + "measles-mcv1-mcv2-campaign-default_update", "measles-mcv1-mcv2-campaign-ia2030", + "measles-mcv1-mcv2-default", "measles-mcv1-mcv2-ia2030") + +stoner::stone_stochastic_standardise( + group = "PSU-Ferrari", + in_path = file.path(base_in_path, "Measles-PSU-Ferrari"), + out_path = file.path(base_out_path, "Measles_PSU-Ferrari"), + scenarios = scenarios, + files = c("no-vaccination.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-bluesky.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-default.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-ia2030.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-mcv2-bluesky.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-mcv2-campaign-bluesky.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-mcv2-campaign-default.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "measles-mcv1-mcv2-campaign-default_under5sia_PSU_Ferrari_Stochastic_Runs_Revised_05102025.csv.xz", + "measles-mcv1-mcv2-campaign-default_update_PSU_Ferrari_Stochastic_Runs_Revised_05102025.csv.xz", + "mcv1-mcv2-campaign-ia2030.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-mcv2-default.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz", + "mcv1-mcv2-ia2030.202310gavi-6.Measles_PSU-Ferrari_standard.csv.xz")) + +stoner::stone_stochastic_standardise( + group = "LSHTM-Jit", + in_path = file.path(base_in_path, "Measles-LSHTM-Jit"), + out_path = file.path(base_out_path, "Measles_LSHTM-Jit"), + scenarios = scenarios, + files = c("stochastic_burden_estimate_measles-LSHTM-Jit-no-vaccination Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-bluesky Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-default Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-ia2030 Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-bluesky Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-campaign-bluesky Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-campaign-default.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-campaign-default_under5sia.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-campaign-default_update.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-campaign-ia2030 Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-default Han Fu.csv.xz", + "stochastic_burden_estimate_measles-LSHTM-Jit-mcv1-mcv2-ia2030 Han Fu.csv.xz")) + + +############### +# MenA + + scenarios = c("mena-no-vaccination", "mena-campaign-default", "mena-campaign-routine-default", + "mena-campaign-bluesky", "mena-campaign-routine-bluesky", + "mena-campaign-ia2030", "mena-campaign-routine-ia2030") + +# NB campaign-default same as campaign-ia2030 +stoner::stone_stochastic_standardise( + group = "Cambridge-Trotter", + in_path = file.path(base_in_path, "MenA-Cambridge-Trotter"), + out_path = file.path(base_out_path, "MenA_Cambridge-Trotter"), + scenarios = scenarios, + files = c( + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-novaccination (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-campaign-default (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-routine-default (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-campaign-bluesky (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-routine-bluesky (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-campaign-default (:index) Andromachi Karachaliou.csv.xz", + "stochastic-burden-template202310gavi-7MenA_Cambridge-Trotter_mena-routine-ia2030 (:index) Andromachi Karachaliou.csv.xz"), + index = 1:26) + + +############### +# Rubella + + scenarios = c("rubella-no-vaccination", "rubella-campaign-default", + "rubella-campaign-bluesky", "rubella-campaign-rcv1-rcv2-bluesky", + "rubella-campaign-rcv1-bluesky", "rubella-campaign-rcv1-rcv2-default", + "rubella-campaign-rcv1-default", "rubella-campaign-ia2030", + "rubella-campaign-rcv1-rcv2-ia2030", "rubella-campaign-rcv1-ia2030") + +stoner::stone_stochastic_standardise( + group = "UGA-Winter", + in_path = file.path(base_in_path, "Rubella-UGA"), + out_path = file.path(base_out_path, "Rubella_UGA-Winter"), + scenarios = scenarios, + files = c("stochastic_burden_est-rubella-no-vaccination_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-default_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-bluesky_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-rcv2-bluesky_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-bluesky_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-rcv2-default_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-default_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-ia2030_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-rcv2-ia2030_:index Amy Winter.csv.xz", + "stochastic_burden_est-rubella-campaign-rcv1-ia2030_:index Amy Winter.csv.xz"), + index = 1:12) + +stoner::stone_stochastic_standardise( + group = "UKHSA-Vynnycky", + in_path = file.path(base_in_path, "Rubella-UKHSA-Vynnycky"), + out_path = file.path(base_out_path, "Rubella_UKHSA-Vynnycky"), + scenarios = scenarios, + files = c("imp.c:indexs401.csv.xz", + "stochastic_burden_est_rubella_Vynnycky_rubella-campaign-default_:index.csv.xz", + "stochastic_burden_est_rubella_Vynnycky_rubella-campaign-bluesky_:index.csv.xz", + "Vynnycky-camp-rcv1-rcv2-bluesky_country:index.csv.xz", + "Vynnycky-camp-rcv1-bluesky_country:index.csv.xz", + "stochastic_burden_est_rubella_Vynnycky_rubella-campaign-rcv1-rcv2-default_:index.csv.xz", + "stochastic_burden_est_rubella_Vynnycky_rubella-campaign-rcv1-default_:index.csv.xz", + "stochastic_burden_est_rubella_Vynnycky_rubella-campaign-ia2030_:index.csv.xz", + "Vynnycky-camp-rcv1-rcv2-ia2030_country:index.csv.xz", + "Vynnycky-camp-rcv1-ia2030_country:index.csv.xz"), + index = 1:117) + + +############### +# Typhoid + + scenarios = c("typhoid-no-vaccination", + "typhoid-campaign-default", "typhoid-campaign-routine-default", + "typhoid-campaign-bluesky", "typhoid-campaign-routine-bluesky") + +stoner::stone_stochastic_standardise( + group = "IVI-Kim", + in_path = file.path(base_in_path, "Typhoid-IVI-Kim"), + out_path = file.path(base_out_path, "Typhoid_IVI-Kim"), + scenarios = scenarios, + file = c("stoch_Typhoid_novacc_20240314T233526 Jong-Hoon Kim.csv.xz", + "stoch_Typhoid_campaign_default_20240314T233526 Jong-Hoon Kim.csv.xz", + "stoch_Typhoid_campaign_routine_default_20240314T233526 Jong-Hoon Kim.csv.xz", + "stoch_Typhoid_campaign_bluesky_20240314T233526 Jong-Hoon Kim.csv.xz", + "stoch_Typhoid_campaign_routine_bluesky_20240314T233526 Jong-Hoon Kim.csv.xz")) + +stoner::stone_stochastic_standardise( + group = "Yale-Pitzer", + in_path = file.path(base_in_path, "Typhoid-Yale-Pitzer"), + out_path = file.path(base_out_path, "Typhoid_Yale-Pitzer"), + scenarios = scenarios, + files = ":scenario_stochastic_output2023_Yale Virginia Pitzer.csv.xz") + + +############### +# YF + +scenarios = c("yf-no-vaccination", "yf-routine-bluesky", + "yf-routine-campaign-bluesky", "yf-routine-campaign-default", + "yf-routine-campaign-ia2030", "yf-routine-default", + "yf-routine-ia2030") + +stoner::stone_stochastic_standardise( + group = "IC-Garske", + in_path = file.path(base_in_path, "YF-IC-Garske"), + out_path = file.path(base_out_path, "YF_IC-Garske"), + scenarios = scenarios, + files = "burden_results_stochastic_202310gavi-3_:scenario Keith Fraser.csv.xz") + +stoner::stone_stochastic_standardise( + group = "UND-Perkins", + in_path = file.path(base_in_path, "YF-UND-Perkins"), + out_path = file.path(base_out_path, "YF_UND-Perkins"), + scenarios = scenarios, + files = c("stochastic_burden_est_YF_UND-Perkins_yf-no_vaccination_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_bluesky_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_campaign_bluesky_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_campaign_default_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_campaign_ia2030_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_default_:index.csv.xz", + "stochastic_burden_est_YF_UND-Perkins_yf-routine_ia2030_:index.csv.xz"), + index = 1:200) + + +############### +# Malaria R2 + +scenarios = c("malaria-no-vaccination", + "malaria-rts3-default", "malaria-rts3-rts4-default", + "malaria-rts3-bluesky", "malaria-rts3-rts4-bluesky"), + +stoner::stone_stochastic_standardise( + group = "UAC-Kakai", + in_path = file.path(base_in_path, "Malaria-UAC-Kakai", "round2"), + out_path = file.path(base_out_path, "Malaria_UAC-Kakai"), + scenarios = scenarios, + files = c("Stochastic_Burden_Estimates_Glele_Kakai_No_Vaccine_:index.csv.xz", + "Stochastic_Burden_Estimates_Glele_Kakai_Default_rts3_:index.csv.xz", + "Stochastic_Burden_Estimates_Glele_Kakai_Default_rts3_4_:index.csv.xz", + "stochastic_Burden_Estimates_Glele_Kakai_Blue_Sky_rts3_:index.csv.xz", + "Stochastic_Burden_Estimates_Glele_Kakai_Blue_Sky_rts34_:index.csv.xz"), + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "IC-Okell", + in_path = file.path(base_in_path, "Malaria-IC_Okell"), + out_path = file.path(base_out_path, "Malaria_IC-Okell"), + scenarios = scenarios, + files = c("stochastic-burden-est.202310gavi-7.Malaria_IC-Okell_no-vaccination_draw_:index Lydia Haile.csv.xz", + "stochastic-burden-est.202310gavi-7.Malaria_IC-Okell_malaria-rts3-default_draw_:index Lydia Haile.csv.xz", + "stochastic-burden-est.202310gavi-7.Malaria_IC-Okell_malaria-rts3-rts4-default_draw_:index Lydia Haile.csv.xz", + "stochastic-burden-est.202310gavi-7.Malaria_IC-Okell_malaria-rts3-bluesky_draw_:index Lydia Haile.csv.xz", + "stochastic-burden-est.202310gavi-7.Malaria_IC-Okell_malaria-rts3-rts4-bluesky_draw_:index Lydia Haile.csv.xz"), + index = 1:200) + +stoner::stone_stochastic_standardise( + group = "TKI-Penny", + in_path = file.path(base_in_path, "Malaria-TKI-Penny"), + out_path = file.path(base_out_path, "Malaria_TKI-Penny"), + scenarios = scenarios, + files = c("stochastic_burden_est_malaria_:index_novaccine Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_rtss_d3_default Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_rtss_d4_default Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_rtss_d3_bluesky Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_rtss_d4_bluesky Josephine Malinga.csv.xz"), + index = 1:31) diff --git a/scripts/process_stoch_202409malaria.R b/scripts/process_stoch_202409malaria.R new file mode 100644 index 0000000..c150f99 --- /dev/null +++ b/scripts/process_stoch_202409malaria.R @@ -0,0 +1,37 @@ +base_in_path <- "//wpia-hn2.hpc.dide.ic.ac.uk/vimc_stochastics_dropbox/latest/202409malaria-1" +base_out_path <- "//wpia-hn2.hpc.dide.ic.ac.uk/vimc_stochastics/202409malaria" + +scenarios = c("malaria_no_vaccination", "malaria-r3-r4-default", "malaria-rts3-rts4-default") + +stoner::stone_stochastic_standardise( + group = "IC-Okell", + in_path = file.path(base_in_path, "Malaria-IC-Okell"), + out_path = file.path(base_out_path, "Malaria_IC-Okell"), + scenarios = scenarios, + files = c("stochastic-burden-est-no-vaccination_:index Lydia Haile.csv.xz", + "stochastic-burden-est-malaria-r3-r4-default_:index Lydia Haile.csv.xz", + "stochastic-burden-est-malaria-rts3-rts4-default_:index Lydia Haile.csv.xz"), + index = 1:200 +) + +stoner::stone_stochastic_standardise( + group = "UAC-Kakai", + in_path = file.path(base_in_path, "Malaria-UAC-Kakai"), + out_path = file.path(base_out_path, "Malaria_UAC-Kakai"), + scenarios = scenarios, + files = c("Stochastic_Burden_Estimates_Glele_Kakai_No_Vaccine_:index.csv.xz", + "Stochastic_Burden_Estimates_Glele_Kakai_Default_r34_:index.csv.xz", + "Stochastic_Burden_Estimates_Glele_Kakai_Default_rts34_:index.csv.xz"), + index = 1:200 +) + +stoner::stone_stochastic_standardise( + group = "TKI-Penny", + in_path = file.path(base_in_path, "Malaria-TKI-Penny"), + out_path = file.path(base_out_path, "Malaria_TKI-Penny"), + scenarios = scenarios, + files = c("stochastic_burden_est_malaria_:index_novaccine Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_r21_d4_default Josephine Malinga.csv.xz", + "stochastic_burden_est_malaria_:index_rtss_d4_default Josephine Malinga.csv.xz"), + index = 1:31 +) diff --git a/tests/testthat/test_stochastic_files.R b/tests/testthat/test_stochastic_files.R new file mode 100644 index 0000000..153b3fb --- /dev/null +++ b/tests/testthat/test_stochastic_files.R @@ -0,0 +1,98 @@ +context("stochastic_files") + +fake_data <- function() { + data.frame( + disease = "elf-piles", + country = "LAP", + year = rep(2000:2005, each = 5), + age = rep(0:5, 5), + cases = 1:30, + cohort_size = c(100, 200, 300, 400, 500, 600), + deaths = 31:60, + dalys = 61:90, + yll = 91:120, + run_id = 1) +} + +test_that("Standardise works with :scenario and :index", { + fake <- fake_data() + fake2 <- fake + fake2$run_id <- 2 + fake <- rbind(fake, fake2) + + tmpin <- tempdir() + tmpout <- tempdir() + + # Clean-up folder if necessary... + + for (f in c("north_pole_optimistic_LAP.pq", + "north_pole_fatalistic_LAP.pq")) { + if (file.exists(file.path(tmpout, f))) { + file.remove(file.path(tmpout, f)) + } + } + + tmpfile <- tempfile(tmpdir = tmpin) + write.csv(fake, paste0(tmpfile, "_optimistic_1"), row.names = FALSE) + write.csv(fake, paste0(tmpfile, "_fatalistic_1"), row.names = FALSE) + + fake$country <- "POL" + write.csv(fake, paste0(tmpfile, "_optimistic_2"), row.names = FALSE) + write.csv(fake, paste0(tmpfile, "_fatalistic_2"), row.names = FALSE) + + stone_stochastic_standardise( + group = "north_pole", + in_path = tmpin, + out_path = tmpout, + scenarios = c("optimistic", "fatalistic"), + files = paste0(basename(tmpfile), "_:scenario_:index"), + index = 1:2 + ) + + files <- list.files(path = tmpout) + expect_true("north_pole_optimistic_LAP.pq" %in% files) + expect_true("north_pole_optimistic_POL.pq" %in% files) + expect_true("north_pole_fatalistic_LAP.pq" %in% files) + expect_true("north_pole_fatalistic_POL.pq" %in% files) + + pq <- arrow::read_parquet(file.path(tmpout, "north_pole_optimistic_LAP.pq")) + expect_true(unique(pq$country) == "LAP") + expect_true(all.equal(sort(unique(pq$run_id)), 1:2)) + expect_true(all.equal(sort(unique(pq$age)), 0:5)) + expect_true(all.equal(sort(unique(pq$year)), 2000:2005)) +}) + + +test_that("Standardise works with missing run_id column", { + fake <- fake_data() + fake$run_id <- NULL + + tmpin <- tempdir() + tmpout <- tempdir() + tmpfile <- tempfile(tmpdir = tmpin) + for (i in 1:200) { + write.csv(fake, paste0(tmpfile, sprintf("_opt_%d", i)), row.names = FALSE) + write.csv(fake, paste0(tmpfile, sprintf("_fat_%d", i)), row.names = FALSE) + } + + stone_stochastic_standardise( + group = "north_pole", + in_path = tmpin, + out_path = tmpout, + scenarios = c("opt", "fat"), + files = paste0(basename(tmpfile), "_:scenario_:index"), + index = 1:200 + ) + + files <- list.files(path = tmpout) + expect_true("north_pole_fat_LAP.pq" %in% files) + expect_true("north_pole_opt_LAP.pq" %in% files) + + pq <- arrow::read_parquet(file.path(tmpout, "north_pole_fat_LAP.pq")) + tab <- table(pq$run_id) + expect_all_equal(as.integer(tab), 30) + expect_equal(length(tab), 200) + expect_true(all.equal(names(tab), as.character(1:200))) +}) + + diff --git a/tests/testthat/test_stochastic_graphs.R b/tests/testthat/test_stochastic_graphs.R new file mode 100644 index 0000000..243d3d3 --- /dev/null +++ b/tests/testthat/test_stochastic_graphs.R @@ -0,0 +1,69 @@ +context("stochastic_graphs") + +# Not a great amount of testing we can do here, without analysing +# the plot somehow. + +test_that("stochastic_graph data transforms", { + + base <- tempdir() + touchstone <- "t" + disease <- "d" + group <- "elf" + scenario <- "opt" + country <- "LAP" + folder <- file.path(base, touchstone, paste0(disease, "_", group)) + filename <- paste0(group,"_", scenario, "_", country, ".pq") + dir.create(folder, showWarnings = FALSE, recursive = TRUE) + + data <- data.frame(year = rep(2000:2004, each = 5), + age = rep(10:14, 5), run_id = 1) + data$deaths <- seq_len(nrow(data)) + orig <- data + for (i in 2:5) { + d <- orig + d$run_id <- i + d$deaths <- d$deaths * i + data <- rbind(data, d) + } + f <- file.path(folder, filename) + if (file.exists(f)) file.remove(f) + arrow::write_parquet(data, file.path(folder, filename)) + + # Aggregate all ages, not by cohort. Should have 1 point per year. + + res <- prepare_graph_data(base, touchstone, disease, group, country, + scenario, "deaths", NULL, FALSE) + + expect_equal(nrow(res), 25) # 5 runs, 5 years + expect_equal(res$deaths[res$run_id == 1 & res$year == 2002], + sum(data$deaths[data$year == 2002 & data$run_id == 1])) + + # Select ages + + res <- prepare_graph_data(base, touchstone, disease, group, country, + scenario, "deaths", c(10, 12, 14), FALSE) + + expect_equal(nrow(res), 25) # 5 runs, 5 years + expect_equal(res$deaths[res$run_id == 1 & res$year == 2003], + sum(data$deaths[data$year == 2003 & data$run_id == 1 & + data$age %in% c(10, 12, 14)])) + + # By cohort + + res <- prepare_graph_data(base, touchstone, disease, group, country, + scenario, "deaths", c(10, 12, 14), TRUE) + + expect_equal(min(res$year), min(data$year) - max(data$age)) + expect_equal(max(res$year), max(data$year) - min(data$age)) + + # Test graph - we can't really, but just check it doesn't crash. + + expect_no_error(stone_stochastic_graph( + base, touchstone, disease, group, country, + scenario, "deaths")) + + expect_no_error(stone_stochastic_graph( + base, touchstone, disease, group, country, + scenario, "deaths", log = TRUE)) + +}) diff --git a/tests/testthat/test_stochastic_process.R b/tests/testthat/test_stochastic_process.R index ce2da5c..46a8303 100644 --- a/tests/testthat/test_stochastic_process.R +++ b/tests/testthat/test_stochastic_process.R @@ -159,21 +159,18 @@ test_that("Bad arguments", { expect_error(stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz", - "", 1, 1, ".", outcomes = list(deaths = c("deaths", "deaths")), - bypass_cert_check = TRUE), + "", 1, 1, ".", outcomes = list(deaths = c("deaths", "deaths")), bypass_cert_check = TRUE), "Duplicated outcome in deaths") expect_error(stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz", - "", 1, 1, ".", - outcomes = list(deaths = "deaths", cases = "cases", dalys = "piles_dalys"), + "", 1, 1, ".", outcomes = list(deaths = "deaths", cases = "cases", dalys = "piles_dalys"), bypass_cert_check = TRUE), "Outcomes not found, dalys \\('piles_dalys'\\)") expect_error(stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", "pies", test$path, "non_exist:index.xz", - "", 1, 1, ".", - outcomes = list(deaths = "deaths", cases = "cases", dalys = "dalys"), + "", 1, 1, ".", outcomes = list(deaths = "deaths", cases = "cases", dalys = "dalys"), runid_from_file = TRUE, bypass_cert_check = TRUE), "Must have index_start and index_end as 1..200 to imply run_id") @@ -367,20 +364,19 @@ stochastic_runner <- function(same_countries = TRUE, upload = FALSE, allow_new_database = TRUE, bypass_cert_check = TRUE, - dalys_recipe = NULL, + dalys_df = NULL, cert = "", pre_aggregation_path = NULL, lines = Inf, log_file = NULL, silent = TRUE) { - test <- new_test() res <- random_stoch_data(test, same_countries, simple_outcomes, single_file_per_scenario, include_run_id, - include_disease, !is.null(dalys_recipe)) + include_disease, !is.null(dalys_df)) - if (is.data.frame(dalys_recipe)) { + if (is.data.frame(dalys_df)) { fake_lifetable_db(test$con) } @@ -395,22 +391,19 @@ stochastic_runner <- function(same_countries = TRUE, index_start <- 1 index_end <- 200 } - + deaths <- "deaths" + cases <- "cases" + dalys <- "dalys" if (!simple_outcomes) { - outcomes <- list( - deaths = c("deaths_acute", "deaths_chronic"), - cases = c("cases_acute", "cases_chronic"), - dalys = c("dalys_men", "dalys_pneumo")) - } else { - outcomes <- list( - deaths = "deaths", - cases = "cases", - dalys = "dalys") - + deaths <- c("deaths_acute", "deaths_chronic") + cases <- c("cases_acute", "cases_chronic") + dalys <- c("dalys_men", "dalys_pneumo") } - if (!is.null(dalys_recipe)) { - outcomes$dalys <- NULL + if (is.data.frame(dalys_df)) { + outcomes <- list(deaths = deaths, cases = cases) + } else { + outcomes <- list(deaths = deaths, cases = cases, dalys = dalys) } stone_stochastic_process(test$con, "LAP-elf", "flu", "nevis-1", @@ -418,8 +411,8 @@ stochastic_runner <- function(same_countries = TRUE, cert = cert, index_start, index_end, test$path, pre_aggregation_path, - outcomes, - dalys_recipe, + outcomes = outcomes, + dalys_recipe = dalys_df, runid_from_file = !include_run_id, allow_missing_disease = !include_disease, upload_to_annex = upload, annex = test$con, @@ -433,10 +426,10 @@ stochastic_runner <- function(same_countries = TRUE, test = test, raw = res$raw, data = res$data, - cal = qs::qread(file.path(test$path, "LAP-elf_flu_calendar.qs")), - cal_u5 = qs::qread(file.path(test$path, "LAP-elf_flu_calendar_u5.qs")), - coh = qs::qread(file.path(test$path, "LAP-elf_flu_cohort.qs")), - coh_u5 = qs::qread(file.path(test$path, "LAP-elf_flu_cohort_u5.qs")) + cal = arrow::read_parquet(file.path(test$path, "LAP-elf_flu_calendar.pq")), + cal_u5 = arrow::read_parquet(file.path(test$path, "LAP-elf_flu_calendar_u5.pq")), + coh = arrow::read_parquet(file.path(test$path, "LAP-elf_flu_cohort.pq")), + coh_u5 = arrow::read_parquet(file.path(test$path, "LAP-elf_flu_cohort_u5.pq")) ) } @@ -551,10 +544,10 @@ test_that("Stochastic - with upload", { result$cal$deaths_pies <- round(result$cal$deaths_pies / 2) - new_qs_file <- tempfile(fileext = ".qs") - qs::qsave(x = result$cal, file = new_qs_file) + new_pq_file <- tempfile(fileext = ".pq") + arrow::write_parquet(result$cal, new_pq_file) - stone_stochastic_upload(new_qs_file, result$test$con, result$test$con, + stone_stochastic_upload(new_pq_file, result$test$con, result$test$con, "LAP-elf", "flu", "nevis-1", is_cohort = FALSE, is_under5 = FALSE, allow_new_database = FALSE, testing = TRUE) @@ -582,7 +575,7 @@ test_that("stochastic_upload can upload csv file", { "Overwriting table with id 1") data <- DBI::dbGetQuery(result$test$con, "SELECT * FROM stochastic_1") - expect_equal(data, result$cal_u5) + expect_equal(data, as.data.frame(result$cal_u5)) cohort_csv <- tempfile(fileext = ".csv") write_csv(x = result$coh, file = cohort_csv) @@ -594,7 +587,7 @@ test_that("stochastic_upload can upload csv file", { "Overwriting table with id 4") data <- DBI::dbGetQuery(result$test$con, "SELECT * FROM stochastic_4") - expect_equal(data, result$coh) + expect_equal(data, as.data.frame(result$coh)) }) ############################################################################## @@ -656,13 +649,13 @@ fake_lifetable_db <- function(con) { } test_that("Stochastic - with DALYs", { - dalys_recipe <- data_frame( + dalys_df <- data_frame( outcome = c("cases_acute", "deaths_chronic"), proportion = c(0.1, 0.2), average_duration = c(20, 1000), disability_weight = c(0.4, 0.6)) - result <- stochastic_runner(upload = FALSE, dalys_recipe = dalys_recipe, + result <- stochastic_runner(upload = FALSE, dalys_df = dalys_df, simple_outcomes = FALSE) lt <- stoner_life_table(result$test$con, "nevis-1", 2000, 2100, TRUE) @@ -828,15 +821,15 @@ test_that("Stochastic - with DALYs", { # Hurrah. We can *finally* test DALYs. - out <- tempfile(fileext = ".qs") - dat <- stoner_dalys_for_db(con, dalys_recipe, + out <- tempfile(fileext = ".pq") + dat <- stoner_dalys_for_db(con, dalys_df, burden_estimate_set_id = new_bes, output_file = out) - dat2 <- stoner_dalys_for_db(con, dalys_recipe, + dat2 <- stoner_dalys_for_db(con, dalys_df, "LAP-elf", "flu", "nevis-1", "pies", output_file = out) - df <- qs::qread(out) + df <- arrow::read_parquet(out) expect_identical(dat, dat2) expect_equal(dat$data$dalys, df$dalys) @@ -858,10 +851,10 @@ test_that("preaggregated data can be saved to disk", { files <- list.files(t) expect_length(files, 2) ## 2 countries, 1 modelling group, 1 disease = 2 files - expect_setequal(files, c("LAP-elf_flu_4_pre_aggregation.qs", - "LAP-elf_flu_716_pre_aggregation.qs")) + expect_setequal(files, c("LAP-elf_flu_4_pre_aggregation.pq", + "LAP-elf_flu_716_pre_aggregation.pq")) - country_4 <- qs::qread(file.path(t, "LAP-elf_flu_4_pre_aggregation.qs")) + country_4 <- arrow::read_parquet(file.path(t, "LAP-elf_flu_4_pre_aggregation.pq")) expect_setequal(colnames(country_4), c("country", "year", "run_id", "age", "deaths_pies", "cases_pies", "dalys_pies", "deaths_hot_chocolate", @@ -869,7 +862,7 @@ test_that("preaggregated data can be saved to disk", { "deaths_holly", "cases_holly", "dalys_holly")) expect_true(all(country_4$country == 4)) - country_716 <- qs::qread(file.path(t, "LAP-elf_flu_716_pre_aggregation.qs")) + country_716 <- arrow::read_parquet(file.path(t, "LAP-elf_flu_716_pre_aggregation.pq")) expect_setequal(colnames(country_716), c("country", "year", "run_id", "age", "deaths_pies", "cases_pies", "dalys_pies", "deaths_hot_chocolate", diff --git a/tests/testthat/test_stochastic_upload.R b/tests/testthat/test_stochastic_upload.R index 352c259..3c52416 100644 --- a/tests/testthat/test_stochastic_upload.R +++ b/tests/testthat/test_stochastic_upload.R @@ -10,11 +10,11 @@ test_that("Bad arguments", { do_test(test) expect_error(stoner::stone_stochastic_upload( - file.path(tempfile(), "non_existent_file.qs")), + file.path(tempfile(), "non_existent_file.pq")), "file(.*)exists(.*)") - new_file <- tempfile(fileext = ".qs") - qs::qsave(x = mtcars, file = new_file) + new_file <- tempfile(fileext = ".pq") + arrow::write_parquet(mtcars, new_file) expect_error(stone_stochastic_upload(new_file, test$con, test$con, "Rudolph"), "Unknown modelling group: Rudolph") @@ -27,7 +27,7 @@ test_that("Bad arguments", { expect_error(stone_stochastic_upload(new_file, test$con, test$con, "LAP-elf", "flu", "nevis-1", FALSE, FALSE, TRUE), - "Columns in qs file not as expected") + "Columns in pq file not as expected") }) test_that("stochastic_upload with csv file returns useful error", { @@ -45,7 +45,7 @@ test_that("stochastic_upload with csv file returns useful error", { "Columns in csv file not as expected") }) -test_that("stochastic_upload errors if file not csv or qs", { +test_that("stochastic_upload errors if file not csv or pq", { test <- new_test() standard_disease_touchstones(test) standard_responsibility_support(test) @@ -57,5 +57,5 @@ test_that("stochastic_upload errors if file not csv or qs", { expect_error( stone_stochastic_upload(new_file, test$con, test$con, "LAP-elf", "flu", "nevis-1", FALSE, FALSE, TRUE), - "Can only read csv or qs format stochastic data, got rds") + "Can only read csv or pq format stochastic data, got rds") }) diff --git a/tests/testthat/test_touchstone.R b/tests/testthat/test_touchstone.R index b457e08..f91e840 100644 --- a/tests/testthat/test_touchstone.R +++ b/tests/testthat/test_touchstone.R @@ -149,7 +149,7 @@ test_that("touchstone CSV invalid", { create_touchstone_csv(test$path, "nevis", 1) mess_with(test$path, "touchstone.csv", "haggis", 1, "yummy") expect_error(do_test(test), - "Correct columns in touchstone.csv not equal to (.*)", + "Expected Correct columns in touchstone.csv to equal (.*)", class = "expectation_failure") }) @@ -158,7 +158,7 @@ test_that("touchstone_name CSV invalid", { create_touchstone_name_csv(test$path, "nevis", 1) mess_with(test$path, "touchstone_name.csv", "pie_balm", 1, "clogg_banting") expect_error(do_test(test), - "Correct columns in touchstone_name.csv not equal to (.*)", + "Expected Correct columns in touchstone_name.csv to equal (.*)", class = "expectation_failure") })