diff --git a/.Rbuildignore b/.Rbuildignore
index 72868d2..a4b7a15 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -5,3 +5,7 @@
 ^data-raw$
 ^doc$
 ^Meta$
+^_pkgdown\.yml$
+^docs$
+^pkgdown$
+^\.github$
diff --git a/.gitignore b/.gitignore
index 41566ef..7bdecff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
 inst/doc
 /doc/
 /Meta/
+docs
diff --git a/DESCRIPTION b/DESCRIPTION
index 03fdc6a..b427031 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -23,16 +23,19 @@ Imports:
    lubridate,
    magrittr,
    methods,
-   raster,
    sf,
    stringr,
    terra
 Suggests:
+   climateR (>= 0.3.5),
    knitr,
+   raster,
    rmarkdown,
    testthat (>= 3.0.0),
    tidyr,
-   tigris,
-   usethis
+   usethis,
+   tigris
 Config/testthat/edition: 3
 VignetteBuilder: knitr
+Remotes:
+   mikejohnson51/climateR
diff --git a/R/data_cropland_world_2015_era5.R b/R/data_cropland_world_2015_era5.R
index db7b86d..5ec6e5d 100644
--- a/R/data_cropland_world_2015_era5.R
+++ b/R/data_cropland_world_2015_era5.R
@@ -1,6 +1,11 @@
 #' Global cropland weights
 #'
 #' A data.table with ERA5 resolution returned by running secondary_weights() with cropland data from
-#' 2015
+#' 2015 from https://glad.umd.edu/dataset/croplands
+#'
+#' Dataset Reference: P. Potapov, S. Turubanova, M.C. Hansen, A. Tyukavina, V.
+#' Zalles, A. Khan, X.-P. Song, A. Pickens, Q. Shen, J. Cortez. (2021) Global
+#' maps of cropland extent and change show accelerated cropland expansion in the
+#' twenty-first century. Nature Food. https://doi.org/10.1038/s43016-021-00429-z
 #'
 "cropland_world_2015_era5"
diff --git a/R/data_nj_counties.R b/R/data_nj_counties.R
deleted file mode 100644
index bae7000..0000000
--- a/R/data_nj_counties.R
+++ /dev/null
@@ -1,6 +0,0 @@
-#' Polygons representing counties in the state of New Jersey.
-#'
-#' A multipolygon object depicting county boundaries in New Jersey, using census
-#' data available through the tigris package.
-#' -"nj_counties" diff --git a/R/global_variables.R b/R/global_variables.R index 779bc19..3a15752 100644 --- a/R/global_variables.R +++ b/R/global_variables.R @@ -4,4 +4,5 @@ globalVariables(c("cell_area_km2", "coverage_fraction", "day", "era5_grid", "hour", "month", "poly_id", "sum_weight", "value", "w_area", - "w_sum", "weight", "x", "y", "year", ".")) + "w_sum", "weight", "x", "y", "year", ".", "is_right_xmin", + "is_left_xmax", "x_low", "x_high", "y_low", "y_high", "..cols_to_keep")) diff --git a/R/overlay_weights.R b/R/overlay_weights.R index cbfe207..da0ae46 100644 --- a/R/overlay_weights.R +++ b/R/overlay_weights.R @@ -16,7 +16,7 @@ #' #' @examples #' overlay_output_with_secondary_weights <- overlay_weights( -#' polygons = nj_counties, # Polygons outlining the 21 counties of New Jersey +#' polygons = tigris::counties("nj"), # Polygons outlining the 21 counties of New Jersey #' polygon_id_col = "COUNTYFP", # The name of the column with the unique #' # county identifiers #' grid = era5_grid, # The grid to use when extracting area weights (era5_grid is the @@ -32,7 +32,7 @@ #' #' #' overlay_output_without_secondary_weights <- overlay_weights( -#' polygons = nj_counties, # Polygons outlining the 21 counties of New Jersey +#' polygons = tigris::counties("nj"), # Polygons outlining the 21 counties of New Jersey #' polygon_id_col = "COUNTYFP" # The name of the column with the unique county #' # identifiers #' ) diff --git a/R/staggregate.R b/R/staggregate.R index a5d0d69..f4729c3 100644 --- a/R/staggregate.R +++ b/R/staggregate.R @@ -1,25 +1,37 @@ # Function to convert raster to data.table from https://gist.github.com/etiennebr/9515738 -as.data.table.raster <- function(x, row.names = NULL, optional = FALSE, xy=FALSE, inmem = raster::canProcessInMemory(x, 2), ...) { +as_data_table_terra <- function(x, row.names = NULL, optional = FALSE, xy=FALSE, inmem = terra::inMemory(x), ...) 
{
  if(inmem) {
-    v <- data.table::as.data.table(raster::as.data.frame(x, row.names=row.names, optional=optional, xy=xy, ...))
+    v <- data.table::as.data.table(terra::as.data.frame(x, row.names=row.names, optional=optional, xy=xy, ...))
+    coln <- names(x)
+    if(xy) coln <- c("x", "y", coln)
+    data.table::setnames(v, coln)
  } else {
-    tr <- raster::blockSize(x, n=2)
-    l <- lapply(1:tr$n, function(i)
-      data.table::as.data.table(raster::as.data.frame(raster::getValues(x,
-                                                                        row=tr$row[i],
-                                                                        nrows=tr$nrows[i]),
-                                                      row.names=row.names, optional=optional, xy=xy, ...)))
+    tr <- terra::blocks(x)
+    l <- lapply(1:tr$n, function(i) {
+      DT <- data.table::as.data.table(as.data.frame(terra::values(x, row = tr$row[i], nrows = tr$nrows[i]), ...))
+      if(xy == TRUE) {
+        cells <- terra::cellFromRowCol(x, c(tr$row[i], tr$row[i] + tr$nrows[i] - 1), c(1, ncol(x)))
+        coords <- terra::xyFromCell(x, cell = cells[1]:cells[2])
+        DT[, c("x", "y") := data.frame(coords)]
+      }
+      DT
+    })
    v <- data.table::rbindlist(l)
+    coln <- names(x)
+    if(xy) {
+      coln <- c("x", "y", coln)
+      data.table::setcolorder(v, coln)
+    }
  }
-  coln <- names(x)
-  if(xy) coln <- c("x", "y", coln)
-  data.table::setnames(v, coln)
  v
}

 # Function to convert raster to data.table and aggregate to daily values before transformation
 daily_aggregation <- function(data, overlay_weights, daily_agg, time_interval='1 hour'){

+  # Input validation and read in data
+  # ----------------------------------------------------------------------------
+
   if(!daily_agg %in% c('average', 'sum', 'none')){

     stop(crayon::red("daily_agg must be 'average', 'sum', or 'none'"))
@@ -28,80 +40,131 @@ daily_aggregation <- function(data, overlay_weights, daily_agg, time_interval='1

   # Data.table of weights
   weights_dt <- data.table::as.data.table(overlay_weights)

-  # Check if raster overlay_weights span prime meridian
-  is_pm <- FALSE
-  for(i in length(weights_dt$x)){
-    if(weights_dt[i,x] < .5 | weights_dt[i,x] > 359.5){
-      is_pm = TRUE # Check each x value
-      break # If near 0 value found exit loop
-    }
+  # read in climate data and coerce if not a SpatRaster
+  if(methods::is(data,"SpatRaster")){
+
+    clim_stack <- data
+
+  } else{
+
+    clim_stack <- terra::rast(data)

   }

-  ## read in climate data
-  clim_raster <- raster::stack(data)

-  ## shift into 0 to 360 if not already in that format
-  if(raster::extent(clim_raster)@xmin < 0 - raster::xres(clim_raster) / 2) {
+  # shift into 0 to 360 if not already in that format (inverse of rotate())
+  # ----------------------------------------------------------------------------
+
+  # prime meridian = pm
+
+  # For this section, consider any cell touching the pm (non-inclusive, meaning
+  # not if the border is right on it) to be part of the right side

-    clim_raster_xmin <- raster::extent(clim_raster)@xmin - raster::xres(clim_raster) / 2
-    clim_raster_xmax <- raster::extent(clim_raster)@xmax + raster::xres(clim_raster) / 2
-    clim_raster_ymin <- raster::extent(clim_raster)@ymin - raster::yres(clim_raster) / 2
-    clim_raster_ymax <- raster::extent(clim_raster)@ymax + raster::yres(clim_raster) / 2
+  # Check to see if any of the grid cells are fully on the left of the pm, which
+  # indicates -180_180 format
+  if(terra::ext(clim_stack)$xmin <= 0 - terra::xres(clim_stack)) {

-    clim_raster1 <- raster::crop(clim_raster, c(clim_raster_xmin, min(clim_raster_xmax, 0), clim_raster_ymin, clim_raster_ymax))
+    # If it's on both sides of 0, use rotate
+    if(terra::ext(clim_stack)$xmax > 0){

-    clim_raster1 <- raster::shift(clim_raster1, dx = 360)
+      # create global extent for padding so rotate function can be used
+      global_extent <- terra::ext(-180, 180, -90, 90)

-    if(raster::extent(clim_raster)@xmax > 0) {
+      # pad
+      clim_stack <- terra::extend(clim_stack, global_extent)

-      clim_raster2 <- raster::crop(clim_raster, c(0, clim_raster_xmax, clim_raster_ymin, clim_raster_ymax))
-      clim_raster <- raster::merge(clim_raster1, clim_raster2)
+      # rotate
+      clim_stack <- terra::rotate(clim_stack, left = FALSE)

-    } else {
+    } else{ # If it's only negative, use shift (much faster)

-      clim_raster <- clim_raster1
+      clim_stack <- terra::shift(clim_stack, dx = 360)

-    }
     }

-  # If raster overlay_weights does not span prime meridian, crop as usual
-  if(is_pm == FALSE){
-    # Extent of area weights with 2 cell buffer to make sure all cells are included
-    min_x <- min(weights_dt$x) - 2*raster::xres(clim_raster)
-    max_x <- max(weights_dt$x) + 2*raster::xres(clim_raster)
-    min_y <- min(weights_dt$y) - 2*raster::yres(clim_raster)
-    max_y <- max(weights_dt$y) + 2*raster::yres(clim_raster)
-    weights_ext <- raster::extent(min_x, max_x, min_y, max_y)
-    clim_raster <- raster::crop(clim_raster, weights_ext)
-    all_layers <- names(clim_raster)
   }

-  else { # If yes, make 2 crops and stitch together
-    min_x_left <- min(weights_dt$x[weights_dt$x >= 180]) - 2*raster::xres(data)
-    max_x_left <- 360

+  # Check if overlay_weights (the polygons) span prime meridian, but don't go
+  # all the way from 0 to 360 (continent of Europe, for instance). If so, we
+  # want to crop the data such that we don't have to read in a full band.
+
+  # Has a cell within one cell-width of the pm on the 360 side
+  is_near_360 <- max(weights_dt[,x]) > 360 - terra::xres(clim_stack)
+
+  # Has a cell that touches 0
+  is_near_0 <- min(weights_dt[,x]) < terra::xres(clim_stack)
+
+
+  # If it's near both edges, assume we are crossing the pm
+  if(is_near_360 & is_near_0){
+
+    # Find the widest x gap that exists to be the cropping location. For instance,
+    # if x values are 0, 60, 120, 300, 360, then crop such that left is [0 - 120] and
+    # right is [300 - 360] (saves reading in 180 degrees of data)
+    x_vector <- unique(weights_dt[,x])
+
+    # Get cropping locations by finding largest gap and making left side's xmax
+    # the x value to the left and right side's xmin the x value to the right
+    crop_locs <- data.frame(x_vector) %>%
+      dplyr::mutate(
+        diff = x_vector - dplyr::lag(x_vector),
+        is_right_xmin = diff == max(diff, na.rm = TRUE),
+        is_left_xmax = dplyr::lead(diff) == max(diff, na.rm = TRUE)
+      )

-    min_x_right <- -0.1 # This is necessary because cropping is non-inclusive (min_x_right = 0 excludes the cell at 0)
-    max_x_right <- max(weights_dt$x[weights_dt$x < 180]) + 2*raster::xres(data)
+    right_xmin <- crop_locs %>%
+      dplyr::filter(is_right_xmin) %>%
+      dplyr::slice(1) %>%
+      dplyr::pull(x_vector)

-    min_y <- min(weights_dt$y) - 2*raster::yres(data)
-    max_y <- max(weights_dt$y) + 2*raster::yres(data)
+    left_xmax <- crop_locs %>%
+      dplyr::filter(is_left_xmax) %>%
+      dplyr::slice(1) %>%
+      dplyr::pull(x_vector)

-    weights_ext_left <- raster::extent(min_x_left, max_x_left, min_y, max_y)
-    weights_ext_right <- raster::extent(min_x_right, max_x_right, min_y, max_y)
+    # Split into 2 and then merge back together
+    left_xmin <- 0 - 2*terra::xres(clim_stack)
+    left_xmax <- left_xmax + 2*terra::xres(clim_stack)

-    clim_raster_left <- raster::crop(clim_raster, weights_ext_left)
-    clim_raster_right <- raster::crop(clim_raster, weights_ext_right)
+    right_xmin <- right_xmin - 2*terra::xres(clim_stack)
+    right_xmax <- 360 +
2*terra::xres(clim_stack) + ymin <- min(weights_dt$y) - 2*terra::yres(clim_stack) + ymax <- max(weights_dt$y) + 2*terra::yres(clim_stack) - clim_raster <- raster::stack(raster::merge(clim_raster_left, clim_raster_right)) # Merge creates raster bricks without proper layer names + weights_ext_left <- terra::ext(left_xmin, left_xmax, ymin, ymax) + weights_ext_right <- terra::ext(right_xmin, right_xmax, ymin, ymax) + + clim_stack_left <- terra::crop(clim_stack, weights_ext_left, snap = 'out') + clim_stack_right <- terra::crop(clim_stack, weights_ext_right, snap = 'out') + + + clim_stack <- terra::merge(clim_stack_left, clim_stack_right) + + # Get layer names (dates) from clim_stack_left + all_layers <- names(clim_stack_left) + + + } else{ # If raster overlay_weights does not span prime meridian, crop as usual + + # Extent of area weights with 2 cell buffer to make sure all cells are included + xmin <- min(weights_dt$x) - 2*terra::xres(clim_stack) + xmax <- max(weights_dt$x) + 2*terra::xres(clim_stack) + ymin <- min(weights_dt$y) - 2*terra::yres(clim_stack) + ymax <- max(weights_dt$y) + 2*terra::yres(clim_stack) + + weights_ext <- terra::ext(xmin, xmax, ymin, ymax) + + clim_stack <- terra::crop(clim_stack, weights_ext) + + all_layers <- names(clim_stack) - # Get layer names (dates) from clim_raster_left - all_layers <- names(clim_raster_left) } @@ -111,8 +174,8 @@ daily_aggregation <- function(data, overlay_weights, daily_agg, time_interval='1 # Pass all layers through if not aggregating to daily level if(daily_agg == "none"){ message(crayon::yellow("Skipping pre-transformation aggregation to daily level")) - all_names <- names(clim_raster) - clim_hourly <- clim_raster + all_names <- names(clim_stack) + clim_hourly <- clim_stack return(list(clim_hourly, all_names)) } @@ -137,7 +200,7 @@ daily_aggregation <- function(data, overlay_weights, daily_agg, time_interval='1 } # Check that you have a dataset with a number of layers that is divisible by the number of timesteps in a day - if(!(raster::nlayers(clim_raster)%%timesteps_per_day == 0)){ + if(!(terra::nlyr(clim_stack)%%timesteps_per_day == 0)){ stop(crayon::red(sprintf("The data does not contain a number of layers that is a multiple of %d (the number of timesteps in a day calculated using the `time_interval` argument, currently set to %s). 
Please use complete data with all timesteps available for each day.", timesteps_per_day, time_interval))) } @@ -152,8 +215,8 @@ daily_aggregation <- function(data, overlay_weights, daily_agg, time_interval='1 message(crayon::green(sprintf("Averaging over %d layers per day to get daily values", timesteps_per_day))) # Average over each set of layers representing one day - indices<-rep(1:(raster::nlayers(clim_raster)/timesteps_per_day),each=timesteps_per_day) - clim_daily <- raster::stackApply(clim_raster, indices = indices, fun=mean) + indices<-rep(1:(terra::nlyr(clim_stack)/timesteps_per_day),each=timesteps_per_day) + clim_daily <- terra::tapp(clim_stack, indices, fun=mean) } ## Sum @@ -162,23 +225,33 @@ daily_aggregation <- function(data, overlay_weights, daily_agg, time_interval='1 message(crayon::green(sprintf("Summing over %d layers per day to get daily values", timesteps_per_day))) # Sum over each set of layers representing one day - indices<-rep(1:(raster::nlayers(clim_raster)/timesteps_per_day),each=timesteps_per_day) - clim_daily <- raster::stackApply(clim_raster, indices = indices, fun=sum) + indices<-rep(1:(terra::nlyr(clim_stack)/timesteps_per_day),each=timesteps_per_day) + clim_daily <- terra::tapp(clim_stack, indices, fun=sum) } - # Return a list containing, in order, daily aggregated climate data as a raster brick, and the layer_names created. + # Return a list containing, in order, daily aggregated climate data as a spatRaster stack, and the layer_names created. return(list(clim_daily, layer_names)) } -# Function to infer date-times for raster layers based on a time interval +# Function to infer date-times for spatRaster layers based on a time interval infer_layer_datetimes <- function(data, start_date, time_interval) { - # Turn the data into a raster stack in case it's a SpatRaster - raster_stack <- raster::stack(data) - # Number of layers in the raster stack - num_layers <- raster::nlayers(raster_stack) + # Running rast() on a stack that's already a spat raster removes the values for some reason + # read in climate data and coerce if not a spat raster + if(methods::is(data, "SpatRaster")){ + + clim_stack <- data + + } else{ + + clim_stack <- terra::rast(data) + + } + + # Number of layers in the spatRaster stack + num_layers <- terra::nlyr(clim_stack) # Convert start date to POSIXct start_date <- lubridate::as_datetime(start_date) @@ -189,16 +262,16 @@ infer_layer_datetimes <- function(data, start_date, time_interval) { # Make sure the full date shows up in the string every time formatted_dates <- format(layer_dates, "X%Y.%m.%d.%H.%M.%S") - # Assign the inferred date-times to the raster layers - names(raster_stack) <- as.character(formatted_dates) + # Assign the inferred date-times to the spatRaster layers + names(clim_stack) <- as.character(formatted_dates) - return(raster_stack) + return(clim_stack) } # Function to merge with geoweights and aggregate by polygon -polygon_aggregation <- function(clim_dt, weights_dt, list_names, time_agg){ +polygon_aggregation <- function(clim_dt, weights_dt, list_names, time_agg, weights_join_tolerance_x, weights_join_tolerance_y){ - ## Merge weights with climate raster + ## Merge weights with climate spatRaster ## ----------------------------------------------- # Set key column in the climate data.table @@ -210,8 +283,57 @@ polygon_aggregation <- function(clim_dt, weights_dt, list_names, time_agg){ clim_dt[, date := stringr::str_replace(date, "^[^0-9]+", "")] # Remove any non-digit characters from the start of the string clim_dt[, date := 
lubridate::as_datetime(date)]

-  # Keyed merge on the x/y column
-  merged_dt <- clim_dt[weights_dt, allow.cartesian = TRUE] # cols: x, y, date, value cols 1:k, poly_id, w_area, weight (if weights = T)
+  # Join overlay_weights and climate data table
+  if(weights_join_tolerance_x == 0 & weights_join_tolerance_y == 0){
+
+    # Keyed merge on the x/y column
+    merged_dt <- clim_dt[weights_dt, allow.cartesian = TRUE] # cols: x, y, date, value cols 1:k, poly_id, w_area, weight (if weights = T)
+
+  } else{
+
+    # Create a copy of the weights data table so we don't change the user's data
+    # without their knowledge. (data.table creates "shallow copies" unless
+    # explicitly told to do otherwise).
+    copied_weights_dt <- data.table::copy(weights_dt)
+
+    # Since data.table doesn't allow arithmetic in nonequi-joins, create tolerance columns now
+    copied_weights_dt[, x_low := x - weights_join_tolerance_x]
+    copied_weights_dt[, x_high := x + weights_join_tolerance_x]
+    copied_weights_dt[, y_low := y - weights_join_tolerance_y]
+    copied_weights_dt[, y_high := y + weights_join_tolerance_y]
+
+    # Remove x and y columns to avoid confusion in join
+    copied_weights_dt[, x := NULL]
+    copied_weights_dt[, y := NULL]
+
+
+    # Determine which columns to keep in the join, formatted as a
+    # character vector that can be supplied directly to j through
+    # data.table's ..prefix
+    cols_to_keep <- c(
+
+      # Add x and y in manually (referring to clim_dt$x by "x.x" prevents the
+      # column from being overwritten in the nonequi-join below)
+      'x.x',
+      'x.y',
+
+      # Keep all column names except the tolerance columns created above
+      setdiff(colnames(clim_dt), c('x', 'y')),
+      setdiff(colnames(copied_weights_dt), c('x_low', 'x_high', 'y_low', 'y_high')))
+
+
+    # Merge based on tolerance columns
+    merged_dt <- clim_dt[copied_weights_dt, # Right join
+                         allow.cartesian = TRUE,
+
+                         # Specify which columns to keep
+                         j = ..cols_to_keep,
+                         on = .(x >= x_low,
+                                x <= x_high,
+                                y >= y_low,
+                                y <= y_high)]
+    data.table::setnames(merged_dt, c('x.x', 'x.y'), c('x', 'y'))
+  }

   ## Multiply weights x climate value (all 1:k values); aggregate by month and polygon
   ## -----------------------------------------------
@@ -290,7 +412,7 @@ polygon_aggregation <- function(clim_dt, weights_dt, list_names, time_agg){
 #' level, raises these daily values to the 1 through nth power, and aggregates
 #' the transformed values to the polygon level and desired temporal scale.
 #'
-#' @param data The raster brick with the data to be transformed and aggregated
+#' @param data The spatRaster stack with the data to be transformed and aggregated
 #' @param overlay_weights A table of weights which can be generated using the
 #' function `overlay_weights()`
 #' @param daily_agg How to aggregate hourly values to daily values prior to
@@ -300,20 +422,29 @@ polygon_aggregation <- function(clim_dt, weights_dt, list_names, time_agg){
 #' `'hour'`, `'day'`, `'month'`, or `'year'` (`'hour'` cannot be selected
 #' unless `daily_agg = 'none'`)
 #' @param start_date the date (and time, if applicable) of the first layer in
-#' the raster. To be input in a format compatible with
+#' the stack. To be input in a format compatible with
 #' lubridate::as_datetime(), e.g. `"1991-10-29"` or `"1991-10-29 00:00:00"`.
-#' The default is `NA` since the rasters usually already contain temporal
+#' The default is `NA` since the spatRasters usually already contain temporal
 #' information in the layer names and they do not need to be manually supplied.
-#' @param time_interval the time interval between layers in the raster to be +#' @param time_interval the time interval between layers in the spatRaster to be #' aggregated. To be input in a format compatible with seq(), e.g. #' `'1 day'` or `'3 months'`. The default is `'1 hour'` and this argument is #' required if daily_agg is not `'none'` or if the `start_date` argument is not #' `NA`. +#' @param weights_join_tolerance the tolerance to use when joining +#' overlay_weights with the climate data by the x and y columns. This is useful +#' when the height/width of your data cells expressed in degrees is a very long +#' decimal. The default, `0`, performs a keyed equi-join. Anything other than 0 +#' performs a nonequi-join wherein latitudes/longitudes within the specified +#' tolerance (inclusive) are considered a match. Passing a single number results +#' in the tolerance being the same for x and y, but you can also pass a vector +#' of two numbers to have the first specify the x tolerance and second specify +#' the y tolerance. #' @param degree the highest exponent to raise the data to #' #' @examples #' polynomial_output <- staggregate_polynomial( -#' data = temp_nj_jun_2024_era5 - 273.15, # Climate data to transform and +#' data = terra::rast(temp_nj_jun_2024_era5) - 273.15, # Climate data to transform and #' # aggregate #' overlay_weights = overlay_weights_nj, # Output from overlay_weights() #' daily_agg = "average", # Average hourly values to produce daily values @@ -331,9 +462,9 @@ polygon_aggregation <- function(clim_dt, weights_dt, list_names, time_agg){ #' head(polynomial_output) #' #' @export -staggregate_polynomial <- function(data, overlay_weights, daily_agg, time_agg = "month", start_date = NA, time_interval = "1 hour", degree){ +staggregate_polynomial <- function(data, overlay_weights, daily_agg, time_agg = "month", start_date = NA, time_interval = "1 hour", weights_join_tolerance = 0, degree){ - # If the start date is supplied, overwrite the raster's layer names to reflect the specified temporal metadata + # If the start date is supplied, overwrite the spatRaster's layer names to reflect the specified temporal metadata if(!is.na(start_date)){ message(crayon::green(sprintf("Rewriting the data's temporal metadata (layer names) to reflect a dataset starting on the supplied start date and with a temporal interval of %s", time_interval))) data <- infer_layer_datetimes(data, start_date, time_interval) @@ -345,10 +476,22 @@ staggregate_polynomial <- function(data, overlay_weights, daily_agg, time_agg = daily_agg = "none" } + # If tolerance supplied is one number, apply to both x and y. 
+  # If two numbers, apply first to x and second to y
+  if(length(weights_join_tolerance) == 1){
+    weights_join_tolerance_x <- weights_join_tolerance
+    weights_join_tolerance_y <- weights_join_tolerance
+  } else if(length(weights_join_tolerance) == 2){
+    weights_join_tolerance_x <- weights_join_tolerance[1]
+    weights_join_tolerance_y <- weights_join_tolerance[2]
+  } else{
+    stop(crayon::red("Please provide a single number or a vector of exactly two numbers for weights_join_tolerance."))
+  }
+
   # Aggregate climate data to daily values
   setup_list <- daily_aggregation(data, overlay_weights, daily_agg, time_interval)
-  clim_daily <- setup_list[[1]] # Pulls the daily aggregated raster brick
+  clim_daily <- setup_list[[1]] # Pulls the daily aggregated spatRaster stack
   layer_names <- setup_list[[2]] # Pulls the saved layer names

   # Polynomial transformation
@@ -365,8 +508,8 @@ staggregate_polynomial <- function(data, overlay_weights, daily_agg, time_agg =
   ## Function: Set names of data.table by month, change from wide to long format, rename based on polynomial orders
   create_dt <- function(x){
-    # Should output raster cells x/y with 365 days as column names
-    dt <- as.data.table.raster(r[[x]], xy=TRUE)
+    # Should output spatRaster cells x/y with 365 days as column names
+    dt <- as_data_table_terra(r[[x]], xy=TRUE)

     # Set column names with months
     new_names <- c('x', 'y', layer_names)
@@ -380,7 +523,7 @@ staggregate_polynomial <- function(data, overlay_weights, daily_agg, time_agg =
     data.table::setnames(dt, old=c('variable', 'value'), new=var_names)
   }

-  # Make each raster layer a data.table
+  # Make each layer a data.table
   list_dt <- lapply(1:list_length, create_dt)

   # Merge all data.tables together if there are multiple
@@ -393,7 +536,13 @@ staggregate_polynomial <- function(data, overlay_weights, daily_agg, time_agg =
   }

   # Aggregate by polygon
-  sum_by_poly <- polygon_aggregation(clim_dt, overlay_weights, list_names, time_agg)
+  sum_by_poly <- polygon_aggregation(
+    clim_dt,
+    overlay_weights,
+    list_names,
+    time_agg,
+    weights_join_tolerance_x = weights_join_tolerance_x,
+    weights_join_tolerance_y = weights_join_tolerance_y)

   return(sum_by_poly)

@@ -416,7 +565,7 @@ staggregate_polynomial <- function(data, overlay_weights, daily_agg, time_agg =
 #' values, and aggregates the transformed values to the polygon level and
 #' desired temporal scale.
 #'
-#' @param data The raster brick with the data to be transformed and aggregated
+#' @param data The spatRaster stack with the data to be transformed and aggregated
 #' @param overlay_weights A table of weights which can be generated using the
 #' function `overlay_weights()`
 #' @param daily_agg How to aggregate hourly values to daily values prior to
@@ -426,20 +575,29 @@ staggregate_polynomial <- function(data, overlay_weights, daily_agg, time_agg =
 #' `'hour'`, `'day'`, `'month'`, or `'year'` (`'hour'` cannot be selected
 #' unless `daily_agg = 'none'`)
 #' @param start_date the date (and time, if applicable) of the first layer in
-#' the raster. To be input in a format compatible with
+#' the stack. To be input in a format compatible with
 #' lubridate::as_datetime(), e.g. `"1991-10-29"` or `"1991-10-29 00:00:00"`.
-#' The default is `NA` since the rasters usually already contain temporal
+#' The default is `NA` since the spatRasters usually already contain temporal
 #' information in the layer names and they do not need to be manually supplied.
-#' @param time_interval the time interval between layers in the raster to be +#' @param time_interval the time interval between layers in the spatRaster to be #' aggregated. To be input in a format compatible with seq(), e.g. #' `'1 day'` or `'3 months'`. The default is `'1 hour'` and this argument is #' required if daily_agg is not `'none'` or if the `start_date` argument is not #' `NA`. +#' @param weights_join_tolerance the tolerance to use when joining +#' overlay_weights with the climate data by the x and y columns. This is useful +#' when the height/width of your data cells expressed in degrees is a very long +#' decimal. The default, `0`, performs a keyed equi-join. Anything other than 0 +#' performs a nonequi-join wherein latitudes/longitudes within the specified +#' tolerance (inclusive) are considered a match. Passing a single number results +#' in the tolerance being the same for x and y, but you can also pass a vector +#' of two numbers to have the first specify the x tolerance and second specify +#' the y tolerance. #' @param knot_locs where to place the knots #' #' @examples #' spline_output <- staggregate_spline( -#' data = temp_nj_jun_2024_era5 - 273.15, # Climate data to transform and +#' data = terra::rast(temp_nj_jun_2024_era5) - 273.15, # Climate data to transform and #' # aggregate #' overlay_weights = overlay_weights_nj, # Output from overlay_weights() #' daily_agg = "average", # Average hourly values to produce daily values @@ -457,7 +615,7 @@ staggregate_polynomial <- function(data, overlay_weights, daily_agg, time_agg = #' head(spline_output) #' #' @export -staggregate_spline <- function(data, overlay_weights, daily_agg, time_agg = "month", start_date = NA, time_interval = "1 hour", knot_locs){ +staggregate_spline <- function(data, overlay_weights, daily_agg, time_agg = "month", start_date = NA, time_interval = "1 hour", weights_join_tolerance = 0, knot_locs){ # If the start date is supplied, overwrite the raster's layer names to reflect the specified temporal metadata if(!is.na(start_date)){ @@ -469,12 +627,24 @@ staggregate_spline <- function(data, overlay_weights, daily_agg, time_agg = "mon if(time_agg == "hour" & daily_agg != "none"){ message(crayon::yellow("Hourly output requested. Automatically setting daily_agg to \'none\'")) daily_agg = "none" - } + } + + # If tolerance supplied is one number, apply to both x and y. 
+  # If two numbers, apply first to x and second to y
+  if(length(weights_join_tolerance) == 1){
+    weights_join_tolerance_x <- weights_join_tolerance
+    weights_join_tolerance_y <- weights_join_tolerance
+  } else if(length(weights_join_tolerance) == 2){
+    weights_join_tolerance_x <- weights_join_tolerance[1]
+    weights_join_tolerance_y <- weights_join_tolerance[2]
+  } else{
+    stop(crayon::red("Please provide a single number or a vector of exactly two numbers for weights_join_tolerance."))
+  }

   # Aggregated climate data to daily values
   setup_list <- daily_aggregation(data, overlay_weights, daily_agg, time_interval)
-  clim_daily <- setup_list[[1]] # Pulls the daily aggregated raster brick
+  clim_daily <- setup_list[[1]] # Pulls the daily aggregated spatRaster stack
   layer_names <- setup_list[[2]] # Pulls the saved layer names

@@ -494,7 +664,7 @@ staggregate_spline <- function(data, overlay_weights, daily_agg, time_agg = "mon
     } # Add in spline terms, all of which are 0 if negative
     else{
-      clim_daily_table <- raster::values(clim_daily)
+      clim_daily_table <- terra::values(clim_daily)

       part1 <- ifelse((clim_daily_table - knot_locs[x]) > 0, (clim_daily_table - knot_locs[x])^3, 0)
@@ -511,7 +681,7 @@ staggregate_spline <- function(data, overlay_weights, daily_agg, time_agg = "mon
       clim_daily_table <- part1 - part2 + part3

       clim_daily_new <- clim_daily
-      raster::values(clim_daily_new) <- clim_daily_table
+      terra::values(clim_daily_new) <- clim_daily_table

       return(clim_daily_new)
@@ -531,7 +701,7 @@ staggregate_spline <- function(data, overlay_weights, daily_agg, time_agg = "mon
   create_dt <- function(x){

     # Should output raster cells x/y with 365 days as column names
-    dt <- as.data.table.raster(r[[x]], xy=TRUE)
+    dt <- as_data_table_terra(r[[x]], xy=TRUE)

     # Set column names with months
     new_names <- c('x', 'y', layer_names)
@@ -557,7 +727,13 @@

   # Aggregate by polygon
-  sum_by_poly <- polygon_aggregation(clim_dt, overlay_weights, list_names, time_agg)
+  sum_by_poly <- polygon_aggregation(
+    clim_dt,
+    overlay_weights,
+    list_names,
+    time_agg,
+    weights_join_tolerance_x = weights_join_tolerance_x,
+    weights_join_tolerance_y = weights_join_tolerance_y)

   return(sum_by_poly)
 }
@@ -589,11 +765,20 @@ staggregate_spline <- function(data, overlay_weights, daily_agg, time_agg = "mon
 #' `'1 day'` or `'3 months'`. The default is `'1 hour'` and this argument is
 #' required if daily_agg is not `'none'` or if the `start_date` argument is not
 #' `NA`.
+#' @param weights_join_tolerance the tolerance to use when joining
+#' overlay_weights with the climate data by the x and y columns. This is useful
+#' when the height/width of your data cells expressed in degrees is a very long
+#' decimal. The default, `0`, performs a keyed equi-join. Anything other than 0
+#' performs a nonequi-join wherein latitudes/longitudes within the specified
+#' tolerance (inclusive) are considered a match. Passing a single number results
+#' in the tolerance being the same for x and y, but you can also pass a vector
+#' of two numbers to have the first specify the x tolerance and second specify
+#' the y tolerance.
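+#' For example, a weights row at x = 284.5 and a climate cell at x = 284.4999999
+#' would not match in an exact equi-join, but do match with, say,
+#' `weights_join_tolerance = 0.001`; passing `c(0.001, 0)` would apply that
+#' tolerance to x only.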
 #' @param bin_breaks A vector of bin boundaries to split the data by
 #'
 #' @examples
 #' bin_output <- staggregate_bin(
-#'   data = temp_nj_jun_2024_era5 - 273.15, # Climate data to transform and
+#'   data = terra::rast(temp_nj_jun_2024_era5) - 273.15, # Climate data to transform and
 #'                                          # aggregate
 #'   overlay_weights = overlay_weights_nj, # Output from overlay_weights()
 #'   daily_agg = "average", # Average hourly values to produce daily values
@@ -613,7 +798,7 @@ staggregate_spline <- function(data, overlay_weights, daily_agg, time_agg = "mon
 #' head(bin_output)
 #'
 #' @export
-staggregate_bin <- function(data, overlay_weights, daily_agg, time_agg = "month", start_date = NA, time_interval = '1 hour', bin_breaks){
+staggregate_bin <- function(data, overlay_weights, daily_agg, time_agg = "month", start_date = NA, time_interval = '1 hour', weights_join_tolerance = 0, bin_breaks){

   # If the start date is supplied, overwrite the raster's layer names to reflect the specified temporal metadata
   if(!is.na(start_date)){
@@ -627,13 +812,26 @@ staggregate_bin <- function(data, overlay_weights, daily_agg, time_agg = "month"
     daily_agg = "none"
   }

+
+  # If tolerance supplied is one number, apply to both x and y.
+  # If two numbers, apply first to x and second to y
+  if(length(weights_join_tolerance) == 1){
+    weights_join_tolerance_x <- weights_join_tolerance
+    weights_join_tolerance_y <- weights_join_tolerance
+  } else if(length(weights_join_tolerance) == 2){
+    weights_join_tolerance_x <- weights_join_tolerance[1]
+    weights_join_tolerance_y <- weights_join_tolerance[2]
+  } else{
+    stop(crayon::red("Please provide a single number or a vector of exactly two numbers for weights_join_tolerance."))
+  }
+
   # Aggregate climate data to daily values
   setup_list <- daily_aggregation(data, overlay_weights, daily_agg, time_interval)
   clim_daily <- setup_list[[1]] # Pulls the daily aggregated raster brick
   layer_names <- setup_list[[2]] # Pulls the saved layer names

-  clim_daily_table <- raster::values(clim_daily)
+  clim_daily_table <- terra::values(clim_daily)

   bin_breaks <- sort(bin_breaks)
@@ -654,13 +852,13 @@ staggregate_bin <- function(data, overlay_weights, daily_agg, time_agg = "month"
   # Function check_bins to determine which bins data points fall into
   check_bins <- function(x){
-    clim_daily_table <- raster::values(clim_daily)
+    clim_daily_table <- terra::values(clim_daily)

     if(x == 0){
       clim_daily_table <- ifelse(min(bin_breaks) > clim_daily_table, 1, 0)
     } else if(x == length(bin_breaks)){
-      clim_daily_table <- ifelse(max(bin_breaks) < clim_daily_table, 1, 0)
+      clim_daily_table <- ifelse(max(bin_breaks) <= clim_daily_table, 1, 0)
     } else{
       clim_daily_table <- ifelse(bin_breaks[x] <= clim_daily_table &
@@ -669,7 +867,7 @@ staggregate_bin <- function(data, overlay_weights, daily_agg, time_agg = "month"

     clim_daily_new <- clim_daily
-    raster::values(clim_daily_new) <- clim_daily_table
+    terra::values(clim_daily_new) <- clim_daily_table

     return(clim_daily_new)
   }
@@ -683,7 +881,7 @@ staggregate_bin <- function(data, overlay_weights, daily_agg, time_agg = "month"
   create_dt <- function(x){

     # Should output raster cells x/y with 365 days as column names
-    dt <- as.data.table.raster(r[[x]], xy=TRUE)
+    dt <- as_data_table_terra(r[[x]], xy=TRUE)

     # Set column names with months
     new_names <- c('x', 'y', layer_names)
@@ -710,7 +908,13 @@ staggregate_bin <- function(data, overlay_weights, daily_agg, time_agg = "month"

   # Aggregate by polygon
-  sum_by_poly <- polygon_aggregation(clim_dt, overlay_weights, list_names, time_agg)
+  sum_by_poly <-
polygon_aggregation( + clim_dt, + overlay_weights, + list_names, + time_agg, + weights_join_tolerance_x = weights_join_tolerance_x, + weights_join_tolerance_y = weights_join_tolerance_y) return(sum_by_poly) } @@ -741,11 +945,20 @@ staggregate_bin <- function(data, overlay_weights, daily_agg, time_agg = "month" #' aggregated. To be input in a format compatible with seq(), e.g. #' `'1 day'` or `'3 months'`. The default is `'1 hour'` and this argument is #' required if the `start_date` argument is not `NA`. +#' @param weights_join_tolerance the tolerance to use when joining +#' overlay_weights with the climate data by the x and y columns. This is useful +#' when the height/width of your data cells expressed in degrees is a very long +#' decimal. The default, `0`, performs a keyed equi-join. Anything other than 0 +#' performs a nonequi-join wherein latitudes/longitudes within the specified +#' tolerance (inclusive) are considered a match. Passing a single number results +#' in the tolerance being the same for x and y, but you can also pass a vector +#' of two numbers to have the first specify the x tolerance and second specify +#' the y tolerance. #' @param thresholds A vector of temperature thresholds critical to a crop #' #' @examples #' degree_days_output <- staggregate_degree_days( -#' data = temp_nj_jun_2024_era5 - 273.15, # Climate data to transform and +#' data = terra::rast(temp_nj_jun_2024_era5) - 273.15, # Climate data to transform and #' # aggregate #' overlay_weights = overlay_weights_nj, # Output from overlay_weights() #' time_agg = "month", # Sum the transformed daily values across months @@ -761,7 +974,7 @@ staggregate_bin <- function(data, overlay_weights, daily_agg, time_agg = "month" #' head(degree_days_output) #' #' @export -staggregate_degree_days <- function(data, overlay_weights, time_agg = "month", start_date = NA, time_interval = '1 hour', thresholds){ +staggregate_degree_days <- function(data, overlay_weights, time_agg = "month", start_date = NA, time_interval = '1 hour', weights_join_tolerance = 0, thresholds){ # If the start date is supplied, overwrite the raster's layer names to reflect the specified temporal metadata if(!is.na(start_date)){ @@ -769,6 +982,18 @@ staggregate_degree_days <- function(data, overlay_weights, time_agg = "month", s data <- infer_layer_datetimes(data, start_date, time_interval) } + # If tolerance supplied is one number, apply to both x and y. 
+  # If two numbers, apply first to x and second to y
+  if(length(weights_join_tolerance) == 1){
+    weights_join_tolerance_x <- weights_join_tolerance
+    weights_join_tolerance_y <- weights_join_tolerance
+  } else if(length(weights_join_tolerance) == 2){
+    weights_join_tolerance_x <- weights_join_tolerance[1]
+    weights_join_tolerance_y <- weights_join_tolerance[2]
+  } else{
+    stop(crayon::red("Please provide a single number or a vector of exactly two numbers for weights_join_tolerance."))
+  }
+
   # Run climate data through daily_aggregation)() (not actually aggregating to daily values)
   setup_list <- daily_aggregation(data, overlay_weights, daily_agg = "none")
   clim_rast <- setup_list[[1]] # Pulls the raster brick
@@ -793,7 +1018,7 @@ staggregate_degree_days <- function(data, overlay_weights, time_agg = "month", s

   # Create function to calculate degree days
   calc_deg_days <- function(x){
-    clim_table <- raster::values(clim_rast)
+    clim_table <- terra::values(clim_rast)

     if(x == 0){ # For the lowest threshold, create a variable equal to 0 if the
                 # value is greater than the threshold, and equal to the
                 # threshold minus value otherwise
@@ -816,7 +1041,7 @@ staggregate_degree_days <- function(data, overlay_weights, time_agg = "month", s
     }

     clim_rast_new <- clim_rast
-    raster::values(clim_rast_new) <- clim_table
+    terra::values(clim_rast_new) <- clim_table

     return(clim_rast_new)
   }
@@ -830,7 +1055,7 @@ staggregate_degree_days <- function(data, overlay_weights, time_agg = "month", s
   create_dt <- function(x){

     # Should output raster cells x/y with 365 days as column names
-    dt <- as.data.table.raster(r[[x]], xy=TRUE)
+    dt <- as_data_table_terra(r[[x]], xy=TRUE)

     # Set column names with months
     new_names <- c('x', 'y', layer_names)
@@ -859,7 +1084,13 @@

   # Aggregate by polygon
-  sum_by_poly <- polygon_aggregation(clim_dt, overlay_weights, list_names, time_agg)
+  sum_by_poly <- polygon_aggregation(
+    clim_dt,
+    overlay_weights,
+    list_names,
+    time_agg,
+    weights_join_tolerance_x = weights_join_tolerance_x,
+    weights_join_tolerance_y = weights_join_tolerance_y)

   return(sum_by_poly)
diff --git a/_pkgdown.yml b/_pkgdown.yml
index a35401b..3b9a8e0 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -1,4 +1,4 @@
+url: https://tcarleton.github.io/stagg/
 template:
   bootstrap: 5
-url: https://tcarleton.github.io/stagg/
diff --git a/data-raw/ERA5_one_hour.nc b/data-raw/ERA5_one_hour.nc
new file mode 100644
index 0000000..873ec43
Binary files /dev/null and b/data-raw/ERA5_one_hour.nc differ
diff --git a/data-raw/cropland_nj_2015.R b/data-raw/cropland_nj_2015.R
index 963b2c6..01c408d 100644
--- a/data-raw/cropland_nj_2015.R
+++ b/data-raw/cropland_nj_2015.R
@@ -1,6 +1,6 @@
 # code to prepare `cropland_nj_2015` dataset

-# last run: August 14, 2024
+# last run: October 4, 2024

 # Read in raw data, downloaded from https://glad.umd.edu/dataset/croplands and
 # crop
@@ -10,11 +10,13 @@
 # maps of cropland extent and change show accelerated cropland expansion in the
 # twenty-first century. Nature Food. https://doi.org/10.1038/s43016-021-00429-z
+library(magrittr)
+
 # Crop
 cropland_nj_2015 <- raster::crop(
   raster::raster("data-raw/Global_cropland_3km_2015.tif"),
-  c(-76.5, -73, 38.25, 42)
-)
+  c(-76.5, -73, 38.25, 42)) %>%
+  raster::readAll()

 # Turn percentage to decimal
 cropland_nj_2015 <- cropland_nj_2015 / 100
diff --git a/data-raw/era5_grid.R b/data-raw/era5_grid.R
new file mode 100644
index 0000000..a842869
--- /dev/null
+++ b/data-raw/era5_grid.R
@@ -0,0 +1,50 @@
+# code and instructions to prepare `era5_grid` dataset
+
+# Last download and run: October 4, 2024
+
+# Read in data from
+# https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels?tab=download
+
+# Options selected: reanalysis, 2m temperature, for Jan 1, 2024 for the whole world
+# as a NetCDF, renamed " ... .nc" to
+# "ERA5_one_hour.nc"
+
+# Dataset reference: Hersbach, H., Bell, B., Berrisford, P., Biavati, G.,
+# Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I.,
+# Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5
+# hourly data on single levels from 1940 to present. Copernicus Climate Change
+# Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47
+
+# API Query (Not used in actual download, but could be):
+# import cdsapi
+#
+# dataset = "reanalysis-era5-single-levels"
+# request = {
+#     "product_type": ["reanalysis"],
+#     "variable": ["2m_temperature"],
+#     "year": ["2024"],
+#     "month": ["01"],
+#     "day": ["01"],
+#     "time": ["00:00"],
+#     "data_format": "netcdf",
+#     "download_format": "unarchived"
+# }
+#
+# client = cdsapi.Client()
+# client.retrieve(dataset, request).download()
+
+
+library(magrittr)
+
+era5_grid <- raster::raster("data-raw/ERA5_one_hour.nc") %>%
+  raster::readAll() %>%
+  raster::setValues(values = 0)
+
+names(era5_grid) <- "X"
+
+
+
+# Use in package
+usethis::use_data(era5_grid, overwrite = TRUE)
+
+
diff --git a/data-raw/nj_counties.R b/data-raw/nj_counties.R
deleted file mode 100644
index 27f1670..0000000
--- a/data-raw/nj_counties.R
+++ /dev/null
@@ -1,5 +0,0 @@
-# code to prepare `nj_counties` dataset
-nj_counties <- tigris::counties("nj")
-
-# Use in package
-usethis::use_data(nj_counties, overwrite = TRUE)
diff --git a/data-raw/overlay_weights_nj.R b/data-raw/overlay_weights_nj.R
index 5c9ab89..d0916be 100644
--- a/data-raw/overlay_weights_nj.R
+++ b/data-raw/overlay_weights_nj.R
@@ -1,4 +1,6 @@
-## code to prepare `overlay_weights_nj` dataset
+# code to prepare `overlay_weights_nj` dataset
+
+# last run: August 14, 2024

 # load county polygons
 nj_counties <- tigris::counties("nj")
diff --git a/data-raw/remaining_datasets.R b/data-raw/remaining_datasets.R
new file mode 100644
index 0000000..20784aa
--- /dev/null
+++ b/data-raw/remaining_datasets.R
@@ -0,0 +1,88 @@
+# This file documents how we obtained the user level datasets which currently do
+# not have their raw forms in data-raw, nor the code used to generate them. This
+# was written October 4, 2024
+
+# ==============================================================================
+# cropland_world_2015_era5
+# ==============================================================================
+
+# Time of creation:
+# ------------------
+# 2022 (I believe this was done with the early pipeline code, and that the code
+# below is meant to give the idea of what was done but may not be exact. That's
+# also the case for the population data, but I think that one was done a little
+# later. See
+# github.com/tcarleton/climate-aggregate/code/0.5_resample_rasters.R).
+
+
+# Reason not in data-raw:
+# ------------------------
+# The raw data file is too large to be stored on git and very difficult to
+# process locally. We should write a script to download directly if possible.
+# Even then, such a script needs a lot of memory and would likely require
+# remote computing.
+
+# Steps to create:
+# -----------------
+
+# 1: Downloaded high resolution (not 3km) global cropland data from
+#    https://glad.umd.edu/dataset/croplands in the four quadrants the site
+#    provides
+
+# 2: For each quadrant, ran contemporary version of stagg::secondary_weights()
+#    with the following code where 'downloaded_file' represents one of the four
+#    data quadrants downloaded in step 1:
+
+  # quadrant_XX <- secondary_weights(
+  #   secondary_raster = raster::raster('downloaded_file'),
+  #   grid = era5_grid,
+  #   extent = full
+  # )
+
+# 3: Row bound the results from step 2 and averaged any overlap by running
+
+  # cropland_world_2015_era5 <- rbind(
+  #   quadrant_NW,
+  #   quadrant_NE,
+  #   quadrant_SW,
+  #   quadrant_SE
+  # )
+  # keys <- colnames(cropland_world_2015_era5)[!grepl('weight',colnames(full_table))]
+  # full_table <- full_table[,list(weight= mean(weight)),keys]
+
+# 4: Saved output from step 3 as package data by running
+
+  # usethis::use_data(cropland_world_2015_era5, overwrite = TRUE)
+
+# ==============================================================================
+# pop_world_2015_era5
+# ==============================================================================
+
+# Time of creation:
+# ------------------
+# 2022 (see comment in cropland time section above)
+
+# Reason not in data-raw:
+# ------------------------
+# The raw data file is too large to be stored on git. We should write a script
+# to query the API if possible.
+
+# Steps to create:
+# -----------------
+
+# 1: Downloaded high resolution population data from
+#    https://landscan.ornl.gov/
+
+# 2: Ran contemporary version of stagg::secondary_weights() [pre terra
+#    conversion] with the following code where 'downloaded_file' represents data
+#    downloaded in step 1:
+
+# pop_world_2015_era5 <- secondary_weights(
+#   secondary_raster = raster::raster('downloaded_file'),
+#   grid = era5_grid,
+#   extent = full
+# )
+
+# 3: Saved output from step 2 as package data by running
+#    usethis::use_data(pop_world_2015_era5, overwrite = TRUE)
+
diff --git a/data-raw/temp_nj_jun_2024_era5.R b/data-raw/temp_nj_jun_2024_era5.R
index 7b35401..075aa3c 100644
--- a/data-raw/temp_nj_jun_2024_era5.R
+++ b/data-raw/temp_nj_jun_2024_era5.R
@@ -1,7 +1,10 @@
 # code and instructions to prepare `temp_nj_jun_2024_era5` dataset

+# Last download: August 14, 2024
+# Last code edit and run: October 4, 2024
+
 # Read in data from
-# https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels?tab=form
+# https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels?tab=download

 # Options selected: reanalysis, 2m temperature, for all days and hours in June
 # 2024 for a subregion from 38 S to 42.25 N and -76.5 W to -73 E, as a NetCDF,
@@ -54,8 +57,10 @@
 #         'format': 'netcdf',
 #     },
 #     'download.nc')
+library(magrittr)

-temp_nj_jun_2024_era5 <- raster::brick("data-raw/NJ_temperature_jun_2024.nc")
+temp_nj_jun_2024_era5 <- raster::brick("data-raw/NJ_temperature_jun_2024.nc") %>%
+  raster::readAll()

 # Use in package
 usethis::use_data(temp_nj_jun_2024_era5, overwrite = TRUE)
diff --git a/data/era5_grid.rda b/data/era5_grid.rda
index 477ebb8..345d04d 100644
Binary files a/data/era5_grid.rda and b/data/era5_grid.rda differ
diff --git
a/data/nj_counties.rda b/data/nj_counties.rda deleted file mode 100644 index 51ba48a..0000000 Binary files a/data/nj_counties.rda and /dev/null differ diff --git a/data/temp_nj_jun_2024_era5.rda b/data/temp_nj_jun_2024_era5.rda index d7d718e..f579416 100644 Binary files a/data/temp_nj_jun_2024_era5.rda and b/data/temp_nj_jun_2024_era5.rda differ diff --git a/docs/404.html b/docs/404.html index 772ee89..56faeb6 100644 --- a/docs/404.html +++ b/docs/404.html @@ -66,7 +66,7 @@

Page not found (404)

diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index de96a0d..35b97dd 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -47,7 +47,7 @@

License

diff --git a/docs/LICENSE.html b/docs/LICENSE.html index b8b8101..f10d0f5 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -51,7 +51,7 @@

MIT License

diff --git a/docs/articles/data_sources.html b/docs/articles/data_sources.html index 6d350f3..ae49ad1 100644 --- a/docs/articles/data_sources.html +++ b/docs/articles/data_sources.html @@ -59,7 +59,7 @@

Using data from climateR or other sources with stagg

Tyler Liddell, Anna Boser, Sara Orofino, Tracey Mangin, and Tamma Carleton

-

2024-08-20

+

2024-11-05

Source: vignettes/data_sources.Rmd
data_sources.Rmd
@@ -78,7 +78,7 @@

Introduction
 library(climateR) # Obtain climate data
 library(tigris) # Polygons to request and aggregate climate data over
-library(stagg) # Aggregate the climate data
+library(stagg) # Aggregate the climate data

climateR example: Working with GridMET Data @@ -195,17 +195,17 @@

Perform Polynomial Aggregationpolynomial_output #> year month poly_id order_1 order_2 order_3 #> <num> <num> <char> <num> <num> <num> -#> 1: 2010 1 037 56.75739 1327.2879 39806.128 -#> 2: 2010 2 037 97.13325 2434.7100 80308.805 -#> 3: 2010 3 037 214.54942 9825.3591 563559.235 -#> 4: 2010 4 037 72.66265 1221.0374 22493.601 -#> 5: 2010 5 037 77.19771 1183.7247 23434.094 +#> 1: 2010 1 037 30.79827 698.5901 19485.618 +#> 2: 2010 2 037 49.60716 1194.6476 36381.718 +#> 3: 2010 3 037 107.00895 4598.5119 248442.534 +#> 4: 2010 4 037 39.93862 678.2517 12457.194 +#> 5: 2010 5 037 43.34992 703.7214 14583.161 #> --- -#> 266: 2010 8 021 32.03729 276.7235 4083.636 -#> 267: 2010 9 021 129.12420 4667.6112 265225.311 -#> 268: 2010 10 021 60.02752 520.8648 5385.785 -#> 269: 2010 11 021 60.80051 809.1176 13799.290 -#> 270: 2010 12 021 73.83445 1502.6363 33897.406

+#> 269: 2010 9 021 69.36754 3093.8891 208694.340 +#> 270: 2010 10 021 31.27158 277.4310 2929.496 +#> 271: 2010 11 021 30.83565 418.2693 7360.483 +#> 272: 2010 12 021 36.97638 794.4837 19675.276 +#> 273: NA NA 021 NA NA NA
@@ -252,17 +252,17 @@

Using data f polynomial_output #> year month poly_id order_1 order_2 order_3 #> <num> <num> <char> <num> <num> <num> -#> 1: 2010 1 037 56.75739 1327.2879 39806.128 -#> 2: 2010 2 037 97.13325 2434.7100 80308.805 -#> 3: 2010 3 037 214.54942 9825.3591 563559.235 -#> 4: 2010 4 037 72.66265 1221.0374 22493.601 -#> 5: 2010 5 037 77.19771 1183.7247 23434.094 +#> 1: 2010 1 037 30.79827 698.5901 19485.618 +#> 2: 2010 2 037 49.60716 1194.6476 36381.718 +#> 3: 2010 3 037 107.00895 4598.5119 248442.534 +#> 4: 2010 4 037 39.93862 678.2517 12457.194 +#> 5: 2010 5 037 43.34992 703.7214 14583.161 #> --- -#> 266: 2010 8 021 32.03729 276.7235 4083.636 -#> 267: 2010 9 021 129.12420 4667.6112 265225.311 -#> 268: 2010 10 021 60.02752 520.8648 5385.785 -#> 269: 2010 11 021 60.80051 809.1176 13799.290 -#> 270: 2010 12 021 73.83445 1502.6363 33897.406

+#> 269: 2010 9 021 69.36754 3093.8891 208694.340 +#> 270: 2010 10 021 31.27158 277.4310 2929.496 +#> 271: 2010 11 021 30.83565 418.2693 7360.483 +#> 272: 2010 12 021 36.97638 794.4837 19675.276 +#> 273: NA NA 021 NA NA NA @@ -275,7 +275,7 @@

Using data f diff --git a/docs/articles/index.html b/docs/articles/index.html index a944897..8a86557 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -49,7 +49,7 @@

All vignettes

diff --git a/docs/authors.html b/docs/authors.html index fa5a2a6..25b7e29 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -38,7 +38,7 @@

Authors and Citation

Authors

@@ -85,7 +85,7 @@

Citation

diff --git a/docs/index.html b/docs/index.html index 035ea22..c83360f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -77,12 +77,11 @@

WorkflowBelow is example code and commentary aimed at demonstrating expected typical usage. The order of the steps is important, as output from each step is used in the one that follows it.

Important

The stagg package currently does not include functions to download climate data. There are many packages that can be used to download climate data. For example, climateR provides access to climate datasets from over 2000 different data providers. ERA5 climate data can be downloaded through ecmwfr, which provides an interface to two European Centre for Medium-Range Weather Forecasts APIs. The KrigR package provides a helpful wrapper function for the ecmwfr package to enable direct downloading of ERA5 climate data in R. ECMWF also provides a general guide to downloading ERA5 data.
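As a rough sketch (not run here): assuming a registered CDS account and API key, and mirroring the CDS request recorded in data-raw/era5_grid.R (exact argument names can vary across ecmwfr versions), an ERA5 download via ecmwfr might look like the following.
# Hypothetical ERA5 request via ecmwfr; the key and target file name are placeholders
ecmwfr::wf_set_key(key = "your-CDS-API-key")
request <- list(
  dataset_short_name = "reanalysis-era5-single-levels",
  product_type = "reanalysis",
  variable = "2m_temperature",
  year = "2024",
  month = "06",
  day = sprintf("%02d", 1:30),
  time = sprintf("%02d:00", 0:23),
  area = c(42.25, -76.5, 38, -73), # N, W, S, E (the New Jersey subregion used below)
  data_format = "netcdf",
  target = "NJ_temperature_jun_2024.nc"
)
ecmwfr::wf_request(request, path = "data-raw")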

-

In this example we use ERA5 climate data, and the package includes a global raster layer of ERA5 climate data which can be used in the secondary_weights() and overlay_weights() functions. However, other types of climate data can be used.

-

For compatibility with stagg climate data should be:
-- a raster or raster stack (e.g., raster::raster() or raster::stack(); can originally be stored in formats such as netCDF or .tif) - at least yearly temporal resolution, since the most coarse temporal aggregation offered by stagg is yearly. - if sub-daily, number of layers per day should be consistent. By default, hourly time steps are assumed (i.e. 24 time steps per day), though a different number can be specified using the time_interval argument in the staggregate_*() functions.
-- in order for the temporal aggregation to happen properly, stagg must have a way of knowing the temporal information associated with the raster stack being aggregated. This can be achieved one of two ways: - the layer names use a character-date-time or character-date format following a string header, or the start date-time and temporal time steps of the raster stack must be specified in staggregate_*(). For example, ERA5 uses the format x2021.01.01.00.00.00 or x2021.01.01, and data retrieved using climateR uses the format variable_2021-01-01, both of which are compatible with stagg. - specify the start datetime and temporal interval of your data in the staggregate_*() functions using the start_date and time_interval arguments.

+

In this example we use ERA5 climate data, and the package includes a global raster layer of ERA5 climate data which can be used as a default in the secondary_weights() and overlay_weights() functions. However, other climate datasets can easily be accommodated.

+

For compatibility with stagg, input climate data should be a raster or layered raster (read in using raster::raster()/terra::rast() or raster::stack()/c() on SpatRasters; data can originally be stored in formats such as netCDF or .tif) with at least an annual temporal resolution, since the coarsest temporal aggregation offered by stagg is annual. If the temporal resolution is sub-daily, the number of layers per day should be consistent. By default, hourly time steps are assumed (i.e., 24 time steps per day), though a different number can be specified using the time_interval argument in the staggregate_*() functions.

+

In order for the temporal aggregation to happen properly, stagg must have a way of knowing the temporal information associated with the raster stack being aggregated. This can be achieved in one of two ways: 1) the layer names use a string-date-time or string-date format; or 2) the start date-time and temporal time steps of the raster stack are specified in staggregate_*(). For example, layer names for ERA5 data and for data retrieved using climateR use the formats x2021.01.01.00.00.00 or x2021.01.01 and variable_2021-01-01 respectively, both of which are compatible with stagg. Alternatively, the user can specify the start datetime and temporal interval of the data in the staggregate_*() functions using the start_date and time_interval arguments.
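For instance (a sketch, not run here; the NetCDF file name is hypothetical, and county_weights is the overlay_weights() output created in Step 2 below), the second approach looks like:
clim_data <- terra::rast("hourly_temps_jun_2024.nc") # 720 hourly layers with uninformative names
staggregate_polynomial(
  data = clim_data,
  overlay_weights = county_weights,
  daily_agg = "average",
  start_date = "2024-06-01 00:00:00", # date-time of the first layer
  time_interval = "1 hour",           # spacing between layers
  degree = 2
)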

-library(stagg)
+library(stagg)
  # Using polygons outlining counties of New Jersey as administrative regions
 nj_counties <- tigris::counties("NJ")
@@ -111,8 +110,8 @@

# option to define an extent for # cropping the secondary_raster, # which saves compute time. Input - # format must be compatible with - # raster::crop(), such as: + # must be in a compatible format, such as: + # nj_extent <- terra::ext(nj_counties) # nj_extent <- sf::st_bbox(nj_counties) # nj_extent <- raster::extent(nj_counties) # nj_extent <- c(-76, -73, 38, 42) @@ -132,18 +131,17 @@

#Display resulting table cropland_weights
#>           x     y    weight
-#>       <num> <num>     <num>
-#>   1: -76.25 41.75 0.2294976
-#>   2: -76.00 41.75 0.2198315
-#>   3: -75.75 41.75 0.1777267
-#>   4: -75.50 41.75 0.1362904
-#>   5: -75.25 41.75 0.1061073
-#>  ---                       
-#> 178: -74.25 38.50 0.0000000
-#> 179: -74.00 38.50 0.0000000
-#> 180: -73.75 38.50 0.0000000
-#> 181: -73.50 38.50 0.0000000
-#> 182: -73.25 38.50 0.0000000
+#>   1: -76.25 41.75 0.2294976
+#>   2: -76.00 41.75 0.2198315
+#>   3: -75.75 41.75 0.1777267
+#>   4: -75.50 41.75 0.1362904
+#>   5: -75.25 41.75 0.1061073
+#>  ---
+#> 178: -74.25 38.50 0.0000000
+#> 179: -74.00 38.50 0.0000000
+#> 180: -73.75 38.50 0.0000000
+#> 181: -73.50 38.50 0.0000000
+#> 182: -73.25 38.50 0.0000000

As you can see from the output, secondary_weights() checks for alignment, and rotates the secondary_raster coordinates if necessary. It also resamples the data to the spatial resolution of the climate grid using bilinear interpolation, and returns a data.table with latitudes, longitudes, and cropland weights.
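
For reference, a condensed sketch of the call that produces a table like the one above (editor's illustration: cropland_raster is a hypothetical input object, and the exact argument names should be checked against ?secondary_weights):

# Sketch only; argument names follow the comments in the diff above and are
# assumptions, not a verified signature.
nj_extent <- sf::st_bbox(nj_counties)      # one of the accepted extent formats

cropland_weights <- secondary_weights(
  secondary_raster = cropland_raster,      # hypothetical raster of cropland fractions
  grid             = era5_grid,            # default ERA5 grid shipped with stagg
  extent           = nj_extent             # crop before resampling to save compute
)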

@@ -186,18 +184,17 @@

Step 2: Overlay administrative regions onto the grid

 # Display results
 print(county_weights, digits = 4)

#>          x     y poly_id    w_area    weight
-#>      <num> <num>  <char>     <num>     <num>
-#>   1: 284.5 39.25     011 0.0172211 0.0360401
-#>   2: 284.5 39.25     033 0.0058164 0.0060266
-#>   3: 284.5 39.50     011 0.0170331 0.0307720
-#>   4: 284.5 39.50     033 0.3550727 0.3175946
-#>   5: 284.5 39.75     015 0.0219317 0.0193611
-#>  ---                                        
-#> 137: 286.0 40.75     013 0.0095999 0.0057512
-#> 138: 286.0 40.75     003 0.1828252 0.1307450
-#> 139: 286.0 40.75     031 0.0065823 0.0028849
-#> 140: 286.0 41.00     003 0.5410449 0.6773374
-#> 141: 286.0 41.00     031 0.0007093 0.0005442
+#>   1: 284.5 39.25     011 0.0172211 0.0360401
+#>   2: 284.5 39.25     033 0.0058164 0.0060266
+#>   3: 284.5 39.50     011 0.0170331 0.0307720
+#>   4: 284.5 39.50     033 0.3550727 0.3175946
+#>   5: 284.5 39.75     015 0.0219317 0.0193611
+#>  ---
+#> 137: 286.0 40.75     013 0.0096002 0.0057514
+#> 138: 286.0 40.75     003 0.1828252 0.1307450
+#> 139: 286.0 40.75     031 0.0065855 0.0028879
+#> 140: 286.0 41.00     003 0.5410449 0.6773374
+#> 141: 286.0 41.00     031 0.0007097 0.0005448

You can see that the function outputs multiple rows for each polygon, one for every grid cell it overlaps. The column w_area represents the proportion of that polygon that falls within the grid cell corresponding to the x and y coordinates. If you included secondary_weights from Step 1, as we have here, overlay_weights() also creates a column named weight, which is determined by normalizing the secondary_weights by w_area. This is what will be used in the aggregation of values. If you wish only to use w_area in aggregating by polygon, you need not run secondary_weights() and can omit the argument secondary_weights from your call to overlay_weights().

Given all of this information, we can interpret the top row of the output as follows: about 1.7% of the area of the county represented by COUNTYFP 011 falls within the grid cell at 284.50 degrees longitude (0-360 range) and 39.25 degrees latitude. This particular cell appears to contain slightly more cropland than other cells in the polygon, though, since its cropland weight is 3.6%.
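
To make the normalization concrete, here is a purely illustrative calculation of one natural reading of "normalizing the secondary_weights by w_area" (editor's sketch; all numbers are made up, and this is not the package's internal code):

# Illustrative only: for the cells i of a single polygon,
# weight_i = w_area_i * crop_i / sum_j(w_area_j * crop_j)
w_area <- c(0.40, 0.60)             # hypothetical area shares of two cells
crop   <- c(0.25, 0.10)             # hypothetical cropland weights of those cells
weight <- (w_area * crop) / sum(w_area * crop)
round(weight, 3)                    # 0.625, 0.375: cropland-rich cells upweighted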

@@ -246,30 +243,29 @@

Polynomial Transformation
 # Display results
 polynomial_output
-#>      year month poly_id  order_1  order_2  order_3
-#>     <num> <num>  <char>    <num>    <num>    <num>
-#>  1:  2024     6     011 729.4373 17894.66 442930.1
-#>  2:  2024     6     033 733.7415 18118.51 451662.6
-#>  3:  2024     6     015 729.0098 17892.01 443419.8
-#>  4:  2024     6     009 686.3300 15807.50 366563.4
-#>  5:  2024     6     007 730.9659 17987.04 446942.1
-#>  6:  2024     6     041 684.4207 15918.44 377139.5
-#>  7:  2024     6     019 705.7509 16878.38 410117.8
-#>  8:  2024     6     001 724.9572 17682.29 435310.3
-#>  9:  2024     6     005 726.9852 17802.18 440440.8
-#> 10:  2024     6     021 721.3909 17561.50 432688.6
-#> 11:  2024     6     027 687.8298 16056.75 381384.9
-#> 12:  2024     6     037 663.3331 14985.01 345520.2
-#> 13:  2024     6     023 718.5814 17424.03 427602.5
-#> 14:  2024     6     035 710.1241 17067.68 416336.7
-#> 15:  2024     6     029 720.6808 17499.87 429477.7
-#> 16:  2024     6     025 711.0168 17042.99 413123.1
-#> 17:  2024     6     039 702.9569 16690.27 401409.4
-#> 18:  2024     6     013 698.6268 16473.52 393206.4
-#> 19:  2024     6     031 670.6923 15279.34 354421.3
-#> 20:  2024     6     017 704.8867 16733.28 401281.4
-#> 21:  2024     6     003 688.9643 16026.13 377468.5
-#>      year month poly_id  order_1  order_2  order_3
+#>     year month poly_id  order_1  order_2  order_3
+#>  1: 2024     6     011 729.4373 17894.66 442930.1
+#>  2: 2024     6     033 733.7415 18118.51 451662.6
+#>  3: 2024     6     015 729.0098 17892.01 443419.8
+#>  4: 2024     6     009 686.3300 15807.50 366563.4
+#>  5: 2024     6     007 730.9659 17987.04 446942.1
+#>  6: 2024     6     041 684.4207 15918.44 377139.5
+#>  7: 2024     6     019 705.7509 16878.38 410117.8
+#>  8: 2024     6     001 724.9572 17682.29 435310.3
+#>  9: 2024     6     005 726.9852 17802.18 440440.8
+#> 10: 2024     6     021 721.3909 17561.50 432688.6
+#> 11: 2024     6     027 687.8256 16056.56 381378.6
+#> 12: 2024     6     037 663.3327 14985.00 345519.8
+#> 13: 2024     6     023 718.5814 17424.03 427602.5
+#> 14: 2024     6     035 710.1241 17067.68 416336.7
+#> 15: 2024     6     029 720.6808 17499.87 429477.7
+#> 16: 2024     6     025 711.0168 17042.99 413123.1
+#> 17: 2024     6     039 702.9571 16690.28 401409.8
+#> 18: 2024     6     013 698.6267 16473.52 393206.3
+#> 19: 2024     6     031 670.6985 15279.59 354428.6
+#> 20: 2024     6     017 704.8867 16733.28 401281.4
+#> 21: 2024     6     003 688.9643 16026.13 377468.5
+#>     year month poly_id  order_1  order_2  order_3

You can see that 3 variables are created. order_1 represents the original values, linearly aggregated to the county and monthly level. order_2 and order_3 represent the original values squared and cubed, respectively, prior to being aggregated to the county and monthly level. In this case, our example covers only 30 days of temperature data, so each polygon has just one row, corresponding to the only month present, June. Were this a full year of data, each polygon would appear 12 times. Note also that passing time_agg = "day" would create a data.table 30 times longer, with another column to the right of month called day.
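
Once in this form, the output drops straight into a regression. A hedged sketch (editor's illustration; the outcome panel, its yield column, and the join keys other than those shown above are hypothetical):

# Sketch: merge the aggregated climate terms onto a hypothetical outcome panel
# and estimate a cubic temperature response.
panel <- merge(outcomes, polynomial_output,
               by = c("year", "month", "poly_id"))
fit <- lm(yield ~ order_1 + order_2 + order_3, data = panel)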

@@ -311,30 +307,29 @@

Restricted Cubic Spline Transfor
 # Display output
 spline_output
-#>      year month poly_id    value   term_1   term_2
-#>     <num> <num>  <char>    <num>    <num>    <num>
-#>  1:  2024     6     011 729.4373 303327.9 61769.48
-#>  2:  2024     6     033 733.7415 306556.2 62576.55
-#>  3:  2024     6     015 729.0098 303007.5 61689.39
-#>  4:  2024     6     009 686.3300 270997.7 53686.97
-#>  5:  2024     6     007 730.9659 304474.5 62056.15
-#>  6:  2024     6     041 684.4207 269634.4 53356.41
-#>  7:  2024     6     019 705.7509 285580.9 57335.39
-#>  8:  2024     6     001 724.9572 299967.9 60929.49
-#>  9:  2024     6     005 726.9852 301489.3 61309.89
-#> 10:  2024     6     021 721.3909 297294.5 60261.31
-#> 11:  2024     6     027 687.8298 272168.1 53986.38
-#> 12:  2024     6     037 663.3331 253919.2 49442.69
-#> 13:  2024     6     023 718.5814 295187.0 59734.38
-#> 14:  2024     6     035 710.1241 288852.2 58151.92
-#> 15:  2024     6     029 720.6808 296761.1 60127.84
-#> 16:  2024     6     025 711.0168 289513.4 58315.97
-#> 17:  2024     6     039 702.9569 283473.5 56806.76
-#> 18:  2024     6     013 698.6268 280224.5 55994.31
-#> 19:  2024     6     031 670.6923 259366.5 50793.73
-#> 20:  2024     6     017 704.8867 284915.9 57166.60
-#> 21:  2024     6     003 688.9643 272983.2 54184.80
-#>      year month poly_id    value   term_1   term_2
+#>     year month poly_id    value   term_1   term_2
+#>  1: 2024     6     011 729.4373 303327.9 61769.48
+#>  2: 2024     6     033 733.7415 306556.2 62576.55
+#>  3: 2024     6     015 729.0098 303007.5 61689.39
+#>  4: 2024     6     009 686.3300 270997.7 53686.97
+#>  5: 2024     6     007 730.9659 304474.5 62056.15
+#>  6: 2024     6     041 684.4207 269634.4 53356.42
+#>  7: 2024     6     019 705.7509 285580.9 57335.39
+#>  8: 2024     6     001 724.9572 299967.9 60929.49
+#>  9: 2024     6     005 726.9852 301489.3 61309.89
+#> 10: 2024     6     021 721.3909 297294.5 60261.31
+#> 11: 2024     6     027 687.8256 272164.9 53985.60
+#> 12: 2024     6     037 663.3327 253918.9 49442.64
+#> 13: 2024     6     023 718.5814 295187.0 59734.38
+#> 14: 2024     6     035 710.1241 288852.2 58151.92
+#> 15: 2024     6     029 720.6808 296761.1 60127.84
+#> 16: 2024     6     025 711.0168 289513.4 58315.97
+#> 17: 2024     6     039 702.9571 283473.7 56806.81
+#> 18: 2024     6     013 698.6267 280224.5 55994.30
+#> 19: 2024     6     031 670.6985 259371.1 50794.87
+#> 20: 2024     6     017 704.8867 284915.9 57166.60
+#> 21: 2024     6     003 688.9643 272983.2 54184.80
+#>     year month poly_id    value   term_1   term_2

You can see that the output looks very similar to the table from the polynomial transformation. The only difference is that the number of new variables created equals the number of knots minus two (here, 4 - 2 = 2). These data are now ready for use in a regression.
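
The knots-minus-two rule is easy to sanity-check (editor's sketch; the knot locations below are hypothetical):

# Number of new spline terms created beyond the linear value column:
knots <- c(0, 7.5, 12.5, 20)   # hypothetical knot locations
length(knots) - 2              # = 2, matching term_1 and term_2 above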

@@ -375,54 +370,52 @@

Binning Transformation
 # Display output
 bin_output

-#>      year month poly_id bin_ninf_to_0 bin_0_to_2.5 bin_2.5_to_5 bin_5_to_7.5
-#>     <num> <num>  <char>         <num>        <num>        <num>        <num>
-#>  1:  2024     6     011             0            0            0            0
-#>  2:  2024     6     033             0            0            0            0
-#>  3:  2024     6     015             0            0            0            0
-#>  4:  2024     6     009             0            0            0            0
-#>  5:  2024     6     007             0            0            0            0
-#>  6:  2024     6     041             0            0            0            0
-#>  7:  2024     6     019             0            0            0            0
-#>  8:  2024     6     001             0            0            0            0
-#>  9:  2024     6     005             0            0            0            0
-#> 10:  2024     6     021             0            0            0            0
-#> 11:  2024     6     027             0            0            0            0
-#> 12:  2024     6     037             0            0            0            0
-#> 13:  2024     6     023             0            0            0            0
-#> 14:  2024     6     035             0            0            0            0
-#> 15:  2024     6     029             0            0            0            0
-#> 16:  2024     6     025             0            0            0            0
-#> 17:  2024     6     039             0            0            0            0
-#> 18:  2024     6     013             0            0            0            0
-#> 19:  2024     6     031             0            0            0            0
-#> 20:  2024     6     017             0            0            0            0
-#> 21:  2024     6     003             0            0            0            0
-#>      year month poly_id bin_ninf_to_0 bin_0_to_2.5 bin_2.5_to_5 bin_5_to_7.5
-#>     bin_7.5_to_10 bin_10_to_inf
-#>             <num>         <num>
-#>  1:             0            30
-#>  2:             0            30
-#>  3:             0            30
-#>  4:             0            30
-#>  5:             0            30
-#>  6:             0            30
-#>  7:             0            30
-#>  8:             0            30
-#>  9:             0            30
-#> 10:             0            30
-#> 11:             0            30
-#> 12:             0            30
-#> 13:             0            30
-#> 14:             0            30
-#> 15:             0            30
-#> 16:             0            30
-#> 17:             0            30
-#> 18:             0            30
-#> 19:             0            30
-#> 20:             0            30
-#> 21:             0            30
-#>     bin_7.5_to_10 bin_10_to_inf
+#>     year month poly_id bin_ninf_to_0 bin_0_to_2.5 bin_2.5_to_5 bin_5_to_7.5
+#>  1: 2024     6     011             0            0            0            0
+#>  2: 2024     6     033             0            0            0            0
+#>  3: 2024     6     015             0            0            0            0
+#>  4: 2024     6     009             0            0            0            0
+#>  5: 2024     6     007             0            0            0            0
+#>  6: 2024     6     041             0            0            0            0
+#>  7: 2024     6     019             0            0            0            0
+#>  8: 2024     6     001             0            0            0            0
+#>  9: 2024     6     005             0            0            0            0
+#> 10: 2024     6     021             0            0            0            0
+#> 11: 2024     6     027             0            0            0            0
+#> 12: 2024     6     037             0            0            0            0
+#> 13: 2024     6     023             0            0            0            0
+#> 14: 2024     6     035             0            0            0            0
+#> 15: 2024     6     029             0            0            0            0
+#> 16: 2024     6     025             0            0            0            0
+#> 17: 2024     6     039             0            0            0            0
+#> 18: 2024     6     013             0            0            0            0
+#> 19: 2024     6     031             0            0            0            0
+#> 20: 2024     6     017             0            0            0            0
+#> 21: 2024     6     003             0            0            0            0
+#>     year month poly_id bin_ninf_to_0 bin_0_to_2.5 bin_2.5_to_5 bin_5_to_7.5
+#>     bin_7.5_to_10 bin_10_to_inf
+#>  1:             0            30
+#>  2:             0            30
+#>  3:             0            30
+#>  4:             0            30
+#>  5:             0            30
+#>  6:             0            30
+#>  7:             0            30
+#>  8:             0            30
+#>  9:             0            30
+#> 10:             0            30
+#> 11:             0            30
+#> 12:             0            30
+#> 13:             0            30
+#> 14:             0            30
+#> 15:             0            30
+#> 16:             0            30
+#> 17:             0            30
+#> 18:             0            30
+#> 19:             0            30
+#> 20:             0            30
+#> 21:             0            30
+#>     bin_7.5_to_10 bin_10_to_inf

Like before, the output table features one row for every county for every time period specified by the time_agg argument. What has changed is that there is a new column for each bin created, representing the number of days the polygon's values fell within that bin during the timespan specified by the time_agg argument. These outputs are not necessarily integers, since each polygon is made up of pixels that are sorted into bins, weighted by the overlay_weights provided, and then aggregated (here, to the county level); a small worked example follows below. Here we specify bins, in degrees Celsius, from negative infinity to 0, 0 to 2.5, 2.5 to 5, 5 to 7.5, 7.5 to 10, and 10 to infinity by passing c(0, 2.5, 5, 7.5, 10) to bin_break. staggregate_bin() draws a bin between each pair of breaks, and adds edge bins that encompass all values below the minimum break and above the maximum break.
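
As a purely illustrative calculation of why bin counts need not be integers (editor's sketch, not the package's internal code): suppose a polygon consists of two cells with weights 0.7 and 0.3, and on a given day only the first cell's value falls in bin_10_to_inf.

# Weighted bin membership for one day and one polygon (all numbers made up):
weights <- c(0.7, 0.3)    # hypothetical cell weights within the polygon
in_bin  <- c(1, 0)        # does each cell's value fall in bin_10_to_inf?
sum(weights * in_bin)     # the day contributes 0.7 to the bin, not 0 or 1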

@@ -458,57 +451,78 @@

Degree Days Transformation

#> Aggregating by polygon and month
-#>      year month poly_id threshold_ninf_to_0 threshold_0_to_10
-#>     <num> <num>  <char>               <num>             <num>
-#>  1:  2024     6     011                   0              7200
-#>  2:  2024     6     033                   0              7200
-#>  3:  2024     6     015                   0              7200
-#>  4:  2024     6     009                   0              7200
-#>  5:  2024     6     007                   0              7200
-#>  6:  2024     6     041                   0              7200
-#>  7:  2024     6     019                   0              7200
-#>  8:  2024     6     001                   0              7200
-#>  9:  2024     6     005                   0              7200
-#> 10:  2024     6     021                   0              7200
-#> 11:  2024     6     027                   0              7200
-#> 12:  2024     6     037                   0              7200
-#> 13:  2024     6     023                   0              7200
-#> 14:  2024     6     035                   0              7200
-#> 15:  2024     6     029                   0              7200
-#> 16:  2024     6     025                   0              7200
-#> 17:  2024     6     039                   0              7200
-#> 18:  2024     6     013                   0              7200
-#> 19:  2024     6     031                   0              7200
-#> 20:  2024     6     017                   0              7200
-#> 21:  2024     6     003                   0              7200
-#>      year month poly_id threshold_ninf_to_0 threshold_0_to_10
-#>     threshold_10_to_20 threshold_20_to_inf
-#>                  <num>               <num>
-#>  1:           7033.869            3272.625
-#>  2:           6996.451            3413.344
-#>  3:           6929.859            3366.375
-#>  4:           7129.929            2141.990
-#>  5:           6935.451            3407.730
-#>  6:           6486.642            2739.455
-#>  7:           6661.205            3076.817
-#>  8:           6965.391            3233.581
-#>  9:           6902.068            3345.578
-#> 10:           6822.985            3290.398
-#> 11:           6549.322            2758.592
-#> 12:           6336.398            2383.596
-#> 13:           6834.659            3211.294
-#> 14:           6712.329            3130.650
-#> 15:           6894.822            3201.517
-#> 16:           6886.335            2978.067
-#> 17:           6794.598            2876.367
-#> 18:           6812.126            2754.916
-#> 19:           6437.996            2458.619
-#> 20:           6947.556            2769.725
-#> 21:           6755.068            2580.074
-#>     threshold_10_to_20 threshold_20_to_inf

+#>     year month poly_id threshold_ninf_to_0 threshold_0_to_10 threshold_10_to_20
+#>  1: 2024     6     011                   0              7200           7033.869
+#>  2: 2024     6     033                   0              7200           6996.451
+#>  3: 2024     6     015                   0              7200           6929.859
+#>  4: 2024     6     009                   0              7200           7129.929
+#>  5: 2024     6     007                   0              7200           6935.451
+#>  6: 2024     6     041                   0              7200           6486.642
+#>  7: 2024     6     019                   0              7200           6661.205
+#>  8: 2024     6     001                   0              7200           6965.391
+#>  9: 2024     6     005                   0              7200           6902.068
+#> 10: 2024     6     021                   0              7200           6822.985
+#> 11: 2024     6     027                   0              7200           6549.290
+#> 12: 2024     6     037                   0              7200           6336.393
+#> 13: 2024     6     023                   0              7200           6834.659
+#> 14: 2024     6     035                   0              7200           6712.329
+#> 15: 2024     6     029                   0              7200           6894.822
+#> 16: 2024     6     025                   0              7200           6886.335
+#> 17: 2024     6     039                   0              7200           6794.602
+#> 18: 2024     6     013                   0              7200           6812.126
+#> 19: 2024     6     031                   0              7200           6438.093
+#> 20: 2024     6     017                   0              7200           6947.556
+#> 21: 2024     6     003                   0              7200           6755.068
+#>     year month poly_id threshold_ninf_to_0 threshold_0_to_10 threshold_10_to_20
+#>     threshold_20_to_inf
+#>  1:            3272.625
+#>  2:            3413.344
+#>  3:            3366.375
+#>  4:            2141.990
+#>  5:            3407.730
+#>  6:            2739.455
+#>  7:            3076.817
+#>  8:            3233.581
+#>  9:            3345.578
+#> 10:            3290.398
+#> 11:            2758.524
+#> 12:            2383.593
+#> 13:            3211.295
+#> 14:            3130.650
+#> 15:            3201.517
+#> 16:            2978.067
+#> 17:            2876.369
+#> 18:            2754.916
+#> 19:            2458.671
+#> 20:            2769.725
+#> 21:            2580.074
+#>     threshold_20_to_inf

staggregate_degree_days() operates directly on the hourly values. Passing a vector of length n to thresholds creates n + 1 columns, similar to how staggregate_bin() operates. For each value in the climate raster brick (or stack), the function determines which thresholds the value falls between. For example, a value of 15 falls between 10 and 20. All the variables corresponding to threshold pairs below this receive the difference between their two thresholds (threshold_0_to_10 gets 10), and all variables above (threshold_20_to_inf) get 0. The variable in which the value falls gets the difference between the lower threshold and the value (threshold_10_to_20 gets 5). The low edge variable (threshold_ninf_to_0) is unique in that it measures how far below the smallest threshold a value falls: a value of -3 would get 3 for this variable, while any value above 0 would receive 0 here. Once all values are transformed in this way, the hourly values are aggregated to the desired polygon level and temporal scale, as with all other staggregate_*() functions.
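
The splitting rule reads more easily as code. The following is an illustrative re-implementation of the logic described above (editor's sketch, not the package's internals):

# How a single hourly value of 15 is distributed across thresholds c(0, 10, 20):
value <- 15
ninf_to_0  <- max(0 - value, 0)             # distance below the lowest threshold: 0
t0_to_10   <- max(min(value, 10) - 0,  0)   # fully-passed pair gets its full width: 10
t10_to_20  <- max(min(value, 20) - 10, 0)   # pair containing the value: 15 - 10 = 5
t20_to_inf <- max(value - 20, 0)            # pairs above the value get: 0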

+
+A note on spatiotemporally aggregated climate data without further transformations
+
+If desired, a user can employ stagg to return spatiotemporally aggregated climate data without further transformations by using the staggregate_polynomial() function. The column order_1 represents the first-order polynomial, i.e., merely the spatiotemporal means (or sums) of the input climate data. The temporal aggregation is dictated by the user-defined arguments daily_agg (options are "sum", "average", and "none") and time_agg (options are "hour", "day", "month", and "year"). The spatial aggregation occurs over the user-defined administrative areas of interest (e.g., counties), which are incorporated through the overlay_weights input generated by the overlay_weights() function. The user can set the argument degree = 1 if this is the only desired output. However, it is important to note that aggregation bias can become severe when aggregations are taken over large spatial and/or temporal periods before nonlinear transformations are performed. Indeed, a main feature of the stagg package is that it performs the transformations and aggregations in a sequence that avoids aggregation bias. Thus, users are cautioned against performing transformations after aggregation. Users who wish to use other transformations are encouraged to submit them to the package via an issue or pull request on GitHub for future package integration.
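
A minimal sketch of such a degree = 1 call (editor's illustration; clim and county_weights stand in for objects created in the earlier steps, and exact argument names and formats should be checked against ?staggregate_polynomial):

# Sketch only: plain spatiotemporal monthly means, no nonlinear transformation.
monthly_means <- staggregate_polynomial(
  data            = clim,             # hypothetical hourly climate raster
  overlay_weights = county_weights,   # output of overlay_weights()
  daily_agg       = "average",
  time_agg        = "month",
  degree          = 1                 # order_1 only: weighted means
)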

+
+Additional Resources
+
+Below are some helpful resources on climate data and the analysis of climate impacts on socioeconomic outcomes:
+