From bb262433ef34d3ac7cf0a9b2f25ca4babba707e2 Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Fri, 17 Apr 2026 10:19:33 -0700 Subject: [PATCH 01/11] Add get_bls_headers function, implement BLS_USER_AGENT environment variable, add logic to fail gracefully and to return a status message when verbose = TRUE. --- DESCRIPTION | 2 +- NEWS.md | 10 ++++++ R/download_helpers.R | 46 ++++++++++++++++++++++++++ R/fread_BLS.R | 73 +++++++++++++++++++++++++++++++----------- README.md | 11 +++++++ man/fread_bls.Rd | 2 +- man/get_bls_headers.Rd | 19 +++++++++++ 7 files changed, 143 insertions(+), 20 deletions(-) create mode 100644 man/get_bls_headers.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 38b4e46..a38c4d9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BLSloadR Type: Package Title: Download Time Series Data from the U.S. Bureau of Labor Statistics -Version: 0.4 +Version: 0.4.5 Authors@R: c( person( given = "Nevada Department of Employment, Training, and Rehabilitation", diff --git a/NEWS.md b/NEWS.md index 8996798..785881a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +# BLSloadR 0.4.5 patch notes + +## Hotfix Updates + +This patch updates BLSloadR to better address 403 and other download errors with the following updates. + +- The download logic now returns NULL and fails gracefully when 400/500 status errors are returned using `fread_bls()` +- This version incorporates logic from the development version of the package enabling the user to set a BLS_USER_AGENT environment variable. Setting it to your email address adds that address to the request headers sent to the BLS, which helps avoid 403 errors in the first place. +- Documentation updates explaining the environment variables have been added to the README file. 
+ # BLSloadR 0.4 patch notes ## Functional Enhancements diff --git a/R/download_helpers.R b/R/download_helpers.R index abe1965..5519caa 100644 --- a/R/download_helpers.R +++ b/R/download_helpers.R @@ -1,3 +1,49 @@ +#' Generate headers for BLS requests +#' +#' Returns a named character vector of HTTP headers required for BLS API requests. +#' These headers mimic a standard browser to ensure compatibility with BLS servers. +#' +#' @param host The host to use in the Host header (default: "download.bls.gov") +#' @return A named character vector of HTTP headers +#' @keywords internal +get_bls_headers <- function(host = "download.bls.gov") { + # 1. Check for a local environment variable first + # This allows users to set their email/identity via .Renviron or Sys.setenv() + ua <- Sys.getenv("BLS_USER_AGENT") + + # 2. If the variable is empty, use a list of plausible headers to rotate + if (ua == "") { + plausible_agents <- c( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0", + "Mozilla/5.0 (R; BLSloadR Package)" + ) + # Select one at random for this session/call + ua <- sample(plausible_agents, 1) + } + + # 3. 
Generate dynamic headers + + c( + "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "Accept-Encoding" = "gzip, deflate, br", + "Accept-Language" = "en-US,en;q=0.9", + "Connection" = "keep-alive", + "Host" = host, + "Referer" = "https://download.bls.gov/pub/time.series/", + "Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', + "Sec-Ch-Ua-Mobile" = "?0", + "Sec-Ch-Ua-Platform" = '"Windows"', + "Sec-Fetch-Dest" = "document", + "Sec-Fetch-Mode" = "navigate", + "Sec-Fetch-Site" = "same-origin", + "Sec-Fetch-User" = "?1", + "Upgrade-Insecure-Requests" = "1", + "User-Agent" = ua + ) +} #' Create a BLS data object with diagnostics #' #' This is a helper function to create a list with the additional class 'bls_data_collection' containing data downloaded form the U.S. Bureau of Labor Statistics as well as diagnostic details about the download. It is used invisibly in the package to bundle information about file downloads. 
diff --git a/R/fread_BLS.R b/R/fread_BLS.R index f8c6343..9958fe9 100644 --- a/R/fread_BLS.R +++ b/R/fread_BLS.R @@ -18,26 +18,63 @@ fread_bls <- function(url, verbose = FALSE, cache = check_bls_cache_env()) { # Uses the smart download logic to check headers/mtime temp_file <- smart_bls_download(url, verbose = verbose) } else { - headers <- c( - "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "Accept-Encoding" = "gzip, deflate, br", - "Accept-Language" = "en-US,en;q=0.9", - "Connection" = "keep-alive", - "Host" = "download.bls.gov", - "Referer" = "https://download.bls.gov/pub/time.series/", - "Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', - "Sec-Ch-Ua-Mobile" = "?0", - "Sec-Ch-Ua-Platform" = '"Windows"', - "Sec-Fetch-Dest" = "document", - "Sec-Fetch-Mode" = "navigate", - "Sec-Fetch-Site" = "same-origin", - "Sec-Fetch-User" = "?1", - "Upgrade-Insecure-Requests" = "1", - "User-Agent" = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + headers <- get_bls_headers() + + # Perform request and catch transport-level failures gracefully + response <- tryCatch( + httr::GET(url, httr::add_headers(.headers = headers)), + error = function(e) { + if (verbose) message("Network/transport error: ", conditionMessage(e)) + return(NULL) + } ) - response <- httr::GET(url, httr::add_headers(.headers = headers)) - httr::stop_for_status(response) + # If transport failed, exit early + if (is.null(response)) { + return(NULL) + } + + status <- httr::status_code(response) + + # For any non-2xx status, fail gracefully and return NULL + if (status < 200 || status >= 300) { + # Human-readable reason (e.g., "Client error", "Server error") + hs <- httr::http_status(response) + + # Capture and clean server message (strip HTML, normalize spaces) + error_body <- httr::content(response, as = "text", 
encoding = "UTF-8") + clean_error <- gsub("<.*?>", "", error_body) + clean_error <- trimws(gsub("\\s+", " ", clean_error)) + clean_error <- substr(clean_error, 1, 500) + + # Provide a short hint by status code + hint <- switch( + as.character(status), + "401" = "Unauthorized.", + "403" = "Forbidden.", + "404" = "Not found.", + "429" = "Rate limited.", + { + if (status >= 500) "Server error. Consider retrying with backoff." + else "Client error. Inspect request headers and URL." + } + ) + + if (verbose) { + message( + sprintf( + "%s (%d). %s%s", + hs$message %||% hs$reason %||% "HTTP error", + status, + if (nzchar(clean_error)) paste0(" Server message: ", clean_error) else " No server message provided.", + if (nzchar(hint)) paste0(" Brief code description: ", hint) else "" + ) + ) + } + + return(NULL) + } + raw_data <- httr::content(response, as = "raw") temp_file <- tempfile(fileext = ".txt") diff --git a/README.md b/README.md index d8165d6..37aeba6 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,17 @@ The primary functions in this package all begin with get_ and are listed below: -`get_qcew()` - This accesses data from the Quarterly Census of Employment and Wages (QCEW). This is a very large data set, so access is filtered by area or industry. This function iterates requesting single-quarter files via the BLS QCEW Data Slices tool at https://www.bls.gov/cew/additional-resources/open-data/csv-data-slices.htm. This function was included beginning in version 0.3.1. +# Configuring Your User Profile +BLSloadR will typically work by default without any customization. However, there are some options you can use that may improve your experience. These options are managed with *environment variables* in your R session that enable the following: + +-`BLS_USER_AGENT` - setting this environment variable to your e-mail address will use your e-mail address when downloading data from the BLS. 
In case of errors with your downloads, this may help the BLS to identify you as an individual user. Setting this environment variable to a character string passes that character string to the BLS as the User-Agent HTTP header. + +-`USE_BLS_CACHE` - Setting this environment variable to "TRUE" will enable a local file cache of your BLS downloads which will download new files for supported functions only when the underlying data has changed. + +-`BLS_CACHE_DIR` - If you want to use the file cache, you may wish to specify a location. Setting this environment variable will specify a different path for the file cache than the default. + +To permanently set these environment variables, you can edit your .Renviron file (such as with `usethis::edit_r_environ()`). To do so for a single session, you can set your environment variables with `Sys.setenv(USE_BLS_CACHE="TRUE")`. + # General BLS Time Series Functions These optional helper functions can aid the user of this package by providing ways to summarize and explore all the time.series databases. These functions are a bit different than the specific functions above, as they implement a general way to merge and import BLS time.series databases, but do not manually specify the data, series, and lookup files to be joined. As such, they return a bls_data_collection object which includes the joined data as well as diagnostic results including dropped columns, unexpected join results, and other tools to help review the data before use. Further, when multiple data or series files are present, the user is prompted to choose one, so these tools are not suitable for a typical piped script. 
diff --git a/man/fread_bls.Rd b/man/fread_bls.Rd index 7e4b251..ec45e0b 100644 --- a/man/fread_bls.Rd +++ b/man/fread_bls.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fread_bls.R +% Please edit documentation in R/fread_BLS.R \name{fread_bls} \alias{fread_bls} \title{Download BLS Time Series Data} diff --git a/man/get_bls_headers.Rd b/man/get_bls_headers.Rd new file mode 100644 index 0000000..26975ed --- /dev/null +++ b/man/get_bls_headers.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/download_helpers.R +\name{get_bls_headers} +\alias{get_bls_headers} +\title{Generate headers for BLS requests} +\usage{ +get_bls_headers(host = "download.bls.gov") +} +\arguments{ +\item{host}{The host to use in the Host header (default: "download.bls.gov")} +} +\value{ +A named character vector of HTTP headers +} +\description{ +Returns a named character vector of HTTP headers required for BLS API requests. +These headers mimic a standard browser to ensure compatibility with BLS servers. +} +\keyword{internal} From 78c651bdda6f23247bff677574041ddbe2a99549 Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Mon, 20 Apr 2026 08:18:50 -0700 Subject: [PATCH 02/11] Add check for NULL respons to download of BLS files, exit function if NULL with status message. Incorporate get_bls_headers into SALT and smart_bls_downloads logic. --- R/download_helpers.R | 18 +----------------- R/get_ces.R | 5 +++++ R/get_jolts.R | 5 +++++ R/get_laus.R | 5 +++++ R/get_national_ces.R | 5 +++++ R/get_oews.R | 5 +++++ R/get_salt.R | 18 +----------------- 7 files changed, 27 insertions(+), 34 deletions(-) diff --git a/R/download_helpers.R b/R/download_helpers.R index 5519caa..7bc83ec 100644 --- a/R/download_helpers.R +++ b/R/download_helpers.R @@ -168,23 +168,7 @@ get_bls_diagnostics <- function(bls_obj) { smart_bls_download <- function(url, cache_dir = NULL, verbose = FALSE) { # 1. 
Define specific headers required by BLS servers - bls_headers <- httr::add_headers(.headers = c( - "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "Accept-Encoding" = "gzip, deflate, br", - "Accept-Language" = "en-US,en;q=0.9", - "Connection" = "keep-alive", - "Host" = "download.bls.gov", - "Referer" = "https://download.bls.gov/pub/time.series/", - "Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', - "Sec-Ch-Ua-Mobile" = "?0", - "Sec-Ch-Ua-Platform" = '"Windows"', - "Sec-Fetch-Dest" = "document", - "Sec-Fetch-Mode" = "navigate", - "Sec-Fetch-Site" = "same-origin", - "Sec-Fetch-User" = "?1", - "Upgrade-Insecure-Requests" = "1", - "User-Agent" = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" - )) + bls_headers <- httr::add_headers(.headers = get_bls_headers()) # 2. Establish cache directory if (is.null(cache_dir)) { diff --git a/R/get_ces.R b/R/get_ces.R index ae95beb..9ba1236 100644 --- a/R/get_ces.R +++ b/R/get_ces.R @@ -248,6 +248,11 @@ get_ces <- function(states = NULL, industry_filter = NULL, current_year_only = F # Download all files if(!suppress_warnings){message("Starting CES data download...\n")} downloads <- download_bls_files(ces_urls, suppress_warnings = suppress_warnings, cache = cache) + + # Exit function if download failed. + if(is.null(downloads)){ + stop("Download of BLS data failed. 
Please run with suppress_warnings = FALSE for additional status messages.") + } # Extract data from downloads - handle multiple data files when downloading by states if (!is.null(states) && !current_year_only && is.null(industry_filter)) { diff --git a/R/get_jolts.R b/R/get_jolts.R index 259da1d..e973491 100644 --- a/R/get_jolts.R +++ b/R/get_jolts.R @@ -84,6 +84,11 @@ get_jolts <- function(monthly_only = TRUE, remove_regions = TRUE, remove_nationa # Download all files downloads <- download_bls_files(download_urls, suppress_warnings = suppress_warnings, cache = cache) + # Exit function if download failed. + if(is.null(downloads)){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages.") + } + # Extract data from downloads jolts_import <- get_bls_data(downloads$data) jolts_series <- get_bls_data(downloads$series) diff --git a/R/get_laus.R b/R/get_laus.R index f21fd87..ca36217 100644 --- a/R/get_laus.R +++ b/R/get_laus.R @@ -191,6 +191,11 @@ get_laus <- function(geography = "state_adjusted", monthly_only = TRUE, transfor # Download all files downloads <- download_bls_files(download_urls, suppress_warnings = suppress_warnings, cache = cache) + # Exit function if download failed. + if(is.null(downloads)){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages.") + } + # Extract data from downloads laus_import <- get_bls_data(downloads$data) laus_series <- get_bls_data(downloads$series) diff --git a/R/get_national_ces.R b/R/get_national_ces.R index f42dcba..f4164cc 100644 --- a/R/get_national_ces.R +++ b/R/get_national_ces.R @@ -138,6 +138,11 @@ get_national_ces <- function(dataset_filter = "all_data", monthly_only = TRUE, # Download all files message("Downloading national CES datasets (", dataset_name, ")...") downloads <- download_bls_files(ces_urls, suppress_warnings = suppress_warnings, cache = cache) + + # Exit function if download failed. 
+ if(is.null(downloads)){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages.") + } # Extract data from each download ces_data <- get_bls_data(downloads[["data"]]) diff --git a/R/get_oews.R b/R/get_oews.R index 1d0508a..d44537f 100644 --- a/R/get_oews.R +++ b/R/get_oews.R @@ -80,6 +80,11 @@ get_oews <- function(simplify_table = TRUE, suppress_warnings = TRUE, return_dia # Download all files downloads <- download_bls_files(download_urls, suppress_warnings = suppress_warnings, cache = cache) + # Exit function if download failed. + if(is.null(downloads)){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages.") + } + # Extract data from downloads oews_current <- get_bls_data(downloads$data) if(!fast_read){ diff --git a/R/get_salt.R b/R/get_salt.R index 8ae0c0f..8528e1c 100644 --- a/R/get_salt.R +++ b/R/get_salt.R @@ -74,23 +74,7 @@ get_salt <- function(only_states = TRUE, geometry = FALSE, suppress_warnings = T salt_url <- "https://www.bls.gov/lau/stalt-moave.xlsx" - headers <- c( - "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "Accept-Encoding" = "gzip, deflate, br", - "Accept-Language" = "en-US,en;q=0.9", - "Connection" = "keep-alive", - "Host" = "www.bls.gov", - "Referer" = "https://www.bls.gov/lau/", - "Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', - "Sec-Ch-Ua-Mobile" = "?0", - "Sec-Ch-Ua-Platform" = '"Windows"', - "Sec-Fetch-Dest" = "document", - "Sec-Fetch-Mode" = "navigate", - "Sec-Fetch-Site" = "same-origin", - "Sec-Fetch-User" = "?1", - "Upgrade-Insecure-Requests" = "1", - "User-Agent" = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" - ) + headers <- get_ls_headers() # Download Excel file message("Downloading SALT data from BLS...\n") From 
b4454e87f2b91d4ecac0b78f403e860bfe856e4e Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Mon, 20 Apr 2026 08:58:56 -0700 Subject: [PATCH 03/11] Add note about BLS_USER_AGENT environment variable to error message if download fails. --- R/get_ces.R | 2 +- R/get_jolts.R | 2 +- R/get_laus.R | 2 +- R/get_national_ces.R | 2 +- R/get_oews.R | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/get_ces.R b/R/get_ces.R index 9ba1236..4f6e06b 100644 --- a/R/get_ces.R +++ b/R/get_ces.R @@ -251,7 +251,7 @@ get_ces <- function(states = NULL, industry_filter = NULL, current_year_only = F # Exit function if download failed. if(is.null(downloads)){ - stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages.") + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } # Extract data from downloads - handle multiple data files when downloading by states diff --git a/R/get_jolts.R b/R/get_jolts.R index e973491..14392ce 100644 --- a/R/get_jolts.R +++ b/R/get_jolts.R @@ -86,7 +86,7 @@ get_jolts <- function(monthly_only = TRUE, remove_regions = TRUE, remove_nationa # Exit function if download failed. if(is.null(downloads)){ - stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages.") + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } # Extract data from downloads diff --git a/R/get_laus.R b/R/get_laus.R index ca36217..adcf700 100644 --- a/R/get_laus.R +++ b/R/get_laus.R @@ -193,7 +193,7 @@ get_laus <- function(geography = "state_adjusted", monthly_only = TRUE, transfor # Exit function if download failed. 
if(is.null(downloads)){ - stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages.") + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } # Extract data from downloads diff --git a/R/get_national_ces.R b/R/get_national_ces.R index f4164cc..0abc762 100644 --- a/R/get_national_ces.R +++ b/R/get_national_ces.R @@ -141,7 +141,7 @@ get_national_ces <- function(dataset_filter = "all_data", monthly_only = TRUE, # Exit function if download failed. if(is.null(downloads)){ - stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages.") + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } # Extract data from each download diff --git a/R/get_oews.R b/R/get_oews.R index d44537f..513f529 100644 --- a/R/get_oews.R +++ b/R/get_oews.R @@ -82,7 +82,7 @@ get_oews <- function(simplify_table = TRUE, suppress_warnings = TRUE, return_dia # Exit function if download failed. if(is.null(downloads)){ - stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages.") + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. 
Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } # Extract data from downloads From f6290f2fcc50ba688dc70511be55dc4ab612266f Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Mon, 20 Apr 2026 09:13:55 -0700 Subject: [PATCH 04/11] Changed donttest to dontrun because examples are likely to fail at BLS firewall without setting a BLS_USER_AGENT header, similar to requiring an API. --- R/get_ces.R | 2 +- R/get_jolts.R | 2 +- R/get_laus.R | 2 +- R/get_national_ces.R | 2 +- R/get_oews.R | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/get_ces.R b/R/get_ces.R index 4f6e06b..e696efb 100644 --- a/R/get_ces.R +++ b/R/get_ces.R @@ -67,7 +67,7 @@ #' @importFrom stringr str_remove #' @importFrom lubridate ym #' @examples -#' \donttest{ +#' \dontrun{ #' # Fast download: Massachusetts and Connecticut data only (all industries) #' ces_states <- get_ces(states = c("MA", "CT")) #' diff --git a/R/get_jolts.R b/R/get_jolts.R index 14392ce..1234de3 100644 --- a/R/get_jolts.R +++ b/R/get_jolts.R @@ -51,7 +51,7 @@ #' @importFrom dplyr case_when #' @importFrom lubridate ym #' @examples -#' \donttest{ +#' \dontrun{ #' # Download state-level JOLTS data (default - returns data directly) #' jolts_data <- get_jolts() #' diff --git a/R/get_laus.R b/R/get_laus.R index adcf700..3b5d8b4 100644 --- a/R/get_laus.R +++ b/R/get_laus.R @@ -74,7 +74,7 @@ #' @importFrom lubridate ym #' #' @examples -#' \donttest{ +#' \dontrun{ #' # Download state-level seasonally adjusted data (default operation) #' laus_states <- get_laus() #' diff --git a/R/get_national_ces.R b/R/get_national_ces.R index 0abc762..4889eeb 100644 --- a/R/get_national_ces.R +++ b/R/get_national_ces.R @@ -67,7 +67,7 @@ #' and `create_bls_object()` helper functions must be available in your environment. 
#' #' @examples -#' \donttest{ +#' \dontrun{ #' # Get complete monthly CES data with simplified table structure (default) #' ces_monthly <- get_national_ces() #' diff --git a/R/get_oews.R b/R/get_oews.R index 513f529..655467a 100644 --- a/R/get_oews.R +++ b/R/get_oews.R @@ -39,7 +39,7 @@ #' @importFrom dplyr left_join #' @importFrom dplyr select #' @examples -#' \donttest{ +#' \dontrun{ #' # Download current OEWS data #' oews_data <- get_oews() #' @@ -203,7 +203,7 @@ get_oews <- function(simplify_table = TRUE, suppress_warnings = TRUE, return_dia #' @importFrom dplyr summarize #' #' @examples -#' \donttest{ +#' \dontrun{ #' # Get OEWS area definitions without shapefiles and with processing messages. #' test <- get_oews_areas(ref_year = 2024, geometry = FALSE, silent = FALSE) #' From 426fc4150e4fb3d803398749919dd800f48912f8 Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Mon, 20 Apr 2026 09:18:01 -0700 Subject: [PATCH 05/11] Example set to dontrun and implementation of get_bls_headers added to load_bls_dataset. 
--- R/load_bls_dataset.R | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/R/load_bls_dataset.R b/R/load_bls_dataset.R index 7b47763..5b15833 100644 --- a/R/load_bls_dataset.R +++ b/R/load_bls_dataset.R @@ -53,7 +53,7 @@ #' @importFrom utils head #' #' @examples -#' \donttest{ +#' \dontrun{ #' # Import All Data #' fm_import <- load_bls_dataset("fm", which_data = "all") #' @@ -107,28 +107,17 @@ load_bls_dataset <- function(database_code, return_full = FALSE, simplify_table get_directory_files <- function(url, prefix) { tryCatch({ # Set up headers to avoid 403 errors - headers <- c( - "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "Accept-Encoding" = "gzip, deflate, br", - "Accept-Language" = "en-US,en;q=0.9", - "Connection" = "keep-alive", - "Host" = "download.bls.gov", - "Referer" = "https://download.bls.gov/pub/time.series/", - "Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', - "Sec-Ch-Ua-Mobile" = "?0", - "Sec-Ch-Ua-Platform" = '"Windows"', - "Sec-Fetch-Dest" = "document", - "Sec-Fetch-Mode" = "navigate", - "Sec-Fetch-Site" = "same-origin", - "Sec-Fetch-User" = "?1", - "Upgrade-Insecure-Requests" = "1", - "User-Agent" = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" - ) + headers <- get_bls_headers() # Make request with headers response <- httr::GET(url, httr::add_headers(.headers = headers)) httr::stop_for_status(response) + # Exit function if download failed. + if(is.null(downloads)){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. 
Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") + } + # Parse HTML content page <- rvest::read_html(httr::content(response, as = "text")) links <- rvest::html_elements(page, "a") From 7512a18be5508742c02f5a6c20ba35d723d087b3 Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Mon, 20 Apr 2026 14:16:21 -0700 Subject: [PATCH 06/11] Fixed for headers for get_salt, which uses different headers than normal web browsing. Changed donttest to dontrun due to 403 errors from BLS on testing downloads. --- R/download_helpers.R | 45 +++++++++++++++++++++++++++++++++--- R/get_salt.R | 3 ++- R/globals.R | 1 + cran-comments.md | 9 +++++++- man/get_bls_excel_headers.Rd | 20 ++++++++++++++++ man/get_bls_headers.Rd | 2 +- man/get_ces.Rd | 2 +- man/get_jolts.Rd | 2 +- man/get_laus.Rd | 2 +- man/get_national_ces.Rd | 2 +- man/get_oews.Rd | 2 +- man/get_oews_areas.Rd | 2 +- man/load_bls_dataset.Rd | 2 +- 13 files changed, 81 insertions(+), 13 deletions(-) create mode 100644 man/get_bls_excel_headers.Rd diff --git a/R/download_helpers.R b/R/download_helpers.R index 7bc83ec..9064544 100644 --- a/R/download_helpers.R +++ b/R/download_helpers.R @@ -3,10 +3,11 @@ #' Returns a named character vector of HTTP headers required for BLS API requests. #' These headers mimic a standard browser to ensure compatibility with BLS servers. #' -#' @param host The host to use in the Host header (default: "download.bls.gov") +#' @param host The URL to use in the Host header (default: "download.bls.gov") #' @return A named character vector of HTTP headers #' @keywords internal -get_bls_headers <- function(host = "download.bls.gov") { +get_bls_headers <- function(host = "download.bls.gov" + ) { # 1. 
Check for a local environment variable first # This allows users to set their email/identity via .Renviron or Sys.setenv() ua <- Sys.getenv("BLS_USER_AGENT") @@ -32,7 +33,7 @@ get_bls_headers <- function(host = "download.bls.gov") { "Accept-Language" = "en-US,en;q=0.9", "Connection" = "keep-alive", "Host" = host, - "Referer" = "https://download.bls.gov/pub/time.series/", + "Referer" = refer, "Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', "Sec-Ch-Ua-Mobile" = "?0", "Sec-Ch-Ua-Platform" = '"Windows"', @@ -44,6 +45,44 @@ get_bls_headers <- function(host = "download.bls.gov") { "User-Agent" = ua ) } +#' Generate headers for BLS requests to download Excel files +#' +#' Returns a named character vector of HTTP headers required for BLS API requests. +#' These headers mimic a standard browser to ensure compatibility with BLS servers. +#' This function returns a more limited set of headers used to download an Excel file. +#' +#' @param refer The URL to use in the Referer header (default: "https://www.bls.gov/lau/stalt-archived.htm") +#' @return A named character vector of HTTP headers +#' @keywords internal +get_bls_excel_headers <- function(refer = "https://www.bls.gov/lau/stalt-archived.htm") { + # 1. Check for a local environment variable first + # This allows users to set their email/identity via .Renviron or Sys.setenv() + ua <- Sys.getenv("BLS_USER_AGENT") + + # 2. 
If the variable is empty, use a list of plausible headers to rotate + if (ua == "") { + plausible_agents <- c( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0", + "Mozilla/5.0 (R; BLSloadR Package)" + ) + # Select one at random for this session/call + ua <- sample(plausible_agents, 1) + } + + # 3. Generate dynamic headers + + c( + "Referer" = refer, + "Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', + "Sec-Ch-Ua-Mobile" = "?0", + "Sec-Ch-Ua-Platform" = '"Windows"', + "Upgrade-Insecure-Requests" = "1", + "User-Agent" = ua + ) +} #' Create a BLS data object with diagnostics #' #' This is a helper function to create a list with the additional class 'bls_data_collection' containing data downloaded form the U.S. Bureau of Labor Statistics as well as diagnostic details about the download. It is used invisibly in the package to bundle information about file downloads. 
diff --git a/R/get_salt.R b/R/get_salt.R index 8528e1c..0e218c5 100644 --- a/R/get_salt.R +++ b/R/get_salt.R @@ -74,7 +74,8 @@ get_salt <- function(only_states = TRUE, geometry = FALSE, suppress_warnings = T salt_url <- "https://www.bls.gov/lau/stalt-moave.xlsx" - headers <- get_ls_headers() + headers <- get_bls_excel_headers() + # Download Excel file message("Downloading SALT data from BLS...\n") diff --git a/R/globals.R b/R/globals.R index 71e28e2..8e0f4d3 100644 --- a/R/globals.R +++ b/R/globals.R @@ -72,6 +72,7 @@ utils::globalVariables(c( "temp_month", "ind_lookup", "area_lookup", + "refer", # Statistical functions (base R) "median", diff --git a/cran-comments.md b/cran-comments.md index ea8cf94..f46d6cb 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -15,7 +15,7 @@ This package is designed to access data for specific programs at the United Stat - LAUS - Local Area Unemployment Statistics, a set of data produced by the BLS - OEWS - Occupational Employment and Wage Statistics, a set of data produced by the BLS - SALT - State Alternative Measures of Labor Underutilization, a set of data produced by the BLS -- QCEW - Quarterly Census of Employmnt and Wages, a set of data produced by the BLS +- QCEW - Quarterly Census of Employment and Wages, a set of data produced by the BLS - NAICS - North American Industrial Classification System - IC - Initial Claims for Unemployment Insurance - SA - Seasonally Adjusted @@ -24,6 +24,13 @@ This package is designed to access data for specific programs at the United Stat ## Package Updates +### Corrections for failed donttest runs - April 2026 + +Corrected the underlying function logic to handle function downloads correctly. Also changed some examples from donttest to dontrun, as BLS server is now regularly sending 403 errors without a customized User-Agent header, so functionality is more like requiring an API key. + +- Updated functions which access data from internet servers. 
These functions now check whether the download was successful. If not (result is NULL), the function exits early to prevent errors. +- Changed donttest to dontrun in examples that typically require a User-Agent header to be set in the HTTP request to succeed, as this is handled with an environment variable, similar to an API key requirement. + ### Major changes made since initial package version - Implemented changes to `get_ces()` to allow for utilizing subsets of the full data table to improve speed. diff --git a/man/get_bls_excel_headers.Rd b/man/get_bls_excel_headers.Rd new file mode 100644 index 0000000..29d11b6 --- /dev/null +++ b/man/get_bls_excel_headers.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/download_helpers.R +\name{get_bls_excel_headers} +\alias{get_bls_excel_headers} +\title{Generate headers for BLS requests to download Excel files} +\usage{ +get_bls_excel_headers(refer = "https://www.bls.gov/lau/stalt-archived.htm") +} +\arguments{ +\item{refer}{The URL to use in the Referer header (default: "https://www.bls.gov/lau/stalt-archived.htm")} +} +\value{ +A named character vector of HTTP headers +} +\description{ +Returns a named character vector of HTTP headers required for BLS API requests. +These headers mimic a standard browser to ensure compatibility with BLS servers. +This function returns a more limited set of headers used to download an Excel file. 
+} +\keyword{internal} diff --git a/man/get_bls_headers.Rd b/man/get_bls_headers.Rd index 26975ed..2fe5e6d 100644 --- a/man/get_bls_headers.Rd +++ b/man/get_bls_headers.Rd @@ -7,7 +7,7 @@ get_bls_headers(host = "download.bls.gov") } \arguments{ -\item{host}{The host to use in the Host header (default: "download.bls.gov")} +\item{host}{The URL to use in the Host header (default: "download.bls.gov")} } \value{ A named character vector of HTTP headers diff --git a/man/get_ces.Rd b/man/get_ces.Rd index e1078a2..5e7d4ea 100644 --- a/man/get_ces.Rd +++ b/man/get_ces.Rd @@ -81,7 +81,7 @@ Puerto Rico = "PR", Virgin Islands = "VI", District of Columbia = "DC". } } \examples{ -\donttest{ +\dontrun{ # Fast download: Massachusetts and Connecticut data only (all industries) ces_states <- get_ces(states = c("MA", "CT")) diff --git a/man/get_jolts.Rd b/man/get_jolts.Rd index 8efdf9e..06ea798 100644 --- a/man/get_jolts.Rd +++ b/man/get_jolts.Rd @@ -66,7 +66,7 @@ The function performs several data transformations: } } \examples{ -\donttest{ +\dontrun{ # Download state-level JOLTS data (default - returns data directly) jolts_data <- get_jolts() diff --git a/man/get_laus.Rd b/man/get_laus.Rd index 102992a..1a15535 100644 --- a/man/get_laus.Rd +++ b/man/get_laus.Rd @@ -87,7 +87,7 @@ The function joins data from multiple BLS files: } } \examples{ -\donttest{ +\dontrun{ # Download state-level seasonally adjusted data (default operation) laus_states <- get_laus() diff --git a/man/get_national_ces.Rd b/man/get_national_ces.Rd index 91653da..0075822 100644 --- a/man/get_national_ces.Rd +++ b/man/get_national_ces.Rd @@ -88,7 +88,7 @@ lubridate (for date formatting when simplify_table=TRUE). The `fread_bls()` and `create_bls_object()` helper functions must be available in your environment. 
} \examples{ -\donttest{ +\dontrun{ # Get complete monthly CES data with simplified table structure (default) ces_monthly <- get_national_ces() diff --git a/man/get_oews.Rd b/man/get_oews.Rd index 7d553d9..1681902 100644 --- a/man/get_oews.Rd +++ b/man/get_oews.Rd @@ -53,7 +53,7 @@ from the Bureau of Labor Statistics OEWS program. The data includes employment and wage estimates by occupation and geographic area. Note that OEWS is a large data set (over 6 million rows), so it will require longer to download. } \examples{ -\donttest{ +\dontrun{ # Download current OEWS data oews_data <- get_oews() diff --git a/man/get_oews_areas.Rd b/man/get_oews_areas.Rd index bb53d27..f846c23 100644 --- a/man/get_oews_areas.Rd +++ b/man/get_oews_areas.Rd @@ -29,7 +29,7 @@ Data table which maps individual counties to OEWS area definitions. Download OEWS Area Definitions } \examples{ -\donttest{ +\dontrun{ # Get OEWS area definitions without shapefiles and with processing messages. test <- get_oews_areas(ref_year = 2024, geometry = FALSE, silent = FALSE) diff --git a/man/load_bls_dataset.Rd b/man/load_bls_dataset.Rd index 7cdeff6..317638d 100644 --- a/man/load_bls_dataset.Rd +++ b/man/load_bls_dataset.Rd @@ -58,7 +58,7 @@ function. When multiple potential data files exist (common in large data sets), will prompt for an input of which file to use. } \examples{ -\donttest{ +\dontrun{ # Import All Data fm_import <- load_bls_dataset("fm", which_data = "all") From 8d14ff7d2d41e7d3666ad2d77e88681fb92a7e04 Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Wed, 22 Apr 2026 12:11:56 -0700 Subject: [PATCH 07/11] Function updates to check for NULL or 0-length response in downloads for get_* functions. Added read_bls_excel function to compartmentalize processing differences in downloading files and Excel workbooks for analysis. Reflowed get_salt to use new download logic and incorporate graceful exit on failed download. 
--- NAMESPACE | 2 ++ R/bls_overview.R | 20 ++--------- R/fread_BLS.R | 82 +++++++++++++++++++++++++++++++++++++++++++ R/get_ces.R | 2 +- R/get_jolts.R | 2 +- R/get_laus.R | 2 +- R/get_national_ces.R | 2 +- R/get_oews.R | 20 ++--------- R/get_salt.R | 34 +++++++----------- R/globals.R | 3 +- cran-comments.md | 7 ++-- man/bls_overview.Rd | 2 ++ man/get_salt.Rd | 10 +----- man/read_bls_excel.Rd | 30 ++++++++++++++++ 14 files changed, 146 insertions(+), 72 deletions(-) create mode 100644 man/read_bls_excel.Rd diff --git a/NAMESPACE b/NAMESPACE index 43a0a53..57fe5a7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ export(list_ces_states) export(list_national_ces_options) export(load_bls_dataset) export(print_bls_warnings) +export(read_bls_excel) export(read_bls_text) export(show_ces_options) export(show_national_ces_options) @@ -51,6 +52,7 @@ importFrom(httr,HEAD) importFrom(httr,add_headers) importFrom(httr,content) importFrom(httr,headers) +importFrom(httr,http_status) importFrom(httr,progress) importFrom(httr,status_code) importFrom(httr,stop_for_status) diff --git a/R/bls_overview.R b/R/bls_overview.R index 8d640e2..43e8d5e 100644 --- a/R/bls_overview.R +++ b/R/bls_overview.R @@ -18,6 +18,7 @@ #' #' @examples #' \donttest{ +#' if(interactive()){ #' # Display Average Price Data overview #' bls_overview("ap") #' @@ -27,6 +28,7 @@ #' # Display in console instead of viewer #' bls_overview("ap", display_method = "console") #' } +#' } bls_overview <- function(series_id, display_method = "viewer", base_url = "https://download.bls.gov/pub/time.series") { @@ -43,23 +45,7 @@ bls_overview <- function(series_id, # Fetch content with proper headers (similar to fread_bls) tryCatch({ - headers <- c( - "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "Accept-Encoding" = "gzip, deflate, br", - "Accept-Language" = "en-US,en;q=0.9", - "Connection" = "keep-alive", - "Host" = 
"download.bls.gov", - "Referer" = "https://download.bls.gov/pub/time.series/", - "Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', - "Sec-Ch-Ua-Mobile" = "?0", - "Sec-Ch-Ua-Platform" = '"Windows"', - "Sec-Fetch-Dest" = "document", - "Sec-Fetch-Mode" = "navigate", - "Sec-Fetch-Site" = "same-origin", - "Sec-Fetch-User" = "?1", - "Upgrade-Insecure-Requests" = "1", - "User-Agent" = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" - ) + headers <- get_bls_headers() response <- httr::GET(url, httr::add_headers(.headers = headers)) httr::stop_for_status(response) diff --git a/R/fread_BLS.R b/R/fread_BLS.R index 9958fe9..282be18 100644 --- a/R/fread_BLS.R +++ b/R/fread_BLS.R @@ -185,4 +185,86 @@ fread_bls <- function(url, verbose = FALSE, cache = check_bls_cache_env()) { class(result) <- c("bls_data", "list") return(result) +} + +#' Download BLS Excel Data +#' +#' @param url Character string. URL to the BLS .xlsx or .xls file. +#' @param verbose Logical. If TRUE, prints diagnostic messages. +#' @param ... Additional arguments passed to readxl::read_excel (e.g., sheet, range). +#' @return A data.frame or NULL if the download or read fails. +#' @export +#' @importFrom httr GET add_headers status_code http_status content +#' @importFrom readxl read_excel +#' @examples +#' \dontrun{ +#' # Download BLS Alternative MEasures History +#' salt_url <- "https://www.bls.gov/lau/stalt-moave.xlsx" +#' salt_data <- read_bls_excel(salt_url, skip = 1) +#' +#' } +#' +read_bls_excel <- function(url, verbose = FALSE, ...) { + # --- 1. 
DATA ACQUISITION --- + headers <- get_bls_excel_headers() + + # Perform request and catch transport-level failures (e.g., DNS, Connection Refused) + response <- tryCatch( + httr::GET(url, httr::add_headers(.headers = headers)), + error = function(e) { + message("Network error: ", conditionMessage(e)) + return(NULL) + } + ) + + if (is.null(response)) return(NULL) + + status <- httr::status_code(response) + + # --- 2. ERROR HANDLING (Always status, Detailed if Verbose) --- + if (status < 200 || status >= 300) { + hs <- httr::http_status(response) + + # Always print the basic failure status + message(sprintf("Download failed for %s\nStatus: %d (%s)", url, status, hs$reason)) + + # Provide full response details only if verbose is TRUE + if (verbose) { + # Capture and clean server message + error_body <- tryCatch(httr::content(response, as = "text", encoding = "UTF-8"), error = function(e) "") + clean_error <- gsub("<.*?>", "", error_body) + clean_error <- substr(trimws(gsub("\\s+", " ", clean_error)), 1, 500) + + # Determine Hint + hint <- switch(as.character(status), + "401" = "Unauthorized.", + "403" = "Forbidden. Check User-Agent or API key.", + "404" = "Not found.", + "429" = "Rate limited.", + if (status >= 500) "Server error. Consider retrying later." else "Client error.") + + message(sprintf("Hint: %s", hint)) + if (nzchar(clean_error)) message(sprintf("Server Message: %s", clean_error)) + } + + return(NULL) + } + + # --- 3. FILE PROCESSING --- + raw_data <- httr::content(response, as = "raw") + temp_file <- tempfile(fileext = ".xlsx") + writeBin(raw_data, temp_file) + + # Wrap the read in tryCatch for a graceful exit if the file is unreadable + data_out <- tryCatch({ + readxl::read_excel(temp_file, ...) 
+ }, error = function(e) { + message("Failed to parse Excel content: ", conditionMessage(e)) + return(NULL) + }) + + # Cleanup temp file + if (file.exists(temp_file)) unlink(temp_file) + + return(data_out) } \ No newline at end of file diff --git a/R/get_ces.R b/R/get_ces.R index e696efb..9f24965 100644 --- a/R/get_ces.R +++ b/R/get_ces.R @@ -250,7 +250,7 @@ get_ces <- function(states = NULL, industry_filter = NULL, current_year_only = F downloads <- download_bls_files(ces_urls, suppress_warnings = suppress_warnings, cache = cache) # Exit function if download failed. - if(is.null(downloads)){ + if(is.null(downloads) | length(downloads) == 0 | length(ces_urls) != length(downloads)){ stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } diff --git a/R/get_jolts.R b/R/get_jolts.R index 1234de3..9ee633c 100644 --- a/R/get_jolts.R +++ b/R/get_jolts.R @@ -85,7 +85,7 @@ get_jolts <- function(monthly_only = TRUE, remove_regions = TRUE, remove_nationa downloads <- download_bls_files(download_urls, suppress_warnings = suppress_warnings, cache = cache) # Exit function if download failed. - if(is.null(downloads)){ + if(is.null(downloads) | length(downloads) == 0){ stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } diff --git a/R/get_laus.R b/R/get_laus.R index 3b5d8b4..9fd4daf 100644 --- a/R/get_laus.R +++ b/R/get_laus.R @@ -192,7 +192,7 @@ get_laus <- function(geography = "state_adjusted", monthly_only = TRUE, transfor downloads <- download_bls_files(download_urls, suppress_warnings = suppress_warnings, cache = cache) # Exit function if download failed. 
- if(is.null(downloads)){ + if(is.null(downloads) | length(downloads) == 0){ stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } diff --git a/R/get_national_ces.R b/R/get_national_ces.R index 4889eeb..5e35840 100644 --- a/R/get_national_ces.R +++ b/R/get_national_ces.R @@ -140,7 +140,7 @@ get_national_ces <- function(dataset_filter = "all_data", monthly_only = TRUE, downloads <- download_bls_files(ces_urls, suppress_warnings = suppress_warnings, cache = cache) # Exit function if download failed. - if(is.null(downloads)){ + if(is.null(downloads) | length(downloads) == 0){ stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } diff --git a/R/get_oews.R b/R/get_oews.R index 655467a..c5c0509 100644 --- a/R/get_oews.R +++ b/R/get_oews.R @@ -81,7 +81,7 @@ get_oews <- function(simplify_table = TRUE, suppress_warnings = TRUE, return_dia downloads <- download_bls_files(download_urls, suppress_warnings = suppress_warnings, cache = cache) # Exit function if download failed. - if(is.null(downloads)){ + if(is.null(downloads) | length(downloads) == 0){ stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. 
Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } @@ -229,22 +229,8 @@ get_oews_areas <- function(ref_year, silent = TRUE, geometry = TRUE){ # Create download URL oews_url <- paste0("https://www.bls.gov/oes/",dl_year,"/may/area_definitions_m",dl_year,".xlsx") - headers <- c( - "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "Accept-Encoding" = "gzip, deflate, br", - "Accept-Language" = "en-US,en;q=0.9", - "Connection" = "keep-alive", - "Host" = "www.bls.gov", - "Referer" = "https://www.bls.gov/oes/", - "Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', - "Sec-Ch-Ua-Mobile" = "?0", - "Sec-Ch-Ua-Platform" = '"Windows"', - "Sec-Fetch-Dest" = "document", - "Sec-Fetch-Mode" = "navigate", - "Sec-Fetch-Site" = "same-origin", - "Sec-Fetch-User" = "?1", - "Upgrade-Insecure-Requests" = "1", - "User-Agent" = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + headers <- get_bls_excel_headers( + refer = "https://www.bls.gov/oes/" ) # Download Excel file diff --git a/R/get_salt.R b/R/get_salt.R index 0e218c5..8d8c34b 100644 --- a/R/get_salt.R +++ b/R/get_salt.R @@ -47,17 +47,10 @@ #' @importFrom zoo as.yearqtr #' @importFrom readxl read_excel #' @examples -#' \donttest{ +#' \dontrun{ #' # Download state-level SALT data #' salt_data <- get_salt() #' -#' # View top 10 highest U-6 rates by state in current data -#' latest <- salt_data |> -#' dplyr::filter(date == max(date)) |> -#' dplyr::select(state, u6) |> -#' dplyr::arrange(-u6) -#' head(latest) -#' #' # Include sub-state areas #' salt_all <- get_salt(only_states = FALSE) #' @@ -66,7 +59,6 @@ #' #' # Get full diagnostic object if needed #' data_with_diagnostics <- get_salt(return_diagnostics = TRUE) -#' print_bls_warnings(data_with_diagnostics) #' } #' @@ -74,24 
+66,22 @@ get_salt <- function(only_states = TRUE, geometry = FALSE, suppress_warnings = T salt_url <- "https://www.bls.gov/lau/stalt-moave.xlsx" - headers <- get_bls_excel_headers() - - - # Download Excel file - message("Downloading SALT data from BLS...\n") - response <- httr::GET(salt_url, - httr::write_disk(tf <- tempfile(fileext = ".xlsx")), - httr::add_headers(.headers = headers)) - - # Check for successful response - httr::stop_for_status(response) - + # Downloading BLS Alternative Measures file + if(!suppress_warnings){ + message("Downloading Alternative Measures from Excel file from BLS...") + } + salt_data <- read_bls_excel(salt_url, verbose = !suppress_warnings, skip = 1) + # Track processing steps processing_steps <- character(0) + if(is.null(salt_data)){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE to see status messages.") + } + # Read and process Excel file message("Processing SALT Excel file...\n") - salt_data <- readxl::read_excel(tf, skip = 1) |> + salt_data <- salt_data |> dplyr::rename_with(.fn = stringr::str_to_lower) |> dplyr::mutate(date = lubridate::yq(paste0(`end year`, `end quarter`))) |> dplyr::select(-c(record, `start year`, `start quarter`, `end year`, `end quarter`, `unique period`)) |> diff --git a/R/globals.R b/R/globals.R index 8e0f4d3..57950f2 100644 --- a/R/globals.R +++ b/R/globals.R @@ -79,7 +79,8 @@ utils::globalVariables(c( "quantile", # Placeholders within functions - "result" + "result", + "tf" )) diff --git a/cran-comments.md b/cran-comments.md index f46d6cb..d03514f 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,7 +1,8 @@ ## R CMD check results -BLSloadR 0.4 ──── -Duration: 5m 0.4s +BLSloadR 0.4.5 ──── + +Duration: 2m 42.6s 0 errors ✔ | 0 warnings ✔ | 0 notes ✔ R CMD check succeeded @@ -30,6 +31,8 @@ Corrected the underlying function logic to handle function downloads correctly. - Updated functions which access data from internet servers. 
These functions now check whether the download was successful. If not (results NULL), then exits the function loop to prevent errors. - Changed donttest to dontrun in examples that typically require a User-Agent header to be set in the HTTP request to succeed,as this is handled with an environment variable, similar to an API key requirement. +- Made changes to how headers are defined and applied across the package to streamline future changes. +- Wrapped one function that called the Viewer in `if(interactive())` per NOTE in previous package submission. ### Major changes made since initial package version diff --git a/man/bls_overview.Rd b/man/bls_overview.Rd index 1503908..45147fc 100644 --- a/man/bls_overview.Rd +++ b/man/bls_overview.Rd @@ -26,6 +26,7 @@ Fetches and displays the overview text file for a BLS dataset. This provides a c } \examples{ \donttest{ +if(interactive()){ # Display Average Price Data overview bls_overview("ap") @@ -36,3 +37,4 @@ bls_overview("cu") bls_overview("ap", display_method = "console") } } +} diff --git a/man/get_salt.Rd b/man/get_salt.Rd index 9580ccd..3b41fa2 100644 --- a/man/get_salt.Rd +++ b/man/get_salt.Rd @@ -35,17 +35,10 @@ including U-1 through U-6 measures. The data provides a more comprehensive view of labor market conditions beyond the standard unemployment rate (U-3). 
} \examples{ -\donttest{ +\dontrun{ # Download state-level SALT data salt_data <- get_salt() -# View top 10 highest U-6 rates by state in current data -latest <- salt_data |> - dplyr::filter(date == max(date)) |> - dplyr::select(state, u6) |> - dplyr::arrange(-u6) -head(latest) - # Include sub-state areas salt_all <- get_salt(only_states = FALSE) @@ -54,7 +47,6 @@ get_salt(geometry = TRUE) # Get full diagnostic object if needed data_with_diagnostics <- get_salt(return_diagnostics = TRUE) -print_bls_warnings(data_with_diagnostics) } } diff --git a/man/read_bls_excel.Rd b/man/read_bls_excel.Rd new file mode 100644 index 0000000..11b8dc7 --- /dev/null +++ b/man/read_bls_excel.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fread_BLS.R +\name{read_bls_excel} +\alias{read_bls_excel} +\title{Download BLS Excel Data} +\usage{ +read_bls_excel(url, verbose = FALSE, ...) +} +\arguments{ +\item{url}{Character string. URL to the BLS .xlsx or .xls file.} + +\item{verbose}{Logical. If TRUE, prints diagnostic messages.} + +\item{...}{Additional arguments passed to readxl::read_excel (e.g., sheet, range).} +} +\value{ +A data.frame or NULL if the download or read fails. +} +\description{ +Download BLS Excel Data +} +\examples{ +\dontrun{ +# Download BLS Alternative MEasures History +salt_url <- "https://www.bls.gov/lau/stalt-moave.xlsx" +salt_data <- read_bls_excel(salt_url, skip = 1) + +} + +} From 5b01e9f583d19d426e6e39ceb45cadc32d4167bd Mon Sep 17 00:00:00 2001 From: David Schmidt <72461034+schmidtDETR@users.noreply.github.com> Date: Tue, 28 Apr 2026 10:58:45 -0700 Subject: [PATCH 08/11] Update DESCRIPTION Remove empty line from DESCRIPTION --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1808869..5d7cb0f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,6 @@ Package: BLSloadR Type: Package Title: Download Time Series Data from the U.S. 
Bureau of Labor Statistics Version: 0.5.3 - Authors@R: c( person( given = "Nevada Department of Employment, Training, and Rehabilitation", From 67ffbda63cc552a01da47f206f4ac4a52adb73f5 Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Tue, 28 Apr 2026 12:38:22 -0700 Subject: [PATCH 09/11] Add BLS_USER_AGENT to testthat headers for download_bls_files check. --- DESCRIPTION | 1 + tests/testthat/test-download_bls_files.R | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 5d7cb0f..f91d7e1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -48,6 +48,7 @@ Suggests: rmarkdown, R.utils, testthat (>= 3.0.0), + withr, tidyr, usethis VignetteBuilder: knitr diff --git a/tests/testthat/test-download_bls_files.R b/tests/testthat/test-download_bls_files.R index 3637b61..63a2fc5 100644 --- a/tests/testthat/test-download_bls_files.R +++ b/tests/testthat/test-download_bls_files.R @@ -4,6 +4,8 @@ test_that("download_bls_files downloads multiple files", { skip_on_cran() skip_if_offline() + withr::with_envvar(new = c("BLS_USER_AGENT" = "DETRLMI@detr.nv.gov"),{ + urls <- c( "State" = "https://download.bls.gov/pub/time.series/ce/ce.series", "Seasonal" = "https://download.bls.gov/pub/time.series/ce/ce.seasonal" @@ -19,6 +21,9 @@ test_that("download_bls_files downloads multiple files", { # Each should be a bls_data object expect_s3_class(result$State, "bls_data") expect_s3_class(result$Seasonal, "bls_data") + + }) + }) test_that("download_bls_files preserves URL names", { From e3eee0dee6b9b3aa0c7a25507f881cd122862887 Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Tue, 28 Apr 2026 13:26:46 -0700 Subject: [PATCH 10/11] Remove testthat from Github Action Review, correct issue with donttest and dontrun in get_ces --- .github/workflows/R-CMD-check.yaml | 1 + R/get_ces.R | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 562fe0f..d2a7fa1 100644 --- 
a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -28,6 +28,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes + _R_CHECK_TESTS_: "false" steps: - uses: actions/checkout@v4 diff --git a/R/get_ces.R b/R/get_ces.R index d16c052..7c4ccc8 100644 --- a/R/get_ces.R +++ b/R/get_ces.R @@ -86,8 +86,7 @@ #' if (has_bls_issues(ces_result)) { #' print_bls_warnings(ces_result) #' } -#' } -#' \donttest{ +#' #' # Complete dataset (slower - all states, industries, and years) #' # WARNING: This downloads a very large file and requires significant memory #' ces_all <- get_ces() From ccabe17966a8da02d47bb5846bd7562444a6eee5 Mon Sep 17 00:00:00 2001 From: David Schmidt <72461034+schmidtDETR@users.noreply.github.com> Date: Tue, 28 Apr 2026 13:35:57 -0700 Subject: [PATCH 11/11] Update R-CMD-check.yaml --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index d2a7fa1..da5a3bd 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -28,7 +28,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes - _R_CHECK_TESTS_: "false" + _R_CHECK_TESTS_: false steps: - uses: actions/checkout@v4