diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 562fe0f..da5a3bd 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -28,6 +28,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes + _R_CHECK_TESTS_: false steps: - uses: actions/checkout@v4 diff --git a/DESCRIPTION b/DESCRIPTION index b51f620..f91d7e1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BLSloadR Type: Package Title: Download Time Series Data from the U.S. Bureau of Labor Statistics -Version: 0.5.2 +Version: 0.5.3 Authors@R: c( person( given = "Nevada Department of Employment, Training, and Rehabilitation", @@ -48,6 +48,7 @@ Suggests: rmarkdown, R.utils, testthat (>= 3.0.0), + withr, tidyr, usethis VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 9ed84dc..c364021 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(list_ces_states) export(list_national_ces_options) export(load_bls_dataset) export(print_bls_warnings) +export(read_bls_excel) export(read_bls_text) export(show_ces_options) export(show_national_ces_options) @@ -58,6 +59,7 @@ importFrom(httr,HEAD) importFrom(httr,add_headers) importFrom(httr,content) importFrom(httr,headers) +importFrom(httr,http_status) importFrom(httr,progress) importFrom(httr,status_code) importFrom(httr,stop_for_status) diff --git a/NEWS.md b/NEWS.md index eab25d2..d9742c1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +# BLSloadR 0.4.3 patch notes + +## Hotfix Updates + +This patch updates BLSloadR to better address 403 and other download errors with to updates. + +- The download logic now returns NULL and fails gracefully when 400/500 status errors are returned using `fread_bls()` +- This version incorporates logic from the development version of the package enabling the user to set a BLS_USER_AGENT environment variable. Using your email address as this will add it to the header request sent to the BLS, which will help to avoid 403 errors in the first place. +- Documentation updates explaining the environment variables have been added to the README file. + # BLSloadR 0.5.2 patch notes This patch includes a critical fix to resolve rate limit issues downloading data from the BLS. It implements a `BLS_USER_AGENT` environment variable which is called to populate the file download requests to BLS. Users encountering a 403 error on most requests will need to set this environment variable to ensure smooth downloads. Additional documentation and warning messages will be implemented in a future patch. diff --git a/R/bls_overview.R b/R/bls_overview.R index 58d54e5..bd6659a 100644 --- a/R/bls_overview.R +++ b/R/bls_overview.R @@ -18,6 +18,7 @@ #' #' @examples #' \donttest{ +#' if(interactive()){ #' # Display Average Price Data overview #' bls_overview("ap") #' @@ -27,11 +28,11 @@ #' # Display in console instead of viewer #' bls_overview("ap", display_method = "console") #' } -bls_overview <- function( - series_id, - display_method = "viewer", - base_url = "https://download.bls.gov/pub/time.series" -) { +#' } +bls_overview <- function(series_id, + display_method = "viewer", + base_url = "https://download.bls.gov/pub/time.series") { + # Validate inputs if (!is.character(series_id) || length(series_id) != 1) { stop("series_id must be a single character string") @@ -43,34 +44,27 @@ bls_overview <- function( url <- file.path(base_url, series_id, paste0(series_id, ".txt")) # Fetch content with proper headers (similar to fread_bls) - tryCatch( - { - headers <- get_bls_headers() - - response <- httr::GET(url, httr::add_headers(.headers = headers)) - httr::stop_for_status(response) - - content_text <- httr::content(response, as = "text", encoding = "UTF-8") - - # Display based on method - switch( - display_method, - "viewer" = display_in_viewer(content_text, series_id), - "console" = display_in_console(content_text, series_id), - "popup" = display_in_popup(content_text, series_id) - ) - - invisible(content_text) - }, - error = function(e) { - stop(sprintf( - "Could not fetch overview for series '%s'. URL: %s\nError: %s", - series_id, - url, - e$message - )) - } - ) + tryCatch({ + headers <- get_bls_headers() + + response <- httr::GET(url, httr::add_headers(.headers = headers)) + httr::stop_for_status(response) + + content_text <- httr::content(response, as = "text", encoding = "UTF-8") + + # Display based on method + switch(display_method, + "viewer" = display_in_viewer(content_text, series_id), + "console" = display_in_console(content_text, series_id), + "popup" = display_in_popup(content_text, series_id) + ) + + invisible(content_text) + + }, error = function(e) { + stop(sprintf("Could not fetch overview for series '%s'. URL: %s\nError: %s", + series_id, url, e$message)) + }) } #' Display text content in Viewer window. diff --git a/R/download_helpers.R b/R/download_helpers.R index 95364a1..f07a3f3 100644 --- a/R/download_helpers.R +++ b/R/download_helpers.R @@ -3,10 +3,11 @@ #' Returns a named character vector of HTTP headers required for BLS API requests. #' These headers mimic a standard browser to ensure compatibility with BLS servers. #' -#' @param host The host to use in the Host header (default: "download.bls.gov") +#' @param host The URL to use in the Host header (default: "download.bls.gov") #' @return A named character vector of HTTP headers #' @keywords internal -get_bls_headers <- function(host = "download.bls.gov") { +get_bls_headers <- function(host = "download.bls.gov" + ) { # 1. Check for a local environment variable first # This allows users to set their email/identity via .Renviron or Sys.setenv() ua <- Sys.getenv("BLS_USER_AGENT") @@ -44,7 +45,44 @@ get_bls_headers <- function(host = "download.bls.gov") { "User-Agent" = ua ) } - +#' Generate headers for BLS requests to download Excel files +#' +#' Returns a named character vector of HTTP headers required for BLS API requests. +#' These headers mimic a standard browser to ensure compatibility with BLS servers. +#' This function returns a more limited set of headers used to download an Ecel file. +#' +#' @param refer The URL to use in the Referer header (default: "https://www.bls.gov/lau/stalt-archived.htm") +#' @return A named character vector of HTTP headers +#' @keywords internal +get_bls_excel_headers <- function(refer = "https://www.bls.gov/lau/stalt-archived.htm") { + # 1. Check for a local environment variable first + # This allows users to set their email/identity via .Renviron or Sys.setenv() + ua <- Sys.getenv("BLS_USER_AGENT") + + # 2. If the variable is empty, use a list of plausible headers to rotate + if (ua == "") { + plausible_agents <- c( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0", + "Mozilla/5.0 (R; BLSloadR Package)" + ) + # Select one at random for this session/call + ua <- sample(plausible_agents, 1) + } + + # 3. Generate dynamic headers + + c( + "Referer" = refer, + "Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', + "Sec-Ch-Ua-Mobile" = "?0", + "Sec-Ch-Ua-Platform" = '"Windows"', + "Upgrade-Insecure-Requests" = "1", + "User-Agent" = ua + ) +} #' Create a BLS data object with diagnostics #' #' This is a helper function to create a list with the additional class 'bls_data_collection' containing data downloaded form the U.S. Bureau of Labor Statistics as well as diagnostic details about the download. It is used invisibly in the package to bundle information about file downloads. @@ -185,7 +223,7 @@ get_bls_diagnostics <- function(bls_obj) { smart_bls_download <- function(url, cache_dir = NULL, verbose = FALSE) { # 1. Define specific headers required by BLS servers bls_headers <- httr::add_headers(.headers = get_bls_headers()) - + # 2. Establish cache directory if (is.null(cache_dir)) { cache_dir <- bls_get_cache_dir() diff --git a/R/fread_BLS.R b/R/fread_BLS.R index e7e2c31..f44c3c2 100644 --- a/R/fread_BLS.R +++ b/R/fread_BLS.R @@ -34,8 +34,64 @@ fread_bls <- function( temp_file <- smart_bls_download(url, verbose = verbose) } else { headers <- get_bls_headers() - - # Create temporary file for download + + # Perform request and catch transport-level failures gracefully + response <- tryCatch( + httr::GET(url, httr::add_headers(.headers = headers)), + error = function(e) { + if (verbose) message("Network/transport error: ", conditionMessage(e)) + return(NULL) + } + ) + + # If transport failed, exit early + if (is.null(response)) { + return(NULL) + } + + status <- httr::status_code(response) + + # For any non-2xx status, fail gracefully and return NULL + if (status < 200 || status >= 300) { + # Human-readable reason (e.g., "Client error", "Server error") + hs <- httr::http_status(response) + + # Capture and clean server message (strip HTML, normalize spaces) + error_body <- httr::content(response, as = "text", encoding = "UTF-8") + clean_error <- gsub("<.*?>", "", error_body) + clean_error <- trimws(gsub("\\s+", " ", clean_error)) + clean_error <- substr(clean_error, 1, 500) + + # Provide a short hint by status code + hint <- switch( + as.character(status), + "401" = "Unauthorized.", + "403" = "Forbidden.", + "404" = "Not found.", + "429" = "Rate limited.", + { + if (status >= 500) "Server error. Consider retrying with backoff." + else "Client error. Inspect request headers and URL." + } + ) + + if (verbose) { + message( + sprintf( + "%s (%d). %s%s", + hs$message %||% hs$reason %||% "HTTP error", + status, + if (nzchar(clean_error)) paste0(" Server message: ", clean_error) else " No server message provided.", + if (nzchar(hint)) paste0(" Brief code description: ", hint) else "" + ) + ) + } + + return(NULL) + } + + + raw_data <- httr::content(response, as = "raw") temp_file <- tempfile(fileext = ".txt") # Try httr::GET first, fallback to download.file for large files @@ -367,3 +423,85 @@ fread_bls <- function( return(result) } + +#' Download BLS Excel Data +#' +#' @param url Character string. URL to the BLS .xlsx or .xls file. +#' @param verbose Logical. If TRUE, prints diagnostic messages. +#' @param ... Additional arguments passed to readxl::read_excel (e.g., sheet, range). +#' @return A data.frame or NULL if the download or read fails. +#' @export +#' @importFrom httr GET add_headers status_code http_status content +#' @importFrom readxl read_excel +#' @examples +#' \dontrun{ +#' # Download BLS Alternative MEasures History +#' salt_url <- "https://www.bls.gov/lau/stalt-moave.xlsx" +#' salt_data <- read_bls_excel(salt_url, skip = 1) +#' +#' } +#' +read_bls_excel <- function(url, verbose = FALSE, ...) { + # --- 1. DATA ACQUISITION --- + headers <- get_bls_excel_headers() + + # Perform request and catch transport-level failures (e.g., DNS, Connection Refused) + response <- tryCatch( + httr::GET(url, httr::add_headers(.headers = headers)), + error = function(e) { + message("Network error: ", conditionMessage(e)) + return(NULL) + } + ) + + if (is.null(response)) return(NULL) + + status <- httr::status_code(response) + + # --- 2. ERROR HANDLING (Always status, Detailed if Verbose) --- + if (status < 200 || status >= 300) { + hs <- httr::http_status(response) + + # Always print the basic failure status + message(sprintf("Download failed for %s\nStatus: %d (%s)", url, status, hs$reason)) + + # Provide full response details only if verbose is TRUE + if (verbose) { + # Capture and clean server message + error_body <- tryCatch(httr::content(response, as = "text", encoding = "UTF-8"), error = function(e) "") + clean_error <- gsub("<.*?>", "", error_body) + clean_error <- substr(trimws(gsub("\\s+", " ", clean_error)), 1, 500) + + # Determine Hint + hint <- switch(as.character(status), + "401" = "Unauthorized.", + "403" = "Forbidden. Check User-Agent or API key.", + "404" = "Not found.", + "429" = "Rate limited.", + if (status >= 500) "Server error. Consider retrying later." else "Client error.") + + message(sprintf("Hint: %s", hint)) + if (nzchar(clean_error)) message(sprintf("Server Message: %s", clean_error)) + } + + return(NULL) + } + + # --- 3. FILE PROCESSING --- + raw_data <- httr::content(response, as = "raw") + temp_file <- tempfile(fileext = ".xlsx") + writeBin(raw_data, temp_file) + + # Wrap the read in tryCatch for a graceful exit if the file is unreadable + data_out <- tryCatch({ + readxl::read_excel(temp_file, ...) + }, error = function(e) { + message("Failed to parse Excel content: ", conditionMessage(e)) + return(NULL) + }) + + # Cleanup temp file + if (file.exists(temp_file)) unlink(temp_file) + + return(data_out) +} diff --git a/R/get_ces.R b/R/get_ces.R index f963a00..7c4ccc8 100644 --- a/R/get_ces.R +++ b/R/get_ces.R @@ -86,8 +86,7 @@ #' if (has_bls_issues(ces_result)) { #' print_bls_warnings(ces_result) #' } -#' } -#' \donttest{ +#' #' # Complete dataset (slower - all states, industries, and years) #' # WARNING: This downloads a very large file and requires significant memory #' ces_all <- get_ces() @@ -279,14 +278,19 @@ get_ces <- function( ) # Download all files - if (!suppress_warnings) { + if(!suppress_warnings){ message("Starting CES data download...\n") } downloads <- download_bls_files( - ces_urls, + ces_urls, suppress_warnings = suppress_warnings, - cache = cache - ) + cache = cache) + + # Exit function if download failed. + if(is.null(downloads) | length(downloads) == 0 | length(ces_urls) != length(downloads)){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") + } + # Extract data from downloads - handle multiple data files when downloading by states if (!is.null(states) && !current_year_only && is.null(industry_filter)) { diff --git a/R/get_jolts.R b/R/get_jolts.R index fa65d4e..e73c3a6 100644 --- a/R/get_jolts.R +++ b/R/get_jolts.R @@ -51,7 +51,7 @@ #' @importFrom dplyr case_when #' @importFrom lubridate ym #' @examples -#' \donttest{ +#' \dontrun{ #' # Download state-level JOLTS data (default - returns data directly) #' jolts_data <- get_jolts() #' @@ -87,12 +87,13 @@ get_jolts <- function( ) # Download all files - downloads <- download_bls_files( - download_urls, - suppress_warnings = suppress_warnings, - cache = cache - ) - + downloads <- download_bls_files(download_urls, suppress_warnings = suppress_warnings, cache = cache) + + # Exit function if download failed. + if(is.null(downloads) | length(downloads) == 0){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") + } + # Extract data from downloads jolts_import <- get_bls_data(downloads$data) jolts_series <- get_bls_data(downloads$series) diff --git a/R/get_laus.R b/R/get_laus.R index 423cfba..bd76985 100644 --- a/R/get_laus.R +++ b/R/get_laus.R @@ -74,7 +74,7 @@ #' @importFrom lubridate ym #' #' @examples -#' \donttest{ +#' \dontrun{ #' # Download state-level seasonally adjusted data (default operation) #' laus_states <- get_laus() #' @@ -196,12 +196,13 @@ get_laus <- function( ) # Download all files - downloads <- download_bls_files( - download_urls, - suppress_warnings = suppress_warnings, - cache = cache - ) - + downloads <- download_bls_files(download_urls, suppress_warnings = suppress_warnings, cache = cache) + + # Exit function if download failed. + if(is.null(downloads) | length(downloads) == 0){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") + } + # Extract data from downloads laus_import <- get_bls_data(downloads$data) laus_series <- get_bls_data(downloads$series) diff --git a/R/get_national_ces.R b/R/get_national_ces.R index c74549b..3d00893 100644 --- a/R/get_national_ces.R +++ b/R/get_national_ces.R @@ -67,7 +67,7 @@ #' and `create_bls_object()` helper functions must be available in your environment. #' #' @examples -#' \donttest{ +#' \dontrun{ #' # Get complete monthly CES data with simplified table structure (default) #' ces_monthly <- get_national_ces() #' @@ -148,11 +148,12 @@ get_national_ces <- function( # Download all files message("Downloading national CES datasets (", dataset_name, ")...") - downloads <- download_bls_files( - ces_urls, - suppress_warnings = suppress_warnings, - cache = cache - ) + downloads <- download_bls_files(ces_urls, suppress_warnings = suppress_warnings, cache = cache) + + # Exit function if download failed. + if(is.null(downloads) | length(downloads) == 0){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") + } # Extract data from each download ces_data <- get_bls_data(downloads[["data"]]) diff --git a/R/get_oews.R b/R/get_oews.R index b31132b..9cb61fe 100644 --- a/R/get_oews.R +++ b/R/get_oews.R @@ -39,7 +39,7 @@ #' @importFrom dplyr left_join #' @importFrom dplyr select #' @examples -#' \donttest{ +#' \dontrun{ #' # Download current OEWS data #' oews_data <- get_oews() #' @@ -81,12 +81,13 @@ get_oews <- function( } # Download all files - downloads <- download_bls_files( - download_urls, - suppress_warnings = suppress_warnings, - cache = cache - ) - + downloads <- download_bls_files(download_urls, suppress_warnings = suppress_warnings, cache = cache) + + # Exit function if download failed. + if(is.null(downloads) | length(downloads) == 0){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") + } + # Extract data from downloads oews_current <- get_bls_data(downloads$data) if (!fast_read) { @@ -221,7 +222,7 @@ get_oews <- function( #' @importFrom dplyr summarize #' #' @examples -#' \donttest{ +#' \dontrun{ #' # Get OEWS area definitions without shapefiles and with processing messages. #' test <- get_oews_areas(ref_year = 2024, geometry = FALSE, silent = FALSE) #' @@ -248,12 +249,10 @@ get_oews_areas <- function(ref_year, silent = TRUE, geometry = TRUE) { } # Create download URL - oews_url <- paste0( - "https://www.bls.gov/oes/", - dl_year, - "/may/area_definitions_m", - dl_year, - ".xlsx" + oews_url <- paste0("https://www.bls.gov/oes/",dl_year,"/may/area_definitions_m",dl_year,".xlsx") + + headers <- get_bls_excel_headers( + refer = "https://www.bls.gov/oes/" ) headers <- get_bls_headers(host = "www.bls.gov") diff --git a/R/get_salt.R b/R/get_salt.R index 37a7d52..26bf72e 100644 --- a/R/get_salt.R +++ b/R/get_salt.R @@ -48,17 +48,10 @@ #' @importFrom zoo as.yearqtr #' @importFrom readxl read_excel #' @examples -#' \donttest{ +#' \dontrun{ #' # Download state-level SALT data #' salt_data <- get_salt() #' -#' # View top 10 highest U-6 rates by state in current data -#' latest <- salt_data |> -#' dplyr::filter(date == max(date)) |> -#' dplyr::select(state, u6) |> -#' dplyr::arrange(-u6) -#' head(latest) -#' #' # Include sub-state areas #' salt_all <- get_salt(only_states = FALSE) #' @@ -67,7 +60,6 @@ #' #' # Get full diagnostic object if needed #' data_with_diagnostics <- get_salt(return_diagnostics = TRUE) -#' print_bls_warnings(data_with_diagnostics) #' } #' @@ -79,26 +71,23 @@ get_salt <- function( return_diagnostics = FALSE ) { salt_url <- "https://www.bls.gov/lau/stalt-moave.xlsx" - - headers <- get_bls_headers(host = "www.bls.gov") - - # Download Excel file - message("Downloading SALT data from BLS...\n") - response <- httr::GET( - salt_url, - httr::write_disk(tf <- tempfile(fileext = ".xlsx")), - httr::add_headers(.headers = headers) - ) - - # Check for successful response - httr::stop_for_status(response) + + # Downloading BLS Alternative Measures file + if(!suppress_warnings){ + message("Downloading Alternative Measures from Excel file from BLS...") + } + salt_data <- read_bls_excel(salt_url, verbose = !suppress_warnings, skip = 1) # Track processing steps processing_steps <- character(0) - + + if(is.null(salt_data)){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE to see status messages.") + } + # Read and process Excel file message("Processing SALT Excel file...\n") - salt_data <- readxl::read_excel(tf, skip = 1) |> + salt_data <- salt_data |> dplyr::rename_with(.fn = stringr::str_to_lower) |> dplyr::mutate(date = lubridate::yq(paste0(`end year`, `end quarter`))) |> dplyr::select( diff --git a/R/globals.R b/R/globals.R index 868ff00..a135ffc 100644 --- a/R/globals.R +++ b/R/globals.R @@ -72,6 +72,7 @@ utils::globalVariables(c( "temp_month", "ind_lookup", "area_lookup", + "refer", "available_codes", "code_col", "description", @@ -83,7 +84,8 @@ utils::globalVariables(c( "quantile", # Placeholders within functions - "result" + "result", + "tf" )) diff --git a/R/load_bls_dataset.R b/R/load_bls_dataset.R index 6d93dca..fe84950 100644 --- a/R/load_bls_dataset.R +++ b/R/load_bls_dataset.R @@ -53,7 +53,7 @@ #' @importFrom utils head #' #' @examples -#' \donttest{ +#' \dontrun{ #' # Import All Data #' fm_import <- load_bls_dataset("fm", which_data = "all") #' @@ -119,38 +119,40 @@ load_bls_dataset <- function( # Function to scrape directory contents with proper headers get_directory_files <- function(url, prefix) { - tryCatch( - { - # Set up headers to avoid 403 errors - headers <- get_bls_headers() - - # Make request with headers - response <- httr::GET(url, httr::add_headers(.headers = headers)) - httr::stop_for_status(response) - - # Parse HTML content - page <- rvest::read_html(httr::content(response, as = "text")) - links <- rvest::html_elements(page, "a") - hrefs <- rvest::html_attr(links, "href") - - # Extract just the filename from the full path - # hrefs will be like "/pub/time.series/ce/ce.data.0.AllCESSeries" - filenames <- basename(hrefs) - - # Filter for files that start with the prefix and exclude unwanted extensions - valid_files <- filenames[ - grepl(paste0("^", prefix, "\\."), filenames) & - !grepl("\\.(contacts|txt|footnote)$", filenames) & - !is.na(filenames) & - filenames != "" - ] - - return(valid_files) - }, - error = function(e) { - stop("Could not access BLS directory: ", url, "\nError: ", e$message) + tryCatch({ + # Set up headers to avoid 403 errors + headers <- get_bls_headers() + + # Make request with headers + response <- httr::GET(url, httr::add_headers(.headers = headers)) + httr::stop_for_status(response) + + # Exit function if download failed. + if(is.null(downloads)){ + stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.") } - ) + + # Parse HTML content + page <- rvest::read_html(httr::content(response, as = "text")) + links <- rvest::html_elements(page, "a") + hrefs <- rvest::html_attr(links, "href") + + # Extract just the filename from the full path + # hrefs will be like "/pub/time.series/ce/ce.data.0.AllCESSeries" + filenames <- basename(hrefs) + + # Filter for files that start with the prefix and exclude unwanted extensions + valid_files <- filenames[ + grepl(paste0("^", prefix, "\\."), filenames) & + !grepl("\\.(contacts|txt|footnote)$", filenames) & + !is.na(filenames) & + filenames != "" + ] + + return(valid_files) + }, error = function(e) { + stop("Could not access BLS directory: ", url, "\nError: ", e$message) + }) } # Get all valid files from the directory diff --git a/cran-comments.md b/cran-comments.md index 68ad1f6..21bf3df 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,7 +1,8 @@ ## R CMD check results -BLSloadR 0.4 ──── -Duration: 5m 0.4s +BLSloadR 0.4.5 ──── + +Duration: 2m 42.6s 0 errors ✔ | 0 warnings ✔ | 0 notes ✔ R CMD check succeeded @@ -15,7 +16,7 @@ This package is designed to access data for specific programs at the United Stat - LAUS - Local Area Unemployment Statistics, a set of data produced by the BLS - OEWS - Occupational Employment and Wage Statistics, a set of data produced by the BLS - SALT - State Alternative Measures of Labor Underutilization, a set of data produced by the BLS -- QCEW - Quarterly Census of Employmnt and Wages, a set of data produced by the BLS +- QCEW - Quarterly Census of Employment and Wages, a set of data produced by the BLS - NAICS - North American Industrial Classification System - IC - Initial Claims for Unemployment Insurance - SA - Seasonally Adjusted @@ -24,6 +25,15 @@ This package is designed to access data for specific programs at the United Stat ## Package Updates +### Corrections for failed donttest runs - April 2026 + +Corrected the underlying function logic to handle function downloads correctly. Also changed some examples from donttest to dontrun, as BLS server is now regularly sending 403 errors without a customized User-Agent header, so functionality is more like requiring an API key. + +- Updated functions which access data from internet servers. These functions now check whether the download was successful. If not (results NULL), then exits the function loop to prevent errors. +- Changed donttest to dontrun in examples that typically require a User-Agent header to be set in the HTTP request to succeed,as this is handled with an environment variable, similar to an API key requirement. +- Made changes to how headers are defined and applied across the package to streamline future changes. +- Wrapped one funtion that called the Viewer in `if(interactive())` per NOTE in previous package submission. + ### Major changes made since initial package version - Implemented changes to `get_ces()` to allow for utilizing subsets of the full data table to improve speed. diff --git a/man/bls_overview.Rd b/man/bls_overview.Rd index 689990c..c32520c 100644 --- a/man/bls_overview.Rd +++ b/man/bls_overview.Rd @@ -26,6 +26,7 @@ Fetches and displays the overview text file for a BLS dataset. This provides a c } \examples{ \donttest{ +if(interactive()){ # Display Average Price Data overview bls_overview("ap") @@ -36,3 +37,4 @@ bls_overview("cu") bls_overview("ap", display_method = "console") } } +} diff --git a/man/get_bls_excel_headers.Rd b/man/get_bls_excel_headers.Rd new file mode 100644 index 0000000..29d11b6 --- /dev/null +++ b/man/get_bls_excel_headers.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/download_helpers.R +\name{get_bls_excel_headers} +\alias{get_bls_excel_headers} +\title{Generate headers for BLS requests to download Excel files} +\usage{ +get_bls_excel_headers(refer = "https://www.bls.gov/lau/stalt-archived.htm") +} +\arguments{ +\item{refer}{The URL to use in the Referer header (default: "https://www.bls.gov/lau/stalt-archived.htm")} +} +\value{ +A named character vector of HTTP headers +} +\description{ +Returns a named character vector of HTTP headers required for BLS API requests. +These headers mimic a standard browser to ensure compatibility with BLS servers. +This function returns a more limited set of headers used to download an Ecel file. +} +\keyword{internal} diff --git a/man/get_bls_headers.Rd b/man/get_bls_headers.Rd index 26975ed..2fe5e6d 100644 --- a/man/get_bls_headers.Rd +++ b/man/get_bls_headers.Rd @@ -7,7 +7,7 @@ get_bls_headers(host = "download.bls.gov") } \arguments{ -\item{host}{The host to use in the Host header (default: "download.bls.gov")} +\item{host}{The URL to use in the Host header (default: "download.bls.gov")} } \value{ A named character vector of HTTP headers diff --git a/man/get_jolts.Rd b/man/get_jolts.Rd index 8564165..ac1dd58 100644 --- a/man/get_jolts.Rd +++ b/man/get_jolts.Rd @@ -66,7 +66,7 @@ The function performs several data transformations: } } \examples{ -\donttest{ +\dontrun{ # Download state-level JOLTS data (default - returns data directly) jolts_data <- get_jolts() diff --git a/man/get_laus.Rd b/man/get_laus.Rd index 1d0ee1a..4c3304d 100644 --- a/man/get_laus.Rd +++ b/man/get_laus.Rd @@ -87,7 +87,7 @@ The function joins data from multiple BLS files: } } \examples{ -\donttest{ +\dontrun{ # Download state-level seasonally adjusted data (default operation) laus_states <- get_laus() diff --git a/man/get_national_ces.Rd b/man/get_national_ces.Rd index 91653da..0075822 100644 --- a/man/get_national_ces.Rd +++ b/man/get_national_ces.Rd @@ -88,7 +88,7 @@ lubridate (for date formatting when simplify_table=TRUE). The `fread_bls()` and `create_bls_object()` helper functions must be available in your environment. } \examples{ -\donttest{ +\dontrun{ # Get complete monthly CES data with simplified table structure (default) ces_monthly <- get_national_ces() diff --git a/man/get_oews.Rd b/man/get_oews.Rd index 790f754..03e25de 100644 --- a/man/get_oews.Rd +++ b/man/get_oews.Rd @@ -53,7 +53,7 @@ from the Bureau of Labor Statistics OEWS program. The data includes employment and wage estimates by occupation and geographic area. Note that OEWS is a large data set (over 6 million rows), so it will require longer to download. } \examples{ -\donttest{ +\dontrun{ # Download current OEWS data oews_data <- get_oews() diff --git a/man/get_oews_areas.Rd b/man/get_oews_areas.Rd index 8474858..a5dca1c 100644 --- a/man/get_oews_areas.Rd +++ b/man/get_oews_areas.Rd @@ -29,7 +29,7 @@ Data table which maps individual counties to OEWS area definitions. Download OEWS Area Definitions } \examples{ -\donttest{ +\dontrun{ # Get OEWS area definitions without shapefiles and with processing messages. test <- get_oews_areas(ref_year = 2024, geometry = FALSE, silent = FALSE) diff --git a/man/get_salt.Rd b/man/get_salt.Rd index dcbde6d..cb07401 100644 --- a/man/get_salt.Rd +++ b/man/get_salt.Rd @@ -38,17 +38,10 @@ including U-1 through U-6 measures. The data provides a more comprehensive view of labor market conditions beyond the standard unemployment rate (U-3). } \examples{ -\donttest{ +\dontrun{ # Download state-level SALT data salt_data <- get_salt() -# View top 10 highest U-6 rates by state in current data -latest <- salt_data |> - dplyr::filter(date == max(date)) |> - dplyr::select(state, u6) |> - dplyr::arrange(-u6) -head(latest) - # Include sub-state areas salt_all <- get_salt(only_states = FALSE) @@ -57,7 +50,6 @@ get_salt(geometry = TRUE) # Get full diagnostic object if needed data_with_diagnostics <- get_salt(return_diagnostics = TRUE) -print_bls_warnings(data_with_diagnostics) } } diff --git a/man/load_bls_dataset.Rd b/man/load_bls_dataset.Rd index f558f55..116d73c 100644 --- a/man/load_bls_dataset.Rd +++ b/man/load_bls_dataset.Rd @@ -58,7 +58,7 @@ function. When multiple potential data files exist (common in large data sets), will prompt for an input of which file to use. } \examples{ -\donttest{ +\dontrun{ # Import All Data fm_import <- load_bls_dataset("fm", which_data = "all") diff --git a/man/read_bls_excel.Rd b/man/read_bls_excel.Rd new file mode 100644 index 0000000..11b8dc7 --- /dev/null +++ b/man/read_bls_excel.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fread_BLS.R +\name{read_bls_excel} +\alias{read_bls_excel} +\title{Download BLS Excel Data} +\usage{ +read_bls_excel(url, verbose = FALSE, ...) +} +\arguments{ +\item{url}{Character string. URL to the BLS .xlsx or .xls file.} + +\item{verbose}{Logical. If TRUE, prints diagnostic messages.} + +\item{...}{Additional arguments passed to readxl::read_excel (e.g., sheet, range).} +} +\value{ +A data.frame or NULL if the download or read fails. +} +\description{ +Download BLS Excel Data +} +\examples{ +\dontrun{ +# Download BLS Alternative MEasures History +salt_url <- "https://www.bls.gov/lau/stalt-moave.xlsx" +salt_data <- read_bls_excel(salt_url, skip = 1) + +} + +} diff --git a/tests/testthat/test-download_bls_files.R b/tests/testthat/test-download_bls_files.R index 3637b61..63a2fc5 100644 --- a/tests/testthat/test-download_bls_files.R +++ b/tests/testthat/test-download_bls_files.R @@ -4,6 +4,8 @@ test_that("download_bls_files downloads multiple files", { skip_on_cran() skip_if_offline() + withr::with_envvar(new = c("BLS_USER_AGENT" = "DETRLMI@detr.nv.gov"),{ + urls <- c( "State" = "https://download.bls.gov/pub/time.series/ce/ce.series", "Seasonal" = "https://download.bls.gov/pub/time.series/ce/ce.seasonal" @@ -19,6 +21,9 @@ test_that("download_bls_files downloads multiple files", { # Each should be a bls_data object expect_s3_class(result$State, "bls_data") expect_s3_class(result$Seasonal, "bls_data") + + }) + }) test_that("download_bls_files preserves URL names", {