Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
_R_CHECK_TESTS_: false

steps:
- uses: actions/checkout@v4
Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: BLSloadR
Type: Package
Title: Download Time Series Data from the U.S. Bureau of Labor Statistics
Version: 0.5.2
Version: 0.5.3
Authors@R: c(
person(
given = "Nevada Department of Employment, Training, and Rehabilitation",
Expand Down Expand Up @@ -48,6 +48,7 @@ Suggests:
rmarkdown,
R.utils,
testthat (>= 3.0.0),
withr,
tidyr,
usethis
VignetteBuilder: knitr
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export(list_ces_states)
export(list_national_ces_options)
export(load_bls_dataset)
export(print_bls_warnings)
export(read_bls_excel)
export(read_bls_text)
export(show_ces_options)
export(show_national_ces_options)
Expand Down Expand Up @@ -58,6 +59,7 @@ importFrom(httr,HEAD)
importFrom(httr,add_headers)
importFrom(httr,content)
importFrom(httr,headers)
importFrom(httr,http_status)
importFrom(httr,progress)
importFrom(httr,status_code)
importFrom(httr,stop_for_status)
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# BLSloadR 0.4.3 patch notes

## Hotfix Updates

This patch updates BLSloadR to better address 403 and other download errors with to updates.

- The download logic now returns NULL and fails gracefully when 400/500 status errors are returned using `fread_bls()`
- This version incorporates logic from the development version of the package enabling the user to set a BLS_USER_AGENT environment variable. Using your email address as this will add it to the header request sent to the BLS, which will help to avoid 403 errors in the first place.
- Documentation updates explaining the environment variables have been added to the README file.

# BLSloadR 0.5.2 patch notes

This patch includes a critical fix to resolve rate limit issues downloading data from the BLS. It implements a `BLS_USER_AGENT` environment variable which is called to populate the file download requests to BLS. Users encountering a 403 error on most requests will need to set this environment variable to ensure smooth downloads. Additional documentation and warning messages will be implemented in a future patch.
Expand Down
60 changes: 27 additions & 33 deletions R/bls_overview.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#'
#' @examples
#' \donttest{
#' if(interactive()){
#' # Display Average Price Data overview
#' bls_overview("ap")
#'
Expand All @@ -27,11 +28,11 @@
#' # Display in console instead of viewer
#' bls_overview("ap", display_method = "console")
#' }
bls_overview <- function(
series_id,
display_method = "viewer",
base_url = "https://download.bls.gov/pub/time.series"
) {
#' }
bls_overview <- function(series_id,
display_method = "viewer",
base_url = "https://download.bls.gov/pub/time.series") {
# Validate inputs
if (!is.character(series_id) || length(series_id) != 1) {
stop("series_id must be a single character string")
Expand All @@ -43,34 +44,27 @@ bls_overview <- function(
url <- file.path(base_url, series_id, paste0(series_id, ".txt"))

# Fetch content with proper headers (similar to fread_bls)
tryCatch(
{
headers <- get_bls_headers()

response <- httr::GET(url, httr::add_headers(.headers = headers))
httr::stop_for_status(response)

content_text <- httr::content(response, as = "text", encoding = "UTF-8")

# Display based on method
switch(
display_method,
"viewer" = display_in_viewer(content_text, series_id),
"console" = display_in_console(content_text, series_id),
"popup" = display_in_popup(content_text, series_id)
)

invisible(content_text)
},
error = function(e) {
stop(sprintf(
"Could not fetch overview for series '%s'. URL: %s\nError: %s",
series_id,
url,
e$message
))
}
)
tryCatch({
headers <- get_bls_headers()

response <- httr::GET(url, httr::add_headers(.headers = headers))
httr::stop_for_status(response)

content_text <- httr::content(response, as = "text", encoding = "UTF-8")

# Display based on method
switch(display_method,
"viewer" = display_in_viewer(content_text, series_id),
"console" = display_in_console(content_text, series_id),
"popup" = display_in_popup(content_text, series_id)
)

invisible(content_text)

}, error = function(e) {
stop(sprintf("Could not fetch overview for series '%s'. URL: %s\nError: %s",
series_id, url, e$message))
})
}

#' Display text content in Viewer window.
Expand Down
46 changes: 42 additions & 4 deletions R/download_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
#' Returns a named character vector of HTTP headers required for BLS API requests.
#' These headers mimic a standard browser to ensure compatibility with BLS servers.
#'
#' @param host The host to use in the Host header (default: "download.bls.gov")
#' @param host The URL to use in the Host header (default: "download.bls.gov")
#' @return A named character vector of HTTP headers
#' @keywords internal
get_bls_headers <- function(host = "download.bls.gov") {
get_bls_headers <- function(host = "download.bls.gov"
) {
# 1. Check for a local environment variable first
# This allows users to set their email/identity via .Renviron or Sys.setenv()
ua <- Sys.getenv("BLS_USER_AGENT")
Expand Down Expand Up @@ -44,7 +45,44 @@ get_bls_headers <- function(host = "download.bls.gov") {
"User-Agent" = ua
)
}

#' Generate headers for BLS requests to download Excel files
#'
#' Returns a named character vector of HTTP headers required for BLS API requests.
#' These headers mimic a standard browser to ensure compatibility with BLS servers.
#' This function returns a more limited set of headers used to download an Ecel file.
#'
#' @param refer The URL to use in the Referer header (default: "https://www.bls.gov/lau/stalt-archived.htm")
#' @return A named character vector of HTTP headers
#' @keywords internal
get_bls_excel_headers <- function(refer = "https://www.bls.gov/lau/stalt-archived.htm") {
# 1. Check for a local environment variable first
# This allows users to set their email/identity via .Renviron or Sys.setenv()
ua <- Sys.getenv("BLS_USER_AGENT")

# 2. If the variable is empty, use a list of plausible headers to rotate
if (ua == "") {
plausible_agents <- c(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0",
"Mozilla/5.0 (R; BLSloadR Package)"
)
# Select one at random for this session/call
ua <- sample(plausible_agents, 1)
}

# 3. Generate dynamic headers

c(
"Referer" = refer,
"Sec-Ch-Ua" = 'Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
"Sec-Ch-Ua-Mobile" = "?0",
"Sec-Ch-Ua-Platform" = '"Windows"',
"Upgrade-Insecure-Requests" = "1",
"User-Agent" = ua
)
}
#' Create a BLS data object with diagnostics
#'
#' This is a helper function to create a list with the additional class 'bls_data_collection' containing data downloaded form the U.S. Bureau of Labor Statistics as well as diagnostic details about the download. It is used invisibly in the package to bundle information about file downloads.
Expand Down Expand Up @@ -185,7 +223,7 @@ get_bls_diagnostics <- function(bls_obj) {
smart_bls_download <- function(url, cache_dir = NULL, verbose = FALSE) {
# 1. Define specific headers required by BLS servers
bls_headers <- httr::add_headers(.headers = get_bls_headers())

# 2. Establish cache directory
if (is.null(cache_dir)) {
cache_dir <- bls_get_cache_dir()
Expand Down
142 changes: 140 additions & 2 deletions R/fread_BLS.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,64 @@ fread_bls <- function(
temp_file <- smart_bls_download(url, verbose = verbose)
} else {
headers <- get_bls_headers()

# Create temporary file for download

# Perform request and catch transport-level failures gracefully
response <- tryCatch(
httr::GET(url, httr::add_headers(.headers = headers)),
error = function(e) {
if (verbose) message("Network/transport error: ", conditionMessage(e))
return(NULL)
}
)

# If transport failed, exit early
if (is.null(response)) {
return(NULL)
}

status <- httr::status_code(response)

# For any non-2xx status, fail gracefully and return NULL
if (status < 200 || status >= 300) {
# Human-readable reason (e.g., "Client error", "Server error")
hs <- httr::http_status(response)

# Capture and clean server message (strip HTML, normalize spaces)
error_body <- httr::content(response, as = "text", encoding = "UTF-8")
clean_error <- gsub("<.*?>", "", error_body)
clean_error <- trimws(gsub("\\s+", " ", clean_error))
clean_error <- substr(clean_error, 1, 500)

# Provide a short hint by status code
hint <- switch(
as.character(status),
"401" = "Unauthorized.",
"403" = "Forbidden.",
"404" = "Not found.",
"429" = "Rate limited.",
{
if (status >= 500) "Server error. Consider retrying with backoff."
else "Client error. Inspect request headers and URL."
}
)

if (verbose) {
message(
sprintf(
"%s (%d). %s%s",
hs$message %||% hs$reason %||% "HTTP error",
status,
if (nzchar(clean_error)) paste0(" Server message: ", clean_error) else " No server message provided.",
if (nzchar(hint)) paste0(" Brief code description: ", hint) else ""
)
)
}

return(NULL)
}


raw_data <- httr::content(response, as = "raw")
temp_file <- tempfile(fileext = ".txt")

# Try httr::GET first, fallback to download.file for large files
Expand Down Expand Up @@ -367,3 +423,85 @@ fread_bls <- function(

return(result)
}

#' Download BLS Excel Data
#'
#' @param url Character string. URL to the BLS .xlsx or .xls file.
#' @param verbose Logical. If TRUE, prints diagnostic messages.
#' @param ... Additional arguments passed to readxl::read_excel (e.g., sheet, range).
#' @return A data.frame or NULL if the download or read fails.
#' @export
#' @importFrom httr GET add_headers status_code http_status content
#' @importFrom readxl read_excel
#' @examples
#' \dontrun{
#' # Download BLS Alternative MEasures History
#' salt_url <- "https://www.bls.gov/lau/stalt-moave.xlsx"
#' salt_data <- read_bls_excel(salt_url, skip = 1)
#'
#' }
#'
read_bls_excel <- function(url, verbose = FALSE, ...) {
# --- 1. DATA ACQUISITION ---
headers <- get_bls_excel_headers()

# Perform request and catch transport-level failures (e.g., DNS, Connection Refused)
response <- tryCatch(
httr::GET(url, httr::add_headers(.headers = headers)),
error = function(e) {
message("Network error: ", conditionMessage(e))
return(NULL)
}
)

if (is.null(response)) return(NULL)

status <- httr::status_code(response)

# --- 2. ERROR HANDLING (Always status, Detailed if Verbose) ---
if (status < 200 || status >= 300) {
hs <- httr::http_status(response)

# Always print the basic failure status
message(sprintf("Download failed for %s\nStatus: %d (%s)", url, status, hs$reason))

# Provide full response details only if verbose is TRUE
if (verbose) {
# Capture and clean server message
error_body <- tryCatch(httr::content(response, as = "text", encoding = "UTF-8"), error = function(e) "")
clean_error <- gsub("<.*?>", "", error_body)
clean_error <- substr(trimws(gsub("\\s+", " ", clean_error)), 1, 500)

# Determine Hint
hint <- switch(as.character(status),
"401" = "Unauthorized.",
"403" = "Forbidden. Check User-Agent or API key.",
"404" = "Not found.",
"429" = "Rate limited.",
if (status >= 500) "Server error. Consider retrying later." else "Client error.")

message(sprintf("Hint: %s", hint))
if (nzchar(clean_error)) message(sprintf("Server Message: %s", clean_error))
}

return(NULL)
}

# --- 3. FILE PROCESSING ---
raw_data <- httr::content(response, as = "raw")
temp_file <- tempfile(fileext = ".xlsx")
writeBin(raw_data, temp_file)

# Wrap the read in tryCatch for a graceful exit if the file is unreadable
data_out <- tryCatch({
readxl::read_excel(temp_file, ...)
}, error = function(e) {
message("Failed to parse Excel content: ", conditionMessage(e))
return(NULL)
})

# Cleanup temp file
if (file.exists(temp_file)) unlink(temp_file)

return(data_out)
}
16 changes: 10 additions & 6 deletions R/get_ces.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,7 @@
#' if (has_bls_issues(ces_result)) {
#' print_bls_warnings(ces_result)
#' }
#' }
#' \donttest{
#'
#' # Complete dataset (slower - all states, industries, and years)
#' # WARNING: This downloads a very large file and requires significant memory
#' ces_all <- get_ces()
Expand Down Expand Up @@ -279,14 +278,19 @@ get_ces <- function(
)

# Download all files
if (!suppress_warnings) {
if(!suppress_warnings){
message("Starting CES data download...\n")
}
downloads <- download_bls_files(
ces_urls,
ces_urls,
suppress_warnings = suppress_warnings,
cache = cache
)
cache = cache)

# Exit function if download failed.
if(is.null(downloads) | length(downloads) == 0 | length(ces_urls) != length(downloads)){
stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.")
}


# Extract data from downloads - handle multiple data files when downloading by states
if (!is.null(states) && !current_year_only && is.null(industry_filter)) {
Expand Down
Loading
Loading