From 26e5e360205858c409078be9b20d76b21d3ba7ff Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Tue, 14 Apr 2026 08:44:33 -0700 Subject: [PATCH 1/2] Update download_helpers.R and get_bls_headers function to enable setting an email address via an environment variable. Updates to corresponding documentation and README file. --- R/download_helpers.R | 27 +++++++++++++++++++++++---- README.md | 12 ++++++++++++ man/get_bls_headers.Rd | 2 +- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/R/download_helpers.R b/R/download_helpers.R index 601b1db..95364a1 100644 --- a/R/download_helpers.R +++ b/R/download_helpers.R @@ -1,12 +1,31 @@ -#' Get standard BLS HTTP headers -#' +#' Generate headers for BLS requests +#' #' Returns a named character vector of HTTP headers required for BLS API requests. #' These headers mimic a standard browser to ensure compatibility with BLS servers. -#' +#' #' @param host The host to use in the Host header (default: "download.bls.gov") #' @return A named character vector of HTTP headers #' @keywords internal get_bls_headers <- function(host = "download.bls.gov") { + # 1. Check for a local environment variable first + # This allows users to set their email/identity via .Renviron or Sys.setenv() + ua <- Sys.getenv("BLS_USER_AGENT") + + # 2. If the variable is empty, use a list of plausible headers to rotate + if (ua == "") { + plausible_agents <- c( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0", + "Mozilla/5.0 (R; BLSloadR Package)" + ) + # Select one at random for this session/call + ua <- sample(plausible_agents, 1) + } + + # 3. 
Generate dynamic headers + c( "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Accept-Encoding" = "gzip, deflate, br", @@ -22,7 +41,7 @@ get_bls_headers <- function(host = "download.bls.gov") { "Sec-Fetch-Site" = "same-origin", "Sec-Fetch-User" = "?1", "Upgrade-Insecure-Requests" = "1", - "User-Agent" = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + "User-Agent" = ua ) } diff --git a/README.md b/README.md index 7d5aebd..1a80564 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,18 @@ The primary functions in this package all begin with get_ and are listed below: -`get_qcew()` - This accesses data from the Quarterly Census of Employment and Wages (QCEW). This is a very large data set, so access is filtered by area or industry. This function iterates requesting single-quarter files via the BLS QCEW Data Slices tool at https://www.bls.gov/cew/additional-resources/open-data/csv-data-slices.htm. This function was included beginning in version 0.3.1. -`get_cps_subset()` - This accesses data from the National Current Population Survey (CPS) which determines the national unemployment rate. Several demographic details are available here which are not available at the state or local levels. This is the "LN" database. This function was introduced in BLSloadR version 0.5. + +# Configuring Your User Profile +BLSloadR will typically work by default without any customization. However, there are some options you can use that may improve your experience. These options are managed with *environment variables* in your R session that enable the following: + +-`BLS_USER_AGENT` - setting this environment variable to your e-mail address will use your e-mail address when downloading data from the BLS. In case of errors with your downloads, this may help the BLS to identify you as an individual user.
Setting this environment variable to a character string passes that character string to the BLS as the User-Agent HTTP header. + +-`USE_BLS_CACHE` - Setting this environment variable to "TRUE" will enable a local file cache of your BLS downloads which will download new files for supported functions only when the underlying data has changed. + +-`BLS_CACHE_DIR` - If you want to use the file cache, you may wish to specify a location. Setting this environment variable will specify a different path for the file cache than the default. + +To permanently set these environment variables, you can edit your .Renviron file (such as with `usethis::edit_r_environ()`). To do so for a single session, you can set your environment variables with `Sys.setenv(USE_BLS_CACHE="TRUE")`. + # Enhanced CES Filtering for Performance The `get_ces()` and `get_national_ces()` functions now include powerful filtering options that significantly improve performance: diff --git a/man/get_bls_headers.Rd b/man/get_bls_headers.Rd index 425dde6..26975ed 100644 --- a/man/get_bls_headers.Rd +++ b/man/get_bls_headers.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/download_helpers.R \name{get_bls_headers} \alias{get_bls_headers} -\title{Get standard BLS HTTP headers} +\title{Generate headers for BLS requests} \usage{ get_bls_headers(host = "download.bls.gov") } From f62ded245b90d4fd21417c5484790ba9fba5be4d Mon Sep 17 00:00:00 2001 From: David Schmidt Date: Tue, 14 Apr 2026 10:09:55 -0700 Subject: [PATCH 2/2] Update DESCRIPTION to version 0.5.2, add note to NEWS.md, add usethis to suggested packages. --- DESCRIPTION | 5 +++-- NEWS.md | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index aa07ce3..b51f620 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BLSloadR Type: Package Title: Download Time Series Data from the U.S.
Bureau of Labor Statistics -Version: 0.5.1 +Version: 0.5.2 Authors@R: c( person( given = "Nevada Department of Employment, Training, and Rehabilitation", @@ -48,7 +48,8 @@ Suggests: rmarkdown, R.utils, testthat (>= 3.0.0), - tidyr + tidyr, + usethis VignetteBuilder: knitr URL: https://schmidtdetr.github.io/BLSloadR/ Config/Needs/website: rmarkdown diff --git a/NEWS.md b/NEWS.md index 5eda5b7..eab25d2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# BLSloadR 0.5.2 patch notes + +This patch includes a critical fix to resolve rate limit issues downloading data from the BLS. It implements a `BLS_USER_AGENT` environment variable which is read to populate the User-Agent header of file download requests to the BLS. Users encountering a 403 error on most requests will need to set this environment variable to ensure smooth downloads. Additional documentation and warning messages will be implemented in a future patch. # BLSloadR 0.5.1 patch notes