Skip to content

Commit fdc9ae4

Browse files
Add 2021 EPA data (#51)
1 parent 9f41cb3 commit fdc9ae4

10 files changed

+191
-3
lines changed

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
* Added new datasets:
44
* `lizard_run` and `lizard_habitat` (thanks Stephen Adolph!)
5-
* `daycare_fines`, `biontech_adolescents`, `nyc_marathon`
5+
* `daycare_fines`, `biontech_adolescents`, `nyc_marathon`, `epa2021`
66
* Add tests for some of the new datasets
77

88
# openintro 2.1.0

R/data-epa2012.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#' Vehicle info from the EPA
1+
#' Vehicle info from the EPA for 2012
22
#'
33
#' Details from the EPA.
44
#'
@@ -37,6 +37,7 @@
3737
#' \item{fuel_cell}{Whether the car has a fuel cell or not, a factor with levels `N`, `Y`.}
3838
#' }
3939
#' @source Fueleconomy.gov, Shared MPG Estimates: Toyota Prius 2012.
40+
#' @seealso epa2021
4041
#' @keywords datasets
4142
#' @examples
4243
#'

R/data-epa2021.R

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#' Vehicle info from the EPA for 2021
2+
#'
3+
#' Details from the EPA.
4+
#'
5+
#'
6+
#' @name epa2021
7+
#' @docType data
8+
#' @format A data frame with 1108 observations on the following 28 variables.
9+
#' \describe{
10+
#' \item{model_yr}{Model year, a numeric vector.}
11+
#' \item{mfr_name}{Manufacturer name.}
12+
#' \item{division}{Vehicle division.}
13+
#' \item{carline}{Vehicle line.}
14+
#' \item{mfr_code}{Manufacturer code.}
15+
#' \item{model_type_index}{Model type index.}
16+
#' \item{engine_displacement}{Engine displacement.}
17+
#' \item{no_cylinders}{Number of cylinders.}
18+
#' \item{transmission_speed}{Transmission speed.}
19+
#' \item{city_mpg}{City mileage.}
20+
#' \item{hwy_mpg}{Highway mileage.}
21+
#' \item{comb_mpg}{Combined mileage.}
22+
#' \item{guzzler}{Whether the car is considered a "guzzler" or not, a factor with levels `N` and `Y`.}
23+
#' \item{air_aspir_method}{Air aspiration method.}
24+
#' \item{air_aspir_method_desc}{Air aspiration method description.}
25+
#' \item{transmission}{Transmission type.}
26+
#' \item{transmission_desc}{Transmission type description.}
27+
#' \item{no_gears}{Number of gears.}
28+
#' \item{trans_lockup}{Whether transmission locks up, a factor with levels `N` and `Y`.}
29+
#' \item{trans_creeper_gear}{A factor with level `N` only.}
30+
#' \item{drive_sys}{Drive system, a factor; see `drive_desc` for a description of each level.}
31+
#' \item{drive_desc}{Drive system description.}
32+
#' \item{fuel_usage}{Fuel usage, a factor; see `fuel_usage_desc` for a description of each level.}
33+
#' \item{fuel_usage_desc}{Fuel usage description.}
34+
#' \item{class}{Class of car.}
35+
#' \item{car_truck}{Car or truck, a factor with levels `car`, `1`, `??`, `1`.}
36+
#' \item{release_date}{Date of vehicle release.}
37+
#' \item{fuel_cell}{Whether the car has a fuel cell or not, a factor with levels `N`, `NA`.}
38+
#' }
39+
#' @source Fuel Economy Data from [fueleconomy.gov](https://www.fueleconomy.gov/feg/download.shtml). Retrieved 6 May, 2021.
40+
#' @seealso epa2012
41+
#' @keywords datasets
42+
#' @examples
43+
#'
44+
#' library(ggplot2)
45+
#' library(dplyr)
46+
#'
47+
#' # Variable descriptions
48+
#' distinct(epa2021, air_aspir_method_desc, air_aspir_method)
49+
#' distinct(epa2021, transmission_desc, transmission)
50+
#' distinct(epa2021, drive_desc, drive_sys)
51+
#' distinct(epa2021, fuel_usage_desc, fuel_usage)
52+
#'
53+
#' # Guzzlers and their mileages
54+
#' ggplot(epa2021, aes(x = city_mpg, y = hwy_mpg, color = guzzler)) +
55+
#' geom_point() +
56+
#' facet_wrap(~guzzler, ncol = 1)
57+
#'
58+
#' # Compare to 2012
59+
#' epa2021 %>%
60+
#' bind_rows(epa2012) %>%
61+
#' group_by(model_yr) %>%
62+
#' summarise(
63+
#' mean_city = mean(city_mpg),
64+
#' mean_hwy = mean(hwy_mpg)
65+
#' )
66+
#'
67+
"epa2021"
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Build the `epa2021` dataset from the raw EPA fuel economy workbook and save
# it into the package with usethis::use_data().

# Load packages ----------------------------------------------------------------

library(tidyverse)
library(usethis)
library(readxl)

# Load data --------------------------------------------------------------------

# Path to the raw workbook, resolved relative to the project root.
epa2021_path <- here::here(
  "data-raw/epa2021/2021 FE Guide for DOE-release dates before 4-10-2021-no-sales -4-9-2021UpdatePorscheforpublic.xlsx"
)

# Only the first sheet of the workbook is read.
epa2021_raw <- read_excel(epa2021_path, sheet = 1)

# Select columns ---------------------------------------------------------------

# Keep the raw columns, in the order that is renamed to the epa2012 variable
# names below.
epa2021_selected <- epa2021_raw %>%
  select(
    `Model Year`:`Comb FE (Guide) - Conventional Fuel`,
    `Guzzler?`:`Trans Desc`,
    `# Gears`:`Drive Desc`,
    `Fuel Usage - Conventional Fuel`, `Fuel Usage Desc - Conventional Fuel`,
    `Carline Class Desc`:`Car/Truck Category - Cash for Clunkers Bill.`,
    `Release Date`, `Fuel Cell Vehicle (Y or N)`
  )

# Reuse the epa2012 variable names so both datasets share one schema.
# NOTE(review): `epa2012` is not created or loaded by this script; it presumably
# comes from the package itself (e.g. via devtools::load_all()) -- confirm.
# The renaming is purely positional, so fail loudly if the column counts differ.
stopifnot(ncol(epa2021_selected) == length(names(epa2012)))
names(epa2021_selected) <- names(epa2012)

# Clean ------------------------------------------------------------------------

# Recode guzzler, make empty strings NAs, and store text columns as factors.
# NOTE(review): the original comment also mentioned fixing the date type, but no
# step here touches release_date -- confirm whether a conversion is needed.
epa2021 <- epa2021_selected %>%
  mutate(
    # "G" marks a guzzler; a missing value means "not a guzzler". Any other
    # value falls through case_when() and becomes NA.
    guzzler = case_when(
      guzzler == "G" ~ "Y",
      is.na(guzzler) ~ "N"
    ),
    # Empty strings become NA. This must target character columns and run
    # before the factor conversion: nothing is a factor yet at this point, so
    # selecting with where(is.factor) would match no columns.
    across(where(is.character), ~ na_if(.x, "")),
    across(where(is.character), as.factor),
    # Naturally aspirated engines get the literal string "NA" as their code, so
    # it is stored as a real factor level rather than a missing value.
    air_aspir_method = if_else(
      air_aspir_method_desc == "Naturally Aspirated",
      "NA",
      as.character(air_aspir_method)
    ),
    air_aspir_method = as.factor(air_aspir_method)
  )

# Add to package ---------------------------------------------------------------

use_data(epa2021, overwrite = TRUE)

data/epa2021.rda

18.4 KB
Binary file not shown.

man/epa2012.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/epa2021.Rd

Lines changed: 78 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkgdown/_pkgdown.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ reference:
9595
- email50
9696
- env_regulation
9797
- epa2012
98+
- epa2021
9899
- esi
99100
- ethanol
100101
- evals

0 commit comments

Comments
 (0)