-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBoxplot_oct.R
More file actions
100 lines (76 loc) · 4.38 KB
/
Boxplot_oct.R
File metadata and controls
100 lines (76 loc) · 4.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#BOXPLOT
library(ggplot2)
#' Draw an annotated box plot of the inter-arrival times
#'
#' Reads a table of inter-arrival times, draws the box plot of its first
#' column with ggplot2 and labels the mean, the three quartiles, the
#' interquartile range, both whiskers and the outlier region.
#'
#' The vertical positions of the "Outliers" label and of the axis
#' (0 to 130000, one tick every 10000) are fixed constants.
#'
#' Side effect: the ggplot2 default aesthetics of the "text" and "label"
#' geoms are changed (size 3, left aligned, `family`) and are not restored.
#'
#' @param family Font family used for every piece of text in the plot.
#' @param path Path of the file read with `read.table()`; the values are
#'   taken from its first column (`V1`). The default is the location the
#'   script was originally written for.
#' @return A ggplot object. Nothing is drawn until it is printed.
ggplot_box_legend <- function(
    family = "sans",
    path = "C:/Users/jessi/Desktop/Università/Magistrale/II ANNO/Data Science/Homeworks/Homework1/ffdatools/tuples-bgloct_1-120/interarrivals.txt") {
  # Read the inter-arrivals file, failing early with a clear message.
  if (!file.exists(path)) {
    stop("Inter-arrivals file not found: ", path, call. = FALSE)
  }
  interarrivals <- read.table(path)
  if (!is.numeric(interarrivals$V1) || anyNA(interarrivals$V1)) {
    stop("Column V1 of ", path, " must be numeric without missing values",
         call. = FALSE)
  }
  mean_val <- mean(interarrivals$V1)

  # Compute the values a box plot displays for the numeric vector `x`.
  ggplot2_boxplot <- function(x) {
    quartiles <- as.numeric(quantile(x, probs = c(0.25, 0.5, 0.75)))
    names(quartiles) <- c("25th percentile", "50th percentile (median)", "75th percentile")
    iqr <- diff(quartiles[c(1, 3)])
    upper_fence <- quartiles[3] + 1.5 * iqr
    lower_fence <- quartiles[1] - 1.5 * iqr
    # A whisker ends at the most extreme value that is NOT an outlier, so a
    # value lying exactly on a fence belongs to the whisker (<= / >=).
    # With strict comparisons such a value was neither whisker nor dot, and
    # constant data (IQR of 0) left nothing to take the max/min of.
    list("quartiles" = quartiles,
         "25th percentile" = as.numeric(quartiles[1]),
         "50th percentile (median)" = as.numeric(quartiles[2]),
         "75th percentile" = as.numeric(quartiles[3]),
         "IQR" = iqr,
         "upper_whisker" = max(x[x <= upper_fence]),
         "lower_whisker" = min(x[x >= lower_fence]),
         "upper_dots" = x[x > upper_fence],
         "lower_dots" = x[x < lower_fence])
  }

  # Get those values:
  ggplot_output <- ggplot2_boxplot(interarrivals$V1)

  # Lots of text in the plot, make it smaller and use a consistent font.
  # NOTE: these calls change the session-wide defaults of both geoms.
  update_geom_defaults("text", list(size = 3, hjust = 0, family = family))
  # Labels don't inherit the text defaults:
  update_geom_defaults("label", list(size = 3, hjust = 0, family = family))

  # The box plot, error bar and mean point are the easy part; the text
  # describing each of them needs hand-tuned positions.
  explain_plot <- ggplot() +
    stat_boxplot(data = interarrivals, aes(x = "", y = V1),
                 geom = "errorbar", width = 0.3) +
    geom_boxplot(data = interarrivals, aes(x = "", y = V1),
                 width = 0.3, fill = "lightgrey") +
    # Two layers with an empty label, kept from the original layout.
    geom_text(aes(x = 1, y = 40000, label = ""), hjust = 0.5) +
    geom_text(aes(x = 1.17, y = 40000, label = ""),
              fontface = "bold", vjust = 0.4) +
    # Mean, drawn as a red point with its caption.
    geom_point(aes(x = "", y = mean_val), colour = "red", size = 2) +
    geom_text(aes(x = c(1.33), y = mean_val, label = "Mean"), hjust = 0.5) +
    # Bracket spanning the interquartile range.
    geom_segment(aes(x = 2.3, xend = 2.3,
                     y = ggplot_output[["25th percentile"]],
                     yend = ggplot_output[["75th percentile"]])) +
    geom_segment(aes(x = 1.2, xend = 2.3,
                     y = ggplot_output[["25th percentile"]],
                     yend = ggplot_output[["25th percentile"]])) +
    geom_segment(aes(x = 1.2, xend = 2.3,
                     y = ggplot_output[["75th percentile"]],
                     yend = ggplot_output[["75th percentile"]])) +
    geom_text(aes(x = 2.4,
                  y = (ggplot_output[["50th percentile (median)"]])),
              label = "Interquartile range", fontface = "bold", vjust = 0.4) +
    # Whisker captions.
    geom_text(aes(x = c(1.17, 1.17),
                  y = c(ggplot_output[["upper_whisker"]],
                        ggplot_output[["lower_whisker"]]),
                  label = c("Largest value within 1.5 times interquartile range above\n75th percentile","\nSmallest value within 1.5 times interquartile range below\n25th percentile")),
              fontface = "bold", vjust = 0.9) +
    geom_text(aes(x = c(1.17), y = 75000, label = "Outliers"),
              vjust = 0.5, fontface = "bold") +
    # Quartile captions on a white background so they hide the bracket lines.
    geom_label(aes(x = 1.17, y = ggplot_output[["quartiles"]],
                   label = names(ggplot_output[["quartiles"]])),
               vjust = c(0.4, 0.85, 0.4), fill = "white", label.size = 0) +
    ylab("") +
    xlab("") +
    theme_minimal(base_size = 5, base_family = family) +
    theme(axis.text = element_blank(),
          axis.ticks = element_blank(),
          axis.line.y = element_line(color = "black"),
          axis.ticks.y = element_line(color = "black"),
          axis.text.y = element_text(
            margin = margin(0.3, 0.3, 0.3, 0.3, unit = "cm"), size = 10
          ),
          axis.ticks.length.y = unit(.25, "cm"),
          panel.grid = element_blank(),
          aspect.ratio = 4 / 3,
          plot.title = element_text(hjust = 0.5, size = 10)) +
    # Zoom with the coordinate system only: limits set on the scale would
    # discard observations above 130000 before the box statistics are
    # computed, so the drawn box could disagree with the annotations, which
    # are computed from the full data.
    coord_cartesian(xlim = c(1.4, 3.1), ylim = c(-2000, 130000)) +
    scale_y_continuous(breaks = seq(0, 130000, by = 10000))

  explain_plot
}
# Draw the figure. print() is explicit so the plot is also rendered when this
# file is run through source(), where top-level values are not auto-printed.
print(ggplot_box_legend())