12.5 条形图
条形图特别适合分类变量的展示,我们这里展示钻石切割质量 cut 不同等级的数量。当然,我们也可以先自行统计各类的数目再直接展示,此时需要在图层 geom_bar 中指定 stat = "identity"。
# Two data-frame variables must be mapped; this amounts to computing the
# count of each class ourselves first.
# Tabulate the number of diamonds in each cut grade.
with(diamonds, table(cut))
## cut
## Fair Good Very Good Premium Ideal
## 1610 4906 12082 13791 21551
# Pre-compute the per-cut counts as a data frame; as.data.frame() on a
# one-way table yields the columns Var1 (level) and Freq (count).
cut_df <- as.data.frame(table(diamonds$cut))
# Plot the pre-computed counts as-is, hence stat = "identity".
ggplot(cut_df, aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity")
# Bar chart of cut built straight from the raw data, with no y mapping.
ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar()
还有另外三种表示方法
# Variant 1: name the count statistic explicitly.
ggplot(diamonds, aes(x = cut)) + geom_bar(stat = "count")
# Variant 2: legacy dot-dot notation; ggplot2 >= 3.4.0 warns that it is
# deprecated and recommends after_stat(count) instead.
ggplot(diamonds, aes(x = cut, y = ..count..)) + geom_bar()
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Variant 3: stat(count); ggplot2 >= 3.4.0 warns that it is deprecated and
# recommends after_stat(count) instead.
ggplot(diamonds, aes(x = cut, y = stat(count))) + geom_bar()
## Warning: `stat(count)` was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
我们还可以在图 12.23 的基础上再添加一个分类变量钻石的纯净度 clarity,形成堆积条形图
# Bar chart of cut with the bars filled by clarity.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar()
再添加一个分类变量钻石颜色 color 比较好的做法是分面
# Bars of color filled by clarity, faceted by cut.
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar() +
  facet_grid(~cut)
实际上,绘制图12.25包含了对分类变量的分组计数过程,如下
# Two-way contingency table: counts of diamonds by cut (rows) and color
# (columns).
with(diamonds, table(cut, color))
## color
## cut D E F G H I J
## Fair 163 224 312 314 303 175 119
## Good 662 933 909 871 702 522 307
## Very Good 1513 2400 2164 2299 1824 1204 678
## Premium 1603 2337 2331 2924 2360 1428 808
## Ideal 2834 3903 3826 4884 3115 2093 896
还有一种堆积的方法是按比例,而不是按数量,如图12.26
# Same layout as the faceted bar chart, but using position = "fill".
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar(position = "fill") +
  facet_grid(~cut)
接下来就是复合条形图
# Bars of color filled by clarity, using position = "dodge".
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar(position = "dodge")
再添加一个分类变量,就是需要分面大法了,图 12.27 展示了三个分类变量,其实我们还可以再添加一个分类变量用作分面的列依据
# Dodged bars of color by clarity, with cut as the facet rows.
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar(position = "dodge") +
  facet_grid(rows = vars(cut))
图 12.28 展示的数据如下
# Three-way contingency table: one color-by-clarity slice is printed for each
# cut grade.
with(diamonds, table(color, clarity, cut))
## , , cut = Fair
##
## clarity
## color I1 SI2 SI1 VS2 VS1 VVS2 VVS1 IF
## D 4 56 58 25 5 9 3 3
## E 9 78 65 42 14 13 3 0
## F 35 89 83 53 33 10 5 4
## G 53 80 69 45 45 17 3 2
## H 52 91 75 41 32 11 1 0
## I 34 45 30 32 25 8 1 0
## J 23 27 28 23 16 1 1 0
##
## , , cut = Good
##
## clarity
## color I1 SI2 SI1 VS2 VS1 VVS2 VVS1 IF
## D 8 223 237 104 43 25 13 9
## E 23 202 355 160 89 52 43 9
## F 19 201 273 184 132 50 35 15
## G 19 163 207 192 152 75 41 22
## H 14 158 235 138 77 45 31 4
## I 9 81 165 110 103 26 22 6
## J 4 53 88 90 52 13 1 6
##
## , , cut = Very Good
##
## clarity
## color I1 SI2 SI1 VS2 VS1 VVS2 VVS1 IF
## D 5 314 494 309 175 141 52 23
## E 22 445 626 503 293 298 170 43
## F 13 343 559 466 293 249 174 67
## G 16 327 474 479 432 302 190 79
## H 12 343 547 376 257 145 115 29
## I 8 200 358 274 205 71 69 19
## J 8 128 182 184 120 29 19 8
##
## , , cut = Premium
##
## clarity
## color I1 SI2 SI1 VS2 VS1 VVS2 VVS1 IF
## D 12 421 556 339 131 94 40 10
## E 30 519 614 629 292 121 105 27
## F 34 523 608 619 290 146 80 31
## G 46 492 566 721 566 275 171 87
## H 46 521 655 532 336 118 112 40
## I 24 312 367 315 221 82 84 23
## J 13 161 209 202 153 34 24 12
##
## , , cut = Ideal
##
## clarity
## color I1 SI2 SI1 VS2 VS1 VVS2 VVS1 IF
## D 13 356 738 920 351 284 144 28
## E 18 469 766 1136 593 507 335 79
## F 42 453 608 879 616 520 440 268
## G 16 486 660 910 953 774 594 491
## H 38 450 763 556 467 289 326 226
## I 17 274 504 438 408 178 179 95
## J 2 110 243 232 201 54 29 25
# A discussion about bar graphs:
# https://cosx.org/2017/10/discussion-about-bar-graph
# Simulated data: 30 ages x 2 genders, with a random count in 1..100 for each.
set.seed(2020)
dat <- data.frame(
  age = rep(1:30, 2),
  gender = rep(c("man", "woman"), each = 30),
  num = sample(x = 1:100, size = 60, replace = TRUE)
)
# Overlapping bars (semi-transparent so both groups stay visible)
p1 <- ggplot(data = dat, aes(x = age, y = num, fill = gender)) +
  geom_col(position = "identity", alpha = 0.5)
# Stacked bars
p2 <- ggplot(data = dat, aes(x = age, y = num, fill = gender)) +
  geom_col(position = "stack")
# Side-by-side (dodged) bars
p3 <- ggplot(data = dat, aes(x = age, y = num, fill = gender)) +
  geom_col(position = "dodge")
# Percentage bars
p4 <- ggplot(data = dat, aes(x = age, y = num, fill = gender)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(y = "%")
# Arrange the four plots in a 2 x 2 grid; the `+` and `/` operators on plots
# are presumably provided by the patchwork package, loaded elsewhere -
# TODO confirm.
(p1 + p2) / (p3 + p4)
以数据集 diamonds 为例,按照纯净度 clarity 和切工 cut 分组统计钻石的数量,再按切工分组统计不同纯净度的钻石数量占比,如表 12.1 所示
library(data.table)
# Convert to a data.table; note that this rebinds the name `diamonds`.
diamonds <- as.data.table(diamonds)
# Count diamonds per (cut, clarity), then compute each clarity's share within
# its cut, and finally build a "count (percent)" display label.
dat <- diamonds[, .(cnt = .N), by = .(cut, clarity)] %>%
  .[, pct := cnt / sum(cnt), by = .(cut)] %>%
  .[, pct_pp := paste0(cnt, " (", scales::percent(pct, accuracy = 0.01), ")")]
# Grouped counts; compare with(diamonds, table(clarity, cut))
# Reshape to wide form (clarity in rows, cut in columns) and render the table.
dcast(dat, formula = clarity ~ cut, value.var = "pct_pp") %>%
  knitr::kable(align = "crrrrr", caption = "数值和比例组合呈现")
clarity | Fair | Good | Very Good | Premium | Ideal |
---|---|---|---|---|---|
I1 | 210 (13.04%) | 96 (1.96%) | 84 (0.70%) | 205 (1.49%) | 146 (0.68%) |
SI2 | 466 (28.94%) | 1081 (22.03%) | 2100 (17.38%) | 2949 (21.38%) | 2598 (12.06%) |
SI1 | 408 (25.34%) | 1560 (31.80%) | 3240 (26.82%) | 3575 (25.92%) | 4282 (19.87%) |
VS2 | 261 (16.21%) | 978 (19.93%) | 2591 (21.45%) | 3357 (24.34%) | 5071 (23.53%) |
VS1 | 170 (10.56%) | 648 (13.21%) | 1775 (14.69%) | 1989 (14.42%) | 3589 (16.65%) |
VVS2 | 69 (4.29%) | 286 (5.83%) | 1235 (10.22%) | 870 (6.31%) | 2606 (12.09%) |
VVS1 | 17 (1.06%) | 186 (3.79%) | 789 (6.53%) | 616 (4.47%) | 2047 (9.50%) |
IF | 9 (0.56%) | 71 (1.45%) | 268 (2.22%) | 230 (1.67%) | 1212 (5.62%) |
分别以复合条形图和百分比堆积条形图展示,并将数量和占比作为注释添加到条形图上,见图 12.30
# Dodged bars of counts per cut and clarity, annotated with both the raw
# count (above the bar) and the within-cut percentage (just below the top).
p1 <- ggplot(data = dat, aes(x = cut, y = cnt, fill = clarity)) +
  geom_col(position = "dodge") +
  geom_text(aes(label = cnt), position = position_dodge(1), vjust = -0.5) +
  geom_text(aes(label = scales::percent(pct, accuracy = 0.1)),
    position = position_dodge(1), vjust = 1, hjust = 0.5
  ) +
  scale_fill_brewer(palette = "Spectral") +
  labs(fill = "clarity", y = "", x = "cut") +
  theme_minimal() +
  theme(legend.position = "top")
# Horizontal percentage-stacked bars with the same two annotations.
# NOTE(review): here x is mapped to cnt and y to cut, yet the x axis is titled
# "cut" and the y axis is left blank - confirm the labels are intended.
p2 <- ggplot(data = dat, aes(y = cut, x = cnt, fill = clarity)) +
  geom_col(position = "fill") +
  geom_text(aes(label = cnt), position = position_fill(1), vjust = -0.5) +
  geom_text(aes(label = scales::percent(pct, accuracy = 0.1)),
    position = position_fill(1), vjust = 1, hjust = 0.5
  ) +
  scale_fill_brewer(palette = "Spectral") +
  scale_x_continuous(labels = scales::percent) +
  labs(fill = "clarity", y = "", x = "cut") +
  theme_minimal() +
  theme(legend.position = "top")
# Stack the two plots vertically; the `/` operator on plots is presumably
# provided by the patchwork package, loaded elsewhere - TODO confirm.
p1 / p2
借助 plotly 制作相应的动态百分比堆积条形图
# Bars of cut filled by clarity, using position = "dodge2" and the Spectral
# brewer palette.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge2") +
  scale_fill_brewer(palette = "Spectral")
# Percentage-stacked bar chart built from the pre-aggregated `dat`;
# the hover text shows the count and the within-cut share.
plotly::plot_ly(dat,
  x = ~cut, color = ~clarity, y = ~pct,
  colors = "Spectral", type = "bar",
  text = ~ paste0(
    cnt, "颗 <br>",
    "占比:", scales::percent(pct, accuracy = 0.1), "<br>"
  ),
  hoverinfo = "text"
) %>%
  plotly::layout(
    barmode = "stack",
    yaxis = list(tickformat = ".0%")
  ) %>%
  plotly::config(displayModeBar = FALSE)
# `type = "histogram"` counts the rows grouped by cut and clarity, so the raw
# `diamonds` data can be passed in without aggregating it first.
plotly::plot_ly(diamonds,
  x = ~cut, color = ~clarity,
  colors = "Spectral", type = "histogram"
) %>%
  plotly::config(displayModeBar = FALSE)
# Stacked version of the histogram-type chart, with explicit titles for the
# y axis and the legend.
plotly::plot_ly(diamonds,
  x = ~cut, color = ~clarity,
  colors = "Spectral", type = "histogram"
) %>%
  plotly::layout(
    barmode = "stack",
    yaxis = list(title = "cnt"),
    legend = list(title = list(text = "clarity"))
  ) %>%
  plotly::config(displayModeBar = FALSE)