12.5 条形图

条形图特别适合分类变量的展示，我们这里展示钻石切割质量 cut 不同等级的数量。当然，我们也可以先自行统计各类的数目，再在图层 geom_bar 中指定 stat = "identity" 直接展示。

# Count the diamonds in each cut grade by hand; a bar chart drawn with
# stat = "identity" needs these per-class totals as its second mapped variable.
table(cut = diamonds$cut)

## cut
##      Fair      Good Very Good   Premium     Ideal 
##      1610      4906     12082     13791     21551

# Tabulate the cut grades into a data frame (Var1 = grade, Freq = count),
# then draw the precomputed counts directly as bar heights.
cut_df <- as.data.frame(table(diamonds[["cut"]]))
ggplot(data = cut_df, mapping = aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity")

# Only x is mapped: geom_bar() tallies the rows in each cut grade itself.
ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar()

图 12.23: 频数条形图

还有另外三种表示方法

# Same frequency chart with the counting stat named explicitly.
ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar(stat = "count")

# Legacy dot-dot notation for the computed count. ggplot2 >= 3.4.0 warns that
# it is deprecated and recommends after_stat(count) instead.
ggplot(data = diamonds, mapping = aes(x = cut, y = ..count..)) +
  geom_bar()

## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Older stat() wrapper for the computed count. ggplot2 >= 3.4.0 warns that it
# is deprecated and recommends after_stat(count) instead.
ggplot(data = diamonds, mapping = aes(x = cut, y = stat(count))) +
  geom_bar()

## Warning: `stat(count)` was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

我们还可以在图 12.23 的基础上再添加一个分类变量钻石的纯净度 clarity，形成堆积条形图

# Fill the bars by clarity: each cut bar becomes a stack of clarity counts.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar()

图 12.24: 堆积条形图

再添加一个分类变量钻石颜色 color，比较好的做法是分面

# Stacked clarity counts per colour, split into one column panel per cut.
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar() +
  facet_grid(cols = vars(cut))

图 12.25: 分面堆积条形图

实际上，绘制图12.25包含了对分类变量的分组计数过程，如下

# Two-way contingency table: rows are cut grades, columns are colours.
table(cut = diamonds$cut, color = diamonds$color)

##            color
## cut            D    E    F    G    H    I    J
##   Fair       163  224  312  314  303  175  119
##   Good       662  933  909  871  702  522  307
##   Very Good 1513 2400 2164 2299 1824 1204  678
##   Premium   1603 2337 2331 2924 2360 1428  808
##   Ideal     2834 3903 3826 4884 3115 2093  896

还有一种堆积的方法是按比例，而不是按数量，如图12.26

# position = "fill" rescales every stack to the same height, so the segments
# show each clarity's share rather than its count; one column panel per cut.
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar(position = "fill") +
  facet_grid(cols = vars(cut))

图 12.26: 比例堆积条形图

接下来就是复合条形图

# position = "dodge" places the clarity bars side by side within each colour.
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar(position = "dodge")

图 12.27: 复合条形图

再添加一个分类变量，就需要分面大法了。图 12.28 展示了三个分类变量，其实我们还可以再添加一个分类变量用作分面的列依据

# Side-by-side clarity bars per colour, with one row panel per cut grade.
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar(position = "dodge") +
  facet_grid(cut ~ .)

图 12.28: 分面复合条形图

图 12.28 展示的数据如下

# Three-way contingency table: a color x clarity slice for every cut grade.
table(
  color = diamonds$color,
  clarity = diamonds$clarity,
  cut = diamonds$cut
)

## , , cut = Fair
## 
##      clarity
## color   I1  SI2  SI1  VS2  VS1 VVS2 VVS1   IF
##     D    4   56   58   25    5    9    3    3
##     E    9   78   65   42   14   13    3    0
##     F   35   89   83   53   33   10    5    4
##     G   53   80   69   45   45   17    3    2
##     H   52   91   75   41   32   11    1    0
##     I   34   45   30   32   25    8    1    0
##     J   23   27   28   23   16    1    1    0
## 
## , , cut = Good
## 
##      clarity
## color   I1  SI2  SI1  VS2  VS1 VVS2 VVS1   IF
##     D    8  223  237  104   43   25   13    9
##     E   23  202  355  160   89   52   43    9
##     F   19  201  273  184  132   50   35   15
##     G   19  163  207  192  152   75   41   22
##     H   14  158  235  138   77   45   31    4
##     I    9   81  165  110  103   26   22    6
##     J    4   53   88   90   52   13    1    6
## 
## , , cut = Very Good
## 
##      clarity
## color   I1  SI2  SI1  VS2  VS1 VVS2 VVS1   IF
##     D    5  314  494  309  175  141   52   23
##     E   22  445  626  503  293  298  170   43
##     F   13  343  559  466  293  249  174   67
##     G   16  327  474  479  432  302  190   79
##     H   12  343  547  376  257  145  115   29
##     I    8  200  358  274  205   71   69   19
##     J    8  128  182  184  120   29   19    8
## 
## , , cut = Premium
## 
##      clarity
## color   I1  SI2  SI1  VS2  VS1 VVS2 VVS1   IF
##     D   12  421  556  339  131   94   40   10
##     E   30  519  614  629  292  121  105   27
##     F   34  523  608  619  290  146   80   31
##     G   46  492  566  721  566  275  171   87
##     H   46  521  655  532  336  118  112   40
##     I   24  312  367  315  221   82   84   23
##     J   13  161  209  202  153   34   24   12
## 
## , , cut = Ideal
## 
##      clarity
## color   I1  SI2  SI1  VS2  VS1 VVS2 VVS1   IF
##     D   13  356  738  920  351  284  144   28
##     E   18  469  766 1136  593  507  335   79
##     F   42  453  608  879  616  520  440  268
##     G   16  486  660  910  953  774  594  491
##     H   38  450  763  556  467  289  326  226
##     I   17  274  504  438  408  178  179   95
##     J    2  110  243  232  201   54   29   25

# Background reading, "漫谈条形图" (a discussion of bar charts):
# https://cosx.org/2017/10/discussion-about-bar-graph
#
# Simulated data: ages 1-30 for each of two genders (60 rows), with a random
# value `num` drawn from 1:100 with replacement. The seed makes the draw
# reproducible.
set.seed(2020)
dat <- data.frame(
  age = rep(1:30, 2),
  gender = rep(c("man", "woman"), each = 30),
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  num = sample(x = 1:100, size = 60, replace = TRUE)
)
# All four variants share the same canvas: age on x, num as the bar height,
# and bars coloured by gender. Only the position adjustment differs.
age_gender_base <- ggplot(
  data = dat,
  mapping = aes(x = age, y = num, fill = gender)
)

# Overlaid: both genders drawn at the same place, semi-transparent.
p1 <- age_gender_base +
  geom_col(position = "identity", alpha = 0.5)

# Stacked: one gender on top of the other.
p2 <- age_gender_base +
  geom_col(position = "stack")

# Dodged: the two genders side by side.
p3 <- age_gender_base +
  geom_col(position = "dodge")

# Percent: stacks rescaled to a common height, y axis labelled in percent.
p4 <- age_gender_base +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(y = "%")

# 2 x 2 layout: p1 and p2 on the top row, p3 and p4 on the bottom row.
(p1 + p2) / (p3 + p4)

图 12.29: 条形图的四种常见形态

以数据集 diamonds 为例，按照纯净度 clarity 和切工 cut 分组统计钻石的数量，再按切工分组统计不同纯净度的钻石数量占比，如表 12.1 所示

library(data.table)
# Work on a data.table so the grouped [i, j, by] syntax below is available.
diamonds <- as.data.table(diamonds)

# Step 1: count diamonds for every (cut, clarity) pair.
# Step 2: pct = share of each clarity within its cut.
# Step 3: pct_pp = display label combining count and share, e.g. "210 (13.04%)".
dat <- diamonds[, .(cnt = .N), by = .(cut, clarity)][
  , pct := cnt / sum(cnt), by = .(cut)
][
  , pct_pp := paste0(cnt, " (", scales::percent(pct, accuracy = 0.01), ")")
]

# The bare grouped counts can also be obtained with
# with(diamonds, table(clarity, cut)).
# Reshape to one row per clarity and one column per cut, then render the table.
knitr::kable(
  dcast(dat, formula = clarity ~ cut, value.var = "pct_pp"),
  align = "crrrrr", caption = "数值和比例组合呈现"
)

表 12.1: 数值和比例组合呈现
clarity	Fair	Good	Very Good	Premium	Ideal
I1	210 (13.04%)	96 (1.96%)	84 (0.70%)	205 (1.49%)	146 (0.68%)
SI2	466 (28.94%)	1081 (22.03%)	2100 (17.38%)	2949 (21.38%)	2598 (12.06%)
SI1	408 (25.34%)	1560 (31.80%)	3240 (26.82%)	3575 (25.92%)	4282 (19.87%)
VS2	261 (16.21%)	978 (19.93%)	2591 (21.45%)	3357 (24.34%)	5071 (23.53%)
VS1	170 (10.56%)	648 (13.21%)	1775 (14.69%)	1989 (14.42%)	3589 (16.65%)
VVS2	69 (4.29%)	286 (5.83%)	1235 (10.22%)	870 (6.31%)	2606 (12.09%)
VVS1	17 (1.06%)	186 (3.79%)	789 (6.53%)	616 (4.47%)	2047 (9.50%)
IF	9 (0.56%)	71 (1.45%)	268 (2.22%)	230 (1.67%)	1212 (5.62%)

分别以复合条形图和百分比堆积条形图展示，并添加注释到条形图上，见图 12.30

# Dodged bar chart of counts per cut, one bar per clarity. Each bar carries two
# labels anchored at its top: the raw count above (vjust = -0.5) and the
# within-cut share below (vjust = 1).
# The labels are dodged by 0.9, the default bar width that geom_col() dodges
# by, so each label stays centred on its own bar. Dodging the text by 1 spreads
# the labels wider than the bars and pushes the outer ones off-centre.
p1 <- ggplot(data = dat, mapping = aes(x = cut, y = cnt, fill = clarity)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = cnt),
    position = position_dodge(width = 0.9), vjust = -0.5
  ) +
  geom_text(
    aes(label = scales::percent(pct, accuracy = 0.1)),
    position = position_dodge(width = 0.9), vjust = 1, hjust = 0.5
  ) +
  scale_fill_brewer(palette = "Spectral") +
  labs(fill = "clarity", y = "", x = "cut") +
  theme_minimal() +
  theme(legend.position = "top")

# Horizontal 100% stacked bar chart: one bar per cut (on the y axis), split
# into clarity segments whose lengths are shares of that cut's total. Each
# segment carries its raw count and its percentage.
p2 <- ggplot(data = dat, mapping = aes(y = cut, x = cnt, fill = clarity)) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = cnt),
    position = position_fill(vjust = 1), vjust = -0.5
  ) +
  geom_text(
    aes(label = scales::percent(pct, accuracy = 0.1)),
    position = position_fill(vjust = 1), vjust = 1, hjust = 0.5
  ) +
  scale_fill_brewer(palette = "Spectral") +
  scale_x_continuous(labels = scales::percent) +
  # The axes are flipped relative to p1: cut is on y here, so the "cut" title
  # belongs to the y axis and the percentage (x) axis is left untitled.
  labs(fill = "clarity", x = "", y = "cut") +
  theme_minimal() +
  theme(legend.position = "top")

# Combine the two plots into one figure. `/` on plot objects is presumably
# patchwork's "stack vertically" operator (package load not shown here; confirm).
p1 / p2

图 12.30: 添加注释到条形图

借助 plotly 制作相应的动态百分比堆积条形图

# Static reference chart: clarity counts within each cut, placed side by side
# with "dodge2" and coloured with the ColorBrewer "Spectral" palette.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge2") +
  scale_fill_brewer(palette = "Spectral")

图 12.31: 百分比堆积条形图

# Interactive percent stacked bar chart, built inside-out:
#   plot_ly(): one bar trace per clarity, with height = share within the cut
#              and hover text showing the count and the formatted share;
#   layout():  stack the traces and format the y axis ticks as whole percents;
#   config():  hide the mode bar.
plotly::config(
  plotly::layout(
    plotly::plot_ly(
      data = dat,
      x = ~cut, color = ~clarity, y = ~pct,
      colors = "Spectral", type = "bar",
      text = ~ paste0(
        cnt, "颗 <br>",
        "占比：", scales::percent(pct, accuracy = 0.1), "<br>"
      ),
      hoverinfo = "text"
    ),
    barmode = "stack",
    yaxis = list(tickformat = ".0%")
  ),
  displayModeBar = FALSE
)

图 12.31: 百分比堆积条形图

# With type = "histogram" plotly does the counting itself, grouped by cut and
# clarity, so the raw diamonds data is passed in directly. The mode bar is
# hidden.
plotly::config(
  plotly::plot_ly(
    data = diamonds,
    x = ~cut, color = ~clarity,
    colors = "Spectral", type = "histogram"
  ),
  displayModeBar = FALSE
)

图 12.31: 百分比堆积条形图

# Stacked version of the histogram: plotly counts rows by cut and clarity,
# layout() stacks the clarity traces and titles the y axis and the legend,
# and config() hides the mode bar.
plotly::config(
  plotly::layout(
    plotly::plot_ly(
      data = diamonds,
      x = ~cut, color = ~clarity,
      colors = "Spectral", type = "histogram"
    ),
    barmode = "stack",
    yaxis = list(title = "cnt"),
    legend = list(title = list(text = "clarity"))
  ),
  displayModeBar = FALSE
)

图 12.31: 百分比堆积条形图