面积图
面积图主要用来展示数量随时间变化的情况,单一变量的面积图在数字起伏较大时视觉效果较好。
平平无奇面积图
library(ggplot2)
library(gcookbook)
# Sunspot data set: print the `sunspots` time series (one column per month)
# for inspection. Note that the data-frame conversion further down reads
# `sunspot.year` (the yearly series), not this monthly one.
sunspots
A Time Series: 235 × 12
| Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
---|
1749 | 58.0 | 62.6 | 70.0 | 55.7 | 85.0 | 83.5 | 94.8 | 66.3 | 75.9 | 75.5 | 158.6 | 85.2 |
---|
1750 | 73.3 | 75.9 | 89.2 | 88.3 | 90.0 | 100.0 | 85.4 | 103.0 | 91.2 | 65.7 | 63.3 | 75.4 |
---|
1751 | 70.0 | 43.5 | 45.3 | 56.4 | 60.7 | 50.7 | 66.3 | 59.8 | 23.5 | 23.2 | 28.5 | 44.0 |
---|
1752 | 35.0 | 50.0 | 71.0 | 59.3 | 59.7 | 39.6 | 78.4 | 29.3 | 27.1 | 46.6 | 37.6 | 40.0 |
---|
# Flatten the yearly sunspot time series into a plain data frame:
# `year` holds the time index and `ss` the observed value for that year.
dss <- data.frame(
  year = as.numeric(time(sunspot.year)),
  ss = as.numeric(sunspot.year)
)
head(dss)
A data.frame: 6 × 2
year | ss |
---|
<dbl> | <dbl> |
---|
1700 | 5 |
1701 | 11 |
1702 | 16 |
1703 | 23 |
1704 | 36 |
1705 | 58 |
# Plain area chart of the sunspot value per year, using default styling.
ggplot(data = dss, mapping = aes(x = year, y = ss)) +
  geom_area()

精致优雅面积图
# Styled area chart: `colour` sets the outline colour, `fill` the interior
# colour, and `alpha` the transparency of the fill.
ggplot(data = dss, mapping = aes(x = year, y = ss)) +
  geom_area(colour = "green", fill = "green", alpha = 0.3)

正负分开堆积图
# Flip the sign of rows 100-200 so the series contains both positive and
# negative stretches to plot.
dss$ss[100:200] <- -dss$ss[100:200]
# `tag` is TRUE for non-negative values and FALSE for negative ones.
dss$tag <- dss$ss >= 0
# rle() captures the lengths of runs of consecutive equal values, e.g.
# dat <- c(1, 2, 2, 2, 3, 1, 4, 4, 1, 1)
# r <- rle(dat)
# r
# lengths: int [1:6] 1 3 1 1 2 2
# values : num [1:6] 1 2 3 1 4 1
cat.rle <- rle(dss$tag)
# rep.int() builds repeated sequences, e.g.
# rep.int(1:5, 2)
# 1 2 3 4 5 1 2 3 4 5
# Here `times` is a vector, so run id i is repeated lengths[i] times and every
# row receives the id of the run it belongs to. seq_along() is used instead of
# 1:length() so that an empty run list yields an empty id vector.
dss$group <- rep.int(seq_along(cat.rle$lengths), times = cat.rle$lengths)
# `group` splits the data so that every contiguous positive or negative
# stretch is drawn as its own area.
# guide = "none" hides the fill legend (guide = FALSE is deprecated in
# ggplot2).
ggplot(dss, aes(x = year, y = ss, fill = tag, group = group)) +
  geom_area(alpha = 0.3) +
  scale_fill_manual(values = c("green", "red"), guide = "none")

# Counter-example: relying only on `fill` for the automatic grouping produces
# an odd-looking plot. The red region above the middle section appears because
# ggplot treats the two TRUE stretches of `tag`, which are separated by FALSE
# values, as one group and connects them.
# guide = "none" hides the fill legend (guide = FALSE is deprecated in
# ggplot2).
ggplot(dss, aes(x = year, y = ss, fill = tag)) +
  geom_area(alpha = 0.3) +
  scale_fill_manual(values = c("green", "red"), guide = "none")

# Inspect rows 107-115: every contiguous run of `tag` values shares a single
# `group` id.
dss[107:115, ]
A data.frame: 9 × 4
| year | ss | tag | group |
---|
| <dbl> | <dbl> | <lgl> | <int> |
---|
107 | 1806 | -28.1 | FALSE | 2 |
---|
108 | 1807 | -10.1 | FALSE | 2 |
---|
109 | 1808 | -8.1 | FALSE | 2 |
---|
110 | 1809 | -2.5 | FALSE | 2 |
---|
111 | 1810 | 0.0 | TRUE | 3 |
---|
112 | 1811 | -1.4 | FALSE | 4 |
---|
113 | 1812 | -5.0 | FALSE | 4 |
---|
114 | 1813 | -12.2 | FALSE | 4 |
---|
115 | 1814 | -13.9 | FALSE | 4 |
---|
堆积图
堆积图可以理解为操作对象扩展至多个变量的面积图。
平平无奇堆积图
# Preview the first six rows of the `uspopage` data frame.
head(uspopage, n = 6)
A data.frame: 6 × 3
Year | AgeGroup | Thousands |
---|
<int> | <fct> | <int> |
---|
1900 | <5 | 9181 |
1900 | 5-14 | 16966 |
1900 | 15-24 | 14951 |
1900 | 25-34 | 12161 |
1900 | 35-44 | 9273 |
1900 | 45-54 | 6437 |
# Basic stacked area chart: one band per age group, outlined with a thin black
# line and filled from the "YlOrRd" Brewer palette.
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for line
# widths; `size` is kept here to stay compatible with older versions - confirm
# the target ggplot2 version before switching.
ggplot(
  data = uspopage,
  mapping = aes(x = Year, y = Thousands, fill = AgeGroup)
) +
  geom_area(colour = "black", size = 0.2, alpha = 0.3) +
  scale_fill_brewer(palette = "YlOrRd")

顺序逆转堆积图
library(plyr)
# Stacked area chart with the stacking order reversed, achieved by reversing
# the factor levels of AgeGroup for the fill aesthetic.
# Columns are referenced by bare name inside aes(): using `uspopage$AgeGroup`
# there bypasses ggplot2's data masking and is discouraged by ggplot2.
ggplot(
  uspopage,
  aes(
    x = Year,
    y = Thousands,
    fill = factor(AgeGroup, levels = rev(levels(AgeGroup)))
  )
) +
  geom_area(color = "black", size = 0.2, alpha = 0.3) +
  scale_fill_brewer(
    palette = "YlOrRd",
    breaks = rev(levels(uspopage$AgeGroup))
  ) +
  # The fill expression is not a plain column, so set the legend title by hand.
  labs(fill = "AgeGroup")

去除竖线堆积图
# Stacked area chart without the vertical edge lines: the areas are drawn with
# no outline, and the band boundaries are added as a separate stacked line
# layer.
ggplot(
  data = uspopage,
  mapping = aes(x = Year, y = Thousands, fill = AgeGroup)
) +
  geom_area(alpha = 0.3) +
  scale_fill_brewer(palette = "YlOrRd") +
  geom_line(position = "stack", colour = "black", size = 0.2)

# After all that fiddling, the version with no outlines at all still looks
# best to me.
ggplot(
  data = uspopage,
  mapping = aes(x = Year, y = Thousands, fill = AgeGroup)
) +
  geom_area(alpha = 0.3) +
  scale_fill_brewer(palette = "YlOrRd")

百分比堆积图
# Compute percentages: ddply() splits the data into one data frame per Year
# and, within each of them, adds a Percent column holding each row's share of
# that year's total (0-100).
uspopage_prop <- ddply(
  uspopage,
  "Year",
  transform,
  Percent = Thousands / sum(Thousands) * 100
)
# Plot the shares as a stacked area chart.
ggplot(uspopage_prop, aes(x = Year, y = Percent, fill = AgeGroup)) +
  geom_area(alpha = 0.3) +
  scale_fill_brewer(palette = "YlOrRd")
