拿R来画画(八):面积图与堆积图

面积图

面积图主要用来展示数量随时间变化的情况,单一变量的面积图在数字起伏较大时视觉效果较好。

平平无奇面积图
library(ggplot2)
library(gcookbook)
# Sunspot data set: print the `sunspots` time series
# (the output below shows one column per month).
# NOTE(review): the data-frame conversion further down uses `sunspot.year`,
# not this monthly series.
sunspots
A Time Series: 235 × 12
       Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
1749  58.0  62.6  70.0  55.7  85.0  83.5  94.8  66.3  75.9  75.5 158.6  85.2
1750  73.3  75.9  89.2  88.3  90.0 100.0  85.4 103.0  91.2  65.7  63.3  75.4
1751  70.0  43.5  45.3  56.4  60.7  50.7  66.3  59.8  23.5  23.2  28.5  44.0
1752  35.0  50.0  71.0  59.3  59.7  39.6  78.4  29.3  27.1  46.6  37.6  40.0
# Flatten the yearly sunspot time series into a plain data frame with one
# row per observation: `year` holds the time index, `ss` the series values.
dss <- data.frame(
  year = as.numeric(time(sunspot.year)),
  ss = as.numeric(sunspot.year)
)
# Preview the first rows of the converted data.
head(dss)
A data.frame: 6 × 2
 year    ss
<dbl> <dbl>
 1700     5
 1701    11
 1702    16
 1703    23
 1704    36
 1705    58
# Plain area chart: `ss` on the y axis against `year` on the x axis.
ggplot(data = dss, mapping = aes(x = year, y = ss)) +
  geom_area()

在这里插入图片描述

精致优雅面积图
# Styled area chart: `color` sets the outline colour, `fill` the fill
# colour, and `alpha` the transparency of the filled region.
ggplot(data = dss, mapping = aes(x = year, y = ss)) +
  geom_area(
    color = "green",
    fill = "green",
    alpha = 0.3
  )

在这里插入图片描述

正负分开堆积图
# Flip the sign of rows 100..200 so the series has both positive and
# negative stretches to demonstrate with.
# NOTE: this negates in place, so re-running the line flips the values back.
dss$ss[100:200] <- -dss$ss[100:200]

# TRUE for non-negative values (zero counts as positive), FALSE otherwise.
dss$tag <- dss$ss >= 0

# rle() captures run lengths of consecutive equal values, e.g.
#   dat <- c(1, 2, 2, 2, 3, 1, 4, 4, 1, 1)
#   r <- rle(dat)
#   r
#   #   lengths: int [1:6] 1 3 1 1 2 2
#   #   values : num [1:6] 1 2 3 1 4 1
tag_runs <- rle(dss$tag)

# rep.int() builds a repeated sequence, e.g.
#   rep.int(1:5, 2)
#   # 1 2 3 4 5 1 2 3 4 5
# Here every run gets its own integer id, repeated once per row of the run,
# so each contiguous positive/negative stretch becomes a separate group.
# seq_along() (rather than 1:length()) stays correct when there are no runs.
dss$group <- rep.int(seq_along(tag_runs$lengths), times = tag_runs$lengths)
# `group` splits the data into one group per contiguous positive/negative
# stretch, so every stretch is drawn as its own area.
# Fill colours are mapped by name: FALSE (negative) -> green,
# TRUE (non-negative) -> red. The legend is suppressed with guide = "none";
# guide = FALSE is deprecated in current ggplot2.
ggplot(dss, aes(x = year, y = ss, fill = tag, group = group)) +
  geom_area(alpha = 0.3) +
  scale_fill_manual(
    values = c("FALSE" = "green", "TRUE" = "red"),
    guide = "none"
  )

在这里插入图片描述

# Counter-example: relying on `fill` alone for grouping produces a strange
# shape. The red region above the middle section appears because ggplot
# treats the two TRUE stretches, which are separated by a FALSE stretch, as
# one group and connects them.
# The legend is suppressed with guide = "none"; guide = FALSE is deprecated
# in current ggplot2.
ggplot(dss, aes(x = year, y = ss, fill = tag)) +
  geom_area(alpha = 0.3) +
  scale_fill_manual(
    values = c("FALSE" = "green", "TRUE" = "red"),
    guide = "none"
  )

在这里插入图片描述

# Inspect rows 107..115: every contiguous run of the same `tag` value
# shares a single `group` id.
dss[107:115, ]
A data.frame: 9 × 4
     year    ss   tag group
    <dbl> <dbl> <lgl> <int>
107  1806 -28.1 FALSE     2
108  1807 -10.1 FALSE     2
109  1808  -8.1 FALSE     2
110  1809  -2.5 FALSE     2
111  1810   0.0  TRUE     3
112  1811  -1.4 FALSE     4
113  1812  -5.0 FALSE     4
114  1813 -12.2 FALSE     4
115  1814 -13.9 FALSE     4

堆积图

堆积图可以理解为操作对象扩展至多个变量的面积图。

平平无奇堆积图
# Preview the first six rows of `uspopage`
# (presumably provided by the gcookbook package loaded earlier).
head(uspopage, n = 6L)
A data.frame: 6 × 3
 Year AgeGroup Thousands
<int>    <fct>     <int>
 1900       <5      9181
 1900     5-14     16966
 1900    15-24     14951
 1900    25-34     12161
 1900    35-44      9273
 1900    45-54      6437
# Stacked area chart: one filled band per AgeGroup, with a thin black
# outline around each band and a yellow-orange-red Brewer palette.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
# widths; `size` is kept here so the snippet also runs on older versions.
ggplot(
  data = uspopage,
  mapping = aes(x = Year, y = Thousands, fill = AgeGroup)
) +
  geom_area(color = "black", size = 0.2, alpha = 0.3) +
  scale_fill_brewer(palette = "YlOrRd")

在这里插入图片描述

顺序逆转堆积图
library(plyr)
# Stacked area chart with the stacking order reversed: the fill factor is
# rebuilt with its levels in reverse order.
# AgeGroup is referenced as a bare column inside aes() so it is resolved
# from the plot data; writing uspopage$AgeGroup there bypasses the data mask
# and makes ggplot2 warn that `data$column` use is discouraged.
ggplot(
  uspopage,
  aes(
    x = Year,
    y = Thousands,
    fill = factor(AgeGroup, levels = rev(levels(AgeGroup)))
  )
) +
  geom_area(color = "black", size = 0.2, alpha = 0.3) +
  scale_fill_brewer(
    palette = "YlOrRd",
    # List the legend entries in the reversed level order.
    breaks = rev(levels(uspopage$AgeGroup))
  ) +
  # Without this the legend title would be the whole factor(...) expression.
  labs(fill = "AgeGroup")

在这里插入图片描述

去除竖线堆积图
# Stacked area chart without the vertical edge lines: the areas are drawn
# with no outline, and the band boundaries are added separately as stacked
# lines so only the top edge of each band is traced.
ggplot(
  data = uspopage,
  mapping = aes(x = Year, y = Thousands, fill = AgeGroup)
) +
  geom_area(alpha = 0.3) +
  geom_line(position = "stack", color = "black", size = 0.2) +
  scale_fill_brewer(palette = "YlOrRd")

在这里插入图片描述

# After all that fiddling, the version with no boundary lines at all
# arguably looks best.
ggplot(
  data = uspopage,
  mapping = aes(x = Year, y = Thousands, fill = AgeGroup)
) +
  geom_area(alpha = 0.3) +
  scale_fill_brewer(palette = "YlOrRd")

在这里插入图片描述

百分比堆积图
# Compute percentages: ddply() splits the data into one data frame per Year
# and, inside each piece, transform() adds `Percent`, the share of that
# year's total taken by each row (0-100).
# The column is spelled `Percent` because its name is what appears as the
# y-axis title of the plot.
uspopage_prop <- ddply(
  uspopage,
  "Year",
  transform,
  Percent = Thousands / sum(Thousands) * 100
)

# Percentage stacked area chart: the bands of each year sum to 100.
ggplot(uspopage_prop, aes(x = Year, y = Percent, fill = AgeGroup)) +
  geom_area(alpha = 0.3) +
  scale_fill_brewer(palette = "YlOrRd")

在这里插入图片描述