窗口函数
一. 什么是窗口函数
基本含义
窗口限定一个范围,它可以理解为满足某些条件的记录集合,窗口函数也就是在窗口范围内执行的函数
基本语法
窗口函数有over关键字,指定函数执行的范围,可分为三部分:分组子句(partition by), 排序子句(order by), 窗口子句(rows)
<函数名> over (partition by <分组的列> order by <排序的列> rows between <起始行> and <终止行>)
注意:Mysql8才支持窗口函数
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-px9leBH9-1680593139140)(file:///Users/zhangyi/Library/Application%20Support/marktext/images/2023-03-22-11-56-43-image.png?msec=1680075248844)]
建表SQL语句
CREATE TABLE student(
cid varchar(4),
sname varchar(4),
score int
);
INSERT INTO student (cid, sname, score)
VALUES
('001', '张三', 78),
('001', '李四', 82),
('002', '小明', 90),
('001', '王五', 67),
('002', '小红', 85),
('002', '小刚', 62)
二. 窗口的确定
select *
sum(score)
over (partition by cid order by score rows between unbounded preceding and unbounded following)
as '班级总分'
from student;
分组子句(partition by)
不分组可以写成partition by null 或者直接不写
后面可以跟多个列,如 partition by cid, sname
注意 partition by与group by的区别
-
前者不会压缩行数但是后者会
-
后者只能选取分组的列和聚合的列
也就是说 group by后生成的结果集与原表的行数和列数都不同
排序子句(order by)
不排序可以写成order by null 或者直接不写
asc或不写代表升序,desc表示降序
后面可以跟多个列,如 order by cid, sname
窗口子句(rows)
窗口子句的描述
-
起始行:N preceding / unbounded preceding
-
当前行: current row
-
终止行: N following / unbounded following
举例:
rows between unbounded preceding and current row 从之前所有的行到当前行
rows between 2 preceding and current row 从前面两行到当前行
rows between current row and unbounded following 从当前行到之后所有的行
rows between current row and 1 following 从当前行到后面一行
注意:
排序子句后面缺少窗口子句,窗口规范默认是 rows between unbounded preceding and current row
排序子句和窗口子句都缺失,窗口规范默认是 rows between unbounded preceding and unbounded following
执行流程
-
通过partition by 和 order by 子句确定大窗口 (定义出上界 unbounded preceding 和下界 unbounded following)
-
通过row子句针对每一行数据确定小窗口(滑动窗口)
-
对每行的小窗口内的数据执行函数并生成新的列
三. 函数分类
排序类
rank, dense_rank, row_number
---- 按班级分组后打上序号 不考虑并列
select *, row_number() over (partition by cid order by score desc) as '不可并列排名' from student;
-- 按班级分组后作跳跃排名 考虑并列
select *, rank() over (partition by cid order by score desc ) as '跳跃🉑️排名' from student;
-- 按班级分组后作连续排名 考虑并列
select *, dense_rank() over (partition by cid order by score desc ) as '连续🉑️并列排名' from student;
-- 合并起来对比
select *, row_number() over (partition by cid order by score desc ) as '不可并列排名',
rank() over (partition by cid order by score desc ) as '跳跃可并列排名',
dense_rank() over (partition by cid order by score desc ) as '连续可并列排名'
from student; [排序类]
聚合类
sum, avg, count, max, min
# 聚合类
-- 容同一班级每个学生都知道班级总分是多少
select *, sum(score) over(partition by cid) as '班级总分' from student;
-- 计算同一班级,每个同学和比他分数低的同学的累计总分是多少
select *, sum(score) over(partition by cid order by score) as '累加分数' from student;
跨行类
lag, lead
# [跨行类]
-- lag/lead 函数 参数1:比较的列;参数2:偏移量;参数3:找不到的默认值
-- 同一班级内,成绩比自己低一名的分数是多少
select *, lag(score, 1) over (partition by cid order by score) as '低一名的分数' from student;
-- 有默认值的写法
select *, lag(score, 1, 0) over (partition by cid order by score) as '低一名的分数' from student;
-- 同一班级内,成绩比自己高2名的分数是多少
select *, lead(score, 2, 0) over (partition by cid order by score) as '高两名的分数' from student;
相关题目
表格
001,张三,语文,78
002,小刚,语文,71
001,李四,数学,56
001,王五,数学,97
002,小明,数学,54
002,小刚,数学,67
002,小红,数学,82
001,王五,语文,80
001,张三,数学,77
002,小明,语文,58
002,小红,语文,87
001,李四,语文,60
001,张三,英语,66
002,小刚,英语,50
001,李四,地理,59
001,王五,地理,88
002,小明,地理,45
002,小刚,地理,66
002,小红,地理,82
001,王五,英语,81
001,张三,地理,77
002,小明,英语,55
002,小红,英语,87
001,李四,英语,61
脚本
# 创建表格
create table transcript (
cid varchar(4),
sname varchar(4),
course varchar(10),
score int
);
insert into transcript (cid, sname, course, score) values ('001', '张三', '语文', 78);
insert into transcript (cid, sname, course, score) values ('002', '小刚', '语文', 71);
insert into transcript (cid, sname, course, score) values ('001', '李四', '数学', 56);
insert into transcript (cid, sname, course, score) values ('001', '王五', '数学', 97);
insert into transcript (cid, sname, course, score) values ('002', '小明', '数学', 54);
insert into transcript (cid, sname, course, score) values ('002', '小刚', '数学', 67);
insert into transcript (cid, sname, course, score) values ('002', '小红', '数学', 82);
insert into transcript (cid, sname, course, score) values ('001', '王五', '语文', 80);
insert into transcript (cid, sname, course, score) values ('001', '张三', '数学', 77);
insert into transcript (cid, sname, course, score) values ('002', '小明', '语文', 58);
insert into transcript (cid, sname, course, score) values ('002', '小红', '语文', 87);
insert into transcript (cid, sname, course, score) values ('001', '李四', '语文', 60);
insert into transcript (cid, sname, course, score) values ('001', '张三', '英语', 66);
insert into transcript (cid, sname, course, score) values ('002', '小刚', '英语', 50);
insert into transcript (cid, sname, course, score) values ('001', '李四', '地理', 59);
insert into transcript (cid, sname, course, score) values ('001', '王五', '地理', 88);
insert into transcript (cid, sname, course, score) values ('002', '小明', '地理', 45);
insert into transcript (cid, sname, course, score) values ('002', '小刚', '地理', 66);
insert into transcript (cid, sname, course, score) values ('002', '小红', '地理', 82);
insert into transcript (cid, sname, course, score) values ('001', '王五', '英语', 81);
insert into transcript (cid, sname, course, score) values ('001', '张三', '地理', 77);
insert into transcript (cid, sname, course, score) values ('002', '小明', '英语', 55);
insert into transcript (cid, sname, course, score) values ('002', '小红', '英语', 87<