Flink Table & SQL: basic API usage
Flink version: 1.13.1
Scala version: 2.12
1 Maven dependencies
<properties>
    <flink.version>1.13.1</flink.version>
    <scala.version>2.12</scala.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_${scala.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_${scala.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- To implement a custom data format for serialization, add this dependency -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-common</artifactId>
        <version>${flink.version}</version>
    </dependency>
</dependencies>
2 API
2.1 Creating the table environment
## Option 1
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
EnvironmentSettings settings = EnvironmentSettings
.newInstance()
.inStreamingMode() // use streaming mode
.build();
TableEnvironment tableEnv = TableEnvironment.create(settings);
## Option 2 (simpler; recommended)
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
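Option 2 can also take explicit EnvironmentSettings when the defaults are not enough; a minimal sketch (tableEnv2 is just an illustrative name, and env/settings are built as shown above):
EnvironmentSettings settings = EnvironmentSettings
.newInstance()
.inStreamingMode()
.build();
// pass the settings explicitly instead of relying on the defaults
StreamTableEnvironment tableEnv2 = StreamTableEnvironment.create(env, settings);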
2.2 Creating tables
- Connector tables
# Syntax
tableEnv.executeSql("CREATE [TEMPORARY] TABLE MyTable ... WITH ( 'connector' = ... )");
# Example: creating input and output tables
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
String inputTable = "CREATE TABLE INPUT (" +
"user_name STRING," +
"url STRING," +
"count_num BIGINT" +
") WITH (" +
" 'connector' = 'filesystem'," +
" 'path' = 'input/input.txt', " +
" 'format' = 'csv'" +
")";
String outputTable = "CREATE TABLE OUTPUT (" +
"user_name STRING," +
"url STRING," +
"count_num BIGINT" +
") WITH (" +
" 'connector' = 'filesystem'," +
" 'path' = 'output', " +
" 'format' = 'csv'" +
")";
tableEnv.executeSql(inputTable);
tableEnv.executeSql(outputTable);
Table result1 = tableEnv.sqlQuery("SELECT user_name, url, count_num FROM INPUT WHERE user_name = 'mary'");
result1.executeInsert("OUTPUT");
- Virtual tables
# Syntax 1: derive a Table object from a query
Table newTable = tableEnv.sqlQuery("SELECT ... FROM MyTable... ");
# Syntax 2: register a Table object as a named view
tableEnv.createTemporaryView("tableName", table);
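For example, a query result can be registered as a view and then referenced from later SQL. A minimal sketch (ClickTable and the column names are illustrative; EventTable is assumed to be registered already):
// derive a result table from a query
Table clickTable = tableEnv.sqlQuery("SELECT user_name, url FROM EventTable");
// register it under a name so later SQL can reference it
tableEnv.createTemporaryView("ClickTable", clickTable);
// the virtual table can now be queried like any other table
Table maryClicks = tableEnv.sqlQuery("SELECT url FROM ClickTable WHERE user_name = 'mary'");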
- Table catalogs and databases
A table's default fully qualified identifier is default_catalog.default_database.MyTable.
To change the current catalog and database manually:
# set on the table environment
tEnv.useCatalog("custom_catalog");
tEnv.useDatabase("custom_database");
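A table can also be looked up by its fully qualified identifier, bypassing the current catalog/database settings; a minimal sketch (custom_catalog, custom_database, and MyTable are assumed to exist):
// resolve a table by its full identifier: catalog.database.table
Table myTable = tEnv.from("custom_catalog.custom_database.MyTable");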
2.3 Querying tables
- SQL queries
TableEnvironment tableEnv = ...;
// create the table
tableEnv.executeSql("CREATE TABLE EventTable ... WITH ( 'connector' = ... )");
// query user Alice's click events and keep the first two fields
Table aliceVisitTable = tableEnv.sqlQuery(
"SELECT user, url " +
"FROM EventTable " +
"WHERE user = 'Alice' "
);
- Table API queries
import static org.apache.flink.table.api.Expressions.$;
Table eventTable = tableEnv.from("EventTable");
Table aliceClickTable = eventTable
.where($("user").isEqual("Alice"))
.select($("url"), $("user"));
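Both styles interoperate: a Table produced by the Table API can be registered as a view and queried with SQL. A minimal sketch (the view name AliceClicks is illustrative):
// expose the Table API result to SQL under a view name
tableEnv.createTemporaryView("AliceClicks", aliceClickTable);
Table aliceUrls = tableEnv.sqlQuery("SELECT url FROM AliceClicks");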
2.4 Writing to output tables
// register a table used to write data to an external system
tableEnv.executeSql("CREATE TABLE OutputTable ... WITH ( 'connector' = ... )");
// the result table obtained from query transformations
Table result = ...
// write the result table into the registered output table; with parallelism > 1 this produces multiple output files
result.executeInsert("OutputTable");
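executeInsert() submits a Flink job and returns a TableResult handle; if the program should block until a bounded insert job finishes (e.g. when writing files in a test), you can wait on that handle. A minimal sketch:
import org.apache.flink.table.api.TableResult;
// keep the handle of the submitted insert job
TableResult insertResult = result.executeInsert("OutputTable");
// optionally block until the job completes (useful for bounded inputs)
insertResult.await();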
2.5 Converting between tables and streams
2.5.1 Converting a Table to a DataStream
Table aliceVisitTable = tableEnv.sqlQuery(
"SELECT user, url " +
"FROM EventTable " +
"WHERE user = 'Alice' "
);
// convert the table into a data stream; only insert-only (+I) rows are supported
tableEnv.toDataStream(aliceVisitTable).print();
// supports updating results (+I, -U, +U changes)
tableEnv.toChangelogStream(aliceVisitTable).print();
// legacy API, insert-only (+I); requires a target type, e.g. org.apache.flink.types.Row
//tableEnv.toAppendStream(aliceVisitTable, Row.class).print();
// legacy API, supports +I, -U and +U by encoding updates as retract/add pairs
//tableEnv.toRetractStream(aliceVisitTable, Row.class).print();
2.5.2 Converting a DataStream to a Table
- Syntax
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// get the table environment
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
// read from the data source
SingleOutputStreamOperator<Event> eventStream = env.addSource(...);
// convert the data stream into a table
Table eventTable = tableEnv.fromDataStream(eventStream);
// keep only the timestamp and url fields of Event as table columns
Table eventTable2 = tableEnv.fromDataStream(eventStream, $("timestamp"), $("url"));
// rename the timestamp field to ts
Table eventTable3 = tableEnv.fromDataStream(eventStream, $("timestamp").as("ts"), $("url"));
// or register the stream directly as a view via createTemporaryView()
tableEnv.createTemporaryView("EventTable", eventStream, $("timestamp").as("ts"), $("url"));
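The snippets above and the example below reference com.flink.dto.Event, whose definition is not shown in the original. Here is a minimal POJO sketch inferred from how it is used; the field names and types are assumptions based on the calls new Event(user, url, timestamp) and getTimestamp():
public class Event {
public String user;
public String url;
public Long timestamp;
// Flink POJO types require a public no-argument constructor
public Event() {
}
public Event(String user, String url, Long timestamp) {
this.user = user;
this.url = url;
this.timestamp = timestamp;
}
public Long getTimestamp() {
return timestamp;
}
}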
- Complete example
import com.flink.dto.Event;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
public class TableToStreamExample {
public static void main(String[] args) throws Exception {
// get the stream execution environment
StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
// read from the data source
SingleOutputStreamOperator<Event> eventStream = env
.fromElements(
new Event("Alice", "./home", 1000L),
new Event("Bob", "./cart", 1000L),
new Event("Alice", "./prod?id=1", 5 * 1000L),
new Event("Cary", "./home", 60 * 1000L),
new Event("Bob", "./prod?id=3", 90 * 1000L),
new Event("Alice", "./prod?id=7", 105 * 1000L)
);
// get the table environment
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
// register the data stream as a table
tableEnv.createTemporaryView("EventTable", eventStream);
// query the urls visited by Alice
Table aliceVisitTable = tableEnv.sqlQuery("SELECT url, `user` FROM EventTable WHERE `user` = 'Alice'");
// count the number of clicks per user
Table urlCountTable = tableEnv.sqlQuery("SELECT `user`, COUNT(url) FROM EventTable GROUP BY `user`");
// convert the tables into data streams and print them to the console
tableEnv.toDataStream(aliceVisitTable).print("alice visit");
tableEnv.toChangelogStream(urlCountTable).print("count");
// run the job
env.execute();
env.execute();
}
}
Output (only the append-only alice visit stream is shown; the changelog output of the count stream is omitted):
alice visit > +I[./home, Alice]
alice visit > +I[./prod?id=1, Alice]
alice visit > +I[./prod?id=7, Alice]
2.6 SQL tumbling window query example
import com.flink.dto.Event;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import static org.apache.flink.table.api.Expressions.$;
public class AppendQueryExample {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
// read the source, assign timestamps, and generate watermarks
SingleOutputStreamOperator<Event> eventStream = env
.fromElements(
new Event("Alice", "./home", 1000L),
new Event("Bob", "./cart", 1000L),
new Event("Alice", "./prod?id=1", 25 * 60 * 1000L),
new Event("Alice", "./prod?id=4", 55 * 60 * 1000L),
new Event("Bob", "./prod?id=5", 3600 * 1000L + 60 * 1000L),
new Event("Cary", "./home", 3600 * 1000L + 30 * 60 * 1000L),
new Event("Cary", "./prod?id=7", 3600 * 1000L + 59 * 60 * 1000L)
).assignTimestampsAndWatermarks(
WatermarkStrategy.<Event>forMonotonousTimestamps()
.withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
@Override
public long extractTimestamp(Event element, long recordTimestamp) {
return element.getTimestamp();
}
})
);
// create the table environment
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
// convert the data stream into a table and declare the time attribute
Table eventTable = tableEnv.fromDataStream(
eventStream,
$("user"),
$("url"),
$("timestamp").rowtime().as("ts")// 将 timestamp 指定为事件时间,并命名为 ts
);
// register the table as EventTable for easy reference from SQL
tableEnv.createTemporaryView("EventTable", eventTable);
// run a SQL aggregation over 1-hour tumbling windows
Table result = tableEnv
.sqlQuery(
"SELECT " +
"user, " +
"window_end AS endT, " + // 窗口结束时间
"COUNT(url) AS cnt " + // 统计 url 访问次数
"FROM TABLE( " +
"TUMBLE( TABLE EventTable, " + // 1 小时滚动窗口
"DESCRIPTOR(ts), " +
"INTERVAL '1' HOUR)) " +
"GROUP BY user, window_start, window_end " // window_start, window_end固定写法
);
tableEnv.toDataStream(result).print("result");
env.execute();
}
}
Output (all three of Alice's events fall in the first one-hour window, hence her count of 3):
result> +I[Alice, 1970-01-01T01:00, 3]
result> +I[Bob, 1970-01-01T01:00, 1]
result> +I[Bob, 1970-01-01T02:00, 1]
result> +I[Cary, 1970-01-01T02:00, 2]