flink table & sql 基本API使用

本文档详细介绍了Apache Flink 1.13.1版本中Table & SQL API的使用,包括环境创建、表的创建与查询、流与表的转换以及SQL开窗滚动查询案例。通过实例展示了如何进行数据输入输出、表与流之间的相互转换,以及如何进行时间窗口的SQL查询,帮助读者理解Flink的流处理能力。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

flink table & sql 基本API使用

flink版本:1.13.1
scala版本:2.12

1 maven 依赖引用

    <properties>
        <flink.version>1.13.1</flink.version>
        <scala.version>2.12</scala.version>
    </properties>
    
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- 实现自定义的数据格式来做序列化,可以引入下面的依赖 -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink.version}</version>
        </dependency> 
    </dependencies>

2 API

2.1 创建表执行环境

## 方式一
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

// Build the environment settings used to create the table environment
EnvironmentSettings settings = EnvironmentSettings
 .newInstance()
 .inStreamingMode() // use streaming mode
 .build();
// Create the table environment from those settings
TableEnvironment tableEnv = TableEnvironment.create(settings);

## 方式二 较为简单推荐
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
// Obtain the stream execution environment
StreamExecutionEnvironment env = 
StreamExecutionEnvironment.getExecutionEnvironment();
// Create a StreamTableEnvironment on top of the stream environment
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

2.2 创建表

  1. 连接器表
# 语法
tableEnv.executeSql("CREATE [TEMPORARY] TABLE MyTable ... WITH ( 'connector' = ... )");
# 案例 创建输入输出表
        // Stream environment with a single parallel task, plus the table environment built on it
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Both tables share the same three columns and the same filesystem connector option
        String columnList = "user_name String," + "url String," + "count_num Bigint";
        String connectorOption = " 'connector' = 'filesystem',";

        // DDL of the source table, read from input/input.txt in csv format
        String inputTableDdl = "CREATE TABLE INPUT (" + columnList + ") WITH ("
                + connectorOption
                + " 'path' = 'input/input.txt', "
                + " 'format' = 'csv'"
                + ")";

        // DDL of the sink table, written under the 'ouput' path in csv format
        String outputTableDdl = "CREATE TABLE OUPUT (" + columnList + ") WITH ("
                + connectorOption
                + " 'path' = 'ouput', "
                + " 'format' = 'csv'"
                + ")";

        // Register both tables in the table environment
        tableEnv.executeSql(inputTableDdl);
        tableEnv.executeSql(outputTableDdl);

        // Select the rows of user 'mary' from INPUT and insert them into OUPUT
        Table maryRows = tableEnv.sqlQuery("select user_name, url, count_num from INPUT where user_name = 'mary'");
        maryRows.executeInsert("OUPUT");

  2. 虚拟表
# 语法1
Table newTable = tableEnv.sqlQuery("SELECT ... FROM MyTable... ");
# 语法2 
tableEnv.createTemporaryView("tableName", table);
  3. 表归属目录 Catalog 与 Database
    创建表时若未指定,表的默认完整标识为:default_catalog.default_database.MyTable
    也可以手动指定当前使用的 Catalog 和 Database:
# 在执行环境中进行设置
// Switch the current catalog and database on the table environment
tableEnv.useCatalog("custom_catalog");
tableEnv.useDatabase("custom_database");

2.3 表查询

  1. SQL API 查询
TableEnvironment tableEnv = ...; 
// 创建表
tableEnv.executeSql("CREATE TABLE EventTable ... WITH ( 'connector' = ... )");
// 查询用户 Alice 的点击事件,并提取表中前两个字段
Table aliceVisitTable = tableEnv.sqlQuery(
 "SELECT user, url " +
 "FROM EventTable " +
 "WHERE user = 'Alice' "
 );
  2. Table API 查询
// Table API query: keep the rows whose user equals "Alice", then select url and user
Table eventTable = tableEnv.from("EventTable");
Table aliceClickTable = eventTable
 .where($("user").isEqual("Alice"))
 .select($("url"), $("user"));

2.4 输出表

// 注册表,用于输出数据到外部系统
tableEnv.executeSql("CREATE TABLE OutputTable ... WITH ( 'connector' = ... )");
// 经过查询转换,得到结果表
Table result = ...
// 将结果表写入已注册的输出表中,多并行度输出产生多个文件
result.executeInsert("OutputTable");

2.5 表和流的相互转换

2.5.1 将表(Table)转换成流(DataStream)
// Query the user and url columns of EventTable for the user 'Alice'
Table aliceVisitTable = tableEnv.sqlQuery(
 "SELECT user, url " +
 "FROM EventTable " +
 "WHERE user = 'Alice' "
 );
// Convert the table to a data stream; only supports +I records
tableEnv.toDataStream(aliceVisitTable).print();
// Supports +I, -U and +U records
tableEnv.toChangelogStream(aliceVisitTable).print();
// Supports +I
//tableEnv.toAppendStream(aliceVisitTable).print();
// Supports +I, -U and +U records
//tableEnv.toRetractStream(aliceVisitTable).print();
2.5.2 将流(DataStream)转换成表(Table)
  1. 相关语法
// Obtain the stream environment (kept in `env`, which the following lines use)
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Obtain the table environment
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
// Read the data source
SingleOutputStreamOperator<Event> eventStream = env.addSource(...);
// Convert the data stream into a table
Table eventTable = tableEnv.fromDataStream(eventStream);
// Pick the timestamp and url fields of Event as the table columns
Table eventTable2 = tableEnv.fromDataStream(eventStream, $("timestamp"), $("url"));
// Same as above, but rename the timestamp field to ts
Table eventTable3 = tableEnv.fromDataStream(eventStream, $("timestamp").as("ts"), $("url"));
// Alternatively, register the stream directly as a view via createTemporaryView()
tableEnv.createTemporaryView("EventTable", eventStream, $("timestamp").as("ts"), $("url"));
  2. 综合案例
import com.flink.dto.Event;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

/**
 * Registers a stream of {@code Event} objects as the temporary view "EventTable",
 * runs two SQL queries against it, and prints both results after converting the
 * resulting tables back into streams.
 */
public class TableToStreamExample {

    /** Selects the url and user columns of the events whose user is 'Alice'. */
    private static final String ALICE_VISIT_SQL =
            "SELECT url, user FROM EventTable WHERE user = 'Alice'";

    /** Counts the url values per user. */
    private static final String URL_COUNT_SQL =
            "SELECT user, COUNT(url) FROM EventTable GROUP BY user";

    /**
     * Entry point: builds the pipeline and executes it.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if executing the job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Build the source first, then the table environment, then register the view
        SingleOutputStreamOperator<Event> events = sampleEvents(env);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        tableEnv.createTemporaryView("EventTable", events);

        Table aliceVisits = tableEnv.sqlQuery(ALICE_VISIT_SQL);
        Table clicksPerUser = tableEnv.sqlQuery(URL_COUNT_SQL);

        // The filter query is printed as a data stream, the grouped count as a changelog stream
        tableEnv.toDataStream(aliceVisits).print("alice visit");
        tableEnv.toChangelogStream(clicksPerUser).print("count");

        env.execute();
    }

    /** Creates the fixed in-memory stream of sample events used by this example. */
    private static SingleOutputStreamOperator<Event> sampleEvents(StreamExecutionEnvironment env) {
        return env.fromElements(
                new Event("Alice", "./home", 1000L),
                new Event("Bob", "./cart", 1000L),
                new Event("Alice", "./prod?id=1", 5 * 1000L),
                new Event("Cary", "./home", 60 * 1000L),
                new Event("Bob", "./prod?id=3", 90 * 1000L),
                new Event("Alice", "./prod?id=7", 105 * 1000L)
        );
    }
}

输出结果(仅列出 alice visit 部分;程序同时还会打印 urlCountTable 对应的以 count> 为前缀的更新日志,此处省略)

alice visit> +I[./home, Alice]
alice visit> +I[./prod?id=1, Alice]
alice visit> +I[./prod?id=7, Alice]

2.6 SQL开窗滚动查询案例

import com.flink.dto.Event;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import static org.apache.flink.table.api.Expressions.$;

/**
 * Counts url visits per user in 1-hour tumbling event-time windows using the
 * TUMBLE windowing table-valued function, and prints the result as a data stream.
 */
public class AppendQueryExample {

    /** Per-user url count over 1-hour tumbling windows on the ts column of EventTable. */
    private static final String HOURLY_COUNT_SQL =
            "SELECT " +
                    "user, " +
                    "window_end AS endT, " + // end time of the window
                    "COUNT(url) AS cnt " + // number of url visits
                    "FROM TABLE( " +
                    "TUMBLE( TABLE EventTable, " + // 1-hour tumbling window
                    "DESCRIPTOR(ts), " +
                    "INTERVAL '1' HOUR)) " +
                    "GROUP BY user, window_start, window_end "; // grouped by the window columns

    /**
     * Entry point: builds the pipeline and executes it.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if executing the job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Source with timestamps assigned and watermarks generated for monotonous timestamps
        SingleOutputStreamOperator<Event> timedEvents = env
                .fromElements(
                        new Event("Alice", "./home", 1000L),
                        new Event("Bob", "./cart", 1000L),
                        new Event("Alice", "./prod?id=1", 25 * 60 * 1000L),
                        new Event("Alice", "./prod?id=4", 55 * 60 * 1000L),
                        new Event("Bob", "./prod?id=5", 3600 * 1000L + 60 * 1000L),
                        new Event("Cary", "./home", 3600 * 1000L + 30 * 60 * 1000L),
                        new Event("Cary", "./prod?id=7", 3600 * 1000L + 59 * 60 * 1000L)
                )
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<Event>forMonotonousTimestamps()
                                .withTimestampAssigner(new EventTimestampAssigner())
                );

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Convert the stream to a table; timestamp is declared as the rowtime attribute and named ts
        Table eventTable = tableEnv.fromDataStream(
                timedEvents,
                $("user"),
                $("url"),
                $("timestamp").rowtime().as("ts")
        );
        // Register the table so the SQL below can refer to it as EventTable
        tableEnv.createTemporaryView("EventTable", eventTable);

        Table hourlyCounts = tableEnv.sqlQuery(HOURLY_COUNT_SQL);
        tableEnv.toDataStream(hourlyCounts).print("result");
        env.execute();
    }

    /** Timestamp assigner that returns the value of {@code Event#getTimestamp()}. */
    private static final class EventTimestampAssigner implements SerializableTimestampAssigner<Event> {
        @Override
        public long extractTimestamp(Event element, long recordTimestamp) {
            return element.getTimestamp();
        }
    }
}

输出结果

result> +I[Alice, 1970-01-01T01:00, 3]
result> +I[Bob, 1970-01-01T01:00, 1]
result> +I[Bob, 1970-01-01T02:00, 1]
result> +I[Cary, 1970-01-01T02:00, 2]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值