PyFlink example: reading Kafka data and writing it to MySQL

This post shows how to use PyFlink (Apache Flink's Python API) to read a Kafka data stream and write the processed data into a MySQL database. The example covers producing JSON records to Kafka with JsonRowSerializationSchema, consuming them with JsonRowDeserializationSchema, and writing rows to a MySQL table through JdbcSink.


Dependency download

https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-kafka/1.17.1/
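
After downloading, the connector jars have to be visible to the PyFlink job, either by copying them into Flink's lib/ directory or by registering them on the execution environment (the commented env.add_jars lines in the example below do the same thing). A minimal sketch of the latter, assuming the jars were saved under /opt/flink; the file names and versions are placeholders and must match what you actually downloaded:

from pyflink.datastream import StreamExecutionEnvironment

env = StreamExecutionEnvironment.get_execution_environment()
# register the connector jars with this job; the paths and versions below are examples only
env.add_jars(
    "file:///opt/flink/flink-sql-connector-kafka-1.16.0.jar",
    "file:///opt/flink/flink-connector-jdbc-1.16.0.jar",
    "file:///opt/flink/mysql-connector-java-8.0.29.jar",
)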

Versions

flink: 1.16.0

kafka: 2.13-3.2.0

Example

import logging
import sys

from pyflink.common import Types
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.connectors.jdbc import JdbcSink, JdbcConnectionOptions
from pyflink.datastream.connectors.kafka import FlinkKafkaProducer, FlinkKafkaConsumer
from pyflink.datastream.formats.json import JsonRowSerializationSchema, JsonRowDeserializationSchema


def write_to_kafka(env):
    # serialize a small collection of (int, str) rows as JSON and publish them to test_json_topic
    type_info = Types.ROW([Types.INT(), Types.STRING()])
    ds = env.from_collection(
        [(1, 'hi'), (2, 'hello'), (3, 'hi'), (4, 'hello'), (5, 'hi'), (6, 'hello'), (6, 'hello')],
        type_info=type_info)

    serialization_schema = JsonRowSerializationSchema.Builder() \
        .with_type_info(type_info) \
        .build()
    kafka_producer = FlinkKafkaProducer(
        topic='test_json_topic',
        serialization_schema=serialization_schema,
        # note: sasl.mechanism must match the JAAS login module (PLAIN -> PlainLoginModule, SCRAM-SHA-256/512 -> ScramLoginModule)
        producer_config={
            'bootstrap.servers': '192.168.1.110:9092', 'group.id': 'test-consumer-group',
            'security.protocol': 'SASL_PLAINTEXT', 'sasl.mechanism': 'PLAIN',
            'sasl.jaas.config': 'org.apache.kafka.common.security.scram.ScramLoginModule required username="aaaaaaaaa" password="bbbbbbb";'
        }
    )

    # note that the output type of ds must be RowTypeInfo
    ds.add_sink(kafka_producer)
    env.execute()

def read_from_kafka(env):
    # consume JSON records from test_json_topic, deserialize them into rows and print them
    deserialization_schema = JsonRowDeserializationSchema.Builder() \
        .type_info(Types.ROW([Types.INT(), Types.STRING()])) \
        .build()
    kafka_consumer = FlinkKafkaConsumer(
        topics='test_json_topic',
        deserialization_schema=deserialization_schema,
        properties={
            'bootstrap.servers': '192.168.1.110:9092', 'group.id': 'test-consumer-group',
            'security.protocol': 'SASL_PLAINTEXT', 'sasl.mechanism': 'PLAIN',
            'sasl.jaas.config': 'org.apache.kafka.common.security.scram.ScramLoginModule required username="aaaaa" password="bbbbbb";'
        }
    )
    kafka_consumer.set_start_from_earliest()

    env.add_source(kafka_consumer).print()
    env.execute()

def write_data_to_db(env):
    # write a small collection of (id, title) rows into the MySQL table `flink` through the JDBC sink
    type_info = Types.ROW([Types.INT(), Types.STRING()])
    env.from_collection(
        [(101, "Stream Processing with Apache Flink"),
        (102, "Streaming Systems"),
        (103, "Designing Data-Intensive Applications"),
        (104, "Kafka: The Definitive Guide")
        ], type_info=type_info) \
        .add_sink(
        JdbcSink.sink(
            "insert into flink (id, title) values (?, ?)",
            type_info,
            JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                .with_url('jdbc:mysql://192.168.1.110:23006/test')
                .with_driver_name('com.mysql.cj.jdbc.Driver')  # driver class for Connector/J 8.x ('com.mysql.jdbc.Driver' is deprecated)
                .with_user_name('sino')
                .with_password('Caib@sgcc-56')
                .build()
        ))

    env.execute()


if __name__ == '__main__':
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")

    env = StreamExecutionEnvironment.get_execution_environment()
    #env.add_jars("file:///opt/flink/flink-sql-connector-kafka-1.15.0.jar")
    #env.add_jars("file:///opt/flink/kafka-clients-2.8.1.jar")
    #env.add_jars("file:///opt/flink/flink-connector-jdbc-1.16.0.jar")
    #env.add_jars("file:///opt/flink/mysql-connector-java-8.0.29.jar")
    

    print("start reading data from kafka")
    read_from_kafka(env)
    #write_data_to_db(env)
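
The two helpers above exercise the connectors separately: read_from_kafka only prints what it consumes, and write_data_to_db writes a hard-coded collection. The sketch below wires the same pieces into the Kafka-to-MySQL pipeline the title describes. It is a minimal sketch that reuses the topic, schema, table and connection settings from the example (the SASL properties are omitted for brevity; add the same properties dict as in read_from_kafka if your cluster requires authentication), and it assumes the target table flink(id INT, title VARCHAR) already exists in the test database.

def kafka_to_mysql(env):
    type_info = Types.ROW([Types.INT(), Types.STRING()])
    deserialization_schema = JsonRowDeserializationSchema.Builder() \
        .type_info(type_info) \
        .build()
    kafka_consumer = FlinkKafkaConsumer(
        topics='test_json_topic',
        deserialization_schema=deserialization_schema,
        properties={'bootstrap.servers': '192.168.1.110:9092', 'group.id': 'test-consumer-group'}
    )
    kafka_consumer.set_start_from_earliest()

    # each JSON record {"f0": ..., "f1": ...} is deserialized into a Row(INT, STRING)
    # and inserted into the flink(id, title) table
    env.add_source(kafka_consumer) \
        .add_sink(JdbcSink.sink(
            "insert into flink (id, title) values (?, ?)",
            type_info,
            JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                .with_url('jdbc:mysql://192.168.1.110:23006/test')
                .with_driver_name('com.mysql.cj.jdbc.Driver')
                .with_user_name('sino')
                .with_password('Caib@sgcc-56')
                .build()
        ))
    env.execute()

Call kafka_to_mysql(env) from the __main__ block in place of read_from_kafka once the two standalone functions have been verified.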