python并发向mysql插入数据

本文介绍了一种使用Python并发地向MySQL数据库插入大量数据的方法。通过多线程技术和连接池优化了数据插入效率,并实现了10万到500万条记录的快速导入。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

python并发向mysql插入数据

前言

本次代码主要在mysql数据库中实现,并发插入10w,100w,500w行数据

环境

环境描述
系统环境ubuntu20.04
数据库mysql8.0.25
python3.9.5

数据表

create table t_normal (col1 int primary key auto_increment, col2 bigint,col3 timestamp )engine=innodb ;
create table t_partition (col1 int primary key auto_increment, col2 bigint, col3 timestamp)engine=innodb partition by hash(col1) partitions 4;
create table t_global (col1 int primary key auto_increment, col2 bigint, col3 timestamp)engine=innodb;

构建数据

10w.txt/100w.txt/500w.txt
------------------------------------------------------
10138000,2021-06-24 06:10:41
3602000,2021-06-24 06:10:41
25705000,2021-06-24 06:10:41
11145000,2021-06-24 06:10:41
17882000,2021-06-24 06:10:41
10666000,2021-06-24 06:10:41
31532000,2021-06-24 06:10:41
7480000,2021-06-24 06:10:41

实现步骤

step1: 连接数据库
def db_cnnection(self) :
    """Build a PooledDB connection pool from the INI file named on argv[1].

    Reads host/port/user/password/database from the [sqlnode1] section
    (via the project `config` wrapper around configparser) and returns a
    DBUtils PooledDB instance capped at 15 connections.
    """
    conn = config (sys.argv[1])
    host = conn.configp.get ('sqlnode1', 'host')
    port = int (conn.configp.get ('sqlnode1', 'port'))
    user = conn.configp.get ('sqlnode1', 'user')
    password = conn.configp.get ('sqlnode1', 'password')
    database = conn.configp.get ('sqlnode1', 'database')
    maxconnections = 15

    # BUG FIX: PooledDB's second positional parameter is `mincached`, not
    # `maxconnections`. Passing 15 positionally pre-opened 15 idle
    # connections and left the pool size UNLIMITED. Pass it by keyword so
    # the cap is actually enforced.
    pool = PooledDB (
        pymysql,
        maxconnections=maxconnections,
        host=host,
        user=user,
        port=port,
        password=password,
        database=database
    )
    return pool

上述步骤主要依靠python的configparser模块,实现对ini文件的解析。将configparser模块中的get方法包装为:

import configparser

class config (object) :
    """Thin wrapper around configparser for reading a single INI file.

    Attributes:
        fpath:   path of the INI file that was parsed.
        configp: the underlying ConfigParser instance (read by callers).
    """

    def __init__(self, path):
        self.fpath = path
        parser = configparser.ConfigParser ()
        parser.read (path, encoding="UTF-8")
        self.configp = parser

    def get(self, option, value) :
        """Return the entry named *value* from section *option*."""
        return self.configp.get (option, value)

config.ini文件

; define host
[hosts]
sqlnode1=172.21.0.3
sqlnode2=172.21.0.4
sqlnode3=172.21.0.5
; define sqlnode
[sqlnode1]
host=172.21.0.3
port=3306
user=greatdb
password=******
database=test

step2: 获取数据,将数据转换为元组的格式
def getData(self) :
    """Read the data file named on argv[2] into a list of tuples.

    Each line "col2,col3" becomes the tuple ("col2", "col3"), the shape
    cursor.executemany() expects for the 2-column INSERT.
    """
    with open (sys.argv[2], 'r') as f :
        data = []
        for line in f.readlines () :
            line = line.replace ('\n', '')
            data.append (tuple (line.split (',')))
    # BUG FIX: `return` was indented 3 spaces (vs the 4-space body),
    # which is a SyntaxError.
    return data
# data format: (xx, xx), (xx, xx)
    
step3: 插入数据
def mysql_insert(self, *args) :
    """Batch-insert the given rows into every table in table_list.

    Commits per table; on failure rolls back and logs the failing SQL.
    """
    con = self.pool.connection ()
    cur = con.cursor ()
    for table in table_list :
        sql = "INSERT INTO test.{}(col2,col3) VALUES(%s, %s)".format (table)
        # BUG FIX: the `except` clause was indented inside the `try` body,
        # which is a SyntaxError; re-aligned to pair with `try`.
        try :
            cur.executemany (sql, args)
            con.commit ()
        except Exception as e :
            con.rollback ()
            logger.error ("SQL FAIL: ==>> {},result: ==>> {}".format (sql, e))
    # Close the cursor before its connection (original closed con first).
    cur.close ()
    con.close ()

executemany: 为批量执行,效果要比execute快

step4: 删除数据

当多次执行此程序时需要将数据清理后再导入!

def delete_data(self) :
    """Truncate every table in table_list so repeated runs start clean.

    TRUNCATE is DDL in MySQL (implicit commit), hence no con.commit().
    """
    st = time.time ()
    con = self.pool.connection ()
    cur = con.cursor ()
    for table in table_list :
        sql = "TRUNCATE TABLE test.{}".format (table)
        cur.execute (sql)
    cur.close ()
    con.close ()
    # BUG FIX: this line was indented 3 spaces (outside the 4-space
    # function body), which is a SyntaxError.
    logger.info ("Clean Data .time cost: {} seconds".format (round (time.time () - st, 3)))
step5: 设置并发量

在此环境中,最大连接数为15(maxconnections),最大并发量为10(Queue(maxsize=10)),切记最大并发量不能超过最大连接数!

def task(self) :
    """Run mysql_insert concurrently, at most 10 threads per batch.

    Threads are staged in a bounded Queue; when it fills (or the data
    runs out) the whole batch is started, then joined.
    """
    q = Queue (maxsize=10)
    while self.data :
        content = self.data.pop ()
        t = threading.Thread (target=self.mysql_insert, args=(content,))
        q.put (t)
        if q.full () or not self.data :
            thread_list = []
            while not q.empty () :
                worker = q.get ()
                thread_list.append (worker)
                worker.start ()
            # BUG FIX: the join loop was nested inside the drain loop
            # above, joining each thread immediately after starting it —
            # that serialized the inserts. Join after the batch starts.
            for worker in thread_list :
                worker.join ()
                    
代码总体展示:
import pymysql
import threading
import re, sys
import time
from override_parser import config
from queue import Queue
from DBUtils.PooledDB import PooledDB
from general import logger
from ColumnOperation import ColumnOperation

class ThreadInsert (object) :
    """Concurrently bulk-insert a data file into several MySQL tables.

    Workflow (all driven from __init__): build a connection pool from the
    INI file on argv[1], load rows from the data file on argv[2], truncate
    the target tables, then insert with up to 10 threads at a time.
    Relies on the module-level global ``table_list`` and the project
    ``logger``/``config`` helpers.
    """

    def __init__(self) :
        start_time = time.time ()
        self.pool = self.db_cnnection ()
        self.data = self.getData ()
        self.delete_data ()
        self.task ()
        # BUG FIX: original read `round(time.time() - start_time), 5`,
        # so the 5 was a stray extra format() argument and the elapsed
        # time was rounded to 0 decimal places.
        logger.info ("Concurrent insert data success,time cost: {} seconds".format (round (time.time () - start_time, 5)))

    def db_cnnection(self) :
        """Build a PooledDB pool (cap 15) from [sqlnode1] of the INI file."""
        conn = config (sys.argv[1])
        host = conn.configp.get ('sqlnode1', 'host')
        port = int (conn.configp.get ('sqlnode1', 'port'))
        user = conn.configp.get ('sqlnode1', 'user')
        password = conn.configp.get ('sqlnode1', 'password')
        database = conn.configp.get ('sqlnode1', 'database')
        maxconnections = 15

        # BUG FIX: PooledDB's second positional parameter is `mincached`,
        # not `maxconnections`; pass by keyword so the cap is enforced.
        pool = PooledDB (
            pymysql,
            maxconnections=maxconnections,
            host=host,
            user=user,
            port=port,
            password=password,
            database=database
        )
        return pool

    def getData(self) :
        """Read argv[2] into a list of ("col2", "col3") tuples."""
        with open (sys.argv[2], 'r') as f :
            data = []
            for line in f.readlines () :
                line = line.replace ('\n', '')
                data.append (tuple (line.split (',')))
        return data

    def delete_data(self) :
        """Truncate every target table (TRUNCATE is DDL: auto-commits)."""
        st = time.time ()
        con = self.pool.connection ()
        cur = con.cursor ()
        for table in table_list :
            sql = "TRUNCATE TABLE test.{}".format (table)
            cur.execute (sql)
        cur.close ()
        con.close ()
        logger.info ("Clean Data .time cost: {} seconds".format (round (time.time () - st, 3)))

    def mysql_insert(self, *args) :
        """Batch-insert the given rows into every table in table_list."""
        con = self.pool.connection ()
        cur = con.cursor ()
        for table in table_list :
            sql = "INSERT INTO test.{}(col2,col3) VALUES(%s, %s)".format (table)
            try :
                cur.executemany (sql, args)
                con.commit ()
            except Exception as e :
                con.rollback ()
                logger.error ("SQL FAIL: ==>> {},result: ==>> {}".format (sql, e))
        # Close cursor before connection (original closed con first).
        cur.close ()
        con.close ()

    def task(self) :
        """Run mysql_insert with at most 10 concurrent threads per batch."""
        q = Queue (maxsize=10)  # bounds both the queue and the batch size
        while self.data :
            content = self.data.pop ()
            t = threading.Thread (target=self.mysql_insert, args=(content,))
            q.put (t)
            if q.full () or not self.data :
                thread_list = []
                while not q.empty () :
                    t = q.get ()
                    thread_list.append (t)
                    t.start ()
                for t in thread_list :
                    t.join ()


if __name__ == '__main__' :
    # Module-level global read by the ThreadInsert methods: the tables
    # that each batch of rows is inserted into.
    table_list = ["t_global", "t_normal", "t_partition"] # tables for concurrent insert
    # The constructor runs the whole pipeline (pool, load, truncate, insert).
    ThreadInsert()
执行
python3 ThreadInsert.py config.ini 10w.txt
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值