Python 多进程

from multiprocessing import Pool
from tqdm import tqdm
import pandas as pd

def mainmain(num):
    """Return ``[num, num + 1, num / 2]`` for a single input value.

    The three-element list shape lets the caller stack the per-item results
    into rows of a table.
    """
    successor = num + 1
    half = num / 2
    return [num, successor, half]


num_list = list(range(1, 101))

# Worker processes started with the "spawn" method (the default on Windows and
# macOS) re-import this module. Without the __main__ guard each worker would
# try to create its own Pool while importing, which fails at startup.
if __name__ == "__main__":
    # Fan the inputs out over a pool of worker processes.
    with Pool(processes=15) as pool:
        # Wrapping the *input* in tqdm only tracks items being handed to the
        # pool. Wrapping imap()'s output advances the bar as results come
        # back; imap() still yields results in input order.
        results = list(
            tqdm(
                pool.imap(mainmain, num_list),
                total=len(num_list),
                desc="Processing",
            )
        )
    # Turn the per-item result rows into a DataFrame.
    df = pd.DataFrame(results, columns=['num', 'num_plus_1', 'num_half'])
    # Show the table.
    print(df)
def mainmain(file):
    """Convert one PDF into an ``.xlsx`` file next to it, skipping finished work.

    The output path is the input path with its ``.pdf`` extension (matched
    case-insensitively) swapped for ``.xlsx``. Only the extension is
    rewritten: a plain ``str.replace("pdf", "xlsx")`` would also rewrite any
    "pdf" inside directory or file names (``./pdfs/a.pdf`` would become
    ``./xlsxs/a.xlsx``) and so target the wrong location.

    Paths that do not end in ``.pdf`` are ignored. If the ``.xlsx`` already
    exists, nothing is done. Otherwise ``pdf2csv(file)`` is called and
    ``to_excel`` is invoked on its result.

    Args:
        file: Path of the PDF to convert.

    Returns:
        None. Any exception raised along the way is appended to
        ``error_log.txt`` (one tab-separated "path, error" line per failure)
        instead of being propagated, so a single bad file does not abort a
        batch run.
    """
    try:
        root, ext = os.path.splitext(file)
        if ext.lower() != ".pdf":
            return
        xlsx_file = root + ".xlsx"
        if not os.path.exists(xlsx_file):
            df = pdf2csv(file)
            df.to_excel(xlsx_file, index=False)
    except Exception as e:
        # Append mode creates the log on first use. UTF-8 is explicit so
        # non-ASCII paths or messages cannot fail under the locale encoding.
        with open('error_log.txt', 'a', encoding='utf-8') as f:
            f.write(f"{file}\t{e}\n")

if __name__ == "__main__":
    # Gather every regular file directly inside the data directory
    # (sub-directories are skipped), then hand the paths to a worker pool.
    pdf_path = './data/'
    file_list = []
    for entry_name in os.listdir(pdf_path):
        candidate = os.path.join(pdf_path, entry_name)
        if os.path.isfile(candidate):
            file_list.append(candidate)

    with Pool(processes=15) as pool:
        results = pool.map(mainmain, file_list)

解密 PDF

def try_decrypt_password(pdf_path, password):
    """Try a single password against the PDF at ``pdf_path``.

    Opens the file in binary mode, builds a ``PdfReader`` on it and calls
    ``decrypt(password)``.

    Returns:
        ``password`` when ``decrypt`` reports a truthy result, otherwise
        ``None``.
    """
    with open(pdf_path, 'rb') as file:
        unlocked = PdfReader(file).decrypt(password)
    return password if unlocked else None


def pdf_decrypt(pdf_path):
    """Brute-force a six-digit numeric password for the PDF at ``pdf_path``.

    Every candidate from "000000" to "999999" is submitted to a process pool
    as a ``try_decrypt_password`` call. Results are consumed in completion
    order; on the first non-None result the password is printed, all futures
    are asked to cancel, and the search stops. A ``KeyboardInterrupt`` during
    the wait also cancels the futures.

    Returns:
        The password string that worked, or a fixed Chinese
        "no valid password found" message string when none was found
        (including when the search was interrupted first).
    """
    # Zero-padded candidates covering the whole six-digit space.
    candidates = [f"{n:06d}" for n in range(1000000)]
    found_password = None

    with ProcessPoolExecutor(max_workers=20) as executor:
        pending = {
            executor.submit(try_decrypt_password, pdf_path, candidate): candidate
            for candidate in candidates
        }

        def cancel_pending():
            # Request cancellation of every submitted future.
            for task in pending:
                task.cancel()

        try:
            for finished in tqdm(as_completed(pending), total=len(candidates)):
                outcome = finished.result()
                if outcome is None:
                    continue
                found_password = outcome
                print(f"解密成功的密码是: {outcome}")
                cancel_pending()
                break
        except KeyboardInterrupt:
            print("\n用户中断了进程")
            cancel_pending()

    if found_password is None:
        return "没有找到有效密码"
    return found_password

# pdf_decrypt() launches a process pool. Under the "spawn" start method the
# workers re-import this module, so the driver must only run when the file is
# executed as a script -- otherwise every worker would start its own search.
if __name__ == "__main__":
    pdf_path = "xxx.pdf"
    password = pdf_decrypt(pdf_path)
    print(password)
    # pdf_decrypt() returns this message string instead of a password when the
    # search fails; it must not be handed to pdfplumber as a password.
    if password != "没有找到有效密码":
        with pdfplumber.open(pdf_path, password=password) as pdf:
            first_page = pdf.pages[0]
            text = first_page.extract_text()
            print(text)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值