python常用的编程模块_python maxminscaler-CSDN博客

本文链接：https://blog.csdn.net/Blackoutdragon/article/details/116714925

本文介绍了文件流的基本操作，包括读取和写入文本文件的方法，并演示了如何使用Python处理文件数据，例如删除字符串中的特定字符、数据归一化等。此外，还介绍了批量修改文件夹内文件名及对Excel文件进行操作的技术。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

文章目录

文件流的读写

读取保存数据为数组的txt文件

使用try捕获异常，使用while循环逐行读取，直到文件末尾

# Python 2 snippet (raw_input / print statement). The excerpt is truncated: the
# except clause matching this try and the remainder of both loops are not shown.
file_to_read = raw_input("Enter file name of tests (empty string to end program):")
try:
    infile = open(file_to_read, 'r')
    # NOTE(review): the prompt says an empty string ends the program, but the
    # loop compares against " " (a single space) — confirm against the full program.
    while file_to_read != " ":
        file_to_write = raw_input("Enter output file name (.csv will be appended to it):")
        file_to_write = file_to_write + ".csv"
        outfile = open(file_to_write, "w")
        # The first line of the input file is read as the number of readings.
        readings = (infile.readline())
        print readings
        # NOTE(review): on the first pass readings is still a str, so `!= 0`
        # is true regardless of its content — verify the intended end condition.
        while readings != 0:
            global count
            readings = int(readings)
            # The next two lines of the input file are taken as minimum and
            # maximum; both are still strings at this point.
            minimum = (infile.readline())
            maximum = (infile.readline())

使用for遍历读取的每一行，进行一次性的读取和输入

下面调用的程序读取的数据是
在这里插入图片描述

 # Parse ../test/parameter.txt into `result`: a list with one list of floats per line.
 result = list()
    with open('../test/parameter.txt') as  f:
        for line in f.readlines():
            temp = list()
            # Walk through the elements of this line and convert each to a number.
            # strip(",][") only removes ',', ']' and '[' from the two ends of the line;
            # when the line still ends with a newline the trailing bracket survives
            # here and is removed by the replace() calls below.
            b = line.strip(",][").split(',')
            # With five or more fields the last one is discarded.
            # NOTE(review): presumably a trailing field that is not a parameter —
            # the data file is not shown, confirm against it.
            if(len(b) >= 5):
                b.pop()
            for a in b:
                a = a.replace('[','').replace(']','')
                temp.append(float(a))
            result.append(temp)
            #print("temp printed midway is",temp)
            #print("result after appending is",result)

删除str中的特定字符

删除字符串首尾的多余字符串strip()

# Remove unwanted characters from the ends of a string.
def string_remove():
    """Demonstrate str.strip() by printing two stripped strings.

    With no argument strip() removes leading/trailing whitespace; with an
    argument it removes any of the given characters from both ends.
    Returns None; the results are printed.
    """
    str1 = ' abc     \n'
    print(str1.strip())   # abc

    str2 = '----abcdf++++'
    # '-+' is a set of characters to strip, not a literal prefix/suffix.
    print(str2.strip('-+'))  # abcdf

replace函数：将字符串中指定的子串替换为新的子串（替换为空串即为删除）；第三个参数count限制替换次数，下例只替换第一个'old'

ss = 'old old string'
# The third argument (count=1) limits the replacement to the first occurrence only.
ret = ss.replace('old', 'new', 1)
print(ret)  # new old string

re.sub函数：使用正则表达式，一次删除多种字符

str2 = '\nabc\nwrt22\t666\t'  # goal: remove every \n and \t from this string
import re
# The character class [\n\t] matches either a newline or a tab; every match is
# replaced with the empty string.
print(re.sub('[\n\t]','',str2))   # abcwrt22666

数据的归一化

min-max离差归一化标准化

min-max为离差标准化，对原始数据的线性变换，将结果映射到[0,1]之间。主要是根据输入数据的最大值和最小值进行定义的

from sklearn import preprocessing
import numpy as np

# Sample data: 3 rows (samples) x 3 columns (features).
X = np.array([[ 1., -1., 2.],[ 2., 0., 0.],[ 0., 1., -1.]])
min_max_scaler = preprocessing.MinMaxScaler()
# Fit the scaler on X and rescale X in one step; with the scaler's default
# settings each column is mapped linearly onto [0, 1] using that column's min and max.
X_minMax = min_max_scaler.fit_transform(X)

Z-score标准化

求出原始数据的均值和标准差，对数据进行标准化。标准化之后的数据均值为0，标准差为1（标准化只做平移和缩放，不改变分布形状，因此只有原始数据本身服从正态分布时，结果才服从标准正态分布）

import numpy as np

# Z-score standardization: subtract the mean, then divide by the standard
# deviation (ndarray.std() defaults to the population form, ddof=0).
arr = np.asarray([0, 10, 50, 80, 100])
# Vectorized, so the mean and std are computed once instead of once per element.
z_scores = (arr - arr.mean()) / arr.std()
for x in z_scores:
    print(x)

重命名并删改文件夹下的所有文件

import os
import os.path

def _select_sentences(lines, limit):
    """Return at most *limit* sentences taken from *lines*, each followed by ". "."""
    pieces = []
    taken = 0
    for line in lines:
        # Lines starting with "-" (URLs and similar header notes) and blank
        # lines are not article text.
        if line.startswith(("-", "\n")):
            continue
        for sentence in line.split(". "):
            if taken >= limit:
                return "".join(pieces)
            # Fragments of three characters or fewer are treated as stray
            # symbols and dropped.
            if len(sentence) > 3:
                pieces.append(sentence + ". ")
                taken += 1
    return "".join(pieces)


def HandleArticle(path, senNumber):
    """Write a numbered .txt digest (1.txt, 2.txt, ...) for every file under *path*.

    For each directory visited by os.walk(path), every file is read and up to
    *senNumber* sentences (split on ". ") are written to "<n>.txt" in that same
    directory, where n counts the files of that directory starting at 1.
    The source files themselves are left in place.

    Args:
        path: directory whose files are processed (subdirectories included).
        senNumber: maximum number of sentences written per output file.

    Returns:
        None.
    """
    # parent is the directory being visited, filenames the files directly in it.
    for parent, _dirnames, filenames in os.walk(path):
        # Read every source in this directory before writing anything, so that a
        # numbered output such as "1.txt" cannot truncate a source file that has
        # not been read yet.
        outputs = []
        for count, filename in enumerate(filenames, start=1):
            # Join with `parent`, not `path`: files in subdirectories live there,
            # and os.path.join picks the right separator for the platform.
            with open(os.path.join(parent, filename)) as src:
                text = _select_sentences(src, senNumber)
            outputs.append((os.path.join(parent, str(count) + ".txt"), text))

        for target_path, text in outputs:
            with open(target_path, "w") as dst:
                dst.write(text)



# Example invocation: process the folder at this hard-coded Windows path,
# passing 5 as senNumber. Adjust the path before running.
path = r"C:\Users\gray\Desktop\lys"
HandleArticle(path,5)

操作excel的操作

根据字典，给每个用户增加额外信息

import pandas as pd
from pandas import DataFrame


# Build the lookup table from source.txt: each line holds two whitespace-separated
# fields; the first is the key looked up by name below, the second is the value
# written into the ID-number column.
numDict = dict()
with open("./source.txt", encoding='utf-8', errors='ignore') as f:
    for line in f:
        fields = line.split()
        # Skip blank or incomplete lines instead of failing with IndexError.
        if len(fields) < 2:
            continue
        numDict[fields[0]] = fields[1]

# Open the Excel workbook that needs to be filled in.
target = pd.read_excel('.//目标值.xlsx')

# Fail before modifying anything if some name has no entry in the lookup table
# (the per-name dictionary lookup used previously raised KeyError as well).
missing = [name for name in target['姓名'].unique() if name not in numDict]
if missing:
    raise KeyError(f"names not found in source.txt: {missing}")

# Fill the ID-number column in one step. Assigning the whole column avoids
# chained indexing (target[col][mask] = value), which pandas warns about and
# which has no effect when Copy-on-Write is enabled.
target['公民身份号码'] = target['姓名'].map(numDict)
print(target)

# Save the modified data back to the same workbook.
print("start to save the data")
target.to_excel('.//目标值.xlsx', sheet_name='Sheet1',
                index=False, header=True)