1、全展开数组
from collections.abc import *
def flatten(lst, out_lst=None):
    """Recursively flatten nested iterables into a single flat list.

    Args:
        lst: Iterable whose items may themselves be iterables.
        out_lst: Optional list that receives the leaves (extended in place);
            a new list is created when omitted.

    Returns:
        ``out_lst`` (or the new list) with every leaf appended in
        depth-first, left-to-right order.
    """
    if out_lst is None:
        out_lst = []
    for item in lst:
        # A str is iterable, and a one-character str yields itself, so
        # recursing into strings never terminates; keep them as leaves.
        if isinstance(item, Iterable) and not isinstance(item, str):
            flatten(item, out_lst)
        else:
            out_lst.append(item)
    return out_lst


print(flatten([[1, 2, 3], [4, 5]]))
print(flatten([[1, 2, 3], [4, 5]], [6, 7]))
print(flatten([[[1, 2, 3], [4, 5, 6]]]))
2、等分列表
from math import ceil
def divide(lst, size):
    """Split ``lst`` into consecutive slices holding at most ``size`` items.

    ``size`` is the length of each chunk, not the number of chunks. A
    non-positive ``size`` returns the input wrapped in a one-element list.
    """
    if size <= 0:
        return [lst]
    chunk_count = ceil(len(lst) / size)
    chunks = []
    for index in range(chunk_count):
        begin = index * size
        chunks.append(lst[begin:begin + size])
    return chunks


r = divide([1, 3, 5, 7, 9, 5, 8, 7, 15, 54], 3)
print(r)  # [[1, 3, 5], [7, 9, 5], [8, 7, 15], [54]]
r = divide([1, 3, 5, 7, 9], 0)
print(r)  # [[1, 3, 5, 7, 9]]
r = divide([1, 3, 5, 7, 9], -3)
print(r)  # [[1, 3, 5, 7, 9]]
3、列表压缩
def filter_false(lst):
    """Return a list of the truthy items of ``lst``, in their original order."""
    return [item for item in lst if item]


r = filter_false([None, 0, False, '', [], 'ok', [1, 2]])
print(r)  # ['ok', [1, 2]]
4、更长列表
def max_length(*lst):
    """Return the longest of the given sequences (the first one on ties).

    Args:
        *lst: One or more objects that support ``len()``.

    Raises:
        TypeError: If called with no arguments.
    """
    if not lst:
        raise TypeError('max_length expected at least 1 argument, got 0')
    # Pass the tuple itself: max(*lst) with a single argument would iterate
    # over that argument's elements instead of comparing whole sequences.
    return max(lst, key=len)


r = max_length([1, 2, 3], [4, 5, 6, 7], [8])
print(f'更长的列表是{r}')  # [4, 5, 6, 7]
r = max_length([1, 2, 3], [4, 7, 5, 6, 7], [8, 9])
print(f'更长的列表是{r}')  # [4, 7, 5, 6, 7]
5、多表之最
def max_lists(*lst):
    """Return the largest value found across all of the given iterables.

    Args:
        *lst: One or more iterables of mutually comparable values. Empty
            iterables are skipped as long as at least one value exists.

    Raises:
        TypeError: If called with no arguments.
        ValueError: If none of the iterables contains a value.
    """
    if not lst:
        raise TypeError('max_lists expected at least 1 argument, got 0')
    # Scan every value directly: max(*lst) with a single argument would
    # iterate that argument's elements and then fail inside the key function.
    return max(value for values in lst for value in values)


r = max_lists([1, 2, 3], [6, 7, 8], [4, 5])
print(r)  # 8
6、列表查重
def has_duplicates(lst):
    """Return True when ``lst`` contains at least one repeated element.

    Args:
        lst: A sized collection of hashable items.
    """
    # A set drops repeats, so it is shorter than the input exactly when
    # some element occurs more than once.
    return len(lst) != len(set(lst))


x = [1, 1, 2, 2, 3, 2, 3, 4, 5, 6]
print(x)
7、浮点数等差数列
def rang(start, stop, n):
    """Build an arithmetic sequence of floats from ``start`` to ``stop``.

    ``start`` and ``stop`` are first normalised to two decimal places and
    ``n`` to an integer. The result holds the initial value followed by
    ``n`` further values (n + 1 items when ``n`` is positive), each rounded
    to two decimals.
    """
    start = float('%.2f' % start)
    stop = float('%.2f' % stop)
    n = int('%.d' % n)
    step = (stop - start) / n
    values = [start]
    current = start
    for _ in range(n):
        current = current + step
        values.append(round(current, 2))
    return values


rang(1, 8, 5)  # [1.0, 2.4, 3.8, 5.2, 6.6, 8.0]
8、按条件分组
def bif_by(lst, f):
    """Partition ``lst`` into two groups according to predicate ``f``.

    Args:
        lst: Any iterable, including one-shot iterators and generators.
        f: Predicate called exactly once per item.

    Returns:
        ``[matching, non_matching]`` with the original order kept inside
        each group.
    """
    matching, non_matching = [], []
    # A single pass keeps one-shot iterators usable (a second pass would
    # find them exhausted) and avoids evaluating the predicate twice.
    for item in lst:
        if f(item):
            matching.append(item)
        else:
            non_matching.append(item)
    return [matching, non_matching]


records = [25, 22, 45, 39, 31, 34]
bif_by(records, lambda x: x < 35)  # [[25, 22, 31, 34], [45, 39]]
9、map实现向量运算
# map(function, iterable1, iterable2) applies the function to the items of
# several sequences in lockstep.
lst1 = [1, 2, 3, 4, 5, 6]
lst2 = [3, 4, 5, 6, 3, 2]
list(map(lambda left, right: left * right + 1, lst1, lst2))
# [4, 9, 16, 25, 16, 13]
10、值最大的字典
def max_pairs(dic):
    """Return every (key, value) pair of ``dic`` whose value is the maximum.

    An empty ``dic`` is handed back unchanged (the mapping itself, not a
    list).
    """
    if len(dic) == 0:
        return dic
    largest = max(dic.values())
    return [pair for pair in dic.items() if pair[1] == largest]


r = max_pairs({'a': -10, 'b': 6, 'c': 3, 'd': 5})
print(r)  # [('b', 6)]
11、合并两个字典
def merge_dict(dic1, dic2):
    """Return a new dict holding the entries of ``dic1`` then ``dic2``.

    When a key appears in both, the value from ``dic2`` wins. Neither
    input is modified.
    """
    merged = dict(dic1)
    merged.update(dic2)
    return merged


merge_dict({'a': 1, 'b': 2}, {'c': 3})
# {'a': 1, 'b': 2, 'c': 3}
12、 topn字典
from heapq import nlargest
# Return the keys of d that map to its n largest values.
def topn_dict(d, n):
    """Return the ``n`` keys of ``d`` with the largest values, largest first."""
    return nlargest(n, d, key=d.__getitem__)


a = topn_dict({'a': 10, 'b': 8, 'c': 9, 'd': 10}, 2)
print(a)  # ['a', 'd']
13、 命名元组提高可读性
from collections import namedtuple
# A named tuple called Point with the fields x, y and z.
Point = namedtuple('Point', 'x y z')
lst = [
    Point(1.5, 2, 3.0),
    Point(-0.3, -1.0, 2.1),
    Point(1.3, 2.8, -2.5),
]
print(lst[0].y - lst[1].y)  # 3.0
14、样本抽样
from random import randint,sample
lst = list(randint(0, 50) for _ in range(100))
print(lst[:5])
# Draw 5 items at random, without replacement, from the 100 samples.
lst_sample = sample(lst, 5)
print(lst_sample)
15、 shuffle重洗数据集
值得注意的是,shuffle 对 lst 就地(in place)洗牌,不创建新列表,节省存储空间。
from random import shuffle
lst = list(randint(0, 50) for _ in range(100))
# shuffle reorders lst in place.
shuffle(lst)
print(lst[:5])
16、 uniform生成均匀分布的坐标点
random模块中的 uniform(a,b) 生成 [a,b] 区间内的一个随机浮点数(端点 b 是否能取到取决于浮点舍入),如下生成10个均匀分布的二维坐标点。
from random import uniform
# Ten 2-D points, each coordinate drawn with uniform(0, 10).
a = list((uniform(0, 10), uniform(0, 10)) for _ in range(10))
print(a)
17、高斯分布的坐标点
random模块中的 gauss(u,sigma) 生成均值为u, 标准差为sigma的满足高斯分布的值,如下生成10个二维坐标点,样本误差(y-2*x-1)满足均值为0,标准差为1的高斯分布:
from random import gauss
x = range(10)
# y follows the line 2 * x + 1 with gauss(0, 1) noise added per point.
y = list(2 * xi + 1 + gauss(0, 1) for xi in x)
points = [*zip(x, y)]
print(points)
18、 chain高效串联多个容器对象
chain 函数串联a和b,兼顾内存效率同时写法更加优雅。
from itertools import chain
a = [1, 3, 5, 0]
b = (2, 4, 6)
# Walk both containers back to back without building a combined copy.
for i in chain.from_iterable((a, b)):
    print(i)