import numpy as np
import pandas as pd
from pandas import Series,DataFrame
一、创建多级索引
# Build a Series of 10 random values indexed by a three-level MultiIndex
# (outer letter, middle letter, inner integer), then display it.
data = Series(
    np.random.randn(10),
    index=pd.MultiIndex.from_arrays([
        list('xxxxxxyyyy'),
        list('aaabbbccdd'),
        [1, 2, 3, 1, 2, 3, 1, 2, 2, 3],
    ]),
)
print(data)
x a 1 1.344209
2 0.110941
3 -0.194212
b 1 1.017832
2 -0.330067
3 -0.330298
y c 1 0.936063
2 0.727602
d 2 -1.859205
3 0.008185
dtype: float64
二、检索多级索引
1.选取外层索引
# Select every entry under outer label 'x'; the outer level is dropped from the result.
data.loc['x']
a 1 1.344209
2 0.110941
3 -0.194212
b 1 1.017832
2 -0.330067
3 -0.330298
dtype: float64
# Label-based slice over the outer level; both endpoints ('x' and 'y') are included.
data.loc['x':'y']
x a 1 1.344209
2 0.110941
3 -0.194212
b 1 1.017832
2 -0.330067
3 -0.330298
y c 1 0.936063
2 0.727602
d 2 -1.859205
3 0.008185
dtype: float64
2.选取内层索引
# Supplying one key per level returns a single scalar, much like
# indexing into a multi-dimensional array.
data.loc['x', 'a', 1]
1.3442085471583016
三、解除多级索引
默认为最内层的索引,即level=-1
# Pivot the innermost index level (the default, level=-1) into columns.
print(data.unstack(level=-1))
1 2 3
x a 1.344209 0.110941 -0.194212
b 1.017832 -0.330067 -0.330298
y c 0.936063 0.727602 NaN
d NaN -1.859205 0.008185
解除其他层索引
# Pivot the second-from-last (middle) index level into columns instead.
print(data.unstack(-2))
a b c d
x 1 1.344209 1.017832 NaN NaN
2 0.110941 -0.330067 NaN NaN
3 -0.194212 -0.330298 NaN NaN
y 1 NaN NaN 0.936063 NaN
2 NaN NaN 0.727602 -1.859205
3 NaN NaN NaN 0.008185
四、转化为多级索引
将中间层索引转换为内层索引
# Move the middle level out to the columns, then stack the columns back as
# the new innermost index level (stack's default, level=-1).
data.unstack(level=-2).stack(level=-1)
x 1 a 1.344209
b 1.017832
2 a 0.110941
b -0.330067
3 a -0.194212
b -0.330298
y 1 c 0.936063
2 c 0.727602
d -1.859205
3 d 0.008185
dtype: float64
五、为不同层索引指定名称(name)
# Pivot the innermost index level into columns (explicit default, level=-1)
# and show the resulting DataFrame before any names are assigned.
df = data.unstack(level=-1)
print(df)
1 2 3
x a 1.344209 0.110941 -0.194212
b 1.017832 -0.330067 -0.330298
y c 0.936063 0.727602 NaN
d NaN -1.859205 0.008185
# Label the column axis, then name the two row-index levels, and display
# the frame so the new names appear in the header.
df.columns.name = 'hello'
df.index.names = ['outer', 'inner']
print(df)
hello 1 2 3
outer inner
x a 1.344209 0.110941 -0.194212
b 1.017832 -0.330067 -0.330298
y c 0.936063 0.727602 NaN
d NaN -1.859205 0.008185