Data Frame
Data Frame
import pandas as pd
# Calling the constructor with no arguments gives a DataFrame that has
# neither rows nor columns.
dFrameEmt = pd.DataFrame(data=None)
print(dFrameEmt)
Empty DataFrame
Columns: []
Index: []
import numpy as np
# Three 1-D arrays; array3 has one more element than the other two.
array1 = np.array([10, 20, 30])
array2 = np.array([100, 200, 300])
array3 = np.array([-10, -20, -30, -40])

# A single array becomes a one-column DataFrame, one row per element.
dFrame = pd.DataFrame(data=array1)
print(dFrame)

# A list of arrays becomes one row per array; the shorter rows are filled
# with NaN in the extra column.
dFrame2 = pd.DataFrame(data=[array1, array3, array2])
print(dFrame2)
0
0 10
1 20
2 30
0 1 2 3
0 10 20 30 NaN
1 -10 -20 -30 -40.0
2 100 200 300 NaN
# Explicit row labels (1, 2, 3) and a single column labelled 'A'.
dFrame3 = pd.DataFrame(
    data=[10, 20, 30],
    index=[1, 2, 3],
    columns=['A'],
)
print(dFrame3)
A
1 10
2 20
3 30
# The second constructor argument is the row index; the column keeps its
# default label 0.
dFrame3 = pd.DataFrame(data=[10, 20, 30], index=[100, 200, 300])
print(dFrame3)
0
100 10
200 20
300 30
import pandas as pd
# A flat list yields one column (labelled 0) with one row per element.
list1 = [12, 14, 16, 18, 20]
dFrame4 = pd.DataFrame(data=list1)
print(dFrame4)
0
0 12
1 14
2 16
3 18
4 20
# A list of lists yields one row per inner list.
list1 = [12, 14, 16, 18, 20]
list2 = [13, 15, 17, 19, 21]
dFrame5 = pd.DataFrame(data=[list1, list2])
print(dFrame5)
0 1 2 3 4
0 12 14 16 18 20
1 13 15 17 19 21
# Same two rows as before, now with row labels 1 and 2 and column labels
# 'A' to 'E'.
list1 = [12, 14, 16, 18, 20]
list2 = [13, 15, 17, 19, 21]
dFrame5 = pd.DataFrame(
    data=[list1, list2],
    index=[1, 2],
    columns=['A', 'B', 'C', 'D', 'E'],
)
print(dFrame5)
A B C D E
1 12 14 16 18 20
2 13 15 17 19 21
Creating Data Frame From Dictionary
import pandas as pd
# A list holding one dictionary: the keys become column labels and the
# dictionary becomes the single row.
dict1 = [
    {'a': 10, 'b': 20, 'c': 30, 'd': 40},
]
dFrame6 = pd.DataFrame(data=dict1)
print(dFrame6)
a b c d
0 10 20 30 40
# Two dictionaries with different key sets: the columns are the union of
# the keys, and a key missing from a row shows up as NaN.
dict1 = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
dict2 = {'a': 20, 'e': 30, 'f': 50, 'b': 10, 'c': 5}
dFrame7 = pd.DataFrame(data=[dict1, dict2])
print(dFrame7)
a b c d e f
0 10 20 30 40.0 NaN NaN
1 20 10 5 NaN 30.0 50.0
List of Dictionaries
import pandas as pd
# One row per dictionary; the first dictionary has no 'c' key, so that
# cell is NaN.
listdict = [
    {'a': 2, 'b': 4},
    {'a': 1, 'b': 3, 'c': 5},
]
dFrame8 = pd.DataFrame(data=listdict)
print(dFrame8)
a b c
0 2 4 NaN
1 1 3 5.0
Dictionary of Lists
# Dictionary of lists: each key becomes a column label and its list supplies
# that column's values, one entry per row.
# VDF mixes integers with the float 6.72, so the whole column is shown as
# floats when printed.
dictForest = {
    'State': ['Assam', 'Delhi', 'Kerala'],
    'GArea': [78438, 1483, 38852],
    'VDF': [2797, 6.72, 1663],
}
dFrame9 = pd.DataFrame(dictForest)
print(dFrame9)
State GArea VDF
0 Assam 78438 2797.00
1 Delhi 1483 6.72
2 Kerala 38852 1663.00
Changing sequence of columns
# The columns argument picks the order in which the dictionary's keys are
# laid out as columns.
dFrame9a = pd.DataFrame(
    data=dictForest,
    columns=['State', 'VDF', 'GArea'],
)
print(dFrame9a)
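State VDF GArea
0 Assam 2797.00 78438
1 Delhi 6.72 1483
2 Kerala 1663.00 38852
(Note: the table below has columns 1 to 6 and does not match dFrame9a; it appears to be the output of a different DataFrame whose code is missing from these notes.)
1 2 3 4 5 6
0 2.0 4.0 6.0 8.0 10.0 NaN
1 12.0 14.0 16.0 NaN 20.0 18.0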
Dictionary of Series
import pandas as pd
# Dictionary of Series: each key becomes a column label (a student) and the
# index labels of the Series (the subjects) become the row labels.
# Every Series uses the same three subject labels, so the resulting frame
# has no missing cells.
ResultSheet = {
    'Arnab': pd.Series([90, 91, 97], index=['Maths', 'Science', 'Hindi']),
    'Ramit': pd.Series([92, 81, 96], index=['Maths', 'Science', 'Hindi']),
    'Samriddhi': pd.Series([89, 98, 81], index=['Maths', 'Science', 'Hindi']),
    'Riya': pd.Series([79, 80, 90], index=['Maths', 'Science', 'Hindi']),
    'Mallika': pd.Series([90, 89, 80], index=['Maths', 'Science', 'Hindi']),
}
dFrame12 = pd.DataFrame(ResultSheet)
print(dFrame12)
# drop() returns a new DataFrame without the 'Science' row (axis=0 means
# the label is looked up among the rows).
dFrame16 = dFrame13.drop(labels='Science', axis=0)
print(dFrame16)
89
# Accessing a row: plain assignment only gives the same DataFrame a second
# name, it does not copy it.
dFrame19 = dFrame12
dFrame19.loc['Science']  # the 'Science' row, one entry per column
Arnab 90
Ramit 93
Samriddhi 98
Riya 80
Mallika 89
Preeti 78
Name: Science, dtype: int64
# Accessing a column with .loc: ':' keeps every row, 'Arnab' names the
# column.
dFrame20 = dFrame12
dFrame20.loc[:, 'Arnab']
Maths 90
Science 90
Hindi 90
English 90
Name: Arnab, dtype: int64
Or
# Accessing a column with [] instead of .loc.
dFrame20 = dFrame12
print(dFrame20['Arnab'])
Maths 90
Science 90
Hindi 90
English 90
Name: Arnab, dtype: int64
# A one-column frame with default row labels 0..5; .loc[2] picks the single
# row labelled 2.
dFrame21 = pd.DataFrame(data=[10, 20, 30, 40, 50, 60])
dFrame21.loc[2]
0 30
Name: 2, dtype: int64
Boolean Indexing
# Row-wise test: compare every entry of the 'Maths' row with 90; the result
# is a boolean Series with one True/False per column.
dFrame13.loc['Maths'] > 90
Arnab False
Ramit True
Samriddhi False
Riya False
Mallika False
Preeti False
Name: Maths, dtype: bool
# True for each column whose 'English' entry is below 90.
dFrame13.loc['English'] < 90
Arnab False
Ramit True
Samriddhi True
Riya True
Mallika True
Preeti False
Name: English, dtype: bool
# Column-wise test: compare every entry of the 'Arnab' column with 70; the
# result has one True/False per row.
dFrame13.loc[:, 'Arnab'] > 70
Maths True
Science True
Hindi True
English True
Name: Arnab, dtype: bool
Slicing
# Row slice by label: with .loc both end labels are included, so this
# returns the 'Maths' and 'Science' rows.
dFrame13.loc['Maths':'Science']
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 90 91 89 79 90 89
Science 90 93 98 80 89 78
# Column slice by label: every row, columns from 'Arnab' through 'Ramit'
# with both ends included.
dFrame13.loc[:, 'Arnab':'Ramit']
Arnab Ramit
Maths 90 91
Science 90 93
Hindi 90 80
English 90 89
# Two rows ('Maths' through 'Science') of the single column 'Arnab'.
dFrame13.loc['Maths':'Science', 'Arnab']
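Maths 90
Science 90
Name: Arnab, dtype: int64
(Note: the two-column table below does not match the call above, which selects only the column 'Arnab'; it corresponds to a selection such as dFrame13.loc['Maths':'Science',['Arnab','Preeti']].)
Arnab Preeti
Maths 90 89
Science 90 78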