# Solution
# Solution
# S.No. 1673
# SECTION A
# Q1 (a)
# Sample data for the plot; presumably rainfall[i] is the reading for days[i] — confirm.
rainfall = [5, 2, 7, 8, 2]
days = [1, 3, 5, 1, 9]
# NOTE(review): no drawing call (plot/bar/scatter) and no import of `plt` are
# visible in this listing, so as written only the axis labels are set before
# the figure is shown — confirm which chart the question asks for.
plt.xlabel("Days")
plt.ylabel("Rainfall")
plt.show()
# Q1 (b)
import pandas as pd

# Five employee records; two of the names occur twice.
company = pd.DataFrame({
    'Name': ['Sangeeta', 'Sarika', 'Sangeeta', 'Babita', 'Sarika'],
    'Age': [18, 30, 45, 32, 25],
})
# (i) Distinct names, in order of first appearance.
# The result is printed because a bare expression shows nothing when run as a script.
unique_names = company['Name'].unique()
print(unique_names)
# (ii) Mean age for each name.
mean_age_by_name = company.groupby('Name')['Age'].mean()
print(mean_age_by_name)
# Q1 (c)
# (i)
# NOTE(review): `section1` is not defined anywhere in the visible listing —
# presumably a DataFrame built on a line that was lost; confirm.
print(section1)
# (ii)
# NOTE(review): `merged` is likewise undefined here; the statement that
# produced it is not visible.
print(merged)
# (iii)
# Join the two sections on both key columns. pd.merge defaults to an inner
# join, so only rows whose (Name, RollNo) pair occurs in both frames are kept.
common = pd.merge(section1, section2, on=['Name', 'RollNo'])
print(common)
# Q1 (d)
import numpy as np

# 2x3 array of float zeros. The name was misspelled `al` (letter l), which
# left every later use of `a1` undefined.
a1 = np.zeros((2, 3))
# Expected value: [[0,0,0],[0,0,0]]
# NOTE(review): `a2` was not defined in this listing. It is reconstructed from
# the printed sum below (zeros + a2) and the final (4, 3) shape — confirm
# against the question paper.
a2 = np.array([[3, 4, 5], [7, 8, 9]])
print(np.add(a1, a2))
# Expected output: [[3.,4.,5.,],[7.,8.,9.]]
# Stack a2 underneath a1 (axis=0 appends rows).
a1 = np.append(a1, a2, axis=0)
print(a1)
# Expected output: [[0,0,0],[0,0,0], [3.,4.,5.,],[7.,8.,9.]]
print('shape of array', a1.shape)
# Expected output: (4,3)
# Q1 (e)
empSalary = np.array([4000, 5200, 6100, 7000, 4900, 8000, 3000, 9200, 6300, 4800])
# (i)
# Count the salaries strictly above 5000. The count is stored and printed
# because the original bare len(...) expression discarded its result.
high_earner_count = int(np.count_nonzero(empSalary > 5000))
print(high_earner_count)
# (ii)
# NOTE(review): `incentive` is not defined in the visible listing; the
# statement that computes it appears to be missing — confirm the rule.
print("Incentives:", incentive)
# Q1 (f)
# 4x3 frame with progressively more missing values per row.
# Uses np.nan: the np.NaN alias was removed in NumPy 2.0.
data = pd.DataFrame([[2, 4, 6], [np.nan, 8, 10], [np.nan, 12, np.nan], [np.nan, np.nan, np.nan]])
print(data)
# Expected output (column labels only survived in the listing): 0 1 2
# Keep only the rows that have at least two non-missing values.
at_least_two = data.dropna(thresh=2)
print(at_least_two)
# Expected output (column labels only survived in the listing): 0 1 2
# Forward-fill each column, filling at most two consecutive gaps.
# DataFrame.ffill replaces the deprecated fillna(method="ffill").
forward_filled = data.ffill(limit=2)
print(forward_filled)
# Expected output (column labels only survived in the listing): 0 1 2
# SECTION B
# Q2 (a)
# NOTE(review): `df` and `df1` are not defined in the visible listing; the
# statements that build them appear to be missing — confirm.
print(df)
print(df1)
# sort_index returns a new frame ordered by index level 0; df1 itself is
# not modified because inplace defaults to False.
df2 = df1.sort_index(level=0)
print(df2)
# Q2 (b)
# NOTE(review): `markSheet` is not defined in the visible listing. `.dtype`
# exists on a Series or ndarray; if markSheet is a DataFrame the attribute
# would have to be `.dtypes` — confirm the object's type.
print(markSheet)
print("Datatype:", markSheet.dtype)
print("Shape:", markSheet.shape)
print("Dimension:", markSheet.ndim)
# Q2 (c)
# Two-item price list whose rates are then doubled.
itemRate = pd.DataFrame(
    {
        'Item': ['Apples', 'Oranges'],
        'Rate': [220, 90],
    }
)
itemRate['Rate'] = itemRate['Rate'].mul(2)
print(itemRate)
# Q 3
import pandas as pd
# DataFrame
# NOTE(review): the dictionary literal below is never closed and its entries
# are missing from this listing, so this section cannot run as written.
# The statements that follow read the columns 'Marks_obtained' and
# 'Hours_studied', so the dictionary presumably supplies both — confirm.
data = {
df_Student = pd.DataFrame(data)
# Highest mark and mean study hours across all rows.
max_marks = df_Student['Marks_obtained'].max()
avg_hours = df_Student['Hours_studied'].mean()
# NOTE(review): `correlation` and `covariance` are not assigned anywhere in
# the visible listing — confirm how they are computed.
print("Correlation:\n", correlation)
print("Covariance:\n", covariance)
# 4. Heatmap
# NOTE(review): no heatmap drawing call is visible before plt.show().
plt.show()
# Expected output:
# i.
# [0 1 2 3 4 5]
# ii.
# [[1 2 3]
# [4 6 8]]
# iii.
# [[2. 1. 0.66666667]
# iv.
# 1 [4 6 8] [[1 2 3]]
# v.
# [0]
# Q4 (a)
# Q4 (b)
# Sales records: one row per person per quarter, with the country of sale.
df = pd.DataFrame({
    'person': ['A', 'B', 'C', 'D', 'E', 'A', 'B', 'C', 'D'],
    'sales': [1000, 300, 400, 500, 800, 1000, 500, 700, 50],
    'quarter': [1, 1, 1, 1, 1, 2, 2, 2, 2],
    # 'Japna' corrected to 'Japan' so both Japan rows land in the same group
    # when aggregating by country.
    'country': ['US', 'Japan', 'Brazil', 'UK', 'US', 'Brazil', 'Japan', 'Brazil', 'US'],
})
# This call was fused onto the closing "})" of the frame, which is a syntax
# error. It also passed `data`, which is not this frame; `df` is the object
# that has a 'sales' column.
# NOTE(review): confirm that `df` is the intended source for this plot.
sns.boxplot(x='sales', data=df)
# Highest and lowest single sale recorded for Brazil.
brazil_sales = df.loc[df['country'] == 'Brazil', 'sales']
max_sales = brazil_sales.max()
min_sales = brazil_sales.min()
print(max_sales, min_sales)
# Total sales per country.
print(df.groupby('country')['sales'].sum())
# Person(s) with the highest average sale. The lookup is done on the
# per-person averages: comparing individual rows of df['sales'] against the
# best average only matches when some single sale happens to equal that
# average.
avg_sales_by_person = df.groupby('person')['sales'].mean()
max_avg_sales = avg_sales_by_person.max()
print(avg_sales_by_person[avg_sales_by_person == max_avg_sales].index.tolist())
# Summary statistics of the sales column.
print(df['sales'].describe())
# Box plot of the 'sales' column drawn through pandas' own plotting wrapper.
boxplot = df.boxplot(column='sales')
# NOTE(review): `plt` is not imported anywhere in the visible listing — confirm.
plt.show()
# Q5 (a)
# 0..23 as a flat array, then the same buffer viewed as 2 rows of 12.
c1 = np.arange(24)
c2 = np.reshape(c1, (2, 12))
# c2 is a view of c1, so zeroing columns 3 onward also changes c1.
c2[..., 3:] = 0
print(c1)
print(c2)
print(np.multiply(c1, 2))
print(np.reshape(c2, (3, 8)))
# Q5 (b)
# NOTE(review): the column dictionary is empty in this listing, so this
# builds an empty DataFrame; the intended columns appear to be missing.
excel_data = pd.DataFrame({
})
# Write the frame to data.xlsx without the index column.
excel_data.to_excel("data.xlsx", index=False)
# Bin Salary into 3 equal-width intervals.
# NOTE(review): `df1` is not defined in the visible listing — presumably the
# sheet read back from the Excel file; confirm.
df1['Salary_bins'] = pd.cut(df1['Salary'], 3)
# Bar chart of the number of rows per salary bin, drawn on axes[1].
# NOTE(review): `axes` is not defined here — presumably from plt.subplots; confirm.
df1['Salary_bins'].value_counts().plot(kind='bar', ax=axes[1])
plt.savefig("Employees.png")
# Q6 (a)
# NOTE(review): `s1`, `data2`, `df1` and `df2` are not defined in the visible
# listing; the statements that create them appear to be missing — confirm.
print(s1)
# rank() assigns each value its position in sorted order; ties share the
# average rank by default.
print(s1.rank())
data1 = pd.DataFrame({'One': ['a', 'b'] * 2 + ['b'], 'Two': [21, 22, 21, 23, 24]})
print(data1)
print(data2)
# Add 10 to the entry at row label 1 of column 'A' in one .loc call.
# The original chained form df2['A'][1] = ... may assign to a temporary
# object and has no effect under pandas copy-on-write.
df2.loc[1, 'A'] = df2.loc[1, 'A'] + 10
print(df1)
print(df2)
# Q6 (b)
ages = np.array([20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32])
# Right-inclusive age bands, (18, 25], (25, 35], (35, 60], (60, 100], each
# given a readable label.
age_edges = [18, 25, 35, 60, 100]
age_labels = ['Youth', 'YoungAdult', 'MiddleAged', 'Senior']
categories = pd.cut(ages, bins=age_edges, labels=age_labels)
print(categories.value_counts())
# NOTE(review): `quantile_bins` is not defined in the visible listing —
# presumably the result of a quantile-based binning of `ages`; confirm.
print(quantile_bins.value_counts())
# Q7
# Twelve employee records: gender, job role and salary.
empData = pd.DataFrame({
    'Gender': ['Male'] * 4 + ['Female'] * 5 + ['Male'] * 3,
    'Role': [
        'Data Analyst', 'Data Analyst', 'Data Analyst',
        'Data Scientist', 'Data Scientist', 'Data Scientist',
        'Manager', 'Manager', 'Manager',
        'Data Analyst', 'Data Scientist', 'Manager',
    ],
    'Salary': [
        48000, 42000, 51000,
        62000, 71000, 73000,
        82000, 87000, 91000,
        45000, 56000, 66000,
    ],
})
# (a) The full table.
print(empData)
# (b) Total salary paid per role.
salary_by_role = empData.groupby('Role')['Salary']
print(salary_by_role.sum())
# (c) Number of female employees in each role.
female_rows = empData.loc[empData['Gender'].eq('Female')]
print(female_rows.groupby('Role').size())
# (d) Highest and lowest salary for each gender.
salary_by_gender = empData.groupby('Gender')['Salary']
print(salary_by_gender.agg(['max', 'min']))
# (e) Overall mean salary.
# NOTE(review): the value is computed but not used before the table is
# printed again — a step may be missing from this listing; confirm.
avg_salary = empData['Salary'].mean()
print(empData)