0% found this document useful (0 votes)

3 views8 pages

Solution

The document provides a comprehensive guide on data analysis and visualization using Python libraries such as Matplotlib, Pandas, and Seaborn. It includes various coding examples covering topics like plotting rainfall data, data manipulation with DataFrames, statistical analysis, and generating visualizations like heatmaps and boxplots. Additionally, it discusses employee salary data analysis and categorization of ages using bins.

Uploaded by

Aditya singh Rajput

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

3 views8 pages

Solution

Uploaded by

Aditya singh Rajput

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 8

# Solutions for Data Analysis and Visualization (UPC: 2343012002)

# S.No. 1673

# SECTION A

# Q1 (a)

import matplotlib.pyplot as plt

# Rainfall readings and the day on which each reading was taken.
rainfall = [5, 2, 7, 8, 2]
days = [1, 3, 5, 1, 9]

# Draw the readings as large red circle markers ('ro' = red, circle, no line)
# on an explicitly created figure/axes pair.
fig, ax = plt.subplots()
ax.plot(days, rainfall, 'ro', markersize=10)
ax.set_title("Rainfall over Days")
ax.set_xlabel("Days")
ax.set_ylabel("Rainfall")

plt.show()

# Q1 (b)

import pandas as pd

# Build the employee table from parallel name/age sequences.
employee_names = ['Sangeeta', 'Sarika', 'Sangeeta', 'Babita', 'Sarika']
employee_ages = [18, 30, 45, 32, 25]
company = pd.DataFrame(list(zip(employee_names, employee_ages)), columns=['Name', 'Age'])

# (i) Distinct names, in order of first appearance.
company['Name'].unique()

# (ii) Mean age for each name.
company.groupby('Name')['Age'].agg('mean')

# Q1 (c)
# (The heading above was previously uncommented, so Python evaluated it as a
# call to an undefined name `Q1` and raised NameError.)

# Roll-number / name tables for the two sections.
section1 = pd.DataFrame({'RollNo': [1, 2, 3, 4], 'Name': ['Abhav', 'Vihaan', 'Chitra', 'Devansh']})
section2 = pd.DataFrame({'RollNo': [1, 5, 3, 2], 'Name': ['Roni', 'Kabeer', 'Ishani', 'Vihaan']})

# (i) Display the first section.
print(section1)

# (ii) Inner join on 'Name': keeps only names present in both sections.
# The non-key 'RollNo' column appears twice with pandas' default _x/_y suffixes.
merged = pd.merge(section2, section1, on='Name', how='inner')
print(merged)

# (iii) Rows whose name AND roll number match in both sections
# (pd.merge defaults to an inner join).
common = pd.merge(section1, section2, on=['Name', 'RollNo'])
print(common)

# Q1 (d)

# numpy is used throughout these solutions but was never imported.
import numpy as np

# The array was originally created as `al` (letter l) but referenced as `a1`
# below, which raised NameError at the np.append call.
a1 = np.zeros((2, 3))
# a1 -> [[0. 0. 0.]
#        [0. 0. 0.]]

a2 = [[3, 4, 5], [7, 8, 9]]

# Element-wise sum; the nested list a2 is converted to an array by np.add.
print(np.add(a1, a2))
# Output: [[3. 4. 5.]
#          [7. 8. 9.]]

# Stack the rows of a2 below the rows of a1 (axis=0 appends along rows).
a1 = np.append(a1, a2, axis=0)
print(a1)
# Output: [[0. 0. 0.]
#          [0. 0. 0.]
#          [3. 4. 5.]
#          [7. 8. 9.]]

print('shape of array', a1.shape)
# Output: shape of array (4, 3)

# Q1 (e)

empSalary = np.array([4000, 5200, 6100, 7000, 4900, 8000, 3000, 9200, 6300, 4800])

# (i) Number of employees earning more than 5000.
# The boolean mask selects the qualifying salaries; len() counts them.
# Previously this count was computed and discarded, and the "(ii)" heading
# was fused onto the same line.
high_earner_count = len(empSalary[empSalary > 5000])
print("Employees with salary above 5000:", high_earner_count)

# (ii) Incentive of 10% of salary for every employee (vectorised).
incentive = empSalary * 0.1
print("Incentives:", incentive)

# Q1 (f)

# np.nan is the supported spelling; the np.NaN alias was removed in NumPy 2.0.
data = pd.DataFrame([[2, 4, 6], [np.nan, 8, 10], [np.nan, 12, np.nan], [np.nan, np.nan, np.nan]])

print(data)
# Output:
#      0     1     2
# 0  2.0   4.0   6.0
# 1  NaN   8.0  10.0
# 2  NaN  12.0   NaN
# 3  NaN   NaN   NaN

# thresh=2 keeps only rows that have at least two non-missing values.
print(data.dropna(thresh=2))
# Output:
#      0    1     2
# 0  2.0  4.0   6.0
# 1  NaN  8.0  10.0

# Forward-fill down each column, filling at most two consecutive gaps.
# DataFrame.ffill replaces the deprecated fillna(method="ffill") spelling.
print(data.ffill(limit=2))
# Output:
#      0     1     2
# 0  2.0   4.0   6.0
# 1  2.0   8.0  10.0
# 2  2.0  12.0  10.0
# 3  NaN  12.0  10.0

# SECTION B

# Q2 (a)

# Two-level row labels (region, number) and two-level column labels (city, colour).
row_levels = [['North', 'North', 'South', 'South'], [1, 2, 1, 2]]
column_levels = [['Delhi', 'Delhi', 'Chandigarh'], ['Green', 'Red', 'Green']]

df = pd.DataFrame(np.arange(12).reshape((4, 3)), index=row_levels, columns=column_levels)
df.index = df.index.set_names(['key1', 'key2'])
print(df)

# Exchange the two row levels so that key2 becomes the outer level.
df1 = df.swaplevel(0, 1)
print(df1)

# Sort rows by the (new) outer level, key2.
df2 = df1.sort_index(level='key2')
print(df2)

# Q2 (b)

# 2x3 grid of random integer marks drawn from 60..100 (upper bound 101 is exclusive).
markSheet = np.random.randint(low=60, high=101, size=(2, 3))
print(markSheet)

# Report the array's element type, shape and number of dimensions.
for caption, attribute in (
    ("Datatype:", markSheet.dtype),
    ("Shape:", markSheet.shape),
    ("Dimension:", markSheet.ndim),
):
    print(caption, attribute)

# Q2 (c)
itemRate = pd.DataFrame({'Item': ['Apples', 'Oranges'], 'Rate': [220, 90]})

# Double every rate.
itemRate['Rate'] = itemRate['Rate'] * 2
print(itemRate)

# Row label of the cheapest item, then the full row for that label.
cheapest_label = itemRate['Rate'].idxmin()
print("Item with Min Rate:", itemRate.loc[cheapest_label])

# Q3

import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt

# DataFrame
# Per-student study hours and marks. The closing brace of this dict literal
# was missing, which made the whole script a SyntaxError.
data = {
    'Name': ['Mohan', 'Sohan', 'Jeevan', 'Gita', 'Meenu', 'Gopal', 'Rajeev'],
    'Hours_studied': [2.5, 4.0, 6.0, 8.0, 10.0, 1.0, 5.0],
    'Marks_obtained': [40, 52, 64, 70, 90, 10, 60],
}

df_Student = pd.DataFrame(data)

# 1. Students with maximum marks
# Compare against the maximum so that ties are all reported.
max_marks = df_Student['Marks_obtained'].max()
top_students = df_Student[df_Student['Marks_obtained'] == max_marks]['Name'].tolist()
print("Students with maximum marks:", top_students)

# 2. Average hours studied
avg_hours = df_Student['Hours_studied'].mean()
print("Average hours studied:", avg_hours)

# 3. Correlation and Covariance
# Both are 2x2 matrices over the two numeric columns.
correlation = df_Student[['Hours_studied', 'Marks_obtained']].corr()
covariance = df_Student[['Hours_studied', 'Marks_obtained']].cov()
print("Correlation:\n", correlation)
print("Covariance:\n", covariance)
# 4. Heatmap
# Correlation matrix of the two numeric columns, drawn as an annotated heatmap.
study_columns = ['Hours_studied', 'Marks_obtained']
study_correlation = df_Student[study_columns].corr()

sns.heatmap(study_correlation, annot=True, cmap='coolwarm')
plt.title('Heatmap: Hours Studied vs Marks Obtained')
plt.show()

# [0 1 2 3 4 5]
#
# ii.
#
# [[1 2 3]
#  [4 6 8]]
#
# iii.
#
# [[2.         1.         0.66666667]
#  [0.5        0.33333333 0.25      ]]
#
# iv.
#
# 1 [4 6 8] [[1 2 3]]
#
# [0]

# Q4 (a)

# Q4 (b)

# Quarterly sales per person. 'Japna' was a typo for 'Japan' (person B's
# country in quarter 2) and split that country into two groups.
df = pd.DataFrame({
    'person': ['A', 'B', 'C', 'D', 'E', 'A', 'B', 'C', 'D'],
    'sales': [1000, 300, 400, 500, 800, 1000, 500, 700, 50],
    'quarter': [1, 1, 1, 1, 1, 2, 2, 2, 2],
    'country': ['US', 'Japan', 'Brazil', 'UK', 'US', 'Brazil', 'Japan', 'Brazil', 'US'],
})

# Box plot of the sales column. This call was fused onto the end of the
# DataFrame statement (a SyntaxError) and was passed `data`, an unrelated
# object from an earlier question, instead of this DataFrame.
sns.boxplot(x='sales', data=df)

# Highest and lowest single sale recorded for Brazil.
brazil_sales = df[df['country'] == 'Brazil']['sales']
max_sales = brazil_sales.max()
min_sales = brazil_sales.min()

# Total sales per country.
df.groupby('country')['sales'].sum()

# Person(s) with the highest average sales. The previous lookup compared
# individual sale amounts against the maximum *average*, which is only right
# when some single sale happens to equal that average.
avg_sales_by_person = df.groupby('person')['sales'].mean()
max_avg_sales = avg_sales_by_person.max()
avg_sales_by_person[avg_sales_by_person == max_avg_sales].index.tolist()

# Summary statistics (count, mean, std, quartiles, min, max) of sales.
df['sales'].describe()

# pandas' own box plot of the sales column.
boxplot = df.boxplot(column='sales')

plt.show()

# Q5 (a)

c1 = np.arange(24)

# Reshaping a freshly created 1-D array yields a view onto the same buffer,
# so the assignment below is visible through c1 as well.
c2 = c1.reshape(2, 12)

# Zero every column from index 3 onward in both rows.
c2[:, 3:] = 0

# Show the flat array, the 2x12 view, the doubled flat array and a 3x8 reshape.
for shown in (c1, c2, c1 * 2, c2.reshape(3, 8)):
    print(shown)

# Q5 (b)

# Employee records that are written to Excel and then read back.
excel_data = pd.DataFrame({
    'Employee id': [101, 102, 103, 104, 105, 106],
    'Department': ['CS', 'CS', 'CS', 'English', 'English', 'English'],
    'Salary': [2000, 2002, 2040, 2045, 2030, 2006],
    'Age': [24, 23, 34, 39, 43, 34],
})

# Round-trip through an Excel workbook, using the employee id as the row index
# when reading back.
excel_data.to_excel("data.xlsx", index=False)
df1 = pd.read_excel("data.xlsx", index_col='Employee id')

# One figure with two side-by-side axes: scatter on the left, bar chart on the right.
fig, axes = plt.subplots(nrows=1, ncols=2)
scatter_axis, bar_axis = axes

df1.plot.scatter(x='Salary', y='Age', ax=scatter_axis, title='Salary vs Age')

# Split the salary range into three equal-width bins and count employees per bin.
df1['Salary_bins'] = pd.cut(df1['Salary'], bins=3)
salary_bin_counts = df1['Salary_bins'].value_counts()
salary_bin_counts.plot(kind='bar', ax=bar_axis)

plt.savefig("Employees.png")

# Q6 (a)

s1 = pd.Series([5, 0, -4, 8])
print(s1)

# rank() assigns 1 to the smallest value, so -4 -> 1, 0 -> 2, 5 -> 3, 8 -> 4.
print(s1.rank())

data1 = pd.DataFrame({'One': ['a', 'b'] * 2 + ['b'], 'Two': [21, 22, 21, 23, 24]})
print(data1)

# Rows 0 and 2 are both ('a', 21); keep='last' drops the earlier one (row 0).
data2 = data1.drop_duplicates(['One', 'Two'], keep='last')
print(data2)

df1 = pd.DataFrame({'A': [21, 32], 'B': [27, 30]})
df2 = pd.DataFrame({'A': [23, 41]})

# Update the cell with a single .loc call. The previous chained form
# df2['A'][1] = ... assigns into a temporary object and leaves df2 unchanged
# when pandas Copy-on-Write is active.
df2.loc[1, 'A'] = df2.loc[1, 'A'] + 10

print(df1)
print(df2)

# Element-wise comparison of df2 against the smallest value in df1['B'] (27).
print(df2 > df1['B'].min())

# Q6 (b)

ages = np.array([20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32])

# Explicit bin edges (right-inclusive by default) and one label per interval.
age_edges = [18, 25, 35, 60, 100]
age_labels = ['Youth', 'YoungAdult', 'MiddleAged', 'Senior']

categories = pd.cut(ages, bins=age_edges, labels=age_labels)
print(categories.value_counts())

# Quartile-based bins: edges are chosen from the data's own quantiles.
quantile_bins = pd.qcut(ages, q=4)
print(quantile_bins.value_counts())

# Q7

# Employee table: gender, role, years of experience and salary.
employee_columns = {
    'Gender': ['Male'] * 4 + ['Female'] * 5 + ['Male'] * 3,
    'Role': (
        ['Data Analyst'] * 3
        + ['Data Scientist'] * 3
        + ['Manager'] * 3
        + ['Data Analyst', 'Data Scientist', 'Manager']
    ),
    'Experience': [1, 1, 3, 5, 6, 1, 2, 3, 5, 6, 10, 11],
    'Salary': [48000, 42000, 51000, 62000, 71000, 73000, 82000, 87000, 91000, 45000, 56000, 66000],
}
empData = pd.DataFrame(employee_columns)

# (a) Show the full table.
print(empData)

# (b) Total salary paid per role.
salary_by_role = empData.groupby('Role')['Salary'].sum()
print(salary_by_role)

# (c) Number of female employees in each role.
female_employees = empData.loc[empData['Gender'].eq('Female')]
print(female_employees.groupby('Role').size())

# (d) Highest and lowest salary within each gender.
salary_range_by_gender = empData.groupby('Gender')['Salary'].agg(['max', 'min'])
print(salary_range_by_gender)

# (e) Keep only employees earning at least the overall average salary.
avg_salary = empData['Salary'].mean()
at_or_above_average = empData['Salary'].ge(avg_salary)
empData = empData[at_or_above_average]
print(empData)

White Plume Mountain
100% (1)
White Plume Mountain
43 pages
SAS™ Kit POG
No ratings yet
SAS™ Kit POG
16 pages
Python Cheat Sheet: Pandas - Numpy - Sklearn Matplotlib - Seaborn BS4 - Selenium - Scrapy
100% (3)
Python Cheat Sheet: Pandas - Numpy - Sklearn Matplotlib - Seaborn BS4 - Selenium - Scrapy
9 pages
DAVP PYQ 2023 SOLUTION
No ratings yet
DAVP PYQ 2023 SOLUTION
15 pages
Lab Record IP
No ratings yet
Lab Record IP
13 pages
Answers Practical File
No ratings yet
Answers Practical File
19 pages
Pyhtonpractice Questions
No ratings yet
Pyhtonpractice Questions
5 pages
Programs of Python Pandas
No ratings yet
Programs of Python Pandas
15 pages
Data Analysis CheatSheet
No ratings yet
Data Analysis CheatSheet
2 pages
Python Slips
No ratings yet
Python Slips
9 pages
Print Print Print Print: Import As
No ratings yet
Print Print Print Print: Import As
6 pages
Grade 12 - IP Practicals (1 To 9)
No ratings yet
Grade 12 - IP Practicals (1 To 9)
12 pages
Marking scheme practical paper (1)
No ratings yet
Marking scheme practical paper (1)
5 pages
Informatics Practices Record class 12
No ratings yet
Informatics Practices Record class 12
60 pages
ANS KEY SET A
No ratings yet
ANS KEY SET A
6 pages
XII IP PRACTICAL LIST 2022-23-1
No ratings yet
XII IP PRACTICAL LIST 2022-23-1
23 pages
Mayank Chaudhary DEV Practicals
No ratings yet
Mayank Chaudhary DEV Practicals
14 pages
12 Ip Practical List With Solution Complete
No ratings yet
12 Ip Practical List With Solution Complete
5 pages
IP Practical File
No ratings yet
IP Practical File
18 pages
Cs Sem III Dav Upc 2343012002 Sl. No. Qp. 1673 Dec '23
No ratings yet
Cs Sem III Dav Upc 2343012002 Sl. No. Qp. 1673 Dec '23
12 pages
Data_Analyzer
No ratings yet
Data_Analyzer
10 pages
Ip Practical File
No ratings yet
Ip Practical File
20 pages
Practical File Questions With Answers
No ratings yet
Practical File Questions With Answers
7 pages
GEC PRACTICALS
No ratings yet
GEC PRACTICALS
31 pages
Creation of Series Using List, Dictionary & Ndarray
No ratings yet
Creation of Series Using List, Dictionary & Ndarray
65 pages
Python Pandas-DataFrames Complete - Jupyter Notebook
No ratings yet
Python Pandas-DataFrames Complete - Jupyter Notebook
34 pages
EDA (2)
No ratings yet
EDA (2)
7 pages
23bet10114 Naman Gupta Assignment-1
No ratings yet
23bet10114 Naman Gupta Assignment-1
17 pages
Ip Project
No ratings yet
Ip Project
27 pages
Ip Practical File
No ratings yet
Ip Practical File
20 pages
1.5
No ratings yet
1.5
39 pages
L-2 (Data Frame Part 1).Ipynb - Colab
No ratings yet
L-2 (Data Frame Part 1).Ipynb - Colab
5 pages
Dataframe in Pandas
No ratings yet
Dataframe in Pandas
23 pages
CheatSheet
No ratings yet
CheatSheet
15 pages
practice_questions2
No ratings yet
practice_questions2
2 pages
dav 2024 pyq
No ratings yet
dav 2024 pyq
7 pages
DSDBAAssignment2_SUMEET (1)
No ratings yet
DSDBAAssignment2_SUMEET (1)
8 pages
Practical File ANKIT RAJ CLASS 12-F
No ratings yet
Practical File ANKIT RAJ CLASS 12-F
48 pages
QP DAV 3rd Sem Dec 2023
No ratings yet
QP DAV 3rd Sem Dec 2023
12 pages
Week 3 GGG
No ratings yet
Week 3 GGG
17 pages
List of Practical Ip065 Xii Session 2025 Ckc Academy
No ratings yet
List of Practical Ip065 Xii Session 2025 Ckc Academy
19 pages
Vanshika Goyal Gec Practicals
No ratings yet
Vanshika Goyal Gec Practicals
31 pages
Numpy
No ratings yet
Numpy
9 pages
Practical File Programs
No ratings yet
Practical File Programs
8 pages
3rd Semester DDM AI DAA DEV Print Pages For Spiral Record 25-1-24 - Removed
No ratings yet
3rd Semester DDM AI DAA DEV Print Pages For Spiral Record 25-1-24 - Removed
28 pages
EXP-3
No ratings yet
EXP-3
10 pages
XII CBSE IP Lab Solutions(2024-25)
No ratings yet
XII CBSE IP Lab Solutions(2024-25)
15 pages
Practical File IP
No ratings yet
Practical File IP
27 pages
Set B
No ratings yet
Set B
8 pages
Half Yearly Answers
No ratings yet
Half Yearly Answers
10 pages
Class 12 Practical File Informatics Practices
No ratings yet
Class 12 Practical File Informatics Practices
28 pages
Preksha Ai Practical Class 10th_070428
No ratings yet
Preksha Ai Practical Class 10th_070428
13 pages
Python Amit
No ratings yet
Python Amit
11 pages
Interactive Data Analysis With Jupyter Cheatsheet 1731972443
No ratings yet
Interactive Data Analysis With Jupyter Cheatsheet 1731972443
10 pages
12 Pandas
100% (1)
12 Pandas
21 pages
Week 5 LAB
No ratings yet
Week 5 LAB
23 pages
Suryadatta National School Class 12 CBSE Informatics Practices Practicals List
No ratings yet
Suryadatta National School Class 12 CBSE Informatics Practices Practicals List
19 pages
EDP-3[2]
No ratings yet
EDP-3[2]
16 pages
DAV Practical
No ratings yet
DAV Practical
12 pages
Pyspark Interview Questions
No ratings yet
Pyspark Interview Questions
4 pages
Panda Merged
No ratings yet
Panda Merged
19 pages
No Ph.D. Game Design With Three.js
From Everand
No Ph.D. Game Design With Three.js
Nikiforos Kontopoulos
No ratings yet
D_AU_220225_11535671740132472
No ratings yet
D_AU_220225_11535671740132472
1 page
The Evolving Landscape of Cyber Security and Cyberspace
No ratings yet
The Evolving Landscape of Cyber Security and Cyberspace
8 pages
Successive Differentiation An Overview
No ratings yet
Successive Differentiation An Overview
10 pages
Threats in The Digital World Data Breaches and Cyber Attacks
No ratings yet
Threats in The Digital World Data Breaches and Cyber Attacks
9 pages
Cyber Security 14SL
No ratings yet
Cyber Security 14SL
14 pages
Bronchopneumonia: Case Study
67% (3)
Bronchopneumonia: Case Study
18 pages
Shin Megami Tensei - Persona 4 FAQs, Walkthroughs, and Guides For PlayStation 2 - GameFAQs
No ratings yet
Shin Megami Tensei - Persona 4 FAQs, Walkthroughs, and Guides For PlayStation 2 - GameFAQs
1 page
Brenoob7-1663947604 HVMAIN - Ovpn
No ratings yet
Brenoob7-1663947604 HVMAIN - Ovpn
3 pages
9-Kinroad XT150GK Users Manual
No ratings yet
9-Kinroad XT150GK Users Manual
47 pages
Osteoporosis Case Study
50% (2)
Osteoporosis Case Study
2 pages
2022 Fall - Pe 9 Softball Lesson Plan 2
No ratings yet
2022 Fall - Pe 9 Softball Lesson Plan 2
5 pages
بورتلات
No ratings yet
بورتلات
6 pages
The History of Hot Wheels Sizzlers
No ratings yet
The History of Hot Wheels Sizzlers
3 pages
Sports Day 2024-2025 Order of Events
No ratings yet
Sports Day 2024-2025 Order of Events
3 pages
Repair Information Igx700 800 en
No ratings yet
Repair Information Igx700 800 en
1 page
Soft Cam
100% (1)
Soft Cam
14 pages
Sunny Meadows Woodland School
No ratings yet
Sunny Meadows Woodland School
13 pages
9702 - p1 - Forces - All (Finished Upto May-June 2012)
No ratings yet
9702 - p1 - Forces - All (Finished Upto May-June 2012)
40 pages
WBAdventures 01
No ratings yet
WBAdventures 01
2 pages
أ.مشعل الشلوي الرسومات التنفيذية الانشائية
No ratings yet
أ.مشعل الشلوي الرسومات التنفيذية الانشائية
22 pages
College Football Preview 2017
100% (1)
College Football Preview 2017
16 pages
Listening Unit 5 Cuarto
No ratings yet
Listening Unit 5 Cuarto
4 pages
Air Hogs Triple Booster Instructions
No ratings yet
Air Hogs Triple Booster Instructions
1 page
Loona JinSoul Biography
No ratings yet
Loona JinSoul Biography
2 pages
Manual Book Suzuki Katana sj410 PDF
50% (2)
Manual Book Suzuki Katana sj410 PDF
2 pages
PEH 4 Reviewer
No ratings yet
PEH 4 Reviewer
1 page
BATCH-15
No ratings yet
BATCH-15
17 pages
Car List
No ratings yet
Car List
8 pages
Sunburst Primary 6 Activity Book
No ratings yet
Sunburst Primary 6 Activity Book
128 pages
Yamaha PW 80 Parts List WWW - Manualedereparatie.info
100% (1)
Yamaha PW 80 Parts List WWW - Manualedereparatie.info
41 pages
Drive
No ratings yet
Drive
105 pages
Directoryandsalarydetails
No ratings yet
Directoryandsalarydetails
4 pages
T-Call-A7670-V1.0
No ratings yet
T-Call-A7670-V1.0
2 pages

Solution

Uploaded by

Solution

Uploaded by

# Solutions for Data Analysis and Visualization (UPC: 2343012002)

import matplotlib.pyplot as plt

plt.plot(days, rainfall, 'ro', markersize=10)

plt.title("Rainfall over Days")

section1 = pd.DataFrame({'RollNo': [1,2,3, 4], 'Name': ['Abhav', 'Vihaan','Chitra','Devansh']})

section2 = pd.DataFrame({'RollNo': [1,5,3, 2], 'Name': ['Roni', 'Kabeer','Ishani','Vihaan']})

merged = pd.merge(section2, section1, on='Name', how='inner')

a2 = [[3, 4, 5], [7, 8, 9]]

incentive = empSalary * 0.1

0 2.0 4.0 6.0

1 NaN 8.0 10.0

2 NaN 12.0 NaN

3 NaN NaN NaN

0 2.0 4.0 6.0

1 NaN 8.0 10.0

0 2.0 4.0 6.0

1 2.0 8.0 10.0

2 2.0 12.0 10.0

3 NaN 12.0 10.0

df = pd.DataFrame(np.arange(12).reshape(4, 3), index=[['North', 'North', 'South', 'South'], [1, 2, 1,

df.index.names = ['key1', 'key2']

df1 = df.swaplevel('key1', 'key2')

markSheet = np.random.randint(60, 101, size=(2, 3))

print("Item with Min Rate:", itemRate.loc[itemRate['Rate'].idxmin()])

import seaborn as sns

import matplotlib.pyplot as plt

'Name': ['Mohan', 'Sohan', 'Jeevan', 'Gita', 'Meenu', 'Gopal', 'Rajeev'],

'Hours_studied': [2.5, 4.0, 6.0, 8.0, 10.0, 1.0, 5.0],

'Marks_obtained': [40, 52, 64, 70, 90, 10, 60]

# 1. Students with maximum marks

top_students = df_Student[df_Student['Marks_obtained'] == max_marks]['Name'].tolist()

print("Students with maximum marks:", top_students)

# 2. Average hours studied

print("Average hours studied:", avg_hours)

# 3. Correlation and Covariance

correlation = df_Student[['Hours_studied', 'Marks_obtained']].corr()

covariance = df_Student[['Hours_studied', 'Marks_obtained']].cov()

sns.heatmap(df_Student[['Hours_studied', 'Marks_obtained']].corr(), annot=True, cmap='coolwarm')

plt.title('Heatmap: Hours Studied vs Marks Obtained')

[0.5 0.33333333 0.25 ]]

'Employee id': [101, 102, 103, 104, 105, 106],

'Department': ['CS', 'CS', 'CS', 'English', 'English', 'English'],

'Salary': [2000, 2002, 2040, 2045, 2030, 2006],

'Age': [24, 23, 34, 39, 43, 34]

df1 = pd.read_excel("data.xlsx", index_col='Employee id')

fig, axes = plt.subplots(1, 2)

df1.plot.scatter(x='Salary', y='Age', ax=axes[0], title='Salary vs Age')

s1 = pd.Series([5, 0, -4, 8])

data2 = data1.drop_duplicates(['One', 'Two'], keep='last')

df1 = pd.DataFrame({'A': [21, 32], 'B': [27, 30]})

df2 = pd.DataFrame({'A': [23, 41]})

print(df2 > df1['B'].min())

quantile_bins = pd.qcut(ages, q=4)

'Experience': [1, 1, 3, 5, 6, 1, 2, 3, 5, 6, 10, 11],

empData = empData[empData['Salary'] >= avg_salary]

You might also like