0% found this document useful (0 votes)
3 views22 pages

Python Data Science Practical Complete

The document contains practical Python codes for data science, covering topics such as NumPy array creation, bivariate analysis using logistic regression, basic arithmetic operations, and descriptive analytics with Pandas. It includes examples of data visualization techniques like density plots, contour plots, and 3D plotting, along with statistical analyses on datasets like diabetes and iris. Additionally, it demonstrates file operations, data manipulation, and the creation of DataFrames from various data structures.

Uploaded by

sshivasreetha
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views22 pages

Python Data Science Practical Complete

The document contains practical Python codes for data science, covering topics such as NumPy array creation, bivariate analysis using logistic regression, basic arithmetic operations, and descriptive analytics with Pandas. It includes examples of data visualization techniques like density plots, contour plots, and 3D plotting, along with statistical analyses on datasets like diabetes and iris. Additionally, it demonstrates file operations, data manipulation, and the creation of DataFrames from various data structures.

Uploaded by

sshivasreetha
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/22

Python for Data Science Practical Codes

Question 1.a:

# 1.a Creation of different types of NumPy arrays


import numpy as np

# Arrays of increasing rank, each built from a (nested) Python list.
flat_values = [1, 2, 3, 4, 5]
table_rows = [[1, 2, 3], [4, 5, 6]]
stacked_tables = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]

array_1d = np.asarray(flat_values)
print("1D Array:", array_1d)

array_2d = np.asarray(table_rows)
print("2D Array:\n", array_2d)

array_3d = np.asarray(stacked_tables)
print("3D Array:\n", array_3d)

# Arrays produced by NumPy's constructor helpers rather than literals.
zeros = np.zeros(shape=(2, 3))
ones = np.ones(shape=(3, 3))
arange_array = np.arange(0, 10)
linspace_array = np.linspace(start=0, stop=1, num=5)

# Print each helper-built array under its heading, in creation order.
for heading, values in (
    ("Zeros:\n", zeros),
    ("Ones:\n", ones),
    ("Arange:\n", arange_array),
    ("Linspace:\n", linspace_array),
):
    print(heading, values)
Python for Data Science Practical Codes

Question 1.b:

# 1.b Bivariate Analysis on Diabetes Data using Logistic Regression


import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Location of the diabetes CSV; point this at your own copy or a download link.
url = 'diabetes.csv'
data = pd.read_csv(url)

# Bivariate analysis: grid of pairwise plots, coloured by the 'Outcome' column.
sns.pairplot(data, hue='Outcome')
plt.show()

# Logistic regression: 'Outcome' is the label, every other column is a feature.
y = data['Outcome']
X = data.drop(columns='Outcome')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report overall accuracy followed by the per-class metrics table.
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
Python for Data Science Practical Codes

Question 2.a:

# 2.a Basic Arithmetic Operations with NumPy Arrays


import numpy as np

# Two equal-length integer vectors; every operation below is element-wise.
a = np.array((1, 2, 3))
b = np.array((4, 5, 6))

# The ufunc spellings of +, -, *, / and ** on arrays.
print("Addition:", np.add(a, b))
print("Subtraction:", np.subtract(a, b))
print("Multiplication:", np.multiply(a, b))
print("Division:", np.divide(a, b))
print("Power:", np.power(a, 2))
Python for Data Science Practical Codes

Question 2.b:

# 2.b Density and Contour Plots on Adult Dataset


import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Location of the Adult CSV; point this at your own copy or a download link.
url = 'adult.csv'

# Load the data and discard rows containing missing values before plotting.
data = pd.read_csv(url).dropna()

# Replace the 'income' labels with their integer category codes.
income_categories = data['income'].astype('category')
data['income'] = income_categories.cat.codes

# Density plot: one filled age curve per income code.
sns.kdeplot(data=data, x='age', hue='income', fill=True)
plt.title('Density Plot of Age by Income')
plt.show()

# Contour plot: filled 2-D density of age against hours-per-week.
# NOTE: no sub-sampling is applied; every remaining row feeds the estimate.
sns.kdeplot(data=data, x='age', y='hours-per-week', fill=True)
plt.title('Contour Plot of Age vs Hours-per-week')
plt.show()
Python for Data Science Practical Codes

Question 3.a:

# 3.a Creation of an Array using Built-In NumPy Functions


import numpy as np

# Constant-filled, identity and random arrays from NumPy's built-in constructors.
zeros_array = np.zeros(shape=(2, 2))
ones_array = np.ones(shape=(3, 3))
identity_matrix = np.eye(N=4)
random_array = np.random.rand(2, 3)  # new random values on every run

# Print each array under its heading, in creation order.
for heading, values in (
    ("Zeros Array:\n", zeros_array),
    ("Ones Array:\n", ones_array),
    ("Identity Matrix:\n", identity_matrix),
    ("Random Array:\n", random_array),
):
    print(heading, values)
Python for Data Science Practical Codes

Question 3.b:

# 3.b Descriptive Analytics with Pandas on Iris Dataset


import pandas as pd

# Direct raw.githubusercontent.com address of the iris CSV (no intermediary
# proxy host in the URL); swap in a local file path to work offline.
url = 'https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv'
data = pd.read_csv(url)

# Preview of the first rows, numeric summary, then rows per species label.
print("First 5 Rows:\n", data.head())
print("\nSummary Statistics:\n", data.describe())
print("\nSpecies Count:\n", data['species'].value_counts())
Python for Data Science Practical Codes

Question 4.a:

# 4.a Creation of a DataFrame from Dictionary


import pandas as pd

# Column-oriented input: each key becomes a column, each list its values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'Los Angeles', 'Chicago'],
)

df = pd.DataFrame(data=data)
print(df)
Python for Data Science Practical Codes

Question 4.b:

# 4.b Descriptive Analytics on Iris Dataset from scikit-learn


from sklearn.datasets import load_iris
import pandas as pd

# Load the iris dataset shipped with scikit-learn and wrap its measurements in
# a DataFrame: one column per feature name plus a 'target' column of labels.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Preview of the first rows, numeric summary, then rows per target value.
preview = df.head()
print("First 5 Rows:\n", preview)

summary = df.describe()
print("\nSummary Statistics:\n", summary)

target_counts = df['target'].value_counts()
print("\nTarget Count:\n", target_counts)
Python for Data Science Practical Codes

Question 5.a:

# 5.a Creation of a DataFrame from N-Dimensional Arrays


import numpy as np
import pandas as pd

# A 2x3 integer array whose rows become the DataFrame's rows.
rows = ([1, 2, 3], [4, 5, 6])
array = np.array(rows)

# Label the three columns explicitly instead of using the default 0..2.
column_names = ['Column1', 'Column2', 'Column3']
df = pd.DataFrame(data=array, columns=column_names)
print(df)
Python for Data Science Practical Codes

Question 5.b:

# 5.b Univariate Statistical Analysis on Diabetes Data


import pandas as pd

# Location of the diabetes CSV; point this at your own copy or a download link.
url = 'diabetes.csv'
data = pd.read_csv(url)

# Summary statistics for the numeric columns.
summary = data.describe()
print(summary)

# How many rows carry each 'Outcome' value.
outcome_counts = data['Outcome'].value_counts()
print("\nOutcome Counts:\n", outcome_counts)
Python for Data Science Practical Codes

Question 6.a:

# 6.a Bivariate Analysis on Diabetes Data using Linear Regression


import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression

# Location of the diabetes CSV; point this at your own copy or a download link.
url = 'diabetes.csv'
data = pd.read_csv(url)

# Visual check of the pair being modelled: BMI on x, Glucose on y.
sns.scatterplot(data=data, x='BMI', y='Glucose')
plt.title('BMI vs Glucose')
plt.show()

# Single-feature regression; the double brackets keep X a DataFrame rather
# than collapsing it to a Series.
y = data['Glucose']
X = data[['BMI']]

model = LinearRegression().fit(X, y)

print("Coefficient:", model.coef_)
print("Intercept:", model.intercept_)
Python for Data Science Practical Codes

Question 6.b:

# 6.b Creation of different types of NumPy arrays and displaying basic information
import numpy as np

# A 2x3 integer array used to demonstrate the basic metadata attributes.
a = np.array(((1, 2, 3), (4, 5, 6)))

print("Array:\n", a)

# Look up each metadata attribute by name and print it under its label.
for label, attribute in (
    ("Shape:", "shape"),
    ("Data Type:", "dtype"),
    ("Size:", "size"),
    ("Dimension:", "ndim"),
):
    print(label, getattr(a, attribute))
Python for Data Science Practical Codes

Question 7.a:

# 7.a NumPy File Operations


import numpy as np

# Write a small array to disk in NumPy's binary .npy format.
npy_path = 'my_array.npy'
array = np.array((1, 2, 3, 4, 5))
np.save(file=npy_path, arr=array)

# Read the same file back and show the round-tripped values.
loaded_array = np.load(file=npy_path)
print("Loaded Array:", loaded_array)
Python for Data Science Practical Codes

Question 7.b:

# 7.b Descriptive Analytics with Pandas on Iris Dataset (from path or web)
import pandas as pd

# Direct raw.githubusercontent.com address of the iris CSV (no intermediary
# proxy host in the URL); swap in a local file path to work offline.
url = 'https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv'
df = pd.read_csv(url)

# Numeric summary, then the number of rows per species label.
print(df.describe())
print(df['species'].value_counts())
Python for Data Science Practical Codes

Question 8.a:

# 8.a 3D Plotting on Adult Dataset


import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Location of the Adult CSV; point this at your own copy or a download link.
url = 'adult.csv'
df = pd.read_csv(url)

# The three numeric columns plotted against each other.
ages = df['age']
weekly_hours = df['hours-per-week']
education_levels = df['education-num']

# A single 3-D axes filling the figure; every row becomes one red point.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(ages, weekly_hours, education_levels, c='red')

ax.set_xlabel('Age')
ax.set_ylabel('Hours per Week')
ax.set_zlabel('Education Num')
plt.show()
Python for Data Science Practical Codes

Question 8.b:

# 8.b Creation of a DataFrame from Series


import pandas as pd

# Two named Series; each name becomes the column label in the DataFrame.
s1 = pd.Series(data=[1, 2, 3], name="A")
s2 = pd.Series(data=[4, 5, 6], name="B")

# Place the Series side by side as columns, aligned on their shared index.
columns = [s1, s2]
df = pd.concat(columns, axis='columns')
print(df)
Python for Data Science Practical Codes

Question 9.a:

# 9.a Histograms on Adult Dataset


import pandas as pd
import matplotlib.pyplot as plt

# Location of the Adult CSV; point this at your own copy or a download link.
url = 'adult.csv'
df = pd.read_csv(url)

# Distribution of the 'age' column split into 20 bins.
ages = df['age']
ages.hist(bins=20)

plt.title('Histogram of Age')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.show()
Python for Data Science Practical Codes

Question 9.b:

# 9.b NumPy Built-in Array Creation and Operations


import numpy as np

# The integers 1..5 (the stop value 6 is excluded).
array = np.arange(1, 6)
print("Array:", array)

# Element-wise square, then mean and standard deviation via the array's own
# methods (std uses NumPy's default ddof=0).
print("Squared:", np.square(array))
print("Mean:", array.mean())
print("Standard Deviation:", array.std())
Python for Data Science Practical Codes

Question 10.a:

# 10.a Univariate Statistical Analysis on Diabetes Data


import pandas as pd

# Location of the diabetes CSV; point this at your own copy or a download link.
url = 'diabetes.csv'
df = pd.read_csv(url)

# Summary statistics for the numeric columns.
summary = df.describe()
print("Summary Statistics:\n", summary)

# How many rows carry each 'Outcome' value.
outcome_counts = df['Outcome'].value_counts()
print("Outcome Distribution:\n", outcome_counts)
Python for Data Science Practical Codes

Question 10.b:

# 10.b Array Creation using Built-in NumPy Functions


import numpy as np

# Five evenly spaced values from 1 to 10, both endpoints included.
a = np.linspace(start=1, stop=10, num=5)
# A 2x2 array with every element set to 7.
b = np.full(shape=(2, 2), fill_value=7)

for heading, values in (("Linspace Array:", a), ("Full Array:\n", b)):
    print(heading, values)
Python for Data Science Practical Codes

Question 11.a:

# 11.a Normal Curves and Scatter Plots on UCI Dataset


import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the Adult CSV; point this at your own copy or a download link.
url = 'adult.csv'
df = pd.read_csv(url)

# Smoothed distribution of 'age'. kdeplot draws a kernel density estimate of
# the data itself, not a fitted normal distribution, despite the plot title.
age_values = df['age']
sns.kdeplot(age_values, fill=True)
plt.title("Normal Curve of Age")
plt.show()

# Scatter plot: one point per row, age on x and weekly hours on y.
sns.scatterplot(x='age', y='hours-per-week', data=df)
plt.title("Scatter Plot: Age vs Hours-per-week")
plt.show()
Python for Data Science Practical Codes

Question 11.b:

# 11.b NumPy Array Types and Info


import numpy as np

# A 2x3 integer array used to demonstrate the basic metadata attributes.
arr = np.array(((10, 20, 30), (40, 50, 60)))
print("Array:\n", arr)

# Collect the metadata once, keyed by label, then print in insertion order.
array_info = {
    "Shape:": arr.shape,
    "Size:": arr.size,
    "Datatype:": arr.dtype,
    "Dimension:": arr.ndim,
}
for label, value in array_info.items():
    print(label, value)

You might also like