Program 1
Develop a program to load a dataset and select one numerical column. Compute the mean,
median, mode, standard deviation, variance, and range of that column. Generate a histogram
and a boxplot to understand the distribution of the data, and identify any outliers using the
IQR method. Then select a categorical variable, compute the frequency of each category, and
display it as a bar chart or pie chart.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load dataset (Modify the file path or URL as needed)
file_path = "your_dataset.csv" # Update with the actual dataset path
df = pd.read_csv(file_path)
# Display first few rows
print("First 5 rows of the dataset:")
print(df.head())
# Select a numerical column
num_col = "your_numerical_column" # Replace with actual numerical column name
if num_col not in df.columns:
    raise ValueError(f"Column '{num_col}' not found in dataset")
# Compute statistics
mean_value = df[num_col].mean()
median_value = df[num_col].median()
mode_value = df[num_col].mode()[0] # Mode might return multiple values
std_dev = df[num_col].std()
variance = df[num_col].var()
data_range = df[num_col].max() - df[num_col].min()
# Print statistics
print("\nStatistical Measures for", num_col)
print(f"Mean: {mean_value}")
print(f"Median: {median_value}")
print(f"Mode: {mode_value}")
print(f"Standard Deviation: {std_dev}")
print(f"Variance: {variance}")
print(f"Range: {data_range}")
# Plot Histogram
plt.figure(figsize=(6, 4))
sns.histplot(df[num_col], bins=20, kde=True)
plt.title(f"Histogram of {num_col}")
plt.xlabel(num_col)
plt.ylabel("Frequency")
plt.show()
# Plot Boxplot
plt.figure(figsize=(6, 4))
sns.boxplot(x=df[num_col])
plt.title(f"Boxplot of {num_col}")
plt.show()
# Detect Outliers using IQR
Q1 = df[num_col].quantile(0.25)
Q3 = df[num_col].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
outliers = df[(df[num_col] < lower_bound) | (df[num_col] > upper_bound)]
print(f"\nNumber of Outliers in {num_col}: {len(outliers)}")
print(outliers)
# Select a categorical column
cat_col = "your_categorical_column" # Replace with actual categorical column name
if cat_col not in df.columns:
    raise ValueError(f"Column '{cat_col}' not found in dataset")
# Compute category frequency
category_counts = df[cat_col].value_counts()
# Plot Bar Chart
plt.figure(figsize=(6, 4))
sns.barplot(x=category_counts.index, y=category_counts.values)
plt.title(f"Bar Chart of {cat_col}")
plt.xlabel(cat_col)
plt.ylabel("Frequency")
plt.xticks(rotation=45)
plt.show()
# Plot Pie Chart
plt.figure(figsize=(6, 4))
plt.pie(category_counts, labels=category_counts.index, autopct='%1.1f%%', startangle=140)
plt.title(f"Pie Chart of {cat_col}")
plt.show()
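The column names above are placeholders. As one way to run the script end-to-end, the values below point it at the Iris CSV published in the seaborn-data GitHub repository (an assumed, publicly available file; any dataset with one numeric and one categorical column works):
# Hypothetical configuration (assumption: seaborn-data Iris CSV)
file_path = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
num_col = "sepal_length"  # numeric column in that file
cat_col = "species"       # categorical column in that file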
Program 2
Develop a program to load a dataset with at least two numerical columns (e.g., Iris, Titanic).
Plot a scatter plot of two variables and calculate their Pearson correlation coefficient.
Compute the covariance and correlation matrices for the dataset, and visualize the
correlation matrix as a heatmap to see which variables have strong positive or negative
correlations.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load dataset (Update file path or dataset)
file_path = "your_dataset.csv" # Replace with actual dataset path
df = pd.read_csv(file_path)
# Display first few rows
print("First 5 rows of the dataset:")
print(df.head())
# Select two numerical columns for scatter plot and correlation
num_col1 = "your_numerical_column1" # Replace with actual column name
num_col2 = "your_numerical_column2" # Replace with actual column name
if num_col1 not in df.columns or num_col2 not in df.columns:
    raise ValueError(f"Columns '{num_col1}' or '{num_col2}' not found in dataset")
# Scatter plot
plt.figure(figsize=(6, 4))
sns.scatterplot(x=df[num_col1], y=df[num_col2])
plt.title(f"Scatter Plot: {num_col1} vs {num_col2}")
plt.xlabel(num_col1)
plt.ylabel(num_col2)
plt.show()
# Compute Pearson correlation coefficient
pearson_corr = df[num_col1].corr(df[num_col2])
print(f"\nPearson Correlation Coefficient between {num_col1} and {num_col2}:
{pearson_corr:.4f}")
# Compute Covariance Matrix
cov_matrix = df[[num_col1, num_col2]].cov()
print("\nCovariance Matrix:")
print(cov_matrix)
# Compute Correlation Matrix
corr_matrix = df.corr(numeric_only=True)  # restrict to numeric columns; non-numeric columns would raise an error
print("\nCorrelation Matrix:")
print(corr_matrix)
# Heatmap of Correlation Matrix
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f", linewidths=0.5)
plt.title("Correlation Matrix Heatmap")
plt.show()
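For reference, the Pearson coefficient computed by .corr() is the covariance of the two columns normalized by both standard deviations:

r_{xy} = \frac{\operatorname{cov}(x, y)}{\sigma_x \sigma_y} = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum_i (x_i - \bar{x})^2}\,\sqrt{\sum_i (y_i - \bar{y})^2}}

Values near +1 or -1 indicate a strong linear relationship; values near 0 indicate little linear association.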
Program 3
Develop a program to implement Principal Component Analysis (PCA) for reducing the
dimensionality of the Iris dataset from 4 features to 2.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
# Load the Iris dataset
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['species'] = iris.target
# Standardize the data (PCA is affected by scale)
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df.iloc[:, :-1]) # Exclude the species column
# Apply PCA to reduce to 2 dimensions
pca = PCA(n_components=2)
pca_data = pca.fit_transform(scaled_data)
# Create a new DataFrame with PCA components
pca_df = pd.DataFrame(pca_data, columns=['PC1', 'PC2'])
pca_df['species'] = iris.target_names[iris.target]  # use species names (not numeric codes) so the legend is labeled correctly
# Scatter plot of PCA results
plt.figure(figsize=(8, 6))
sns.scatterplot(x=pca_df['PC1'], y=pca_df['PC2'], hue=pca_df['species'], palette='coolwarm', alpha=0.7)
plt.title('PCA of Iris Dataset (4D → 2D)')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title='Species')
plt.show()
# Explained variance ratio
explained_variance = pca.explained_variance_ratio_
print(f"Explained Variance by PC1: {explained_variance[0]:.4f}")
print(f"Explained Variance by PC2: {explained_variance[1]:.4f}")
print(f"Total Variance Explained: {sum(explained_variance):.4f}")
Program 4
Develop a program to load the Iris dataset. Implement the k-Nearest Neighbors (k-NN)
algorithm for classifying flowers based on their features. Split the dataset into training and
testing sets and evaluate the model using metrics like accuracy and F1-score. Test it for
different values of k (e.g., k=1, 3, 5) and evaluate the accuracy. Extend the k-NN algorithm to
assign weights based on the distance of neighbors (e.g., weight = 1/d²). Compare the
performance of weighted k-NN and regular k-NN on a synthetic or real-world dataset.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score
# Load the Iris dataset
iris = load_iris()
X = iris.data # Features
y = iris.target # Labels
# Split dataset into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
# Standardize the dataset (important for distance-based models)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Function to evaluate k-NN with different k values
def evaluate_knn(k_values, weighted=False):
    results = []
    for k in k_values:
        if weighted:
            # Weighted k-NN: weight = 1/d^2 (epsilon avoids division by zero)
            knn = KNeighborsClassifier(n_neighbors=k, weights=lambda d: 1 / (d**2 + 1e-5))
        else:
            knn = KNeighborsClassifier(n_neighbors=k, weights="uniform")  # Regular k-NN
        knn.fit(X_train, y_train)
        y_pred = knn.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        results.append((k, accuracy, f1))
    return results
# Test different k values
k_values = [1, 3, 5]
knn_results = evaluate_knn(k_values, weighted=False)
weighted_knn_results = evaluate_knn(k_values, weighted=True)
# Convert results to DataFrame
knn_df = pd.DataFrame(knn_results, columns=['k', 'Accuracy', 'F1-Score'])
weighted_knn_df = pd.DataFrame(weighted_knn_results, columns=['k', 'Accuracy', 'F1-Score'])
# Print results
print("\nRegular k-NN Performance:")
print(knn_df)
print("\nWeighted k-NN Performance:")
print(weighted_knn_df)
# Plot comparison
plt.figure(figsize=(8, 5))
plt.plot(knn_df['k'], knn_df['Accuracy'], marker='o', label='Regular k-NN')
plt.plot(weighted_knn_df['k'], weighted_knn_df['Accuracy'], marker='s', linestyle='dashed', label='Weighted k-NN')
plt.xlabel("k (Number of Neighbors)")
plt.ylabel("Accuracy")
plt.title("k-NN vs. Weighted k-NN Performance")
plt.legend()
plt.show()
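For reference, the lambda above replaces k-NN's uniform majority vote with a distance-weighted vote (the 1e-5 term guards against division by zero when a test point coincides with a neighbor):

\hat{y}(x) = \arg\max_{c} \sum_{i \in N_k(x)} \frac{\mathbf{1}[y_i = c]}{d(x, x_i)^2}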
Program 5
Implement the non-parametric Locally Weighted Regression (LWR) algorithm to fit data
points. Select an appropriate dataset for your experiment and draw graphs.
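For reference, LWR solves a separate weighted least-squares problem at every query point x, with Gaussian weights controlled by the bandwidth \tau:

\hat{\theta}(x) = (X^{\top} W(x)\, X)^{-1} X^{\top} W(x)\, y, \qquad w_i(x) = \exp\!\left(-\frac{\lVert x_i - x \rVert^2}{2\tau^2}\right)

and predicts \hat{y}(x) = [1, x]\,\hat{\theta}(x). The program below implements this directly, using the pseudo-inverse for numerical stability.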
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
# Generate synthetic dataset (Non-linear function)
np.random.seed(42)
X = np.linspace(-3, 3, 100)
y = np.sin(X) + np.random.normal(scale=0.1, size=len(X)) # True function + noise
# Reshape for matrix operations
X = X.reshape(-1, 1)
# Gaussian Kernel for Weights
def get_weights(X_train, x_query, tau):
    distances = cdist(X_train, x_query.reshape(1, -1), metric='euclidean')
    weights = np.exp(-(distances**2) / (2 * tau**2))  # Gaussian kernel
    return np.diag(weights.flatten())
# Locally Weighted Regression function
def locally_weighted_regression(X_train, y_train, x_query, tau):
    W = get_weights(X_train, x_query, tau)
    X_bias = np.c_[np.ones(X_train.shape[0]), X_train]  # Add bias term
    theta = np.linalg.pinv(X_bias.T @ W @ X_bias) @ X_bias.T @ W @ y_train
    return np.r_[1.0, x_query] @ theta  # Prediction for x_query (bias term prepended)
# Fit LWR on the dataset for multiple query points
tau_values = [0.1, 0.5, 1.0] # Different bandwidth values
plt.figure(figsize=(10, 6))
for tau in tau_values:
    y_pred = np.array([locally_weighted_regression(X, y, x, tau) for x in X])
    plt.plot(X, y_pred, label=f"LWR (τ={tau})")
# Plot original data
plt.scatter(X, y, color='black', label='Data Points', alpha=0.6)
plt.title("Locally Weighted Regression (LWR) for Different τ")
plt.xlabel("X")
plt.ylabel("y")
plt.legend()
plt.show()
Program 6
Develop a program to demonstrate the working of Linear Regression and Polynomial
Regression. Use the Boston Housing dataset for Linear Regression and the Auto MPG dataset
(vehicle fuel-efficiency prediction) for Polynomial Regression.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Load the Boston Housing dataset
boston = fetch_openml(name='boston', version=1, as_frame=True)
df = boston.frame
# Display dataset information
print(df.info())
print(df.describe())
# Define features and target variable
X = df.drop(columns='MEDV').astype(float)  # cast category-typed columns (e.g., CHAS, RAD) to numeric
y = df['MEDV']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize and train the Linear Regression model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
# Make predictions on the test set
y_pred = lr_model.predict(X_test)
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")
# Plotting Actual vs Predicted values
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, alpha=0.7, color='b')
plt.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=2)
plt.xlabel('Actual MEDV')
plt.ylabel('Predicted MEDV')
plt.title('Actual vs Predicted MEDV')
plt.show()
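The listing above covers only the linear-regression half of the task. Below is a minimal sketch of the polynomial-regression half, assuming seaborn's bundled Auto MPG dataset (the equivalent OpenML 'autoMpg' dataset would work the same way) and a degree-2 fit of mpg against horsepower:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score
# Load Auto MPG (assumption: seaborn's bundled copy) and drop rows with missing horsepower
mpg = sns.load_dataset('mpg').dropna(subset=['horsepower'])
X_mpg = mpg[['horsepower']]
y_mpg = mpg['mpg']
X_tr, X_te, y_tr, y_te = train_test_split(X_mpg, y_mpg, test_size=0.2, random_state=42)
# Degree-2 polynomial regression: expand features, then fit ordinary least squares
poly_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
poly_model.fit(X_tr, y_tr)
y_pred_poly = poly_model.predict(X_te)
print(f"Polynomial MSE: {mean_squared_error(y_te, y_pred_poly):.2f}")
print(f"Polynomial R^2: {r2_score(y_te, y_pred_poly):.2f}")
# Plot the fitted curve over the raw data
hp_grid = pd.DataFrame({'horsepower': np.linspace(X_mpg['horsepower'].min(), X_mpg['horsepower'].max(), 200)})
plt.figure(figsize=(8, 5))
plt.scatter(mpg['horsepower'], y_mpg, alpha=0.4, label='Data')
plt.plot(hp_grid['horsepower'], poly_model.predict(hp_grid), color='r', lw=2, label='Degree-2 fit')
plt.xlabel('Horsepower')
plt.ylabel('MPG')
plt.title('Polynomial Regression on Auto MPG')
plt.legend()
plt.show()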
Program 7
Develop a program to load the Titanic dataset. Split the data into training and test sets. Train
a decision tree classifier. Visualize the tree structure. Evaluate accuracy, precision, recall, and
F1-score.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder
# Load Titanic dataset
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)
# Display dataset info
print(df.info())
# Select relevant features & preprocess data
df = df[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']].copy()  # Relevant columns
df.dropna(inplace=True)  # Drop rows with missing values
# Encode categorical variables
df['Sex'] = LabelEncoder().fit_transform(df['Sex'])  # Convert 'Sex' to 0 (female) & 1 (male)
df['Embarked'] = LabelEncoder().fit_transform(df['Embarked'])  # Encode 'Embarked' categories
# Define features & target variable
X = df.drop(columns='Survived')
y = df['Survived']
# Split into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
# Train Decision Tree model
dt_model = DecisionTreeClassifier(max_depth=4, random_state=42)
dt_model.fit(X_train, y_train)
# Predictions
y_pred = dt_model.predict(X_test)
# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
# Visualize the Decision Tree
plt.figure(figsize=(15, 8))
plot_tree(dt_model, feature_names=X.columns, class_names=['Died', 'Survived'], filled=True)
plt.title("Decision Tree for Titanic Survival Prediction")
plt.show()
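Note that LabelEncoder imposes an arbitrary numeric ordering on 'Embarked', which a decision tree tolerates but linear models generally do not. A sketch of an alternative preprocessing using one-hot encoding (pd.get_dummies); the resulting X and y can be fed to the same split/train/evaluate steps as above:
import pandas as pd
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
raw = pd.read_csv(url)[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']].dropna()
# One indicator column per category; drop_first removes redundant (collinear) columns
raw = pd.get_dummies(raw, columns=['Sex', 'Embarked'], drop_first=True)
X = raw.drop(columns='Survived')
y = raw['Survived']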
Program 8
Develop a program to implement the Naive Bayes classifier using the Iris dataset for
training. Compute the accuracy of the classifier on the test data.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data # Features
y = iris.target # Labels
# Split into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
# Train Naïve Bayes model
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
# Predict test data
y_pred = nb_model.predict(X_test)
# Compute accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
# Display classification report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=iris.target_names))
# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6, 5))
sns.heatmap(conf_matrix, annot=True, cmap='Blues', xticklabels=iris.target_names, yticklabels=iris.target_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
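For reference, GaussianNB models each of the four features within each class as an independent univariate normal, with per-class means \mu_{c,j} and variances \sigma_{c,j}^2 estimated from the training split, and predicts the class with the largest posterior:

\hat{y} = \arg\max_{c}\; P(c) \prod_{j=1}^{4} \frac{1}{\sqrt{2\pi\sigma_{c,j}^{2}}} \exp\!\left(-\frac{(x_j - \mu_{c,j})^{2}}{2\sigma_{c,j}^{2}}\right)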
Program 9
Develop a program to implement k-means clustering using the Wisconsin Breast Cancer
dataset and visualize the clustering result.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# Load the Wisconsin Breast Cancer dataset
cancer = load_breast_cancer()
df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
# Standardize the dataset (important for K-Means)
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)
# Apply K-Means clustering (2 clusters since we have benign & malignant)
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)  # n_init set explicitly for consistent behavior across sklearn versions
df['Cluster'] = kmeans.fit_predict(df_scaled)
# Visualize clusters using PCA (reduce to 2D)
pca = PCA(n_components=2)
df_pca = pca.fit_transform(df_scaled)
df['PCA1'] = df_pca[:, 0]
df['PCA2'] = df_pca[:, 1]
plt.figure(figsize=(10, 6))
sns.scatterplot(x=df['PCA1'], y=df['PCA2'], hue=df['Cluster'], palette='coolwarm', alpha=0.7)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('K-Means Clustering on Breast Cancer Dataset')
plt.legend(title="Cluster")
plt.show()
# Compare with actual labels
print(pd.crosstab(cancer.target, df['Cluster'], rownames=['Actual'], colnames=['Cluster']))
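Because k-means cluster IDs are arbitrary (cluster 0 need not correspond to malignant), a sensible agreement score first maps each cluster to its majority true class; a minimal sketch, assuming it runs in the same session as the program above:
# Map each cluster to its majority true class, then measure agreement
labels = df['Cluster'].to_numpy()
mapping = {c: np.bincount(cancer.target[labels == c]).argmax() for c in np.unique(labels)}
aligned = np.array([mapping[c] for c in labels])
print(f"Agreement with true labels: {(aligned == cancer.target).mean():.4f}")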