0% found this document useful (0 votes)
35 views9 pages

ML Lab

The document contains multiple Python programs that perform various data analysis tasks, including outlier detection in housing data, visualization of correlations and distributions, PCA on the Iris dataset, and k-NN classification on randomly generated data. Each program utilizes libraries such as NumPy, pandas, Matplotlib, and Seaborn for data manipulation and visualization. The overall focus is on data exploration and machine learning techniques.

Uploaded by

Vaishnavi Y. U
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
35 views9 pages

ML Lab

The document contains multiple Python programs that perform various data analysis tasks, including outlier detection in housing data, visualization of correlations and distributions, PCA on the Iris dataset, and k-NN classification on randomly generated data. Each program utilizes libraries such as NumPy, pandas, Matplotlib, and Seaborn for data manipulation and visualization. The overall focus is on data exploration and machine learning techniques.

Uploaded by

Vaishnavi Y. U
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd

Program

1
# Explore the California Housing data: per-feature histograms, box plots,
# and an IQR-based outlier count.
#
# NOTE(review): this listing was reconstructed from a two-column page whose
# text was interleaved and whose dotted names were replaced by "[Link]"
# placeholders. The np./plt./sns./sklearn.datasets names below are inferred
# from the call arguments and imports -- confirm against the original.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing(as_frame=True)
housing_df = data['frame']

# Only numeric columns can be binned or summarised with quantiles.
numerical_features = housing_df.select_dtypes(include=[np.number]).columns

# Grid layout: three plots per row, plus one extra row for any remainder.
n_features = len(numerical_features)
n_cols = 3
n_rows = (n_features // n_cols) + (n_features % n_cols > 0)

# One histogram (with KDE overlay) per numeric feature.
plt.figure(figsize=(15, 5 * n_rows))
for i, feature in enumerate(numerical_features):
    plt.subplot(n_rows, n_cols, i + 1)
    sns.histplot(housing_df[feature], kde=True, bins=30, color='blue')
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
# -- page marker from the source document: "Program 2" --
plt.show()

# One box plot per numeric feature, on the same grid layout.
plt.figure(figsize=(15, 5 * n_rows))
for i, feature in enumerate(numerical_features):
    plt.subplot(n_rows, n_cols, i + 1)
    sns.boxplot(x=housing_df[feature], color='orange')
    plt.title(f'Box Plot of {feature}')
plt.tight_layout()
plt.show()

# Count the rows lying more than 1.5 * IQR outside the quartiles, per feature.
print("Outliers Detection:")
outliers_summary = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outliers = housing_df[(housing_df[feature] < lower_bound) |
                          (housing_df[feature] > upper_bound)]
    outliers_summary[feature] = len(outliers)
    print(f"{feature}: {len(outliers)} outliers")
Program
3
# Correlation heatmap and pair plot for the California Housing features.
#
# NOTE(review): dotted names were replaced by "[Link]" placeholders when the
# document was extracted; the plt./sns./sklearn.datasets names below are
# inferred from the call arguments -- confirm against the original.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

california_data = fetch_california_housing(as_frame=True)
data = california_data.frame

# Pairwise correlation between every pair of columns in the frame.
correlation_matrix = data.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm',
            fmt='.2f', linewidths=0.5)
plt.title('Correlation Matrix of California Housing Features')
plt.show()

# y=1.02 places the overall title slightly above the top of the grid.
sns.pairplot(data, diag_kind='kde', plot_kws={'alpha': 0.5})
plt.suptitle('Pair Plot of California Housing Features', y=1.02)
plt.show()
Program
4
# Reduce the Iris measurements to two principal components and scatter-plot
# them, one colour per species.
#
# NOTE(review): dotted names were replaced by "[Link]" placeholders when the
# document was extracted; iris.data / iris.target / pd.DataFrame / np.unique /
# plt.* and the sklearn module paths are inferred -- confirm against the
# original.
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names

iris_df = pd.DataFrame(data, columns=iris.feature_names)

# Project the feature matrix onto its first two principal components.
pca = PCA(n_components=2)
data_reduced = pca.fit_transform(data)

reduced_df = pd.DataFrame(
    data_reduced,
    columns=['Principal Component 1', 'Principal Component 2'],
)
reduced_df['Label'] = labels

plt.figure(figsize=(8, 6))
colors = ['r', 'g', 'b']
for i, label in enumerate(np.unique(labels)):
    # Select this class's rows once and reuse them for both axes.
    class_rows = reduced_df[reduced_df['Label'] == label]
    plt.scatter(
        class_rows['Principal Component 1'],
        class_rows['Principal Component 2'],
        label=label_names[label],
        color=colors[i],
    )
plt.title('PCA on Iris Dataset')
Program
5
# Finish the PCA scatter plot: axis labels, legend, grid, then display.
# NOTE(review): the call names were lost to "[Link]" placeholders during
# extraction; xlabel/ylabel/legend/grid/show are inferred from the arguments
# and the surviving "()" / "w()" fragments -- confirm against the original.
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid()
plt.show()
Program
6
import pandas as pd
def find_s_algorithm(file_path, positive_label='Yes'):
    """Run the Find-S algorithm on a CSV file of training examples.

    The last column of the CSV is treated as the class label; every other
    column is an attribute. The training data is printed before processing.

    Args:
        file_path: Path of the CSV file to read with ``pd.read_csv``.
        positive_label: Value in the class column that marks a positive
            example. Defaults to ``'Yes'``.

    Returns:
        A list with one entry per attribute: the value shared by all
        positive examples, or ``'?'`` where the positive examples disagree.
        If the file has no positive examples, every entry is ``'?'``.
    """
    data = pd.read_csv(file_path)
    print("Training data:")
    print(data)

    attributes = data.columns[:-1]
    class_label = data.columns[-1]

    # None means "no positive example seen yet". Keeping this state separate
    # from '?' matters: once an attribute has been generalised to '?', a later
    # positive example must not specialise it again.
    hypothesis = None
    for _, row in data.iterrows():
        if row[class_label] != positive_label:
            continue  # Find-S ignores negative examples.
        values = list(row[attributes])
        if hypothesis is None:
            # The first positive example is the most specific hypothesis.
            hypothesis = values
        else:
            # Keep an attribute only while every positive example agrees.
            hypothesis = [
                current if current == value else '?'
                for current, value in zip(hypothesis, values)
            ]

    if hypothesis is None:
        return ['?' for _ in attributes]
    return hypothesis
# Run Find-S on a local training file and report the learned hypothesis.
# NOTE(review): the CSV file name was lost when the document was extracted
# (only the directory survived); 'training_data.csv' is a placeholder --
# replace it with the real file name.
file_path = 'C:\\Users\\Admin\\Desktop\\training_data.csv'
hypothesis = find_s_algorithm(file_path)
print("\nThe final hypothesis is:", hypothesis)
Program
7
import numpy as np
# NOTE(review): "matplotlib.pyplot" and "np.random.rand" were replaced by
# "[Link]" placeholders when the document was extracted; both are inferred
# from the surrounding code -- confirm against the original.
import matplotlib.pyplot as plt
from collections import Counter

# 100 random values; the first 50 become the labelled training set.
data = np.random.rand(100)
labels = ["Class1" if x <= 0.5 else "Class2" for x in data[:50]]


def euclidean_distance(x1, x2):
    """Return the distance between two scalar points.

    For one-dimensional values this is the absolute difference.
    """
    return abs(x1 - x2)
def knn_classifier(train_data, train_labels, test_point, k):
    """Classify ``test_point`` by majority vote among its ``k`` nearest neighbours.

    Args:
        train_data: Sequence of training points.
        train_labels: Labels for the training points, in the same order.
        test_point: The point to classify.
        k: Number of nearest neighbours that vote; must be at least 1.

    Returns:
        The most common label among the ``k`` nearest training points. When
        several labels tie, the one that first appears among the nearest
        neighbours wins, because ``Counter.most_common`` keeps
        first-encountered order for equal counts.

    Raises:
        ValueError: If ``k`` is less than 1, the training data is empty, or
            there are fewer labels than training points.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if len(train_data) == 0:
        raise ValueError("train_data must not be empty")
    if len(train_labels) < len(train_data):
        raise ValueError("train_labels must provide a label for every training point")

    distances = [
        (euclidean_distance(test_point, point), label)
        for point, label in zip(train_data, train_labels)
    ]
    # Stable sort on distance only, so equally distant points keep their
    # training order.
    distances.sort(key=lambda pair: pair[0])
    k_nearest_labels = [label for _, label in distances[:k]]
    return Counter(k_nearest_labels).most_common(1)[0][0]
# Classify the 50 unlabelled points for several values of k, print every
# prediction, then plot the training and test points for each k.
#
# NOTE(review): the plt.* call names were replaced by "[Link]" placeholders
# when the document was extracted; figure/scatter/title/xlabel/ylabel/legend/
# grid/show are inferred from the arguments -- confirm against the original.
train_data = data[:50]
train_labels = labels
test_data = data[50:]
k_values = [1, 2, 3, 4, 5, 20, 30]

print("--- k-Nearest Neighbors Classification ---")
print("Training dataset: First 50 points labeled based on the rule (x <= 0.5 -> Class1, x > 0.5 -> Class2)")
print("Testing dataset: Remaining 50 points to be classified\n")

results = {}
for k in k_values:
    print(f"Results for k = {k}:")
    classified_labels = [knn_classifier(train_data, train_labels, test_point, k)
                         for test_point in test_data]
    results[k] = classified_labels
    # Test points are numbered x51..x100, continuing after the training set.
    # -- page marker from the source document: "Program 8" --
    for i, (value, label) in enumerate(zip(test_data, classified_labels), start=51):
        print(f"Point x{i} (value: {value:.4f}) is classified as {label}")
    print("\n")

print("Classification complete.\n")

for k in k_values:
    classified_labels = results[k]
    class1_points = [value for value, label in zip(test_data, classified_labels)
                     if label == "Class1"]
    class2_points = [value for value, label in zip(test_data, classified_labels)
                     if label == "Class2"]

    plt.figure(figsize=(10, 6))
    # Training points are drawn at y=0 and test points at y=1 so the two
    # sets are visually separated.
    plt.scatter(train_data, [0] * len(train_data),
                c=["blue" if label == "Class1" else "red" for label in train_labels],
                label="Training Data", marker="o")
    plt.scatter(class1_points, [1] * len(class1_points), c="blue",
                label="Class1 (Test)", marker="x")
    plt.scatter(class2_points, [1] * len(class2_points), c="red",
                label="Class2 (Test)", marker="x")
    plt.title(f"k-NN Classification Results for k = {k}")
    plt.xlabel("Data Points")
    plt.ylabel("Classification Level")
    plt.legend()
    plt.grid(True)
    plt.show()

You might also like