# Program 1
# Exploratory analysis of the California housing dataset: histograms and box
# plots for every numerical feature, followed by an IQR-based outlier count.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Load the dataset as a pandas DataFrame (features plus target column).
data = fetch_california_housing(as_frame=True)
housing_df = data.frame

numerical_features = housing_df.select_dtypes(include=[np.number]).columns

# Grid layout: three plots per row, with one extra row for any remainder.
n_features = len(numerical_features)
n_cols = 3
n_rows = (n_features // n_cols) + (n_features % n_cols > 0)

# Histograms with a KDE overlay, one subplot per feature.
plt.figure(figsize=(15, 5 * n_rows))
for i, feature in enumerate(numerical_features):
    plt.subplot(n_rows, n_cols, i + 1)
    sns.histplot(housing_df[feature], kde=True, bins=30, color='blue')
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
# NOTE(review): a "Program 2" marker appeared at this point in the source; it
# looks like a page header rather than the start of a new program.
plt.show()

# Box plots, one subplot per feature.
plt.figure(figsize=(15, 5 * n_rows))
for i, feature in enumerate(numerical_features):
    plt.subplot(n_rows, n_cols, i + 1)
    sns.boxplot(x=housing_df[feature], color='orange')
    plt.title(f'Box Plot of {feature}')
plt.tight_layout()
plt.show()

# Count outliers per feature using the 1.5 * IQR rule.
print("Outliers Detection:")
outliers_summary = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outliers = housing_df[(housing_df[feature] < lower_bound) |
                          (housing_df[feature] > upper_bound)]
    outliers_summary[feature] = len(outliers)
    print(f"{feature}: {len(outliers)} outliers")
# Program 3
# Correlation heatmap and pair plot for the California housing dataset.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Load the dataset as a pandas DataFrame (features plus target column).
california_data = fetch_california_housing(as_frame=True)
data = california_data.frame

# Pairwise correlation between all columns.
correlation_matrix = data.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm',
            fmt='.2f', linewidths=0.5)
plt.title('Correlation Matrix of California Housing Features')
plt.show()

# Pair plot with KDE curves on the diagonal; alpha softens overplotting.
sns.pairplot(data, diag_kind='kde', plot_kws={'alpha': 0.5})
plt.suptitle('Pair Plot of California Housing Features', y=1.02)
plt.show()
# Program 4
# Reduce the Iris dataset from four features to two with PCA and plot the
# projected points, coloured by species.
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names

iris_df = pd.DataFrame(data, columns=iris.feature_names)

# Project the feature matrix onto its first two principal components.
pca = PCA(n_components=2)
data_reduced = pca.fit_transform(data)

reduced_df = pd.DataFrame(
    data_reduced,
    columns=['Principal Component 1', 'Principal Component 2'],
)
reduced_df['Label'] = labels

plt.figure(figsize=(8, 6))
colors = ['r', 'g', 'b']
for i, label in enumerate(np.unique(labels)):
    # Select the rows of one class once, then plot both components.
    class_rows = reduced_df[reduced_df['Label'] == label]
    plt.scatter(
        class_rows['Principal Component 1'],
        class_rows['Principal Component 2'],
        label=label_names[label],
        color=colors[i]
    )
plt.title('PCA on Iris Dataset')
# NOTE(review): a "Program 5" marker appeared at this point in the source; it
# looks like a page header rather than the start of a new program.
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid()
plt.show()
# Program 6
import pandas as pd
def find_s_algorithm(file_path):
    """Run the Find-S algorithm on a CSV training set.

    The CSV is loaded with ``pd.read_csv`` and echoed to stdout. Every column
    except the last is treated as an attribute; the last column is the class
    label. Only rows whose label equals ``'Yes'`` influence the result.

    Args:
        file_path: Path of the CSV file to load.

    Returns:
        A list with one entry per attribute: the value shared by all positive
        rows, or ``'?'`` where positive rows disagree. If there are no
        positive rows, every entry is ``'?'``.
    """
    data = pd.read_csv(file_path)
    print("Training data:")
    print(data)

    attributes = data.columns[:-1]
    class_label = data.columns[-1]

    # A private sentinel marks "no positive example seen yet". Using '?' for
    # that state as well would let a later positive row overwrite an
    # attribute that had already been generalised to '?'.
    unset = object()
    hypothesis = [unset] * len(attributes)

    positives = data.loc[data[class_label] == 'Yes', attributes]
    for row in positives.itertuples(index=False, name=None):
        for i, value in enumerate(row):
            if hypothesis[i] is unset:
                hypothesis[i] = value
            elif hypothesis[i] != value:
                # Conflict (or already generalised): stay at '?'.
                hypothesis[i] = '?'

    # Attributes never set (no positive rows) are reported as '?'.
    return ['?' if entry is unset else entry for entry in hypothesis]
# NOTE(review): the CSV file name was replaced by a "[Link]" placeholder in the
# source; "training_data.csv" is a stand-in -- confirm the real file name.
file_path = 'C:\\Users\\Admin\\Desktop\\training_data.csv'
hypothesis = find_s_algorithm(file_path)
print("\nThe final hypothesis is:", hypothesis)
# Program 7
# k-Nearest Neighbours on 100 random one-dimensional points.
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

# NOTE(review): the generator call was replaced by a "[Link]" placeholder in
# the source; np.random.rand(100) is the presumed original -- confirm.
data = np.random.rand(100)

# Label the first 50 points by the rule x <= 0.5 -> Class1, else Class2.
labels = ["Class1" if x <= 0.5 else "Class2" for x in data[:50]]


def euclidean_distance(x1, x2):
    """Return the absolute difference between *x1* and *x2*.

    For scalar (one-dimensional) points this equals the Euclidean distance.
    """
    return abs(x1 - x2)
def knn_classifier(train_data, train_labels, test_point, k):
    """Classify *test_point* by majority vote among its k nearest neighbours.

    Args:
        train_data: Sequence of training points.
        train_labels: Labels aligned index-by-index with ``train_data``.
        test_point: The point to classify.
        k: Number of nearest neighbours that vote; must be at least 1.

    Returns:
        The most common label among the ``k`` closest training points. On a
        tied vote, the label that appears first in distance order wins.

    Raises:
        ValueError: If ``k`` is less than 1, the training data is empty, or
            ``train_data`` and ``train_labels`` differ in length.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if len(train_data) != len(train_labels):
        raise ValueError("train_data and train_labels must have equal length")
    if len(train_data) == 0:
        raise ValueError("train_data must not be empty")

    # Sort on distance only; the sort is stable, so equally distant points
    # keep their training order and labels never take part in the comparison.
    distances = sorted(
        (
            (euclidean_distance(test_point, train_point), train_label)
            for train_point, train_label in zip(train_data, train_labels)
        ),
        key=lambda pair: pair[0],
    )
    k_nearest_labels = [label for _, label in distances[:k]]
    return Counter(k_nearest_labels).most_common(1)[0][0]
# Split the 100 points: the first 50 are labelled training data, the rest are
# the points to classify.
train_data = data[:50]
train_labels = labels
test_data = data[50:]
k_values = [1, 2, 3, 4, 5, 20, 30]

print("--- k-Nearest Neighbors Classification ---")
print("Training dataset: First 50 points labeled based on the rule (x <= 0.5 -> Class1, x > 0.5 -> Class2)")
print("Testing dataset: Remaining 50 points to be classified\n")

# Classify every test point once per k and keep the labels for plotting.
results = {}
for k in k_values:
    print(f"Results for k = {k}:")
    classified_labels = [knn_classifier(train_data, train_labels, test_point, k)
                         for test_point in test_data]
    results[k] = classified_labels
    # Test points are numbered x51..x100 to continue after the training set.
    # NOTE(review): a "Program 8" marker was embedded inside the f-string
    # below in the source; it looks like a page header and is omitted.
    for i, (value, label) in enumerate(zip(test_data, classified_labels), start=51):
        print(f"Point x{i} (value: {value:.4f}) is classified as {label}")
    print("\n")

print("Classification complete.\n")

# One figure per k: training points on level 0, classified test points on
# level 1, coloured by class.
for k in k_values:
    classified_labels = results[k]
    class1_points = [point for point, label in zip(test_data, classified_labels)
                     if label == "Class1"]
    class2_points = [point for point, label in zip(test_data, classified_labels)
                     if label == "Class2"]

    plt.figure(figsize=(10, 6))
    plt.scatter(train_data, [0] * len(train_data),
                c=["blue" if label == "Class1" else "red" for label in train_labels],
                label="Training Data", marker="o")
    plt.scatter(class1_points, [1] * len(class1_points), c="blue",
                label="Class1 (Test)", marker="x")
    plt.scatter(class2_points, [1] * len(class2_points), c="red",
                label="Class2 (Test)", marker="x")
    plt.title(f"k-NN Classification Results for k = {k}")
    plt.xlabel("Data Points")
    plt.ylabel("Classification Level")
    plt.legend()
    plt.grid(True)
    plt.show()