Machine Learning Lab Assignment 2
Machine Learning Lab Assignment 2
Dataset:
Input:
Output:
2. KNN Classification with Decision Boundary
Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix,classification_report
# Load the dataset.
df = pd.read_csv("student_pass.csv")
# NOTE(review): X and y are used below but are never defined in the visible
# code -- the feature/target selection step appears to be missing from this
# listing. Define them from df (feature columns -> X, label column -> y)
# before this call; the column names cannot be determined from here.
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Train the KNN model (K=3).
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
Input:
Output:
3. Practice Questions
3(a)
Code:
import pandas as pd
# Build a pandas Series from a plain Python list of five values.
data = list(range(10, 51, 10))
series1 = pd.Series(data=data)
print(series1)
Input:
Output:
3(b)
Code:
#Creating a pandas dataframe
import pandas as pd
# Build a DataFrame from a dictionary: each key becomes a column name and
# each list supplies that column's values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    Salary=[50000, 60000, 70000],
)
df = pd.DataFrame(data)
print(df)
Input:
Output:
3(c)
Code:
# Build a DataFrame from row-wise records: every inner list is one row, and
# the column labels are supplied separately.
column_names = ['Name', 'Age', 'Salary']
data = [
    ['Alice', 25, 50000],
    ['Bob', 30, 60000],
    ['Charlie', 35, 70000],
]
df = pd.DataFrame(data, columns=column_names)
print(df)
Input:
Output:
3(d)
Code:
#missing values
import pandas as pd
import numpy as np
# Sample records with deliberate gaps; np.nan marks every missing entry.
missing = np.nan
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Emma'],
    'Age': [25, missing, 30, 35, missing],
    'Salary': [50000, 60000, missing, 80000, 75000],
    'Department': ['HR', 'IT', missing, 'Finance', 'IT'],
}
df = pd.DataFrame(data)
# Show the raw table before any cleaning is applied.
print("Original Dataset with Missing Values:", df, sep="\n")
Input:
Output:
3(e)
Code:
# Tally the missing entries per column, then report the counts.
missing_per_column = df.isna().sum()
print("Missing Values in Each Column:")
print(missing_per_column)
Input:
Output:
3(f)
Code:
import pandas as pd
import numpy as np
# Fill missing Age values with the mean of the Age column.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on df['Age']: that form modifies an intermediate
# object selected from df, which recent pandas versions warn about and which
# does not update df when Copy-on-Write is enabled.
df['Age'] = df['Age'].fillna(df['Age'].mean())
3(g)
Code:
import numpy as np  # needed for np.array below; this snippet previously relied on an earlier import
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Min-max normalization of a small sample matrix (4 rows, 2 columns).
data = np.array([[1,2],[3,4],[5,6],[7,8]])
# Initialize the scaler with its default settings.
scaler = MinMaxScaler()
# Show the raw values first so they can be compared with the scaled output.
print(data)
# Fit the scaler on the data and transform it in a single step.
normalized_data = scaler.fit_transform(data)
print("Normalized Data (Min-Max Scaling)")
print(normalized_data)
Input:
Output:
3(h)
Code:
import pandas as pd
import numpy as np
# Source records: one list of values per column.
salary_records = {
    'Name': ['Geek1', 'Geek2', 'Geek3', 'Geek4'],
    'Salary': [18000, 20000, 15000, 35000],
}
# Create the DataFrame with an explicit column order.
data = pd.DataFrame(salary_records, columns=['Name', 'Salary'])
# Show the DataFrame. A bare `data` expression only displays something as the
# last line of a notebook cell or in a REPL, so print it explicitly.
print(data)
# Add a column holding the base-2 logarithm of each salary.
data['logarithm_base2'] = np.log2(data['Salary'])
# Show the DataFrame with the new column.
print(data)
Input:
Output:
3(i)
Code:
import pandas as pd
import numpy as np
# Sample dataset: six evenly spaced values from 50 to 100 in steps of 10.
data = list(range(50, 101, 10))
Output:
4. Naïve Bayes Classification
Code:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
# Load the weather dataset from disk.
data = pd.read_csv("weather.csv")
df=pd.DataFrame(data)
# Encode the categorical feature columns as integer codes. Every column is
# encoded from ITS OWN values; previously 'Temperature' was filled from the
# 'Humidity' column, which discarded the real temperature data.
# NOTE(review): the actual category labels live in weather.csv and are not
# visible here -- confirm each column really is categorical.
feature_columns = ['Outlook','Temperature','Humidity','Wind']
label_enc=LabelEncoder()
for column in feature_columns:
    # fit_transform refits the encoder on each column, so label_enc only
    # remembers the mapping of the last column processed.
    df[column] = label_enc.fit_transform(df[column])
# Split into the feature matrix and the target column.
X=df[feature_columns]
y=df['PlayTennis']
# Train/test split: hold out 30% of the rows; the fixed random_state makes
# the split reproducible.
X_train,X_test,y_train,y_test=train_test_split(
    X,y,test_size=0.3,random_state=42
)
# Train the Naive Bayes classifier.
model=GaussianNB()
model.fit(X_train,y_train)
# Predict labels for the held-out rows.
y_pred=model.predict(X_test)
# Evaluate the model on the held-out rows.
print("Accuracy:",accuracy_score(y_test,y_pred))
print("Confusion Matrix:\n",confusion_matrix(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))
Dataset:
Input:
Output:
5. EM-Model
Code:
#EM-Model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,accuracy_score
# Read the student records from disk.
df = pd.read_csv("student_data.csv")
# Feature matrix: the math and science score columns, taken via .values.
feature_columns = ["Math_Score", "Science_Score"]
X = df[feature_columns].values
# Reference labels from the Category column (0 or 1 according to the
# original comment -- the file itself is not visible here).
y_true = df["Category"].values