Aiml Lab

The document contains code for several machine learning lab programs: 1. A locally weighted linear regression algorithm for predicting tips from total bills in a restaurant dataset. 2. A k-nearest neighbors classifier to predict iris flower types from sepal and petal measurements. 3. Clustering algorithms (KMeans and a Gaussian mixture model) applied to the iris flowers. 4. A Gaussian naive Bayes classifier to predict tennis play from weather attributes. 5. A neural network trained with sigmoid activations and backpropagation to predict exam scores. 6. An ID3 decision tree algorithm to classify examples into yes/no based on attributes. It also includes the A* and AO* search algorithms and the candidate elimination algorithm.

9th program:

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def kernel(point, xmat, k):
    # build a diagonal weight matrix: points near 'point' get weights close to 1
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k**2))
    return weights

def localWeight(point, xmat, ymat, k):
    # solve the weighted normal equations for the local regression coefficients
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    # fit a separate weighted regression at every data point
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

# load data points
data = pd.read_csv('/content/sample_data/10-dataset.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)

# prepare the design matrix: prepend a column of ones (intercept term)
mbill = np.mat(bill)
mtip = np.mat(tip)

m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))

# set the bandwidth k here
ypred = localWeightRegression(X, mtip, 0.5)
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()
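
The bandwidth k controls how far the kernel's influence extends: a small k tracks the data closely and risks a jagged, overfit curve, while a large k smooths towards ordinary linear regression. A minimal sketch, assuming the X, mtip, bill and tip variables built above, that overlays fits for a few bandwidths:

import matplotlib.pyplot as plt
import numpy as np

# sketch: compare several bandwidths on the same data
# (assumes X, mtip, bill, tip and localWeightRegression from above)
xs = np.array(X[:, 1]).flatten()
order = xs.argsort()
for k, c in [(0.2, 'orange'), (0.8, 'purple'), (3.0, 'brown')]:
    yp = localWeightRegression(X, mtip, k)
    plt.plot(xs[order], yp[order], color=c, label='k = %.1f' % k)
plt.scatter(bill, tip, color='green', s=10)
plt.legend()
plt.show()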

8th program:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

names = ['petal-width', 'petal-length', 'sepal-width', 'sepal-length', 'Class']

dataset = pd.read_csv("/content/sample_data/8-dataset.csv", names=names)

X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]
print(X.head())
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.10)
classifier = KNeighborsClassifier(n_neighbors=5).fit(Xtrain, ytrain)
ypred = classifier.predict(Xtest)
print("%-25s %-25s %-25s" % ("Original value", "Predicted value", "Answer"))
i = 0
for label in ytest:
    print("%-25s %-25s" % (label, ypred[i]), end="")
    if label == ypred[i]:
        print("%-25s" % "Correct")
    else:
        print("%-25s" % "Wrong")
    i = i + 1

print("Confusion matrix\n", metrics.confusion_matrix(ytest, ypred))
print("Classification report\n", metrics.classification_report(ytest, ypred))
print("Accuracy score", metrics.accuracy_score(ytest, ypred))

7th program:

from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import sklearn.metrics as metrics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

names = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Class']

dataset = pd.read_csv("/content/sample_data/8-dataset.csv", names=names)

X = dataset.iloc[:, :-1]

label = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

y = [label[c] for c in dataset.iloc[:, -1]]

plt.figure(figsize=(14, 7))

colormap = np.array(['red', 'blue', 'black'])

# REAL PLOT
plt.subplot(1, 3, 1)
plt.title('Real')
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y])

# K-MEANS PLOT
model = KMeans(n_clusters=3, random_state=0).fit(X)
plt.subplot(1, 3, 2)
plt.title('KMeans')
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_])
print('The accuracy score of K-Means: ', metrics.accuracy_score(y, model.labels_))
print('The Confusion matrix of K-Means:\n', metrics.confusion_matrix(y, model.labels_))

# GMM PLOT
gmm = GaussianMixture(n_components=3, random_state=0).fit(X)
y_cluster_gmm = gmm.predict(X)
plt.subplot(1, 3, 3)
plt.title('GMM Classification')
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm])
print('The accuracy score of EM: ', metrics.accuracy_score(y, y_cluster_gmm))
print('The Confusion matrix of EM:\n', metrics.confusion_matrix(y, y_cluster_gmm))
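
Cluster indices are arbitrary, so scoring them directly against the class labels (as above) can understate the agreement when the clusterer happens to number its clusters differently. A minimal sketch, assuming y and model.labels_ from above, that finds the best cluster-to-class mapping before scoring:

from scipy.optimize import linear_sum_assignment
import sklearn.metrics as metrics

# sketch: align cluster ids with class labels via the Hungarian method
# (assumes y and model.labels_ from above)
cm = metrics.confusion_matrix(y, model.labels_)
row_ind, col_ind = linear_sum_assignment(-cm)  # maximize matched counts
mapping = dict(zip(col_ind, row_ind))
aligned = [mapping[c] for c in model.labels_]
print('Aligned K-Means accuracy:', metrics.accuracy_score(y, aligned))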

6th program:

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB

data = pd.read_csv("/content/sample_data/tennisdata.csv")
print("First 5 values of train data are\n", data.head())
X = data.iloc[:, :-1]
print("First 5 values of train input are\n", X.head())
y = data.iloc[:, -1]
print("First 5 values of train output are\n", y.head())

# encode each categorical attribute with its own LabelEncoder
le_Outlook = LabelEncoder()
X.Outlook = le_Outlook.fit_transform(X.Outlook)

le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)

le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)

le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)

le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.20)
classy = GaussianNB()
classy.fit(X_train, y_train)

from sklearn.metrics import accuracy_score

print("Accuracy score", accuracy_score(y_test, classy.predict(X_test)))

3rd program:

import csv

with open("/content/sample_data/train.csv") as f:
    csv_file = csv.reader(f)
    data = list(csv_file)

specific = data[1][:-1]
general = [['?' for i in range(len(specific))] for j in range(len(specific))]

for i in data:
    if i[-1] == "Yes":
        for j in range(len(specific)):
            if i[j] != specific[j]:
                specific[j] = "?"
                general[j][j] = "?"

    elif i[-1] == "No":
        for j in range(len(specific)):
            if i[j] != specific[j]:
                general[j][j] = specific[j]
            else:
                general[j][j] = "?"

    print("\nStep " + str(data.index(i) + 1) + " of Candidate Elimination Algorithm")
    print(specific)
    print(general)

gh = []  # gh = general hypothesis
for i in general:
    for j in i:
        if j != '?':
            gh.append(i)
            break
print("\nFinal Specific hypothesis:\n", specific)
print("\nFinal General hypothesis:\n", gh)

5th program:

import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalize each feature column by its maximum
y = y / 100

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of the sigmoid, expressed in terms of the sigmoid's output
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000             # number of training iterations
lr = 0.1                 # learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of neurons in the hidden layer
output_neurons = 1       # number of neurons in the output layer

# weight and bias initialization:
# draws uniformly random numbers of dimension x*y
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)

    # how much the hidden layer weights contributed to the error
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # weight updates: dot product of next-layer error and current-layer output
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    # bias updates (omitted in many lab versions, but needed for the
    # biases to actually train)
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)

4th program:

import pandas as pd
import math
import numpy as np

data = pd.read_csv("/content/sample_data/3-dataset.csv")
features = [feat for feat in data]
features.remove("answer")

class Node:
    def __init__(self):
        self.children = []
        self.value = ""
        self.isLeaf = False
        self.pred = ""

def entropy(examples):
    pos = 0.0
    neg = 0.0
    for _, row in examples.iterrows():
        if row["answer"] == "yes":
            pos += 1
        else:
            neg += 1
    if pos == 0.0 or neg == 0.0:
        return 0.0
    else:
        p = pos / (pos + neg)
        n = neg / (pos + neg)
        return -(p * math.log(p, 2) + n * math.log(n, 2))

def info_gain(examples, attr):
    uniq = np.unique(examples[attr])
    gain = entropy(examples)
    for u in uniq:
        subdata = examples[examples[attr] == u]
        sub_e = entropy(subdata)
        gain -= (float(len(subdata)) / float(len(examples))) * sub_e
    return gain

def ID3(examples, attrs):
    root = Node()

    # pick the attribute with the highest information gain
    max_gain = 0
    max_feat = ""
    for feature in attrs:
        gain = info_gain(examples, feature)
        if gain > max_gain:
            max_gain = gain
            max_feat = feature
    root.value = max_feat
    uniq = np.unique(examples[max_feat])
    for u in uniq:
        subdata = examples[examples[max_feat] == u]
        if entropy(subdata) == 0.0:
            # pure subset: create a leaf carrying the predicted answer
            newNode = Node()
            newNode.isLeaf = True
            newNode.value = u
            newNode.pred = np.unique(subdata["answer"])
            root.children.append(newNode)
        else:
            # impure subset: recurse on the remaining attributes
            dummyNode = Node()
            dummyNode.value = u
            new_attrs = attrs.copy()
            new_attrs.remove(max_feat)
            child = ID3(subdata, new_attrs)
            dummyNode.children.append(child)
            root.children.append(dummyNode)

    return root

def printTree(root: Node, depth=0):
    for i in range(depth):
        print("\t", end="")
    print(root.value, end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    print()
    for child in root.children:
        printTree(child, depth + 1)

def classify(root: Node, new):
    for child in root.children:
        if child.value == new[root.value]:
            if child.isLeaf:
                print("Predicted Label for new example", new, " is:", child.pred)
                return
            else:
                classify(child.children[0], new)

root = ID3(data, features)
print("Decision Tree is:")
printTree(root)
print("------------------")

new = {"outlook": "sunny", "temperature": "hot", "humidity": "normal", "wind": "strong"}
classify(root, new)
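
The program assumes the CSV's last column is named 'answer' with values 'yes'/'no' and that the remaining column names match the keys of the new example. A hypothetical 3-dataset.csv in the usual play-tennis layout might start like:

outlook,temperature,humidity,wind,answer
sunny,hot,high,weak,no
sunny,hot,high,strong,no
overcast,hot,high,weak,yes
rain,mild,high,weak,yes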

1st program:

def aStarAlgo(start_node, stop_node):
    open_set = {start_node}
    closed_set = set()
    g = {}        # store distance from starting node
    parents = {}  # parents contains an adjacency map of all nodes
    # distance of starting node from itself is zero
    g[start_node] = 0
    # start_node is the root node, i.e. it has no parent nodes,
    # so start_node is set to its own parent node
    parents[start_node] = start_node
    while len(open_set) > 0:
        n = None
        # the node with the lowest f() = g() + h() is found
        for v in open_set:
            if n == None or g[v] + heuristic(v) < g[n] + heuristic(n):
                n = v
        if n == stop_node or Graph_nodes.get(n) == None:
            pass
        else:
            for (m, weight) in get_neighbors(n):
                # nodes 'm' not in the open or closed set are added to open,
                # and n is set as their parent
                if m not in open_set and m not in closed_set:
                    open_set.add(m)
                    parents[m] = n
                    g[m] = g[n] + weight
                # for each node m, compare its distance from start, i.e. g(m),
                # to the distance from start going through n
                else:
                    if g[m] > g[n] + weight:
                        # update g(m)
                        g[m] = g[n] + weight
                        # change parent of m to n
                        parents[m] = n
                        # if m is in the closed set, remove it and add to open
                        if m in closed_set:
                            closed_set.remove(m)
                            open_set.add(m)
        if n == None:
            print('Path does not exist!')
            return None

        # if the current node is the stop_node,
        # then we begin reconstructing the path from it to the start_node
        if n == stop_node:
            path = []
            while parents[n] != n:
                path.append(n)
                n = parents[n]
            path.append(start_node)
            path.reverse()
            print('Path found: {}'.format(path))
            return path
        # remove n from the open set and add it to the closed set
        # because all of its neighbors were inspected
        open_set.remove(n)
        closed_set.add(n)
    print('Path does not exist!')
    return None

# define a function to return the neighbors of the passed node
# together with their distances
def get_neighbors(v):
    if v in Graph_nodes:
        return Graph_nodes[v]
    else:
        return None

# for simplicity we'll consider the heuristic distances as given;
# this function returns the heuristic distance for a node
def heuristic(n):
    H_dist = {
        'A': 11,
        'B': 6,
        'C': 5,
        'D': 7,
        'E': 3,
        'F': 6,
        'G': 5,
        'H': 3,
        'I': 1,
        'J': 0
    }
    return H_dist[n]

# Describe your graph here
Graph_nodes = {
    'A': [('B', 6), ('F', 3)],
    'B': [('A', 6), ('C', 3), ('D', 2)],
    'C': [('B', 3), ('D', 1), ('E', 5)],
    'D': [('B', 2), ('C', 1), ('E', 8)],
    'E': [('C', 5), ('D', 8), ('I', 5), ('J', 5)],
    'F': [('A', 3), ('G', 1), ('H', 7)],
    'G': [('F', 1), ('I', 3)],
    'H': [('F', 7), ('I', 2)],
    'I': [('E', 5), ('G', 3), ('H', 2), ('J', 3)],
}

aStarAlgo('B', 'D')
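
The heuristic table assigns 'J' a value of 0, so 'J' is the goal the estimates were written for. A usage sketch that runs the search toward 'J' and totals the edge weights along whatever path comes back:

# sketch: search from 'A' to 'J' and sum the edge weights on the result
path = aStarAlgo('A', 'J')
if path:
    cost = sum(weight
               for a, b in zip(path, path[1:])
               for m, weight in Graph_nodes[a] if m == b)
    print('Total path cost:', cost)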

2nd program:

class Graph:
    # instantiate a graph object with graph topology, heuristic values
    # and a start node
    def __init__(self, graph, heuristicNodeList, startNode):
        self.graph = graph
        self.H = heuristicNodeList
        self.start = startNode
        self.parent = {}
        self.status = {}
        self.solutionGraph = {}

    def applyAOStar(self):  # starts the recursive AO* algorithm
        self.aoStar(self.start, False)

    def getNeighbors(self, v):  # gets the neighbors of a given node
        return self.graph.get(v, '')

    def getStatus(self, v):  # return the status of a given node
        return self.status.get(v, 0)

    def setStatus(self, v, val):  # set the status of a given node
        self.status[v] = val

    def getHeuristicNodeValue(self, n):
        return self.H.get(n, 0)  # always return the heuristic value of a given node

    def setHeuristicNodeValue(self, n, value):
        self.H[n] = value  # set the revised heuristic value of a given node

    def printSolution(self):
        print("FOR GRAPH SOLUTION, TRAVERSE THE GRAPH FROM THE START NODE:", self.start)
        print("------------------------------------------------------------")
        print(self.solutionGraph)
        print("------------------------------------------------------------")

    # computes the minimum cost over the sets of child nodes of a given node v
    def computeMinimumCostChildNodes(self, v):
        minimumCost = 0
        costToChildNodeListDict = {}
        costToChildNodeListDict[minimumCost] = []
        flag = True
        for nodeInfoTupleList in self.getNeighbors(v):  # iterate over each set of child node/s
            cost = 0
            nodeList = []
            for c, weight in nodeInfoTupleList:
                cost = cost + self.getHeuristicNodeValue(c) + weight
                nodeList.append(c)

            if flag == True:
                # initialize the minimum cost with the cost of the first set of child node/s
                minimumCost = cost
                costToChildNodeListDict[minimumCost] = nodeList
                flag = False
            else:
                # compare the current set's cost against the minimum cost so far
                if minimumCost > cost:
                    minimumCost = cost
                    costToChildNodeListDict[minimumCost] = nodeList

        # return the minimum cost and the minimum cost child node/s
        return minimumCost, costToChildNodeListDict[minimumCost]

    # AO* algorithm for a start node and a backTracking status flag
    def aoStar(self, v, backTracking):
        print("HEURISTIC VALUES :", self.H)
        print("SOLUTION GRAPH :", self.solutionGraph)
        print("PROCESSING NODE :", v)
        print("-----------------------------------------------------------------------------------------")

        if self.getStatus(v) >= 0:
            # if the status of node v is >= 0, compute the minimum cost child nodes of v
            minimumCost, childNodeList = self.computeMinimumCostChildNodes(v)
            self.setHeuristicNodeValue(v, minimumCost)
            self.setStatus(v, len(childNodeList))

            solved = True  # check whether the minimum cost nodes of v are solved
            for childNode in childNodeList:
                self.parent[childNode] = v
                if self.getStatus(childNode) != -1:
                    solved = solved & False

            if solved == True:
                # if the minimum cost nodes of v are solved, set the current
                # node status as solved (-1)
                self.setStatus(v, -1)
                # update the solution graph with the solved nodes,
                # which may form part of the solution
                self.solutionGraph[v] = childNodeList

            if v != self.start:
                # if the current node is not the start node, backtrack the
                # current node's value to its parent
                self.aoStar(self.parent[v], True)

            if backTracking == False:  # if the current call is not for backtracking
                for childNode in childNodeList:
                    # set each minimum cost child node's status to 0
                    # (needs exploration) and explore it further with the
                    # backtracking status set to false
                    self.setStatus(childNode, 0)
                    self.aoStar(childNode, False)

h1 = {'A': 1, 'B': 6, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 5, 'H': 7, 'I': 7, 'J': 1, 'T': 3}
graph1 = {
    'A': [[('B', 1), ('C', 1)], [('D', 1)]],
    'B': [[('G', 1)], [('H', 1)]],
    'C': [[('J', 1)]],
    'D': [[('E', 1), ('F', 1)]],
    'G': [[('I', 1)]]
}
G1 = Graph(graph1, h1, 'A')
G1.applyAOStar()
G1.printSolution()

h2 = {'A': 1, 'B': 6, 'C': 12, 'D': 10, 'E': 4, 'F': 4, 'G': 5, 'H': 7}  # heuristic values of nodes
graph2 = {  # graph of nodes and edges
    'A': [[('B', 1), ('C', 1)], [('D', 1)]],  # neighbors of node 'A': B, C and D with respective weights
    'B': [[('G', 1)], [('H', 1)]],            # neighbors are included in a list of lists
    'D': [[('E', 1), ('F', 1)]]               # each sublist is an "AND" group; separate sublists are "OR" choices
}

G2 = Graph(graph2, h2, 'A')  # instantiate Graph object with graph, heuristic values and start node
G2.applyAOStar()             # run the AO* algorithm
G2.printSolution()           # print the solution graph found by the AO* search