# Program 1: FIND-S — compute the maximally specific hypothesis
# consistent with the positive training examples in enjoysport.csv.
import csv

import pandas as pd

# Show the dataset with pandas for readability.
d = pd.read_csv(r"sample_data/enjoysport.csv")
print(d)

# Re-read the raw rows with csv so attributes can be indexed positionally.
a = []
with open(r'sample_data/enjoysport.csv', 'r') as csvfile:
    for row in csv.reader(csvfile):
        a.append(row)
print(a)
print("The number of training instances are:", len(a))

num_attribute = len(a[0]) - 1  # last column is the class label

print("\nThe initial hypothesis:")
hypothesis = ['0'] * num_attribute  # '0' = most specific (matches nothing)
print(hypothesis)

# FIND-S: generalize only on positive ('yes') examples; negatives are ignored.
# NOTE(review): starting at index 1 assumes row 0 is a header — confirm against the CSV.
for i in range(1, len(a)):
    if a[i][num_attribute] == 'yes':
        for j in range(num_attribute):
            # Keep the attribute value if it agrees (or hypothesis is still
            # unconstrained); otherwise generalize to '?'.
            if hypothesis[j] == '0' or hypothesis[j] == a[i][j]:
                hypothesis[j] = a[i][j]
            else:
                hypothesis[j] = '?'
    print("\nThe hypothesis for the training instance {} is:\n".format(i), hypothesis)

print("\nThe Maximally specific hypothesis for the training data is:")
print(hypothesis)
# Program 2: Candidate-Elimination — compute the final specific and
# general boundary hypotheses from enjoysport.csv.
import numpy as np
import pandas as pd

data = pd.DataFrame(data=pd.read_csv('/home/sahyadri/Downloads/enjoysport.csv', header=None))
concepts = np.array(data.iloc[:, 0:-1])  # attribute columns
print("concepts")
print(concepts)
target = np.array(data.iloc[:, -1])      # class-label column ('yes'/'no')
print("target : ")
print(target)


def learn(concepts, target):
    """Run Candidate-Elimination over the training data.

    concepts : 2-D array of attribute rows.
    target   : 1-D array of 'yes'/'no' labels, aligned with concepts.
    Returns (specific_h, general_h): the final specific hypothesis and the
    list of non-trivial general hypotheses.
    """
    specific_h = concepts[0].copy()
    n_attrs = len(specific_h)
    general_h = [["?" for _ in range(n_attrs)] for _ in range(n_attrs)]

    for i, h in enumerate(concepts):
        if target[i] == "yes":
            # Positive example: generalize specific_h where it disagrees.
            for x in range(n_attrs):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == "no":
            # Negative example: specialize general_h on disagreeing attributes.
            for x in range(n_attrs):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

    # Drop fully-general rows. (Bug fix: the original hard-coded a
    # 6-attribute ['?', ...] list; this works for any attribute count.)
    unconstrained = ['?'] * n_attrs
    general_h = [row for row in general_h if row != unconstrained]
    return specific_h, general_h


s_final, g_final = learn(concepts, target)
print("Final specifc_h", s_final, sep="\n")
print("Final general_h", g_final, sep="\n")
# Program 4: single-hidden-layer neural network trained with
# batch gradient descent / backpropagation on a toy dataset.
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # two input features per sample
y = np.array(([92], [86], [89]), dtype=float)        # target scores
X = X / np.amax(X, axis=0)  # scale each feature column to [0, 1]
y = y / 100                 # scale targets to [0, 1]


def sigmoid(x):
    """Logistic activation function."""
    return 1 / (1 + np.exp(-x))


def derivatives_sigmoid(x):
    """Derivative of the sigmoid, given x already equals sigmoid(net)."""
    return x * (1 - x)


epoch = 1000
learning_rate = 0.6
inputlayer_neurons = 2
hiddenlayer_neurons = 3
output_neurons = 1

# Random weight and bias initialization in [0, 1).
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wo = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bo = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward pass.
    net_h = np.dot(X, wh) + bh
    sigma_h = sigmoid(net_h)
    net_o = np.dot(sigma_h, wo) + bo
    output = sigmoid(net_o)

    # Backward pass: output-layer then hidden-layer error terms.
    deltaK = (y - output) * derivatives_sigmoid(output)
    deltaH = deltaK.dot(wo.T) * derivatives_sigmoid(sigma_h)

    # Gradient-ascent-style updates on the error signal.
    wo = wo + sigma_h.T.dot(deltaK) * learning_rate
    # Bug fix: the original never updated the biases, so bh/bo stayed at
    # their random initial values for the whole training run.
    bo = bo + deltaK.sum(axis=0, keepdims=True) * learning_rate
    wh = wh + X.T.dot(deltaH) * learning_rate
    bh = bh + deltaH.sum(axis=0, keepdims=True) * learning_rate

print(f"Input: \n {X}")
print(f"Actual Output: \n {y}")
print(f"Predicted Output: \n {output}")
# Program 5: sentiment classification of short texts with a
# bag-of-words MultinomialNB classifier.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Bug fix: the original dict literal was never closed (missing '}').
data = {
    'text': [
        'I love Programming in Python',
        'Python is an amazing language',
        'I hate getting errors in my code',
        'DEbugging can be frustrating',
        'Machine leraning is fascinating',
        'I dislike Syntax Errors'
    ],
    'label': ['positive', 'positive', 'negative', 'negative', 'positive', 'negative']
}

df = pd.DataFrame(data)
df['label'] = df['label'].map({'positive': 1, 'negative': 0})  # encode labels as 1/0

# Stratified split keeps the positive/negative ratio in both halves.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'], df['label'], test_size=0.2, stratify=df['label'], random_state=42)

# Lowercase and drop English stop words before counting tokens.
vectorizer = CountVectorizer(lowercase=True, stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

clf = MultinomialNB()
clf.fit(X_train_vec, y_train)

y_pred = clf.predict(X_test_vec)
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy of the classifier:", accuracy)

# Classify one unseen sample sentence.
sample_text = ["I dislike Syntax Errors"]
sample_vec = vectorizer.transform(sample_text)
predicted_label = clf.predict(sample_vec)
print("Predicted Label for sample text:", "positive" if predicted_label[0] == 1 else "negative")
# Program 6: GaussianNB on the Pima Indians diabetes dataset, with a
# confusion-matrix heatmap and a single-sample prediction.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv("/home/sahyadri/Downloads/pima_indian(1).csv")

feature_col_names = ['num_preg', 'glucose_conc', 'diastolic_bp', 'thickness',
                     'insulin', 'bmi', 'diab_pred', 'age']
predicted_class_names = ['diabetes']

x = df[feature_col_names].values
y = df[predicted_class_names].values

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)
print('Total number of Training Data : ', y_train.shape)
print('Total number of Test Data : ', y_test.shape)

clf = GaussianNB()
# .ravel() passes a 1-D label array, avoiding sklearn's column-vector warning.
clf.fit(x_train, y_train.ravel())

predicted = clf.predict(x_test)
accuracy = metrics.accuracy_score(y_test, predicted)
print('\nAccuracy of the classifier : ', accuracy)

# Bug fix: the original broke the 'No Diabetes' string literal across two
# source lines, which is a syntax error.
conf_matrix = metrics.confusion_matrix(y_test, predicted)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=['No Diabetes', 'Diabetes'],
            yticklabels=['No Diabetes', 'Diabetes'])
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

# Predict one hand-written patient record.
test_data = [[6, 148, 72, 35, 0, 33.6, 0.672, 50]]
predict_test_data = clf.predict(test_data)
print("\nPredicted value for Individual Test Data : ", predict_test_data)
# Program 7: GaussianNB on a heart-disease dataset with label-encoded
# categorical features, confusion-matrix heatmap, and a sample prediction.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv("/home/sahyadri/Downloads/Heart_Disease.csv")

feature_col_names = df.columns[df.columns != 'CHDRisk']
predicted_class_names = ['CHDRisk']

# .copy() so the encoding below writes into our own frame, not a view of df
# (avoids pandas' SettingWithCopyWarning / silently lost assignments).
x = df[feature_col_names].copy()
y = df[predicted_class_names]

# Encode every object-dtype (categorical) feature column as integers.
# NOTE: a fresh fit per column is fine here since each fit_transform is
# self-contained for that column.
label_encoder = LabelEncoder()
categorical_columns = x.select_dtypes(include=['object']).columns
for col in categorical_columns:
    x[col] = label_encoder.fit_transform(x[col])
# Bug fix: LabelEncoder expects a 1-D array; the original passed the 2-D
# DataFrame directly.
y = label_encoder.fit_transform(y.values.ravel())

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)
print('Total number of Training Data : ', y_train.shape)
print('Total number of Test Data : ', y_test.shape)

clf = GaussianNB()
clf.fit(x_train, y_train)

predicted = clf.predict(x_test)
accuracy = metrics.accuracy_score(y_test, predicted)
print('\nAccuracy of the classifier : ', accuracy)

conf_matrix = metrics.confusion_matrix(y_test, predicted)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=['No Disease', 'Heart Disease'])
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

# Predict one hand-written patient record (order must match feature columns).
test_data = [[0, 63, 2, 0, 5, 0, 0, 0, 0, 240, 120, 80, 23.5, 70, 88]]
predict_test_data = clf.predict(test_data)
print("\nPredicted value for Individual Test Data : ", predict_test_data)
# Program 8: 1-nearest-neighbour classification of the Iris dataset.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets

iris = datasets.load_iris()
print('Iris Data set loaded...')

# Hold out 10% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1)
print("Dataset is split into training and testing...")
print('Size of training data and its label', x_train.shape, y_train.shape)
print('Size of testing data and its label', x_test.shape, y_test.shape)

for i in range(len(iris.target_names)):
    print("Label", i, "-", str(iris.target_names[i]))

classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(x_train, y_train)

y_pred = classifier.predict(x_test)
# Bug fix: the original line ended with a stray 'ss' token (syntax error).
print("Results of classification using K-nn with k=1")
for r in range(0, len(x_test)):
    print("Sample:", str(x_test[r]), "Actual-label:", str(y_test[r]),
          "Predicted-label:", str(y_pred[r]))

print("Classification Accuarcy:", classifier.score(x_test, y_test))