1. Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based on a given set of training data samples. Read the training data from a .CSV file.
import csv

with open('C:/xampp/htdocs/girish/PlayTennis.csv', 'r') as f:
    reader = csv.reader(f)
    your_list = list(reader)

# Most specific hypothesis: one '0' per attribute (the class column is excluded)
h = [['0', '0', '0', '0']]

for i in your_list:
    print(i)
    if i[-1] == "Yes":                      # generalise only on positive examples
        for j, x in enumerate(i[:-1]):
            if h[0][j] == '0':              # first positive example: copy the value
                h[0][j] = x
            elif x != h[0][j]:              # conflicting value: generalise to '?'
                h[0][j] = '?'

print("Most specific hypothesis is")
print(h)
output:
['Outlook', 'Temperature', 'Humidity', 'Wind', 'Play Tennis']
['Sunny', 'Hot', 'High', 'Weak', 'No']
['Sunny', 'Hot', 'High', 'Strong', 'No']
['Overcast', 'Hot', 'High', 'Weak', 'Yes']
['Rain', 'Mild', 'High', 'Weak', 'Yes']
['Rain', 'Cool', 'Normal', 'Weak', 'Yes']
['Rain', 'Cool', 'Normal', 'Strong', 'No']
['Overcast', 'Cool', 'Normal', 'Strong', 'Yes']
['Sunny', 'Mild', 'High', 'Weak', 'No']
['Sunny', 'Cool', 'Normal', 'Weak', 'Yes']
['Rain', 'Mild', 'Normal', 'Weak', 'Yes']
['Sunny', 'Mild', 'Normal', 'Strong', 'Yes']
['Overcast', 'Mild', 'High', 'Strong', 'Yes']
['Overcast', 'Hot', 'Normal', 'Weak', 'Yes']
['Rain', 'Mild', 'High', 'Strong', 'No']
Most specific hypothesis is
[['?', '?', '?', '?']]
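For reference, a hand trace of how the specific hypothesis generalises over the positive (Yes) examples listed above:
['0', '0', '0', '0'] -> initial most specific hypothesis
['Overcast', 'Hot', 'High', 'Weak'] -> after the first positive example
['?', '?', 'High', 'Weak'] -> after ['Rain', 'Mild', 'High', 'Weak']
['?', '?', '?', 'Weak'] -> after ['Rain', 'Cool', 'Normal', 'Weak']
['?', '?', '?', '?'] -> after ['Overcast', 'Cool', 'Normal', 'Strong']; the remaining positives leave it unchanged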
2. For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples.
import csv

with open('ai2.csv') as file:
    data = list(csv.reader(file))

concepts = [row[:-1] for row in data]    # attribute values of each example
target = [row[-1] for row in data]       # class labels ("Yes"/"No")

# S starts as the most specific hypothesis, G as a set of most general hypotheses
specific_h = ['0'] * len(concepts[0])
general_h = [['?' for _ in range(len(specific_h))] for _ in range(len(specific_h))]

for i, instance in enumerate(concepts):
    if target[i] == "Yes":                       # positive example: generalise S
        for x in range(len(specific_h)):
            if specific_h[x] == '0':
                specific_h[x] = instance[x]
            elif instance[x] != specific_h[x]:
                specific_h[x] = '?'
                general_h[x][x] = '?'
    if target[i] == "No":                        # negative example: specialise G
        for x in range(len(specific_h)):
            general_h[x][x] = specific_h[x] if instance[x] != specific_h[x] else '?'

# Drop the hypotheses in G that stayed fully general
most_general = ['?'] * len(specific_h)
general_h = [g for g in general_h if g != most_general]

print("Final Specific : ", specific_h, sep='\n')
print("Final General : ", general_h, sep='\n')
Output
Final Specific :
['Sunny', 'Warm', '?', 'Strong', '?', '?']
Final General :
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
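The boundaries shown above are consistent with the classic EnjoySport training set from Mitchell's textbook, so ai2.csv presumably holds these four rows (no header, class label in the last column):
Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes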
3. Demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.
import pandas as pd
from pandas import DataFrame

df_tennis = pd.DataFrame(data=pd.read_csv('C:/Users/Akshatha/PycharmProjects/pythonPrograms/playtennis.csv'))

def entropy(probs):
    import math
    return sum([-prob * math.log(prob, 2) for prob in probs])

def entropy_of_list(a_list):
    from collections import Counter
    cnt = Counter(x for x in a_list)
    print("No and Yes Classes:", a_list.name, cnt)
    num_instances = len(a_list) * 1.0
    probs = [x / num_instances for x in cnt.values()]
    return entropy(probs)

total_entropy = entropy_of_list(df_tennis['playtennis'])
print("Entropy of given PlayTennis Data Set:", total_entropy)

def information_gain(df, split_attribute_name, target_attribute_name, trace=0):
    print("Information Gain Calculation of ", split_attribute_name)
    df_split = df.groupby(split_attribute_name)
    for name, group in df_split:
        print(name)
        print(group)
    nobs = len(df.index) * 1.0
    # entropy and proportion of observations for every value of the split attribute
    df_agg_ent = df_split.agg({target_attribute_name: [entropy_of_list, lambda x: len(x) / nobs]})[target_attribute_name]
    df_agg_ent.columns = ['Entropy', 'PropObservations']
    print(df_agg_ent)
    new_entropy = sum(df_agg_ent['Entropy'] * df_agg_ent['PropObservations'])
    old_entropy = entropy_of_list(df[target_attribute_name])
    return old_entropy - new_entropy

print("\nInfo-gain for Outlook is: " + str(information_gain(df_tennis, 'outlook', 'playtennis')) + "\n")
print("\nInfo-gain for Humidity is: " + str(information_gain(df_tennis, 'humidity', 'playtennis')) + "\n")
print("\nInfo-gain for Wind is: " + str(information_gain(df_tennis, 'wind', 'playtennis')) + "\n")
print("\nInfo-gain for Temperature is: " + str(information_gain(df_tennis, 'temperature', 'playtennis')) + "\n")

def id3(df, target_attribute_name, attribute_names, default_class=None):
    from collections import Counter
    cnt = Counter(x for x in df[target_attribute_name])
    if len(cnt) == 1:                              # all examples have the same class
        return next(iter(cnt))
    elif df.empty or (not attribute_names):        # nothing left to split on
        return default_class
    else:
        default_class = max(cnt.keys())
        gainz = [information_gain(df, attr, target_attribute_name) for attr in attribute_names]
        index_of_max = gainz.index(max(gainz))
        best_attr = attribute_names[index_of_max]  # attribute with the highest information gain
        tree = {best_attr: {}}
        remaining_attribute_names = [i for i in attribute_names if i != best_attr]
        for attr_val, data_subset in df.groupby(best_attr):
            subtree = id3(data_subset, target_attribute_name, remaining_attribute_names, default_class)
            tree[best_attr][attr_val] = subtree
        return tree

attribute_names = list(df_tennis.columns)
print("List of Attributes:", attribute_names)
attribute_names.remove('playtennis')
print("Predicting Attributes:", attribute_names)

from pprint import pprint
tree = id3(df_tennis, 'playtennis', attribute_names)
print("\n\nThe Resultant Decision Tree is :\n")
pprint(tree)

def classify(instance, tree, default=None):
    attribute = next(iter(tree))
    if instance[attribute] in tree[attribute].keys():
        result = tree[attribute][instance[attribute]]
        if isinstance(result, dict):               # branch: recurse into the subtree
            return classify(instance, result, default)
        else:
            return result                          # leaf: this is a class label
    else:
        return default

df_tennis['predicted'] = df_tennis.apply(classify, axis=1, args=(tree, 'no'))
print('Accuracy is:' + str(sum(df_tennis['playtennis'] == df_tennis['predicted']) / (1.0 * len(df_tennis.index))))
df_tennis[['playtennis', 'predicted']]

# Split: rows 1..n-5 for training, last four rows for testing
training_data = df_tennis.iloc[1:-4]
test_data = df_tennis.iloc[-4:]
train_tree = id3(training_data, 'playtennis', attribute_names)
test_data['predicted2'] = test_data.apply(classify, axis=1, args=(train_tree, 'yes'))   # classify test rows with the tree built on the training rows
print('\n\n Accuracy is: ' + str(sum(test_data['playtennis'] == test_data['predicted2']) / (1.0 * len(test_data.index))))
OUTPUT:
The output is lengthy (entropy and information-gain tables for every split, the resultant decision tree, and the accuracies), so it is not reproduced here; run the program to see it in full.
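For reference, on the standard 14-example PlayTennis data the ID3 procedure above typically selects Outlook at the root, so the pprint'ed tree looks roughly like the sketch below (the exact attribute and value spelling follows whatever is in the CSV):
{'outlook': {'Overcast': 'Yes',
             'Rain': {'wind': {'Strong': 'No', 'Weak': 'Yes'}},
             'Sunny': {'humidity': {'High': 'No', 'Normal': 'Yes'}}}}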
4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets.
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)      # normalise inputs column-wise
y = y / 100                     # normalise outputs to [0, 1]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def derivatives_sigmoid(x):
    return x * (1 - x)          # derivative expressed in terms of the sigmoid output

epoch = 7000
lr = 0.1
inputlayer_neurons = 2
hiddenlayer_neurons = 3
output_neurons = 1

# Random initialisation of weights and biases
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    hinp = np.dot(X, wh) + bh
    hlayer_act = sigmoid(hinp)
    outinp = np.dot(hlayer_act, wout) + bout
    output = sigmoid(outinp)

    # Backpropagation of the error
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # Weight and bias updates
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
OUTPUT:
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.88071043]
[0.86746983]
[0.88574158]]
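A minimal usage sketch for the trained network: a forward pass on a new sample, assuming the sample has already been scaled to [0, 1] in the same way as the training inputs (the values below are hypothetical):
x_new = np.array([[0.5, 0.8]])              # hypothetical, already-normalised input
h_new = sigmoid(np.dot(x_new, wh) + bh)     # hidden-layer activations
print(sigmoid(np.dot(h_new, wout) + bout))  # predicted (normalised) score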
5. Write a program to implement the naive Bayesian classifier for a sample data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets.
import csv
import random
import math

def loadcsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # converting strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitdataset(dataset, splitratio):
    # 67% training size
    trainsize = int(len(dataset) * splitratio)
    trainset = []
    copy = list(dataset)
    while len(trainset) < trainsize:
        # pick random indices from the dataset to build the training data
        index = random.randrange(len(copy))
        trainset.append(copy.pop(index))
    return [trainset, copy]

def separatebyclass(dataset):
    separated = {}  # dictionary of classes 1 and 0
    # the values are the instances belonging to each class
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    # creates a list of (mean, stdev) tuples, one per attribute
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]  # excluding the class label column
    return summaries

def summarizebyclass(dataset):
    separated = separatebyclass(dataset)
    summaries = {}
    for classvalue, instances in separated.items():
        # summaries is a dict of (mean, std) tuples for each class value
        summaries[classvalue] = summarize(instances)
    return summaries

def calculateprobability(x, mean, stdev):
    # Gaussian probability density function
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateclassprobabilities(summaries, inputvector):
    probabilities = {}  # probability of the test instance under every class
    for classvalue, classsummaries in summaries.items():  # class and attribute information as mean and sd
        probabilities[classvalue] = 1
        for i in range(len(classsummaries)):
            mean, stdev = classsummaries[i]  # mean and sd of every attribute, for class 0 and 1 separately
            x = inputvector[i]  # the test vector's i-th attribute
            probabilities[classvalue] *= calculateprobability(x, mean, stdev)  # use normal distribution
    return probabilities

def predict(summaries, inputvector):
    probabilities = calculateclassprobabilities(summaries, inputvector)
    bestLabel, bestProb = None, -1
    for classvalue, probability in probabilities.items():  # assign the class with the highest probability
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classvalue
    return bestLabel

def getpredictions(summaries, testset):
    predictions = []
    for i in range(len(testset)):
        result = predict(summaries, testset[i])
        predictions.append(result)
    return predictions

def getaccuracy(testset, predictions):
    correct = 0
    for i in range(len(testset)):
        if testset[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testset))) * 100.0

def main():
    filename = 'naivedata.csv'
    splitratio = 0.67
    dataset = loadcsv(filename)
    trainingset, testset = splitdataset(dataset, splitratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingset), len(testset)))
    # prepare model
    summaries = summarizebyclass(trainingset)
    # test model
    predictions = getpredictions(summaries, testset)  # predictions for the test data using the training summaries
    accuracy = getaccuracy(testset, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))

main()
OUTPUT:
Split 768 rows into train=514 and test=254 rows
Accuracy of the classifier is : 77.16535433070865%
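Given the 768 rows and all-numeric attributes, naivedata.csv is presumably the Pima Indians Diabetes data set; each row holds eight medical attributes followed by the 0/1 class label, e.g.:
6,148,72,35,0,33.6,0.627,50,1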
8. Apply the EM algorithm (Gaussian Mixture Model) and the k-Means algorithm to cluster the Iris data set, and compare the quality of the two clusterings.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']
model = KMeans(n_clusters=3)
model.fit(X)
plt.figure(figsize=(14,14))
colormap = np.array(['red','lime','black'])
plt.subplot(2,2,1)
plt.scatter(X.Petal_Length,X.Petal_Width, c=colormap[y.Targets], s = 40)
plt.title('Real Clusters')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.subplot(2,2,2)
plt.scatter(X.Petal_Length,X.Petal_Width, c=colormap[model.labels_], s = 40)
plt.title('K-Means Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa,columns=X.columns)
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
gmm_y=gmm.predict(xs)
plt.subplot(2,2,3)
plt.scatter(X.Petal_Length,X.Petal_Width, c=colormap[gmm_y], s = 40)
plt.title('GMM Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('Observation: The GMM using EM algorithm based clustering matched the true labels more closely than the Kmeans.')
plt.show()
output:
Observation: The GMM using EM algorithm based clustering matched the true labels more closely than the Kmeans.
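To back up the observation quantitatively, a small sketch (assumed to run after the script above) that scores both clusterings against the true labels while ignoring label permutation:
from sklearn.metrics import adjusted_rand_score
print("ARI (k-Means):", adjusted_rand_score(y.Targets, model.labels_))
print("ARI (GMM/EM):", adjusted_rand_score(y.Targets, gmm_y))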
9. Write a program to implement the k-Nearest Neighbour algorithm to classify the iris dataset. Print both correct and wrong predictions. Java/Python ML library classes can be used for this problem.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets

iris = datasets.load_iris()
print("iris dataset loaded")

x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1)
print("dataset is split into training and testing ....")
print("size of training data and its label", x_train.shape, y_train.shape)
print("size of testing data and its label", x_test.shape, y_test.shape)

for i in range(len(iris.target_names)):
    print("label", i, "-", str(iris.target_names[i]))

classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

print("Results of classification using K-NN K=1")
for r in range(0, len(x_test)):
    print("Sample: ", str(x_test[r]), "Actual-Label: ", str(y_test[r]), "Predicted-Label: ", str(y_pred[r]))
print("Classification Accuracy: ", classifier.score(x_test, y_test))
OUTPUT:
iris dataset loaded
dataset is split into training and testing ....
size of training data and its label (135, 4) (135,)
size of testing data and its label (15, 4) (15,)
label 0 - setosa
label 1 - versicolor
label 2 - virginica
Results of classification using K-NN K=1
Sample: [5. 2.3 3.3 1. ] Actual-Label: 1 Predicted-Label: 1
Sample: [6.1 3. 4.6 1.4] Actual-Label: 1 Predicted-Label: 1
Sample: [5.8 2.7 5.1 1.9] Actual-Label: 2 Predicted-Label: 2
Sample: [6.3 2.5 4.9 1.5] Actual-Label: 1 Predicted-Label: 2
Sample: [6.7 2.5 5.8 1.8] Actual-Label: 2 Predicted-Label: 2
Sample: [6.7 3.1 4.4 1.4] Actual-Label: 1 Predicted-Label: 1
Sample: [5.5 2.6 4.4 1.2] Actual-Label: 1 Predicted-Label: 1
Sample: [6. 2.2 4. 1. ] Actual-Label: 1 Predicted-Label: 1
Sample: [6.4 2.8 5.6 2.1] Actual-Label: 2 Predicted-Label: 2
Sample: [5.6 2.9 3.6 1.3] Actual-Label: 1 Predicted-Label: 1
Sample: [6.9 3.1 5.1 2.3] Actual-Label: 2 Predicted-Label: 2
Sample: [5.2 2.7 3.9 1.4] Actual-Label: 1 Predicted-Label: 1
Sample: [7.1 3. 5.9 2.1] Actual-Label: 2 Predicted-Label: 2
Sample: [6.3 3.4 5.6 2.4] Actual-Label: 2 Predicted-Label: 2
Sample: [6.1 2.8 4. 1.3] Actual-Label: 1 Predicted-Label: 1
Classification Accuracy: 0.9333333333333333
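The task statement asks for correct and wrong predictions to be flagged explicitly; a minimal sketch of that extra loop, reusing x_test, y_test and y_pred from the program above:
for sample, actual, predicted in zip(x_test, y_test, y_pred):
    status = "Correct" if actual == predicted else "Wrong"
    print(status, "- Sample:", sample, "Actual-Label:", actual, "Predicted-Label:", predicted)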
10. Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points. Select an appropriate dataset for your experiment and draw graphs.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def kernel(point, xmat, k):
    # Gaussian weights: points close to the query point get weights near 1
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k ** 2))
    return weights

def localWeight(point, xmat, ymat, k):
    # Closed-form weighted least-squares solution for the query point
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

def graphPlot(X, ypred):
    sortindex = X[:, 1].argsort(0)
    xsort = X[sortindex][:, 0]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(bill, tip, color='green')
    ax.plot(xsort[:, 1], ypred[sortindex], color='red', linewidth=5)
    plt.xlabel('Total bill')
    plt.ylabel('Tip')
    plt.show()

data = pd.read_csv('data10_tips.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)
mbill = np.mat(bill)
mtip = np.mat(tip)
m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))        # add a bias column of ones
ypred = localWeightRegression(X, mtip, 0.5)
graphPlot(X, ypred)
OUTPUT:
[Plots: scatter of total bill vs. tip with the locally weighted regression fit drawn in red, for bandwidth parameter k = 3 and k = 9]
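The plots for other bandwidths can be reproduced with the functions and data already defined in the program, e.g. (a small sketch):
for k in (3, 9):
    graphPlot(X, localWeightRegression(X, mtip, k))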