B.Tech 5th Sem Machine Learning Lab Practical
Program 1: Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based on a given set of training data samples. Read the training data from a .CSV file.
import random
import csv
import pandas as pd
# Possible values of each attribute; only the number of attributes is used.
att = [
    ['Sunny', 'Rainy'],
    ['Warm', 'Cold'],
    ['Normal', 'High'],
    ['Strong', 'Weak'],
    ['Warm', 'Cool'],
    ['Same', 'Change'],
]
attribute = len(att)

# Raw string: same value as the original literal, without the invalid "\p"
# escape sequence it contained.
TRAINING_FILE = r'C:\Users\pc1\Desktop\machine\ws.csv'


def find_s(examples, n_attributes, positive_label='Yes'):
    """Run FIND-S and return the hypothesis reached after each example.

    Args:
        examples: rows of strings; the first ``n_attributes`` entries are the
            attribute values and entry ``n_attributes`` is the class label.
        n_attributes: number of attribute columns in every row.
        positive_label: label value that marks a positive example.

    Returns:
        A list with one hypothesis snapshot per example, in order.  The last
        snapshot is the maximally specific hypothesis; the list is empty when
        ``examples`` is empty.
    """
    # Start from the most specific hypothesis.  It is generalised only by
    # positive examples, so a negative first row no longer seeds it.
    hypothesis = ['0'] * n_attributes
    history = []
    for example in examples:
        if example[n_attributes] == positive_label:
            for j in range(n_attributes):
                if hypothesis[j] == '0':
                    # First positive value seen for this attribute.
                    hypothesis[j] = example[j]
                elif hypothesis[j] != example[j]:
                    # Conflicting positive values: accept any value.
                    hypothesis[j] = '?'
        history.append(list(hypothesis))
    return history


def run_find_s():
    """Read the training CSV, run FIND-S and print every step."""
    print("attribute length=", attribute)
    # The missing quote in the original message has been restored.
    print("\n Most general hypothesis:['?','?','?','?','?','?']")
    print("\n Most specific hypothesis:['0','0','0','0','0','0']")

    a = []
    print("\n Given training data set")
    with open(TRAINING_FILE, newline='') as file:
        for r in csv.reader(file):
            if not r:
                # csv.reader yields [] for blank lines; they carry no example.
                continue
            a.append(r)
            print(r)

    print("\n The initial value of hypothesis: ")
    print(['0'] * attribute)

    print("\n Find S: Finding a Maximally Specific Hypothesis\n")
    print("\n length of a=", len(a))
    history = find_s(a, attribute)
    for i, snapshot in enumerate(history):
        print(" For Training Example No :{0} the hypothesis is ".format(i), snapshot)

    print("\n The Maximally Specific Hypothesis for a given Training Examples :\n")
    # The original printed only the heading and never the result itself.
    print(history[-1] if history else ['0'] * attribute)


if __name__ == "__main__":
    run_find_s()
Output:-
attribute length= 6
# Recorded output of Program 1 (continued):
#
#  Most general hypothesis:['?','?','?','?,'?','?']
#  Most specific hypothesis:['0','0','0','0','0','0']
#  Given training data set
# ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
# ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
# ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No']
# ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']
#  The initial value of hypothesis:
# ['0', '0', '0', '0', '0', '0']
# Sunny Warm Normal Strong Warm Same
#  Find S: Finding a Maximally Specific Hypothesis
#  length of a= 4
#  For Training Example No :0 the hypothesis is ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
#  For Training Example No :1 the hypothesis is ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
#  For Training Example No :2 the hypothesis is ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
#  For Training Example No :3 the hypothesis is ['Sunny', 'Warm', '?', 'Strong', '?', '?']
#  The Maximally Specific Hypothesis for a given Training Examples :

# Program 2: For a given set of training data examples stored in a .CSV file,
# implement and demonstrate the Candidate-Elimination algorithm to output a
# description of the set of all hypotheses consistent with the training
# examples.
import numpy as np
import pandas as pd


def learn(concepts, target):
    """Run the (simplified) Candidate-Elimination learning loop.

    Args:
        concepts: sequence of examples, each a sequence of attribute values
            that supports ``.copy()`` (NumPy row or list).
        target: sequence of labels aligned with ``concepts``; "Yes" marks a
            positive example and "No" a negative one.

    Returns:
        A tuple ``(specific_h, general_h)``: the specific boundary and the
        list of general-boundary rows that contain at least one constraint.
    """
    # Initialise S with the first instance; .copy() avoids aliasing the input.
    specific_h = concepts[0].copy()
    n_attributes = len(specific_h)
    print("\nInitialization of specific_h and general_h")
    print(specific_h)
    general_h = [["?" for _ in range(n_attributes)] for _ in range(n_attributes)]
    print(general_h)

    # The learning iterations
    for i, h in enumerate(concepts):
        # Positive example: generalise S (and relax G) where values differ.
        if target[i] == "Yes":
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        # Negative example: only G changes.
        if target[i] == "No":
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("\nSteps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)

    # Drop rows that are still entirely '?'.  The original compared against a
    # literal list of six '?', which only worked for six attributes.
    general_h = [row for row in general_h if any(value != '?' for value in row)]
    # Return final values
    return specific_h, general_h


if __name__ == "__main__":
    # Loading Data from a CSV File
    d = pd.read_csv(r"C:\Users\pc1\Desktop\machine\trainingdata.csv")
    data = pd.DataFrame(d)
    print(d)
    # Separating concept features from Target
    concepts = np.array(data.iloc[:, 0:-1])
    print(concepts)
    # Copying the last column into the target array
    target = np.array(data.iloc[:, -1])
    print(target)

    s_final, g_final = learn(concepts, target)
    print("\nFinal Specific_h:", s_final, sep="\n")
    print("\nFinal General_h:", g_final, sep="\n")

# Output:
# sky airTemp humidity wind water forecast enjoySport
# 0 Sunny Warm Normal Strong Warm Same Yes
# 1 Sunny Warm High Strong Warm Same Yes
# 2 Rainy Cold High Strong Warm Change No
# 3 Sunny Warm High Strong Cool Change Yes
# [['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
#  ['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
#  ['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
#  ['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']]
# ['Yes' 'Yes' 'No' 'Yes']
# Initialization of specific_h and general_h
# ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
# [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
# Steps of Candidate Elimination Algorithm 1
# ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
# [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
# Steps of Candidate Elimination Algorithm 2
# ['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
# [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
# Steps of Candidate Elimination Algorithm 3
# ['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
# [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'Same']]
# Steps of Candidate Elimination Algorithm 4
# ['Sunny' 'Warm' '?' 'Strong' '?' '?']
# [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
# Final Specific_h:
# ['Sunny' 'Warm' '?' 'Strong' '?' '?']
# Final General_h:
# [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]

# Program-3. Write a program to demonstrate the working of the decision tree
# based ID3 algorithm. Use an appropriate data set for building the decision
# tree and apply this knowledge to classify a new sample.
# Solution:
import numpy as np
import math
import csv


def read_data(filename):
    """Read a CSV file and return ``(metadata, traindata)``.

    ``metadata`` is the list of header names from the first row and
    ``traindata`` is the list of remaining non-empty rows.
    """
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        metadata = list(next(datareader))
        # Blank lines come back as [] and would make the data array ragged.
        traindata = [row for row in datareader if row]
    return (metadata, traindata)


class Node:
    """Decision-tree node: an attribute name with children, or a leaf answer."""

    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []  # list of (attribute value, child Node) pairs
        self.answer = ""    # non-empty only for leaf nodes

    def __str__(self):
        return self.attribute


def subtables(data, col, delete):
    """Split ``data`` by the distinct values found in column ``col``.

    Args:
        data: 2-D NumPy array of examples.
        col: index of the column to split on.
        delete: when true, column ``col`` is removed from every sub-table.

    Returns:
        ``(items, tables)`` where ``items`` holds the sorted distinct values
        of the column and ``tables`` maps each value to its rows.
    """
    items = np.unique(data[:, col])
    tables = {}
    for item in items:
        # Boolean-mask selection keeps the input dtype.  The original copied
        # rows into a "|S32" array, which turned every value into bytes.
        table = data[data[:, col] == item]
        if delete:
            table = np.delete(table, col, 1)
        tables[item] = table
    return items, tables


def entropy(S):
    """Return the base-2 entropy of the label array ``S`` (0 when pure or empty)."""
    _, counts = np.unique(S, return_counts=True)
    if counts.size <= 1:
        return 0
    probabilities = counts / S.size
    return float(-np.sum(probabilities * np.log2(probabilities)))


def gain_ratio(data, col):
    """Return the information-gain ratio of splitting ``data`` on column ``col``.

    The last column of ``data`` is used as the class label.  When the split
    information is zero the ratio is defined here as 0.0 instead of dividing
    by zero.
    """
    items, tables = subtables(data, col, delete=False)
    total_size = data.shape[0]
    remainder = 0.0   # weighted entropy left after the split
    split_info = 0.0  # intrinsic value of the split
    for item in items:
        subset = tables[item]
        ratio = subset.shape[0] / total_size
        remainder += ratio * entropy(subset[:, -1])
        split_info -= ratio * math.log(ratio, 2)
    if split_info == 0:
        return 0.0
    return (entropy(data[:, -1]) - remainder) / split_info


def create_node(data, metadata):
    """Recursively build the decision tree for ``data``.

    Args:
        data: 2-D NumPy array whose last column is the class label.
        metadata: attribute names aligned with the columns of ``data``.

    Returns:
        The root ``Node`` of the (sub)tree.
    """
    labels, label_counts = np.unique(data[:, -1], return_counts=True)
    n_features = data.shape[1] - 1
    # Leaf: the labels are pure, or no attribute is left to split on, in
    # which case the most frequent label is used.
    if labels.shape[0] == 1 or n_features == 0:
        node = Node("")
        node.answer = labels[np.argmax(label_counts)]
        return node

    gains = np.zeros(n_features)
    for col in range(n_features):
        gains[col] = gain_ratio(data, col)
    split = np.argmax(gains)

    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    items, tables = subtables(data, split, delete=True)
    for item in items:
        node.children.append((item, create_node(tables[item], metadata)))
    return node


def empty(size):
    """Return a string of ``size`` spaces, used as the indent in print_tree."""
    return " " * size


def print_tree(node, level):
    """Print the tree rooted at ``node``, indented by ``level`` spaces."""
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)


def run_id3():
    """Build and print the decision tree for the tennis data set."""
    metadata, traindata = read_data(r"C:\Users\pc1\Desktop\machine\tennisdata.csv")
    data = np.array(traindata)
    node = create_node(data, metadata)
    print_tree(node, 0)


if __name__ == "__main__":
    run_id3()

# OUTPUT:-
# Recorded output of the original listing (line breaks were lost in the
# source).  Values appear as bytes literals there because the sub-tables were
# built with a bytes dtype; this version prints plain strings.
# Outlook
#   Overcast
#     b'Yes'
#   Rainy
#     Windy
#       b'False'
#         b'Yes'
#       b'True'
#         b'No'
#   Sunny
#     Humidity
#       b'High'
#         b'No'
#       b'Normal'
#         b'Yes'

# PROGRAM NO.4 Build an Artificial Neural Network by implementing the
# Backpropagation algorithm and test the same using appropriate data sets.
# Solution:
import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # X = (hours sleeping, hours studying)
y = np.array(([92], [86], [89]), dtype=float)  # y = score on test
print("array list of x")
print(X)
print("\n array list of y")
print(y)
if __name__ == "__main__":
    # scale units
    d = np.amax(X, axis=0)  # per-column maximum of X
    print("maximum x-axis of array=", d)
    X = X / d      # divide every column by its maximum
    y = y / 100    # max test score is 100
    print("maximum of X array", "\n", X)
    print(" max test score is \n ", y)


class Neural_Network(object):
    """Feed-forward network with one hidden layer, trained by backpropagation.

    Args:
        inputSize: number of input features (default 2).
        hiddenSize: number of hidden units (default 3).
        outputSize: number of output units (default 1).

    The defaults reproduce the original fixed 2-3-1 architecture.
    """

    def __init__(self, inputSize=2, hiddenSize=3, outputSize=1):
        # Parameters
        self.inputSize = inputSize
        self.outputSize = outputSize
        self.hiddenSize = hiddenSize
        # Weights, drawn from a standard normal distribution
        # (inputSize x hiddenSize) weight matrix from input to hidden layer
        self.W1 = np.random.randn(self.inputSize, self.hiddenSize)
        # (hiddenSize x outputSize) weight matrix from hidden to output layer
        self.W2 = np.random.randn(self.hiddenSize, self.outputSize)

    def forward(self, X):
        """Propagate ``X`` through the network and return the output activations."""
        self.z = np.dot(X, self.W1)          # input -> hidden pre-activation
        self.z2 = self.sigmoid(self.z)       # hidden activation
        self.z3 = np.dot(self.z2, self.W2)   # hidden -> output pre-activation
        o = self.sigmoid(self.z3)            # final activation
        return o

    def sigmoid(self, s):
        """Logistic activation function."""
        return 1 / (1 + np.exp(-s))

    def sigmoidPrime(self, s):
        """Derivative of the sigmoid, given the sigmoid *output* ``s``."""
        return s * (1 - s)

    def backward(self, X, y, o):
        """Backpropagate the error of output ``o`` and update both weight matrices.

        Requires a preceding ``forward`` call, which stores ``self.z2``.
        """
        self.o_error = y - o  # error in output
        # applying derivative of sigmoid to the output error
        self.o_delta = self.o_error * self.sigmoidPrime(o)
        # z2 error: how much the hidden layer contributed to the output error
        self.z2_error = self.o_delta.dot(self.W2.T)
        # applying derivative of sigmoid to z2 error
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)
        self.W1 += X.T.dot(self.z2_delta)        # adjusting input --> hidden weights
        self.W2 += self.z2.T.dot(self.o_delta)   # adjusting hidden --> output weights

    def train(self, X, y):
        """Run one forward pass followed by one backward pass."""
        o = self.forward(X)
        self.backward(X, y, o)


if __name__ == "__main__":
    NN = Neural_Network()
    for i in range(2):  # two training iterations (raise the count to train longer)
        print("\nInput: \n" + str(X))
        print("\nActual Output: \n" + str(y))
        print("\nPredicted Output: \n" + str(NN.forward(X)))
        # mean squared loss
        print("\nLoss: \n" + str(np.mean(np.square(y - NN.forward(X)))))
        NN.train(X, y)

# OUTPUT:-
# Input:
# [[0.66666667 1.        ]
#  [0.33333333 0.55555556]
#  [1.         0.66666667]]
# Actual Output:
# [[0.92]
#  [0.86]
#  [0.89]]
# Predicted Output:
# [[0.65759806]
#  [0.62637973]
#  [0.70915383]]
# Loss:
# 0.05204618258402152
# Input:
# [[0.66666667 1.        ]
#  [0.33333333 0.55555556]
#  [1.         0.66666667]]
# Actual Output:
# [[0.92]
#  [0.86]
#  [0.89]]
# Predicted Output:
# [[0.69195641]
#  [0.65907504]
#  [0.74242558]]
# Loss:
# 0.038050976333914674

# PROGRAM NO. 5 Write a program to implement the naïve Bayesian classifier for
# a sample training data set stored as a .CSV file. Compute the accuracy of
# the classifier, considering few test data sets.
# SOLUTION:-
if __name__ == "__main__":
    # import necessary libraries
    import pandas as pd
    from sklearn import tree
    from sklearn.preprocessing import LabelEncoder
    from sklearn.naive_bayes import GaussianNB
    # load data from CSV
    data = pd.read_csv('tennisdata.csv')
    print("THe first 5 values of data is :\n", data.head())
    # obtain Train data and Train output
    X = data.iloc[:, :-1]
    print("\nThe First 5 values of train data is\n", X.head())
if __name__ == "__main__":
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split

    y = data.iloc[:, -1]
    print("\nThe first 5 values of Train output is\n", y.head())

    # Encode on an independent copy so the column assignments below do not
    # write into a slice of `data`.
    X = X.copy()

    # Convert the categorical columns into integer codes
    le_outlook = LabelEncoder()
    X['Outlook'] = le_outlook.fit_transform(X['Outlook'])
    le_Temperature = LabelEncoder()
    X['Temperature'] = le_Temperature.fit_transform(X['Temperature'])
    le_Humidity = LabelEncoder()
    X['Humidity'] = le_Humidity.fit_transform(X['Humidity'])
    le_Windy = LabelEncoder()
    X['Windy'] = le_Windy.fit_transform(X['Windy'])
    print("\nNow the Train data is :\n", X.head())

    le_PlayTennis = LabelEncoder()
    y = le_PlayTennis.fit_transform(y)
    print("\nNow the Train output is\n", y)

    # Hold out 20% of the rows for testing.  No random_state is set, so the
    # split (and therefore the reported accuracy) differs between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    # accuracy_score expects (y_true, y_pred); the original passed them swapped.
    print("Accuracy is:", accuracy_score(y_test, classifier.predict(X_test)))
output:-
THe first 5 values of data is : Outlook Temperature Humidity Windy PlayTennis 0 Sunny Hot High False No 1 Sunny Hot High True No 2 Overcast Hot High False Yes 3 Rainy Mild High False Yes 4 Rainy Cool Normal False YesThe First 5 values of train data is Outlook Temperature Humidity Windy 0 Sunny Hot High False 1 Sunny Hot High True 2 Overcast Hot High False 3 Rainy Mild High False 4 Rainy Cool Normal FalseThe first 5 values of Train output is 0 No 1 No 2 Yes 3 Yes 4 Yes Name: PlayTennis, dtype: objectNow the Train data is : Outlook Temperature Humidity Windy 0 2 1 0 0 1 2 1 0 1 2 0 1 0 0 3 1 2 0 0 4 1 0 1 0Now the Train output is [0 0 1 1 1 0 1 0 1 1 1 1 1 0]Accuracy is: 0.6666666666666666
Comments
Post a Comment