B.Tech 5th Sem Machine Learning Lab Practical

 Program 1:   Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis  based on a  given set of training data samples. Read the training data from a .CSV file.

import random

import csv

import pandas as pd

# Value domain of each attribute. Only the first entry survived in the
# original listing; the remaining entries are reconstructed from the sample
# data shown with this program -- NOTE(review): confirm against ws.csv.
att = [['Sunny', 'Rainy'],
       ['Warm', 'Cold'],
       ['Normal', 'High'],
       ['Strong', 'Weak'],
       ['Warm', 'Cool'],
       ['Same', 'Change']]

# Number of attributes per example; the target label is stored right after them.
attribute = len(att)

# Raw string: the original literal contained the invalid escape "\p".
DATA_FILE = r'C:\Users\pc1\Desktop\machine\ws.csv'


def find_s(examples, n_attributes, positive_label='Yes', trace=False):
    """Return the maximally specific hypothesis found by FIND-S.

    Args:
        examples: sequence of rows; each row holds ``n_attributes`` attribute
            values followed by the target label at index ``n_attributes``.
        n_attributes: number of attribute columns in every row.
        positive_label: label value that marks a positive example.
        trace: when true, print the hypothesis after every example.

    Returns:
        A list of ``n_attributes`` entries. Each entry is an attribute value,
        ``'?'`` (any value accepted) or ``'0'`` (no positive example seen).
    """
    hypothesis = ['0'] * n_attributes
    seen_positive = False
    for i, example in enumerate(examples):
        if example[n_attributes] == positive_label:
            if not seen_positive:
                # The first *positive* example seeds the hypothesis; seeding
                # from row 0 unconditionally is wrong when row 0 is negative.
                hypothesis = list(example[:n_attributes])
                seen_positive = True
            else:
                # Generalise every attribute that disagrees with the example.
                hypothesis = [h if h == value else '?'
                              for h, value in zip(hypothesis, example)]
        if trace:
            print(" For Training Example No :{0} the hypothesis is ".format(i), hypothesis)
    return hypothesis


def main():
    """Read the training data from DATA_FILE and print the FIND-S trace."""
    print("attribute length=", attribute)

    print("\n Most general hypothesis:['?','?','?','?,'?','?']")

    print("\n Most specific hypothesis:['0','0','0','0','0','0']")

    print("\n Given training data set")

    a = []
    with open(DATA_FILE, newline='') as file:
        reader = csv.reader(file)
        for r in reader:
            if not r:
                # csv.reader yields [] for blank lines; they carry no example.
                continue
            a.append(r)
            print(r)

    print("\n The initial value of hypothesis: ")

    hypothesis = ['0'] * attribute
    print(hypothesis)

    print("\n Find S: Finding a Maximally Specific Hypothesis\n")

    b = len(a)
    print("\n length of a=", b)

    hypothesis = find_s(a, attribute, trace=True)

    print("\n The Maximally Specific Hypothesis for a given Training Examples :\n")
    print(hypothesis)


if __name__ == "__main__":
    main()


attribute length= 6
 Most general hypothesis:['?','?','?','?,'?','?']

 Most specific hypothesis:['0','0','0','0','0','0']

 Given training data set
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']

 The initial value of hypothesis: 
['0', '0', '0', '0', '0', '0']


Find S: Finding a Maximally Specific Hypothesis

 length of a= 4
 For Training Example No :0 the hypothesis is  ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
 For Training Example No :1 the hypothesis is  ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
 For Training Example No :2 the hypothesis is  ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
 For Training Example No :3 the hypothesis is  ['Sunny', 'Warm', '?', 'Strong', '?', '?']

 The Maximally Specific Hypothesis for a given Training Examples :

Program 2: For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples.

import numpy as np
import pandas as pd
# Loading Data from a CSV File
d = pd.read_csv(r"C:\Users\pc1\Desktop\machine\trainingdata.csv")
data = pd.DataFrame(d)
print(d)

# Separating concept features from Target: every column except the last one
concepts = np.array(data.iloc[:, 0:-1])
print(concepts)

# Isolating target into a separate array: copy of the last column
target = np.array(data.iloc[:, -1])
print(target)
def learn(concepts, target):
    """Run the (simplified) Candidate-Elimination procedure used in this lab.

    Args:
        concepts: 2-D array of attribute values, one training instance per row.
        target: 1-D array of labels ("Yes" / "No"), aligned with ``concepts``.

    Returns:
        ``(specific_h, general_h)`` where ``specific_h`` is the specific
        boundary (a copy of the first instance, generalised with '?') and
        ``general_h`` is the list of general-boundary rows that still carry
        at least one constraint.

    The specific hypothesis is seeded from ``concepts[0]`` regardless of its
    label -- NOTE(review): this presumes the first instance is positive.
    Progress is printed after every training instance.
    """
    # .copy() so that updating specific_h never writes back into concepts.
    specific_h = concepts[0].copy()
    n_attributes = len(specific_h)
    print("\nInitialization of specific_h and general_h")
    print(specific_h)

    general_h = [["?"] * n_attributes for _ in range(n_attributes)]
    print(general_h)

    # The learning iterations
    for i, h in enumerate(concepts):
        # Positive instance: generalise S (and relax G) wherever it disagrees.
        if target[i] == "Yes":
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'

        # Negative instance: only G changes; constrain it on the attributes
        # where the instance differs from S.
        if target[i] == "No":
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("\nSteps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)

    # Drop rows that never received a constraint. The all-'?' row is sized
    # from the data instead of being hard-coded for six attributes.
    unconstrained = ["?"] * n_attributes
    general_h = [row for row in general_h if row != unconstrained]

    # Return final values
    return specific_h, general_h
# Run the learner on the loaded data and report both boundaries.
s_final, g_final = learn(concepts, target)
print("\nFinal Specific_h:", s_final, sep="\n")
print("\nFinal General_h:", g_final, sep="\n")


     sky airTemp humidity    wind water forecast enjoySport
0  Sunny    Warm   Normal  Strong  Warm     Same        Yes
1  Sunny    Warm     High  Strong  Warm     Same        Yes
2  Rainy    Cold     High  Strong  Warm   Change         No
3  Sunny    Warm     High  Strong  Cool   Change        Yes

[['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
 ['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
 ['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
 ['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']]

['Yes' 'Yes' 'No' 'Yes']

Initialization of specific_h and general_h
['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 1
['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 2
['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 3
['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'Same']]

Steps of Candidate Elimination Algorithm 4
['Sunny' 'Warm' '?' 'Strong' '?' '?']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Final Specific_h:
['Sunny' 'Warm' '?' 'Strong' '?' '?']

Final General_h:
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]

Program-3. Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.

import csv
import math

import numpy as np
def read_data(filename):
    """Read a comma-separated file and split it into header and data rows.

    Args:
        filename: path of the CSV file to read.

    Returns:
        ``(metadata, traindata)``: ``metadata`` is the list of column names
        taken from the first row, ``traindata`` is the list of remaining rows
        (each a list of strings). An empty file yields ``([], [])``.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        metadata = list(next(datareader, []))
        traindata = list(datareader)
    return (metadata, traindata)
class Node:
    """A node of the decision tree.

    Attributes are set in ``__init__``: ``attribute`` is the value passed by
    the caller, ``children`` starts as an empty list and ``answer`` starts as
    the empty string.
    """

    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute
def subtables(data, col, delete):
    """Partition the rows of ``data`` by the value found in column ``col``.

    Args:
        data: 2-D NumPy array, one example per row.
        col: index of the column to partition on.
        delete: when true, column ``col`` is removed from every sub-table.

    Returns:
        ``(items, tables)``: ``items`` is the sorted array of distinct values
        in the column, ``tables`` maps each of those values to the array of
        rows carrying it (original row order preserved).
    """
    items = np.unique(data[:, col])
    tables = {}
    for item in items:
        # Boolean-mask selection keeps the input dtype; copying into a fixed
        # "|S32" buffer converted str data to truncated bytes on Python 3.
        rows = data[data[:, col] == item]
        if delete:
            rows = np.delete(rows, col, 1)
        tables[item] = rows
    return items, tables
def entropy(S):
    """Return the Shannon entropy (base 2) of the values in ``S``.

    Args:
        S: 1-D array-like of labels.

    Returns:
        A float; ``0.0`` when ``S`` is empty or holds a single distinct value.
    """
    _, counts = np.unique(S, return_counts=True)
    if counts.size <= 1:
        return 0.0
    probabilities = counts / counts.sum()
    # Every probability is > 0 here, so log2 is always defined.
    return float(-np.sum(probabilities * np.log2(probabilities)))
def gain_ratio(data, col):
    """Return the gain ratio obtained by splitting ``data`` on column ``col``.

    The gain ratio is the information gain of the split (entropy of the last
    column minus the weighted entropy of the sub-tables) divided by the
    intrinsic value of the split.

    Args:
        data: 2-D NumPy array whose last column is the class label.
        col: index of the attribute column to evaluate.

    Returns:
        The gain ratio, or ``0.0`` when the intrinsic value is zero (the
        column holds a single distinct value, so splitting on it is useless).
    """
    items, tables = subtables(data, col, delete=False)
    total_size = data.shape[0]

    remainder = 0.0
    intrinsic_value = 0.0
    for item in items:
        subset = tables[item]
        ratio = subset.shape[0] / total_size
        remainder += ratio * entropy(subset[:, -1])
        intrinsic_value -= ratio * math.log(ratio, 2)

    information_gain = entropy(data[:, -1]) - remainder
    if intrinsic_value == 0:
        # Avoid 0/0 -> NaN, which np.argmax would pick as the "best" split.
        return 0.0
    return information_gain / intrinsic_value
def create_node(data, metadata):
    """Recursively build the decision tree for ``data``.

    Args:
        data: 2-D NumPy array; the last column is the class label, the other
            columns are attributes.
        metadata: attribute names, aligned with the attribute columns.

    Returns:
        A ``Node``. Leaves have an empty ``attribute`` and carry the class in
        ``answer``; inner nodes carry the chosen attribute name and a list of
        ``(value, child)`` pairs in ``children``.
    """
    labels, label_counts = np.unique(data[:, -1], return_counts=True)
    n_attributes = data.shape[1] - 1

    # Leaf: the labels are pure, or no attribute is left to split on. In the
    # latter case fall back to the majority label instead of crashing in
    # np.argmax on an empty gains array.
    if labels.shape[0] == 1 or n_attributes == 0:
        node = Node("")
        node.answer = labels[np.argmax(label_counts)]
        return node

    gains = np.zeros((n_attributes, 1))
    for col in range(n_attributes):
        gains[col] = gain_ratio(data, col)
    split = np.argmax(gains)

    node = Node(metadata[split])
    # The chosen attribute is removed from both the names and the sub-tables.
    metadata = np.delete(metadata, split, 0)
    items, tables = subtables(data, split, delete=True)
    for item in items:
        child = create_node(tables[item], metadata)
        node.children.append((item, child))
    return node
def empty(size):
    """Return a string made of ``size`` space characters."""
    return " " * size


def print_tree(node, level):
    """Print the tree rooted at ``node``, indenting by depth.

    Args:
        node: object with ``answer``, ``attribute`` and ``children``
            attributes; ``children`` is iterated as ``(value, child)`` pairs.
        level: indentation width for ``node``; branch values are printed at
            ``level + 1`` and child nodes at ``level + 2``.
    """
    # A non-empty answer marks a leaf.
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)
# Load the data set, build the decision tree and print it.
metadata, traindata = read_data(r"C:\Users\pc1\Desktop\machine\tennisdata.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)


PROGRAM NO.4  Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same  using appropriate data sets.

import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)     # X = (hours sleeping, hours studying)
y = np.array(([92], [86], [89]), dtype=float)           # y = score on test

# Show the raw arrays under their headers (the headers alone printed nothing).
print("array list of x")
print(X)
print("\n array list of y")
print(y)

# scale units
d = np.amax(X, axis=0)      # column-wise maximum of X
print("maximum x-axis of array=", d)
X = X / d                   # each feature is divided by its own maximum
y = y / 100                 # max test score is 100
print("maximum of X array", "\n", X)
print(" max test score is \n ", y)
class Neural_Network(object):
    """Feed-forward network with one hidden layer, trained by backpropagation.

    Both layers use the sigmoid activation and no bias terms. The weights are
    drawn from ``np.random.randn`` at construction time.

    Args:
        inputSize: number of input features (default 2).
        hiddenSize: number of hidden units (default 3).
        outputSize: number of outputs (default 1).
    """

    def __init__(self, inputSize=2, hiddenSize=3, outputSize=1):
        # Parameters
        self.inputSize = inputSize
        self.outputSize = outputSize
        self.hiddenSize = hiddenSize
        # Weights
        # (inputSize x hiddenSize) weight matrix from input to hidden layer
        self.W1 = np.random.randn(self.inputSize, self.hiddenSize)
        # (hiddenSize x outputSize) weight matrix from hidden to output layer
        self.W2 = np.random.randn(self.hiddenSize, self.outputSize)

    def forward(self, X):
        """Propagate ``X`` through the network and return the output activations.

        The intermediate values are kept on the instance (``z``, ``z2``,
        ``z3``) because ``backward`` reads ``z2``.
        """
        self.z = np.dot(X, self.W1)           # input -> hidden pre-activation
        self.z2 = self.sigmoid(self.z)        # hidden activation
        self.z3 = np.dot(self.z2, self.W2)    # hidden -> output pre-activation
        o = self.sigmoid(self.z3)             # final activation
        return o

    def sigmoid(self, s):
        """Sigmoid activation function."""
        return 1 / (1 + np.exp(-s))

    def sigmoidPrime(self, s):
        """Derivative of the sigmoid, expressed in terms of the sigmoid OUTPUT ``s``."""
        return s * (1 - s)

    def backward(self, X, y, o):
        """Backpropagate the error of output ``o`` against ``y`` and update both weight matrices in place."""
        self.o_error = y - o                                        # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o)          # error scaled by output slope
        # how much each hidden unit contributed to the output error
        self.z2_error = self.o_delta.dot(self.W2.T)
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)  # scaled by hidden slope
        self.W1 += X.T.dot(self.z2_delta)        # adjusting first set (input --> hidden) weights
        self.W2 += self.z2.T.dot(self.o_delta)   # adjusting second set (hidden --> output) weights

    def train(self, X, y):
        """Run one forward pass followed by one backward pass."""
        o = self.forward(X)
        self.backward(X, y, o)
NN = Neural_Network()
for i in range(2):  # trains the NN for 2 iterations, reporting the state before each update
    print("\nInput: \n" + str(X))
    print("\nActual Output: \n" + str(y))
    print("\nPredicted Output: \n" + str(NN.forward(X)))
    print("\nLoss: \n" + str(np.mean(np.square(y - NN.forward(X)))))  # mean squared error
    NN.train(X, y)


[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]

Actual Output: 

Predicted Output: 


[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]

Actual Output: 

Predicted Output: 


PROGRAM NO. 5  Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a  .CSV file. Compute the accuracy of the classifier, considering few test data sets.

# import necessary libraries
import pandas as pd
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB

# load data from CSV
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# load data from CSV
data = pd.read_csv('tennisdata.csv')
print("THe first 5 values of data is :\n",data.head())

# obtain Train data and Train output
# .copy() so the label-encoding below writes into its own frame rather than
# into a slice of `data` (avoids pandas' SettingWithCopyWarning).
X = data.iloc[:,:-1].copy()
print("\nThe First 5 values of train data is\n",X.head())
y = data.iloc[:,-1]
print("\nThe first 5 values of Train output is\n",y.head())

# Convert the categorical feature columns to numbers; the fitted encoders are
# kept per column so that the codes can be mapped back to labels later.
feature_encoders = {}
for column in ('Outlook', 'Temperature', 'Humidity', 'Windy'):
    feature_encoders[column] = LabelEncoder()
    X[column] = feature_encoders[column].fit_transform(X[column])

print("\nNow the Train data is :\n",X.head())
le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the Train output is\n",y)

# No random_state is passed, so the split (and the accuracy) varies per run.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.20)

classifier = GaussianNB()
# The classifier must be fitted before predict() can be called.
classifier.fit(X_train, y_train)

# accuracy_score expects (y_true, y_pred).
print("Accuracy is:",accuracy_score(y_test, classifier.predict(X_test)))

THe first 5 values of data is :
     Outlook Temperature Humidity  Windy PlayTennis
0     Sunny         Hot     High  False         No
1     Sunny         Hot     High   True         No
2  Overcast         Hot     High  False        Yes
3     Rainy        Mild     High  False        Yes
4     Rainy        Cool   Normal  False        Yes

The First 5 values of train data is
     Outlook Temperature Humidity  Windy
0     Sunny         Hot     High  False
1     Sunny         Hot     High   True
2  Overcast         Hot     High  False
3     Rainy        Mild     High  False
4     Rainy        Cool   Normal  False

The first 5 values of Train output is
 0     No
1     No
2    Yes
3    Yes
4    Yes
Name: PlayTennis, dtype: object

Now the Train data is :
    Outlook  Temperature  Humidity  Windy
0        2            1         0      0
1        2            1         0      1
2        0            1         0      0
3        1            2         0      0
4        1            0         1      0

Now the Train output is
 [0 0 1 1 1 0 1 0 1 1 1 1 1 0]

Accuracy is: 0.6666666666666666


