VTU CSE LABS: July 2019

ML 1 - FIND-S ALGORITHM

1. IMPLEMENT AND DEMONSTRATE THE FIND-S ALGORITHM FOR FINDING THE MOST SPECIFIC HYPOTHESIS BASED ON A GIVEN SET OF TRAINING DATA SAMPLES. READ THE TRAINING DATA FROM A.CSV FILE.

SOLUTION 1 - To display only the final output

trainingdata.csv


Sunny	Warm	Normal	Strong	Warm	Same	Yes
Sunny	Warm	High	Strong	Warm	Same	Yes
Rainy	Cold	High	Strong	Warm	Change	No
Sunny	Warm	High	Strong	Cool	Change	Yes

prog1.py

import csv

h=['0'for i in range(6)]
with open("trainingdata.csv") as f:
data=csv.reader(f)
data=list(data)

for i in data:
if i[-1]=="Yes":
for j in range(6):
if h[j]=='0':
h[j]=i[j]
elif h[j]!=i[j]:
h[j]='?'

print(h)

Output

['Sunny', 'Warm', '?', 'Strong', '?', '?']

OR

SOLUTION 1 - To display steps & final output

import csv

h=['0'for i in range(6)]
with open("trainingdata.csv") as f:
data=csv.reader(f)
data=list(data)

print("The +ve examples are:")
for i in data:
if i[-1]=="Yes":
print(i)

print("\nThe steps of Find-S Algo are:")
for i in data:
if i[-1]=="Yes":
for j in range(6):
if h[j]=='0':
h[j]=i[j]
elif h[j]!=i[j]:
h[j]='?'
print(h)

print("\nFinal specific hypothesis:\n",h)

Output

The +ve examples are:
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']

The steps of Find-S Algo are:
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
['Sunny', 'Warm', '?', 'Strong', '?', 'Same']
['Sunny', 'Warm', '?', 'Strong', '?', '?']

Final specific hypothesis:
['Sunny', 'Warm', '?', 'Strong', '?', '?']

SOLUTION 2

trainingdata.csv


Sunny	Warm	Normal	Strong	Warm	Same	Yes
Sunny	Warm	High	Strong	Warm	Same	Yes
Rainy	Cold	High	Strong	Warm	Change	No
Sunny	Warm	High	Strong	Cool	Change	Yes

lab1.py

import csv
hypo = ['%','%','%','%','%','%'];

with open('trainingdata.csv') as csv_file:
readcsv = csv.reader(csv_file, delimiter=',')
print(readcsv)

data = []
print("\nThe given training examples are:")
for row in readcsv:
print(row)
if row[len(row)-1].upper() == "YES":
data.append(row)

print("\nThe positive examples are:");
for x in data:
print(x);
print("\n");

TotalExamples = len(data);
i=0;
j=0;
k=0;
print("The steps of the Find-s algorithm are :\n",hypo);
list = [];
p=0;
d=len(data[p])-1;
for j in range(d):
list.append(data[i][j]);
hypo=list;
i=1;
for i in range(TotalExamples):
for k in range(d):
if hypo[k]!=data[i][k]:
hypo[k]='?';
k=k+1;
else:
hypo[k];
print(hypo);
i=i+1;

print("\nThe maximally specific Find-s hypothesis for the given training examples is :");
list=[];
for i in range(d):
list.append(hypo[i]);
print(list);

STEPS & OUTPUT:

to view steps & output click HERE

ML 2 - CANDIDATE-ELIMINATION ALGORITHM

2. FOR A GIVEN SET OF TRAINING DATA EXAMPLES STORED IN A .CSV FILE, IMPLEMENT AND DEMONSTRATE THE CANDIDATE-ELIMINATION ALGORITHM TO OUTPUT A DESCRIPTION OF THE SET OF ALL HYPOTHESES CONSISTENT WITH THE TRAINING EXAMPLES.

SOLUTION 1

trainingdata.csv


Sunny	Warm	Normal	Strong	Warm	Same	Yes
Sunny	Warm	High	Strong	Warm	Same	Yes
Rainy	Cold	High	Strong	Warm	Change	No
Sunny	Warm	High	Strong	Cool	Change	Yes

prog2.py

import csv

with open("trainingdata.csv") as f:
csv_file=csv.reader(f)
data=list(csv_file)

s=data[1][:-1]
g=[['?' for i in range(len(s))] for j in range(len(s))]

for i in data:
if i[-1]=="Yes":
for j in range(len(s)):
if i[j]!=s[j]:
s[j]='?'
g[j][j]='?'

elif i[-1]=="No":
for j in range(len(s)):
if i[j]!=s[j]:
g[j][j]=s[j]
else:
g[j][j]="?"
print("\nSteps of Candidate Elimination Algorithm",data.index(i)+1)
print(s)
print(g)
gh=[]
for i in g:
for j in i:
if j!='?':
gh.append(i)
break
print("\nFinal specific hypothesis:\n",s)

print("\nFinal general hypothesis:\n",gh)

Output

Steps of Candidate Elimination Algorithm 1
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 2
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 3
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'Same']]

Steps of Candidate Elimination Algorithm 4
['Sunny', 'Warm', '?', 'Strong', '?', '?']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Final specific hypothesis:
['Sunny', 'Warm', '?', 'Strong', '?', '?']

Final general hypothesis:

[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]

SOLUTION 2

trainingdata.csv

sky	airTemp	humidity	wind	water	forecast	enjoySport
Sunny	Warm	Normal	Strong	Warm	Same	Yes
Sunny	Warm	High	Strong	Warm	Same	Yes
Rainy	Cold	High	Strong	Warm	Change	No
Sunny	Warm	High	Strong	Cool	Change	Yes

lab2.py

import numpy as np
import pandas as pd

# Loading Data from a CSV File
data = pd.DataFrame(data=pd.read_csv('trainingdata.csv'))
print(data)

# Separating concept features from Target
concepts = np.array(data.iloc[:,0:-1])
print(concepts)

# Isolating target into a separate DataFrame
# copying last column to target array
target = np.array(data.iloc[:,-1])
print(target)

def learn(concepts, target):

'''
learn() function implements the learning method of the Candidate elimination algorithm.
Arguments:
concepts - a data frame with all the features
target - a data frame with corresponding output values
'''

# Initialise S0 with the first instance from concepts
# .copy() makes sure a new list is created instead of just pointing to the same memory location
specific_h = concepts[0].copy()
print("\nInitialization of specific_h and general_h")
print(specific_h)
#h=["#" for i in range(0,5)]
#print(h)

general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
print(general_h)
# The learning iterations
for i, h in enumerate(concepts):

# Checking if the hypothesis has a positive target
if target[i] == "Yes":
for x in range(len(specific_h)):

# Change values in S & G only if values change
if h[x] != specific_h[x]:
specific_h[x] = '?'
general_h[x][x] = '?'

# Checking if the hypothesis has a positive target
if target[i] == "No":
for x in range(len(specific_h)):
# For negative hyposthesis change values only in G
if h[x] != specific_h[x]:
general_h[x][x] = specific_h[x]
else:
general_h[x][x] = '?'

print("\nSteps of Candidate Elimination Algorithm",i+1)
print(specific_h)
print(general_h)

# find indices where we have empty rows, meaning those that are unchanged
indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
for i in indices:
# remove those rows from general_h
general_h.remove(['?', '?', '?', '?', '?', '?'])
# Return final values
return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("\nFinal Specific_h:", s_final, sep="\n")
print("\nFinal General_h:", g_final, sep="\n")

STEPS & OUTPUT:

to view steps & output click HERE

ML 3 - ID3 ALGORITHM

3. WRITE A PROGRAM TO DEMONSTRATE THE WORKING OF THE DECISION TREE BASED ID3 ALGORITHM. USE AN APPROPRIATE DATA SET FOR BUILDING THE DECISION TREE AND APPLY THIS KNOWLEDGE TO CLASSIFY A NEW SAMPLE.

SOLUTION 1 ( with packages) (given by Lokesh sir)

tennisdata.csv

Outlook	Temperature	Humidity	Windy	PlayTennis
Sunny	Hot	High	FALSE	No
Sunny	Hot	High	TRUE	No
Overcast	Hot	High	FALSE	Yes
Rainy	Mild	High	FALSE	Yes
Rainy	Cool	Normal	FALSE	Yes
Rainy	Cool	Normal	TRUE	No
Overcast	Cool	Normal	TRUE	Yes
Sunny	Mild	High	FALSE	No
Sunny	Cool	Normal	FALSE	Yes
Rainy	Mild	Normal	FALSE	Yes
Sunny	Mild	Normal	TRUE	Yes
Overcast	Mild	High	TRUE	Yes
Overcast	Hot	Normal	FALSE	Yes
Rainy	Mild	High	TRUE	No

lab3.py

import pandas as pd
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.externals.six import StringIO

data = pd.read_csv('tennisdata.csv')
print("The first 5 values of data is \n",data.head())

X = data.iloc[:,:-1]
print("\nThe first 5 values of Train data is \n",X.head())
y = data.iloc[:,-1]
print("\nThe first 5 values of Train output is \n",y.head())

le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)
le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)
le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)
le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)

print("\nNow the Train data is",X.head())

le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the Train data is\n",y)

classifier = DecisionTreeClassifier()
classifier.fit(X,y)

def labelEncoderForInput(list1):
list1[0] = le_outlook.transform([list1[0]])[0]
list1[1] = le_Temperature.transform([list1[1]])[0]
list1[2] = le_Humidity.transform([list1[2]])[0]
list1[3] = le_Windy.transform([list1[3]])[0]
return [list1]

inp = ["Rainy","Mild","High","False"]
inp1=["Rainy","Cool","High","False"]
pred1 = labelEncoderForInput(inp1)
y_pred = classifier.predict(pred1)
y_pred
print("\nfor input {0}, we obtain {1}".format(inp1, le_PlayTennis.inverse_transform(y_pred[0])))

STEPS & OUTPUT:

to view steps & output click HERE

SOLUTION 2 ( without packages)

tennisdata.csv

Outlook	Temperature	Humidity	Windy	PlayTennis
Sunny	Hot	High	FALSE	No
Sunny	Hot	High	TRUE	No
Overcast	Hot	High	FALSE	Yes
Rainy	Mild	High	FALSE	Yes
Rainy	Cool	Normal	FALSE	Yes
Rainy	Cool	Normal	TRUE	No
Overcast	Cool	Normal	TRUE	Yes
Sunny	Mild	High	FALSE	No
Sunny	Cool	Normal	FALSE	Yes
Rainy	Mild	Normal	FALSE	Yes
Sunny	Mild	Normal	TRUE	Yes
Overcast	Mild	High	TRUE	Yes
Overcast	Hot	Normal	FALSE	Yes
Rainy	Mild	High	TRUE	No

lab3.py

import numpy as np
import math
import csv

def read_data(filename):
with open(filename, 'r') as csvfile:
datareader = csv.reader(csvfile, delimiter=',')
headers = next(datareader)
metadata = []
traindata = []
for name in headers:
metadata.append(name)
for row in datareader:
traindata.append(row)

return (metadata, traindata)

class Node:
def __init__(self, attribute):
self.attribute = attribute
self.children = []
self.answer = ""

def __str__(self):
return self.attribute

def subtables(data, col, delete):
dict = {}
items = np.unique(data[:, col])
count = np.zeros((items.shape[0], 1), dtype=np.int32)

for x in range(items.shape[0]):
for y in range(data.shape[0]):
if data[y, col] == items[x]:
count[x] += 1

for x in range(items.shape[0]):
dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
pos = 0
for y in range(data.shape[0]):
if data[y, col] == items[x]:
dict[items[x]][pos] = data[y]
pos += 1
if delete:
dict[items[x]] = np.delete(dict[items[x]], col, 1)

return items, dict

def entropy(S):
items = np.unique(S)

if items.size == 1:
return 0

counts = np.zeros((items.shape[0], 1))
sums = 0

for x in range(items.shape[0]):
counts[x] = sum(S == items[x]) / (S.size * 1.0)

for count in counts:
sums += -1 * count * math.log(count, 2)
return sums

def gain_ratio(data, col):
items, dict = subtables(data, col, delete=False)

total_size = data.shape[0]
entropies = np.zeros((items.shape[0], 1))
intrinsic = np.zeros((items.shape[0], 1))

for x in range(items.shape[0]):
ratio = dict[items[x]].shape[0]/(total_size * 1.0)
entropies[x] = ratio * entropy(dict[items[x]][:, -1])
intrinsic[x] = ratio * math.log(ratio, 2)

total_entropy = entropy(data[:, -1])
iv = -1 * sum(intrinsic)

for x in range(entropies.shape[0]):
total_entropy -= entropies[x]

return total_entropy / iv

def create_node(data, metadata):
if (np.unique(data[:, -1])).shape[0] == 1:
node = Node("")
node.answer = np.unique(data[:, -1])[0]
return node

gains = np.zeros((data.shape[1] - 1, 1))

for col in range(data.shape[1] - 1):
gains[col] = gain_ratio(data, col)

split = np.argmax(gains)

node = Node(metadata[split])
metadata = np.delete(metadata, split, 0)

items, dict = subtables(data, split, delete=True)

for x in range(items.shape[0]):
child = create_node(dict[items[x]], metadata)
node.children.append((items[x], child))

return node

def empty(size):
s = ""
for x in range(size):
s += " "
return s

def print_tree(node, level):
if node.answer != "":
print(empty(level), node.answer)
return
print(empty(level), node.attribute)
for value, n in node.children:
print(empty(level + 1), value)
print_tree(n, level + 2)

metadata, traindata = read_data("tennisdata.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)

STEPS & OUTPUT:

to view steps & output click HERE

VTU CSE LABS

LAB TABS

SUBJECT TABS

Wednesday, July 3, 2019

ML 1 - FIND-S ALGORITHM

ML 2 - CANDIDATE-ELIMINATION ALGORITHM

ML 3 - ID3 ALGORITHM