-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBernoulliNaiveClassifier.py
63 lines (59 loc) · 2.59 KB
/
BernoulliNaiveClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import numpy as np
# Bernoulli naive Bayes classifier
class BernoulliNaiveBayse():
    """Bernoulli naive Bayes classifier with add-one (Laplace) smoothing.

    Every feature is treated as binary: a value of 0 means "absent" and any
    non-zero value means "present". The model is stateless; training and
    prediction both happen inside :meth:`BNB_Classifier`.
    """

    def __init__(self):
        print('Bernoulli Naive Bayse Classifier')

    def BNB_Classifier(self, X_train, Y_train, X_test):
        """Train on ``(X_train, Y_train)`` and predict a label per ``X_test`` row.

        Parameters
        ----------
        X_train : array-like, shape (n_samples, n_features)
            Training features; non-zero entries count as "feature present".
        Y_train : array-like, shape (n_samples,)
            Training class labels (any type ``np.unique`` can sort).
        X_test : array-like, shape (n_test, n_features)
            Samples to classify; non-zero entries count as "feature present".

        Returns
        -------
        numpy.ndarray, shape (n_test,)
            The predicted label (a value taken from ``Y_train``) for each
            test row. Numeric labels are returned as float64; other label
            types keep their own dtype. Ties go to the smallest label.

        Raises
        ------
        ValueError
            If the training set is empty, ``X_train`` is not 2-D with one row
            per label, or ``X_test`` does not have ``n_features`` columns.
        """
        smooth = 1  # add-one (Laplace) smoothing constant

        X_train = np.asarray(X_train)
        Y_train = np.asarray(Y_train).ravel()
        X_test = np.asarray(X_test)

        if (Y_train.size == 0 or X_train.ndim != 2
                or X_train.shape[0] != Y_train.size):
            raise ValueError(
                'X_train must be a non-empty 2-D array with one row per '
                'entry of Y_train')
        if X_test.shape[0] == 0:
            # Nothing to classify.
            return np.array([])
        n_features = X_train.shape[1]
        if X_test.ndim != 2 or X_test.shape[1] != n_features:
            raise ValueError(
                'X_test must be a 2-D array with the same number of '
                'columns as X_train')

        # Sorted distinct labels and their frequencies. Every per-class array
        # below is indexed in this same order, so an argmax over classes maps
        # straight back to the real label.
        classes, class_counts = np.unique(Y_train, return_counts=True)

        # Count, per class, in how many samples each feature is present.
        # Binarising here keeps training consistent with prediction (which
        # only distinguishes zero from non-zero) and guarantees that the
        # smoothed probabilities stay strictly inside (0, 1).
        train_present = X_train != 0
        feature_counts = np.array(
            [train_present[Y_train == label].sum(axis=0) for label in classes],
            dtype=float,
        )

        # Smoothed class priors and per-class feature probabilities.
        class_prob = (class_counts + smooth) / float(
            Y_train.size + classes.size * smooth)
        feature_prob = (feature_counts + smooth) / (
            class_counts[:, np.newaxis] + 2.0 * smooth)

        # Log-posterior (up to a constant) of each class for each test row:
        # log P(c) + sum_j [x_j log p_cj + (1 - x_j) log(1 - p_cj)].
        test_present = (X_test != 0).astype(float)
        scores = (
            np.log(class_prob)
            + test_present.dot(np.log(feature_prob).T)
            + (1.0 - test_present).dot(np.log(1.0 - feature_prob).T)
        )

        predicted = classes[scores.argmax(axis=1)]
        if np.issubdtype(predicted.dtype, np.number):
            # Keep the float64 output dtype that callers have always received.
            predicted = predicted.astype(float)
        return predicted