naive-bayes.cc
#include "naive-bayes.h"
#include <algorithm>
namespace ml {
namespace {

// Assumes that y holds class labels 0 .. n; returns the number of
// classes, n + 1.
int calculate_num_classes(const Ref<const IVector> &y) {
  int y_max = 0;
  for (Index i = 0; i < y.rows(); ++i) {
    y_max = std::max(y_max, y(i));
  }
  return y_max + 1;
}

}  // namespace
MultinomialNaiveBayes::MultinomialNaiveBayes(double alpha)
: _is_fitted(false), _alpha(alpha) {}
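
// Estimates the model parameters from the training data:
//   _theta_y(c)     -> p(y = c), the class prior;
//   _theta_xy(c, j) -> p(x_j > 0 | y = c), the per-class probability that
//                      feature j is present (features are binarized).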
void MultinomialNaiveBayes::fit(const Ref<const IMatrix> &X,
const Ref<const IVector> &y) {
assert(X.rows() == y.rows());
_is_fitted = true;
auto m = X.rows();
const int num_classes = calculate_num_classes(y);
const int num_features = X.cols();
assert(num_classes > 1);
_theta_y = Vector::Zero(num_classes);
for (Index i = 0; i < m; ++i) {
_theta_y(y(i)) += 1;
}
// _theta_y(i) = # samples of class i.
_theta_xy = Matrix::Zero(num_classes, num_features);
for (Index i = 0; i < m; ++i) {
const int which_class = y(i);
for (Index feature = 0; feature < num_features; ++feature) {
if (X(i, feature) > 0) {
_theta_xy(which_class, feature) += 1;
}
}
}
  // _theta_xy(i, j) = # samples with class i and x_j > 0.
  // Normalize each (class, feature) count into a conditional probability.
for (Index which_class = 0; which_class < num_classes; ++which_class) {
for (Index feature = 0; feature < num_features; ++feature) {
      // Laplace smoothing: add alpha pseudo-counts so that unseen
      // (class, feature) pairs keep a nonzero probability.
_theta_xy(which_class, feature) =
(_theta_xy(which_class, feature) + _alpha) /
(_theta_y(which_class) + _alpha * num_features);
}
}
  // Normalize class counts into prior probabilities.
_theta_y /= m;
}
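
// Returns an (X.rows() x num_classes) matrix of posteriors: output(i, c)
// = p(y = c | x_i), obtained via Bayes' rule under the naive assumption
// that features are conditionally independent given the class.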
Matrix MultinomialNaiveBayes::predict(const Ref<const IMatrix> &X) const {
assert(_is_fitted);
const int num_classes = _theta_y.rows();
const int num_features = _theta_xy.cols();
Matrix output(X.rows(), num_classes);
  for (Index row = 0; row < X.rows(); ++row) {
    // Joint likelihoods p(x_row, y = c); each is computed once and reused
    // both as the numerator and in the normalizing denominator.
    Vector joint(num_classes);
    double denominator = 0.0;
    for (Index which_class = 0; which_class < num_classes; ++which_class) {
      double p_x_given_y = 1.0;
      for (Index feature = 0; feature < num_features; ++feature) {
        if (X(row, feature) > 0) {
          p_x_given_y *= _theta_xy(which_class, feature);
        } else {
          p_x_given_y *= (1.0 - _theta_xy(which_class, feature));
        }
      }
      joint(which_class) = p_x_given_y * _theta_y(which_class);
      denominator += joint(which_class);
    }
    for (Index which_class = 0; which_class < num_classes; ++which_class) {
      output(row, which_class) = joint(which_class) / denominator;
    }
  }
return output;
}
}  // namespace ml
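
A minimal usage sketch, assuming naive-bayes.h exposes IMatrix, IVector, and Matrix as Eigen-style typedefs inside namespace ml that match the signatures above; the driver below is illustrative and not part of the file.

// Hypothetical driver; type names are assumed from naive-bayes.h.
#include "naive-bayes.h"

#include <iostream>

int main() {
  // Four samples with three binary features; class labels are 0 and 1.
  ml::IMatrix X(4, 3);
  X << 1, 0, 1,
       1, 1, 0,
       0, 0, 1,
       0, 1, 1;
  ml::IVector y(4);
  y << 0, 0, 1, 1;

  // alpha = 1.0 is classic add-one (Laplace) smoothing.
  ml::MultinomialNaiveBayes clf(1.0);
  clf.fit(X, y);

  // Each row of the result holds p(y = c | x) and sums to 1.
  const ml::Matrix posterior = clf.predict(X);
  std::cout << posterior << std::endl;
  return 0;
}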