-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNaiveBayes.php
86 lines (74 loc) · 2.8 KB
/
NaiveBayes.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
<?php
/**
* Created By ${pROJECT_NAME}.
* User: pfinal
* Date: 2019/8/14
* Time: 下午4:46
* ----------------------------------------
*
*/
// Label class: defines the categories that the classifier can recognise.
/**
 * Holds the category labels used for training and classification.
 *
 * The values are plain strings, so they can be used directly as array keys.
 */
class Type
{
    /** Label for statements judged favourable (the Chinese text means "good"). */
    public const GOOD = '好的';

    /** Label for statements judged unfavourable (the Chinese text means "bad"). */
    public const BAD = '坏的';
}
// The naive Bayes algorithm works from a training set and uses it to make predictions.
/**
 * Multinomial naive Bayes text classifier.
 *
 * Train it with learn() on labelled statements, then call guess() to get the
 * most probable label for an unseen statement. Both the label priors and the
 * per-label word probabilities use add-one (Laplace) smoothing, so unseen
 * words and untrained labels never produce a zero probability.
 */
class Classifier
{
    /** @var array<int, int|string> Labels that guess() may return, in registration order. */
    private $types = [Type::GOOD, Type::BAD];

    /** @var array<int|string, array<int|string, int>> Word counts: label => word => occurrences. */
    private $words = [Type::GOOD => [], Type::BAD => []];

    /** @var array<int|string, int> Number of training statements learned per label. */
    private $documents = [Type::GOOD => 0, Type::BAD => 0];

    /** @var array<int|string, int> Total word occurrences learned per label (sum of that label's counts). */
    private $wordTotals = [Type::GOOD => 0, Type::BAD => 0];

    /** @var array<int|string, bool> Set of every distinct word seen in training, across all labels. */
    private $vocabulary = [];

    /**
     * Returns the most probable label for a statement.
     *
     * @param string $statement Free text to classify.
     *
     * @return int|string|null The best label; on a tie the label registered first wins.
     *                         Null only if no labels are registered.
     */
    public function guess($statement)
    {
        $words = $this->getWords($statement);
        $bestScore = -INF;
        $bestType = null;

        foreach ($this->types as $type) {
            // Sum log-probabilities instead of multiplying raw probabilities:
            // the product underflows to 0.0 for long statements, which would
            // make every label score equal and leave no winner.
            $score = log($this->pTotal($type));
            foreach ($words as $word) {
                $score += log($this->p($word, $type));
            }

            if ($score > $bestScore) {
                $bestScore = $score;
                $bestType = $type;
            }
        }

        return $bestType;
    }

    /**
     * Splits a statement into lower-cased ASCII alphanumeric tokens.
     *
     * TODO: every non-ASCII character is stripped, so Chinese text currently
     * yields no tokens; a proper Chinese word segmenter should be used here.
     *
     * @param string $string Raw statement.
     *
     * @return string[] Tokens in order of appearance; never contains empty strings.
     */
    public function getWords($string)
    {
        $normalized = preg_replace('/[^A-Za-z0-9\s]/', '', strtolower((string) $string));

        // PREG_SPLIT_NO_EMPTY drops the empty tokens that leading/trailing
        // whitespace or an empty statement would otherwise produce.
        return preg_split('/\s+/', $normalized, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Smoothed prior probability P(type).
     *
     * Add-one smoothing over the registered labels, so the priors of all
     * labels sum to exactly 1.
     *
     * @param int|string $type Label.
     *
     * @return float Probability in (0, 1].
     */
    public function pTotal($type)
    {
        $seen = isset($this->documents[$type]) ? $this->documents[$type] : 0;

        return ($seen + 1) / (array_sum($this->documents) + count($this->types));
    }

    /**
     * Smoothed conditional probability P(word | type).
     *
     * Add-one smoothing over the training vocabulary plus one extra slot
     * shared by all out-of-vocabulary words. The extra slot keeps the
     * denominator non-zero before any training has happened.
     *
     * @param string     $word Token as produced by getWords().
     * @param int|string $type Label.
     *
     * @return float Probability in (0, 1].
     */
    public function p($word, $type)
    {
        $count = isset($this->words[$type][$word]) ? $this->words[$type][$word] : 0;
        $total = isset($this->wordTotals[$type]) ? $this->wordTotals[$type] : 0;

        return ($count + 1) / ($total + count($this->vocabulary) + 1);
    }

    /**
     * Adds one labelled statement to the training data.
     *
     * A label that has not been seen before is registered on first use, so
     * it becomes a candidate for guess().
     *
     * @param string     $statement Training text.
     * @param int|string $type      Label of the statement.
     *
     * @return void
     */
    public function learn($statement, $type)
    {
        if (!isset($this->documents[$type])) {
            $this->types[] = $type;
            $this->words[$type] = [];
            $this->documents[$type] = 0;
            $this->wordTotals[$type] = 0;
        }

        foreach ($this->getWords($statement) as $word) {
            if (!isset($this->words[$type][$word])) {
                $this->words[$type][$word] = 0;
            }
            $this->words[$type][$word]++;  // occurrences of this word under the label
            $this->wordTotals[$type]++;    // running total, so p() does not re-sum on every call
            $this->vocabulary[$word] = true;
        }

        $this->documents[$type]++; // one more training statement for the label
    }
}
// Demo: train the classifier on four labelled statements, then classify two unseen ones.
$classifier = new Classifier();

foreach ([
    ['Symfony is the best', Type::GOOD],
    ['PhpStorm is great', Type::GOOD],
    ['Iltar complains a lot', Type::BAD],
    ['No Symfony is bad', Type::BAD],
] as $sample) {
    $classifier->learn($sample[0], $sample[1]);
}

// Expected output (byte lengths assume this file is saved as UTF-8):
//   string(6) "好的"   i.e. Type::GOOD
//   string(6) "坏的"   i.e. Type::BAD
foreach (['Symfony is great', 'I complain a lot'] as $query) {
    var_dump($classifier->guess($query));
}