-
Notifications
You must be signed in to change notification settings - Fork 71
/
class_Classifier.ahk
126 lines (109 loc) · 4.03 KB
/
class_Classifier.ahk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#NoEnv
;Text classifier that learns per-category token counts from training items and
;ranks the known categories for a new item.
;
;Usage:
;    c := new Classifier
;    c.Train("some text","CategoryName")
;    Ranked := c.Classify("other text") ;array of {Category, Probability}, best first
class Classifier
{
    ;Creates an empty classifier with no trained features or categories.
    __New()
    {
        this.Features := Object() ;Features[Feature][Category]: times Feature was seen in items of Category
        this.Items := Object() ;Items[Category]: number of items trained for Category
    }

    ;Splits text into an array of features (word tokens).
    ;Data: the text to tokenize.
    ;Returns an array of tokens; every non-word character acts as a separator and
    ;purely numeric tokens (decimal, or hexadecimal digits prefixed with 0x) are dropped.
    Sanitize(Data)
    {
        Data := RegExReplace(Data,"S)[^\w]",A_Space) ;turn anything that is not a word character into a space
        Data := RegExReplace(Data,"S)\b(?:0x)?\d+\b") ;remove pure numbers
        ;collapse runs of spaces in a single pass; \x20 is used instead of literal spaces so the
        ;pattern cannot be altered by whitespace reformatting, and no loop is needed that could
        ;fail to terminate
        Data := RegExReplace(Data,"S)\x20{2,}",A_Space)
        Data := Trim(Data) ;trim leading and trailing whitespace
        Result := []
        Loop, Parse, Data, %A_Space%
            Result.Insert(A_LoopField)
        Return, Result
    }

    ;Records one training item for a category.
    ;Item: the text of the training item.
    ;Category: the category name the item belongs to.
    ;Each occurrence of a token in Item increments that token's count for Category,
    ;and the item count for Category is incremented once.
    ;NOTE(review): tokens or category names equal to built-in object member names
    ;(for example "base") may not behave like ordinary keys - confirm if such input is expected.
    Train(Item,Category)
    {
        For Index, Feature In this.Sanitize(Item)
        {
            ;update the feature category counts
            If !ObjHasKey(this.Features,Feature)
                this.Features[Feature] := Object()
            If !ObjHasKey(this.Features[Feature],Category)
                this.Features[Feature][Category] := 0
            this.Features[Feature][Category]++
        }

        ;update the item category counts
        If !ObjHasKey(this.Items,Category)
            this.Items[Category] := 0
        this.Items[Category]++
    }

    ;Scores an item against every trained category.
    ;Item: the text to classify.
    ;Returns an array of objects with the fields Category and Probability, sorted by
    ;Probability in descending order. The array is empty if nothing has been trained.
    Classify(Item)
    {
        Features := this.Sanitize(Item)

        ;determine the score of each known category
        Result := []
        For Category In this.Items
        {
            Entry := Object()
            Entry.Category := Category
            Entry.Probability := this.FeaturesCategoryProbability(Features,Category)
            Result.Insert(Entry)
        }

        ;sort categories by probability, descending
        MaxIndex := ObjMaxIndex(Result)
        If (MaxIndex = "") ;ObjMaxIndex gives an empty string for an empty array
            MaxIndex := 0
        If (MaxIndex < 2)
            Return, Result

        ;insertion sort; the strict comparison keeps entries with equal scores in their original order
        Loop, % MaxIndex - 1
        {
            Index := A_Index
            While (Index > 0 && Result[Index].Probability < Result[Index + 1].Probability)
            {
                Value := Result[Index + 1]
                Result[Index + 1] := Result[Index]
                Result[Index] := Value
                Index--
            }
        }
        Return, Result
    }

    ;Combines the weighted probabilities of all features into one score for a category.
    ;Features: array of tokens as returned by Sanitize().
    ;Category: the category to score.
    ;With P the product of the weighted probabilities and n the number of features, the
    ;result is P * (sum for i = 0..n-1 of (-ln P)^i / i!), capped at 1 (the fit of the
    ;probability to an inverse chi squared distribution). Returns 1.0 for an empty feature list.
    FeaturesCategoryProbability(Features,Category)
    {
        FeatureCount := ObjMaxIndex(Features)
        If (FeatureCount = "") ;no features: the empty product is 1
            Return, 1.0

        ;accumulate ln(P) directly; multiplying the probabilities (or exponentiating this sum and
        ;taking the logarithm again) would underflow to zero for long inputs
        LogProbability := 0
        For Index, Feature In Features
            LogProbability += Ln(this.WeightedProbability(Feature,Category))

        Value := -LogProbability
        If (Value <= 0) ;P is 1: every term after the first is zero, and Ln(0) below would be invalid
            Return, 1.0

        ;determine the fit of the probability to an inverse chi squared distribution
        Term := LogProbability ;logarithm of the current series term, starting at ln(P)
        Sum := Exp(Term)
        Loop, % FeatureCount - 1
        {
            Term += Ln(Value / A_Index) ;next term = previous term * (-ln P) / i
            Sum += Exp(Term)
        }
        Return, (Sum > 1.0) ? 1.0 : Sum ;floating point rounding must not push the result above 1
    }

    ;Blends the observed probability of a feature for a category with an assumed prior.
    ;Feature: the token.
    ;Category: the category name.
    ;Returns the assumed probability (0.5) for a feature that was never trained; otherwise the
    ;average of the assumed and observed probabilities, weighted by how often the feature was seen.
    WeightedProbability(Feature,Category)
    {
        AssumedProbability := 0.5 ;the probability a feature is assumed to have if not previously encountered
        AssumedProbabilityWeight := 1.0 ;weight of the assumed probability, as a measure of the number of occurrences

        If !ObjHasKey(this.Features,Feature)
            Return, AssumedProbability

        Probability := this.Probability(Feature,Category)

        ;total number of times the feature was seen across all categories
        Totals := 0
        For Each In this.Items ;separate loop variable so the Category parameter is not overwritten
        {
            If ObjHasKey(this.Features[Feature],Each)
                Totals += this.Features[Feature][Each]
        }

        Return, ((AssumedProbability * AssumedProbabilityWeight) + (Totals * Probability)) / (AssumedProbabilityWeight + Totals)
    }

    ;Relative frequency of a feature in one category compared with all categories.
    ;Feature: the token.
    ;Category: the category name.
    ;Returns 0 if the feature was never seen in Category; otherwise the feature's per-item
    ;frequency in Category divided by the sum of its per-item frequencies over all categories.
    Probability(Feature,Category)
    {
        If !ObjHasKey(this.Features[Feature],Category)
            Return, 0

        FeatureCategoryProbability := this.Features[Feature][Category] / this.Items[Category]

        FeatureTotalProbability := 0
        For Each, Count In this.Items ;separate loop variable so the Category parameter is not overwritten
        {
            If ObjHasKey(this.Features[Feature],Each)
                FeatureTotalProbability += this.Features[Feature][Each] / Count
        }
        Return, FeatureCategoryProbability / FeatureTotalProbability
    }
}