-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
140 lines (120 loc) · 3.37 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package main
import (
"bufio"
"fmt"
"math/rand"
"os"
"strconv"
"strings"
"time"
)
// "github.com/hlts2/gohot"
// Tunable parameters for the train/test split and the classifier.
var (
testPercentage = 0.1 // probability that a row is placed in the test set (see setupData)
datafile = "data-cuaca.csv"
threshold = 0.1
// Example: if `threshold` is `1.5` this means the category with the highest probability
// needs to be 1.5 times higher than the second highest probability.
// If the top category fails the threshold we will classify it as `unknown`.
)
// categories lists the class labels handed to createClassifier in main.
var categories = []string{"Hujan", "Berawan", "Cerah"}
// document is one row of the dataset. Fields are listed in CSV column order,
// which positional literals such as document{a, b, c, d, e, f} rely on.
type document struct {
	// time is the first CSV column (named "waktu" in setupData).
	time string
	// class is the label the classifier is trained on and scored against.
	class string
	// dmin and dmax come from the dens_min/dens_max columns; tmin and tmax
	// come from the temp_min/temp_max columns.
	dmin, dmax, tmin, tmax float64
}
// Numeric feature values of the training rows, one slice per feature.
// setupData appends to these for every row that lands in the train split
// (presumably the input for per-feature means; confirm against the classifier).
var (
	mtdmin []float64
	mtdmax []float64
	mttmin []float64
	mttmax []float64
)
// train and test hold the dataset rows after setupData has split them into
// a training set and a held-out test set.
var train, test []document
// main loads the dataset, trains a classifier on the TRAIN split, and prints
// its accuracy (and share of "unknown" answers) on the TEST split and on a
// sample of up to 100 rows from the TRAIN split.
func main() {
	setupData(datafile)
	fmt.Println("Data file used:", datafile)
	fmt.Println("no of docs in TRAIN dataset:", len(train))
	fmt.Println("no of docs in TEST dataset:", len(test))

	// Create the classifier with the configured parameters.
	c := createClassifier(categories, threshold)

	// Train on the train dataset.
	for _, doc := range train {
		c.Train(doc.class, doc.time, doc.dmin, doc.dmax, doc.tmin, doc.tmax)
	}
	// fmt.Println("hasil training", c.totalDocuments)

	// percent reports part as a percentage of total. An empty dataset yields
	// 0 rather than the NaN that 0/0 would print.
	percent := func(part, total int) float64 {
		if total == 0 {
			return 0
		}
		return float64(part) * 100 / float64(total)
	}

	// Validate on the test dataset.
	count, accurates, unknowns := 0, 0, 0
	for _, doc := range test {
		count++
		sentiment := c.Classify(doc.time, doc.dmin, doc.dmax, doc.tmin, doc.tmax)
		if sentiment == doc.class {
			accurates++
		}
		if sentiment == "unknown" {
			unknowns++
		}
	}
	fmt.Printf("Accuracy on TEST dataset is %2.1f%% with %2.1f%% unknowns",
		percent(accurates, count), percent(unknowns, count))

	// Validate on the first 100 docs of the train dataset, or on all of them
	// when there are fewer: slicing train[0:100] directly panics on a short
	// dataset.
	sample := train
	if len(sample) > 100 {
		sample = sample[:100]
	}
	count, accurates, unknowns = 0, 0, 0
	for _, doc := range sample {
		count++
		sentiment := c.Classify(doc.time, doc.dmin, doc.dmax, doc.tmin, doc.tmax)
		if sentiment == doc.class {
			accurates++
		}
		if sentiment == "unknown" {
			unknowns++
		}
	}
	fmt.Printf("\nAccuracy on TRAIN dataset is %2.1f%% with %2.1f%% unknowns",
		percent(accurates, count), percent(unknowns, count))
}
func setupData(file string) {
rand.Seed(time.Now().UTC().UnixNano())
data, err := readLines(file)
if err != nil {
fmt.Println("Cannot read file", err)
os.Exit(1)
}
for _, line := range data {
s := strings.Split(line, ",")
waktu, class, dens_min, dens_max, temp_min, temp_max := s[0], s[1], s[2], s[3], s[4], s[5]
dmin, _ := strconv.ParseFloat(dens_min, 64)
dmax, _ := strconv.ParseFloat(dens_max, 64)
tmin, _ := strconv.ParseFloat(temp_min, 64)
tmax, _ := strconv.ParseFloat(temp_max, 64)
//dibagi data train dan test
if rand.Float64() > testPercentage {
train = append(train, document{waktu, class, dmin, dmax, tmin, tmax})
//tapi ini belum tau classnya
mtdmin = append(mtdmin, dmin)
mtdmax = append(mtdmax, dmax)
mttmin = append(mttmin, tmin)
mttmax = append(mttmax, tmax)
} else {
test = append(test, document{waktu, class, dmin, dmax, tmin, tmax})
}
}
}
// readLines returns the contents of the file at path as a slice with one
// entry per line, without line terminators. If the file cannot be opened it
// returns a nil slice and the open error; if scanning fails part-way it
// returns the lines read so far together with the scanner's error.
func readLines(path string) ([]string, error) {
	f, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	var out []string
	for {
		if !sc.Scan() {
			break
		}
		out = append(out, sc.Text())
	}
	if scanErr := sc.Err(); scanErr != nil {
		return out, scanErr
	}
	return out, nil
}