-
Notifications
You must be signed in to change notification settings - Fork 0
/
centroid.go
64 lines (57 loc) · 1.34 KB
/
centroid.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
package go_k_means
import (
"math/rand"
)
// RandomCentroidStrategy is a centroid-selection strategy whose Select method
// draws k distinct elements of the input at random to act as initial centroids.
type RandomCentroidStrategy struct {
// randomSeed is the seed Select hands to math/rand before drawing.
randomSeed int64
}
// Select picks up to k distinct elements of data, uniformly at random, to
// serve as initial centroids and returns copies of them in selection order.
//
// The i-th chosen element of data is tagged in place with cluster = i. The
// copy stored in the result is taken before tagging, so it keeps whatever
// cluster value the element had on entry.
//
// The random generator is private to the call and seeded with s.randomSeed:
// the same seed and input always produce the same selection, and the
// process-wide math/rand state is left untouched.
//
// If k exceeds len(data) only len(data) centroids can be chosen without
// repetition, so the result is shortened accordingly. A non-positive k yields
// an empty result.
func (s RandomCentroidStrategy) Select(data []Datum, k int) []Datum {
	// Clamp k: asking for more distinct points than exist would otherwise
	// make the rejection loop below spin forever.
	if k > len(data) {
		k = len(data)
	}
	if k < 0 {
		k = 0
	}

	rng := rand.New(rand.NewSource(s.randomSeed))
	visited := make([]bool, len(data))
	centroids := make([]Datum, k)
	for i := 0; i < k; {
		index := rng.Intn(len(data))
		if visited[index] {
			// Already a centroid; draw again without advancing i.
			continue
		}
		visited[index] = true
		centroids[i] = data[index]
		// Tag the element that was actually chosen, not the i-th element.
		data[index].cluster = i
		i++
	}
	return centroids
}
// KMeansPlusPlusCentroidStrategy is a centroid-selection strategy whose Select
// method picks the first centroid at random and each later one with a
// probability weighted by the squared distance to the nearest centroid
// already chosen.
type KMeansPlusPlusCentroidStrategy struct {
// randomSeed is the seed Select hands to math/rand before drawing.
randomSeed int64
}
// Select chooses k initial centroids from data using k-means++ seeding
// (https://en.wikipedia.org/wiki/K-means%2B%2B) and returns copies of them in
// selection order.
//
// The first centroid is drawn uniformly at random. Every later one is drawn
// with probability proportional to the squared distance between a point and
// the nearest centroid chosen so far, which favours points far away from the
// existing centroids.
//
// The i-th chosen element of data is tagged in place with cluster = i. The
// copy stored in the result is taken before tagging, so it keeps whatever
// cluster value the element had on entry.
//
// The random generator is private to the call and seeded with s.randomSeed:
// the same seed and input always produce the same selection, and the
// process-wide math/rand state is left untouched.
//
// A non-positive k or empty data yields an empty result. When every point
// coincides with an already chosen centroid (all weights are zero), data[0]
// is selected, so the result may contain duplicates.
func (s KMeansPlusPlusCentroidStrategy) Select(data []Datum, k int) []Datum {
	if k <= 0 || len(data) == 0 {
		return []Datum{}
	}

	rng := rand.New(rand.NewSource(s.randomSeed))
	centroids := make([]Datum, k)

	first := rng.Intn(len(data))
	centroids[0] = data[first]
	data[first].cluster = 0

	// nearest[j] is the squared distance from data[j] to the closest centroid
	// chosen so far. It is kept up to date incrementally: each round only the
	// newest centroid can lower it, so older centroids need no re-check.
	nearest := make([]float64, len(data))
	for i := 1; i < k; i++ {
		newest := centroids[i-1].Vector
		total := 0.0
		for j := range data {
			d := data[j].Vector.Distance(newest)
			// In the first round nearest[j] is still unset, so take the
			// distance unconditionally instead of relying on a sentinel.
			if sq := d * d; i == 1 || sq < nearest[j] {
				nearest[j] = sq
			}
			total += nearest[j]
		}

		// Roulette-wheel selection: walk the cumulative weights until they
		// reach a uniformly drawn target in [0, total). The index bound
		// guards against running past the end on non-finite weights.
		target := rng.Float64() * total
		pick := 0
		for cum := nearest[0]; cum < target && pick < len(data)-1; {
			pick++
			cum += nearest[pick]
		}

		centroids[i] = data[pick]
		data[pick].cluster = i
	}
	return centroids
}