Skip to content

Commit

Permalink
Updating comments.
Browse files Browse the repository at this point in the history
Remove r[i]
  • Loading branch information
arjunsk committed Oct 19, 2023
1 parent ea972f4 commit 59b825b
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 7 deletions.
15 changes: 9 additions & 6 deletions clusterer/elkan.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ type KmeansElkan struct {
assignments []int // maps vector index to cluster index
lowerBounds [][]float64 // distances for vector and all clusters centroids
upperBounds []float64 // distance between each point and its assigned cluster centroid ie d(x, c(x))
r []bool // indicates that upper bound needs to be recalculated

// local state
vectors [][]float64 // input vectors
Expand Down Expand Up @@ -48,7 +47,6 @@ func NewKmeansElkan(vectors [][]float64, clusterCnt int,
initializer: init,
vectors: vectors,
clusterCnt: clusterCnt,
r: make([]bool, n),
assignments: make([]int, n),
upperBounds: make([]float64, n),
}
Expand Down Expand Up @@ -76,7 +74,11 @@ func (el *KmeansElkan) Cluster() (containers.Clusters, error) {
return clusters, nil
}

// kmeansElkan Complexity := closer to O(n); n = number of vectors
// kmeansElkan
// During each iteration of the algorithm, the lower bounds l(x, c) are updated for all points x and
// centers c. These updates take O(nk) time per iteration, so the overall complexity of the algorithm
// remains at least O(nke) (n = number of vectors, k = number of clusters, e = number of iterations),
// even though the number of distance calculations is only roughly O(n).
// Ref: https://www.cse.iitd.ac.in/~rjaiswal/2015/col870/Project/Nipun.pdf
func (el *KmeansElkan) kmeansElkan(clusters containers.Clusters) error {
for i := 0; ; i++ {
movement := 0
Expand Down Expand Up @@ -184,6 +186,8 @@ func (el *KmeansElkan) assignData(centroidDistances [][]float64,
continue
}

r := true //indicates that upper bound needs to be recalculated

// step 3.
// For all remaining points x and centers c such that
// (i) c != c(x) and
Expand All @@ -205,14 +209,14 @@ func (el *KmeansElkan) assignData(centroidDistances [][]float64,

//step 3.a
// If r(x) then compute d(x, c(x)) and assign r(x)= false. Otherwise, d(x, c(x))=u(x).
if el.r[x] {
if r {
distance, err := el.distFn(vectors[x], clusters[meanIndex].Center())
if err != nil {
return 0, err
}
el.upperBounds[x] = distance
el.lowerBounds[x][meanIndex] = distance
el.r[x] = false
r = false
}

//step 3.b
Expand Down Expand Up @@ -257,7 +261,6 @@ func (el *KmeansElkan) updateBounds(moveDistances []float64, data [][]float64) {
// u(x)=u(x)+d(m(c(x)), c(x))
// r(x)= true
el.upperBounds[x] += moveDistances[el.assignments[x]]
el.r[x] = true
}
}

Expand Down
6 changes: 5 additions & 1 deletion initializer/kmeans_plus_plus.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ func NewKmeansPlusPlusInitializer(distFn containers.DistanceFunction) Initialize
// InitCentroids initializes the centroids using kmeans++ algorithm
// Ref: https://www.youtube.com/watch?v=HatwtJSsj5Q
// Ref (animation):https://www.youtube.com/watch?v=efKGmOH4Y_A
// Complexity: O(n*k); n = number of vectors, k = number of clusters
// Complexity: O(k*n*k) = O(n*k^2); n = number of vectors, k = number of clusters
// Reason: k-1 times, compute the distance from each vector to its nearest existing center, which costs O(n*k) per round.
func (kpp *KmeansPlusPlus) InitCentroids(vectors [][]float64, clusterCnt int) (clusters containers.Clusters, err error) {
err = validateArgs(vectors, clusterCnt)
if err != nil {
Expand All @@ -36,15 +37,18 @@ func (kpp *KmeansPlusPlus) InitCentroids(vectors [][]float64, clusterCnt int) (c
randIdx := rand.Intn(len(vectors))
clusters[0] = containers.NewCluster(vectors[randIdx])

// O(k-1)
for i := 1; i < clusterCnt; i++ {
// NOTE: Nearest is called on clusters[:i], i.e. only on the centers chosen so far.
// Handling this loop in parallel can cause bugs, because not all the clusters are initialized yet.
distances := make([]float64, len(vectors))
sum := 0.0
minDistance := 0.0
// 2. for each data point, compute the distance to the existing centers
// O(n)
for vecIdx, vec := range vectors {

// O(k)
_, minDistance, err = clusters[:i].Nearest(vec, kpp.DistFn)
if err != nil {
return nil, err
Expand Down

0 comments on commit 59b825b

Please sign in to comment.