forked from Wei-1/Scala-Machine-Learning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEMCluster.scala
70 lines (65 loc) · 2.45 KB
/
EMCluster.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
// Wei Chen - EM Cluster
// 2016-11-07
package com.scalaml.algorithm
import com.scalaml.general.MatrixFunc._
// Gaussian-based clustering with hard assignments.
// `cluster` alternates between (1) assigning every point to the Gaussian with
// the highest weighted probability and (2) re-estimating one Gaussian per
// group of points, until the cluster centres stop changing or `iter` passes
// have been made.
class EMCluster() extends Clustering {
    val algoname: String = "EMCluster"
    val version: String = "0.1"
    // Current model, one entry per cluster:
    // ((point count, centre, covariance), cluster id).
    var gaussians = Array[((Int, Array[Double], Array[Array[Double]]), Int)]()
    // Latest assignment, one entry per input row: (data point, cluster id).
    var groupdata = Array[(Array[Double], Int)]()
    // Number of clusters used to seed the model.
    var k = 2
    // Upper bound on the number of refinement passes in `cluster`.
    var iter = 100

    // Drops the fitted model and restores the default parameters.
    override def clear(): Boolean = {
        gaussians = Array[((Int, Array[Double], Array[Array[Double]]), Int)]()
        groupdata = Array[(Array[Double], Int)]()
        k = 2
        iter = 100
        true
    }

    // Reads the parameters from `paras`:
    //   "K" / "k"                          -> number of clusters (default 2)
    //   "ITERATION" / "iteration" / "iter" -> max refinement passes (default 100)
    // Returns false (after printing the exception to stderr) when a supplied
    // value is not an Int.
    override def config(paras: Map[String, Any]): Boolean = try {
        k = paras.getOrElse("K", paras.getOrElse("k", 2)).asInstanceOf[Int]
        iter = paras.getOrElse("ITERATION", paras.getOrElse("iteration", paras.getOrElse("iter", 100))).asInstanceOf[Int]
        true
    } catch { case e: Exception =>
        Console.err.println(e)
        false
    }

    // Summarises the points of one cluster as ((count, centre, covariance), id).
    // The centre is `matrixaccumulate(members)` divided by the point count
    // (presumably the column-wise mean - confirm against MatrixFunc).
    private def estimate(
        id: Int,
        members: Array[Array[Double]]
    ): ((Int, Array[Double], Array[Array[Double]]), Int) = {
        val count = members.size
        val centre = matrixaccumulate(members).map(_ / count)
        val spread = covariance(members)
        ((count, centre, spread), id)
    }

    // --- Start EM Cluster Function ---
    // Clusters `data` (one row per point) and returns the cluster id of every
    // row, in input order. Ids are seeded as 1..k.
    // The fitted model is kept in `gaussians`, the assignment in `groupdata`.
    override def cluster(
        data: Array[Array[Double]] // Data Array(xi)
    ): Array[Int] = { // Return the cluster id of each data point
        // Seed: spread the rows round-robin over the ids 1..k and fit one
        // Gaussian per id.
        gaussians = data.zipWithIndex
            .groupBy { case (_, index) => index % k + 1 }
            .map { case (id, rows) => estimate(id, rows.map(_._1)) }
            .toArray
        var i = 0
        while (i < iter) {
            // Assignment step: each point goes to the Gaussian with the
            // highest probability weighted by that Gaussian's point count.
            groupdata = data.map { point =>
                val best = gaussians.map { case ((count, centre, spread), id) =>
                    (id, count * gaussianprobability(point, centre, spread))
                }.maxBy(_._2)._1
                (point, best)
            }
            // Update step: re-fit one Gaussian per non-empty group, ordered by id.
            val tempgaussians = groupdata.groupBy(_._2).toArray
                .sortBy(_._1)
                .map { case (id, rows) => estimate(id, rows.map(_._1)) }
            // Converged when the same ids are present with unchanged centres.
            // `forall` is true for empty input, so empty data ends the loop
            // cleanly instead of failing on an empty `reduce`.
            val converged = gaussians.size == tempgaussians.size &&
                gaussians.zip(tempgaussians).forall { case (previous, current) =>
                    previous._2 == current._2 &&
                        arrayequal(previous._1._2, current._1._2)
                }
            if (converged) i = iter
            else {
                gaussians = tempgaussians
                // Count the pass so that `iter` really bounds the loop even
                // when the centres never settle.
                i += 1
            }
        }
        groupdata.map(_._2)
    }
}