forked from Wei-1/Scala-Machine-Learning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHDBSCAN.scala
110 lines (102 loc) · 3.79 KB
/
HDBSCAN.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// Wei Chen - HDBSCAN
// 2016-11-12
package com.scalaml.algorithm
import com.scalaml.general.MatrixFunc._
// val data = Array(Array(1.0, 2.0), Array(1.0, 1.0), Array(0.8, 1.0),
// Array(2.0, 3.0), Array(1.1, 1.1), Array(2.0, 2.2), Array(6.0, 5.0),
// Array(6.0, 7.0), Array(6.0, 6.6), Array(6.0, 6.1), Array(6.0, 6.2))
// val hdbscan = new HDBSCAN()
// hdbscan.config(Map("limit" -> 2, "k" -> 2))
// hdbscan.cluster(data)
class HDBSCAN() extends Clustering {
val algoname: String = "HDBSCAN"
val version: String = "0.1"
var splittree = Array[(Int, Int)]()
var treecheck = Map[Int, Int]()
var k = 2
var limit = 2
def distArr(a1: Array[Double], a2: Array[Double]): Double =
Math.sqrt(arrayminussquare(a1, a2).sum)
def cascade(p: Int, c: Int) {
splittree.map{ l =>
if (l._1 == p && treecheck(l._2) < 0) {
treecheck += (l._2 -> c)
cascade(l._2, c)
} else if (l._2 == p && treecheck(l._1) < 0) {
treecheck += (l._1 -> c)
cascade(l._1, c)
}
}
}
override def clear(): Boolean = {
splittree = Array[(Int, Int)]()
treecheck = Map[Int, Int]()
k = 2
limit = 2
true
}
override def config(paras: Map[String, Any]): Boolean = try {
k = paras.getOrElse("K", paras.getOrElse("k", 2)).asInstanceOf[Int]
limit = paras.getOrElse("LIMIT", paras.getOrElse("limit", 2)).asInstanceOf[Int]
true
} catch { case e: Exception =>
Console.err.println(e)
false
}
// --- HDBSCAN ---
override def cluster( // DBSCAN
data: Array[Array[Double]] // Data Array(xi)
): Array[Int] = {
val n = data.size;
val m = data.head.size;
var distMatrix3 = new Array[Double](n*(n-1)/2)
def setM3(x: Int, y: Int, v: Double) {
if (x < y) distMatrix3(x*n + y - ((Math.pow(x, 2) + 3*x)/2).toInt - 1) = v
else if (y < x) distMatrix3(y*n + x - ((Math.pow(y, 2) + 3*y)/2).toInt - 1) = v
}
def getM3xy(x: Int, y: Int): Double = {
if (x < y) return distMatrix3(x*n + y - ((Math.pow(x, 2) + 3*x)/2).toInt - 1)
else if (y < x) return distMatrix3(y*n + x - ((Math.pow(y, 2) + 3*y)/2).toInt - 1)
else return 0.0
}
def getM3x(x: Int): Array[Double] = (for (i <- 0 until n) yield getM3xy(i, x)).toArray
for (i <- 0 to n-2) {
for (j <- i+1 until n) {
setM3(i, j, distArr(data(i), data(j)))
}
}
for (i <- 0 until n) setM3(i, i, getM3x(i).sortBy(l => l).lift(limit).get)
for (i <- 0 to n-2) {
for (j <- i+1 until n) {
setM3(i, j, Math.max(Math.max(getM3xy(i, i), getM3xy(j, j)), getM3xy(i, j)))
}
}
var undone = Map(0 -> 0.0)
undone ++= (1 until n).map((_, Double.MaxValue))
var tree = Map[Int, (Int, Double)]()
while (!undone.isEmpty) {
val node = undone.minBy(_._2)._1
undone -= node
for (i <- 0 until n) {
if (i != node) {
val v = getM3xy(node, i)
if (undone.contains(i) && v < undone(i)) {
undone += (i -> v)
tree += (i -> (node, v))
}
}
}
}
splittree = tree.toArray.sortBy(_._2._2).dropRight(k - 1).map(l => (l._1, l._2._1))
treecheck = (0 until n).map((_, -1)).toMap
var c = 1
for (i <- 0 until n) {
if (treecheck(i) < 0) {
treecheck += (i -> c)
cascade(i, c)
c += 1
}
}
return treecheck.toArray.sortBy(_._1).map(_._2)
}
}