6
6
from fast1dkmeans .regularized_kmeans import __Wilber , relabel_clusters
7
7
8
8
9
-
10
@jitclass(
    [
        ("cumsum", float64[:]),
        ("cumsum2", float64[:]),
        ("D", float64[:, :]),
        ("D_row", int64),
    ]
)
class XiaolinCalculator:
    """Cost oracle that adds a stored row of ``D`` to a segment objective.

    Attributes are the two 1-D float arrays handed to ``calc_objective``
    (``cumsum`` and ``cumsum2``), the 2-D float array ``D`` and the integer
    ``D_row`` selecting which row of ``D`` is read by ``calc``.
    """

    def __init__(self, cumsum, cumsum2, D):
        """Store the given arrays and start reading from row 0 of ``D``."""
        self.cumsum = cumsum
        self.cumsum2 = cumsum2
        self.D = D
        self.D_row = 0

    def set_d_row(self, val):
        """Select the row of ``D`` that subsequent ``calc`` calls read."""
        self.D_row = val

    def calc(self, i, j):
        """Return ``D[D_row, col] + calc_objective(cumsum, cumsum2, j, i)``.

        ``col`` is ``i`` clamped from above to ``j - 1``.
        """
        # Never read a column of D beyond j - 1.
        if i < j - 1:
            col = i
        else:
            col = j - 1
        # NOTE(review): calc_objective is defined elsewhere; the (j, i)
        # argument order is kept exactly as in the original call.
        segment_cost = calc_objective(self.cumsum, self.cumsum2, j, i)
        return self.D[self.D_row, col] + segment_cost
24
30
31
+
25
32
@njit (cache = True )
26
33
def cluster_xi (v , k ):
27
34
"""Optimal quantization by matrix searching by Xiaolin Wu"""
@@ -30,28 +37,30 @@ def cluster_xi(v, k):
30
37
n = len (v )
31
38
D = np .empty ((2 , n ), dtype = np .float64 )
32
39
T = np .empty ((k , n ), dtype = np .int64 )
33
- T [0 ,:] = 0
40
+ T [0 , :] = 0
34
41
for j in range (n ):
35
- D [0 ,j ] = cost_calculator .calc (0 , j )
36
- xi_calculator = XiaolinCalculator (cost_calculator .cumsum , cost_calculator .cumsum2 , D )
37
-
42
+ D [0 , j ] = cost_calculator .calc (0 , j )
43
+ xi_calculator = XiaolinCalculator (
44
+ cost_calculator .cumsum , cost_calculator .cumsum2 , D
45
+ )
38
46
39
47
n = len (v )
40
48
row_argmins = np .empty (n , dtype = T .dtype )
41
49
rows = np .arange (n )
42
50
cols = np .arange (n )
43
51
for _k in range (1 , k ):
44
- D_row = (_k - 1 ) % 2
52
+ D_row = (_k - 1 ) % 2
45
53
xi_calculator .set_d_row (D_row )
46
54
_smawk_iter (rows , cols , xi_calculator , row_argmins )
47
- T [_k ,:] = row_argmins
48
- #print(row_argmins)
49
- next_d_row = _k % 2
55
+ T [_k , :] = row_argmins
56
+ # print(row_argmins)
57
+ next_d_row = _k % 2
50
58
for i , argmin in enumerate (row_argmins ):
51
59
min_val = xi_calculator .calc (i , argmin )
52
60
D [next_d_row , i ] = min_val
53
61
return back_track_to_get_clustering (T , n , k )
54
62
63
+
55
64
@njit (cache = True )
56
65
def cluster_xi_space (v , k ):
57
66
"""Same as cluster_xi but with space saving technique applied"""
@@ -62,34 +71,36 @@ def cluster_xi_space(v, k):
62
71
n = len (v )
63
72
D = np .empty ((2 , n ), dtype = np .float64 )
64
73
T = np .empty (n , dtype = np .int64 )
65
- T [:]= 0
74
+ T [:] = 0
66
75
for j in range (n ):
67
- D [0 ,j ] = cost_calculator .calc (0 , j )
68
- xi_calculator = XiaolinCalculator (cost_calculator .cumsum , cost_calculator .cumsum2 , D )
69
-
76
+ D [0 , j ] = cost_calculator .calc (0 , j )
77
+ xi_calculator = XiaolinCalculator (
78
+ cost_calculator .cumsum , cost_calculator .cumsum2 , D
79
+ )
70
80
71
81
n = len (v )
72
82
rows = np .arange (n )
73
83
cols = np .arange (n )
74
84
D_row = 0
75
85
next_d_row = 0
76
- for _k in range (1 , k + 1 ):
77
- D_row = (_k - 1 ) % 2
86
+ for _k in range (1 , k + 1 ):
87
+ D_row = (_k - 1 ) % 2
78
88
xi_calculator .set_d_row (D_row )
79
89
_smawk_iter (rows , cols , xi_calculator , T )
80
- #print(row_argmins)
81
- next_d_row = _k % 2
90
+ # print(row_argmins)
91
+ next_d_row = _k % 2
82
92
for i , argmin in enumerate (T ):
83
93
min_val = xi_calculator .calc (i , argmin )
84
94
D [next_d_row , i ] = min_val
85
- #print(k)
86
- k_plus1_row = next_d_row # (k+1) % 2
87
- k_row = D_row # (k) % 2
88
- lambda_ = D [k_row , n - 1 ] - D [k_plus1_row , n - 1 ]
95
+ # print(k)
96
+ k_plus1_row = next_d_row # (k+1) % 2
97
+ k_row = D_row # (k) % 2
98
+ lambda_ = D [k_row , n - 1 ] - D [k_plus1_row , n - 1 ]
89
99
assert lambda_ >= 0
90
100
result = __Wilber (n , xi_calculator .cumsum , xi_calculator .cumsum2 , lambda_ )
91
101
return relabel_clusters (result )
92
102
103
+
93
104
@njit
def back_track_to_get_clustering(T, n, k):
    """Compute the cluster assignment of n points to k clusters from T.

    The clusters are visited from the last one (label ``k - 1``) down to the
    first one (label ``0``). For the cluster being visited,
    ``T[cluster, upper - 1]`` is read as the index of its first point, where
    ``upper`` is one past its last point. Every point in that half-open range
    gets the cluster's label, and the range start then serves as ``upper``
    for the next (lower-numbered) cluster.

    Parameters
    ----------
    T : 2-D integer array, indexed as ``T[cluster, point]``
    n : number of points
    k : number of clusters

    Returns
    -------
    int64 array of length ``n`` holding one cluster label per point. The
    array is allocated uninitialised, so only entries covered by the walk
    above are written.
    """
    labels = np.empty(n, dtype=np.int64)

    upper = n  # one past the last point of the cluster being labelled
    cluster = k - 1
    while cluster >= 0:
        lower = T[cluster, upper - 1]  # first point of this cluster
        for idx in range(lower, upper):  # assign points to clusters
            labels[idx] = cluster
        upper = lower
        cluster -= 1
    return labels
0 commit comments