Commit 936f0d9

applied pre-commit

1 parent 8e67228
20 files changed (+578, -421 lines)

.github/workflows/pythonapp.yml (1 addition, 1 deletion)

@@ -41,4 +41,4 @@ jobs:
 # # stop the build if there are Python syntax errors or undefined names
 # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
 # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

.github/workflows/release_to_pypi.yml (1 addition, 1 deletion)

@@ -37,4 +37,4 @@ jobs:
 - name: Publish a Python distribution to PyPI
   uses: pypa/gh-action-pypi-publish@release/v1
   with:
-    password: ${{ secrets.PYPI_API_TOKEN }}
+    password: ${{ secrets.PYPI_API_TOKEN }}

fast1dkmeans/__init__.py (1 addition, 1 deletion)

@@ -1 +1 @@
-from fast1dkmeans.main import cluster
+from fast1dkmeans.main import cluster

fast1dkmeans/common.py (15 additions, 15 deletions)

@@ -2,38 +2,38 @@
 from numba import njit, float64, int64
 from numba.experimental import jitclass
 
-USE_CACHE=True
+USE_CACHE = True
+
 
 @njit([(float64[:],)], cache=USE_CACHE)
 def calc_cumsum(v):
-    cumsum = np.empty(len(v)+1, dtype=np.float64)
-    cumsum[0]=0
+    cumsum = np.empty(len(v) + 1, dtype=np.float64)
+    cumsum[0] = 0
     cumsum[1:] = np.cumsum(v)
     return cumsum
 
+
 @njit([(float64[:],)], cache=USE_CACHE)
 def calc_cumsum2(v):
-    cumsum2 = np.empty(len(v)+1, dtype=np.float64)
-    cumsum2[0]=0
+    cumsum2 = np.empty(len(v) + 1, dtype=np.float64)
+    cumsum2[0] = 0
     cumsum2[1:] = np.cumsum(np.square(v))
     return cumsum2
 
 
-
 @njit([(float64[:], float64[:], int64, int64)], cache=USE_CACHE)
 def calc_objective(cumsum, cumsum2, i, j):
     if j <= i:
         return 0.0
-        # raise ValueError("j should never be larger than i")
-    mu = (cumsum[j+1]-cumsum[i])/(j-i+1)
+        # raise ValueError("j should never be larger than i")
+    mu = (cumsum[j + 1] - cumsum[i]) / (j - i + 1)
     result = cumsum2[j + 1] - cumsum2[i]
     result += (j - i + 1) * (mu * mu)
     result -= (2 * mu) * (cumsum[j + 1] - cumsum[i])
     return max(result, 0)
 
 
-
-@jitclass([('cumsum', float64[:]), ('cumsum2', float64[:])])
+@jitclass([("cumsum", float64[:]), ("cumsum2", float64[:])])
 class CumsumCalculator:
     def __init__(self, v):
         self.cumsum = calc_cumsum(v)
@@ -44,9 +44,9 @@ def calc(self, i, j):
 
 
 @njit([(float64[:],)], cache=USE_CACHE)
-def create_cumsum_calculator(arr): # pragma: no cover
+def create_cumsum_calculator(arr):  # pragma: no cover
     calculator = CumsumCalculator(arr)
-    print(calculator.calc(0,1))
+    print(calculator.calc(0, 1))
 
 
 @njit(cache=USE_CACHE)
@@ -58,9 +58,9 @@ def cost_of_clustering(vals, res):
     cost = 0
     for i, val in enumerate(res):
         if val != last_val:
-            cost += calc.calc(last_i, i-1)
+            cost += calc.calc(last_i, i - 1)
             last_val = val
             last_i = i
-    cost += calc.calc(last_i, len(vals)-1)
+    cost += calc.calc(last_i, len(vals) - 1)
 
-    return cost
+    return cost
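
The changes above are formatting only, but they touch the core identity of common.py: calc_objective evaluates the within-cluster squared error of any slice v[i..j] in O(1) from the two prefix-sum arrays. A minimal pure-NumPy sketch of that identity, without the numba decorators (the name calc_objective_plain and the sample values are illustrative, not part of the package):

# A minimal pure-NumPy sketch of the prefix-sum identity used by calc_objective.
# calc_objective_plain and the sample values below are illustrative only.
import numpy as np


def calc_objective_plain(cumsum, cumsum2, i, j):
    # within-cluster squared error of v[i..j] in O(1), using
    # sum(v[i..j])    = cumsum[j + 1]  - cumsum[i]
    # sum(v[i..j]**2) = cumsum2[j + 1] - cumsum2[i]
    if j <= i:
        return 0.0
    mu = (cumsum[j + 1] - cumsum[i]) / (j - i + 1)
    result = cumsum2[j + 1] - cumsum2[i]
    result += (j - i + 1) * (mu * mu)
    result -= (2 * mu) * (cumsum[j + 1] - cumsum[i])
    return max(result, 0.0)


v = np.sort(np.random.rand(10))
cumsum = np.concatenate(([0.0], np.cumsum(v)))
cumsum2 = np.concatenate(([0.0], np.cumsum(np.square(v))))
i, j = 2, 7
direct = np.sum((v[i : j + 1] - v[i : j + 1].mean()) ** 2)
assert np.isclose(calc_objective_plain(cumsum, cumsum2, i, j), direct)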

fast1dkmeans/kmeans.py (42 additions, 31 deletions)

@@ -6,22 +6,29 @@
 from fast1dkmeans.regularized_kmeans import __Wilber, relabel_clusters
 
 
-
-@jitclass([('cumsum', float64[:]), ('cumsum2', float64[:]), ('D', float64[:,:]), ('D_row', int64)])
-class XiaolinCalculator():
+@jitclass(
+    [
+        ("cumsum", float64[:]),
+        ("cumsum2", float64[:]),
+        ("D", float64[:, :]),
+        ("D_row", int64),
+    ]
+)
+class XiaolinCalculator:
     def __init__(self, cumsum, cumsum2, D):
         self.cumsum = cumsum
         self.cumsum2 = cumsum2
         self.D = D
-        self.D_row=0
+        self.D_row = 0
 
     def set_d_row(self, val):
-        self.D_row=val
+        self.D_row = val
 
     def calc(self, i, j):
         col = i if i < j - 1 else j - 1
         return self.D[self.D_row, col] + calc_objective(self.cumsum, self.cumsum2, j, i)
 
+
 @njit(cache=True)
 def cluster_xi(v, k):
     """Optimal quantization by matrix searching by Xiaolin Wu"""
@@ -30,28 +37,30 @@ def cluster_xi(v, k):
     n = len(v)
     D = np.empty((2, n), dtype=np.float64)
     T = np.empty((k, n), dtype=np.int64)
-    T[0,:]=0
+    T[0, :] = 0
     for j in range(n):
-        D[0,j] = cost_calculator.calc(0, j)
-    xi_calculator = XiaolinCalculator(cost_calculator.cumsum, cost_calculator.cumsum2, D)
-
+        D[0, j] = cost_calculator.calc(0, j)
+    xi_calculator = XiaolinCalculator(
+        cost_calculator.cumsum, cost_calculator.cumsum2, D
+    )
 
     n = len(v)
     row_argmins = np.empty(n, dtype=T.dtype)
     rows = np.arange(n)
     cols = np.arange(n)
     for _k in range(1, k):
-        D_row = (_k-1) % 2
+        D_row = (_k - 1) % 2
         xi_calculator.set_d_row(D_row)
         _smawk_iter(rows, cols, xi_calculator, row_argmins)
-        T[_k,:] = row_argmins
-        #print(row_argmins)
-        next_d_row = _k % 2
+        T[_k, :] = row_argmins
+        # print(row_argmins)
+        next_d_row = _k % 2
         for i, argmin in enumerate(row_argmins):
             min_val = xi_calculator.calc(i, argmin)
             D[next_d_row, i] = min_val
     return back_track_to_get_clustering(T, n, k)
 
+
 @njit(cache=True)
 def cluster_xi_space(v, k):
     """Same as cluster_xi but with space saving technique applied"""
@@ -62,34 +71,36 @@
     n = len(v)
     D = np.empty((2, n), dtype=np.float64)
     T = np.empty(n, dtype=np.int64)
-    T[:]=0
+    T[:] = 0
     for j in range(n):
-        D[0,j] = cost_calculator.calc(0, j)
-    xi_calculator = XiaolinCalculator(cost_calculator.cumsum, cost_calculator.cumsum2, D)
-
+        D[0, j] = cost_calculator.calc(0, j)
+    xi_calculator = XiaolinCalculator(
+        cost_calculator.cumsum, cost_calculator.cumsum2, D
+    )
 
     n = len(v)
     rows = np.arange(n)
     cols = np.arange(n)
    D_row = 0
     next_d_row = 0
-    for _k in range(1, k+1):
-        D_row = (_k-1) % 2
+    for _k in range(1, k + 1):
+        D_row = (_k - 1) % 2
         xi_calculator.set_d_row(D_row)
         _smawk_iter(rows, cols, xi_calculator, T)
-        #print(row_argmins)
-        next_d_row = _k % 2
+        # print(row_argmins)
+        next_d_row = _k % 2
         for i, argmin in enumerate(T):
             min_val = xi_calculator.calc(i, argmin)
             D[next_d_row, i] = min_val
-    #print(k)
-    k_plus1_row = next_d_row #(k+1) % 2
-    k_row = D_row #(k) % 2
-    lambda_ = D[k_row, n-1] - D[k_plus1_row, n-1]
+    # print(k)
+    k_plus1_row = next_d_row  # (k+1) % 2
+    k_row = D_row  # (k) % 2
+    lambda_ = D[k_row, n - 1] - D[k_plus1_row, n - 1]
     assert lambda_ >= 0
     result = __Wilber(n, xi_calculator.cumsum, xi_calculator.cumsum2, lambda_)
     return relabel_clusters(result)
 
+
 @njit
 def back_track_to_get_clustering(T, n, k):
     """compute cluster assignment of n points to k clusters from T
@@ -103,14 +114,14 @@ def back_track_to_get_clustering(T, n, k):
     if k > 0:
         # assign the remaining n' points to k-1 clusters
         backtrack(T, n', k-1, last_n=n)
-
+
     """
     out = np.empty(n, dtype=np.int64)
-
+
     start = n
-    for k_ in range(k-1, -1, -1):
+    for k_ in range(k - 1, -1, -1):
         stop = start
-        start = T[k_, start-1]
-        for i in range(start, stop): # assign points to clusters
+        start = T[k_, start - 1]
+        for i in range(start, stop):  # assign points to clusters
             out[i] = k_
-    return out
+    return out
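
For orientation, cluster_xi fills the same two tables shown in the diff: D holds the row minima of the dynamic program and T the argmin split positions, with _smawk_iter using total monotonicity to find each row's minima quickly and D kept to two rows. Below is a deliberately naive O(k*n^2) sketch of the same recurrence and backtracking, without numba or SMAWK; cluster_dp_naive and its exact conventions are illustrative only, not part of the package.

# Naive reference for the dynamic program that cluster_xi solves with SMAWK.
# D[q, i]: best cost of v[0..i] using q + 1 clusters; T[q, i]: start of the last cluster.
import numpy as np


def cluster_dp_naive(v, k):
    v = np.sort(np.asarray(v, dtype=np.float64))
    n = len(v)
    cumsum = np.concatenate(([0.0], np.cumsum(v)))
    cumsum2 = np.concatenate(([0.0], np.cumsum(np.square(v))))

    def cost(i, j):  # within-cluster squared error of v[i..j], O(1) via prefix sums
        if j <= i:
            return 0.0
        mu = (cumsum[j + 1] - cumsum[i]) / (j - i + 1)
        return max(cumsum2[j + 1] - cumsum2[i] - (j - i + 1) * mu * mu, 0.0)

    D = np.empty((k, n), dtype=np.float64)
    T = np.zeros((k, n), dtype=np.int64)
    for i in range(n):
        D[0, i] = cost(0, i)
    for q in range(1, k):
        for i in range(n):
            if i < q:  # more clusters than points: last cluster is the single point i
                D[q, i], T[q, i] = 0.0, i
                continue
            # last cluster is v[j..i]; the rest is an optimal q-clustering of v[0..j-1]
            costs = [D[q - 1, j - 1] + cost(j, i) for j in range(q, i + 1)]
            best = int(np.argmin(costs))
            D[q, i], T[q, i] = costs[best], best + q
    # backtracking over T, as in back_track_to_get_clustering
    labels = np.empty(n, dtype=np.int64)
    stop = n
    for q in range(k - 1, -1, -1):
        start = T[q, stop - 1]
        labels[start:stop] = q
        stop = start
    return labels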

fast1dkmeans/main.py (17 additions, 11 deletions)

@@ -16,19 +16,23 @@ def cluster(x, k, method="binary-search-interpolation", **kwargs):
     "Fast Exact k-Means, k-Medians and Bregman Divergence Clustering in 1D"
     """
 
-    assert method in ("binary-search-interpolation",
-                      "binary-search-normal",
-                      "dynamic-programming-kn",
-                      "dynamic-programming-space",
-                      "dynamic-programming"), f"wrong method string provided {method}"
+    assert method in (
+        "binary-search-interpolation",
+        "binary-search-normal",
+        "dynamic-programming-kn",
+        "dynamic-programming-space",
+        "dynamic-programming",
+    ), f"wrong method string provided {method}"
 
     if method == "dynamic-programming":
         method = "dynamic-programming-space"
-
+
     x = np.squeeze(np.asarray(x))
-    assert len(x.shape)==1, "provided array is not 1d"
+    assert len(x.shape) == 1, "provided array is not 1d"
     assert k > 0, f"negative or zero values for k({k}) are not supported"
-    assert k <= len(x), f"values of k({k}) larger than the length of the provided array ({len(x)}) are not supported"
+    assert k <= len(
+        x
+    ), f"values of k({k}) larger than the length of the provided array ({len(x)}) are not supported"
 
     order = np.argsort(x)
     x = np.array(x, dtype=np.float64)[order]
@@ -41,18 +45,20 @@ def cluster(x, k, method="binary-search-interpolation", **kwargs):
         clusters = cluster_xi(x, k)
     elif method == "dynamic-programming-space":
         clusters = cluster_xi_space(x, k)
+    else:
+        assert False
     return undo_argsort(clusters, order)
-
+
 
 def undo_argsort(sorted_arr, order):
     revert = np.empty_like(order)
-    revert[order]=np.arange(len(sorted_arr))
+    revert[order] = np.arange(len(sorted_arr))
     return sorted_arr[revert]
 
+
 @njit(cache=True)
 def undo_argsort_numba(sorted_arr, order):
     out = np.empty_like(sorted_arr)
     for i, val in enumerate(order):
         out[val] = sorted_arr[i]
     return out
-
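
For reference, cluster is the package's public entry point (re-exported from fast1dkmeans/__init__.py above). A small usage sketch with made-up sample data:

# Usage sketch for the public entry point; the sample data is made up.
import numpy as np
import fast1dkmeans

x = np.array([4.0, 4.1, 7.8, 0.3, 0.2, 8.1])
labels = fast1dkmeans.cluster(x, 3)  # default method="binary-search-interpolation"
labels_dp = fast1dkmeans.cluster(x, 3, method="dynamic-programming")  # mapped to "dynamic-programming-space"
print(labels)  # one cluster id per input point, in the original (unsorted) order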

fast1dkmeans/monge.py (14 additions, 13 deletions)

@@ -1,11 +1,12 @@
 import numpy as np
 
+
 def is_monge(M):
     """Checks whether matrix M is Monge"""
-    m,n = M.shape
-    for i in range(m-1):
-        for j in range(n-1):
-            if M[i,j]+M[i+1, j+1] > M[i, j+1] + M[i+1,j]:
+    m, n = M.shape
+    for i in range(m - 1):
+        for j in range(n - 1):
+            if M[i, j] + M[i + 1, j + 1] > M[i, j + 1] + M[i + 1, j]:
                 return False
     return True
 
@@ -15,19 +16,19 @@ def _random_monge(m, n, rands):
     row-constant, column-constant, and upper-right block arrays. (This characterization was proved
     independently by Rudolf and Woeginger in 1995, Bein and Pathak in 1990, Burdyok and Trofimov
     in 1976, and possibly others.)"""
-    row_const = np.repeat(rands[-m-n:-n].reshape(m,1),repeats=n, axis=1)
-    col_const = np.repeat(rands[-n:].reshape(1,n),repeats=m, axis=0)
-    arr = row_const+col_const
+    row_const = np.repeat(rands[-m - n : -n].reshape(m, 1), repeats=n, axis=1)
+    col_const = np.repeat(rands[-n:].reshape(1, n), repeats=m, axis=0)
+    arr = row_const + col_const
     for i in range(m):
         for j in range(n):
-            arr[i:, j:] += rands[i*m+n]
+            arr[i:, j:] += rands[i * m + n]
     return np.flip(arr, axis=0)
 
 
 def random_int_monge(m, n, block_max_val, row_max_val, col_max_val):
-    """ Generates a random monge array with integer values"""
-    rands= np.empty(m*n+m+n, dtype=int)
-    rands[:m*n] = np.random.randint(block_max_val, size=m*n)
-    rands[-m-n:-n] = np.random.randint(row_max_val, size=m)
+    """Generates a random monge array with integer values"""
+    rands = np.empty(m * n + m + n, dtype=int)
+    rands[: m * n] = np.random.randint(block_max_val, size=m * n)
+    rands[-m - n : -n] = np.random.randint(row_max_val, size=m)
     rands[-n:] = np.random.randint(col_max_val, size=n)
-    return _random_monge(m, n, rands)
+    return _random_monge(m, n, rands)
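
The condition checked by is_monge is M[i, j] + M[i + 1, j + 1] <= M[i, j + 1] + M[i + 1, j] for every adjacent 2 x 2 submatrix. A quick sketch exercising it; the concrete matrices are made-up illustrations, only the imported helpers come from the module above.

# Small sanity check of the Monge condition; the concrete inputs are made up.
import numpy as np
from fast1dkmeans.monge import is_monge, random_int_monge

i = np.arange(4).reshape(-1, 1)
j = np.arange(5).reshape(1, -1)
print(is_monge((i - j) ** 2))  # True: squared-distance matrices satisfy the inequality
print(is_monge(np.array([[1.0, 0.0], [0.0, 1.0]])))  # False: 1 + 1 > 0 + 0
print(is_monge(random_int_monge(5, 6, 10, 10, 10)))  # True by construction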
