kmean.py
import numpy as np
import matplotlib.pyplot as plt

def kMeans_init_centroids(X, K):
    # Randomly reorder the indices of examples
    randidx = np.random.permutation(X.shape[0])
    # Take the first K examples as centroids
    centroids = X[randidx[:K]]
    return centroids

def find_closest_centroid(X, centroids):
    # Assign every example in X to the index of its nearest centroid
    K = centroids.shape[0]
    idx = np.zeros(X.shape[0], dtype=int)
    for i in range(X.shape[0]):
        distance = []
        for j in range(K):
            # Euclidean distance between example i and centroid j
            norm_ij = np.linalg.norm(X[i] - centroids[j])
            distance.append(norm_ij)
        idx[i] = np.argmin(distance)
    return idx
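
# A vectorized alternative (a sketch, not used below): broadcasting builds the full
# (m, K) distance matrix in one shot, which is much faster than the double loop for
# image-sized data, at the cost of an intermediate (m, K, n) array in memory.
def find_closest_centroid_vectorized(X, centroids):
    # (m, 1, n) - (K, n) broadcasts to (m, K, n); the norm over the last axis
    # gives pairwise distances, and argmin picks the nearest centroid per row.
    dists = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)
    return np.argmin(dists, axis=1)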

def compute_centroids(X, idx, K):
    # Move each centroid to the mean of the points currently assigned to it
    m, n = X.shape
    centroids = np.zeros((K, n))
    for i in range(K):
        points = X[idx == i]
        centroids[i] = np.mean(points, axis=0)
    return centroids
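
# An equivalent vectorized update (a sketch; the loop above is kept for readability).
# Like compute_centroids, it assumes every cluster has at least one assigned point.
def compute_centroids_vectorized(X, idx, K):
    n = X.shape[1]
    sums = np.zeros((K, n))
    np.add.at(sums, idx, X)                 # accumulate each row of X into its cluster
    counts = np.bincount(idx, minlength=K)  # number of points per cluster
    return sums / counts[:, None]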

def run_kMeans(X, initial_centroids, max_iters=10, plot_progress=False):
    # Run K-means: alternate the assignment and update steps for max_iters iterations
    m, n = X.shape
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    previous_centroids = centroids
    idx = np.zeros(m, dtype=int)
    if plot_progress:
        plt.figure(figsize=(8, 6))
    for i in range(max_iters):
        print("K-Means iteration %d/%d" % (i, max_iters - 1))
        # Assignment step: index of the closest centroid for every example
        idx = find_closest_centroid(X, centroids)
        if plot_progress:
            # Remember the previous centroids so their movement can be plotted
            # (the plotting helper itself is not part of this file)
            previous_centroids = centroids
        # Update step: recompute each centroid from its assigned points
        centroids = compute_centroids(X, idx, K)
    if plot_progress:
        plt.show()
    return centroids, idx

# --- Compress an image with K-means ---
# Scale pixel values to [0, 1] so the reconstructed float image displays
# correctly with imshow (JPEGs are read as 0-255 integers)
original_img = plt.imread('img.jpeg') / 255.0
# Reshape into an (n_pixels, 3) matrix of RGB values
X_img = np.reshape(original_img, (original_img.shape[0] * original_img.shape[1], 3))
K = 16
max_iters = 10
initial_centroids = kMeans_init_centroids(X_img, K)
centroids, idx = run_kMeans(X_img, initial_centroids, max_iters)
# Find the closest centroid of each pixel
idx = find_closest_centroid(X_img, centroids)
# Replace each pixel with the colour of the closest centroid
X_recovered = centroids[idx, :]
# Reshape the image back into its original dimensions
X_recovered = np.reshape(X_recovered, original_img.shape)

# Display the original image next to the compressed one
fig, ax = plt.subplots(1, 2, figsize=(16, 16))
ax[0].imshow(original_img)
ax[0].set_title('Original')
ax[0].set_axis_off()
ax[1].imshow(X_recovered)
ax[1].set_title('Compressed with %d colours' % K)
ax[1].set_axis_off()
plt.show()
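
# Rough storage estimate (a sketch, assuming the original stores 24 bits per pixel):
# the compressed image needs only the K-colour palette plus log2(K) bits per pixel.
n_pixels = original_img.shape[0] * original_img.shape[1]
original_bits = n_pixels * 24
compressed_bits = K * 24 + n_pixels * int(np.ceil(np.log2(K)))
print("Approximate compression ratio: %.1fx" % (original_bits / compressed_bits))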

# --- Choose K with the elbow method (scikit-learn) ---
from sklearn.cluster import KMeans

# Calculate the within-cluster sum of squares (WCSS) for K = 1..max_k
def calculate_wcss(X, max_k=10):
    wcss = []
    for k in range(1, max_k + 1):
        kmeans = KMeans(n_clusters=k, init='k-means++', max_iter=300, random_state=42)
        kmeans.fit(X)
        wcss.append(kmeans.inertia_)  # WCSS value for this k
    return wcss

# Reuse the (n_pixels, 3) pixel matrix prepared above and compute the WCSS
# for K values from 1 to 10
max_k = 10
wcss = calculate_wcss(X_img, max_k)
# Plot the Elbow Graph
plt.figure(figsize=(8, 6))
plt.plot(range(1, max_k + 1), wcss, marker='o', linestyle='--', color='b')
plt.title('Elbow Method for Optimal K')
plt.xlabel('Number of Clusters (K)')
plt.ylabel('Within-Cluster Sum of Squares (WCSS)')
plt.xticks(range(1, max_k + 1))
plt.grid(True)
plt.show()
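
# Optional follow-up (a sketch; chosen_k is a hypothetical value read off the elbow
# plot, not something this script computes): fit scikit-learn's KMeans at that K and
# rebuild the image from its cluster centres, mirroring the manual pipeline above.
chosen_k = 4  # hypothetical elbow point; adjust after inspecting the plot
kmeans = KMeans(n_clusters=chosen_k, init='k-means++', max_iter=300, random_state=42)
labels = kmeans.fit_predict(X_img)
compressed = kmeans.cluster_centers_[labels].reshape(original_img.shape)
plt.figure(figsize=(8, 6))
plt.imshow(compressed)
plt.title('Compressed with %d colours (scikit-learn)' % chosen_k)
plt.axis('off')
plt.show()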