-
Notifications
You must be signed in to change notification settings - Fork 3
/
means.py
40 lines (28 loc) · 1.01 KB
/
means.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from datapoint import DataPoint
# Module-level debug switch: when true, KMeans.__init__ prints the input path.
verbose = False


class KMeans:
    """Hold the data set and the current means for a k-means clustering run.

    Attributes set by the constructor:
      dataPoints -- list of DataPoint objects, one per non-blank input line
      d          -- length of the most recently parsed vector (0 when the
                    input contained no vectors)
      k          -- requested number of clusters
      means      -- current cluster means; initially the first k data points
    """

    def __init__(self, k, inputFile):
        """Load one vector per line from inputFile and seed the means.

        k         -- number of clusters
        inputFile -- path of a text file in which every non-blank line is a
                     Python expression that evaluates to a sized sequence
                     (something len() accepts)

        The first k data points are used as the initial means. If the file
        holds fewer than k vectors, self.means is shorter than k.
        """
        self.dataPoints = []
        # Defined up front so getDims() works even when the file has no vectors.
        self.d = 0
        if verbose:
            # Parenthesised single-argument form prints identically on
            # Python 2 and Python 3.
            print(inputFile)
        # The context manager closes the file even if parsing a line fails.
        with open(inputFile, 'r') as f:
            for line in f:
                # Blank lines (e.g. a trailing newline) are not valid
                # expressions, so skip them instead of crashing in eval().
                if not line.strip():
                    continue
                # SECURITY: eval() executes arbitrary code found in the input
                # file. Only use trusted files; if the data are plain
                # literals, ast.literal_eval is the safe replacement.
                val = eval(line)
                self.d = len(val)
                self.dataPoints.append(DataPoint(len(val), val))
        self.k = k
        # Slice copy: the list is new, but its elements are the very same
        # DataPoint objects stored in self.dataPoints.
        self.means = self.dataPoints[:k]

    def getClusterCentroids(self):
        """Assign every data point to its nearest mean and collect the results.

        Returns a list of k two-element lists [accumulator, count]:
        accumulator is a fresh DataPoint(self.d) that has had addVector()
        called with each point assigned to that cluster (presumably a running
        vector sum -- confirm against DataPoint), and count is the number of
        points assigned.

        Side effect: calls point.setCluster(mean) on every data point.

        Expects self.means to hold at least k entries; otherwise indexing the
        means raises IndexError as soon as there is a data point to assign.
        """
        centroids = [[DataPoint(self.d), 0] for _ in range(self.k)]
        for point in self.dataPoints:
            # min() over (distance, index) pairs picks the smallest distance
            # and breaks ties in favour of the lowest cluster index.
            closestCluster = min(
                (self.means[i].distanceTo(point), i) for i in range(self.k)
            )[1]
            point.setCluster(self.means[closestCluster])
            centroids[closestCluster][0].addVector(point)
            centroids[closestCluster][1] += 1
        return centroids

    def updateMeans(self, means):
        """Replace the current list of means with the given one."""
        self.means = means

    def __str__(self):
        """Return the str() of the list of getVector() results of the means."""
        return str([mean.getVector() for mean in self.means])

    def getDims(self):
        """Return the vector length recorded while loading (0 if none loaded)."""
        return self.d