-
Notifications
You must be signed in to change notification settings - Fork 157
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2b5206a
commit 8e78c76
Showing
6 changed files
with
1,206 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
|
||
from __future__ import division | ||
def GetAverage(mat):
    """Return the arithmetic mean of every column of ``mat``.

    Args:
        mat: A sequence of rows, each row a sequence of numbers. The number
            of columns is taken from the first row.

    Returns:
        A list holding one mean per column, or an empty list when ``mat``
        has no rows.
    """
    n = len(mat)
    if n == 0:
        return []
    # True division is intended; on Python 2 it is supplied by the
    # module-level ``from __future__ import division``.
    return [sum(row[j] for row in mat) / n for j in range(len(mat[0]))]
|
||
def width(lst):
    """Return the number of columns in ``lst``, measured on its first row.

    Args:
        lst: A sequence of rows.

    Returns:
        The length of the first row, or 0 when ``lst`` has no rows.
    """
    if len(lst) == 0:
        return 0
    return len(lst[0])
|
||
def GetVar(average, mat):
    """Return the population variance of every column of ``mat``.

    Args:
        average: Per-column means, e.g. the result of ``GetAverage(mat)``.
        mat: A sequence of rows, each row a sequence of numbers.

    Returns:
        A list with one variance per column (sum of squared deviations
        divided by the number of rows), or an empty list when ``mat`` has
        no rows.
    """
    n = len(mat)
    if n == 0:
        return []
    # Deviation of every entry from its column mean; zip() stops at the
    # shorter of ``average`` and the row.
    deviations = [
        [avg - value for avg, value in zip(average, row)]
        for row in mat
    ]
    m = len(deviations[0])
    # True division is intended; on Python 2 it is supplied by the
    # module-level ``from __future__ import division``.
    return [sum(dev[j] * dev[j] for dev in deviations) / n for j in range(m)]
|
||
def DenoisMat(mat):
    """Clip every value that exceeds its column's upper bound.

    The upper bound of a column is its mean plus its variance (the variance
    itself, not the standard deviation). Values above the bound are replaced
    by the bound; all other values are kept as they are.

    Args:
        mat: A sequence of rows, each row a sequence of numbers.

    Returns:
        A new list of new row lists with the clipped values. ``mat`` and its
        rows are left unmodified. An empty ``mat`` yields an empty list.
    """
    if len(mat) == 0:
        return []
    average = GetAverage(mat)
    variance = GetVar(average, mat)
    ceiling = [avg + var for avg, var in zip(average, variance)]

    m = len(mat[0])
    denoisMat = []
    for row in mat:
        # Work on a copy so the caller's data is not changed behind its back.
        clipped = list(row)
        for j in range(m):
            if clipped[j] > ceiling[j]:
                clipped[j] = ceiling[j]
        denoisMat.append(clipped)
    return denoisMat
|
||
def AutoNorm(mat):
    """Min-max normalise every column of ``mat`` into the range [0, 1].

    Each value ``v`` of a column becomes ``(v - min) / (max - min)`` where
    ``min`` and ``max`` are that column's extremes. The per-column ranges
    are printed before the result is built.

    Args:
        mat: A sequence of rows, each row a sequence of numbers. The number
            of columns is taken from the first row.

    Returns:
        A new list of rows with the normalised values. A column whose values
        are all equal (range 0) is mapped to 0.0. An empty ``mat`` yields an
        empty list.
    """
    if len(mat) == 0:
        return []
    m = len(mat[0])
    columns = [[row[j] for row in mat] for j in range(m)]
    # Take the extremes from the data itself so that all-negative or very
    # large columns are handled correctly.
    MinNum = [min(col) for col in columns]
    section = [max(col) - low for col, low in zip(columns, MinNum)]
    print(section)

    NormMat = []
    for row in mat:
        # True division is intended; on Python 2 it is supplied by the
        # module-level ``from __future__ import division``.
        NormMat.append([
            (value - low) / span if span else 0.0
            for value, low, span in zip(row, MinNum, section)
        ])
    return NormMat
|
||
if __name__ == '__main__':
    # Small demo: column means, denoised matrix, then the normalised matrix.
    mat = [[1, 42, 512], [4, 5, 6], [7, 8, 9], [2, 2, 2], [2, 10, 5]]
    a = GetAverage(mat)
    denoised = DenoisMat(mat)
    # Means and denoised matrix share one output line, separated by a space.
    print('%s %s' % (a, denoised))
    # Normalise the denoised data explicitly instead of relying on
    # DenoisMat having altered ``mat`` as a side effect.
    print(AutoNorm(denoised))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<projectDescription> | ||
<name>K-means</name> | ||
<comment></comment> | ||
<projects> | ||
</projects> | ||
<buildSpec> | ||
<buildCommand> | ||
<name>org.python.pydev.PyDevBuilder</name> | ||
<arguments> | ||
</arguments> | ||
</buildCommand> | ||
</buildSpec> | ||
<natures> | ||
<nature>org.python.pydev.pythonNature</nature> | ||
</natures> | ||
</projectDescription> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<?xml version="1.0" encoding="UTF-8" standalone="no"?> | ||
<?eclipse-pydev version="1.0"?> | ||
|
||
<pydev_project> | ||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property> | ||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property> | ||
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH"> | ||
<path>/K-means/src</path> | ||
</pydev_pathproperty> | ||
</pydev_project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
''' | ||
@author: hakuri | ||
''' | ||
from numpy import * | ||
import matplotlib.pyplot as plt | ||
def loadDataSet(fileName):
    """Parse a tab-delimited text file of numbers into a list of rows.

    Args:
        fileName: Path of the file to read. Every non-blank line must hold
            tab-separated values that ``float()`` accepts.

    Returns:
        A list with one list of floats per non-blank line, in file order.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to float.
    """
    dataMat = []
    # The context manager closes the file even if a conversion fails.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            # Build a real list so the rows are indexable on Python 3 too.
            dataMat.append([float(field) for field in stripped.split('\t')])
    return dataMat
|
||
def distEclud(vecA, vecB):
    """Return the Euclidean distance between two vectors.

    Args:
        vecA: First vector; any array-like of numbers (ndarray, list, tuple).
        vecB: Second vector, same length as ``vecA``.

    Returns:
        The square root of the summed squared element differences.
    """
    # asarray lets plain Python sequences be subtracted element-wise;
    # ndarray inputs pass through without a copy.
    diff = asarray(vecA) - asarray(vecB)
    return sqrt(sum(power(diff, 2)))
|
||
def randCent(dataSet, k):
    """Create ``k`` random centroids inside the bounding box of ``dataSet``.

    Args:
        dataSet: Two-dimensional array-like, one data point per row.
        k: Number of centroids to generate.

    Returns:
        A ``k`` x ``n`` numpy matrix (``n`` = number of columns of
        ``dataSet``); every column is drawn uniformly between that column's
        minimum and maximum in ``dataSet``.
    """
    # Convert once instead of on every column access.
    points = asarray(dataSet)
    n = shape(points)[1]
    # asmatrix is used because the ``mat`` alias is gone in NumPy 2.0.
    centroids = asmatrix(zeros((k, n)))
    for j in range(n):
        column = points[:, j]
        minJ = min(column)
        rangeJ = float(max(column) - minJ)
        # One (k, 1) draw per column, scaled into [minJ, minJ + rangeJ).
        centroids[:, j] = minJ + rangeJ * random.rand(k, 1)
    return centroids
|
||
def kMeans(dataSet, k, distMeas=distEclud, createCent=randCent):
    """Cluster ``dataSet`` into ``k`` groups with the k-means algorithm.

    Points are repeatedly assigned to their nearest centroid and every
    centroid is moved to the mean of its assigned points, until no
    assignment changes. The current centroids are printed once per
    iteration.

    Args:
        dataSet: Two-dimensional array-like, one data point per row.
        k: Number of clusters.
        distMeas: Callable ``(vecA, vecB) -> distance``.
        createCent: Callable ``(dataSet, k) -> initial k x n centroid matrix``.

    Returns:
        A tuple ``(centroids, clusterAssment, lastClusterIdx)``:
        ``centroids`` is the k x n centroid matrix; ``clusterAssment`` is an
        m x 2 matrix holding, per point, its cluster index and its squared
        distance to that cluster's centroid; ``lastClusterIdx`` is the array
        of row indices assigned to the last cluster (index ``k - 1``).
    """
    # Convert once; the data never changes while clustering.
    points = array(dataSet)
    m = shape(points)[0]
    # asmatrix is used because the ``mat`` alias is gone in NumPy 2.0.
    clusterAssment = asmatrix(zeros((m, 2)))
    centroids = createCent(dataSet, k)
    lastClusterIdx = array([], dtype=int)
    clusterChanged = True
    while clusterChanged:
        clusterChanged = False
        # Snapshot of the centroids used for this whole assignment pass.
        centArr = array(centroids)
        for i in range(m):
            # Assign point i to the closest centroid.
            minDist = inf
            minIndex = -1
            for j in range(k):
                distJI = distMeas(centArr[j, :], points[i, :])
                if distJI < minDist:
                    minDist = distJI
                    minIndex = j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            clusterAssment[i, :] = minIndex, minDist ** 2
        print(centroids)
        labels = array(clusterAssment)[:, 0]
        for cent in range(k):
            # Row indices of every point currently assigned to this cluster.
            members = nonzero(labels == cent)[0]
            if members.size > 0:
                centroids[cent, :] = mean(points[members], axis=0)
            # An empty cluster keeps its previous centroid; averaging zero
            # points would produce NaN.
            lastClusterIdx = members
    return centroids, clusterAssment, lastClusterIdx
|
||
def plotBestFit(dataSet, id, centroids):
    """Scatter-plot the data points and the centroids in one figure.

    Points whose row index is contained in ``id`` are drawn as red squares,
    all other points as green dots, and the centroids as larger black dots.
    Only the first two columns of the data and centroids are plotted.

    Args:
        dataSet: Two-dimensional array-like, one data point per row.
        id: Collection of row indices to highlight.
        centroids: Two-dimensional array-like, one centroid per row.
    """
    points = array(dataSet)
    centers = array(centroids)

    inX, inY = [], []
    outX, outY = [], []
    for row in range(shape(points)[0]):
        if row in id:
            inX.append(points[row, 0])
            inY.append(points[row, 1])
        else:
            outX.append(points[row, 0])
            outY.append(points[row, 1])

    centerRows = range(shape(centers)[0])
    centX = [centers[c, 0] for c in centerRows]
    centY = [centers[c, 1] for c in centerRows]

    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.scatter(inX, inY, s=30, c='red', marker='s')
    axes.scatter(outX, outY, s=30, c='green')
    axes.scatter(centX, centY, s=50, c='black')

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
|
||
|
||
if __name__ == '__main__':
    import sys

    # Optional first command-line argument overrides the default data file.
    if len(sys.argv) > 1:
        dataPath = sys.argv[1]
    else:
        dataPath = '/Users/hakuri/Desktop/testSet.txt'
    dataSet = loadDataSet(dataPath)
    # Cluster into two groups and plot them with their centroids.
    centroids, clusterAssment, lastClusterIdx = kMeans(dataSet, 2)
    plotBestFit(dataSet, lastClusterIdx, centroids)
|
||
|
||
|
||
|
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.