-
Notifications
You must be signed in to change notification settings - Fork 0
/
sample2.py
37 lines (28 loc) · 1.25 KB
/
sample2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#in this code, we have a data of automobiles containing their mpg, cylinderss, displacement, horsepower, weight, acceleration, year, origin, and name, we want to use kmeans classification to make 5 classes of similar cars, and output their names. To get Auto.csv, use wget http://www-bcf.usc.edu/~gareth/ISL/Auto.csv
import numpy as np
import cv2
import csv
#read the file Auto.csv
reader = csv.reader(file('/YourPath/Auto.csv', 'rb'), delimiter=',')
list = list(reader)
#out put csv to an array
oldArray = np.array(list)
newArray = []
#there are some invalid data '?' in Auto data, skip it
for line in range(1, oldArray.shape[0]):
if '?' not in oldArray[line, :]:
newArray.append(oldArray[line, :])
newArray = np.array(newArray)
#use the auto parameters except their name as input
input = newArray[:, 0:7].astype(np.float32)
#make a criteria, which is the accuracy of the kmeans, then do kmeans with k = 5
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
ret,label,center=cv2.kmeans(input,5,None,criteria,10,cv2.KMEANS_RANDOM_CENTERS)
#get the results, which are 5 cluster of cars
result = []
for tmp in range(0, 5):
result.append(newArray[label.ravel() == tmp, 8])
result = np.array(result)
for tmp in range(0, 5):
print result[tmp]
print