-
Notifications
You must be signed in to change notification settings - Fork 0
/
KNN implementation from scratch.py
88 lines (42 loc) · 1.38 KB
/
KNN implementation from scratch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env python
# coding: utf-8
# In[22]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from collections import Counter;
# In[33]:
# Load the Wisconsin breast-cancer dataset bundled with scikit-learn
# (569 samples, 30 numeric features, binary target).
dataset=datasets.load_breast_cancer();
# Hold out 20% of the data as a test set; random_state=0 makes the
# split reproducible across runs.
X_train,X_test,Y_train,Y_test=train_test_split(dataset.data,dataset.target,test_size=0.2,random_state=0);
# In[17]:
def train(X, Y):
    """No-op 'fit' step for K-NN.

    K-nearest-neighbours is a lazy learner: there is no model to build
    up front, so training intentionally does nothing. All of the work
    happens at prediction time, where the raw training data is compared
    against each query sample.
    """
    return
# In[28]:
def predict_one(X_train, Y_train, X_test, K):
    """Classify one sample by majority vote among its K nearest neighbours.

    Parameters
    ----------
    X_train : 2-D numpy array of training samples (one row per sample).
    Y_train : 1-D array-like of training labels, aligned with X_train rows.
    X_test : 1-D numpy array — the single sample to classify.
    K : number of neighbours that vote.

    Returns the most common label among the K training rows closest to
    X_test by squared Euclidean distance (the square root is omitted
    because it does not change the distance ordering).
    """
    # Pair each squared distance with its row index so the label can be
    # recovered after sorting. Tuples sort by distance first, then by
    # index, matching the original tie-breaking behaviour.
    distances = [(((row - X_test) ** 2).sum(), idx)
                 for idx, row in enumerate(X_train)]
    distances.sort()
    # Collect the labels of the K nearest neighbours and take the mode;
    # Counter breaks count ties by insertion (i.e. nearest-first) order.
    nearest_labels = [Y_train[idx] for _, idx in distances[:K]]
    return Counter(nearest_labels).most_common(1)[0][0]
# In[24]:
def predict(X_train, Y_train, X_tests_data, K):
    """Classify every row of X_tests_data with K-NN.

    Parameters
    ----------
    X_train : 2-D numpy array of training samples.
    Y_train : 1-D array-like of training labels.
    X_tests_data : 2-D numpy array of samples to classify (one per row).
    K : number of neighbours that vote for each sample.

    Returns a list of predicted labels, one per row of X_tests_data.
    """
    # Delegate each sample to predict_one. (The original stored each
    # result in a local named `predict`, shadowing this function's own
    # name — harmless here but confusing; a comprehension avoids it.)
    return [predict_one(X_train, Y_train, sample, K) for sample in X_tests_data]
# In[29]:
# K is hard-coded to 7 here; the optimal K should really be selected by
# cross-validation on the training set.
Y_predict=predict(X_train,Y_train,X_test,7);
# Fraction of test samples whose predicted label matches the true label.
print(accuracy_score(Y_test,Y_predict));
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]: