Skip to content

Commit 7054c73

Browse files
committed
refactoring test module
1 parent eca3b60 commit 7054c73

26 files changed

+167
-148
lines changed

pytrain/lib/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
12
from convert import *
2-
from batch import *
3+
from autotest import *
34
from nlp import *
45
from fs import *
56
from normalize import *

pytrain/lib/batch.py pytrain/lib/autotest.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
#
2-
# library for batch processing module
2+
# library for autotest processing module
33
#
44
# @ author becxer
55
66
#
77

8-
from numpy import *
98
import operator
109
import math
1110
import sys
1211

13-
1412
# abstracted evaluation logic
1513
# p_module is a pytrain module that has already been trained
1614
def eval_predict(p_module, mat_test, label_test, log_on = True):
@@ -25,7 +23,7 @@ def eval_predict(p_module, mat_test, label_test, log_on = True):
2523

2624
def eval_predict_one(p_module, input_array_test, label_one_test, log_on = True):
2725
res = p_module.predict(input_array_test)
28-
if log_on : print "input : '" + str(input_array_test[:2]) + \
26+
if log_on : print "input : '" + str(input_array_test[:3]) + \
2927
"' --> predicted : '" + str(res) + "' --? origin : '" \
3028
+ str(label_one_test) + "'"
3129
if list(str(res)) != list(str(label_one_test)) :

pytrain/lib/dataset.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import os, struct
22
from array import array as pyarray
33
from numpy import append, array, int8, uint8, zeros
4+
import numpy as np
5+
from pytrain.lib import fs
46

57
def load_mnist(path=".", dataset="training", digits=np.arange(10)):
68

@@ -32,10 +34,17 @@ def load_mnist(path=".", dataset="training", digits=np.arange(10)):
3234
images[i] = array(img[ ind[i]*rows*cols : (ind[i]+1)*rows*cols ]).reshape((rows, cols))
3335
labels[i] = lbl[ind[i]]
3436

35-
return images, labels:
37+
return images, labels
3638

3739
def load_iris(path=".", dataset="training"):
38-
pass
39-
40-
41-
40+
41+
sample_data = os.path.join(path, "iris.csv")
42+
dmat_train, dlabel_train, dmat_test, dlabel_test \
43+
= fs.csv_loader(sample_data, 0.2)
44+
45+
if dataset == "training":
46+
return dmat_train, dlabel_train
47+
elif dataset == "testing":
48+
return dmat_test, dlabel_test
49+
else:
50+
raise ValueError("dataset must be 'testing' or 'training'")

pytrain/lib/fs.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# to matrix_train, label_train, matrix_test, label_test
1717
# according to ho_ratio
1818
# ho_ratio is the fraction of rows held out as the test set
19-
def f2mat(filename, ho_ratio):
19+
def csv_loader(filename, ho_ratio):
2020
fr = open(filename)
2121
lines = fr.readlines()
2222
mat_train = []
@@ -45,8 +45,8 @@ def f2mat(filename, ho_ratio):
4545
return mat_train, label_train, mat_test, label_test
4646

4747

48-
def f2wordmat(filename, ho_ratio, nlp_lib):
49-
wmat = f2mat(filename, ho_ratio)
48+
def csv_loader_with_nlp(filename, ho_ratio, nlp_lib):
49+
wmat = csv_loader(filename, ho_ratio)
5050
wmat_train, label_train = wmat[:2]
5151

5252
mat_train = []
@@ -56,12 +56,12 @@ def f2wordmat(filename, ho_ratio, nlp_lib):
5656
vocabulary = nlp_lib.extract_vocabulary(wmat_train)
5757

5858
for row in wmat_train:
59-
mat_train.append(nlp_lib.bag_of_words2vector(vocabulary, row))
59+
mat_train.append(nlp_lib.bag_of_word2vector(vocabulary, row))
6060

6161
if len(wmat) > 2 and ho_ratio != 0:
6262
wmat_test, label_test = wmat[2:4]
6363
for row in wmat_test:
64-
mat_test.append(nlp_lib.bag_of_words2vector(vocabulary, row))
64+
mat_test.append(nlp_lib.bag_of_word2vector(vocabulary, row))
6565

6666
if ho_ratio == 0:
6767
return mat_train,label_train, vocabulary

pytrain/lib/nlp.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def extract_vocabulary(self, documents):
6969
vocabulary = vocabulary | set(ndoc)
7070
return list(vocabulary)
7171

72-
def set_of_words2vector(self, vocabulary, sentence):
72+
def set_of_word2vector(self, vocabulary, sentence):
7373
voca_vector = [0] * len(vocabulary)
7474
if str(type(sentence).__name__) == 'str':
7575
sentence = self.split2words(sentence)
@@ -78,7 +78,7 @@ def set_of_words2vector(self, vocabulary, sentence):
7878
voca_vector[vocabulary.index(word)] = 1
7979
return voca_vector
8080

81-
def bag_of_words2vector(self, vocabulary, sentence):
81+
def bag_of_word2vector(self, vocabulary, sentence):
8282
voca_vector = [0] * len(vocabulary)
8383
if str(type(sentence).__name__) == 'str':
8484
sentence = self.split2words(sentence)

run_dev.sh

-4
This file was deleted.

sample_data/iris/train.csv sample_data/iris/iris.csv

+30
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,33 @@
118118
0 5.2 3.4 1.4 0.2
119119
1 6.9 3.1 4.9 1.5
120120
2 6.5 3.0 5.8 2.2
121+
0 4.4 2.9 1.4 0.2
122+
1 6.7 3.1 4.7 1.5
123+
2 6.5 3.0 5.2 2.0
124+
0 5.1 3.3 1.7 0.5
125+
1 5.7 2.8 4.1 1.3
126+
2 6.5 3.2 5.1 2.0
127+
0 5.1 3.5 1.4 0.3
128+
1 5.7 2.6 3.5 1.0
129+
2 7.1 3.0 5.9 2.1
130+
0 5.1 3.8 1.6 0.2
131+
1 5.5 2.5 4.0 1.3
132+
2 7.9 3.8 6.4 2.0
133+
0 5.8 4.0 1.2 0.2
134+
1 5.0 2.3 3.3 1.0
135+
2 6.4 2.8 5.6 2.2
136+
0 5.2 3.5 1.5 0.2
137+
1 5.6 2.5 3.9 1.1
138+
2 7.2 3.2 6.0 1.8
139+
0 4.6 3.1 1.5 0.2
140+
1 5.5 2.4 3.7 1.0
141+
2 7.2 3.6 6.1 2.5
142+
0 5.7 4.4 1.5 0.4
143+
1 5.6 3.0 4.1 1.3
144+
2 4.9 2.5 4.5 1.7
145+
0 4.8 3.0 1.4 0.1
146+
1 6.3 3.3 4.7 1.6
147+
2 6.0 2.2 5.0 1.5
148+
0 4.8 3.4 1.9 0.2
149+
1 5.8 2.7 4.1 1.0
150+
2 6.3 2.9 5.6 1.8

sample_data/iris/test.csv

-30
This file was deleted.
7.48 MB
Binary file not shown.
-1.57 MB
Binary file not shown.
9.77 KB
Binary file not shown.
-4.44 KB
Binary file not shown.
44.9 MB
Binary file not shown.
-9.45 MB
Binary file not shown.
58.6 KB
Binary file not shown.
-28.2 KB
Binary file not shown.

test.py

100644100755
+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1+
#!/usr/bin/python
12
from test_pytrain import test_main
23

test_pytrain/test_KNN/test_KNN.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from test_pytrain import test_Suite
88
from pytrain.KNN import KNN
99
from pytrain.lib import fs
10-
from pytrain.lib import batch
10+
from pytrain.lib import autotest
1111

1212

1313
class test_KNN(test_Suite):
@@ -20,8 +20,8 @@ def test_process(self):
2020
sample_label = ['A','A','B','B']
2121
knn = KNN(sample_mat, sample_label, 3, 'manhattan')
2222

23-
r1 = batch.eval_predict_one(knn, [0.9,0.9] , 'A', self.logging)
24-
r2 = batch.eval_predict_one(knn, [0.1,0.4] , 'B', self.logging)
23+
r1 = autotest.eval_predict_one(knn, [0.9,0.9] , 'A', self.logging)
24+
r2 = autotest.eval_predict_one(knn, [0.1,0.4] , 'B', self.logging)
2525

2626
assert r1 == True
2727
assert r2 == True
@@ -36,6 +36,6 @@ def test_process(self):
3636
dg_mat_train, dg_label_train = fs.f2mat("sample_data/digit/digit-train.txt",0)
3737
dg_mat_test, dg_label_test = fs.f2mat("sample_data/digit/digit-test.txt",0)
3838
knn_digit = KNN(dg_mat_train, dg_label_train, 3, 'euclidean')
39-
error_rate = batch.eval_predict(knn_digit, dg_mat_test, dg_label_test, self.logging)
39+
error_rate = autotest.eval_predict(knn_digit, dg_mat_test, dg_label_test, self.logging)
4040
self.tlog("digit predict (with basic knn) error rate :" + str(error_rate))
4141

test_pytrain/test_lib/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from test_fs import *
22
from test_normalize import *
3-
from test_batch import *
3+
from test_autotest import *
44
from test_nlp import *
5+
from test_dataset import *
+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#
2+
# test lib.autotest
3+
#
4+
# @ author becxer
5+
6+
#
7+
from test_pytrain import test_Suite
8+
from pytrain.KNN import KNN
9+
from pytrain.lib import autotest
10+
11+
class test_autotest(test_Suite):
12+
13+
def __init__(self, logging = True):
14+
test_Suite.__init__(self, logging)
15+
16+
def test_process(self):
17+
normed_dmat_train = self.get_global_value('normed_iris_mat_train')
18+
normed_dmat_test = self.get_global_value('normed_iris_mat_test')
19+
dlabel_train = self.get_global_value('iris_label_train')
20+
dlabel_test = self.get_global_value('iris_label_test')
21+
22+
knn_date = KNN(normed_dmat_train, dlabel_train, 3, 'euclidean')
23+
error_rate = autotest.eval_predict(knn_date, normed_dmat_test, dlabel_test, self.logging)
24+
self.tlog("date predict (with basic knn) error rate : " + str(error_rate))

test_pytrain/test_lib/test_batch.py

-27
This file was deleted.

test_pytrain/test_lib/test_dataset.py

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#
2+
# test lib.dataset
3+
#
4+
# @ author becxer
5+
6+
#
7+
from test_pytrain import test_Suite
8+
from pytrain.lib import dataset
9+
10+
class test_dataset(test_Suite):
11+
12+
def __init__(self, logging = True):
13+
test_Suite.__init__(self, logging)
14+
15+
def test_load_iris(self):
16+
iris_mat_train, iris_label_train = dataset.load_iris("sample_data/iris", "training")
17+
iris_mat_test, iris_label_test = dataset.load_iris("sample_data/iris", "testing")
18+
self.tlog("iris train data size : " + str(len(iris_mat_train)))
19+
self.tlog("iris test data size : " + str(len(iris_mat_test)))
20+
21+
def test_load_mnist(self):
22+
mnist_mat_train, mnist_label_train \
23+
= dataset.load_mnist("sample_data/mnist", "training", [0,1,2,3,4])
24+
mnist_mat_test, mnist_label_test \
25+
= dataset.load_mnist("sample_data/mnist", "testing", [0,1,2,3,4])
26+
self.tlog("mnist train data size : " + str(len(mnist_mat_train)))
27+
self.tlog("mnist test data size : " + str(len(mnist_mat_test)))
28+
29+
def test_process(self):
30+
self.test_load_iris()
31+
self.test_load_mnist()

test_pytrain/test_lib/test_fs.py

+16-21
Original file line numberDiff line numberDiff line change
@@ -13,39 +13,34 @@ class test_fs(test_Suite):
1313
def __init__(self, logging = True):
1414
test_Suite.__init__(self, logging)
1515

16-
def test_fs_f2mat(self):
17-
sample_data = "sample_data/dating/date_info.txt"
16+
def test_fs_csv_loader(self):
17+
sample_data = "sample_data/iris/iris.csv"
1818
self.tlog("loading matrix => " + sample_data)
1919

2020
dmat_train, dlabel_train, dmat_test, dlabel_test \
21-
= fs.f2mat(sample_data, 0.1)
22-
assert len(dmat_train) == 900
23-
assert len(dlabel_train) == 900
24-
assert len(dmat_test) == 100
25-
assert len(dlabel_test) == 100
21+
= fs.csv_loader(sample_data, 0.2)
2622

27-
self.set_global_value('dmat_train',dmat_train)
28-
self.set_global_value('dlabel_train',dlabel_train)
29-
self.set_global_value('dmat_test',dmat_test)
30-
self.set_global_value('dlabel_test',dlabel_test)
23+
self.tlog('iris train data size : ' + str(len(dmat_train)))
24+
self.tlog('iris test data size : ' + str(len(dmat_test)))
3125

26+
self.set_global_value('iris_mat_train',dmat_train)
27+
self.set_global_value('iris_label_train',dlabel_train)
28+
self.set_global_value('iris_mat_test',dmat_test)
29+
self.set_global_value('iris_label_test',dlabel_test)
3230

33-
def test_fs_f2wordmat(self):
34-
sample_words = "sample_data/email/email_word_small.txt"
31+
def test_fs_csv_loader_with_nlp(self):
32+
sample_words = "sample_data/email/email.csv"
3533
self.tlog("loading words => " + sample_words)
3634

37-
3835
nlp_eng = nlp("eng")
3936
wordmat_train, wordlabel_train, voca, wordmat_test, wordlabel_test \
40-
= fs.f2wordmat(sample_words, 0.1, nlp_eng)
41-
42-
assert len(voca) == 7
43-
assert len(wordmat_train) == 4
44-
assert len(wordlabel_train) == 4
37+
= fs.csv_loader_with_nlp(sample_words, 0.1, nlp_eng)
4538

39+
self.tlog('email data voca size : ' + str(len(voca)))
40+
self.tlog('voca sample : ' + str(voca[:5]))
4641

4742
def test_process(self):
48-
self.test_fs_f2mat()
49-
self.test_fs_f2wordmat()
43+
self.test_fs_csv_loader()
44+
self.test_fs_csv_loader_with_nlp()
5045
# To see test of storing module, check test_decision_tree
5146

0 commit comments

Comments
 (0)