forked from opencog/python-destin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cifar_old.py
107 lines (88 loc) · 2.92 KB
/
cifar_old.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
from numpy import *
import cPickle
import scipy.io as io
from random import randrange
def read_cifar_file(fn):
    """Unpickle one CIFAR batch file and return its contents.

    Parameters
    ----------
    fn : str
        Path of the batch file. It is opened in binary mode.

    Returns
    -------
    object
        Whatever ``cPickle.load`` produces for the file. ``load_cifar``
        indexes the result with the ``'data'`` key.

    Raises
    ------
    IOError
        If the file cannot be opened.
    EOFError, cPickle.UnpicklingError
        If the file is empty or is not a valid pickle.
    """
    # NOTE(security): unpickling executes code embedded in the file, so this
    # must only be pointed at batch files from a trusted source.
    # The ``with`` block guarantees the handle is closed even when loading
    # fails part-way through.
    with open(fn, 'rb') as batch_file:
        return cPickle.load(batch_file)
def _load_cifar_training_images(cifar_dir):
    """Read the five CIFAR-10 training batches as an (N, 32, 32, 3) array."""
    # Local import so this block does not depend on a new file-level import.
    import os

    batch_names = ['data_batch_1', 'data_batch_2',
                   'data_batch_3', 'data_batch_4',
                   'data_batch_5']
    batches = [
        asarray(read_cifar_file(os.path.join(cifar_dir, name))['data'])
        for name in batch_names
    ]
    flat = concatenate(batches, axis=0)
    # Every row holds one image laid out as (channel, row, column); move the
    # channel axis last so a spatial slice yields a (psz, psz, 3) patch.
    return flat.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)


def _sample_patches(images, psz, num_patches):
    """Draw ``num_patches`` random psz x psz x 3 patches, one per row."""
    image_count = images.shape[0]
    # A patch may start at any offset in 0 .. 32 - psz inclusive.
    offset_count = images.shape[1] - psz + 1
    patches = empty((num_patches, psz * psz * 3))
    for i in range(num_patches):
        im = randrange(image_count)
        a = randrange(offset_count)
        b = randrange(offset_count)
        patches[i] = images[im, a:a + psz, b:b + psz, :].reshape(-1)
    return patches


def _whitening_matrix(patches, eps=1e-9):
    """Return E * diag(1 / sqrt(d + eps)) * E^T for the patch covariance."""
    patch_cov = cov(patches, rowvar=0)
    # The covariance matrix is symmetric, so eigh is the appropriate solver:
    # it returns real eigenvalues and orthonormal eigenvectors.
    eigvals, eigvecs = linalg.eigh(patch_cov)
    # eps regularises the eigenvalues only (not the off-diagonal entries);
    # the clamp guards against tiny negative values from round-off.
    inv_sqrt = 1.0 / sqrt(maximum(eigvals, 0.0) + eps)
    # Scaling the columns of E by inv_sqrt equals E.dot(diag(inv_sqrt)).
    return (eigvecs * inv_sqrt).dot(eigvecs.T)


def load_cifar(psz=4,
               cifar_dir='/home/teddy/Desktop/PyDeSTIN/cifar-10-batches-py/',
               num_patches=200000):
    """Compute patch normalisation and whitening statistics from CIFAR-10.

    Random ``psz`` x ``psz`` colour patches are sampled from the training
    images, standardised per dimension, and used to build a whitening
    matrix, following the notation of
    http://web.eecs.utk.edu/~itamar/Papers/ICMLA2012_Derek.pdf

    Only the ``'data'`` arrays of the five training batches are read; the
    labels and the test batch do not influence the returned values.

    Parameters
    ----------
    psz : int
        Side length of the square patches, between 1 and 32.
    cifar_dir : str
        Directory containing ``data_batch_1`` .. ``data_batch_5``.
    num_patches : int
        Number of random patches to sample.

    Returns
    -------
    dict
        ``'patch_mean'`` and ``'patch_std'``: per-dimension mean and standard
        deviation of the raw patches, each of length ``psz * psz * 3``.
        ``'whiten_mat'``: square whitening matrix computed from the
        standardised patches.

    Raises
    ------
    ValueError
        If ``psz`` is outside 1..32.
    """
    if not 1 <= psz <= 32:
        raise ValueError('psz must be between 1 and 32, got %r' % (psz,))

    images = _load_cifar_training_images(cifar_dir)
    patches = _sample_patches(images, psz, num_patches)

    # Statistics are taken on the raw patches before they are standardised.
    patch_mean = mean(patches, axis=0)
    patch_std = std(patches, axis=0)

    # Zero mean and unit variance, done in place to avoid extra copies.
    patches -= patch_mean
    patches /= patch_std

    ret = {}
    ret['patch_mean'] = patch_mean
    ret['patch_std'] = patch_std
    ret['whiten_mat'] = _whitening_matrix(patches)
    return ret
# vts = {}
# vts['images'] = patches
# io.savemat('/home/syoung22/Data/cifar.mat',vts)
# Script entry point: run the statistics computation with default settings.
if __name__ == "__main__":
    load_cifar()