forked from opencog/python-destin
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload_data.py
More file actions
150 lines (129 loc) · 4.63 KB
/
Copy pathload_data.py
File metadata and controls
150 lines (129 loc) · 4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# -*- coding: utf-8 -*-
import cPickle
import os
from random import randrange

import numpy as np
from numpy import *
# Directory that holds the CIFAR batch files read by the loaders in this
# module ('data_batch_1' .. 'data_batch_5' and 'test_batch').  The value is
# machine-specific; point it at the local copy of the dataset.
cifar_dir = '/home/teddy/Desktop/Cifar/'
# cifar_dir = '/home/eskender/Destin/cifar-10-batches-py/'
# Helpers for loading the CIFAR batch files and for
# cutting out the input patches that are fed to lower-layer nodes.
def read_cifar_file(fn):
    """Load one pickled CIFAR batch file and return the unpickled object.

    Args:
        fn: Path of the batch file to read.

    Returns:
        The object stored in the file.  The loaders in this module index it
        with the 'data' and 'labels' keys.

    Raises:
        IOError (OSError on Python 3): If the file cannot be opened.
    """
    # NOTE(security): unpickling can execute code embedded in the file, so
    # only use this on batch files that come from a trusted source.
    # The context manager closes the handle even when unpickling fails.
    with open(fn, 'rb') as fo:
        return cPickle.load(fo)
def _cifar_rows_to_images(flat_rows):
    """View (N, 3072) channel-major rows as (N, 32, 32, 3) images (no copy)."""
    return flat_rows.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)


def load_cifar(psz=4):
    """Estimate patch statistics and a whitening matrix from CIFAR images.

    Reads the five training batches and the test batch from ``cifar_dir``,
    samples 200000 random ``psz`` x ``psz`` colour patches from the training
    images, standardises them per feature and derives a whitening matrix
    from the covariance of the standardised patches.

    Args:
        psz: Side length, in pixels, of the square patches (at most 32).

    Returns:
        A dict with three entries, each over the ``psz * psz * 3`` patch
        features:
            'patch_mean': per-feature mean of the sampled patches.
            'patch_std':  per-feature standard deviation of the patches.
            'whiten_mat': square whitening matrix.

    Raises:
        ValueError: If ``psz`` is larger than the 32-pixel image side
            (raised by ``randrange``).
    """
    # file strings
    train_files = ['data_batch_1', 'data_batch_2',
                   'data_batch_3', 'data_batch_4',
                   'data_batch_5']
    batch_rows = 10000
    num_train = batch_rows * len(train_files)
    num_patches = 200000

    # gather data
    train_data = np.empty((num_train, 3072))
    train_labels = np.empty(num_train)
    for index, name in enumerate(train_files):
        # os.path.join works whether or not cifar_dir ends with a separator.
        batch = read_cifar_file(os.path.join(cifar_dir, name))
        start = index * batch_rows
        train_data[start:start + batch_rows, :] = batch['data']
        train_labels[start:start + batch_rows] = np.array(batch['labels'])

    batch = read_cifar_file(os.path.join(cifar_dir, 'test_batch'))
    test_data = np.empty((batch_rows, 3072))
    test_data[:, :] = batch['data']
    test_labels = np.empty(batch_rows)
    test_labels[:] = np.array(batch['labels'])

    # reshape data into images: (N, 3072) -> (N, 32, 32, 3)
    train_data = _cifar_rows_to_images(train_data)
    test_data = _cifar_rows_to_images(test_data)

    # get random patches
    patches = np.empty((num_patches, psz * psz * 3))
    # Number of valid top-left offsets along each axis.  The "+ 1" lets a
    # patch end on the last row/column, which was previously never sampled.
    num_offsets = 32 - psz + 1
    for i in range(num_patches):
        im = randrange(num_train)
        a = randrange(num_offsets)
        b = randrange(num_offsets)
        patches[i] = train_data[im, a:a + psz, b:b + psz, :].reshape(-1)

    # get statistics
    patch_mean = patches.mean(axis=0)
    patch_std = patches.std(axis=0)

    # zero mean and unit variance
    patches = (patches - patch_mean) / patch_std

    # whitening stuff using notation from:
    # http://web.eecs.utk.edu/~itamar/Papers/ICMLA2012_Derek.pdf
    eps = 1e-9
    patch_cov = np.cov(patches, rowvar=0)
    # The covariance is symmetric, so eigh is the appropriate solver and
    # always yields real eigenvalues/eigenvectors.
    d, e = np.linalg.eigh(patch_cov)
    # Regularise the eigenvalues themselves (not every matrix entry); the
    # clamp guards against tiny negative values caused by round-off.
    d = np.maximum(d, 0.0) + eps
    # v = E * D^(-1/2) * E^T; dividing the columns of E by sqrt(d) is the
    # same as multiplying by the diagonal matrix D^(-1/2).
    v = (e / np.sqrt(d)).dot(e.T)

    # NOTE: the images and labels are loaded above but are deliberately not
    # part of the result; the entries below are kept disabled as before.
    ret = {}
    # ret['train_data'] = train_data
    # ret['test_data'] = test_data
    # ret['train_labels'] = train_labels
    # ret['test_labels'] = test_labels
    ret['patch_mean'] = patch_mean
    ret['patch_std'] = patch_std
    ret['whiten_mat'] = v
    return ret
def loadCifar(batchNum):
    """Load CIFAR data and labels for one batch or the whole training set.

    Args:
        batchNum: Batch selector.  Values up to 5 read
            'data_batch_<batchNum>' (use 1 to 5 for the training batches),
            6 reads 'test_batch', and any larger value reads and stacks
            'data_batch_1' .. 'data_batch_5'.  Values below 1 are not
            validated; they simply name a file that is unlikely to exist.

    Returns:
        A ``(data, labels)`` tuple.  For a single batch both values are
        returned exactly as stored in the file; for the whole training set
        both are numpy arrays concatenated along axis 0.
    """
    if batchNum == 6:
        names = ['test_batch']
    elif batchNum <= 5:
        names = ['data_batch_' + str(batchNum)]
    else:  # here we will get the whole 50,000x3072 dataset
        names = ['data_batch_' + str(number) for number in range(1, 6)]

    # read_cifar_file is the module's shared loader; reusing it keeps the
    # open/unpickle/close handling in one place.
    batches = [read_cifar_file(os.path.join(cifar_dir, name))
               for name in names]

    if len(batches) == 1:
        return batches[0]['data'], batches[0]['labels']

    data = np.concatenate([batch['data'] for batch in batches], axis=0)
    labels = np.concatenate([batch['labels'] for batch in batches], axis=0)
    return data, labels
def return_node_input(input_, Position, Ratio, mode, image_type):
    """Cut the input patch for one node out of an image array.

    Args:
        input_: Numpy array that is sliced along its first two axes.
        Position: Two-element sequence with the start offsets of the patch
            along axis 0 and axis 1.
        Ratio: Side length of the square patch.
        mode: 'Adjacent' selects non-overlapping patches; any other value is
            treated as the not-yet-implemented overlapping mode.
        image_type: 'Color' means three values per pixel; anything else
            means one value per pixel.

    Returns:
        For 'Adjacent' mode, the patch reshaped to
        ``(1, Ratio * Ratio * depth)``.  For any other mode, an empty array
        (after printing a notice).

    Raises:
        ValueError: If the slice does not contain exactly
            ``Ratio * Ratio * depth`` values, e.g. when the patch runs past
            the edge of the image.
    """
    if mode != 'Adjacent':
        # TODO Overlapping Patch could be fed to a node
        print('Overlapping Patches Are Not Implemented Yet')
        # Return the placeholder; it used to be stored under a different
        # name, so this branch failed with an unbound-variable error.
        return np.array([])

    # Non overlapping or Adjacent Patches
    depth = 3 if image_type == 'Color' else 1
    row, col = Position[0], Position[1]
    window = input_[row:row + Ratio, col:col + Ratio]
    return window.reshape(1, Ratio * Ratio * depth)