forked from pandamt/estimation
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_natural_data.py
More file actions
167 lines (128 loc) · 5.06 KB
/
create_natural_data.py
File metadata and controls
167 lines (128 loc) · 5.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# Four conditions in Experiment 2
# 1) CTA and f(N)=1000 (constant)
# 2) CTA and f(N)∝1/(N^2)
# 3) CAA and f(N)=1000 (constant)
# 4) CAA and f(N)∝1/(N^2)
# where CTA denotes constant total area, CAA denotes constant average area
import sys
import numpy as np
import random
from model_settings import img_height, img_width, min_edge, max_edge, min_blobs_train, max_blobs_train, min_blobs_test, max_blobs_test # MT
def get_S(max_blobs):
"""Get the denominator of proportion of blobs with a specific number of blobs."""
S = 0.0
for i in range(1, max_blobs):
S += i ** (-2)
return S
def get_size(testing, num_blobs, max_blobs):
"""Get amount of images for each number"""
if testing:
return 1000 # even: make 1000 images for each number
else:
return int(10000*(num_blobs**(-2))/get_S(max_blobs)) # uneven distribution
def get_total(testing, min_blobs, max_blobs): # MT
"""Get total number of images."""
c = min_blobs
total = 0
while (c < max_blobs + 1):
total = total + get_size(testing, c, max_blobs)
c = c + 1
return total
def get_k(testing=False):
"""Get the constant scale factor for an individual blob."""
if testing:
return 3 # testing: k is set to 3 throughout
else:
return 3 # CAA
#return random.randint(2, 4) # CTA
# training: k is chosen from the uniform distribution of discrete values 2, 3, and 4
def get_s(testing=False, n=None):
"""Get the relative scale factor."""
if testing:
if n is not None:
return random.uniform(.8*3 * (n ** (-.5)), 1.2*3 * (n ** (-.5))) # CTA, first 500 displays
else:
return random.uniform(.8, 1.2) # CAA, latter 500 displays
else:
return random.uniform(.8, 1.2) # CAA
#return random.uniform(.8*2 * (n ** (-.3154)), 1.2*2 * (n ** (-.3154))) # CTA
def pir(x):
"""Perform probabilistic integer rounding"""
f, rem = divmod(x, 1)
retval = int(f)
if np.random.uniform() < rem:
retval = retval + 1
return retval
def get_dims(testing, i, num_blobs):
"""Get the dimensions of the ith blob."""
k1 = get_k(testing)
if testing:
k2 = k1
else:
k2 = get_k(testing)
if testing and i >= 500:
s = get_s(testing)
elif testing and i < 500: # equal total area
s = get_s(testing, num_blobs)
elif not testing:
s = get_s(False, num_blobs)
width = pir(k1 * s)
height = pir(k2 * s)
return width, height
def generate_data(testing, min_blobs, max_blobs): # MT
n_labels = max_blobs_train - min_blobs_train + 1
total = get_total(testing, min_blobs, max_blobs)
train = np.zeros([total, img_height*img_width]) # input img size
label = np.zeros([total, n_labels])
area = np.zeros(total)
#total_area_blobs = np.zeros([total, 1])
#mean_area_blobs = np.zeros([total, 1])
num_blobs = min_blobs
img_count = 0
#average_edge = []
while (num_blobs < max_blobs + 1):
nOfItem = get_size(testing, num_blobs, max_blobs) # testing: 1000; training, 10000*num_blobs**(-2)/sum(i**(-2))
i = 0 # amount of images for each blob number
#sum_edge = 0.0
#count_edge = 0.0
while (i < nOfItem):
img = np.zeros(img_height*img_width) # input img size
num_count = 0 # amount of blobs in each image
used = np.zeros((num_blobs, 4)) # check overlapping
total_area = 0.0
while num_count < num_blobs:
width, height = get_dims(testing, i, num_blobs)
cX = random.randint(1, 99-width) # top left corner
cY = random.randint(1, 99-height)
index = 0
while index < num_count:
if cX+width+1 <= used[index, 0] or used[index, 0]+1+used[index, 2] <= cX or used[index, 1]+1+used[index,3] <= cY or cY+height+1<=used[index,1]: # check for no overlapping blobs
index = index + 1
else:
cX = random.randint(1, 99-width)
cY = random.randint(1, 99-height)
index = 0
used[index, 0] = cX
used[index, 1] = cY
used[index, 2] = width
used[index, 3] = height
total_area += width * height
#sum_edge += width
#count_edge += 1.0
#sum_edge += height
#count_edge += 1.0
for p in range(cY, cY+height):
for q in range(cX, cX+width):
img[p*img_width+q] = 255
num_count += 1
train[img_count] = img
label[img_count, num_blobs - 1] = 1
area[img_count] = total_area
#total_area_blobs[img_count] = total_area
#mean_area_blobs[img_count] = total_area / num_blobs
img_count += 1
i += 1
#average_edge.append(sum_edge / count_edge)
num_blobs += 1
np.set_printoptions(threshold=np.nan)
return train, label, area