Tensorflow-Style-Transfer/mask_style_transfer.py at master · aefernandes/Tensorflow-Style-Transfer · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
"""
Tensorflow Implementation of Gradient Mask based Style Transfer

Academic Paper:
Author: Ying Chan
GitHub: https://github.com/yc5915/Tensorflow-Implementations/

"""

import skimage.io
import skimage.transform
import tensorflow as tf
import numpy as np

"""
Model Creation Functions
"""

def VGG19(img, vgg_path='vgg19.npy'):
    """
    Description:
        Creates a pre-trained VGG19 model without the Fully Connected Layers.
        This allows the input image to have a variable size.

        link to download vgg19.npy @ https://github.com/machrisaa/tensorflow-vgg

    Args:
        img: TF Tensor with shape [N,H,W,C]. Values should be in range 0.0-1.0
        vgg_path: path to vgg19.npy

    Returns:
        vgg: dictionary of layers in VGG19 model
    """

    # Scale img to 0.0-255.0 and split into R, G, B channels
    red, green, blue = tf.split(img * 255.0, num_or_size_splits=3, axis=3)

    # Normalize the input image by subtracting mean
    # Gather the channels in B, G, R order
    vgg_mean = [103.939, 116.779, 123.68]
    bgr = tf.concat(axis=3, values=[
        blue - vgg_mean[0],
        green - vgg_mean[1],
        red - vgg_mean[2],
    ])

    # Load pre-trained VGG19 parameters
    vgg_params = np.load(vgg_path, encoding='latin1').item()

    # Note that we do not include FC layers
    vgg_layers = ["conv1_1", "conv1_2", "pool1",
              "conv2_1", "conv2_2", "pool2",
              "conv3_1", "conv3_2", "conv3_3", "conv3_4", "pool3",
              "conv4_1", "conv4_2", "conv4_3", "conv4_4", "pool4",
              "conv5_1", "conv5_2", "conv5_3", "conv5_4", "pool5"]

    # Dictionary to store the layers
    vgg = {}

    # Create VGG19 model and load pre-trained parameters
    curr = bgr
    for layer in vgg_layers:
        if layer[:4] == "conv":
            curr = tf.nn.conv2d(curr, filter=vgg_params[layer][0],
                                     strides=[1, 1, 1, 1], padding='SAME')
            curr += vgg_params[layer][1]
            curr = tf.nn.relu(curr)
        elif layer[:4] == "pool":
            curr = tf.nn.max_pool(curr, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1], padding='SAME')
        vgg[layer] = curr

    return vgg


def FeatureLayer(layer):
    """
    Description:
        Flattens 2D images into 1D vectors

    Args:
        layer: TF Tensor with shape [N,H,W,C] (expecting N=1 image)

    Returns:
        Flattened layer with shape [D,C] where:
            D = NxHxW   No. of Features in a Column Vector
            C           Number of Column Vectors
    """
    shape = layer.get_shape().as_list()
    return tf.reshape(layer, shape=[-1, shape[3]])


def StyleLayer(layer):
    """
    Description:
        Calculates a Gramm Matrix from Array of Column Vectors
        (uses all Features in a Column Vector)

    Args:
        layer: TF Tensor with shape [D,C] (expecting FeatureLayer output)

    Returns:
        Gramm Matrix with shape [C,C]
    """
    return tf.matmul(a=layer, b=layer, transpose_a=True)


def StyleLayerMask(layer, mask):
    """
    Description:
        Multiplies the mask with the Features from Array of Column Vectors before
        calculating a Gramm Matrix

    Args:
        layer: TF Tensor with shape [D,C] (expecting FeatureLayer output)
        mask: array with shape [D,1]. Values should be in range 0.0-1.0
                where 0 = Mask, 1 = Not Masked

    Returns:
        Gramm Matrixs with shape [C,C]
    """
    # mask_w has shape [D,1]
    mask_w = tf.constant(mask, dtype=tf.float32)

    # layer_masked has shape [D,C]
    layer_masked = tf.multiply(layer, mask_w)

    # Calculate Gramm Matrix with masked Features
    return StyleLayer(layer_masked)


"""
Helper Functions
"""
def NHWC(img):
    """
    Description:
        Transforms image with shape [H,W,C] to [N,H,W,C] where N = 1
    """
    return np.expand_dims(a=img, axis=0).astype(np.float32)

"""
Parameters
"""
# Define layers of VGG19 model to use for Content and Style
content_layers = ["conv4_2"]
style_layers = ["conv2_1","conv3_1"]

# Load images & scale values from 0.0-255.0 to 0.0-1.0
content_img = skimage.io.imread("images\\lion.jpg") / 255.0
style_img = skimage.io.imread("images\\wave.jpg") / 255.0


# Load gradient mask. We apply the mask to the content image
mask = skimage.io.imread("images\\gradient_mask.jpg") / 255.0

# we turn the image into grayscale with values 0.0-1.0 by reducing the RGB
# channels into 1 channel by taking calculating the euclidean "distance"
# before dividing by the maximum possible distance sqrt(1^2 + 1^2 + 1^2)
mask = np.linalg.norm(mask,axis=2) / np.sqrt(3)


"""
Resize Mask
"""
print("Resizing Mask for Content Image")

# Dictionary to store the flattened masks
flat_masks = {}

# unique no. of times layers in style layers have been pooled
# e.g. both conv2_1 and conv2_2 have been pooled only once
pool_layers = set([int(layer[4]) for layer in style_layers])


mask_shape = np.array(mask.shape)
for pool in range(1,6):
    if pool in pool_layers:
        # Resize the mask and flatten it
        flat_masks[pool] = skimage.transform.resize(mask, mask_shape).reshape([-1,1])

    # resize the mask the same way as VGG19's max pool layers
    mask_shape[0:2] = (mask_shape[0:2] + 1) // 2


"""
Evaluate Content and Style Image
"""
print("Building Model to Evaluate Content & Style Image")

tf.reset_default_graph()

# Build a Model with variable sized image
img = tf.placeholder(tf.float32, shape=[1,None,None,3])
vgg = VGG19(tf.Variable(img, dtype=tf.float32, validate_shape=False))

# for every layer in style and content layers, create a feature layer
for layer in set([*content_layers, *style_layers]):
    vgg["feat_" + layer] = FeatureLayer(vgg[layer])

# for every layer in style layers, create a style layer
for layer in style_layers:
    pool = int(layer[4])
    # we don't apply the mask here because we want the style of the full style image
    vgg["style_" + layer] = StyleLayer(vgg["feat_" + layer])


# Dictionaries to store Content and Style Layer outputs
content = {}
style = {}

with tf.Session() as sess:
    print("Evaluating Content Image")
    # initialize img as Content Image
    sess.run(tf.global_variables_initializer(), feed_dict={img: NHWC(content_img)})
    for layer in content_layers:
        name = "feat_" + layer
        content[name] = vgg[name].eval()

    print("Evaluating Style Image")
    # initialize img as Style Image
    sess.run(tf.global_variables_initializer(), feed_dict={img: NHWC(style_img)})
    for layer in style_layers:
        name = "style_" + layer
        style[name] = vgg[name].eval()


"""
Style Transfer Model
"""
print("Building Model to Transfer Style")

tf.reset_default_graph()

# Build a Model with Content Image (alternatively can use random noise)
# We use Variable instead of placeholder as we wish to make use of Tensorflow's optimizers
img = tf.Variable(NHWC(content_img))
vgg = VGG19(img)

# for every layer in style and content layers, create a feature layer
for layer in set([*content_layers, *style_layers]):
    vgg["feat_" + layer] = FeatureLayer(vgg[layer])

# for every layer in style layers and for every region, create a style layer
for layer in style_layers:
    pool = int(layer[4])
    vgg["style_" + layer] = StyleLayerMask(vgg["feat_" + layer], flat_masks[pool])


"""
Loss Function
"""

print("Creating Loss Function")

content_loss = 0
for layer in content_layers:
    name = "feat_" + layer
    content_loss += tf.reduce_mean(tf.square(content[name] - vgg[name]))

content_loss /= len(content_layers) # each content layer has equal weight


style_loss = 0
for layer in style_layers:
    name = "style_" + layer
    style_loss += tf.reduce_mean(tf.square(style[name] - vgg[name]))

style_loss /= len(style_layers) # each style layer has equal weight


# Total variation loss (encourages adjacent pixels to be similar color)
tv_loss = tf.reduce_mean(((img[:,1:,:-1,:] - img[:,:-1,:-1,:])**2 + (img[:,:-1,1:,:] - img[:,:-1,:-1,:])**2)**1.25)


loss = 5*content_loss + 25*style_loss + 1*tv_loss


"""
Backprop
"""
print("Stylising")

# Backpropagate error to img using scipy's L-BFGS optimizer
train_step =tf.contrib.opt.ScipyOptimizerInterface(loss,
                                                   var_list=[img],
                                                   options={"maxiter":200, "disp" : True})

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_step.minimize(sess,
                        fetches=[loss],
                        loss_callback=lambda x : print("loss: %f"%x))

    # img is TF Tensor with shape [N,H,W,C]. Discard the first dimension
    stylised_img = img.eval()[0]

    # Clip the values to 0.0-1.0
    stylised_img = np.clip(stylised_img, 0, 1)

    # Show img
    skimage.io.imshow(stylised_img)