-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4681cf5
commit 53d0e6f
Showing
19 changed files
with
436 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
|
||
pretrained-model/imagenet-vgg-verydeep-19\.mat | ||
|
||
\.idea/ | ||
|
||
__pycache__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import numpy as np | ||
|
||
class CONFIG:
    """Static settings shared by the neural style transfer scripts."""

    # Image geometry.
    IMAGE_WIDTH = 400
    IMAGE_HEIGHT = 300
    COLOR_CHANNELS = 3

    NOISE_RATIO = 0.6

    # Normalizing factor along the 3 color channels, shaped (1, 1, 1, 3).
    MEANS = np.reshape(np.array([123.68, 116.779, 103.939]), (1, 1, 1, 3))

    # Pretrained weights; download from http://www.vlfeat.org/matconvnet/pretrained/
    VGG_MODEL = 'pretrained-model/imagenet-vgg-verydeep-19.mat'

    # Input images and output location.
    STYLE_IMAGE = 'images/400/monet.jpg'            # Style image to use.
    CONTENT_IMAGE = 'images/400/louvre_small.jpg'   # Content image to use.
    OUTPUT_DIR = 'output/'

    # Layer whose activations define the content cost.
    CONTENT_LAYER = 'conv4_2'

    # (layer name, weight) pairs contributing to the style cost.
    STYLE_LAYERS = [
        ('conv1_1', 0.2),
        ('conv2_1', 0.2),
        ('conv3_1', 0.2),
        ('conv4_1', 0.2),
        ('conv5_1', 0.2),
    ]

    # Weights of the content (ALPHA) and style (BETA) terms in the total cost.
    ALPHA = 10
    BETA = 40

    # Optimizer settings.
    LEARNING_RATE = 2.0
    NUM_ITERATIONS = 200
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
import tensorflow as tf | ||
|
||
def compute_layer_content_cost(a_C, a_G):
    """
    Content cost between two sets of activations taken from the same layer.

    Arguments:
    a_C -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image C
    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image G

    Returns:
    J_content_layer -- tensor representing a scalar value
    """
    # Only the spatial and channel sizes of a_G are needed; the batch size is ignored.
    _, height, width, channels = a_G.get_shape().as_list()

    # Both activations are unrolled to (n_C, n_H * n_W). tf.transpose without
    # `perm` reverses the axis order, which puts the channel axis first.
    unrolled_shape = [channels, height * width]
    content_unrolled = tf.reshape(tf.transpose(a_C), shape=unrolled_shape)
    generated_unrolled = tf.reshape(tf.transpose(a_G), shape=unrolled_shape)

    # Sum of squared differences, scaled by 1 / (4 * n_H * n_W * n_C).
    scale = 1 / (4 * height * width * channels)
    squared_error = tf.square(tf.subtract(content_unrolled, generated_unrolled))
    return scale * tf.reduce_sum(squared_error)
|
||
|
||
def compute_content_cost(sess, model, CONTENT_LAYER):
    """
    Build the content cost for the chosen layer of the model.

    Arguments:
    sess -- interactive session
    model -- our tensorflow model
    CONTENT_LAYER -- string containing layer name

    Returns:
    J_content -- tensor representing a scalar value
    """
    layer_output = model[CONTENT_LAYER]

    # Evaluating the layer now freezes the activations for whatever image is
    # currently assigned to the model input; these act as a_C.
    content_activations = sess.run(layer_output)

    # The unevaluated tensor is passed as a_G, so it is recomputed from the
    # model input each time the returned cost is run.
    return compute_layer_content_cost(content_activations, layer_output)
|
||
def gram_matrix(A):
    """
    Gram matrix of A, i.e. A multiplied by its own transpose.

    Argument:
    A -- matrix of shape (n_C, n_H*n_W)

    Returns:
    GA -- Gram matrix of A, of shape (n_C, n_C)
    """
    A_transposed = tf.transpose(A)
    return tf.matmul(A, A_transposed)
|
||
|
||
def compute_layer_style_cost(a_S, a_G):
    """
    Style cost between two sets of activations taken from the same layer.

    Arguments:
    a_S -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image S
    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image G

    Returns:
    J_style_layer -- tensor representing a scalar value
    """
    # Only the spatial and channel sizes of a_G are needed; the batch size is ignored.
    _, height, width, channels = a_G.get_shape().as_list()

    # Unroll both activations to (n_C, n_H * n_W) so each row holds one channel.
    unrolled_shape = [channels, height * width]
    style_unrolled = tf.reshape(tf.transpose(a_S), shape=unrolled_shape)
    generated_unrolled = tf.reshape(tf.transpose(a_G), shape=unrolled_shape)

    # Gram matrices of the style and generated activations.
    gram_style = gram_matrix(style_unrolled)
    gram_generated = gram_matrix(generated_unrolled)

    # Sum of squared Gram differences, scaled by 1 / (4 * n_C^2 * (n_H * n_W)^2).
    scale = 1 / (4 * channels * channels * height * height * width * width)
    squared_error = tf.square(tf.subtract(gram_style, gram_generated))
    return scale * tf.reduce_sum(squared_error)
|
||
|
||
def compute_style_cost(sess, model, STYLE_LAYERS):
    """
    Build the weighted sum of the per-layer style costs.

    Arguments:
    sess -- interactive session
    model -- our tensorflow model
    STYLE_LAYERS -- A python list containing:
                        - the names of the layers to extract style from
                        - a coefficient for each of them

    Returns:
    J_style -- tensor representing a scalar value
    """

    def weighted_layer_cost(layer_name, coeff):
        """Return coeff times the style cost of one layer."""
        layer_output = model[layer_name]

        # Evaluating the layer now freezes the activations for whatever image
        # is currently assigned to the model input; these act as a_S.
        style_activations = sess.run(layer_output)

        # The unevaluated tensor is passed as a_G, so it is recomputed from
        # the model input each time the cost is run.
        return coeff * compute_layer_style_cost(style_activations, layer_output)

    # sum() starts from 0 and adds the layers in list order, so an empty
    # STYLE_LAYERS yields the plain integer 0.
    return sum(weighted_layer_cost(name, weight) for name, weight in STYLE_LAYERS)
|
||
|
||
def total_cost(J_content, J_style, alpha=10, beta=40):
    """
    Combine the content and style costs into one weighted total.

    Arguments:
    J_content -- content cost
    J_style -- style cost
    alpha -- hyperparameter weighting the importance of the content cost
    beta -- hyperparameter weighting the importance of the style cost

    Returns:
    J -- total cost, alpha * J_content + beta * J_style
    """
    weighted_content = alpha * J_content
    weighted_style = beta * J_style
    return weighted_content + weighted_style
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import scipy.misc | ||
import tensorflow as tf | ||
|
||
from nst_utils import load_vgg_model, reshape_and_normalize_image, generate_noise_image, save_image | ||
from nst_app_utils import compute_content_cost, compute_style_cost, total_cost | ||
from config import CONFIG | ||
|
||
# Step 1: Create an interactive session
tf.reset_default_graph()
sess = tf.InteractiveSession()

# Step 2: Load the content image
# NOTE(review): scipy.misc.imread is deprecated/removed in newer SciPy releases;
# this script needs a SciPy version that still provides it -- confirm the pin.
content_image = scipy.misc.imread(CONFIG.CONTENT_IMAGE)
content_image = reshape_and_normalize_image(content_image)

# Step 3: Load the style image
style_image = scipy.misc.imread(CONFIG.STYLE_IMAGE)
style_image = reshape_and_normalize_image(style_image)

# Step 4: Randomly initialize the image to be generated
generated_image = generate_noise_image(content_image)

# Step 5: Load the pretrained VGG-19 model (CONFIG.VGG_MODEL is the verydeep-19 weights file)
model = load_vgg_model(CONFIG.VGG_MODEL)

# Step 6: Build the tensorflow graph
# Step 6a: Run the content image through the VGG-19 model and compute content cost.
# The content activations are evaluated inside compute_content_cost, so the
# content image must be assigned to the model input first.
sess.run(model['input'].assign(content_image))
J_content = compute_content_cost(sess, model, CONFIG.CONTENT_LAYER)

# Step 6b: Run the style image through the VGG-19 model and compute style cost.
# Same ordering constraint as above, this time for the style activations.
sess.run(model['input'].assign(style_image))
J_style = compute_style_cost(sess, model, CONFIG.STYLE_LAYERS)

# Step 6c: Compute the total cost
J = total_cost(J_content, J_style, alpha=CONFIG.ALPHA, beta=CONFIG.BETA)

# Step 6d: Define the optimizer and learning rate
optimizer = tf.train.AdamOptimizer(CONFIG.LEARNING_RATE)
train_step = optimizer.minimize(J)

# Step 7: Run graph for a large number of iterations, updating the generated image at every step
# Initialize global variables
sess.run(tf.global_variables_initializer())

# Run the noisy initial generated image through the model.
sess.run(model['input'].assign(generated_image))

for i in range(CONFIG.NUM_ITERATIONS):

    # Run the session on the train_step to minimize the total cost
    sess.run(train_step)

    # Compute the generated image by running the session on the current model['input']
    generated_image = sess.run(model['input'])

    # Print every 20 iterations.
    if i % 20 == 0:
        Jt, Jc, Js = sess.run([J, J_content, J_style])
        print("Iteration " + str(i) + " :")
        print("total cost = " + str(Jt))
        print("content cost = " + str(Jc))
        print("style cost = " + str(Js))

        # Save the current generated image in the configured output directory.
        # CONFIG.OUTPUT_DIR replaces the previously hard-coded "output/" prefix
        # so that changing the config actually redirects the output.
        save_image(CONFIG.OUTPUT_DIR + str(i) + ".png", generated_image)

# Save the last generated image
save_image(CONFIG.OUTPUT_DIR + 'generated_image.jpg', generated_image)
Oops, something went wrong.