v1.0

tejaslodaya · Nov 28, 2017 · 53d0e6f · 53d0e6f
1 parent 4681cf5
commit 53d0e6f
Show file tree

Hide file tree

Showing 19 changed files with 436 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,6 @@
+
+pretrained-model/imagenet-vgg-verydeep-19\.mat
+
+\.idea/
+
+__pycache__/
diff --git a/config.py b/config.py
@@ -0,0 +1,23 @@
+import numpy as np
+
+class CONFIG:
+    """Constants (image geometry, file locations, loss weights, optimizer settings) for the style transfer run."""
+
+    # --- Image geometry ---
+    IMAGE_WIDTH = 400
+    IMAGE_HEIGHT = 300
+    COLOR_CHANNELS = 3
+
+    # NOTE(review): presumably the share of random noise mixed into the initial generated image;
+    # the code that reads this value is not in this file -- confirm in nst_utils.
+    NOISE_RATIO = 0.6
+
+    # Normalizing factor along 3 color channels, stored with shape (1, 1, 1, 3)
+    MEANS = np.reshape([123.68, 116.779, 103.939], (1, 1, 1, 3))
+
+    # --- File locations ---
+    # Pretrained weights. Download from http://www.vlfeat.org/matconvnet/pretrained/
+    VGG_MODEL = 'pretrained-model/imagenet-vgg-verydeep-19.mat'
+    # Style image to use.
+    STYLE_IMAGE = 'images/400/monet.jpg'
+    # Content image to use.
+    CONTENT_IMAGE = 'images/400/louvre_small.jpg'
+    OUTPUT_DIR = 'output/'
+
+    # --- Layers that feed the losses ---
+    CONTENT_LAYER = 'conv4_2'
+    # (layer name, coefficient) pairs; each coefficient scales that layer's style cost
+    STYLE_LAYERS = [
+        ('conv1_1', 0.2),
+        ('conv2_1', 0.2),
+        ('conv3_1', 0.2),
+        ('conv4_1', 0.2),
+        ('conv5_1', 0.2),
+    ]
+
+    # --- Loss weights: ALPHA scales the content cost, BETA scales the style cost ---
+    ALPHA = 10
+    BETA = 40
+
+    # --- Optimizer settings ---
+    LEARNING_RATE = 2.0
+    NUM_ITERATIONS = 200
diff --git a/images/300/claude-monet.jpg b/images/300/claude-monet.jpg
diff --git a/images/300/content300.jpg b/images/300/content300.jpg
diff --git a/images/300/persian_cat_content.jpg b/images/300/persian_cat_content.jpg
diff --git a/images/300/stone_style.jpg b/images/300/stone_style.jpg
diff --git a/images/300/style300.jpg b/images/300/style300.jpg
diff --git a/images/400/camp-nou.jpg b/images/400/camp-nou.jpg
diff --git a/images/400/cat.jpg b/images/400/cat.jpg
diff --git a/images/400/drop-of-water.jpg b/images/400/drop-of-water.jpg
diff --git a/images/400/louvre_small.jpg b/images/400/louvre_small.jpg
diff --git a/images/400/monet.jpg b/images/400/monet.jpg
diff --git a/images/400/sandstone.jpg b/images/400/sandstone.jpg
diff --git a/images/800/content.jpeg b/images/800/content.jpeg
diff --git a/images/800/louvre.jpg b/images/800/louvre.jpg
diff --git a/images/800/monet_800600.jpg b/images/800/monet_800600.jpg
diff --git a/nst_app_utils.py b/nst_app_utils.py
@@ -0,0 +1,152 @@
+import tensorflow as tf
+
+def compute_layer_content_cost(a_C, a_G):
+    """
+    Computes the content cost
+
+    The cost is the sum of squared differences between the two activations,
+    scaled by 1 / (4 * n_H * n_W * n_C).
+
+    Arguments:
+    a_C -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image C
+    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image G
+
+    Returns:
+    J_content_layer -- tensor representing a scalar value
+    """
+
+    # Retrieve the static dimensions from a_G; the leading (batch) dimension is not needed
+    _, n_H, n_W, n_C = a_G.get_shape().as_list()
+
+    # Unroll a_C and a_G to (n_C, n_H * n_W) in exactly the same way, so elements stay aligned
+    a_C_unrolled = tf.reshape(tf.transpose(a_C), shape=[n_C, n_H * n_W])
+    a_G_unrolled = tf.reshape(tf.transpose(a_G), shape=[n_C, n_H * n_W])
+
+    # The dimensions are plain ints: a float numerator keeps the scale factor from
+    # truncating to 0 when the module runs with Python 2 integer division
+    normalizer = 1.0 / (4 * n_H * n_W * n_C)
+
+    # compute the cost with tensorflow
+    J_content_layer = normalizer * tf.reduce_sum(tf.square(tf.subtract(a_C_unrolled, a_G_unrolled)))
+
+    return J_content_layer
+
+
+def compute_content_cost(sess, model, CONTENT_LAYER):
+    """
+    Builds the content cost for the chosen layer
+
+    The content activations are read from the model as it is at call time, so the
+    content image has to be assigned to the model input before calling this function.
+
+    Arguments:
+    sess -- interactive session
+    model -- our tensorflow model, indexed by layer name
+    CONTENT_LAYER -- string containing layer name
+
+    Returns:
+    J_content -- tensor representing a scalar value
+    """
+
+    # Symbolic output of the content layer. It also plays the role of a_G: it is not
+    # evaluated here and will draw its activations from the layer later, with G as input.
+    layer_output = model[CONTENT_LAYER]
+
+    # Evaluating the layer right now pins a_C to a concrete value
+    content_activations = sess.run(layer_output)
+
+    return compute_layer_content_cost(content_activations, layer_output)
+
+def gram_matrix(A):
+    """
+    Multiplies A by its own transpose
+
+    Argument:
+    A -- matrix of shape (n_C, n_H*n_W)
+
+    Returns:
+    GA -- Gram matrix of A, of shape (n_C, n_C)
+    """
+
+    A_transposed = tf.transpose(A)
+
+    return tf.matmul(A, A_transposed)
+
+
+def compute_layer_style_cost(a_S, a_G):
+    """
+    Computes the style cost of a single layer
+
+    The cost is the sum of squared differences between the Gram matrices of the two
+    activations, scaled by 1 / (4 * n_C^2 * n_H^2 * n_W^2).
+
+    Arguments:
+    a_S -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image S
+    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image G
+
+    Returns:
+    J_style_layer -- tensor representing a scalar value
+    """
+
+    # Retrieve the static dimensions from a_G; the leading (batch) dimension is not needed
+    _, n_H, n_W, n_C = a_G.get_shape().as_list()
+
+    # Unroll the activations to shape (n_C, n_H*n_W), under new names so the inputs are not shadowed
+    a_S_unrolled = tf.reshape(tf.transpose(a_S), shape=[n_C, n_H * n_W])
+    a_G_unrolled = tf.reshape(tf.transpose(a_G), shape=[n_C, n_H * n_W])
+
+    # Computing gram_matrices for both images S and G
+    GS = gram_matrix(a_S_unrolled)
+    GG = gram_matrix(a_G_unrolled)
+
+    # The dimensions are plain ints: a float numerator keeps the scale factor from
+    # truncating to 0 when the module runs with Python 2 integer division
+    normalizer = 1.0 / (4 * n_C * n_C * n_H * n_H * n_W * n_W)
+
+    # Computing the loss
+    J_style_layer = normalizer * tf.reduce_sum(tf.square(tf.subtract(GS, GG)))
+
+    return J_style_layer
+
+
+def compute_style_cost(sess, model, STYLE_LAYERS):
+    """
+    Builds the overall style cost as a weighted sum over several chosen layers
+
+    The style activations are read from the model as it is at call time, so the
+    style image has to be assigned to the model input before calling this function.
+
+    Arguments:
+    sess -- interactive session
+    model -- our tensorflow model, indexed by layer name
+    STYLE_LAYERS -- A python list containing:
+                        - the names of the layers to extract style from
+                        - a coefficient for each of them
+
+    Returns:
+    J_style -- tensor representing a scalar value
+    """
+
+    weighted_layer_costs = []
+
+    for name, weight in STYLE_LAYERS:
+        # Symbolic output of this layer. It also plays the role of a_G: it is not
+        # evaluated here and will draw its activations from the layer later, with G as input.
+        layer_output = model[name]
+
+        # Evaluating the layer right now pins a_S to a concrete value
+        style_activations = sess.run(layer_output)
+
+        # Scale this layer's style cost by its coefficient
+        weighted_layer_costs.append(weight * compute_layer_style_cost(style_activations, layer_output))
+
+    # Summation starts from 0, so an empty STYLE_LAYERS yields a cost of 0
+    return sum(weighted_layer_costs)
+
+
+def total_cost(J_content, J_style, alpha=10, beta=40):
+    """
+    Combines the content and style costs into one weighted sum
+
+    Arguments:
+    J_content -- content cost coded above
+    J_style -- style cost coded above
+    alpha -- hyperparameter weighting the importance of the content cost
+    beta -- hyperparameter weighting the importance of the style cost
+
+    Returns:
+    J -- total cost, alpha * J_content + beta * J_style
+    """
+
+    weighted_content = alpha * J_content
+    weighted_style = beta * J_style
+
+    return weighted_content + weighted_style
+
diff --git a/nst_main.py b/nst_main.py
@@ -0,0 +1,69 @@
+import scipy.misc
+import tensorflow as tf
+
+from nst_utils import load_vgg_model, reshape_and_normalize_image, generate_noise_image, save_image
+from nst_app_utils import compute_content_cost, compute_style_cost, total_cost
+from config import CONFIG
+
+# Step 1: Create an interactive session
+tf.reset_default_graph()
+sess = tf.InteractiveSession()
+
+# Step 2: Load the content image
+# NOTE: scipy.misc.imread was deprecated in SciPy 1.0.0 and removed in SciPy 1.2.0,
+# so this script needs an older SciPy release to run as written.
+content_image = scipy.misc.imread(CONFIG.CONTENT_IMAGE)
+content_image = reshape_and_normalize_image(content_image)
+
+# Step 3: Load the style image
+style_image = scipy.misc.imread(CONFIG.STYLE_IMAGE)
+style_image = reshape_and_normalize_image(style_image)
+
+# Step 4: Randomly initialize the image to be generated
+generated_image = generate_noise_image(content_image)
+
+# Step 5: Load the pretrained VGG-19 model (CONFIG.VGG_MODEL points at imagenet-vgg-verydeep-19.mat)
+model = load_vgg_model(CONFIG.VGG_MODEL)
+
+# Step 6: Build the tensorflow graph
+# Step 6a: Run the content image through the VGG-19 model and compute content cost.
+# The content image must be assigned first: compute_content_cost evaluates the content
+# layer immediately to capture the content activations.
+sess.run(model['input'].assign(content_image))
+J_content = compute_content_cost(sess, model, CONFIG.CONTENT_LAYER)
+
+# Step 6b: Run the style image through the VGG-19 model and compute style cost.
+# Same ordering requirement as above: the style layers are evaluated inside compute_style_cost.
+sess.run(model['input'].assign(style_image))
+J_style = compute_style_cost(sess, model, CONFIG.STYLE_LAYERS)
+
+# Step 6c: Compute the total cost
+J = total_cost(J_content, J_style, alpha = CONFIG.ALPHA, beta = CONFIG.BETA)
+
+# Step 6d: Define the optimizer and learning rate
+optimizer = tf.train.AdamOptimizer(CONFIG.LEARNING_RATE)
+train_step = optimizer.minimize(J)
+
+# Step 7: Run graph for a large number of iterations, updating the generated image at every step
+# Initialize global variables
+sess.run(tf.global_variables_initializer())
+
+# Run the noisy initial generated image through the model.
+sess.run(model['input'].assign(generated_image))
+
+for i in range(CONFIG.NUM_ITERATIONS):
+
+    # Run the session on the train_step to minimize the total cost
+    sess.run(train_step)
+
+    # Compute the generated image by running the session on the current model['input']
+    generated_image = sess.run(model['input'])
+
+    # Print every 20 iterations.
+    if i % 20 == 0:
+        Jt, Jc, Js = sess.run([J, J_content, J_style])
+        print("Iteration " + str(i) + " :")
+        print("total cost = " + str(Jt))
+        print("content cost = " + str(Jc))
+        print("style cost = " + str(Js))
+
+        # save current generated image in the configured output directory
+        # (CONFIG.OUTPUT_DIR is concatenated as-is, so it has to end with a path separator)
+        save_image(CONFIG.OUTPUT_DIR + str(i) + ".png", generated_image)
+
+# save last generated image
+save_image(CONFIG.OUTPUT_DIR + 'generated_image.jpg', generated_image)