Possible to evaluate on LRR-4x with different flow methods.

dlsrbgg33 · Jun 12, 2018 · 4ed4a3d · 4ed4a3d
1 parent 1f17916
commit 4ed4a3d
Show file tree

Hide file tree

Showing 13 changed files with 1,947 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -1,13 +1,25 @@
 ## Semantic Video Segmentation by Gated Recurrent Flow Propagation
-This repo will contain the code for the CVPR 2018 paper "Semantic Video Segmentation by Gated Recurrent Flow Propagation" by David Nilsson and Cristian Sminchisescu [[pdf]](http://openaccess.thecvf.com/content_cvpr_2018/papers/Nilsson_Semantic_Video_Segmentation_CVPR_2018_paper.pdf)
+This repo contains the code for the CVPR 2018 paper "Semantic Video Segmentation by Gated Recurrent Flow Propagation" by David Nilsson and Cristian Sminchisescu. [[pdf]](http://openaccess.thecvf.com/content_cvpr_2018/papers/Nilsson_Semantic_Video_Segmentation_CVPR_2018_paper.pdf)
 
 ### Setup
 
-Not available yet.
+Check `config.py`. Download all data from the Cityscapes dataset and change the paths in `config.py`. Check that you can run `python config.py` without any errors.
+
+Run misc/compile.sh to compile the bilinear warping operator. Change the include directory on line 9 if you get errors related to libcudart.
+
+Run misc/download_pretrained_models.sh to download the models used in the paper.
 
 ### Usage
 
-Not available yet.
+Reproduce the values in Table 9 of the paper by running the following commands. It takes about 4 hours on a Titan X GPU.
+```
+python -u evaluate.py --static lrr --flow flownet1 2>&1 | tee logs/log_flownet1.txt
+python -u evaluate.py --static lrr --flow flownet2 2>&1 | tee logs/log_flownet2.txt
+python -u evaluate.py --static lrr --flow farneback 2>&1 | tee logs/log_farneback.txt
+python -u evaluate.py --static lrr --flow farneback --frames 1 2>&1 | tee logs/log_lrr_static.txt
+```
+
+Evaluation on PSP and Dilation10 as well as code for training will be added soon.
 
 ### Citation
 If you use the code in your own research, please cite
@@ -20,3 +32,5 @@ month = {June},
 year = {2018}
 }
 ```
+
+Depending on the setup you use, consider also citing PSP, LRR, Dilation, Flownet1, Flownet2 or Farnebäck.
diff --git a/config.py b/config.py
@@ -0,0 +1,20 @@
+# Local path configuration for the Cityscapes data and evaluation scripts.
+# Importing (or running) this module checks that one known file exists under
+# each configured location and fails with an AssertionError otherwise.
+# NOTE(review): the checks are `assert` statements, so they are skipped when
+# Python runs with -O.
+import os
+# See "https://www.cityscapes-dataset.com/downloads/" for the zip files referenced below
+
+# Where "gtFine_trainvaltest.zip" is unpacked
+cityscapes_dir = ''
+
+# Where "leftImg8bit_sequence_trainvaltest.zip" is unpacked. May be the same path as above.
+cityscapes_video_dir = ''
+
+# Where "https://github.com/mcordts/cityscapesScripts" is unpacked
+# (by default the "scripts" sub-directory of cityscapes_dir).
+cityscapes_scripts_root = os.path.join(cityscapes_dir, 'scripts')
+
+# Sanity check: one label image under gtFine/train/aachen must exist.
+example_im = os.path.join(cityscapes_dir, 'gtFine', 'train', 'aachen', 'aachen_000000_000019_gtFine_labelIds.png')
+assert os.path.isfile(example_im), "The CityScapes root directory is incorrect. Could not find %s" % (example_im)
+
+# Sanity check: one frame under leftImg8bit_sequence/train/aachen must exist.
+# example_im is reused for this second path.
+example_im = os.path.join(cityscapes_video_dir, 'leftImg8bit_sequence', 'train', 'aachen', 'aachen_000000_000000_leftImg8bit.png')
+assert os.path.isfile(example_im), "The CityScapes video root directory is incorrect. Could not find %s" % (example_im)
+
+# Sanity check: the pixel-level evaluation script must exist under the scripts root.
+# NOTE(review): `file` shadows the Python 2 builtin of the same name; it is a
+# module-level name, so renaming it could affect importers.
+file = os.path.join(cityscapes_scripts_root, 'evaluation', 'evalPixelLevelSemanticLabeling.py')
+assert os.path.isfile(file), "Could not find the evaluation script %s" % file
diff --git a/evaluate.py b/evaluate.py
@@ -0,0 +1,150 @@
+import argparse, glob, os, cv2, sys, pickle
+import numpy as np
+import tensorflow as tf
+import config as cfg
+from models.stgru import STGRU
+from models.lrr import LRR
+from models.flownet2 import Flownet2
+from models.flownet1 import Flownet1
+from tensorflow.python.framework import ops
+
+sys.path.insert(0, os.path.join(cfg.cityscapes_scripts_root, 'evaluation'))
+import evalPixelLevelSemanticLabeling
+
+# Load the custom bilinear warping op from its shared library. The path is
+# relative to the current working directory.
+bilinear_warping_module = tf.load_op_library('./misc/bilinear_warping.so')
+@ops.RegisterGradient("BilinearWarping")
+def _BilinearWarping(op, grad):
+  """Gradient function registered for the "BilinearWarping" op.
+
+  Passes the incoming gradient together with the op's two inputs to the
+  custom bilinear_warping_grad op and returns its result.
+  """
+  return bilinear_warping_module.bilinear_warping_grad(grad, op.inputs[0], op.inputs[1])
+
+def evaluate(args):
+    """Evaluate GRFP on the Cityscapes validation set.
+
+    For each annotated validation image, the last `args.frames` frames of the
+    corresponding video sequence (ending at the annotated frame) are processed
+    in order. The first frame is segmented by the static network alone; every
+    later frame combines its static segmentation with the previous hidden
+    state through the STGRU, using optical flow between the two frames.
+    The prediction for the annotated frame is written as a label-id PNG to
+    `<cfg.cityscapes_dir>/results`, after which the official Cityscapes
+    pixel-level evaluation is run.
+
+    Args:
+        args: Parsed command-line arguments with the attributes `static`
+            (must be 'lrr'), `flow` ('flownet1', 'flownet2' or 'farneback')
+            and `frames` (number of frames per sequence, at least 1).
+
+    Raises:
+        AssertionError: If `args.static` is not 'lrr'.
+        IOError: If a video frame cannot be read or a result image cannot
+            be written.
+    """
+    data_split = 'val'
+    nbr_classes = 19
+    im_size = [1024, 2048]
+    image_mean = [72.39,82.91,73.16] # the mean is automatically subtracted in some modules e.g. flownet2, so be careful
+
+    # Pickle data must be read in binary mode; the context manager closes
+    # the file even if loading fails.
+    with open('misc/cityscapes_labels.pckl', 'rb') as f:
+        cs_id2trainid, cs_id2name = pickle.load(f)
+
+    assert args.static == 'lrr', "Only LRR is supported for now."
+
+    # Learned flow networks live in the 'flow' variable scope so that their
+    # variables can be restored from a separate checkpoint below.
+    # Farneback flow is computed with OpenCV and needs no graph.
+    if args.flow == 'flownet2':
+        with tf.variable_scope('flow'):
+            flow_network = Flownet2(bilinear_warping_module)
+            flow_img0 = tf.placeholder(tf.float32)
+            flow_img1 = tf.placeholder(tf.float32)
+            flow_tensor = flow_network(flow_img0, flow_img1, flip=True)
+    elif args.flow == 'flownet1':
+        with tf.variable_scope('flow'):
+            flow_network = Flownet1()
+            flow_img0 = tf.placeholder(tf.float32)
+            flow_img1 = tf.placeholder(tf.float32)
+            flow_tensor = flow_network.get_output_tensor(flow_img0, flow_img1, im_size)
+
+    RNN = STGRU([nbr_classes, im_size[0], im_size[1]], [7, 7], bilinear_warping_module)
+
+    input_images_tensor, input_flow, \
+        input_segmentation, prev_h, new_h, \
+        prediction = RNN.get_one_step_predictor()
+
+    static_input = tf.placeholder(tf.float32)
+    static_network = LRR()
+    static_output = static_network(static_input)
+
+    # One saver for everything outside the 'flow/' scope, one for the flow network.
+    saver = tf.train.Saver([k for k in tf.global_variables() if not k.name.startswith('flow/')])
+    if args.flow in ['flownet1', 'flownet2']:
+        saver_fn = tf.train.Saver([k for k in tf.global_variables() if k.name.startswith('flow/')])
+
+    # cv2.imwrite does not create missing directories, so make sure the
+    # output directory exists before any (slow) inference is done.
+    results_dir = os.path.join(cfg.cityscapes_dir, 'results')
+    if not os.path.isdir(results_dir):
+        os.makedirs(results_dir)
+
+    with tf.Session() as sess:
+        # All variables are restored from checkpoints; no initializer is run.
+        saver.restore(sess, './checkpoints/lrr_grfp')
+
+        if args.flow == 'flownet1':
+            saver_fn.restore(sess, './checkpoints/flownet1')
+        elif args.flow == 'flownet2':
+            saver_fn.restore(sess, './checkpoints/flownet2')
+
+        label_paths = glob.glob(os.path.join(cfg.cityscapes_dir, 'gtFine', data_split, "*", "*labelIds.png"))
+        for (progress_counter, im_path) in enumerate(label_paths):
+            # File names look like <city>_<seq>_<frame>_gtFine_labelIds.png
+            parts = os.path.basename(im_path).split('_')
+            city, seq, frame = parts[0], parts[1], parts[2]
+
+            print("Processing sequence %d/%d" % (progress_counter+1, len(label_paths)))
+            # dt runs from -(frames-1) up to 0, so the last processed frame
+            # is the annotated one.
+            for dt in range(-args.frames + 1, 1):
+                first_frame = dt == -args.frames + 1
+                t = int(frame) + dt
+
+                frame_path = os.path.join(cfg.cityscapes_video_dir, 'leftImg8bit_sequence', data_split,
+                        city, ("%s_%s_%06d_leftImg8bit.png" % (city, seq, t)))
+                im = cv2.imread(frame_path, 1)
+                if im is None:
+                    # cv2.imread signals failure by returning None
+                    raise IOError("Could not read video frame %s" % frame_path)
+                im = im.astype(np.float32)[np.newaxis,...]
+
+                # Compute optical flow
+                if not first_frame:
+                    if args.flow == 'flownet2':
+                        flow = sess.run(flow_tensor, feed_dict={flow_img0: im, flow_img1: last_im})
+                    elif args.flow == 'flownet1':
+                        flow = sess.run(flow_tensor, feed_dict={flow_img0: im, flow_img1: last_im})
+                        # swap the two flow channels
+                        flow = flow[...,(1, 0)]
+                    elif args.flow == 'farneback':
+                        im_gray = cv2.cvtColor(im[0], cv2.COLOR_BGR2GRAY)
+                        last_im_gray = cv2.cvtColor(last_im[0], cv2.COLOR_BGR2GRAY)
+
+                        flow = cv2.calcOpticalFlowFarneback(im_gray, last_im_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
+                        # swap the two flow channels and add a leading axis
+                        flow = flow[...,(1, 0)]
+                        flow = flow[np.newaxis,...]
+
+                # Static segmentation
+                x = sess.run(static_output, feed_dict={static_input: im})
+
+                if first_frame:
+                    # the hidden state is simply the static segmentation for the first frame
+                    h = x
+                    pred = np.argmax(h, axis=3)
+                else:
+                    inputs = {
+                        input_images_tensor: np.stack([last_im, im]),
+                        input_flow: flow,
+                        input_segmentation: x,
+                        prev_h: h
+                    }
+                    # GRFP
+                    h, pred = sess.run([new_h, prediction], feed_dict=inputs)
+
+                last_im = im
+
+            # Map the predicted train ids back to Cityscapes label ids and save.
+            # The comparison is done on the untouched copy S so that an id
+            # written into S_new is never remapped a second time.
+            S = pred[0]
+            S_new = S.copy()
+            for (idx, train_idx) in cs_id2trainid.items():
+                S_new[S == train_idx] = idx
+
+            output_path = os.path.join(results_dir, '%s_%s_%s.png' % (city, seq, frame))
+            if not cv2.imwrite(output_path, S_new):
+                raise IOError("Could not write result image %s" % output_path)
+
+
+        # Evaluate using the official CityScapes code
+        evalPixelLevelSemanticLabeling.main([])
+
+
+
+if __name__ == '__main__':
+    # Command-line entry point: parse the options, validate them and run
+    # the evaluation.
+    parser = argparse.ArgumentParser(description='Evaluate GRFP on the CityScapes validation set.')
+
+    parser.add_argument('--static', help='Which static network to use.', required=True)
+    parser.add_argument('--flow', help='Which optical flow method to use.', required=True)
+    parser.add_argument('--frames', type=int, help='Number of frames to use.', default=5, required=False)
+
+    args = parser.parse_args()
+
+    # parser.error() is used instead of assert so that the checks are not
+    # stripped by `python -O` and the user gets a usage message.
+    if args.flow not in ['flownet1', 'flownet2', 'farneback']:
+        parser.error("Unknown flow method %s." % args.flow)
+    if args.static not in ['dilation', 'dilation_grfp', 'lrr', 'lrr_grfp']:
+        parser.error("Unknown static method %s." % args.static)
+    if not (1 <= args.frames <= 20):
+        parser.error("The number of frames must be between 1 and 20.")
+
+    evaluate(args)
diff --git a/misc/bilinear_warping.cc b/misc/bilinear_warping.cc
@@ -0,0 +1,131 @@
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+
+using namespace tensorflow;
+
+// Forward op: takes an input tensor x and a flow tensor and produces y.
+// The GPU kernel in this file requires both inputs to be 4-D, the flow to
+// match x in its first three dimensions and to have a last dimension of 2;
+// y is allocated with the shape of x.
+REGISTER_OP("BilinearWarping")
+  .Input("x: float")
+  .Input("flow: float")
+  .Output("y: float");
+
+// Gradient op: takes the gradient w.r.t. y plus the forward inputs and
+// produces grad_x (allocated with the shape of x) and grad_flow (allocated
+// with the shape of flow).
+REGISTER_OP("BilinearWarpingGrad")
+  .Input("grad_y: float")
+  .Input("x: float")
+  .Input("flow: float")
+  .Output("grad_x: float")
+  .Output("grad_flow: float");
+
+// CPU kernel for BilinearWarping. No CPU implementation exists, so Compute
+// fails unconditionally with InvalidArgument. No REGISTER_KERNEL_BUILDER
+// entry for this class is visible in this file.
+class BilinearWarpingOp : public OpKernel {
+public:
+  explicit BilinearWarpingOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // OP_REQUIRES with a false condition records the error and returns, so
+    // nothing placed after it would ever run. The statement is terminated
+    // with a semicolon like every other OP_REQUIRES use in this file.
+    OP_REQUIRES(context, false, errors::InvalidArgument("Cpu not supported!"));
+  }
+};
+
+// Runs the forward warping on the GPU. Only declared here; the definition
+// is not part of this file.
+void BilinearWarpingLauncher(const float* input, const float* flow, float* out, 
+    const int count, const int channels, const int height, const int width);
+
+// GPU kernel for BilinearWarping: validates the input shapes, allocates an
+// output with the shape of the input and hands the raw buffers to
+// BilinearWarpingLauncher.
+class BilinearWarpingGPUOp : public OpKernel {
+public:
+  explicit BilinearWarpingGPUOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor = context->input(0);
+    const Tensor& flow_tensor = context->input(1);
+
+    // Validate everything once, before any output is allocated.
+    OP_REQUIRES(context, input_tensor.dims() == 4, errors::InvalidArgument("input dim != 4"));
+    OP_REQUIRES(context, flow_tensor.dims() == 4, errors::InvalidArgument("flow dim != 4"));
+    OP_REQUIRES(context, flow_tensor.dim_size(0) == input_tensor.dim_size(0), errors::InvalidArgument("flow dim 0 != input dim 0"));
+    OP_REQUIRES(context, flow_tensor.dim_size(1) == input_tensor.dim_size(1), errors::InvalidArgument("flow dim 1 != input dim 1"));
+    OP_REQUIRES(context, flow_tensor.dim_size(2) == input_tensor.dim_size(2), errors::InvalidArgument("flow dim 2 != input dim 2"));
+    OP_REQUIRES(context, flow_tensor.dim_size(3) == 2, errors::InvalidArgument("Flow dim 3 != 2"));
+
+    Tensor* output_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &output_tensor));
+
+    auto input = input_tensor.flat<float>();
+    auto flow = flow_tensor.flat<float>();
+    auto output = output_tensor->flat<float>();
+
+    // Dimensions 1, 2 and 3 are read as height, width and channels.
+    // NOTE(review): NumElements() is narrowed to int here because the
+    // launcher takes an int count.
+    const int count = input_tensor.NumElements();
+    const int channels = input_tensor.dim_size(3);
+    const int height = input_tensor.dim_size(1);
+    const int width = input_tensor.dim_size(2);
+    BilinearWarpingLauncher(input.data(), flow.data(), output.data(), count, channels, height, width);
+  }
+};
+
+// Runs the backward pass on the GPU. Only declared here; the definition is
+// not part of this file.
+void BilinearWarpingGradLauncher(const float* grad_y, const float* input, 
+    const float* flow, float* grad_x, float* grad_flow, 
+    const int count, const int channels, const int height, const int width);
+
+// GPU kernel for BilinearWarpingGrad: validates the shapes, allocates
+// grad_x (shape of the input) and grad_flow (shape of the flow) and hands
+// the raw buffers to BilinearWarpingGradLauncher.
+class BilinearWarpingGradGPUOp : public OpKernel {
+public:
+  explicit BilinearWarpingGradGPUOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& grad_y_tensor = context->input(0);
+    const Tensor& input_tensor = context->input(1);
+    const Tensor& flow_tensor = context->input(2);
+
+    // Validate everything once, before any output is allocated.
+    OP_REQUIRES(context, input_tensor.dims() == 4, errors::InvalidArgument("input dim != 4"));
+    OP_REQUIRES(context, flow_tensor.dims() == 4, errors::InvalidArgument("flow dim != 4"));
+    OP_REQUIRES(context, flow_tensor.dim_size(0) == input_tensor.dim_size(0), errors::InvalidArgument("flow dim 0 != input dim 0"));
+    OP_REQUIRES(context, flow_tensor.dim_size(1) == input_tensor.dim_size(1), errors::InvalidArgument("flow dim 1 != input dim 1"));
+    OP_REQUIRES(context, flow_tensor.dim_size(2) == input_tensor.dim_size(2), errors::InvalidArgument("flow dim 2 != input dim 2"));
+    OP_REQUIRES(context, flow_tensor.dim_size(3) == 2, errors::InvalidArgument("Flow dim 3 != 2"));
+    // The forward op allocates y with the shape of x, so its gradient must
+    // have that shape too; the launcher is given the input's element count.
+    OP_REQUIRES(context, grad_y_tensor.shape() == input_tensor.shape(), errors::InvalidArgument("grad_y shape != input shape"));
+
+    Tensor* grad_x_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), &grad_x_tensor));
+
+    Tensor* grad_flow_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(1, flow_tensor.shape(), &grad_flow_tensor));
+
+    auto grad_y = grad_y_tensor.flat<float>();
+    auto input = input_tensor.flat<float>();
+    auto flow = flow_tensor.flat<float>();
+    auto grad_x = grad_x_tensor->flat<float>();
+    auto grad_flow = grad_flow_tensor->flat<float>();
+
+    // Dimensions 1, 2 and 3 are read as height, width and channels.
+    // NOTE(review): NumElements() is narrowed to int here because the
+    // launcher takes an int count.
+    const int count = input_tensor.NumElements();
+    const int channels = input_tensor.dim_size(3);
+    const int height = input_tensor.dim_size(1);
+    const int width = input_tensor.dim_size(2);
+    BilinearWarpingGradLauncher(grad_y.data(), input.data(), flow.data(), grad_x.data(), grad_flow.data(), count, channels, height, width);
+  }
+};
+
+// Both ops are registered for the GPU device only.
+REGISTER_KERNEL_BUILDER(Name("BilinearWarping").Device(DEVICE_GPU), BilinearWarpingGPUOp);
+REGISTER_KERNEL_BUILDER(Name("BilinearWarpingGrad").Device(DEVICE_GPU), BilinearWarpingGradGPUOp);