modify to fit keras 2.3.1 #7

Open · wants to merge 8 commits into master
4 changes: 4 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,4 @@
{
"python.pythonPath": "/home/liux3941/miniconda3/envs/deepivMY/bin/python",
"jupyter.jupyterServerType": "local"
}
2 changes: 1 addition & 1 deletion README.rst
@@ -18,7 +18,7 @@ DeepIV
:alt: Updates


IMPORTANT: Newer versions of Keras have broken this implementation. This code currently only support Keras 2.0.6 (which is what will be installed if you use the pip install instructions described below).
IMPORTANT: Newer versions of Keras have broken this implementation. This code currently supports Keras 2.3.1 with TensorFlow 2.5; see ``setup.py`` for details.

A package for counterfactual prediction using deep instrument variable methods that builds on Keras_.

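For anyone reproducing the environment, the version constraints referred to above live in ``setup.py``. A minimal sketch of what such pins might look like (the exact specifiers here are an assumption for illustration, not copied from the PR):

```python
# Hypothetical install_requires pins matching the README statement above;
# see setup.py in this PR for the actual specifiers.
install_requires = [
    "keras==2.3.1",
    "tensorflow==2.5.*",
]
```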
30 changes: 16 additions & 14 deletions deepiv/architectures.py
@@ -1,16 +1,17 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import keras
import keras.backend as K
from keras.layers import (Convolution2D, Dense, Dropout, Flatten,
MaxPooling2D)
from keras.models import Sequential
from keras.regularizers import l2
from keras.constraints import maxnorm
from keras.utils import np_utils

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.backend as K
from tensorflow.keras.layers import (Convolution2D, Dense, Dropout, Flatten,
MaxPooling2D)
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.constraints import MaxNorm as maxnorm
#from tensorflow.keras.utils import np_utils
from tensorflow.python.keras import utils as np_utils
import numpy as np


def feed_forward_net(input, output, hidden_layers=[64, 64], activations='relu',
dropout_rate=0., l2=0., constrain_norm=False):
'''
@@ -26,18 +27,19 @@ def feed_forward_net(input, output, hidden_layers=[64, 64], activations='relu',
state = input
if isinstance(activations, str):
activations = [activations] * len(hidden_layers)

for h, a in zip(hidden_layers, activations):
if l2 > 0.:
w_reg = keras.regularizers.l2(l2)
w_reg = tf.keras.regularizers.l2(l2)
else:
w_reg = None
const = maxnorm(2) if constrain_norm else None
const = maxnorm(2) if constrain_norm else None
state = Dense(h, activation=a, kernel_regularizer=w_reg, kernel_constraint=const)(state)
if dropout_rate > 0.:
state = Dropout(dropout_rate)(state)
return output(state)


def convnet(input, output, dropout_rate=0., input_shape=(1, 28, 28), batch_size=100,
l2_rate=0.001, nb_epoch=12, img_rows=28, img_cols=28, nb_filters=64,
pool_size=(2, 2), kernel_size=(3, 3), activations='relu', constrain_norm=False):
@@ -73,6 +75,7 @@ def convnet(input, output, dropout_rate=0., input_shape=(1, 28, 28), batch_size=
state = Dropout(dropout_rate)(state)
return output(state)


def feature_to_image(features, height=28, width=28, channels=1, backend=K):
'''
Reshape a flattened image to the input format for convolutions.
@@ -86,4 +89,3 @@ def feature_to_image(features, height=28, width=28, channels=1, backend=K):
return backend.reshape(features, (-1, channels, height, width))
else:
return backend.reshape(features, (-1, height, width, channels))

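To make the import migration concrete, here is an illustrative usage sketch of feed_forward_net with the tf.keras imports above. It is not part of the PR and assumes the package is importable as ``deepiv``:

```python
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

from deepiv.architectures import feed_forward_net

x = Input(shape=(10,))
# `output` is any callable layer; feed_forward_net applies it to the last hidden state.
y = feed_forward_net(x, Dense(1), hidden_layers=[64, 64],
                     dropout_rate=0.1, l2=1e-3, constrain_norm=True)
model = Model(inputs=x, outputs=y)
model.compile(optimizer="adam", loss="mse")
```

With the MaxNorm alias and the tf.keras regularizers above, this builds without touching the standalone keras package.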
127 changes: 108 additions & 19 deletions deepiv/custom_gradients.py
@@ -1,26 +1,33 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import types

import tensorflow.keras
from tensorflow.keras import backend as K


import keras
from keras import backend as K
if K.backend() == "theano":
import theano.tensor as tensor
Lop = tensor.Lop
elif K.backend() == "tensorflow":
import tensorflow as tf
def Lop(output, wrt, eval_points):
grads = tf.gradients(output, wrt, grad_ys=eval_points)
return grads
import types


def Lop(output, wrt, eval_points):
grads = tf.gradients(output, wrt, grad_ys=eval_points)
return grads

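The Lop helper above is a vector-Jacobian product: ``tf.gradients`` with ``grad_ys`` weights each output's gradient by the corresponding entry of ``eval_points``. A minimal eager-mode check of the same quantity (my own sketch, not code from the PR; ``tf.gradients`` itself only runs in graph mode, e.g. inside ``tf.function``):

```python
import tensorflow as tf

w = tf.Variable([1.0, 2.0])
v = tf.constant([3.0, 4.0])            # plays the role of eval_points / grad_ys
with tf.GradientTape() as tape:
    y = w * w                          # dy_i/dw_i = 2 * w_i
vjp = tape.gradient(y, w, output_gradients=v)
print(vjp.numpy())                     # [ 6. 16.] == v * 2 * w
```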

# Used to modify the default keras Optimizer object to allow
# for custom gradient computation.


def get_gradients(self, loss, params):
'''
Replacement for the default keras get_gradients() function.
Modification: checks if the object has the attribute grads and
returns that rather than calculating the gradients using automatic
differentiation.
In Keras, the default is gradients = K.gradients(outputTensor, listOfVariableTensors).
'''
if hasattr(self, 'grads'):
grads = self.grads
@@ -33,40 +40,122 @@ def get_gradients(self, loss, params):
grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads]
return grads

def replace_gradients_mse(model, opt, batch_size, n_samples = 1):

def replace_gradients_mse(model, opt, batch_size, n_samples=1):
'''
Replace the gradients of a Keras model with mean square error loss.
#
# TODO: check model components; only works with py2.7
'''
# targets has been repeated twice so the below creates two identical columns
# of the target values - we'll only use the first column.
targets = K.reshape(model.targets[0], (batch_size, n_samples * 2))
output = K.mean(K.reshape(model.outputs[0], (batch_size, n_samples, 2)), axis=1)
output = K.mean(K.reshape(model.outputs[0], (batch_size, n_samples, 2)), axis=1)
# compute d Loss / d output
dL_dOutput = (output[:,0] - targets[:,0]) * (2.) / batch_size
dL_dOutput = (output[:, 0] - targets[:, 0]) * (2.) / batch_size
# compute (d Loss / d output) (d output / d theta) for each theta
trainable_weights = model.trainable_weights
grads = Lop(output[:,1], wrt=trainable_weights, eval_points=dL_dOutput)
# grads = tf.gradients(output, wrt, grad_ys=eval_points)
grads = Lop(output[:, 1], wrt=trainable_weights, eval_points=dL_dOutput)
# compute regularizer gradients

# add loss with respect to regularizers
reg_loss = model.total_loss * 0.
for r in model.losses:
reg_loss += r
reg_loss += r
reg_grads = K.gradients(reg_loss, trainable_weights)
grads = [g+r for g,r in zip(grads, reg_grads)]
grads = [g+r for g, r in zip(grads, reg_grads)]

opt = keras.optimizers.get(opt)
# Patch keras gradient calculation to allow for user defined gradients
opt.get_gradients = types.MethodType( get_gradients, opt )
opt.get_gradients = types.MethodType(get_gradients, opt)
opt.grads = grads
model.optimizer = opt
return model

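For reference, the quantity that replace_gradients_mse (and the TF2-oriented helpers below) computes is the two-sample unbiased gradient of the mean squared error: the two columns of ``output`` come from independent Monte Carlo draws, so multiplying one column's residual by the other column's Jacobian avoids the bias that squaring a single sampled mean would introduce. In symbols (my notation, with N the batch size and B = ``n_samples``):

$$
\nabla_\theta \hat{L} \;=\; \frac{2}{N}\sum_{i=1}^{N}\Big(\frac{1}{B}\sum_{b=1}^{B}\hat{y}^{(1)}_{i,b}-y_i\Big)\,\nabla_\theta\Big(\frac{1}{B}\sum_{b=1}^{B}\hat{y}^{(2)}_{i,b}\Big)
$$

which is exactly ``dL_dOutput`` contracted against the Jacobian of ``output[:, 1]`` via ``Lop``.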

def custom_mse_unbiased_gradients(model, opt, y_true, y_pred):
"""
In the unbiased case we draw two independent samples per example, and y_true has already been repeated twice.
"""
(batch_size, n_samples) = y_true.shape
batch_size //= 2

targets = K.reshape(y_true, (batch_size, n_samples * 2))
output = K.mean(K.reshape(y_pred, (batch_size, n_samples, 2)), axis=1)
targets = tf.cast(targets, dtype=output.dtype)
# compute d Loss / d output
dL_dOutput = (output[:, 0] - targets[:, 0]) * (2.) / batch_size
# compute (d Loss / d output) (d output / d theta) for each theta
trainable_weights = model.trainable_weights
# grads = tf.gradients(output, wrt, grad_ys=eval_points)

grads = Lop(output[:, 1], wrt=trainable_weights, eval_points=dL_dOutput)
# compute regularizer gradients

with tf.GradientTape() as tape:
# add loss with respect to regularizers
#reg_loss = model.total_loss * 0.
reg_loss = 0.
for r in model.losses:
reg_loss += r

reg_grads = tape.gradient(reg_loss, trainable_weights)
grads = [g+r for g, r in zip(grads, reg_grads)]

opt = keras.optimizers.get(opt)
# Patch keras gradient calculation to allow for user defined gradients
opt.get_gradients = types.MethodType(get_gradients, opt)
opt.grads = grads
model.optimizer = opt
return model

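Because ``tf.gradients`` and the symbolic ``K.gradients`` only work in graph mode, a TF 2.x eager training step would typically express the same two-sample gradient through ``GradientTape``. A rough sketch under that assumption (the surrogate-loss trick with ``stop_gradient`` and the name ``train_step`` are mine, not part of the PR):

```python
import tensorflow as tf

def train_step(model, opt, x, y, batch_size, n_samples=1):
    # y holds the target repeated 2 * n_samples times per example, as in the PR.
    with tf.GradientTape() as tape:
        y_pred = model(x, training=True)
        out = tf.reduce_mean(tf.reshape(y_pred, (batch_size, n_samples, 2)), axis=1)
        targets = tf.cast(tf.reshape(y, (batch_size, n_samples * 2)), out.dtype)
        # Surrogate whose gradient equals the two-sample unbiased MSE gradient.
        residual = tf.stop_gradient(2.0 * (out[:, 0] - targets[:, 0]) / batch_size)
        surrogate = tf.reduce_sum(residual * out[:, 1])
        if model.losses:                      # regularizer terms, if any
            surrogate += tf.add_n(model.losses)
    grads = tape.gradient(surrogate, model.trainable_weights)
    opt.apply_gradients(zip(grads, model.trainable_weights))
```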

def build_mc_mse_loss(n_samples):
def mc_mse(y_true, y_predicted):
n_examples = y_true.shape[0] / n_samples / 2
targets = y_true.reshape((n_examples , n_samples * 2))
output = y_predicted.reshape((n_examples, n_samples * 2)).mean(axis=1)
return K.mean(K.square(targets[:,0] - output))
"""
Return a Monte Carlo MSE loss function for n_samples samples per example.
"""
def mc_mse(y_true, y_pred):
n_examples = y_true.shape[0] // (n_samples * 2)
targets = K.reshape(y_true, (n_examples, n_samples * 2))
output = K.mean(K.reshape(y_pred, (n_examples, n_samples, 2)), axis=1)
return K.mean(K.square(targets[:, 0] - output[:, 0]))
return mc_mse


def unbiased_mse_loss_and_gradients(model, y_true, y_pred, batch_size, n_samples=1):
"""
In a custom loss function, y_true and y_pred need to be tensors with the same dtype.
n_samples is B in equation (10).
"""

# total_size = y_pred.shape[0]
# batch_size = total_size//n_samples//2
targets = K.reshape(y_true, (batch_size, n_samples*2))
output = K.mean(K.reshape(y_pred, (batch_size, n_samples, 2)), axis=1)
targets = tf.cast(targets, dtype=output.dtype)

# compute d Loss / d output
dL_dOutput = (output[:, 0] - targets[:, 0]) * (2.) / batch_size
# compute (d Loss / d output) (d output / d theta) for each theta
trainable_weights = model.trainable_weights
grads = tf.gradients(output[:, 1], trainable_weights, grad_ys=dL_dOutput)

# # add loss with respect to regularizers
# reg_loss = 0.
# for r in model.losses:
# reg_loss += r
# reg_grads = K.gradients(reg_loss, trainable_weights)

# grads = [g+r for g, r in zip(grads, reg_grads)]

# opt = tensorflow.keras.optimizers.get(optimizer)
# opt.apply_gradients(zip(grads, trainable_weights))
# Patch keras gradient calculation to allow for user defined gradients
# opt.get_gradients = types.MethodType(get_gradients, opt)
# opt.grads = grads
# model.optimizer = opt

# loss = tf.math.multiply(output[:, 1] - targets[:, 1], output[:, 0] - targets[:, 0])

return grads
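A hypothetical end-to-end check of the helper above (my own sketch, assuming the module is importable as ``deepiv.custom_gradients``): build a tiny model, obtain the custom gradients inside a ``tf.function`` so that ``tf.gradients`` is legal, and apply them with a Keras optimizer, as the commented-out ``apply_gradients`` lines suggest.

```python
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

from deepiv.custom_gradients import unbiased_mse_loss_and_gradients

batch_size, n_samples = 8, 1
x_in = Input(shape=(3,))
model = Model(x_in, Dense(1)(x_in))
opt = tf.keras.optimizers.SGD(learning_rate=1e-3)

@tf.function
def step(x, y_true):
    y_pred = model(x, training=True)
    grads = unbiased_mse_loss_and_gradients(model, y_true, y_pred,
                                            batch_size, n_samples=n_samples)
    opt.apply_gradients(zip(grads, model.trainable_weights))

x = tf.random.normal((batch_size * n_samples * 2, 3))
y = tf.random.normal((batch_size * n_samples * 2, 1))
step(x, y)
```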
10 changes: 5 additions & 5 deletions deepiv/densities.py
@@ -1,12 +1,12 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy
import keras
from keras import backend as K
import tensorflow.keras as keras
from tensorflow.keras import backend as K

from keras.layers.merge import Concatenate
from keras.layers import Lambda
from keras.layers.core import Reshape
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import Reshape

def split(start, stop):
return Lambda(lambda x: x[:, start:stop], output_shape=(None, stop-start))