
Commit

Update tf.optimizers -> tf.keras.optimizers.
PiperOrigin-RevId: 577231286
jburnim authored and tensorflower-gardener committed Oct 27, 2023
1 parent b8e04a1 commit bc2e0e0
Showing 54 changed files with 99 additions and 96 deletions.
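The change is mechanical throughout: each use of the `tf.optimizers` alias becomes an explicit `tf.keras.optimizers` reference. A minimal before/after sketch of the pattern (the Adam call below is illustrative only, not tied to any one file in this commit):

```python
import tensorflow as tf

# Before: the optimizer accessed through the top-level alias.
# optimizer = tf.optimizers.Adam(learning_rate=0.01)

# After: the explicit Keras namespace, as used throughout this commit.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
```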
4 changes: 2 additions & 2 deletions STYLE_GUIDE.md
@@ -187,8 +187,8 @@ they supersede all previous conventions.
1. Submodule names should be singular, except where they overlap to TF.
Justification: Having plural looks strange in user code, ie,
-   tf.optimizer.Foo reads nicer than tf.optimizers.Foo since submodules are
-   only used to access a single, specific thing (at a time).
+   tf.optimizer.Foo reads nicer than tf.keras.optimizers.Foo since submodules
+   are only used to access a single, specific thing (at a time).
1. Use `tf.newaxis` rather than `None` to `tf.expand_dims`.
6 changes: 3 additions & 3 deletions SUBSTRATES.md
@@ -75,11 +75,11 @@ vmap, etc.), we will special-case using an `if JAX_MODE:` block.
tests, TFP impl, etc), with `tfp.math.value_and_gradient` or similar. Then,
we can special-case `JAX_MODE` inside the body of `value_and_gradient`.

-* __`tf.Variable`, `tf.optimizers.Optimizer`__
+* __`tf.Variable`, `tf.keras.optimizers.Optimizer`__

TF provides a `Variable` abstraction so that graph functions may modify
-  state, including using the TF `Optimizer` subclasses like `Adam`. JAX, in
-  contrast, operates only on pure functions. In general, TFP is fairly
+  state, including using the Keras `Optimizer` subclasses like `Adam`. JAX,
+  in contrast, operates only on pure functions. In general, TFP is fairly
functional (e.g. `tfp.optimizer.lbfgs_minimize`), but in some cases (e.g.
`tfp.vi.fit_surrogate_posterior`,
`tfp.optimizer.StochasticGradientLangevinDynamics`) we have felt the
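For readers skimming the SUBSTRATES.md hunk above: the excerpt contrasts TF's stateful `tf.Variable` / Keras `Optimizer` workflow with JAX's pure-function style. A minimal sketch of that contrast, assuming a toy quadratic loss and a hand-rolled gradient step on the JAX side (neither is TFP code):

```python
import tensorflow as tf
import jax
import jax.numpy as jnp

# TensorFlow: state lives in a tf.Variable; the Keras optimizer mutates it in place.
x = tf.Variable(1.0)
opt = tf.keras.optimizers.Adam(learning_rate=0.1)

@tf.function
def tf_step():
  with tf.GradientTape() as tape:
    loss = (x - 3.0) ** 2
  opt.apply_gradients([(tape.gradient(loss, x), x)])  # side effect: updates x
  return loss

# JAX: no hidden state; a training step is a pure function from params to params.
def loss_fn(x):
  return (x - 3.0) ** 2

@jax.jit
def jax_step(x):
  return x - 0.1 * jax.grad(loss_fn)(x)  # returns a new value instead of mutating

x_jax = jnp.array(1.0)
for _ in range(100):
  x_jax = jax_step(x_jax)
```

This is the gap the excerpt describes: the stateful TFP entry points have to special-case `JAX_MODE` rather than reuse the Keras optimizer machinery.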
@@ -345,7 +345,7 @@
" unconstrained_observation_noise,\n",
" latent_index_points]\n",
"\n",
"optimizer = tf.optimizers.Adam(learning_rate=1.0)\n",
"optimizer = tf.keras.optimizers.Adam(learning_rate=1.0)\n",
"\n",
"@tf.function(autograph=False, jit_compile=True)\n",
"def train_model():\n",
@@ -541,7 +541,7 @@
"source": [
"# Now we optimize the model parameters.\n",
"num_iters = 1000\n",
"optimizer = tf.optimizers.Adam(learning_rate=.01)\n",
"optimizer = tf.keras.optimizers.Adam(learning_rate=.01)\n",
"\n",
"# Use `tf.function` to trace the loss for more efficient evaluation.\n",
"@tf.function(autograph=False, jit_compile=False)\n",
@@ -800,7 +800,7 @@
},
"outputs": [],
"source": [
"optimizer = tf.optimizers.Adam(learning_rate=1e-2)\n",
"optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)\n",
"\n",
"losses = tfp.vi.fit_surrogate_posterior(\n",
" target_log_prob_fn, \n",
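The `tfp.vi.fit_surrogate_posterior` calls in these notebook hunks change only their `optimizer` argument. As a reference point, here is a self-contained toy fit with the same signature; the standalone Normal target and the trainable-Normal surrogate are assumptions for illustration, not code from the notebooks:

```python
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors

# Toy target and a trainable Normal surrogate (scale kept positive via Softplus).
target = tfd.Normal(loc=3., scale=2.)
surrogate_posterior = tfd.Normal(
    loc=tf.Variable(0., name='loc'),
    scale=tfp.util.TransformedVariable(1., tfb.Softplus(), name='scale'))

losses = tfp.vi.fit_surrogate_posterior(
    target_log_prob_fn=target.log_prob,
    surrogate_posterior=surrogate_posterior,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    num_steps=200)
```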
@@ -743,7 +743,7 @@
" previous_kernel_results=kernel_results)\n",
" return next_state, next_kernel_results\n",
"\n",
"optimizer = tf.optimizers.Adam(learning_rate=.01)\n",
"optimizer = tf.keras.optimizers.Adam(learning_rate=.01)\n",
"\n",
"# Set up M-step (gradient descent).\n",
"@tf.function(autograph=False, jit_compile=True)\n",
@@ -317,7 +317,7 @@
"\n",
"losses = tfp.math.minimize(\n",
" lambda: -log_prob(),\n",
" optimizer=tf.optimizers.Adam(learning_rate=0.1),\n",
" optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),\n",
" num_steps=100)\n",
"plt.plot(losses)\n",
"plt.ylabel('Negative log marginal likelihood')"
@@ -740,7 +740,7 @@
"source": [
"losses = tfp.math.minimize(\n",
" lambda: -log_prob(),\n",
" optimizer=tf.optimizers.Adam(0.1),\n",
" optimizer=tf.keras.optimizers.Adam(0.1),\n",
" num_steps=100)\n",
"plt.plot(losses)\n",
"plt.ylabel('Negative log marginal likelihood')"
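Several hunks above pass the optimizer to `tfp.math.minimize`, which repeatedly evaluates a zero-argument loss callable, applies the optimizer's updates to the trainable variables it touches, and returns the per-step losses. A standalone toy sketch (the quadratic loss is an assumption for illustration):

```python
import tensorflow as tf
import tensorflow_probability as tfp

x = tf.Variable(0.)

# Drive a toy quadratic toward its minimum at 3; `losses` has shape [num_steps].
losses = tfp.math.minimize(
    lambda: (x - 3.) ** 2,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    num_steps=100)
```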
@@ -289,7 +289,7 @@
"])\n",
"\n",
"# Do inference.\n",
"model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)\n",
"model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)\n",
"model.fit(x, y, epochs=1000, verbose=False);\n",
"\n",
"# Profit.\n",
@@ -391,7 +391,7 @@
"])\n",
"\n",
"# Do inference.\n",
"model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)\n",
"model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)\n",
"model.fit(x, y, epochs=1000, verbose=False);\n",
"\n",
"# Profit.\n",
@@ -540,7 +540,7 @@
"])\n",
"\n",
"# Do inference.\n",
"model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)\n",
"model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)\n",
"model.fit(x, y, epochs=1000, verbose=False);\n",
"\n",
"# Profit.\n",
@@ -650,7 +650,7 @@
"])\n",
"\n",
"# Do inference.\n",
"model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)\n",
"model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)\n",
"model.fit(x, y, epochs=1000, verbose=False);\n",
"\n",
"# Profit.\n",
@@ -806,7 +806,7 @@
"batch_size = 32\n",
"loss = lambda y, rv_y: rv_y.variational_loss(\n",
" y, kl_weight=np.array(batch_size, x.dtype) / x.shape[0])\n",
"model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=loss)\n",
"model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=loss)\n",
"model.fit(x, y, batch_size=batch_size, epochs=1000, verbose=False)\n",
"\n",
"# Profit.\n",
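The `model.compile(...)` cells above belong to Keras models whose final layer outputs a TFP distribution, so the `negloglik` loss can score predictions with `log_prob`. A condensed sketch of that pattern; the synthetic linear data and the unit-scale Normal head are assumptions, not taken from the notebook:

```python
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

# Synthetic data, for illustration only.
x = np.linspace(-1., 1., 100).astype(np.float32)[:, np.newaxis]
y = (2. * x + 0.1 * np.random.randn(100, 1)).astype(np.float32)

negloglik = lambda y_true, rv_y: -rv_y.log_prob(y_true)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(1),
    tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=1.)),
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=100, verbose=False)
```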
@@ -434,7 +434,7 @@
"source": [
"negloglik = lambda x, rv_x: -rv_x.log_prob(x)\n",
"\n",
"vae.compile(optimizer=tf.optimizers.Adam(learning_rate=1e-3),\n",
"vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),\n",
" loss=negloglik)\n",
"\n",
"_ = vae.fit(train_dataset,\n",
@@ -337,7 +337,7 @@
"target_log_prob_fn = lambda w, z: model.log_prob((w, z, x_train))\n",
"losses = tfp.math.minimize(\n",
" lambda: -target_log_prob_fn(w, z),\n",
" optimizer=tf.optimizers.Adam(learning_rate=0.05),\n",
" optimizer=tf.keras.optimizers.Adam(learning_rate=0.05),\n",
" num_steps=200)"
]
},
@@ -479,7 +479,7 @@
"losses = tfp.vi.fit_surrogate_posterior(\n",
" target_log_prob_fn,\n",
" surrogate_posterior=surrogate_posterior,\n",
" optimizer=tf.optimizers.Adam(learning_rate=0.05),\n",
" optimizer=tf.keras.optimizers.Adam(learning_rate=0.05),\n",
" num_steps=200)"
]
},
@@ -660,7 +660,7 @@
"t0 = time.time()\n",
"losses = tfp.vi.fit_surrogate_posterior(pinned_model.unnormalized_log_prob,\n",
" surrogate_posterior,\n",
" optimizer=tf.optimizers.Adam(0.1),\n",
" optimizer=tf.keras.optimizers.Adam(0.1),\n",
" num_steps=num_variational_steps)\n",
"t1 = time.time()\n",
"print(\"Inference ran in {:.2f}s.\".format(t1-t0))"
@@ -1237,7 +1237,7 @@
"\r\n",
"asvi_losses = tfp.vi.fit_surrogate_posterior(target_log_prob,\r\n",
" asvi_surrogate_posterior,\r\n",
" optimizer=tf.optimizers.Adam(learning_rate=0.1),\r\n",
" optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),\r\n",
" num_steps=500)\r\n",
"logging.getLogger('tensorflow').setLevel(logging.NOTSET)"
]
@@ -1255,7 +1255,7 @@
"\r\n",
"factored_losses = tfp.vi.fit_surrogate_posterior(target_log_prob,\r\n",
" factored_surrogate_posterior,\r\n",
" optimizer=tf.optimizers.Adam(learning_rate=0.1),\r\n",
" optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),\r\n",
" num_steps=500)"
]
},
@@ -512,7 +512,7 @@
}
],
"source": [
"optimizer = tf.optimizers.Adam(learning_rate=1e-2)\n",
"optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)\n",
"mvn_loss = tfp.vi.fit_surrogate_posterior(\n",
" target_model.unnormalized_log_prob,\n",
" surrogate_posterior,\n",
@@ -706,7 +706,7 @@
}
],
"source": [
"optimizer=tf.optimizers.Adam(learning_rate=1e-2)\n",
"optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2)\n",
"iaf_loss = tfp.vi.fit_surrogate_posterior(\n",
" target_model.unnormalized_log_prob,\n",
" iaf_surrogate_posterior,\n",
@@ -830,7 +830,7 @@
" mean_field_scale # apply the block matrix transformation to the standard Normal distribution\n",
" ]))\n",
"\n",
"optimizer=tf.optimizers.Adam(learning_rate=1e-2)\n",
"optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2)\n",
"mean_field_loss = tfp.vi.fit_surrogate_posterior(\n",
" target_model.unnormalized_log_prob,\n",
" mean_field_surrogate_posterior,\n",
@@ -664,7 +664,7 @@ class AutoregressiveNetwork(tf.keras.layers.Layer):
log_prob_ = distribution.log_prob(x_)
model = tfk.Model(x_, log_prob_)

-  model.compile(optimizer=tf.optimizers.Adam(),
+  model.compile(optimizer=tf.keras.optimizers.Adam(),
loss=lambda _, log_prob: -log_prob)

batch_size = 25
@@ -718,7 +718,7 @@ class AutoregressiveNetwork(tf.keras.layers.Layer):
x_, bijector_kwargs={'conditional_input': c_})
model = tfk.Model([x_, c_], log_prob_)

-  model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.1),
+  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
loss=lambda _, log_prob: -log_prob)

batch_size = 25
@@ -780,7 +780,7 @@ class AutoregressiveNetwork(tf.keras.layers.Layer):
log_prob_ = distribution.log_prob(x_)
model = tfk.Model(x_, log_prob_)

-  model.compile(optimizer=tf.optimizers.Adam(),
+  model.compile(optimizer=tf.keras.optimizers.Adam(),
loss=lambda _, log_prob: -log_prob)

batch_size = 10
@@ -838,7 +838,7 @@ class AutoregressiveNetwork(tf.keras.layers.Layer):
log_prob_ = distribution.log_prob(x_)
model = tfk.Model(x_, log_prob_)

-  model.compile(optimizer=tf.optimizers.Adam(),
+  model.compile(optimizer=tf.keras.optimizers.Adam(),
loss=lambda _, log_prob: -log_prob)

batch_size = 10
@@ -221,7 +221,7 @@ class GaussianProcess(

gp = tfd.GaussianProcess(kernel, observed_index_points)

-  optimizer = tf.optimizers.Adam()
+  optimizer = tf.keras.optimizers.Adam()

@tf.function
def optimize():
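The `GaussianProcess` docstring hunk above swaps the optimizer inside a hyperparameter-fitting loop. A condensed, self-contained version of that loop; the synthetic observations and the Softplus-constrained kernel parameters are assumptions standing in for the elided docstring context:

```python
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors
psd_kernels = tfp.math.psd_kernels

# Synthetic 1-D observations, for illustration only.
observed_index_points = np.random.uniform(-1., 1., (50, 1)).astype(np.float32)
observations = np.sin(2. * observed_index_points[..., 0]).astype(np.float32)

# Positivity-constrained, trainable kernel hyperparameters.
amplitude = tfp.util.TransformedVariable(1., tfb.Softplus(), name='amplitude')
length_scale = tfp.util.TransformedVariable(1., tfb.Softplus(), name='length_scale')
kernel = psd_kernels.ExponentiatedQuadratic(amplitude, length_scale)

gp = tfd.GaussianProcess(kernel, observed_index_points)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)

@tf.function
def optimize():
  with tf.GradientTape() as tape:
    loss = -gp.log_prob(observations)  # negative marginal log-likelihood
  grads = tape.gradient(loss, gp.trainable_variables)
  optimizer.apply_gradients(zip(grads, gp.trainable_variables))
  return loss

for _ in range(100):
  neg_log_likelihood = optimize()
```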
@@ -190,7 +190,7 @@ class GaussianProcessRegressionModel(
index_points=observation_index_points,
observation_noise_variance=observation_noise_variance)

-  optimizer = tf.optimizers.Adam(learning_rate=.05, beta_1=.5, beta_2=.99)
+  optimizer = tf.keras.optimizers.Adam(learning_rate=.05, beta_1=.5, beta_2=.99)

@tf.function
def optimize():
@@ -226,7 +226,7 @@ class StudentTProcess(distribution.AutoCompositeTensorDistribution):

tp = tfd.StudentTProcess(3., kernel, observed_index_points)

-  optimizer = tf.optimizers.Adam()
+  optimizer = tf.keras.optimizers.Adam()

@tf.function
def optimize():
@@ -558,7 +558,7 @@ class VariationalGaussianProcess(gaussian_process.GaussianProcess,
# For training, we use some simplistic numpy-based minibatching.
batch_size = 64

-  optimizer = tf.optimizers.Adam(learning_rate=.1)
+  optimizer = tf.keras.optimizers.Adam(learning_rate=.1)

@tf.function
def optimize(x_train_batch, y_train_batch):
@@ -670,7 +670,7 @@ def optimize(x_train_batch, y_train_batch):
# For training, we use some simplistic numpy-based minibatching.
batch_size = 64

-  optimizer = tf.optimizers.Adam(learning_rate=.05, beta_1=.5, beta_2=.99)
+  optimizer = tf.keras.optimizers.Adam(learning_rate=.05, beta_1=.5, beta_2=.99)

@tf.function
def optimize(x_train_batch, y_train_batch):
@@ -107,7 +107,7 @@ def make_distribution_bijector(distribution, name='make_distribution_bijector'):
pinned_model)
_ = tfp.vi.fit_surrogate_posterior(pinned_model.unnormalized_log_prob,
surrogate_posterior=surrogate_posterior,
-                                     optimizer=tf.optimizers.Adam(0.01),
+                                     optimizer=tf.keras.optimizers.Adam(0.01),
num_steps=200)
```

@@ -205,7 +205,7 @@ def model_with_funnel():
optimization.fit_surrogate_posterior(
pinned_model.unnormalized_log_prob,
surrogate_posterior=surrogate_posterior,
-      optimizer=tf.optimizers.Adam(0.01),
+      optimizer=tf.keras.optimizers.Adam(0.01),
sample_size=10,
num_steps=1)
bijector = (
@@ -142,7 +142,7 @@ def target_log_prob_fn(x):
importance_weighted_losses = tfp.vi.fit_surrogate_posterior(
target_log_prob_fn,
surrogate_posterior=proposal_distribution,
-    optimizer=tf.optimizers.Adam(0.1),
+    optimizer=tf.keras.optimizers.Adam(0.1),
num_steps=200,
importance_sample_size=importance_sample_size)
approximate_posterior = tfed.ImportanceResample(
@@ -167,7 +167,7 @@ def target_log_prob_fn(x):
proposal_distribution=proposal_distribution,
target_log_prob_fn=target_log_prob_fn,
importance_sample_size=importance_sample_size),
-    optimizer=tf.optimizers.Adam(0.1),
+    optimizer=tf.keras.optimizers.Adam(0.1),
num_steps=200)
```

@@ -246,7 +246,7 @@ def target_log_prob_fn(loc, scale):
pulled_back_shape)
vars = tf.nest.map_structure(tf.Variable, uniform_init)

-  opt = tf.optimizers.Adam(.01)
+  opt = tf.keras.optimizers.Adam(.01)

@tf.function(autograph=False)
def one_step():
@@ -206,7 +206,7 @@ def loss_fn():
kl = bnn.extra_loss / tf.cast(train_size, tf.float32)
loss = nll + kl
return loss, (nll, kl)
-  opt = tf.optimizers.Adam()
+  opt = tf.keras.optimizers.Adam()
fit_op = tfn.util.make_fit_op(loss_fn, opt, bnn.trainable_variables)
for _ in range(200):
loss, (nll, kl), g = fit_op()
@@ -87,7 +87,7 @@ def loss_fn():
nll = -tf.reduce_mean(bnn(x).log_prob(y), axis=-1)
kl = tfn.losses.compute_extra_loss(bnn) / n
return nll + kl, (nll, kl)
-  opt = tf.optimizers.Adam()
+  opt = tf.keras.optimizers.Adam()
fit_op = tfn.util.make_fit_op(loss_fn, opt, bnn.trainable_variables)
for _ in range(2):
loss, (nll, kl) = fit_op() # pylint: disable=unused-variable
@@ -316,7 +316,7 @@ def loss_fn():
kl = bnn.extra_loss # Already normalized via `penalty_weight` arg.
loss = nll + kl
return loss, (nll, kl)
-  opt = tf.optimizers.Adam()
+  opt = tf.keras.optimizers.Adam()
fit_op = tfn.util.make_fit_op(loss_fn, opt, bnn.trainable_variables)
for _ in range(200):
loss, (nll, kl), g = fit_op()
@@ -79,7 +79,7 @@ def loss_fn():
nll = -tf.reduce_mean(bnn(x).log_prob(y), axis=-1)
kl = tfn.losses.compute_extra_loss(bnn) / n
return nll + kl, (nll, kl)
-  opt = tf.optimizers.Adam()
+  opt = tf.keras.optimizers.Adam()
fit_op = tfn.util.make_fit_op(loss_fn, opt, bnn.trainable_variables)
for _ in range(2):
loss, (nll, kl) = fit_op() # pylint: disable=unused-variable
@@ -316,7 +316,7 @@ def loss_fn():
kl = bnn.extra_loss # Already normalized via `penalty_weight` arg.
loss = nll + kl
return loss, (nll, kl)
-  opt = tf.optimizers.Adam()
+  opt = tf.keras.optimizers.Adam()
fit_op = tfn.util.make_fit_op(loss_fn, opt, bnn.trainable_variables)
for _ in range(200):
loss, (nll, kl), g = fit_op()
@@ -78,7 +78,7 @@ def loss_fn():
nll = -tf.reduce_mean(bnn(x).log_prob(y), axis=-1)
kl = tfn.losses.compute_extra_loss(bnn) / n
return nll + kl, (nll, kl)
-  opt = tf.optimizers.Adam()
+  opt = tf.keras.optimizers.Adam()
fit_op = tfn.util.make_fit_op(loss_fn, opt, bnn.trainable_variables)
for _ in range(2):
loss, (nll, kl) = fit_op() # pylint: disable=unused-variable
@@ -316,7 +316,7 @@ def loss_fn():
kl = bnn.extra_loss / tf.cast(train_size, tf.float32)
loss = nll + kl
return loss, (nll, kl)
-  opt = tf.optimizers.Adam()
+  opt = tf.keras.optimizers.Adam()
fit_op = tfn.util.make_fit_op(loss_fn, opt, bnn.trainable_variables)
for _ in range(200):
loss, (nll, kl), g = fit_op()