
fix-and-test
gmjw committed Oct 2, 2023
1 parent 6cc612f commit 5c2e49f
Showing 2 changed files with 60 additions and 5 deletions.
21 changes: 16 additions & 5 deletions tensorflow_probability/python/optimizer/bfgs.py
@@ -61,8 +61,11 @@
                                         # `final_position`. If the search
                                         # converged, the max-norm of this
                                         # tensor should be below the
                                         # tolerance.
-        'inverse_hessian_estimate'       # A tensor containing the inverse of
-                                         # the estimated Hessian.
+        'inverse_hessian_estimate',      # A tensor containing the inverse of
+                                         # the estimated Hessian.
+        'scale_initial_inverse_hessian'  # Whether the initial inverse Hessian
+                                         # estimate should be rescaled on the
+                                         # first iteration, as per Chapter 6
+                                         # of Nocedal and Wright.
])


@@ -72,6 +75,7 @@ def minimize(value_and_gradients_function,
             x_tolerance=0,
             f_relative_tolerance=0,
             initial_inverse_hessian_estimate=None,
+            scale_initial_inverse_hessian=True,
             max_iterations=50,
             parallel_iterations=1,
             stopping_condition=None,
@@ -290,6 +294,7 @@ def _body(state):
          tolerance,
          control_inputs)
  kwargs['inverse_hessian_estimate'] = initial_inv_hessian
+ kwargs['scale_initial_inverse_hessian'] = scale_initial_inverse_hessian
  initial_state = BfgsOptimizerResults(**kwargs)
  return tf.while_loop(
      cond=_cond,
@@ -355,9 +360,15 @@ def _update_inv_hessian(prev_state, next_state):
  # Rescale the initial hessian at the first step, as suggested
  # in Chapter 6 of Numerical Optimization, by Nocedal and Wright.
  scale_factor = tf.where(
-     tf.math.equal(prev_state.num_iterations, 0),
-     normalization_factor / tf.reduce_sum(
-         tf.math.square(gradient_delta), axis=-1), 1.)
+     (
+         tf.math.equal(prev_state.num_iterations, 0) &
+         prev_state.scale_initial_inverse_hessian
+     ),
+     normalization_factor / tf.reduce_sum(
+         tf.math.square(gradient_delta), axis=-1
+     ),
+     1.
+ )

  inverse_hessian_estimate = scale_factor[
      ..., tf.newaxis, tf.newaxis] * prev_state.inverse_hessian_estimate
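For reference, the rescaling this option controls is the initial-scaling heuristic from Chapter 6 of Nocedal and Wright's Numerical Optimization: before the first BFGS update, the identity initial inverse Hessian is replaced by gamma * I, where gamma = (s^T y) / (y^T y) for the first step's position delta s and gradient delta y. A minimal NumPy sketch of that heuristic (illustrative only, not the library's implementation; the function name is hypothetical):

import numpy as np

def scaled_initial_inverse_hessian(position_delta, gradient_delta):
  # Nocedal & Wright, Ch. 6: H0 = gamma * I, with
  # gamma = (s^T y) / (y^T y) computed from the first step's
  # position delta s and gradient delta y.
  s, y = position_delta, gradient_delta
  gamma = np.dot(s, y) / np.dot(y, y)
  return gamma * np.eye(len(s))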
44 changes: 44 additions & 0 deletions tensorflow_probability/python/optimizer/bfgs_test.py
@@ -427,6 +427,50 @@ def himmelblau(coord):
    self.assertArrayNear(actual, expected, 1e-5)
    self.assertEqual(batch_results.num_objective_evaluations, 31)

+  def test_scale_initial_inverse_hessian(self):
+    """Tests optional scaling of the initial inverse Hessian estimate.
+
+    Shows that the value of this option changes the behaviour of the
+    BFGS optimisation.
+    """
+    @_make_val_and_grad_fn
+    def sin_x_plus_sin_y(coord):
+      x, y = coord[0], coord[1]
+      return tf.math.sin(x) + tf.math.sin(y)
+
+    start = tf.constant((1, -2), dtype=np.float64)
+
+    results = {}
+    for scale in (True, False):
+      for max_iter in (1, 2, 50):
+        results[scale, max_iter] = self.evaluate(
+            bfgs.minimize(
+                sin_x_plus_sin_y,
+                initial_position=start,
+                tolerance=1e-8,
+                scale_initial_inverse_hessian=scale,
+                max_iterations=max_iter,
+            )
+        )
+
+    expected_positions = {
+        # Positions traced by the optimisation on the first iteration
+        # are not affected by the choice of `scale_initial_inverse_hessian`.
+        (True, 1): (-0.62581634, -0.7477782),
+        (False, 1): (-0.62581634, -0.7477782),
+        # However, gradient calculations on the first iteration _are_
+        # affected, and these affect the positions identified on the
+        # second iteration.
+        (True, 2): (-1.70200959, -0.37774139),
+        (False, 2): (-1.24714478, -0.55028845),
+        # Both settings eventually converge to the same minimum (though
+        # this is not guaranteed in general; it depends on the problem
+        # being solved).
+        (True, 50): (-1.57079633, -1.57079633),
+        (False, 50): (-1.57079633, -1.57079633),
+    }
+
+    for key, res in results.items():
+      self.assertArrayNear(res.position, expected_positions[key], 1e-6)

  def test_data_fitting(self):
    """Tests MLE estimation for a simple geometric GLM."""
    n, dim = 100, 3
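As a usage sketch: the new flag is a keyword argument of this module's minimize, which is exposed publicly as tfp.optimizer.bfgs_minimize. A minimal example, assuming a TFP build that includes this commit (the quadratic objective here is illustrative, not from the source):

import tensorflow as tf
import tensorflow_probability as tfp

def quadratic(x):
  # Returns (value, gradient), as bfgs_minimize requires.
  value = tf.reduce_sum(tf.square(x - 2.0), axis=-1)
  grad = 2.0 * (x - 2.0)
  return value, grad

start = tf.constant([0.0, 0.0], dtype=tf.float64)

# Opt out of the first-iteration rescaling; the default (True)
# matches the behaviour suggested by Nocedal and Wright.
results = tfp.optimizer.bfgs_minimize(
    quadratic,
    initial_position=start,
    tolerance=1e-8,
    scale_initial_inverse_hessian=False)
print(results.converged, results.position)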
