diff --git a/optimization/adagrad.py b/optimization/adagrad.py
new file mode 100644
index 0000000..e42e9c4
--- /dev/null
+++ b/optimization/adagrad.py
@@ -0,0 +1,60 @@
+import tensorflow as tf
+from typing import Callable
+
+
+class AdaGrad:
+    def __init__(self,
+                 initial_weights: tf.Variable,
+                 initial_gradients: tf.Variable,
+                 learning_rate: float,
+                 max_iteration: int,
+                 loss_fn: Callable) -> None:
+        self.weights = initial_weights
+        # initial_gradients seeds the running accumulator G of squared gradients
+        self.G = initial_gradients
+        self.learning_rate = learning_rate
+        self.max_iteration = max_iteration
+        self.loss_fn = loss_fn
+        self.epsilon = 1e-6
+
+    def optimize(self):
+        first_loss = None
+        for _ in range(self.max_iteration):
+            with tf.GradientTape() as tape:
+                loss = self.loss_fn(self.weights)
+            if first_loss is None:
+                first_loss = loss.numpy()
+            grad = tape.gradient(loss, self.weights)
+
+            assert grad.shape == self.G.shape
+
+            # Accumulator update: G_{t+1} = G_t + g_t * g_t (element-wise)
+            self.G.assign_add(tf.square(grad))
+            # Parameter update: w_{t+1} = w_t - lr * g_t / sqrt(G_{t+1} + eps)
+            step = self.learning_rate * grad / tf.sqrt(self.G + self.epsilon)
+            self.weights.assign_sub(step)
+        final_loss = self.loss_fn(self.weights).numpy()
+        return self.weights, final_loss, first_loss
+
+
+# A simple quadratic loss function: L(w) = sum(w^2), minimized at w = 0
+def quadratic_loss(weights):
+    return tf.reduce_sum(tf.square(weights))
+
+
+if __name__ == '__main__':
+    # Learning rate and maximum iterations
+    learning_rate = 0.1
+    max_iteration = 1000
+    # Initial weights and initial accumulator values
+    initial_weights = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
+    initial_gradients = tf.Variable([0.1, 0.2, 0.3], dtype=tf.float32)
+    optimizer = AdaGrad(initial_weights=initial_weights,
+                        initial_gradients=initial_gradients,
+                        learning_rate=learning_rate,
+                        max_iteration=max_iteration,
+                        loss_fn=quadratic_loss)
+    final_weights, final_loss, initial_loss = optimizer.optimize()
+    print(f'initial loss: {initial_loss}')
+    print(f'final loss:   {final_loss}')
+    print(f'final weights: {final_weights.numpy()}')
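One way to sanity-check the hand-rolled update (a sketch, not part of the patch) is to run TensorFlow's built-in tf.keras.optimizers.Adagrad on the same quadratic loss and confirm the loss shrinks toward zero in a comparable way. Note the Keras optimizer seeds its accumulator with a single scalar (initial_accumulator_value), whereas the class above accepts a per-parameter tensor, so the comparison is approximate rather than exact:

import tensorflow as tf

# Reference run with Keras's built-in AdaGrad (illustrative only; values mirror the __main__ block above).
w = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
opt = tf.keras.optimizers.Adagrad(learning_rate=0.1, initial_accumulator_value=0.1)
for _ in range(1000):
    with tf.GradientTape() as tape:
        loss = tf.reduce_sum(tf.square(w))
    grads = tape.gradient(loss, [w])
    opt.apply_gradients(zip(grads, [w]))
print(float(loss), w.numpy())  # loss should be close to 0, weights close to the origin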