Commit dbe64df

Commit message: wip
1 parent 6428459 commit dbe64df

File tree: 7 files changed, +453 −242 lines

src/line_search/cubic_quadratic.rs (10 additions, 9 deletions)

@@ -306,7 +306,7 @@ impl CubicQuadraticLineSearch {
 impl LineSearch for CubicQuadraticLineSearch {
     fn search(
         &mut self,
-        context: OptimizationContext,
+        mut context: OptimizationContext,
         current_params: &[f64],
         direction: &[f64],
         initial_loss: f64,
@@ -325,9 +325,10 @@ impl LineSearch for CubicQuadraticLineSearch {
             return Err(anyhow!("Direction is not a descent direction: g0 = {:.6e} >= 0. This indicates the search direction is pointing uphill.", g0));
         }
         // Helper to evaluate function and gradient
-        let evaluate = |alpha: f64, cx: &mut Graph| -> anyhow::Result<(f64, f64)> {
+        let ctx1 = &mut context;
+        let mut evaluate = |alpha: f64| -> anyhow::Result<(f64, f64)> {
             let (loss_val, grad_val) =
-                self.evaluate_with_gradient(&context, current_params, direction, alpha)?;
+                self.evaluate_with_gradient(ctx1, current_params, direction, alpha)?;
             let dir_deriv: f64 = grad_val
                 .iter()
                 .zip(direction.iter())
@@ -338,12 +339,12 @@ impl LineSearch for CubicQuadraticLineSearch {

         // Verify we can make progress
         let test_step = self.config.min_step;
-        let (f_test, _) = evaluate(test_step, context.graph())?;
+        let (f_test, _) = evaluate(test_step)?;
         num_f_evals += 1;
         num_g_evals += 1;
         if f_test >= f0 {
             let eps_step = f64::EPSILON.sqrt();
-            let (f_eps, _) = evaluate(eps_step, context.graph())?;
+            let (f_eps, _) = evaluate(eps_step)?;
             num_f_evals += 1;
             num_g_evals += 1;
             if f_eps < f0 {
@@ -357,7 +358,7 @@ impl LineSearch for CubicQuadraticLineSearch {
             }
             // Try a slightly larger step
             let small_step = 1e-8;
-            let (f_small, _) = evaluate(small_step, context.graph())?;
+            let (f_small, _) = evaluate(small_step)?;
             num_f_evals += 1;
             num_g_evals += 1;
             if f_small < f0 {
@@ -389,7 +390,7 @@ impl LineSearch for CubicQuadraticLineSearch {
         ));
         for iter in 0..self.config.max_iterations {
             // Evaluate at current step
-            let (f_alpha, g_alpha) = evaluate(alpha, context.graph())?;
+            let (f_alpha, g_alpha) = evaluate(alpha)?;
             num_f_evals += 1;
             num_g_evals += 1;
             // Track best point
@@ -476,7 +477,7 @@ impl LineSearch for CubicQuadraticLineSearch {
         } else {
             // Try a very small step as last resort
             let small_step = self.config.min_step * 10.0;
-            let (f_small, _) = evaluate(small_step, context.graph())?;
+            let (f_small, _) = evaluate(small_step)?;
             num_f_evals += 1;
             num_g_evals += 1;
             if f_small < f0 {
@@ -692,4 +693,4 @@ mod tests {
         let line_search = CubicQuadraticLineSearch::with_config(custom_config);
         assert_eq!(line_search.config.c1, 1e-5);
     }
-}
+}
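The central change in this file is the reborrow `let ctx1 = &mut context;`: instead of threading a `&mut Graph` into `evaluate` at every call site, the closure captures one mutable borrow of the context and reborrows it on each call. A minimal standalone sketch of that pattern (`Ctx` and `eval_at` are hypothetical stand-ins for `OptimizationContext` and `evaluate_with_gradient`):

```rust
struct Ctx {
    evals: usize,
}

// Stand-in for evaluate_with_gradient: needs `&mut Ctx` on every call.
fn eval_at(ctx: &mut Ctx, alpha: f64) -> f64 {
    ctx.evals += 1;
    (alpha - 0.3).powi(2)
}

fn main() {
    let mut context = Ctx { evals: 0 };
    // One mutable borrow taken up front; the FnMut closure reborrows it
    // per call, so call sites shrink from evaluate(a, ctx) to evaluate(a).
    let ctx1 = &mut context;
    let mut evaluate = |alpha: f64| -> f64 { eval_at(ctx1, alpha) };
    let f_a = evaluate(0.25);
    let f_b = evaluate(0.9);
    assert!(f_a < f_b);
    // The borrow ends at the closure's last use; context is usable again.
    assert_eq!(context.evals, 2);
}
```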

src/line_search/golden_section.rs (2 additions, 2 deletions)

@@ -128,7 +128,7 @@ pub struct GoldenSectionLineSearch {
 impl LineSearch for GoldenSectionLineSearch {
     fn search(
         &mut self,
-        context: OptimizationContext,
+        mut context: OptimizationContext,
         current_params: &[f64],
         direction: &[f64],
         initial_loss: f64,
@@ -143,7 +143,7 @@ impl LineSearch for GoldenSectionLineSearch {
             }
             num_f_evals += 1;

-            self.evaluate_at_step(&context, current_params, direction, step)
+            self.evaluate_at_step(&mut context, current_params, direction, step)
         };

         let mut result =
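For reference on what the closure above drives: golden-section search shrinks a bracket `[a, b]` around a minimum by probing interior points placed at the inverse golden ratio, using only function values. A self-contained sketch on a plain 1D function (a naive variant that re-evaluates both probes each iteration; not the crate's implementation):

```rust
// Minimal golden-section minimization of a unimodal f on [a, b].
fn golden_section(f: impl Fn(f64) -> f64, mut a: f64, mut b: f64, tol: f64) -> f64 {
    let inv_phi = (5f64.sqrt() - 1.0) / 2.0; // ~0.618
    let mut c = b - inv_phi * (b - a);
    let mut d = a + inv_phi * (b - a);
    while (b - a).abs() > tol {
        if f(c) < f(d) {
            b = d; // minimum lies in [a, d]
        } else {
            a = c; // minimum lies in [c, b]
        }
        c = b - inv_phi * (b - a);
        d = a + inv_phi * (b - a);
    }
    0.5 * (a + b)
}

fn main() {
    let alpha = golden_section(|x| (x - 0.3).powi(2), 0.0, 1.0, 1e-8);
    assert!((alpha - 0.3).abs() < 1e-6);
}
```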

src/line_search/line_search.rs (36 additions, 30 deletions)

@@ -152,6 +152,24 @@ pub fn create_line_search(config: LineSearchConfig) -> Box<dyn LineSearch> {
     }
 }

+fn unflatten_tensors(
+    flat: &[f64],
+    shapes: &[Vec<usize>],
+) -> Result<Vec<Vec<f32>>> {
+    let mut result = Vec::new();
+    let mut offset = 0;
+    for shape in shapes {
+        let size: usize = shape.iter().product();
+        if offset + size > flat.len() {
+            return Err(anyhow::anyhow!("Size mismatch in unflattening"));
+        }
+        let chunk = &flat[offset..offset + size];
+        result.push(chunk.iter().map(|&x| x as f32).collect());
+        offset += size;
+    }
+    Ok(result)
+}
+
 /// Trait for line search algorithms
 pub trait LineSearch: Send + Sync + Debug {
     /// Perform 1D line search optimization
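The new `unflatten_tensors` helper splits a flat `f64` parameter vector back into one `f32` buffer per weight shape, erroring if the shapes ask for more data than the slice holds. A usage sketch (the helper is repeated verbatim from the hunk above so the snippet compiles on its own; the shapes are arbitrary illustrations):

```rust
use anyhow::Result;

fn unflatten_tensors(flat: &[f64], shapes: &[Vec<usize>]) -> Result<Vec<Vec<f32>>> {
    let mut result = Vec::new();
    let mut offset = 0;
    for shape in shapes {
        let size: usize = shape.iter().product();
        if offset + size > flat.len() {
            return Err(anyhow::anyhow!("Size mismatch in unflattening"));
        }
        let chunk = &flat[offset..offset + size];
        result.push(chunk.iter().map(|&x| x as f32).collect());
        offset += size;
    }
    Ok(result)
}

fn main() -> Result<()> {
    // A 2x2 weight matrix followed by a 2-element bias, flattened together.
    let flat: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 0.5, -0.5];
    let shapes = vec![vec![2, 2], vec![2]];
    let tensors = unflatten_tensors(&flat, &shapes)?;
    assert_eq!(tensors[0], vec![1.0f32, 2.0, 3.0, 4.0]);
    assert_eq!(tensors[1], vec![0.5f32, -0.5]);
    // An undersized slice is caught rather than silently truncated.
    assert!(unflatten_tensors(&flat[..5], &shapes).is_err());
    Ok(())
}
```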
@@ -191,32 +209,26 @@ pub trait LineSearch: Send + Sync + Debug {
     /// executes the graph, and returns the loss value.
     fn evaluate_at_step(
         &self,
-        context: &OptimizationContext,
+        context: &mut OptimizationContext,
         current_params: &[f64],
         direction: &[f64],
         step: f64,
     ) -> Result<f64> {
         if self.is_verbose() {
             println!("LineSearch: Evaluating f(x + alpha * d) at alpha = {:.6e}", step);
         }
-        let candidate_params: Vec<f32> = current_params
+        let candidate_params: Vec<f64> = current_params
             .iter()
             .zip(direction.iter())
-            .map(|(x, d)| (x + step * d) as f32)
+            .map(|(x, d)| x + step * d)
             .collect();

-        let mut offset = 0;
-        for weight in &context.weights {
-            let len: usize = weight.shape.to_shape().iter().map(|d| d.to_usize().unwrap()).product();
-
-            if offset + len > candidate_params.len() {
-                return Err(anyhow::anyhow!("Parameter size mismatch"));
-            }
-
-            let chunk = &candidate_params[offset..offset + len];
-            context.graph().set_tensor(weight.id, 0, Tensor::new(chunk.to_vec()));
-            offset += len;
-        }
+        let shapes = context.weights.iter().map(|w| w.shape.to_shape().iter().map(
+            |&d| d.to_usize().unwrap()
+        ).collect_vec()).collect::<Vec<_>>();
+
+        let mut weights_data = unflatten_tensors(&candidate_params, &shapes)?;
+        context.write_weights(&mut weights_data);

         context.graph().execute();
         let f_val = context
@@ -235,32 +247,26 @@ pub trait LineSearch: Send + Sync + Debug {
     /// This is more efficient than separate calls when both are needed.
     fn evaluate_with_gradient(
         &self,
-        context: &OptimizationContext,
+        context: &mut OptimizationContext,
         current_params: &[f64],
         direction: &[f64],
         step: f64,
     ) -> Result<(f64, Vec<f64>)> {
         if self.is_verbose() {
             println!("LineSearch: Evaluating f and g at alpha = {:.6e}", step);
         }
-        let candidate_params: Vec<f32> = current_params
+        let candidate_params: Vec<f64> = current_params
             .iter()
             .zip(direction.iter())
-            .map(|(x, d)| (x + step * d) as f32)
+            .map(|(x, d)| x + step * d)
             .collect();

-        let mut offset = 0;
-        for weight in &context.weights {
-            let len: usize = weight.shape.to_shape().iter().map(|d| d.to_usize().unwrap()).product();
-
-            if offset + len > candidate_params.len() {
-                return Err(anyhow::anyhow!("Parameter size mismatch"));
-            }
-
-            let chunk = &candidate_params[offset..offset + len];
-            context.graph().set_tensor(weight.id, 0, Tensor::new(chunk.to_vec()));
-            offset += len;
-        }
+        let shapes = context.weights.iter().map(|w| w.shape.to_shape().iter().map(
+            |&d| d.to_usize().unwrap()
+        ).collect_vec()).collect::<Vec<_>>();
+
+        let mut weights_data = unflatten_tensors(&candidate_params, &shapes)?;
+        context.write_weights(&mut weights_data);

         context.graph().execute();
         // Get loss
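One import note on the replacement block: `collect_vec()` is not in std; it comes from the `itertools` crate's `Itertools` trait, which must be in scope (otherwise plain `.collect::<Vec<_>>()` works). A compiling miniature of the shapes extraction, with a hypothetical `Weight` stand-in since the real type goes through `shape.to_shape()` and dimension objects:

```rust
use itertools::Itertools; // provides collect_vec()

// Hypothetical stand-in for the weight metadata used above.
struct Weight {
    shape: Vec<usize>,
}

fn main() {
    let weights = vec![Weight { shape: vec![2, 3] }, Weight { shape: vec![3] }];
    // Mirrors the diff: one shape vector per weight, collected into Vec<Vec<usize>>.
    let shapes = weights
        .iter()
        .map(|w| w.shape.iter().copied().collect_vec())
        .collect::<Vec<_>>();
    assert_eq!(shapes, vec![vec![2, 3], vec![3]]);
}
```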

src/line_search/more_thuente.rs (2 additions, 2 deletions)

@@ -445,7 +445,7 @@ impl MoreThuenteLineSearch {
 impl LineSearch for MoreThuenteLineSearch {
     fn search(
         &mut self,
-        context: OptimizationContext,
+        mut context: OptimizationContext,
         current_params: &[f64],
         direction: &[f64],
         initial_loss: f64,
@@ -471,7 +471,7 @@ impl LineSearch for MoreThuenteLineSearch {
         // Helper to evaluate function and gradient at a step size
         let mut evaluate = |step: f64| -> Result<(f64, f64)> {
             let (loss_val, grad_data) =
-                self.evaluate_with_gradient(&context, current_params, direction, step)?;
+                self.evaluate_with_gradient(&mut context, current_params, direction, step)?;
             let dir_deriv: f64 = grad_data
                 .iter()
                 .zip(direction.iter())
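Both this closure and the one in cubic_quadratic.rs reduce the full gradient to the directional derivative `φ'(α) = ∇f(x + αd) · d`, the only gradient information a 1D search needs. A standalone sketch of that reduction and of the `g0 < 0` descent check it feeds:

```rust
/// Directional derivative of f along d, given the full gradient at a point.
fn directional_derivative(grad: &[f64], direction: &[f64]) -> f64 {
    grad.iter().zip(direction.iter()).map(|(g, d)| g * d).sum()
}

fn main() {
    // f(x) = x0^2 + x1^2; the gradient at (1, 2) is (2, 4).
    let grad = [2.0, 4.0];
    // A descent direction: the negative gradient.
    let direction = [-2.0, -4.0];
    let g0 = directional_derivative(&grad, &direction);
    // g0 < 0 is exactly the "descent direction" check in `search` above.
    assert_eq!(g0, -20.0);
}
```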

src/line_search/strong_wolfe.rs (2 additions, 2 deletions)

@@ -396,7 +396,7 @@ impl StrongWolfeLineSearch {
 impl LineSearch for StrongWolfeLineSearch {
     fn search(
         &mut self,
-        context: OptimizationContext,
+        mut context: OptimizationContext,
         current_params: &[f64],
         direction: &[f64],
         initial_loss: f64,
@@ -426,7 +426,7 @@ impl LineSearch for StrongWolfeLineSearch {

         let mut evaluate = |alpha: f64| -> anyhow::Result<(f64, f64)> {
             let (loss_val, grad_val) =
-                self.evaluate_with_gradient(&context, current_params, direction, alpha)?;
+                self.evaluate_with_gradient(&mut context, current_params, direction, alpha)?;
             let dir_deriv = grad_val
                 .iter()
                 .zip(direction.iter())
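For context on what `StrongWolfeLineSearch` is iterating toward: a step `α` is accepted when it satisfies sufficient decrease (Armijo) and the strong curvature condition. A minimal sketch of that acceptance test, assuming the usual constants — the diff shows `config.c1`, and `c2` is its conventional companion:

```rust
/// Strong Wolfe acceptance test for a trial step.
/// f0, g0: value and directional derivative at alpha = 0 (g0 < 0).
/// f_alpha, g_alpha: value and directional derivative at the trial step.
fn strong_wolfe_ok(
    alpha: f64, f0: f64, g0: f64,
    f_alpha: f64, g_alpha: f64,
    c1: f64, c2: f64,
) -> bool {
    let sufficient_decrease = f_alpha <= f0 + c1 * alpha * g0; // Armijo
    let curvature = g_alpha.abs() <= c2 * g0.abs(); // strong curvature
    sufficient_decrease && curvature
}

fn main() {
    // phi(alpha) = (alpha - 1)^2 has its minimum at alpha = 1.
    let phi = |a: f64| (a - 1.0).powi(2);
    let dphi = |a: f64| 2.0 * (a - 1.0);
    let (c1, c2) = (1e-4, 0.9);
    // The exact minimizer passes both conditions...
    assert!(strong_wolfe_ok(1.0, phi(0.0), dphi(0.0), phi(1.0), dphi(1.0), c1, c2));
    // ...while a vanishingly small step fails the curvature condition.
    assert!(!strong_wolfe_ok(1e-12, phi(0.0), dphi(0.0), phi(1e-12), dphi(1e-12), c1, c2));
}
```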
