From 4668d314fb894a7141f1a3354c38b63556df24bc Mon Sep 17 00:00:00 2001
From: Ryan Lutz <lutz23@llnl.gov>
Date: Thu, 12 Jun 2025 12:02:33 -0700
Subject: [PATCH 01/56] Neohookean enzyme test

---
 strain_energy.cpp | 743 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 743 insertions(+)
 create mode 100644 strain_energy.cpp
diff --git a/strain_energy.cpp b/strain_energy.cpp
new file mode 100644
index 00000000..3e903679
--- /dev/null
+++ b/strain_energy.cpp
@@ -0,0 +1,743 @@
+#include <iostream>
+#include <cstdlib> 
+#include <cstdio>
+#include "gtest/gtest.h"
+#include <array>
+#include "axom/core.hpp"
+#include "axom/slic/interface/slic.hpp"
+#include <cmath>
+#include <limits>
+extern void* enzyme_dup;    // tag placed before a (value, shadow) argument pair in the __enzyme_autodiff calls below
+extern void* enzyme_const;  // tag placed before a single argument that is passed without a shadow
+
+// Declaration-only entry points: no definition appears in this file (presumably provided by the Enzyme plugin - confirm).
+template <typename return_type, typename... Args>
+return_type __enzyme_fwddiff( Args... );
+
+template <typename return_type, typename... Args>
+return_type __enzyme_autodiff( Args... );
+
+// Row-major 3x3 matrix product C = F_T * F. Note the operand order: the second parameter is the LEFT factor.
+void multiply3x3(const double F[9], const double F_T[9], double C[9]) {
+        for (int i = 0; i < 3; ++i) {        // i: row of F_T and of the result C
+        for (int j = 0; j < 3; ++j) {    // j: column of F and of the result C
+            C[i * 3 + j] = 0.0;  // reset the accumulator; previous contents of C are discarded
+            for (int k = 0; k < 3; ++k) {
+                C[i * 3 + j] += F_T[i * 3 + k] * F[k * 3 + j];
+            }
+        }
+    }
+}
+// Fills E with 0.5 * (F_T * F - I), where F is a row-major 3x3 array and F_T is its transpose.
+void calc_E_from_F(const double F[9], double E[9]) {
+    double I[9] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};  // 3x3 identity
+    double F_T[9] = {0.0};
+    F_T[0] = F[0];  // explicit transpose: F_T[3*r + c] = F[3*c + r]
+    F_T[1] = F[3];
+    F_T[2] = F[6];
+    F_T[3] = F[1];
+    F_T[4] = F[4];
+    F_T[5] = F[7];
+    F_T[6] = F[2];
+    F_T[7] = F[5];
+    F_T[8] = F[8];
+    double C[9] = {0.0};
+    multiply3x3(F, F_T, C);  // C = F_T * F (multiply3x3 uses its second argument as the left factor)
+    for(int i = 0; i < 9; ++i) {
+        E[i] = 0.5 * (C[i] - I[i]);
+    }
+}
+// Writes C = 2 * E + I element-wise, i.e. it undoes the E = 0.5 * (C - I) step performed in calc_E_from_F.
+// NOTE(review): despite the name no stress is computed; C looks like the right Cauchy-Green tensor - confirm and rename.
+void calc_cauchy_stress_tensor(const double E[9], double C[9]){
+    double I[9] = {1, 0, 0, 0, 1, 0, 0, 0, 1};  // 3x3 identity, row-major
+    for(int i = 0; i < 9; ++i) {
+        C[i] = 2 * E[i];
+        C[i] += I[i];
+    }
+}
+
+// Returns the trace of a row-major 3x3 array:
+// the sum of the diagonal entries C[0], C[4] and C[8].
+double calc_trace(const double C[9]) {
+    double Tr_C = C[0] + C[4] + C[8];
+    return Tr_C;
+}
+
+// Returns sqrt(det(C)) for a row-major 3x3 array - NOT the determinant itself, despite the name.
+// NOTE(review): a negative det(C) makes std::sqrt return NaN, and the callers in this file pass the result to log().
+double calc_determinant(const double C[9]) {
+    double J = C[0] * (C[4] * C[8] - C[5] * C[7]) - C[1] * (C[3] * C[8] - C[5] * C[6]) + C[2] * (C[3] * C[7] - C[4] * C[6]);  // cofactor expansion along row 0
+    J = std::sqrt(J);
+    return J;
+}
+// Inverts the row-major 3x3 array F into Finv using the adjugate divided by the determinant.
+// Returns false, leaving Finv untouched, when |det(F)| is below machine epsilon; returns true otherwise.
+bool invert3x3(const double F[9], double Finv[9])
+{
+    // Compute the determinant (cofactor expansion along the first row)
+    double det =
+        F[0]*(F[4]*F[8] - F[5]*F[7]) -
+        F[1]*(F[3]*F[8] - F[5]*F[6]) +
+        F[2]*(F[3]*F[7] - F[4]*F[6]);
+
+    if (std::abs(det) < std::numeric_limits<double>::epsilon())  // absolute threshold, not scaled by the size of F
+        return false; // Singular matrix
+
+    double invDet = 1.0 / det;
+
+    // Compute the inverse: each entry is the transposed cofactor of F scaled by 1/det
+    Finv[0] =  (F[4]*F[8] - F[5]*F[7]) * invDet;
+    Finv[1] = -(F[1]*F[8] - F[2]*F[7]) * invDet;
+    Finv[2] =  (F[1]*F[5] - F[2]*F[4]) * invDet;
+
+    Finv[3] = -(F[3]*F[8] - F[5]*F[6]) * invDet;
+    Finv[4] =  (F[0]*F[8] - F[2]*F[6]) * invDet;
+    Finv[5] = -(F[0]*F[5] - F[2]*F[3]) * invDet;
+
+    Finv[6] =  (F[3]*F[7] - F[4]*F[6]) * invDet;
+    Finv[7] = -(F[0]*F[7] - F[1]*F[6]) * invDet;
+    Finv[8] =  (F[0]*F[4] - F[1]*F[3]) * invDet;
+
+    return true;
+}
+// Evaluates the scalar energy W for the row-major 3x3 strain array E and stores it in *W:
+//   W = mu/2 * (tr(C) - 3) - mu * ln(J) + lambda/2 * ln(J)^2,   with C = 2E + I and J = sqrt(det(C)).
+// E is only read here; it is a non-const pointer in the signature.
+void strain_energy(double* E, double mu, double lambda, double* W) {
+    double C[9] = {0.0};
+    calc_cauchy_stress_tensor(E, C);  // C = 2E + I
+    double Tr_C = calc_trace(C);
+    double J = calc_determinant(C);   // sqrt(det(C)), see calc_determinant
+    *W = mu/2.0 * (Tr_C - 3.0) - mu * log(J) + lambda/2.0 * pow((log(J)), 2.0);
+}
+// Fills dW_dE[0..8] by calling __enzyme_fwddiff on strain_energy nine times,
+// once per unit seed direction in E; mu and lambda are given zero tangents.
+void stress(double* E, double mu, double lambda, double* dW_dE) {
+    double W = 0.0;  // primal output slot handed to strain_energy
+    for(int i = 0; i < 9; ++i) {
+        double dE[9] = {0.0};
+        dE[i] = 1.0;           // unit tangent along component i of E
+        double dmu = 0.0;      // zero tangent for mu
+        double dlambda = 0.0;  // zero tangent for lambda
+        double dw = 0.0;       // tangent slot paired with W
+        __enzyme_fwddiff<void>( (void*) strain_energy, E, dE, mu, dmu, lambda, dlambda, &W, &dw);  // every argument is followed by its tangent
+        dW_dE[i] = dw;
+    }
+
+}
+// Fills dW_dE[0..8] with a single __enzyme_autodiff call on strain_energy:
+// E and W are passed as enzyme_dup (value, shadow) pairs, mu and lambda as enzyme_const.
+void stress_reverse(double* E, double mu, double lambda, double* dW_dE) {
+    double dE[9] = {0.0};  // shadow of E, zero-initialised before the call and read back afterwards
+    double W = 0.0;
+    double dW = 1.0;       // shadow of the output W, set to one before the call
+    __enzyme_autodiff<void>( strain_energy, enzyme_dup, E, dE, enzyme_const, mu, enzyme_const, lambda, enzyme_dup, &W, &dW); 
+
+    for (int i = 0; i < 9; ++i) {
+        dW_dE[i] = dE[i];
+    }
+}
+// Approximates dW/dE[i] for i = 0..8 with central differences of strain_energy:
+//   dW_dE[i] = (W(E + h*e_i) - W(E - h*e_i)) / (2h).
+// h is the perturbation size (default 1e-7); E itself is never modified.
+void stress_FD(double* E, double mu, double lambda, double* dW_dE, double h = 1e-7) {
+    double E_plus[9] = {0.0};
+    double E_minus[9] = {0.0};
+    double W_plus;
+    double W_minus;
+    for(int i = 0; i < 9; ++i) {
+        for(int j = 0; j < 9; ++j) {  // start every pass from fresh, unperturbed copies of E
+            E_plus[j] = E[j];
+            E_minus[j] = E[j];
+        }
+        E_plus[i] = E[i] + h;
+        E_minus[i] = E[i] - h;
+        strain_energy(E_plus, mu, lambda, &W_plus);
+        strain_energy(E_minus, mu, lambda, &W_minus);
+        dW_dE[i] = (W_plus - W_minus) / (2 * h);
+
+    }
+
+}
+// Closed-form counterpart of stress(): S = lambda * ln(J) * Cinv + mu * (I - Cinv),
+// with C = 2E + I, J = sqrt(det(C)) and Cinv the inverse of C. S receives nine row-major entries.
+void hand_code_deriv(double* E, double mu, double lambda, double* S) {
+    double C[9] = {0.0};
+    double Cinv[9];  // NOTE(review): stays uninitialised if invert3x3 reports a singular C
+    double I[9] = {1.0, 0.0, 0.0, 0, 1.0, 0.0, 0.0, 0.0, 1.0};
+    calc_cauchy_stress_tensor(E, C);
+    double J = calc_determinant(C);  // sqrt(det(C))
+    invert3x3(C, Cinv);  // NOTE(review): the bool result is ignored
+    double first_term[9];
+    for(int i = 0; i < 9; ++i) {
+        first_term[i] = lambda * std::log(J) * Cinv[i];
+    }
+    double second_term[9];
+    for(int i = 0; i < 9; ++i) {
+        second_term[i] = I[i] - Cinv[i];
+        second_term[i] *= mu;
+    }
+    for(int i = 0; i < 9; ++i) {
+        S[i] = first_term[i] + second_term[i];
+    }
+    // S must point at storage for at least nine doubles; it is written unconditionally above.
+}
+// Fills the 9x9 array d2W_d2E by applying __enzyme_fwddiff to stress(); entry [9*i + j] is output j for seed direction i.
+void second_deriv_fwd_fwd(double* E, double mu, double lambda, double* d2W_d2E) {
+    double dW[9] = {0.0};   // primal output buffer of stress()
+    double d2w[9] = {0.0};  // tangent buffer paired with dW
+
+    for(int i  = 0; i < 9; ++i) {
+        double d2E[9] = {0.0};
+        d2E[i] = 1.0;  // unit tangent along component i of E
+        double d2mu = 0.0;
+        double d2lambda = 0.0;
+        __enzyme_fwddiff<void> ( (void*) stress, E, d2E, mu, d2mu, lambda, d2lambda, &dW, &d2w  );
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = d2w[j];
+        }   
+    }
+}
+// Same layout as second_deriv_fwd_fwd, but __enzyme_fwddiff is applied to stress_reverse() instead of stress().
+void second_deriv_rev_fwd(double* E, double mu, double lambda, double* d2W_d2E) {
+        double dW[9] = {0.0};  // primal output buffer of stress_reverse()
+    double d2w[9] = {0.0};     // tangent buffer paired with dW
+
+    for(int i  = 0; i < 9; ++i) {
+        double d2E[9] = {0.0};
+        d2E[i] = 1.0;  // unit tangent along component i of E
+        double d2mu = 0.0;
+        double d2lambda = 0.0;
+        __enzyme_fwddiff<void> ( (void*) stress_reverse, E, d2E, mu, d2mu, lambda, d2lambda, &dW, &d2w  );
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = d2w[j];  // row i = seed direction, column j = output component
+        }   
+    }
+}
+// Fills the 9x9 array d2W_d2E by applying __enzyme_autodiff to stress_reverse(), one output seed per pass.
+void second_deriv_rev_rev(double* E, double mu, double lambda, double* d2W_d2E) {
+    for (int i = 0; i < 9; ++i) {
+        double d2E[9] = {0.0};  // shadow of E: nine entries, matching E (was over-allocated as 81)
+        double W[9] = {0.0};    // primal output buffer of stress_reverse()
+        double d2W[9] = {0.0};  // shadow of the output buffer
+        d2W[i] = 1.0;           // seed output component i only
+        __enzyme_autodiff<void>( stress_reverse, enzyme_dup, E, d2E, enzyme_const, mu, enzyme_const, lambda, enzyme_dup, &W, &d2W);
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * j + i] = d2E[j];  // row j = component of E, column i = seeded output component
+        }
+    }
+}
+// Fills the 9x9 array d2W_d2E by applying __enzyme_autodiff to stress(), one output seed per pass.
+void second_deriv_fwd_rev(double* E, double mu, double lambda, double* d2W_d2E) {
+    for (int i = 0; i < 9; ++i) {
+        double d2E[9] = {0.0};  // shadow of E: nine entries, matching E (was over-allocated as 81)
+        double W[9] = {0.0};    // primal output buffer of stress()
+        double d2W[9] = {0.0};  // shadow of the output buffer
+        d2W[i] = 1.0;           // seed output component i only
+        __enzyme_autodiff<void>( stress, enzyme_dup, E, d2E, enzyme_const, mu, enzyme_const, lambda, enzyme_dup, &W, &d2W);
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * j + i] = d2E[j];  // row j = component of E, column i = output; same layout as the sibling second_deriv_* routines
+        }
+    }
+}
+// Fills the 9x9 array d2W_d2E with central differences of stress():
+//   d2W_d2E[9*i + j] = (stress(E + h*e_i)[j] - stress(E - h*e_i)[j]) / (2h).  h defaults to 1e-7.
+void second_deriv_fwd_FD(double* E, double mu, double lambda, double* d2W_d2E, double h = 1e-7){
+    double E_plus[9] = {0.0};
+    double E_minus[9] = {0.0};
+    double dW_plus[9] = {0.0};
+    double dW_minus[9] = {0.0};
+    for(int i = 0; i < 9; ++i) {
+        for(int j = 0; j < 9; ++j) {  // start every pass from fresh, unperturbed copies of E
+            E_plus[j] = E[j];
+            E_minus[j] = E[j];
+        }
+        E_plus[i] = E[i] + h;
+        E_minus[i] = E[i] - h;
+        stress(E_plus, mu, lambda, dW_plus);
+        stress(E_minus, mu, lambda, dW_minus);
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = (dW_plus[j] - dW_minus[j]) / (2 * h);
+        }
+    }
+}
+// Fills the 9x9 array d2W_d2E by applying __enzyme_fwddiff to hand_code_deriv(); entry [9*i + j] is output j for seed i.
+void second_deriv_hand_fwd(double *E, double mu, double lambda, double* d2W_d2E) {
+    double dW[9] = {0.0};   // primal output buffer of hand_code_deriv()
+    double d2w[9] = {0.0};  // tangent buffer paired with dW
+
+
+    for(int i  = 0; i < 9; ++i) {
+        double d2E[9] = {0.0};
+        d2E[i] = 1.0;  // unit tangent along component i of E
+        double d2mu = 0.0;
+        double d2lambda = 0.0;
+        __enzyme_fwddiff<void> ( (void*) hand_code_deriv, E, d2E, mu, d2mu, lambda, d2lambda, &dW, &d2w  );
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = d2w[j];
+        }   
+    }
+}
+// Central-difference counterpart of second_deriv_hand_fwd: differences hand_code_deriv() with step h (default 1e-7).
+void second_deriv_hand_FD(double* E, double mu, double lambda, double* d2W_d2E, double h = 1e-7) {
+    double E_plus[9] = {0.0};
+    double E_minus[9] = {0.0};
+    double dw_minus[9] = {0.0};
+    double dw_plus[9] = {0.0};
+    for(int i = 0; i < 9; ++i){
+        for (int j = 0; j < 9; ++j) {  // start every pass from fresh, unperturbed copies of E
+            E_plus[j] = E[j];
+            E_minus[j] = E[j];
+        }
+        E_plus[i] = E[i] + h;
+        E_minus[i] = E[i] - h;
+        hand_code_deriv(E_plus, mu, lambda, dw_plus);
+        hand_code_deriv(E_minus, mu, lambda, dw_minus);
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = (dw_plus[j] - dw_minus[j]) / (2 * h);  // row i = perturbed component, column j = output
+        }
+    }
+}
+
+
+
+// Timing harness: calls stress() N times between timer.start() and timer.stop() and prints the elapsed milliseconds.
+// One extra call is made before the timer starts; dw_df holds the result of the last call on return.
+void run_fwd_mode(double* E, double mu, double lambda, double* dw_df, int N) {
+    axom::utilities::Timer timer{ false };  // NOTE(review): 'false' presumably means "do not start yet" - confirm in the axom docs
+    stress(E, mu, lambda, dw_df);  // untimed call ahead of the measured loop
+    double Dw[9] = {0.0};  // running sum of results; never read afterwards (presumably to keep the calls live - confirm)
+    timer.start();
+    for(int i = 0; i < N; ++i) {
+        stress(E, mu, lambda, dw_df);
+        for(int j = 0; j < 9; ++j) {
+            Dw[j] += dw_df[j];
+        }
+    }
+    timer.stop();
+    std::cout << axom::fmt::format( "Time to calc fwd_diff: {0:f}ms", timer.elapsedTimeInMilliSec() ) << std::endl;
+}
+// Timing harness: calls stress_reverse() N times between timer.start() and timer.stop() and prints the elapsed milliseconds.
+void run_bkwd_mode(double* E, double mu, double lambda, double* dw_dE, int N) {
+    axom::utilities::Timer timer{ false };
+
+    stress_reverse(E, mu, lambda, dw_dE);  // untimed call ahead of the measured loop
+    double Dw[9] = {0.0};  // running sum of results; never read afterwards
+    // Only the loop below is inside the timed region.
+    timer.start();
+    for(int i = 0; i < N; ++i) {
+        stress_reverse(E, mu, lambda, dw_dE);
+        for(int j = 0; j < 9; ++j) {
+            Dw[j] += dw_dE[j];
+        }
+    }
+    timer.stop();
+    std::cout << axom::fmt::format( "Time to calc backward_diff: {0:f}ms", timer.elapsedTimeInMilliSec() ) << std::endl;
+}
+// Timing harness: calls hand_code_deriv() N times between timer.start() and timer.stop() and prints the elapsed milliseconds.
+void run_hand_derivative(double* E, double mu, double lambda, double* S, int N) {
+    axom::utilities::Timer timer{ false };
+    hand_code_deriv(E, mu, lambda, S);  // untimed call ahead of the measured loop
+    double Dw[9] = {0.0};  // running sum of results; never read afterwards
+    timer.start();
+    for(int i = 0; i < N; ++i) {
+        hand_code_deriv(E, mu, lambda, S);
+        for(int j = 0; j < 9; ++j) {
+            Dw[j] += S[j];
+        }
+    }
+    timer.stop();
+    std::cout << axom::fmt::format( "Time to calc hand_diff: {0:f}ms", timer.elapsedTimeInMilliSec() ) << std::endl;
+}
+// Timing harness: runs second_deriv_fwd_fwd() N times and prints the elapsed milliseconds. S must hold 81 doubles.
+void run_fwd_fwd(double* E, double mu, double lambda, double* S, int N) {
+    axom::utilities::Timer timer{ false };
+    timer.start();
+    for(int i = 0; i < N; ++i) {
+        second_deriv_fwd_fwd(E, mu, lambda, S);  // S is overwritten on every pass
+    }
+    timer.stop();
+    std::cout << axom::fmt::format( "Time to calc fwd_fwd_diff: {0:f}ms", timer.elapsedTimeInMilliSec() ) << std::endl;
+}
+// Timing harness: runs second_deriv_rev_fwd() N times and prints the elapsed milliseconds. S must hold 81 doubles.
+void run_rev_fwd(double* E, double mu, double lambda, double* S, int N) {
+    axom::utilities::Timer timer{ false };
+    timer.start();
+    for(int i = 0; i < N; ++i) {
+        second_deriv_rev_fwd(E, mu, lambda, S);  // S is overwritten on every pass
+    }
+    timer.stop();
+    std::cout << axom::fmt::format( "Time to calc rev_fwd_diff: {0:f}ms", timer.elapsedTimeInMilliSec() ) << std::endl;
+}
+// Timing harness: runs second_deriv_fwd_rev() N times and prints the elapsed milliseconds. S must hold 81 doubles.
+void run_fwd_rev(double* E, double mu, double lambda, double* S, int N) {
+    axom::utilities::Timer timer{ false };
+    timer.start();
+    for(int i = 0; i < N; ++i) {
+        second_deriv_fwd_rev(E, mu, lambda, S);  // S is overwritten on every pass
+    }
+    timer.stop();
+    std::cout << axom::fmt::format( "Time to calc fwd_rev_diff: {0:f}ms", timer.elapsedTimeInMilliSec() ) << std::endl;
+}
+// Timing harness: runs second_deriv_rev_rev() N times and prints the elapsed milliseconds. S must hold 81 doubles.
+void run_rev_rev(double* E, double mu, double lambda, double* S, int N) {
+    axom::utilities::Timer timer{ false };
+    timer.start();
+    for(int i = 0; i < N; ++i) {
+        second_deriv_rev_rev(E, mu, lambda, S);  // S is overwritten on every pass
+    }
+    timer.stop();
+    std::cout << axom::fmt::format( "Time to calc rev_rev_diff: {0:f}ms", timer.elapsedTimeInMilliSec() ) << std::endl;
+}
+// Timing harness: runs second_deriv_fwd_FD() with step h N times and prints the elapsed milliseconds. S must hold 81 doubles.
+void run_fwd_FD(double* E, double mu, double lambda, double* S, int N, double h) {
+    axom::utilities::Timer timer{ false };
+    timer.start();
+    for(int i = 0; i < N; ++i) {
+        second_deriv_fwd_FD(E, mu, lambda, S, h);  // S is overwritten on every pass
+    }
+    timer.stop();
+    std::cout << axom::fmt::format( "Time to calc fwd_FD_diff: {0:f}ms", timer.elapsedTimeInMilliSec() ) << std::endl;
+}
+// Timing harness: runs second_deriv_hand_fwd() N times and prints the elapsed milliseconds. S must hold 81 doubles.
+void run_hand_fwd(double* E, double mu, double lambda, double* S, int N) {
+    axom::utilities::Timer timer{ false };
+    timer.start();
+    for(int i = 0; i < N; ++i) {
+        second_deriv_hand_fwd(E, mu, lambda, S);  // S is overwritten on every pass
+    }
+    timer.stop();
+    std::cout << axom::fmt::format( "Time to calc hand_fwd_diff: {0:f}ms", timer.elapsedTimeInMilliSec() ) << std::endl;
+}
+
+
+// int main() {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     int N = 0;
+//     std::cout << "Enter Number: ";
+//     std::cin >> N;
+//     double dw_dE[9] = {0.0};
+//     double d2W_d2E[81] = {0.0};
+//     double h = 1e-7;
+
+//     // run_fwd_mode(E, mu, lambda, dw_dE, N);
+//     // run_bkwd_mode(E, mu, lambda, dw_dE, N);
+//     // run_hand_derivative(E, mu, lambda, dw_dE, N);
+//     // run_fwd_fwd(E, mu, lambda, d2W_d2E, N);
+//     // run_rev_fwd(E, mu, lambda, d2W_d2E, N);
+//     // run_fwd_rev(E, mu, lambda, d2W_d2E, N);
+//     // run_rev_rev(E, mu, lambda, d2W_d2E, N);
+//     // run_fwd_FD(E, mu, lambda, d2W_d2E, N, h);
+//     run_hand_fwd(E, mu, lambda, d2W_d2E, N);
+
+    
+
+//     // second_deriv_hand_fwd(E, mu, lambda, d2W_d2E);
+//     // std::cout << " { ";
+//     // for(int i = 0; i < 81; ++i) {
+//     //     std::cout << d2W_d2E[i] << ", ";
+//     // }
+//     // std::cout << " }" << std::endl;
+
+//     // return 0;
+
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsAutodiff) {
+//     double F[9] = {1.0, 0.5, 0.0, 0.0, 1.2, 0.1, 0.0, 0.0, 1.0};
+//     double E[9] = {0.0};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fd[9];
+//     double dW_dF_ad[9];
+//     double J = calc_determinant(F);
+//     calc_green_lagrange(F, E);
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     stress_FD(E, J, mu, lambda, dW_dF_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling stress..." << std::endl;
+//     stress(E, J, mu, lambda, dW_dF_ad);
+//     std::cout << "Autodiff stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_ad[" << i << "] = " << dW_dF_ad[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fd[i]
+//                   << ", AD = " << dW_dF_ad[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fd[i], dW_dF_ad[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsReverseAutodiff) {
+//     double E[9] = {1.0, 0.5, 0.0, 0.0, 1.2, 0.1, 0.0, 0.0, 1.0};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fd[9];
+//     double dW_dF_rev[9];
+//     std::cout << "E: ";
+// for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+// std::cout << std::endl;
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     stress_FD(E, mu, lambda, dW_dF_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling stress_reverse..." << std::endl;
+//     stress_reverse(E, mu, lambda, dW_dF_rev);
+//     std::cout << "Reverse-mode autodiff stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_rev[" << i << "] = " << dW_dF_rev[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fd[i]
+//                   << ", Reverse AD = " << dW_dF_rev[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fd[i], dW_dF_rev[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsReverseAutodiff) {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fd[9];
+//     double dW_dF_hand[9];
+//     std::cout << "E: ";
+// for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+// std::cout << std::endl;
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     stress_FD(E, mu, lambda, dW_dF_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv..." << std::endl;
+//     hand_code_deriv(E, mu, lambda, dW_dF_hand);
+//     std::cout << "Hand coded stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_hand[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fd[i]
+//                   << ", Hand derivative = " << dW_dF_hand[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fd[i], dW_dF_hand[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsReverseAutodiff) {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fwd_fd[81];
+//     double dW_dF_fwd_fwd[81];
+//     std::cout << "E: ";
+// for (int i = 0; i < 81; ++i) std::cout << E[i] << " ";
+// std::cout << std::endl;
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     second_deriv_fwd_FD(E, mu, lambda, dW_dF_fwd_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fwd_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv..." << std::endl;
+//     second_deriv_fwd_fwd(E, mu, lambda, dW_dF_fwd_fwd);
+//     std::cout << "Hand coded stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_fwd_fwd[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fwd_fd[i]
+//                   << ", Hand derivative = " << dW_dF_fwd_fwd[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fwd_fd[i], dW_dF_fwd_fwd[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsReverseAutodiff) {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fwd_fd[81];
+//     double dW_dF_fwd_rev[81];
+
+//     std::cout << "E: ";
+//     for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+//     std::cout << std::endl;
+
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     second_deriv_fwd_FD(E, mu, lambda, dW_dF_fwd_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fwd_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv (fwd_rev)..." << std::endl;
+//     second_deriv_fwd_rev(E, mu, lambda, dW_dF_fwd_rev);
+//     std::cout << "Hand coded stress computed (fwd_rev):" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_fwd_rev[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fwd_fd[i]
+//                   << ", Hand derivative = " << dW_dF_fwd_rev[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fwd_fd[i], dW_dF_fwd_rev[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsReverseAutodiff) {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fwd_fd[81];
+//     double dW_dF_rev_rev[81];
+
+//     std::cout << "E: ";
+//     for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+//     std::cout << std::endl;
+
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     second_deriv_fwd_FD(E, mu, lambda, dW_dF_fwd_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fwd_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv (rev_rev)..." << std::endl;
+//     second_deriv_rev_rev(E, mu, lambda, dW_dF_rev_rev);
+//     std::cout << "Hand coded stress computed (rev_rev):" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_rev_rev[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fwd_fd[i]
+//                   << ", Hand derivative = " << dW_dF_rev_rev[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fwd_fd[i], dW_dF_rev_rev[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsFwdRevAutodiff) {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fwd_fd[81];
+//     double dW_dF_fwd_rev[81];
+
+//     std::cout << "E: ";
+//     for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+//     std::cout << std::endl;
+
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     second_deriv_fwd_FD(E, mu, lambda, dW_dF_fwd_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fwd_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv (fwd_rev)..." << std::endl;
+//     second_deriv_fwd_rev(E, mu, lambda, dW_dF_fwd_rev);
+//     std::cout << "Hand coded stress computed (fwd_rev):" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_fwd_rev[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fwd_fd[i]
+//                   << ", Hand derivative = " << dW_dF_fwd_rev[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fwd_fd[i], dW_dF_fwd_rev[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsFwdRevAutodiff) {
+//     double E[9] = {0.12, -0.03, 0.01, -0.03, 0.08, 0.02, 0.01, 0.02, 0.11};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fwd_fd[81];
+//     double dW_dF_fwd_rev[81];
+
+//     std::cout << "E: ";
+//     for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+//     std::cout << std::endl;
+
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     second_deriv_fwd_FD(E, mu, lambda, dW_dF_fwd_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fwd_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv (fwd_rev)..." << std::endl;
+//     second_deriv_hand_fwd(E, mu, lambda, dW_dF_fwd_rev);
+//     std::cout << "Hand coded stress computed (fwd_rev):" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_fwd_rev[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fwd_fd[i]
+//                   << ", Hand derivative = " << dW_dF_fwd_rev[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fwd_fd[i], dW_dF_fwd_rev[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+// Checks the forward-mode derivative of hand_code_deriv() against its central-difference approximation, entry by entry.
+TEST(StrainEnergyTest, StressFiniteDifferenceVsFwdRevAutodiff) {
+    double F[9] = {1.01, -0.03, 0.01, -0.03, 1.05, 0.02, 0.01, 0.02, 1.0};
+    double E[9] = {0};
+    double mu = 1.0;
+    double lambda = 1.0;
+    double S[9] = {0.0};  // real storage: hand_code_deriv writes S[0..8] (was a null pointer)
+    calc_E_from_F(F, E);
+    hand_code_deriv(E, mu, lambda, S);
+    std::cout << "{ ";
+    for (int i = 0; i < 9; ++i) {
+        std::cout << ", " << S[i];
+    }
+    std::cout << " }" << std::endl;
+    std::cout << "E: { ";
+    for (int i = 0; i < 9; ++i) {
+        std::cout << ", " << E[i];
+    }
+    std::cout << "}" << std::endl;
+    // 9x9 derivative tables, row-major: one from finite differences, one from Enzyme forward mode.
+    double dW_dF_hand_fd[81];
+    double dW_dF_hand_fwd[81];
+
+    std::cout << "E: ";
+    for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+    std::cout << std::endl;
+
+    std::cout << "Calling stress_FD..." << std::endl;
+    second_deriv_hand_FD(E, mu, lambda, dW_dF_hand_fd);
+    std::cout << "Finite difference stress computed:" << std::endl;
+    for (int i = 0; i < 81; ++i) {
+        std::cout << "dW_dF_hand_fd[" << i << "] = " << dW_dF_hand_fd[i] << std::endl;
+    }
+
+    std::cout << "Calling hand_code_deriv (fwd_rev)..." << std::endl;
+    second_deriv_hand_fwd(E, mu, lambda, dW_dF_hand_fwd);
+    std::cout << "Hand coded stress computed (fwd_rev):" << std::endl;
+    for (int i = 0; i < 81; ++i) {
+        std::cout << "dW_dF_hand_fwd[" << i << "] = " << dW_dF_hand_fwd[i] << std::endl;
+    }
+    // Compare the two tables; previously the finite-difference table was compared with itself.
+    for (int i = 0; i < 81; ++i) {
+        std::cout << "Comparing index " << i << ": FD = " << dW_dF_hand_fd[i]
+                  << ", Hand derivative = " << dW_dF_hand_fwd[i] << std::endl;
+        EXPECT_NEAR(dW_dF_hand_fd[i], dW_dF_hand_fwd[i], 1e-6) << "Mismatch at index " << i;
+    }
+}
\ No newline at end of file

From 286f9824689b632309bfff89de01dc605c42d99a Mon Sep 17 00:00:00 2001
From: Ryan Lutz <lutz23@llnl.gov>
Date: Thu, 22 Jan 2026 16:18:09 -0800
Subject: [PATCH 02/56] new method and old method updates

---
 src/examples/step_1_lobatto.cpp   | 1393 +++++++++++++++++++++++++++++
 src/tribol/physics/new_method.cpp | 1129 +++++++++++++++++++++++
 src/tribol/physics/new_method.hpp |  135 +++
 3 files changed, 2657 insertions(+)
 create mode 100644 src/examples/step_1_lobatto.cpp
 create mode 100644 src/tribol/physics/new_method.cpp
 create mode 100644 src/tribol/physics/new_method.hpp

diff --git a/src/examples/step_1_lobatto.cpp b/src/examples/step_1_lobatto.cpp
new file mode 100644
index 00000000..9ce1dbee
--- /dev/null
+++ b/src/examples/step_1_lobatto.cpp
@@ -0,0 +1,1393 @@
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <array>
+#include <cmath>
+#include <algorithm>
+#include "tribol/common/Parameters.hpp"
+#include "tribol/geom/GeomUtilities.hpp"
+#include "tribol/common/Enzyme.hpp"
+
+
+template <typename return_type, typename... Args>
+return_type __enzyme_autodiff( Args... );
+
+
+// Unit normal of the 2D segment coord1 -> coord2: the unit tangent rotated 90 degrees clockwise.
+void find_normal(const double* coord1, const double* coord2, double* normal) {
+    const double tx = coord2[0] - coord1[0];
+    const double ty = coord2[1] - coord1[1];
+    // No guard for coincident endpoints: a zero length makes both components NaN.
+    const double length = std::sqrt(ty * ty + tx * tx);
+    normal[0] = ty / length;
+    normal[1] = -(tx / length);
+}
+
+// Lobatto quadrature nodes on [-1, 1] for N = 1..4 points; any other N writes the 5-point set.
+void determine_lobatto_nodes(int N, double* N_i) {
+    switch (N) {
+        case 1: N_i[0] = 0.0; break;
+        case 2:
+            N_i[0] = -1.0;
+            N_i[1] = 1.0;
+            break;
+        case 3:
+            N_i[0] = -1.0;
+            N_i[1] = 0.0;
+            N_i[2] = 1.0;
+            break;
+        case 4:
+            N_i[0] = -1.0;
+            N_i[1] = -1.0 / std::sqrt(5.0);
+            N_i[2] = 1.0 / std::sqrt(5.0);
+            N_i[3] = 1.0;
+            break;
+        default:  // N_i must hold five entries here
+            N_i[0] = -1.0;
+            N_i[1] = -1.0 * std::sqrt(3.0 / 7.0);
+            N_i[2] = 0.0;
+            N_i[3] = std::sqrt(3.0 / 7.0);
+            N_i[4] = 1.0;
+    }
+}
+
+// Lobatto quadrature weights for N = 1..4 points; any other N writes the 5-point weights.
+void determine_lobatto_weights(int N, double* weights) {
+    switch (N) {
+        case 1: weights[0] = 2.0; break;
+        case 2: weights[0] = weights[1] = 1.0; break;
+        case 3:
+            weights[0] = 1.0 / 3.0;
+            weights[1] = 4.0 / 3.0;
+            weights[2] = 1.0 / 3.0;
+            break;
+        case 4:
+            weights[0] = 1.0 / 6.0;
+            weights[1] = 5.0 / 6.0;
+            weights[2] = 5.0 / 6.0;
+            weights[3] = 1.0 / 6.0;
+            break;
+        default:  // weights must hold five entries here
+            weights[0] = 1.0 / 10.0;
+            weights[1] = 49.0 / 90.0;
+            weights[2] = 32.0 / 45.0;
+            weights[3] = 49.0 / 90.0;
+            weights[4] = 1.0 / 10.0;
+    }
+}
+ // Gauss-Legendre nodes on [-1, 1], ascending. N = 1..3 write N values; any other N writes 4.
+ void determine_legendre_nodes(int N, double* N_i) {
+    if (N == 1) {
+        N_i[0] = 0.0;
+    } else if (N == 2) {
+        N_i[0] = -1 / std::sqrt(3);
+        N_i[1] = 1 / std::sqrt(3);
+    } else if (N == 3) {
+        N_i[0] = -std::sqrt(3.0 / 5.0);
+        N_i[1] = 0.0;
+        N_i[2] = std::sqrt(3.0 / 5.0);
+    } else {
+        const double inner = std::sqrt((15 - 2 * std::sqrt(30)) / 35);
+        const double outer = std::sqrt((15 + 2 * std::sqrt(30)) / 35);
+        N_i[0] = -outer;
+        N_i[1] = -inner;
+        N_i[2] = inner;   // mirror of N_i[1]
+        N_i[3] = outer;   // mirror of N_i[0]
+    }
+ }
+
+ // Gauss-Legendre weights for N = 1..3 points; any other N writes the 4-point weights.
+ void determine_legendre_weights(int N, double* W) {
+    if (N == 1) {
+        W[0] = 2.0;
+        return;
+    }
+    if (N == 2) {
+        W[0] = W[1] = 1.0;
+        return;
+    }
+    if (N == 3) {
+        W[0] = W[2] = 5.0 / 9.0;
+        W[1] = 8.0 / 9.0;
+        return;
+    }
+    // Four-point rule: the two outer weights are the smaller pair.
+    const double root30 = std::sqrt(30);
+    W[0] = W[3] = (18 - root30) / 36.0;
+    W[1] = W[2] = (18 + root30) / 36.0;
+ }
+
+// Linear map onto the 2D segment: xi = 0 gives coord1 and xi = 1 gives coord2.
+void iso_map(const double* coord1, const double* coord2, double xi,  double* mapped_coord) {
+    const double w_start = 1.0 - xi;
+    const double w_end = xi;
+    for (int c = 0; c < 2; ++c) {
+        mapped_coord[c] = w_start * coord1[c] + w_end * coord2[c];
+    }
+}
+
+// Linear map of the centred coordinate xi: -0.5 gives coord1 and +0.5 gives coord2.
+void iso_map2(const double* coord1, const double* coord2, double xi, double* mapped_coord){
+    const double weights[2] = {0.5 - xi, 0.5 + xi};
+    mapped_coord[0] = weights[0] * coord1[0] + weights[1] * coord2[0];
+    mapped_coord[1] = weights[0] * coord1[1] + weights[1] * coord2[1];
+}
+
+
+
+// Writes half of the segment vector (coord2 - coord1) into deriv[0..1].
+void iso_map_deriv(const double* coord1, const double* coord2, double* deriv) {
+    for (int c = 0; c < 2; ++c) deriv[c] = 0.5 * (coord2[c] - coord1[c]);
+}
+
+// Tests whether the closed 2D segments A0-A1 and B0-B1 share a point.
+// On success one shared point is written to intersection and true is returned; otherwise
+// false is returned and intersection is left untouched. For collinear segments the point
+// reported is the first endpoint, tried in the order B0, B1, A0, A1, that lies within the
+// other segment's coordinate ranges.
+bool segmentsIntersect(const double A0[2], const double A1[2],
+                       const double B0[2], const double B1[2],
+                       double intersection[2]) {
+    const double kTol = 1e-12;
+
+    const double ax = A1[0] - A0[0], ay = A1[1] - A0[1];  // direction of A
+    const double bx = B1[0] - B0[0], by = B1[1] - B0[1];  // direction of B
+    const double ox = B0[0] - A0[0], oy = B0[1] - A0[1];  // offset from A0 to B0
+
+    // 2D cross products; det vanishes when the two directions are parallel.
+    const double det = ax * by - ay * bx;
+    const double crossWithB = ox * by - oy * bx;
+    const double crossWithA = ox * ay - oy * ax;
+
+    if (!(std::abs(det) < kTol)) {
+        // General position: solve A0 + ua * dirA == B0 + ub * dirB.
+        const double ua = crossWithB / det;
+        const double ub = crossWithA / det;
+        const bool onA = ua >= 0.0 && ua <= 1.0;
+        const bool onB = ub >= 0.0 && ub <= 1.0;
+        if (!(onA && onB)) {
+            return false;
+        }
+        intersection[0] = A0[0] + ua * ax;
+        intersection[1] = A0[1] + ua * ay;
+        return true;
+    }
+
+    // Parallel directions: a non-zero offset cross product means two distinct lines.
+    if (std::abs(crossWithB) > kTol || std::abs(crossWithA) > kTol) {
+        return false;
+    }
+
+    // Collinear from here on.
+    auto inRange = [](const double* end0, const double* end1, const double* pt) {
+        return std::min(end0[0], end1[0]) <= pt[0] && pt[0] <= std::max(end0[0], end1[0]) &&
+               std::min(end0[1], end1[1]) <= pt[1] && pt[1] <= std::max(end0[1], end1[1]);
+    };
+
+    const double* shared = nullptr;
+    if (inRange(A0, A1, B0)) {
+        shared = B0;
+    } else if (inRange(A0, A1, B1)) {
+        shared = B1;
+    } else if (inRange(B0, B1, A0)) {
+        shared = A0;
+    } else if (inRange(B0, B1, A1)) {
+        shared = A1;
+    }
+
+    if (shared == nullptr) {
+        return false;  // collinear but disjoint
+    }
+    intersection[0] = shared[0];
+    intersection[1] = shared[1];
+    return true;
+}
+
+
+
+
+
+// void lagrange_shape_functions(int N, double xi, const double* nodes, double* N_i) {
+//     for(int i = 0; i < N; ++i) {
+//         N_i[i] = 1.0;
+//         for(int j = 0; j < N; j++){
+//             if(i != j) {
+//                 N_i[i] *= (xi - nodes[j]) / (nodes[i] - nodes[j]);
+//             }
+//         }
+//     }
+// }
+
+// void iso_map(const double* coords, int N, double* mapped_coords, double xi) {
+//     double nodes[N];
+//     double shape_functions[N];
+//     determine_lobatto_nodes(N, nodes);
+//     lagrange_shape_functions(N, xi, nodes, shape_functions);
+//     mapped_coords[0] = 0.0;
+//     mapped_coords[1] = 0.0;
+//     for(int i = 0; i < N; ++i) {
+//         mapped_coords[0] += shape_functions[i] * coords[2 * i];
+//         mapped_coords[1] += shape_functions[i] * coords[2 * i + 1]; 
+//     }
+// }
+
+
+// void iso_map_deriv(double xi, const double* coords, int N, double* dxi_dx) {
+//     double mapped_coords[2] = {0.0, 0.0};
+//     double d_mapped_coords[2] = {0.0, 0.0};
+//     double dxi = 1.0;
+//     __enzyme_autodiff<void>( iso_map, enzyme_const, coords, enzyme_const, N, enzyme_dup, mapped_coords, d_mapped_coords, enzyme_dup, xi, dxi);
+
+//     dxi_dx[0] = d_mapped_coords[0];
+//     dxi_dx[1] = d_mapped_coords[1];
+// }
+
+// double compute_jacobian(const double* coords, const double* derivs, int N) {
+//     double dx_dxi = 0.0;
+//     double dy_dxi = 0.0;
+
+//     for (int i = 0; i < N; ++i) {
+//         dx_dxi += derivs[i] * coords[2 * i];
+//         dy_dxi += derivs[i] * coords[2 * i + 1];
+//     }
+
+//     double J = 0.5 * std::sqrt(dx_dxi * dx_dxi + dy_dxi * dy_dxi);
+//     return J;
+// }
+
+
+// double newtons_method(const double* p, const double* coord1, const double* coord2, double tol = 1e-20, int iter = 20) {
+//     double xi = 0.0; //initial guess
+
+//     for(int i = 0; i < iter; ++i) {
+//         double mapped_coords[2] = {0.0, 0.0};
+//         iso_map(coord1, coord2, xi, mapped_coords);
+
+//         //compute residuals
+//         double rx = mapped_coords[0] - p[0];
+//         double ry = mapped_coords[1] - p[1];
+
+//         double dx_dxi[2] = {0.0, 0.0};
+//         iso_map_deriv(coord1, coord2, dx_dxi);
+
+//         double grad = 2.0 * (rx * dx_dxi[0] + ry * dx_dxi[1]);
+//         double hess = 2.0 * (dx_dxi[0] * dx_dxi[0] + dx_dxi[1] * dx_dxi[1]);
+//         //newton step
+//         double step = grad / hess;
+//         xi -= step;
+
+//         //clamp xi to [-1, 1] for segment
+//         xi = std::max(-1.0, std::min(1.0, xi));
+
+//         if (std::abs(step) < tol) {
+//             break;
+//         }
+
+        
+//     }
+//     return xi;
+// }
+
+
+
+
+// Intersects the infinite line through A0 and A1 with the line through p along nB.
+// Solves A0 + alpha * (A1 - A0) = p + s * nB for alpha by 2D cross products. alpha is not
+// clamped to [0, 1], so the point written to intersection may lie beyond either endpoint.
+// When the two directions are parallel (|det| < 1e-12), p itself is written instead.
+void find_intersection(const double* A0, const double* A1, const double* p, const double* nB, double* intersection) {
+    const double tangent[2] = {A1[0] - A0[0], A1[1] - A0[1]};
+    const double offset[2] = {p[0] - A0[0], p[1] - A0[1]};
+
+    // Cross product of A's direction with the ray direction.
+    const double det = tangent[0] * nB[1] - tangent[1] * nB[0];
+    const bool parallel = std::abs(det) < 1e-12;
+
+    if (parallel) {
+        intersection[0] = p[0];
+        intersection[1] = p[1];
+    } else {
+        // alpha is the fraction of the way from A0 to A1.
+        const double inv_det = 1.0 / det;
+        const double alpha = (offset[0] * nB[1] - offset[1] * nB[0]) * inv_det;
+        intersection[0] = (A0[0] + alpha * tangent[0]);
+        intersection[1] = A0[1] + alpha * tangent[1];
+    }
+}
+
+
+// void get_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* projections) {
+//     double nA[2] = {0.0};
+//     double nB[2] = {0.0}; 
+//     find_normal(A0, A1, nA);
+//     find_normal(B0, B1, nB);
+//     // double eta_values[N];
+//     // determine_lobatto_nodes(N, eta_values)
+//     double end_points[2] = {-0.5, 0.5}; // change for [-0.5, 0.5] mapping
+//     for (int i = 0; i < 2; ++i) {
+//         double p[2] = {0.0};
+
+//         double intersection[2] = {0.0};
+//         double seg_intersection[2] = {0.0};
+//         iso_map2(B0, B1, end_points[i], p);
+
+//         // std::cout << "gx: " << p[0] << "gy: " << p[1] << std::endl;
+//         // // double xiA = newtons_method(p, A0, A1);
+//         // // tribol::ProjectPointToSegment(p[0], p[1],  nB[0], nB[1], A0[0], A0[1], px, py); 
+//         // std::cout << "px: " << p[0] << ", " << "py: " << p[1] <<std::endl;
+
+//         find_intersection(A0, A1, p, nB, intersection);
+
+
+
+//         // std::cout << "intersection: " << intersection[0] << ',' << intersection[1] << std::endl;
+
+
+//         double dx = A1[0] - A0[0];
+//         double dy = A1[1] - A0[1];
+//         double len2 = dx*dx + dy*dy;
+//         double xiA = ((intersection[0] - A0[0]) * dx + (intersection[1] - A0[1]) * dy) / len2;
+
+//         // bool current_inside = (xiA >= 0.0 && xiA <= 1.0);
+
+//         double nB_unit[2] = { nB[0], nB[1] };
+//         double norm = std::sqrt(nB_unit[0]*nB_unit[0] + nB_unit[1]*nB_unit[1]);
+//         nB_unit[0] /= norm;
+//         nB_unit[1] /= norm;
+
+//         double dx_gap = intersection[0] - p[0];
+//         double dy_gap = intersection[1] - p[1];
+//         double gap = dx_gap * nB_unit[0] + dy_gap * nB_unit[1];
+
+
+
+//         // if (gap > 0) {
+//         //     xiA_was_inside[i] = true;  // mark this slot as valid
+//         // }
+
+//         double del = 0.1;
+
+//         if(segmentsIntersect(A0, A1, B0, B1, seg_intersection) &&  gap > 0.0) {
+//             // std::cout << "Segments intersect" << std::endl;
+//             // if(xiA < 0.0 || xiA > 1.0) {
+//                 // std::cout << "entered loop" << std::endl;
+//                 // std::cout << "Seg intersection: " << seg_intersection[0] << ", " << seg_intersection[1] << std::endl;
+//                 // std::cout << "xia before: " << xiA << std::endl;
+//                 xiA = ((seg_intersection[0] - A0[0]) * dx + (seg_intersection[1] - A0[1]) * dy) / len2;
+//                 // std::cout << "xia after: " << xiA << std::endl;
+//                 if (xiA < del) {
+//                     xiA = del;
+//                 }
+//                 // std::cout << "xia after: " << xiA << std::endl;
+//             // }
+//         }
+//         xiA = xiA - 0.5;
+//         // xiA = (xiA + 1) / 2;
+//         // std::cout << "Xia: " << xiA << std::endl;  //PICK UP HERE******
+//         projections[i] = xiA;
+//     }
+// }
+
+// void get_endpoint_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* proj0, double* proj1) {
+//     double nA[2];
+//     find_normal(A0, A1, nA);
+//     find_intersection(B0, B1, A0, nA, proj0);
+//     find_intersection(B0, B1, A1, nA, proj1);
+
+// }
+
+
+// void get_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* projections, double del) {
+//     double nA[2] = {0.0}; 
+//     find_normal(A0, A1, nA);
+    
+//     double end_points[2] = {-0.5, 0.5}; 
+//     for (int i = 0; i < 2; ++i) {
+//         double p[2] = {0.0};
+//         iso_map2(B0, B1, end_points[i], p);
+//         std::cout << "EndPoints: " << end_points[0] << ", " << end_points[1] << std::endl;
+        
+//         double intersection[2] = {0.0};
+//         find_intersection(B0, B1, p, nA, intersection);
+//         std::cout << "intersection: " << intersection[0] << ", " << intersection[1] << std::endl;
+
+//         // Convert intersection to parametric coordinate on A
+//         // double dx = A1[0] - A0[0];
+//         // double dy = A1[1] - A0[1];
+//         // double len2 = dx*dx + dy*dy;
+//         // std::cout << "len2: " << len2 << std::endl;
+//         // double xiA = ((intersection[0] - A0[0]) * dx + (intersection[1] - A0[1]) * dy) / len2;
+//         // std::cout << "Xia: " << xiA << std::endl;
+        
+//         // Apply constraints and convert to reference interval
+//         // xiA = std::max(del, std::min(1.0 - del, xiA)) - 0.5;
+  
+//         // xiA = 0.5 - xiA;
+//         projections[i] = intersection[i];
+//     }
+// }
+// Projects both endpoints of segment B onto the line through segment A, moving along B's
+// unit normal, and records where each one lands as a centred parametric coordinate on A:
+//   projections[i] = alpha - 0.5, where alpha = 0 at A0 and alpha = 1 at A1.
+// projections[0] belongs to B0 and projections[1] to B1. Nothing is clamped, so a value
+// falls outside [-0.5, 0.5] whenever the endpoint projects beyond the ends of A.
+// NOTE(review): `del` is never read in this function.
+void get_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* projections, double del) {
+    double normalB[2] = {0.0};
+    find_normal(B0, B1, normalB);
+
+    // Local copies of B's endpoints, indexed as [endpoint][component].
+    double endsB[2][2];
+    endsB[0][0] = B0[0]; endsB[0][1] = B0[1];
+    endsB[1][0] = B1[0]; endsB[1][1] = B1[1];
+
+    for (int k = 0; k < 2; ++k) {
+        // Follow B's normal from this endpoint until it meets A's supporting line.
+        double hit[2] = {0.0};
+        find_intersection(A0, A1, endsB[k], normalB, hit);
+
+        // Direction and squared length of A.
+        const double ax = A1[0] - A0[0];
+        const double ay = A1[1] - A0[1];
+        const double lengthSq = ax * ax + ay * ay;
+
+        // Fraction of the way along A: (hit - A0) projected onto A's direction.
+        const double alpha = ((hit[0] - A0[0]) * ax + (hit[1] - A0[1]) * ay) / lengthSq;
+
+        // Shift so that the midpoint of A sits at zero.
+        projections[k] = alpha - 0.5;
+    }
+}
+
+
+
+
+
+
+
+// void compute_integration_bounds(const double* projections, double* integration_bounds, int N) {
+//     double xi_min = projections[0];
+//     double xi_max = projections[0];
+//     for (int i = 0; i < 2; ++i) {
+//         if (xi_min > projections[i]) {
+//             xi_min = projections[i];
+//         }
+//         if(xi_max < projections[i]) {
+//             xi_max = projections[i]; 
+//         }
+
+//     }
+
+//     if (xi_max < -0.5) {
+//         xi_max = -0.5;
+//     }
+//     if(xi_min > 0.5) {
+//         xi_min  = 0.5;
+//     }
+//     if (xi_min < -0.5) { 
+//         xi_min = -0.5;
+//     }
+//     if (xi_max > 0.5) {
+//         xi_max = 0.5;
+//     }
+
+//     double del = 0.1;
+
+//     integration_bounds[0] = xi_min;
+//     integration_bounds[1] = xi_max;
+//     // std::cout << "x_min: " << xi_min << "  xi_max: " << xi_max << std::endl;
+
+// }
+
+// Orders the two projected coordinates and limits both to the padded reference interval
+// [-0.5 - del, 0.5 + del].
+//   projections        - two parametric coordinates, in either order
+//   integration_bounds - receives {lower, upper} after ordering and limiting
+//   del                - padding added beyond each end of [-0.5, 0.5]
+// If both inputs fall beyond the same end of that interval, the two bounds coincide.
+void compute_integration_bounds(const double* projections, double* integration_bounds, double del) {
+    const double floor_xi = -0.5 - del;
+    const double ceil_xi = 0.5 + del;
+
+    // Smaller and larger of the two inputs.
+    double lower = std::min(projections[0], projections[1]);
+    double upper = std::max(projections[0], projections[1]);
+
+    // Upper bound: lifted to the floor first, then capped at the ceiling.
+    if (upper < floor_xi) {
+        upper = floor_xi;
+    }
+    if (upper > ceil_xi) {
+        upper = ceil_xi;
+    }
+
+    // Lower bound: capped at the ceiling first, then lifted to the floor.
+    if (lower > ceil_xi) {
+        lower = ceil_xi;
+    }
+    if (lower < floor_xi) {
+        lower = floor_xi;
+    }
+
+    integration_bounds[0] = lower;
+    integration_bounds[1] = upper;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Smooths the two integration bounds near the ends of the reference segment.
+// Each bound is shifted to xi = bound + 0.5, passed through the piecewise map below, and
+// shifted back by 0.5 before being stored in modified_bounds. Branches are tested in this
+// order, so a value on a shared boundary takes the first branch that matches:
+//   1. -del <= xi <= del         : xi^2 / (4 del) + xi / 2 + del / 4
+//                                  (equals 0 at xi = -del and del at xi = del)
+//   2. 1 - del <= xi <= 1 + del  : b xi^2 + c xi + d, with d chosen so that the curve
+//                                  equals 1 - del at xi = 1 - del
+//   3. del <= xi <= 1 - del      : xi unchanged
+// A xi outside all three ranges silently produces xi_hat = 0, i.e. a stored value of -0.5.
+// integration_bounds is only read; both entries are copied before any output is written.
+void modify_bounds(double* integration_bounds, double del, double* modified_bounds) {
+    const double shifted[2] = {integration_bounds[0] + 0.5, integration_bounds[1] + 0.5};
+
+    for (int k = 0; k < 2; ++k) {
+        const double xi = shifted[k];
+        const bool near_start = (0.0 - del <= xi) && (xi <= del);
+        const bool near_end = ((1.0 - del) <= xi) && (xi <= 1.0 + del);
+        const bool interior = (del <= xi) && (xi <= (1.0 - del));
+
+        double xi_hat = 0.0;
+        if (near_start) {
+            xi_hat = (1.0/(4*del)) * (xi*xi) + 0.5 * xi + del/4.0;
+        } else if (near_end) {
+            const double b = -1.0/(4.0*del);
+            const double c = 0.5 + 1.0/(2.0*del);
+            const double d = 1.0 - del + (1.0/(4.0*del)) * pow(1.0-del, 2) - 0.5*(1.0-del) - (1.0-del)/(2.0*del);
+            xi_hat = b*xi*xi + c*xi + d;
+        } else if (interior) {
+            xi_hat = xi;
+        }
+
+        modified_bounds[k] = xi_hat - 0.5;
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// void modify_bounds(double* integration_bounds, double del, double* modified_bounds) {
+//     double xi = 0.0;
+//     // integration_bounds[0] -= del;
+//     // integration_bounds[1] += del;
+
+
+//     for (int i = 0; i < 2; ++i) {
+//         double xi_hat = 0.0;
+//         // xi = 0.5 * (integration_bounds[i] + 1.0);
+//         xi = integration_bounds[i] + 0.5;
+//         // std::cout << "xi: " << xi << std::endl;
+//         if (0.0 <= xi && xi <= del) {
+//             xi_hat = (1.0/del) * (xi*xi) + 0.5 * xi + del/4.0;
+//             // std::cout << "zone1" << std::endl;
+//         }
+//         else if((1.0 - del) <= xi && xi <= 1.0) {
+//             xi_hat =  1.0 -(((1.0 - xi) * (1.0 - xi)) / (2 * del * (1.0 - del)));
+//             // std::cout << "zone2" << std::endl;
+//         }
+//         else if(del <= xi && xi <= (1.0)) { 
+//             xi_hat = xi;
+//             // std::cout << "zone3" << std::endl;
+//         }
+//         else{ 
+//             std::cerr << "Xi did not fall in an expected range for modifying bounds for 1" << std::endl;
+//         }
+//         // modified_bounds[i] = 2.0 * xi_hat - 1;
+//         modified_bounds[i] = xi_hat - 0.5;
+//     }
+//     // std::cout << "modified bounds: " << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
+// }
+
+// Maps the two integration bounds through a piecewise weight-smoothing function.
+// Each bound is shifted to xi = bound + 0.5, mapped, and stored as xi_hat - 0.5:
+//   0 <= xi <= del        : xi^2 / (2 del (1 - del))
+//   1 - del <= xi <= 1    : 1 - (1 - xi)^2 / (2 del (1 - del))
+//   del <= xi <= 1 - del  : (2 xi - del) / (2 (1 - del))
+// A xi within 1e-10 of zero is snapped to exactly 0 first. A xi matching none of the
+// ranges is reported on std::cerr and yields xi_hat = 0. integration_bounds is only read.
+void modify_bounds_for_weight(double* integration_bounds, double del, double* modified_bounds) {
+    const double kSnapTol = 1e-10;
+    for (int i = 0; i < 2; ++i) {
+        double xi_hat = 0.0;
+        double xi = integration_bounds[i] + 0.5;
+        // Absorb round-off around the left end. The tolerance must bound |xi|: comparing
+        // xi itself would also swallow genuinely negative, out-of-range inputs.
+        if (std::abs(xi) < kSnapTol) {
+            xi = 0.0;
+        }
+        if (0 <= xi && xi <= del) {
+            xi_hat = ((xi)*(xi)) / (2.0 * del * (1.0 - del));
+        }
+        else if ((1.0 - del) <= xi && xi <= 1.0) {
+            xi_hat = 1.0 - (((1.0 - xi) * (1.0 - xi)) / (2 * del * (1.0 - del)));
+        }
+        else if (del <= xi && xi <= (1.0 - del)) {
+            xi_hat = ((2.0 * xi) - del) / (2.0 * (1.0 - del));
+        }
+        else {
+            std::cerr << "Xi did not fall in an expected range for modifying bounds for weight fpr 2" << std::endl;
+        }
+        modified_bounds[i] = xi_hat - 0.5;
+    }
+}
+
+
+
+// Places N Gauss-Legendre points inside the parametric interval
+// [integration_bounds[0], integration_bounds[1]] and maps each one onto segment A0-A1 with
+// iso_map2. quad_points receives the physical points interleaved as x0, y0, x1, y1, ...
+// (2 * N values). integration_bounds is only read.
+// NOTE(review): the node buffer is a variable-length array, a compiler extension in C++.
+void compute_quadrature_point(double* integration_bounds, const double* A0, const double* A1, int N, double* quad_points) {
+    double reference_nodes[N];
+    determine_legendre_nodes(N, reference_nodes);
+
+    // Affine map from the reference interval [-1, 1] onto [lower, upper].
+    const double lower = integration_bounds[0];
+    const double upper = integration_bounds[1];
+    const double half_width = 0.5 * (upper - lower);
+    const double centre = 0.5 * (upper + lower);
+
+    for (int q = 0; q < N; ++q) {
+        const double xi_q = half_width * reference_nodes[q] + centre;
+
+        double physical[2] = {0.0, 0.0};
+        iso_map2(A0, A1, xi_q, physical);
+
+        quad_points[2 * q] = physical[0];
+        quad_points[2 * q + 1] = physical[1];
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Scales the N reference Gauss-Legendre weights by the Jacobian of the map from [-1, 1]
+// onto [integration_bounds[0], integration_bounds[1]], which is half the interval length.
+// NOTE(review): the scratch buffer is a variable-length array (compiler extension in C++).
+void assign_weights(const double* integration_bounds, int N, double* weights) {
+    double reference_weights[N];
+    determine_legendre_weights(N, reference_weights);
+
+    const double lower = integration_bounds[0];
+    const double upper = integration_bounds[1];
+    const double jacobian = 0.5 * (upper - lower);
+
+    for (int q = 0; q < N; ++q) {
+        weights[q] = reference_weights[q] * jacobian;
+    }
+}
+
+
+
+
+// double compute_gap(const double* p, const double* B0, const double* B1, double* A0, double* A1, double* nB) {
+//     double nB_orig[2] = {nB[0], nB[1]};
+//     double len = std::sqrt(nB[0] * nB[0] + nB[1] * nB[1]);
+//     // std::cout << len << std:: endl;
+//     nB_orig[0] /= len;
+//     nB_orig[1] /= len;
+//     // std::cout << "nbx: " << nB_orig[0] << " nby: " << nB_orig[1] << std::endl;
+
+//     double intersection[2] = {0.0};
+//     find_intersection(B0, B1, p, nB_orig, intersection);
+
+//     // std::cout << "intersection at B: " << intersection[0] << ", " << intersection[1] << std::endl;
+  
+//     //  std::cout << "intersection for gap: " << intersection[0] << ',' << intersection[1] << std::endl;
+
+//     // double eta = newtons_method(p, B0, B1); //closest projection of p onto elem B
+//     // double px, py;
+//     // tribol::ProjectPointToSegment(p[0], p[1],nB_orig[0], nB_orig[1], B0[0], B0[1], px, py);
+
+//     // double q[2] = {0.0, 0.0};
+//     // iso_map(B0, B1, eta, q); //map eta back to physical space to get closest point q on A
+
+//     double dx = intersection[0] - p[0];
+//     double dy = intersection[1] - p[1];
+//     // std::cout << "px: " << p[0] << "py: " << p[1] << std::endl;
+//     // std::cout << "dx: " << dx << "dy: " << dy << std::endl;
+
+//     double gap = dx * nB_orig[0] + dy * nB_orig[1];
+
+//     // if(dx == 0 && dy == 0){
+//     //     gap = (A0[1] - p[1]) * nB_orig[1];
+//     //     // std::cout << "gap in loop: " << gap << std::endl;
+//     //     return gap;
+        
+
+//     // }
+//     // std::cout << "gap in compute_gap: " << gap << std::endl;
+//     return gap;
+// }
+
+// Signed gap at point p, measured along the direction nA.
+// nA is normalised locally, the line through p along that direction is intersected with
+// the line through B0 and B1 (find_intersection), and the offset from p to that hit is
+// projected onto the unit direction. The sign is then flipped, so the result is positive
+// when the hit lies on the opposite side of p from the direction nA points in.
+// NOTE(review): A0 and A1 are never read in this function.
+double compute_gap(const double* p, const double* B0, const double* B1, const double* nA, const double* A0, const double* A1) {
+    const double norm = std::sqrt(nA[0] * nA[0] + nA[1] * nA[1]);
+    double unit[2] = {nA[0], nA[1]};
+    unit[0] /= norm;
+    unit[1] /= norm;
+
+    double hit[2] = {0.0};
+    find_intersection(B0, B1, p, unit, hit);
+
+    // Component of (hit - p) along the unit direction, with the sign reversed.
+    const double along_normal = (hit[0] - p[0]) * unit[0] + (hit[1] - p[1]) * unit[1];
+    return along_normal * -1;
+}
+
+
+
+
+// Scales the gap by eta = max(0, -(nA . nB)): the factor is positive only when the two
+// normals point against each other, and zero otherwise.
+double compute_modified_gap(double gap, double* nA, double* nB) {
+    const double alignment = nA[0] * nB[0] + nA[1] * nB[1];
+    double eta = 0.0;
+    if (alignment < 0) {
+        eta = -alignment;
+    }
+    return gap * eta;
+}
+
+
+
+
+
+// Contact potential k1 * gap^2 - k2 * gap^3; gaps below 1e-12 contribute no energy.
+double compute_contact_potential(double gap, double k1, double k2) {
+    const double kMinGap = 1e-12;
+    if (gap < kMinGap) {
+        return 0;
+    }
+    const double squared = gap * gap;
+    return k1 * squared - k2 * (squared * gap);
+}
+
+
+// Contact energy of segment A against segment B, integrated over A by Gauss-Legendre
+// quadrature and written to *energy.
+//   coords      - A0.x, A0.y, A1.x, A1.y, B0.x, B0.y, B1.x, B1.y
+//   del         - smoothing width forwarded to compute_integration_bounds and modify_bounds
+//   k1, k2      - coefficients of the potential k1 * g^2 - k2 * g^3
+//   N           - number of quadrature points
+//   lenA        - length segment A is rescaled to about its midpoint; also scales the sum
+//   projections - centred parametric coordinates delimiting the integration range
+//   energy      - output
+// Steps: rebuild both segments about their midpoints (A with length lenA, B with its own
+// length), return zero energy when the two normals are perpendicular (|nA . nB| < 1e-10),
+// otherwise clamp and smooth the bounds, place quadrature points on A, evaluate the
+// normal-alignment-scaled gap at each, and sum weight * potential times lenA / 2.
+// NOTE(review): quad_points and weights are variable-length arrays (compiler extension).
+void compute_contact_energy(const double* coords, double del, double k1, double k2, int N, double lenA, double* projections, double* energy) {
+    double A0[2] = {coords[0], coords[1]};
+    double A1[2] = {coords[2], coords[3]};
+    double B0[2] = {coords[4], coords[5]};
+    double B1[2] = {coords[6], coords[7]};
+
+    // B keeps its own length; A takes the caller-supplied lenA.
+    const double lenB = sqrt((B1[0] - B0[0]) * (B1[0] - B0[0]) + (B1[1] - B0[1]) * (B1[1] - B0[1]));
+
+    // Midpoint and half-vector (pointing from end 1 towards end 0) of each segment.
+    const double midA[2] = {0.5 * (A0[0]+A1[0]), 0.5*(A0[1]+A1[1])};
+    const double halfA[2] = {0.5 * (A0[0]-A1[0]), 0.5*(A0[1]-A1[1])};
+    const double normHalfA = std::sqrt(halfA[0]*halfA[0] + halfA[1]*halfA[1]);
+
+    const double midB[2] = {0.5 * (B0[0]+B1[0]), 0.5*(B0[1]+B1[1])};
+    const double halfB[2] = {0.5 * (B0[0]-B1[0]), 0.5*(B0[1]-B1[1])};
+    const double normHalfB = std::sqrt(halfB[0]*halfB[0] + halfB[1]*halfB[1]);
+
+    // Rebuild the endpoints at the requested lengths.
+    for (int c = 0; c < 2; ++c) {
+        A0[c] = midA[c] + halfA[c] * lenA * 0.5 / normHalfA;
+        A1[c] = midA[c] - halfA[c] * lenA * 0.5 / normHalfA;
+        B0[c] = midB[c] + halfB[c] * lenB * 0.5 / normHalfB;
+        B1[c] = midB[c] - halfB[c] * lenB * 0.5 / normHalfB;
+    }
+
+    double nA[2] = {0.0};
+    double nB[2] = {0.0};
+    find_normal(A0, A1, nA);
+    find_normal(B0, B1, nB);
+
+    // Perpendicular normals: no contribution.
+    const double normal_alignment = nA[0] * nB[0] + nA[1] * nB[1];
+    if (std::abs(normal_alignment) < 1e-10) {
+        *energy = 0;
+        return;
+    }
+
+    double integration_bounds[2];
+    compute_integration_bounds(projections, integration_bounds, del);
+
+    double modified_bounds[2];
+    modify_bounds(integration_bounds, del, modified_bounds);
+
+    // Points and weights both come from the smoothed bounds.
+    double quad_points[2 * N];
+    compute_quadrature_point(modified_bounds, A0, A1, N, quad_points);
+
+    double weights[N];
+    assign_weights(modified_bounds, N, weights);
+
+    double total = 0.0;
+    for (int q = 0; q < N; ++q) {
+        double point[2] = {quad_points[2 * q], quad_points[2 * q + 1]};
+        const double gap = compute_gap(point, B0, B1, nA, A0, A1);
+        const double scaled_gap = compute_modified_gap(gap, nA, nB);
+        total += weights[q] * compute_contact_potential(scaled_gap, k1, k2);
+    }
+    *energy = total * (lenA * 0.5);
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// void compute_sym_energy(const double* coords, double del, double k1, double k2, int N, double len, double* energy) {
+//     double energy1 = 0.0; 
+//     compute_contact_energy(coords, del, k1, k2, N, len, &energy1); 
+
+//     double A0[2] = {coords[0], coords[1]};
+//     double A1[2] = {coords[2], coords[3]};
+//     double B0[2] = {coords[4], coords[5]};
+//     double B1[2] = {coords[6], coords[7]};
+
+//     double nA[2] = {0.0};
+//     double nB[2] = {0.0};
+ 
+//     // std::cout << "length: " << len << std::endl;
+//     double energy2 = 0.0;
+
+//     find_normal(A0, A1, nA);
+//     find_normal(B0, B1, nB);
+
+//     double projections[2];
+//     get_projections(A0, A1, B0, B1, projections, N);
+
+//     double integration_bounds[2];
+//     compute_integration_bounds(projections, integration_bounds, N);
+
+//     // double switch_bounds[2] = {integration_bounds[1], integration_bounds[0]};
+
+
+
+//     double modified_bounds[2];
+//     modify_bounds(integration_bounds, del, modified_bounds);
+//     // std::cout << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
+    
+//      double switch_bounds[2] = {modified_bounds[1], modified_bounds[0]};
+
+//     double quad_points[2 * N];
+//     compute_quadrature_point(switch_bounds, A0, A1, N, quad_points);
+    
+    
+
+//     // double modified_bounds[2];
+//     // modify_bounds(switch_bounds, del, modified_bounds);
+
+//     double weights[N];
+//     assign_weights(switch_bounds, N, weights);
+
+//     *energy = 0.0;
+//     for(int i = 0; i < N; ++i) {
+//         double p[2] = {quad_points[2 * i], quad_points[2 * i + 1]};
+
+//         double gap = compute_gap(p, B0, B1, A0, A1, nB);
+//         double smooth_gap = compute_modified_gap(gap, nA, nB);
+//         // std::cout << smooth_gap << std::endl;
+
+//         double potential = compute_contact_potential(smooth_gap, k1, k2);
+
+//         energy2 +=  weights[i] * potential;
+
+//     }
+//     energy2 *= len * 0.5;
+
+//     *energy = 0.5 * (energy1 - energy2);
+    
+
+// }
+
+
+
+
+
+
+
+// Interactively reads the two end nodes of one element.
+// For node 1 and then node 2 it prompts on std::cout, reads an x and a y value
+// from std::cin, and appends them to `coords` (so four values are appended in
+// the order x1, y1, x2, y2). The parameter `N` is accepted but not used here.
+void read_element_coords(int N, std::vector<double>& coords) {
+    int node = 1;
+    while (node <= 2) {
+        double x;
+        double y;
+
+        std::cout << "Enter x" << node << ": ";
+        std::cin >> x;
+
+        std::cout << "Enter y" << node << ": ";
+        std::cin >> y;
+
+        coords.push_back(x);
+        coords.push_back(y);
+        ++node;
+    }
+}
+
+// Copies every entry of `elem`, in order, into the raw array `C`.
+// The caller must provide room for at least elem.size() doubles in `C`.
+void populate_C_arrays(double* C, const std::vector<double>& elem) {
+    double* dst = C;
+    for (const double value : elem) {
+        *dst = value;
+        ++dst;
+    }
+}
+
+
+// void calc_force(double* coords, double del, double k1, double k2, int N, double len, double* dE_dX) {
+// double E = 0.0;
+// for (int i = 0; i < 8; ++i) {
+//     double dcoords[8] = {0.0};
+//     dcoords[i] = 1.0;
+//     double dk1 = 0.0;
+//     double dk2 = 0.0;
+//     double ddel = 0.0;
+//     double dE = 1.0;
+//     double dlen = 0.0;
+//     __enzyme_fwddiff<void>( compute_contact_energy, coords, dcoords, del, ddel, k1, dk1, k2, dk2, enzyme_const, N, dlen, len, &E, &dE);
+//     dE_dX[i] = -dE;
+
+// }
+// }
+
+// Differentiates compute_contact_energy with Enzyme's reverse-mode entry point
+// (__enzyme_autodiff) and returns the derivative with respect to `coords`.
+//
+//   coords      : eight values (A0, A1, B0, B1 as x,y pairs in main); the only
+//                 input marked enzyme_dup, i.e. the only one with a shadow buffer
+//   del, k1, k2, N, len, projections : forwarded unchanged and marked enzyme_const
+//   dE_dX       : output, receives the eight entries of the coords shadow
+//
+// NOTE(review): no sign flip is applied, so dE_dX is presumably +dE/dcoords
+// rather than a force (-dE/dcoords) — confirm against callers.
+void calc_force_reverse(const double* coords, double del, double k1, double k2, int N, double len, double* projections, double* dE_dX) {
+    // Shadow of coords; zero-initialised before the call and read back afterwards.
+    double dcoords[8] = {0.0};
+    double E = 0.0;
+    // Shadow of the energy output, set to 1.0 as the reverse-mode seed.
+    double dE = 1.0;
+    __enzyme_autodiff<void>( compute_contact_energy, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1, enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_const, projections, enzyme_dup, &E, &dE);
+
+    // Hand the coordinate shadow back to the caller.
+    for(int i = 0; i < 8; ++i) {
+        dE_dX[i] = dcoords[i];
+    }
+}
+
+// void calc_force_FD(double* coords, double del, double k1, double k2, int N, double* dE_dX, double h = 1e-10) {
+//     double X_plus[8] = {0.0};
+//     double X_minus[8] = {0.0};
+//     double  E_plus = 0.0;
+//     double E_minus;
+//     for(int i = 0; i < 8; ++i) {
+//         for (int j = 0; j < 8; ++j) {
+//             X_plus[j] = coords[j];
+//             X_minus[j] = coords[j];
+//         }
+//         X_plus[i] = coords[i] + h;
+//         X_minus[i] = coords[i] - h;
+//         compute_contact_energy(X_plus, del, k1, k2, N, len, &E_plus);
+//         compute_contact_energy(X_minus, del, k1, k2, N, len, &E_minus);
+//         dE_dX[i] = (E_plus - E_minus) / (2 * h);
+//      }
+
+// }
+
+
+// Computes the first derivative and the 8x8 second-derivative matrix of the
+// contact energy with respect to `coords` (forward mode over reverse mode).
+//
+//   coords       : eight coordinate values
+//   del, k1, k2, N, lenA, projections : forwarded to calc_force_reverse
+//   force        : output (8 entries), the result of calc_force_reverse
+//   d2E_d2X      : output (64 entries); block i (entries 8*i .. 8*i+7) holds
+//                  the derivative of the calc_force_reverse output along
+//                  coordinate direction i
+//
+// The shadow passed for `projections` must cover the same number of entries as
+// the primal buffer; the caller in this file allocates projections[2], so the
+// shadow is sized 2 as well (a one-element shadow would be read out of bounds
+// if the differentiated code touches projections[1]).
+void calc_stiffness_rev_fwd(double* coords, double del, double k1, double k2, int N, double lenA, double* projections, double* force, double* d2E_d2X) {
+    double dEF[8] = {0.0};
+    calc_force_reverse(coords, del, k1, k2, N, lenA, projections, dEF);
+    for (int i = 0; i < 8; ++i) {
+        force[i] = dEF[i];
+    }
+    for(int i = 0; i < 8; ++i) {
+        // Unit tangent: differentiate along coordinate i only.
+        double d2coords[8] = {0.0};
+        d2coords[i] = 1.0;
+        // All remaining inputs get zero tangents.
+        double d2k1 = 0.0;
+        double d2del = 0.0;
+        double d2k2 = 0.0;
+        double d2lenA = 0.0;
+        double d2projections[2] = {0.0, 0.0};
+        // Fresh primal/tangent output buffers for every direction.
+        double dE[8] = {0.0};
+        double d2E[8] = {0.0};
+        __enzyme_fwddiff<void>( (void*) calc_force_reverse, coords, d2coords, del, d2del, k1, d2k1, k2, d2k2, N, lenA, d2lenA, projections, d2projections, dE, d2E);
+        for(int j = 0; j < 8; ++j) {
+            d2E_d2X[8 * i + j] = d2E[j];
+        }
+
+    }
+}
+
+// void calc_stiffness_rev_rev(double* coords, double del, double k1, double k2, int N, double lenA, double lenB, double* d2E_d2X) {
+//     for (int i = 0; i < 8; ++i) {
+//         double d2X[8] = {0.0};
+//         double dE[8] = {0.0};
+//         double d2E[8] = {0.0};
+//         d2E[i] = 1.0;
+//         __enzyme_autodiff<void>( (void*)calc_force_reverse, enzyme_dup, coords, d2X, enzyme_const, del, enzyme_const, k1, enzyme_const, k2, enzyme_const, N, enzyme_const, lenA, enzyme_const, lenB, enzyme_dup, dE, d2E);
+//         for(int j = 0; j < 8; ++j) {
+//             d2E_d2X[8 * i + j] = d2X[j];
+//         }
+//     }
+// }
+
+// void calc_stiffness_FD(double* coords, double del, double k1, double k2, double lenA , double lenB, int N, double *d2E_d2X, double h = 1e-7) {
+//     double dX_plus[8] = {0.0};
+//     double dX_minus[8] = {0.0};
+//     double dW_plus[8] = {0.0};
+//     double dW_minus[8] = {0.0};
+//     for (int i = 0; i < 8; ++i) {
+//         for (int j = 0; j < 8; ++j) {
+//             dX_plus[j] = coords[j];
+//             dX_minus[j] = coords[j];
+//         }
+//         dX_plus[i] = coords[i] + h;
+//         dX_minus[i] = coords[i] - h;
+        
+//         calc_force_reverse(dX_plus, del, k1, k2, N, lenA, lenB, dW_plus);
+//         calc_force_reverse(dX_minus, del, k1, k2, N, lenA, lenB, dW_minus);
+//         for(int j = 0; j < 8; ++j){
+//         d2E_d2X[8 * i + j] = (dW_plus[j] - dW_minus[j]) / (2  * h);
+        
+//     }
+
+// }
+// }
+
+// void calc_ab(const double* coord1, const double* coord2, const double* normal, double* a){
+//     double y_diff = coord1[1] - coord2[1];
+//     double x_diff = coord1[0] - coord2[0];
+//     *a = (x_diff * normal[0]) + (y_diff * normal[1]);
+
+// }
+
+// void analytical_integral(const double* coords, double del, double k1, double k2, int N, double len, double* energy) {
+//     double A0[2] = {coords[0], coords[1]};
+//     double A1[2] = {coords[2], coords[3]};
+//     double B0[2] = {coords[4], coords[5]};
+//     double B1[2] = {coords[6], coords[7]};
+
+//     double nB[2] = {0.0};
+//     find_normal(B0, B1, nB);
+//     double a = 0.0;
+//     calc_ab(A1, A0, nB, &a);
+//     double b = 0.0;
+//     calc_ab(A0, B0, nB, &b);
+
+//     double projections[2] = {0.0};
+//     get_projections(A0, A1, B0, B1, projections);
+
+//     double integration_bounds[2] = {0.0};
+//     compute_integration_bounds(projections, integration_bounds, N);
+//     double xi[2] = {0.0};
+//     modify_bounds(integration_bounds,del, xi);
+
+//     double term_one = (k1 * ((a * a) * (xi[1] * xi[1] * xi[1] / 3) + a * b * xi[1] + (b * b * xi[1])) + k2 * ((a * a * a) * (xi[1] * xi[1] * xi[1] * xi[1]) / 4) + (a * a) * (xi[1] * xi[1]) * b + ((3 * a * (xi[1] * xi[1] * xi[1]) * b) / 2) + (b * b * b) * (xi[1]));
+//     double term_two = (k1 * ((a * a) * (xi[0] * xi[0] * xi[0] / 3) + a * b * xi[0] + (b * b * xi[0])) + k2 * ((a * a * a) * (xi[0] * xi[0] * xi[0] * xi[0]) / 4) + (a * a) * (xi[0] * xi[0]) * b + ((3 * a * (xi[0] * xi[0] * xi[0]) * b) / 2) + (b * b * b) * (xi[0]));
+
+//     *energy = term_one - term_two;
+//     *energy *= len;
+// }
+
+// void calc_force_reverse_exact(double* coords, double del, double k1, double k2, int N, double len, double* dE_dX) {
+//     double dcoords[8] = {0.0};
+//     double E = 0.0;
+//     double dE = 1.0;
+//     __enzyme_autodiff<void>( analytical_integral, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1, enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_dup, &E, &dE);
+
+//     for(int i = 0; i < 8; ++i) {
+//         dE_dX[i] = -dcoords[i];
+//     }
+// }
+
+// void calc_force_reverse_sym(double* coords, double del, double k1, double k2, int N, double len, double* dE_dX) {
+//     double dcoords[8] = {0.0};
+//     double E = 0.0;
+//     double dE = 1.0;
+//     __enzyme_autodiff<void>( compute_sym_energy, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1, enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_dup, &E, &dE);
+
+//     for(int i = 0; i < 8; ++i) {
+//         dE_dX[i] = -dcoords[i];
+//     }
+// }
+
+
+
+
+
+
+// Driver: slides segment A past the fixed segment B in 140 steps of 0.01 along
+// x and, for every step, prints the eight entries returned by
+// calc_force_reverse as one comma-separated line on stdout.
+int main() {
+    // Quadrature-point count forwarded to the energy routines.
+    int N = 3;
+
+    // Initial end points of segment A; only the x components are shifted below.
+    double A0_i[2] = {-0.3, -0.05};
+    double A1_i[2] = {0.0, -0.05};
+    // Segment B stays where it is for the whole sweep.
+    double B0[2] = {1.0, 0.0};
+    double B1[2] = {0.1, 0.0};
+    double del = 0.05;
+    double k1 = 100;
+    double k2 = 0.0;
+    for(int i = 0; i < 140; ++i) {
+        double energy = 0.0;
+        double shift = 0.01 * i;
+
+        double A0[2] = {A0_i[0] + shift, A0_i[1]};
+        double A1[2] = {A1_i[0] + shift, A1_i[1]};
+
+        // Packed layout expected by the energy routines: A0, A1, B0, B1.
+        double coords[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
+        double lenA = std::sqrt((A1[0] - A0[0]) * (A1[0] - A0[0]) + (A1[1] - A0[1]) * (A1[1] - A0[1]));
+
+        double dE_dX[8] = {0.0};
+        double projections[2] = {0.0};
+        get_projections(A0, A1, B0, B1, projections, del);
+        // The energy itself is evaluated but currently not printed.
+        compute_contact_energy(coords, del, k1, k2, N, lenA, projections, &energy);
+        calc_force_reverse(coords, del, k1, k2, N, lenA, projections, dE_dX);
+
+        // One CSV row per step: no separator before the first entry.
+        for(int j = 0; j < 8; ++j) {
+            if (j == 0) {
+                std::cout << dE_dX[j];
+            }
+            else {
+                std::cout << "," << dE_dX[j];
+            }
+        }
+        std::cout << std::endl;
+    }
+}
\ No newline at end of file
diff --git a/src/tribol/physics/new_method.cpp b/src/tribol/physics/new_method.cpp
new file mode 100644
index 00000000..d71d70fa
--- /dev/null
+++ b/src/tribol/physics/new_method.cpp
@@ -0,0 +1,1129 @@
+#include "new_method.hpp"
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <array>
+#include <cmath>
+#include <algorithm>
+#include <cassert>
+#include <iomanip>
+#include "tribol/common/Parameters.hpp"
+#include "tribol/geom/GeomUtilities.hpp"
+#include "tribol/common/Enzyme.hpp"
+#include <set>
+#include <map>
+
+namespace {
+
+    // Quadrature data handed to gtilde_kernel (through the void* Enzyme wrappers).
+    struct Gparams {
+        // Number of quadrature points; gtilde_kernel loops i = 0 .. N-1.
+        int N;
+        // Quadrature-point coordinates; qp[i] is passed to iso_map as xi.
+        const double* qp;
+        // Quadrature weights; w[i] multiplies the i-th integrand sample.
+        const double* w;
+        // Not read by gtilde_kernel as written (its only use there is commented out).
+        const double* x2;
+    };
+
+
+    // Computes a unit vector perpendicular to the segment coord1 -> coord2.
+    //
+    // With t = (coord2 - coord1) / |coord2 - coord1| the result is (t_y, -t_x).
+    //   coord1, coord2 : 2D points (x, y)
+    //   normal         : output, two entries
+    //
+    // A segment shorter than 1e-14 has no usable direction; instead of dividing
+    // by (nearly) zero and returning NaN/inf, a zero vector is returned. This is
+    // the same threshold find_intersection uses to reject a degenerate normal.
+    void find_normal(const double* coord1, const double* coord2, double* normal) {
+        const double dx = coord2[0] - coord1[0];
+        const double dy = coord2[1] - coord1[1];
+        const double len = std::sqrt(dy * dy + dx * dx);
+        if (len < 1e-14) {
+            normal[0] = 0.0;
+            normal[1] = 0.0;
+            return;
+        }
+        normal[0] = dy / len;
+        normal[1] = -(dx / len);
+    }
+
+// Fills `x` with the N Gauss-Legendre nodes on [-1, 1] in ascending order.
+// Orders 1 through 4 are tabulated; any other N trips the assertion (after `x`
+// has already been resized to N).
+void determine_legendre_nodes(int N, std::vector<double>& x)
+{
+    x.resize(N);
+    switch (N) {
+        case 1:
+            x[0] = 0.0;
+            break;
+        case 2: {
+            const double r = 1.0 / std::sqrt(3.0);
+            x[0] = -r;
+            x[1] = r;
+            break;
+        }
+        case 3: {
+            const double r = std::sqrt(3.0/5.0);
+            x[0] = -r;
+            x[1] = 0.0;
+            x[2] = r;
+            break;
+        }
+        case 4: {
+            const double s = 2.0*std::sqrt(6.0/5.0);
+            const double inner = std::sqrt((3.0 - s)/7.0);
+            const double outer = std::sqrt((3.0 + s)/7.0);
+            x[0] = -outer;
+            x[1] = -inner;
+            x[2] = inner;
+            x[3] = outer;
+            break;
+        }
+        default:
+            assert(false && "Unsupported quadrature order");
+    }
+}
+
+  // Fills `W` with the N Gauss-Legendre weights on [-1, 1], ordered to match
+  // the nodes produced by determine_legendre_nodes.
+  //
+  // Orders 1 through 4 are tabulated. Any other N trips the assertion, exactly
+  // like determine_legendre_nodes; previously every N outside {1, 2, 3} fell
+  // into the 4-point branch, which wrote past the end of `W` for N == 0 and
+  // silently produced a wrong rule for N >= 5.
+  void determine_legendre_weights(int N, std::vector<double>& W) {
+
+    W.resize(N);
+    if (N == 1) {
+        W[0] = 2.0;
+    }
+    else if (N == 2) {
+        W[0] = 1.0;
+        W[1] = 1.0;
+    }
+    else if (N == 3) {
+        W[0] = 5.0 / 9.0;
+        W[1] = 8.0 / 9.0;
+        W[2] = 5.0 / 9.0;
+    }
+    else if (N == 4) {
+        const double root30 = std::sqrt(30.0);
+        W[0] = (18.0 - root30) / 36.0;
+        W[1] = (18.0 + root30) / 36.0;
+        W[2] = (18.0 + root30) / 36.0;
+        W[3] = (18.0 - root30) / 36.0;
+    }
+    else {
+        assert(false && "Unsupported quadrature order");
+    }
+ }
+
+ // Linear interpolation along the segment coord1 -> coord2 using a reference
+ // coordinate xi, where xi = -0.5 yields coord1 and xi = +0.5 yields coord2.
+ // The interpolated 2D point is written to mapped_coord.
+ void iso_map(const double* coord1, const double* coord2, double xi, double* mapped_coord){
+    const double shape_start = 0.5 - xi;
+    const double shape_end = 0.5 + xi;
+    for (int c = 0; c < 2; ++c) {
+        mapped_coord[c] = shape_start * coord1[c] + shape_end * coord2[c];
+    }
+}
+
+// Looks up the two nodes referenced by element `e` (node_ids[0] and
+// node_ids[1]) in `mesh` and copies their x/y coordinates into P0 and P1.
+inline void endpoints(const Mesh& mesh, const Element& e, double P0[2], double P1[2])
+{
+    const Node& first = mesh.node(e.node_ids[0]);
+    const Node& second = mesh.node(e.node_ids[1]);
+
+    P0[0] = first.x;
+    P0[1] = first.y;
+
+    P1[0] = second.x;
+    P1[1] = second.y;
+}
+
+// Intersects the infinite line through A0 and A1 with the line through `p`
+// whose direction is `nB`, and writes the intersection point to `intersection`.
+//
+// The point is NOT clamped to the segment [A0, A1]. When `nB` is (near) zero
+// or the two lines are (near) parallel, `p` itself is returned unchanged.
+void find_intersection(const double* A0, const double* A1,
+                       const double* p, const double* nB,
+                       double* intersection)
+{
+    const double normal_len = std::sqrt(nB[0]*nB[0] + nB[1]*nB[1]);
+    if (!(normal_len < 1e-14)) {
+        // Normalised ray direction and segment direction.
+        const double dir[2] = { nB[0]/normal_len, nB[1]/normal_len };
+        const double seg[2] = { A1[0] - A0[0], A1[1] - A0[1] };
+
+        // 2D cross product; vanishes when the ray is parallel to the segment.
+        const double denom = seg[0]*dir[1] - seg[1]*dir[0];
+        if (!(std::abs(denom) < 1e-12)) {
+            const double rel[2] = { p[0] - A0[0], p[1] - A0[1] };
+            const double inv_denom = 1.0 / denom;
+            // Parameter along A0 -> A1 (0 at A0, 1 at A1), left unclamped.
+            const double t = (rel[0]*dir[1] - rel[1]*dir[0]) * inv_denom;
+
+            intersection[0] = A0[0] + t * seg[0];
+            intersection[1] = A0[1] + t * seg[1];
+            return;
+        }
+    }
+
+    // Degenerate configuration: fall back to the query point.
+    intersection[0] = p[0];
+    intersection[1] = p[1];
+}
+
+
+
+// Projects both end points of segment B onto segment A along B's normal and
+// reports the covered range in A's reference coordinate.
+//
+// Each end point of B is sent through find_intersection (direction: normal of
+// B), the hit is converted to a fraction along A0 -> A1 and shifted by -0.5 so
+// that A0 maps to -0.5 and A1 to +0.5. projections[0] receives the smaller and
+// projections[1] the larger of the two values; neither is clamped.
+void get_projections(const double* A0, const double* A1,
+                     const double* B0, const double* B1,
+                     double* projections)
+{
+    double normal_B[2] = {0.0, 0.0};
+    find_normal(B0, B1, normal_B);
+
+    const double tx = A1[0] - A0[0];
+    const double ty = A1[1] - A0[1];
+    const double len_sq = tx*tx + ty*ty;
+
+    const double* ends[2] = { B0, B1 };
+    double xi[2] = {0.0, 0.0};
+
+    for (int k = 0; k < 2; ++k) {
+        double hit[2] = {0.0, 0.0};
+        find_intersection(A0, A1, ends[k], normal_B, hit);
+
+        // Fraction of the way from A0 to A1, then recentred on the midpoint.
+        const double fraction =
+            ((hit[0] - A0[0]) * tx + (hit[1] - A0[1]) * ty) / len_sq;
+        xi[k] = fraction - 0.5;
+    }
+
+    const double lowest = std::min(xi[0], xi[1]);
+    const double highest = std::max(xi[0], xi[1]);
+
+    projections[0] = lowest;
+    projections[1] = highest;
+}
+
+
+//     double compute_local_gap(const double A0[2], const double A1[2], const double B0[2], const double B1[2], double xiA) {
+//     double nA[2], nB[2];
+
+//     find_normal(A0, A1, nA);
+//     find_normal(B0, B1, nB);
+
+//     double x1[2];
+//     iso_map(A0, A1, xiA, x1);
+
+//     double x2[2];
+//     find_intersection(B0, B1, x1, nB, x2);
+
+//     double dx = x1[0] - x2[0];
+//     double dy = x1[1] - x2[1];
+
+//     double gn = -(dx*nB[0] + dy*nB[1]);
+
+//     double dot = nB[0] * nA[0] + nB[1] * nA[1];
+//     double eta = (dot < 0) ? dot: 0.0;
+//     return gn * eta;
+// }
+
+    // Integrates the scaled normal gap between segments A and B over the
+    // quadrature points in `gp`, weighted by A's two linear shape functions.
+    //
+    //   x           : eight values, unpacked below as A0, A1, B0, B1 (x, y each)
+    //   gp          : quadrature points/weights; only N, qp and w are read
+    //   g_tilde_out : output, two entries (one per node of A)
+    //
+    // This function is differentiated by Enzyme through gtilde1_out/gtilde2_out.
+    static void gtilde_kernel(const double* x, const Gparams* gp, double* g_tilde_out) {
+        const double A0[2] = {x[0], x[1]};
+        const double A1[2] = {x[2], x[3]};
+        const double B0[2] = {x[4], x[5]};
+        const double B1[2] = {x[6], x[7]};
+        
+        // Length of segment A, used as the integration Jacobian.
+        const double J = std::sqrt((A1[0]-A0[0])*(A1[0]-A0[0]) + (A1[1]-A0[1])*(A1[1]-A0[1]));
+
+        double nB[2];
+        find_normal(B0, B1, nB);
+
+        double nA[2];
+        find_normal(A0, A1, nA);
+        // eta keeps the dot product of the two normals only when it is negative
+        // and is zero otherwise.
+        double dot = nB[0] * nA[0] + nB[1] * nA[1];
+        double eta = (dot < 0) ? dot : 0.0;
+
+        double g1 = 0.0, g2 = 0.0;
+
+        for (int i = 0; i < gp->N; ++i) {
+            const double xiA = gp -> qp[i]; 
+            const double w = gp->w[i];
+
+            // Linear shape functions of A on the reference interval [-0.5, 0.5].
+            const double N1 = 0.5 - xiA;
+            const double N2 = 0.5 + xiA;
+
+            // x1 on segment A
+            double x1[2];
+            iso_map(A0, A1, xiA, x1);
+
+            // x2: point on the line through B reached from x1 along nB
+            // (recomputed from the current x, not taken from gp->x2).
+            double x2[2];
+            find_intersection(B0, B1, x1, nB, x2);
+
+            // Disabled alternative: use a precomputed point from gp->x2 instead.
+            // const double x2x = gp->x2[2*i + 0];
+            // const double x2y = gp->x2[2*i + 1];
+
+            const double dx = x1[0] - x2[0];
+            const double dy = x1[1] - x2[1];
+
+            // Signed gap along nB (nB is computed above from the current B0, B1),
+            // then scaled by eta.
+             const double gn = -(dx * nB[0] + dy * nB[1]);
+             const double g  = gn * eta;
+
+            g1 += w * N1 * g * J;
+            g2 += w * N2 * g * J;
+        }
+
+        g_tilde_out[0] = g1;
+        g_tilde_out[1] = g2;
+        // std::cout << "G tilde: " << g1 << ", " << g2 << std::endl;
+    }
+
+// static void gtilde1_out(const double* x, const Gparams* gp, double* out)
+// {
+//   double gt[2];
+//   gtilde_kernel(x, gp, gt);
+//   *out = gt[0];
+// }
+
+// static void gtilde2_out(const double* x, const Gparams* gp, double* out)
+// {
+//   double gt[2];
+//   gtilde_kernel(x, gp, gt);
+//   *out = gt[1];
+// }
+
+// Scalar wrapper for Enzyme: evaluates gtilde_kernel and stores only its first
+// component in *out. `gp_void` must point to a Gparams instance.
+static void gtilde1_out(const double* x, const void* gp_void, double* out)
+{
+  double nodal[2];
+  gtilde_kernel(x, static_cast<const Gparams*>(gp_void), nodal);
+  out[0] = nodal[0];
+}
+
+// Scalar wrapper for Enzyme: evaluates gtilde_kernel and stores only its second
+// component in *out. `gp_void` must point to a Gparams instance.
+static void gtilde2_out(const double* x, const void* gp_void, double* out)
+{
+  double nodal[2];
+  gtilde_kernel(x, static_cast<const Gparams*>(gp_void), nodal);
+  out[0] = nodal[1];
+}
+
+// Derivative of gtilde1_out (first g-tilde component) with respect to the
+// eight entries of `x`, obtained with Enzyme's __enzyme_autodiff.
+//   x       : eight values, forwarded to gtilde_kernel
+//   gp      : quadrature data, passed as enzyme_const
+//   dgt1_du : output, receives the eight entries of the shadow of x
+void grad_gtilde1(const double* x, const Gparams* gp, double* dgt1_du) {
+    // Shadow of x; zero-initialised before the call and read back afterwards.
+    double dx[8] = {0.0};
+    double out = 0.0;
+    // Shadow of the scalar output, set to 1.0 as the reverse-mode seed.
+    double dout = 1.0;
+
+    __enzyme_autodiff<void>((void*) gtilde1_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout);
+
+    for (int i = 0; i < 8; ++i) {
+        dgt1_du[i] = dx[i];
+    }
+}
+
+
+// Derivative of gtilde2_out (second g-tilde component) with respect to the
+// eight entries of `x`, obtained with Enzyme's __enzyme_autodiff.
+//   x       : eight values, forwarded to gtilde_kernel
+//   gp      : quadrature data, passed as enzyme_const
+//   dgt2_du : output, receives the eight entries of the shadow of x
+void grad_gtilde2(const double*x, const Gparams* gp, double* dgt2_du) {
+    // Shadow of x; zero-initialised before the call and read back afterwards.
+    double dx[8] = {0.0};
+    double out = 0.0;
+    // Shadow of the scalar output, set to 1.0 as the reverse-mode seed.
+    double dout = 1.0;
+
+    __enzyme_autodiff<void>((void*) gtilde2_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout);
+
+    for (int i = 0; i < 8; ++i) {
+        dgt2_du[i] = dx[i]; 
+    }
+}
+
+// Second derivatives of the first g-tilde component: applies Enzyme's
+// __enzyme_fwddiff to grad_gtilde1 once per coordinate direction.
+//   x  : eight values, forwarded to grad_gtilde1
+//   gp : quadrature data, passed as enzyme_const
+//   H1 : output, 64 entries stored row-major (index row*8 + col); column `col`
+//        holds the tangent of the grad_gtilde1 output along unit direction col
+void d2gtilde1(const double* x, const Gparams* gp, double* H1) {
+  for (int col = 0; col < 8; ++col) {
+    // Unit tangent selecting coordinate `col`.
+    double dx[8] = {0.0};
+    dx[col] = 1.0;                
+
+    // Primal gradient output and its tangent, fresh for every direction.
+    double grad[8]  = {0.0};       
+    double dgrad[8] = {0.0};       
+
+    __enzyme_fwddiff<void>((void*)grad_gtilde1, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad);
+
+    for (int row = 0; row < 8; ++row) {
+      H1[row*8 + col] = dgrad[row]; 
+    }
+  }
+}
+
+// Second derivatives of the second g-tilde component: applies Enzyme's
+// __enzyme_fwddiff to grad_gtilde2 once per coordinate direction.
+//   x  : eight values, forwarded to grad_gtilde2
+//   gp : quadrature data, passed as enzyme_const
+//   H2 : output, 64 entries stored row-major (index row*8 + col); column `col`
+//        holds the tangent of the grad_gtilde2 output along unit direction col
+void d2gtilde2(const double* x, const Gparams* gp, double* H2) {
+  for (int col = 0; col < 8; ++col) {
+    // Unit tangent selecting coordinate `col`.
+    double dx[8] = {0.0};
+    dx[col] = 1.0;                
+
+    // Primal gradient output and its tangent, fresh for every direction.
+    double grad[8]  = {0.0};       
+    double dgrad[8] = {0.0};       
+
+    __enzyme_fwddiff<void>((void*)grad_gtilde2, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad);
+
+    for (int row = 0; row < 8; ++row) {
+      H2[row*8 + col] = dgrad[row]; 
+    }
+  }
+}
+
+}
+
+// Returns the range {xi_min, xi_max} that element B covers on element A, in
+// A's reference coordinate, as computed by get_projections from the end-point
+// coordinates of both elements.
+std::array<double, 2> ContactEvaluator::projections(const Mesh& mesh, 
+                                                    const Element& A,
+                                                    const Element& B) const {
+    double a_start[2], a_end[2];
+    double b_start[2], b_end[2];
+    endpoints(mesh, A, a_start, a_end);
+    endpoints(mesh, B, b_start, b_end);
+
+    std::array<double, 2> xi_range{};
+    get_projections(a_start, a_end, b_start, b_end, xi_range.data());
+    return xi_range;
+}
+
+
+// Orders the two projected coordinates and limits each of them to the window
+// [-0.5 - del, 0.5 + del], where del is taken from p_.del.
+// Returns {lower bound, upper bound}.
+std::array<double, 2> ContactSmoothing::bounds_from_projections(const std::array<double, 2>& proj) const {
+    const double del = p_.del;
+    const double window_lo = -0.5 - del;
+    const double window_hi = 0.5 + del;
+
+    double xi_lo = std::min(proj[0], proj[1]);
+    double xi_hi = std::max(proj[0], proj[1]);
+
+    // Lower bound: cap from above first, then lift to the window floor.
+    xi_lo = std::max(std::min(xi_lo, window_hi), window_lo);
+    // Upper bound: lift to the window floor first, then cap from above.
+    xi_hi = std::min(std::max(xi_hi, window_lo), window_hi);
+
+    return {xi_lo, xi_hi};
+}
+
+// std::array<double, 2> ContactSmoothing::smooth_bounds(const std::array<double, 2>& bounds) const {
+//     std::array<double, 2> smooth_bounds;
+//     const double del = p_.del;
+//     for (int i = 0; i < 2; ++i) {
+//         double xi = 0.0;
+//         double xi_hat = 0.0;
+//         xi = bounds[i] + 0.5;
+//         // std::cout << "xi: " << xi << std::endl;
+//         if (0 <= xi && xi <= del) {
+//             xi_hat = ((xi)*(xi)) / (2.0 * del * (1.0 - del));
+//             // std::cout << "Zone 1" << std::endl;
+//         }
+//         else if((1.0 - del) <= xi && xi <= 1.0) {
+//             xi_hat =  1.0 -(((1.0 - xi) * (1.0 - xi)) / (2 * del * (1.0 - del)));
+//             // std::cout << "Zone 2" << std::endl;
+//         }
+//         else if(del <= xi && xi <= (1.0 - del)) { 
+//             xi_hat = ((2.0 * xi) - del) / (2.0 * (1.0 - del));
+//             // std::cout << "Zone 3" << std::endl;
+//         }
+//         smooth_bounds[i] = xi_hat - 0.5;
+//         // std::cout << "Smooth Bounds: " << smooth_bounds[i] << std::endl;
+        
+//     }
+
+//     return smooth_bounds;
+
+// }
+
+// Applies a piecewise smoothing map to both integration bounds.
+//
+// Each bound is shifted by +0.5 (so the element spans [0, 1]), mapped, and
+// shifted back by -0.5. With del = p_.del the map is
+//   [-del, del]       : quadratic blend near the start of the element
+//   [1-del, 1+del]    : quadratic blend near the end of the element
+//   [del, 1-del]      : identity
+// and evaluates to 0 for a value that matches none of the three ranges. The
+// ranges are tested in the order listed, so shared end points resolve to the
+// earlier entry.
+std::array<double, 2> ContactSmoothing::smooth_bounds(const std::array<double, 2>& bounds) const {
+    const double del = p_.del;
+
+    const auto blend = [del](double xi) -> double {
+        if (0.0 - del <= xi && xi <= del) {
+            return (1.0/(4*del)) * (xi*xi) + 0.5 * xi + del/4.0;
+        }
+        if ((1.0 - del) <= xi && xi <= 1.0 + del) {
+            // Coefficients of the end-zone quadratic b*xi^2 + c*xi + d.
+            double b = -1.0/(4.0*del);
+            double c = 0.5 + 1.0/(2.0*del);
+            double d = 1.0 - del + (1.0/(4.0*del)) * pow(1.0-del, 2) - 0.5*(1.0-del) - (1.0-del)/(2.0*del);
+            return b*xi*xi + c*xi + d;
+        }
+        if (del <= xi && xi <= (1.0 - del)) {
+            return xi;
+        }
+        return 0.0;
+    };
+
+    std::array<double, 2> mapped;
+    for (int k = 0; k < 2; ++k) {
+        const double shifted = bounds[k] + 0.5;
+        mapped[k] = blend(shifted) - 0.5;
+    }
+    return mapped;
+}
+
+// Maps the p_.N-point Gauss-Legendre rule from [-1, 1] onto the interval
+// [xi_bounds[0], xi_bounds[1]]. Each node is scaled by the interval half-width
+// and shifted to the interval midpoint; each weight is scaled by the half-width.
+QuadPoints ContactEvaluator::compute_quadrature(const std::array<double, 2>& xi_bounds) const {
+    const int N = p_.N;
+
+    QuadPoints out;
+    out.qp.resize(N);
+    out.w.resize(N);
+
+    std::vector<double> ref_nodes(N);
+    std::vector<double> ref_weights(N);
+    determine_legendre_nodes(N, ref_nodes);
+    determine_legendre_weights(N, ref_weights);
+
+    const double half_width = 0.5 * (xi_bounds[1] - xi_bounds[0]);
+    const double midpoint = 0.5 * (xi_bounds[1] + xi_bounds[0]);
+
+    for (int i = 0; i < N; ++i) {
+        out.qp[i] = half_width * ref_nodes[i] + midpoint;
+        out.w[i] = ref_weights[i] * half_width;
+    }
+
+    return out;
+}
+
+// Evaluates the scaled normal gap between elements A and B at reference
+// coordinate xiA on A.
+//
+// The point on A is obtained with iso_map, its partner on the line through B
+// with find_intersection (along B's normal). The signed distance along B's
+// normal is then multiplied by eta, which is the dot product of the two
+// element normals when that product is negative and 0 otherwise.
+double ContactEvaluator::gap(const Mesh& mesh, const Element& A, const Element& B, double xiA) const {
+    double a_start[2], a_end[2], b_start[2], b_end[2];
+    endpoints(mesh, A, a_start, a_end);
+    endpoints(mesh, B, b_start, b_end);
+
+    double normal_A[2] = {0.0};
+    double normal_B[2] = {0.0};
+    find_normal(a_start, a_end, normal_A);
+    find_normal(b_start, b_end, normal_B);
+
+    // Point on A and the point it reaches on B's line.
+    double point_A[2] = {0.0};
+    iso_map(a_start, a_end, xiA, point_A);
+
+    double point_B[2] = {0.0};
+    find_intersection(b_start, b_end, point_A, normal_B, point_B);
+
+    const double sep_x = point_A[0] - point_B[0];
+    const double sep_y = point_A[1] - point_B[1];
+
+    // Signed normal gap measured along B's normal.
+    const double normal_gap = -(sep_x * normal_B[0] + sep_y * normal_B[1]);
+
+    const double alignment = normal_B[0] * normal_A[0] + normal_B[1] * normal_A[1];
+    double eta = 0.0;
+    if (alignment < 0) {
+        eta = alignment;
+    }
+
+    return normal_gap * eta;
+}
+
+
+// Integrates the weighted gap over the overlap of element A with element B and
+// converts it to nodal penalty pressures for A's two nodes.
+//
+// For each quadrature point xi on the smoothed overlap interval:
+//   g_tilde_a += w * N_a(xi) * gap(xi) * J      (weighted gap)
+//   AI_a      += w * N_a(xi) * J                (shape-function weighted length)
+// with N1 = 0.5 - xi, N2 = 0.5 + xi and J the length of A. The averaged gap
+// g_a = g_tilde_a / AI_a is turned into a pressure p_.k * g_a when negative and
+// zero otherwise.
+//
+// mesh: mesh holding the nodes of both elements.
+// A:    element whose nodes receive the pressures.
+// B:    opposing element the gap is measured against.
+//
+// Returns the pressures together with the un-normalised g_tilde values.
+NodalContactData ContactEvaluator::compute_nodal_contact_data(const Mesh& mesh, const Element& A, const Element& B) const {
+    double A0[2], A1[2];
+    endpoints(mesh, A, A0, A1);
+
+    // Length of A. The original code computed this same quantity twice (as J and
+    // J_ref); a single value is used for both the gap and the area integrals.
+    const double ex = A1[0] - A0[0];
+    const double ey = A1[1] - A0[1];
+    const double J = std::sqrt(ex * ex + ey * ey);
+
+    // Quadrature restricted to the smoothed projection of B onto A.
+    const auto projs = projections(mesh, A, B);
+    const auto bounds = smoother_.bounds_from_projections(projs);
+    const auto smooth_bounds = smoother_.smooth_bounds(bounds);
+    const auto qp = compute_quadrature(smooth_bounds);
+
+    double g_tilde1 = 0.0;
+    double g_tilde2 = 0.0;
+    double AI_1 = 0.0;
+    double AI_2 = 0.0;
+
+    // Alternative (disabled): integrate AI over the whole element instead of
+    // the overlap, using a second rule qp_full = compute_quadrature({-0.5, 0.5}):
+    // for (size_t i = 0; i < qp_full.qp.size(); ++i) {
+    //     double xiA_full = qp_full.qp[i];
+    //     double w_full = qp_full.w[i];
+    //     double N1_full = 0.5 - xiA_full;
+    //     double N2_full = 0.5 + xiA_full;
+    //     AI_1 += w_full * N1_full * J;
+    //     AI_2 += w_full * N2_full * J;
+    // }
+
+    for (size_t i = 0; i < qp.qp.size(); ++i) {
+        const double xiA = qp.qp[i];
+        const double w = qp.w[i];
+
+        const double N1 = 0.5 - xiA;
+        const double N2 = 0.5 + xiA;
+
+        // The gap is accumulated unconditionally; the active-set decision is
+        // made on the averaged nodal gaps below.
+        const double gn = gap(mesh, A, B, xiA);
+
+        g_tilde1 += w * N1 * gn * J;
+        g_tilde2 += w * N2 * gn * J;
+
+        AI_1 += w * N1 * J;
+        AI_2 += w * N2 * J;
+    }
+
+    // A zero-length overlap interval scales every weight to zero, so AI is 0;
+    // report a zero gap explicitly instead of relying on 0/0 producing NaN.
+    const double g1 = (AI_1 != 0.0) ? g_tilde1 / AI_1 : 0.0;
+    const double g2 = (AI_2 != 0.0) ? g_tilde2 / AI_2 : 0.0;
+
+    // KKT conditions: pressure only where the averaged gap is negative.
+    const double p1 = (g1 < 0.0) ? p_.k * g1 : 0.0;
+    const double p2 = (g2 < 0.0) ? p_.k * g2 : 0.0;
+
+    NodalContactData contact_data;
+    contact_data.pressures = {p1, p2};
+    contact_data.g_tilde = {g_tilde1, g_tilde2};
+    return contact_data;
+}
+
+// Contact energy of the pair (A, B): sum over A's two nodes of pressure * g_tilde.
+double ContactEvaluator::compute_contact_energy(const Mesh& mesh, const Element& A, const Element& B) const {
+    const NodalContactData nodal = compute_nodal_contact_data(mesh, A, B);
+    const auto& p = nodal.pressures;
+    const auto& g = nodal.g_tilde;
+    return p[0] * g[0] + p[1] * g[1];
+}
+
+// Gradient of g_tilde1 and g_tilde2 with respect to the eight nodal coordinates
+// x = {A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y} of the element pair.
+//
+// The quadrature rule is built once from the current geometry and handed to
+// grad_gtilde1 / grad_gtilde2 through a const Gparams pointer, so the
+// derivative is taken with respect to x only.
+//
+// mesh, A, B: mesh and the two segment elements whose end nodes define x.
+// dgt1_dx, dgt2_dx: caller-provided arrays of 8 doubles, overwritten on return.
+void ContactEvaluator::grad_gtilde(const Mesh& mesh, const Element& A, const Element& B, double dgt1_dx[8], double dgt2_dx[8]) const {
+    double A0[2], A1[2], B0[2], B1[2];
+
+    endpoints(mesh, A, A0, A1);
+    endpoints(mesh, B, B0, B1);
+
+    double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
+
+    // Only B's normal is needed here (for the partner points below); the
+    // nA/dot/eta values the original computed were never read.
+    double nB[2];
+    find_normal(B0, B1, nB);
+
+    const auto projs = projections(mesh, A, B);
+    const auto bounds = smoother_.bounds_from_projections(projs);
+    const auto smooth_bounds = smoother_.smooth_bounds(bounds);
+    const auto qp = compute_quadrature(smooth_bounds);
+
+    const int N = static_cast<int>(qp.qp.size());
+
+    // Partner point on B for every quadrature point, stored as (x, y) pairs.
+    std::vector<double> x2(2 * N);
+    for (int i = 0; i < N; ++i) {
+        double x1[2] = {0.0};
+        iso_map(A0, A1, qp.qp[i], x1);
+        double x2_i[2] = {0.0};
+        find_intersection(B0, B1, x1, nB, x2_i);
+        x2[2*i] = x2_i[0];
+        x2[2*i+1] = x2_i[1];
+    }
+
+    // Value-initialise so that any Gparams member not assigned below is zero
+    // rather than indeterminate when the kernels receive the struct.
+    Gparams gp{};
+    gp.N = N;
+    gp.qp = qp.qp.data();
+    gp.w = qp.w.data();
+    gp.x2 = x2.data();
+
+    double dg1_du[8] = {0.0};
+    double dg2_du[8] = {0.0};
+    grad_gtilde1(x, &gp, dg1_du);
+    grad_gtilde2(x, &gp, dg2_du);
+
+    for (int i = 0; i < 8; ++i) {
+        dgt1_dx[i] = dg1_du[i];
+        dgt2_dx[i] = dg2_du[i];
+    }
+}
+
+// Nodal contact forces for the pair (A, B), one entry per coordinate of
+// {A0, A1, B0, B1}: f[i] = 2 * (p1 * d(g_tilde1)/dx_i + p2 * d(g_tilde2)/dx_i).
+std::array<double, 8> ContactEvaluator::compute_contact_forces(const Mesh& mesh, const Element& A, const Element& B) const {
+    double grad_gt1[8] = {0.0};
+    double grad_gt2[8] = {0.0};
+    grad_gtilde(mesh, A, B, grad_gt1, grad_gt2);
+
+    const NodalContactData nodal = compute_nodal_contact_data(mesh, A, B);
+    const double p1 = nodal.pressures[0];
+    const double p2 = nodal.pressures[1];
+
+    std::array<double, 8> forces{};
+    for (size_t dof = 0; dof < forces.size(); ++dof) {
+        forces[dof] = 2.0 * (p1 * grad_gt1[dof] + p2 * grad_gt2[dof]);
+    }
+
+    return forces;
+}
+
+// Hessians of g_tilde1 and g_tilde2 with respect to the eight nodal coordinates
+// x = {A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y} of the element pair.
+// The quadrature rule is built once here and passed to d2gtilde1/d2gtilde2 via Gparams.
+// H1, H2: caller-provided arrays of 64 doubles (8 x 8), overwritten on return.
+void ContactEvaluator::d2_g2tilde(const Mesh& mesh, const Element& A, const Element& B, double H1[64], double H2[64]) const {
+    double A0[2], A1[2], B0[2], B1[2];
+    endpoints(mesh, A, A0, A1);
+    endpoints(mesh, B, B0, B1);
+
+    double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
+
+    // Only B's normal is needed (for the partner points); nA/dot/eta were unused.
+    double nB[2];
+    find_normal(B0, B1, nB);
+
+    const auto projs = projections(mesh, A, B);
+    const auto bounds = smoother_.bounds_from_projections(projs);
+    const auto smooth_bounds = smoother_.smooth_bounds(bounds);
+    const auto qp = compute_quadrature(smooth_bounds);
+
+    const int N = static_cast<int>(qp.qp.size());
+
+    // Partner point on B for every quadrature point, stored as (x, y) pairs.
+    std::vector<double> x2(2 * N);
+    for (int i = 0; i < N; ++i) {
+        double x1[2] = {0.0};
+        iso_map(A0, A1, qp.qp[i], x1);
+        double x2_i[2] = {0.0};
+        find_intersection(B0, B1, x1, nB, x2_i);
+        x2[2*i] = x2_i[0];
+        x2[2*i+1] = x2_i[1];
+    }
+
+    // Value-initialise so members not assigned below are zero, not indeterminate.
+    Gparams gp{};
+    gp.N = N;
+    gp.qp = qp.qp.data();
+    gp.w = qp.w.data();
+    gp.x2 = x2.data();
+
+    double d2g1_d2u[64] = {0.0};
+    double d2g2_d2u[64] = {0.0};
+    d2gtilde1(x, &gp, d2g1_d2u);
+    d2gtilde2(x, &gp, d2g2_d2u);
+
+    for (int i = 0; i < 64; ++i) {
+        H1[i] = d2g1_d2u[i];
+        H2[i] = d2g2_d2u[i];
+    }
+}
+
+
+
+
+
+// Returns {g_tilde1, g_tilde2} for the pair (A, B), taken from the g_tilde
+// field of compute_nodal_contact_data (quadrature rebuilt from the mesh).
+std::pair<double, double> ContactEvaluator::eval_gtilde(const Mesh& mesh, const Element& A, const Element& B) const {
+    const NodalContactData nodal = compute_nodal_contact_data(mesh, A, B);
+    const auto& gt = nodal.g_tilde;
+
+    return {gt[0], gt[1]};
+}
+
+
+
+// Evaluates {g_tilde1, g_tilde2} for the pair (A, B) on a caller-supplied
+// quadrature rule instead of one rebuilt from the current geometry. The gap at
+// each point and the length of A are still taken from the current mesh.
+std::pair<double,double>
+ContactEvaluator::eval_gtilde_fixed_qp(Mesh& mesh,
+                                       const Element& A,
+                                       const Element& B,
+                                       const QuadPoints& qp_fixed) const
+{
+    double A0[2], A1[2];
+    endpoints(mesh, A, A0, A1);
+
+    const double ex = A1[0] - A0[0];
+    const double ey = A1[1] - A0[1];
+    const double length_A = std::sqrt(ex*ex + ey*ey);
+
+    double sum1 = 0.0;
+    double sum2 = 0.0;
+    const size_t n_pts = qp_fixed.qp.size();
+    for (size_t k = 0; k < n_pts; ++k) {
+        const double xi = qp_fixed.qp[k];
+        const double wt = qp_fixed.w[k];
+        const double gap_k = gap(mesh, A, B, xi);  // still depends on geometry; no active-set filter
+
+        sum1 += wt * (0.5 - xi) * gap_k * length_A;
+        sum2 += wt * (0.5 + xi) * gap_k * length_A;
+    }
+    return {sum1, sum2};
+}
+
+
+
+// Compares the Enzyme gradients of g_tilde1/g_tilde2 with central finite
+// differences for the element pair (A, B).
+//
+// The quadrature rule is computed once from the unperturbed mesh and reused for
+// every perturbed evaluation, consistent with grad_gtilde, which passes a
+// precomputed rule to the differentiated kernels. Each node coordinate is
+// perturbed by +/- epsilon in place and restored afterwards.
+//
+// Results are ordered by ascending node id, two entries (x, y) per unique node.
+// If A and B share a node, its analytical entry is the sum of the kernel
+// partials of every slot that node occupies, because the finite-difference
+// perturbation moves all of those slots together.
+FiniteDiffResult ContactEvaluator::validate_g_tilde(Mesh& mesh, const Element& A, const Element& B, double epsilon) const {
+    FiniteDiffResult result;
+
+    // Quadrature frozen at the unperturbed configuration.
+    auto projs0 = projections(mesh, A, B);
+    auto bounds0 = smoother_.bounds_from_projections(projs0);
+    auto smooth_bounds0 = smoother_.smooth_bounds(bounds0);
+    QuadPoints qp0 = compute_quadrature(smooth_bounds0);
+
+    auto [g1_base, g2_base] = eval_gtilde(mesh, A, B);
+    result.g_tilde1_baseline = g1_base;
+    result.g_tilde2_baseline = g2_base;
+
+    // Node occupying each slot of the kernels' coordinate vector x[8]:
+    // slot 0 -> x[0,1] (A0), 1 -> x[2,3] (A1), 2 -> x[4,5] (B0), 3 -> x[6,7] (B1).
+    const int slot_nodes[4] = {A.node_ids[0], A.node_ids[1], B.node_ids[0], B.node_ids[1]};
+
+    // Unique node ids; std::set iterates in ascending order, so no extra sort.
+    std::set<int> node_set(slot_nodes, slot_nodes + 4);
+    result.node_ids = std::vector<int>(node_set.begin(), node_set.end());
+
+    int num_dofs = static_cast<int>(result.node_ids.size()) * 2;
+    result.fd_gradient_g1.resize(num_dofs);
+    result.fd_gradient_g2.resize(num_dofs);
+
+    // ===== GET AND REORDER ENZYME GRADIENTS =====
+    double dgt1_dx[8] = {0.0};
+    double dgt2_dx[8] = {0.0};
+    grad_gtilde(mesh, A, B, dgt1_dx, dgt2_dx);
+
+    // Accumulate (rather than overwrite) so a node shared by A and B collects
+    // the contribution of each slot it appears in.
+    result.analytical_gradient_g1.assign(num_dofs, 0.0);
+    result.analytical_gradient_g2.assign(num_dofs, 0.0);
+
+    for (size_t i = 0; i < result.node_ids.size(); ++i) {
+        for (int slot = 0; slot < 4; ++slot) {
+            if (slot_nodes[slot] != result.node_ids[i]) continue;
+
+            result.analytical_gradient_g1[2*i + 0] += dgt1_dx[2*slot + 0];  // x component
+            result.analytical_gradient_g1[2*i + 1] += dgt1_dx[2*slot + 1];  // y component
+            result.analytical_gradient_g2[2*i + 0] += dgt2_dx[2*slot + 0];
+            result.analytical_gradient_g2[2*i + 1] += dgt2_dx[2*slot + 1];
+        }
+    }
+
+    // ===== CENTRAL FINITE DIFFERENCES =====
+    int dof_idx = 0;
+    for (int node_id : result.node_ids) {
+        // x-direction
+        {
+            double original = mesh.node(node_id).x;
+
+            mesh.node(node_id).x = original + epsilon;
+            auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+            mesh.node(node_id).x = original - epsilon;
+            auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+            // Restore original
+            mesh.node(node_id).x = original;
+
+            result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
+            result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
+
+            dof_idx++;
+        }
+
+        // y-direction
+        {
+            double original = mesh.node(node_id).y;
+
+            mesh.node(node_id).y = original + epsilon;
+            auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+            mesh.node(node_id).y = original - epsilon;
+            auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+            // Restore original
+            mesh.node(node_id).y = original;
+
+            result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
+            result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
+
+            dof_idx++;
+        }
+    }
+
+    return result;
+}
+
+// Gradients of g_tilde1/g_tilde2 w.r.t. x = {A0, A1, B0, B1} using the
+// caller-supplied quadrature rule qp_fixed; writes 8 doubles to each output.
+void ContactEvaluator::grad_gtilde_with_qp(const Mesh& mesh, const Element& A, const Element& B,
+                         const QuadPoints& qp_fixed, 
+                         double dgt1_dx[8], double dgt2_dx[8]) const {
+    double A0[2], A1[2], B0[2], B1[2];
+    endpoints(mesh, A, A0, A1);
+    endpoints(mesh, B, B0, B1);
+    double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
+    // Members not set on this path (e.g. x2) are zero-initialised by gp{}
+    // instead of being handed to the kernels with indeterminate values.
+    Gparams gp{};
+    gp.N = static_cast<int>(qp_fixed.qp.size());
+    gp.qp = qp_fixed.qp.data();  // Use FIXED quadrature
+    gp.w = qp_fixed.w.data();
+
+    grad_gtilde1(x, &gp, dgt1_dx);
+    grad_gtilde2(x, &gp, dgt2_dx);
+}
+
+// Compares the Hessians from d2_g2tilde with central finite differences of the
+// fixed-quadrature gradients; both are stored row-major 8 x 8 in the result.
+FiniteDiffResult ContactEvaluator::validate_hessian(Mesh& mesh, const Element& A, const Element& B, double epsilon) const {
+    FiniteDiffResult result;
+    // Baselines are printed by print_hessian_comparison; set them on this path too.
+    auto [g1_base, g2_base] = eval_gtilde(mesh, A, B);
+    result.g_tilde1_baseline = g1_base;
+    result.g_tilde2_baseline = g2_base;
+
+    auto projs0 = projections(mesh, A, B);
+    auto bounds0 = smoother_.bounds_from_projections(projs0);
+    auto smooth_bounds0 = smoother_.smooth_bounds(bounds0);
+    QuadPoints qp0 = compute_quadrature(smooth_bounds0);
+    double hess1[64] = {0.0};
+    double hess2[64] = {0.0};
+    d2_g2tilde(mesh, A, B, hess1, hess2);
+
+    const int ndof = 8;
+    result.fd_gradient_g1.assign(ndof*ndof, 0.0);
+    result.fd_gradient_g2.assign(ndof*ndof, 0.0);
+    result.analytical_gradient_g1.assign(hess1, hess1 + 64);
+    result.analytical_gradient_g2.assign(hess2, hess2 + 64);
+
+    int nodes[4] = { A.node_ids[0], A.node_ids[1], B.node_ids[0], B.node_ids[1] };
+    int col = 0;  // column of the Hessian = index of the perturbed coordinate
+    for (int k = 0; k < 4; ++k) {
+        for (int comp = 0; comp < 2; ++comp) { // 0=x, 1=y
+            Node& n = mesh.node(nodes[k]);
+            double& coord = (comp == 0) ? n.x : n.y;
+            double orig = coord;
+            double g1p[8]={0}, g1m[8]={0}, g2p[8]={0}, g2m[8]={0};
+            coord = orig + epsilon; grad_gtilde_with_qp(mesh, A, B, qp0, g1p, g2p);
+            coord = orig - epsilon; grad_gtilde_with_qp(mesh, A, B, qp0, g1m, g2m);
+            coord = orig;
+
+            for (int i = 0; i < 8; ++i) {
+                result.fd_gradient_g1[i*8 + col] = (g1p[i] - g1m[i]) / (2*epsilon);
+                result.fd_gradient_g2[i*8 + col] = (g2p[i] - g2m[i]) / (2*epsilon);
+            }
+            ++col;
+        }
+    }
+    return result;
+}
+// ANSI escape sequences used to colour the Hessian validation report.
+static const char* C_RESET = "\033[0m";  // reset attributes
+static const char* C_OK    = "\033[32m"; // green
+static const char* C_WARN  = "\033[33m"; // yellow
+static const char* C_BAD   = "\033[31m"; // red
+// Prints a report comparing the finite-difference and analytical 8 x 8 Hessians
+// held in val (g1 first, then g2): coloured matrix, mismatching entries, summary.
+void ContactEvaluator::print_hessian_comparison(const FiniteDiffResult& val) const
+{
+    std::cout << std::setprecision(12) << std::scientific; // NOTE: std::cout is left in this format afterwards
+    std::cout << "\n" << std::string(120, '=') << "\n";
+    std::cout << "Hessian Validation for g_tilde\n";
+    std::cout << std::string(120, '=') << "\n";
+    std::cout << "Baseline: g_tilde1 = " << val.g_tilde1_baseline
+              << ", g_tilde2 = " << val.g_tilde2_baseline << "\n\n";
+
+    const int ndof = 8;
+    const char* dof_names[8] = {"A0_x","A0_y","A1_x","A1_y","B0_x","B0_y","B1_x","B1_y"};
+
+    const double abs_tol_ok   = 1e-6;   // below this absolute error an entry is shown as OK
+    const double abs_tol_warn = 1e-3;   // at or above this absolute error an entry is shown as BAD
+    const double rel_tol_pct  = 10.0;   // relative error (percent) above which an entry counts as a problem
+    const double eps_denom    = 1e-14;  // floor for the relative-error denominator and the "both tiny" test
+
+    auto print_one = [&](const char* label,
+                         const double* fdH,
+                         const double* anH,
+                         int& error_count_out)
+    {
+        std::cout << std::string(120, '-') << "\n";
+        std::cout << label << "\n";
+        std::cout << std::string(120, '-') << "\n";
+
+        // --- Matrix header ---
+        std::cout << std::setw(8) << "Row\\Col";
+        for (int j = 0; j < ndof; ++j) std::cout << std::setw(12) << dof_names[j];
+        std::cout << "\n" << std::string(120, '-') << "\n";
+
+        // --- Matrix view (FD value; colored by abs diff vs analytical) ---
+        for (int i = 0; i < ndof; ++i) {
+            std::cout << std::setw(8) << dof_names[i];
+            for (int j = 0; j < ndof; ++j) {
+                const double fd = fdH[i*ndof + j];
+                const double an = anH[i*ndof + j];
+                const double abs_err = std::abs(fd - an);
+
+                const char* c = C_OK;
+                if (abs_err >= abs_tol_warn) c = C_BAD;
+                else if (abs_err >= abs_tol_ok) c = C_WARN;
+
+                std::cout << c << std::setw(12) << fd << C_RESET;
+            }
+            std::cout << "\n";
+        }
+
+        // --- Detailed comparison ---
+        std::cout << "\n" << std::string(120, '-') << "\n";
+        std::cout << "Detailed mismatches:\n";
+        std::cout << std::string(120, '-') << "\n";
+        std::cout << std::setw(8)  << "Row"
+                  << std::setw(8)  << "Col"
+                  << std::setw(20) << "FD (central)"
+                  << std::setw(20) << "Analytical"
+                  << std::setw(20) << "Abs Error"
+                  << std::setw(20) << "Rel Error (%)"
+                  << std::setw(14) << "Sign\n";
+        std::cout << std::string(120, '-') << "\n";
+
+        error_count_out = 0;
+
+        double max_abs_err = 0.0;
+        double max_rel_pct = 0.0;
+        int max_i_abs = -1, max_j_abs = -1;
+        int max_i_rel = -1, max_j_rel = -1;
+
+        int sign_flip_count = 0;
+
+        for (int i = 0; i < ndof; ++i) {
+            for (int j = 0; j < ndof; ++j) {
+                const double fd = fdH[i*ndof + j];
+                const double an = anH[i*ndof + j];
+                const double abs_err = std::abs(fd - an);
+
+                const double denom = std::max(std::max(std::abs(fd), std::abs(an)), eps_denom);
+                const double rel_pct = (abs_err / denom) * 100.0;
+
+                const bool both_tiny = (std::abs(fd) < eps_denom && std::abs(an) < eps_denom);
+                const bool sign_match = both_tiny || (fd * an >= 0.0);
+
+                if (!sign_match) sign_flip_count++;
+
+                if (abs_err > max_abs_err) { max_abs_err = abs_err; max_i_abs = i; max_j_abs = j; }
+                if (rel_pct > max_rel_pct) { max_rel_pct = rel_pct; max_i_rel = i; max_j_rel = j; }
+
+                const bool print = (abs_err > abs_tol_ok) || (!sign_match) || (rel_pct > rel_tol_pct);
+
+                if (print) {
+                    const char* c = (abs_err >= abs_tol_warn || !sign_match) ? C_BAD :
+                                    (abs_err >= abs_tol_ok) ? C_WARN : C_OK;
+
+                    std::cout << c
+                              << std::setw(8)  << i
+                              << std::setw(8)  << j
+                              << std::setw(20) << fd
+                              << std::setw(20) << an
+                              << std::setw(20) << abs_err
+                              << std::setw(20) << rel_pct
+                              << std::setw(14) << (sign_match ? "✓" : "✗ FLIP")
+                              << C_RESET << "\n";
+
+                    // Only a sign flip or a large relative error counts as a problem;
+                    // a printed entry with just a large absolute error does not.
+                    if (!sign_match || rel_pct > rel_tol_pct) error_count_out++;
+                }
+            }
+        }
+
+        // --- Symmetry check: largest |H_ij - H_ji| over the upper triangle ---
+        double max_asym_fd = 0.0, max_asym_an = 0.0;
+        for (int i = 0; i < ndof; ++i) {
+            for (int j = i+1; j < ndof; ++j) {
+                max_asym_fd = std::max(max_asym_fd, std::abs(fdH[i*ndof+j] - fdH[j*ndof+i]));
+                max_asym_an = std::max(max_asym_an, std::abs(anH[i*ndof+j] - anH[j*ndof+i]));
+            }
+        }
+
+        std::cout << "\n";
+        if (error_count_out == 0) {
+            std::cout << "All entries match within thresholds! ✓\n";
+        } else {
+            std::cout << "Found " << error_count_out << " problematic entries\n";
+        }
+
+        std::cout << "Max abs error: " << max_abs_err
+                  << " at (" << max_i_abs << "," << max_j_abs << ")\n";
+        std::cout << "Max rel error: " << max_rel_pct
+                  << "% at (" << max_i_rel << "," << max_j_rel << ")\n";
+        std::cout << "Sign flips: " << sign_flip_count << "\n";
+        std::cout << "Symmetry (FD max |H_ij - H_ji|): " << max_asym_fd << "\n";
+        std::cout << "Symmetry (AN max |H_ij - H_ji|): " << max_asym_an << "\n";
+    };
+
+    int error_count_g1 = 0;
+    int error_count_g2 = 0;
+
+    print_one("∂²g̃₁/∂x² Hessian:",
+              val.fd_gradient_g1.data(),          // read as a row-major 8 x 8 matrix
+              val.analytical_gradient_g1.data(),
+              error_count_g1);
+
+    std::cout << "\n";
+
+    print_one("∂²g̃₂/∂x² Hessian:",
+              val.fd_gradient_g2.data(),
+              val.analytical_gradient_g2.data(),
+              error_count_g2);
+
+    std::cout << "\n" << std::string(120, '=') << "\n";
+    std::cout << "SUMMARY:\n";
+    std::cout << "  g_tilde1 Hessian: " << (error_count_g1 == 0 ? "✓ PASS" : "✗ FAIL")
+              << " (" << error_count_g1 << " errors)\n";
+    std::cout << "  g_tilde2 Hessian: " << (error_count_g2 == 0 ? "✓ PASS" : "✗ FAIL")
+              << " (" << error_count_g2 << " errors)\n";
+    std::cout << std::string(120, '=') << "\n\n";
+}
+
+
+
+
+
diff --git a/src/tribol/physics/new_method.hpp b/src/tribol/physics/new_method.hpp
new file mode 100644
index 00000000..63ef44c2
--- /dev/null
+++ b/src/tribol/physics/new_method.hpp
@@ -0,0 +1,135 @@
+#pragma once
+#include <vector>
+#include <array>
+// Mesh node: 2-D position (x, y) and an integer id.
+struct Node {
+    double x, y;
+    int id;
+};
+// Two-node segment element; node_ids are the indices passed to Mesh::node().
+struct Element {
+    int id;
+    std::array<int, 2> node_ids;
+};
+// Container of nodes and elements; node(i) returns nodes[i] with no bounds check.
+struct Mesh {
+    std::vector<Node> nodes; 
+    std::vector<Element> elements; 
+
+    const Node& node(int i) const { return nodes[i]; }
+    Node& node(int i) {return nodes[i]; }
+};
+// Quadrature rule stored as parallel arrays: qp[i] is a point, w[i] its weight.
+struct QuadPoints {
+    std::vector<double> qp; //quadpoints 
+    std::vector<double> w; //weights
+};
+// Parameters copied into ContactSmoothing and ContactEvaluator.
+struct ContactParams {
+    double del; // NOTE(review): use is not visible here — presumably a smoothing width; confirm
+    double k;   // factor applied to a negative averaged gap to obtain a pressure
+    int N;      // number of quadrature points per rule
+};
+// Results for the two nodes of an element: pressures and integrated gaps g_tilde.
+struct NodalContactData {
+    std::array<double, 2> pressures;
+    std::array<double, 2> g_tilde;
+};
+// Pair of values named dgt. NOTE(review): appears unused in the visible code — confirm.
+struct FDResult {
+    std::array<double, 2> dgt;
+};
+// Output of the validate_* routines: finite-difference vs analytical derivative data.
+struct FiniteDiffResult {
+    std::vector<double> fd_gradient_g1;          // finite-difference values for g_tilde1
+    std::vector<double> fd_gradient_g2;          // finite-difference values for g_tilde2
+    std::vector<double> analytical_gradient_g1;  // Enzyme values for g_tilde1
+    std::vector<double> analytical_gradient_g2;  // Enzyme values for g_tilde2
+    std::vector<int> node_ids;                   // node ordering used by validate_g_tilde
+    double g_tilde1_baseline = 0.0;              // defaulted so an unset baseline never reads as garbage
+    double g_tilde2_baseline = 0.0;
+};
+// Turns raw projection values into (smoothed) integration bounds; holds its own copy of the parameters.
+class ContactSmoothing {
+    public:
+        explicit ContactSmoothing (const ContactParams& p) : p_(p) {} //Constructor 
+
+        std::array<double, 2> bounds_from_projections(const std::array<double, 2>& proj) const;
+
+        std::array<double, 2> smooth_bounds(const std::array<double, 2>& bounds) const;
+
+    private: 
+        ContactParams p_;
+
+};
+// Evaluates gaps, pressures, energy, forces and their derivatives for a pair of segment elements.
+class ContactEvaluator {
+    public: 
+        explicit ContactEvaluator(const ContactParams& p) 
+        : p_(p), smoother_(p) {} //constructor - copies params into the object 
+
+        // In every method below A and B are the two elements of the pair; nodal
+        // coordinate arrays are ordered {A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y}.
+        double compute_contact_energy(const Mesh& mesh, 
+                      const Element& A, 
+                      const Element& B) const;
+        // Gradients of g_tilde1/g_tilde2; each output array holds 8 doubles.
+        void grad_gtilde(const Mesh& mesh, const Element& A, const Element& B,
+                         double dgt1_dx[8], double dgt2_dx[8]) const;
+        // Hessians of g_tilde1/g_tilde2; each output array holds 64 doubles (8 x 8).
+        void d2_g2tilde(const Mesh& mesh, const Element& A, const Element& B,
+                        double H1[64], double H2[64]) const;
+
+        std::array<double, 8> compute_contact_forces(const Mesh& mesh, const Element& A, const Element& B) const;
+        // Returns {g_tilde1, g_tilde2} with the quadrature rebuilt from the mesh.
+        std::pair<double, double> eval_gtilde(const Mesh& mesh,
+                                              const Element& A, 
+                                              const Element& B) const;
+        // Gradient check against central differences; perturbs mesh nodes in place and restores them.
+        FiniteDiffResult validate_g_tilde(Mesh& mesh,
+                                          const Element& A,
+                                          const Element& B,
+                                          double epsilon = 1e-7) const;
+        // NOTE(review): definition not visible here — presumably prints a validate_g_tilde result.
+        void print_gradient_comparison(const FiniteDiffResult& val) const;
+        // Same as eval_gtilde but on a caller-supplied quadrature rule.
+        std::pair<double,double> eval_gtilde_fixed_qp(Mesh& mesh,
+                                                      const Element& A,
+                                                      const Element& B,
+                                                      const QuadPoints& qp_fixed) const;
+        // Hessian check against central differences of grad_gtilde_with_qp.
+        FiniteDiffResult validate_hessian(Mesh& mesh, 
+                                          const Element& A, 
+                                          const Element& B, 
+                                          double epsilon = 1e-7) const;
+        // Same as grad_gtilde but on a caller-supplied quadrature rule.
+        void grad_gtilde_with_qp(const Mesh& mesh, const Element& A, const Element& B,
+                            const QuadPoints& qp_fixed, 
+                            double dgt1_dx[8], double dgt2_dx[8]) const;
+        // Prints the report for a validate_hessian result.
+        void print_hessian_comparison(const FiniteDiffResult& val) const;
+
+    private: 
+        ContactParams p_;
+        ContactSmoothing smoother_;
+        QuadPoints compute_quadrature(const std::array<double,2>& xi_bounds) const;
+
+        std::array<double, 2> projections(const Mesh& mesh,
+                                          const Element& A, 
+                                          const Element& B) const; 
+
+        double gap(const Mesh& mesh,
+                   const Element& A, 
+                   const Element& B,
+                   double xiA) const;
+
+        NodalContactData compute_nodal_contact_data(const Mesh& mesh,
+                                                         const Element& A, 
+                                                         const Element& B) const; 
+
+
+
+
+
+
+};

From 5afc035e0a90b2e00bb316c2c9c65adcd430e92d Mon Sep 17 00:00:00 2001
From: Ryan Lutz <lutz23@llnl.gov>
Date: Fri, 30 Jan 2026 09:59:47 -0800
Subject: [PATCH 03/56] updated mortar contact

---
 src/examples/CMakeLists.txt       |   2 +
 src/tribol/CMakeLists.txt         |   4 +
 src/tribol/physics/new_method.cpp | 456 +++++++++++++++++++++---------
 src/tribol/physics/new_method.hpp |  17 +-
 4 files changed, 348 insertions(+), 131 deletions(-)

diff --git a/src/examples/CMakeLists.txt b/src/examples/CMakeLists.txt
index d0345d3a..263c72eb 100644
--- a/src/examples/CMakeLists.txt
+++ b/src/examples/CMakeLists.txt
@@ -8,6 +8,8 @@ set( contact_examples
      common_plane_gpu.cpp
      common_plane.cpp
      mortar_lm_patch_test.cpp
+     step_1_lobatto.cpp
+     new_method_test.cpp
      )
 
 
diff --git a/src/tribol/CMakeLists.txt b/src/tribol/CMakeLists.txt
index 22709d5d..2ca329ae 100644
--- a/src/tribol/CMakeLists.txt
+++ b/src/tribol/CMakeLists.txt
@@ -41,6 +41,7 @@ set(tribol_headers
     physics/CommonPlane.hpp
     physics/Mortar.hpp
     physics/Physics.hpp
+    physics/new_method.hpp
 
     search/InterfacePairFinder.hpp
 
@@ -49,6 +50,7 @@ set(tribol_headers
     utils/DataManager.hpp
     utils/Math.hpp
     utils/TestUtils.hpp
+
     )
 
 ## list of sources
@@ -76,12 +78,14 @@ set(tribol_sources
     physics/CommonPlane.cpp
     physics/Mortar.cpp
     physics/Physics.cpp
+    physics/new_method.cpp
      
     search/InterfacePairFinder.cpp
 
     utils/ContactPlaneOutput.cpp
     utils/Math.cpp
     utils/TestUtils.cpp
+
     )
 
 if (ENABLE_FORTRAN)
diff --git a/src/tribol/physics/new_method.cpp b/src/tribol/physics/new_method.cpp
index d71d70fa..f1ca825d 100644
--- a/src/tribol/physics/new_method.cpp
+++ b/src/tribol/physics/new_method.cpp
@@ -118,8 +118,6 @@ void find_intersection(const double* A0, const double* A1,
     const double inv_det = 1.0 / det;
     double alpha = (d[0]*n[1] - d[1]*n[0]) * inv_det;
 
-    // if (alpha < 0.0) alpha = 0.0;
-    // if (alpha > 1.0) alpha = 1.0;
 
     intersection[0] = A0[0] + alpha * tA[0];
     intersection[1] = A0[1] + alpha * tA[1];
@@ -158,42 +156,13 @@ void get_projections(const double* A0, const double* A1,
     double xi_min = std::min(xi0, xi1);
     double xi_max = std::max(xi0, xi1);
 
-    // double X[2], tA, uB;
-    // if (segmentsIntersect2D(A0, A1, B0, B1, X, &tA, &uB)) {
-    //     const double xiA_int = tA - 0.5;           
-    //     xi_min = std::min(xi_min, xiA_int);
-    //     xi_max = std::max(xi_max, xiA_int);
-    // }
-    // std::cout << "xi_min in gp: " << xi_min << " xi_max: " << xi_max << std::endl;
 
     projections[0] = xi_min;
     projections[1] = xi_max;
 }
 
 
-//     double compute_local_gap(const double A0[2], const double A1[2], const double B0[2], const double B1[2], double xiA) {
-//     double nA[2], nB[2];
-
-//     find_normal(A0, A1, nA);
-//     find_normal(B0, B1, nB);
-
-//     double x1[2];
-//     iso_map(A0, A1, xiA, x1);
-
-//     double x2[2];
-//     find_intersection(B0, B1, x1, nB, x2);
-
-//     double dx = x1[0] - x2[0];
-//     double dy = x1[1] - x2[1];
-
-//     double gn = -(dx*nB[0] + dy*nB[1]);
-
-//     double dot = nB[0] * nA[0] + nB[1] * nA[1];
-//     double eta = (dot < 0) ? dot: 0.0;
-//     return gn * eta;
-// }
-
-    static void gtilde_kernel(const double* x, const Gparams* gp, double* g_tilde_out) {
+    static void gtilde_kernel(const double* x, const Gparams* gp, double* g_tilde_out, double* A_out) {
         const double A0[2] = {x[0], x[1]};
         const double A1[2] = {x[2], x[3]};
         const double B0[2] = {x[4], x[5]};
@@ -201,6 +170,8 @@ void get_projections(const double* A0, const double* A1,
         
         const double J = std::sqrt((A1[0]-A0[0])*(A1[0]-A0[0]) + (A1[1]-A0[1])*(A1[1]-A0[1]));
 
+        const double J_ref = std::sqrt((A1[0]-A0[0])*(A1[0]-A0[0]) + (A1[1]-A0[1])*(A1[1]-A0[1]));
+
         double nB[2];
         find_normal(B0, B1, nB);
 
@@ -210,6 +181,7 @@ void get_projections(const double* A0, const double* A1,
         double eta = (dot < 0) ? dot : 0.0;
 
         double g1 = 0.0, g2 = 0.0;
+        double AI_1 = 0.0, AI_2 = 0.0; 
 
         for (int i = 0; i < gp->N; ++i) {
             const double xiA = gp -> qp[i]; 
@@ -225,10 +197,6 @@ void get_projections(const double* A0, const double* A1,
             double x2[2];
             find_intersection(B0, B1, x1, nB, x2);
 
-            // lagged coupled point on B for this qp
-            // const double x2x = gp->x2[2*i + 0];
-            // const double x2y = gp->x2[2*i + 1];
-
             const double dx = x1[0] - x2[0];
             const double dy = x1[1] - x2[1];
 
@@ -238,32 +206,26 @@ void get_projections(const double* A0, const double* A1,
 
             g1 += w * N1 * g * J;
             g2 += w * N2 * g * J;
+
+            AI_1 += w * N1 * J_ref;
+            AI_2 += w * N2 * J_ref;
         }
 
         g_tilde_out[0] = g1;
         g_tilde_out[1] = g2;
+
+        A_out[0] = AI_1;
+        A_out[1] = AI_2;
         // std::cout << "G tilde: " << g1 << ", " << g2 << std::endl;
     }
 
-// static void gtilde1_out(const double* x, const Gparams* gp, double* out)
-// {
-//   double gt[2];
-//   gtilde_kernel(x, gp, gt);
-//   *out = gt[0];
-// }
-
-// static void gtilde2_out(const double* x, const Gparams* gp, double* out)
-// {
-//   double gt[2];
-//   gtilde_kernel(x, gp, gt);
-//   *out = gt[1];
-// }
 
 static void gtilde1_out(const double* x, const void* gp_void, double* out)
 {
   const Gparams* gp = static_cast<const Gparams*>(gp_void);
   double gt[2];
-  gtilde_kernel(x, gp, gt);
+  double A_out[2];
+  gtilde_kernel(x, gp, gt, A_out);
   *out = gt[0];
 }
 
@@ -271,10 +233,29 @@ static void gtilde2_out(const double* x, const void* gp_void, double* out)
 {
   const Gparams* gp = static_cast<const Gparams*>(gp_void);
   double gt[2];
-  gtilde_kernel(x, gp, gt);
+  double A_out[2];
+  gtilde_kernel(x, gp, gt, A_out);
   *out = gt[1];
 }
 
+// Scalar wrapper over gtilde_kernel: writes A_out[0] to *out. gp_void is cast to const Gparams*.
+static void A1_out(const double* x, const void* gp_void, double* out)
+{
+  const Gparams* gp = static_cast<const Gparams*>(gp_void);
+  double gt[2];
+  double A_out[2];
+  gtilde_kernel(x, gp, gt, A_out);
+  *out = A_out[0];
+}
+// Scalar wrapper over gtilde_kernel: writes A_out[1] to *out. gp_void is cast to const Gparams*.
+static void A2_out(const double* x, const void* gp_void, double* out)
+{
+  const Gparams* gp = static_cast<const Gparams*>(gp_void);
+  double gt[2];
+  double A_out[2];
+  gtilde_kernel(x, gp, gt, A_out);
+  *out = A_out[1];
+}
 void grad_gtilde1(const double* x, const Gparams* gp, double* dgt1_du) {
     double dx[8] = {0.0};
     double out = 0.0;
@@ -300,6 +281,32 @@ void grad_gtilde2(const double*x, const Gparams* gp, double* dgt2_du) {
     }
 }
 
+void grad_A1(const double* x, const Gparams* gp, double* dA1_du) {  // gradient of A1_out w.r.t. the 8 entries of x
+    double dx[8] = {0.0};  // shadow of x; receives the gradient
+    double out = 0.0;
+    double dout = 1.0;  // shadow of out, seeded with 1
+    // Reverse-mode Enzyme call: x and out are passed as (value, shadow) pairs, gp as a constant.
+    __enzyme_autodiff<void>((void*) A1_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout);
+
+    for (int i = 0; i < 8; ++i) {
+        dA1_du[i] = dx[i];
+    }
+}
+
+
+void grad_A2(const double*x, const Gparams* gp, double* dA2_du) {  // gradient of A2_out w.r.t. the 8 entries of x
+    double dx[8] = {0.0};  // shadow of x; receives the gradient
+    double out = 0.0;
+    double dout = 1.0;  // shadow of out, seeded with 1
+    // Reverse-mode Enzyme call: x and out are passed as (value, shadow) pairs, gp as a constant.
+    __enzyme_autodiff<void>((void*) A2_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout);
+
+    for (int i = 0; i < 8; ++i) {
+        dA2_du[i] = dx[i];  // copy the accumulated gradient to the caller's buffer
+    }
+}
+
+
 void d2gtilde1(const double* x, const Gparams* gp, double* H1) {
   for (int col = 0; col < 8; ++col) {
     double dx[8] = {0.0};
@@ -332,6 +339,38 @@ void d2gtilde2(const double* x, const Gparams* gp, double* H2) {
   }
 }
 
+void get_d2A1(const double* x, const Gparams* gp, double* H1) {  // fills H1 (64 doubles), one column per pass
+  for (int col = 0; col < 8; ++col) {
+    double dx[8] = {0.0};
+    dx[col] = 1.0;                // unit tangent along coordinate `col`
+    // Forward-mode derivative of grad_A1 in direction dx; dgrad receives d(grad)/dx[col].
+    double grad[8]  = {0.0};       // primal output of grad_A1
+    double dgrad[8] = {0.0};       // tangent output of grad_A1
+    // NOTE(review): (x, dx) has no explicit activity tag, unlike (grad, dgrad) -- confirm Enzyme's default applies.
+    __enzyme_fwddiff<void>((void*)grad_A1, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad);
+
+    for (int row = 0; row < 8; ++row) {
+      H1[row*8 + col] = dgrad[row]; // entry (row, col) lives at index row*8 + col
+    }
+  }
+}
+
+void get_d2A2(const double* x, const Gparams* gp, double* H1) {  // despite its name, H1 receives the derivative of grad_A2
+  for (int col = 0; col < 8; ++col) {
+    double dx[8] = {0.0};
+    dx[col] = 1.0;                // unit tangent along coordinate `col`
+    // Forward-mode derivative of grad_A2 in direction dx; dgrad receives d(grad)/dx[col].
+    double grad[8]  = {0.0};       // primal output of grad_A2
+    double dgrad[8] = {0.0};       // tangent output of grad_A2
+    // NOTE(review): (x, dx) has no explicit activity tag, unlike (grad, dgrad) -- confirm Enzyme's default applies.
+    __enzyme_fwddiff<void>((void*)grad_A2, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad);
+
+    for (int row = 0; row < 8; ++row) {
+      H1[row*8 + col] = dgrad[row]; // entry (row, col) lives at index row*8 + col
+    }
+  }
+}
+
 }
 
 std::array<double, 2> ContactEvaluator::projections(const Mesh& mesh, 
@@ -374,42 +413,9 @@ std::array<double, 2> ContactSmoothing::bounds_from_projections(const std::array
         xi_max = 0.5 + del;
     }
 
-
-    // xi_min = std::max(xi_min, -0.5 - del);
-    // xi_max = std::min(xi_max,  0.5 + del);
-    // std::cout << "xi min: " << xi_min  << " xi max: " << xi_max << std::endl;
-
     return {xi_min, xi_max}; 
 }
 
-// std::array<double, 2> ContactSmoothing::smooth_bounds(const std::array<double, 2>& bounds) const {
-//     std::array<double, 2> smooth_bounds;
-//     const double del = p_.del;
-//     for (int i = 0; i < 2; ++i) {
-//         double xi = 0.0;
-//         double xi_hat = 0.0;
-//         xi = bounds[i] + 0.5;
-//         // std::cout << "xi: " << xi << std::endl;
-//         if (0 <= xi && xi <= del) {
-//             xi_hat = ((xi)*(xi)) / (2.0 * del * (1.0 - del));
-//             // std::cout << "Zone 1" << std::endl;
-//         }
-//         else if((1.0 - del) <= xi && xi <= 1.0) {
-//             xi_hat =  1.0 -(((1.0 - xi) * (1.0 - xi)) / (2 * del * (1.0 - del)));
-//             // std::cout << "Zone 2" << std::endl;
-//         }
-//         else if(del <= xi && xi <= (1.0 - del)) { 
-//             xi_hat = ((2.0 * xi) - del) / (2.0 * (1.0 - del));
-//             // std::cout << "Zone 3" << std::endl;
-//         }
-//         smooth_bounds[i] = xi_hat - 0.5;
-//         // std::cout << "Smooth Bounds: " << smooth_bounds[i] << std::endl;
-        
-//     }
-
-//     return smooth_bounds;
-
-// }
 
 std::array<double, 2> ContactSmoothing::smooth_bounds(const std::array<double, 2>& bounds) const {
     std::array<double, 2> smooth_bounds;
@@ -494,29 +500,9 @@ double ContactEvaluator::gap(const Mesh& mesh, const Element& A, const Element&
     double x2[2] = {0.0};
     find_intersection(B0, B1, x1, nB, x2);
 
-    // std::cout << "x2: " << x2[0] << ", " << x2[1] << std::endl;
-
-    // double dx1 = B1[0] - B0[0];
-    // double dy1 = B1[1] - B0[1];
-
-    // double norm_sq = dx1*dx1 + dy1*dy1;
-
-    // double cross = (x2[0] - B0[0]) * dy1 - (x2[1] - B0[1]) * dx1;
-
-    // double t = ((x2[0] - B0[0]) * dx1 + (x2[1] - B0[1]) * dy1) / norm_sq;
-
-    // bool on_segment = (std::abs(cross) < 1e-10) && (t >= 0.0 && t <= 1.0);
-
- 
-
     double dx = x1[0] - x2[0];
     double dy = x1[1] - x2[1];
 
-    // if (on_segment == false) {
-    //     dx = 0.0;
-    //     dy = 0.0;
-    // }
-
     double gn = -(dx * nB[0] + dy * nB[1]); //signed normal gap
     // std::cout << "gap: " << gn << std::endl;
     double dot = nB[0] * nA[0] + nB[1] * nA[1];
@@ -524,7 +510,7 @@ double ContactEvaluator::gap(const Mesh& mesh, const Element& A, const Element&
 
     // std::cout << "GAP: " << gn << "  eta = " << eta << " smooth gap = " << gn * eta << std::endl;
 
-    return gn * eta; //eta is the dot product that smoothes the gap (I forget if we are doing this trick or not)
+    return gn * eta; 
 }
 
 
@@ -535,6 +521,7 @@ NodalContactData ContactEvaluator::compute_nodal_contact_data(const Mesh& mesh,
     double J = std::sqrt((std::pow((A1[0] - A0[0]),2) + std::pow((A1[1] - A0[1]),2)));
     double J_ref = std::sqrt(std::pow(A1[0] - A0[0], 2) + 
                              std::pow(A1[1] - A0[1], 2));
+    // double J_ref = std::sqrt((std::pow((1.0 - 0.0), 2) + std::pow((-0.5 + 0.5), 2)));
 
     auto projs = projections(mesh, A, B);
 
@@ -576,6 +563,7 @@ NodalContactData ContactEvaluator::compute_nodal_contact_data(const Mesh& mesh,
         double  gn = gap(mesh, A, B, xiA);
         // double gn_active = (gn < 0.0) ? gn : 0.0;
         double gn_active = gn;
+        // std::cout << "gap: " << gn << std::endl;
 
         g_tilde1 += w * N1 * gn_active * J;
         g_tilde2 += w * N2 * gn_active * J;
@@ -587,31 +575,71 @@ NodalContactData ContactEvaluator::compute_nodal_contact_data(const Mesh& mesh,
 
         AI_1 += w * N1 * J_ref;   
         AI_2 += w * N2 * J_ref;  
-        // std::cout << "AI_1: " << AI_1 << ", AI_2: " << AI_2 << std::endl; 
+        // std::cout <<  AI_1 << ","<<  AI_2 << std::endl; 
     }
+    // std::cout <<  AI_1 << ","<<  AI_2 << std::endl; 
     // std::cout << "A: " << AI_1 << ", " << AI_2 << std::endl;
-
-    double g1 = g_tilde1 / AI_1;
-    double g2 = g_tilde2 / AI_2;
-
-    //KKT Conditons
-    double p1 = (g1 < 0.0) ? p_.k * g1 : 0.0;
-    double p2 = (g2 < 0.0) ? p_.k * g2 : 0.0;
+    // std::cout <<  g_tilde1 << ","<<  g_tilde2 << std::endl;
 
     NodalContactData contact_data;
 
-    contact_data.pressures = {p1, p2};
+    contact_data.AI = {AI_1, AI_2};
     contact_data.g_tilde = {g_tilde1, g_tilde2};
+    // double g1 = g_tilde1 / AI_1;
+    // double g2 = g_tilde2 / AI_2;
+    // // std::cout <<  g1 << ","<<  g2 << std::endl;
+
+    // //KKT Conditions
+    // double p1 = (g1 < 0.0) ? p_.k * g1 : 0.0;
+    // double p2 = (g2 < 0.0) ? p_.k * g2 : 0.0;
+
+    // NodalContactData contact_data;
+
+    // contact_data.pressures = {p1, p2};
+    // contact_data.g_tilde = {g_tilde1, g_tilde2};
 
     return contact_data;
+}
+
+// Converts the nodal weighted gaps (g_tilde) and tributary areas (AI) stored in
+// `ncd` into two nodal penalty pressures. For each node the averaged gap is
+// g_tilde / AI; a negative averaged gap yields p_.k times that gap, anything
+// else yields 0. A node whose area is below 1e-12 always gets zero pressure.
+std::array<double, 2> ContactEvaluator::compute_pressures(const NodalContactData& ncd) const {
+    constexpr double min_area = 1e-12;
+    std::array<double, 2> nodal_pressure;
+
+    for (int node = 0; node < 2; ++node) {
+        const double area = ncd.AI[node];
+        if (area < min_area) {
+            // degenerate area: pressure is forced to zero
+            nodal_pressure[node] = 0.0;
+            continue;
+        }
+
+        // area-averaged gap at this node
+        const double mean_gap = ncd.g_tilde[node] / area;
+
+        // KKT conditions: pressure only for a negative averaged gap
+        if (mean_gap < 0.0) {
+            nodal_pressure[node] = p_.k * mean_gap;
+        } else {
+            nodal_pressure[node] = 0.0;
+        }
+    }
 
+    return nodal_pressure;
 }
 
 double ContactEvaluator::compute_contact_energy(const Mesh& mesh, const Element& A, const Element& B) const {
     NodalContactData contact_data;
-     contact_data = compute_nodal_contact_data(mesh, A, B);
+    contact_data = compute_nodal_contact_data(mesh, A, B);
 
-    double contact_energy = contact_data.pressures[0] * contact_data.g_tilde[0] + contact_data.pressures[1] * contact_data.g_tilde[1];
+    std::array<double, 2> pressures;
+    pressures = compute_pressures(contact_data);
+
+
+    double contact_energy = pressures[0] * contact_data.g_tilde[0] + pressures[1] * contact_data.g_tilde[1];
     return contact_energy;
 }
 
@@ -671,25 +699,94 @@ void ContactEvaluator::grad_gtilde(const Mesh& mesh, const Element& A, const Ele
   dgt1_dx[i] = dg1_du[i];
   dgt2_dx[i] = dg2_du[i];
 }
+}
+
+// Computes the gradients of the two nodal tributary areas of the element pair
+// (A, B) with respect to x = {A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y} and
+// writes them to dA1_dx and dA2_dx (8 entries each).
+// The quadrature rule and the matching points on B are evaluated here and
+// handed to grad_A1 / grad_A2 through Gparams, which they treat as constant.
+void ContactEvaluator::grad_trib_area(const Mesh& mesh, const Element& A, const Element& B, double dA1_dx[8], double dA2_dx[8]) const {
+    double A0[2], A1[2], B0[2], B1[2];
+    endpoints(mesh, A, A0, A1);
+    endpoints(mesh, B, B0, B1);
+
+    double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
+
+    // Only B's normal is needed here; it is used by find_intersection below.
+    double nB[2];
+    find_normal(B0, B1, nB);
+
+    // Quadrature points and weights on the smoothed integration bounds.
+    auto projs = projections(mesh, A, B);
+    auto bounds = smoother_.bounds_from_projections(projs);
+    auto smooth_bounds = smoother_.smooth_bounds(bounds);
+    auto qp = compute_quadrature(smooth_bounds);
+    const int N = static_cast<int>(qp.qp.size());
+
+    // For each quadrature point: map it onto A, then find its counterpart on B.
+    std::vector<double> x2(2 * N);
+    for (int i = 0; i < N; ++i) {
+        double x1[2] = {0.0};
+        iso_map(A0, A1, qp.qp[i], x1);
+        double x2_i[2] = {0.0};
+        find_intersection(B0, B1, x1, nB, x2_i);
+        x2[2*i] = x2_i[0];
+        x2[2*i+1] = x2_i[1];
+    }
+
+    // Bundle the fixed quadrature data for the differentiated kernel.
+    Gparams gp;
+    gp.N = N;
+    gp.qp = qp.qp.data();
+    gp.w = qp.w.data();
+    gp.x2 =x2.data();
+
+    // Enzyme-based gradients of the two nodal areas.
+    grad_A1(x, &gp, dA1_dx);
+    grad_A2(x, &gp, dA2_dx);
 
 }
 
+
 std::array<double, 8> ContactEvaluator::compute_contact_forces(const Mesh& mesh, const Element& A, const Element& B) const {
     double dg_tilde1[8] = {0.0};
     double dg_tilde2[8] = {0.0};
+    double dA1[8] = {0.0};
+    double dA2[8] = {0.0};
+    std::array<double*, 2> dg_t;
+    std::array<double*, 2> dA_I;
+    dg_t = {dg_tilde1, dg_tilde2};
+    dA_I = {dA1, dA2};
+
+
 
     grad_gtilde(mesh, A, B, dg_tilde1, dg_tilde2);
+    grad_trib_area(mesh, A, B, dA1, dA2); 
 
     NodalContactData ncd;
-
     ncd = compute_nodal_contact_data(mesh, A, B);
+    // std::cout << "A: " << ncd.AI[0] << ", " << ncd.AI[1] << std::endl;
+    // std::cout << "g: " << ncd.g_tilde[0] << ", " << ncd.g_tilde[1] << std::endl;
+
+
+    std::array<double, 2> pressures;
+    pressures = compute_pressures(ncd);
+    // std::cout << "Pressures: " << pressures[0] << ", " << pressures[1] << std::endl;
 
     std::array<double,  8> f = {0.0};
 
     for(int i = 0; i < 8; ++i) {
-        f[i] = 2.0 * (ncd.pressures[0] * dg_tilde1[i] + ncd.pressures[1] * dg_tilde2[i]);
+        for (int j = 0; j < 2; ++j) {
+            double g = 0.0;
+            g = ncd.g_tilde[j] / ncd.AI[j];
+            if (ncd.AI[j] < 1e-12) {
+                g = 0.0;
+            }
+            f[i] += (2*pressures[j]*dg_t[j][i] - pressures[j] * g * dA_I[j][i]);
+            
+        }
     }
-
     return f;
 }
 
@@ -744,8 +841,113 @@ void ContactEvaluator::d2_g2tilde(const Mesh& mesh, const Element& A, const Elem
     }
 }
 
+// Computes the Hessians of the two nodal tributary areas of the element pair
+// (A, B) with respect to x = {A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y}.
+//   mesh      : mesh passed on to endpoints() and projections()
+//   A, B      : the two contact elements forming the pair
+//   d2A1, d2A2: caller-provided buffers of 64 doubles each; entry (row, col)
+//               is stored at index row*8 + col.
+// The quadrature rule and the matching points on B are evaluated here and
+// handed to get_d2A1 / get_d2A2 through Gparams, which they treat as constant.
+void ContactEvaluator::compute_d2A_d2u(const Mesh& mesh, const Element& A, const Element& B, double d2A1[64], double d2A2[64]) const {
+    // Endpoint coordinates of both elements.
+    double A0[2], A1[2], B0[2], B1[2];
+    endpoints(mesh, A, A0, A1);
+    endpoints(mesh, B, B0, B1);
+
+    double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
+
+    // Only B's normal is needed here; it is used by find_intersection below.
+    double nB[2];
+    find_normal(B0, B1, nB);
+
+    // Quadrature points and weights on the smoothed integration bounds.
+    auto projs = projections(mesh, A, B);
+    auto bounds = smoother_.bounds_from_projections(projs);
+    auto smooth_bounds = smoother_.smooth_bounds(bounds);
+    auto qp = compute_quadrature(smooth_bounds);
+    const int N = static_cast<int>(qp.qp.size());
+
+    // For each quadrature point: map it onto A, then find its counterpart on B.
+    std::vector<double> x2(2 * N);
+    for (int i = 0; i < N; ++i) {
+        double x1[2] = {0.0};
+        iso_map(A0, A1, qp.qp[i], x1);
+        double x2_i[2] = {0.0};
+        find_intersection(B0, B1, x1, nB, x2_i);
+        x2[2*i] = x2_i[0];
+        x2[2*i+1] = x2_i[1];
+    }
+
+    // Bundle the fixed quadrature data for the differentiated kernel.
+    Gparams gp;
+    gp.N = N;
+    gp.qp = qp.qp.data();
+    gp.w = qp.w.data();
+    gp.x2 =x2.data();
+
+    // get_d2A1 / get_d2A2 assign every one of the 64 entries, so they can
+    // write straight into the caller's buffers without a temporary copy.
+    get_d2A1(x, &gp, d2A1);
+    get_d2A2(x, &gp, d2A2);
+}
+
+// Assembles the 8x8 penalty-contact stiffness of the element pair (A, B).
+// Per node i, with gI = g_tilde_i / AI_i, the six terms below add up to the
+// second derivative of p_.k * g_tilde_i^2 / AI_i with respect to the 8 nodal
+// coordinates. A node whose tributary area AI_i is below 1e-12 is skipped as
+// a whole: nothing is divided by it and the other node's terms are kept.
+// NOTE(review): compute_pressures() returns zero pressure when gI >= 0, but no
+// such active-set check is applied here -- confirm that this is intended.
+std::array<std::array<double, 8>, 8> ContactEvaluator::compute_stiffness_matrix(const Mesh &mesh, const Element& A, const Element& B) const {
+    const NodalContactData ncd = compute_nodal_contact_data(mesh, A, B);
+
+    // First derivatives of g_tilde and AI (8 entries each).
+    double dg_tilde1[8], dg_tilde2[8], dAI1[8], dAI2[8];
+    grad_gtilde(mesh, A, B, dg_tilde1, dg_tilde2);
+    grad_trib_area(mesh, A, B, dAI1, dAI2);
+
+    // Second derivatives of g_tilde and AI (8x8 each, index k*8 + j).
+    double d2_gtilde1[64], d2_gtilde2[64], d2_dA1[64], d2_dA2[64];
+    d2_g2tilde(mesh, A, B, d2_gtilde1, d2_gtilde2);
+    compute_d2A_d2u(mesh, A, B, d2_dA1, d2_dA2);
+
+    std::array<double*, 2> dg_t = {dg_tilde1, dg_tilde2};
+    std::array<double*, 2> dA = {dAI1, dAI2};
 
+    std::array<double*, 2> ddg_t = {d2_gtilde1, d2_gtilde2};
+    std::array<double*, 2> ddA = {d2_dA1, d2_dA2};
 
+    std::array<std::array<double, 8>, 8> K_mat = {0.0};
+
+    for (int i = 0; i < 2; ++i) {
+        const double area = ncd.AI[i];
+        // Skip a degenerate node before dividing; K_mat keeps what is already accumulated.
+        if (area < 1e-12) {
+            continue;
+        }
+        const double gI = ncd.g_tilde[i] / area;
+
+        for (int k = 0; k < 8; ++k) {
+            for (int j = 0; j < 8; ++j) {
+                //term 1:
+                K_mat[k][j] += p_.k*(2 / area) * dg_t[i][k] * dg_t[i][j];
+                //term 2:
+                K_mat[k][j] += -p_.k*(2 * gI / area) * dg_t[i][k] * dA[i][j];
+                //term 3:
+                K_mat[k][j] += -p_.k*(2 * gI / area) * dA[i][k] * dg_t[i][j];
+                //term 4:
+                K_mat[k][j] += p_.k*(2 *gI*gI / area) * dA[i][k] * dA[i][j];
+
+                //term 5:
+                K_mat[k][j] += p_.k*2.0 * gI * ddg_t[i][k*8 + j];
+                //term 6:
+                K_mat[k][j] += -p_.k*gI*gI * ddA[i][k*8 + j];
+            }
+        }
+    }
+    return K_mat;
+}
 
 
 
@@ -753,8 +955,10 @@ std::pair<double, double> ContactEvaluator::eval_gtilde(const Mesh& mesh, const
     NodalContactData ncd = compute_nodal_contact_data(mesh, A, B);
     double gt1 = ncd.g_tilde[0];
     double gt2 = ncd.g_tilde[1];
+    double A1 = ncd.AI[0];
+    double A2 = ncd.AI[1];
 
-    return {gt1, gt2};
+    return {A1, A2};
 }
 
 
@@ -782,8 +986,8 @@ ContactEvaluator::eval_gtilde_fixed_qp(Mesh& mesh,
         const double gn = gap(mesh, A, B, xiA);   // still depends on geometry
         const double gn_active = gn;              // or your (gn<0?gn:0) logic
 
-        gt1 += w * N1 * gn_active * J;
-        gt2 += w * N2 * gn_active * J;
+        gt1 += w * N1  * J;
+        gt2 += w * N2 * J;
     }
 
     return {gt1, gt2};
@@ -823,7 +1027,7 @@ FiniteDiffResult ContactEvaluator::validate_g_tilde(Mesh& mesh, const Element& A
     // ===== GET AND REORDER ENZYME GRADIENTS =====
     double dgt1_dx[8] = {0.0};
     double dgt2_dx[8] = {0.0};
-    grad_gtilde(mesh, A, B, dgt1_dx, dgt2_dx);
+    grad_trib_area(mesh, A, B, dgt1_dx, dgt2_dx);
     
     // Map from node_id to position in x[8]
     std::map<int, int> node_to_x_idx;
@@ -910,8 +1114,8 @@ void ContactEvaluator::grad_gtilde_with_qp(const Mesh& mesh, const Element& A, c
     gp.qp = qp_fixed.qp.data();  // Use FIXED quadrature
     gp.w = qp_fixed.w.data();
     
-    grad_gtilde1(x, &gp, dgt1_dx);
-    grad_gtilde2(x, &gp, dgt2_dx);
+    grad_A1(x, &gp, dgt1_dx);
+    grad_A2(x, &gp, dgt2_dx);
 }
 
 FiniteDiffResult ContactEvaluator::validate_hessian(Mesh& mesh, const Element& A, const Element& B, double epsilon) const {
@@ -923,7 +1127,7 @@ FiniteDiffResult ContactEvaluator::validate_hessian(Mesh& mesh, const Element& A
     QuadPoints qp0 = compute_quadrature(smooth_bounds0);
     double hess1[64] = {0.0};
     double hess2[64] = {0.0};
-    d2_g2tilde(mesh, A, B, hess1, hess2);
+    compute_d2A_d2u(mesh, A, B, hess1, hess2);
 
     const int ndof = 8;
     result.fd_gradient_g1.assign(ndof*ndof, 0.0);
diff --git a/src/tribol/physics/new_method.hpp b/src/tribol/physics/new_method.hpp
index 63ef44c2..6ce4dad7 100644
--- a/src/tribol/physics/new_method.hpp
+++ b/src/tribol/physics/new_method.hpp
@@ -32,7 +32,7 @@ struct ContactParams {
 };
 
 struct NodalContactData {
-    std::array<double, 2> pressures;
+    std::array<double, 2> AI;
     std::array<double, 2> g_tilde;
 };
 
@@ -75,13 +75,21 @@ class ContactEvaluator {
                       const Element& B) const;
 
         void grad_gtilde(const Mesh& mesh, const Element& A, const Element& B,
-                         double dgt1_dx[64], double dgt2_dx[64]) const;
+                         double dgt1_dx[8], double dgt2_dx[8]) const;
+
+        void grad_trib_area(const Mesh& mesh, const Element& A, const Element& B, 
+                            double dA1_dx[8], double dA2_dx[8]) const; 
 
         void d2_g2tilde(const Mesh& mesh, const Element& A, const Element& B,
-                        double dgt1_dx[8], double dgt2_dx[8]) const;
+                        double dgt1_dx[64], double dgt2_dx[64]) const;
+
+        void compute_d2A_d2u(const Mesh& mesh, const Element& A, const Element& B,
+                             double dgt1_dx[64], double dgt2_dx[64]) const;
 
         std::array<double, 8> compute_contact_forces(const Mesh& mesh, const Element& A, const Element& B) const;
 
+        std::array<std::array<double, 8>, 8> compute_stiffness_matrix(const Mesh& mesh, const Element& A, const Element& B) const;
+
         std::pair<double, double> eval_gtilde(const Mesh& mesh,
                                               const Element& A, 
                                               const Element& B) const;
@@ -127,9 +135,8 @@ class ContactEvaluator {
                                                          const Element& A, 
                                                          const Element& B) const; 
         
-
+        std::array<double, 2> compute_pressures(const NodalContactData& ncd) const;
         
 
-
                                        
 };

From 0b464dd501e2dd687e0f8b1d5bfcb2a57264d59a Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 28 Jan 2026 23:12:36 -0800
Subject: [PATCH 04/56] add base class

---
 src/tribol/CMakeLists.txt                 |   1 +
 src/tribol/physics/ContactFormulation.hpp | 190 ++++++++++++++++++++++
 2 files changed, 191 insertions(+)
 create mode 100644 src/tribol/physics/ContactFormulation.hpp

diff --git a/src/tribol/CMakeLists.txt b/src/tribol/CMakeLists.txt
index a3371b11..b5764512 100644
--- a/src/tribol/CMakeLists.txt
+++ b/src/tribol/CMakeLists.txt
@@ -40,6 +40,7 @@ set(tribol_headers
 
     physics/AlignedMortar.hpp
     physics/CommonPlane.hpp
+    physics/ContactFormulation.hpp
     physics/Mortar.hpp
     physics/Physics.hpp
     physics/new_method.hpp
diff --git a/src/tribol/physics/ContactFormulation.hpp b/src/tribol/physics/ContactFormulation.hpp
new file mode 100644
index 00000000..6da0688b
--- /dev/null
+++ b/src/tribol/physics/ContactFormulation.hpp
@@ -0,0 +1,190 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#ifndef SRC_TRIBOL_PHYSICS_CONTACTFORMULATION_HPP_
+#define SRC_TRIBOL_PHYSICS_CONTACTFORMULATION_HPP_
+
+#include "tribol/common/Parameters.hpp"
+#include "tribol/common/ArrayTypes.hpp"
+#include "tribol/mesh/InterfacePairs.hpp"
+#include "tribol/mesh/MeshData.hpp"
+
+#include <memory>
+
+// Forward declarations for MFEM types
+namespace mfem {
+class Vector;
+class HypreParMatrix;
+class ParGridFunction;
+}
+
+namespace tribol {
+
+// Forward declaration
+class MethodData;
+
+/*!
+ * \brief Base class for contact formulations.
+ *
+ * This class provides a polymorphic interface for contact algorithms,
+ * allowing for modular implementation of new physics and formulations.
+ */
+class ContactFormulation {
+ public:
+  /**
+   * @brief Constructor
+   *
+   * @param mesh1 Reference to the first mesh
+   * @param mesh2 Reference to the second mesh
+   */
+  ContactFormulation( MeshData& mesh1, MeshData& mesh2 )
+      : mesh1_( mesh1 ), mesh2_( mesh2 )
+  {
+  }
+
+  /**
+   * @brief Virtual destructor
+   */
+  virtual ~ContactFormulation() = default;
+
+  /**
+   * @brief Checks if the necessary data is defined on the meshes
+   *
+   * @note This is a static interface method that should be implemented by
+   *       derived classes to validate input before instantiation.
+   *
+   * @param mesh1 Reference to the first mesh
+   * @param mesh2 Reference to the second mesh
+   * @param params Coupling scheme parameters
+   * @return 0 if valid, non-zero error code otherwise
+   */
+  static int checkData( MeshData& mesh1, MeshData& mesh2, const Parameters& params );
+
+  /**
+   * @brief Sets the initial set of candidate interface pairs
+   *
+   * @param pairs View of the coarse-binned interface pairs
+   */
+  virtual void setInterfacePairs( ArrayViewT<InterfacePair> pairs ) = 0;
+
+  /**
+   * @brief Updates the integration rule
+   *
+   * Determines the active set of contact pairs and computes necessary
+   * integration data (e.g. quadrature points, weights).
+   */
+  virtual void updateIntegrationRule() = 0;
+
+  /**
+   * @brief Updates nodal gaps
+   *
+   * @note Requires initialize() to be called first to register meshes.
+   */
+  virtual void updateNodalGaps() = 0;
+
+  /**
+   * @brief Updates nodal forces/residual
+   *
+   * @note Requires initialize() to be called first to register meshes.
+   */
+  virtual void updateNodalForces() = 0;
+
+  /**
+   * @brief Updates nodal energies
+   *
+   * @note Requires initialize() to be called first to register meshes.
+   */
+  virtual void updateNodalEnergies() = 0;
+
+  /**
+   * @brief Computes the maximum allowable timestep for the formulation
+   *
+   * @return maximum allowable timestep
+   */
+  virtual RealT computeTimeStep() = 0;
+
+  /**
+   * @brief Get read-only view of computed nodal gaps
+   *
+   * @return ArrayViewT<const RealT> View of gaps
+   */
+  virtual ArrayViewT<const RealT> getGaps() const = 0;
+
+  /**
+   * @brief Get read-only view of computed nodal forces
+   *
+   * @return ArrayViewT<const RealT> View of forces
+   */
+  virtual ArrayViewT<const RealT> getForces() const = 0;
+
+  /**
+   * @brief Get read-only view of pressures
+   *
+   * @return ArrayViewT<const RealT> View of pressures
+   */
+  virtual ArrayViewT<const RealT> getPressure() const = 0;
+
+  /**
+   * @brief Get pointer to Jacobian data
+   *
+   * @return MethodData* Pointer to method data containing Jacobian
+   */
+  virtual MethodData* getJacobian() const = 0;
+
+#ifdef BUILD_REDECOMP
+  /**
+   * @brief Adds computed forces to the provided MFEM vector
+   *
+   * @param [in,out] forces MFEM vector to add forces to
+   */
+  virtual void getMfemForces( mfem::Vector& forces ) const = 0;
+
+  /**
+   * @brief Populates the provided MFEM vector with gap values
+   *
+   * Resizes the vector if necessary, zeros it out, and sets gap values.
+   *
+   * @param [out] gaps MFEM vector to store gaps in
+   */
+  virtual void getMfemGap( mfem::Vector& gaps ) const = 0;
+
+  /**
+   * @brief Returns a reference to the MFEM pressure grid function
+   *
+   * @return mfem::ParGridFunction& Reference to the pressure grid function
+   */
+  virtual mfem::ParGridFunction& getMfemPressure() = 0;
+
+  /**
+   * @brief Get the derivative of force with respect to displacement
+   *
+   * @return Unique pointer to MFEM HypreParMatrix
+   */
+  virtual std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx() const = 0;
+
+  /**
+   * @brief Get the derivative of gap with respect to displacement
+   *
+   * @return Unique pointer to MFEM HypreParMatrix
+   */
+  virtual std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx() const = 0;
+
+  /**
+   * @brief Get the derivative of force with respect to pressure
+   *
+   * @return Unique pointer to MFEM HypreParMatrix
+   */
+  virtual std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const = 0;
+#endif
+
+ protected:
+  MeshData& mesh1_;        ///< Reference to the first mesh
+  MeshData& mesh2_;        ///< Reference to the second mesh
+
+};
+
+} // namespace tribol
+
+#endif /* SRC_TRIBOL_PHYSICS_CONTACTFORMULATION_HPP_ */

From 16d243271c2f0c6af6ec2afe6578fe682b45ee02 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 29 Jan 2026 12:56:17 -0800
Subject: [PATCH 05/56] initial templated mortar class

---
 src/tribol/physics/ContactFormulation.hpp |  98 ++++-------------
 src/tribol/physics/MortarFormulation.cpp  |  82 ++++++++++++++
 src/tribol/physics/MortarFormulation.hpp  | 127 ++++++++++++++++++++++
 3 files changed, 232 insertions(+), 75 deletions(-)
 create mode 100644 src/tribol/physics/MortarFormulation.cpp
 create mode 100644 src/tribol/physics/MortarFormulation.hpp

diff --git a/src/tribol/physics/ContactFormulation.hpp b/src/tribol/physics/ContactFormulation.hpp
index 6da0688b..c32b3b7f 100644
--- a/src/tribol/physics/ContactFormulation.hpp
+++ b/src/tribol/physics/ContactFormulation.hpp
@@ -9,7 +9,6 @@
 #include "tribol/common/Parameters.hpp"
 #include "tribol/common/ArrayTypes.hpp"
 #include "tribol/mesh/InterfacePairs.hpp"
-#include "tribol/mesh/MeshData.hpp"
 
 #include <memory>
 
@@ -18,7 +17,7 @@ namespace mfem {
 class Vector;
 class HypreParMatrix;
 class ParGridFunction;
-}
+}  // namespace mfem
 
 namespace tribol {
 
@@ -33,71 +32,43 @@ class MethodData;
  */
 class ContactFormulation {
  public:
-  /**
-   * @brief Constructor
-   *
-   * @param mesh1 Reference to the first mesh
-   * @param mesh2 Reference to the second mesh
-   */
-  ContactFormulation( MeshData& mesh1, MeshData& mesh2 )
-      : mesh1_( mesh1 ), mesh2_( mesh2 )
-  {
-  }
-
   /**
    * @brief Virtual destructor
    */
   virtual ~ContactFormulation() = default;
 
-  /**
-   * @brief Checks if the necessary data is defined on the meshes
-   *
-   * @note This is a static interface method that should be implemented by
-   *       derived classes to validate input before instantiation.
-   *
-   * @param mesh1 Reference to the first mesh
-   * @param mesh2 Reference to the second mesh
-   * @param params Coupling scheme parameters
-   * @return 0 if valid, non-zero error code otherwise
-   */
-  static int checkData( MeshData& mesh1, MeshData& mesh2, const Parameters& params );
-
   /**
    * @brief Sets the initial set of candidate interface pairs
    *
    * @param pairs View of the coarse-binned interface pairs
+   * @param check_level In general, higher values mean more checks and 0 means don't do checks. See specific methods for
+   * details.
    */
-  virtual void setInterfacePairs( ArrayViewT<InterfacePair> pairs ) = 0;
+  virtual void setInterfacePairs( ArrayT<InterfacePair>&& pairs, int check_level ) = 0;
 
   /**
    * @brief Updates the integration rule
    *
-   * Determines the active set of contact pairs and computes necessary
-   * integration data (e.g. quadrature points, weights).
+   * Determines overlapping contact pairs and computes necessary integration data (e.g. quadrature points, weights).
+   *
+   * @note Requires setInterfacePairs() to be called first.
    */
   virtual void updateIntegrationRule() = 0;
 
   /**
    * @brief Updates nodal gaps
    *
-   * @note Requires initialize() to be called first to register meshes.
+   * @note Requires updateIntegrationRule() to be called first.
    */
   virtual void updateNodalGaps() = 0;
 
   /**
    * @brief Updates nodal forces/residual
    *
-   * @note Requires initialize() to be called first to register meshes.
+   * @note Requires updateNodalGaps() to be called first.
    */
   virtual void updateNodalForces() = 0;
 
-  /**
-   * @brief Updates nodal energies
-   *
-   * @note Requires initialize() to be called first to register meshes.
-   */
-  virtual void updateNodalEnergies() = 0;
-
   /**
    * @brief Computes the maximum allowable timestep for the formulation
    *
@@ -105,41 +76,15 @@ class ContactFormulation {
    */
   virtual RealT computeTimeStep() = 0;
 
-  /**
-   * @brief Get read-only view of computed nodal gaps
-   *
-   * @return ArrayViewT<const RealT> View of gaps
-   */
-  virtual ArrayViewT<const RealT> getGaps() const = 0;
-
-  /**
-   * @brief Get read-only view of computed nodal forces
-   *
-   * @return ArrayViewT<const RealT> View of forces
-   */
-  virtual ArrayViewT<const RealT> getForces() const = 0;
-
-  /**
-   * @brief Get read-only view of pressures
-   *
-   * @return ArrayViewT<const RealT> View of pressures
-   */
-  virtual ArrayViewT<const RealT> getPressure() const = 0;
-
-  /**
-   * @brief Get pointer to Jacobian data
-   *
-   * @return MethodData* Pointer to method data containing Jacobian
-   */
-  virtual MethodData* getJacobian() const = 0;
-
 #ifdef BUILD_REDECOMP
   /**
    * @brief Adds computed forces to the provided MFEM vector
    *
    * @param [in,out] forces MFEM vector to add forces to
+   *
+   * @note Requires updateNodalForces() to be called first.
    */
-  virtual void getMfemForces( mfem::Vector& forces ) const = 0;
+  virtual void getMfemForce( mfem::Vector& forces ) const = 0;
 
   /**
    * @brief Populates the provided MFEM vector with gap values
@@ -147,6 +92,8 @@ class ContactFormulation {
    * Resizes the vector if necessary, zeros it out, and sets gap values.
    *
    * @param [out] gaps MFEM vector to store gaps in
+   *
+   * @note Requires updateNodalGaps() to be called first.
    */
   virtual void getMfemGap( mfem::Vector& gaps ) const = 0;
 
@@ -161,6 +108,8 @@ class ContactFormulation {
    * @brief Get the derivative of force with respect to displacement
    *
    * @return Unique pointer to MFEM HypreParMatrix
+   *
+   * @note Requires updateNodalForces() to be called first.
    */
   virtual std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx() const = 0;
 
@@ -168,23 +117,22 @@ class ContactFormulation {
    * @brief Get the derivative of gap with respect to displacement
    *
    * @return Unique pointer to MFEM HypreParMatrix
+   *
+   * @note Requires updateNodalGaps() to be called first.
    */
   virtual std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx() const = 0;
 
   /**
    * @brief Get the derivative of force with respect to pressure
    *
-   * @return Unique pointer to MFEM HypreParMatrix
+   * @return Unique pointer to mfem::HypreParMatrix
+   *
+   * @note Requires updateNodalForces() to be called first.
    */
   virtual std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const = 0;
 #endif
-
- protected:
-  MeshData& mesh1_;        ///< Reference to the first mesh
-  MeshData& mesh2_;        ///< Reference to the second mesh
-
 };
 
-} // namespace tribol
+}  // namespace tribol
 
-#endif /* SRC_TRIBOL_PHYSICS_CONTACTFORMULATION_HPP_ */
+#endif /* SRC_TRIBOL_PHYSICS_CONTACTFORMULATION_HPP_ */
\ No newline at end of file
diff --git a/src/tribol/physics/MortarFormulation.cpp b/src/tribol/physics/MortarFormulation.cpp
new file mode 100644
index 00000000..9f43f14f
--- /dev/null
+++ b/src/tribol/physics/MortarFormulation.cpp
@@ -0,0 +1,82 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include "tribol/physics/MortarFormulation.hpp"
+
+namespace tribol {
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::setInterfacePairs(
+    ArrayT<InterfacePair>&& pairs, int check_level )
+{  // Forwards the candidate pairs (moved) and check_level to IntegrationRule::findPairsInContact.
+  integration_rule_.template findPairsInContact<PointwiseGapAndNormal>( std::move( pairs ), check_level );
+}  // NOTE(review): template members are defined in a .cpp; no explicit instantiation is visible here - confirm one exists.
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::updateIntegrationRule()
+{  // Delegates to IntegrationRule::updateRule, parameterized on the PointwiseGapAndNormal policy type.
+  integration_rule_.template updateRule<PointwiseGapAndNormal>();
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::updateNodalGaps()
+{  // Delegates to the force/gap method, handing it whatever integration_rule_.getRule() returns.
+  force_and_gap_method_.template updateNodalGaps<PointwiseGapAndNormal>( integration_rule_.getRule() );
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::updateNodalForces()
+{  // Delegates to the force/gap method, handing it whatever integration_rule_.getRule() returns.
+  force_and_gap_method_.template updateNodalForces<PointwiseGapAndNormal>( integration_rule_.getRule() );
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+RealT MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::computeTimeStep()
+{  // Asks the force/gap method for a timestep, evaluated on the rule returned by integration_rule_.getRule().
+  return force_and_gap_method_.computeTimeStep( integration_rule_.getRule() );  // result must be returned: falling off the end of a RealT function is UB
+}  // NOTE(review): assumes ForceAndGapMethod::computeTimeStep() yields a value convertible to RealT - confirm.
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemForce(
+    mfem::Vector& forces ) const
+{  // Thin forwarder to ForceAndGapMethod::getMfemForce; no work is done at this level.
+  force_and_gap_method_.getMfemForce( forces );
+}  // NOTE(review): the header declares the getMfem* members under #ifdef BUILD_REDECOMP, but these definitions are unguarded - confirm.
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemGap(
+    mfem::Vector& gaps ) const
+{  // Thin forwarder to ForceAndGapMethod::getMfemGap; the output vector is passed through untouched.
+  force_and_gap_method_.getMfemGap( gaps );
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+mfem::ParGridFunction& MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemPressure()
+{  // Returns the reference handed back by the force/gap method; this class holds no pressure field of its own.
+  return force_and_gap_method_.getMfemPressure();
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+std::unique_ptr<mfem::HypreParMatrix>
+MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemDfDx() const
+{  // Passes through the matrix pointer produced by ForceAndGapMethod::getMfemDfDx.
+  return force_and_gap_method_.getMfemDfDx();
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+std::unique_ptr<mfem::HypreParMatrix>
+MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemDgDx() const
+{  // Passes through the matrix pointer produced by ForceAndGapMethod::getMfemDgDx.
+  return force_and_gap_method_.getMfemDgDx();
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+std::unique_ptr<mfem::HypreParMatrix>
+MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemDfDp() const
+{  // Passes through the matrix pointer produced by ForceAndGapMethod::getMfemDfDp.
+  return force_and_gap_method_.getMfemDfDp();
+}
+
+}  // namespace tribol
\ No newline at end of file
diff --git a/src/tribol/physics/MortarFormulation.hpp b/src/tribol/physics/MortarFormulation.hpp
new file mode 100644
index 00000000..30c16c64
--- /dev/null
+++ b/src/tribol/physics/MortarFormulation.hpp
@@ -0,0 +1,127 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#ifndef SRC_TRIBOL_PHYSICS_MORTARFORMULATION_HPP_
+#define SRC_TRIBOL_PHYSICS_MORTARFORMULATION_HPP_
+
+#include "tribol/physics/ContactFormulation.hpp"
+
+namespace tribol {
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+class MortarFormulation : public ContactFormulation {
+ public:
+  MortarFormulation( IntegrationRule&& integration_rule, PointwiseGapAndNormal&& pointwise_gap_and_normal,
+                     ForceAndGapMethod&& force_and_gap_method )
+      : integration_rule_( std::move( integration_rule ) ),
+        pointwise_gap_and_normal_( std::move( pointwise_gap_and_normal ) ),
+        force_and_gap_method_( std::move( force_and_gap_method ) )
+  {  // Each policy object is moved into its corresponding member; the constructor does nothing else.
+  }
+
+  /**
+   * @brief Sets the initial set of candidate interface pairs
+   *
+   * @param pairs Coarse-binned candidate interface pairs; taken by rvalue reference, so the caller gives up the array
+   * @param check_level In general, higher values mean more checks and 0 means don't do checks. See
+   * IntegrationRule::findPairsInContact() for details.
+   */
+  void setInterfacePairs( ArrayT<InterfacePair>&& pairs, int check_level ) override;
+
+  /**
+   * @brief Updates the integration rule
+   *
+   * Determines overlapping contact pairs and computes necessary integration data (e.g. quadrature points, weights).
+   *
+   * @note Requires setInterfacePairs() to be called first.
+   */
+  void updateIntegrationRule() override;
+
+  /**
+   * @brief Updates nodal gaps
+   *
+   * @note Requires updateIntegrationRule() to be called first.
+   */
+  void updateNodalGaps() override;
+
+  /**
+   * @brief Updates nodal forces/residual
+   *
+   * @note Requires updateNodalGaps() to be called first.
+   */
+  void updateNodalForces() override;
+
+  /**
+   * @brief Computes the maximum allowable timestep for the formulation
+   *
+   * @return maximum allowable timestep
+   */
+  RealT computeTimeStep() override;
+
+#ifdef BUILD_REDECOMP
+  /**
+   * @brief Adds computed forces to the provided MFEM vector
+   *
+   * @param [in,out] forces MFEM vector to add forces to
+   *
+   * @note Requires updateNodalForces() to be called first.
+   */
+  void getMfemForce( mfem::Vector& forces ) const override;
+
+  /**
+   * @brief Populates the provided MFEM vector with gap values
+   *
+   * Resizes the vector if necessary, zeros it out, and sets gap values.
+   *
+   * @param [out] gaps MFEM vector to store gaps in
+   *
+   * @note Requires updateNodalGaps() to be called first.
+   */
+  void getMfemGap( mfem::Vector& gaps ) const override;
+
+  /**
+   * @brief Returns a reference to the MFEM pressure grid function
+   *
+   * @return mfem::ParGridFunction& Reference to the pressure grid function
+   */
+  mfem::ParGridFunction& getMfemPressure() override;
+
+  /**
+   * @brief Get the derivative of force with respect to displacement
+   *
+   * @return Unique pointer to MFEM HypreParMatrix
+   *
+   * @note Requires updateNodalForces() to be called first.
+   */
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx() const override;
+
+  /**
+   * @brief Get the derivative of gap with respect to displacement
+   *
+   * @return Unique pointer to MFEM HypreParMatrix
+   *
+   * @note Requires updateNodalGaps() to be called first.
+   */
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx() const override;
+
+  /**
+   * @brief Get the derivative of force with respect to pressure
+   *
+   * @return Unique pointer to mfem::HypreParMatrix
+   *
+   * @note Requires updateNodalForces() to be called first.
+   */
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const override;
+#endif
+
+ private:
+  IntegrationRule integration_rule_;
+  PointwiseGapAndNormal pointwise_gap_and_normal_;
+  ForceAndGapMethod force_and_gap_method_;
+};
+
+}  // namespace tribol
+
+#endif /* SRC_TRIBOL_PHYSICS_MORTARFORMULATION_HPP_ */
\ No newline at end of file

From ac22d6508d743713f92972a04adcb313d8d2276e Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Mon, 2 Feb 2026 14:45:41 -0800
Subject: [PATCH 06/56] use tribol::MeshData and fix warnings

---
 src/tribol/physics/new_method.cpp | 742 +++++++++++++++---------------
 src/tribol/physics/new_method.hpp |  55 +--
 2 files changed, 397 insertions(+), 400 deletions(-)

diff --git a/src/tribol/physics/new_method.cpp b/src/tribol/physics/new_method.cpp
index f1ca825d..73a58971 100644
--- a/src/tribol/physics/new_method.cpp
+++ b/src/tribol/physics/new_method.cpp
@@ -14,6 +14,8 @@
 #include <set>
 #include <map>
 
+namespace tribol {
+
 namespace {
 
     struct Gparams {
@@ -84,12 +86,12 @@ void determine_legendre_nodes(int N, std::vector<double>& x)
     mapped_coord[1] =  N1 * coord1[1] + N2 * coord2[1];
 }
 
-inline void endpoints(const Mesh& mesh, const Element& e, double P0[2], double P1[2])
+inline void endpoints(const MeshData::Viewer& mesh, int elem_id, double P0[2], double P1[2])
 {
-    const Node& n0 = mesh.node(e.node_ids[0]);
-    const Node& n1 = mesh.node(e.node_ids[1]);
-    P0[0] = n0.x; P0[1] = n0.y;
-    P1[0] = n1.x; P1[1] = n1.y;
+    double P0_P1[4];  // scratch buffer, unpacked below as {P0[0], P0[1], P1[0], P1[1]}
+    mesh.getFaceCoords(elem_id, P0_P1);  // NOTE(review): assumes getFaceCoords writes node-major (x0, y0, x1, y1) - confirm
+    P0[0] = P0_P1[0]; P0[1] = P0_P1[1];
+    P1[0] = P0_P1[2]; P1[1] = P0_P1[3];
 }
 
 void find_intersection(const double* A0, const double* A1,
@@ -162,7 +164,7 @@ void get_projections(const double* A0, const double* A1,
 }
 
 
-    static void gtilde_kernel(const double* x, const Gparams* gp, double* g_tilde_out, double* A_out) {
+    void gtilde_kernel(const double* x, const Gparams* gp, double* g_tilde_out, double* A_out) {
         const double A0[2] = {x[0], x[1]};
         const double A1[2] = {x[2], x[3]};
         const double B0[2] = {x[4], x[5]};
@@ -373,21 +375,16 @@ void get_d2A2(const double* x, const Gparams* gp, double* H1) {
 
 }
 
-std::array<double, 2> ContactEvaluator::projections(const Mesh& mesh, 
-                                                    const Element& A,
-                                                    const Element& B) const {
-    const Node& A0 = mesh.node(A.node_ids[0]);
-    const Node& A1 = mesh.node(A.node_ids[1]);
-    const Node& B0 = mesh.node(B.node_ids[0]);
-    const Node& B1 = mesh.node(B.node_ids[1]);
-
-    double A0_arr[2] = {A0.x, A0.y};
-    double A1_arr[2] = {A1.x, A1.y};
-    double B0_arr[2] = {B0.x, B0.y};
-    double B1_arr[2] = {B1.x, B1.y};
+std::array<double, 2> ContactEvaluator::projections(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
+    double A0[2];
+    double A1[2];
+    endpoints(mesh1, pair.m_element_id1, A0, A1);
+    double B0[2];
+    double B1[2];
+    endpoints(mesh2, pair.m_element_id2, B0, B1);
 
     double projs[2];
-    get_projections(A0_arr, A1_arr, B0_arr, B1_arr, projs);
+    get_projections(A0, A1, B0, B1, projs);
 
     // std::cout << "Projections: " << projs[0] << ", " << projs[1] << std::endl;
     return {projs[0], projs[1]};
@@ -480,11 +477,11 @@ QuadPoints ContactEvaluator::compute_quadrature(const std::array<double, 2>& xi_
     return out;
 }
 
-double ContactEvaluator::gap(const Mesh& mesh, const Element& A, const Element& B, double xiA) const {
+double ContactEvaluator::gap(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double xiA) const {
     double A0[2], A1[2], B0[2], B1[2];
 
-    endpoints(mesh, A, A0, A1);
-    endpoints(mesh, B, B0, B1);
+    endpoints(mesh1, pair.m_element_id1, A0, A1);
+    endpoints(mesh2, pair.m_element_id2, B0, B1);
 
     double nA[2] = {0.0};
     double nB[2] = {0.0};
@@ -514,16 +511,17 @@ double ContactEvaluator::gap(const Mesh& mesh, const Element& A, const Element&
 }
 
 
-NodalContactData ContactEvaluator::compute_nodal_contact_data(const Mesh& mesh, const Element& A, const Element& B) const {
-    double A0[2], A1[2];
-    endpoints(mesh, A, A0, A1);
+NodalContactData ContactEvaluator:: compute_nodal_contact_data(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
+    double A0[2];
+    double A1[2];
+    endpoints(mesh1, pair.m_element_id1, A0, A1);
 
     double J = std::sqrt((std::pow((A1[0] - A0[0]),2) + std::pow((A1[1] - A0[1]),2)));
     double J_ref = std::sqrt(std::pow(A1[0] - A0[0], 2) + 
                              std::pow(A1[1] - A0[1], 2));
     // double J_ref = std::sqrt((std::pow((1.0 - 0.0), 2) + std::pow((-0.5 + 0.5), 2)));
 
-    auto projs = projections(mesh, A, B);
+    auto projs = projections(pair, mesh1, mesh2);
 
     auto bounds = smoother_.bounds_from_projections(projs);
     auto smooth_bounds = smoother_.smooth_bounds(bounds);
@@ -560,14 +558,14 @@ NodalContactData ContactEvaluator::compute_nodal_contact_data(const Mesh& mesh,
         // double N1_full = 0.5 - xiA_full;
         // double N2_full = 0.5 + xiA_full;
 
-        double  gn = gap(mesh, A, B, xiA);
+        double  gn = gap(pair, mesh1, mesh2, xiA);
         // double gn_active = (gn < 0.0) ? gn : 0.0;
         double gn_active = gn;
         // std::cout << "gap: " << gn << std::endl;
 
         g_tilde1 += w * N1 * gn_active * J;
         g_tilde2 += w * N2 * gn_active * J;
-        double G = g_tilde1 + g_tilde2; 
+        // double G = g_tilde1 + g_tilde2; 
         // std::cout << "G: " << G << std::endl;
 
         // std::cout << "G~1: " << g_tilde1 << ", G~2:" << g_tilde2 << std::endl; 
@@ -631,9 +629,9 @@ std::array<double, 2> ContactEvaluator::compute_pressures(const NodalContactData
     return pressures;
 }
 
-double ContactEvaluator::compute_contact_energy(const Mesh& mesh, const Element& A, const Element& B) const {
+double ContactEvaluator::compute_contact_energy(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
     NodalContactData contact_data;
-    contact_data = compute_nodal_contact_data(mesh, A, B);
+    contact_data = compute_nodal_contact_data(pair, mesh1, mesh2);
 
     std::array<double, 2> pressures;
     pressures = compute_pressures(contact_data);
@@ -643,12 +641,20 @@ double ContactEvaluator::compute_contact_energy(const Mesh& mesh, const Element&
     return contact_energy;
 }
 
-void ContactEvaluator::grad_gtilde(const Mesh& mesh, const Element& A, const Element& B, double dgt1_dx[8], double dgt2_dx[8]) const {
+void ContactEvaluator::gtilde_and_area(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double gtilde[2], double area[2]) const {
+    const auto nodal = compute_nodal_contact_data(pair, mesh1, mesh2);  // one evaluation feeds both outputs
+    for (int i = 0; i < 2; ++i) {  // copy the two per-node entries into the caller's arrays
+        gtilde[i] = nodal.g_tilde[i];
+        area[i] = nodal.AI[i];
+    }
+}
+
+void ContactEvaluator::grad_gtilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double dgt1_dx[8], double dgt2_dx[8]) const {
     double A0[2], A1[2], B0[2], B1[2];
     
 
-    endpoints(mesh, A, A0, A1);
-    endpoints(mesh, B, B0, B1);
+    endpoints(mesh1, pair.m_element_id1, A0, A1);
+    endpoints(mesh2, pair.m_element_id2, B0, B1);
 
     double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
 
@@ -656,10 +662,10 @@ void ContactEvaluator::grad_gtilde(const Mesh& mesh, const Element& A, const Ele
     find_normal(B0, B1, nB);
     find_normal(A0, A1, nA);
 
-    double dot = nB[0] * nA[0] + nB[1] * nA[1];
-    double eta = (dot < 0) ? dot:0.0;
+    // double dot = nB[0] * nA[0] + nB[1] * nA[1];
+    // double eta = (dot < 0) ? dot:0.0;
 
-    auto projs = projections(mesh, A, B);
+    auto projs = projections(pair, mesh1, mesh2);
 
     auto bounds = smoother_.bounds_from_projections(projs);
     auto smooth_bounds = smoother_.smooth_bounds(bounds);
@@ -701,11 +707,11 @@ void ContactEvaluator::grad_gtilde(const Mesh& mesh, const Element& A, const Ele
 }
 }
 
-void ContactEvaluator::grad_trib_area(const Mesh& mesh, const Element& A, const Element& B, double dA1_dx[8], double dA2_dx[8]) const {
+void ContactEvaluator::grad_trib_area(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double dA1_dx[8], double dA2_dx[8]) const {
     double A0[2], A1[2], B0[2], B1[2];
 
-    endpoints(mesh, A, A0, A1);
-    endpoints(mesh, B, B0, B1);
+    endpoints(mesh1, pair.m_element_id1, A0, A1);
+    endpoints(mesh2, pair.m_element_id2, B0, B1);
 
     double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
 
@@ -713,10 +719,10 @@ void ContactEvaluator::grad_trib_area(const Mesh& mesh, const Element& A, const
     find_normal(B0, B1, nB);
     find_normal(A0, A1, nA);
 
-    double dot = nB[0] * nA[0] + nB[1] * nA[1];
-    double eta = (dot < 0) ? dot:0.0;
+    // double dot = nB[0] * nA[0] + nB[1] * nA[1];
+    // double eta = (dot < 0) ? dot:0.0;
 
-    auto projs = projections(mesh, A, B);
+    auto projs = projections(pair, mesh1, mesh2);
 
     auto bounds = smoother_.bounds_from_projections(projs);
     auto smooth_bounds = smoother_.smooth_bounds(bounds);
@@ -749,7 +755,7 @@ void ContactEvaluator::grad_trib_area(const Mesh& mesh, const Element& A, const
 }
 
 
-std::array<double, 8> ContactEvaluator::compute_contact_forces(const Mesh& mesh, const Element& A, const Element& B) const {
+std::array<double, 8> ContactEvaluator::compute_contact_forces(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
     double dg_tilde1[8] = {0.0};
     double dg_tilde2[8] = {0.0};
     double dA1[8] = {0.0};
@@ -761,11 +767,11 @@ std::array<double, 8> ContactEvaluator::compute_contact_forces(const Mesh& mesh,
 
 
 
-    grad_gtilde(mesh, A, B, dg_tilde1, dg_tilde2);
-    grad_trib_area(mesh, A, B, dA1, dA2); 
+    grad_gtilde(pair, mesh1, mesh2, dg_tilde1, dg_tilde2);
+    grad_trib_area(pair, mesh1, mesh2, dA1, dA2); 
 
     NodalContactData ncd;
-    ncd = compute_nodal_contact_data(mesh, A, B);
+    ncd = compute_nodal_contact_data(pair, mesh1, mesh2);
     // std::cout << "A: " << ncd.AI[0] << ", " << ncd.AI[1] << std::endl;
     // std::cout << "g: " << ncd.g_tilde[0] << ", " << ncd.g_tilde[1] << std::endl;
 
@@ -790,11 +796,11 @@ std::array<double, 8> ContactEvaluator::compute_contact_forces(const Mesh& mesh,
     return f;
 }
 
-void ContactEvaluator::d2_g2tilde(const Mesh& mesh, const Element& A, const Element& B, double H1[64], double H2[64]) const {
+void ContactEvaluator::d2_g2tilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double H1[64], double H2[64]) const {
     double A0[2], A1[2], B0[2], B1[2];
 
-    endpoints(mesh, A, A0, A1);
-    endpoints(mesh, B, B0, B1);
+    endpoints(mesh1, pair.m_element_id1, A0, A1);
+    endpoints(mesh2, pair.m_element_id2, B0, B1);
 
     double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
 
@@ -802,10 +808,10 @@ void ContactEvaluator::d2_g2tilde(const Mesh& mesh, const Element& A, const Elem
     find_normal(B0, B1, nB);
     find_normal(A0, A1, nA);
 
-    double dot = nB[0] * nA[0] + nB[1] * nA[1];
-    double eta = (dot < 0) ? dot:0.0;
+    // double dot = nB[0] * nA[0] + nB[1] * nA[1];
+    // double eta = (dot < 0) ? dot:0.0;
 
-    auto projs = projections(mesh, A, B);
+    auto projs = projections(pair, mesh1, mesh2);
     auto bounds = smoother_.bounds_from_projections(projs);
     auto smooth_bounds = smoother_.smooth_bounds(bounds);
 
@@ -841,11 +847,11 @@ void ContactEvaluator::d2_g2tilde(const Mesh& mesh, const Element& A, const Elem
     }
 }
 
-void ContactEvaluator::compute_d2A_d2u(const Mesh& mesh, const Element& A, const Element& B, double d2A1[64], double d2A2[64]) const {
+void ContactEvaluator::compute_d2A_d2u(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double d2A1[64], double d2A2[64]) const {
     double A0[2], A1[2], B0[2], B1[2];
 
-    endpoints(mesh, A, A0, A1);
-    endpoints(mesh, B, B0, B1);
+    endpoints(mesh1, pair.m_element_id1, A0, A1);
+    endpoints(mesh2, pair.m_element_id2, B0, B1);
 
     double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
 
@@ -854,9 +860,9 @@ void ContactEvaluator::compute_d2A_d2u(const Mesh& mesh, const Element& A, const
     find_normal(B0, B1, nB);
     find_normal(A0, A1, nA);
 
-    double dot = nB[0] * nA[0] + nB[1] * nA[1];
+    // double dot = nB[0] * nA[0] + nB[1] * nA[1];
 
-    auto projs = projections(mesh, A, B);
+    auto projs = projections(pair, mesh1, mesh2);
     auto bounds = smoother_.bounds_from_projections(projs);
     auto smooth_bounds = smoother_.smooth_bounds(bounds);
 
@@ -892,9 +898,9 @@ void ContactEvaluator::compute_d2A_d2u(const Mesh& mesh, const Element& A, const
     }
 }
 
-std::array<std::array<double, 8>, 8> ContactEvaluator::compute_stiffness_matrix(const Mesh &mesh, const Element& A, const Element& B) const {
+std::array<std::array<double, 8>, 8> ContactEvaluator::compute_stiffness_matrix(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
     NodalContactData ncd;
-    ncd = compute_nodal_contact_data(mesh, A , B); 
+    ncd = compute_nodal_contact_data(pair, mesh1, mesh2); 
 
     std::array<double, 2> gI;
     for (int i = 0; i < 2; ++i) {
@@ -903,13 +909,13 @@ std::array<std::array<double, 8>, 8> ContactEvaluator::compute_stiffness_matrix(
 
     double dg_tilde1[8], dg_tilde2[8], dAI1[8], dAI2[8];
 
-    grad_gtilde(mesh, A, B, dg_tilde1, dg_tilde2); 
-    grad_trib_area(mesh, A, B, dAI1, dAI2);
+    grad_gtilde(pair, mesh1, mesh2, dg_tilde1, dg_tilde2); 
+    grad_trib_area(pair, mesh1, mesh2, dAI1, dAI2);
 
     double d2_gtilde1[64], d2_gtilde2[64], d2_dA1[64], d2_dA2[64];
 
-    d2_g2tilde(mesh, A, B, d2_gtilde1, d2_gtilde2); 
-    compute_d2A_d2u(mesh, A, B, d2_dA1, d2_dA2); 
+    d2_g2tilde(pair, mesh1, mesh2, d2_gtilde1, d2_gtilde2); 
+    compute_d2A_d2u(pair, mesh1, mesh2, d2_dA1, d2_dA2); 
 
     std::array<double*, 2> dg_t = {dg_tilde1, dg_tilde2};
     std::array<double*, 2> dA = {dAI1, dAI2};
@@ -917,7 +923,7 @@ std::array<std::array<double, 8>, 8> ContactEvaluator::compute_stiffness_matrix(
     std::array<double*, 2> ddg_t = {d2_gtilde1, d2_gtilde2};
     std::array<double*, 2> ddA = {d2_dA1, d2_dA2};
 
-    std::array<std::array<double, 8>, 8> K_mat = {0.0};
+    std::array<std::array<double, 8>, 8> K_mat = {{{0.0}}};
 
     for (int i = 0; i < 2; ++i) {
         for (int k = 0; k < 8; ++k) {
@@ -951,10 +957,10 @@ std::array<std::array<double, 8>, 8> ContactEvaluator::compute_stiffness_matrix(
 
 
 
-std::pair<double, double> ContactEvaluator::eval_gtilde(const Mesh& mesh, const Element& A, const Element& B) const {
-    NodalContactData ncd = compute_nodal_contact_data(mesh, A, B);
-    double gt1 = ncd.g_tilde[0];
-    double gt2 = ncd.g_tilde[1];
+std::pair<double, double> ContactEvaluator::eval_gtilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
+    NodalContactData ncd = compute_nodal_contact_data(pair, mesh1, mesh2);
+    // double gt1 = ncd.g_tilde[0];
+    // double gt2 = ncd.g_tilde[1];
     double A1 = ncd.AI[0];
     double A2 = ncd.AI[1];
 
@@ -964,13 +970,11 @@ std::pair<double, double> ContactEvaluator::eval_gtilde(const Mesh& mesh, const
 
 
 std::pair<double,double>
-ContactEvaluator::eval_gtilde_fixed_qp(Mesh& mesh,
-                                       const Element& A,
-                                       const Element& B,
+ContactEvaluator::eval_gtilde_fixed_qp(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& /*mesh2*/,
                                        const QuadPoints& qp_fixed) const
 {
     double A0[2], A1[2];
-    endpoints(mesh, A, A0, A1);
+    endpoints(mesh1, pair.m_element_id1, A0, A1);
 
     const double J = std::sqrt((A1[0]-A0[0])*(A1[0]-A0[0]) + (A1[1]-A0[1])*(A1[1]-A0[1]));
 
@@ -983,8 +987,8 @@ ContactEvaluator::eval_gtilde_fixed_qp(Mesh& mesh,
         const double N1 = 0.5 - xiA;
         const double N2 = 0.5 + xiA;
 
-        const double gn = gap(mesh, A, B, xiA);   // still depends on geometry
-        const double gn_active = gn;              // or your (gn<0?gn:0) logic
+        // const double gn = gap(pair, mesh1, mesh2, xiA);   // still depends on geometry
+        // const double gn_active = gn;              // or your (gn<0?gn:0) logic
 
         gt1 += w * N1  * J;
         gt2 += w * N2 * J;
@@ -995,115 +999,117 @@ ContactEvaluator::eval_gtilde_fixed_qp(Mesh& mesh,
 
 
 
-FiniteDiffResult ContactEvaluator::validate_g_tilde(Mesh& mesh, const Element& A, const Element& B, double epsilon) const {
+// FiniteDiffResult ContactEvaluator::validate_g_tilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double epsilon) const {
         
-    FiniteDiffResult result;
-
-    auto projs0 = projections(mesh, A, B);
-    auto bounds0 = smoother_.bounds_from_projections(projs0);
-    auto smooth_bounds0 = smoother_.smooth_bounds(bounds0);
-    QuadPoints qp0 = compute_quadrature(smooth_bounds0);
-
-// auto [g1_base, g2_base] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
-
-    auto [g1_base, g2_base] = eval_gtilde(mesh, A, B);
-    result.g_tilde1_baseline = g1_base;
-    result.g_tilde2_baseline = g2_base;
-
-    // Collect nodes in sorted order
-    std::set<int> node_set;
-    node_set.insert(A.node_ids[0]);
-    node_set.insert(A.node_ids[1]);
-    node_set.insert(B.node_ids[0]);
-    node_set.insert(B.node_ids[1]);
-
-    result.node_ids = std::vector<int>(node_set.begin(), node_set.end());
-    std::sort(result.node_ids.begin(), result.node_ids.end());
-
-    int num_dofs = result.node_ids.size() * 2;
-    result.fd_gradient_g1.resize(num_dofs);
-    result.fd_gradient_g2.resize(num_dofs);
+//     FiniteDiffResult result;
+
+//     auto projs0 = projections(pair, mesh1, mesh2);
+//     auto bounds0 = smoother_.bounds_from_projections(projs0);
+//     auto smooth_bounds0 = smoother_.smooth_bounds(bounds0);
+//     QuadPoints qp0 = compute_quadrature(smooth_bounds0);
+
+// // auto [g1_base, g2_base] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+//     auto [g1_base, g2_base] = eval_gtilde(pair, mesh1, mesh2);
+//     result.g_tilde1_baseline = g1_base;
+//     result.g_tilde2_baseline = g2_base;
+
+//     // Collect nodes in sorted order
+//     std::set<int> node_set;
+//     auto A_conn = mesh1.getConnectivity()(pair.m_element_id1);
+//     node_set.insert(A_conn[0]);
+//     node_set.insert(A_conn[1]);
+//     auto B_conn = mesh2.getConnectivity()(pair.m_element_id2);
+//     node_set.insert(B_conn[0]);
+//     node_set.insert(B_conn[1]);
+
+//     result.node_ids = std::vector<int>(node_set.begin(), node_set.end());
+//     std::sort(result.node_ids.begin(), result.node_ids.end());
+
+//     int num_dofs = result.node_ids.size() * 2;
+//     result.fd_gradient_g1.resize(num_dofs);
+//     result.fd_gradient_g2.resize(num_dofs);
     
-    // ===== GET AND REORDER ENZYME GRADIENTS =====
-    double dgt1_dx[8] = {0.0};
-    double dgt2_dx[8] = {0.0};
-    grad_trib_area(mesh, A, B, dgt1_dx, dgt2_dx);
+//     // ===== GET AND REORDER ENZYME GRADIENTS =====
+//     double dgt1_dx[8] = {0.0};
+//     double dgt2_dx[8] = {0.0};
+//     grad_trib_area(pair, mesh1, mesh2, dgt1_dx, dgt2_dx);
     
-    // Map from node_id to position in x[8]
-    std::map<int, int> node_to_x_idx;
-    node_to_x_idx[A.node_ids[0]] = 0;  // A0 → x[0,1]
-    node_to_x_idx[A.node_ids[1]] = 1;  // A1 → x[2,3]
-    node_to_x_idx[B.node_ids[0]] = 2;  // B0 → x[4,5]
-    node_to_x_idx[B.node_ids[1]] = 3;  // B1 → x[6,7]
+//     // Map from node_id to position in x[8]
+//     std::map<int, int> node_to_x_idx;
+//     node_to_x_idx[A_conn[0]] = 0;  // A0 → x[0,1]
+//     node_to_x_idx[A_conn[1]] = 1;  // A1 → x[2,3]
+//     node_to_x_idx[B_conn[0]] = 2;  // B0 → x[4,5]
+//     node_to_x_idx[B_conn[1]] = 3;  // B1 → x[6,7]
     
-    // Reorder Enzyme gradients to match sorted node order
-    result.analytical_gradient_g1.resize(num_dofs);
-    result.analytical_gradient_g2.resize(num_dofs);
+//     // Reorder Enzyme gradients to match sorted node order
+//     result.analytical_gradient_g1.resize(num_dofs);
+//     result.analytical_gradient_g2.resize(num_dofs);
     
-    for (size_t i = 0; i < result.node_ids.size(); ++i) {
-        int node_id = result.node_ids[i];
-        int x_idx = node_to_x_idx[node_id];
+//     for (size_t i = 0; i < result.node_ids.size(); ++i) {
+//         int node_id = result.node_ids[i];
+//         int x_idx = node_to_x_idx[node_id];
         
-        result.analytical_gradient_g1[2*i + 0] = dgt1_dx[2*x_idx + 0];  // x component
-        result.analytical_gradient_g1[2*i + 1] = dgt1_dx[2*x_idx + 1];  // y component
-        result.analytical_gradient_g2[2*i + 0] = dgt2_dx[2*x_idx + 0];
-        result.analytical_gradient_g2[2*i + 1] = dgt2_dx[2*x_idx + 1];
-    }
-    // =
+//         result.analytical_gradient_g1[2*i + 0] = dgt1_dx[2*x_idx + 0];  // x component
+//         result.analytical_gradient_g1[2*i + 1] = dgt1_dx[2*x_idx + 1];  // y component
+//         result.analytical_gradient_g2[2*i + 0] = dgt2_dx[2*x_idx + 0];
+//         result.analytical_gradient_g2[2*i + 1] = dgt2_dx[2*x_idx + 1];
+//     }
+//     // =
 
 
-    int dof_idx = 0;
-    //X-direction
-    for (int node_id : result.node_ids) {
-        {
-            double original = mesh.node(node_id).x;
+//     int dof_idx = 0;
+//     //X-direction
+//     for (int node_id : result.node_ids) {
+//         {
+//             double original = mesh.node(node_id).x;
 
-            mesh.node(node_id).x = original + epsilon;
-            auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+//             mesh.node(node_id).x = original + epsilon;
+//             auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
 
-            mesh.node(node_id).x = original - epsilon;
-            auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+//             mesh.node(node_id).x = original - epsilon;
+//             auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
 
-            //Restorre orginal
-            mesh.node(node_id).x = original;
+//             //Restore original
+//             mesh.node(node_id).x = original;
 
-            result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
-            result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
+//             result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
+//             result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
 
-            dof_idx++;
-        }
+//             dof_idx++;
+//         }
     
-    //y - direction 
-        {
-            double original = mesh.node(node_id).y;
+//     //y - direction 
+//         {
+//             double original = mesh.node(node_id).y;
             
-            // +epsilon
-            mesh.node(node_id).y = original + epsilon;
-            auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+//             // +epsilon
+//             mesh.node(node_id).y = original + epsilon;
+//             auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
             
-            // -epsilon
-            mesh.node(node_id).y = original - epsilon;
-            auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+//             // -epsilon
+//             mesh.node(node_id).y = original - epsilon;
+//             auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
             
-            // Restore
-            mesh.node(node_id).y = original;
+//             // Restore
+//             mesh.node(node_id).y = original;
             
-            // Central difference
-            result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
-            result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
+//             // Central difference
+//             result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
+//             result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
             
-            dof_idx++;
-        }
-    }
-    return result;
-}
+//             dof_idx++;
+//         }
+//     }
+//     return result;
+// }
 
-void ContactEvaluator::grad_gtilde_with_qp(const Mesh& mesh, const Element& A, const Element& B,
+void ContactEvaluator::grad_gtilde_with_qp(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
                          const QuadPoints& qp_fixed, 
                          double dgt1_dx[8], double dgt2_dx[8]) const {
     double A0[2], A1[2], B0[2], B1[2];
-    endpoints(mesh, A, A0, A1);
-    endpoints(mesh, B, B0, B1);
+    endpoints(mesh1, pair.m_element_id1, A0, A1);
+    endpoints(mesh2, pair.m_element_id2, B0, B1);
     
     double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
     
@@ -1118,216 +1124,216 @@ void ContactEvaluator::grad_gtilde_with_qp(const Mesh& mesh, const Element& A, c
     grad_A2(x, &gp, dgt2_dx);
 }
 
-FiniteDiffResult ContactEvaluator::validate_hessian(Mesh& mesh, const Element& A, const Element& B, double epsilon) const {
-    FiniteDiffResult result;
-
-    auto projs0 = projections(mesh, A, B);
-    auto bounds0 = smoother_.bounds_from_projections(projs0);
-    auto smooth_bounds0 = smoother_.smooth_bounds(bounds0);
-    QuadPoints qp0 = compute_quadrature(smooth_bounds0);
-    double hess1[64] = {0.0};
-    double hess2[64] = {0.0};
-    compute_d2A_d2u(mesh, A, B, hess1, hess2);
-
-    const int ndof = 8;
-    result.fd_gradient_g1.assign(ndof*ndof, 0.0);
-    result.fd_gradient_g2.assign(ndof*ndof, 0.0);
-    result.analytical_gradient_g1.resize(ndof * ndof);
-    result.analytical_gradient_g2.resize(ndof * ndof);
-
-    result.analytical_gradient_g1.assign(hess1, hess1 + 64);
-    result.analytical_gradient_g2.assign(hess2, hess2 + 64);
-
-int nodes[4] = { A.node_ids[0], A.node_ids[1], B.node_ids[0], B.node_ids[1] };
-
-int col = 0;
-for (int k = 0; k < 4; ++k) {
-  for (int comp = 0; comp < 2; ++comp) { // 0=x, 1=y
-    Node& n = mesh.node(nodes[k]);
-    double& coord = (comp == 0) ? n.x : n.y;
-    double orig = coord;
-
-    double g1p[8]={0}, g1m[8]={0}, g2p[8]={0}, g2m[8]={0};
+// FiniteDiffResult ContactEvaluator::validate_hessian(Mesh& mesh, const Element& A, const Element& B, double epsilon) const {
+//     FiniteDiffResult result;
+
+//     auto projs0 = projections(mesh, A, B);
+//     auto bounds0 = smoother_.bounds_from_projections(projs0);
+//     auto smooth_bounds0 = smoother_.smooth_bounds(bounds0);
+//     QuadPoints qp0 = compute_quadrature(smooth_bounds0);
+//     double hess1[64] = {0.0};
+//     double hess2[64] = {0.0};
+//     compute_d2A_d2u(mesh, A, B, hess1, hess2);
+
+//     const int ndof = 8;
+//     result.fd_gradient_g1.assign(ndof*ndof, 0.0);
+//     result.fd_gradient_g2.assign(ndof*ndof, 0.0);
+//     result.analytical_gradient_g1.resize(ndof * ndof);
+//     result.analytical_gradient_g2.resize(ndof * ndof);
+
+//     result.analytical_gradient_g1.assign(hess1, hess1 + 64);
+//     result.analytical_gradient_g2.assign(hess2, hess2 + 64);
+
+// int nodes[4] = { A.node_ids[0], A.node_ids[1], B.node_ids[0], B.node_ids[1] };
+
+// int col = 0;
+// for (int k = 0; k < 4; ++k) {
+//   for (int comp = 0; comp < 2; ++comp) { // 0=x, 1=y
+//     Node& n = mesh.node(nodes[k]);
+//     double& coord = (comp == 0) ? n.x : n.y;
+//     double orig = coord;
+
+//     double g1p[8]={0}, g1m[8]={0}, g2p[8]={0}, g2m[8]={0};
+
+//     coord = orig + epsilon; grad_gtilde_with_qp(mesh, A, B, qp0, g1p, g2p);
+//     coord = orig - epsilon; grad_gtilde_with_qp(mesh, A, B, qp0, g1m, g2m);
+//     coord = orig;
+
+//     for (int i = 0; i < 8; ++i) {
+//       result.fd_gradient_g1[i*8 + col] = (g1p[i] - g1m[i]) / (2*epsilon);
+//       result.fd_gradient_g2[i*8 + col] = (g2p[i] - g2m[i]) / (2*epsilon);
+//     }
+//     ++col;
+//   }
+// }
+// return result;
+// }
+
+// static const char* C_RESET = "\033[0m";
+// static const char* C_OK    = "\033[32m";
+// static const char* C_WARN  = "\033[33m";
+// static const char* C_BAD   = "\033[31m";
+
+
+// void ContactEvaluator::print_hessian_comparison(const FiniteDiffResult& val) const
+// {
+//     std::cout << std::setprecision(12) << std::scientific;
+//     std::cout << "\n" << std::string(120, '=') << "\n";
+//     std::cout << "Hessian Validation for g_tilde\n";
+//     std::cout << std::string(120, '=') << "\n";
+//     std::cout << "Baseline: g_tilde1 = " << val.g_tilde1_baseline
+//               << ", g_tilde2 = " << val.g_tilde2_baseline << "\n\n";
+
+//     const int ndof = 8;
+//     const char* dof_names[8] = {"A0_x","A0_y","A1_x","A1_y","B0_x","B0_y","B1_x","B1_y"};
+
+//     const double abs_tol_ok   = 1e-6;
+//     const double abs_tol_warn = 1e-3;
+//     const double rel_tol_pct  = 10.0;
+//     const double eps_denom    = 1e-14;
+
+//     auto print_one = [&](const char* label,
+//                          const double* fdH,
+//                          const double* anH,
+//                          int& error_count_out)
+//     {
+//         std::cout << std::string(120, '-') << "\n";
+//         std::cout << label << "\n";
+//         std::cout << std::string(120, '-') << "\n";
+
+//         // --- Matrix header ---
+//         std::cout << std::setw(8) << "Row\\Col";
+//         for (int j = 0; j < ndof; ++j) std::cout << std::setw(12) << dof_names[j];
+//         std::cout << "\n" << std::string(120, '-') << "\n";
+
+//         // --- Matrix view (FD value; colored by abs diff vs analytical) ---
+//         for (int i = 0; i < ndof; ++i) {
+//             std::cout << std::setw(8) << dof_names[i];
+//             for (int j = 0; j < ndof; ++j) {
+//                 const double fd = fdH[i*ndof + j];
+//                 const double an = anH[i*ndof + j];
+//                 const double abs_err = std::abs(fd - an);
+
+//                 const char* c = C_OK;
+//                 if (abs_err >= abs_tol_warn) c = C_BAD;
+//                 else if (abs_err >= abs_tol_ok) c = C_WARN;
+
+//                 std::cout << c << std::setw(12) << fd << C_RESET;
+//             }
+//             std::cout << "\n";
+//         }
+
+//         // --- Detailed comparison ---
+//         std::cout << "\n" << std::string(120, '-') << "\n";
+//         std::cout << "Detailed mismatches:\n";
+//         std::cout << std::string(120, '-') << "\n";
+//         std::cout << std::setw(8)  << "Row"
+//                   << std::setw(8)  << "Col"
+//                   << std::setw(20) << "FD (central)"
+//                   << std::setw(20) << "Analytical"
+//                   << std::setw(20) << "Abs Error"
+//                   << std::setw(20) << "Rel Error (%)"
+//                   << std::setw(14) << "Sign\n";
+//         std::cout << std::string(120, '-') << "\n";
+
+//         error_count_out = 0;
+
+//         double max_abs_err = 0.0;
+//         double max_rel_pct = 0.0;
+//         int max_i_abs = -1, max_j_abs = -1;
+//         int max_i_rel = -1, max_j_rel = -1;
+
+//         int sign_flip_count = 0;
+
+//         for (int i = 0; i < ndof; ++i) {
+//             for (int j = 0; j < ndof; ++j) {
+//                 const double fd = fdH[i*ndof + j];
+//                 const double an = anH[i*ndof + j];
+//                 const double abs_err = std::abs(fd - an);
+
+//                 const double denom = std::max(std::max(std::abs(fd), std::abs(an)), eps_denom);
+//                 const double rel_pct = (abs_err / denom) * 100.0;
+
+//                 const bool both_tiny = (std::abs(fd) < eps_denom && std::abs(an) < eps_denom);
+//                 const bool sign_match = both_tiny || (fd * an >= 0.0);
+
+//                 if (!sign_match) sign_flip_count++;
+
+//                 if (abs_err > max_abs_err) { max_abs_err = abs_err; max_i_abs = i; max_j_abs = j; }
+//                 if (rel_pct > max_rel_pct) { max_rel_pct = rel_pct; max_i_rel = i; max_j_rel = j; }
+
+//                 const bool print = (abs_err > abs_tol_ok) || (!sign_match) || (rel_pct > rel_tol_pct);
+
+//                 if (print) {
+//                     const char* c = (abs_err >= abs_tol_warn || !sign_match) ? C_BAD :
+//                                     (abs_err >= abs_tol_ok) ? C_WARN : C_OK;
+
+//                     std::cout << c
+//                               << std::setw(8)  << i
+//                               << std::setw(8)  << j
+//                               << std::setw(20) << fd
+//                               << std::setw(20) << an
+//                               << std::setw(20) << abs_err
+//                               << std::setw(20) << rel_pct
+//                               << std::setw(14) << (sign_match ? "✓" : "✗ FLIP")
+//                               << C_RESET << "\n";
+
+//                     // Count as "problem" if big rel error or sign flip (your original logic)
+//                     if (!sign_match || rel_pct > rel_tol_pct) error_count_out++;
+//                 }
+//             }
+//         }
+
+//         // --- Symmetry check (optional but useful) ---
+//         double max_asym_fd = 0.0, max_asym_an = 0.0;
+//         for (int i = 0; i < ndof; ++i) {
+//             for (int j = i+1; j < ndof; ++j) {
+//                 max_asym_fd = std::max(max_asym_fd, std::abs(fdH[i*ndof+j] - fdH[j*ndof+i]));
+//                 max_asym_an = std::max(max_asym_an, std::abs(anH[i*ndof+j] - anH[j*ndof+i]));
+//             }
+//         }
+
+//         std::cout << "\n";
+//         if (error_count_out == 0) {
+//             std::cout << "All entries match within thresholds! ✓\n";
+//         } else {
+//             std::cout << "Found " << error_count_out << " problematic entries\n";
+//         }
+
+//         std::cout << "Max abs error: " << max_abs_err
+//                   << " at (" << max_i_abs << "," << max_j_abs << ")\n";
+//         std::cout << "Max rel error: " << max_rel_pct
+//                   << "% at (" << max_i_rel << "," << max_j_rel << ")\n";
+//         std::cout << "Sign flips: " << sign_flip_count << "\n";
+//         std::cout << "Symmetry (FD max |H_ij - H_ji|): " << max_asym_fd << "\n";
+//         std::cout << "Symmetry (AN max |H_ij - H_ji|): " << max_asym_an << "\n";
+//     };
+
+//     int error_count_g1 = 0;
+//     int error_count_g2 = 0;
+
+//     print_one("∂²g̃₁/∂x² Hessian:",
+//               val.fd_gradient_g1.data(),          // if these are std::vector<double>
+//               val.analytical_gradient_g1.data(),
+//               error_count_g1);
+
+//     std::cout << "\n";
+
+//     print_one("∂²g̃₂/∂x² Hessian:",
+//               val.fd_gradient_g2.data(),
+//               val.analytical_gradient_g2.data(),
+//               error_count_g2);
+
+//     std::cout << "\n" << std::string(120, '=') << "\n";
+//     std::cout << "SUMMARY:\n";
+//     std::cout << "  g_tilde1 Hessian: " << (error_count_g1 == 0 ? "✓ PASS" : "✗ FAIL")
+//               << " (" << error_count_g1 << " errors)\n";
+//     std::cout << "  g_tilde2 Hessian: " << (error_count_g2 == 0 ? "✓ PASS" : "✗ FAIL")
+//               << " (" << error_count_g2 << " errors)\n";
+//     std::cout << std::string(120, '=') << "\n\n";
+// }
 
-    coord = orig + epsilon; grad_gtilde_with_qp(mesh, A, B, qp0, g1p, g2p);
-    coord = orig - epsilon; grad_gtilde_with_qp(mesh, A, B, qp0, g1m, g2m);
-    coord = orig;
-
-    for (int i = 0; i < 8; ++i) {
-      result.fd_gradient_g1[i*8 + col] = (g1p[i] - g1m[i]) / (2*epsilon);
-      result.fd_gradient_g2[i*8 + col] = (g2p[i] - g2m[i]) / (2*epsilon);
-    }
-    ++col;
-  }
-}
-return result;
-}
-
-static const char* C_RESET = "\033[0m";
-static const char* C_OK    = "\033[32m";
-static const char* C_WARN  = "\033[33m";
-static const char* C_BAD   = "\033[31m";
-
-
-void ContactEvaluator::print_hessian_comparison(const FiniteDiffResult& val) const
-{
-    std::cout << std::setprecision(12) << std::scientific;
-    std::cout << "\n" << std::string(120, '=') << "\n";
-    std::cout << "Hessian Validation for g_tilde\n";
-    std::cout << std::string(120, '=') << "\n";
-    std::cout << "Baseline: g_tilde1 = " << val.g_tilde1_baseline
-              << ", g_tilde2 = " << val.g_tilde2_baseline << "\n\n";
-
-    const int ndof = 8;
-    const char* dof_names[8] = {"A0_x","A0_y","A1_x","A1_y","B0_x","B0_y","B1_x","B1_y"};
-
-    const double abs_tol_ok   = 1e-6;
-    const double abs_tol_warn = 1e-3;
-    const double rel_tol_pct  = 10.0;
-    const double eps_denom    = 1e-14;
-
-    auto print_one = [&](const char* label,
-                         const double* fdH,
-                         const double* anH,
-                         int& error_count_out)
-    {
-        std::cout << std::string(120, '-') << "\n";
-        std::cout << label << "\n";
-        std::cout << std::string(120, '-') << "\n";
-
-        // --- Matrix header ---
-        std::cout << std::setw(8) << "Row\\Col";
-        for (int j = 0; j < ndof; ++j) std::cout << std::setw(12) << dof_names[j];
-        std::cout << "\n" << std::string(120, '-') << "\n";
-
-        // --- Matrix view (FD value; colored by abs diff vs analytical) ---
-        for (int i = 0; i < ndof; ++i) {
-            std::cout << std::setw(8) << dof_names[i];
-            for (int j = 0; j < ndof; ++j) {
-                const double fd = fdH[i*ndof + j];
-                const double an = anH[i*ndof + j];
-                const double abs_err = std::abs(fd - an);
-
-                const char* c = C_OK;
-                if (abs_err >= abs_tol_warn) c = C_BAD;
-                else if (abs_err >= abs_tol_ok) c = C_WARN;
-
-                std::cout << c << std::setw(12) << fd << C_RESET;
-            }
-            std::cout << "\n";
-        }
-
-        // --- Detailed comparison ---
-        std::cout << "\n" << std::string(120, '-') << "\n";
-        std::cout << "Detailed mismatches:\n";
-        std::cout << std::string(120, '-') << "\n";
-        std::cout << std::setw(8)  << "Row"
-                  << std::setw(8)  << "Col"
-                  << std::setw(20) << "FD (central)"
-                  << std::setw(20) << "Analytical"
-                  << std::setw(20) << "Abs Error"
-                  << std::setw(20) << "Rel Error (%)"
-                  << std::setw(14) << "Sign\n";
-        std::cout << std::string(120, '-') << "\n";
-
-        error_count_out = 0;
-
-        double max_abs_err = 0.0;
-        double max_rel_pct = 0.0;
-        int max_i_abs = -1, max_j_abs = -1;
-        int max_i_rel = -1, max_j_rel = -1;
-
-        int sign_flip_count = 0;
-
-        for (int i = 0; i < ndof; ++i) {
-            for (int j = 0; j < ndof; ++j) {
-                const double fd = fdH[i*ndof + j];
-                const double an = anH[i*ndof + j];
-                const double abs_err = std::abs(fd - an);
-
-                const double denom = std::max(std::max(std::abs(fd), std::abs(an)), eps_denom);
-                const double rel_pct = (abs_err / denom) * 100.0;
-
-                const bool both_tiny = (std::abs(fd) < eps_denom && std::abs(an) < eps_denom);
-                const bool sign_match = both_tiny || (fd * an >= 0.0);
-
-                if (!sign_match) sign_flip_count++;
-
-                if (abs_err > max_abs_err) { max_abs_err = abs_err; max_i_abs = i; max_j_abs = j; }
-                if (rel_pct > max_rel_pct) { max_rel_pct = rel_pct; max_i_rel = i; max_j_rel = j; }
-
-                const bool print = (abs_err > abs_tol_ok) || (!sign_match) || (rel_pct > rel_tol_pct);
-
-                if (print) {
-                    const char* c = (abs_err >= abs_tol_warn || !sign_match) ? C_BAD :
-                                    (abs_err >= abs_tol_ok) ? C_WARN : C_OK;
-
-                    std::cout << c
-                              << std::setw(8)  << i
-                              << std::setw(8)  << j
-                              << std::setw(20) << fd
-                              << std::setw(20) << an
-                              << std::setw(20) << abs_err
-                              << std::setw(20) << rel_pct
-                              << std::setw(14) << (sign_match ? "✓" : "✗ FLIP")
-                              << C_RESET << "\n";
-
-                    // Count as "problem" if big rel error or sign flip (your original logic)
-                    if (!sign_match || rel_pct > rel_tol_pct) error_count_out++;
-                }
-            }
-        }
-
-        // --- Symmetry check (optional but useful) ---
-        double max_asym_fd = 0.0, max_asym_an = 0.0;
-        for (int i = 0; i < ndof; ++i) {
-            for (int j = i+1; j < ndof; ++j) {
-                max_asym_fd = std::max(max_asym_fd, std::abs(fdH[i*ndof+j] - fdH[j*ndof+i]));
-                max_asym_an = std::max(max_asym_an, std::abs(anH[i*ndof+j] - anH[j*ndof+i]));
-            }
-        }
-
-        std::cout << "\n";
-        if (error_count_out == 0) {
-            std::cout << "All entries match within thresholds! ✓\n";
-        } else {
-            std::cout << "Found " << error_count_out << " problematic entries\n";
-        }
-
-        std::cout << "Max abs error: " << max_abs_err
-                  << " at (" << max_i_abs << "," << max_j_abs << ")\n";
-        std::cout << "Max rel error: " << max_rel_pct
-                  << "% at (" << max_i_rel << "," << max_j_rel << ")\n";
-        std::cout << "Sign flips: " << sign_flip_count << "\n";
-        std::cout << "Symmetry (FD max |H_ij - H_ji|): " << max_asym_fd << "\n";
-        std::cout << "Symmetry (AN max |H_ij - H_ji|): " << max_asym_an << "\n";
-    };
-
-    int error_count_g1 = 0;
-    int error_count_g2 = 0;
-
-    print_one("∂²g̃₁/∂x² Hessian:",
-              val.fd_gradient_g1.data(),          // if these are std::vector<double>
-              val.analytical_gradient_g1.data(),
-              error_count_g1);
-
-    std::cout << "\n";
-
-    print_one("∂²g̃₂/∂x² Hessian:",
-              val.fd_gradient_g2.data(),
-              val.analytical_gradient_g2.data(),
-              error_count_g2);
-
-    std::cout << "\n" << std::string(120, '=') << "\n";
-    std::cout << "SUMMARY:\n";
-    std::cout << "  g_tilde1 Hessian: " << (error_count_g1 == 0 ? "✓ PASS" : "✗ FAIL")
-              << " (" << error_count_g1 << " errors)\n";
-    std::cout << "  g_tilde2 Hessian: " << (error_count_g2 == 0 ? "✓ PASS" : "✗ FAIL")
-              << " (" << error_count_g2 << " errors)\n";
-    std::cout << std::string(120, '=') << "\n\n";
 }
 
 
 
-
-
diff --git a/src/tribol/physics/new_method.hpp b/src/tribol/physics/new_method.hpp
index 6ce4dad7..ef6a92f2 100644
--- a/src/tribol/physics/new_method.hpp
+++ b/src/tribol/physics/new_method.hpp
@@ -2,6 +2,11 @@
 #include <vector>
 #include <array>
 
+#include "tribol/mesh/InterfacePairs.hpp"
+#include "tribol/mesh/MeshData.hpp"
+
+namespace tribol {
+
 struct Node {
     double x, y;
     int id;
@@ -68,50 +73,40 @@ class ContactEvaluator {
         explicit ContactEvaluator(const ContactParams& p) 
         : p_(p), smoother_(p) {} //constructor - copies params into the object 
 
+        double compute_contact_energy(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const;
 
+        void gtilde_and_area(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double gtilde[2], double area[2]) const; 
 
-        double compute_contact_energy(const Mesh& mesh, 
-                      const Element& A, 
-                      const Element& B) const;
-
-        void grad_gtilde(const Mesh& mesh, const Element& A, const Element& B,
+        void grad_gtilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
                          double dgt1_dx[8], double dgt2_dx[8]) const;
 
-        void grad_trib_area(const Mesh& mesh, const Element& A, const Element& B, 
+        void grad_trib_area(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, 
                             double dA1_dx[8], double dA2_dx[8]) const; 
 
-        void d2_g2tilde(const Mesh& mesh, const Element& A, const Element& B,
+        void d2_g2tilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
                         double dgt1_dx[64], double dgt2_dx[64]) const;
 
-        void compute_d2A_d2u(const Mesh& mesh, const Element& A, const Element& B,
+        void compute_d2A_d2u(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
                              double dgt1_dx[64], double dgt2_dx[64]) const;
 
-        std::array<double, 8> compute_contact_forces(const Mesh& mesh, const Element& A, const Element& B) const;
+        std::array<double, 8> compute_contact_forces(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const;
 
-        std::array<std::array<double, 8>, 8> compute_stiffness_matrix(const Mesh& mesh, const Element& A, const Element& B) const;
+        std::array<std::array<double, 8>, 8> compute_stiffness_matrix(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const;
 
-        std::pair<double, double> eval_gtilde(const Mesh& mesh,
-                                              const Element& A, 
-                                              const Element& B) const;
+        std::pair<double, double> eval_gtilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const;
 
-        FiniteDiffResult validate_g_tilde(Mesh& mesh,
-                                          const Element& A,
-                                          const Element& B,
+        FiniteDiffResult validate_g_tilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
                                           double epsilon = 1e-7) const;
         
         void print_gradient_comparison(const FiniteDiffResult& val) const;
         
-        std::pair<double,double> eval_gtilde_fixed_qp(Mesh& mesh,
-                                                      const Element& A,
-                                                      const Element& B,
+        std::pair<double,double> eval_gtilde_fixed_qp(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
                                                       const QuadPoints& qp_fixed) const;
 
-        FiniteDiffResult validate_hessian(Mesh& mesh, 
-                                          const Element& A, 
-                                          const Element& B, 
+        FiniteDiffResult validate_hessian(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, 
                                           double epsilon = 1e-7) const;
 
-        void grad_gtilde_with_qp(const Mesh& mesh, const Element& A, const Element& B,
+        void grad_gtilde_with_qp(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
                             const QuadPoints& qp_fixed, 
                             double dgt1_dx[8], double dgt2_dx[8]) const;
 
@@ -122,21 +117,17 @@ class ContactEvaluator {
         ContactSmoothing smoother_;
         QuadPoints compute_quadrature(const std::array<double,2>& xi_bounds) const;
 
-        std::array<double, 2> projections(const Mesh& mesh,
-                                          const Element& A, 
-                                          const Element& B) const; 
+        std::array<double, 2> projections(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const; 
 
-        double gap(const Mesh& mesh,
-                   const Element& A, 
-                   const Element& B,
+        double gap(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
                    double xiA) const;
 
-        NodalContactData compute_nodal_contact_data(const Mesh& mesh,
-                                                         const Element& A, 
-                                                         const Element& B) const; 
+        NodalContactData compute_nodal_contact_data(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const; 
         
         std::array<double, 2> compute_pressures(const NodalContactData& ncd) const;
         
 
                                        
 };
+
+}

From 9208f93c5549322ad1ee2a66994c3ece2e9e59aa Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Mon, 2 Feb 2026 14:46:05 -0800
Subject: [PATCH 07/56] formatting

---
 src/tribol/physics/new_method.cpp | 1507 +++++++++++++++--------------
 src/tribol/physics/new_method.hpp |  147 +--
 2 files changed, 833 insertions(+), 821 deletions(-)

diff --git a/src/tribol/physics/new_method.cpp b/src/tribol/physics/new_method.cpp
index 73a58971..45c0f2f0 100644
--- a/src/tribol/physics/new_method.cpp
+++ b/src/tribol/physics/new_method.cpp
@@ -18,989 +18,1001 @@ namespace tribol {
 
 namespace {
 
-    struct Gparams {
-        int N;
-        const double* qp;
-        const double* w;
-        const double* x2;
-    };
-
-
-    void find_normal(const double* coord1, const double* coord2, double* normal) {
-    double dx = coord2[0] - coord1[0];
-    double dy = coord2[1] - coord1[1];
-    double len = std::sqrt(dy * dy + dx * dx);
-    dx /= len;
-    dy /= len;
-    normal[0] = dy;
-    normal[1] = -dx;
-    }
-
-void determine_legendre_nodes(int N, std::vector<double>& x)
+struct Gparams {
+  int N;
+  const double* qp;
+  const double* w;
+  const double* x2;
+};
+
+void find_normal( const double* coord1, const double* coord2, double* normal )
 {
-    x.resize(N);
-    if (N == 1) {
-        x[0] = 0.0;
-    } else if (N == 2) {
-        const double a = 1.0 / std::sqrt(3.0);
-        x[0] = -a; x[1] =  a;
-    } else if (N == 3) {
-        const double a = std::sqrt(3.0/5.0);
-        x[0] = -a; x[1] = 0.0; x[2] = a;
-    } else if (N == 4) {
-        const double a = std::sqrt((3.0 - 2.0*std::sqrt(6.0/5.0))/7.0);
-        const double b = std::sqrt((3.0 + 2.0*std::sqrt(6.0/5.0))/7.0);
-        x[0] = -b; x[1] = -a; x[2] = a; x[3] = b;
-    } else {
-        assert(false && "Unsupported quadrature order");
-    }
+  double dx = coord2[0] - coord1[0];
+  double dy = coord2[1] - coord1[1];
+  double len = std::sqrt( dy * dy + dx * dx );
+  dx /= len;
+  dy /= len;
+  normal[0] = dy;
+  normal[1] = -dx;
 }
 
-  void determine_legendre_weights(int N, std::vector<double>& W) {
-
-    W.resize(N);
-    if (N == 1) {
-        W[0] = 2.0;
-    }
-    else if(N == 2) {
-        W[0] = 1.0;
-        W[1] = 1.0;
-    }
-    else if (N == 3) {
-        W[0] = 5.0 / 9.0;
-        W[1] = 8.0 / 9.0;
-        W[2] = 5.0 / 9.0;
-    }
-    else {
-        W[0] = (18 - std::sqrt(30)) / 36.0;
-        W[1] = (18 + std::sqrt(30)) / 36.0;
-        W[2] = (18 + std::sqrt(30)) / 36.0;
-        W[3] = (18 - std::sqrt(30)) / 36.0;
-    }
- }
+void determine_legendre_nodes( int N, std::vector<double>& x )
+{
+  x.resize( N );
+  if ( N == 1 ) {
+    x[0] = 0.0;
+  } else if ( N == 2 ) {
+    const double a = 1.0 / std::sqrt( 3.0 );
+    x[0] = -a;
+    x[1] = a;
+  } else if ( N == 3 ) {
+    const double a = std::sqrt( 3.0 / 5.0 );
+    x[0] = -a;
+    x[1] = 0.0;
+    x[2] = a;
+  } else if ( N == 4 ) {
+    const double a = std::sqrt( ( 3.0 - 2.0 * std::sqrt( 6.0 / 5.0 ) ) / 7.0 );
+    const double b = std::sqrt( ( 3.0 + 2.0 * std::sqrt( 6.0 / 5.0 ) ) / 7.0 );
+    x[0] = -b;
+    x[1] = -a;
+    x[2] = a;
+    x[3] = b;
+  } else {
+    assert( false && "Unsupported quadrature order" );
+  }
+}
 
- void iso_map(const double* coord1, const double* coord2, double xi, double* mapped_coord){
-    double N1 = 0.5 - xi;
-    double N2 = 0.5 + xi;
-    mapped_coord[0] = N1 * coord1[0] + N2 * coord2[0];
-    mapped_coord[1] =  N1 * coord1[1] + N2 * coord2[1];
+void determine_legendre_weights( int N, std::vector<double>& W )
+{
+  W.resize( N );
+  if ( N == 1 ) {
+    W[0] = 2.0;
+  } else if ( N == 2 ) {
+    W[0] = 1.0;
+    W[1] = 1.0;
+  } else if ( N == 3 ) {
+    W[0] = 5.0 / 9.0;
+    W[1] = 8.0 / 9.0;
+    W[2] = 5.0 / 9.0;
+  } else {
+    W[0] = ( 18 - std::sqrt( 30 ) ) / 36.0;
+    W[1] = ( 18 + std::sqrt( 30 ) ) / 36.0;
+    W[2] = ( 18 + std::sqrt( 30 ) ) / 36.0;
+    W[3] = ( 18 - std::sqrt( 30 ) ) / 36.0;
+  }
 }
 
-inline void endpoints(const MeshData::Viewer& mesh, int elem_id, double P0[2], double P1[2])
+void iso_map( const double* coord1, const double* coord2, double xi, double* mapped_coord )
 {
-    double P0_P1[4];
-    mesh.getFaceCoords(elem_id, P0_P1);
-    P0[0] = P0_P1[0]; P0[1] = P0_P1[1];
-    P1[0] = P0_P1[2]; P1[1] = P0_P1[3];
+  double N1 = 0.5 - xi;
+  double N2 = 0.5 + xi;
+  mapped_coord[0] = N1 * coord1[0] + N2 * coord2[0];
+  mapped_coord[1] = N1 * coord1[1] + N2 * coord2[1];
 }
 
-void find_intersection(const double* A0, const double* A1,
-                       const double* p, const double* nB,
-                       double* intersection)
+inline void endpoints( const MeshData::Viewer& mesh, int elem_id, double P0[2], double P1[2] )
 {
-    const double tA[2] = { A1[0] - A0[0], A1[1] - A0[1] };
-    const double d[2]  = { p[0] - A0[0],  p[1] - A0[1] };
-
-    const double nlen = std::sqrt(nB[0]*nB[0] + nB[1]*nB[1]);
-    if (nlen < 1e-14) {
-        intersection[0] = p[0];
-        intersection[1] = p[1];
-        return;
-    }
-    const double n[2] = { nB[0]/nlen, nB[1]/nlen };
+  double P0_P1[4];
+  mesh.getFaceCoords( elem_id, P0_P1 );
+  P0[0] = P0_P1[0];
+  P0[1] = P0_P1[1];
+  P1[0] = P0_P1[2];
+  P1[1] = P0_P1[3];
+}
 
-    const double det = tA[0]*n[1] - tA[1]*n[0];
+void find_intersection( const double* A0, const double* A1, const double* p, const double* nB, double* intersection )
+{
+  const double tA[2] = { A1[0] - A0[0], A1[1] - A0[1] };
+  const double d[2] = { p[0] - A0[0], p[1] - A0[1] };
+
+  const double nlen = std::sqrt( nB[0] * nB[0] + nB[1] * nB[1] );
+  if ( nlen < 1e-14 ) {
+    intersection[0] = p[0];
+    intersection[1] = p[1];
+    return;
+  }
+  const double n[2] = { nB[0] / nlen, nB[1] / nlen };
 
-    if (std::abs(det) < 1e-12) {
-        intersection[0] = p[0];
-        intersection[1] = p[1];
-        return;
-    }
+  const double det = tA[0] * n[1] - tA[1] * n[0];
 
-    const double inv_det = 1.0 / det;
-    double alpha = (d[0]*n[1] - d[1]*n[0]) * inv_det;
+  if ( std::abs( det ) < 1e-12 ) {
+    intersection[0] = p[0];
+    intersection[1] = p[1];
+    return;
+  }
 
+  const double inv_det = 1.0 / det;
+  double alpha = ( d[0] * n[1] - d[1] * n[0] ) * inv_det;
 
-    intersection[0] = A0[0] + alpha * tA[0];
-    intersection[1] = A0[1] + alpha * tA[1];
+  intersection[0] = A0[0] + alpha * tA[0];
+  intersection[1] = A0[1] + alpha * tA[1];
 }
 
-
-
-void get_projections(const double* A0, const double* A1,
-                     const double* B0, const double* B1,
-                     double* projections)
+void get_projections( const double* A0, const double* A1, const double* B0, const double* B1, double* projections )
 {
-    double nB[2] = {0.0, 0.0};
-    find_normal(B0, B1, nB);
-
-    const double dxA = A1[0] - A0[0];
-    const double dyA = A1[1] - A0[1];
-    const double len2A = dxA*dxA + dyA*dyA;
+  double nB[2] = { 0.0, 0.0 };
+  find_normal( B0, B1, nB );
 
-    const double* B_endpoints[2] = { B0, B1 };
+  const double dxA = A1[0] - A0[0];
+  const double dyA = A1[1] - A0[1];
+  const double len2A = dxA * dxA + dyA * dyA;
 
-    double xi0 = 0.0, xi1 = 0.0;
-    for (int i = 0; i < 2; ++i) {
-        double q[2] = {0.0, 0.0};                 
-        find_intersection(A0, A1, B_endpoints[i], nB, q);
+  const double* B_endpoints[2] = { B0, B1 };
 
-        // std::cout << "Intersection on A: " << q[0] << ", " << q[1] << std::endl;
+  double xi0 = 0.0, xi1 = 0.0;
+  for ( int i = 0; i < 2; ++i ) {
+    double q[2] = { 0.0, 0.0 };
+    find_intersection( A0, A1, B_endpoints[i], nB, q );
 
-        const double alphaA =
-            ((q[0] - A0[0]) * dxA + (q[1] - A0[1]) * dyA) / len2A;  
-        const double xiA = alphaA - 0.5;                           
+    // std::cout << "Intersection on A: " << q[0] << ", " << q[1] << std::endl;
 
-        if (i == 0) xi0 = xiA;
-        else        xi1 = xiA;
-    }
+    const double alphaA = ( ( q[0] - A0[0] ) * dxA + ( q[1] - A0[1] ) * dyA ) / len2A;
+    const double xiA = alphaA - 0.5;
 
-    double xi_min = std::min(xi0, xi1);
-    double xi_max = std::max(xi0, xi1);
+    if ( i == 0 )
+      xi0 = xiA;
+    else
+      xi1 = xiA;
+  }
 
+  double xi_min = std::min( xi0, xi1 );
+  double xi_max = std::max( xi0, xi1 );
 
-    projections[0] = xi_min;
-    projections[1] = xi_max;
+  projections[0] = xi_min;
+  projections[1] = xi_max;
 }
 
+void gtilde_kernel( const double* x, const Gparams* gp, double* g_tilde_out, double* A_out )
+{
+  const double A0[2] = { x[0], x[1] };
+  const double A1[2] = { x[2], x[3] };
+  const double B0[2] = { x[4], x[5] };
+  const double B1[2] = { x[6], x[7] };
 
-    void gtilde_kernel(const double* x, const Gparams* gp, double* g_tilde_out, double* A_out) {
-        const double A0[2] = {x[0], x[1]};
-        const double A1[2] = {x[2], x[3]};
-        const double B0[2] = {x[4], x[5]};
-        const double B1[2] = {x[6], x[7]};
-        
-        const double J = std::sqrt((A1[0]-A0[0])*(A1[0]-A0[0]) + (A1[1]-A0[1])*(A1[1]-A0[1]));
-
-        const double J_ref = std::sqrt((A1[0]-A0[0])*(A1[0]-A0[0]) + (A1[1]-A0[1])*(A1[1]-A0[1]));
+  const double J = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
 
-        double nB[2];
-        find_normal(B0, B1, nB);
+  const double J_ref = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
 
-        double nA[2];
-        find_normal(A0, A1, nA);
-        double dot = nB[0] * nA[0] + nB[1] * nA[1];
-        double eta = (dot < 0) ? dot : 0.0;
+  double nB[2];
+  find_normal( B0, B1, nB );
 
-        double g1 = 0.0, g2 = 0.0;
-        double AI_1 = 0.0, AI_2 = 0.0; 
+  double nA[2];
+  find_normal( A0, A1, nA );
+  double dot = nB[0] * nA[0] + nB[1] * nA[1];
+  double eta = ( dot < 0 ) ? dot : 0.0;
 
-        for (int i = 0; i < gp->N; ++i) {
-            const double xiA = gp -> qp[i]; 
-            const double w = gp->w[i];
+  double g1 = 0.0, g2 = 0.0;
+  double AI_1 = 0.0, AI_2 = 0.0;
 
-            const double N1 = 0.5 - xiA;
-            const double N2 = 0.5 + xiA;
+  for ( int i = 0; i < gp->N; ++i ) {
+    const double xiA = gp->qp[i];
+    const double w = gp->w[i];
 
-            // x1 on segment A
-            double x1[2];
-            iso_map(A0, A1, xiA, x1);
+    const double N1 = 0.5 - xiA;
+    const double N2 = 0.5 + xiA;
 
-            double x2[2];
-            find_intersection(B0, B1, x1, nB, x2);
+    // x1 on segment A
+    double x1[2];
+    iso_map( A0, A1, xiA, x1 );
 
-            const double dx = x1[0] - x2[0];
-            const double dy = x1[1] - x2[1];
+    double x2[2];
+    find_intersection( B0, B1, x1, nB, x2 );
 
-            // lagged normal on B
-             const double gn = -(dx * nB[0] + dy * nB[1]);
-             const double g  = gn * eta;
+    const double dx = x1[0] - x2[0];
+    const double dy = x1[1] - x2[1];
 
-            g1 += w * N1 * g * J;
-            g2 += w * N2 * g * J;
+    // lagged normal on B
+    const double gn = -( dx * nB[0] + dy * nB[1] );
+    const double g = gn * eta;
 
-            AI_1 += w * N1 * J_ref;
-            AI_2 += w * N2 * J_ref;
-        }
+    g1 += w * N1 * g * J;
+    g2 += w * N2 * g * J;
 
-        g_tilde_out[0] = g1;
-        g_tilde_out[1] = g2;
+    AI_1 += w * N1 * J_ref;
+    AI_2 += w * N2 * J_ref;
+  }
 
-        A_out[0] = AI_1;
-        A_out[1] = AI_2;
-        // std::cout << "G tilde: " << g1 << ", " << g2 << std::endl;
-    }
+  g_tilde_out[0] = g1;
+  g_tilde_out[1] = g2;
 
+  A_out[0] = AI_1;
+  A_out[1] = AI_2;
+  // std::cout << "G tilde: " << g1 << ", " << g2 << std::endl;
+}
 
-static void gtilde1_out(const double* x, const void* gp_void, double* out)
+static void gtilde1_out( const double* x, const void* gp_void, double* out )
 {
-  const Gparams* gp = static_cast<const Gparams*>(gp_void);
+  const Gparams* gp = static_cast<const Gparams*>( gp_void );
   double gt[2];
   double A_out[2];
-  gtilde_kernel(x, gp, gt, A_out);
+  gtilde_kernel( x, gp, gt, A_out );
   *out = gt[0];
 }
 
-static void gtilde2_out(const double* x, const void* gp_void, double* out)
+static void gtilde2_out( const double* x, const void* gp_void, double* out )
 {
-  const Gparams* gp = static_cast<const Gparams*>(gp_void);
+  const Gparams* gp = static_cast<const Gparams*>( gp_void );
   double gt[2];
   double A_out[2];
-  gtilde_kernel(x, gp, gt, A_out);
+  gtilde_kernel( x, gp, gt, A_out );
   *out = gt[1];
 }
 
-static void A1_out(const double* x, const void* gp_void, double* out)
+static void A1_out( const double* x, const void* gp_void, double* out )
 {
-  const Gparams* gp = static_cast<const Gparams*>(gp_void);
+  const Gparams* gp = static_cast<const Gparams*>( gp_void );
   double gt[2];
   double A_out[2];
-  gtilde_kernel(x, gp, gt, A_out);
+  gtilde_kernel( x, gp, gt, A_out );
   *out = A_out[0];
 }
 
-static void A2_out(const double* x, const void* gp_void, double* out)
+static void A2_out( const double* x, const void* gp_void, double* out )
 {
-  const Gparams* gp = static_cast<const Gparams*>(gp_void);
+  const Gparams* gp = static_cast<const Gparams*>( gp_void );
   double gt[2];
   double A_out[2];
-  gtilde_kernel(x, gp, gt, A_out);
+  gtilde_kernel( x, gp, gt, A_out );
   *out = A_out[1];
 }
 
-void grad_gtilde1(const double* x, const Gparams* gp, double* dgt1_du) {
-    double dx[8] = {0.0};
-    double out = 0.0;
-    double dout = 1.0;
+void grad_gtilde1( const double* x, const Gparams* gp, double* dgt1_du )
+{
+  double dx[8] = { 0.0 };
+  double out = 0.0;
+  double dout = 1.0;
 
-    __enzyme_autodiff<void>((void*) gtilde1_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout);
+  __enzyme_autodiff<void>( (void*)gtilde1_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out,
+                           &dout );
 
-    for (int i = 0; i < 8; ++i) {
-        dgt1_du[i] = dx[i];
-    }
+  for ( int i = 0; i < 8; ++i ) {
+    dgt1_du[i] = dx[i];
+  }
 }
 
+void grad_gtilde2( const double* x, const Gparams* gp, double* dgt2_du )
+{
+  double dx[8] = { 0.0 };
+  double out = 0.0;
+  double dout = 1.0;
 
-void grad_gtilde2(const double*x, const Gparams* gp, double* dgt2_du) {
-    double dx[8] = {0.0};
-    double out = 0.0;
-    double dout = 1.0;
-
-    __enzyme_autodiff<void>((void*) gtilde2_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout);
+  __enzyme_autodiff<void>( (void*)gtilde2_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out,
+                           &dout );
 
-    for (int i = 0; i < 8; ++i) {
-        dgt2_du[i] = dx[i]; 
-    }
+  for ( int i = 0; i < 8; ++i ) {
+    dgt2_du[i] = dx[i];
+  }
 }
 
-void grad_A1(const double* x, const Gparams* gp, double* dA1_du) {
-    double dx[8] = {0.0};
-    double out = 0.0;
-    double dout = 1.0;
+void grad_A1( const double* x, const Gparams* gp, double* dA1_du )
+{
+  double dx[8] = { 0.0 };
+  double out = 0.0;
+  double dout = 1.0;
 
-    __enzyme_autodiff<void>((void*) A1_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout);
+  __enzyme_autodiff<void>( (void*)A1_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout );
 
-    for (int i = 0; i < 8; ++i) {
-        dA1_du[i] = dx[i];
-    }
+  for ( int i = 0; i < 8; ++i ) {
+    dA1_du[i] = dx[i];
+  }
 }
 
+void grad_A2( const double* x, const Gparams* gp, double* dA2_du )
+{
+  double dx[8] = { 0.0 };
+  double out = 0.0;
+  double dout = 1.0;
 
-void grad_A2(const double*x, const Gparams* gp, double* dA2_du) {
-    double dx[8] = {0.0};
-    double out = 0.0;
-    double dout = 1.0;
-
-    __enzyme_autodiff<void>((void*) A2_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout);
+  __enzyme_autodiff<void>( (void*)A2_out, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout );
 
-    for (int i = 0; i < 8; ++i) {
-        dA2_du[i] = dx[i]; 
-    }
+  for ( int i = 0; i < 8; ++i ) {
+    dA2_du[i] = dx[i];
+  }
 }
 
+void d2gtilde1( const double* x, const Gparams* gp, double* H1 )
+{
+  for ( int col = 0; col < 8; ++col ) {
+    double dx[8] = { 0.0 };
+    dx[col] = 1.0;
 
-void d2gtilde1(const double* x, const Gparams* gp, double* H1) {
-  for (int col = 0; col < 8; ++col) {
-    double dx[8] = {0.0};
-    dx[col] = 1.0;                
-
-    double grad[8]  = {0.0};       
-    double dgrad[8] = {0.0};       
+    double grad[8] = { 0.0 };
+    double dgrad[8] = { 0.0 };
 
-    __enzyme_fwddiff<void>((void*)grad_gtilde1, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad);
+    __enzyme_fwddiff<void>( (void*)grad_gtilde1, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad );
 
-    for (int row = 0; row < 8; ++row) {
-      H1[row*8 + col] = dgrad[row]; 
+    for ( int row = 0; row < 8; ++row ) {
+      H1[row * 8 + col] = dgrad[row];
     }
   }
 }
 
-void d2gtilde2(const double* x, const Gparams* gp, double* H2) {
-  for (int col = 0; col < 8; ++col) {
-    double dx[8] = {0.0};
-    dx[col] = 1.0;                
+void d2gtilde2( const double* x, const Gparams* gp, double* H2 )
+{
+  for ( int col = 0; col < 8; ++col ) {
+    double dx[8] = { 0.0 };
+    dx[col] = 1.0;
 
-    double grad[8]  = {0.0};       
-    double dgrad[8] = {0.0};       
+    double grad[8] = { 0.0 };
+    double dgrad[8] = { 0.0 };
 
-    __enzyme_fwddiff<void>((void*)grad_gtilde2, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad);
+    __enzyme_fwddiff<void>( (void*)grad_gtilde2, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad );
 
-    for (int row = 0; row < 8; ++row) {
-      H2[row*8 + col] = dgrad[row]; 
+    for ( int row = 0; row < 8; ++row ) {
+      H2[row * 8 + col] = dgrad[row];
     }
   }
 }
 
-void get_d2A1(const double* x, const Gparams* gp, double* H1) {
-  for (int col = 0; col < 8; ++col) {
-    double dx[8] = {0.0};
-    dx[col] = 1.0;                
+void get_d2A1( const double* x, const Gparams* gp, double* H1 )
+{
+  for ( int col = 0; col < 8; ++col ) {
+    double dx[8] = { 0.0 };
+    dx[col] = 1.0;
 
-    double grad[8]  = {0.0};       
-    double dgrad[8] = {0.0};       
+    double grad[8] = { 0.0 };
+    double dgrad[8] = { 0.0 };
 
-    __enzyme_fwddiff<void>((void*)grad_A1, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad);
+    __enzyme_fwddiff<void>( (void*)grad_A1, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad );
 
-    for (int row = 0; row < 8; ++row) {
-      H1[row*8 + col] = dgrad[row]; 
+    for ( int row = 0; row < 8; ++row ) {
+      H1[row * 8 + col] = dgrad[row];
     }
   }
 }
 
-void get_d2A2(const double* x, const Gparams* gp, double* H1) {
-  for (int col = 0; col < 8; ++col) {
-    double dx[8] = {0.0};
-    dx[col] = 1.0;                
+void get_d2A2( const double* x, const Gparams* gp, double* H1 )
+{
+  for ( int col = 0; col < 8; ++col ) {
+    double dx[8] = { 0.0 };
+    dx[col] = 1.0;
 
-    double grad[8]  = {0.0};       
-    double dgrad[8] = {0.0};       
+    double grad[8] = { 0.0 };
+    double dgrad[8] = { 0.0 };
 
-    __enzyme_fwddiff<void>((void*)grad_A2, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad);
+    __enzyme_fwddiff<void>( (void*)grad_A2, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad );
 
-    for (int row = 0; row < 8; ++row) {
-      H1[row*8 + col] = dgrad[row]; 
+    for ( int row = 0; row < 8; ++row ) {
+      H1[row * 8 + col] = dgrad[row];
     }
   }
 }
 
-}
-
-std::array<double, 2> ContactEvaluator::projections(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
-    double A0[2];
-    double A1[2];
-    endpoints(mesh1, pair.m_element_id1, A0, A1);
-    double B0[2];
-    double B1[2];
-    endpoints(mesh2, pair.m_element_id2, B0, B1);
-
-    double projs[2];
-    get_projections(A0, A1, B0, B1, projs);
-
-    // std::cout << "Projections: " << projs[0] << ", " << projs[1] << std::endl;
-    return {projs[0], projs[1]};
-                                                    }
+}  // namespace
 
+std::array<double, 2> ContactEvaluator::projections( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                     const MeshData::Viewer& mesh2 ) const
+{
+  double A0[2];
+  double A1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  double B0[2];
+  double B1[2];
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  double projs[2];
+  get_projections( A0, A1, B0, B1, projs );
+
+  // std::cout << "Projections: " << projs[0] << ", " << projs[1] << std::endl;
+  return { projs[0], projs[1] };
+}
 
-std::array<double, 2> ContactSmoothing::bounds_from_projections(const std::array<double, 2>& proj) const {
-    double xi_min = std::min(proj[0], proj[1]);
-    double xi_max = std::max(proj[0], proj[1]);
+std::array<double, 2> ContactSmoothing::bounds_from_projections( const std::array<double, 2>& proj ) const
+{
+  double xi_min = std::min( proj[0], proj[1] );
+  double xi_max = std::max( proj[0], proj[1] );
 
-    const double del = p_.del;
+  const double del = p_.del;
 
-    if (xi_max < -0.5 - del) {
-        xi_max = -0.5 - del;
-    }
-    if(xi_min > 0.5 + del) {
-        xi_min  = 0.5 + del;
-    }
-    if (xi_min < -0.5 - del) { 
-        xi_min = -0.5 -del;
-    }
-    if (xi_max > 0.5 + del) {
-        xi_max = 0.5 + del;
-    }
+  if ( xi_max < -0.5 - del ) {
+    xi_max = -0.5 - del;
+  }
+  if ( xi_min > 0.5 + del ) {
+    xi_min = 0.5 + del;
+  }
+  if ( xi_min < -0.5 - del ) {
+    xi_min = -0.5 - del;
+  }
+  if ( xi_max > 0.5 + del ) {
+    xi_max = 0.5 + del;
+  }
 
-    return {xi_min, xi_max}; 
+  return { xi_min, xi_max };
 }
 
-
-std::array<double, 2> ContactSmoothing::smooth_bounds(const std::array<double, 2>& bounds) const {
-    std::array<double, 2> smooth_bounds;
-    const double del = p_.del;
-        for (int i = 0; i < 2; ++i) {
-        double xi = 0.0;
-        double xi_hat = 0.0;
-        xi = bounds[i] + 0.5;
-        if (0.0 - del <= xi && xi <= del) {
-            xi_hat = (1.0/(4*del)) * (xi*xi) + 0.5 * xi + del/4.0;
-            // std::cout << "zone1" << std::endl;
-        }
-        else if((1.0 - del) <= xi && xi <= 1.0 + del) {
-            // std::cout << "Zone 2: " << std::endl;
-        double b = -1.0/(4.0*del);
-        double c = 0.5 + 1.0/(2.0*del);
-        double d = 1.0 - del + (1.0/(4.0*del)) * pow(1.0-del, 2) - 0.5*(1.0-del) - (1.0-del)/(2.0*del);
-
-        xi_hat = b*xi*xi + c*xi + d;
-                }
-        else if(del <= xi && xi <= (1.0 - del)) { 
-            xi_hat = xi;
-            // std::cout << "zone3" << std::endl;
-        } 
-      smooth_bounds[i] = xi_hat - 0.5;
-    //   std::cout << "Smooth Bounds: " << smooth_bounds[i] << std::endl;
+std::array<double, 2> ContactSmoothing::smooth_bounds( const std::array<double, 2>& bounds ) const
+{
+  std::array<double, 2> smooth_bounds;
+  const double del = p_.del;
+  for ( int i = 0; i < 2; ++i ) {
+    double xi = 0.0;
+    double xi_hat = 0.0;
+    xi = bounds[i] + 0.5;
+    if ( 0.0 - del <= xi && xi <= del ) {
+      xi_hat = ( 1.0 / ( 4 * del ) ) * ( xi * xi ) + 0.5 * xi + del / 4.0;
+      // std::cout << "zone1" << std::endl;
+    } else if ( ( 1.0 - del ) <= xi && xi <= 1.0 + del ) {
+      // std::cout << "Zone 2: " << std::endl;
+      double b = -1.0 / ( 4.0 * del );
+      double c = 0.5 + 1.0 / ( 2.0 * del );
+      double d = 1.0 - del + ( 1.0 / ( 4.0 * del ) ) * pow( 1.0 - del, 2 ) - 0.5 * ( 1.0 - del ) -
+                 ( 1.0 - del ) / ( 2.0 * del );
+
+      xi_hat = b * xi * xi + c * xi + d;
+    } else if ( del <= xi && xi <= ( 1.0 - del ) ) {
+      xi_hat = xi;
+      // std::cout << "zone3" << std::endl;
     }
-    
+    smooth_bounds[i] = xi_hat - 0.5;
+    //   std::cout << "Smooth Bounds: " << smooth_bounds[i] << std::endl;
+  }
 
-    return smooth_bounds;
+  return smooth_bounds;
 }
 
-QuadPoints ContactEvaluator::compute_quadrature(const std::array<double, 2>& xi_bounds) const {
-    const int N = p_.N;
-    QuadPoints out;
-    out.qp.resize(N);
-    out.w.resize(N);
-
-    std::vector<double> qpoints(N); 
-    std::vector<double> weights(N);
+QuadPoints ContactEvaluator::compute_quadrature( const std::array<double, 2>& xi_bounds ) const
+{
+  const int N = p_.N;
+  QuadPoints out;
+  out.qp.resize( N );
+  out.w.resize( N );
 
-    determine_legendre_nodes(N, qpoints);
-    determine_legendre_weights(N, weights);
+  std::vector<double> qpoints( N );
+  std::vector<double> weights( N );
 
-    const double xi_min = xi_bounds[0];
-    const double xi_max = xi_bounds[1];
-    const double J = 0.5 * (xi_max - xi_min);
+  determine_legendre_nodes( N, qpoints );
+  determine_legendre_weights( N, weights );
 
-    for (int i = 0; i < N; ++i){
-        out.qp[i] = 0.5 * (xi_max - xi_min) * qpoints[i] + 0.5 * (xi_max + xi_min);
-        out.w[i] = weights[i] * J;
-    }
+  const double xi_min = xi_bounds[0];
+  const double xi_max = xi_bounds[1];
+  const double J = 0.5 * ( xi_max - xi_min );
 
-        // Print quadrature points
-    // std::cout << "Quad points: ";
-    // for (int i = 0; i < N; ++i) {
-    //     // std::cout << out.qp[i] << " ";
-    // }
-    // // std::cout << std::endl;
+  for ( int i = 0; i < N; ++i ) {
+    out.qp[i] = 0.5 * ( xi_max - xi_min ) * qpoints[i] + 0.5 * ( xi_max + xi_min );
+    out.w[i] = weights[i] * J;
+  }
 
+  // Print quadrature points
+  // std::cout << "Quad points: ";
+  // for (int i = 0; i < N; ++i) {
+  //     // std::cout << out.qp[i] << " ";
+  // }
+  // // std::cout << std::endl;
 
-    return out;
+  return out;
 }
 
-double ContactEvaluator::gap(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double xiA) const {
-    double A0[2], A1[2], B0[2], B1[2];
+double ContactEvaluator::gap( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                              double xiA ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
 
-    endpoints(mesh1, pair.m_element_id1, A0, A1);
-    endpoints(mesh2, pair.m_element_id2, B0, B1);
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
 
-    double nA[2] = {0.0};
-    double nB[2] = {0.0};
-    find_normal(A0, A1, nA);
-    find_normal(B0, B1, nB);
+  double nA[2] = { 0.0 };
+  double nB[2] = { 0.0 };
+  find_normal( A0, A1, nA );
+  find_normal( B0, B1, nB );
 
+  double x1[2] = { 0.0 };
+  iso_map( A0, A1, xiA, x1 );
 
-    double x1[2] = {0.0};
-    iso_map(A0, A1, xiA, x1);
+  // std::cout << "x1: " << x1[0] << ", " << x1[1] << std::endl;
 
-    // std::cout << "x1: " << x1[0] << ", " << x1[1] << std::endl;
-    
-    double x2[2] = {0.0};
-    find_intersection(B0, B1, x1, nB, x2);
+  double x2[2] = { 0.0 };
+  find_intersection( B0, B1, x1, nB, x2 );
 
-    double dx = x1[0] - x2[0];
-    double dy = x1[1] - x2[1];
+  double dx = x1[0] - x2[0];
+  double dy = x1[1] - x2[1];
 
-    double gn = -(dx * nB[0] + dy * nB[1]); //signed normal gap
-    // std::cout << "gap: " << gn << std::endl;
-    double dot = nB[0] * nA[0] + nB[1] * nA[1];
-    double eta = (dot < 0) ? dot:0.0;
+  double gn = -( dx * nB[0] + dy * nB[1] );  // signed normal gap
+  // std::cout << "gap: " << gn << std::endl;
+  double dot = nB[0] * nA[0] + nB[1] * nA[1];
+  double eta = ( dot < 0 ) ? dot : 0.0;
 
-    // std::cout << "GAP: " << gn << "  eta = " << eta << " smooth gap = " << gn * eta << std::endl;
+  // std::cout << "GAP: " << gn << "  eta = " << eta << " smooth gap = " << gn * eta << std::endl;
 
-    return gn * eta; 
+  return gn * eta;
 }
 
+NodalContactData ContactEvaluator::compute_nodal_contact_data( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                               const MeshData::Viewer& mesh2 ) const
+{
+  double A0[2];
+  double A1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
 
-NodalContactData ContactEvaluator:: compute_nodal_contact_data(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
-    double A0[2];
-    double A1[2];
-    endpoints(mesh1, pair.m_element_id1, A0, A1);
+  double J = std::sqrt( ( std::pow( ( A1[0] - A0[0] ), 2 ) + std::pow( ( A1[1] - A0[1] ), 2 ) ) );
+  double J_ref = std::sqrt( std::pow( A1[0] - A0[0], 2 ) + std::pow( A1[1] - A0[1], 2 ) );
+  // double J_ref = std::sqrt((std::pow((1.0 - 0.0), 2) + std::pow((-0.5 + 0.5), 2)));
 
-    double J = std::sqrt((std::pow((A1[0] - A0[0]),2) + std::pow((A1[1] - A0[1]),2)));
-    double J_ref = std::sqrt(std::pow(A1[0] - A0[0], 2) + 
-                             std::pow(A1[1] - A0[1], 2));
-    // double J_ref = std::sqrt((std::pow((1.0 - 0.0), 2) + std::pow((-0.5 + 0.5), 2)));
+  auto projs = projections( pair, mesh1, mesh2 );
 
-    auto projs = projections(pair, mesh1, mesh2);
+  auto bounds = smoother_.bounds_from_projections( projs );
+  auto smooth_bounds = smoother_.smooth_bounds( bounds );
 
-    auto bounds = smoother_.bounds_from_projections(projs);
-    auto smooth_bounds = smoother_.smooth_bounds(bounds);
+  auto qp = compute_quadrature( smooth_bounds );
+  auto qp_full = compute_quadrature( { -0.5, 0.5 } );  // for Ai
 
-    auto qp = compute_quadrature(smooth_bounds);
-    auto qp_full = compute_quadrature({-0.5, 0.5}); //for Ai
+  double g_tilde1 = 0.0;
+  double g_tilde2 = 0.0;
+  double AI_1 = 0.0;
+  double AI_2 = 0.0;
 
-    double g_tilde1 = 0.0;
-    double g_tilde2 = 0.0;
-    double AI_1 = 0.0;
-    double AI_2 = 0.0;
+  // for (size_t i = 0; i < qp_full.qp.size(); ++i) {
+  //     double xiA_full = qp_full.qp[i];
+  //     double w_full = qp_full.w[i];
+  //     double N1_full = 0.5 - xiA_full;
+  //     double N2_full = 0.5 + xiA_full;
 
-    // for (size_t i = 0; i < qp_full.qp.size(); ++i) {
-    //     double xiA_full = qp_full.qp[i];
-    //     double w_full = qp_full.w[i];
-    //     double N1_full = 0.5 - xiA_full;
-    //     double N2_full = 0.5 + xiA_full;
-        
-    //     AI_1 += w_full * N1_full * J_ref;
-    //     AI_2 += w_full * N2_full * J_ref;
-    // }
+  //     AI_1 += w_full * N1_full * J_ref;
+  //     AI_2 += w_full * N2_full * J_ref;
+  // }
 
-    for (size_t i = 0; i < qp.qp.size(); ++i) {
-        double xiA = qp.qp[i];
-        double w = qp.w[i];
-        // double w_full = qp_full.w[i];
-        // double xiA_full = qp_full.qp[i];
-        
-        // std::cout << "xiA: " << xiA << std::endl;
+  for ( size_t i = 0; i < qp.qp.size(); ++i ) {
+    double xiA = qp.qp[i];
+    double w = qp.w[i];
+    // double w_full = qp_full.w[i];
+    // double xiA_full = qp_full.qp[i];
 
-        double N1 = 0.5 - xiA;
-        double N2 = 0.5 + xiA;
+    // std::cout << "xiA: " << xiA << std::endl;
 
-        // double N1_full = 0.5 - xiA_full;
-        // double N2_full = 0.5 + xiA_full;
+    double N1 = 0.5 - xiA;
+    double N2 = 0.5 + xiA;
 
-        double  gn = gap(pair, mesh1, mesh2, xiA);
-        // double gn_active = (gn < 0.0) ? gn : 0.0;
-        double gn_active = gn;
-        // std::cout << "gap: " << gn << std::endl;
+    // double N1_full = 0.5 - xiA_full;
+    // double N2_full = 0.5 + xiA_full;
 
-        g_tilde1 += w * N1 * gn_active * J;
-        g_tilde2 += w * N2 * gn_active * J;
-        // double G = g_tilde1 + g_tilde2; 
-        // std::cout << "G: " << G << std::endl;
+    double gn = gap( pair, mesh1, mesh2, xiA );
+    // double gn_active = (gn < 0.0) ? gn : 0.0;
+    double gn_active = gn;
+    // std::cout << "gap: " << gn << std::endl;
 
-        // std::cout << "G~1: " << g_tilde1 << ", G~2:" << g_tilde2 << std::endl; 
+    g_tilde1 += w * N1 * gn_active * J;
+    g_tilde2 += w * N2 * gn_active * J;
+    // double G = g_tilde1 + g_tilde2;
+    // std::cout << "G: " << G << std::endl;
 
+    // std::cout << "G~1: " << g_tilde1 << ", G~2:" << g_tilde2 << std::endl;
 
-        AI_1 += w * N1 * J_ref;   
-        AI_2 += w * N2 * J_ref;  
-        // std::cout <<  AI_1 << ","<<  AI_2 << std::endl; 
-    }
-    // std::cout <<  AI_1 << ","<<  AI_2 << std::endl; 
-    // std::cout << "A: " << AI_1 << ", " << AI_2 << std::endl;
-    // std::cout <<  g_tilde1 << ","<<  g_tilde2 << std::endl;
+    AI_1 += w * N1 * J_ref;
+    AI_2 += w * N2 * J_ref;
+    // std::cout <<  AI_1 << ","<<  AI_2 << std::endl;
+  }
+  // std::cout <<  AI_1 << ","<<  AI_2 << std::endl;
+  // std::cout << "A: " << AI_1 << ", " << AI_2 << std::endl;
+  // std::cout <<  g_tilde1 << ","<<  g_tilde2 << std::endl;
 
-    NodalContactData contact_data;
+  NodalContactData contact_data;
 
-    contact_data.AI = {AI_1, AI_2};
-    contact_data.g_tilde = {g_tilde1, g_tilde2};
-    // double g1 = g_tilde1 / AI_1;
-    // double g2 = g_tilde2 / AI_2;
-    // // std::cout <<  g1 << ","<<  g2 << std::endl;
+  contact_data.AI = { AI_1, AI_2 };
+  contact_data.g_tilde = { g_tilde1, g_tilde2 };
+  // double g1 = g_tilde1 / AI_1;
+  // double g2 = g_tilde2 / AI_2;
+  // // std::cout <<  g1 << ","<<  g2 << std::endl;
 
-    // //KKT Conditons
-    // double p1 = (g1 < 0.0) ? p_.k * g1 : 0.0;
-    // double p2 = (g2 < 0.0) ? p_.k * g2 : 0.0;
+  // //KKT Conditons
+  // double p1 = (g1 < 0.0) ? p_.k * g1 : 0.0;
+  // double p2 = (g2 < 0.0) ? p_.k * g2 : 0.0;
 
-    // NodalContactData contact_data;
+  // NodalContactData contact_data;
 
-    // contact_data.pressures = {p1, p2};
-    // contact_data.g_tilde = {g_tilde1, g_tilde2};
+  // contact_data.pressures = {p1, p2};
+  // contact_data.g_tilde = {g_tilde1, g_tilde2};
 
-    return contact_data;
+  return contact_data;
 }
 
-std::array<double, 2> ContactEvaluator::compute_pressures(const NodalContactData& ncd) const {
-    double gt1 = ncd.g_tilde[0];
-    double gt2 = ncd.g_tilde[1];
-
-    // std::cout << "gt: " << gt1 << ", " << gt2 << std::endl;
+std::array<double, 2> ContactEvaluator::compute_pressures( const NodalContactData& ncd ) const
+{
+  double gt1 = ncd.g_tilde[0];
+  double gt2 = ncd.g_tilde[1];
 
+  // std::cout << "gt: " << gt1 << ", " << gt2 << std::endl;
 
-    double A1 = ncd.AI[0];
-    double A2 = ncd.AI[1];
+  double A1 = ncd.AI[0];
+  double A2 = ncd.AI[1];
 
-    double g1 = gt1/A1;
-    double g2 = gt2/A2;
+  double g1 = gt1 / A1;
+  double g2 = gt2 / A2;
 
-    // //KKT Conditons
-    double p1 = (g1 < 0.0) ? p_.k * g1 : 0.0;
-    double p2 = (g2 < 0.0) ? p_.k * g2 : 0.0;
-    std::array<double, 2> pressures;
+  // //KKT Conditons
+  double p1 = ( g1 < 0.0 ) ? p_.k * g1 : 0.0;
+  double p2 = ( g2 < 0.0 ) ? p_.k * g2 : 0.0;
+  std::array<double, 2> pressures;
 
-    pressures = {p1, p2};
+  pressures = { p1, p2 };
 
-    for (int i = 0; i < 2; ++i) {
-        if (ncd.AI[i] < 1e-12) {
-            pressures[i] = 0.0;
-        }
+  for ( int i = 0; i < 2; ++i ) {
+    if ( ncd.AI[i] < 1e-12 ) {
+      pressures[i] = 0.0;
     }
-    // std::cout << "pressures: " << pressures[0] << ", " << pressures[1] << std::endl;
+  }
+  // std::cout << "pressures: " << pressures[0] << ", " << pressures[1] << std::endl;
 
-    return pressures;
+  return pressures;
 }
 
-double ContactEvaluator::compute_contact_energy(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
-    NodalContactData contact_data;
-    contact_data = compute_nodal_contact_data(pair, mesh1, mesh2);
-
-    std::array<double, 2> pressures;
-    pressures = compute_pressures(contact_data);
+double ContactEvaluator::compute_contact_energy( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                 const MeshData::Viewer& mesh2 ) const
+{
+  NodalContactData contact_data;
+  contact_data = compute_nodal_contact_data( pair, mesh1, mesh2 );
 
+  std::array<double, 2> pressures;
+  pressures = compute_pressures( contact_data );
 
-    double contact_energy = pressures[0] * contact_data.g_tilde[0] + pressures[1] * contact_data.g_tilde[1];
-    return contact_energy;
+  double contact_energy = pressures[0] * contact_data.g_tilde[0] + pressures[1] * contact_data.g_tilde[1];
+  return contact_energy;
 }
 
-void ContactEvaluator::gtilde_and_area(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double gtilde[2], double area[2]) const {
-    auto ncd = compute_nodal_contact_data(pair, mesh1, mesh2);
-    gtilde[0] = ncd.g_tilde[0];
-    gtilde[1] = ncd.g_tilde[1];
-    area[0] = ncd.AI[0];
-    area[1] = ncd.AI[1];
+void ContactEvaluator::gtilde_and_area( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                        const MeshData::Viewer& mesh2, double gtilde[2], double area[2] ) const
+{
+  auto ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+  gtilde[0] = ncd.g_tilde[0];
+  gtilde[1] = ncd.g_tilde[1];
+  area[0] = ncd.AI[0];
+  area[1] = ncd.AI[1];
 }
 
-void ContactEvaluator::grad_gtilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double dgt1_dx[8], double dgt2_dx[8]) const {
-    double A0[2], A1[2], B0[2], B1[2];
-    
+void ContactEvaluator::grad_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                    const MeshData::Viewer& mesh2, double dgt1_dx[8], double dgt2_dx[8] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
 
-    endpoints(mesh1, pair.m_element_id1, A0, A1);
-    endpoints(mesh2, pair.m_element_id2, B0, B1);
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
 
-    double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
+  double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
 
-    double nB[2], nA[2];
-    find_normal(B0, B1, nB);
-    find_normal(A0, A1, nA);
+  double nB[2], nA[2];
+  find_normal( B0, B1, nB );
+  find_normal( A0, A1, nA );
 
-    // double dot = nB[0] * nA[0] + nB[1] * nA[1];
-    // double eta = (dot < 0) ? dot:0.0;
+  // double dot = nB[0] * nA[0] + nB[1] * nA[1];
+  // double eta = (dot < 0) ? dot:0.0;
 
-    auto projs = projections(pair, mesh1, mesh2);
+  auto projs = projections( pair, mesh1, mesh2 );
 
-    auto bounds = smoother_.bounds_from_projections(projs);
-    auto smooth_bounds = smoother_.smooth_bounds(bounds);
+  auto bounds = smoother_.bounds_from_projections( projs );
+  auto smooth_bounds = smoother_.smooth_bounds( bounds );
 
-    auto qp = compute_quadrature(smooth_bounds);
+  auto qp = compute_quadrature( smooth_bounds );
 
-    const int N = static_cast<int>(qp.qp.size());
+  const int N = static_cast<int>( qp.qp.size() );
 
-    std::vector<double> x2(2 * N);
+  std::vector<double> x2( 2 * N );
 
-    for (int i = 0; i < N; ++i) {
-        double x1[2] = {0.0};
-        iso_map(A0, A1, qp.qp[i], x1);
-        double x2_i[2] = {0.0};
-        find_intersection(B0, B1, x1, nB, x2_i);
-        x2[2*i] = x2_i[0];
-        x2[2*i+1] = x2_i[1];
-    }
+  for ( int i = 0; i < N; ++i ) {
+    double x1[2] = { 0.0 };
+    iso_map( A0, A1, qp.qp[i], x1 );
+    double x2_i[2] = { 0.0 };
+    find_intersection( B0, B1, x1, nB, x2_i );
+    x2[2 * i] = x2_i[0];
+    x2[2 * i + 1] = x2_i[1];
+  }
 
-    Gparams gp;
-    gp.N = N;
-    gp.qp = qp.qp.data();
-    gp.w = qp.w.data();
-    gp.x2 =x2.data();
-    // gp.nB[0] = nB[0];
-    // gp.nB[1] = nB[1];
-    // gp.eta = eta;
-    // gp.del = p_.del;
-
-    double dg1_du[8] = {0.0};
-    double dg2_du[8] = {0.0};
-
-    grad_gtilde1(x, &gp, dg1_du);
-    grad_gtilde2(x, &gp, dg2_du);
-
-    for (int i = 0; i < 8; ++i) {
-  dgt1_dx[i] = dg1_du[i];
-  dgt2_dx[i] = dg2_du[i];
-}
+  Gparams gp;
+  gp.N = N;
+  gp.qp = qp.qp.data();
+  gp.w = qp.w.data();
+  gp.x2 = x2.data();
+  // gp.nB[0] = nB[0];
+  // gp.nB[1] = nB[1];
+  // gp.eta = eta;
+  // gp.del = p_.del;
+
+  double dg1_du[8] = { 0.0 };
+  double dg2_du[8] = { 0.0 };
+
+  grad_gtilde1( x, &gp, dg1_du );
+  grad_gtilde2( x, &gp, dg2_du );
+
+  for ( int i = 0; i < 8; ++i ) {
+    dgt1_dx[i] = dg1_du[i];
+    dgt2_dx[i] = dg2_du[i];
+  }
 }
 
-void ContactEvaluator::grad_trib_area(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double dA1_dx[8], double dA2_dx[8]) const {
-    double A0[2], A1[2], B0[2], B1[2];
-
-    endpoints(mesh1, pair.m_element_id1, A0, A1);
-    endpoints(mesh2, pair.m_element_id2, B0, B1);
+void ContactEvaluator::grad_trib_area( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                       const MeshData::Viewer& mesh2, double dA1_dx[8], double dA2_dx[8] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
 
-    double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
 
-    double nB[2], nA[2];
-    find_normal(B0, B1, nB);
-    find_normal(A0, A1, nA);
+  double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
 
-    // double dot = nB[0] * nA[0] + nB[1] * nA[1];
-    // double eta = (dot < 0) ? dot:0.0;
+  double nB[2], nA[2];
+  find_normal( B0, B1, nB );
+  find_normal( A0, A1, nA );
 
-    auto projs = projections(pair, mesh1, mesh2);
+  // double dot = nB[0] * nA[0] + nB[1] * nA[1];
+  // double eta = (dot < 0) ? dot:0.0;
 
-    auto bounds = smoother_.bounds_from_projections(projs);
-    auto smooth_bounds = smoother_.smooth_bounds(bounds);
+  auto projs = projections( pair, mesh1, mesh2 );
 
-    auto qp = compute_quadrature(smooth_bounds);
+  auto bounds = smoother_.bounds_from_projections( projs );
+  auto smooth_bounds = smoother_.smooth_bounds( bounds );
 
-    const int N = static_cast<int>(qp.qp.size());
+  auto qp = compute_quadrature( smooth_bounds );
 
-    std::vector<double> x2(2 * N);
+  const int N = static_cast<int>( qp.qp.size() );
 
-    for (int i = 0; i < N; ++i) {
-        double x1[2] = {0.0};
-        iso_map(A0, A1, qp.qp[i], x1);
-        double x2_i[2] = {0.0};
-        find_intersection(B0, B1, x1, nB, x2_i);
-        x2[2*i] = x2_i[0];
-        x2[2*i+1] = x2_i[1];
-    }
+  std::vector<double> x2( 2 * N );
 
+  for ( int i = 0; i < N; ++i ) {
+    double x1[2] = { 0.0 };
+    iso_map( A0, A1, qp.qp[i], x1 );
+    double x2_i[2] = { 0.0 };
+    find_intersection( B0, B1, x1, nB, x2_i );
+    x2[2 * i] = x2_i[0];
+    x2[2 * i + 1] = x2_i[1];
+  }
 
-    Gparams gp;
-    gp.N = N;
-    gp.qp = qp.qp.data();
-    gp.w = qp.w.data();
-    gp.x2 =x2.data();
-
-    grad_A1(x, &gp, dA1_dx);
-    grad_A2(x, &gp, dA2_dx);
+  Gparams gp;
+  gp.N = N;
+  gp.qp = qp.qp.data();
+  gp.w = qp.w.data();
+  gp.x2 = x2.data();
 
+  grad_A1( x, &gp, dA1_dx );
+  grad_A2( x, &gp, dA2_dx );
 }
 
+std::array<double, 8> ContactEvaluator::compute_contact_forces( const InterfacePair& pair,
+                                                                const MeshData::Viewer& mesh1,
+                                                                const MeshData::Viewer& mesh2 ) const
+{
+  double dg_tilde1[8] = { 0.0 };
+  double dg_tilde2[8] = { 0.0 };
+  double dA1[8] = { 0.0 };
+  double dA2[8] = { 0.0 };
+  std::array<double*, 2> dg_t;
+  std::array<double*, 2> dA_I;
+  dg_t = { dg_tilde1, dg_tilde2 };
+  dA_I = { dA1, dA2 };
+
+  grad_gtilde( pair, mesh1, mesh2, dg_tilde1, dg_tilde2 );
+  grad_trib_area( pair, mesh1, mesh2, dA1, dA2 );
+
+  NodalContactData ncd;
+  ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+  // std::cout << "A: " << ncd.AI[0] << ", " << ncd.AI[1] << std::endl;
+  // std::cout << "g: " << ncd.g_tilde[0] << ", " << ncd.g_tilde[1] << std::endl;
+
+  std::array<double, 2> pressures;
+  pressures = compute_pressures( ncd );
+  // std::cout << "Pressures: " << pressures[0] << ", " << pressures[1] << std::endl;
+
+  std::array<double, 8> f = { 0.0 };
+
+  for ( int i = 0; i < 8; ++i ) {
+    for ( int j = 0; j < 2; ++j ) {
+      double g = 0.0;
+      g = ncd.g_tilde[j] / ncd.AI[j];
+      if ( ncd.AI[j] < 1e-12 ) {
+        g = 0.0;
+      }
+      f[i] += ( 2 * pressures[j] * dg_t[j][i] - pressures[j] * g * dA_I[j][i] );
+    }
+  }
+  return f;
+}
 
-std::array<double, 8> ContactEvaluator::compute_contact_forces(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
-    double dg_tilde1[8] = {0.0};
-    double dg_tilde2[8] = {0.0};
-    double dA1[8] = {0.0};
-    double dA2[8] = {0.0};
-    std::array<double*, 2> dg_t;
-    std::array<double*, 2> dA_I;
-    dg_t = {dg_tilde1, dg_tilde2};
-    dA_I = {dA1, dA2};
-
-
+void ContactEvaluator::d2_g2tilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                   const MeshData::Viewer& mesh2, double H1[64], double H2[64] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
 
-    grad_gtilde(pair, mesh1, mesh2, dg_tilde1, dg_tilde2);
-    grad_trib_area(pair, mesh1, mesh2, dA1, dA2); 
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
 
-    NodalContactData ncd;
-    ncd = compute_nodal_contact_data(pair, mesh1, mesh2);
-    // std::cout << "A: " << ncd.AI[0] << ", " << ncd.AI[1] << std::endl;
-    // std::cout << "g: " << ncd.g_tilde[0] << ", " << ncd.g_tilde[1] << std::endl;
+  double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
 
+  double nB[2], nA[2];
+  find_normal( B0, B1, nB );
+  find_normal( A0, A1, nA );
 
-    std::array<double, 2> pressures;
-    pressures = compute_pressures(ncd);
-    // std::cout << "Pressures: " << pressures[0] << ", " << pressures[1] << std::endl;
+  // double dot = nB[0] * nA[0] + nB[1] * nA[1];
+  // double eta = (dot < 0) ? dot:0.0;
 
-    std::array<double,  8> f = {0.0};
+  auto projs = projections( pair, mesh1, mesh2 );
+  auto bounds = smoother_.bounds_from_projections( projs );
+  auto smooth_bounds = smoother_.smooth_bounds( bounds );
 
-    for(int i = 0; i < 8; ++i) {
-        for (int j = 0; j < 2; ++j) {
-            double g = 0.0;
-            g = ncd.g_tilde[j] / ncd.AI[j];
-            if (ncd.AI[j] < 1e-12) {
-                g = 0.0;
-            }
-            f[i] += (2*pressures[j]*dg_t[j][i] - pressures[j] * g * dA_I[j][i]);
-            
-        }
-    }
-    return f;
-}
+  auto qp = compute_quadrature( smooth_bounds );
 
-void ContactEvaluator::d2_g2tilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double H1[64], double H2[64]) const {
-    double A0[2], A1[2], B0[2], B1[2];
+  const int N = static_cast<int>( qp.qp.size() );
+  std::vector<double> x2( 2 * N );
 
-    endpoints(mesh1, pair.m_element_id1, A0, A1);
-    endpoints(mesh2, pair.m_element_id2, B0, B1);
-
-    double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
-
-    double nB[2], nA[2];
-    find_normal(B0, B1, nB);
-    find_normal(A0, A1, nA);
-
-    // double dot = nB[0] * nA[0] + nB[1] * nA[1];
-    // double eta = (dot < 0) ? dot:0.0;
+  for ( int i = 0; i < N; ++i ) {
+    double x1[2] = { 0.0 };
+    iso_map( A0, A1, qp.qp[i], x1 );
+    double x2_i[2] = { 0.0 };
+    find_intersection( B0, B1, x1, nB, x2_i );
+    x2[2 * i] = x2_i[0];
+    x2[2 * i + 1] = x2_i[1];
+  }
 
-    auto projs = projections(pair, mesh1, mesh2);
-    auto bounds = smoother_.bounds_from_projections(projs);
-    auto smooth_bounds = smoother_.smooth_bounds(bounds);
+  Gparams gp;
+  gp.N = N;
+  gp.qp = qp.qp.data();
+  gp.w = qp.w.data();
+  gp.x2 = x2.data();
 
-    auto qp = compute_quadrature(smooth_bounds);
+  double d2g1_d2u[64] = { 0.0 };
+  double d2g2_d2u[64] = { 0.0 };
 
-    const int N = static_cast<int>(qp.qp.size());
-    std::vector<double> x2(2 * N);
+  d2gtilde1( x, &gp, d2g1_d2u );
+  d2gtilde2( x, &gp, d2g2_d2u );
 
-    for (int i = 0; i < N; ++i) {
-        double x1[2] = {0.0};
-        iso_map(A0, A1, qp.qp[i], x1);
-        double x2_i[2] = {0.0};
-        find_intersection(B0, B1, x1, nB, x2_i);
-        x2[2*i] = x2_i[0];
-        x2[2*i+1] = x2_i[1];
-    }
-    
-    Gparams gp;
-    gp.N = N;
-    gp.qp = qp.qp.data();
-    gp.w = qp.w.data();
-    gp.x2 =x2.data();
-
-    double d2g1_d2u[64] = {0.0};
-    double d2g2_d2u[64] = {0.0};
-
-    d2gtilde1(x, &gp, d2g1_d2u);
-    d2gtilde2(x, &gp, d2g2_d2u);
-
-    for (int i = 0; i < 64; ++i) {
-        H1[i] = d2g1_d2u[i];
-        H2[i] = d2g2_d2u[i];
-    }
+  for ( int i = 0; i < 64; ++i ) {
+    H1[i] = d2g1_d2u[i];
+    H2[i] = d2g2_d2u[i];
+  }
 }
 
-void ContactEvaluator::compute_d2A_d2u(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double d2A1[64], double d2A2[64]) const {
-    double A0[2], A1[2], B0[2], B1[2];
+void ContactEvaluator::compute_d2A_d2u( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                        const MeshData::Viewer& mesh2, double d2A1[64], double d2A2[64] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
 
-    endpoints(mesh1, pair.m_element_id1, A0, A1);
-    endpoints(mesh2, pair.m_element_id2, B0, B1);
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
 
-    double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
+  double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
 
-    double nB[2], nA[2];
+  double nB[2], nA[2];
 
-    find_normal(B0, B1, nB);
-    find_normal(A0, A1, nA);
+  find_normal( B0, B1, nB );
+  find_normal( A0, A1, nA );
 
-    // double dot = nB[0] * nA[0] + nB[1] * nA[1];
+  // double dot = nB[0] * nA[0] + nB[1] * nA[1];
 
-    auto projs = projections(pair, mesh1, mesh2);
-    auto bounds = smoother_.bounds_from_projections(projs);
-    auto smooth_bounds = smoother_.smooth_bounds(bounds);
+  auto projs = projections( pair, mesh1, mesh2 );
+  auto bounds = smoother_.bounds_from_projections( projs );
+  auto smooth_bounds = smoother_.smooth_bounds( bounds );
 
-    auto qp = compute_quadrature(smooth_bounds);
+  auto qp = compute_quadrature( smooth_bounds );
 
-    const int N = static_cast<int>(qp.qp.size());
-    std::vector<double> x2(2 * N);
+  const int N = static_cast<int>( qp.qp.size() );
+  std::vector<double> x2( 2 * N );
 
-    for (int i = 0; i < N; ++i) {
-        double x1[2] = {0.0};
-        iso_map(A0, A1, qp.qp[i], x1);
-        double x2_i[2] = {0.0};
-        find_intersection(B0, B1, x1, nB, x2_i);
-        x2[2*i] = x2_i[0];
-        x2[2*i+1] = x2_i[1];
-    }
+  for ( int i = 0; i < N; ++i ) {
+    double x1[2] = { 0.0 };
+    iso_map( A0, A1, qp.qp[i], x1 );
+    double x2_i[2] = { 0.0 };
+    find_intersection( B0, B1, x1, nB, x2_i );
+    x2[2 * i] = x2_i[0];
+    x2[2 * i + 1] = x2_i[1];
+  }
 
-        Gparams gp;
-    gp.N = N;
-    gp.qp = qp.qp.data();
-    gp.w = qp.w.data();
-    gp.x2 =x2.data();
+  Gparams gp;
+  gp.N = N;
+  gp.qp = qp.qp.data();
+  gp.w = qp.w.data();
+  gp.x2 = x2.data();
 
-    double d2A1_d2u[64] = {0.0};
-    double d2A2_d2u[64] = {0.0};
+  double d2A1_d2u[64] = { 0.0 };
+  double d2A2_d2u[64] = { 0.0 };
 
-    get_d2A1(x, &gp, d2A1_d2u);
-    get_d2A2(x, &gp, d2A2_d2u);
+  get_d2A1( x, &gp, d2A1_d2u );
+  get_d2A2( x, &gp, d2A2_d2u );
 
-    for (int i = 0; i < 64; ++i) {
-        d2A1[i] = d2A1_d2u[i];
-        d2A2[i] = d2A2_d2u[i];
-    }
+  for ( int i = 0; i < 64; ++i ) {
+    d2A1[i] = d2A1_d2u[i];
+    d2A2[i] = d2A2_d2u[i];
+  }
 }
 
-std::array<std::array<double, 8>, 8> ContactEvaluator::compute_stiffness_matrix(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
-    NodalContactData ncd;
-    ncd = compute_nodal_contact_data(pair, mesh1, mesh2); 
+std::array<std::array<double, 8>, 8> ContactEvaluator::compute_stiffness_matrix( const InterfacePair& pair,
+                                                                                 const MeshData::Viewer& mesh1,
+                                                                                 const MeshData::Viewer& mesh2 ) const
+{
+  NodalContactData ncd;
+  ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
 
-    std::array<double, 2> gI;
-    for (int i = 0; i < 2; ++i) {
-        gI[i] = ncd.g_tilde[i] / ncd.AI[i];
-    }
+  std::array<double, 2> gI;
+  for ( int i = 0; i < 2; ++i ) {
+    gI[i] = ncd.g_tilde[i] / ncd.AI[i];
+  }
 
-    double dg_tilde1[8], dg_tilde2[8], dAI1[8], dAI2[8];
+  double dg_tilde1[8], dg_tilde2[8], dAI1[8], dAI2[8];
 
-    grad_gtilde(pair, mesh1, mesh2, dg_tilde1, dg_tilde2); 
-    grad_trib_area(pair, mesh1, mesh2, dAI1, dAI2);
+  grad_gtilde( pair, mesh1, mesh2, dg_tilde1, dg_tilde2 );
+  grad_trib_area( pair, mesh1, mesh2, dAI1, dAI2 );
 
-    double d2_gtilde1[64], d2_gtilde2[64], d2_dA1[64], d2_dA2[64];
+  double d2_gtilde1[64], d2_gtilde2[64], d2_dA1[64], d2_dA2[64];
 
-    d2_g2tilde(pair, mesh1, mesh2, d2_gtilde1, d2_gtilde2); 
-    compute_d2A_d2u(pair, mesh1, mesh2, d2_dA1, d2_dA2); 
+  d2_g2tilde( pair, mesh1, mesh2, d2_gtilde1, d2_gtilde2 );
+  compute_d2A_d2u( pair, mesh1, mesh2, d2_dA1, d2_dA2 );
 
-    std::array<double*, 2> dg_t = {dg_tilde1, dg_tilde2};
-    std::array<double*, 2> dA = {dAI1, dAI2};
+  std::array<double*, 2> dg_t = { dg_tilde1, dg_tilde2 };
+  std::array<double*, 2> dA = { dAI1, dAI2 };
 
-    std::array<double*, 2> ddg_t = {d2_gtilde1, d2_gtilde2};
-    std::array<double*, 2> ddA = {d2_dA1, d2_dA2};
+  std::array<double*, 2> ddg_t = { d2_gtilde1, d2_gtilde2 };
+  std::array<double*, 2> ddA = { d2_dA1, d2_dA2 };
 
-    std::array<std::array<double, 8>, 8> K_mat = {{{0.0}}};
+  std::array<std::array<double, 8>, 8> K_mat = { { { 0.0 } } };
 
-    for (int i = 0; i < 2; ++i) {
-        for (int k = 0; k < 8; ++k) {
-            for (int j = 0; j < 8; ++j) {
-                //term 1: 
-                K_mat[k][j] += p_.k*(2 / ncd.AI[i]) * dg_t[i][k] * dg_t[i][j];
+  for ( int i = 0; i < 2; ++i ) {
+    for ( int k = 0; k < 8; ++k ) {
+      for ( int j = 0; j < 8; ++j ) {
+        // term 1:
+        K_mat[k][j] += p_.k * ( 2 / ncd.AI[i] ) * dg_t[i][k] * dg_t[i][j];
 
-                //term2:
-                K_mat[k][j] += -p_.k*(2 * gI[i] / ncd.AI[i]) * dg_t[i][k] * dA[i][j];
+        // term2:
+        K_mat[k][j] += -p_.k * ( 2 * gI[i] / ncd.AI[i] ) * dg_t[i][k] * dA[i][j];
 
-                //term3:
-                K_mat[k][j] += -p_.k*(2 * gI[i] / ncd.AI[i]) * dA[i][k] * dg_t[i][j];
+        // term3:
+        K_mat[k][j] += -p_.k * ( 2 * gI[i] / ncd.AI[i] ) * dA[i][k] * dg_t[i][j];
 
-                //term 4:
-                K_mat[k][j] += p_.k*(2 *gI[i]*gI[i] / ncd.AI[i]) * dA[i][k] * dA[i][j];
+        // term 4:
+        K_mat[k][j] += p_.k * ( 2 * gI[i] * gI[i] / ncd.AI[i] ) * dA[i][k] * dA[i][j];
 
-                //term 5;
-                K_mat[k][j] += p_.k*2.0 * gI[i] * ddg_t[i][k*8 + j];
+        // term 5;
+        K_mat[k][j] += p_.k * 2.0 * gI[i] * ddg_t[i][k * 8 + j];
 
-                //term 6:
-                K_mat[k][j] += -p_.k*gI[i]*gI[i] * ddA[i][k*8 + j];
+        // term 6:
+        K_mat[k][j] += -p_.k * gI[i] * gI[i] * ddA[i][k * 8 + j];
 
-                if (ncd.AI[i] < 1e-12) {
-                    K_mat[k][j] = 0.0;
-                }
+        if ( ncd.AI[i] < 1e-12 ) {
+          K_mat[k][j] = 0.0;
         }
+      }
     }
-    }
-    return K_mat;
+  }
+  return K_mat;
 }
 
+std::pair<double, double> ContactEvaluator::eval_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                         const MeshData::Viewer& mesh2 ) const
+{
+  NodalContactData ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+  // double gt1 = ncd.g_tilde[0];
+  // double gt2 = ncd.g_tilde[1];
+  double A1 = ncd.AI[0];
+  double A2 = ncd.AI[1];
 
-
-std::pair<double, double> ContactEvaluator::eval_gtilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const {
-    NodalContactData ncd = compute_nodal_contact_data(pair, mesh1, mesh2);
-    // double gt1 = ncd.g_tilde[0];
-    // double gt2 = ncd.g_tilde[1];
-    double A1 = ncd.AI[0];
-    double A2 = ncd.AI[1];
-
-    return {A1, A2};
+  return { A1, A2 };
 }
 
-
-
-std::pair<double,double>
-ContactEvaluator::eval_gtilde_fixed_qp(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& /*mesh2*/,
-                                       const QuadPoints& qp_fixed) const
+std::pair<double, double> ContactEvaluator::eval_gtilde_fixed_qp( const InterfacePair& pair,
+                                                                  const MeshData::Viewer& mesh1,
+                                                                  const MeshData::Viewer& /*mesh2*/,
+                                                                  const QuadPoints& qp_fixed ) const
 {
-    double A0[2], A1[2];
-    endpoints(mesh1, pair.m_element_id1, A0, A1);
+  double A0[2], A1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
 
-    const double J = std::sqrt((A1[0]-A0[0])*(A1[0]-A0[0]) + (A1[1]-A0[1])*(A1[1]-A0[1]));
+  const double J = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
 
-    double gt1 = 0.0, gt2 = 0.0;
+  double gt1 = 0.0, gt2 = 0.0;
 
-    for (size_t i = 0; i < qp_fixed.qp.size(); ++i) {
-        const double xiA = qp_fixed.qp[i];
-        const double w   = qp_fixed.w[i];
+  for ( size_t i = 0; i < qp_fixed.qp.size(); ++i ) {
+    const double xiA = qp_fixed.qp[i];
+    const double w = qp_fixed.w[i];
 
-        const double N1 = 0.5 - xiA;
-        const double N2 = 0.5 + xiA;
+    const double N1 = 0.5 - xiA;
+    const double N2 = 0.5 + xiA;
 
-        // const double gn = gap(pair, mesh1, mesh2, xiA);   // still depends on geometry
-        // const double gn_active = gn;              // or your (gn<0?gn:0) logic
+    // const double gn = gap(pair, mesh1, mesh2, xiA);   // still depends on geometry
+    // const double gn_active = gn;              // or your (gn<0?gn:0) logic
 
-        gt1 += w * N1  * J;
-        gt2 += w * N2 * J;
-    }
+    gt1 += w * N1 * J;
+    gt2 += w * N2 * J;
+  }
 
-    return {gt1, gt2};
+  return { gt1, gt2 };
 }
 
+// FiniteDiffResult ContactEvaluator::validate_g_tilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const
+// MeshData::Viewer& mesh2, double epsilon) const {
 
-
-// FiniteDiffResult ContactEvaluator::validate_g_tilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double epsilon) const {
-        
 //     FiniteDiffResult result;
 
 //     auto projs0 = projections(pair, mesh1, mesh2);
@@ -1029,27 +1041,27 @@ ContactEvaluator::eval_gtilde_fixed_qp(const InterfacePair& pair, const MeshData
 //     int num_dofs = result.node_ids.size() * 2;
 //     result.fd_gradient_g1.resize(num_dofs);
 //     result.fd_gradient_g2.resize(num_dofs);
-    
+
 //     // ===== GET AND REORDER ENZYME GRADIENTS =====
 //     double dgt1_dx[8] = {0.0};
 //     double dgt2_dx[8] = {0.0};
 //     grad_trib_area(pair, mesh1, mesh2, dgt1_dx, dgt2_dx);
-    
+
 //     // Map from node_id to position in x[8]
 //     std::map<int, int> node_to_x_idx;
 //     node_to_x_idx[A_conn[0]] = 0;  // A0 → x[0,1]
 //     node_to_x_idx[A_conn[1]] = 1;  // A1 → x[2,3]
 //     node_to_x_idx[B_conn[0]] = 2;  // B0 → x[4,5]
 //     node_to_x_idx[B_conn[1]] = 3;  // B1 → x[6,7]
-    
+
 //     // Reorder Enzyme gradients to match sorted node order
 //     result.analytical_gradient_g1.resize(num_dofs);
 //     result.analytical_gradient_g2.resize(num_dofs);
-    
+
 //     for (size_t i = 0; i < result.node_ids.size(); ++i) {
 //         int node_id = result.node_ids[i];
 //         int x_idx = node_to_x_idx[node_id];
-        
+
 //         result.analytical_gradient_g1[2*i + 0] = dgt1_dx[2*x_idx + 0];  // x component
 //         result.analytical_gradient_g1[2*i + 1] = dgt1_dx[2*x_idx + 1];  // y component
 //         result.analytical_gradient_g2[2*i + 0] = dgt2_dx[2*x_idx + 0];
@@ -1057,7 +1069,6 @@ ContactEvaluator::eval_gtilde_fixed_qp(const InterfacePair& pair, const MeshData
 //     }
 //     // =
 
-
 //     int dof_idx = 0;
 //     //X-direction
 //     for (int node_id : result.node_ids) {
@@ -1078,53 +1089,55 @@ ContactEvaluator::eval_gtilde_fixed_qp(const InterfacePair& pair, const MeshData
 
 //             dof_idx++;
 //         }
-    
-//     //y - direction 
+
+//     //y - direction
 //         {
 //             double original = mesh.node(node_id).y;
-            
+
 //             // +epsilon
 //             mesh.node(node_id).y = original + epsilon;
 //             auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
-            
+
 //             // -epsilon
 //             mesh.node(node_id).y = original - epsilon;
 //             auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
-            
+
 //             // Restore
 //             mesh.node(node_id).y = original;
-            
+
 //             // Central difference
 //             result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
 //             result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
-            
+
 //             dof_idx++;
 //         }
 //     }
 //     return result;
 // }
 
-void ContactEvaluator::grad_gtilde_with_qp(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
-                         const QuadPoints& qp_fixed, 
-                         double dgt1_dx[8], double dgt2_dx[8]) const {
-    double A0[2], A1[2], B0[2], B1[2];
-    endpoints(mesh1, pair.m_element_id1, A0, A1);
-    endpoints(mesh2, pair.m_element_id2, B0, B1);
-    
-    double x[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
-    
-    const int N = static_cast<int>(qp_fixed.qp.size());
-    
-    Gparams gp;
-    gp.N = N;
-    gp.qp = qp_fixed.qp.data();  // Use FIXED quadrature
-    gp.w = qp_fixed.w.data();
-    
-    grad_A1(x, &gp, dgt1_dx);
-    grad_A2(x, &gp, dgt2_dx);
+void ContactEvaluator::grad_gtilde_with_qp( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                            const MeshData::Viewer& mesh2, const QuadPoints& qp_fixed,
+                                            double dgt1_dx[8], double dgt2_dx[8] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+
+  const int N = static_cast<int>( qp_fixed.qp.size() );
+
+  Gparams gp;
+  gp.N = N;
+  gp.qp = qp_fixed.qp.data();  // Use FIXED quadrature
+  gp.w = qp_fixed.w.data();
+
+  grad_A1( x, &gp, dgt1_dx );
+  grad_A2( x, &gp, dgt2_dx );
 }
 
-// FiniteDiffResult ContactEvaluator::validate_hessian(Mesh& mesh, const Element& A, const Element& B, double epsilon) const {
+// FiniteDiffResult ContactEvaluator::validate_hessian(Mesh& mesh, const Element& A, const Element& B, double epsilon)
+// const {
 //     FiniteDiffResult result;
 
 //     auto projs0 = projections(mesh, A, B);
@@ -1174,7 +1187,6 @@ void ContactEvaluator::grad_gtilde_with_qp(const InterfacePair& pair, const Mesh
 // static const char* C_WARN  = "\033[33m";
 // static const char* C_BAD   = "\033[31m";
 
-
 // void ContactEvaluator::print_hessian_comparison(const FiniteDiffResult& val) const
 // {
 //     std::cout << std::setprecision(12) << std::scientific;
@@ -1333,7 +1345,4 @@ void ContactEvaluator::grad_gtilde_with_qp(const InterfacePair& pair, const Mesh
 //     std::cout << std::string(120, '=') << "\n\n";
 // }
 
-}
-
-
-
+}  // namespace tribol
diff --git a/src/tribol/physics/new_method.hpp b/src/tribol/physics/new_method.hpp
index ef6a92f2..af11061a 100644
--- a/src/tribol/physics/new_method.hpp
+++ b/src/tribol/physics/new_method.hpp
@@ -8,126 +8,129 @@
 namespace tribol {
 
 struct Node {
-    double x, y;
-    int id;
+  double x, y;
+  int id;
 };
 
 struct Element {
-    int id;
-    std::array<int, 2> node_ids;
+  int id;
+  std::array<int, 2> node_ids;
 };
 
 struct Mesh {
-    std::vector<Node> nodes; 
-    std::vector<Element> elements; 
+  std::vector<Node> nodes;
+  std::vector<Element> elements;
 
-    const Node& node(int i) const { return nodes[i]; }
-    Node& node(int i) {return nodes[i]; }
+  const Node& node( int i ) const { return nodes[i]; }
+  Node& node( int i ) { return nodes[i]; }
 };
 
 struct QuadPoints {
-    std::vector<double> qp; //quadpoints 
-    std::vector<double> w; //weights
+  std::vector<double> qp;  // quadpoints
+  std::vector<double> w;   // weights
 };
 
 struct ContactParams {
-    double del;
-    double k;
-    int N;
+  double del;
+  double k;
+  int N;
 };
 
 struct NodalContactData {
-    std::array<double, 2> AI;
-    std::array<double, 2> g_tilde;
+  std::array<double, 2> AI;
+  std::array<double, 2> g_tilde;
 };
 
 struct FDResult {
-    std::array<double, 2> dgt;
+  std::array<double, 2> dgt;
 };
 
 struct FiniteDiffResult {
-    std::vector<double> fd_gradient_g1;
-    std::vector<double> fd_gradient_g2;
-    std::vector<double> analytical_gradient_g1;
-    std::vector<double> analytical_gradient_g2;
-    std::vector<int> node_ids;
-    double g_tilde1_baseline;
-    double g_tilde2_baseline;
+  std::vector<double> fd_gradient_g1;
+  std::vector<double> fd_gradient_g2;
+  std::vector<double> analytical_gradient_g1;
+  std::vector<double> analytical_gradient_g2;
+  std::vector<int> node_ids;
+  double g_tilde1_baseline;
+  double g_tilde2_baseline;
 };
 
 class ContactSmoothing {
-    public:
-        explicit ContactSmoothing (const ContactParams& p) : p_(p) {} //Constructor 
+ public:
+  explicit ContactSmoothing( const ContactParams& p ) : p_( p ) {}  // Constructor
 
-        std::array<double, 2> bounds_from_projections(const std::array<double, 2>& proj) const;
+  std::array<double, 2> bounds_from_projections( const std::array<double, 2>& proj ) const;
 
-        std::array<double, 2> smooth_bounds(const std::array<double, 2>& bounds) const;
-
-    private: 
-        ContactParams p_;
+  std::array<double, 2> smooth_bounds( const std::array<double, 2>& bounds ) const;
 
+ private:
+  ContactParams p_;
 };
 
 class ContactEvaluator {
-    public: 
-        explicit ContactEvaluator(const ContactParams& p) 
-        : p_(p), smoother_(p) {} //constructor - copies params into the object 
+ public:
+  explicit ContactEvaluator( const ContactParams& p )
+      : p_( p ), smoother_( p ) {}  // constructor - copies params into the object
+
+  double compute_contact_energy( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                 const MeshData::Viewer& mesh2 ) const;
+
+  void gtilde_and_area( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                        double gtilde[2], double area[2] ) const;
 
-        double compute_contact_energy(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const;
+  void grad_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                    double dgt1_dx[8], double dgt2_dx[8] ) const;
 
-        void gtilde_and_area(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, double gtilde[2], double area[2]) const; 
+  void grad_trib_area( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                       double dA1_dx[8], double dA2_dx[8] ) const;
 
-        void grad_gtilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
-                         double dgt1_dx[8], double dgt2_dx[8]) const;
+  void d2_g2tilde( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                   double dgt1_dx[64], double dgt2_dx[64] ) const;
 
-        void grad_trib_area(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, 
-                            double dA1_dx[8], double dA2_dx[8]) const; 
+  void compute_d2A_d2u( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                        double dgt1_dx[64], double dgt2_dx[64] ) const;
 
-        void d2_g2tilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
-                        double dgt1_dx[64], double dgt2_dx[64]) const;
+  std::array<double, 8> compute_contact_forces( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                const MeshData::Viewer& mesh2 ) const;
 
-        void compute_d2A_d2u(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
-                             double dgt1_dx[64], double dgt2_dx[64]) const;
+  std::array<std::array<double, 8>, 8> compute_stiffness_matrix( const InterfacePair& pair,
+                                                                 const MeshData::Viewer& mesh1,
+                                                                 const MeshData::Viewer& mesh2 ) const;
 
-        std::array<double, 8> compute_contact_forces(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const;
+  std::pair<double, double> eval_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                         const MeshData::Viewer& mesh2 ) const;
 
-        std::array<std::array<double, 8>, 8> compute_stiffness_matrix(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const;
+  FiniteDiffResult validate_g_tilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                     const MeshData::Viewer& mesh2, double epsilon = 1e-7 ) const;
 
-        std::pair<double, double> eval_gtilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const;
+  void print_gradient_comparison( const FiniteDiffResult& val ) const;
 
-        FiniteDiffResult validate_g_tilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
-                                          double epsilon = 1e-7) const;
-        
-        void print_gradient_comparison(const FiniteDiffResult& val) const;
-        
-        std::pair<double,double> eval_gtilde_fixed_qp(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
-                                                      const QuadPoints& qp_fixed) const;
+  std::pair<double, double> eval_gtilde_fixed_qp( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                  const MeshData::Viewer& mesh2, const QuadPoints& qp_fixed ) const;
 
-        FiniteDiffResult validate_hessian(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2, 
-                                          double epsilon = 1e-7) const;
+  FiniteDiffResult validate_hessian( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                     const MeshData::Viewer& mesh2, double epsilon = 1e-7 ) const;
 
-        void grad_gtilde_with_qp(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
-                            const QuadPoints& qp_fixed, 
-                            double dgt1_dx[8], double dgt2_dx[8]) const;
+  void grad_gtilde_with_qp( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                            const QuadPoints& qp_fixed, double dgt1_dx[8], double dgt2_dx[8] ) const;
 
-        void print_hessian_comparison(const FiniteDiffResult& val) const;
+  void print_hessian_comparison( const FiniteDiffResult& val ) const;
 
-    private: 
-        ContactParams p_;
-        ContactSmoothing smoother_;
-        QuadPoints compute_quadrature(const std::array<double,2>& xi_bounds) const;
+ private:
+  ContactParams p_;
+  ContactSmoothing smoother_;
+  QuadPoints compute_quadrature( const std::array<double, 2>& xi_bounds ) const;
 
-        std::array<double, 2> projections(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const; 
+  std::array<double, 2> projections( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                     const MeshData::Viewer& mesh2 ) const;
 
-        double gap(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
-                   double xiA) const;
+  double gap( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+              double xiA ) const;
 
-        NodalContactData compute_nodal_contact_data(const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2) const; 
-        
-        std::array<double, 2> compute_pressures(const NodalContactData& ncd) const;
-        
+  NodalContactData compute_nodal_contact_data( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                               const MeshData::Viewer& mesh2 ) const;
 
-                                       
+  std::array<double, 2> compute_pressures( const NodalContactData& ncd ) const;
 };
 
-}
+}  // namespace tribol

From b82cccfb2bfaaa4bb7e675f22b1c32545ec53024 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Tue, 3 Feb 2026 18:55:11 -0800
Subject: [PATCH 08/56] add a new diagonalMatrix() method

---
 src/tests/tribol_par_sparse_mat.cpp | 31 +++++++++++++++++++++++++++++
 src/tribol/utils/ParSparseMat.cpp   | 31 +++++++++++++++++++++++++++++
 src/tribol/utils/ParSparseMat.hpp   | 12 +++++++++++
 3 files changed, 74 insertions(+)

diff --git a/src/tests/tribol_par_sparse_mat.cpp b/src/tests/tribol_par_sparse_mat.cpp
index 62fd41ef..a3f874a7 100644
--- a/src/tests/tribol_par_sparse_mat.cpp
+++ b/src/tests/tribol_par_sparse_mat.cpp
@@ -375,6 +375,37 @@ TEST_F( ParSparseMatTest, Accessors )
   EXPECT_EQ( A->Height(), local_size );
 }
 
+// Test construction of a diagonal ParSparseMat from a vector of per-row diagonal values
+TEST_F( ParSparseMatTest, DiagonalFromVector )
+{
+  int rank;
+  MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+  if ( rank == 0 ) std::cout << "Testing Construction from Vector..." << std::endl;
+  // Local row count: size / num_procs rows, plus one extra row on the first (size % num_procs) ranks
+  int num_procs;
+  MPI_Comm_size( MPI_COMM_WORLD, &num_procs );
+  constexpr int size = 10;
+  int local_size = size / num_procs + ( rank < ( size % num_procs ) ? 1 : 0 );
+  // NOTE(review): assumes GetRowStarts() yields the same partition as local_size above — confirm
+  auto row_starts = GetRowStarts( MPI_COMM_WORLD, size );
+  // Rank-dependent values so the diagonal entries of different ranks can be told apart
+  mfem::Vector diag_vals( local_size );
+  for ( int i = 0; i < local_size; ++i ) {
+    diag_vals[i] = static_cast<double>( rank * 100 + i );
+  }
+
+  tribol::ParSparseMat A =
+      tribol::ParSparseMat::diagonalMatrix( MPI_COMM_WORLD, size, row_starts.GetData(), diag_vals );
+  // Multiplying by a vector of ones returns the row sums, which for a diagonal matrix is the diagonal itself
+  mfem::Vector x( local_size ), y( local_size );
+  x = 1.0;
+  y = A * x;
+
+  for ( int i = 0; i < local_size; ++i ) {
+    EXPECT_NEAR( y[i], static_cast<double>( rank * 100 + i ), 1e-12 );
+  }
+}
+
 //------------------------------------------------------------------------------
 #include "axom/slic/core/SimpleLogger.hpp"
 
diff --git a/src/tribol/utils/ParSparseMat.cpp b/src/tribol/utils/ParSparseMat.cpp
index 6f862663..24b9a534 100644
--- a/src/tribol/utils/ParSparseMat.cpp
+++ b/src/tribol/utils/ParSparseMat.cpp
@@ -203,4 +203,35 @@ ParSparseMat ParSparseMat::diagonalMatrix( MPI_Comm comm, HYPRE_BigInt global_si
   return diagonalMatrix( comm, global_size, row_starts_array, diag_val, ordered_rows, skip_rows );
 }
 
+ParSparseMat ParSparseMat::diagonalMatrix( MPI_Comm comm, HYPRE_BigInt global_size, HYPRE_BigInt* row_starts,
+                                           const mfem::Vector& diag_vals )
+{
+  int num_local_rows = diag_vals.Size();
+  // CSR pattern with exactly one entry per local row: row i holds a single nonzero in local column i
+  mfem::Array<int> rows( num_local_rows + 1 );
+  mfem::Array<int> cols( num_local_rows );
+  rows[0] = 0;
+
+  for ( int i = 0; i < num_local_rows; ++i ) {
+    rows[i + 1] = i + 1;
+    cols[i] = i;
+  }
+  // Drop host-pointer ownership so the CSR arrays are not freed when these locals go out of scope
+  rows.GetMemory().SetHostPtrOwner( false );
+  cols.GetMemory().SetHostPtrOwner( false );
+  // The values are copied (the caller's diag_vals is not aliased) and the copy is released the same way
+  mfem::Vector vals = diag_vals;
+  vals.GetMemory().SetHostPtrOwner( false );
+  // Non-owning SparseMatrix view of the raw arrays; it only serves as input to the HypreParMatrix constructor
+  mfem::SparseMatrix inactive_diag( rows.GetData(), cols.GetData(), vals.GetData(), num_local_rows, num_local_rows,
+                                    false, false, true );
+  inactive_diag.SetDataOwner( false );
+  // NOTE(review): diag owner flag 3 presumably makes the matrix free the released arrays above — confirm
+  auto mat = std::make_unique<mfem::HypreParMatrix>( comm, global_size, row_starts, &inactive_diag );
+  mat->CopyRowStarts();
+  auto mfem_owned_arrays = 3;
+  mat->SetOwnerFlags( mfem_owned_arrays, mat->OwnsOffd(), mat->OwnsColMap() );
+  return ParSparseMat( std::move( mat ) );
+}
+
 }  // namespace tribol
diff --git a/src/tribol/utils/ParSparseMat.hpp b/src/tribol/utils/ParSparseMat.hpp
index 08384279..f8fc3863 100644
--- a/src/tribol/utils/ParSparseMat.hpp
+++ b/src/tribol/utils/ParSparseMat.hpp
@@ -238,6 +238,18 @@ class ParSparseMat : public ParSparseMatView {
                                       double diag_val, const mfem::Array<int>& ordered_rows = mfem::Array<int>(),
                                       bool skip_rows = true );
 
+  /**
+   * @brief Returns a diagonal matrix with the values from the given vector on the diagonal
+   *
+   * @param comm MPI communicator
+   * @param global_size Global size of the matrix (rows and columns)
+   * @param row_starts Row partitioning (global offsets)
+   * @param diag_vals Vector containing the values for the diagonal entries. Size must match local rows.
+   * @return ParSparseMat The constructed diagonal matrix
+   */
+  static ParSparseMat diagonalMatrix( MPI_Comm comm, HYPRE_BigInt global_size, HYPRE_BigInt* row_starts,
+                                      const mfem::Vector& diag_vals );
+
  private:
   std::unique_ptr<mfem::HypreParMatrix> m_owned_mat;
 };

From 85528118d595efa4910452f416f42eec732c28c4 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Tue, 3 Feb 2026 18:55:42 -0800
Subject: [PATCH 09/56] implement adapter for 2d smooth contact

---
 src/tribol/physics/NewMethodAdapter.cpp | 339 ++++++++++++++++++++++++
 src/tribol/physics/NewMethodAdapter.hpp | 111 ++++++++
 2 files changed, 450 insertions(+)
 create mode 100644 src/tribol/physics/NewMethodAdapter.cpp
 create mode 100644 src/tribol/physics/NewMethodAdapter.hpp

diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
new file mode 100644
index 00000000..4dd69622
--- /dev/null
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -0,0 +1,339 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include "tribol/physics/NewMethodAdapter.hpp"
+
+namespace tribol {
+
+NewMethodAdapter::NewMethodAdapter( MfemMeshData& mfem_data, MfemSubmeshData& submesh_data, MfemJacobianData& jac_data,
+                                    MeshData& mesh1, MeshData& mesh2, double k, double delta, int N )
+    : mfem_data_( mfem_data ), submesh_data_( submesh_data ), jac_data_( jac_data ), mesh1_( mesh1 ), mesh2_( mesh2 )
+{
+  params_.k = k;
+  params_.del = delta;
+  params_.N = N;
+  evaluator_ = std::make_unique<ContactEvaluator>( params_ );
+}
+
+void NewMethodAdapter::setInterfacePairs( ArrayT<InterfacePair>&& pairs, int /*check_level*/ )
+{
+  // TODO: improved pair identification
+  pairs_ = std::move( pairs );
+}
+
+void NewMethodAdapter::updateIntegrationRule()
+{
+  // TODO: break out integration rule as a separate method
+}
+
+void NewMethodAdapter::updateNodalGaps()
+{
+  // NOTE: user should have called updateMfemParallelDecomposition() with updated coords before calling this
+
+  // Tribol level data structures for storing gap, area, and derivatives
+  auto redecomp_gap = submesh_data_.GetRedecompGap();
+  mfem::GridFunction redecomp_area( redecomp_gap.FESpace() );
+  redecomp_area = 0.0;
+  MethodData dg_tilde_dx;
+  dg_tilde_dx.reserveBlockJ( { BlockSpace::NONMORTAR, BlockSpace::MORTAR, BlockSpace::LAGRANGE_MULTIPLIER },
+                             pairs_.size() );
+  MethodData dA_dx;
+  dA_dx.reserveBlockJ( { BlockSpace::NONMORTAR, BlockSpace::MORTAR, BlockSpace::LAGRANGE_MULTIPLIER }, pairs_.size() );
+
+  auto mesh1_view = mesh1_.getView();
+  auto mesh2_view = mesh2_.getView();
+
+  // Compute local contributions
+  for ( const auto& pair : pairs_ ) {
+    const auto elem1 = static_cast<int>( pair.m_element_id1 );
+    const auto elem2 = static_cast<int>( pair.m_element_id2 );
+
+    double g_tilde_elem[2];
+    double A_elem[2];
+
+    evaluator_->gtilde_and_area( pair, mesh1_view, mesh2_view, g_tilde_elem, A_elem );
+
+    if ( A_elem[0] <= 0.0 && A_elem[1] <= 0.0 ) {
+      continue;
+    }
+
+    auto A_conn = mesh1_view.getConnectivity()( elem1 );
+
+    // Add to nodes of Element A
+    redecomp_gap[A_conn[0]] += g_tilde_elem[0];
+    redecomp_gap[A_conn[1]] += g_tilde_elem[1];
+
+    redecomp_area[A_conn[0]] += A_elem[0];
+    redecomp_area[A_conn[1]] += A_elem[1];
+
+    // compute g_tilde first derivative
+    double dg_dx_node1[8];
+    double dg_dx_node2[8];
+    evaluator_->grad_gtilde( pair, mesh1_view, mesh2_view, dg_dx_node1, dg_dx_node2 );
+    StackArray<DeviceArray2D<RealT>, 9> dg_tilde_dx_block( 3 );
+    dg_tilde_dx_block( 2, 0 ) = DeviceArray2D<RealT>( 2, 4 );
+    dg_tilde_dx_block( 2, 0 ).fill( 0.0 );
+    dg_tilde_dx_block( 2, 1 ) = DeviceArray2D<RealT>( 2, 4 );
+    dg_tilde_dx_block( 2, 1 ).fill( 0.0 );
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dg_tilde_dx_block( 2, 0 )( 0, i ) = dg_dx_node1[i];
+    }
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dg_tilde_dx_block( 2, 0 )( 1, i ) = dg_dx_node2[i];
+    }
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dg_tilde_dx_block( 2, 1 )( 0, i ) = dg_dx_node1[i + 4];
+    }
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dg_tilde_dx_block( 2, 1 )( 1, i ) = dg_dx_node2[i + 4];
+    }
+    dg_tilde_dx.storeElemBlockJ( { elem1, elem2, elem1 }, dg_tilde_dx_block );
+
+    // compute area first derivative
+    double dA_dx_node1[8];
+    double dA_dx_node2[8];
+    evaluator_->grad_trib_area( pair, mesh1_view, mesh2_view, dA_dx_node1, dA_dx_node2 );
+    StackArray<DeviceArray2D<RealT>, 9> dA_dx_block( 3 );
+    dA_dx_block( 2, 0 ) = DeviceArray2D<RealT>( 2, 4 );
+    dA_dx_block( 2, 0 ).fill( 0.0 );
+    dA_dx_block( 2, 1 ) = DeviceArray2D<RealT>( 2, 4 );
+    dA_dx_block( 2, 1 ).fill( 0.0 );
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dA_dx_block( 2, 0 )( 0, i ) = dA_dx_node1[i];
+    }
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dA_dx_block( 2, 0 )( 1, i ) = dA_dx_node2[i];
+    }
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dA_dx_block( 2, 1 )( 0, i ) = dA_dx_node1[i + 4];
+    }
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dA_dx_block( 2, 1 )( 1, i ) = dA_dx_node2[i + 4];
+    }
+    dA_dx.storeElemBlockJ( { elem1, elem2, elem1 }, dA_dx_block );
+  }
+
+  // Move gap and area to submesh level vectors
+  mfem::ParLinearForm g_tilde_linear_form(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_data_.GetSubmeshGap( g_tilde_linear_form );
+  auto& P_submesh = *submesh_data_.GetSubmeshFESpace().GetProlongationMatrix();
+  g_tilde_vec_ = mfem::HypreParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  g_tilde_vec_ = 0.0;
+  P_submesh.MultTranspose( g_tilde_linear_form, g_tilde_vec_ );
+
+  mfem::ParLinearForm A_linear_form( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_data_.GetPressureTransfer().RedecompToSubmesh( redecomp_area, A_linear_form );
+  A_vec_ = mfem::HypreParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  A_vec_ = 0.0;
+  P_submesh.MultTranspose( A_linear_form, A_vec_ );
+
+  // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
+  const std::vector<std::pair<int, BlockSpace>> all_info{
+      { 0, BlockSpace::NONMORTAR }, { 0, BlockSpace::MORTAR }, { 1, BlockSpace::LAGRANGE_MULTIPLIER } };
+  auto dg_tilde_dx_block = jac_data_.GetMfemBlockJacobian( dg_tilde_dx, all_info, all_info );
+  dg_tilde_dx_block->owns_blocks = false;
+  dg_tilde_dx_ = ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dg_tilde_dx_block->GetBlock( 1, 0 ) ) );
+  delete &dg_tilde_dx_block->GetBlock( 0, 0 );
+  delete &dg_tilde_dx_block->GetBlock( 0, 1 );
+  delete &dg_tilde_dx_block->GetBlock( 1, 1 );
+
+  auto dA_dx_block = jac_data_.GetMfemBlockJacobian( dA_dx, all_info, all_info );
+  dA_dx_block->owns_blocks = false;
+  dA_dx_ = ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dA_dx_block->GetBlock( 1, 0 ) ) );
+  delete &dA_dx_block->GetBlock( 0, 0 );
+  delete &dA_dx_block->GetBlock( 0, 1 );
+  delete &dA_dx_block->GetBlock( 1, 1 );
+}
+
+void NewMethodAdapter::updateNodalForces()
+{
+  // Computes nodal pressures, the contact force vector, and its derivative df/dx from the stored gaps and areas
+  // NOTE: user should have called updateNodalGaps() with updated coords before calling this
+  // compute nodal pressures. these are used in the Hessian vector product below so we don't have to assemble a Hessian
+  // NOTE: in general, pressure should likely be set by the host code
+  pressure_vec_.SetSize( g_tilde_vec_.Size() );
+  pressure_vec_ = 0.0;
+  for ( int i{ 0 }; i < pressure_vec_.Size(); ++i ) {
+    if ( A_vec_[i] > 1.0e-14 && g_tilde_vec_[i] <= 0.0 ) {
+      pressure_vec_[i] = params_.k * g_tilde_vec_[i] / A_vec_[i];
+    }
+  }
+
+  mfem::HypreParVector k_over_a( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  k_over_a = 0.0;
+  for ( int i{ 0 }; i < k_over_a.Size(); ++i ) {
+    if ( A_vec_[i] > 1.0e-14 ) {
+      k_over_a[i] = params_.k / A_vec_[i];
+    }
+  }
+
+  mfem::HypreParVector p_over_a( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  p_over_a = 0.0;
+  for ( int i{ 0 }; i < p_over_a.Size(); ++i ) {
+    if ( A_vec_[i] > 1.0e-14 ) {
+      p_over_a[i] = pressure_vec_[i] / A_vec_[i];
+    }
+  }
+
+  ParSparseMat dp_dx( *dg_tilde_dx_.get() );
+  dp_dx->ScaleRows( k_over_a );
+  ParSparseMat dp_dx_temp( *dA_dx_.get() );
+  dp_dx_temp->ScaleRows( p_over_a );
+  dp_dx -= dp_dx_temp;
+
+  force_vec_ = pressure_vec_ * dg_tilde_dx_ - g_tilde_vec_ * dp_dx;
+
+  MethodData df_dx_data;
+  df_dx_data.reserveBlockJ( { BlockSpace::NONMORTAR, BlockSpace::MORTAR }, pairs_.size() );
+
+  auto mesh1_view = mesh1_.getView();
+  auto mesh2_view = mesh2_.getView();
+
+  // get pairwise action of second derivatives of gaps and pressure for stiffness contribution
+  for ( auto& pair : pairs_ ) {
+    const auto elem1 = static_cast<int>( pair.m_element_id1 );
+    const auto node11 = mesh1_view.getConnectivity()( elem1, 0 );
+    const auto node12 = mesh1_view.getConnectivity()( elem1, 1 );
+    const auto elem2 = static_cast<int>( pair.m_element_id2 );
+    // NOTE(review): node11/node12 come from mesh1's connectivity but index the submesh true-dof vectors — confirm
+    const RealT pressure1 = 2.0 * pressure_vec_[node11];
+    const RealT pressure2 = 2.0 * pressure_vec_[node12];
+
+    if ( pressure1 == 0.0 && pressure2 == 0.0 ) {
+      continue;
+    }
+    // pressure is nonzero only where A > 1e-14, so guarding on it avoids 0 / 0 = NaN at nodes without contact area
+    const RealT g_p_ainv1 = pressure1 != 0.0 ? -g_tilde_vec_[node11] * pressure_vec_[node11] / A_vec_[node11] : 0.0;
+    const RealT g_p_ainv2 = pressure2 != 0.0 ? -g_tilde_vec_[node12] * pressure_vec_[node12] / A_vec_[node12] : 0.0;
+
+    double df_dx_node1[64];
+    double df_dx_node2[64];
+    // ordering: [dg/(dx0dx0) dg/(dy0dx0) dg/(dx1dx0) ...]
+    evaluator_->d2_g2tilde( pair, mesh1_view, mesh2_view, df_dx_node1, df_dx_node2 );
+    StackArray<DeviceArray2D<RealT>, 9> df_dx_block( 2 );
+    df_dx_block( 0, 0 ) = DeviceArray2D<RealT>( 4, 4 );
+    df_dx_block( 0, 0 ).fill( 0.0 );
+    df_dx_block( 0, 1 ) = DeviceArray2D<RealT>( 4, 4 );
+    df_dx_block( 0, 1 ).fill( 0.0 );
+    df_dx_block( 1, 0 ) = DeviceArray2D<RealT>( 4, 4 );
+    df_dx_block( 1, 0 ).fill( 0.0 );
+    df_dx_block( 1, 1 ) = DeviceArray2D<RealT>( 4, 4 );
+    df_dx_block( 1, 1 ).fill( 0.0 );
+    for ( int j{ 0 }; j < 4; ++j ) {
+      for ( int i{ 0 }; i < 4; ++i ) {
+        df_dx_block( 0, 0 )( i, j ) = pressure1 * df_dx_node1[i + j * 8] + pressure2 * df_dx_node2[i + j * 8];
+      }
+    }
+    for ( int j{ 0 }; j < 4; ++j ) {
+      for ( int i{ 0 }; i < 4; ++i ) {
+        df_dx_block( 0, 1 )( i, j ) =
+            pressure1 * df_dx_node1[i + ( j + 4 ) * 8] + pressure2 * df_dx_node2[i + ( j + 4 ) * 8];
+      }
+    }
+    for ( int j{ 0 }; j < 4; ++j ) {
+      for ( int i{ 0 }; i < 4; ++i ) {
+        df_dx_block( 1, 0 )( i, j ) = pressure1 * df_dx_node1[i + 4 + j * 8] + pressure2 * df_dx_node2[i + 4 + j * 8];
+      }
+    }
+    for ( int j{ 0 }; j < 4; ++j ) {
+      for ( int i{ 0 }; i < 4; ++i ) {
+        df_dx_block( 1, 1 )( i, j ) =
+            pressure1 * df_dx_node1[i + 4 + ( j + 4 ) * 8] + pressure2 * df_dx_node2[i + 4 + ( j + 4 ) * 8];
+      }
+    }
+    evaluator_->compute_d2A_d2u( pair, mesh1_view, mesh2_view, df_dx_node1, df_dx_node2 );
+    for ( int j{ 0 }; j < 4; ++j ) {
+      for ( int i{ 0 }; i < 4; ++i ) {
+        df_dx_block( 0, 0 )( i, j ) += g_p_ainv1 * df_dx_node1[i + j * 8] + g_p_ainv2 * df_dx_node2[i + j * 8];
+      }
+    }
+    for ( int j{ 0 }; j < 4; ++j ) {
+      for ( int i{ 0 }; i < 4; ++i ) {
+        df_dx_block( 0, 1 )( i, j ) +=
+            g_p_ainv1 * df_dx_node1[i + ( j + 4 ) * 8] + g_p_ainv2 * df_dx_node2[i + ( j + 4 ) * 8];
+      }
+    }
+    for ( int j{ 0 }; j < 4; ++j ) {
+      for ( int i{ 0 }; i < 4; ++i ) {
+        df_dx_block( 1, 0 )( i, j ) += g_p_ainv1 * df_dx_node1[i + 4 + j * 8] + g_p_ainv2 * df_dx_node2[i + 4 + j * 8];
+      }
+    }
+    for ( int j{ 0 }; j < 4; ++j ) {
+      for ( int i{ 0 }; i < 4; ++i ) {
+        df_dx_block( 1, 1 )( i, j ) +=
+            g_p_ainv1 * df_dx_node1[i + 4 + ( j + 4 ) * 8] + g_p_ainv2 * df_dx_node2[i + 4 + ( j + 4 ) * 8];
+      }
+    }
+    df_dx_data.storeElemBlockJ( { elem1, elem2 }, df_dx_block );
+  }
+
+  // Assemble the pairwise second-derivative blocks (NONMORTAR/MORTAR rows and columns) into a HypreParMatrix
+  const std::vector<std::pair<int, BlockSpace>> all_info{ { 0, BlockSpace::NONMORTAR }, { 0, BlockSpace::MORTAR } };
+  auto df_dx_block = jac_data_.GetMfemBlockJacobian( df_dx_data, all_info, all_info );
+  df_dx_block->owns_blocks = false;
+  df_dx_ = ParSparseMat( static_cast<mfem::HypreParMatrix*>( &df_dx_block->GetBlock( 0, 0 ) ) );
+
+  mfem::HypreParVector pg2_over_asq( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  pg2_over_asq = 0.0;
+  for ( int i{ 0 }; i < pg2_over_asq.Size(); ++i ) {
+    if ( A_vec_[i] > 1.0e-14 ) {
+      pg2_over_asq[i] = 2.0 * pressure_vec_[i] * g_tilde_vec_[i] / ( A_vec_[i] * A_vec_[i] );
+    }
+  }
+
+  auto& parent_fes = *mfem_data_.GetParentCoords().ParFESpace();
+  auto p_over_a_diag = ParSparseMat::diagonalMatrix( parent_fes.GetComm(), parent_fes.GlobalTrueVSize(),
+                                                     parent_fes.GetTrueDofOffsets(), p_over_a );
+  auto pg2_over_asq_diag = ParSparseMat::diagonalMatrix( parent_fes.GetComm(), parent_fes.GlobalTrueVSize(),
+                                                         parent_fes.GetTrueDofOffsets(), pg2_over_asq );
+
+  df_dx_ -= ParSparseMat::RAP( dg_tilde_dx_, p_over_a_diag, dA_dx_ );
+  df_dx_ -= ParSparseMat::RAP( dA_dx_, p_over_a_diag, dg_tilde_dx_ );
+  df_dx_ += ParSparseMat::RAP( dA_dx_, pg2_over_asq_diag, dA_dx_ );  // (2 p g / A^2) dA^T dA; keeps df/dx symmetric
+  df_dx_ += dp_dx.transpose() * dg_tilde_dx_;
+  df_dx_ += dg_tilde_dx_.transpose() * dp_dx;
+}
+
+RealT NewMethodAdapter::computeTimeStep()
+{
+  // TODO: implement timestep calculation
+  return 1.0;
+}
+
+void NewMethodAdapter::getMfemForce( mfem::Vector& forces ) const { forces = force_vec_; }
+
+void NewMethodAdapter::getMfemGap( mfem::Vector& gaps ) const
+{
+  gaps.SetSize( g_tilde_vec_.Size() );
+
+  for ( int i = 0; i < gaps.Size(); ++i ) {
+    if ( A_vec_[i] > 1.0e-14 )
+      gaps[i] = g_tilde_vec_[i] / A_vec_[i];
+    else
+      gaps[i] = 0.0;
+  }
+}
+
+mfem::ParGridFunction& NewMethodAdapter::getMfemPressure()
+{
+  auto& pressure = submesh_data_.GetSubmeshPressure();
+  pressure.SetFromTrueDofs( pressure_vec_ );
+  return pressure;
+}
+
+std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDx() const
+{
+  return std::unique_ptr<mfem::HypreParMatrix>( df_dx_.release() );
+}
+
+std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDgDx() const
+{
+  return std::unique_ptr<mfem::HypreParMatrix>( dg_tilde_dx_.release() );
+}
+
+std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDp() const { return nullptr; }
+
+}  // namespace tribol
diff --git a/src/tribol/physics/NewMethodAdapter.hpp b/src/tribol/physics/NewMethodAdapter.hpp
new file mode 100644
index 00000000..727be02d
--- /dev/null
+++ b/src/tribol/physics/NewMethodAdapter.hpp
@@ -0,0 +1,111 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#ifndef SRC_TRIBOL_PHYSICS_NEWMETHODADAPTER_HPP_
+#define SRC_TRIBOL_PHYSICS_NEWMETHODADAPTER_HPP_
+
+#include "tribol/physics/ContactFormulation.hpp"
+#include "tribol/physics/new_method.hpp"
+#include "tribol/mesh/MfemData.hpp"
+#include "tribol/common/Parameters.hpp"
+
+#include "mfem.hpp"
+
+#include <memory>
+
+namespace tribol {
+
+class NewMethodAdapter : public ContactFormulation {
+ public:
+  /**
+   * @brief Constructor
+   *
+   * @param mfem_data Reference to Tribol's MFEM mesh data
+   * @param k Penalty stiffness
+   * @param delta Smoothing length
+   * @param N Quadrature order
+   */
+  NewMethodAdapter( MfemMeshData& mfem_data, MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1,
+                    MeshData& mesh2, double k, double delta, int N );
+
+  virtual ~NewMethodAdapter() = default;
+
+  // --- ContactFormulation Interface Implementation ---
+
+  void setInterfacePairs( ArrayT<InterfacePair>&& pairs, int check_level ) override;
+
+  void updateIntegrationRule() override;
+
+  void updateNodalGaps() override;
+
+  void updateNodalForces() override;
+
+  RealT computeTimeStep() override;
+
+  void getMfemForce( mfem::Vector& forces ) const override;
+
+  void getMfemGap( mfem::Vector& gaps ) const override;
+
+  mfem::ParGridFunction& getMfemPressure() override;
+
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx() const override;
+
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx() const override;
+
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const override;
+
+ private:
+  /**
+   * @brief Updates the local Mesh struct used by the new_method physics
+   *
+   * Syncs coordinates from MfemMeshData's RedecompMesh to the local new_method::Mesh.
+   */
+  void updateLocalMesh();
+
+  /**
+   * @brief Assembles global Jacobian matrix J_g = d(g_tilde)/dx
+   *
+   * @return std::unique_ptr<mfem::HypreParMatrix>
+   */
+  std::unique_ptr<mfem::HypreParMatrix> assembleJacobianG() const;
+
+  /**
+   * @brief Assembles global Jacobian matrix J_A = d(A)/dx
+   *
+   * @return std::unique_ptr<mfem::HypreParMatrix>
+   */
+  std::unique_ptr<mfem::HypreParMatrix> assembleJacobianA() const;
+
+  // --- Member Variables ---
+
+  MfemMeshData& mfem_data_;
+  MfemSubmeshData& submesh_data_;
+  MfemJacobianData& jac_data_;
+  MeshData& mesh1_;
+  MeshData& mesh2_;
+  ContactParams params_;
+  std::unique_ptr<ContactEvaluator> evaluator_;
+
+  // Stored InterfacePairs
+  ArrayT<InterfacePair> pairs_;
+
+  // These store the assembled nodal values
+  mfem::HypreParVector g_tilde_vec_;
+  mfem::HypreParVector A_vec_;
+  mutable ParSparseMat dg_tilde_dx_;
+  ParSparseMat dA_dx_;
+
+  mfem::HypreParVector pressure_vec_;  // This holds p = k * g / A
+  mfem::Vector force_vec_;
+  mutable ParSparseMat df_dx_;
+
+  // Pressure GridFunction wrapper (required by interface)
+  // We wrap the pressure_vec_ in a ParGridFunction for return
+  std::unique_ptr<mfem::ParGridFunction> pressure_gf_;
+};
+
+}  // namespace tribol
+
+#endif /* SRC_TRIBOL_PHYSICS_NEWMETHODADAPTER_HPP_ */

From 8a6c42a488a22ee5223391075208e2e7bba87f27 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 4 Feb 2026 12:56:26 -0800
Subject: [PATCH 10/56] bugfixes and add energy method

---
 src/tribol/physics/ContactFormulation.hpp |  9 +++++
 src/tribol/physics/NewMethodAdapter.cpp   | 42 +++++++++++++----------
 src/tribol/physics/NewMethodAdapter.hpp   | 24 ++-----------
 3 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/src/tribol/physics/ContactFormulation.hpp b/src/tribol/physics/ContactFormulation.hpp
index c32b3b7f..a912b2fd 100644
--- a/src/tribol/physics/ContactFormulation.hpp
+++ b/src/tribol/physics/ContactFormulation.hpp
@@ -76,6 +76,15 @@ class ContactFormulation {
    */
   virtual RealT computeTimeStep() = 0;
 
+  /**
+   * @brief Returns the energy stored by the contact constraints (if supported by the method)
+   *
+   * @note Requires updateNodalForces() to be called first.
+   *
+   * @return contact energy
+   */
+  virtual RealT getEnergy() const = 0;
+
 #ifdef BUILD_REDECOMP
   /**
    * @brief Adds computed forces to the provided MFEM vector
diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index 4dd69622..849cb8ca 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -131,21 +131,15 @@ void NewMethodAdapter::updateNodalGaps()
   P_submesh.MultTranspose( A_linear_form, A_vec_ );
 
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
-  const std::vector<std::pair<int, BlockSpace>> all_info{
-      { 0, BlockSpace::NONMORTAR }, { 0, BlockSpace::MORTAR }, { 1, BlockSpace::LAGRANGE_MULTIPLIER } };
-  auto dg_tilde_dx_block = jac_data_.GetMfemBlockJacobian( dg_tilde_dx, all_info, all_info );
+  const std::vector<std::pair<int, BlockSpace>> row_info{ { 1, BlockSpace::LAGRANGE_MULTIPLIER } };
+  const std::vector<std::pair<int, BlockSpace>> col_info{ { 0, BlockSpace::NONMORTAR }, { 0, BlockSpace::MORTAR } };
+  auto dg_tilde_dx_block = jac_data_.GetMfemBlockJacobian( dg_tilde_dx, row_info, col_info );
   dg_tilde_dx_block->owns_blocks = false;
   dg_tilde_dx_ = ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dg_tilde_dx_block->GetBlock( 1, 0 ) ) );
-  delete &dg_tilde_dx_block->GetBlock( 0, 0 );
-  delete &dg_tilde_dx_block->GetBlock( 0, 1 );
-  delete &dg_tilde_dx_block->GetBlock( 1, 1 );
 
-  auto dA_dx_block = jac_data_.GetMfemBlockJacobian( dA_dx, all_info, all_info );
+  auto dA_dx_block = jac_data_.GetMfemBlockJacobian( dA_dx, row_info, col_info );
   dA_dx_block->owns_blocks = false;
   dA_dx_ = ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dA_dx_block->GetBlock( 1, 0 ) ) );
-  delete &dA_dx_block->GetBlock( 0, 0 );
-  delete &dA_dx_block->GetBlock( 0, 1 );
-  delete &dA_dx_block->GetBlock( 1, 1 );
 }
 
 void NewMethodAdapter::updateNodalForces()
@@ -162,6 +156,12 @@ void NewMethodAdapter::updateNodalForces()
     }
   }
 
+  energy_ = 0.0;
+  for ( int i{ 0 }; i < pressure_vec_.Size(); ++i ) {
+    energy_ += pressure_vec_[i] * g_tilde_vec_[i];
+  }
+  MPI_Allreduce( MPI_IN_PLACE, &energy_, 1, MPI_DOUBLE, MPI_SUM, submesh_data_.GetSubmeshFESpace().GetComm() );
+
   mfem::HypreParVector k_over_a( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
   k_over_a = 0.0;
   for ( int i{ 0 }; i < k_over_a.Size(); ++i ) {
@@ -178,13 +178,13 @@ void NewMethodAdapter::updateNodalForces()
     }
   }
 
-  ParSparseMat dp_dx( *dg_tilde_dx_.get() );
+  ParSparseMat dp_dx( dg_tilde_dx_.get() );
   dp_dx->ScaleRows( k_over_a );
-  ParSparseMat dp_dx_temp( *dA_dx_.get() );
+  ParSparseMat dp_dx_temp( dA_dx_.get() );
   dp_dx_temp->ScaleRows( p_over_a );
   dp_dx -= dp_dx_temp;
 
-  force_vec_ = pressure_vec_ * dg_tilde_dx_ - g_tilde_vec_ * dp_dx;
+  force_vec_ = ( pressure_vec_ * dg_tilde_dx_ ).Add( 1.0, g_tilde_vec_ * dp_dx );
 
   MethodData df_dx_data;
   df_dx_data.reserveBlockJ( { BlockSpace::NONMORTAR, BlockSpace::MORTAR }, pairs_.size() );
@@ -284,11 +284,11 @@ void NewMethodAdapter::updateNodalForces()
     }
   }
 
-  auto& parent_fes = *mfem_data_.GetParentCoords().ParFESpace();
-  auto p_over_a_diag = ParSparseMat::diagonalMatrix( parent_fes.GetComm(), parent_fes.GlobalTrueVSize(),
-                                                     parent_fes.GetTrueDofOffsets(), p_over_a );
-  auto pg2_over_asq_diag = ParSparseMat::diagonalMatrix( parent_fes.GetComm(), parent_fes.GlobalTrueVSize(),
-                                                         parent_fes.GetTrueDofOffsets(), pg2_over_asq );
+  auto& submesh_fes = submesh_data_.GetSubmeshFESpace();
+  auto p_over_a_diag = ParSparseMat::diagonalMatrix( submesh_fes.GetComm(), submesh_fes.GlobalTrueVSize(),
+                                                     submesh_fes.GetTrueDofOffsets(), p_over_a );
+  auto pg2_over_asq_diag = ParSparseMat::diagonalMatrix( submesh_fes.GetComm(), submesh_fes.GlobalTrueVSize(),
+                                                         submesh_fes.GetTrueDofOffsets(), pg2_over_asq );
 
   df_dx_ -= ParSparseMat::RAP( dg_tilde_dx_, p_over_a_diag, dA_dx_ );
   df_dx_ -= ParSparseMat::RAP( dA_dx_, p_over_a_diag, dg_tilde_dx_ );
@@ -334,6 +334,10 @@ std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDgDx() const
   return std::unique_ptr<mfem::HypreParMatrix>( dg_tilde_dx_.release() );
 }
 
-std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDp() const { return nullptr; }
+std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDp() const
+{
+  SLIC_ERROR_ROOT( "NewMethod does not support getMfemDfDp()" );
+  return nullptr;
+}
 
 }  // namespace tribol
diff --git a/src/tribol/physics/NewMethodAdapter.hpp b/src/tribol/physics/NewMethodAdapter.hpp
index 727be02d..2ea368d4 100644
--- a/src/tribol/physics/NewMethodAdapter.hpp
+++ b/src/tribol/physics/NewMethodAdapter.hpp
@@ -44,6 +44,8 @@ class NewMethodAdapter : public ContactFormulation {
 
   RealT computeTimeStep() override;
 
+  RealT getEnergy() const override { return energy_; }
+
   void getMfemForce( mfem::Vector& forces ) const override;
 
   void getMfemGap( mfem::Vector& gaps ) const override;
@@ -57,27 +59,6 @@ class NewMethodAdapter : public ContactFormulation {
   std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const override;
 
  private:
-  /**
-   * @brief Updates the local Mesh struct used by the new_method physics
-   *
-   * Syncs coordinates from MfemMeshData's RedecompMesh to the local new_method::Mesh.
-   */
-  void updateLocalMesh();
-
-  /**
-   * @brief Assembles global Jacobian matrix J_g = d(g_tilde)/dx
-   *
-   * @return std::unique_ptr<mfem::HypreParMatrix>
-   */
-  std::unique_ptr<mfem::HypreParMatrix> assembleJacobianG() const;
-
-  /**
-   * @brief Assembles global Jacobian matrix J_A = d(A)/dx
-   *
-   * @return std::unique_ptr<mfem::HypreParMatrix>
-   */
-  std::unique_ptr<mfem::HypreParMatrix> assembleJacobianA() const;
-
   // --- Member Variables ---
 
   MfemMeshData& mfem_data_;
@@ -98,6 +79,7 @@ class NewMethodAdapter : public ContactFormulation {
   ParSparseMat dA_dx_;
 
   mfem::HypreParVector pressure_vec_;  // This holds p = k * g / A
+  RealT energy_{ 0.0 };  // sum of p_i * g_i set by updateNodalForces(); zero until that has run
   mfem::Vector force_vec_;
   mutable ParSparseMat df_dx_;
 

From 7167e03c3612cd184fabe57e8d326185fe6b8b0f Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 4 Feb 2026 12:56:52 -0800
Subject: [PATCH 11/56] add factory for creating formulations

---
 .../physics/ContactFormulationFactory.cpp     | 55 +++++++++++++++++++
 .../physics/ContactFormulationFactory.hpp     | 27 +++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 src/tribol/physics/ContactFormulationFactory.cpp
 create mode 100644 src/tribol/physics/ContactFormulationFactory.hpp

diff --git a/src/tribol/physics/ContactFormulationFactory.cpp b/src/tribol/physics/ContactFormulationFactory.cpp
new file mode 100644
index 00000000..889c1ce7
--- /dev/null
+++ b/src/tribol/physics/ContactFormulationFactory.cpp
@@ -0,0 +1,55 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include "tribol/physics/ContactFormulationFactory.hpp"
+#include "tribol/physics/NewMethodAdapter.hpp"
+#include "tribol/mesh/CouplingScheme.hpp"
+#include "tribol/common/Parameters.hpp"
+
+namespace tribol {
+
+std::unique_ptr<ContactFormulation> createContactFormulation( CouplingScheme* cs )
+{
+  if ( !cs ) {
+    return nullptr;
+  }
+  // ENERGY_MORTAR is the only contact method given a ContactFormulation here; all others fall through to nullptr
+  if ( cs->getContactMethod() == ENERGY_MORTAR ) {
+    // Defaults: unit penalty k and N = 3; k is overridden below when the MFEM mesh data provides a penalty
+    double k = 1.0;
+    double delta = cs->getParameters().binning_proximity_scale; 
+    int N = 3;
+    // NOTE(review): delta is the smoothing length but is read from binning_proximity_scale — confirm intended
+#ifdef BUILD_REDECOMP
+    if ( cs->hasMfemData() ) {
+        // Attempt to get penalty from MfemMeshData if available
+        auto* k_ptr = cs->getMfemMeshData()->GetMesh1KinematicConstantPenalty();
+        if ( k_ptr ) {
+            k = *k_ptr;
+        }
+    }
+    // The three data objects are dereferenced unconditionally below, so each one must be present
+    SLIC_ERROR_ROOT_IF( !cs->hasMfemData(), "ENERGY_MORTAR requires MFEM mesh data." );
+    SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(), "ENERGY_MORTAR requires MFEM submesh data." );
+    SLIC_ERROR_ROOT_IF( !cs->hasMfemJacobianData(), "ENERGY_MORTAR requires MFEM Jacobian data." );
+
+    return std::make_unique<NewMethodAdapter>(
+        *cs->getMfemMeshData(),
+        *cs->getMfemSubmeshData(),
+        *cs->getMfemJacobianData(),
+        cs->getMesh1(),
+        cs->getMesh2(),
+        k, delta, N
+    );
+#else
+    SLIC_ERROR_ROOT("ENERGY_MORTAR requires BUILD_REDECOMP");
+    return nullptr;
+#endif
+  }
+
+  return nullptr;
+}
+
+}  // namespace tribol
diff --git a/src/tribol/physics/ContactFormulationFactory.hpp b/src/tribol/physics/ContactFormulationFactory.hpp
new file mode 100644
index 00000000..7c6dcff8
--- /dev/null
+++ b/src/tribol/physics/ContactFormulationFactory.hpp
@@ -0,0 +1,27 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#ifndef SRC_TRIBOL_PHYSICS_CONTACTFORMULATIONFACTORY_HPP_
+#define SRC_TRIBOL_PHYSICS_CONTACTFORMULATIONFACTORY_HPP_
+
+#include "tribol/physics/ContactFormulation.hpp"
+#include <memory>
+
+namespace tribol {
+
+// Forward declaration
+class CouplingScheme;
+
+/**
+ * @brief Factory function to create a ContactFormulation based on the CouplingScheme settings.
+ *
+ * @param cs Pointer to the CouplingScheme (may be null, in which case nullptr is returned)
+ * @return std::unique_ptr<ContactFormulation> The created formulation, or nullptr if no formulation applies.
+ */
+std::unique_ptr<ContactFormulation> createContactFormulation( CouplingScheme* cs );
+
+}  // namespace tribol
+
+#endif /* SRC_TRIBOL_PHYSICS_CONTACTFORMULATIONFACTORY_HPP_ */

From 897b6252c5bef4095c118b92508e7aecead913b3 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 4 Feb 2026 12:57:36 -0800
Subject: [PATCH 12/56] add formulation hooks for coupling scheme

---
 src/tribol/mesh/CouplingScheme.cpp | 29 ++++++++++++++++++++++++++--
 src/tribol/mesh/CouplingScheme.hpp | 31 ++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/src/tribol/mesh/CouplingScheme.cpp b/src/tribol/mesh/CouplingScheme.cpp
index c7c3bdce..a29da30d 100644
--- a/src/tribol/mesh/CouplingScheme.cpp
+++ b/src/tribol/mesh/CouplingScheme.cpp
@@ -21,6 +21,7 @@
 #include "tribol/search/InterfacePairFinder.hpp"
 #include "tribol/common/Parameters.hpp"
 #include "tribol/physics/Physics.hpp"
+#include "tribol/physics/ContactFormulationFactory.hpp"
 
 namespace tribol {
 
@@ -1021,6 +1022,18 @@ void CouplingScheme::performBinning()
 //------------------------------------------------------------------------------
 int CouplingScheme::apply( int cycle, RealT t, RealT& dt )
 {
+  if ( m_formulation_impl ) {
+    // performBinning();
+    if ( m_interface_pairs.size() > 0 ) {
+      m_formulation_impl->setInterfacePairs( std::move( m_interface_pairs ), 0 );
+      m_formulation_impl->updateIntegrationRule();
+    }
+    m_formulation_impl->updateNodalGaps();
+    m_formulation_impl->updateNodalForces();
+    dt = m_formulation_impl->computeTimeStep();
+    return 0;
+  }
+
   auto& params = m_parameters;
 
   // loop over number of interface pairs
@@ -1135,8 +1148,20 @@ int CouplingScheme::apply( int cycle, RealT t, RealT& dt )
 //------------------------------------------------------------------------------
 bool CouplingScheme::init()
 {
-  // check for valid coupling scheme only for non-null-meshes
-  this->m_isValid = this->isValidCouplingScheme();
+  if ( !m_formulation_impl && m_contactMethod == ENERGY_MORTAR ) {
+    // these calls still need to be made to set mesh pointers and allocator id
+    if ( !setMeshPointers() || checkExecutionModeData() != 0 ) {
+      return false;
+    }
+    m_formulation_impl = createContactFormulation( this );
+  }
+
+  if ( m_formulation_impl ) {
+    this->m_isValid = true;
+  } else {
+    // check for valid coupling scheme only for non-null-meshes
+    this->m_isValid = this->isValidCouplingScheme();
+  }
 
   if ( this->m_isValid ) {
     // set individual coupling scheme logging level
diff --git a/src/tribol/mesh/CouplingScheme.hpp b/src/tribol/mesh/CouplingScheme.hpp
index 23e7911f..922b1f63 100644
--- a/src/tribol/mesh/CouplingScheme.hpp
+++ b/src/tribol/mesh/CouplingScheme.hpp
@@ -14,6 +14,7 @@
 #include "tribol/mesh/MethodCouplingData.hpp"
 #include "tribol/mesh/MfemData.hpp"
 #include "tribol/physics/Physics.hpp"
+#include "tribol/physics/ContactFormulation.hpp"
 #include "tribol/utils/DataManager.hpp"
 #include "tribol/mesh/InterfacePairs.hpp"
 #include "tribol/geom/CompGeom.hpp"
@@ -727,6 +728,30 @@ class CouplingScheme {
    */
   MethodData* getDnDxMethodData() const { return m_dndxJacobian.get(); }
 
+  /**
+   * @brief Set the ContactFormulation implementation
+   *
+   * @param formulation Unique pointer to the formulation; ownership is moved into this coupling scheme
+   */
+  void setContactFormulation( std::unique_ptr<ContactFormulation> formulation )
+  {
+    m_formulation_impl = std::move( formulation );
+  }
+
+  /**
+   * @brief Query whether this coupling scheme currently holds a ContactFormulation
+   *
+   * @return true if a formulation is held, false otherwise
+   */
+  bool hasContactFormulation() const { return static_cast<bool>( m_formulation_impl ); }
+
+  /**
+   * @brief Get the ContactFormulation implementation
+   *
+   * @return Non-owning pointer to the held ContactFormulation (nullptr if none is set)
+   */
+  ContactFormulation* getContactFormulation() const { return m_formulation_impl.get(); }
+
 #ifdef BUILD_REDECOMP
 
   /**
@@ -920,6 +945,8 @@ class CouplingScheme {
   std::unique_ptr<MethodData> m_dfdnJacobian;  ///< Store derivative of force w.r.t. normal on element pairs
   std::unique_ptr<MethodData> m_dndxJacobian;  ///< Store derivative of normal w.r.t. nodal coordinates on element pairs
 
+  std::unique_ptr<ContactFormulation> m_formulation_impl;  ///< Polymorphic contact formulation implementation
+
   ArrayT<InterfacePair> m_interface_pairs;  ///< List of interface pairs
 
   CompGeom m_cg_pairs;  ///< Computational geometry container object
@@ -1030,6 +1057,10 @@ TRIBOL_HOST_DEVICE inline RealT CouplingScheme::Viewer::getGapTol( int fid1, int
 //------------------------------------------------------------------------------
 TRIBOL_HOST_DEVICE inline bool CouplingScheme::Viewer::pruneMethodFacePair( const IndexT fid1, const IndexT fid2 ) const
 {
+  if ( m_contact_method == ENERGY_MORTAR ) {
+    return false;
+  }
+
   constexpr int max_dim = 3;
   constexpr int max_nodes_per_face = 4;
 

From 764e38aff66d3aaa95e7097fdeb0eae071ec474d Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 4 Feb 2026 12:58:22 -0800
Subject: [PATCH 13/56] add ContactFormulation hooks

---
 src/tribol/interface/mfem_tribol.cpp | 98 ++++++++++++++++++++++++++--
 src/tribol/interface/mfem_tribol.hpp | 24 +++++++
 2 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/src/tribol/interface/mfem_tribol.cpp b/src/tribol/interface/mfem_tribol.cpp
index 95663a8d..e4d5f19b 100644
--- a/src/tribol/interface/mfem_tribol.cpp
+++ b/src/tribol/interface/mfem_tribol.cpp
@@ -306,6 +306,12 @@ void getMfemResponse( IndexT cs_id, mfem::Vector& r )
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
+
+  if ( cs->hasContactFormulation() ) {
+    cs->getContactFormulation()->getMfemForce( r );
+    return;
+  }
+
   SLIC_ERROR_ROOT_IF( !cs->hasMfemData(),
                       "Coupling scheme does not contain MFEM data. "
                       "Create the coupling scheme using registerMfemCouplingScheme() to return a response vector." );
@@ -319,6 +325,47 @@ std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id )
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
+
+  if ( cs->hasContactFormulation() ) {
+    // mfem::BlockOperator only references the offsets array passed to its constructor, so the array must outlive
+    // the operator. This holder is a base constructed before (and destroyed after) the BlockOperator base.
+    struct BlockOffsets {
+      BlockOffsets( int n_disp, int n_pres ) : block_offsets( 3 )
+      {
+        block_offsets[0] = 0;
+        block_offsets[1] = n_disp;           // Force rows (displacement dofs)
+        block_offsets[2] = n_disp + n_pres;  // Pressure cols (pressure dofs)
+      }
+      mfem::Array<int> block_offsets;
+    };
+    struct OwningBlockOperator : private BlockOffsets, public mfem::BlockOperator {
+      OwningBlockOperator( int n_disp, int n_pres )
+          : BlockOffsets( n_disp, n_pres ), mfem::BlockOperator( block_offsets )
+      {
+      }
+    };
+
+    auto* formulation = cs->getContactFormulation();
+    // Use formulation derivatives; each one is treated as possibly null below
+    auto DfDx = formulation->getMfemDfDx();
+    auto DfDp = formulation->getMfemDfDp();
+    auto DgDx = formulation->getMfemDgDx();
+
+    // Determine sizes without dereferencing a missing block
+    const int n_disp = DfDx ? DfDx->Height() : ( DfDp ? DfDp->Height() : ( DgDx ? DgDx->Width() : 0 ) );
+    const int n_pres = DfDp ? DfDp->Width() : ( DgDx ? DgDx->Height() : 0 );
+
+    std::unique_ptr<mfem::BlockOperator> blockOp = std::make_unique<OwningBlockOperator>( n_disp, n_pres );
+    if ( DfDx ) blockOp->SetBlock( 0, 0, DfDx.release() );
+    if ( DfDp ) blockOp->SetBlock( 0, 1, DfDp.release() );
+    if ( DgDx ) blockOp->SetBlock( 1, 0, DgDx.release() );
+    // 1,1 block (DgDp) is implicitly zero for standard contact
+
+    // Manually set ownership to avoid leaks, as BlockOperator owns nothing by default
+    blockOp->owns_blocks = 1;
+    return blockOp;
+  }
+
   SparseMode sparse_mode = cs->getEnforcementOptions().lm_implicit_options.sparse_mode;
   if ( sparse_mode != SparseMode::MFEM_ELEMENT_DENSE ) {
     SLIC_ERROR_ROOT(
@@ -335,14 +366,12 @@ std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id )
           cs_id ) );
   // creates a block Jacobian on the parent mesh/parent-linked boundary submesh based on the element Jacobians stored in
   // the coupling scheme's method data
-  const std::vector<std::pair<int, BlockSpace>> all_info{ { 0, BlockSpace::MORTAR },
-                                                         { 0, BlockSpace::NONMORTAR },
-                                                         { 1, BlockSpace::LAGRANGE_MULTIPLIER } };
+  const std::vector<std::pair<int, BlockSpace>> all_info{
+      { 0, BlockSpace::MORTAR }, { 0, BlockSpace::NONMORTAR }, { 1, BlockSpace::LAGRANGE_MULTIPLIER } };
   if ( cs->isEnzymeEnabled() ) {
     auto dfdx = cs->getMfemJacobianData()->GetMfemBlockJacobian( *cs->getMethodData(), all_info, all_info );
     const std::vector<std::pair<int, BlockSpace>> nonmortar_info{ { 0, BlockSpace::NONMORTAR } };
-    auto dfdn =
-        cs->getMfemJacobianData()->GetMfemBlockJacobian( *cs->getDfDnMethodData(), all_info, nonmortar_info );
+    auto dfdn = cs->getMfemJacobianData()->GetMfemBlockJacobian( *cs->getDfDnMethodData(), all_info, nonmortar_info );
     auto dndx =
         cs->getMfemJacobianData()->GetMfemBlockJacobian( *cs->getDnDxMethodData(), nonmortar_info, nonmortar_info );
 
@@ -362,6 +391,51 @@ std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id )
   }
 }
 
+std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  // the force/displacement derivative is only provided by a ContactFormulation; anything else is an error
+  if ( !cs->hasContactFormulation() ) {
+    SLIC_ERROR_ROOT( "getMfemDfDx() is only supported for coupling schemes with a ContactFormulation." );
+    return nullptr;
+  }
+  return cs->getContactFormulation()->getMfemDfDx();
+}
+
+std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  // the force/pressure derivative is only provided by a ContactFormulation; anything else is an error
+  if ( !cs->hasContactFormulation() ) {
+    SLIC_ERROR_ROOT( "getMfemDfDp() is only supported for coupling schemes with a ContactFormulation." );
+    return nullptr;
+  }
+  return cs->getContactFormulation()->getMfemDfDp();
+}
+
+std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  // the gap/displacement derivative is only provided by a ContactFormulation; anything else is an error
+  if ( !cs->hasContactFormulation() ) {
+    SLIC_ERROR_ROOT( "getMfemDgDx() is only supported for coupling schemes with a ContactFormulation." );
+    return nullptr;
+  }
+  return cs->getContactFormulation()->getMfemDgDx();
+}
+
 void getMfemGap( IndexT cs_id, mfem::Vector& g )
 {
   auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
@@ -369,6 +443,12 @@ void getMfemGap( IndexT cs_id, mfem::Vector& g )
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
+
+  if ( cs->hasContactFormulation() ) {
+    cs->getContactFormulation()->getMfemGap( g );
+    return;
+  }
+
   SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(),
                       axom::fmt::format( "Coupling scheme cs_id={0} does not contain MFEM pressure field data. "
                                          "Create the coupling scheme using registerMfemCouplingScheme() and set the "
@@ -384,6 +464,11 @@ mfem::ParGridFunction& getMfemPressure( IndexT cs_id )
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
+
+  if ( cs->hasContactFormulation() ) {
+    return cs->getContactFormulation()->getMfemPressure();
+  }
+
   SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(),
                       axom::fmt::format( "Coupling scheme cs_id={0} does not contain MFEM pressure field data. "
                                          "Create the coupling scheme using registerMfemCouplingScheme() and set the "
@@ -436,7 +521,8 @@ void updateMfemParallelDecomposition( int n_ranks, bool force_new_redecomp )
       }
       if ( cs.getEnforcementMethod() == LAGRANGE_MULTIPLIER ) {
         SLIC_ERROR_ROOT_IF( cs.getContactModel() != FRICTIONLESS, "Only frictionless contact is supported." );
-        SLIC_ERROR_ROOT_IF( cs.getContactMethod() != SINGLE_MORTAR, "Only single mortar contact is supported." );
+        SLIC_ERROR_ROOT_IF( cs.getContactMethod() != SINGLE_MORTAR && cs.getContactMethod() != ENERGY_MORTAR,
+                            "Only single mortar or ENERGY_MORTAR contact is supported." );
         auto submesh_data = cs.getMfemSubmeshData();
         // updates submesh-native grid functions and transfer operators on
         // the new redecomp mesh
diff --git a/src/tribol/interface/mfem_tribol.hpp b/src/tribol/interface/mfem_tribol.hpp
index 2a142962..53a144af 100644
--- a/src/tribol/interface/mfem_tribol.hpp
+++ b/src/tribol/interface/mfem_tribol.hpp
@@ -268,6 +268,30 @@ void getMfemResponse( IndexT cs_id, mfem::Vector& r );
  */
 std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id );
 
+/**
+ * @brief Get the derivative of the force with respect to displacement
+ *
+ * @param cs_id Id of a coupling scheme that has a ContactFormulation; other schemes trigger a SLIC error
+ * @return Unique pointer to MFEM HypreParMatrix
+ */
+std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx( IndexT cs_id );
+
+/**
+ * @brief Get the derivative of the force with respect to pressure
+ *
+ * @param cs_id Id of a coupling scheme that has a ContactFormulation; other schemes trigger a SLIC error
+ * @return Unique pointer to MFEM HypreParMatrix
+ */
+std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp( IndexT cs_id );
+
+/**
+ * @brief Get the derivative of the gap with respect to displacement
+ *
+ * @param cs_id Id of a coupling scheme that has a ContactFormulation; other schemes trigger a SLIC error
+ * @return Unique pointer to MFEM HypreParMatrix
+ */
+std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx( IndexT cs_id );
+
 /**
  * @brief Returns gap vector to a given mfem::Vector
  *

From b6033e3a1ccb347b5d987185cc454314ee3b9a40 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 4 Feb 2026 12:58:40 -0800
Subject: [PATCH 14/56] add ENERGY_MORTAR

---
 src/tribol/common/Parameters.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/tribol/common/Parameters.hpp b/src/tribol/common/Parameters.hpp
index c944a135..8a28c0ec 100644
--- a/src/tribol/common/Parameters.hpp
+++ b/src/tribol/common/Parameters.hpp
@@ -119,6 +119,7 @@ enum ContactMethod  // all mortar methods go first
   SINGLE_MORTAR,   ///! Single mortar per Puso 2003
   ALIGNED_MORTAR,  ///! Aligned mortar to be used with ContactCase = NO_SLIDING
   MORTAR_WEIGHTS,  ///! Method that only returns mortar weights per single mortar method
+  ENERGY_MORTAR,   ///! Energy-based mortar method
   COMMON_PLANE,    ///! Common plane method, currently with single integration point
   NUM_CONTACT_METHODS
 };

From 893736cb2cef64ea61038950d7f3d762619970ed Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 4 Feb 2026 12:58:52 -0800
Subject: [PATCH 15/56] update file list

---
 src/tribol/CMakeLists.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/tribol/CMakeLists.txt b/src/tribol/CMakeLists.txt
index ecb9c688..eb343964 100644
--- a/src/tribol/CMakeLists.txt
+++ b/src/tribol/CMakeLists.txt
@@ -41,9 +41,11 @@ set(tribol_headers
     physics/AlignedMortar.hpp
     physics/CommonPlane.hpp
     physics/ContactFormulation.hpp
+    physics/ContactFormulationFactory.hpp
     physics/Mortar.hpp
-    physics/Physics.hpp
     physics/new_method.hpp
+    physics/NewMethodAdapter.hpp
+    physics/Physics.hpp
 
     search/InterfacePairFinder.hpp
 
@@ -77,9 +79,11 @@ set(tribol_sources
 
     physics/AlignedMortar.cpp
     physics/CommonPlane.cpp
+    physics/ContactFormulationFactory.cpp
     physics/Mortar.cpp
-    physics/Physics.cpp
     physics/new_method.cpp
+    physics/NewMethodAdapter.cpp
+    physics/Physics.cpp
      
     search/InterfacePairFinder.cpp
 

From ca8480db5d94f5366fc5c7a61b01cca79a576aaf Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 4 Feb 2026 12:59:02 -0800
Subject: [PATCH 16/56] add an example and test

---
 src/examples/CMakeLists.txt               |   2 +-
 src/examples/mfem_mortar_energy_patch.cpp | 305 ++++++++++++++++++++++
 src/tests/CMakeLists.txt                  |   1 +
 src/tests/tribol_mfem_mortar_energy.cpp   | 252 ++++++++++++++++++
 4 files changed, 559 insertions(+), 1 deletion(-)
 create mode 100644 src/examples/mfem_mortar_energy_patch.cpp
 create mode 100644 src/tests/tribol_mfem_mortar_energy.cpp

diff --git a/src/examples/CMakeLists.txt b/src/examples/CMakeLists.txt
index e5905589..50e6cc5b 100644
--- a/src/examples/CMakeLists.txt
+++ b/src/examples/CMakeLists.txt
@@ -9,7 +9,6 @@ set( contact_examples
      common_plane.cpp
      mortar_lm_patch_test.cpp
      step_1_lobatto.cpp
-     new_method_test.cpp
      )
 
 
@@ -102,6 +101,7 @@ if ( BUILD_REDECOMP )
 
   set( examples
       mfem_mortar_lm_patch.cpp
+      mfem_mortar_energy_patch.cpp
       mfem_common_plane.cpp
       )
 
diff --git a/src/examples/mfem_mortar_energy_patch.cpp b/src/examples/mfem_mortar_energy_patch.cpp
new file mode 100644
index 00000000..1b19bcf4
--- /dev/null
+++ b/src/examples/mfem_mortar_energy_patch.cpp
@@ -0,0 +1,305 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+/**
+ * @file mfem_mortar_energy_patch.cpp
+ *
+ * @brief Demonstrates contact patch test using the energy mortar method
+ *
+ * Demonstrates a three dimensional contact patch test using the energy mortar method in Tribol. Contact is enforced
+ * between two blocks which are initially in contact. The blocks occupy [0, 1]^3 and [0, 1]x[0, 1]x[0.99, 1.99]. To
+ * enforce symmetry and prevent rigid body modes, Dirichlet boundary conditions are applied in the x-direction along the
+ * x = 0 plane, in the y-direction along y = 0 plane, and in the z-direction along the z = 0 and z = 1.99 planes.
+ * Enforcement is through Penalty. Small deformation contact is assumed and, consequently, the system is linear and the
+ * solution is determined through a single linear solve (no timestepping).
+ *
+ * The linear system solved is
+ *  (K + K_contact) u = f_contact
+ *
+ * where K is the system matrix for elasticity, K_contact is the stiffness matrix from contact penalty,
+ * u is the vector of nodal displacements, and f_contact is the vector of nodal contact forces.
+ *
+ * The example uses the Tribol MFEM interface, which supports decomposed (MPI) meshes.
+ *
+ * Example runs (from repo root directory):
+ *   - mpirun -np 4 {build_dir}/examples/mfem_mortar_energy_patch_ex
+ *
+ * Example output can be viewed in VisIt or ParaView.
+ */
+
+#include <set>
+
+#ifdef TRIBOL_USE_UMPIRE
+// Umpire includes
+#include "umpire/ResourceManager.hpp"
+#endif
+
+// MFEM includes
+#include "mfem.hpp"
+
+// Axom includes
+#include "axom/CLI11.hpp"
+#include "axom/core.hpp"
+#include "axom/slic.hpp"
+
+// Shared includes
+#include "shared/mesh/MeshBuilder.hpp"
+
+// Tribol includes
+#include "tribol/config.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/interface/tribol.hpp"
+#include "tribol/interface/mfem_tribol.hpp"
+
+int main( int argc, char** argv )
+{
+  // initialize MPI
+  MPI_Init( &argc, &argv );
+  int rank;
+  MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+
+#ifdef TRIBOL_USE_UMPIRE
+  umpire::ResourceManager::getInstance();  // initialize umpire's ResourceManager
+#endif
+
+  // initialize logger
+  axom::slic::SimpleLogger logger;
+  axom::slic::setIsRoot( rank == 0 );
+
+  // define command line options
+  // number of times to uniformly refine the serial mesh before constructing the parallel mesh
+  int ref_levels = 2;
+  // polynomial order of the finite element discretization
+  int order = 1;
+  // Lame parameter lambda
+  double lambda = 50.0;
+  // Lame parameter mu (shear modulus)
+  double mu = 50.0;
+  // Penalty parameter
+  double penalty = 50.0;
+  // device configuration string (see mfem::Device::Configure() for valid options)
+  std::string device_config = "cpu";
+
+  // parse command line options
+  axom::CLI::App app{ "mfem_mortar_energy_patch" };
+  app.add_option( "-r,--refine", ref_levels, "Number of times to refine the mesh uniformly." )->capture_default_str();
+  app.add_option( "-l,--lambda", lambda, "Lame parameter lambda." )->capture_default_str();
+  app.add_option( "-m,--mu", mu, "Lame parameter mu (shear modulus)." )->capture_default_str();
+  app.add_option( "-p,--penalty", penalty, "Contact penalty parameter." )->capture_default_str();
+  // app.add_option( "-d,--device", device_config, "Device configuration string." )->capture_default_str();
+
+  CLI11_PARSE( app, argc, argv );
+
+  SLIC_INFO_ROOT( "Running mfem_mortar_energy_patch with the following options:" );
+  SLIC_INFO_ROOT( axom::fmt::format( "refine:   {0}", ref_levels ) );
+  SLIC_INFO_ROOT( axom::fmt::format( "lambda:   {0}", lambda ) );
+  SLIC_INFO_ROOT( axom::fmt::format( "mu:       {0}", mu ) );
+  SLIC_INFO_ROOT( axom::fmt::format( "penalty:  {0}\n", penalty ) );
+
+  // configure the devices available for MFEM kernel launches
+  mfem::Device device( device_config );
+  if ( rank == 0 ) {
+    device.Print();
+  }
+
+  // fixed options
+  // boundary element attributes of mortar surface, the z = 1 plane of the first block
+  std::set<int> mortar_attrs( { 4 } );
+  // boundary element attributes of nonmortar surface, the z = 0.99 plane of the second block
+  std::set<int> nonmortar_attrs( { 5 } );
+  // boundary element attributes of x-fixed surfaces (at x = 0)
+  std::vector<std::set<int>> fixed_attrs( 3 );
+  fixed_attrs[0] = { 1 };
+  // boundary element attributes of y-fixed surfaces (at y = 0)
+  fixed_attrs[1] = { 2 };
+  // boundary element attributes of z-fixed surfaces (3: surface at z = 0, 6: surface at z = 1.99)
+  fixed_attrs[2] = { 3, 6 };
+
+  // create an axom timer to give wall times for each step
+  axom::utilities::Timer timer{ false };
+
+  timer.start();
+  // build mesh of 2 cubes
+  int nel_per_dir = std::pow( 2, ref_levels );  // 2^ref_levels elements per direction (double converted to int)
+  auto elem_type = mfem::Element::HEXAHEDRON;
+  // clang-format off
+  mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
+    shared::MeshBuilder::CubeMesh(nel_per_dir, nel_per_dir, nel_per_dir, elem_type)
+      .updateBdrAttrib(3, 7)
+      .updateBdrAttrib(1, 3)
+      .updateBdrAttrib(4, 7)
+      .updateBdrAttrib(5, 1)
+      .updateBdrAttrib(6, 4),
+    shared::MeshBuilder::CubeMesh(nel_per_dir, nel_per_dir, nel_per_dir, elem_type)
+      .translate({0.0, 0.0, 0.99})
+      .updateBdrAttrib(1, 8)
+      .updateBdrAttrib(3, 7)
+      .updateBdrAttrib(4, 7)
+      .updateBdrAttrib(5, 1)
+      .updateBdrAttrib(8, 5)
+  }));
+  // clang-format on
+  timer.stop();
+  SLIC_INFO_ROOT( axom::fmt::format( "Time to create parallel mesh: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  // Set up an MFEM data collection for output. We output data in Paraview and
+  // VisIt formats.
+  mfem::ParaViewDataCollection paraview_datacoll( "mortar_energy_patch_pv", &mesh );
+  mfem::VisItDataCollection visit_datacoll( "mortar_energy_patch_vi", &mesh );
+
+  timer.start();
+  // Finite element collection (shared between all grid functions).
+  mfem::H1_FECollection fec( order, mesh.SpaceDimension() );
+  // Finite element space (shared between all grid functions).
+  mfem::ParFiniteElementSpace fespace( &mesh, &fec, mesh.SpaceDimension() );
+  // Create coordinate grid function
+  mfem::ParGridFunction coords( &fespace );
+  mesh.SetNodalGridFunction( &coords );
+  paraview_datacoll.RegisterField( "position", &coords );
+  visit_datacoll.RegisterField( "position", &coords );
+
+  // Create a grid function for displacement
+  mfem::ParGridFunction displacement( &fespace );
+  paraview_datacoll.RegisterField( "displacement", &displacement );
+  visit_datacoll.RegisterField( "displacement", &displacement );
+  displacement = 0.0;
+  timer.stop();
+  SLIC_INFO_ROOT( axom::fmt::format( "Time to create grid functions: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  // save initial configuration
+  paraview_datacoll.Save();
+  visit_datacoll.Save();
+
+  timer.start();
+  mfem::Array<int> ess_tdof_list;
+  {
+    // First, build an array of "markers" (i.e. booleans) to denote which vdofs are in the list.
+    mfem::Array<int> ess_vdof_marker( fespace.GetVSize() );
+    ess_vdof_marker = 0;
+    for ( int d = 0; d < 3; ++d ) {
+      // convert boundary attributes into markers for active attributes on the dimension d
+      mfem::Array<int> ess_bdr( mesh.bdr_attributes.Max() );
+      ess_bdr = 0;
+      for ( auto xfixed_attr : fixed_attrs[d] ) {
+        ess_bdr[xfixed_attr - 1] = 1;
+      }
+      mfem::Array<int> new_ess_vdof_marker;
+      // Find all vdofs with the given boundary marker
+      fespace.GetEssentialVDofs( ess_bdr, new_ess_vdof_marker, d );
+      // Compute union of existing marked vdofs with vdofs marked on dimension d
+      for ( int j = 0; j < new_ess_vdof_marker.Size(); ++j ) {
+        ess_vdof_marker[j] = ess_vdof_marker[j] || new_ess_vdof_marker[j];
+      }
+    }
+    // Convert the vdofs to tdofs to remove duplicate values over ranks
+    mfem::Array<int> ess_tdof_marker;
+    fespace.GetRestrictionMatrix()->BooleanMult( ess_vdof_marker, ess_tdof_marker );
+    // Convert the tdof marker array to a tdof list
+    mfem::FiniteElementSpace::MarkerToList( ess_tdof_marker, ess_tdof_list );
+  }
+  timer.stop();
+  SLIC_INFO_ROOT( axom::fmt::format( "Time to set up boundary conditions: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  // This block of code constructs a small-deformation linear elastic bilinear form.
+  timer.start();
+  mfem::ParBilinearForm a( &fespace );
+  mfem::ConstantCoefficient lambda_coeff( lambda );
+  mfem::ConstantCoefficient mu_coeff( mu );
+  a.AddDomainIntegrator( new mfem::ElasticityIntegrator( lambda_coeff, mu_coeff ) );
+
+  // Assemble the on-rank bilinear form stiffness matrix.
+  a.Assemble();
+  // Reduce to tdofs and form a hypre parallel matrix for parallel solution of the linear system.
+  auto A_elasticity = std::make_unique<mfem::HypreParMatrix>();
+  a.FormSystemMatrix( ess_tdof_list, *A_elasticity );
+  timer.stop();
+  SLIC_INFO_ROOT(
+      axom::fmt::format( "Time to create and assemble internal stiffness: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  // This block of code does initial setup of Tribol.
+  timer.start();
+
+  int coupling_scheme_id = 0;
+  int mesh1_id = 0;
+  int mesh2_id = 1;
+  tribol::registerMfemCouplingScheme( coupling_scheme_id, mesh1_id, mesh2_id, mesh, coords, mortar_attrs,
+                                      nonmortar_attrs, tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
+                                      tribol::ENERGY_MORTAR, tribol::FRICTIONLESS, tribol::LAGRANGE_MULTIPLIER,
+                                      tribol::BINNING_GRID, tribol::ExecutionMode::Sequential );
+  tribol::setMPIComm( coupling_scheme_id, MPI_COMM_WORLD );
+  tribol::setLagrangeMultiplierOptions( coupling_scheme_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
+  tribol::setMfemKinematicConstantPenalty( coupling_scheme_id, penalty, penalty );
+
+  // Update the cycle information for the data collections. Also update time with a pseudotime for the solution.
+  int cycle = 1;
+  double time = 1.0;  // time is arbitrary here (no timesteps)
+  double dt = 1.0;
+  paraview_datacoll.SetCycle( cycle );
+  paraview_datacoll.SetTime( time );
+  paraview_datacoll.SetTimeStep( dt );
+  visit_datacoll.SetCycle( cycle );
+  visit_datacoll.SetTime( time );
+  visit_datacoll.SetTimeStep( dt );
+
+  // This creates the parallel adjacency-based mesh redecomposition. It also constructs new Tribol meshes as subsets of
+  // the redecomposed mesh.
+  tribol::updateMfemParallelDecomposition();
+  // This API call computes the contact response and Jacobian given the current mesh configuration.
+  tribol::update( cycle, time, dt );
+
+  // Get contact stiffness (derivative of the contact force with respect to displacement)
+  auto A_contact = tribol::getMfemDfDx( coupling_scheme_id );
+
+  // Add contact stiffness to elasticity stiffness
+  auto A_total = std::unique_ptr<mfem::HypreParMatrix>( mfem::Add( 1.0, *A_elasticity, 1.0, *A_contact ) );
+
+  timer.stop();
+  SLIC_INFO_ROOT(
+      axom::fmt::format( "Time to setup Tribol and compute Jacobian: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  int n_disp_dofs = fespace.GetTrueVSize();
+  SLIC_INFO_ROOT( axom::fmt::format( "  Number of displacement DOFs:        {0}", n_disp_dofs ) );
+
+  timer.start();
+
+  // Retrieve contact force (response)
+  mfem::Vector f_contact( fespace.GetTrueVSize() );
+  f_contact = 0.0;
+  tribol::getMfemResponse( coupling_scheme_id, f_contact );
+
+  // Create a solution vector storing displacement
+  mfem::Vector X( fespace.GetTrueVSize() );
+  X.UseDevice( true );
+  X = 0.0;
+
+  // Use a linear solver (MINRES) to find the true-dof displacement vector.
+  mfem::MINRESSolver solver( MPI_COMM_WORLD );
+  solver.SetRelTol( 1.0e-8 );
+  solver.SetAbsTol( 1.0e-12 );
+  solver.SetMaxIter( 5000 );
+  solver.SetPrintLevel( 3 );
+  solver.SetOperator( *A_total );
+  solver.Mult( f_contact, X );
+
+  // Apply the prolongation matrix to move the true-dof displacements to the displacement grid function.
+  fespace.GetProlongationMatrix()->Mult( X, displacement );
+
+  // Update mesh coordinates given the displacement.
+  coords += displacement;
+
+  timer.stop();
+  SLIC_INFO_ROOT(
+      axom::fmt::format( "Time to solve for updated displacements: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  // Save the deformed configuration
+  paraview_datacoll.Save();
+  visit_datacoll.Save();
+
+  // Tribol cleanup: deletes the coupling schemes and clears associated memory.
+  tribol::finalize();
+  MPI_Finalize();
+
+  return 0;
+}
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 86424763..03d894f2 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -155,6 +155,7 @@ if ( BUILD_REDECOMP AND TRIBOL_USE_MPI )
   set( combined_tests
       tribol_mfem_common_plane.cpp
       tribol_mfem_mortar_lm.cpp
+      tribol_mfem_mortar_energy.cpp
       tribol_proximity_check.cpp
       tribol_redecomp_tol.cpp
       )
diff --git a/src/tests/tribol_mfem_mortar_energy.cpp b/src/tests/tribol_mfem_mortar_energy.cpp
new file mode 100644
index 00000000..7024aedd
--- /dev/null
+++ b/src/tests/tribol_mfem_mortar_energy.cpp
@@ -0,0 +1,252 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include <set>
+
+#include <gtest/gtest.h>
+
+#ifdef TRIBOL_USE_UMPIRE
+// Umpire includes
+#include "umpire/ResourceManager.hpp"
+#endif
+
+// MFEM includes
+#include "mfem.hpp"
+
+// Axom includes
+#include "axom/CLI11.hpp"
+#include "axom/slic.hpp"
+
+// Shared includes
+#include "shared/mesh/MeshBuilder.hpp"
+
+// Redecomp includes
+#include "redecomp/redecomp.hpp"
+
+// Tribol includes
+#include "tribol/config.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/interface/tribol.hpp"
+#include "tribol/interface/mfem_tribol.hpp"
+
+/**
+ * @brief This tests the Tribol MFEM interface running a contact patch test using ENERGY_MORTAR.
+ *
+ */
+class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int, mfem::Element::Type>> {
+ protected:
+  tribol::RealT max_disp_;
+  void SetUp() override
+  {
+    // number of times to uniformly refine the serial mesh before constructing the
+    // parallel mesh
+    int ref_levels = std::get<0>( GetParam() );
+    // polynomial order of the finite element discretization
+    int order = 1;
+
+    // fixed options
+    // boundary element attributes of mortar surface (bottom of top square)
+    auto mortar_attrs = std::set<int>( { 5 } );
+    // boundary element attributes of nonmortar surface (top of bottom square)
+    auto nonmortar_attrs = std::set<int>( { 3 } );
+    // boundary element attributes of x-fixed surfaces (left side)
+    auto xfixed_attrs = std::set<int>( { 4 } );
+    // boundary element attributes of y-fixed surfaces (bottom of bottom square)
+    auto yfixed_attrs = std::set<int>( { 1 } );
+
+    // build mesh of 2 squares
+    int nel_per_dir = std::pow( 2, ref_levels );
+
+    // clang-format off
+    mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
+      shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir) // Bottom mesh [0,1]x[0,1]
+        .updateBdrAttrib(1, 1) // Bottom (Fixed Y)
+        .updateBdrAttrib(2, 2) // Right
+        .updateBdrAttrib(3, 3) // Top (NonMortar)
+        .updateBdrAttrib(4, 4), // Left (Fixed X)
+      shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir) // Top mesh [0,1]x[0,1]
+        .translate({0.0, 0.99}) // Shift up to [0,1]x[0.99, 1.99]. Overlap 0.01.
+        .updateBdrAttrib(1, 5) // Bottom (Mortar)
+        .updateBdrAttrib(2, 2) // Right
+        .updateBdrAttrib(3, 6) // Top
+        .updateBdrAttrib(4, 4) // Left (Fixed X)
+    }));
+    // clang-format on
+
+    // grid function for higher-order nodes
+    auto fe_coll = mfem::H1_FECollection( order, mesh.SpaceDimension() );
+    auto par_fe_space = mfem::ParFiniteElementSpace( &mesh, &fe_coll, mesh.SpaceDimension() );
+    auto coords = mfem::ParGridFunction( &par_fe_space );
+    if ( order > 1 ) {
+      mesh.SetNodalGridFunction( &coords, false );
+    } else {
+      mesh.GetNodes( coords );
+    }
+
+    // grid function for displacement
+    mfem::ParGridFunction displacement{ &par_fe_space };
+    displacement = 0.0;
+
+    // recover dirichlet bc tdof list
+    mfem::Array<int> ess_tdof_list;
+    {
+      mfem::Array<int> ess_vdof_marker;
+      mfem::Array<int> ess_bdr( mesh.bdr_attributes.Max() );
+      ess_bdr = 0;
+      for ( auto xfixed_attr : xfixed_attrs ) {
+        if ( xfixed_attr <= ess_bdr.Size() ) ess_bdr[xfixed_attr - 1] = 1;
+      }
+      par_fe_space.GetEssentialVDofs( ess_bdr, ess_vdof_marker, 0 );
+      mfem::Array<int> new_ess_vdof_marker;
+      ess_bdr = 0;
+      for ( auto yfixed_attr : yfixed_attrs ) {
+        if ( yfixed_attr <= ess_bdr.Size() ) ess_bdr[yfixed_attr - 1] = 1;
+      }
+      par_fe_space.GetEssentialVDofs( ess_bdr, new_ess_vdof_marker, 1 );
+      for ( int i{ 0 }; i < ess_vdof_marker.Size(); ++i ) {
+        ess_vdof_marker[i] = ess_vdof_marker[i] || new_ess_vdof_marker[i];
+      }
+      mfem::Array<int> ess_tdof_marker;
+      par_fe_space.GetRestrictionMatrix()->BooleanMult( ess_vdof_marker, ess_tdof_marker );
+      mfem::FiniteElementSpace::MarkerToList( ess_tdof_marker, ess_tdof_list );
+    }
+
+    // set up mfem elasticity bilinear form
+    mfem::ParBilinearForm a( &par_fe_space );
+    mfem::ConstantCoefficient lambda( 50.0 );
+    mfem::ConstantCoefficient mu( 50.0 );
+    a.AddDomainIntegrator( new mfem::ElasticityIntegrator( lambda, mu ) );
+    a.Assemble();
+
+    // compute elasticity contribution to stiffness
+    auto A = std::make_unique<mfem::HypreParMatrix>();
+    a.FormSystemMatrix( ess_tdof_list, *A );
+
+    // set up tribol
+    coords.ReadWrite();
+    int coupling_scheme_id = 0;
+    int mesh1_id = 0;
+    int mesh2_id = 1;
+    tribol::registerMfemCouplingScheme( coupling_scheme_id, mesh1_id, mesh2_id, mesh, coords, mortar_attrs,
+                                        nonmortar_attrs, tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
+                                        tribol::ENERGY_MORTAR, tribol::FRICTIONLESS, tribol::LAGRANGE_MULTIPLIER,
+                                        tribol::BINNING_GRID );
+    tribol::setLagrangeMultiplierOptions( coupling_scheme_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
+
+    // Set Penalty options
+    tribol::setMfemKinematicConstantPenalty( coupling_scheme_id, 50.0, 50.0 );
+
+    coords.ReadWrite();
+    // update tribol (compute contact contribution to force and stiffness)
+    tribol::updateMfemParallelDecomposition();
+    tribol::RealT dt{ 1.0 };  // time is arbitrary here (no timesteps)
+    tribol::update( 1, 1.0, dt );
+
+    // retrieve contact stiffness matrix
+    auto A_cont = tribol::getMfemDfDx( coupling_scheme_id );
+
+    // retrieve contact force (response)
+    mfem::Vector f_contact( par_fe_space.GetTrueVSize() );
+    f_contact = 0.0;
+    tribol::getMfemResponse( coupling_scheme_id, f_contact );
+
+    // Add contact stiffness to elasticity stiffness
+    // mfem::Add(1.0, *A, 1.0, *A_cont) returns a new HypreParMatrix
+    auto A_total = std::unique_ptr<mfem::HypreParMatrix>( mfem::Add( 1.0, *A, 1.0, *A_cont ) );
+
+    // Create RHS.
+    // We want to solve (K_elast + K_contact) * du = f_contact + (f_ext=0)
+    // f_contact returned by Tribol is usually the force exerted by contact on the nodes.
+    // Ideally, Residual R = F_int(u) - F_ext - F_contact.
+    // Here we assume linear: K_elast * u - F_contact = 0 ?
+    // If we start at u=0, F_int=0.
+    // If we have overlap, F_contact is repulsive (pushing nodes apart).
+    // So K * du = F_contact.
+
+    // Solve for X (displacement)
+    mfem::Vector X( par_fe_space.GetTrueVSize() );
+    X = 0.0;
+
+    mfem::MINRESSolver solver( MPI_COMM_WORLD );
+    solver.SetRelTol( 1.0e-8 );
+    solver.SetAbsTol( 1.0e-12 );
+    solver.SetMaxIter( 5000 );
+    solver.SetPrintLevel( 3 );
+    solver.SetOperator( *A_total );
+    solver.Mult( f_contact, X );
+
+    // move displacements to grid function
+    {
+      auto& P = *par_fe_space.GetProlongationMatrix();
+      P.Mult( X, displacement );
+    }
+    // Note: tribol_mfem_mortar_lm.cpp does `displacement.Neg()` because LM solution X_blk usually gives displacement
+    // correction. Here, if F_contact pushes apart, X should separate the meshes. Top mesh moves up, Bottom mesh moves
+    // down. Top mesh (y~1.0) -> +dy Bottom mesh (y~1.0) -> -dy
+
+    // We can check max displacement magnitude.
+    auto local_max = displacement.Max();
+    max_disp_ = 0.0;
+    MPI_Allreduce( &local_max, &max_disp_, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
+  }
+};
+
+TEST_P( MfemMortarEnergyTest, check_mortar_displacement )
+{
+  // Expected displacement:
+  // Overlap is 0.01.
+  // Two blocks of equal stiffness.
+  // They should push apart to resolve overlap?
+  // If penalty is high enough, they separate by ~0.005 each.
+  // If penalty is comparable to modulus (50.0 vs 50.0), it might be soft.
+  // With k=50, E=50...
+  // Force ~ k * delta.
+  // Displacement ~ F / k_elast.
+  // This is a coupled system.
+  // Let's just check that max_disp_ is positive and roughly correct order of magnitude.
+  // In LM test (overlap 0.01?), result was 0.005.
+  // Here we use penalty. If k=infinite, it should be 0.005.
+  // With k=50, it will be less than 0.005 (remaining overlap).
+
+  // Actually, tribol_mfem_mortar_lm.cpp test uses overlap 0.01 (0.99 vs 1.0).
+  // Displacement result is 0.005.
+  // Here we use same parameters.
+
+  // We expect some displacement.
+  EXPECT_GT( max_disp_, 0.0 );
+  EXPECT_LT( max_disp_, 0.01 );
+
+  MPI_Barrier( MPI_COMM_WORLD );
+}
+
+INSTANTIATE_TEST_SUITE_P( tribol, MfemMortarEnergyTest,
+                          testing::Values( std::make_tuple( 2, mfem::Element::Type::QUADRILATERAL ),
+                                           std::make_tuple( 2, mfem::Element::Type::TRIANGLE ) ) );
+
+//------------------------------------------------------------------------------
+#include "axom/slic/core/SimpleLogger.hpp"
+
+int main( int argc, char* argv[] )
+{
+  int result = 0;
+
+  MPI_Init( &argc, &argv );
+
+  ::testing::InitGoogleTest( &argc, argv );
+
+#ifdef TRIBOL_USE_UMPIRE
+  umpire::ResourceManager::getInstance();  // initialize umpire's ResourceManager
+#endif
+
+  axom::slic::SimpleLogger logger;  // create & initialize test logger, finalized when
+                                    // exiting main scope
+
+  result = RUN_ALL_TESTS();
+
+  tribol::finalize();
+  MPI_Finalize();
+
+  return result;
+}

From 2b3c8f1a6509ad5d145dc46c677ebc9209957838 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 5 Feb 2026 22:56:36 -0800
Subject: [PATCH 17/56] make test 2d and add debug prints

---
 src/examples/mfem_mortar_energy_patch.cpp | 157 ++++++++++++++++------
 1 file changed, 113 insertions(+), 44 deletions(-)

diff --git a/src/examples/mfem_mortar_energy_patch.cpp b/src/examples/mfem_mortar_energy_patch.cpp
index 1b19bcf4..8b6de2e6 100644
--- a/src/examples/mfem_mortar_energy_patch.cpp
+++ b/src/examples/mfem_mortar_energy_patch.cpp
@@ -60,6 +60,19 @@ int main( int argc, char** argv )
   int rank;
   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
 
+  // // Only make Rank 0 wait (or whichever rank you want to debug)
+  // if ( rank == 0 ) {
+  //   volatile int debug_wait = 1;
+  //   printf( "Rank %d is ready to attach. PID: %d\n", rank, getpid() );
+  //   fflush( stdout );
+
+  //   while ( debug_wait ) {
+  //     sleep( 1 );  // Sleep to avoid burning 100% CPU
+  //   }
+  // }
+
+  // MPI_Barrier( MPI_COMM_WORLD );  // Keep other ranks from running ahead
+
 #ifdef TRIBOL_USE_UMPIRE
   umpire::ResourceManager::getInstance();  // initialize umpire's ResouceManager
 #endif
@@ -78,7 +91,9 @@ int main( int argc, char** argv )
   // Lame parameter mu (shear modulus)
   double mu = 50.0;
   // Penalty parameter
-  double penalty = 50.0;
+  double penalty = 1000.0;
+  // Write debug data to screen (force and stiffness)
+  bool debug = false;
   // device configuration string (see mfem::Device::Configure() for valid options)
   std::string device_config = "cpu";
 
@@ -88,6 +103,7 @@ int main( int argc, char** argv )
   app.add_option( "-l,--lambda", lambda, "Lame parameter lambda." )->capture_default_str();
   app.add_option( "-m,--mu", mu, "Lame parameter mu (shear modulus)." )->capture_default_str();
   app.add_option( "-p,--penalty", penalty, "Contact penalty parameter." )->capture_default_str();
+  app.add_option( "-d,--debug", debug, "Write debug data to screen (force and stiffness)." )->capture_default_str();
   // app.add_option( "-d,--device", device_config, "Device configuration string." )->capture_default_str();
 
   CLI11_PARSE( app, argc, argv );
@@ -106,41 +122,37 @@ int main( int argc, char** argv )
 
   // fixed options
   // boundary element attributes of mortar surface, the z = 1 plane of the first block
-  std::set<int> mortar_attrs( { 4 } );
+  std::set<int> mortar_attrs( { 5 } );
   // boundary element attributes of nonmortar surface, the z = 0.99 plane of the second block
-  std::set<int> nonmortar_attrs( { 5 } );
-  // boundary element attributes of x-fixed surfaces (at x = 0)
-  std::vector<std::set<int>> fixed_attrs( 3 );
-  fixed_attrs[0] = { 1 };
-  // boundary element attributes of y-fixed surfaces (at y = 0)
-  fixed_attrs[1] = { 2 };
-  // boundary element attributes of z-fixed surfaces (3: surface at z = 0, 6: surface at z = 1.99)
-  fixed_attrs[2] = { 3, 6 };
+  std::set<int> nonmortar_attrs( { 3 } );
+  // boundary element attributes of x-fixed surfaces (left side)
+  auto xfixed_attrs = std::set<int>( { 4 } );
+  // boundary element attributes of y-fixed surfaces (bottom of bottom square, top of top square)
+  auto yfixed_attrs = std::set<int>( { 1 } );
 
   // create an axom timer to give wall times for each step
   axom::utilities::Timer timer{ false };
 
   timer.start();
-  // build mesh of 2 cubes
+  // build mesh of 2 squares
   int nel_per_dir = std::pow( 2, ref_levels );
-  auto elem_type = mfem::Element::HEXAHEDRON;
+
   // clang-format off
   mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
-    shared::MeshBuilder::CubeMesh(nel_per_dir, nel_per_dir, nel_per_dir, elem_type)
-      .updateBdrAttrib(3, 7)
-      .updateBdrAttrib(1, 3)
-      .updateBdrAttrib(4, 7)
-      .updateBdrAttrib(5, 1)
-      .updateBdrAttrib(6, 4),
-    shared::MeshBuilder::CubeMesh(nel_per_dir, nel_per_dir, nel_per_dir, elem_type)
-      .translate({0.0, 0.0, 0.99})
-      .updateBdrAttrib(1, 8)
-      .updateBdrAttrib(3, 7)
-      .updateBdrAttrib(4, 7)
-      .updateBdrAttrib(5, 1)
-      .updateBdrAttrib(8, 5)
+    shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir) // Bottom mesh [0,1]x[0,1]
+      .updateBdrAttrib(1, 1) // Bottom (Fixed Y)
+      .updateBdrAttrib(2, 2) // Right
+      .updateBdrAttrib(3, 3) // Top (NonMortar)
+      .updateBdrAttrib(4, 4), // Left (Fixed X)
+    shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir) // Top mesh [0,1]x[0,1]
+      .translate({0.0, 0.99}) // Shift up to [0,1]x[0.99, 1.99]. Overlap 0.01.
+      .updateBdrAttrib(1, 5) // Bottom (Mortar)
+      .updateBdrAttrib(2, 2) // Right
+      .updateBdrAttrib(3, 1) // Top (Fixed Y)
+      .updateBdrAttrib(4, 4) // Left (Fixed X)
   }));
   // clang-format on
+
   timer.stop();
   SLIC_INFO_ROOT( axom::fmt::format( "Time to create parallel mesh: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
 
@@ -175,28 +187,24 @@ int main( int argc, char** argv )
   timer.start();
   mfem::Array<int> ess_tdof_list;
   {
-    // First, build an array of "markers" (i.e. booleans) to denote which vdofs are in the list.
-    mfem::Array<int> ess_vdof_marker( fespace.GetVSize() );
-    ess_vdof_marker = 0;
-    for ( int d = 0; d < 3; ++d ) {
-      // convert boundary attributes into markers for active attributes on the dimension d
-      mfem::Array<int> ess_bdr( mesh.bdr_attributes.Max() );
-      ess_bdr = 0;
-      for ( auto xfixed_attr : fixed_attrs[d] ) {
-        ess_bdr[xfixed_attr - 1] = 1;
-      }
-      mfem::Array<int> new_ess_vdof_marker;
-      // Find all vdofs with the given boundary marker
-      fespace.GetEssentialVDofs( ess_bdr, new_ess_vdof_marker, d );
-      // Compute union of existing marked vdofs with vdofs marked on dimension d
-      for ( int j = 0; j < new_ess_vdof_marker.Size(); ++j ) {
-        ess_vdof_marker[j] = ess_vdof_marker[j] || new_ess_vdof_marker[j];
-      }
+    mfem::Array<int> ess_vdof_marker;
+    mfem::Array<int> ess_bdr( mesh.bdr_attributes.Max() );
+    ess_bdr = 0;
+    for ( auto xfixed_attr : xfixed_attrs ) {
+      if ( xfixed_attr <= ess_bdr.Size() ) ess_bdr[xfixed_attr - 1] = 1;
+    }
+    fespace.GetEssentialVDofs( ess_bdr, ess_vdof_marker, 0 );
+    mfem::Array<int> new_ess_vdof_marker;
+    ess_bdr = 0;
+    for ( auto yfixed_attr : yfixed_attrs ) {
+      if ( yfixed_attr <= ess_bdr.Size() ) ess_bdr[yfixed_attr - 1] = 1;
+    }
+    fespace.GetEssentialVDofs( ess_bdr, new_ess_vdof_marker, 1 );
+    for ( int i{ 0 }; i < ess_vdof_marker.Size(); ++i ) {
+      ess_vdof_marker[i] = ess_vdof_marker[i] || new_ess_vdof_marker[i];
     }
-    // Convert the vdofs to tdofs to remove duplicate values over ranks
     mfem::Array<int> ess_tdof_marker;
     fespace.GetRestrictionMatrix()->BooleanMult( ess_vdof_marker, ess_tdof_marker );
-    // Convert the tdof marker array to a tdof list
     mfem::FiniteElementSpace::MarkerToList( ess_tdof_marker, ess_tdof_list );
   }
   timer.stop();
@@ -254,6 +262,7 @@ int main( int argc, char** argv )
 
   // Add contact stiffness to elasticity stiffness
   auto A_total = std::unique_ptr<mfem::HypreParMatrix>( mfem::Add( 1.0, *A_elasticity, 1.0, *A_contact ) );
+  A_total->EliminateRowsCols( ess_tdof_list );
 
   timer.stop();
   SLIC_INFO_ROOT(
@@ -268,6 +277,66 @@ int main( int argc, char** argv )
   mfem::Vector f_contact( fespace.GetTrueVSize() );
   f_contact = 0.0;
   tribol::getMfemResponse( coupling_scheme_id, f_contact );
+  f_contact.Neg();
+  for ( int i{ 0 }; i < ess_tdof_list.Size(); ++i ) {
+    f_contact( ess_tdof_list[i] ) = 0.0;
+  }
+
+  if ( debug ) {
+    int my_rank;
+    MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
+    int num_ranks;
+    MPI_Comm_size( MPI_COMM_WORLD, &num_ranks );
+    int dim = mesh.SpaceDimension();
+    int ndofs = fespace.GetNDofs();
+
+    // Prolong contact force to grid function space
+    mfem::Vector f_contact_nodes( fespace.GetVSize() );
+    fespace.GetProlongationMatrix()->Mult( f_contact, f_contact_nodes );
+
+    for ( int r = 0; r < num_ranks; ++r ) {
+      if ( my_rank == r ) {
+        std::cout << "Rank " << my_rank << " Coordinates:" << std::endl;
+        for ( int i = 0; i < ndofs; ++i ) {
+          std::cout << "node " << i << ": (";
+          for ( int d = 0; d < dim; ++d ) {
+            std::cout << coords( fespace.DofToVDof( i, d ) ) << ( d < dim - 1 ? ", " : "" );
+          }
+          std::cout << ")" << std::endl;
+        }
+
+        std::cout << "Rank " << my_rank << " Contact Forces:" << std::endl;
+        for ( int i = 0; i < ndofs; ++i ) {
+          std::cout << "node " << i << ": (";
+          for ( int d = 0; d < dim; ++d ) {
+            std::cout << f_contact_nodes( fespace.DofToVDof( i, d ) ) << ( d < dim - 1 ? ", " : "" );
+          }
+          std::cout << ")" << std::endl;
+        }
+        mfem::SparseMatrix sm_ela;
+        A_elasticity->MergeDiagAndOffd( sm_ela );
+        mfem::DenseMatrix dm_ela;
+        sm_ela.ToDenseMatrix( dm_ela );
+        std::cout << "Rank " << my_rank << " Elasticity Stiffness:" << std::endl;
+        dm_ela.Print( std::cout );
+
+        mfem::SparseMatrix sm_con;
+        A_contact->MergeDiagAndOffd( sm_con );
+        mfem::DenseMatrix dm_con;
+        sm_con.ToDenseMatrix( dm_con );
+        std::cout << "Rank " << my_rank << " Contact Stiffness:" << std::endl;
+        dm_con.Print( std::cout );
+
+        mfem::SparseMatrix sm_tot;
+        A_total->MergeDiagAndOffd( sm_tot );
+        mfem::DenseMatrix dm_tot;
+        sm_tot.ToDenseMatrix( dm_tot );
+        std::cout << "Rank " << my_rank << " Total Stiffness:" << std::endl;
+        dm_tot.Print( std::cout );
+      }
+      MPI_Barrier( MPI_COMM_WORLD );
+    }
+  }
 
   // Create a solution vector storing displacement
   mfem::Vector X( fespace.GetTrueVSize() );

From 136b8c4bf2aebf812340d77c4415e5892c6c012c Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 5 Feb 2026 22:57:14 -0800
Subject: [PATCH 18/56] get rid of mpi wait for debugging

---
 src/examples/mfem_mortar_energy_patch.cpp | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/src/examples/mfem_mortar_energy_patch.cpp b/src/examples/mfem_mortar_energy_patch.cpp
index 8b6de2e6..70529b6a 100644
--- a/src/examples/mfem_mortar_energy_patch.cpp
+++ b/src/examples/mfem_mortar_energy_patch.cpp
@@ -60,19 +60,6 @@ int main( int argc, char** argv )
   int rank;
   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
 
-  // // Only make Rank 0 wait (or whichever rank you want to debug)
-  // if ( rank == 0 ) {
-  //   volatile int debug_wait = 1;
-  //   printf( "Rank %d is ready to attach. PID: %d\n", rank, getpid() );
-  //   fflush( stdout );
-
-  //   while ( debug_wait ) {
-  //     sleep( 1 );  // Sleep to avoid burning 100% CPU
-  //   }
-  // }
-
-  // MPI_Barrier( MPI_COMM_WORLD );  // Keep other ranks from running ahead
-
 #ifdef TRIBOL_USE_UMPIRE
   umpire::ResourceManager::getInstance();  // initialize umpire's ResouceManager
 #endif

From a9ca63c6fef31c97f2120ad85d7f945a7e7341fe Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 5 Feb 2026 23:23:59 -0800
Subject: [PATCH 19/56] test cleanup

---
 src/tests/tribol_mfem_mortar_energy.cpp | 54 ++++++-------------------
 1 file changed, 13 insertions(+), 41 deletions(-)

diff --git a/src/tests/tribol_mfem_mortar_energy.cpp b/src/tests/tribol_mfem_mortar_energy.cpp
index 7024aedd..0caabe10 100644
--- a/src/tests/tribol_mfem_mortar_energy.cpp
+++ b/src/tests/tribol_mfem_mortar_energy.cpp
@@ -35,7 +35,7 @@
  * @brief This tests the Tribol MFEM interface running a contact patch test using ENERGY_MORTAR.
  *
  */
-class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int, mfem::Element::Type>> {
+class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int>> {
  protected:
   tribol::RealT max_disp_;
   void SetUp() override
@@ -53,8 +53,8 @@ class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int, mfem:
     auto nonmortar_attrs = std::set<int>( { 3 } );
     // boundary element attributes of x-fixed surfaces (left side)
     auto xfixed_attrs = std::set<int>( { 4 } );
-    // boundary element attributes of y-fixed surfaces (bottom of bottom square)
-    auto yfixed_attrs = std::set<int>( { 1 } );
+    // boundary element attributes of y-fixed surfaces (bottom of bottom square, top of top square)
+    auto yfixed_attrs = std::set<int>( { 1, 6 } );
 
     // build mesh of 2 squares
     int nel_per_dir = std::pow( 2, ref_levels );
@@ -70,7 +70,7 @@ class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int, mfem:
         .translate({0.0, 0.99}) // Shift up to [0,1]x[0.99, 1.99]. Overlap 0.01.
         .updateBdrAttrib(1, 5) // Bottom (Mortar)
         .updateBdrAttrib(2, 2) // Right
-        .updateBdrAttrib(3, 6) // Top
+        .updateBdrAttrib(3, 6) // Top (Fixed Y)
         .updateBdrAttrib(4, 4) // Left (Fixed X)
     }));
     // clang-format on
@@ -136,7 +136,7 @@ class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int, mfem:
     tribol::setLagrangeMultiplierOptions( coupling_scheme_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
 
     // Set Penalty options
-    tribol::setMfemKinematicConstantPenalty( coupling_scheme_id, 50.0, 50.0 );
+    tribol::setMfemKinematicConstantPenalty( coupling_scheme_id, 1000.0, 1000.0 );
 
     coords.ReadWrite();
     // update tribol (compute contact contribution to force and stiffness)
@@ -151,19 +151,14 @@ class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int, mfem:
     mfem::Vector f_contact( par_fe_space.GetTrueVSize() );
     f_contact = 0.0;
     tribol::getMfemResponse( coupling_scheme_id, f_contact );
+    f_contact.Neg();
+    for ( int i{ 0 }; i < ess_tdof_list.Size(); ++i ) {
+      f_contact( ess_tdof_list[i] ) = 0.0;
+    }
 
     // Add contact stiffness to elasticity stiffness
-    // mfem::Add(1.0, *A, 1.0, *A_cont) returns a new HypreParMatrix
     auto A_total = std::unique_ptr<mfem::HypreParMatrix>( mfem::Add( 1.0, *A, 1.0, *A_cont ) );
-
-    // Create RHS.
-    // We want to solve (K_elast + K_contact) * du = f_contact + (f_ext=0)
-    // f_contact returned by Tribol is usually the force exerted by contact on the nodes.
-    // Ideally, Residual R = F_int(u) - F_ext - F_contact.
-    // Here we assume linear: K_elast * u - F_contact = 0 ?
-    // If we start at u=0, F_int=0.
-    // If we have overlap, F_contact is repulsive (pushing nodes apart).
-    // So K * du = F_contact.
+    A_total->EliminateRowsCols( ess_tdof_list );
 
     // Solve for X (displacement)
     mfem::Vector X( par_fe_space.GetTrueVSize() );
@@ -182,9 +177,6 @@ class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int, mfem:
       auto& P = *par_fe_space.GetProlongationMatrix();
       P.Mult( X, displacement );
     }
-    // Note: tribol_mfem_mortar_lm.cpp does `displacement.Neg()` because LM solution X_blk usually gives displacement
-    // correction. Here, if F_contact pushes apart, X should separate the meshes. Top mesh moves up, Bottom mesh moves
-    // down. Top mesh (y~1.0) -> +dy Bottom mesh (y~1.0) -> -dy
 
     // We can check max displacement magnitude.
     auto local_max = displacement.Max();
@@ -195,35 +187,15 @@ class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int, mfem:
 
 TEST_P( MfemMortarEnergyTest, check_mortar_displacement )
 {
-  // Expected displacement:
-  // Overlap is 0.01.
-  // Two blocks of equal stiffness.
-  // They should push apart to resolve overlap?
-  // If penalty is high enough, they separate by ~0.005 each.
-  // If penalty is comparable to modulus (50.0 vs 50.0), it might be soft.
-  // With k=50, E=50...
-  // Force ~ k * delta.
-  // Displacement ~ F / k_elast.
-  // This is a coupled system.
-  // Let's just check that max_disp_ is positive and roughly correct order of magnitude.
-  // In LM test (overlap 0.01?), result was 0.005.
-  // Here we use penalty. If k=infinite, it should be 0.005.
-  // With k=50, it will be less than 0.005 (remaining overlap).
-
-  // Actually, tribol_mfem_mortar_lm.cpp test uses overlap 0.01 (0.99 vs 1.0).
-  // Displacement result is 0.005.
-  // Here we use same parameters.
-
-  // We expect some displacement.
+  // Penalty enforcement with nonlinear contact enforcement. Let's just check that max_disp_ is positive and roughly
+  // correct order of magnitude.
   EXPECT_GT( max_disp_, 0.0 );
   EXPECT_LT( max_disp_, 0.01 );
 
   MPI_Barrier( MPI_COMM_WORLD );
 }
 
-INSTANTIATE_TEST_SUITE_P( tribol, MfemMortarEnergyTest,
-                          testing::Values( std::make_tuple( 2, mfem::Element::Type::QUADRILATERAL ),
-                                           std::make_tuple( 2, mfem::Element::Type::TRIANGLE ) ) );
+INSTANTIATE_TEST_SUITE_P( tribol, MfemMortarEnergyTest, testing::Values( std::make_tuple( 2 ) ) );
 
 //------------------------------------------------------------------------------
 #include "axom/slic/core/SimpleLogger.hpp"

From c86c39df001e2903e1bb63ccb1db6a194d250fa1 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 5 Feb 2026 23:24:12 -0800
Subject: [PATCH 20/56] remove unneeded calls

---
 src/tribol/mesh/CouplingScheme.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/tribol/mesh/CouplingScheme.cpp b/src/tribol/mesh/CouplingScheme.cpp
index a29da30d..66d18b37 100644
--- a/src/tribol/mesh/CouplingScheme.cpp
+++ b/src/tribol/mesh/CouplingScheme.cpp
@@ -1023,10 +1023,8 @@ void CouplingScheme::performBinning()
 int CouplingScheme::apply( int cycle, RealT t, RealT& dt )
 {
   if ( m_formulation_impl ) {
-    // performBinning();
     if ( m_interface_pairs.size() > 0 ) {
       m_formulation_impl->setInterfacePairs( std::move( m_interface_pairs ), 0 );
-      m_formulation_impl->updateIntegrationRule();
     }
     m_formulation_impl->updateNodalGaps();
     m_formulation_impl->updateNodalForces();

From 4899cbc367ae6a1da39d4fed3f7b828a5a9035e2 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 5 Feb 2026 23:25:15 -0800
Subject: [PATCH 21/56] cleanup to work with changes to method

---
 src/tribol/mesh/MfemData.hpp                  |  7 ++++++
 .../physics/ContactFormulationFactory.cpp     | 25 +++++++------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/tribol/mesh/MfemData.hpp b/src/tribol/mesh/MfemData.hpp
index 17798cce..5152ed08 100644
--- a/src/tribol/mesh/MfemData.hpp
+++ b/src/tribol/mesh/MfemData.hpp
@@ -1529,6 +1529,13 @@ class MfemSubmeshData {
    */
   const mfem::GridFunction& GetRedecompGap() const { return redecomp_gap_; }
 
+  /**
+   * @brief Get mutable access to the gap grid function on the redecomp mesh
+   *
+   * @return mfem::GridFunction&
+   */
+  mfem::GridFunction& GetRedecompGap() { return redecomp_gap_; }
+
   /**
    * @brief Get the gap vector on the parent-linked boundary submesh
    *
diff --git a/src/tribol/physics/ContactFormulationFactory.cpp b/src/tribol/physics/ContactFormulationFactory.cpp
index 889c1ce7..23a91125 100644
--- a/src/tribol/physics/ContactFormulationFactory.cpp
+++ b/src/tribol/physics/ContactFormulationFactory.cpp
@@ -19,32 +19,25 @@ std::unique_ptr<ContactFormulation> createContactFormulation( CouplingScheme* cs
   if ( cs->getContactMethod() == ENERGY_MORTAR ) {
     // Default parameters for now, or extract from CouplingScheme if available
     double k = 1.0;
-    double delta = cs->getParameters().binning_proximity_scale; 
+    double delta = 0.01;
     int N = 3;
 
 #ifdef BUILD_REDECOMP
     if ( cs->hasMfemData() ) {
-        // Attempt to get penalty from MfemMeshData if available
-        auto* k_ptr = cs->getMfemMeshData()->GetMesh1KinematicConstantPenalty();
-        if ( k_ptr ) {
-            k = *k_ptr;
-        }
+      // Attempt to get penalty from MfemMeshData if available
+      auto* k_ptr = cs->getMfemMeshData()->GetMesh1KinematicConstantPenalty();
+      if ( k_ptr ) {
+        k = *k_ptr;
+      }
     }
 
-    SLIC_ERROR_ROOT_IF( !cs->hasMfemData(), "ENERGY_MORTAR requires MFEM mesh data." );
     SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(), "ENERGY_MORTAR requires MFEM submesh data." );
     SLIC_ERROR_ROOT_IF( !cs->hasMfemJacobianData(), "ENERGY_MORTAR requires MFEM Jacobian data." );
 
-    return std::make_unique<NewMethodAdapter>(
-        *cs->getMfemMeshData(),
-        *cs->getMfemSubmeshData(),
-        *cs->getMfemJacobianData(),
-        cs->getMesh1(),
-        cs->getMesh2(),
-        k, delta, N
-    );
+    return std::make_unique<NewMethodAdapter>( *cs->getMfemSubmeshData(), *cs->getMfemJacobianData(), cs->getMesh1(),
+                                               cs->getMesh2(), k, delta, N );
 #else
-    SLIC_ERROR_ROOT("ENERGY_MORTAR requires BUILD_REDECOMP");
+    SLIC_ERROR_ROOT( "ENERGY_MORTAR requires BUILD_REDECOMP" );
     return nullptr;
 #endif
   }

From 608d107f94a29f4adbd6b6920be5748a4c22023b Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 5 Feb 2026 23:26:01 -0800
Subject: [PATCH 22/56] various bugfixes and cleanup

---
 src/tribol/physics/NewMethodAdapter.cpp | 110 ++++++++++++++++--------
 src/tribol/physics/NewMethodAdapter.hpp |   5 +-
 2 files changed, 75 insertions(+), 40 deletions(-)

diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index 849cb8ca..b753e6b2 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -7,10 +7,17 @@
 
 namespace tribol {
 
-NewMethodAdapter::NewMethodAdapter( MfemMeshData& mfem_data, MfemSubmeshData& submesh_data, MfemJacobianData& jac_data,
-                                    MeshData& mesh1, MeshData& mesh2, double k, double delta, int N )
-    : mfem_data_( mfem_data ), submesh_data_( submesh_data ), jac_data_( jac_data ), mesh1_( mesh1 ), mesh2_( mesh2 )
+NewMethodAdapter::NewMethodAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1,
+                                    MeshData& mesh2, double k, double delta, int N )
+    // NOTE: mesh1 maps to mesh2_ and mesh2 maps to mesh1_. This is to keep consistent with mesh1_ being non-mortar and
+    // mesh2_ being mortar as is typical in the literature, but different from Tribol convention.
+    : submesh_data_( submesh_data ), jac_data_( jac_data ), mesh1_( mesh2 ), mesh2_( mesh1 )
 {
+  if ( mesh1.numberOfNodes() > 0 && mesh2.numberOfNodes() > 0 ) {
+    SLIC_ERROR_ROOT_IF( mesh1.spatialDimension() != 2 || mesh2.spatialDimension() != 2,
+                        "ENERGY_MORTAR requires 2D meshes." );
+  }
+
   params_.k = k;
   params_.del = delta;
   params_.N = N;
@@ -33,7 +40,7 @@ void NewMethodAdapter::updateNodalGaps()
   // NOTE: user should have called updateMfemParallelDecomposition() with updated coords before calling this
 
   // Tribol level data structures for storing gap, area, and derivatives
-  auto redecomp_gap = submesh_data_.GetRedecompGap();
+  auto& redecomp_gap = submesh_data_.GetRedecompGap();
   mfem::GridFunction redecomp_area( redecomp_gap.FESpace() );
   redecomp_area = 0.0;
   MethodData dg_tilde_dx;
@@ -41,19 +48,23 @@ void NewMethodAdapter::updateNodalGaps()
                              pairs_.size() );
   MethodData dA_dx;
   dA_dx.reserveBlockJ( { BlockSpace::NONMORTAR, BlockSpace::MORTAR, BlockSpace::LAGRANGE_MULTIPLIER }, pairs_.size() );
+  const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
 
   auto mesh1_view = mesh1_.getView();
   auto mesh2_view = mesh2_.getView();
 
   // Compute local contributions
   for ( const auto& pair : pairs_ ) {
-    const auto elem1 = static_cast<int>( pair.m_element_id1 );
-    const auto elem2 = static_cast<int>( pair.m_element_id2 );
+    // These need to be flipped, since the pairs are determined with element 1 associated with mesh 1, and we flipped
+    // the mesh numbers to be consistent with the literature and since the underlying method integrates on element 1
+    InterfacePair flipped_pair( pair.m_element_id2, pair.m_element_id1 );
+    const auto elem1 = static_cast<int>( flipped_pair.m_element_id1 );
+    const auto elem2 = static_cast<int>( flipped_pair.m_element_id2 );
 
     double g_tilde_elem[2];
     double A_elem[2];
 
-    evaluator_->gtilde_and_area( pair, mesh1_view, mesh2_view, g_tilde_elem, A_elem );
+    evaluator_->gtilde_and_area( flipped_pair, mesh1_view, mesh2_view, g_tilde_elem, A_elem );
 
     if ( A_elem[0] <= 0.0 && A_elem[1] <= 0.0 ) {
       continue;
@@ -71,46 +82,46 @@ void NewMethodAdapter::updateNodalGaps()
     // compute g_tilde first derivative
     double dg_dx_node1[8];
     double dg_dx_node2[8];
-    evaluator_->grad_gtilde( pair, mesh1_view, mesh2_view, dg_dx_node1, dg_dx_node2 );
+    evaluator_->grad_gtilde( flipped_pair, mesh1_view, mesh2_view, dg_dx_node1, dg_dx_node2 );
     StackArray<DeviceArray2D<RealT>, 9> dg_tilde_dx_block( 3 );
     dg_tilde_dx_block( 2, 0 ) = DeviceArray2D<RealT>( 2, 4 );
     dg_tilde_dx_block( 2, 0 ).fill( 0.0 );
     dg_tilde_dx_block( 2, 1 ) = DeviceArray2D<RealT>( 2, 4 );
     dg_tilde_dx_block( 2, 1 ).fill( 0.0 );
     for ( int i{ 0 }; i < 4; ++i ) {
-      dg_tilde_dx_block( 2, 0 )( 0, i ) = dg_dx_node1[i];
+      dg_tilde_dx_block( 2, 0 )( 0, i ) = dg_dx_node1[node_idx[i]];
     }
     for ( int i{ 0 }; i < 4; ++i ) {
-      dg_tilde_dx_block( 2, 0 )( 1, i ) = dg_dx_node2[i];
+      dg_tilde_dx_block( 2, 0 )( 1, i ) = dg_dx_node2[node_idx[i]];
     }
     for ( int i{ 0 }; i < 4; ++i ) {
-      dg_tilde_dx_block( 2, 1 )( 0, i ) = dg_dx_node1[i + 4];
+      dg_tilde_dx_block( 2, 1 )( 0, i ) = dg_dx_node1[node_idx[i + 4]];
     }
     for ( int i{ 0 }; i < 4; ++i ) {
-      dg_tilde_dx_block( 2, 1 )( 1, i ) = dg_dx_node2[i + 4];
+      dg_tilde_dx_block( 2, 1 )( 1, i ) = dg_dx_node2[node_idx[i + 4]];
     }
     dg_tilde_dx.storeElemBlockJ( { elem1, elem2, elem1 }, dg_tilde_dx_block );
 
     // compute area first derivative
     double dA_dx_node1[8];
     double dA_dx_node2[8];
-    evaluator_->grad_trib_area( pair, mesh1_view, mesh2_view, dA_dx_node1, dA_dx_node2 );
+    evaluator_->grad_trib_area( flipped_pair, mesh1_view, mesh2_view, dA_dx_node1, dA_dx_node2 );
     StackArray<DeviceArray2D<RealT>, 9> dA_dx_block( 3 );
     dA_dx_block( 2, 0 ) = DeviceArray2D<RealT>( 2, 4 );
     dA_dx_block( 2, 0 ).fill( 0.0 );
     dA_dx_block( 2, 1 ) = DeviceArray2D<RealT>( 2, 4 );
     dA_dx_block( 2, 1 ).fill( 0.0 );
     for ( int i{ 0 }; i < 4; ++i ) {
-      dA_dx_block( 2, 0 )( 0, i ) = dA_dx_node1[i];
+      dA_dx_block( 2, 0 )( 0, i ) = dA_dx_node1[node_idx[i]];
     }
     for ( int i{ 0 }; i < 4; ++i ) {
-      dA_dx_block( 2, 0 )( 1, i ) = dA_dx_node2[i];
+      dA_dx_block( 2, 0 )( 1, i ) = dA_dx_node2[node_idx[i]];
     }
     for ( int i{ 0 }; i < 4; ++i ) {
-      dA_dx_block( 2, 1 )( 0, i ) = dA_dx_node1[i + 4];
+      dA_dx_block( 2, 1 )( 0, i ) = dA_dx_node1[node_idx[i + 4]];
     }
     for ( int i{ 0 }; i < 4; ++i ) {
-      dA_dx_block( 2, 1 )( 1, i ) = dA_dx_node2[i + 4];
+      dA_dx_block( 2, 1 )( 1, i ) = dA_dx_node2[node_idx[i + 4]];
     }
     dA_dx.storeElemBlockJ( { elem1, elem2, elem1 }, dA_dx_block );
   }
@@ -188,31 +199,52 @@ void NewMethodAdapter::updateNodalForces()
 
   MethodData df_dx_data;
   df_dx_data.reserveBlockJ( { BlockSpace::NONMORTAR, BlockSpace::MORTAR }, pairs_.size() );
+  const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
+
+  mfem::GridFunction redecomp_pressure( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_pressure(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_pressure.SetFromTrueDofs( pressure_vec_ );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_pressure, redecomp_pressure );
+
+  mfem::GridFunction redecomp_g_tilde( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_g_tilde(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_g_tilde.SetFromTrueDofs( g_tilde_vec_ );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_g_tilde, redecomp_g_tilde );
+
+  mfem::GridFunction redecomp_A( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_A( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_A.SetFromTrueDofs( A_vec_ );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_A, redecomp_A );
 
   auto mesh1_view = mesh1_.getView();
   auto mesh2_view = mesh2_.getView();
 
   // get pairwise action of second derivatives of gaps and pressure for stiffness contribution
   for ( auto& pair : pairs_ ) {
-    const auto elem1 = static_cast<int>( pair.m_element_id1 );
+    // These need to be flipped, since the pairs are determined with element 1 associated with mesh 1, and we flipped
+    // the mesh numbers to be consistent with the literature and since the underlying method integrates on element 1
+    InterfacePair flipped_pair( pair.m_element_id2, pair.m_element_id1 );
+    const auto elem1 = static_cast<int>( flipped_pair.m_element_id1 );
     const auto node11 = mesh1_view.getConnectivity()( elem1, 0 );
     const auto node12 = mesh1_view.getConnectivity()( elem1, 1 );
-    const auto elem2 = static_cast<int>( pair.m_element_id2 );
+    const auto elem2 = static_cast<int>( flipped_pair.m_element_id2 );
 
-    const RealT pressure1 = 2.0 * pressure_vec_[node11];
-    const RealT pressure2 = 2.0 * pressure_vec_[node12];
+    const RealT pressure1 = 2.0 * redecomp_pressure[node11];
+    const RealT pressure2 = 2.0 * redecomp_pressure[node12];
 
     if ( pressure1 == 0.0 && pressure2 == 0.0 ) {
       continue;
     }
 
-    const RealT g_p_ainv1 = -g_tilde_vec_[node11] * pressure_vec_[node11] / A_vec_[node11];
-    const RealT g_p_ainv2 = -g_tilde_vec_[node12] * pressure_vec_[node12] / A_vec_[node12];
+    const RealT g_p_ainv1 = -redecomp_g_tilde[node11] * redecomp_pressure[node11] / redecomp_A[node11];
+    const RealT g_p_ainv2 = -redecomp_g_tilde[node12] * redecomp_pressure[node12] / redecomp_A[node12];
 
     double df_dx_node1[64];
     double df_dx_node2[64];
     // ordering: [dg/(dx0dx0) dg/(dy0dx0) dg/(dx1dx0) ...]
-    evaluator_->d2_g2tilde( pair, mesh1_view, mesh2_view, df_dx_node1, df_dx_node2 );
+    evaluator_->d2_g2tilde( flipped_pair, mesh1_view, mesh2_view, df_dx_node1, df_dx_node2 );
     StackArray<DeviceArray2D<RealT>, 9> df_dx_block( 2 );
     df_dx_block( 0, 0 ) = DeviceArray2D<RealT>( 4, 4 );
     df_dx_block( 0, 0 ).fill( 0.0 );
@@ -224,47 +256,51 @@ void NewMethodAdapter::updateNodalForces()
     df_dx_block( 1, 1 ).fill( 0.0 );
     for ( int j{ 0 }; j < 4; ++j ) {
       for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 0, 0 )( i, j ) = pressure1 * df_dx_node1[i + j * 8] + pressure2 * df_dx_node2[i + j * 8];
+        df_dx_block( 0, 0 )( i, j ) = pressure1 * df_dx_node1[node_idx[i] + node_idx[j] * 8] +
+                                      pressure2 * df_dx_node2[node_idx[i] + node_idx[j] * 8];
       }
     }
     for ( int j{ 0 }; j < 4; ++j ) {
       for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 0, 1 )( i, j ) =
-            pressure1 * df_dx_node1[i + ( j + 4 ) * 8] + pressure2 * df_dx_node2[i + ( j + 4 ) * 8];
+        df_dx_block( 0, 1 )( i, j ) = pressure1 * df_dx_node1[node_idx[i] + node_idx[j + 4] * 8] +
+                                      pressure2 * df_dx_node2[node_idx[i] + node_idx[j + 4] * 8];
       }
     }
     for ( int j{ 0 }; j < 4; ++j ) {
       for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 1, 0 )( i, j ) = pressure1 * df_dx_node1[i + 4 + j * 8] + pressure2 * df_dx_node2[i + 4 + j * 8];
+        df_dx_block( 1, 0 )( i, j ) = pressure1 * df_dx_node1[node_idx[i + 4] + node_idx[j] * 8] +
+                                      pressure2 * df_dx_node2[node_idx[i + 4] + node_idx[j] * 8];
       }
     }
     for ( int j{ 0 }; j < 4; ++j ) {
       for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 1, 1 )( i, j ) =
-            pressure1 * df_dx_node1[i + 4 + ( j + 4 ) * 8] + pressure2 * df_dx_node2[i + 4 + ( j + 4 ) * 8];
+        df_dx_block( 1, 1 )( i, j ) = pressure1 * df_dx_node1[node_idx[i + 4] + node_idx[j + 4] * 8] +
+                                      pressure2 * df_dx_node2[node_idx[i + 4] + node_idx[j + 4] * 8];
       }
     }
-    evaluator_->compute_d2A_d2u( pair, mesh1_view, mesh2_view, df_dx_node1, df_dx_node2 );
+    evaluator_->compute_d2A_d2u( flipped_pair, mesh1_view, mesh2_view, df_dx_node1, df_dx_node2 );
     for ( int j{ 0 }; j < 4; ++j ) {
       for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 0, 0 )( i, j ) += g_p_ainv1 * df_dx_node1[i + j * 8] + g_p_ainv2 * df_dx_node2[i + j * 8];
+        df_dx_block( 0, 0 )( i, j ) += g_p_ainv1 * df_dx_node1[node_idx[i] + node_idx[j] * 8] +
+                                       g_p_ainv2 * df_dx_node2[node_idx[i] + node_idx[j] * 8];
       }
     }
     for ( int j{ 0 }; j < 4; ++j ) {
       for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 0, 1 )( i, j ) +=
-            g_p_ainv1 * df_dx_node1[i + ( j + 4 ) * 8] + g_p_ainv2 * df_dx_node2[i + ( j + 4 ) * 8];
+        df_dx_block( 0, 1 )( i, j ) += g_p_ainv1 * df_dx_node1[node_idx[i] + node_idx[j + 4] * 8] +
+                                       g_p_ainv2 * df_dx_node2[node_idx[i] + node_idx[j + 4] * 8];
       }
     }
     for ( int j{ 0 }; j < 4; ++j ) {
       for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 1, 0 )( i, j ) += g_p_ainv1 * df_dx_node1[i + 4 + j * 8] + g_p_ainv2 * df_dx_node2[i + 4 + j * 8];
+        df_dx_block( 1, 0 )( i, j ) += g_p_ainv1 * df_dx_node1[node_idx[i + 4] + node_idx[j] * 8] +
+                                       g_p_ainv2 * df_dx_node2[node_idx[i + 4] + node_idx[j] * 8];
       }
     }
     for ( int j{ 0 }; j < 4; ++j ) {
       for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 1, 1 )( i, j ) +=
-            g_p_ainv1 * df_dx_node1[i + 4 + ( j + 4 ) * 8] + g_p_ainv2 * df_dx_node2[i + 4 + ( j + 4 ) * 8];
+        df_dx_block( 1, 1 )( i, j ) += g_p_ainv1 * df_dx_node1[node_idx[i + 4] + node_idx[j + 4] * 8] +
+                                       g_p_ainv2 * df_dx_node2[node_idx[i + 4] + node_idx[j + 4] * 8];
       }
     }
     df_dx_data.storeElemBlockJ( { elem1, elem2 }, df_dx_block );
diff --git a/src/tribol/physics/NewMethodAdapter.hpp b/src/tribol/physics/NewMethodAdapter.hpp
index 2ea368d4..e22b6ccb 100644
--- a/src/tribol/physics/NewMethodAdapter.hpp
+++ b/src/tribol/physics/NewMethodAdapter.hpp
@@ -27,8 +27,8 @@ class NewMethodAdapter : public ContactFormulation {
    * @param delta Smoothing length
    * @param N Quadrature order
    */
-  NewMethodAdapter( MfemMeshData& mfem_data, MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1,
-                    MeshData& mesh2, double k, double delta, int N );
+  NewMethodAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1, MeshData& mesh2,
+                    double k, double delta, int N );
 
   virtual ~NewMethodAdapter() = default;
 
@@ -61,7 +61,6 @@ class NewMethodAdapter : public ContactFormulation {
  private:
   // --- Member Variables ---
 
-  MfemMeshData& mfem_data_;
   MfemSubmeshData& submesh_data_;
   MfemJacobianData& jac_data_;
   MeshData& mesh1_;

From b3bf93a0a8b4c1f97561aac2ee02392c221d1d1f Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 6 Feb 2026 09:00:54 -0800
Subject: [PATCH 23/56] add options output for debug

---
 src/examples/mfem_mortar_energy_patch.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/examples/mfem_mortar_energy_patch.cpp b/src/examples/mfem_mortar_energy_patch.cpp
index 70529b6a..5f105df8 100644
--- a/src/examples/mfem_mortar_energy_patch.cpp
+++ b/src/examples/mfem_mortar_energy_patch.cpp
@@ -99,7 +99,8 @@ int main( int argc, char** argv )
   SLIC_INFO_ROOT( axom::fmt::format( "refine:   {0}", ref_levels ) );
   SLIC_INFO_ROOT( axom::fmt::format( "lambda:   {0}", lambda ) );
   SLIC_INFO_ROOT( axom::fmt::format( "mu:       {0}", mu ) );
-  SLIC_INFO_ROOT( axom::fmt::format( "penalty:  {0}\n", penalty ) );
+  SLIC_INFO_ROOT( axom::fmt::format( "penalty:  {0}", penalty ) );
+  SLIC_INFO_ROOT( axom::fmt::format( "debug:    {0}\n", debug ) );
 
   // configure the devices available for MFEM kernel launches
   mfem::Device device( device_config );

From 61c3ac3f9964a3173d3df0b218b1fdf129307496 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Tue, 10 Feb 2026 10:06:04 -0800
Subject: [PATCH 24/56] initial implementation

---
 src/examples/CMakeLists.txt                   |   1 +
 src/examples/jacobian_transfer_comparison.cpp | 201 +++++++++++++++
 src/redecomp/transfer/MatrixTransfer.cpp      | 169 ++++++++++++
 src/redecomp/transfer/MatrixTransfer.hpp      |  16 ++
 src/tests/CMakeLists.txt                      |   1 +
 src/tests/tribol_mfem_jacobian.cpp            | 244 ++++++++++++++++++
 src/tribol/mesh/MfemData.cpp                  | 104 ++++++++
 src/tribol/mesh/MfemData.hpp                  |  20 ++
 8 files changed, 756 insertions(+)
 create mode 100644 src/examples/jacobian_transfer_comparison.cpp
 create mode 100644 src/tests/tribol_mfem_jacobian.cpp

diff --git a/src/examples/CMakeLists.txt b/src/examples/CMakeLists.txt
index a8cbbbe2..f44ae861 100644
--- a/src/examples/CMakeLists.txt
+++ b/src/examples/CMakeLists.txt
@@ -101,6 +101,7 @@ if ( BUILD_REDECOMP )
   set( examples
       mfem_mortar_lm_patch.cpp
       mfem_common_plane.cpp
+      jacobian_transfer_comparison.cpp
       )
 
   foreach( example ${examples} )
diff --git a/src/examples/jacobian_transfer_comparison.cpp b/src/examples/jacobian_transfer_comparison.cpp
new file mode 100644
index 00000000..b960c5a7
--- /dev/null
+++ b/src/examples/jacobian_transfer_comparison.cpp
@@ -0,0 +1,201 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include <set>
+#include <vector>
+#include <chrono>
+
+#include "mfem.hpp"
+#include "axom/core.hpp"
+#include "axom/slic.hpp"
+#include "shared/mesh/MeshBuilder.hpp"
+#include "tribol/config.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/interface/tribol.hpp"
+#include "tribol/interface/mfem_tribol.hpp"
+#include "tribol/mesh/CouplingScheme.hpp"
+#include "tribol/mesh/MfemData.hpp"
+#include "tribol/mesh/CouplingScheme.hpp"
+
+#ifdef TRIBOL_USE_UMPIRE
+#include "umpire/ResourceManager.hpp"
+#endif
+
+// Builds a two-block hex mesh, synthesizes MORTAR x MORTAR element Jacobians, and checks that the MethodData-based
+// ("old") and ComputedElementData-based ("new") assembly paths of MfemJacobianData produce the same parallel matrix.
+int main( int argc, char** argv )
+{
+  MPI_Init( &argc, &argv );
+  int rank, n_ranks;
+  MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+  MPI_Comm_size( MPI_COMM_WORLD, &n_ranks );
+
+#ifdef TRIBOL_USE_UMPIRE
+  umpire::ResourceManager::getInstance();
+#endif
+
+  axom::slic::SimpleLogger logger;
+  axom::slic::setIsRoot( rank == 0 );
+
+  int ref_levels = 2;
+  axom::CLI::App app{ "jacobian_transfer_comparison" };
+  app.add_option( "-r,--refine", ref_levels, "Number of times to refine the mesh uniformly." )->capture_default_str();
+  CLI11_PARSE( app, argc, argv );
+
+  // 1. Setup mesh (2 blocks)
+  int nel_per_dir = static_cast<int>( std::pow( 2, ref_levels ) );
+  auto elem_type = mfem::Element::HEXAHEDRON;
+  auto mortar_attrs = std::set<int>( { 4 } );
+  auto nonmortar_attrs = std::set<int>( { 5 } );
+
+  mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
+    shared::MeshBuilder::CubeMesh(nel_per_dir, nel_per_dir, nel_per_dir, elem_type)
+      .updateBdrAttrib(4, 7)
+      .updateBdrAttrib(6, 4),
+    shared::MeshBuilder::CubeMesh(nel_per_dir, nel_per_dir, nel_per_dir, elem_type)
+      .translate({0.0, 0.0, 0.99})
+      .updateBdrAttrib(8, 5)
+  }));
+
+  int dim = mesh.SpaceDimension();
+  mfem::H1_FECollection fec( 1, dim );
+  mfem::ParFiniteElementSpace fespace( &mesh, &fec, dim );
+  mfem::ParGridFunction coords( &fespace );
+  mesh.GetNodes( coords );
+
+  // 2. Register Tribol Coupling Scheme
+  int cs_id = 0;
+  tribol::registerMfemCouplingScheme( cs_id, 0, 1, mesh, coords, mortar_attrs,
+                                      nonmortar_attrs, tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
+                                      tribol::SINGLE_MORTAR, tribol::FRICTIONLESS, tribol::LAGRANGE_MULTIPLIER,
+                                      tribol::BINNING_GRID );
+  tribol::setMPIComm( cs_id, MPI_COMM_WORLD );
+  tribol::updateMfemParallelDecomposition();
+
+  // 3. Setup MfemJacobianData
+  auto* cs = tribol::CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_IF( cs == nullptr, "Coupling scheme was not found after registration." );
+  if (!cs->hasMfemJacobianData()) {
+      cs->setMfemJacobianData( std::make_unique<tribol::MfemJacobianData>(
+          *cs->getMfemMeshData(), *cs->getMfemSubmeshData(), cs->getContactMethod() ) );
+  }
+  auto* jac_data = cs->getMfemJacobianData();
+  jac_data->UpdateJacobianXfer();
+
+  // 4. Synthesize Jacobian data for comparison
+  // We'll create some dummy element matrices for elements on Mesh 1 (Mortar)
+  int ne1 = cs->getMfemMeshData()->GetMesh1NE();
+  int num_nodes_per_elem = 4; // Quad faces
+  int num_dofs_per_elem = num_nodes_per_elem * dim;
+
+  // New format: ComputedElementData
+  tribol::ComputedElementData new_data;
+  new_data.row_space = tribol::BlockSpace::MORTAR;
+  new_data.col_space = tribol::BlockSpace::MORTAR;
+  new_data.jacobian_data.resize(ne1 * num_dofs_per_elem * num_dofs_per_elem);
+  new_data.jacobian_offsets.resize(ne1);
+  new_data.row_elem_ids.resize(ne1);
+  new_data.col_elem_ids.resize(ne1);
+
+  // Prepare MethodData for OLD path comparison
+  if ( cs->getMethodData() == nullptr ) {
+      cs->allocateMethodData();
+  }
+  auto* method_data = cs->getMethodData();
+  SLIC_ASSERT( method_data != nullptr );
+
+  tribol::ArrayT<tribol::BlockSpace> spaces({tribol::BlockSpace::MORTAR});
+  method_data->reserveBlockJ(std::move(spaces), ne1);
+
+  for (int e = 0; e < ne1; ++e) {
+      new_data.row_elem_ids[e] = e;
+      new_data.col_elem_ids[e] = e;
+      new_data.jacobian_offsets[e] = e * num_dofs_per_elem * num_dofs_per_elem;
+
+      tribol::StackArray<tribol::DeviceArray2D<tribol::RealT>, 9> blockJ;
+      blockJ[0] = tribol::DeviceArray2D<tribol::RealT>(num_dofs_per_elem, num_dofs_per_elem);
+
+      for (int i = 0; i < num_dofs_per_elem; ++i) {
+          for (int j = 0; j < num_dofs_per_elem; ++j) {
+              double val = static_cast<double>(e + i + j);
+              blockJ[0](i, j) = val;
+              new_data.jacobian_data[new_data.jacobian_offsets[e] + i + j * num_dofs_per_elem] = val;
+          }
+      }
+      tribol::ArrayT<int> ids({e});
+      method_data->storeElemBlockJ(std::move(ids), blockJ);
+  }
+
+  // 5. Time and assemble using Old Method
+  auto start_old = std::chrono::high_resolution_clock::now();
+
+  // Simulated OLD path logic
+  auto xfer = cs->getMfemJacobianData()->GetMfemBlockJacobian(
+      *method_data, {{0, tribol::BlockSpace::MORTAR}}, {{0, tribol::BlockSpace::MORTAR}}
+  );
+
+  auto end_old = std::chrono::high_resolution_clock::now();
+
+  // 6. Time and assemble using New Method
+  // We must call this on ALL ranks collectively.
+  std::vector<tribol::ComputedElementData> contribs_vec;
+  if (ne1 > 0) {
+      contribs_vec.push_back(std::move(new_data));
+  }
+
+  auto start_new = std::chrono::high_resolution_clock::now();
+  auto par_J_new = jac_data->GetMfemJacobian(contribs_vec);
+  auto end_new = std::chrono::high_resolution_clock::now();
+
+  // 7. Verify match. The old path result is accessed through GetBlock(), so extract its (0, 0) block to compare.
+  auto* old_hypre = dynamic_cast<mfem::HypreParMatrix*>( &xfer->GetBlock( 0, 0 ) );
+  SLIC_ERROR_IF( old_hypre == nullptr, "Old-path Jacobian block (0, 0) is not an mfem::HypreParMatrix." );
+  auto* new_hypre = &par_J_new.get();
+
+  // Check difference: A_old - A_new
+  tribol::ParSparseMat diff_psm = tribol::ParSparseMatView( old_hypre ) - tribol::ParSparseMatView( new_hypre );
+
+  // The subtraction might keep explicit zero entries, so the nonzero count alone is not a reliable test. Instead,
+  // take the largest absolute value stored in the diagonal and off-diagonal CSR blocks of the difference; for
+  // matching matrices it stays at (or very near) zero.
+
+  double max_err = 0.0;
+  HYPRE_ParCSRMatrix diff_csr = diff_psm.get();
+  auto* diff_parcsr = static_cast<hypre_ParCSRMatrix*>( diff_csr );
+  hypre_CSRMatrix* diag = hypre_ParCSRMatrixDiag(diff_parcsr);
+  double* data = hypre_CSRMatrixData(diag);
+  int num_nonzeros = hypre_CSRMatrixNumNonzeros(diag);
+  for(int i=0; i<num_nonzeros; ++i) {
+      max_err = std::max(max_err, std::abs(data[i]));
+  }
+  // Also check off-diagonal block
+  hypre_CSRMatrix* offd = hypre_ParCSRMatrixOffd(diff_parcsr);
+  data = hypre_CSRMatrixData(offd);
+  num_nonzeros = hypre_CSRMatrixNumNonzeros(offd);
+  for(int i=0; i<num_nonzeros; ++i) {
+      max_err = std::max(max_err, std::abs(data[i]));
+  }
+
+  double global_max_err = 0.0;
+  MPI_Allreduce(&max_err, &global_max_err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+
+  if ( rank == 0 ) {
+    std::cout << "Old method time: " << std::chrono::duration_cast<std::chrono::microseconds>( end_old - start_old ).count()
+              << " us" << std::endl;
+    std::cout << "New method time: " << std::chrono::duration_cast<std::chrono::microseconds>( end_new - start_new ).count()
+              << " us" << std::endl;
+    std::cout << "Matrix difference max err: " << global_max_err << std::endl;
+  }
+
+  if ( global_max_err > 1e-12 ) {
+    SLIC_ERROR_ROOT( "Matrices do not match!" );
+  } else {
+    SLIC_INFO_ROOT( "Verification successful: Matrices match." );
+  }
+
+  tribol::finalize();
+  MPI_Finalize();
+  return 0;
+}
diff --git a/src/redecomp/transfer/MatrixTransfer.cpp b/src/redecomp/transfer/MatrixTransfer.cpp
index 5dbc25eb..9ccd8f89 100644
--- a/src/redecomp/transfer/MatrixTransfer.cpp
+++ b/src/redecomp/transfer/MatrixTransfer.cpp
@@ -142,6 +142,175 @@ mfem::SparseMatrix MatrixTransfer::TransferToParallelSparse( const axom::Array<i
   return parentJ;
 }
 
+std::unique_ptr<mfem::HypreParMatrix> MatrixTransfer::TransferToParallelSparse(
+    const axom::Array<int>& test_elem_idx, const axom::Array<int>& trial_elem_idx,
+    const axom::Array<double>& src_elem_mat_data, const axom::Array<int>& src_elem_mat_offsets ) const
+{
+  // verify inputs
+  SLIC_ERROR_IF( test_elem_idx.size() != trial_elem_idx.size() || test_elem_idx.size() != src_elem_mat_offsets.size(),
+                 "Element index arrays and element Jacobian offsets array must be the same size." );
+  for ( int i{ 0 }; i < test_elem_idx.size(); ++i ) {
+    auto test_e = test_elem_idx[i];
+    auto trial_e = trial_elem_idx[i];
+
+    SLIC_ERROR_IF( test_e < 0, "Invalid primary index value." );
+    SLIC_ERROR_IF( trial_e < 0, "Invalid secondary index value." );
+
+    auto n_test_elem_vdofs = redecomp_test_fes_.GetFE( test_e )->GetDof() * redecomp_test_fes_.GetVDim();
+    auto n_trial_elem_vdofs = redecomp_trial_fes_.GetFE( trial_e )->GetDof() * redecomp_trial_fes_.GetVDim();
+    auto expected_size = n_test_elem_vdofs * n_trial_elem_vdofs;
+
+    // Check that we don't go out of bounds of the data array
+    SLIC_ERROR_IF( src_elem_mat_offsets[i] < 0 || src_elem_mat_offsets[i] + expected_size > src_elem_mat_data.size(),
+                   "Matrix offset and size exceeds data array bounds." );
+  }
+
+  auto test_redecomp = dynamic_cast<const RedecompMesh*>( redecomp_test_fes_.GetMesh() );
+  auto trial_redecomp = dynamic_cast<const RedecompMesh*>( redecomp_trial_fes_.GetMesh() );
+
+  // List of entries in src_elem_mat that belong on each parent test space rank.
+  // This is needed so we know which rank to send entries in src_elem_mat to.
+  auto send_array_ids = buildSendArrayIDs( test_elem_idx );
+  // Number of matrix entries to be sent to each parent test space rank.  This
+  // is used to size the array of element matrix values to be sent to other ranks.
+  auto send_num_mat_entries = buildSendNumMatEntries( test_elem_idx, trial_elem_idx );
+  // Number of test and trial vdofs received from test space redecomp ranks.
+  // This is used to determine the beginning and end of each element matrix
+  // received as a single array of values and the beginning and end of vdof indices
+  // received as a single array of values.
+  auto recv_mat_sizes = buildRecvMatSizes( test_elem_idx, trial_elem_idx );
+  // List of test element offsets received from test space redecomp ranks.  The
+  // offset is used with the parent to redecomp map (and the redecomp rank
+  // received from) to determine the parent element ID.  Parent element ID is
+  // used to determine the test vldofs of the element matrix entries received
+  // from redecomp ranks.
+  auto recv_test_elem_offsets = buildRecvTestElemOffsets( *test_redecomp, test_elem_idx );
+  // List of trial element global vdofs corresponding to the element matrix
+  // entries received from redecomp ranks.  The second column of recv_mat_sizes
+  // determines the offset for each trial element.
+  auto recv_trial_elem_dofs = buildRecvTrialElemDofs( *trial_redecomp, test_elem_idx, trial_elem_idx );
+
+  // Intermediate storage for triplets
+  struct Triplet {
+    int row;
+    HYPRE_BigInt col;
+    double val;
+  };
+  std::vector<Triplet> triplets;
+  // Estimate capacity to reduce reallocations
+  int total_recv_entries = 0;
+  for ( int src = 0; src < getMPIUtility().NRanks(); ++src ) {
+    for ( int e = 0; e < recv_mat_sizes[src].shape()[0]; ++e ) {
+      total_recv_entries += recv_mat_sizes[src]( e, 0 ) * recv_mat_sizes[src]( e, 1 );
+    }
+  }
+  triplets.reserve( total_recv_entries );
+
+  // aggregate dense matrix values, send and assemble
+  getMPIUtility().SendRecvEach(
+      type<axom::Array<double>>(),
+      [&send_array_ids, &send_num_mat_entries, &src_elem_mat_data, &src_elem_mat_offsets, &test_elem_idx,
+       &trial_elem_idx, this]( axom::IndexType dst ) {
+        auto send_vals = axom::Array<double>( 0, send_num_mat_entries[dst] );
+
+        for ( auto src_array_idx : send_array_ids[dst] ) {
+          // Calculate size from FE spaces
+          auto test_e = test_elem_idx[src_array_idx];
+          auto trial_e = trial_elem_idx[src_array_idx];
+          auto n_test_elem_vdofs = redecomp_test_fes_.GetFE( test_e )->GetDof() * redecomp_test_fes_.GetVDim();
+          auto n_trial_elem_vdofs = redecomp_trial_fes_.GetFE( trial_e )->GetDof() * redecomp_trial_fes_.GetVDim();
+          auto size = n_test_elem_vdofs * n_trial_elem_vdofs;
+
+          send_vals.append( axom::ArrayView<const double>( &src_elem_mat_data[src_elem_mat_offsets[src_array_idx]],
+                                                           size ) );
+        }
+
+        return send_vals;
+      },
+      [this, test_redecomp, &triplets, &recv_mat_sizes, &recv_trial_elem_dofs, &recv_test_elem_offsets](
+          axom::Array<double>&& send_vals, axom::IndexType src ) {
+        if ( recv_trial_elem_dofs[src].empty() ) {
+          return;
+        }
+        auto trial_dof_ct = 0;
+        auto dof_ct = 0;
+        // element loop
+        for ( int e{ 0 }; e < recv_test_elem_offsets[src].size(); ++e ) {
+          auto test_elem_id = test_redecomp->getParentToRedecompElems().first[src][recv_test_elem_offsets[src][e]];
+          auto test_elem_dofs = mfem::Array<int>();
+          parent_test_fes_.GetElementVDofs( test_elem_id, test_elem_dofs );
+          auto trial_elem_dofs =
+              mfem::Array<HYPRE_BigInt>( &recv_trial_elem_dofs[src][trial_dof_ct], recv_mat_sizes[src]( e, 1 ) );
+          // trial loop
+          for ( int j{ 0 }; j < trial_elem_dofs.Size(); ++j ) {
+            // test loop
+            for ( int i{ 0 }; i < test_elem_dofs.Size(); ++i ) {
+              triplets.push_back( { test_elem_dofs[i], trial_elem_dofs[j],
+                                    // send_vals holds the flattened element matrices (column major)
+                                    send_vals[dof_ct + i + j * test_elem_dofs.Size()] } );
+            }
+          }
+          trial_dof_ct += recv_mat_sizes[src]( e, 1 );
+          dof_ct += recv_mat_sizes[src]( e, 0 ) * recv_mat_sizes[src]( e, 1 );
+        }
+      } );
+
+  // Sort triplets by row then column
+  std::sort( triplets.begin(), triplets.end(), []( const Triplet& a, const Triplet& b ) {
+    if ( a.row != b.row ) return a.row < b.row;
+    return a.col < b.col;
+  } );
+
+  // Count non-zeros and merge duplicates
+  int num_unique_nonzeros = 0;
+  if ( !triplets.empty() ) {
+    num_unique_nonzeros = 1;
+    for ( size_t i = 1; i < triplets.size(); ++i ) {
+      if ( triplets[i].row != triplets[i - 1].row || triplets[i].col != triplets[i - 1].col ) {
+        num_unique_nonzeros++;
+      }
+    }
+  }
+
+  auto num_rows = parent_test_fes_.GetVSize();
+  // CSR arrays live in std::vector: the mfem::HypreParMatrix constructor called below copies its I, J, and data
+  // inputs rather than taking ownership, so buffers allocated with new[] would be leaked on every call.
+  std::vector<int> I_vec( num_rows + 1, 0 );  // per-row counts, turned into row offsets by the prefix sum below
+  std::vector<HYPRE_BigInt> J_vec( num_unique_nonzeros );
+  std::vector<double> data_vec( num_unique_nonzeros );
+  int* I_ptr = I_vec.data();
+  HYPRE_BigInt* J_ptr = J_vec.data();
+  double* data_ptr = data_vec.data();
+
+  if ( !triplets.empty() ) {
+    int unique_idx = 0;
+    J_ptr[0] = triplets[0].col;
+    data_ptr[0] = triplets[0].val;
+    I_ptr[triplets[0].row + 1]++;
+
+    for ( size_t i = 1; i < triplets.size(); ++i ) {
+      if ( triplets[i].row == triplets[i - 1].row && triplets[i].col == triplets[i - 1].col ) {
+        data_ptr[unique_idx] += triplets[i].val;
+      } else {
+        unique_idx++;
+        J_ptr[unique_idx] = triplets[i].col;
+        data_ptr[unique_idx] = triplets[i].val;
+        I_ptr[triplets[i].row + 1]++;
+      }
+    }
+  }
+
+  // Transform I_ptr from counts to offsets (prefix sum)
+  for ( int i = 0; i < num_rows; ++i ) {
+    I_ptr[i + 1] += I_ptr[i];
+  }
+
+  // Construct rectangular HypreParMatrix
+  return std::make_unique<mfem::HypreParMatrix>(
+      getMPIUtility().MPIComm(), num_rows, parent_test_fes_.GlobalVSize(), parent_trial_fes_.GlobalVSize(), I_ptr,
+      J_ptr, data_ptr, parent_test_fes_.GetDofOffsets(), parent_trial_fes_.GetDofOffsets() );
+}
+
 std::unique_ptr<mfem::HypreParMatrix> MatrixTransfer::ConvertToHypreParMatrix( mfem::SparseMatrix& sparse,
                                                                                bool parallel_assemble ) const
 {
diff --git a/src/redecomp/transfer/MatrixTransfer.hpp b/src/redecomp/transfer/MatrixTransfer.hpp
index 484699ce..b34c56fe 100644
--- a/src/redecomp/transfer/MatrixTransfer.hpp
+++ b/src/redecomp/transfer/MatrixTransfer.hpp
@@ -82,6 +82,22 @@ class MatrixTransfer {
                                                const axom::Array<int>& trial_elem_idx,
                                                const axom::Array<mfem::DenseMatrix>& src_elem_mat ) const;
 
+  /**
+   * @brief Transfers flattened element RedecompMesh matrices to parent mfem::ParMesh
+   *
+   * @param test_elem_idx List of element IDs on the redecomp test space
+   * @param trial_elem_idx List of element IDs on the redecomp trial space
+   * @param src_elem_mat_data Flattened array of element-level dense matrices from the redecomp mesh
+   * @param src_elem_mat_offsets Offsets into src_elem_mat_data for each element
+   * @return mfem::HypreParMatrix on the parent mesh (ldofs on the rows, global
+   * ldofs on the columns) in rectangular format
+   *
+   * @note Unlike the mfem::DenseMatrix overload, this builds the parallel matrix directly, bypassing mfem::SparseMatrix.
+   */
+  std::unique_ptr<mfem::HypreParMatrix> TransferToParallelSparse(
+      const axom::Array<int>& test_elem_idx, const axom::Array<int>& trial_elem_idx,
+      const axom::Array<double>& src_elem_mat_data, const axom::Array<int>& src_elem_mat_offsets ) const;
+
   /**
    * @brief Converts SparseMatrix from TransferToParallelSparse to HypreParMatrix
    *
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 1c1f8777..ba4649ce 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -156,6 +156,7 @@ if ( BUILD_REDECOMP AND TRIBOL_USE_MPI )
   set( combined_tests
       tribol_mfem_common_plane.cpp
       tribol_mfem_mortar_lm.cpp
+      tribol_mfem_jacobian.cpp
       tribol_proximity_check.cpp
       tribol_redecomp_tol.cpp
       )
diff --git a/src/tests/tribol_mfem_jacobian.cpp b/src/tests/tribol_mfem_jacobian.cpp
new file mode 100644
index 00000000..0cc81c75
--- /dev/null
+++ b/src/tests/tribol_mfem_jacobian.cpp
@@ -0,0 +1,214 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include <memory>
+#include <set>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+// MFEM includes
+#include "mfem.hpp"
+
+// Axom includes
+#include "axom/slic.hpp"
+
+// Shared includes
+#include "shared/mesh/MeshBuilder.hpp"
+
+// Tribol includes
+#include "tribol/config.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/interface/tribol.hpp"
+#include "tribol/interface/mfem_tribol.hpp"
+#include "tribol/mesh/CouplingScheme.hpp"
+#include "tribol/mesh/MfemData.hpp"
+
+#ifdef TRIBOL_USE_UMPIRE
+#include "umpire/ResourceManager.hpp"
+#endif
+
+// This test reaches past the public tribol interface: it looks up the registered CouplingScheme through
+// tribol::CouplingSchemeManager (getInstance() / findData()) so that it can drive MfemJacobianData directly.
+// NOTE(review): CouplingSchemeManager is assumed to be declared by one of the Tribol headers included above;
+// an earlier note named tribol/coupling/CouplingSchemeManager.hpp, which this file never included -- confirm.
+
+/// Fixture for the direct Jacobian assembly tests; it currently holds no state.
+class MfemJacobianTest : public testing::Test {
+ protected:
+  void SetUp() override
+  {
+    // Silence Tribol output
+    // tribol::setLoggingLevel( tribol::LoggingLevel::ERROR );
+  }
+
+  void TearDown() override
+  {
+    // tribol::finalize() is called in main
+  }
+};
+
+// Exercises MfemJacobianData::GetMfemJacobian() with a hand-built element contribution instead of a real
+// contact evaluation: two cubes are registered as a mortar/nonmortar pair, one dense element matrix of ones is
+// attached to surface element 0 of mesh 1 (on ranks that have one), and the assembled matrix is required to
+// report a positive nonzero count.
+TEST_F( MfemJacobianTest, direct_jacobian_assembly )
+{
+  int n_ranks;
+  MPI_Comm_size( MPI_COMM_WORLD, &n_ranks );
+
+  // 1. Setup simple mesh (2 cubes)
+  int ref_levels = 0;
+  // Elements per direction is 2^ref_levels. An integer shift is used instead of std::pow(), whose double
+  // result would be implicitly narrowed to int.
+  int nel_per_dir = 1 << ref_levels;
+
+  // Boundary attributes handed to Tribol as the contact surfaces:
+  //   mesh 1 (mortar) contact surface:    4
+  //   mesh 2 (nonmortar) contact surface: 5
+  auto mortar_attrs = std::set<int>( { 4 } );
+  auto nonmortar_attrs = std::set<int>( { 5 } );
+
+  // NOTE(review): the chains below read like updateBdrAttrib( old_attr, new_attr ). If so, attribute 4 is
+  // produced by the final ( 6, 4 ) call and attribute 5 by the final ( 8, 5 ) call; the earlier calls move the
+  // pre-existing attributes out of the way -- confirm against shared::MeshBuilder.
+  // clang-format off
+  mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
+    shared::MeshBuilder::CubeMesh(nel_per_dir, nel_per_dir, nel_per_dir)
+      .updateBdrAttrib(3, 7)
+      .updateBdrAttrib(1, 3)
+      .updateBdrAttrib(4, 7)
+      .updateBdrAttrib(5, 1)
+      .updateBdrAttrib(6, 4), // presumably the mortar surface
+    shared::MeshBuilder::CubeMesh(nel_per_dir, nel_per_dir, nel_per_dir)
+      .translate({0.0, 0.0, 0.99}) // second cube offset so the two cubes slightly overlap or nearly touch
+      .updateBdrAttrib(1, 8)
+      .updateBdrAttrib(3, 7)
+      .updateBdrAttrib(4, 7)
+      .updateBdrAttrib(5, 1)
+      .updateBdrAttrib(8, 5) // presumably the nonmortar surface
+  }));
+  // clang-format on
+
+  // First-order H1 space with one vector component per spatial dimension; coords is filled from the mesh nodes.
+  int dim = mesh.SpaceDimension();
+  int order = 1;
+  mfem::H1_FECollection fe_coll( order, dim );
+  mfem::ParFiniteElementSpace par_fe_space( &mesh, &fe_coll, dim );
+  mfem::ParGridFunction coords( &par_fe_space );
+  mesh.GetNodes( coords );
+
+  // 2. Register Coupling Scheme
+  int cs_id = 0;
+  int mesh1_id = 0;
+  int mesh2_id = 1;
+  tribol::registerMfemCouplingScheme( cs_id, mesh1_id, mesh2_id, mesh, coords, mortar_attrs,
+                                      nonmortar_attrs, tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
+                                      tribol::SINGLE_MORTAR, tribol::FRICTIONLESS, tribol::LAGRANGE_MULTIPLIER,
+                                      tribol::BINNING_GRID );
+
+  // 3. Update Decomp to build internal MfemData
+  tribol::updateMfemParallelDecomposition();
+
+  // 4. Access CouplingScheme and MfemJacobianData
+  auto& cs_manager = tribol::CouplingSchemeManager::getInstance();
+  auto* cs = cs_manager.findData( cs_id );
+  ASSERT_NE( cs, nullptr );
+
+  // The coupling scheme may not have Jacobian data yet; if not, build it from the mesh and submesh data and
+  // hand ownership to the coupling scheme.
+  auto* jac_data = cs->getMfemJacobianData();
+  if ( jac_data == nullptr ) {
+    auto* mesh_data = cs->getMfemMeshData();
+    auto* submesh_data = cs->getMfemSubmeshData();
+    ASSERT_NE( mesh_data, nullptr );
+    ASSERT_NE( submesh_data, nullptr );
+
+    cs->setMfemJacobianData( std::make_unique<tribol::MfemJacobianData>(
+        *mesh_data, *submesh_data, cs->getContactMethod() ) );
+    jac_data = cs->getMfemJacobianData();
+  }
+  ASSERT_NE( jac_data, nullptr );
+
+  // GetMfemJacobian() reads its transfer operators from the Jacobian update data, which is presumably created
+  // by this call (it is otherwise said to happen inside getMfemBlockJacobian()), so trigger it explicitly.
+  jac_data->UpdateJacobianXfer();
+
+  // 5. Synthesize ComputedElementData
+  // One contribution couples surface element 0 of mesh 1 with itself, i.e. the BlockSpace::MORTAR x
+  // BlockSpace::MORTAR pair. GetMfemJacobian() files every space other than LAGRANGE_MULTIPLIER under block
+  // index 0, so this contribution lands in block (0, 0).
+  std::vector<tribol::ComputedElementData> contributions;
+
+  // NOTE(review): 8 * dim assumes 8 nodes per element (a hex), but the element IDs used below index the Tribol
+  // contact-surface mesh. If those elements are 4-node quads the dense matrix should have ( 4 * dim )^2 entries
+  // -- confirm against what TransferToParallelSparse() expects per element.
+  int num_dofs_per_elem = 8 * dim;
+  int mat_size = num_dofs_per_elem * num_dofs_per_elem;
+
+  tribol::ComputedElementData contrib;
+  contrib.row_space = tribol::BlockSpace::MORTAR;
+  contrib.col_space = tribol::BlockSpace::MORTAR;
+
+  // Element 0 is only a usable ID on ranks whose mesh 1 reports at least one element: GetMfemJacobian() indexes
+  // the mesh 1 element map with the ID directly. Ranks without such an element keep `contributions` empty but
+  // still take part in the GetMfemJacobian() call below.
+  auto* mesh_data = cs->getMfemMeshData();
+  if ( mesh_data && mesh_data->GetMesh1NE() > 0 ) {
+    contrib.row_elem_ids.push_back( 0 );
+    contrib.col_elem_ids.push_back( 0 );
+
+    // Single dense element matrix filled with ones, starting at offset 0 of the flattened data.
+    contrib.jacobian_data.resize( mat_size );
+    for ( int i = 0; i < mat_size; ++i ) {
+      contrib.jacobian_data[i] = 1.0;
+    }
+    contrib.jacobian_offsets.push_back( 0 );
+
+    contributions.push_back( contrib );
+  }
+
+  // 6. Call the new method
+  // This must run on every rank: GetMfemJacobian() does an MPI_Allreduce for each block with a transfer operator.
+  auto ParJ = jac_data->GetMfemJacobian( contributions );
+
+  // 7. Verify
+  // Sum the per-rank contribution counts so that every rank takes the same branch below.
+  int local_contrib = static_cast<int>( contributions.size() );
+  int global_contrib = 0;
+  MPI_Allreduce( &local_contrib, &global_contrib, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
+
+  if ( global_contrib > 0 ) {
+    // At least one rank supplied an element matrix, so the assembled matrix must contain entries.
+    EXPECT_GT( ParJ->NNZ(), 0 );
+  } else if ( n_ranks == 1 ) {
+    // A serial run holds every surface element, so finding none on mesh 1 is a setup error.
+    FAIL() << "No surface elements found on mesh 1 in serial run.";
+  }
+  // NOTE: with more than one rank and no contribution on any rank, the test passes without checking anything.
+}
+
+/// Test driver: brackets the GoogleTest run with MPI setup and Tribol / MPI teardown.
+int main( int argc, char* argv[] )
+{
+  MPI_Init( &argc, &argv );
+
+  ::testing::InitGoogleTest( &argc, argv );
+
+#ifdef TRIBOL_USE_UMPIRE
+  umpire::ResourceManager::getInstance();
+#endif
+
+  // The logger object lives until main() returns, i.e. for the whole test run.
+  axom::slic::SimpleLogger logger;
+
+  const int result = RUN_ALL_TESTS();
+
+  // Tribol is finalized once, after all tests, and before MPI is shut down.
+  tribol::finalize();
+  MPI_Finalize();
+
+  return result;
+}
diff --git a/src/tribol/mesh/MfemData.cpp b/src/tribol/mesh/MfemData.cpp
index 9aa7dce1..902e8649 100644
--- a/src/tribol/mesh/MfemData.cpp
+++ b/src/tribol/mesh/MfemData.cpp
@@ -1084,6 +1084,110 @@ const MfemJacobianData::UpdateData& MfemJacobianData::GetUpdateData() const
   return *update_data_;
 }
 
+// Assembles element-level Jacobian contributions, block by block, into a single matrix on true dofs.
+ParSparseMat MfemJacobianData::GetMfemJacobian( const std::vector<ComputedElementData>& contributions ) const
+{
+  std::unique_ptr<ParSparseMat> par_J;
+
+  // Maps BlockSpaces (MORTAR, NONMORTAR, LAGRANGE_MULTIPLIER) to a tribol element map; indexed by the enum value
+  const std::vector<const Array1D<int>*> elem_map_by_space{ &parent_data_.GetElemMap1(), &parent_data_.GetElemMap2(),
+                                                            &parent_data_.GetElemMap2() };
+
+  auto comm = parent_data_.GetParentCoords().ParFESpace()->GetComm();
+  const auto& xfers = GetUpdateData().submesh_redecomp_xfer_;
+
+  // Blocks (0,0), (0,1), (1,0), (1,1); index 0: displacement (MORTAR/NONMORTAR), 1: LAGRANGE_MULTIPLIER
+  for ( int r_blk = 0; r_blk < 2; ++r_blk ) {
+    for ( int c_blk = 0; c_blk < 2; ++c_blk ) {
+      // Skip blocks without a transfer operator
+      if ( xfers.shape()[0] <= r_blk || xfers.shape()[1] <= c_blk || !xfers( r_blk, c_blk ) ) {
+        continue;
+      }
+
+      axom::Array<int> row_redecomp_ids;
+      axom::Array<int> col_redecomp_ids;
+      axom::Array<double> jacobian_data;
+      axom::Array<int> jacobian_offsets;
+
+      // Aggregate data for this block pair
+      for ( const auto& contrib : contributions ) {
+        int contrib_r_blk = ( contrib.row_space == BlockSpace::LAGRANGE_MULTIPLIER ) ? 1 : 0;
+        int contrib_c_blk = ( contrib.col_space == BlockSpace::LAGRANGE_MULTIPLIER ) ? 1 : 0;
+        if ( contrib_r_blk != r_blk || contrib_c_blk != c_blk ) {
+          continue;
+        }
+
+        // Tribol element IDs -> redecomp element IDs; the IDs index the element map without a bounds check
+        const auto& row_map = *elem_map_by_space[static_cast<size_t>( contrib.row_space )];
+        row_redecomp_ids.reserve( row_redecomp_ids.size() + contrib.row_elem_ids.size() );
+        for ( auto id : contrib.row_elem_ids ) {
+          row_redecomp_ids.push_back( row_map[static_cast<size_t>( id )] );
+        }
+        const auto& col_map = *elem_map_by_space[static_cast<size_t>( contrib.col_space )];
+        col_redecomp_ids.reserve( col_redecomp_ids.size() + contrib.col_elem_ids.size() );
+        for ( auto id : contrib.col_elem_ids ) {
+          col_redecomp_ids.push_back( col_map[static_cast<size_t>( id )] );
+        }
+        // Offsets are relative to the contribution's own data; rebase them using the size before the append
+        int current_offset = jacobian_data.size();
+        jacobian_data.append( axom::ArrayView<const double>( contrib.jacobian_data ) );
+        jacobian_offsets.reserve( jacobian_offsets.size() + contrib.jacobian_offsets.size() );
+        for ( auto offset : contrib.jacobian_offsets ) {
+          jacobian_offsets.push_back( current_offset + offset );
+        }
+      }
+
+      // Check globally if any rank has data for this block (collective, so every rank must get here)
+      int local_has_data = row_redecomp_ids.empty() ? 0 : 1;
+      int global_has_data = 0;
+      MPI_Allreduce( &local_has_data, &global_has_data, 1, MPI_INT, MPI_MAX, comm );
+      if ( global_has_data ) {
+        redecomp::MatrixTransfer* xfer = xfers( r_blk, c_blk ).get();
+        auto submesh_J_hypre = xfer->TransferToParallelSparse( row_redecomp_ids, col_redecomp_ids, jacobian_data,
+                                                               jacobian_offsets );
+
+        ParSparseMatView submesh_J_view( submesh_J_hypre.get() );
+        std::unique_ptr<ParSparseMat> contrib_J;
+
+        if ( r_blk == 0 && c_blk == 0 ) {
+          auto parent_J = submesh_J_view.RAP( *submesh_parent_vdof_xfer_ );
+          ParSparseMatView parent_P( parent_data_.GetParentCoords().ParFESpace()->Dof_TrueDof_Matrix() );
+          contrib_J = std::make_unique<ParSparseMat>( parent_J.RAP( parent_P ) );
+        } else if ( r_blk == 0 && c_blk == 1 ) {
+          auto parent_J = submesh_parent_vdof_xfer_->transpose() * submesh_J_hypre.get();
+          contrib_J = std::make_unique<ParSparseMat>(
+              ParSparseMat::RAP( parent_data_.GetParentCoords().ParFESpace()->Dof_TrueDof_Matrix(), parent_J,
+                                 submesh_data_.GetSubmeshFESpace().Dof_TrueDof_Matrix() ) );
+        } else if ( r_blk == 1 && c_blk == 0 ) {
+          auto parent_J = submesh_J_view * ( *submesh_parent_vdof_xfer_ );
+          ParSparseMatView submesh_P( submesh_data_.GetSubmeshFESpace().Dof_TrueDof_Matrix() );
+          ParSparseMatView parent_P( parent_data_.GetParentCoords().ParFESpace()->Dof_TrueDof_Matrix() );
+          contrib_J = std::make_unique<ParSparseMat>( ParSparseMat::RAP( submesh_P, parent_J, parent_P ) );
+        } else {  // (1, 1) block
+          ParSparseMatView submesh_P( submesh_data_.GetSubmeshFESpace().Dof_TrueDof_Matrix() );
+          contrib_J = std::make_unique<ParSparseMat>( submesh_J_view.RAP( submesh_P ) );
+        }
+
+        // NOTE(review): all blocks are summed into one matrix even though their shapes may differ -- confirm
+        if ( !par_J ) {
+          par_J = std::move( contrib_J );
+        } else {
+          ( *par_J ) += *contrib_J;
+        }
+      }
+    }
+  }
+
+  // No block had data on any rank: zero-valued diagonal matrix, globally sized, on the parent communicator
+  if ( !par_J ) {
+    auto& fes = *parent_data_.GetParentCoords().ParFESpace();
+    return ParSparseMat::diagonalMatrix( comm, fes.GlobalTrueVSize(), fes.GetTrueDofOffsets(), 0.0,
+                                         mfem::Array<int>(), true );
+  }
+
+  return std::move( *par_J );
+}
+
 }  // namespace tribol
 
 #endif /* BUILD_REDECOMP */
diff --git a/src/tribol/mesh/MfemData.hpp b/src/tribol/mesh/MfemData.hpp
index 17798cce..5f2f5e48 100644
--- a/src/tribol/mesh/MfemData.hpp
+++ b/src/tribol/mesh/MfemData.hpp
@@ -1631,6 +1631,18 @@ class MfemSubmeshData {
   std::unique_ptr<UpdateData> update_data_;
 };
 
+/**
+ * @brief One batch of element-level Jacobian matrices that share a row and a column BlockSpace
+ */
+struct ComputedElementData {
+  BlockSpace row_space;              ///< Block space the row element IDs refer to
+  BlockSpace col_space;              ///< Block space the column element IDs refer to
+  axom::Array<int> row_elem_ids;     ///< Tribol element IDs for rows (mapped to redecomp IDs during assembly)
+  axom::Array<int> col_elem_ids;     ///< Tribol element IDs for columns (mapped to redecomp IDs during assembly)
+  axom::Array<double> jacobian_data; ///< Flattened element Jacobian entries
+  axom::Array<int> jacobian_offsets; ///< Offsets into jacobian_data, relative to the start of this batch
+};
+
 /**
  * @brief Simplifies transfer of Jacobian matrix data between MFEM and Tribol
  */
@@ -1665,6 +1677,14 @@ class MfemJacobianData {
       const MethodData& method_data, const std::vector<std::pair<int, BlockSpace>>& row_info,
       const std::vector<std::pair<int, BlockSpace>>& col_info ) const;
 
+  /**
+   * @brief Assembles element-level Jacobian contributions into a single ParSparseMat
+   *
+   * @param contributions Element Jacobian batches, each tagged with its row and column BlockSpace
+   * @return Sum of the transferred blocks; a zero-valued diagonal matrix if no rank has data for any block
+   */
+  ParSparseMat GetMfemJacobian( const std::vector<ComputedElementData>& contributions ) const;
+
  private:
   /**
    * @brief Creates and stores data that changes when the redecomp mesh is

From 25818d8ad5996eb50bedd2c19beb4fdc9244ae9a Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 11 Feb 2026 15:18:23 -0800
Subject: [PATCH 25/56] switch to new ParSparseMat and ParVector

---
 src/tests/shared_par_sparse_mat.cpp       | 10 +--
 src/tribol/physics/ContactFormulation.hpp |  2 +
 src/tribol/physics/NewMethodAdapter.cpp   | 79 ++++++++++-------------
 src/tribol/physics/NewMethodAdapter.hpp   | 20 ++++--
 4 files changed, 54 insertions(+), 57 deletions(-)

diff --git a/src/tests/shared_par_sparse_mat.cpp b/src/tests/shared_par_sparse_mat.cpp
index d15d7a81..daebafda 100644
--- a/src/tests/shared_par_sparse_mat.cpp
+++ b/src/tests/shared_par_sparse_mat.cpp
@@ -404,12 +404,12 @@ TEST_F( ParSparseMatTest, DiagonalFromVector )
     diag_vals[i] = static_cast<double>( rank * 100 + i );
   }
 
-  tribol::ParSparseMat A =
-      tribol::ParSparseMat::diagonalMatrix( MPI_COMM_WORLD, size, row_starts.GetData(), diag_vals );
+  shared::ParSparseMat A =
+      shared::ParSparseMat::diagonalMatrix( MPI_COMM_WORLD, size, row_starts.GetData(), diag_vals );
 
-  mfem::Vector x( local_size ), y( local_size );
-  x = 1.0;
-  y = A * x;
+  shared::ParVector x( A.get(), 0 );
+  x.Fill( 1.0 );
+  auto y = A * x;
 
   for ( int i = 0; i < local_size; ++i ) {
     EXPECT_NEAR( y[i], static_cast<double>( rank * 100 + i ), 1e-12 );
diff --git a/src/tribol/physics/ContactFormulation.hpp b/src/tribol/physics/ContactFormulation.hpp
index a912b2fd..9319ae81 100644
--- a/src/tribol/physics/ContactFormulation.hpp
+++ b/src/tribol/physics/ContactFormulation.hpp
@@ -6,6 +6,8 @@
 #ifndef SRC_TRIBOL_PHYSICS_CONTACTFORMULATION_HPP_
 #define SRC_TRIBOL_PHYSICS_CONTACTFORMULATION_HPP_
 
+#include "tribol/config.hpp"
+
 #include "tribol/common/Parameters.hpp"
 #include "tribol/common/ArrayTypes.hpp"
 #include "tribol/mesh/InterfacePairs.hpp"
diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index b753e6b2..ccff0b5a 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -131,26 +131,26 @@ void NewMethodAdapter::updateNodalGaps()
       const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
   submesh_data_.GetSubmeshGap( g_tilde_linear_form );
   auto& P_submesh = *submesh_data_.GetSubmeshFESpace().GetProlongationMatrix();
-  g_tilde_vec_ = mfem::HypreParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
-  g_tilde_vec_ = 0.0;
-  P_submesh.MultTranspose( g_tilde_linear_form, g_tilde_vec_ );
+  g_tilde_vec_ = shared::ParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  g_tilde_vec_.Fill( 0.0 );
+  P_submesh.MultTranspose( g_tilde_linear_form, g_tilde_vec_.get() );
 
   mfem::ParLinearForm A_linear_form( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
   submesh_data_.GetPressureTransfer().RedecompToSubmesh( redecomp_area, A_linear_form );
-  A_vec_ = mfem::HypreParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
-  A_vec_ = 0.0;
-  P_submesh.MultTranspose( A_linear_form, A_vec_ );
+  A_vec_ = shared::ParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  A_vec_.Fill( 0.0 );
+  P_submesh.MultTranspose( A_linear_form, A_vec_.get() );
 
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
   const std::vector<std::pair<int, BlockSpace>> row_info{ { 1, BlockSpace::LAGRANGE_MULTIPLIER } };
   const std::vector<std::pair<int, BlockSpace>> col_info{ { 0, BlockSpace::NONMORTAR }, { 0, BlockSpace::MORTAR } };
   auto dg_tilde_dx_block = jac_data_.GetMfemBlockJacobian( dg_tilde_dx, row_info, col_info );
   dg_tilde_dx_block->owns_blocks = false;
-  dg_tilde_dx_ = ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dg_tilde_dx_block->GetBlock( 1, 0 ) ) );
+  dg_tilde_dx_ = shared::ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dg_tilde_dx_block->GetBlock( 1, 0 ) ) );
 
   auto dA_dx_block = jac_data_.GetMfemBlockJacobian( dA_dx, row_info, col_info );
   dA_dx_block->owns_blocks = false;
-  dA_dx_ = ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dA_dx_block->GetBlock( 1, 0 ) ) );
+  dA_dx_ = shared::ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dA_dx_block->GetBlock( 1, 0 ) ) );
 }
 
 void NewMethodAdapter::updateNodalForces()
@@ -159,13 +159,9 @@ void NewMethodAdapter::updateNodalForces()
 
   // compute nodal pressures. these are used in the Hessian vector product below so we don't have to assemble a Hessian
   // NOTE: in general, pressure should likely be set by the host code
-  pressure_vec_.SetSize( g_tilde_vec_.Size() );
-  pressure_vec_ = 0.0;
-  for ( int i{ 0 }; i < pressure_vec_.Size(); ++i ) {
-    if ( A_vec_[i] > 1.0e-14 && g_tilde_vec_[i] <= 0.0 ) {
-      pressure_vec_[i] = params_.k * g_tilde_vec_[i] / A_vec_[i];
-    }
-  }
+  pressure_vec_ = ( params_.k * g_tilde_vec_ ).divideInPlace( A_vec_, area_tol_ );
+
+  // energy_ = pressure_vec_.dot( g_tilde_vec_ );
 
   energy_ = 0.0;
   for ( int i{ 0 }; i < pressure_vec_.Size(); ++i ) {
@@ -173,6 +169,8 @@ void NewMethodAdapter::updateNodalForces()
   }
   MPI_Allreduce( MPI_IN_PLACE, &energy_, 1, MPI_DOUBLE, MPI_SUM, submesh_data_.GetSubmeshFESpace().GetComm() );
 
+  // auto k_over_a = params_.k * A_vec_.inverse( area_tol_ );
+
   mfem::HypreParVector k_over_a( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
   k_over_a = 0.0;
   for ( int i{ 0 }; i < k_over_a.Size(); ++i ) {
@@ -181,21 +179,15 @@ void NewMethodAdapter::updateNodalForces()
     }
   }
 
-  mfem::HypreParVector p_over_a( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
-  p_over_a = 0.0;
-  for ( int i{ 0 }; i < p_over_a.Size(); ++i ) {
-    if ( A_vec_[i] > 1.0e-14 ) {
-      p_over_a[i] = pressure_vec_[i] / A_vec_[i];
-    }
-  }
+  auto p_over_a = pressure_vec_.divide( A_vec_, area_tol_ );
 
-  ParSparseMat dp_dx( dg_tilde_dx_.get() );
+  shared::ParSparseMat dp_dx( dg_tilde_dx_.get() );
   dp_dx->ScaleRows( k_over_a );
-  ParSparseMat dp_dx_temp( dA_dx_.get() );
-  dp_dx_temp->ScaleRows( p_over_a );
+  shared::ParSparseMat dp_dx_temp( dA_dx_.get() );
+  dp_dx_temp->ScaleRows( p_over_a.get() );
   dp_dx -= dp_dx_temp;
 
-  force_vec_ = ( pressure_vec_ * dg_tilde_dx_ ).Add( 1.0, g_tilde_vec_ * dp_dx );
+  force_vec_ = ( pressure_vec_ * dg_tilde_dx_ ) + ( g_tilde_vec_ * dp_dx );
 
   MethodData df_dx_data;
   df_dx_data.reserveBlockJ( { BlockSpace::NONMORTAR, BlockSpace::MORTAR }, pairs_.size() );
@@ -204,18 +196,18 @@ void NewMethodAdapter::updateNodalForces()
   mfem::GridFunction redecomp_pressure( submesh_data_.GetRedecompGap() );
   mfem::ParGridFunction submesh_pressure(
       const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
-  submesh_pressure.SetFromTrueDofs( pressure_vec_ );
+  submesh_pressure.SetFromTrueDofs( pressure_vec_.get() );
   submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_pressure, redecomp_pressure );
 
   mfem::GridFunction redecomp_g_tilde( submesh_data_.GetRedecompGap() );
   mfem::ParGridFunction submesh_g_tilde(
       const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
-  submesh_g_tilde.SetFromTrueDofs( g_tilde_vec_ );
+  submesh_g_tilde.SetFromTrueDofs( g_tilde_vec_.get() );
   submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_g_tilde, redecomp_g_tilde );
 
   mfem::GridFunction redecomp_A( submesh_data_.GetRedecompGap() );
   mfem::ParGridFunction submesh_A( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
-  submesh_A.SetFromTrueDofs( A_vec_ );
+  submesh_A.SetFromTrueDofs( A_vec_.get() );
   submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_A, redecomp_A );
 
   auto mesh1_view = mesh1_.getView();
@@ -310,25 +302,22 @@ void NewMethodAdapter::updateNodalForces()
   const std::vector<std::pair<int, BlockSpace>> all_info{ { 0, BlockSpace::NONMORTAR }, { 0, BlockSpace::MORTAR } };
   auto df_dx_block = jac_data_.GetMfemBlockJacobian( df_dx_data, all_info, all_info );
   df_dx_block->owns_blocks = false;
-  df_dx_ = ParSparseMat( static_cast<mfem::HypreParMatrix*>( &df_dx_block->GetBlock( 0, 0 ) ) );
+  df_dx_ = shared::ParSparseMat( static_cast<mfem::HypreParMatrix*>( &df_dx_block->GetBlock( 0, 0 ) ) );
 
-  mfem::HypreParVector pg2_over_asq( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
-  pg2_over_asq = 0.0;
-  for ( int i{ 0 }; i < pg2_over_asq.Size(); ++i ) {
-    if ( A_vec_[i] > 1.0e-14 ) {
-      pg2_over_asq[i] = 2.0 * pressure_vec_[i] * g_tilde_vec_[i] / ( A_vec_[i] * A_vec_[i] );
-    }
-  }
+  auto pg2_over_asq = ( 2.0 * pressure_vec_ )
+                          .multiplyInPlace( g_tilde_vec_ )
+                          .divideInPlace( A_vec_, area_tol_ )
+                          .divideInPlace( A_vec_, area_tol_ );
 
   auto& submesh_fes = submesh_data_.GetSubmeshFESpace();
-  auto p_over_a_diag = ParSparseMat::diagonalMatrix( submesh_fes.GetComm(), submesh_fes.GlobalTrueVSize(),
-                                                     submesh_fes.GetTrueDofOffsets(), p_over_a );
-  auto pg2_over_asq_diag = ParSparseMat::diagonalMatrix( submesh_fes.GetComm(), submesh_fes.GlobalTrueVSize(),
-                                                         submesh_fes.GetTrueDofOffsets(), pg2_over_asq );
-
-  df_dx_ -= ParSparseMat::RAP( dg_tilde_dx_, p_over_a_diag, dA_dx_ );
-  df_dx_ -= ParSparseMat::RAP( dA_dx_, p_over_a_diag, dg_tilde_dx_ );
-  df_dx_ += ParSparseMat::RAP( dA_dx_, pg2_over_asq_diag, dg_tilde_dx_ );
+  auto p_over_a_diag = shared::ParSparseMat::diagonalMatrix( submesh_fes.GetComm(), submesh_fes.GlobalTrueVSize(),
+                                                             submesh_fes.GetTrueDofOffsets(), p_over_a.get() );
+  auto pg2_over_asq_diag = shared::ParSparseMat::diagonalMatrix( submesh_fes.GetComm(), submesh_fes.GlobalTrueVSize(),
+                                                                 submesh_fes.GetTrueDofOffsets(), pg2_over_asq.get() );
+
+  df_dx_ -= shared::ParSparseMat::RAP( dg_tilde_dx_, p_over_a_diag, dA_dx_ );
+  df_dx_ -= shared::ParSparseMat::RAP( dA_dx_, p_over_a_diag, dg_tilde_dx_ );
+  df_dx_ += shared::ParSparseMat::RAP( dA_dx_, pg2_over_asq_diag, dg_tilde_dx_ );
   df_dx_ += dp_dx.transpose() * dg_tilde_dx_;
   df_dx_ += dg_tilde_dx_.transpose() * dp_dx;
 }
diff --git a/src/tribol/physics/NewMethodAdapter.hpp b/src/tribol/physics/NewMethodAdapter.hpp
index e22b6ccb..2da313cf 100644
--- a/src/tribol/physics/NewMethodAdapter.hpp
+++ b/src/tribol/physics/NewMethodAdapter.hpp
@@ -6,6 +6,8 @@
 #ifndef SRC_TRIBOL_PHYSICS_NEWMETHODADAPTER_HPP_
 #define SRC_TRIBOL_PHYSICS_NEWMETHODADAPTER_HPP_
 
+#include "tribol/config.hpp"
+
 #include "tribol/physics/ContactFormulation.hpp"
 #include "tribol/physics/new_method.hpp"
 #include "tribol/mesh/MfemData.hpp"
@@ -46,6 +48,7 @@ class NewMethodAdapter : public ContactFormulation {
 
   RealT getEnergy() const override { return energy_; }
 
+#ifdef BUILD_REDECOMP
   void getMfemForce( mfem::Vector& forces ) const override;
 
   void getMfemGap( mfem::Vector& gaps ) const override;
@@ -57,10 +60,13 @@ class NewMethodAdapter : public ContactFormulation {
   std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx() const override;
 
   std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const override;
+#endif
 
  private:
   // --- Member Variables ---
 
+  double area_tol_{ 1.0e-14 };
+
   MfemSubmeshData& submesh_data_;
   MfemJacobianData& jac_data_;
   MeshData& mesh1_;
@@ -72,15 +78,15 @@ class NewMethodAdapter : public ContactFormulation {
   ArrayT<InterfacePair> pairs_;
 
   // These store the assembled nodal values
-  mfem::HypreParVector g_tilde_vec_;
-  mfem::HypreParVector A_vec_;
-  mutable ParSparseMat dg_tilde_dx_;
-  ParSparseMat dA_dx_;
+  shared::ParVector g_tilde_vec_;
+  shared::ParVector A_vec_;
+  mutable shared::ParSparseMat dg_tilde_dx_;
+  shared::ParSparseMat dA_dx_;
 
-  mfem::HypreParVector pressure_vec_;  // This holds p = k * g / A
+  shared::ParVector pressure_vec_;  // This holds p = k * g / A
   RealT energy_;
-  mfem::Vector force_vec_;
-  mutable ParSparseMat df_dx_;
+  shared::ParVector force_vec_;
+  mutable shared::ParSparseMat df_dx_;
 
   // Pressure GridFunction wrapper (required by interface)
   // We wrap the pressure_vec_ in a ParGridFunction for return

From a05c1f84c8ca3847b31314f3ced4c1f518cae3c8 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 11 Feb 2026 16:16:59 -0800
Subject: [PATCH 26/56] simplify interface return types

---
 src/tribol/physics/ContactFormulation.hpp | 20 ++++-----
 src/tribol/physics/NewMethodAdapter.cpp   | 49 ++++++-----------------
 src/tribol/physics/NewMethodAdapter.hpp   | 11 ++---
 3 files changed, 23 insertions(+), 57 deletions(-)

diff --git a/src/tribol/physics/ContactFormulation.hpp b/src/tribol/physics/ContactFormulation.hpp
index 9319ae81..3a190f86 100644
--- a/src/tribol/physics/ContactFormulation.hpp
+++ b/src/tribol/physics/ContactFormulation.hpp
@@ -89,31 +89,25 @@ class ContactFormulation {
 
 #ifdef BUILD_REDECOMP
   /**
-   * @brief Adds computed forces to the provided MFEM vector
-   *
-   * @param [in,out] forces MFEM vector to add forces to
+   * @brief Returns t-dof vector of forces on parent mesh
    *
    * @note Requires updateNodalForces() to be called first.
    */
-  virtual void getMfemForce( mfem::Vector& forces ) const = 0;
+  virtual const mfem::HypreParVector& getMfemForce() const = 0;
 
   /**
-   * @brief Populates the provided MFEM vector with gap values
-   *
-   * Resizes the vector if necessary, zeros it out, and sets gap values.
-   *
-   * @param [out] gaps MFEM vector to store gaps in
+   * @brief Returns t-dof vector of gaps on submesh
    *
    * @note Requires updateNodalGaps() to be called first.
    */
-  virtual void getMfemGap( mfem::Vector& gaps ) const = 0;
+  virtual const mfem::HypreParVector& getMfemGap() const = 0;
 
   /**
-   * @brief Returns a reference to the MFEM pressure grid function
+   * @brief Returns a reference to the MFEM pressure t-dof vector
    *
-   * @return mfem::ParGridFunction& Reference to the pressure grid function
+   * @return Reference to the pressure t-dof vector
    */
-  virtual mfem::ParGridFunction& getMfemPressure() = 0;
+  virtual mfem::HypreParVector& getMfemPressure() = 0;
 
   /**
    * @brief Get the derivative of force with respect to displacement
diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index ccff0b5a..e1415b1b 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -141,6 +141,8 @@ void NewMethodAdapter::updateNodalGaps()
   A_vec_.Fill( 0.0 );
   P_submesh.MultTranspose( A_linear_form, A_vec_.get() );
 
+  gap_vec_ = g_tilde_vec_.divide( A_vec_, area_tol_ );
+
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
   const std::vector<std::pair<int, BlockSpace>> row_info{ { 1, BlockSpace::LAGRANGE_MULTIPLIER } };
   const std::vector<std::pair<int, BlockSpace>> col_info{ { 0, BlockSpace::NONMORTAR }, { 0, BlockSpace::MORTAR } };
@@ -159,25 +161,19 @@ void NewMethodAdapter::updateNodalForces()
 
   // compute nodal pressures. these are used in the Hessian vector product below so we don't have to assemble a Hessian
   // NOTE: in general, pressure should likely be set by the host code
-  pressure_vec_ = ( params_.k * g_tilde_vec_ ).divideInPlace( A_vec_, area_tol_ );
+  pressure_vec_ = params_.k * gap_vec_;
 
-  // energy_ = pressure_vec_.dot( g_tilde_vec_ );
+  energy_ = pressure_vec_.dot( g_tilde_vec_ );
 
-  energy_ = 0.0;
-  for ( int i{ 0 }; i < pressure_vec_.Size(); ++i ) {
-    energy_ += pressure_vec_[i] * g_tilde_vec_[i];
-  }
-  MPI_Allreduce( MPI_IN_PLACE, &energy_, 1, MPI_DOUBLE, MPI_SUM, submesh_data_.GetSubmeshFESpace().GetComm() );
+  auto k_over_a = params_.k * A_vec_.inverse( area_tol_ );
 
-  // auto k_over_a = params_.k * A_vec_.inverse( area_tol_ );
-
-  mfem::HypreParVector k_over_a( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
-  k_over_a = 0.0;
-  for ( int i{ 0 }; i < k_over_a.Size(); ++i ) {
-    if ( A_vec_[i] > 1.0e-14 ) {
-      k_over_a[i] = params_.k / A_vec_[i];
-    }
-  }
+  // mfem::HypreParVector k_over_a( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  // k_over_a = 0.0;
+  // for ( int i{ 0 }; i < k_over_a.Size(); ++i ) {
+  //   if ( A_vec_[i] > 1.0e-14 ) {
+  //     k_over_a[i] = params_.k / A_vec_[i];
+  //   }
+  // }
 
   auto p_over_a = pressure_vec_.divide( A_vec_, area_tol_ );
 
@@ -328,27 +324,6 @@ RealT NewMethodAdapter::computeTimeStep()
   return 1.0;
 }
 
-void NewMethodAdapter::getMfemForce( mfem::Vector& forces ) const { forces = force_vec_; }
-
-void NewMethodAdapter::getMfemGap( mfem::Vector& gaps ) const
-{
-  gaps.SetSize( g_tilde_vec_.Size() );
-
-  for ( int i = 0; i < gaps.Size(); ++i ) {
-    if ( A_vec_[i] > 1.0e-14 )
-      gaps[i] = g_tilde_vec_[i] / A_vec_[i];
-    else
-      gaps[i] = 0.0;
-  }
-}
-
-mfem::ParGridFunction& NewMethodAdapter::getMfemPressure()
-{
-  auto& pressure = submesh_data_.GetSubmeshPressure();
-  pressure.SetFromTrueDofs( pressure_vec_ );
-  return pressure;
-}
-
 std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDx() const
 {
   return std::unique_ptr<mfem::HypreParMatrix>( df_dx_.release() );
diff --git a/src/tribol/physics/NewMethodAdapter.hpp b/src/tribol/physics/NewMethodAdapter.hpp
index 2da313cf..d2bbb1ab 100644
--- a/src/tribol/physics/NewMethodAdapter.hpp
+++ b/src/tribol/physics/NewMethodAdapter.hpp
@@ -49,11 +49,11 @@ class NewMethodAdapter : public ContactFormulation {
   RealT getEnergy() const override { return energy_; }
 
 #ifdef BUILD_REDECOMP
-  void getMfemForce( mfem::Vector& forces ) const override;
+  const mfem::HypreParVector& getMfemForce() const override { return force_vec_.get(); }
 
-  void getMfemGap( mfem::Vector& gaps ) const override;
+  const mfem::HypreParVector& getMfemGap() const override { return gap_vec_.get(); }
 
-  mfem::ParGridFunction& getMfemPressure() override;
+  mfem::HypreParVector& getMfemPressure() override { return pressure_vec_.get(); }
 
   std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx() const override;
 
@@ -80,6 +80,7 @@ class NewMethodAdapter : public ContactFormulation {
   // These store the assembled nodal values
   shared::ParVector g_tilde_vec_;
   shared::ParVector A_vec_;
+  shared::ParVector gap_vec_;
   mutable shared::ParSparseMat dg_tilde_dx_;
   shared::ParSparseMat dA_dx_;
 
@@ -87,10 +88,6 @@ class NewMethodAdapter : public ContactFormulation {
   RealT energy_;
   shared::ParVector force_vec_;
   mutable shared::ParSparseMat df_dx_;
-
-  // Pressure GridFunction wrapper (required by interface)
-  // We wrap the pressure_vec_ in a ParGridFunction for return
-  std::unique_ptr<mfem::ParGridFunction> pressure_gf_;
 };
 
 }  // namespace tribol

From 0209f681d682f808ab32b385760f529a11fcb983 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 11 Feb 2026 17:08:09 -0800
Subject: [PATCH 27/56] formatting

---
 src/examples/step_1_lobatto.cpp | 1518 +++++++++++++++----------------
 1 file changed, 738 insertions(+), 780 deletions(-)

diff --git a/src/examples/step_1_lobatto.cpp b/src/examples/step_1_lobatto.cpp
index 9ce1dbee..343d9833 100644
--- a/src/examples/step_1_lobatto.cpp
+++ b/src/examples/step_1_lobatto.cpp
@@ -5,208 +5,197 @@
 #include <array>
 #include <cmath>
 #include <algorithm>
+
+#include "tribol/config.hpp"
+
 #include "tribol/common/Parameters.hpp"
 #include "tribol/geom/GeomUtilities.hpp"
 #include "tribol/common/Enzyme.hpp"
 
+#ifdef TRIBOL_USE_ENZYME
 
 template <typename return_type, typename... Args>
 return_type __enzyme_autodiff( Args... );
 
+void find_normal( const double* coord1, const double* coord2, double* normal )  // writes a unit normal
+{
+  double dx = coord2[0] - coord1[0];
+  double dy = coord2[1] - coord1[1];
+  double len = std::sqrt( dy * dy + dx * dx );  // distance coord1 -> coord2; zero (unguarded) if they coincide
+  dx /= len;
+  dy /= len;
+  normal[0] = dy;  // normal = ( dy, -dx ): the unit tangent ( dx, dy ) rotated by 90 degrees
+  normal[1] = -dx;
+}
 
-void find_normal(const double* coord1, const double* coord2, double* normal) {
-    double dx = coord2[0] - coord1[0];
-    double dy = coord2[1] - coord1[1];
-    double len = std::sqrt(dy * dy + dx * dx);
-    dx /= len;
-    dy /= len;
-    normal[0] = dy;
-    normal[1] = -dx;
+void determine_lobatto_nodes( int N, double* N_i )  // fills N_i with Lobatto nodes on [-1, 1], ascending
+{
+  if ( N == 1 ) {
+    N_i[0] = 0.0;  // single node at the midpoint
+  } else if ( N == 2 ) {
+    N_i[0] = -1.0;
+    N_i[1] = 1.0;
+  } else if ( N == 3 ) {
+    N_i[0] = -1.0;
+    N_i[1] = 0.0;
+    N_i[2] = 1.0;
+  } else if ( N == 4 ) {
+    N_i[0] = -1.0;
+    N_i[1] = -1.0 / std::sqrt( 5.0 );
+    N_i[2] = 1.0 / std::sqrt( 5.0 );
+    N_i[3] = 1.0;
+  } else {  // every other N (including N <= 0 and N > 5) writes the 5-point rule, so N_i must hold 5 entries
+    N_i[0] = -1.0;
+    N_i[1] = -1.0 * std::sqrt( 3.0 / 7.0 );
+    N_i[2] = 0.0;
+    N_i[3] = std::sqrt( 3.0 / 7.0 );
+    N_i[4] = 1.0;
+  }
 }
 
-void determine_lobatto_nodes(int N, double* N_i) {
-    if (N == 1) {
-        N_i[0] = 0.0;
-    }
-    else if (N == 2) {
-        N_i[0] = -1.0;
-        N_i[1] = 1.0;
-    }
-    else if (N == 3) {
-        N_i[0] = -1.0;
-        N_i[1] = 0.0;
-        N_i[2] = 1.0;
-    }
-    else if(N == 4) {
-        N_i[0] = -1.0;
-        N_i[1] = -1.0 / std::sqrt(5.0);
-        N_i[2] = 1.0 / std::sqrt(5.0);
-        N_i[3] = 1.0;
-    }
-    else {
-        N_i[0] = -1.0;
-        N_i[1] = -1.0 * std::sqrt(3.0 / 7.0);
-        N_i[2] = 0.0;
-        N_i[3] =  std::sqrt(3.0 / 7.0);
-        N_i[4] = 1.0;
-    }
+void determine_lobatto_weights( int N, double* weights )  // fills weights for the N-point Lobatto rule on [-1, 1]
+{
+  if ( N == 1 ) {
+    weights[0] = 2.0;  // each branch below sums to 2, the length of [-1, 1]
+  } else if ( N == 2 ) {
+    weights[0] = 1.0;
+    weights[1] = 1.0;
+  } else if ( N == 3 ) {
+    weights[0] = 1.0 / 3.0;
+    weights[1] = 4.0 / 3.0;
+    weights[2] = 1.0 / 3.0;
+  } else if ( N == 4 ) {
+    weights[0] = 1.0 / 6.0;
+    weights[1] = 5.0 / 6.0;
+    weights[2] = 5.0 / 6.0;
+    weights[3] = 1.0 / 6.0;
+  } else {  // every other N writes the 5-point weights, so weights must hold 5 entries
+    weights[0] = 1.0 / 10.0;
+    weights[1] = 49.0 / 90.0;
+    weights[2] = 32.0 / 45.0;
+    weights[3] = 49.0 / 90.0;
+    weights[4] = 1.0 / 10.0;
+  }
+}
+void determine_legendre_nodes( int N, double* N_i )  // fills N_i with Gauss-Legendre nodes on [-1, 1], ascending
+{
+  if ( N == 1 ) {
+    N_i[0] = 0.0;
+  } else if ( N == 2 ) {
+    N_i[0] = -1 / std::sqrt( 3 );
+    N_i[1] = 1 / std::sqrt( 3 );
+  } else if ( N == 3 ) {
+    N_i[0] = -std::sqrt( 3.0 / 5.0 );
+    N_i[1] = 0.0;
+    N_i[2] = std::sqrt( 3.0 / 5.0 );
+  } else {  // every other N writes the 4-point rule into N_i[0..3], in the order used by determine_legendre_weights
+    N_i[0] = -1.0 * std::sqrt( ( 15 + 2 * std::sqrt( 30 ) ) / 35 );
+    N_i[1] = -1.0 * std::sqrt( ( 15 - 2 * std::sqrt( 30 ) ) / 35 );
+    N_i[2] = std::sqrt( ( 15 - 2 * std::sqrt( 30 ) ) / 35 );  // positive inner node, mirror image of N_i[1]
+    N_i[3] = std::sqrt( ( 15 + 2 * std::sqrt( 30 ) ) / 35 );  // positive outer node, mirror image of N_i[0]
+  }
 }
 
-void determine_lobatto_weights(int N, double* weights) {
-    if (N == 1) {
-        weights[0] = 2.0;
-    }
-    else if (N == 2) {
-        weights[0] = 1.0;
-        weights[1] = 1.0;
-    } else if (N == 3) {
-        weights[0] = 1.0 / 3.0;
-        weights[1] = 4.0 / 3.0;
-        weights[2] = 1.0 / 3.0;
-    } else if (N == 4) {
-        weights[0] = 1.0 / 6.0;
-        weights[1] = 5.0 / 6.0;
-        weights[2] = 5.0 / 6.0;
-        weights[3] = 1.0 / 6.0;
-    } else {
-        weights[0] = 1.0 / 10.0;
-        weights[1] = 49.0 / 90.0;
-        weights[2] = 32.0 / 45.0;
-        weights[3] = 49.0 / 90.0;
-        weights[4] = 1.0 / 10.0;
-    }
+void determine_legendre_weights( int N, double* W )  // fills W with Gauss-Legendre weights on [-1, 1]
+{
+  if ( N == 1 ) {
+    W[0] = 2.0;  // each branch below sums to 2, the length of [-1, 1]
+  } else if ( N == 2 ) {
+    W[0] = 1.0;
+    W[1] = 1.0;
+  } else if ( N == 3 ) {
+    W[0] = 5.0 / 9.0;
+    W[1] = 8.0 / 9.0;
+    W[2] = 5.0 / 9.0;
+  } else {  // every other N writes the 4-point weights into W[0..3]
+    W[0] = ( 18 - std::sqrt( 30 ) ) / 36.0;  // smaller weight first and last, larger weight in the middle two
+    W[1] = ( 18 + std::sqrt( 30 ) ) / 36.0;
+    W[2] = ( 18 + std::sqrt( 30 ) ) / 36.0;
+    W[3] = ( 18 - std::sqrt( 30 ) ) / 36.0;
+  }
 }
- void determine_legendre_nodes(int N, double* N_i) {
-    if (N==1) {
-       N_i[0] = 0.0; 
-    }
-    else if(N==2) {
-        N_i[0] = -1 / std::sqrt(3);
-        N_i[1] = 1 / std::sqrt(3);
-    }
-    else if(N==3) {
-        N_i[0] = -std::sqrt(3.0/5.0);
-        N_i[1] = 0.0;
-        N_i[2] = std::sqrt(3.0/5.0);
-    }
-    else {
-        N_i[0] = -1.0 * std::sqrt((15 + 2 * std::sqrt(30)) / 35);
-        N_i[1] = -1.0 * std::sqrt((15 - 2 * std::sqrt(30)) / 35);
-        N_i[2] = -std::sqrt((15 - 2 * std::sqrt(30)) / 35);
-        N_i[4] = -std::sqrt((15 + 2 * std::sqrt(30)) / 35);
-    }
- }
 
- void determine_legendre_weights(int N, double* W) {
-    if (N == 1) {
-        W[0] = 2.0;
-    }
-    else if(N == 2) {
-        W[0] = 1.0;
-        W[1] = 1.0;
-    }
-    else if (N == 3) {
-        W[0] = 5.0 / 9.0;
-        W[1] = 8.0 / 9.0;
-        W[2] = 5.0 / 9.0;
-    }
-    else {
-        W[0] = (18 - std::sqrt(30)) / 36.0;
-        W[1] = (18 + std::sqrt(30)) / 36.0;
-        W[2] = (18 + std::sqrt(30)) / 36.0;
-        W[3] = (18 - std::sqrt(30)) / 36.0;
-    }
- }
-
-void iso_map(const double* coord1, const double* coord2, double xi,  double* mapped_coord) {
-    double N1 = 1.0 - xi;
-    double N2 = xi;
-    // double N1 = 0.5 - xi;
-    // double N2 = 0.5 + xi;
-    mapped_coord[0] = N1 * coord1[0] + N2 * coord2[0];
-    mapped_coord[1] =  N1 * coord1[1] + N2 * coord2[1];
+void iso_map( const double* coord1, const double* coord2, double xi, double* mapped_coord )
+{
+  double N1 = 1.0 - xi;  // linear interpolation weights: xi = 0 yields coord1, xi = 1 yields coord2
+  double N2 = xi;
+  // double N1 = 0.5 - xi;
+  // double N2 = 0.5 + xi;
+  mapped_coord[0] = N1 * coord1[0] + N2 * coord2[0];  // x component of the interpolated point
+  mapped_coord[1] = N1 * coord1[1] + N2 * coord2[1];  // y component of the interpolated point
 }
 
-void iso_map2(const double* coord1, const double* coord2, double xi, double* mapped_coord){
-    double N1 = 0.5 - xi;
-    double N2 = 0.5 + xi;
-    mapped_coord[0] = N1 * coord1[0] + N2 * coord2[0];
-    mapped_coord[1] =  N1 * coord1[1] + N2 * coord2[1];
+void iso_map2( const double* coord1, const double* coord2, double xi, double* mapped_coord )
+{
+  double N1 = 0.5 - xi;  // centered linear weights: xi = -0.5 yields coord1, xi = +0.5 yields coord2
+  double N2 = 0.5 + xi;
+  mapped_coord[0] = N1 * coord1[0] + N2 * coord2[0];  // x component of the interpolated point
+  mapped_coord[1] = N1 * coord1[1] + N2 * coord2[1];  // y component of the interpolated point
 }
 
+void iso_map_deriv( const double* coord1, const double* coord2, double* deriv )  // writes half of coord2 - coord1
+{
+  deriv[0] = 0.5 * ( coord2[0] - coord1[0] );  // NOTE(review): 0.5 fits a map with xi in [-1, 1]; iso_map and
+  deriv[1] = 0.5 * ( coord2[1] - coord1[1] );  // iso_map2 have xi-derivative coord2 - coord1 -- confirm intent
+}
 
+bool segmentsIntersect( const double A0[2], const double A1[2], const double B0[2], const double B1[2],
+                        double intersection[2] )
+{
+  auto cross = []( double x0, double y0, double x1, double y1 ) { return x0 * y1 - y0 * x1; };
 
-void iso_map_deriv(const double* coord1, const double* coord2, double* deriv) {
-    deriv[0] = 0.5 * (coord2[0] - coord1[0]);
-    deriv[1] = 0.5 * (coord2[1] - coord1[1]);
-}
+  double dxA = A1[0] - A0[0], dyA = A1[1] - A0[1];
+  double dxB = B1[0] - B0[0], dyB = B1[1] - B0[1];
+  double dxAB = B0[0] - A0[0], dyAB = B0[1] - A0[1];
 
-bool segmentsIntersect(const double A0[2], const double A1[2],
-                       const double B0[2], const double B1[2],
-                       double intersection[2]) {
-    auto cross = [](double x0, double y0, double x1, double y1) {
-        return x0 * y1 - y0 * x1;
-    };
-
-    double dxA = A1[0] - A0[0], dyA = A1[1] - A0[1];
-    double dxB = B1[0] - B0[0], dyB = B1[1] - B0[1];
-    double dxAB = B0[0] - A0[0], dyAB = B0[1] - A0[1];
-
-    double denom = cross(dxA, dyA, dxB, dyB);
-    double numeA = cross(dxAB, dyAB, dxB, dyB);
-    double numeB = cross(dxAB, dyAB, dxA, dyA);
-
-    // Collinear or parallel
-    if (std::abs(denom) < 1e-12) {
-        if (std::abs(numeA) > 1e-12 || std::abs(numeB) > 1e-12)
-            return false; // Parallel, not collinear
-
-        // Collinear: check for overlap
-        auto between = [](double a, double b, double c) {
-            return std::min(a, b) <= c && c <= std::max(a, b);
-        };
-
-        // Check if endpoints overlap
-        for (int i = 0; i < 2; ++i) {
-            if (between(A0[0], A1[0], B0[0]) && between(A0[1], A1[1], B0[1])) {
-                intersection[0] = B0[0];
-                intersection[1] = B0[1];
-                return true;
-            }
-            if (between(A0[0], A1[0], B1[0]) && between(A0[1], A1[1], B1[1])) {
-                intersection[0] = B1[0];
-                intersection[1] = B1[1];
-                return true;
-            }
-            if (between(B0[0], B1[0], A0[0]) && between(B0[1], B1[1], A0[1])) {
-                intersection[0] = A0[0];
-                intersection[1] = A0[1];
-                return true;
-            }
-            if (between(B0[0], B1[0], A1[0]) && between(B0[1], B1[1], A1[1])) {
-                intersection[0] = A1[0];
-                intersection[1] = A1[1];
-                return true;
-            }
-        }
-        // Overlap but not at a single point
-        return false;
-    }
+  double denom = cross( dxA, dyA, dxB, dyB );
+  double numeA = cross( dxAB, dyAB, dxB, dyB );
+  double numeB = cross( dxAB, dyAB, dxA, dyA );
 
-    double ua = numeA / denom;
-    double ub = numeB / denom;
+  // Collinear or parallel
+  if ( std::abs( denom ) < 1e-12 ) {
+    if ( std::abs( numeA ) > 1e-12 || std::abs( numeB ) > 1e-12 ) return false;  // Parallel, not collinear
 
-    if (ua >= 0.0 && ua <= 1.0 && ub >= 0.0 && ub <= 1.0) {
-        intersection[0] = A0[0] + ua * dxA;
-        intersection[1] = A0[1] + ua * dyA;
+    // Collinear: check for overlap
+    auto between = []( double a, double b, double c ) { return std::min( a, b ) <= c && c <= std::max( a, b ); };
+
+    // Check if endpoints overlap
+    for ( int i = 0; i < 2; ++i ) {
+      if ( between( A0[0], A1[0], B0[0] ) && between( A0[1], A1[1], B0[1] ) ) {
+        intersection[0] = B0[0];
+        intersection[1] = B0[1];
+        return true;
+      }
+      if ( between( A0[0], A1[0], B1[0] ) && between( A0[1], A1[1], B1[1] ) ) {
+        intersection[0] = B1[0];
+        intersection[1] = B1[1];
         return true;
+      }
+      if ( between( B0[0], B1[0], A0[0] ) && between( B0[1], B1[1], A0[1] ) ) {
+        intersection[0] = A0[0];
+        intersection[1] = A0[1];
+        return true;
+      }
+      if ( between( B0[0], B1[0], A1[0] ) && between( B0[1], B1[1], A1[1] ) ) {
+        intersection[0] = A1[0];
+        intersection[1] = A1[1];
+        return true;
+      }
     }
+    // Collinear, but no endpoint of either segment lies within the other: the segments do not overlap
     return false;
-}
-
-
+  }
 
+  double ua = numeA / denom;
+  double ub = numeB / denom;
 
+  if ( ua >= 0.0 && ua <= 1.0 && ub >= 0.0 && ub <= 1.0 ) {
+    intersection[0] = A0[0] + ua * dxA;
+    intersection[1] = A0[1] + ua * dyA;
+    return true;
+  }
+  return false;
+}
 
 // void lagrange_shape_functions(int N, double xi, const double* nodes, double* N_i) {
 //     for(int i = 0; i < N; ++i) {
@@ -228,16 +217,16 @@ bool segmentsIntersect(const double A0[2], const double A1[2],
 //     mapped_coords[1] = 0.0;
 //     for(int i = 0; i < N; ++i) {
 //         mapped_coords[0] += shape_functions[i] * coords[2 * i];
-//         mapped_coords[1] += shape_functions[i] * coords[2 * i + 1]; 
+//         mapped_coords[1] += shape_functions[i] * coords[2 * i + 1];
 //     }
 // }
 
-
 // void iso_map_deriv(double xi, const double* coords, int N, double* dxi_dx) {
 //     double mapped_coords[2] = {0.0, 0.0};
 //     double d_mapped_coords[2] = {0.0, 0.0};
 //     double dxi = 1.0;
-//     __enzyme_autodiff<void>( iso_map, enzyme_const, coords, enzyme_const, N, enzyme_dup, mapped_coords, d_mapped_coords, enzyme_dup, xi, dxi);
+//     __enzyme_autodiff<void>( iso_map, enzyme_const, coords, enzyme_const, N, enzyme_dup, mapped_coords,
+//     d_mapped_coords, enzyme_dup, xi, dxi);
 
 //     dxi_dx[0] = d_mapped_coords[0];
 //     dxi_dx[1] = d_mapped_coords[1];
@@ -256,8 +245,8 @@ bool segmentsIntersect(const double A0[2], const double A1[2],
 //     return J;
 // }
 
-
-// double newtons_method(const double* p, const double* coord1, const double* coord2, double tol = 1e-20, int iter = 20) {
+// double newtons_method(const double* p, const double* coord1, const double* coord2, double tol = 1e-20, int iter = 20)
+// {
 //     double xi = 0.0; //initial guess
 
 //     for(int i = 0; i < iter; ++i) {
@@ -284,42 +273,37 @@ bool segmentsIntersect(const double A0[2], const double A1[2],
 //             break;
 //         }
 
-        
 //     }
 //     return xi;
 // }
 
+void find_intersection( const double* A0, const double* A1, const double* p, const double* nB, double* intersection )
+{
+  double tA[2] = { A1[0] - A0[0], A1[1] - A0[1] };
+  double d[2] = { p[0] - A0[0], p[1] - A0[1] };
 
+  double det = tA[0] * nB[1] - tA[1] * nB[0];
 
+  if ( std::abs( det ) < 1e-12 ) {
+    intersection[0] = p[0];
+    intersection[1] = p[1];
+    return;
+  }
 
-void find_intersection(const double* A0, const double* A1, const double* p, const double* nB, double* intersection) {
-    double tA[2] = {A1[0] - A0[0], A1[1] - A0[1] };
-    double d[2] = {p[0] - A0[0], p[1] - A0[1]};
-
-    double det = tA[0] * nB[1] - tA[1] * nB[0];
-
-    if(std::abs(det) < 1e-12) {
-        intersection[0] = p[0];
-        intersection[1] = p[1];
-        return;
-    }
-
-    double inv_det = 1.0 / det;
-
-    double alpha = (d[0] * nB[1] - d[1] * nB[0]) * inv_det;
+  double inv_det = 1.0 / det;
 
-    // if (alpha < 0.0) alpha = 0.0;
-    // if (alpha > 1.0) alpha = 1.0;
+  double alpha = ( d[0] * nB[1] - d[1] * nB[0] ) * inv_det;
 
-    intersection[0] = (A0[0] + alpha * tA[0]);
-    intersection[1] = A0[1]  + alpha * tA[1];
+  // if (alpha < 0.0) alpha = 0.0;
+  // if (alpha > 1.0) alpha = 1.0;
 
+  intersection[0] = ( A0[0] + alpha * tA[0] );
+  intersection[1] = A0[1] + alpha * tA[1];
 }
 
-
 // void get_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* projections) {
 //     double nA[2] = {0.0};
-//     double nB[2] = {0.0}; 
+//     double nB[2] = {0.0};
 //     find_normal(A0, A1, nA);
 //     find_normal(B0, B1, nB);
 //     // double eta_values[N];
@@ -334,16 +318,13 @@ void find_intersection(const double* A0, const double* A1, const double* p, cons
 
 //         // std::cout << "gx: " << p[0] << "gy: " << p[1] << std::endl;
 //         // // double xiA = newtons_method(p, A0, A1);
-//         // // tribol::ProjectPointToSegment(p[0], p[1],  nB[0], nB[1], A0[0], A0[1], px, py); 
+//         // // tribol::ProjectPointToSegment(p[0], p[1],  nB[0], nB[1], A0[0], A0[1], px, py);
 //         // std::cout << "px: " << p[0] << ", " << "py: " << p[1] <<std::endl;
 
 //         find_intersection(A0, A1, p, nB, intersection);
 
-
-
 //         // std::cout << "intersection: " << intersection[0] << ',' << intersection[1] << std::endl;
 
-
 //         double dx = A1[0] - A0[0];
 //         double dy = A1[1] - A0[1];
 //         double len2 = dx*dx + dy*dy;
@@ -360,8 +341,6 @@ void find_intersection(const double* A0, const double* A1, const double* p, cons
 //         double dy_gap = intersection[1] - p[1];
 //         double gap = dx_gap * nB_unit[0] + dy_gap * nB_unit[1];
 
-
-
 //         // if (gap > 0) {
 //         //     xiA_was_inside[i] = true;  // mark this slot as valid
 //         // }
@@ -372,7 +351,8 @@ void find_intersection(const double* A0, const double* A1, const double* p, cons
 //             // std::cout << "Segments intersect" << std::endl;
 //             // if(xiA < 0.0 || xiA > 1.0) {
 //                 // std::cout << "entered loop" << std::endl;
-//                 // std::cout << "Seg intersection: " << seg_intersection[0] << ", " << seg_intersection[1] << std::endl;
+//                 // std::cout << "Seg intersection: " << seg_intersection[0] << ", " << seg_intersection[1] <<
+//                 std::endl;
 //                 // std::cout << "xia before: " << xiA << std::endl;
 //                 xiA = ((seg_intersection[0] - A0[0]) * dx + (seg_intersection[1] - A0[1]) * dy) / len2;
 //                 // std::cout << "xia after: " << xiA << std::endl;
@@ -389,7 +369,8 @@ void find_intersection(const double* A0, const double* A1, const double* p, cons
 //     }
 // }
 
-// void get_endpoint_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* proj0, double* proj1) {
+// void get_endpoint_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* proj0,
+// double* proj1) {
 //     double nA[2];
 //     find_normal(A0, A1, nA);
 //     find_intersection(B0, B1, A0, nA, proj0);
@@ -397,17 +378,17 @@ void find_intersection(const double* A0, const double* A1, const double* p, cons
 
 // }
 
-
-// void get_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* projections, double del) {
-//     double nA[2] = {0.0}; 
+// void get_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* projections,
+// double del) {
+//     double nA[2] = {0.0};
 //     find_normal(A0, A1, nA);
-    
-//     double end_points[2] = {-0.5, 0.5}; 
+
+//     double end_points[2] = {-0.5, 0.5};
 //     for (int i = 0; i < 2; ++i) {
 //         double p[2] = {0.0};
 //         iso_map2(B0, B1, end_points[i], p);
 //         std::cout << "EndPoints: " << end_points[0] << ", " << end_points[1] << std::endl;
-        
+
 //         double intersection[2] = {0.0};
 //         find_intersection(B0, B1, p, nA, intersection);
 //         std::cout << "intersection: " << intersection[0] << ", " << intersection[1] << std::endl;
@@ -419,53 +400,51 @@ void find_intersection(const double* A0, const double* A1, const double* p, cons
 //         // std::cout << "len2: " << len2 << std::endl;
 //         // double xiA = ((intersection[0] - A0[0]) * dx + (intersection[1] - A0[1]) * dy) / len2;
 //         // std::cout << "Xia: " << xiA << std::endl;
-        
+
 //         // Apply constraints and convert to reference interval
 //         // xiA = std::max(del, std::min(1.0 - del, xiA)) - 0.5;
-  
+
 //         // xiA = 0.5 - xiA;
 //         projections[i] = intersection[i];
 //     }
 // }
-void get_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* projections, double del) {
-    double nB[2] = {0.0};
-    find_normal(B0, B1, nB);
-    double B_endpoints[2][2];
-    B_endpoints[0][0] = B0[0]; B_endpoints[0][1] = B0[1];
-    B_endpoints[1][0] = B1[0]; B_endpoints[1][1] = B1[1];
-    
-    for (int i =0; i < 2; ++i) {
-        //prohect A endpoints onto B
-        double intersection[2] = {0.0};
-        find_intersection(A0,A1, B_endpoints[i], nB, intersection);
-
-        // std::cout << "Intersection: " << intersection[0] << ", " << intersection[1] << std::endl;
-    
-
-        //convert to parametric coords
-        double dx = A1[0] - A0[0];
-        // std::cout << "dx: " << dx << std::endl;
-        double dy = A1[1] - A0[1];
-        // std::cout << "dy: " << dy << std::endl;
-        double len2 = dx*dx + dy*dy;
-        double alpha = ((intersection[0] - A0[0]) * dx + (intersection[1] - A0[1]) * dy) / len2;
-        //map to xiB
-        // std::cout << "alpha: " << alpha << std::endl;
-        // double xiB = 0.5 - alpha;
-        double xiB = alpha - 0.5;
-        // xiB = std::max(-0.5, std::min(0.5, xiB));
-        
-        // std::cout << "xi on B: " << xiB << std::endl;
-        
-        projections[i] = xiB;
-    }
-
+void get_projections( const double* A0, const double* A1, const double* B0, const double* B1, double* projections,
+                      double del )  // NOTE: del is not used in the body below
+{  // writes projections[0] (from B0) and projections[1] (from B1)
+  double nB[2] = { 0.0 };
+  find_normal( B0, B1, nB );
+  double B_endpoints[2][2];
+  B_endpoints[0][0] = B0[0];
+  B_endpoints[0][1] = B0[1];
+  B_endpoints[1][0] = B1[0];
+  B_endpoints[1][1] = B1[1];
+
+  for ( int i = 0; i < 2; ++i ) {
+    // cast this endpoint of B along nB onto the line through A0 and A1
+    double intersection[2] = { 0.0 };
+    find_intersection( A0, A1, B_endpoints[i], nB, intersection );
+
+    // std::cout << "Intersection: " << intersection[0] << ", " << intersection[1] << std::endl;
+
+    // parametric position of the intersection along A0 -> A1 (0 at A0, 1 at A1)
+    double dx = A1[0] - A0[0];
+    // std::cout << "dx: " << dx << std::endl;
+    double dy = A1[1] - A0[1];
+    // std::cout << "dy: " << dy << std::endl;
+    double len2 = dx * dx + dy * dy;  // squared length of A; zero (unguarded) when A0 and A1 coincide
+    double alpha = ( ( intersection[0] - A0[0] ) * dx + ( intersection[1] - A0[1] ) * dy ) / len2;
+    // shift by 0.5 so that A0 corresponds to -0.5 and A1 to +0.5
+    //  std::cout << "alpha: " << alpha << std::endl;
+    //  double xiB = 0.5 - alpha;
+    double xiB = alpha - 0.5;  // NOTE(review): named xiB although it is measured along A0 -> A1 -- confirm intent
+    // xiB = std::max(-0.5, std::min(0.5, xiB));
+
+    // std::cout << "xi on B: " << xiB << std::endl;
+
+    projections[i] = xiB;
+  }
 }
 
-
-
-
-
 // void compute_integration_bounds(const double* projections, double* integration_bounds, int N) {
 //     double xi_min = projections[0];
 //     double xi_max = projections[0];
@@ -474,7 +453,7 @@ void get_projections(const double* A0, const double* A1, const double* B0, const
 //             xi_min = projections[i];
 //         }
 //         if(xi_max < projections[i]) {
-//             xi_max = projections[i]; 
+//             xi_max = projections[i];
 //         }
 
 //     }
@@ -485,7 +464,7 @@ void get_projections(const double* A0, const double* A1, const double* B0, const
 //     if(xi_min > 0.5) {
 //         xi_min  = 0.5;
 //     }
-//     if (xi_min < -0.5) { 
+//     if (xi_min < -0.5) {
 //         xi_min = -0.5;
 //     }
 //     if (xi_max > 0.5) {
@@ -500,114 +479,110 @@ void get_projections(const double* A0, const double* A1, const double* B0, const
 
 // }
 
-void compute_integration_bounds(const double* projections, double* integration_bounds, double del) {
-    // std::cout << "Projections in Compute bounds: " << projections[0] << ", " <<  projections[1] << std::endl;
-    double xi_min = projections[0];
-    double xi_max = projections[0];
-    for (int i = 0; i < 2; ++i) {
-        if (xi_min > projections[i]) {
-            xi_min = projections[i];
-        }
-        if(xi_max < projections[i]) {
-            xi_max = projections[i]; 
-        }
-
-    }
-
-    // std::cout << "BEFORE xi min: " << xi_min << " xi_max: " << xi_max << std::endl;
-
-
-    if (xi_max < -0.5 -del ) {
-        xi_max = -0.5 - del;
-    }
-    if(xi_min > 0.5 + del) {
-        xi_min  = 0.5 + del;
+void compute_integration_bounds( const double* projections, double* integration_bounds, double del )
+{
+  // std::cout << "Projections in Compute bounds: " << projections[0] << ", " <<  projections[1] << std::endl;
+  double xi_min = projections[0];
+  double xi_max = projections[0];
+  for ( int i = 0; i < 2; ++i ) {
+    if ( xi_min > projections[i] ) {
+      xi_min = projections[i];
     }
-    if (xi_min < -0.5 - del) { 
-        xi_min = -0.5 -del;
+    if ( xi_max < projections[i] ) {
+      xi_max = projections[i];
     }
-    if (xi_max > 0.5 + del) {
-        xi_max = 0.5 + del;
-    }
-
-    // if (xi_max < -0.5) {
-    //     xi_max = -0.5;
-    // }
-    // if(xi_min > 0.5) {
-    //     xi_min  = 0.5;
-    // }
-    // if (xi_min < -0.5) { 
-    //     xi_min = -0.5;
-    // }
-    // if (xi_max > 0.5) {
-    //     xi_max = 0.5;
-    // }
-
-    integration_bounds[0] = xi_min;
-    integration_bounds[1] = xi_max;
-    // std::cout << "xi min: " << xi_min << " xi_max: " << xi_max << std::endl;
+  }
+
+  // std::cout << "BEFORE xi min: " << xi_min << " xi_max: " << xi_max << std::endl;
+
+  if ( xi_max < -0.5 - del ) {
+    xi_max = -0.5 - del;
+  }
+  if ( xi_min > 0.5 + del ) {
+    xi_min = 0.5 + del;
+  }
+  if ( xi_min < -0.5 - del ) {
+    xi_min = -0.5 - del;
+  }
+  if ( xi_max > 0.5 + del ) {
+    xi_max = 0.5 + del;
+  }
+
+  // if (xi_max < -0.5) {
+  //     xi_max = -0.5;
+  // }
+  // if(xi_min > 0.5) {
+  //     xi_min  = 0.5;
+  // }
+  // if (xi_min < -0.5) {
+  //     xi_min = -0.5;
+  // }
+  // if (xi_max > 0.5) {
+  //     xi_max = 0.5;
+  // }
+
+  integration_bounds[0] = xi_min;
+  integration_bounds[1] = xi_max;
+  // std::cout << "xi min: " << xi_min << " xi_max: " << xi_max << std::endl;
 }
 
-
-void modify_bounds(double* integration_bounds, double del, double* modified_bounds) {
-    double xi = 0.0;
-    double int_bound[2] = {0.0};
-    for(int i = 0; i < 2; ++i) {
-        int_bound[i] = integration_bounds[i];
-    }
-    // int_bound[0] -= del;
-    // int_bound[1] += del;
-
-
-    for (int i = 0; i < 2; ++i) {
-        double xi_hat = 0.0;
-        // xi = 0.5 * (integration_bounds[i] + 1.0);
-        xi = int_bound[i] + 0.5;
-        // std::cout << "xi in smoothoing: " << xi << std::endl;
-        if (0.0 - del <= xi && xi <= del) {
-            xi_hat = (1.0/(4*del)) * (xi*xi) + 0.5 * xi + del/4.0;
-            // std::cout << "zone1" << std::endl;
-        }
-        else if((1.0 - del) <= xi && xi <= 1.0 + del) {
-        double b = -1.0/(4.0*del);
-        double c = 0.5 + 1.0/(2.0*del);
-        double d = 1.0 - del + (1.0/(4.0*del)) * pow(1.0-del, 2) - 0.5*(1.0-del) - (1.0-del)/(2.0*del);
-
-        xi_hat = b*xi*xi + c*xi + d;
-
-            // xi_hat = (1.0/del) * xi*xi - (2.0*(1.0-del)/del) * xi + (-1.0 + 1.0/del);
-            // xi_hat = -1.0/del * xi*xi + 2.0/del * xi + (1.0 - 1.0/del);
-
-
-            // xi_hat= (-1.0/(del*del))*pow(xi,3) + ((3.0/(del*del)) - (2.0/del))*pow(xi,2) + ((-3.0/(del*del)) + (4.0/del))*xi + (1.0 + (1.0/(del*del)) - (2.0/del));
-
-
-            // xi_hat = -1.0/(del*del)*pow(xi,3) + (3.0+del)/(del*del)*pow(xi,2) + (1.0 + (-3.0-2.0*del)/(del*del))*xi + (1.0+del)/(del*del);
-
-        //     double d = 1 - del
-        //  + (1.0 / (4.0 * del * del)) * (1 - 3 * del + 3 * del * del - del * del * del)
-        //  - ((-1.0 / (4.0 * del) + 3.0 / (4.0 * del * del)) * (1 - 2 * del + del * del))
-        //  - ((5.0 / 4.0 + 1.0 / (2.0 * del) - 3.0 / (4.0 * del * del)) * (1 - del));
-        //         xi_hat = 
-        // -1.0*(xi*xi*xi) / (4.0 * del * del)
-        // + (-1.0/(4.0*del) + 3.0/(4.0*del*del)) * (xi*xi)
-        // + (1.25 + 1.0/(2.0*del) - 3.0/(4.0*del*del)) * xi
-        // + d;
-            // std::cout << "d: " << d << std::endl;
-   
-        //  std::cout << "zone2" << std::endl;
-        }
-        else if(del <= xi && xi <= (1.0 - del)) { 
-            xi_hat = xi;
-            // std::cout << "zone3" << std::endl;
-        }
-        else{ 
-            // std::cerr << "Xi did not fall in an expected range for modifying bounds for 1" << std::endl;
-        }
-        // modified_bounds[i] = 2.0 * xi_hat - 1;
-        modified_bounds[i] = xi_hat - 0.5;
+// Smooths both integration bounds. Each bound is shifted by +0.5, passed through a piecewise map
+// (quadratic blends of half-width del around 0 and around 1, identity in between), shifted back by
+// -0.5 and stored in modified_bounds. NOTE(review): a shifted value that matches no branch silently
+// produces -0.5, and both blend branches divide by del, so del == 0 is not handled.
+void modify_bounds( double* integration_bounds, double del, double* modified_bounds )
+{
+  double xi = 0.0;
+  double int_bound[2] = { 0.0 };
+  for ( int i = 0; i < 2; ++i ) {
+    int_bound[i] = integration_bounds[i];
+  }
+  // int_bound[0] -= del;
+  // int_bound[1] += del;
+
+  for ( int i = 0; i < 2; ++i ) {
+    double xi_hat = 0.0;
+    // xi = 0.5 * (integration_bounds[i] + 1.0);
+    xi = int_bound[i] + 0.5;
+    // Lower blend: algebraically (xi + del)^2 / (4 * del), i.e. 0 at xi = -del and del at xi = del.
+    if ( 0.0 - del <= xi && xi <= del ) {
+      xi_hat = ( 1.0 / ( 4 * del ) ) * ( xi * xi ) + 0.5 * xi + del / 4.0;
+    } else if ( ( 1.0 - del ) <= xi && xi <= 1.0 + del ) {
+      // Upper blend: d is chosen so that xi_hat equals 1 - del at xi = 1 - del.
+      double b = -1.0 / ( 4.0 * del );
+      double c = 0.5 + 1.0 / ( 2.0 * del );
+      double d = 1.0 - del + ( 1.0 / ( 4.0 * del ) ) * pow( 1.0 - del, 2 ) - 0.5 * ( 1.0 - del ) -
+                 ( 1.0 - del ) / ( 2.0 * del );
+
+      xi_hat = b * xi * xi + c * xi + d;
+
+      // xi_hat = (1.0/del) * xi*xi - (2.0*(1.0-del)/del) * xi + (-1.0 + 1.0/del);
+      // xi_hat = -1.0/del * xi*xi + 2.0/del * xi + (1.0 - 1.0/del);
+
+      // xi_hat= (-1.0/(del*del))*pow(xi,3) + ((3.0/(del*del)) - (2.0/del))*pow(xi,2) + ((-3.0/(del*del)) +
+      // (4.0/del))*xi + (1.0 + (1.0/(del*del)) - (2.0/del));
+
+      // xi_hat = -1.0/(del*del)*pow(xi,3) + (3.0+del)/(del*del)*pow(xi,2) + (1.0 + (-3.0-2.0*del)/(del*del))*xi +
+      // (1.0+del)/(del*del);
+
+      //     double d = 1 - del
+      //  + (1.0 / (4.0 * del * del)) * (1 - 3 * del + 3 * del * del - del * del * del)
+      //  - ((-1.0 / (4.0 * del) + 3.0 / (4.0 * del * del)) * (1 - 2 * del + del * del))
+      //  - ((5.0 / 4.0 + 1.0 / (2.0 * del) - 3.0 / (4.0 * del * del)) * (1 - del));
+      //         xi_hat =
+      // -1.0*(xi*xi*xi) / (4.0 * del * del)
+      // + (-1.0/(4.0*del) + 3.0/(4.0*del*del)) * (xi*xi)
+      // + (1.25 + 1.0/(2.0*del) - 3.0/(4.0*del*del)) * xi
+      // + d;
+    } else if ( del <= xi && xi <= ( 1.0 - del ) ) {
+      // Interior: the value passes through unchanged.
+      xi_hat = xi;
+    } else {
+      // std::cerr << "Xi did not fall in an expected range for modifying bounds for 1" << std::endl;
     }
-    // std::cout << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
+    // modified_bounds[i] = 2.0 * xi_hat - 1;
+    modified_bounds[i] = xi_hat - 0.5;
+  }
 }
 
 // void modify_bounds(double* integration_bounds, double del, double* modified_bounds) {
@@ -615,7 +590,6 @@ void modify_bounds(double* integration_bounds, double del, double* modified_boun
 //     // integration_bounds[0] -= del;
 //     // integration_bounds[1] += del;
 
-
 //     for (int i = 0; i < 2; ++i) {
 //         double xi_hat = 0.0;
 //         // xi = 0.5 * (integration_bounds[i] + 1.0);
@@ -629,11 +603,11 @@ void modify_bounds(double* integration_bounds, double del, double* modified_boun
 //             xi_hat =  1.0 -(((1.0 - xi) * (1.0 - xi)) / (2 * del * (1.0 - del)));
 //             // std::cout << "zone2" << std::endl;
 //         }
-//         else if(del <= xi && xi <= (1.0)) { 
+//         else if(del <= xi && xi <= (1.0)) {
 //             xi_hat = xi;
 //             // std::cout << "zone3" << std::endl;
 //         }
-//         else{ 
+//         else{
 //             std::cerr << "Xi did not fall in an expected range for modifying bounds for 1" << std::endl;
 //         }
 //         // modified_bounds[i] = 2.0 * xi_hat - 1;
@@ -642,100 +616,91 @@ void modify_bounds(double* integration_bounds, double del, double* modified_boun
 //     // std::cout << "modified bounds: " << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
 // }
 
-void modify_bounds_for_weight(double* integration_bounds, double del, double* modified_bounds) {
-    double xi = 0.0;
-    integration_bounds[0];
-    integration_bounds[1];
-    for (int i = 0; i < 2; ++i) {
-        double xi_hat = 0.0;
-        // xi = 0.5 * (integration_bounds[i] + 1.0);
-        xi = integration_bounds[i] + 0.5;
-        if (xi < std::abs(1e-10)) {
-            xi = 0.0;
-        
-        }
-        // std::cout << "xi: " << xi << std::endl;
-        if (0 <= xi && xi <= del) {
-            xi_hat = ((xi)*(xi)) / (2.0 * del * (1.0 - del));
-            // std::cout << "zone1" << std::endl;
-        }
-        else if((1.0 - del) <= xi && xi <= 1.0) {
-            xi_hat =  1.0 -(((1.0 - xi) * (1.0 - xi)) / (2 * del * (1.0 - del)));
-            // std::cout << "zone2" << std::endl;
-        }
-        else if(del <= xi && xi <= (1.0 - del)) { 
-            xi_hat = ((2.0 * xi) - del) / (2.0 * (1.0 - del));
-            // std::cout << "zone3" << std::endl;
-        }
-        else{ 
-            std::cerr << "Xi did not fall in an expected range for modifying bounds for weight fpr 2" << std::endl;
-        }
-        // modified_bounds[i] = 2.0 * xi_hat - 1;
-        modified_bounds[i] = xi_hat - 0.5;
+// Variant of modify_bounds with a different piecewise map: quadratic within del of 0 and of 1, linear in
+// between, every piece normalised by (1 - del). NOTE(review): del == 0 or del == 1 divides by zero.
+void modify_bounds_for_weight( double* integration_bounds, double del, double* modified_bounds )
+{
+  double xi = 0.0;
+  for ( int i = 0; i < 2; ++i ) {
+    double xi_hat = 0.0;
+    // xi = 0.5 * (integration_bounds[i] + 1.0);
+    xi = integration_bounds[i] + 0.5;
+    // Snap tiny values to exactly 0; note that this also maps every negative xi to 0.
+    if ( xi < 1e-10 ) {
+      xi = 0.0;
+    }
+    if ( 0 <= xi && xi <= del ) {
+      xi_hat = ( ( xi ) * ( xi ) ) / ( 2.0 * del * ( 1.0 - del ) );
+      // std::cout << "zone1" << std::endl;
+    } else if ( ( 1.0 - del ) <= xi && xi <= 1.0 ) {
+      xi_hat = 1.0 - ( ( ( 1.0 - xi ) * ( 1.0 - xi ) ) / ( 2 * del * ( 1.0 - del ) ) );
+      // std::cout << "zone2" << std::endl;
+    } else if ( del <= xi && xi <= ( 1.0 - del ) ) {
+      xi_hat = ( ( 2.0 * xi ) - del ) / ( 2.0 * ( 1.0 - del ) );
+      // std::cout << "zone3" << std::endl;
+    } else {
+      std::cerr << "Xi did not fall in an expected range for modifying bounds for weight for 2" << std::endl;
     }
-    // std::cout << "modified bounds: " << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
+    // modified_bounds[i] = 2.0 * xi_hat - 1;
+    modified_bounds[i] = xi_hat - 0.5;
+  }
+  // std::cout << "modified bounds: " << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
 }
 
-
-void compute_quadrature_point(double* integration_bounds, const double* A0, const double* A1, int N, double* quad_points) {
-    // std::cout << "=== ENTERING compute_quadrature_point ===" << std::endl;
-    double eta_values[N];
-    determine_legendre_nodes(N, eta_values);
-    // for(int i = 0; i < N; ++i) {
-    //     eta_values[i] = (eta_values[i] + 1) / 2;
-    // }
-
-
-
-    // for (int i = 0; i < N; ++i) {
-    //     eta_values[i] = eta_values[i] - 0.5;  // scale to [-0.5, 0.5] per suggestion of mike.
-    // }
-
-    // for (int i = 0; i < N; ++i) {
-    //     eta_values[i] *= 0.5;
-    // }
-
-    double xi_min = integration_bounds[0];
-    double xi_max = integration_bounds[1];
-    // std::cout << "xi values: " << xi_min << ", " << xi_max << std::endl;
-
-    for ( int i = 0; i < N; ++i) {
-        double xi_i = 0.5 * (xi_max - xi_min) * eta_values[i] + 0.5 * (xi_max + xi_min); //this was th original implementation
-        // double xi_i = 0.5 * (xi_max + xi_min) + eta_values[i] + 0.5 *(xi_max - xi_min); //mikes suggestions
-        // double xi_i = xi_min + (xi_max - xi_min) * eta_values[i];
-        // xi_i *= 0.5;
-        double mapped_coords[2] = {0.0, 0.0};
-
-
-        iso_map2(A0, A1, xi_i, mapped_coords);
-        quad_points[2 * i] = mapped_coords[0];
-        quad_points[2 * i + 1] = mapped_coords[1];
-        // std::cout << "x: " << quad_points[2 * i] << " y: " << quad_points[2 * i + 1] << std::endl;
-        
-    }
-    
-     
+// Computes N quadrature points from A0 and A1. Each node from determine_legendre_nodes is mapped affinely
+// onto [integration_bounds[0], integration_bounds[1]] (assumes nodes on [-1, 1] - TODO confirm) and passed
+// through iso_map2; the (x, y) result is stored at quad_points[2 * i] and quad_points[2 * i + 1].
+void compute_quadrature_point( double* integration_bounds, const double* A0, const double* A1, int N,
+                               double* quad_points )
+{
+  double eta_values[N];
+  determine_legendre_nodes( N, eta_values );
+  // for(int i = 0; i < N; ++i) {
+  //     eta_values[i] = (eta_values[i] + 1) / 2;
+  // }
+
+  // for (int i = 0; i < N; ++i) {
+  //     eta_values[i] = eta_values[i] - 0.5;  // scale to [-0.5, 0.5] per suggestion of mike.
+  // }
+
+  // for (int i = 0; i < N; ++i) {
+  //     eta_values[i] *= 0.5;
+  // }
+
+  double xi_min = integration_bounds[0];
+  double xi_max = integration_bounds[1];
+
+  for ( int i = 0; i < N; ++i ) {
+    double xi_i =
+        0.5 * ( xi_max - xi_min ) * eta_values[i] + 0.5 * ( xi_max + xi_min );  // this was the original implementation
+    // double xi_i = 0.5 * (xi_max + xi_min) + eta_values[i] + 0.5 *(xi_max - xi_min); //mikes suggestions
+    // double xi_i = xi_min + (xi_max - xi_min) * eta_values[i];
+    // xi_i *= 0.5;
+    double mapped_coords[2] = { 0.0, 0.0 };
+
+    iso_map2( A0, A1, xi_i, mapped_coords );
+    quad_points[2 * i] = mapped_coords[0];
+    quad_points[2 * i + 1] = mapped_coords[1];
+  }
 }
 
-void assign_weights(const double* integration_bounds, int N, double* weights) {
-    double ref_weights[N];
-    determine_legendre_weights(N, ref_weights);
-    // std::cout << integration_bounds[0] << ' ' << integration_bounds[1] << std::endl;
-    double J = 0.0;
-  
+// Writes weights[i] = ref_weights[i] * J for the N weights from determine_legendre_weights.
+void assign_weights( const double* integration_bounds, int N, double* weights )
+{
+  double ref_weights[N];  // NOTE(review): variable-length array, a compiler extension in C++
+  determine_legendre_weights( N, ref_weights );
+  double J = 0.0;
 
+  double xi_min = integration_bounds[0];
+  double xi_max = integration_bounds[1];
 
-    double xi_min = integration_bounds[0];
-    double xi_max = integration_bounds[1];
-    
-    J = 0.5 * (xi_max - xi_min);
+  J = 0.5 * ( xi_max - xi_min );  // half the width of the integration interval
 
-    for( int i = 0; i < N; ++i) {
-        weights[i] = ref_weights[i] * J;
-    }
+  for ( int i = 0; i < N; ++i ) {
+    weights[i] = ref_weights[i] * J;
+  }
 }
 
-
 // double compute_gap(const double* p, const double* B0, const double* B1, double* A0, double* A1, double* nB) {
 //     double nB_orig[2] = {nB[0], nB[1]};
 //     double len = std::sqrt(nB[0] * nB[0] + nB[1] * nB[1]);
@@ -748,7 +713,7 @@ void assign_weights(const double* integration_bounds, int N, double* weights) {
 //     find_intersection(B0, B1, p, nB_orig, intersection);
 
 //     // std::cout << "intersection at B: " << intersection[0] << ", " << intersection[1] << std::endl;
-  
+
 //     //  std::cout << "intersection for gap: " << intersection[0] << ',' << intersection[1] << std::endl;
 
 //     // double eta = newtons_method(p, B0, B1); //closest projection of p onto elem B
@@ -769,175 +734,172 @@ void assign_weights(const double* integration_bounds, int N, double* weights) {
 //     //     gap = (A0[1] - p[1]) * nB_orig[1];
 //     //     // std::cout << "gap in loop: " << gap << std::endl;
 //     //     return gap;
-        
 
 //     // }
 //     // std::cout << "gap in compute_gap: " << gap << std::endl;
 //     return gap;
 // }
 
-double compute_gap(const double* p, const double* B0, const double* B1, const double* nA, const double* A0, const double* A1) {
-    double nA_orig[2] = {nA[0], nA[1]};
-    // std::cout << "nA: " << nA_orig[0] << ", " << nA_orig[1] << std::endl;
-
-    double len = std::sqrt(nA[0] * nA[0] + nA[1] * nA[1]);
-    // std::cout << "LEN: " << len << std::endl;
-    nA_orig[0] /= len;
-    nA_orig[1] /= len;
-    double intersection[2] = {0.0};
-    find_intersection(B0, B1, p, nA_orig, intersection);
-    // std::cout << "INTERSECTION: " << intersection[0] << ", " << intersection[1] << std::endl;
-
-
-    double dx = intersection[0] - p[0];
-    double dy = intersection[1] - p[1];
-
-    double gap = dx * nA_orig[0] + dy * nA_orig[1];
-    gap *= -1;
-    // std::cout << "GAP: " << gap << std::endl;
-    return gap;
+// Returns the signed gap at point p relative to segment B0-B1, measured along the normalised nA:
+// -( ( intersection - p ) . nA / |nA| ), where intersection is produced by find_intersection. A0 and A1
+// are unused. NOTE(review): a zero-length nA makes len == 0 and the normalisation divides by zero.
+double compute_gap( const double* p, const double* B0, const double* B1, const double* nA, const double* A0,
+                    const double* A1 )
+{
+  double nA_orig[2] = { nA[0], nA[1] };
+
+  double len = std::sqrt( nA[0] * nA[0] + nA[1] * nA[1] );
+  nA_orig[0] /= len;
+  nA_orig[1] /= len;
+  double intersection[2] = { 0.0 };
+  find_intersection( B0, B1, p, nA_orig, intersection );
+
+  double dx = intersection[0] - p[0];
+  double dy = intersection[1] - p[1];
+
+  double gap = dx * nA_orig[0] + dy * nA_orig[1];
+  // Sign convention: the projection onto the unit normal is negated before it is returned.
+  gap *= -1;
+  return gap;
 }
 
+// Scales gap by eta = max(0, -(nA . nB)): the gap is kept (weighted by -dot) only when the dot product
+// of the two normals is negative, and is zeroed otherwise.
+double compute_modified_gap( double gap, double* nA, double* nB )
+{
+  double dot = nA[0] * nB[0] + nA[1] * nB[1];
+  double eta = ( dot < 0 ) ? -dot : 0.0;
 
-double compute_modified_gap(double gap, double* nA, double* nB) {
-    double dot = nA[0] * nB[0] + nA[1] * nB[1];
-    double eta = (dot < 0) ? -dot:0.0;
-
-//    if(nu >= 0) {
-//         nu = 0;
-//     } 
+  //    if(nu >= 0) {
+  //         nu = 0;
+  //     }
 
-//     gap *= nu;
-    // std::cout << "gap in modify gap: " << gap << std::endl;
-    // std::cout << "eta: " << eta << std::endl;
-    return gap * eta;
+  //     gap *= nu;
+  return gap * eta;
 }
 
-
-double compute_contact_potential(double gap, double k1, double k2) {
-    if (gap < 1e-12) {
-        return 0;
-    }
-    double gap1 = gap;
-    double pot = k1 * (gap1 * gap1) - k2 * (gap1 * gap1 * gap1);
-    // std::cout << "potential: " << pot << std::endl;
-    return pot;
+// Contact potential k1 * gap^2 - k2 * gap^3; returns 0 when gap < 1e-12, which includes every negative gap.
+double compute_contact_potential( double gap, double k1, double k2 )
+{
+  if ( gap < 1e-12 ) {
+    return 0;
+  }
+  double gap1 = gap;
+  double pot = k1 * ( gap1 * gap1 ) - k2 * ( gap1 * gap1 * gap1 );
+  return pot;
 }
 
+// Integrates the contact potential for segment A (coords[0..3]) against segment B (coords[4..7]) with an
+// N-point quadrature between the smoothed bounds derived from projections, scales the sum by lenA * 0.5 and
+// stores it in *energy. *energy is set to 0 when the normals from find_normal satisfy |nA . nB| < 1e-10.
+void compute_contact_energy( const double* coords, double del, double k1, double k2, int N, double lenA,
+                             double* projections, double* energy )
+{
+  double A0[2] = { coords[0], coords[1] };
+  double A1[2] = { coords[2], coords[3] };
+  double B0[2] = { coords[4], coords[5] };
+  double B1[2] = { coords[6], coords[7] };
 
-void compute_contact_energy(const double* coords, double del, double k1, double k2, int N, double lenA, double* projections, double* energy) {
-    double A0[2] = {coords[0], coords[1]};
-    double A1[2] = {coords[2], coords[3]};
-    double B0[2] = {coords[4], coords[5]};
-    double B1[2] = {coords[6], coords[7]};
+  // double lenA = sqrt((A1[0] - A0[0]) * (A1[0] - A0[0]) + (A1[1] - A0[1]) * (A1[1] - A0[1]));
+  double lenB = sqrt( ( B1[0] - B0[0] ) * ( B1[0] - B0[0] ) + ( B1[1] - B0[1] ) * ( B1[1] - B0[1] ) );
 
-    // double lenA = sqrt((A1[0] - A0[0]) * (A1[0] - A0[0]) + (A1[1] - A0[1]) * (A1[1] - A0[1]));
-    double lenB = sqrt((B1[0] - B0[0]) * (B1[0] - B0[0]) + (B1[1] - B0[1]) * (B1[1] - B0[1]));
-    
-    double AC[2] = {0.5 * (A0[0]+A1[0]), 0.5*(A0[1]+A1[1])};
-    double AR[2] = {0.5 * (A0[0]-A1[0]), 0.5*(A0[1]-A1[1])};
-    double normAR = std::sqrt(AR[0]*AR[0] + AR[1]*AR[1]);
+  double AC[2] = { 0.5 * ( A0[0] + A1[0] ), 0.5 * ( A0[1] + A1[1] ) };
+  double AR[2] = { 0.5 * ( A0[0] - A1[0] ), 0.5 * ( A0[1] - A1[1] ) };
+  double normAR = std::sqrt( AR[0] * AR[0] + AR[1] * AR[1] );  // half the current length of A
 
-    double BC[2] = {0.5 * (B0[0]+B1[0]), 0.5*(B0[1]+B1[1])};
-    double BR[2] = {0.5 * (B0[0]-B1[0]), 0.5*(B0[1]-B1[1])};
-    double normBR = std::sqrt(BR[0]*BR[0] + BR[1]*BR[1]);
+  double BC[2] = { 0.5 * ( B0[0] + B1[0] ), 0.5 * ( B0[1] + B1[1] ) };
+  double BR[2] = { 0.5 * ( B0[0] - B1[0] ), 0.5 * ( B0[1] - B1[1] ) };
+  double normBR = std::sqrt( BR[0] * BR[0] + BR[1] * BR[1] );
 
-    A0[0] = AC[0] + AR[0] * lenA * 0.5 / normAR;
-    A0[1] = AC[1] + AR[1] * lenA * 0.5 / normAR;
+  A0[0] = AC[0] + AR[0] * lenA * 0.5 / normAR;  // A is rescaled about its midpoint AC to length lenA
+  A0[1] = AC[1] + AR[1] * lenA * 0.5 / normAR;
 
-    A1[0] = AC[0] - AR[0] * lenA * 0.5 / normAR;
-    A1[1] = AC[1] - AR[1] * lenA * 0.5 / normAR;
+  A1[0] = AC[0] - AR[0] * lenA * 0.5 / normAR;
+  A1[1] = AC[1] - AR[1] * lenA * 0.5 / normAR;
 
-    B0[0] = BC[0] + BR[0] * lenB * 0.5 / normBR;
-    B0[1] = BC[1] + BR[1] * lenB * 0.5 / normBR;;
+  B0[0] = BC[0] + BR[0] * lenB * 0.5 / normBR;  // lenB * 0.5 equals normBR, so B keeps its length
+  B0[1] = BC[1] + BR[1] * lenB * 0.5 / normBR;
 
-    B1[0] = BC[0] - BR[0] * lenB * 0.5 / normBR;;
-    B1[1] = BC[1] - BR[1] * lenB * 0.5 / normBR;;
+  B1[0] = BC[0] - BR[0] * lenB * 0.5 / normBR;
+  B1[1] = BC[1] - BR[1] * lenB * 0.5 / normBR;
 
-    double nA[2] = {0.0};
-    double nB[2] = {0.0};
-    find_normal(A0, A1, nA);
-    find_normal(B0, B1, nB);
+  double nA[2] = { 0.0 };
+  double nB[2] = { 0.0 };
+  find_normal( A0, A1, nA );
+  find_normal( B0, B1, nB );
 
-    double dot_product = nA[0] * nB[0] + nA[1] * nB[1];
+  double dot_product = nA[0] * nB[0] + nA[1] * nB[1];
 
-    if (std::abs(dot_product) < 1e-10) {
-        *energy = 0;
-    }
+  if ( std::abs( dot_product ) < 1e-10 ) {
+    *energy = 0;
+  }
 
-    else{
- 
+  else {
     // std::cout << "length: " << lenA << std::endl;
 
-
-
     // double projections[2];
     // get_projections(A0, A1, B0, B1, projections);
 
     double integration_bounds[2];
-    compute_integration_bounds(projections, integration_bounds, del);
+    compute_integration_bounds( projections, integration_bounds, del );
 
     // double len = sqrt((A1[0] - A0[0]) * (A1[0] - A0[0]) + (A1[1] - A0[1]) * (A1[1] - A0[1]));
     // std::cout << "length: " << len << std::endl;
 
-
-
     double modified_bounds[2];
-    modify_bounds(integration_bounds, del, modified_bounds);
-    // std::cout << "Integration Bounds Original" << integration_bounds[0] << ", " << integration_bounds[1] << std::endl;
-    // std::cout << "Modifed Bounds" << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
+    modify_bounds( integration_bounds, del, modified_bounds );
+    // std::cout << "Integration Bounds Original" << integration_bounds[0] << ", " << integration_bounds[1] <<
+    // std::endl; std::cout << "Modifed Bounds" << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
 
     // double modified_bounds_w[2];
     // modify_bounds_for_weight(integration_bounds, del, modified_bounds_w);
 
-//     std::cout << "A: x from " << A0[0] << " to " << A1[0] << std::endl;
-// std::cout << "B: x from " << B0[0] << " to " << B1[0] << std::endl;
-// std::cout << "Raw projections from get_projections: [" << projections[0] << ", " << projections[1] << "]" << std::endl;
-// std::cout << "Integration bounds: [" << integration_bounds[0] << ", " << integration_bounds[1] << "]" << std::endl;
-// std::cout << "Modified bounds for quadrature: [" << modified_bounds[0] << ", " << modified_bounds[1] << "]" << std::endl;
-    
+    //     std::cout << "A: x from " << A0[0] << " to " << A1[0] << std::endl;
+    // std::cout << "B: x from " << B0[0] << " to " << B1[0] << std::endl;
+    // std::cout << "Raw projections from get_projections: [" << projections[0] << ", " << projections[1] << "]" <<
+    // std::endl; std::cout << "Integration bounds: [" << integration_bounds[0] << ", " << integration_bounds[1] << "]"
+    // << std::endl; std::cout << "Modified bounds for quadrature: [" << modified_bounds[0] << ", " <<
+    // modified_bounds[1] << "]" << std::endl;
 
     double quad_points[2 * N];
-    compute_quadrature_point(modified_bounds, A0, A1, N, quad_points);
+    compute_quadrature_point( modified_bounds, A0, A1, N, quad_points );
 
-    
     // std::cout << "integration Bounds" << integration_bounds[0] << ", " << integration_bounds[1] << std::endl;
     // double modified_bounds_w[2];
     // modify_bounds_for_weight(integration_bounds, del, modified_bounds_w);
 
     double weights[N];
-    assign_weights(modified_bounds, N, weights); //was for weigh orginalally 
+    assign_weights( modified_bounds, N, weights );  // was for weight originally
 
     *energy = 0.0;
-    for(int i = 0; i < N; ++i) {
-        // double p[2] = {quad_points[2 * i], quad_points[2 * i + 1]};
-        double mapped_coords[2] = {quad_points[2 * i], quad_points[2 * i + 1]};
-        // iso_map2(A0, A1, quad_points[i], mapped_coords); 
-        // std::cout << "quad point: " << quad_points[2*i] << std::endl;
-
-        // std::cout << "Mapped coords: " << mapped_coords[0] << ", " << mapped_coords[1] << std::endl;
+    for ( int i = 0; i < N; ++i ) {
+      // double p[2] = {quad_points[2 * i], quad_points[2 * i + 1]};
+      double mapped_coords[2] = { quad_points[2 * i], quad_points[2 * i + 1] };
+      // iso_map2(A0, A1, quad_points[i], mapped_coords);
+      // std::cout << "quad point: " << quad_points[2*i] << std::endl;
 
-        double gap = compute_gap(mapped_coords, B0, B1, nA, A0, A1);
-        // if (gap < 0.0) {
-        //     continue;
-        // }
-        double smooth_gap = compute_modified_gap(gap, nA, nB);
-        // std::cout << "gap: " << smooth_gap << std::endl;
+      // std::cout << "Mapped coords: " << mapped_coords[0] << ", " << mapped_coords[1] << std::endl;
 
-        double potential = compute_contact_potential(smooth_gap, k1, k2);
+      double gap = compute_gap( mapped_coords, B0, B1, nA, A0, A1 );
+      // if (gap < 0.0) {
+      //     continue;
+      // }
+      double smooth_gap = compute_modified_gap( gap, nA, nB );
+      // std::cout << "gap: " << smooth_gap << std::endl;
 
-        *energy +=  weights[i] * potential;
-        // std::cout << "energy: " << *energy << std::endl;
+      double potential = compute_contact_potential( smooth_gap, k1, k2 );
 
+      *energy += weights[i] * potential;
+      // std::cout << "energy: " << *energy << std::endl;
     }
     *energy *= lenA * 0.5;
     // std::cout << "energy: " << *energy << std::endl;
-    }
+  }
 }
 
 // void compute_sym_energy(const double* coords, double del, double k1, double k2, int N, double len, double* energy) {
-//     double energy1 = 0.0; 
-//     compute_contact_energy(coords, del, k1, k2, N, len, &energy1); 
+//     double energy1 = 0.0;
+//     compute_contact_energy(coords, del, k1, k2, N, len, &energy1);
 
 //     double A0[2] = {coords[0], coords[1]};
 //     double A1[2] = {coords[2], coords[3]};
@@ -946,7 +908,7 @@ void compute_contact_energy(const double* coords, double del, double k1, double
 
 //     double nA[2] = {0.0};
 //     double nB[2] = {0.0};
- 
+
 //     // std::cout << "length: " << len << std::endl;
 //     double energy2 = 0.0;
 
@@ -961,18 +923,14 @@ void compute_contact_energy(const double* coords, double del, double k1, double
 
 //     // double switch_bounds[2] = {integration_bounds[1], integration_bounds[0]};
 
-
-
 //     double modified_bounds[2];
 //     modify_bounds(integration_bounds, del, modified_bounds);
 //     // std::cout << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
-    
+
 //      double switch_bounds[2] = {modified_bounds[1], modified_bounds[0]};
 
 //     double quad_points[2 * N];
 //     compute_quadrature_point(switch_bounds, A0, A1, N, quad_points);
-    
-    
 
 //     // double modified_bounds[2];
 //     // modify_bounds(switch_bounds, del, modified_bounds);
@@ -996,38 +954,32 @@ void compute_contact_energy(const double* coords, double del, double k1, double
 //     energy2 *= len * 0.5;
 
 //     *energy = 0.5 * (energy1 - energy2);
-    
 
 // }
 
+// Prompts on std::cout and reads two (x, y) pairs from std::cin, appending x1, y1, x2, y2 to coords. N is unused.
+void read_element_coords( int N, std::vector<double>& coords )
+{
+  for ( int i = 0; i < 2; ++i ) {
+    double x = 0.0, y = 0.0;  // zero-initialised so a failed stream never pushes indeterminate values
+    std::cout << "Enter x" << i + 1 << ": ";
+    std::cin >> x;
 
+    std::cout << "Enter y" << i + 1 << ": ";
+    std::cin >> y;
 
-
-
-
-
-void read_element_coords(int N, std::vector<double>& coords) {
-    for(int i = 0; i < 2; ++i) {
-        double x;
-        double y;
-        std::cout << "Enter x" << i+1 << ": ";
-        std::cin >> x;
-        
-        std::cout << "Enter y" << i+1 << ": ";
-        std::cin >> y;
-        
-        coords.push_back(x);
-        coords.push_back(y);
-    }
+    coords.push_back( x );
+    coords.push_back( y );
+  }
 }
 
-void populate_C_arrays(double* C, const std::vector<double>& elem) {
-    for (size_t i = 0; i < elem.size(); ++i){
-        C[i] = elem[i];
-    }
+void populate_C_arrays( double* C, const std::vector<double>& elem )
+{  // Copies every element of elem, in order, into the raw array C.
+  for ( size_t i = 0; i < elem.size(); ++i ) {  // C must have room for elem.size() doubles; not checked here
+    C[i] = elem[i];
+  }
 }
 
-
 // void calc_force(double* coords, double del, double k1, double k2, int N, double len, double* dE_dX) {
 // double E = 0.0;
 // for (int i = 0; i < 8; ++i) {
@@ -1038,21 +990,25 @@ void populate_C_arrays(double* C, const std::vector<double>& elem) {
 //     double ddel = 0.0;
 //     double dE = 1.0;
 //     double dlen = 0.0;
-//     __enzyme_fwddiff<void>( compute_contact_energy, coords, dcoords, del, ddel, k1, dk1, k2, dk2, enzyme_const, N, dlen, len, &E, &dE);
-//     dE_dX[i] = -dE;
+//     __enzyme_fwddiff<void>( compute_contact_energy, coords, dcoords, del, ddel, k1, dk1, k2, dk2, enzyme_const, N,
+//     dlen, len, &E, &dE); dE_dX[i] = -dE;
 
 // }
 // }
 
-void calc_force_reverse(const double* coords, double del, double k1, double k2, int N, double len, double* projections, double* dE_dX) {
-    double dcoords[8] = {0.0};
-    double E = 0.0;
-    double dE = 1.0;
-    __enzyme_autodiff<void>( compute_contact_energy, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1, enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_const, projections, enzyme_dup, &E, &dE);
-
-    for(int i = 0; i < 8; ++i) {
-        dE_dX[i] = dcoords[i];
-    }
+// Gradient of compute_contact_energy with respect to the 8 coords via Enzyme reverse mode; result in dE_dX[0..7].
+void calc_force_reverse( const double* coords, double del, double k1, double k2, int N, double len, double* projections,
+                         double* dE_dX )
+{
+  double dcoords[8] = { 0.0 };  // shadow of coords: receives the accumulated derivative
+  double E = 0.0;
+  double dE = 1.0;  // seed for the energy output
+  __enzyme_autodiff<void>( compute_contact_energy, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1,
+                           enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_const, projections, enzyme_dup,
+                           &E, &dE );
+  for ( int i = 0; i < 8; ++i ) {
+    dE_dX[i] = dcoords[i];
+  }
 }
 
 // void calc_force_FD(double* coords, double del, double k1, double k2, int N, double* dE_dX, double h = 1e-10) {
@@ -1074,45 +1030,49 @@ void calc_force_reverse(const double* coords, double del, double k1, double k2,
 
 // }
 
-
-void calc_stiffness_rev_fwd(double* coords, double del, double k1, double k2, int N, double lenA, double* projections, double* force, double* d2E_d2X) {
-    double dE[8] = {0.0};
-    double d2E[8] = {0.0};
-    double dEF[8] = {0.0};
-    calc_force_reverse(coords, del, k1, k2, N, lenA, projections, dEF);
-    for (int i = 0; i < 8; ++i) {
-        force[i] = dEF[i];
-    }
-    for(int i = 0; i < 8; ++i) {
-        double d2coords[8] = {0.0};
-        d2coords[i] = 1.0;
-        double d2k1 = 0.0;
-        double d2del = 0.0;
-        double d2k2 = 0.0;
-        double d2lenA = 0.0;
-        double d2projections[] = {0.0};
-        __enzyme_fwddiff<void>( (void*) calc_force_reverse, coords, d2coords, del, d2del, k1, d2k1, k2, d2k2, N, lenA, d2lenA, projections, d2projections, dE, d2E);
-        for(int j = 0; j < 8; ++j) {
-            d2E_d2X[8 * i + j] = d2E[j];
-        }
-
+// Computes the contact force and stiffness for one segment pair. force[0..7] receives the gradient from
+// calc_force_reverse; row i of the row-major 8x8 d2E_d2X receives the forward-mode directional derivative
+// of that gradient along coordinate i.
+void calc_stiffness_rev_fwd( double* coords, double del, double k1, double k2, int N, double lenA, double* projections,
+                             double* force, double* d2E_d2X )
+{
+  double dE[8] = { 0.0 };
+  double d2E[8] = { 0.0 };
+  calc_force_reverse( coords, del, k1, k2, N, lenA, projections, force );
+  for ( int i = 0; i < 8; ++i ) {
+    double d2coords[8] = { 0.0 };
+    d2coords[i] = 1.0;
+    double d2k1 = 0.0;
+    double d2del = 0.0;
+    double d2k2 = 0.0;
+    double d2lenA = 0.0;
+    // The shadow must be as large as its primal; projections is presumably a 2-entry array - TODO confirm.
+    double d2projections[2] = { 0.0, 0.0 };
+    __enzyme_fwddiff<void>( (void*)calc_force_reverse, coords, d2coords, del, d2del, k1, d2k1, k2, d2k2, N, lenA,
+                            d2lenA, projections, d2projections, dE, d2E );
+    for ( int j = 0; j < 8; ++j ) {
+      d2E_d2X[8 * i + j] = d2E[j];
     }
+  }
 }
 
-// void calc_stiffness_rev_rev(double* coords, double del, double k1, double k2, int N, double lenA, double lenB, double* d2E_d2X) {
+// void calc_stiffness_rev_rev(double* coords, double del, double k1, double k2, int N, double lenA, double lenB,
+// double* d2E_d2X) {
 //     for (int i = 0; i < 8; ++i) {
 //         double d2X[8] = {0.0};
 //         double dE[8] = {0.0};
 //         double d2E[8] = {0.0};
 //         d2E[i] = 1.0;
-//         __enzyme_autodiff<void>( (void*)calc_force_reverse, enzyme_dup, coords, d2X, enzyme_const, del, enzyme_const, k1, enzyme_const, k2, enzyme_const, N, enzyme_const, lenA, enzyme_const, lenB, enzyme_dup, dE, d2E);
-//         for(int j = 0; j < 8; ++j) {
+//         __enzyme_autodiff<void>( (void*)calc_force_reverse, enzyme_dup, coords, d2X, enzyme_const, del, enzyme_const,
+//         k1, enzyme_const, k2, enzyme_const, N, enzyme_const, lenA, enzyme_const, lenB, enzyme_dup, dE, d2E); for(int
+//         j = 0; j < 8; ++j) {
 //             d2E_d2X[8 * i + j] = d2X[j];
 //         }
 //     }
 // }
 
-// void calc_stiffness_FD(double* coords, double del, double k1, double k2, double lenA , double lenB, int N, double *d2E_d2X, double h = 1e-7) {
+// void calc_stiffness_FD(double* coords, double del, double k1, double k2, double lenA , double lenB, int N, double
+// *d2E_d2X, double h = 1e-7) {
 //     double dX_plus[8] = {0.0};
 //     double dX_minus[8] = {0.0};
 //     double dW_plus[8] = {0.0};
@@ -1124,12 +1084,12 @@ void calc_stiffness_rev_fwd(double* coords, double del, double k1, double k2, in
 //         }
 //         dX_plus[i] = coords[i] + h;
 //         dX_minus[i] = coords[i] - h;
-        
+
 //         calc_force_reverse(dX_plus, del, k1, k2, N, lenA, lenB, dW_plus);
 //         calc_force_reverse(dX_minus, del, k1, k2, N, lenA, lenB, dW_minus);
 //         for(int j = 0; j < 8; ++j){
 //         d2E_d2X[8 * i + j] = (dW_plus[j] - dW_minus[j]) / (2  * h);
-        
+
 //     }
 
 // }
@@ -1163,8 +1123,11 @@ void calc_stiffness_rev_fwd(double* coords, double del, double k1, double k2, in
 //     double xi[2] = {0.0};
 //     modify_bounds(integration_bounds,del, xi);
 
-//     double term_one = (k1 * ((a * a) * (xi[1] * xi[1] * xi[1] / 3) + a * b * xi[1] + (b * b * xi[1])) + k2 * ((a * a * a) * (xi[1] * xi[1] * xi[1] * xi[1]) / 4) + (a * a) * (xi[1] * xi[1]) * b + ((3 * a * (xi[1] * xi[1] * xi[1]) * b) / 2) + (b * b * b) * (xi[1]));
-//     double term_two = (k1 * ((a * a) * (xi[0] * xi[0] * xi[0] / 3) + a * b * xi[0] + (b * b * xi[0])) + k2 * ((a * a * a) * (xi[0] * xi[0] * xi[0] * xi[0]) / 4) + (a * a) * (xi[0] * xi[0]) * b + ((3 * a * (xi[0] * xi[0] * xi[0]) * b) / 2) + (b * b * b) * (xi[0]));
+//     double term_one = (k1 * ((a * a) * (xi[1] * xi[1] * xi[1] / 3) + a * b * xi[1] + (b * b * xi[1])) + k2 * ((a * a
+//     * a) * (xi[1] * xi[1] * xi[1] * xi[1]) / 4) + (a * a) * (xi[1] * xi[1]) * b + ((3 * a * (xi[1] * xi[1] * xi[1]) *
+//     b) / 2) + (b * b * b) * (xi[1])); double term_two = (k1 * ((a * a) * (xi[0] * xi[0] * xi[0] / 3) + a * b * xi[0]
+//     + (b * b * xi[0])) + k2 * ((a * a * a) * (xi[0] * xi[0] * xi[0] * xi[0]) / 4) + (a * a) * (xi[0] * xi[0]) * b +
+//     ((3 * a * (xi[0] * xi[0] * xi[0]) * b) / 2) + (b * b * b) * (xi[0]));
 
 //     *energy = term_one - term_two;
 //     *energy *= len;
@@ -1174,7 +1137,8 @@ void calc_stiffness_rev_fwd(double* coords, double del, double k1, double k2, in
 //     double dcoords[8] = {0.0};
 //     double E = 0.0;
 //     double dE = 1.0;
-//     __enzyme_autodiff<void>( analytical_integral, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1, enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_dup, &E, &dE);
+//     __enzyme_autodiff<void>( analytical_integral, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1,
+//     enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_dup, &E, &dE);
 
 //     for(int i = 0; i < 8; ++i) {
 //         dE_dX[i] = -dcoords[i];
@@ -1185,117 +1149,112 @@ void calc_stiffness_rev_fwd(double* coords, double del, double k1, double k2, in
 //     double dcoords[8] = {0.0};
 //     double E = 0.0;
 //     double dE = 1.0;
-//     __enzyme_autodiff<void>( compute_sym_energy, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1, enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_dup, &E, &dE);
+//     __enzyme_autodiff<void>( compute_sym_energy, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1,
+//     enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_dup, &E, &dE);
 
 //     for(int i = 0; i < 8; ++i) {
 //         dE_dX[i] = -dcoords[i];
 //     }
 // }
 
+#endif  // TRIBOL_USE_ENZYME
+
+int main()
+{
+  // int N;
+  // std::cout << "Enter N quadrature points: ";
+  // std::cin >> N;
+
+  // if(N !=3 && N != 4 && N != 5) {
+  //     std::cerr << "Error: not a valid number quad pts" << std::endl;
+  // }
+
+  // std::vector<double> elem_A;
+  // std::vector<double> elem_B;
+
+  // std::cout << "Enter coordinates for element A: ";
+  // read_element_coords(N, elem_A);
+
+  // std::cout << "Enter coordinates for element B: ";
+  // read_element_coords(N, elem_B);
+
+  // double A[4] = {0.0};
+  // double B[4] = {0.0};
 
+  // populate_C_arrays(A, elem_A);
+  // populate_C_arrays(B, elem_B);
 
+#ifdef TRIBOL_USE_ENZYME
 
+  int N = 3;
 
+  // double A0[2] = {A[0], A[1]};
+  // double A1[2] = {A[2], A[3]};
+  // double B0[2] = {B[0], B[1]};
+  // double B1[2] = {B[2], B[3]};
 
-int main() {
-    // int N;
-    // std::cout << "Enter N quadrature points: ";
-    // std::cin >> N;
-    
-    // if(N !=3 && N != 4 && N != 5) {
-    //     std::cerr << "Error: not a valid number qaud pts" << std::endl;
+  double A0_i[2] = { -0.3, -0.05 };
+  double A1_i[2] = { 0.0, -0.05 };
+  double B0[2] = { 1.0, 0.0 };
+  double B1[2] = { 0.1, 0.0 };
+  double del = 0.05;
+  double k1 = 100;
+  double k2 = 0.0;
+  for ( int i = 0; i < 140; ++i ) {
+    // std::cout << i << std::endl;
+    double energy = 0.0;
+    double energy2;
+    double shift = 0.01 * i;
+
+    // std::cout << i << std::endl;
+    // std::cout << "location: " << shift << std::endl;
+    double A0[2] = { A0_i[0] + shift, A0_i[1] };
+    double A1[2] = { A1_i[0] + shift, A1_i[1] };
+
+    // std::cout << "A0x: " << A0[0] << " A0y: " << A0[1] << std::endl;
+    // std::cout << "A1x: " << A1[0] << " A1y: " << A1[1] << std::endl;
+
+    double coords[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+    double lenA = sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
+
+    // double lenB = sqrt((B1[0] - B0[0]) * (B1[0] - B0[0]) + (B1[1] - B0[1]) * (B1[1] - B0[1]));
+    // analytical_integral(coords, del, k1, k2, N, len, &energy2);
+    // if (i == 410) {
+    // std::cout << "B0x: " << B0[0] << ' ' << "B1x: " << B1[0] << std::endl;
+    // }
+    // // compute_sym_energy(coords, k1, k2, del, N, len, &energy);
+    // compute_contact_energy(coords, del, k1, k2, N, lenA, lenB, &energy);
+
+    double dE_dX[8] = { 0.0 };
+    double projections[2] = { 0.0 };
+    double proj1[2];
+    double proj0[2];
+    // get_endpoint_projections(A0, A1, B0, B1, proj0, proj1);
+    // std::cout << "Proj 0: " << proj0[0] << ", " << proj0[1] << std::endl;
+    // std::cout << "Proj 1: " << proj1[0] << ", " << proj1[1] << std::endl;
+    get_projections( A0, A1, B0, B1, projections, del );
+    // std::cout << "Projections in Main: " << projections[0] << ", " << projections[1] << std::endl;
+    compute_contact_energy( coords, del, k1, k2, N, lenA, projections, &energy );
+    // calc_force_reverse_sym(coords, del, k1, k2, N, len, dE_dX);
+    calc_force_reverse( coords, del, k1, k2, N, lenA, projections, dE_dX );
+    //  calc_force_reverse_exact(coords, del, k1, k2, N, len, dE_dX);
+    // std::cout << '[';
+    // for(int j = 0; j < 8; ++j) {
+
+    for ( int j = 0; j < 8; ++j ) {
+      if ( j == 0 ) {
+        std::cout << dE_dX[j];
+
+      } else {
+        std::cout << "," << dE_dX[j];
+      }
+    }
+    std::cout << std::endl;
     // }
-     
-    // std::vector<double> elem_A;
-    // std::vector<double> elem_B;
-
-    // std::cout << "Enter coordinates for element A: 
-    // read_element_coords(N, elem_A);
-
-    // std::cout << "Eneter coordinates for element B: ";
-    // read_element_coords(N, elem_B);
-
-    // double A[4] = {0.0};
-    // double B[4] = {0.0};
-
-    // populate_C_arrays(A, elem_A);
-    // populate_C_arrays(B, elem_B);
-
-    int N = 3;
-
-    // double A0[2] = {A[0], A[1]};
-    // double A1[2] = {A[2], A[3]};
-    // double B0[2] = {B[0], B[1]};
-    // double B1[2] = {B[2], B[3]};
-
-    double A0_i[2] = {-0.3, -0.05};
-    double A1_i[2] = {0.0, -0.05};
-    double B0[2] = {1.0, 0.0};
-    double B1[2] = {0.1, 0.0};
-    double del = 0.05;
-    double k1 = 100;
-    double k2 = 0.0;
-    for(int i = 0; i < 140; ++i) {
-        // std::cout << i << std::endl;
-        double energy = 0.0;
-        double energy2;
-        double shift = 0.01 * i;
-        
-        // std::cout << i << std::endl;
-        // std::cout << "location: " << shift << std::endl;
-        double A0[2] = {A0_i[0] + shift, A0_i[1]};
-        double A1[2] = {A1_i[0] + shift, A1_i[1]};
-        
-        
-        // std::cout << "A0x: " << A0[0] << " A0y: " << A0[1] << std::endl; 
-        // std::cout << "A1x: " << A1[0] << " A1y: " << A1[1] << std::endl; 
-
-        double coords[8] = {A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1]};
-        double lenA = sqrt((A1[0] - A0[0]) * (A1[0] - A0[0]) + (A1[1] - A0[1]) * (A1[1] - A0[1]));
-        
-        // double lenB = sqrt((B1[0] - B0[0]) * (B1[0] - B0[0]) + (B1[1] - B0[1]) * (B1[1] - B0[1]));
-        // analytical_integral(coords, del, k1, k2, N, len, &energy2); 
-        // if (i == 410) {
-            // std::cout << "B0x: " << B0[0] << ' ' << "B1x: " << B1[0] << std::endl;
-        // }
-        // // compute_sym_energy(coords, k1, k2, del, N, len, &energy);
-        // compute_contact_energy(coords, del, k1, k2, N, lenA, lenB, &energy);
- 
-        double dE_dX[8] = {0.0};
-        double projections[2] = {0.0};
-        double proj1[2];
-        double proj0[2];
-        // get_endpoint_projections(A0, A1, B0, B1, proj0, proj1);
-        // std::cout << "Proj 0: " << proj0[0] << ", " << proj0[1] << std::endl;
-        // std::cout << "Proj 1: " << proj1[0] << ", " << proj1[1] << std::endl;
-        get_projections(A0, A1, B0, B1, projections, del);
-        // std::cout << "Projections in Main: " << projections[0] << ", " << projections[1] << std::endl;
-        compute_contact_energy(coords, del, k1, k2, N, lenA, projections, &energy);
-        // calc_force_reverse_sym(coords, del, k1, k2, N, len, dE_dX);
-        calc_force_reverse(coords, del, k1, k2, N, lenA, projections, dE_dX);
-        //  calc_force_reverse_exact(coords, del, k1, k2, N, len, dE_dX);
-        // std::cout << '[';
-        // for(int j = 0; j < 8; ++j) {
-       
-                    for(int j = 0; j < 8; ++j) {
-                      
-                        if (j == 0) {
-                            
-                            std::cout << dE_dX[j];
-        
-                        }
-                        else{
-
-            std::cout << "," << dE_dX[j];
-            
-                        }
-        }
-        std::cout << std::endl;
-        // }
-        
-// // //         std::cout << ']' << std::endl;
+
+    // // //         std::cout << ']' << std::endl;
     //    std::cout << i * 0.01 << ',' << energy << std::endl;
-       double dE_dXrev[8] = {0.0};
+    double dE_dXrev[8] = { 0.0 };
     //    calc_force_reverse(coords, del, k1, k2, N, len,dE_dXrev);
     //    std::cout << "[";
     //    for (int j = 0; j < 8; ++j) {
@@ -1316,49 +1275,47 @@ int main() {
     //     std::cout << ", " << d2E_d2X[j];
     // }
     // std::cout << "]" << std::endl;
-//      double d2E_d2XFD[64] = {0.0};
+    //      double d2E_d2XFD[64] = {0.0};
     // calc_stiffness_FD(coords, del, k1, k2, lenA, lenB, N, d2E_d2XFD);
-//     for (int i = 0; i < 16; ++i) {
-//     // Create unit vector e_i
-//     double v[16] = {0.0};
-//     v[i] = 1.0;
-
-//     // Multiply: result = K * v
-//     double result[16] = {0.0};
-//     for (int row = 0; row < 16; ++row) {
-//         for (int col = 0; col < 16; ++col){
-//             result[row] += d2E_d2XFD[16 * row + col] * v[col];
-//             if (std::abs(result[row]) < 1e-10) {
-//                 result[row] = 0.0;
-//             }
-//         }
-//     }
-
-//     std::cout << "Column " << i << ": [";
-//     for (int j = 0; j < 16; ++j) {
-//         std::cout << result[j];
-//         if (j < 15) std::cout << ", ";
-//     }
-//     std::cout << "]" << std::endl;
-// }
-
-// const int N = 8;
-// int k = 5; // The DOF (column) you want
+    //     for (int i = 0; i < 16; ++i) {
+    //     // Create unit vector e_i
+    //     double v[16] = {0.0};
+    //     v[i] = 1.0;
+
+    //     // Multiply: result = K * v
+    //     double result[16] = {0.0};
+    //     for (int row = 0; row < 16; ++row) {
+    //         for (int col = 0; col < 16; ++col){
+    //             result[row] += d2E_d2XFD[16 * row + col] * v[col];
+    //             if (std::abs(result[row]) < 1e-10) {
+    //                 result[row] = 0.0;
+    //             }
+    //         }
+    //     }
+
+    //     std::cout << "Column " << i << ": [";
+    //     for (int j = 0; j < 16; ++j) {
+    //         std::cout << result[j];
+    //         if (j < 15) std::cout << ", ";
+    //     }
+    //     std::cout << "]" << std::endl;
+    // }
 
-// double result[N] = {0.0};
-// for (int j = 0; j < N; ++j) {
-//     result[j] = d2E_d2XFD[N * j + k];
-//     // This grabs the k-th column (since your matrix is row-major)
-//     // If you want the k-th row, swap indices
-// }
+    // const int N = 8;
+    // int k = 5; // The DOF (column) you want
 
-// // Print result to compare with J_exact
-// for (int j = 0; j < N; ++j) {
-//     printf("J exact: %.17g\n", result[j]);
-// }
+    // double result[N] = {0.0};
+    // for (int j = 0; j < N; ++j) {
+    //     result[j] = d2E_d2XFD[N * j + k];
+    //     // This grabs the k-th column (since your matrix is row-major)
+    //     // If you want the k-th row, swap indices
+    // }
 
+    // // Print result to compare with J_exact
+    // for (int j = 0; j < N; ++j) {
+    //     printf("J exact: %.17g\n", result[j]);
+    // }
 
-   
     //     double d2E_d2XFD[64] = {0.0};
     // calc_stiffness_FD(coords, del, k1, k2, lenA, lenB, N, d2E_d2XFD);
     // std::cout << "FD: [";
@@ -1367,27 +1324,28 @@ int main() {
     // }
     // std::cout << "]" << std::endl;
 
-//             double d2E_d2Xrevrev[64] = {0.0};
-//     calc_stiffness_rev_rev(coords, del, k1, k2, lenA, lenB, N, d2E_d2Xrevrev);
-//     std::cout << "Rev rev: [";
-//     for (int j = 0; j < 64; ++j) {
-//         std::cout << ", " << d2E_d2Xrevrev[j];
-//     }
-//     std::cout << "]" << std::endl;
+    //             double d2E_d2Xrevrev[64] = {0.0};
+    //     calc_stiffness_rev_rev(coords, del, k1, k2, lenA, lenB, N, d2E_d2Xrevrev);
+    //     std::cout << "Rev rev: [";
+    //     for (int j = 0; j < 64; ++j) {
+    //         std::cout << ", " << d2E_d2Xrevrev[j];
+    //     }
+    //     std::cout << "]" << std::endl;
 
-//     std::cout << "Difference rev fwd - FD: [";
-// for (int j = 0; j < 64; ++j) {
-//     std::cout << ", " << (d2E_d2X[j] - d2E_d2XFD[j]);
-// }
-// std::cout << "]" << std::endl;
+    //     std::cout << "Difference rev fwd - FD: [";
+    // for (int j = 0; j < 64; ++j) {
+    //     std::cout << ", " << (d2E_d2X[j] - d2E_d2XFD[j]);
+    // }
+    // std::cout << "]" << std::endl;
 
-// std::cout << "Difference rev rev - FD: [";
-// for (int j = 0; j < 64; ++j) {
-//     std::cout << ", " << (d2E_d2Xrevrev[j] - d2E_d2XFD[j]);
-// }
-// std::cout << "]" << std::endl;
+    // std::cout << "Difference rev rev - FD: [";
+    // for (int j = 0; j < 64; ++j) {
+    //     std::cout << ", " << (d2E_d2Xrevrev[j] - d2E_d2XFD[j]);
+    // }
+    // std::cout << "]" << std::endl;
 
     // double energy = compute_contact_energy(A0, A1, B0, B1, del, k1, k2, N);
     // std::cout << "Energy: " << energy << std::endl;
-}
+  }
+#endif  // TRIBOL_USE_ENZYME
 }
\ No newline at end of file

From 3e9782660a84be8eef943be1736d685a296cd2a2 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Wed, 11 Feb 2026 17:11:01 -0800
Subject: [PATCH 28/56] add more TRIBOL_USE_ENZYME guards; new API functions

---
 src/tests/CMakeLists.txt                      |  2 +-
 src/tests/tribol_mfem_mortar_energy.cpp       |  4 +-
 src/tribol/interface/mfem_tribol.cpp          | 47 +++++++++++++------
 src/tribol/interface/mfem_tribol.hpp          |  6 +++
 .../physics/ContactFormulationFactory.cpp     |  4 +-
 src/tribol/physics/NewMethodAdapter.cpp       | 14 ++----
 src/tribol/physics/NewMethodAdapter.hpp       |  4 ++
 src/tribol/physics/new_method.cpp             |  4 ++
 src/tribol/physics/new_method.hpp             |  6 +++
 9 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index f6de0d55..d7ab0f17 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -149,7 +149,6 @@ if ( BUILD_REDECOMP AND TRIBOL_USE_MPI )
   set( combined_tests
       tribol_mfem_common_plane.cpp
       tribol_mfem_mortar_lm.cpp
-      tribol_mfem_mortar_energy.cpp
       tribol_proximity_check.cpp
       tribol_redecomp_tol.cpp
       )
@@ -198,6 +197,7 @@ if( TRIBOL_USE_ENZYME )
       tribol_enzyme_nodal_normal.cpp
       tribol_enzyme_mortar_assembled.cpp
       tribol_enzyme_poly_intersect.cpp
+      tribol_mfem_mortar_energy.cpp
       )
 
   set(combined_test_depends tribol gtest)
diff --git a/src/tests/tribol_mfem_mortar_energy.cpp b/src/tests/tribol_mfem_mortar_energy.cpp
index 0caabe10..7bd21547 100644
--- a/src/tests/tribol_mfem_mortar_energy.cpp
+++ b/src/tests/tribol_mfem_mortar_energy.cpp
@@ -148,9 +148,7 @@ class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int>> {
     auto A_cont = tribol::getMfemDfDx( coupling_scheme_id );
 
     // retrieve contact force (response)
-    mfem::Vector f_contact( par_fe_space.GetTrueVSize() );
-    f_contact = 0.0;
-    tribol::getMfemResponse( coupling_scheme_id, f_contact );
+    auto f_contact = tribol::getMfemTDofForce( coupling_scheme_id );
     f_contact.Neg();
     for ( int i{ 0 }; i < ess_tdof_list.Size(); ++i ) {
       f_contact( ess_tdof_list[i] ) = 0.0;
diff --git a/src/tribol/interface/mfem_tribol.cpp b/src/tribol/interface/mfem_tribol.cpp
index 31194c13..e741a83e 100644
--- a/src/tribol/interface/mfem_tribol.cpp
+++ b/src/tribol/interface/mfem_tribol.cpp
@@ -307,17 +307,23 @@ void getMfemResponse( IndexT cs_id, mfem::Vector& r )
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
 
-  if ( cs->hasContactFormulation() ) {
-    cs->getContactFormulation()->getMfemForce( r );
-    return;
-  }
-
   SLIC_ERROR_ROOT_IF( !cs->hasMfemData(),
                       "Coupling scheme does not contain MFEM data. "
                       "Create the coupling scheme using registerMfemCouplingScheme() to return a response vector." );
   cs->getMfemMeshData()->GetParentResponse( r );
 }
 
+mfem::HypreParVector getMfemTDofForce( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(), "Coupling scheme does not contain a contact formulation." );
+  return cs->getContactFormulation()->getMfemForce();
+}
+
 std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id )
 {
   CouplingScheme* cs = CouplingSchemeManager::getInstance().findData( cs_id );
@@ -444,11 +450,6 @@ void getMfemGap( IndexT cs_id, mfem::Vector& g )
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
 
-  if ( cs->hasContactFormulation() ) {
-    cs->getContactFormulation()->getMfemGap( g );
-    return;
-  }
-
   SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(),
                       axom::fmt::format( "Coupling scheme cs_id={0} does not contain MFEM pressure field data. "
                                          "Create the coupling scheme using registerMfemCouplingScheme() and set the "
@@ -457,17 +458,24 @@ void getMfemGap( IndexT cs_id, mfem::Vector& g )
   cs->getMfemSubmeshData()->GetSubmeshGap( g );
 }
 
-mfem::ParGridFunction& getMfemPressure( IndexT cs_id )
+mfem::HypreParVector getMfemTDofGap( IndexT cs_id )
 {
   auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
   SLIC_ERROR_ROOT_IF(
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(), "Coupling scheme does not contain a contact formulation." );
+  return cs->getContactFormulation()->getMfemGap();
+}
 
-  if ( cs->hasContactFormulation() ) {
-    return cs->getContactFormulation()->getMfemPressure();
-  }
+mfem::ParGridFunction& getMfemPressure( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
 
   SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(),
                       axom::fmt::format( "Coupling scheme cs_id={0} does not contain MFEM pressure field data. "
@@ -477,6 +485,17 @@ mfem::ParGridFunction& getMfemPressure( IndexT cs_id )
   return cs->getMfemSubmeshData()->GetSubmeshPressure();
 }
 
+mfem::HypreParVector getMfemTDofPressure( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(), "Coupling scheme does not contain a contact formulation." );
+  return cs->getContactFormulation()->getMfemPressure();
+}
+
 void updateMfemParallelDecomposition( int n_ranks, bool force_new_redecomp )
 {
   for ( auto& cs_pair : CouplingSchemeManager::getInstance() ) {
diff --git a/src/tribol/interface/mfem_tribol.hpp b/src/tribol/interface/mfem_tribol.hpp
index 53a144af..13bf07cb 100644
--- a/src/tribol/interface/mfem_tribol.hpp
+++ b/src/tribol/interface/mfem_tribol.hpp
@@ -240,6 +240,8 @@ void registerMfemReferenceCoords( IndexT cs_id, const mfem::ParGridFunction& ref
  */
 void getMfemResponse( IndexT cs_id, mfem::Vector& r );
 
+mfem::HypreParVector getMfemTDofForce( IndexT cs_id );
+
 /**
  * @brief Get assembled contact contributions for the Jacobian matrix
  *
@@ -307,6 +309,8 @@ std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx( IndexT cs_id );
  */
 void getMfemGap( IndexT cs_id, mfem::Vector& g );
 
+mfem::HypreParVector getMfemTDofGap( IndexT cs_id );
+
 /**
  * @brief Returns reference to nodal pressure vector on the submesh surface
  *
@@ -319,6 +323,8 @@ void getMfemGap( IndexT cs_id, mfem::Vector& g );
  */
 mfem::ParGridFunction& getMfemPressure( IndexT cs_id );
 
+mfem::HypreParVector getMfemTDofPressure( IndexT cs_id );
+
 /**
  * @brief Updates mesh parallel decomposition and related grid functions/Jacobian when coordinates are updated
  *
diff --git a/src/tribol/physics/ContactFormulationFactory.cpp b/src/tribol/physics/ContactFormulationFactory.cpp
index 23a91125..46bf4470 100644
--- a/src/tribol/physics/ContactFormulationFactory.cpp
+++ b/src/tribol/physics/ContactFormulationFactory.cpp
@@ -22,7 +22,7 @@ std::unique_ptr<ContactFormulation> createContactFormulation( CouplingScheme* cs
     double delta = 0.01;
     int N = 3;
 
-#ifdef BUILD_REDECOMP
+#if defined( TRIBOL_USE_ENZYME ) && defined( BUILD_REDECOMP )
     if ( cs->hasMfemData() ) {
       // Attempt to get penalty from MfemMeshData if available
       auto* k_ptr = cs->getMfemMeshData()->GetMesh1KinematicConstantPenalty();
@@ -37,7 +37,7 @@ std::unique_ptr<ContactFormulation> createContactFormulation( CouplingScheme* cs
     return std::make_unique<NewMethodAdapter>( *cs->getMfemSubmeshData(), *cs->getMfemJacobianData(), cs->getMesh1(),
                                                cs->getMesh2(), k, delta, N );
 #else
-    SLIC_ERROR_ROOT( "ENERGY_MORTAR requires BUILD_REDECOMP" );
+    SLIC_ERROR_ROOT( "ENERGY_MORTAR requires Enzyme and redecomp to be built." );
     return nullptr;
 #endif
   }
diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index e1415b1b..9da64231 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -7,6 +7,8 @@
 
 namespace tribol {
 
+#ifdef TRIBOL_USE_ENZYME
+
 NewMethodAdapter::NewMethodAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1,
                                     MeshData& mesh2, double k, double delta, int N )
     // NOTE: mesh1 maps to mesh2_ and mesh2 maps to mesh1_. This is to keep consistent with mesh1_ being non-mortar and
@@ -167,18 +169,10 @@ void NewMethodAdapter::updateNodalForces()
 
   auto k_over_a = params_.k * A_vec_.inverse( area_tol_ );
 
-  // mfem::HypreParVector k_over_a( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
-  // k_over_a = 0.0;
-  // for ( int i{ 0 }; i < k_over_a.Size(); ++i ) {
-  //   if ( A_vec_[i] > 1.0e-14 ) {
-  //     k_over_a[i] = params_.k / A_vec_[i];
-  //   }
-  // }
-
   auto p_over_a = pressure_vec_.divide( A_vec_, area_tol_ );
 
   shared::ParSparseMat dp_dx( dg_tilde_dx_.get() );
-  dp_dx->ScaleRows( k_over_a );
+  dp_dx->ScaleRows( k_over_a.get() );
   shared::ParSparseMat dp_dx_temp( dA_dx_.get() );
   dp_dx_temp->ScaleRows( p_over_a.get() );
   dp_dx -= dp_dx_temp;
@@ -340,4 +334,6 @@ std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDp() const
   return nullptr;
 }
 
+#endif  // TRIBOL_USE_ENZYME
+
 }  // namespace tribol
diff --git a/src/tribol/physics/NewMethodAdapter.hpp b/src/tribol/physics/NewMethodAdapter.hpp
index d2bbb1ab..2e3e0960 100644
--- a/src/tribol/physics/NewMethodAdapter.hpp
+++ b/src/tribol/physics/NewMethodAdapter.hpp
@@ -19,6 +19,8 @@
 
 namespace tribol {
 
+#ifdef TRIBOL_USE_ENZYME
+
 class NewMethodAdapter : public ContactFormulation {
  public:
   /**
@@ -90,6 +92,8 @@ class NewMethodAdapter : public ContactFormulation {
   mutable shared::ParSparseMat df_dx_;
 };
 
+#endif  // TRIBOL_USE_ENZYME
+
 }  // namespace tribol
 
 #endif /* SRC_TRIBOL_PHYSICS_NEWMETHODADAPTER_HPP_ */
diff --git a/src/tribol/physics/new_method.cpp b/src/tribol/physics/new_method.cpp
index 45c0f2f0..4943e434 100644
--- a/src/tribol/physics/new_method.cpp
+++ b/src/tribol/physics/new_method.cpp
@@ -16,6 +16,8 @@
 
 namespace tribol {
 
+#ifdef TRIBOL_USE_ENZYME
+
 namespace {
 
 struct Gparams {
@@ -1345,4 +1347,6 @@ void ContactEvaluator::grad_gtilde_with_qp( const InterfacePair& pair, const Mes
 //     std::cout << std::string(120, '=') << "\n\n";
 // }
 
+#endif  // TRIBOL_USE_ENZYME
+
 }  // namespace tribol
diff --git a/src/tribol/physics/new_method.hpp b/src/tribol/physics/new_method.hpp
index af11061a..522ef51c 100644
--- a/src/tribol/physics/new_method.hpp
+++ b/src/tribol/physics/new_method.hpp
@@ -2,11 +2,15 @@
 #include <vector>
 #include <array>
 
+#include "tribol/config.hpp"
+
 #include "tribol/mesh/InterfacePairs.hpp"
 #include "tribol/mesh/MeshData.hpp"
 
 namespace tribol {
 
+#ifdef TRIBOL_USE_ENZYME
+
 struct Node {
   double x, y;
   int id;
@@ -133,4 +137,6 @@ class ContactEvaluator {
   std::array<double, 2> compute_pressures( const NodalContactData& ncd ) const;
 };
 
+#endif  // TRIBOL_USE_ENZYME
+
 }  // namespace tribol

From cc616867d25506dbca899f89f8cc771201f9fc4e Mon Sep 17 00:00:00 2001
From: EB Chin <chin23@llnl.gov>
Date: Wed, 11 Feb 2026 18:14:37 -0800
Subject: [PATCH 29/56] fix new vs free issues

---
 src/shared/math/ParSparseMat.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/shared/math/ParSparseMat.cpp b/src/shared/math/ParSparseMat.cpp
index d754a590..3d479897 100644
--- a/src/shared/math/ParSparseMat.cpp
+++ b/src/shared/math/ParSparseMat.cpp
@@ -122,6 +122,10 @@ ParSparseMat::ParSparseMat( MPI_Comm comm, HYPRE_BigInt glob_size, HYPRE_BigInt*
   diag.GetMemoryI().ClearOwnerFlags();
   diag.GetMemoryJ().ClearOwnerFlags();
   diag.GetMemoryData().ClearOwnerFlags();
+  // The mfem::Memory in mfem::SparseMatrix allocates using operator new [], so mark the diag memory as owned by MFEM so
+  // it can be deleted correctly
+  constexpr int mfem_owned_host_flag = 3;
+  owned_mat_->SetOwnerFlags( mfem_owned_host_flag, owned_mat_->OwnsOffd(), owned_mat_->OwnsColMap() );
 }
 
 ParSparseMat::ParSparseMat( ParSparseMat&& other ) noexcept
@@ -235,6 +239,9 @@ ParSparseMat ParSparseMat::diagonalMatrix( MPI_Comm comm, HYPRE_BigInt global_si
       offd_i, offd_j, offd_data, 0, offd_col_map, true ) );
   diag_hpm->CopyRowStarts();
   diag_hpm->CopyColStarts();
+  // We allocated memory using operator new [], so mark all memory as owned by MFEM so it can be deleted correctly
+  constexpr int mfem_owned_host_flag = 3;
+  diag_hpm->SetOwnerFlags( mfem_owned_host_flag, mfem_owned_host_flag, mfem_owned_host_flag );
   return ParSparseMat( std::move( diag_hpm ) );
 }
 

From f7d078f807b44b3e2eec84c94d3ee186ff6e0049 Mon Sep 17 00:00:00 2001
From: EB Chin <chin23@llnl.gov>
Date: Wed, 11 Feb 2026 18:15:03 -0800
Subject: [PATCH 30/56] fix ambiguous operator

---
 src/tribol/physics/ContactFormulationFactory.hpp |  2 +-
 src/tribol/physics/NewMethodAdapter.cpp          | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/tribol/physics/ContactFormulationFactory.hpp b/src/tribol/physics/ContactFormulationFactory.hpp
index 7c6dcff8..1ad6233d 100644
--- a/src/tribol/physics/ContactFormulationFactory.hpp
+++ b/src/tribol/physics/ContactFormulationFactory.hpp
@@ -16,7 +16,7 @@ class CouplingScheme;
 
 /**
  * @brief Factory function to create a ContactFormulation based on the CouplingScheme settings.
- * 
+ *
  * @param cs Pointer to the CouplingScheme
  * @return std::unique_ptr<ContactFormulation> The created formulation, or nullptr if no formulation applies.
  */
diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index 9da64231..c385f8e2 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -75,11 +75,11 @@ void NewMethodAdapter::updateNodalGaps()
     auto A_conn = mesh1_view.getConnectivity()( elem1 );
 
     // Add to nodes of Element A
-    redecomp_gap[A_conn[0]] += g_tilde_elem[0];
-    redecomp_gap[A_conn[1]] += g_tilde_elem[1];
+    redecomp_gap( A_conn[0] ) += g_tilde_elem[0];
+    redecomp_gap( A_conn[1] ) += g_tilde_elem[1];
 
-    redecomp_area[A_conn[0]] += A_elem[0];
-    redecomp_area[A_conn[1]] += A_elem[1];
+    redecomp_area( A_conn[0] ) += A_elem[0];
+    redecomp_area( A_conn[1] ) += A_elem[1];
 
     // compute g_tilde first derivative
     double dg_dx_node1[8];
@@ -213,15 +213,15 @@ void NewMethodAdapter::updateNodalForces()
     const auto node12 = mesh1_view.getConnectivity()( elem1, 1 );
     const auto elem2 = static_cast<int>( flipped_pair.m_element_id2 );
 
-    const RealT pressure1 = 2.0 * redecomp_pressure[node11];
-    const RealT pressure2 = 2.0 * redecomp_pressure[node12];
+    const RealT pressure1 = 2.0 * redecomp_pressure( node11 );
+    const RealT pressure2 = 2.0 * redecomp_pressure( node12 );
 
     if ( pressure1 == 0.0 && pressure2 == 0.0 ) {
       continue;
     }
 
-    const RealT g_p_ainv1 = -redecomp_g_tilde[node11] * redecomp_pressure[node11] / redecomp_A[node11];
-    const RealT g_p_ainv2 = -redecomp_g_tilde[node12] * redecomp_pressure[node12] / redecomp_A[node12];
+    const RealT g_p_ainv1 = -redecomp_g_tilde( node11 ) * redecomp_pressure( node11 ) / redecomp_A( node11 );
+    const RealT g_p_ainv2 = -redecomp_g_tilde( node12 ) * redecomp_pressure( node12 ) / redecomp_A( node12 );
 
     double df_dx_node1[64];
     double df_dx_node2[64];

From f9875e2e938d46c1603032fa5b398214959cc1c9 Mon Sep 17 00:00:00 2001
From: EB Chin <chin23@llnl.gov>
Date: Thu, 12 Feb 2026 01:10:55 -0800
Subject: [PATCH 31/56] bugfixes

---
 src/redecomp/transfer/MatrixTransfer.cpp |  3 ---
 src/shared/math/ParSparseMat.cpp         |  4 ++--
 src/tests/tribol_enforcement_options.cpp | 24 ++++++++++++------------
 src/tests/tribol_mfem_mortar_energy.cpp  |  2 +-
 src/tribol/mesh/MfemData.cpp             |  2 +-
 5 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/src/redecomp/transfer/MatrixTransfer.cpp b/src/redecomp/transfer/MatrixTransfer.cpp
index 3e764392..e9a05c2d 100644
--- a/src/redecomp/transfer/MatrixTransfer.cpp
+++ b/src/redecomp/transfer/MatrixTransfer.cpp
@@ -165,9 +165,6 @@ shared::ParSparseMat MatrixTransfer::ConvertToParSparseMat( mfem::SparseMatrix&&
   shared::ParSparseMat J_full( getMPIUtility().MPIComm(), parent_test_fes_.GetVSize(), parent_test_fes_.GlobalVSize(),
                                parent_trial_fes_.GlobalVSize(), sparse.GetI(), J_bigint.GetData(), sparse.GetData(),
                                parent_test_fes_.GetDofOffsets(), parent_trial_fes_.GetDofOffsets() );
-  sparse.GetMemoryI().ClearOwnerFlags();
-  sparse.GetMemoryJ().ClearOwnerFlags();
-  sparse.GetMemoryData().ClearOwnerFlags();
   if ( !parallel_assemble ) {
     return J_full;
   } else {
diff --git a/src/shared/math/ParSparseMat.cpp b/src/shared/math/ParSparseMat.cpp
index 3d479897..7318af82 100644
--- a/src/shared/math/ParSparseMat.cpp
+++ b/src/shared/math/ParSparseMat.cpp
@@ -37,7 +37,7 @@ ParSparseMat operator*( const ParSparseMatView& lhs, const ParSparseMatView& rhs
 
 ParVector ParSparseMatView::operator*( const ParVectorView& x ) const
 {
-  ParVector y( *mat_ );
+  ParVector y( *mat_, 1 );
   invokeHypreMethod<MemorySpace::Host>(
       [&]() { mat_->Mult( const_cast<mfem::HypreParVector&>( x.get() ), y.get() ); } );
   return y;
@@ -92,7 +92,7 @@ ParSparseMat operator*( double s, const ParSparseMatView& mat ) { return mat * s
 
 ParVector operator*( const ParVectorView& x, const ParSparseMatView& mat )
 {
-  ParVector y( *mat.mat_, 1 );
+  ParVector y( *mat.mat_, 0 );
   ParSparseMatView::invokeHypreMethod<MemorySpace::Host>(
       [&]() { mat.mat_->MultTranspose( const_cast<mfem::HypreParVector&>( x.get() ), y.get() ); } );
   return y;
diff --git a/src/tests/tribol_enforcement_options.cpp b/src/tests/tribol_enforcement_options.cpp
index cc2b8e80..12113bc3 100644
--- a/src/tests/tribol_enforcement_options.cpp
+++ b/src/tests/tribol_enforcement_options.cpp
@@ -195,10 +195,10 @@ TEST_F( EnforcementOptionsTest, penalty_kinematic_element_error )
 
   tribol::finalize();
 
-  delete bulk_modulus_1;
-  delete bulk_modulus_2;
-  delete element_thickness_1;
-  delete element_thickness_2;
+  delete[] bulk_modulus_1;
+  delete[] bulk_modulus_2;
+  delete[] element_thickness_1;
+  delete[] element_thickness_2;
   delete mesh;
 }
 
@@ -355,10 +355,10 @@ TEST_F( EnforcementOptionsTest, penalty_kinematic_element_pass )
 
   tribol::finalize();
 
-  delete bulk_modulus_1;
-  delete bulk_modulus_2;
-  delete element_thickness_1;
-  delete element_thickness_2;
+  delete[] bulk_modulus_1;
+  delete[] bulk_modulus_2;
+  delete[] element_thickness_1;
+  delete[] element_thickness_2;
   delete mesh;
 }
 
@@ -394,10 +394,10 @@ TEST_F( EnforcementOptionsTest, penalty_kinematic_element_invalid_element_input
 
   tribol::finalize();
 
-  delete bulk_modulus_1;
-  delete bulk_modulus_2;
-  delete element_thickness_1;
-  delete element_thickness_2;
+  delete[] bulk_modulus_1;
+  delete[] bulk_modulus_2;
+  delete[] element_thickness_1;
+  delete[] element_thickness_2;
   delete mesh;
 }
 
diff --git a/src/tests/tribol_mfem_mortar_energy.cpp b/src/tests/tribol_mfem_mortar_energy.cpp
index 7bd21547..2f1a024a 100644
--- a/src/tests/tribol_mfem_mortar_energy.cpp
+++ b/src/tests/tribol_mfem_mortar_energy.cpp
@@ -156,7 +156,7 @@ class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int>> {
 
     // Add contact stiffness to elasticity stiffness
     auto A_total = std::unique_ptr<mfem::HypreParMatrix>( mfem::Add( 1.0, *A, 1.0, *A_cont ) );
-    A_total->EliminateRowsCols( ess_tdof_list );
+    auto A_elim = std::unique_ptr<mfem::HypreParMatrix>( A_total->EliminateRowsCols( ess_tdof_list ) );
 
     // Solve for X (displacement)
     mfem::Vector X( par_fe_space.GetTrueVSize() );
diff --git a/src/tribol/mesh/MfemData.cpp b/src/tribol/mesh/MfemData.cpp
index 2b0c47dd..36a2775e 100644
--- a/src/tribol/mesh/MfemData.cpp
+++ b/src/tribol/mesh/MfemData.cpp
@@ -1002,7 +1002,7 @@ std::unique_ptr<mfem::BlockOperator> MfemJacobianData::GetMfemBlockJacobian(
       // Pick xfer again for conversion
       redecomp::MatrixTransfer* xfer = GetUpdateData().submesh_redecomp_xfer_( r_blk, c_blk ).get();
 
-      auto submesh_J_hypre = xfer->ConvertToParSparseMat( std::move( *submesh_J.release() ), false );
+      auto submesh_J_hypre = xfer->ConvertToParSparseMat( std::move( *submesh_J ), false );
 
       mfem::HypreParMatrix* block_mat = nullptr;
 

From ed4a366232cb131da79e1bac4961ca448bd80707 Mon Sep 17 00:00:00 2001
From: Ryan Lutz <lutz23@llnl.gov>
Date: Wed, 4 Feb 2026 15:29:57 -0800
Subject: [PATCH 32/56] changes

---
 src/tests/CMakeLists.txt         |  1 +
 src/tribol/common/BasicTypes.hpp | 74 ++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index d7ab0f17..5b8ac893 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -198,6 +198,7 @@ if( TRIBOL_USE_ENZYME )
       tribol_enzyme_mortar_assembled.cpp
       tribol_enzyme_poly_intersect.cpp
       tribol_mfem_mortar_energy.cpp
+      new_mortar_deriv.cpp
       )
 
   set(combined_test_depends tribol gtest)
diff --git a/src/tribol/common/BasicTypes.hpp b/src/tribol/common/BasicTypes.hpp
index 7947a991..51c0d10d 100644
--- a/src/tribol/common/BasicTypes.hpp
+++ b/src/tribol/common/BasicTypes.hpp
@@ -14,6 +14,80 @@ using CommT = shared::CommT;
 using IndexT = shared::IndexT;
 using SizeT = shared::SizeT;
 using RealT = shared::RealT;
+#ifdef TRIBOL_USE_MPI
+
+using CommT = MPI_Comm;
+#define TRIBOL_COMM_WORLD MPI_COMM_WORLD
+#define TRIBOL_COMM_NULL MPI_COMM_NULL
+
+#else
+
+using CommT = int;
+#define TRIBOL_COMM_WORLD 0
+#define TRIBOL_COMM_NULL -1
+
+#endif
+
+// match index type used in axom (since data is held in axom data structures)
+using IndexT = axom::IndexType;
+
+// size type matching size of addressable memory
+using SizeT = size_t;
+
+#ifdef TRIBOL_USE_SINGLE_PRECISION
+
+#error "Tribol does not support single precision."
+using RealT = float;
+
+#else
+
+using RealT = double;
+
+#endif
+
+// mfem's real_t should match ours
+static_assert( std::is_same<RealT, mfem::real_t>::value, "tribol::RealT and mfem::real_t are required to match" );
+
+#define TRIBOL_UNUSED_VAR AXOM_UNUSED_VAR
+#define TRIBOL_UNUSED_PARAM AXOM_UNUSED_PARAM
+
+// Execution space specifiers
+#if defined( TRIBOL_USE_CUDA ) || defined( TRIBOL_USE_HIP )
+#ifndef __device__
+#error "TRIBOL_USE_CUDA or TRIBOL_USE_HIP but __device__ is undefined.  Check include files"
+#endif
+#define TRIBOL_DEVICE __device__
+#define TRIBOL_HOST_DEVICE __host__ __device__
+#else
+#define TRIBOL_DEVICE
+#define TRIBOL_HOST_DEVICE
+#endif
+
+// Execution space identifier for defaulted constructors and destructors
+#ifdef TRIBOL_USE_HIP
+#define TRIBOL_DEFAULT_DEVICE __device__
+#define TRIBOL_DEFAULT_HOST_DEVICE __host__ __device__
+#else
+#define TRIBOL_DEFAULT_DEVICE
+#define TRIBOL_DEFAULT_HOST_DEVICE
+#endif
+
+// Defined when Tribol doesn't have a device available
+#if !( defined( TRIBOL_USE_CUDA ) || defined( TRIBOL_USE_HIP ) )
+#define TRIBOL_USE_HOST
+#endif
+
+// Define variable when in device code
+#if defined( __CUDA_ARCH__ ) || defined( __HIP_DEVICE_COMPILE__ )
+#define TRIBOL_DEVICE_CODE
+#endif
+
+// Ignore host code in __host__ __device__ code warning on NVCC
+#ifdef TRIBOL_USE_CUDA
+#define TRIBOL_NVCC_EXEC_CHECK_DISABLE #pragma nv_exec_check_disable
+#else
+#define TRIBOL_NVCC_EXEC_CHECK_DISABLE
+#endif
 
 }  // namespace tribol
 

From 995c8539a84603ee8e709b3f8b7a538d78aebeb2 Mon Sep 17 00:00:00 2001
From: Ryan Lutz <lutz23@llnl.gov>
Date: Thu, 12 Feb 2026 08:01:40 -0800
Subject: [PATCH 33/56] Patch test added

---
 src/tests/CMakeLists.txt              |   1 +
 src/tests/tribol_new_energy_patch.cpp | 377 ++++++++++++++++++++++++++
 src/tribol/physics/new_method.cpp     | 282 +++++++++++++------
 src/tribol/physics/new_method.hpp     |   4 +-
 4 files changed, 573 insertions(+), 91 deletions(-)
 create mode 100644 src/tests/tribol_new_energy_patch.cpp

diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 5b8ac893..a62e6990 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -151,6 +151,7 @@ if ( BUILD_REDECOMP AND TRIBOL_USE_MPI )
       tribol_mfem_mortar_lm.cpp
       tribol_proximity_check.cpp
       tribol_redecomp_tol.cpp
+      tribol_new_energy_patch.cpp
       )
 
   set(combined_test_depends tribol gtest)
diff --git a/src/tests/tribol_new_energy_patch.cpp b/src/tests/tribol_new_energy_patch.cpp
new file mode 100644
index 00000000..bbe79327
--- /dev/null
+++ b/src/tests/tribol_new_energy_patch.cpp
@@ -0,0 +1,377 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include <cmath>
+#include <set>
+
+#include <gtest/gtest.h>
+
+#ifdef TRIBOL_USE_UMPIRE
+#include "umpire/ResourceManager.hpp"
+#endif
+
+#include "mfem.hpp"
+
+#include "axom/CLI11.hpp"
+#include "axom/slic.hpp"
+
+#include "shared/mesh/MeshBuilder.hpp"
+#include "redecomp/redecomp.hpp"
+
+#include "tribol/config.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/interface/tribol.hpp"
+#include "tribol/interface/mfem_tribol.hpp"
+
+/**
+ * @brief Contact patch test using ENERGY_MORTAR with zero initial gap
+ *        and prescribed displacement applied incrementally over timesteps.
+ *
+ * Two unit squares [0,1]x[0,1] and [0,1]x[1,2] with zero gap.
+ * Linear elasticity with lambda = mu = 5 (lam_ and mu_ below).
+ *
+ * Analytical solution (plane strain, uniaxial stress with sigma_xx = 0):
+ *   eps_yy = applied_disp / total_height
+ *   eps_xx = -lambda / (lambda + 2*mu) * eps_yy
+ *   u_y(x,y) = eps_yy * y
+ *   u_x(x,y) = eps_xx * x
+ */
+class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>> {
+ protected:
+  tribol::RealT max_disp_;
+  double l2_err_vec_;
+  double l2_err_x_;
+  double l2_err_y_;
+
+  // --- User-configurable parameters ---
+  static constexpr int    num_timesteps_ = 10;
+  static constexpr double total_prescribed_disp_ = -0.01;
+  static constexpr double lam_ = 5.0;
+  static constexpr double mu_  = 5.0;
+  // ------------------------------------
+
+  void SetUp() override
+  {
+    int ref_levels = std::get<0>( GetParam() );
+    int order = 1;
+
+    auto mortar_attrs     = std::set<int>( { 5 } );
+    auto nonmortar_attrs  = std::set<int>( { 3 } );
+    auto xfixed_attrs     = std::set<int>( { 4 } );
+    auto yfixed_bottom_attrs = std::set<int>( { 1 } );
+    auto prescribed_attrs = std::set<int>( { 6 } );
+
+    int nel_per_dir = std::pow( 2, ref_levels );
+
+    // clang-format off
+    mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
+      shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir)
+        .updateBdrAttrib(1, 1)   // bottom (Fixed Y)
+        .updateBdrAttrib(2, 2)   // right 
+        .updateBdrAttrib(3, 3)   // top  (NonMortar)
+        .updateBdrAttrib(4, 4),  // left (X-fixed)
+      shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir)
+        .translate({0.0, 1.0})
+        .updateBdrAttrib(1, 5)   // bottom (Mortar)
+        .updateBdrAttrib(2, 2)   // right 
+        .updateBdrAttrib(3, 6)   // top  (prescribed displacement)
+        .updateBdrAttrib(4, 4)   // left  (Fixed x)
+    }));
+    // clang-format on
+
+    // FE space and grid functions
+    auto fe_coll = mfem::H1_FECollection( order, mesh.SpaceDimension() );
+    auto par_fe_space = mfem::ParFiniteElementSpace( &mesh, &fe_coll, mesh.SpaceDimension() );
+    auto coords = mfem::ParGridFunction( &par_fe_space );
+    if ( order > 1 ) {
+      mesh.SetNodalGridFunction( &coords, false );
+    } else {
+      mesh.GetNodes( coords );
+    }
+
+
+    // Grid function for displacement
+    mfem::ParGridFunction displacement( &par_fe_space );
+    displacement = 0.0;
+
+    mfem::ParGridFunction ref_coords( &par_fe_space );
+    mesh.GetNodes( ref_coords );
+
+    // Recover Dirichlet boundary tdof list
+    mfem::Array<int> ess_vdof_marker( par_fe_space.GetVSize() );
+    ess_vdof_marker = 0;
+
+    // x-fixed on left
+    {
+      mfem::Array<int> tmp;
+      mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+      bdr = 0;
+      for ( auto a : xfixed_attrs ) bdr[a - 1] = 1;
+      par_fe_space.GetEssentialVDofs( bdr, tmp, 0 );
+      for ( int i = 0; i < tmp.Size(); ++i )
+        ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+    }
+
+    // y-fixed on bottom
+    {
+      mfem::Array<int> tmp;
+      mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+      bdr = 0;
+      for ( auto a : yfixed_bottom_attrs ) bdr[a - 1] = 1;
+      par_fe_space.GetEssentialVDofs( bdr, tmp, 1 );
+      for ( int i = 0; i < tmp.Size(); ++i )
+        ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+    }
+
+    // y-prescribed on top
+    mfem::Array<int> prescribed_vdof_marker( par_fe_space.GetVSize() );
+    prescribed_vdof_marker = 0;
+    {
+      mfem::Array<int> tmp;
+      mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+      bdr = 0;
+      for ( auto a : prescribed_attrs ) bdr[a - 1] = 1;
+      par_fe_space.GetEssentialVDofs( bdr, tmp, 1 );
+      prescribed_vdof_marker = tmp;
+      for ( int i = 0; i < tmp.Size(); ++i )
+        ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+    }
+
+    mfem::Array<int> ess_tdof_list;
+    {
+      mfem::Array<int> ess_tdof_marker;
+      par_fe_space.GetRestrictionMatrix()->BooleanMult( ess_vdof_marker, ess_tdof_marker );
+      mfem::FiniteElementSpace::MarkerToList( ess_tdof_marker, ess_tdof_list );
+    }
+
+    mfem::Array<int> prescribed_tdof_list;
+    {
+      mfem::Array<int> marker;
+      par_fe_space.GetRestrictionMatrix()->BooleanMult( prescribed_vdof_marker, marker );
+      mfem::FiniteElementSpace::MarkerToList( marker, prescribed_tdof_list );
+    }
+
+    // set up mfem elasticity bilinear form
+    mfem::ParBilinearForm a( &par_fe_space );
+    mfem::ConstantCoefficient lambda_coeff( lam_ );
+    mfem::ConstantCoefficient mu_coeff( mu_ );
+    a.AddDomainIntegrator( new mfem::ElasticityIntegrator( lambda_coeff, mu_coeff ) );
+    a.Assemble();
+    a.Finalize();
+    auto A_elastic_raw = std::unique_ptr<mfem::HypreParMatrix>( a.ParallelAssemble() );
+
+    // VisIt output
+    mfem::VisItDataCollection visit_dc( "energy_patch_test", &mesh );
+    visit_dc.SetPrecision( 8 );
+    visit_dc.RegisterField( "displacement", &displacement );
+    visit_dc.SetCycle( 0 );
+    visit_dc.SetTime( 0.0 );
+    visit_dc.Save();
+
+    // timestepping loop for displacement
+    double disp_increment = total_prescribed_disp_ / num_timesteps_;
+    tribol::RealT dt = 1.0 / num_timesteps_;
+    int cs_id = 0, mesh1_id = 0, mesh2_id = 1;
+
+    mfem::Vector X( par_fe_space.GetTrueVSize() );
+    X = 0.0;
+
+    for ( int step = 1; step <= num_timesteps_; ++step )
+    {
+      double current_prescribed_disp = disp_increment * step;
+
+      // Prescribed displacement vector
+      mfem::Vector X_prescribed( par_fe_space.GetTrueVSize() );
+      X_prescribed = 0.0;
+      for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+        X_prescribed( prescribed_tdof_list[i] ) = current_prescribed_disp;
+      }
+
+      // Update coordinates for contact detection
+      {
+        mfem::Vector X_temp( X );
+        for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+          X_temp( prescribed_tdof_list[i] ) = current_prescribed_disp;
+        }
+        auto& P = *par_fe_space.GetProlongationMatrix();
+        P.Mult( X_temp, displacement );
+      }
+      coords = ref_coords;
+      coords += displacement;
+
+      // Re-register tribol each step (internal arrays need fresh allocation
+      // when contact pairs change between steps)
+      coords.ReadWrite();
+      tribol::registerMfemCouplingScheme( cs_id, mesh1_id, mesh2_id, mesh, coords,
+                                          mortar_attrs, nonmortar_attrs,
+                                          tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
+                                          tribol::ENERGY_MORTAR, tribol::FRICTIONLESS,
+                                          tribol::LAGRANGE_MULTIPLIER, tribol::BINNING_GRID );
+      tribol::setLagrangeMultiplierOptions( cs_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
+      tribol::setMfemKinematicConstantPenalty( cs_id, 10000.0, 10000.0 );
+
+      tribol::updateMfemParallelDecomposition();
+      tribol::update( step, step * dt, dt );
+
+      auto A_cont = tribol::getMfemDfDx( cs_id );
+
+      mfem::Vector f_contact( par_fe_space.GetTrueVSize() );
+      f_contact = 0.0;
+      tribol::getMfemResponse( cs_id, f_contact );
+      f_contact.Neg();
+
+      // Inhomogeneous Dirichlet: rhs = f_contact - K * u_prescribed
+      auto A_total = std::unique_ptr<mfem::HypreParMatrix>(
+        mfem::Add( 1.0, *A_elastic_raw, 1.0, *A_cont ) );
+
+      mfem::Vector rhs( par_fe_space.GetTrueVSize() );
+      A_total->Mult( X_prescribed, rhs );
+      rhs.Neg();
+      rhs += f_contact;
+
+      for ( int i = 0; i < ess_tdof_list.Size(); ++i ) {
+        rhs( ess_tdof_list[i] ) = 0.0;
+      }
+
+      // EliminateRowsCols() returns a new matrix holding the eliminated entries; own it so it is freed
+      auto A_elim = std::unique_ptr<mfem::HypreParMatrix>( A_total->EliminateRowsCols( ess_tdof_list ) );
+      mfem::Vector X_free( par_fe_space.GetTrueVSize() );
+      X_free = 0.0;
+
+      mfem::HypreBoomerAMG amg( *A_total );
+      amg.SetElasticityOptions( &par_fe_space );
+      amg.SetPrintLevel( 0 );
+
+      mfem::MINRESSolver solver( MPI_COMM_WORLD );
+      solver.SetRelTol( 1.0e-8 );
+      solver.SetAbsTol( 1.0e-12 );
+      solver.SetMaxIter( 5000 );
+      solver.SetPrintLevel( step == num_timesteps_ ? 3 : 1 );
+      solver.SetPreconditioner( amg );
+      solver.SetOperator( *A_total );
+      solver.Mult( rhs, X_free );
+
+      X = X_free;
+      X += X_prescribed;
+
+      SLIC_INFO( "Timestep " << step << "/" << num_timesteps_
+                 << " | prescribed disp = " << current_prescribed_disp );
+
+      // Save VisIt output
+      {
+        auto& P = *par_fe_space.GetProlongationMatrix();
+        P.Mult( X, displacement );
+      }
+      visit_dc.SetCycle( step );
+      visit_dc.SetTime( step * dt );
+      visit_dc.Save();
+    }
+
+    // Get final displacement
+    {
+      auto& P = *par_fe_space.GetProlongationMatrix();
+      P.Mult( X, displacement );
+    }
+
+    auto local_max = displacement.Max();
+    max_disp_ = 0.0;
+    MPI_Allreduce( &local_max, &max_disp_, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
+    SLIC_INFO( "Max displacement: " << max_disp_ );
+
+    // -----------------------------------------------------------------
+    // Analytical solution comparison
+    //
+    // Plane strain, uniaxial stress (sigma_xx = 0, free right side):
+    //   eps_yy = applied_disp / total_height = -0.01 / 2.0 = -0.005
+    //   eps_xx = -lambda/(lambda + 2*mu) * eps_yy
+    //   u_y = eps_yy * y
+    //   u_x = eps_xx * x
+    // -----------------------------------------------------------------
+    double total_height = 2.0;
+    double eps_yy = total_prescribed_disp_ / total_height;
+    double eps_xx = -lam_ / ( lam_ + 2.0 * mu_ ) * eps_yy;
+
+    SLIC_INFO( "Analytical: eps_yy = " << eps_yy << ", eps_xx = " << eps_xx );
+
+    mfem::VectorFunctionCoefficient exact_sol_coeff( 2,
+      [eps_xx, eps_yy]( const mfem::Vector& x, mfem::Vector& u ) {
+        u[0] = eps_xx * x[0];
+        u[1] = eps_yy * x[1];
+      } );
+
+    mfem::ParGridFunction exact_disp( &par_fe_space );
+    exact_disp.ProjectCoefficient( exact_sol_coeff );
+
+    // Vector error
+    mfem::ParGridFunction error_vec( exact_disp );
+    error_vec -= displacement;
+    l2_err_vec_ = mfem::ParNormlp( error_vec, 2, MPI_COMM_WORLD );
+
+    // Component-wise errors
+    const mfem::FiniteElementCollection* fec = par_fe_space.FEColl();
+    mfem::ParFiniteElementSpace scalar_fes( &mesh, fec, 1, par_fe_space.GetOrdering() );
+    const int n = scalar_fes.GetNDofs();
+
+    mfem::ParGridFunction ux_exact( &scalar_fes ), ux_num( &scalar_fes );
+    mfem::ParGridFunction uy_exact( &scalar_fes ), uy_num( &scalar_fes );
+
+    for ( int i = 0; i < n; ++i ) {
+      ux_exact( i ) = exact_disp( i );
+      ux_num( i )   = displacement( i );
+      uy_exact( i ) = exact_disp( n + i );
+      uy_num( i )   = displacement( n + i );
+    }
+
+    mfem::ParGridFunction ux_err( ux_exact );
+    ux_err -= ux_num;
+    l2_err_x_ = mfem::ParNormlp( ux_err, 2, MPI_COMM_WORLD );
+
+    mfem::ParGridFunction uy_err( uy_exact );
+    uy_err -= uy_num;
+    l2_err_y_ = mfem::ParNormlp( uy_err, 2, MPI_COMM_WORLD );
+
+    SLIC_INFO( "L2 error (vector): " << l2_err_vec_ );
+    SLIC_INFO( "L2 error (x):      " << l2_err_x_ );
+    SLIC_INFO( "L2 error (y):      " << l2_err_y_ );
+    SLIC_INFO( "Consistency check |err_vec^2 - (err_x^2 + err_y^2)| = "
+               << std::abs( l2_err_vec_ * l2_err_vec_
+                            - ( l2_err_x_ * l2_err_x_ + l2_err_y_ * l2_err_y_ ) ) );
+  }
+};
+
+TEST_P( MfemMortarEnergyPatchTest, check_patch_test )
+{
+  EXPECT_GT( max_disp_, 0.0 );
+  EXPECT_NEAR( 0.0, l2_err_vec_, 1.0e-2 );
+  EXPECT_NEAR( 0.0, l2_err_x_,  1.0e-2 );
+  EXPECT_NEAR( 0.0, l2_err_y_,  1.0e-2 );
+
+  MPI_Barrier( MPI_COMM_WORLD );
+}
+
+INSTANTIATE_TEST_SUITE_P( tribol, MfemMortarEnergyPatchTest, testing::Values( std::make_tuple( 2 ) ) );
+
+//------------------------------------------------------------------------------
+#include "axom/slic/core/SimpleLogger.hpp"
+
+int main( int argc, char* argv[] )
+{
+  int result = 0;
+
+  MPI_Init( &argc, &argv );
+  ::testing::InitGoogleTest( &argc, argv );
+
+#ifdef TRIBOL_USE_UMPIRE
+  umpire::ResourceManager::getInstance();
+#endif
+
+  axom::slic::SimpleLogger logger;
+  result = RUN_ALL_TESTS();
+
+  tribol::finalize();
+  MPI_Finalize();
+
+  return result;
+}
diff --git a/src/tribol/physics/new_method.cpp b/src/tribol/physics/new_method.cpp
index 4943e434..5fb48188 100644
--- a/src/tribol/physics/new_method.cpp
+++ b/src/tribol/physics/new_method.cpp
@@ -8,9 +8,11 @@
 #include <algorithm>
 #include <cassert>
 #include <iomanip>
+#include "tribol/common/ArrayTypes.hpp"
 #include "tribol/common/Parameters.hpp"
 #include "tribol/geom/GeomUtilities.hpp"
 #include "tribol/common/Enzyme.hpp"
+#include "tribol/mesh/MeshData.hpp"
 #include <set>
 #include <map>
 
@@ -1012,110 +1014,212 @@ std::pair<double, double> ContactEvaluator::eval_gtilde_fixed_qp( const Interfac
   return { gt1, gt2 };
 }
 
-// FiniteDiffResult ContactEvaluator::validate_g_tilde(const InterfacePair& pair, const MeshData::Viewer& mesh1, const
-// MeshData::Viewer& mesh2, double epsilon) const {
+FiniteDiffResult ContactEvaluator::validate_g_tilde(const InterfacePair& pair, MeshData& mesh1,
+MeshData& mesh2, double epsilon) const {
+
+    FiniteDiffResult result;
+
+    auto viewer1 = mesh1.getView();
+    auto viewer2 = mesh2.getView();
+
+    auto projs0 = projections(pair, viewer1, viewer2);
+    auto bounds0 = smoother_.bounds_from_projections(projs0);
+    auto smooth_bounds0 = smoother_.smooth_bounds(bounds0);
+    QuadPoints qp0 = compute_quadrature(smooth_bounds0);
+
+// auto [g1_base, g2_base] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+    auto [g1_base, g2_base] = eval_gtilde(pair, viewer1, viewer2);
+    result.g_tilde1_baseline = g1_base;
+    result.g_tilde2_baseline = g2_base;
+
+    // Collect nodes in sorted order
+    std::set<int> node_set;
+    auto A_conn = viewer1.getConnectivity()(pair.m_element_id1);
+    node_set.insert(A_conn[0]);
+    node_set.insert(A_conn[1]);
+    auto B_conn = viewer2.getConnectivity()(pair.m_element_id2);
+    node_set.insert(B_conn[0]);
+    node_set.insert(B_conn[1]);
+
+    result.node_ids = std::vector<int>(node_set.begin(), node_set.end());
+    // std::sort(result.node_ids.begin(), result.node_ids.end()); //Redundant??
+
+    int num_dofs = result.node_ids.size() * 2;
+    result.fd_gradient_g1.resize(num_dofs);
+    result.fd_gradient_g2.resize(num_dofs);
+
+    // ===== GET AND REORDER ENZYME GRADIENTS =====
+    double dgt1_dx[8] = {0.0};
+    double dgt2_dx[8] = {0.0};
+    grad_trib_area(pair, viewer1, viewer2, dgt1_dx, dgt2_dx);
+
+    // Map from node_id to position in x[8]
+    std::map<int, int> node_to_x_idx;
+    node_to_x_idx[A_conn[0]] = 0;  // A0 → x[0,1]
+    node_to_x_idx[A_conn[1]] = 1;  // A1 → x[2,3]
+    node_to_x_idx[B_conn[0]] = 2;  // B0 → x[4,5]
+    node_to_x_idx[B_conn[1]] = 3;  // B1 → x[6,7]
+
+    // Reorder Enzyme gradients to match sorted node order
+    result.analytical_gradient_g1.resize(num_dofs);
+    result.analytical_gradient_g2.resize(num_dofs);
+
+    for (size_t i = 0; i < result.node_ids.size(); ++i) {
+        int node_id = result.node_ids[i];
+        int x_idx = node_to_x_idx[node_id];
+
+        result.analytical_gradient_g1[2*i + 0] = dgt1_dx[2*x_idx + 0];  // x component
+        result.analytical_gradient_g1[2*i + 1] = dgt1_dx[2*x_idx + 1];  // y component
+        result.analytical_gradient_g2[2*i + 0] = dgt2_dx[2*x_idx + 0];
+        result.analytical_gradient_g2[2*i + 1] = dgt2_dx[2*x_idx + 1];
+    }
+    // =
 
-//     FiniteDiffResult result;
+    int dof_idx = 0;
+    //X-direction
 
-//     auto projs0 = projections(pair, mesh1, mesh2);
-//     auto bounds0 = smoother_.bounds_from_projections(projs0);
-//     auto smooth_bounds0 = smoother_.smooth_bounds(bounds0);
-//     QuadPoints qp0 = compute_quadrature(smooth_bounds0);
+    std::set<int> mesh1_nodes = {A_conn[0], A_conn[1]};
+    std::set<int> mesh2_nodes = {B_conn[0], B_conn[1]};
+    
 
-// // auto [g1_base, g2_base] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
-
-//     auto [g1_base, g2_base] = eval_gtilde(pair, mesh1, mesh2);
-//     result.g_tilde1_baseline = g1_base;
-//     result.g_tilde2_baseline = g2_base;
-
-//     // Collect nodes in sorted order
-//     std::set<int> node_set;
-//     auto A_conn = mesh1.getConnectivity()(pair.m_element_id1);
-//     node_set.insert(A_conn[0]);
-//     node_set.insert(A_conn[1]);
-//     auto B_conn = mesh2.getConnectivity()(pair.m_element_id2);
-//     node_set.insert(B_conn[0]);
-//     node_set.insert(B_conn[1]);
-
-//     result.node_ids = std::vector<int>(node_set.begin(), node_set.end());
-//     std::sort(result.node_ids.begin(), result.node_ids.end());
-
-//     int num_dofs = result.node_ids.size() * 2;
-//     result.fd_gradient_g1.resize(num_dofs);
-//     result.fd_gradient_g2.resize(num_dofs);
-
-//     // ===== GET AND REORDER ENZYME GRADIENTS =====
-//     double dgt1_dx[8] = {0.0};
-//     double dgt2_dx[8] = {0.0};
-//     grad_trib_area(pair, mesh1, mesh2, dgt1_dx, dgt2_dx);
-
-//     // Map from node_id to position in x[8]
-//     std::map<int, int> node_to_x_idx;
-//     node_to_x_idx[A_conn[0]] = 0;  // A0 → x[0,1]
-//     node_to_x_idx[A_conn[1]] = 1;  // A1 → x[2,3]
-//     node_to_x_idx[B_conn[0]] = 2;  // B0 → x[4,5]
-//     node_to_x_idx[B_conn[1]] = 3;  // B1 → x[6,7]
-
-//     // Reorder Enzyme gradients to match sorted node order
-//     result.analytical_gradient_g1.resize(num_dofs);
-//     result.analytical_gradient_g2.resize(num_dofs);
-
-//     for (size_t i = 0; i < result.node_ids.size(); ++i) {
-//         int node_id = result.node_ids[i];
-//         int x_idx = node_to_x_idx[node_id];
-
-//         result.analytical_gradient_g1[2*i + 0] = dgt1_dx[2*x_idx + 0];  // x component
-//         result.analytical_gradient_g1[2*i + 1] = dgt1_dx[2*x_idx + 1];  // y component
-//         result.analytical_gradient_g2[2*i + 0] = dgt2_dx[2*x_idx + 0];
-//         result.analytical_gradient_g2[2*i + 1] = dgt2_dx[2*x_idx + 1];
-//     }
-//     // =
+    for (int node_id : result.node_ids) {
+        {
 
-//     int dof_idx = 0;
-//     //X-direction
-//     for (int node_id : result.node_ids) {
-//         {
-//             double original = mesh.node(node_id).x;
+          bool is_in_mesh1 = (mesh1_nodes.count(node_id) > 0);
+          MeshData& mesh_to_perturb = is_in_mesh1 ? mesh1 : mesh2;
 
-//             mesh.node(node_id).x = original + epsilon;
-//             auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+          //Store Original Mesh coords:
+          auto pos = mesh_to_perturb.getView().getPosition();
+          int num_nodes = mesh_to_perturb.numberOfNodes();
+          int dim = mesh_to_perturb.spatialDimension();
 
-//             mesh.node(node_id).x = original - epsilon;
-//             auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+          std::vector<RealT> x_original(num_nodes);
+          std::vector<RealT> y_original(num_nodes);
+          std::vector<RealT> z_original(num_nodes);
+          
+          for (int i = 0; i < num_nodes; ++i) {
+            x_original[i] = pos[0][i];
+            y_original[i] = pos[1][i];
+            if (dim == 3) z_original[i] = pos[2][i];
+          }
 
-//             //Restorre orginal
-//             mesh.node(node_id).x = original;
+          std::vector<RealT> x_pert = x_original;
+          x_pert[node_id] += epsilon;
+          mesh_to_perturb.setPosition(x_pert.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr);
 
-//             result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
-//             result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
+          // Evaluate with x_plus
+          auto viewer1_plus = mesh1.getView();
+          auto viewer2_plus = mesh2.getView();
 
-//             dof_idx++;
-//         }
+          auto[g1_plus, g2_plus] = eval_gtilde_fixed_qp(pair, viewer1_plus, viewer2_plus, qp0);
 
-//     //y - direction
-//         {
-//             double original = mesh.node(node_id).y;
+          x_pert[node_id] = x_original[node_id] - epsilon;
 
-//             // +epsilon
-//             mesh.node(node_id).y = original + epsilon;
-//             auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+          mesh_to_perturb.setPosition(x_pert.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr);
+          
+          auto viewer1_minus = mesh1.getView();
+          auto viewer2_minus = mesh2.getView();
 
-//             // -epsilon
-//             mesh.node(node_id).y = original - epsilon;
-//             auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+          auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(pair, viewer1_minus, viewer2_minus, qp0);
 
-//             // Restore
-//             mesh.node(node_id).y = original;
+          // Restore original
+          mesh_to_perturb.setPosition(x_original.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr);
 
-//             // Central difference
-//             result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
-//             result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
+          // Compute gradient
+          result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
+          result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
 
-//             dof_idx++;
-//         }
-//     }
-//     return result;
-// }
+          dof_idx++;
+        }
+        {
+          bool is_in_mesh1 = (mesh1_nodes.count(node_id) > 0);
+          MeshData& mesh_to_perturb = is_in_mesh1 ? mesh1 : mesh2;
+
+          //Store Original Mesh coords:
+          auto pos = mesh_to_perturb.getView().getPosition();
+          int num_nodes = mesh_to_perturb.numberOfNodes();
+          int dim = mesh_to_perturb.spatialDimension();
+
+          std::vector<RealT> x_original(num_nodes);
+          std::vector<RealT> y_original(num_nodes);
+          std::vector<RealT> z_original(num_nodes);
+          
+          for (int i = 0; i < num_nodes; ++i) {
+            x_original[i] = pos[0][i];
+            y_original[i] = pos[1][i];
+            if (dim == 3) z_original[i] = pos[2][i];
+          }
+          std::vector<RealT> y_pert = y_original;
+
+          y_pert[node_id] += epsilon;
+
+          mesh_to_perturb.setPosition(x_original.data(), y_pert.data(), dim == 3 ? z_original.data() : nullptr);
+
+          auto viewer1_plus2 = mesh1.getView();
+          auto viewer2_plus2 = mesh2.getView();
+
+          auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(pair, viewer1_plus2, viewer2_plus2, qp0);
+
+          y_pert[node_id] = y_original[node_id] - epsilon;
+
+          mesh_to_perturb.setPosition(x_original.data(), y_pert.data(), dim == 3 ? z_original.data() : nullptr);
+
+          auto viewer1_minus2 = mesh1.getView();
+          auto viewer2_minus2 = mesh2.getView();
+          auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(pair, viewer1_minus2, viewer2_minus2, qp0);
+
+          mesh_to_perturb.setPosition(x_original.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr);
+
+          result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
+          result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
+          
+          dof_idx++;
+        }
+
+    //         double original = mesh.node(node_id).x;
+
+    //         double x_plus = 
+
+    //         mesh.node(node_id).x = original + epsilon;
+    //         auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+    //         mesh.node(node_id).x = original - epsilon;
+    //         auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+    //         //Restorre orginal
+    //         mesh.node(node_id).x = original;
+
+    //         result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
+    //         result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
+
+    //         dof_idx++;
+    //     }
+
+    // //y - direction
+    //     {
+    //         double original = mesh.node(node_id).y;
+
+    //         // +epsilon
+    //         mesh.node(node_id).y = original + epsilon;
+    //         auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+    //         // -epsilon
+    //         mesh.node(node_id).y = original - epsilon;
+    //         auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+    //         // Restore
+    //         mesh.node(node_id).y = original;
+
+    //         // Central difference
+    //         result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
+    //         result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
+
+    //         dof_idx++;
+    //     }
+    }
+    return result;
+}
 
 void ContactEvaluator::grad_gtilde_with_qp( const InterfacePair& pair, const MeshData::Viewer& mesh1,
                                             const MeshData::Viewer& mesh2, const QuadPoints& qp_fixed,
diff --git a/src/tribol/physics/new_method.hpp b/src/tribol/physics/new_method.hpp
index 522ef51c..b83fedef 100644
--- a/src/tribol/physics/new_method.hpp
+++ b/src/tribol/physics/new_method.hpp
@@ -104,8 +104,8 @@ class ContactEvaluator {
   std::pair<double, double> eval_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
                                          const MeshData::Viewer& mesh2 ) const;
 
-  FiniteDiffResult validate_g_tilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
-                                     const MeshData::Viewer& mesh2, double epsilon = 1e-7 ) const;
+  FiniteDiffResult validate_g_tilde( const InterfacePair& pair, MeshData& mesh1,
+                                     MeshData& mesh2, double epsilon = 1e-7 ) const;
 
   void print_gradient_comparison( const FiniteDiffResult& val ) const;
 

From 42673b47242491d590ce97c081b6257244f12e62 Mon Sep 17 00:00:00 2001
From: Ryan Lutz <lutz23@llnl.gov>
Date: Thu, 12 Feb 2026 08:04:11 -0800
Subject: [PATCH 34/56] Patch Test added

---
 src/tests/enzyme_smoke.cpp | 60 ++++++++++++++++++++++++++++++++------
 1 file changed, 51 insertions(+), 9 deletions(-)

diff --git a/src/tests/enzyme_smoke.cpp b/src/tests/enzyme_smoke.cpp
index 4a3b1d77..bd964098 100644
--- a/src/tests/enzyme_smoke.cpp
+++ b/src/tests/enzyme_smoke.cpp
@@ -31,6 +31,30 @@ void LinearQuadBasisDeriv( const double* xi, double* phi, double* dphi_dxi, doub
   __enzyme_fwddiff<void>( (void*)LinearQuadBasis, xi, xi_dot, phi, dphi_deta );
 }
 
+void LinearQuadBasisDeriv_FD(const double* xi, double* phi, double* dphi_dxi, double* dphi_deta, double h = 1e-4) {
+ // Central-difference (step h) derivatives of LinearQuadBasis; phi is never written here. First, wrt xi[0]:
+  double phi_p[4];  // basis values at the forward-perturbed point
+  double phi_m[4];  // basis values at the backward-perturbed point
+  double xi_plush[2] = {xi[0] + h, xi[1]};
+  double xi_minush[2] = {xi[0] - h, xi[1]}; 
+  LinearQuadBasis(xi_plush, phi_p);
+  LinearQuadBasis(xi_minush, phi_m);
+  for (int i = 0; i < 4; ++i) {
+    dphi_dxi[i] = (phi_p[i] - phi_m[i]) / (2 * h);  // (f(x + h) - f(x - h)) / (2h)
+  }
+
+  // compute derivatives wrt xi[1]
+  xi_plush[1] = xi[1] + h; 
+  xi_plush[0] = xi[0];  // undo the xi[0] perturbation left over from the first pass
+  xi_minush[1] = xi[1] - h; 
+  xi_minush[0] = xi[0];  // undo the xi[0] perturbation left over from the first pass
+  LinearQuadBasis(xi_plush, phi_p);
+  LinearQuadBasis(xi_minush, phi_m);
+  for (int i = 0; i < 4; ++i) {
+    dphi_deta[i] = (phi_p[i] - phi_m[i]) / (2 * h);
+  }
+}
+
 TEST( enzyme_smoke, basic_use )
 {
   double xi[2] = { 0.2, -0.4 };
@@ -38,14 +62,32 @@ TEST( enzyme_smoke, basic_use )
   double dphi_dxi[4] = { 0.0, 0.0, 0.0, 0.0 };
   double dphi_deta[4] = { 0.0, 0.0, 0.0, 0.0 };
 
-  LinearQuadBasisDeriv( xi, phi, dphi_dxi, dphi_deta );
+  double xi_fw[2] = { 0.2, -0.4 };
+  double phi_fw[4] = { 0.0, 0.0, 0.0, 0.0 };
+  double dphi_dxi_fw[4] = { 0.0, 0.0, 0.0, 0.0 };
+  double dphi_deta_fw[4] = { 0.0, 0.0, 0.0, 0.0 };
+
+
+  LinearQuadBasisDeriv_FD( xi, phi, dphi_dxi, dphi_deta);
+  LinearQuadBasisDeriv(xi_fw, phi_fw, dphi_dxi_fw, dphi_deta_fw);
+
+  EXPECT_NEAR( dphi_dxi[0], dphi_dxi_fw[0], 1e-6 );
+  EXPECT_NEAR( dphi_deta[0], dphi_deta_fw[0], 1e-6 );
+  EXPECT_NEAR( dphi_dxi[1], dphi_dxi_fw[1], 1e-6 );
+  EXPECT_NEAR( dphi_deta[1], dphi_deta_fw[1], 1e-6 );
+  EXPECT_NEAR( dphi_dxi[2], dphi_dxi_fw[2], 1e-6 );
+  EXPECT_NEAR( dphi_deta[2],  dphi_deta_fw[2], 1e-6);
+  EXPECT_NEAR( dphi_dxi[3], dphi_dxi_fw[3], 1e-6 );
+  EXPECT_NEAR( dphi_deta[3], dphi_deta_fw[3], 1e-6 );
+
+  //EXPECT_EQ( dphi_dxi[0], -0.25 * ( 1.0 - xi[1] ) );
+  //EXPECT_EQ( dphi_deta[0], -0.25 * ( 1.0 - xi[0] ) );
+  //EXPECT_EQ( dphi_dxi[1], 0.25 * ( 1.0 - xi[1] ) );
+  //EXPECT_EQ( dphi_deta[1], -0.25 * ( 1.0 + xi[0] ) );
+  //EXPECT_EQ( dphi_dxi[2], 0.25 * ( 1.0 + xi[1] ) );
+  //EXPECT_EQ( dphi_deta[2], 0.25 * ( 1.0 + xi[0] ) );
+  //EXPECT_EQ( dphi_dxi[3], -0.25 * ( 1.0 + xi[1] ) );
+  //EXPECT_EQ( dphi_deta[3], 0.25 * ( 1.0 - xi[0] ) );
+
 
-  EXPECT_EQ( dphi_dxi[0], -0.25 * ( 1.0 - xi[1] ) );
-  EXPECT_EQ( dphi_deta[0], -0.25 * ( 1.0 - xi[0] ) );
-  EXPECT_EQ( dphi_dxi[1], 0.25 * ( 1.0 - xi[1] ) );
-  EXPECT_EQ( dphi_deta[1], -0.25 * ( 1.0 + xi[0] ) );
-  EXPECT_EQ( dphi_dxi[2], 0.25 * ( 1.0 + xi[1] ) );
-  EXPECT_EQ( dphi_deta[2], 0.25 * ( 1.0 + xi[0] ) );
-  EXPECT_EQ( dphi_dxi[3], -0.25 * ( 1.0 + xi[1] ) );
-  EXPECT_EQ( dphi_deta[3], 0.25 * ( 1.0 - xi[0] ) );
 }

From 8008d152f77212119ee790cf11b3a297022b7bbd Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 12 Feb 2026 09:50:01 -0800
Subject: [PATCH 35/56] fix leak

---
 src/examples/mfem_mortar_energy_patch.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/examples/mfem_mortar_energy_patch.cpp b/src/examples/mfem_mortar_energy_patch.cpp
index 5f105df8..a90c019a 100644
--- a/src/examples/mfem_mortar_energy_patch.cpp
+++ b/src/examples/mfem_mortar_energy_patch.cpp
@@ -250,7 +250,7 @@ int main( int argc, char** argv )
 
   // Add contact stiffness to elasticity stiffness
   auto A_total = std::unique_ptr<mfem::HypreParMatrix>( mfem::Add( 1.0, *A_elasticity, 1.0, *A_contact ) );
-  A_total->EliminateRowsCols( ess_tdof_list );
+  auto A_elim = std::unique_ptr<mfem::HypreParMatrix>( A_total->EliminateRowsCols( ess_tdof_list ) );
 
   timer.stop();
   SLIC_INFO_ROOT(

From b9a3eb7a23eb0546af20df94b0598153d09e6372 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 12 Feb 2026 09:50:12 -0800
Subject: [PATCH 36/56] update file list

---
 src/tests/CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index a62e6990..7e757b36 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -151,7 +151,6 @@ if ( BUILD_REDECOMP AND TRIBOL_USE_MPI )
       tribol_mfem_mortar_lm.cpp
       tribol_proximity_check.cpp
       tribol_redecomp_tol.cpp
-      tribol_new_energy_patch.cpp
       )
 
   set(combined_test_depends tribol gtest)
@@ -199,7 +198,7 @@ if( TRIBOL_USE_ENZYME )
       tribol_enzyme_mortar_assembled.cpp
       tribol_enzyme_poly_intersect.cpp
       tribol_mfem_mortar_energy.cpp
-      new_mortar_deriv.cpp
+      tribol_new_energy_patch.cpp
       )
 
   set(combined_test_depends tribol gtest)

From 2fd1af3e3c42fbdc27644715609bc781df5dc016 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 12 Feb 2026 09:50:34 -0800
Subject: [PATCH 37/56] formatting

---
 src/tribol/physics/new_method.cpp | 276 +++++++++++++++---------------
 1 file changed, 137 insertions(+), 139 deletions(-)

diff --git a/src/tribol/physics/new_method.cpp b/src/tribol/physics/new_method.cpp
index 5fb48188..fe82753d 100644
--- a/src/tribol/physics/new_method.cpp
+++ b/src/tribol/physics/new_method.cpp
@@ -1014,172 +1014,170 @@ std::pair<double, double> ContactEvaluator::eval_gtilde_fixed_qp( const Interfac
   return { gt1, gt2 };
 }
 
-FiniteDiffResult ContactEvaluator::validate_g_tilde(const InterfacePair& pair, MeshData& mesh1,
-MeshData& mesh2, double epsilon) const {
-
-    FiniteDiffResult result;
-
-    auto viewer1 = mesh1.getView();
-    auto viewer2 = mesh2.getView();
-
-    auto projs0 = projections(pair, viewer1, viewer2);
-    auto bounds0 = smoother_.bounds_from_projections(projs0);
-    auto smooth_bounds0 = smoother_.smooth_bounds(bounds0);
-    QuadPoints qp0 = compute_quadrature(smooth_bounds0);
-
-// auto [g1_base, g2_base] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
-
-    auto [g1_base, g2_base] = eval_gtilde(pair, viewer1, viewer2);
-    result.g_tilde1_baseline = g1_base;
-    result.g_tilde2_baseline = g2_base;
-
-    // Collect nodes in sorted order
-    std::set<int> node_set;
-    auto A_conn = viewer1.getConnectivity()(pair.m_element_id1);
-    node_set.insert(A_conn[0]);
-    node_set.insert(A_conn[1]);
-    auto B_conn = viewer2.getConnectivity()(pair.m_element_id2);
-    node_set.insert(B_conn[0]);
-    node_set.insert(B_conn[1]);
-
-    result.node_ids = std::vector<int>(node_set.begin(), node_set.end());
-    // std::sort(result.node_ids.begin(), result.node_ids.end()); //Redundant??
-
-    int num_dofs = result.node_ids.size() * 2;
-    result.fd_gradient_g1.resize(num_dofs);
-    result.fd_gradient_g2.resize(num_dofs);
-
-    // ===== GET AND REORDER ENZYME GRADIENTS =====
-    double dgt1_dx[8] = {0.0};
-    double dgt2_dx[8] = {0.0};
-    grad_trib_area(pair, viewer1, viewer2, dgt1_dx, dgt2_dx);
-
-    // Map from node_id to position in x[8]
-    std::map<int, int> node_to_x_idx;
-    node_to_x_idx[A_conn[0]] = 0;  // A0 → x[0,1]
-    node_to_x_idx[A_conn[1]] = 1;  // A1 → x[2,3]
-    node_to_x_idx[B_conn[0]] = 2;  // B0 → x[4,5]
-    node_to_x_idx[B_conn[1]] = 3;  // B1 → x[6,7]
-
-    // Reorder Enzyme gradients to match sorted node order
-    result.analytical_gradient_g1.resize(num_dofs);
-    result.analytical_gradient_g2.resize(num_dofs);
-
-    for (size_t i = 0; i < result.node_ids.size(); ++i) {
-        int node_id = result.node_ids[i];
-        int x_idx = node_to_x_idx[node_id];
-
-        result.analytical_gradient_g1[2*i + 0] = dgt1_dx[2*x_idx + 0];  // x component
-        result.analytical_gradient_g1[2*i + 1] = dgt1_dx[2*x_idx + 1];  // y component
-        result.analytical_gradient_g2[2*i + 0] = dgt2_dx[2*x_idx + 0];
-        result.analytical_gradient_g2[2*i + 1] = dgt2_dx[2*x_idx + 1];
-    }
-    // =
+FiniteDiffResult ContactEvaluator::validate_g_tilde( const InterfacePair& pair, MeshData& mesh1, MeshData& mesh2,
+                                                     double epsilon ) const
+{
+  FiniteDiffResult result;
+
+  auto viewer1 = mesh1.getView();
+  auto viewer2 = mesh2.getView();
+
+  auto projs0 = projections( pair, viewer1, viewer2 );
+  auto bounds0 = smoother_.bounds_from_projections( projs0 );
+  auto smooth_bounds0 = smoother_.smooth_bounds( bounds0 );
+  QuadPoints qp0 = compute_quadrature( smooth_bounds0 );
+
+  // auto [g1_base, g2_base] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
+
+  auto [g1_base, g2_base] = eval_gtilde( pair, viewer1, viewer2 );
+  result.g_tilde1_baseline = g1_base;
+  result.g_tilde2_baseline = g2_base;
+
+  // Collect nodes in sorted order
+  std::set<int> node_set;
+  auto A_conn = viewer1.getConnectivity()( pair.m_element_id1 );
+  node_set.insert( A_conn[0] );
+  node_set.insert( A_conn[1] );
+  auto B_conn = viewer2.getConnectivity()( pair.m_element_id2 );
+  node_set.insert( B_conn[0] );
+  node_set.insert( B_conn[1] );
+
+  result.node_ids = std::vector<int>( node_set.begin(), node_set.end() );
+  // std::sort(result.node_ids.begin(), result.node_ids.end()); //Redundant??
+
+  int num_dofs = result.node_ids.size() * 2;
+  result.fd_gradient_g1.resize( num_dofs );
+  result.fd_gradient_g2.resize( num_dofs );
+
+  // ===== GET AND REORDER ENZYME GRADIENTS =====
+  double dgt1_dx[8] = { 0.0 };
+  double dgt2_dx[8] = { 0.0 };
+  grad_trib_area( pair, viewer1, viewer2, dgt1_dx, dgt2_dx );
+
+  // Map from node_id to position in x[8]
+  std::map<int, int> node_to_x_idx;
+  node_to_x_idx[A_conn[0]] = 0;  // A0 → x[0,1]
+  node_to_x_idx[A_conn[1]] = 1;  // A1 → x[2,3]
+  node_to_x_idx[B_conn[0]] = 2;  // B0 → x[4,5]
+  node_to_x_idx[B_conn[1]] = 3;  // B1 → x[6,7]
+
+  // Reorder Enzyme gradients to match sorted node order
+  result.analytical_gradient_g1.resize( num_dofs );
+  result.analytical_gradient_g2.resize( num_dofs );
+
+  for ( size_t i = 0; i < result.node_ids.size(); ++i ) {
+    int node_id = result.node_ids[i];
+    int x_idx = node_to_x_idx[node_id];
+
+    result.analytical_gradient_g1[2 * i + 0] = dgt1_dx[2 * x_idx + 0];  // x component
+    result.analytical_gradient_g1[2 * i + 1] = dgt1_dx[2 * x_idx + 1];  // y component
+    result.analytical_gradient_g2[2 * i + 0] = dgt2_dx[2 * x_idx + 0];
+    result.analytical_gradient_g2[2 * i + 1] = dgt2_dx[2 * x_idx + 1];
+  }
+  // =
 
-    int dof_idx = 0;
-    //X-direction
+  int dof_idx = 0;
+  // X-direction
 
-    std::set<int> mesh1_nodes = {A_conn[0], A_conn[1]};
-    std::set<int> mesh2_nodes = {B_conn[0], B_conn[1]};
-    
+  std::set<IndexT> mesh1_nodes = { A_conn[0], A_conn[1] };
+  std::set<IndexT> mesh2_nodes = { B_conn[0], B_conn[1] };
 
-    for (int node_id : result.node_ids) {
-        {
+  for ( int node_id : result.node_ids ) {
+    {
+      bool is_in_mesh1 = ( mesh1_nodes.count( node_id ) > 0 );
+      MeshData& mesh_to_perturb = is_in_mesh1 ? mesh1 : mesh2;
 
-          bool is_in_mesh1 = (mesh1_nodes.count(node_id) > 0);
-          MeshData& mesh_to_perturb = is_in_mesh1 ? mesh1 : mesh2;
+      // Store Original Mesh coords:
+      auto pos = mesh_to_perturb.getView().getPosition();
+      int num_nodes = mesh_to_perturb.numberOfNodes();
+      int dim = mesh_to_perturb.spatialDimension();
 
-          //Store Original Mesh coords:
-          auto pos = mesh_to_perturb.getView().getPosition();
-          int num_nodes = mesh_to_perturb.numberOfNodes();
-          int dim = mesh_to_perturb.spatialDimension();
+      std::vector<RealT> x_original( num_nodes );
+      std::vector<RealT> y_original( num_nodes );
+      std::vector<RealT> z_original( num_nodes );
 
-          std::vector<RealT> x_original(num_nodes);
-          std::vector<RealT> y_original(num_nodes);
-          std::vector<RealT> z_original(num_nodes);
-          
-          for (int i = 0; i < num_nodes; ++i) {
-            x_original[i] = pos[0][i];
-            y_original[i] = pos[1][i];
-            if (dim == 3) z_original[i] = pos[2][i];
-          }
+      for ( int i = 0; i < num_nodes; ++i ) {
+        x_original[i] = pos[0][i];
+        y_original[i] = pos[1][i];
+        if ( dim == 3 ) z_original[i] = pos[2][i];
+      }
 
-          std::vector<RealT> x_pert = x_original;
-          x_pert[node_id] += epsilon;
-          mesh_to_perturb.setPosition(x_pert.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr);
+      std::vector<RealT> x_pert = x_original;
+      x_pert[node_id] += epsilon;
+      mesh_to_perturb.setPosition( x_pert.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr );
 
-          //Evalaute with x_plus
-          auto viewer1_plus = mesh1.getView();
-          auto viewer2_plus = mesh2.getView();
+      // Evaluate with x_plus
+      auto viewer1_plus = mesh1.getView();
+      auto viewer2_plus = mesh2.getView();
 
-          auto[g1_plus, g2_plus] = eval_gtilde_fixed_qp(pair, viewer1_plus, viewer2_plus, qp0);
+      auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp( pair, viewer1_plus, viewer2_plus, qp0 );
 
-          x_pert[node_id] = x_original[node_id] - epsilon;
+      x_pert[node_id] = x_original[node_id] - epsilon;
 
-          mesh_to_perturb.setPosition(x_pert.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr);
-          
-          auto viewer1_minus = mesh1.getView();
-          auto viewer2_minus = mesh2.getView();
+      mesh_to_perturb.setPosition( x_pert.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr );
 
-          auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(pair, viewer1_minus, viewer2_minus, qp0);
+      auto viewer1_minus = mesh1.getView();
+      auto viewer2_minus = mesh2.getView();
 
-          //Restore orginal 
-          mesh_to_perturb.setPosition(x_original.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr);
+      auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp( pair, viewer1_minus, viewer2_minus, qp0 );
 
-          // Compute gradient
-          result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
-          result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
+      // Restore original
+      mesh_to_perturb.setPosition( x_original.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr );
 
-          dof_idx++;
-        }
-        {
-          bool is_in_mesh1 = (mesh1_nodes.count(node_id) > 0);
-          MeshData& mesh_to_perturb = is_in_mesh1 ? mesh1 : mesh2;
+      // Compute gradient
+      result.fd_gradient_g1[dof_idx] = ( g1_plus - g1_minus ) / ( 2.0 * epsilon );
+      result.fd_gradient_g2[dof_idx] = ( g2_plus - g2_minus ) / ( 2.0 * epsilon );
 
-          //Store Original Mesh coords:
-          auto pos = mesh_to_perturb.getView().getPosition();
-          int num_nodes = mesh_to_perturb.numberOfNodes();
-          int dim = mesh_to_perturb.spatialDimension();
+      dof_idx++;
+    }
+    {
+      bool is_in_mesh1 = ( mesh1_nodes.count( node_id ) > 0 );
+      MeshData& mesh_to_perturb = is_in_mesh1 ? mesh1 : mesh2;
+
+      // Store Original Mesh coords:
+      auto pos = mesh_to_perturb.getView().getPosition();
+      int num_nodes = mesh_to_perturb.numberOfNodes();
+      int dim = mesh_to_perturb.spatialDimension();
+
+      std::vector<RealT> x_original( num_nodes );
+      std::vector<RealT> y_original( num_nodes );
+      std::vector<RealT> z_original( num_nodes );
+
+      for ( int i = 0; i < num_nodes; ++i ) {
+        x_original[i] = pos[0][i];
+        y_original[i] = pos[1][i];
+        if ( dim == 3 ) z_original[i] = pos[2][i];
+      }
+      std::vector<RealT> y_pert = y_original;
 
-          std::vector<RealT> x_original(num_nodes);
-          std::vector<RealT> y_original(num_nodes);
-          std::vector<RealT> z_original(num_nodes);
-          
-          for (int i = 0; i < num_nodes; ++i) {
-            x_original[i] = pos[0][i];
-            y_original[i] = pos[1][i];
-            if (dim == 3) z_original[i] = pos[2][i];
-          }
-          std::vector<RealT> y_pert = y_original;
+      y_pert[node_id] += epsilon;
 
-          y_pert[node_id] += epsilon;
+      mesh_to_perturb.setPosition( x_original.data(), y_pert.data(), dim == 3 ? z_original.data() : nullptr );
 
-          mesh_to_perturb.setPosition(x_original.data(), y_pert.data(), dim == 3 ? z_original.data() : nullptr);
+      auto viewer1_plus2 = mesh1.getView();
+      auto viewer2_plus2 = mesh2.getView();
 
-          auto viewer1_plus2 = mesh1.getView();
-          auto viewer2_plus2 = mesh2.getView();
+      auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp( pair, viewer1_plus2, viewer2_plus2, qp0 );
 
-          auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(pair, viewer1_plus2, viewer2_plus2, qp0);
+      y_pert[node_id] = y_original[node_id] - epsilon;
 
-          y_pert[node_id] = y_original[node_id] - epsilon;
+      mesh_to_perturb.setPosition( x_original.data(), y_pert.data(), dim == 3 ? z_original.data() : nullptr );
 
-          mesh_to_perturb.setPosition(x_original.data(), y_pert.data(), dim == 3 ? z_original.data() : nullptr);
+      auto viewer1_minus2 = mesh1.getView();
+      auto viewer2_minus2 = mesh2.getView();
+      auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp( pair, viewer1_minus2, viewer2_minus2, qp0 );
 
-          auto viewer1_minus2 = mesh1.getView();
-          auto viewer2_minus2 = mesh2.getView();
-          auto [g1_minus, g2_minus] = eval_gtilde_fixed_qp(pair, viewer1_minus2, viewer2_minus2, qp0);
+      mesh_to_perturb.setPosition( x_original.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr );
 
-          mesh_to_perturb.setPosition(x_original.data(), y_original.data(), dim == 3 ? z_original.data() : nullptr);
+      result.fd_gradient_g1[dof_idx] = ( g1_plus - g1_minus ) / ( 2.0 * epsilon );
+      result.fd_gradient_g2[dof_idx] = ( g2_plus - g2_minus ) / ( 2.0 * epsilon );
 
-          result.fd_gradient_g1[dof_idx] = (g1_plus - g1_minus) / (2.0 * epsilon);
-          result.fd_gradient_g2[dof_idx] = (g2_plus - g2_minus) / (2.0 * epsilon);
-          
-          dof_idx++;
-        }
+      dof_idx++;
+    }
 
     //         double original = mesh.node(node_id).x;
 
-    //         double x_plus = 
+    //         double x_plus =
 
     //         mesh.node(node_id).x = original + epsilon;
     //         auto [g1_plus, g2_plus] = eval_gtilde_fixed_qp(mesh, A, B, qp0);
@@ -1217,8 +1215,8 @@ MeshData& mesh2, double epsilon) const {
 
     //         dof_idx++;
     //     }
-    }
-    return result;
+  }
+  return result;
 }
 
 void ContactEvaluator::grad_gtilde_with_qp( const InterfacePair& pair, const MeshData::Viewer& mesh1,

From 9f80592451532d3fef469065e7bc14212ee77bf5 Mon Sep 17 00:00:00 2001
From: EB Chin <chin23@llnl.gov>
Date: Thu, 12 Feb 2026 10:44:54 -0800
Subject: [PATCH 38/56] test/example consistency updates

---
 src/examples/mfem_mortar_energy_patch.cpp | 4 +---
 src/tests/tribol_mfem_mortar_energy.cpp   | 4 ++--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/examples/mfem_mortar_energy_patch.cpp b/src/examples/mfem_mortar_energy_patch.cpp
index a90c019a..06216a5e 100644
--- a/src/examples/mfem_mortar_energy_patch.cpp
+++ b/src/examples/mfem_mortar_energy_patch.cpp
@@ -262,9 +262,7 @@ int main( int argc, char** argv )
   timer.start();
 
   // Retrieve contact force (response)
-  mfem::Vector f_contact( fespace.GetTrueVSize() );
-  f_contact = 0.0;
-  tribol::getMfemResponse( coupling_scheme_id, f_contact );
+  auto f_contact = tribol::getMfemTDofForce( coupling_scheme_id );
   f_contact.Neg();
   for ( int i{ 0 }; i < ess_tdof_list.Size(); ++i ) {
     f_contact( ess_tdof_list[i] ) = 0.0;
diff --git a/src/tests/tribol_mfem_mortar_energy.cpp b/src/tests/tribol_mfem_mortar_energy.cpp
index 2f1a024a..d5d4817a 100644
--- a/src/tests/tribol_mfem_mortar_energy.cpp
+++ b/src/tests/tribol_mfem_mortar_energy.cpp
@@ -54,7 +54,7 @@ class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int>> {
     // boundary element attributes of x-fixed surfaces (left side)
     auto xfixed_attrs = std::set<int>( { 4 } );
     // boundary element attributes of y-fixed surfaces (bottom of bottom square, top of top square)
-    auto yfixed_attrs = std::set<int>( { 1, 6 } );
+    auto yfixed_attrs = std::set<int>( { 1 } );
 
     // build mesh of 2 squares
     int nel_per_dir = std::pow( 2, ref_levels );
@@ -70,7 +70,7 @@ class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int>> {
         .translate({0.0, 0.99}) // Shift up to [0,1]x[0.99, 1.99]. Overlap 0.01.
         .updateBdrAttrib(1, 5) // Bottom (Mortar)
         .updateBdrAttrib(2, 2) // Right
-        .updateBdrAttrib(3, 6) // Top (Fixed Y)
+        .updateBdrAttrib(3, 1) // Top (Fixed Y)
         .updateBdrAttrib(4, 4) // Left (Fixed X)
     }));
     // clang-format on

From 6caac3b9367ffd447821b22d63e3a4df3a2754ea Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 12 Feb 2026 11:38:19 -0800
Subject: [PATCH 39/56] force submesh and jacobian data for ENERGY_MORTAR

---
 src/tribol/interface/mfem_tribol.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/tribol/interface/mfem_tribol.cpp b/src/tribol/interface/mfem_tribol.cpp
index e741a83e..fdeb5f99 100644
--- a/src/tribol/interface/mfem_tribol.cpp
+++ b/src/tribol/interface/mfem_tribol.cpp
@@ -69,7 +69,7 @@ void registerMfemCouplingScheme( IndexT cs_id, int mesh_id_1, int mesh_id_2, con
   // Set data required for use with Lagrange multiplier enforcement option.
   // Coupling scheme validity will be checked later, but here some initial
   // data is created/initialized for use with LMs.
-  if ( enforcement_method == LAGRANGE_MULTIPLIER ) {
+  if ( enforcement_method == LAGRANGE_MULTIPLIER || contact_method == ENERGY_MORTAR ) {
     std::unique_ptr<mfem::FiniteElementCollection> pressure_fec = std::make_unique<mfem::H1_FECollection>(
         current_coords.FESpace()->FEColl()->GetOrder(), mesh.SpaceDimension() );
     int pressure_vdim = 0;
@@ -95,8 +95,10 @@ void registerMfemCouplingScheme( IndexT cs_id, int mesh_id_1, int mesh_id_2, con
                                                               std::move( pressure_fec ), pressure_vdim ) );
     // set up Jacobian transfer if the coupling scheme requires it
     auto lm_options = cs.getEnforcementOptions().lm_implicit_options;
-    if ( lm_options.enforcement_option_set && ( lm_options.eval_mode == ImplicitEvalMode::MORTAR_JACOBIAN ||
-                                                lm_options.eval_mode == ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN ) ) {
+    if ( ( lm_options.enforcement_option_set &&
+           ( lm_options.eval_mode == ImplicitEvalMode::MORTAR_JACOBIAN ||
+             lm_options.eval_mode == ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN ) ) ||
+         contact_method == ENERGY_MORTAR ) {
       // create matrix transfer operator between redecomp and
       // parent/parent-linked boundary submesh
       cs.setMfemJacobianData(

From 4f108a5931a099174f7efc8232f2a4736800f255 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 13 Feb 2026 09:28:47 -0800
Subject: [PATCH 40/56] backup defs when mfem is built without enzyme

---
 src/tribol/common/Enzyme.hpp | 50 +++++++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/tribol/common/Enzyme.hpp b/src/tribol/common/Enzyme.hpp
index a384d9f5..226c7a52 100644
--- a/src/tribol/common/Enzyme.hpp
+++ b/src/tribol/common/Enzyme.hpp
@@ -11,7 +11,49 @@
 #include "tribol/common/BasicTypes.hpp"
 
 #ifdef TRIBOL_USE_ENZYME
+#ifdef MFEM_USE_ENZYME
 #include "mfem/general/enzyme.hpp"
+#else  // MFEM_USE_ENZYME
+// NOTE: contents copied from MFEM's general/enzyme.hpp
+
+/*
+ * Variables prefixed with enzyme_* or function types prefixed with __enzyme_*,
+ * are variables which will get preprocessed in the LLVM intermediate
+ * representation when the Enzyme LLVM plugin is loaded. See the Enzyme
+ * documentation (https://enzyme.mit.edu) for more information.
+ */
+
+extern int enzyme_dup;
+extern int enzyme_dupnoneed;
+extern int enzyme_out;
+extern int enzyme_const;
+extern int enzyme_interleave;
+
+#if defined( MFEM_USE_CUDA ) || defined( MFEM_USE_HIP )
+#define MFEM_DEVICE_EXTERN_STMT( name ) extern __device__ int name;
+#else
+#define MFEM_DEVICE_EXTERN_STMT( name )
+#endif
+
+MFEM_DEVICE_EXTERN_STMT( enzyme_dup )
+MFEM_DEVICE_EXTERN_STMT( enzyme_dupnoneed )
+MFEM_DEVICE_EXTERN_STMT( enzyme_out )
+MFEM_DEVICE_EXTERN_STMT( enzyme_const )
+MFEM_DEVICE_EXTERN_STMT( enzyme_interleave )
+
+// warning: if inlined, triggers function '__enzyme_autodiff' is not defined
+template <typename return_type, typename... Args>
+MFEM_HOST_DEVICE return_type __enzyme_autodiff( Args... );
+
+// warning: if inlined, triggers function '__enzyme_fwddiff' is not defined
+template <typename return_type, typename... Args>
+MFEM_HOST_DEVICE return_type __enzyme_fwddiff( Args... );
+
+#define MFEM_ENZYME_INACTIVENOFREE __attribute__( ( enzyme_inactive, enzyme_nofree ) )
+#define MFEM_ENZYME_INACTIVE __attribute__( ( enzyme_inactive ) )
+#define MFEM_ENZYME_FN_LIKE( x ) __attribute__( ( enzyme_function_like( #x ) ) )
+
+#endif  // MFEM_USE_ENZYME
 
 #if !defined( TRIBOL_USE_HOST ) && !defined( TRIBOL_DEVICE_CODE )
 // When compiling with NVCC or HIPCC, the compiler performs multiple passes.
@@ -33,7 +75,7 @@ extern int tribol_host_enzyme_dupnoneed asm( "enzyme_dupnoneed" );
 #define TRIBOL_ENZYME_OUT tribol_host_enzyme_out
 #define TRIBOL_ENZYME_DUPNONEED tribol_host_enzyme_dupnoneed
 
-#else
+#else  // !defined( TRIBOL_USE_HOST ) && !defined( TRIBOL_DEVICE_CODE )
 // We are either:
 // 1. In a device compilation pass (__CUDA_ARCH__ or __HIP_DEVICE_COMPILE__ defined).
 // 2. Using a standard host compiler (GCC, Clang, etc.).
@@ -42,14 +84,14 @@ extern int tribol_host_enzyme_dupnoneed asm( "enzyme_dupnoneed" );
 #define TRIBOL_ENZYME_DUP enzyme_dup
 #define TRIBOL_ENZYME_OUT enzyme_out
 #define TRIBOL_ENZYME_DUPNONEED enzyme_dupnoneed
-#endif
+#endif  // !defined( TRIBOL_USE_HOST ) && !defined( TRIBOL_DEVICE_CODE )
 
-#else
+#else  // TRIBOL_USE_ENZYME
 // Fallback definitions if Enzyme is disabled
 #define TRIBOL_ENZYME_CONST 0
 #define TRIBOL_ENZYME_DUP 0
 #define TRIBOL_ENZYME_OUT 0
 #define TRIBOL_ENZYME_DUPNONEED 0
-#endif
+#endif  // TRIBOL_USE_ENZYME
 
 #endif /* SRC_TRIBOL_COMMON_ENZYME_HPP_ */
\ No newline at end of file

From b7e090ba30b405f404a735c0afc277e2ee08cfd4 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 13 Feb 2026 13:01:12 -0800
Subject: [PATCH 41/56] more energy mortar checks

---
 src/tribol/interface/mfem_tribol.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tribol/interface/mfem_tribol.cpp b/src/tribol/interface/mfem_tribol.cpp
index fdeb5f99..fe36016e 100644
--- a/src/tribol/interface/mfem_tribol.cpp
+++ b/src/tribol/interface/mfem_tribol.cpp
@@ -540,7 +540,7 @@ void updateMfemParallelDecomposition( int n_ranks, bool force_new_redecomp )
         registerNodalReferenceCoords( mesh_ids[0], xref_ptrs[0], xref_ptrs[1], xref_ptrs[2] );
         registerNodalReferenceCoords( mesh_ids[1], xref_ptrs[0], xref_ptrs[1], xref_ptrs[2] );
       }
-      if ( cs.getEnforcementMethod() == LAGRANGE_MULTIPLIER ) {
+      if ( cs.getEnforcementMethod() == LAGRANGE_MULTIPLIER || cs.getContactMethod() == ENERGY_MORTAR ) {
         SLIC_ERROR_ROOT_IF( cs.getContactModel() != FRICTIONLESS, "Only frictionless contact is supported." );
         SLIC_ERROR_ROOT_IF( cs.getContactMethod() != SINGLE_MORTAR && cs.getContactMethod() != ENERGY_MORTAR,
                             "Only single mortar or ENERGY_MORTAR contact is supported." );
@@ -552,7 +552,7 @@ void updateMfemParallelDecomposition( int n_ranks, bool force_new_redecomp )
         registerMortarGaps( mesh_ids[1], g_ptrs[0] );
         auto p_ptrs = submesh_data->GetRedecompPressurePtrs();
         registerMortarPressures( mesh_ids[1], p_ptrs[0] );
-        if ( cs.hasMfemJacobianData() && new_redecomp ) {
+        if ( ( cs.hasMfemJacobianData() || cs.getContactMethod() == ENERGY_MORTAR ) && new_redecomp ) {
           // updates Jacobian transfer operator for new redecomp mesh
           cs.getMfemJacobianData()->UpdateJacobianXfer();
         }

From 8ba043c18bce03e652284c6ebb85cd3098c57ef7 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 13 Feb 2026 14:20:18 -0800
Subject: [PATCH 42/56] add option for non-tied contact

---
 src/tribol/physics/NewMethodAdapter.cpp | 16 ++++++++++++++++
 src/tribol/physics/NewMethodAdapter.hpp |  1 +
 2 files changed, 17 insertions(+)

diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index c385f8e2..74b6ab12 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -137,6 +137,17 @@ void NewMethodAdapter::updateNodalGaps()
   g_tilde_vec_.Fill( 0.0 );
   P_submesh.MultTranspose( g_tilde_linear_form, g_tilde_vec_.get() );
 
+  mfem::Array<int> rows_to_elim;
+  if ( !tied_contact_ ) {
+    rows_to_elim.Reserve( g_tilde_vec_.Size() );
+    for ( int i{ 0 }; i < g_tilde_vec_.Size(); ++i ) {
+      if ( g_tilde_vec_[i] > 0.0 ) {
+        g_tilde_vec_[i] = 0.0;
+        rows_to_elim.push_back( i );
+      }
+    }
+  }
+
   mfem::ParLinearForm A_linear_form( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
   submesh_data_.GetPressureTransfer().RedecompToSubmesh( redecomp_area, A_linear_form );
   A_vec_ = shared::ParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
@@ -151,6 +162,11 @@ void NewMethodAdapter::updateNodalGaps()
   auto dg_tilde_dx_block = jac_data_.GetMfemBlockJacobian( dg_tilde_dx, row_info, col_info );
   dg_tilde_dx_block->owns_blocks = false;
   dg_tilde_dx_ = shared::ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dg_tilde_dx_block->GetBlock( 1, 0 ) ) );
+  if ( !tied_contact_ ) {
+    // technically, we should do this on all the vectors/matrices below, but it looks like the multiplication operators
+    // below will zero them out anyway
+    dg_tilde_dx_.EliminateRows( rows_to_elim );
+  }
 
   auto dA_dx_block = jac_data_.GetMfemBlockJacobian( dA_dx, row_info, col_info );
   dA_dx_block->owns_blocks = false;
diff --git a/src/tribol/physics/NewMethodAdapter.hpp b/src/tribol/physics/NewMethodAdapter.hpp
index 2e3e0960..652ccaa2 100644
--- a/src/tribol/physics/NewMethodAdapter.hpp
+++ b/src/tribol/physics/NewMethodAdapter.hpp
@@ -68,6 +68,7 @@ class NewMethodAdapter : public ContactFormulation {
   // --- Member Variables ---
 
   double area_tol_{ 1.0e-14 };
+  bool tied_contact_ = false;
 
   MfemSubmeshData& submesh_data_;
   MfemJacobianData& jac_data_;

From aec5c35018c3e0662f7a5fdacd875557ab2442dd Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 13 Feb 2026 15:08:43 -0800
Subject: [PATCH 43/56] updated naming

---
 src/tribol/mesh/MfemData.cpp | 40 ++++++++++++++++++------------------
 src/tribol/mesh/MfemData.hpp | 14 ++++++-------
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/src/tribol/mesh/MfemData.cpp b/src/tribol/mesh/MfemData.cpp
index bfac6df3..fd8071ee 100644
--- a/src/tribol/mesh/MfemData.cpp
+++ b/src/tribol/mesh/MfemData.cpp
@@ -1082,9 +1082,9 @@ const MfemJacobianData::UpdateData& MfemJacobianData::GetUpdateData() const
   return *update_data_;
 }
 
-ParSparseMat MfemJacobianData::GetMfemJacobian( const std::vector<ComputedElementData>& contributions ) const
+shared::ParSparseMat MfemJacobianData::GetMfemJacobian( const std::vector<ComputedElementData>& contributions ) const
 {
-  std::unique_ptr<ParSparseMat> par_J;
+  std::unique_ptr<shared::ParSparseMat> par_J;
 
   // Maps BlockSpaces (MORTAR, NONMORTAR, LAGRANGE_MULTIPLIER) to a tribol element map
   const std::vector<const Array1D<int>*> elem_map_by_space{ &parent_data_.GetElemMap1(), &parent_data_.GetElemMap2(),
@@ -1097,7 +1097,6 @@ ParSparseMat MfemJacobianData::GetMfemJacobian( const std::vector<ComputedElemen
   // 1: Pressure/Gap (LAGRANGE_MULTIPLIER)
   for ( int r_blk = 0; r_blk < 2; ++r_blk ) {
     for ( int c_blk = 0; c_blk < 2; ++c_blk ) {
-      
       // Check if we have a transfer operator for this block
       if ( GetUpdateData().submesh_redecomp_xfer_.shape()[0] <= r_blk ||
            GetUpdateData().submesh_redecomp_xfer_.shape()[1] <= c_blk ||
@@ -1142,30 +1141,31 @@ ParSparseMat MfemJacobianData::GetMfemJacobian( const std::vector<ComputedElemen
 
       if ( global_has_data ) {
         redecomp::MatrixTransfer* xfer = GetUpdateData().submesh_redecomp_xfer_( r_blk, c_blk ).get();
-        auto submesh_J_hypre = xfer->TransferToParallelSparse( row_redecomp_ids, col_redecomp_ids, jacobian_data,
-                                                               jacobian_offsets );
+        auto submesh_J =
+            xfer->TransferToParallel( row_redecomp_ids, col_redecomp_ids, jacobian_data, jacobian_offsets, false );
 
-        ParSparseMatView submesh_J_view( submesh_J_hypre.get() );
-        std::unique_ptr<ParSparseMat> contrib_J;
+        shared::ParSparseMatView submesh_J_view( &submesh_J.get() );
+        std::unique_ptr<shared::ParSparseMat> contrib_J;
 
         if ( r_blk == 0 && c_blk == 0 ) {
           auto parent_J = submesh_J_view.RAP( *submesh_parent_vdof_xfer_ );
-          ParSparseMatView parent_P( parent_data_.GetParentCoords().ParFESpace()->Dof_TrueDof_Matrix() );
-          contrib_J = std::make_unique<ParSparseMat>( parent_J.RAP( parent_P ) );
+          shared::ParSparseMatView parent_P( parent_data_.GetParentCoords().ParFESpace()->Dof_TrueDof_Matrix() );
+          contrib_J = std::make_unique<shared::ParSparseMat>( parent_J.RAP( parent_P ) );
         } else if ( r_blk == 0 && c_blk == 1 ) {
-          auto parent_J = submesh_parent_vdof_xfer_->transpose() * submesh_J_hypre.get();
-          contrib_J = std::make_unique<ParSparseMat>(
-              ParSparseMat::RAP( parent_data_.GetParentCoords().ParFESpace()->Dof_TrueDof_Matrix(), parent_J,
-                                 submesh_data_.GetSubmeshFESpace().Dof_TrueDof_Matrix() ) );
+          auto parent_J = submesh_parent_vdof_xfer_->transpose() * submesh_J;
+          contrib_J = std::make_unique<shared::ParSparseMat>(
+              shared::ParSparseMat::RAP( parent_data_.GetParentCoords().ParFESpace()->Dof_TrueDof_Matrix(), parent_J,
+                                         submesh_data_.GetSubmeshFESpace().Dof_TrueDof_Matrix() ) );
         } else if ( r_blk == 1 && c_blk == 0 ) {
           auto parent_J = submesh_J_view * ( *submesh_parent_vdof_xfer_ );
-          ParSparseMatView submesh_P( submesh_data_.GetSubmeshFESpace().Dof_TrueDof_Matrix() );
-          ParSparseMatView parent_P( parent_data_.GetParentCoords().ParFESpace()->Dof_TrueDof_Matrix() );
-          contrib_J = std::make_unique<ParSparseMat>( ParSparseMat::RAP( submesh_P, parent_J, parent_P ) );
+          shared::ParSparseMatView submesh_P( submesh_data_.GetSubmeshFESpace().Dof_TrueDof_Matrix() );
+          shared::ParSparseMatView parent_P( parent_data_.GetParentCoords().ParFESpace()->Dof_TrueDof_Matrix() );
+          contrib_J =
+              std::make_unique<shared::ParSparseMat>( shared::ParSparseMat::RAP( submesh_P, parent_J, parent_P ) );
         } else {
           // (1, 1) block
-          ParSparseMatView submesh_P( submesh_data_.GetSubmeshFESpace().Dof_TrueDof_Matrix() );
-          contrib_J = std::make_unique<ParSparseMat>( submesh_J_view.RAP( submesh_P ) );
+          shared::ParSparseMatView submesh_P( submesh_data_.GetSubmeshFESpace().Dof_TrueDof_Matrix() );
+          contrib_J = std::make_unique<shared::ParSparseMat>( submesh_J_view.RAP( submesh_P ) );
         }
 
         if ( !par_J ) {
@@ -1179,8 +1179,8 @@ ParSparseMat MfemJacobianData::GetMfemJacobian( const std::vector<ComputedElemen
 
   if ( !par_J ) {
     auto& fes = *parent_data_.GetParentCoords().ParFESpace();
-    return ParSparseMat::diagonalMatrix( TRIBOL_COMM_WORLD, fes.GetTrueVSize(), fes.GetTrueDofOffsets(), 0.0,
-                                         mfem::Array<int>(), true );
+    return shared::ParSparseMat::diagonalMatrix( TRIBOL_COMM_WORLD, fes.GetTrueVSize(), fes.GetTrueDofOffsets(), 0.0,
+                                                 mfem::Array<int>(), true );
   }
 
   return std::move( *par_J );
diff --git a/src/tribol/mesh/MfemData.hpp b/src/tribol/mesh/MfemData.hpp
index 29c225ff..8f709c1b 100644
--- a/src/tribol/mesh/MfemData.hpp
+++ b/src/tribol/mesh/MfemData.hpp
@@ -1635,12 +1635,12 @@ class MfemSubmeshData {
  * @brief Struct to hold computed element data for Jacobian assembly
  */
 struct ComputedElementData {
-  BlockSpace row_space;              ///< Block space for row elements
-  BlockSpace col_space;              ///< Block space for column elements
-  axom::Array<int> row_elem_ids;     ///< Tribol element IDs for rows
-  axom::Array<int> col_elem_ids;     ///< Tribol element IDs for columns
-  axom::Array<double> jacobian_data; ///< Flattened Jacobian data
-  axom::Array<int> jacobian_offsets; ///< Offsets into data for each element
+  BlockSpace row_space;               ///< Block space for row elements
+  BlockSpace col_space;               ///< Block space for column elements
+  axom::Array<int> row_elem_ids;      ///< Tribol element IDs for rows
+  axom::Array<int> col_elem_ids;      ///< Tribol element IDs for columns
+  axom::Array<double> jacobian_data;  ///< Flattened Jacobian data
+  axom::Array<int> jacobian_offsets;  ///< Offsets into data for each element
 };
 
 /**
@@ -1683,7 +1683,7 @@ class MfemJacobianData {
    * @param contributions List of element computed data chunks
    * @return ParSparseMat
    */
-  ParSparseMat GetMfemJacobian( const std::vector<ComputedElementData>& contributions ) const;
+  shared::ParSparseMat GetMfemJacobian( const std::vector<ComputedElementData>& contributions ) const;
 
  private:
   /**

From d83b7f4c71a25ba00d44f6d170d44ba025fab918 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 13 Feb 2026 15:09:07 -0800
Subject: [PATCH 44/56] new method name and caliper annotations

---
 src/redecomp/transfer/MatrixTransfer.cpp | 42 ++++++++++++++++++++----
 src/redecomp/transfer/MatrixTransfer.hpp | 10 ++++--
 2 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/src/redecomp/transfer/MatrixTransfer.cpp b/src/redecomp/transfer/MatrixTransfer.cpp
index 04b960e2..cf5b29bc 100644
--- a/src/redecomp/transfer/MatrixTransfer.cpp
+++ b/src/redecomp/transfer/MatrixTransfer.cpp
@@ -11,6 +11,7 @@
 
 #include "redecomp/RedecompMesh.hpp"
 #include "shared/math/ParSparseMat.hpp"
+#include "shared/infrastructure/Profiling.hpp"
 
 namespace redecomp {
 
@@ -52,6 +53,7 @@ mfem::SparseMatrix MatrixTransfer::TransferToParallelSparse( const axom::Array<i
                                                              const axom::Array<int>& trial_elem_idx,
                                                              const axom::Array<mfem::DenseMatrix>& src_elem_mat ) const
 {
+  TRIBOL_MARK_SCOPE( "TransferToParallelSparse_Method1" );
   // TODO (EBC): we need a SparseMatrix-like data structure that allows HYPRE_BigInt on the columns
   auto parentJ = mfem::SparseMatrix( parent_test_fes_.GetVSize(), parent_trial_fes_.GlobalVSize() );
 
@@ -77,6 +79,7 @@ mfem::SparseMatrix MatrixTransfer::TransferToParallelSparse( const axom::Array<i
   auto test_redecomp = dynamic_cast<const RedecompMesh*>( redecomp_test_fes_.GetMesh() );
   auto trial_redecomp = dynamic_cast<const RedecompMesh*>( redecomp_trial_fes_.GetMesh() );
 
+  TRIBOL_MARK_BEGIN( "BuildCommunicationData" );
   // List of entries in src_elem_mat that belong on each parent test space rank.
   // This is needed so we know which rank to send entries in src_elem_mat to.
   auto send_array_ids = buildSendArrayIDs( test_elem_idx );
@@ -98,7 +101,9 @@ mfem::SparseMatrix MatrixTransfer::TransferToParallelSparse( const axom::Array<i
   // entries received from redecomp ranks.  The second column of recv_mat_sizes
   // determines the offset for each trial element.
   auto recv_trial_elem_dofs = buildRecvTrialElemDofs( *trial_redecomp, test_elem_idx, trial_elem_idx );
+  TRIBOL_MARK_END( "BuildCommunicationData" );
 
+  TRIBOL_MARK_BEGIN( "SendRecvEach" );
   // aggregate dense matrix values, send and assemble
   getMPIUtility().SendRecvEach(
       type<axom::Array<double>>(),
@@ -140,14 +145,18 @@ mfem::SparseMatrix MatrixTransfer::TransferToParallelSparse( const axom::Array<i
           dof_ct += recv_mat_sizes[src]( e, 0 ) * recv_mat_sizes[src]( e, 1 );
         }
       } );
+  TRIBOL_MARK_END( "SendRecvEach" );
 
   return parentJ;
 }
 
-std::unique_ptr<mfem::HypreParMatrix> MatrixTransfer::TransferToParallelSparse(
-    const axom::Array<int>& test_elem_idx, const axom::Array<int>& trial_elem_idx,
-    const axom::Array<double>& src_elem_mat_data, const axom::Array<int>& src_elem_mat_offsets ) const
+shared::ParSparseMat MatrixTransfer::TransferToParallel( const axom::Array<int>& test_elem_idx,
+                                                         const axom::Array<int>& trial_elem_idx,
+                                                         const axom::Array<double>& src_elem_mat_data,
+                                                         const axom::Array<int>& src_elem_mat_offsets,
+                                                         bool parallel_assemble ) const
 {
+  TRIBOL_MARK_FUNCTION;
   // verify inputs
   SLIC_ERROR_IF( test_elem_idx.size() != trial_elem_idx.size() || test_elem_idx.size() != src_elem_mat_offsets.size(),
                  "Element index arrays and element Jacobian offsets array must be the same size." );
@@ -170,6 +179,7 @@ std::unique_ptr<mfem::HypreParMatrix> MatrixTransfer::TransferToParallelSparse(
   auto test_redecomp = dynamic_cast<const RedecompMesh*>( redecomp_test_fes_.GetMesh() );
   auto trial_redecomp = dynamic_cast<const RedecompMesh*>( redecomp_trial_fes_.GetMesh() );
 
+  TRIBOL_MARK_BEGIN( "BuildCommunicationData" );
   // List of entries in src_elem_mat that belong on each parent test space rank.
   // This is needed so we know which rank to send entries in src_elem_mat to.
   auto send_array_ids = buildSendArrayIDs( test_elem_idx );
@@ -191,7 +201,9 @@ std::unique_ptr<mfem::HypreParMatrix> MatrixTransfer::TransferToParallelSparse(
   // entries received from redecomp ranks.  The second column of recv_mat_sizes
   // determines the offset for each trial element.
   auto recv_trial_elem_dofs = buildRecvTrialElemDofs( *trial_redecomp, test_elem_idx, trial_elem_idx );
+  TRIBOL_MARK_END( "BuildCommunicationData" );
 
+  TRIBOL_MARK_BEGIN( "SetupTriplets" );
   // Intermediate storage for triplets
   struct Triplet {
     int row;
@@ -207,7 +219,9 @@ std::unique_ptr<mfem::HypreParMatrix> MatrixTransfer::TransferToParallelSparse(
     }
   }
   triplets.reserve( total_recv_entries );
+  TRIBOL_MARK_END( "SetupTriplets" );
 
+  TRIBOL_MARK_BEGIN( "SendRecvEach" );
   // aggregate dense matrix values, send and assemble
   getMPIUtility().SendRecvEach(
       type<axom::Array<double>>(),
@@ -256,13 +270,17 @@ std::unique_ptr<mfem::HypreParMatrix> MatrixTransfer::TransferToParallelSparse(
           dof_ct += recv_mat_sizes[src]( e, 0 ) * recv_mat_sizes[src]( e, 1 );
         }
       } );
+  TRIBOL_MARK_END( "SendRecvEach" );
 
+  TRIBOL_MARK_BEGIN( "SortTriplets" );
   // Sort triplets by row then column
   std::sort( triplets.begin(), triplets.end(), []( const Triplet& a, const Triplet& b ) {
     if ( a.row != b.row ) return a.row < b.row;
     return a.col < b.col;
   } );
+  TRIBOL_MARK_END( "SortTriplets" );
 
+  TRIBOL_MARK_BEGIN( "CSRConstruction" );
   // Count non-zeros and merge duplicates
   int num_unique_nonzeros = 0;
   if ( !triplets.empty() ) {
@@ -306,11 +324,23 @@ std::unique_ptr<mfem::HypreParMatrix> MatrixTransfer::TransferToParallelSparse(
   for ( int i = 0; i < num_rows; ++i ) {
     I_ptr[i + 1] += I_ptr[i];
   }
+  TRIBOL_MARK_END( "CSRConstruction" );
 
   // Construct rectangular HypreParMatrix
-  return std::make_unique<mfem::HypreParMatrix>( getMPIUtility().MPIComm(), num_rows, parent_test_fes_.GlobalVSize(),
-                                                 parent_trial_fes_.GlobalVSize(), I_ptr, J_ptr, data_ptr,
-                                                 parent_test_fes_.GetDofOffsets(), parent_trial_fes_.GetDofOffsets() );
+  shared::ParSparseMat J_full( getMPIUtility().MPIComm(), num_rows, parent_test_fes_.GlobalVSize(),
+                               parent_trial_fes_.GlobalVSize(), I_ptr, J_ptr, data_ptr,
+                               parent_test_fes_.GetDofOffsets(), parent_trial_fes_.GetDofOffsets() );
+
+  if ( !parallel_assemble ) {
+    return J_full;
+  } else {
+    auto P_test = parent_test_fes_.Dof_TrueDof_Matrix();
+    P_test->HostRead();
+    auto P_trial = parent_trial_fes_.Dof_TrueDof_Matrix();
+    P_trial->HostRead();
+    auto J_true = shared::ParSparseMat::RAP( P_test, J_full, P_trial );
+    return J_true;
+  }
 }
 
 shared::ParSparseMat MatrixTransfer::ConvertToParSparseMat( mfem::SparseMatrix&& sparse, bool parallel_assemble ) const
diff --git a/src/redecomp/transfer/MatrixTransfer.hpp b/src/redecomp/transfer/MatrixTransfer.hpp
index d46c0f19..ad4c6a1e 100644
--- a/src/redecomp/transfer/MatrixTransfer.hpp
+++ b/src/redecomp/transfer/MatrixTransfer.hpp
@@ -93,14 +93,18 @@ class MatrixTransfer {
    * @param trial_elem_idx List of element IDs on the redecomp trial space
    * @param src_elem_mat_data Flattened array of element-level dense matrices from the redecomp mesh
    * @param src_elem_mat_offsets Offsets into src_elem_mat_data for each element
+   * @param parallel_assemble Performs parallel assembly (transforms to tdofs)
+   * on the HypreParMatrix if true, returns ldofs otherwise
    * @return mfem::HypreParMatrix on the parent mesh (ldofs on the rows, global
    * ldofs on the columns) in rectangular format
    *
    * @note This method constructs the parallel matrix directly, bypassing mfem::SparseMatrix.
    */
-  std::unique_ptr<mfem::HypreParMatrix> TransferToParallelSparse(
-      const axom::Array<int>& test_elem_idx, const axom::Array<int>& trial_elem_idx,
-      const axom::Array<double>& src_elem_mat_data, const axom::Array<int>& src_elem_mat_offsets ) const;
+  shared::ParSparseMat TransferToParallel( const axom::Array<int>& test_elem_idx,
+                                           const axom::Array<int>& trial_elem_idx,
+                                           const axom::Array<double>& src_elem_mat_data,
+                                           const axom::Array<int>& src_elem_mat_offsets,
+                                           bool parallel_assemble = true ) const;
 
   /**
    * @brief Converts SparseMatrix from TransferToParallelSparse to HypreParMatrix

From e56d66b99c4420cf2616cfb4ec55bd3ef3e64bcc Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 13 Feb 2026 15:09:24 -0800
Subject: [PATCH 45/56] formatting and updated naming

---
 src/examples/jacobian_transfer_comparison.cpp | 140 +++++++++---------
 1 file changed, 70 insertions(+), 70 deletions(-)

diff --git a/src/examples/jacobian_transfer_comparison.cpp b/src/examples/jacobian_transfer_comparison.cpp
index b960c5a7..3e899007 100644
--- a/src/examples/jacobian_transfer_comparison.cpp
+++ b/src/examples/jacobian_transfer_comparison.cpp
@@ -48,14 +48,14 @@ int main( int argc, char** argv )
   auto mortar_attrs = std::set<int>( { 4 } );
   auto nonmortar_attrs = std::set<int>( { 5 } );
 
-  mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
-    shared::MeshBuilder::CubeMesh(nel_per_dir, nel_per_dir, nel_per_dir, elem_type)
-      .updateBdrAttrib(4, 7)
-      .updateBdrAttrib(6, 4),
-    shared::MeshBuilder::CubeMesh(nel_per_dir, nel_per_dir, nel_per_dir, elem_type)
-      .translate({0.0, 0.0, 0.99})
-      .updateBdrAttrib(8, 5)
-  }));
+  mfem::ParMesh mesh = shared::ParMeshBuilder(
+      MPI_COMM_WORLD,
+      shared::MeshBuilder::Unify( { shared::MeshBuilder::CubeMesh( nel_per_dir, nel_per_dir, nel_per_dir, elem_type )
+                                        .updateBdrAttrib( 4, 7 )
+                                        .updateBdrAttrib( 6, 4 ),
+                                    shared::MeshBuilder::CubeMesh( nel_per_dir, nel_per_dir, nel_per_dir, elem_type )
+                                        .translate( { 0.0, 0.0, 0.99 } )
+                                        .updateBdrAttrib( 8, 5 ) } ) );
 
   int dim = mesh.SpaceDimension();
   mfem::H1_FECollection fec( 1, dim );
@@ -65,19 +65,18 @@ int main( int argc, char** argv )
 
   // 2. Register Tribol Coupling Scheme
   int cs_id = 0;
-  tribol::registerMfemCouplingScheme( cs_id, 0, 1, mesh, coords, mortar_attrs,
-                                      nonmortar_attrs, tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
-                                      tribol::SINGLE_MORTAR, tribol::FRICTIONLESS, tribol::LAGRANGE_MULTIPLIER,
-                                      tribol::BINNING_GRID );
+  tribol::registerMfemCouplingScheme( cs_id, 0, 1, mesh, coords, mortar_attrs, nonmortar_attrs,
+                                      tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING, tribol::SINGLE_MORTAR,
+                                      tribol::FRICTIONLESS, tribol::LAGRANGE_MULTIPLIER, tribol::BINNING_GRID );
   tribol::setMPIComm( cs_id, MPI_COMM_WORLD );
   tribol::updateMfemParallelDecomposition();
 
   // 3. Setup MfemJacobianData
   auto& cs_manager = tribol::CouplingSchemeManager::getInstance();
   auto* cs = cs_manager.findData( cs_id );
-  if (!cs->hasMfemJacobianData()) {
-      cs->setMfemJacobianData( std::make_unique<tribol::MfemJacobianData>( 
-          *cs->getMfemMeshData(), *cs->getMfemSubmeshData(), cs->getContactMethod() ) );
+  if ( !cs->hasMfemJacobianData() ) {
+    cs->setMfemJacobianData( std::make_unique<tribol::MfemJacobianData>(
+        *cs->getMfemMeshData(), *cs->getMfemSubmeshData(), cs->getContactMethod() ) );
   }
   auto* jac_data = cs->getMfemJacobianData();
   jac_data->UpdateJacobianXfer();
@@ -85,66 +84,65 @@ int main( int argc, char** argv )
   // 4. Synthesize Jacobian data for comparison
   // We'll create some dummy element matrices for elements on Mesh 1 (Mortar)
   int ne1 = cs->getMfemMeshData()->GetMesh1NE();
-  int num_nodes_per_elem = 4; // Quad faces
+  int num_nodes_per_elem = 4;  // Quad faces
   int num_dofs_per_elem = num_nodes_per_elem * dim;
-  
+
   // New format: ComputedElementData
   tribol::ComputedElementData new_data;
   new_data.row_space = tribol::BlockSpace::MORTAR;
   new_data.col_space = tribol::BlockSpace::MORTAR;
-  new_data.jacobian_data.resize(ne1 * num_dofs_per_elem * num_dofs_per_elem);
-  new_data.jacobian_offsets.resize(ne1);
-  new_data.row_elem_ids.resize(ne1);
-  new_data.col_elem_ids.resize(ne1);
+  new_data.jacobian_data.resize( ne1 * num_dofs_per_elem * num_dofs_per_elem );
+  new_data.jacobian_offsets.resize( ne1 );
+  new_data.row_elem_ids.resize( ne1 );
+  new_data.col_elem_ids.resize( ne1 );
 
   // Prepare MethodData for OLD path comparison
   if ( cs->getMethodData() == nullptr ) {
-      cs->allocateMethodData();
+    cs->allocateMethodData();
   }
   auto* method_data = cs->getMethodData();
   SLIC_ASSERT( method_data != nullptr );
 
-  tribol::ArrayT<tribol::BlockSpace> spaces({tribol::BlockSpace::MORTAR});
-  method_data->reserveBlockJ(std::move(spaces), ne1);
-  
-  for (int e = 0; e < ne1; ++e) {
-      new_data.row_elem_ids[e] = e;
-      new_data.col_elem_ids[e] = e;
-      new_data.jacobian_offsets[e] = e * num_dofs_per_elem * num_dofs_per_elem;
-      
-      tribol::StackArray<tribol::DeviceArray2D<tribol::RealT>, 9> blockJ;
-      blockJ[0] = tribol::DeviceArray2D<tribol::RealT>(num_dofs_per_elem, num_dofs_per_elem);
-      
-      for (int i = 0; i < num_dofs_per_elem; ++i) {
-          for (int j = 0; j < num_dofs_per_elem; ++j) {
-              double val = static_cast<double>(e + i + j);
-              blockJ[0](i, j) = val;
-              new_data.jacobian_data[new_data.jacobian_offsets[e] + i + j * num_dofs_per_elem] = val;
-          }
+  tribol::ArrayT<tribol::BlockSpace> spaces( { tribol::BlockSpace::MORTAR } );
+  method_data->reserveBlockJ( std::move( spaces ), ne1 );
+
+  for ( int e = 0; e < ne1; ++e ) {
+    new_data.row_elem_ids[e] = e;
+    new_data.col_elem_ids[e] = e;
+    new_data.jacobian_offsets[e] = e * num_dofs_per_elem * num_dofs_per_elem;
+
+    tribol::StackArray<tribol::DeviceArray2D<tribol::RealT>, 9> blockJ;
+    blockJ[0] = tribol::DeviceArray2D<tribol::RealT>( num_dofs_per_elem, num_dofs_per_elem );
+
+    for ( int i = 0; i < num_dofs_per_elem; ++i ) {
+      for ( int j = 0; j < num_dofs_per_elem; ++j ) {
+        double val = static_cast<double>( e + i + j );
+        blockJ[0]( i, j ) = val;
+        new_data.jacobian_data[new_data.jacobian_offsets[e] + i + j * num_dofs_per_elem] = val;
       }
-      tribol::ArrayT<int> ids({e});
-      method_data->storeElemBlockJ(std::move(ids), blockJ);
+    }
+    tribol::ArrayT<int> ids( { e } );
+    method_data->storeElemBlockJ( std::move( ids ), blockJ );
   }
 
   // 5. Time and assemble using Old Method
   auto start_old = std::chrono::high_resolution_clock::now();
-  
+
   // Simulated OLD path logic
-  auto xfer = cs->getMfemJacobianData()->GetMfemBlockJacobian(
-      *method_data, {{0, tribol::BlockSpace::MORTAR}}, {{0, tribol::BlockSpace::MORTAR}}
-  );
-  
+  auto xfer = cs->getMfemJacobianData()->GetMfemBlockJacobian( *method_data, { { 0, tribol::BlockSpace::MORTAR } },
+                                                               { { 0, tribol::BlockSpace::MORTAR } } );
+
   auto end_old = std::chrono::high_resolution_clock::now();
-  
+
   // 6. Time and assemble using New Method
   // We must call this on ALL ranks collectively.
   std::vector<tribol::ComputedElementData> contribs_vec;
-  if (ne1 > 0) {
-      contribs_vec.push_back(std::move(new_data));
+  if ( ne1 > 0 ) {
+    contribs_vec.push_back( std::move( new_data ) );
   }
-  
+
   auto start_new = std::chrono::high_resolution_clock::now();
-  auto par_J_new = jac_data->GetMfemJacobian(contribs_vec);
+  auto par_J_new = jac_data->GetMfemJacobian( contribs_vec );
   auto end_new = std::chrono::high_resolution_clock::now();
 
   // 7. Verify match
@@ -153,39 +151,41 @@ int main( int argc, char** argv )
   auto* new_hypre = &par_J_new.get();
 
   // Check difference: A_old - A_new
-  tribol::ParSparseMat diff_psm = tribol::ParSparseMatView( old_hypre ) - tribol::ParSparseMatView( new_hypre );
-  
+  shared::ParSparseMat diff_psm = shared::ParSparseMatView( old_hypre ) - shared::ParSparseMatView( new_hypre );
+
   // Verify match by checking NNZ of difference
   // Since we subtracted, exact match means NNZ should be 0 (or values very small)
   // mfem::HypreParMatrix doesn't have an easy "max norm" without converting to SparseMatrix
   // but we can check NNZ. Note that operator- might keep zero entries.
   // A better way is to check the data array of the resulting matrix.
-  
+
   double max_err = 0.0;
   HYPRE_ParCSRMatrix diff_csr = diff_psm.get();
   hypre_ParCSRMatrix* diff_parcsr = (hypre_ParCSRMatrix*)diff_csr;
-  hypre_CSRMatrix* diag = hypre_ParCSRMatrixDiag(diff_parcsr);
-  double* data = hypre_CSRMatrixData(diag);
-  int num_nonzeros = hypre_CSRMatrixNumNonzeros(diag);
-  for(int i=0; i<num_nonzeros; ++i) {
-      max_err = std::max(max_err, std::abs(data[i]));
+  hypre_CSRMatrix* diag = hypre_ParCSRMatrixDiag( diff_parcsr );
+  double* data = hypre_CSRMatrixData( diag );
+  int num_nonzeros = hypre_CSRMatrixNumNonzeros( diag );
+  for ( int i = 0; i < num_nonzeros; ++i ) {
+    max_err = std::max( max_err, std::abs( data[i] ) );
   }
   // Also check off-diagonal block
-  hypre_CSRMatrix* offd = hypre_ParCSRMatrixOffd(diff_parcsr);
-  data = hypre_CSRMatrixData(offd);
-  num_nonzeros = hypre_CSRMatrixNumNonzeros(offd);
-  for(int i=0; i<num_nonzeros; ++i) {
-      max_err = std::max(max_err, std::abs(data[i]));
+  hypre_CSRMatrix* offd = hypre_ParCSRMatrixOffd( diff_parcsr );
+  data = hypre_CSRMatrixData( offd );
+  num_nonzeros = hypre_CSRMatrixNumNonzeros( offd );
+  for ( int i = 0; i < num_nonzeros; ++i ) {
+    max_err = std::max( max_err, std::abs( data[i] ) );
   }
-  
+
   double global_max_err = 0.0;
-  MPI_Allreduce(&max_err, &global_max_err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Allreduce( &max_err, &global_max_err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
 
   if ( rank == 0 ) {
-    std::cout << "Old method time: " << std::chrono::duration_cast<std::chrono::microseconds>( end_old - start_old ).count()
-              << " us" << std::endl;
-    std::cout << "New method time: " << std::chrono::duration_cast<std::chrono::microseconds>( end_new - start_new ).count()
-              << " us" << std::endl;
+    std::cout << "Old method time: "
+              << std::chrono::duration_cast<std::chrono::microseconds>( end_old - start_old ).count() << " us"
+              << std::endl;
+    std::cout << "New method time: "
+              << std::chrono::duration_cast<std::chrono::microseconds>( end_new - start_new ).count() << " us"
+              << std::endl;
     std::cout << "Matrix difference max err: " << global_max_err << std::endl;
   }
 

From 9c705f35b33b43e0bb02b943b6413ae9637116a6 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 12 Feb 2026 17:50:25 -0800
Subject: [PATCH 46/56] change LoggingLevel enumerators from UPPER_CASE to CamelCase

---
 src/tribol/common/Parameters.hpp   | 12 ++++++------
 src/tribol/interface/tribol.cpp    |  4 ++--
 src/tribol/mesh/CouplingScheme.cpp | 12 ++++++------
 src/tribol/utils/TestUtils.cpp     |  2 +-
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/tribol/common/Parameters.hpp b/src/tribol/common/Parameters.hpp
index c944a135..d5dd2e6b 100644
--- a/src/tribol/common/Parameters.hpp
+++ b/src/tribol/common/Parameters.hpp
@@ -32,12 +32,12 @@ constexpr int ANY_MESH = -1;
  */
 enum class LoggingLevel
 {
-  UNDEFINED,  ///! Undefined
-  DEBUG,      ///! Debug and higher
-  INFO,       ///! Info and higher
-  WARNING,    ///! Warning and higher
-  ERROR,      ///! Errors only
-  NUM_LOGGING_LEVELS = ERROR
+  Undefined,  ///! Undefined
+  Debug,      ///! Debug and higher
+  Info,       ///! Info and higher
+  Warning,    ///! Warning and higher
+  Error,      ///! Errors only
+  NumLoggingLevels = Error
 };
 
 /*!
diff --git a/src/tribol/interface/tribol.cpp b/src/tribol/interface/tribol.cpp
index f31f1f69..bad3cbe3 100644
--- a/src/tribol/interface/tribol.cpp
+++ b/src/tribol/interface/tribol.cpp
@@ -310,9 +310,9 @@ void setLoggingLevel( IndexT cs_id, LoggingLevel log_level )
 
   SLIC_ERROR_IF( !cs, "tribol::setLoggingLevel(): " << "invalid CouplingScheme id." );
 
-  if ( !in_range( static_cast<int>( log_level ), static_cast<int>( LoggingLevel::NUM_LOGGING_LEVELS ) ) ) {
+  if ( !in_range( static_cast<int>( log_level ), static_cast<int>( LoggingLevel::NumLoggingLevels ) ) ) {
     SLIC_INFO_ROOT( "tribol::setLoggingLevel(): Logging level not an option; " << "using 'warning' level." );
-    cs->setLoggingLevel( LoggingLevel::WARNING );
+    cs->setLoggingLevel( LoggingLevel::Warning );
   } else {
     cs->setLoggingLevel( log_level );
   }
diff --git a/src/tribol/mesh/CouplingScheme.cpp b/src/tribol/mesh/CouplingScheme.cpp
index c7c3bdce..c98a0d22 100644
--- a/src/tribol/mesh/CouplingScheme.cpp
+++ b/src/tribol/mesh/CouplingScheme.cpp
@@ -340,7 +340,7 @@ CouplingScheme::CouplingScheme( IndexT cs_id, IndexT mesh_id1, IndexT mesh_id2,
   m_couplingSchemeInfo.cs_case_info = NO_CASE_INFO;
   m_couplingSchemeInfo.cs_enforcement_info = NO_ENFORCEMENT_INFO;
 
-  m_loggingLevel = LoggingLevel::UNDEFINED;
+  m_loggingLevel = LoggingLevel::Undefined;
 
 }  // end CouplingScheme::CouplingScheme()
 
@@ -1176,21 +1176,21 @@ bool CouplingScheme::init()
 void CouplingScheme::setSlicLoggingLevel()
 {
   // set slic logging level for coupling schemes that have API modified logging levels
-  if ( this->m_loggingLevel != LoggingLevel::UNDEFINED ) {
+  if ( this->m_loggingLevel != LoggingLevel::Undefined ) {
     switch ( this->m_loggingLevel ) {
-      case LoggingLevel::DEBUG: {
+      case LoggingLevel::Debug: {
         axom::slic::setLoggingMsgLevel( axom::slic::message::Debug );
         break;
       }
-      case LoggingLevel::INFO: {
+      case LoggingLevel::Info: {
         axom::slic::setLoggingMsgLevel( axom::slic::message::Info );
         break;
       }
-      case LoggingLevel::WARNING: {
+      case LoggingLevel::Warning: {
         axom::slic::setLoggingMsgLevel( axom::slic::message::Warning );
         break;
       }
-      case LoggingLevel::ERROR: {
+      case LoggingLevel::Error: {
         axom::slic::setLoggingMsgLevel( axom::slic::message::Error );
         break;
       }
diff --git a/src/tribol/utils/TestUtils.cpp b/src/tribol/utils/TestUtils.cpp
index 110fa74d..6da885f4 100644
--- a/src/tribol/utils/TestUtils.cpp
+++ b/src/tribol/utils/TestUtils.cpp
@@ -1125,7 +1125,7 @@ int TestMesh::tribolSetupAndUpdate( ContactMethod method, EnforcementMethod enfo
     setPlotCycleIncrement( csIndex, 1 );
   }
 
-  setLoggingLevel( csIndex, LoggingLevel::WARNING );
+  setLoggingLevel( csIndex, LoggingLevel::Warning );
 
   if ( method == COMMON_PLANE && enforcement == PENALTY ) {
     PenaltyConstraintType constraint_type =

From 029e711b5b13576927a8661f6218743e47225274 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 13 Feb 2026 15:54:25 -0800
Subject: [PATCH 47/56] start using getMfemJacobian

---
 src/tribol/physics/NewMethodAdapter.cpp | 56 +++++++++++++++++++------
 1 file changed, 44 insertions(+), 12 deletions(-)

diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index 74b6ab12..c9e3a6b9 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -9,6 +9,44 @@ namespace tribol {
 
 #ifdef TRIBOL_USE_ENZYME
 
+static std::vector<ComputedElementData> convertMethodData( const MethodData& method_data,
+                                                           const std::vector<BlockSpace>& row_spaces,
+                                                           const std::vector<BlockSpace>& col_spaces )
+{
+  std::vector<ComputedElementData> contributions;
+  for ( auto rs : row_spaces ) {
+    for ( auto cs : col_spaces ) {
+      const auto& J_block = method_data.getBlockJ()( static_cast<int>( rs ), static_cast<int>( cs ) );
+      if ( J_block.size() == 0 ) {
+        continue;
+      }
+
+      ComputedElementData data;
+      data.row_space = rs;
+      data.col_space = cs;
+
+      const auto& row_ids = method_data.getBlockJElementIds()[static_cast<int>( rs )];
+      const auto& col_ids = method_data.getBlockJElementIds()[static_cast<int>( cs )];
+
+      data.row_elem_ids.append( axom::ArrayView<const int>( row_ids.data(), row_ids.size() ) );
+      data.col_elem_ids.append( axom::ArrayView<const int>( col_ids.data(), col_ids.size() ) );
+
+      int n_elems = J_block.size();
+      int n_rows = J_block[0].Height();
+      int n_cols = J_block[0].Width();
+      data.jacobian_data.resize( n_elems * n_rows * n_cols );
+      data.jacobian_offsets.resize( n_elems );
+      for ( int k = 0; k < n_elems; ++k ) {
+        data.jacobian_offsets[k] = k * n_rows * n_cols;
+        std::copy( J_block[k].GetData(), J_block[k].GetData() + n_rows * n_cols,
+                   data.jacobian_data.data() + data.jacobian_offsets[k] );
+      }
+      contributions.push_back( std::move( data ) );
+    }
+  }
+  return contributions;
+}
+
 NewMethodAdapter::NewMethodAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1,
                                     MeshData& mesh2, double k, double delta, int N )
     // NOTE: mesh1 maps to mesh2_ and mesh2 maps to mesh1_. This is to keep consistent with mesh1_ being non-mortar and
@@ -157,20 +195,16 @@ void NewMethodAdapter::updateNodalGaps()
   gap_vec_ = g_tilde_vec_.divide( A_vec_, area_tol_ );
 
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
-  const std::vector<std::pair<int, BlockSpace>> row_info{ { 1, BlockSpace::LAGRANGE_MULTIPLIER } };
-  const std::vector<std::pair<int, BlockSpace>> col_info{ { 0, BlockSpace::NONMORTAR }, { 0, BlockSpace::MORTAR } };
-  auto dg_tilde_dx_block = jac_data_.GetMfemBlockJacobian( dg_tilde_dx, row_info, col_info );
-  dg_tilde_dx_block->owns_blocks = false;
-  dg_tilde_dx_ = shared::ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dg_tilde_dx_block->GetBlock( 1, 0 ) ) );
+  dg_tilde_dx_ = jac_data_.GetMfemJacobian(
+      convertMethodData( dg_tilde_dx, { BlockSpace::LAGRANGE_MULTIPLIER }, { BlockSpace::NONMORTAR, BlockSpace::MORTAR } ) );
   if ( !tied_contact_ ) {
     // technically, we should do this on all the vectors/matrices below, but it looks like the mutliplication operators
     // below will zero them out anyway
     dg_tilde_dx_.EliminateRows( rows_to_elim );
   }
 
-  auto dA_dx_block = jac_data_.GetMfemBlockJacobian( dA_dx, row_info, col_info );
-  dA_dx_block->owns_blocks = false;
-  dA_dx_ = shared::ParSparseMat( static_cast<mfem::HypreParMatrix*>( &dA_dx_block->GetBlock( 1, 0 ) ) );
+  dA_dx_ = jac_data_.GetMfemJacobian(
+      convertMethodData( dA_dx, { BlockSpace::LAGRANGE_MULTIPLIER }, { BlockSpace::NONMORTAR, BlockSpace::MORTAR } ) );
 }
 
 void NewMethodAdapter::updateNodalForces()
@@ -305,10 +339,8 @@ void NewMethodAdapter::updateNodalForces()
   }
 
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
-  const std::vector<std::pair<int, BlockSpace>> all_info{ { 0, BlockSpace::NONMORTAR }, { 0, BlockSpace::MORTAR } };
-  auto df_dx_block = jac_data_.GetMfemBlockJacobian( df_dx_data, all_info, all_info );
-  df_dx_block->owns_blocks = false;
-  df_dx_ = shared::ParSparseMat( static_cast<mfem::HypreParMatrix*>( &df_dx_block->GetBlock( 0, 0 ) ) );
+  df_dx_ = jac_data_.GetMfemJacobian( convertMethodData(
+      df_dx_data, { BlockSpace::NONMORTAR, BlockSpace::MORTAR }, { BlockSpace::NONMORTAR, BlockSpace::MORTAR } ) );
 
   auto pg2_over_asq = ( 2.0 * pressure_vec_ )
                           .multiplyInPlace( g_tilde_vec_ )

From 4f9163ba8be9e7ab59bebb6dadd5a77807285d6b Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 13 Feb 2026 16:44:35 -0800
Subject: [PATCH 48/56] assemble element contribs directly into
 ComputedElementData

---
 src/tribol/physics/NewMethodAdapter.cpp | 237 ++++++++++--------------
 1 file changed, 97 insertions(+), 140 deletions(-)

diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index c9e3a6b9..0cdeec70 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -9,44 +9,6 @@ namespace tribol {
 
 #ifdef TRIBOL_USE_ENZYME
 
-static std::vector<ComputedElementData> convertMethodData( const MethodData& method_data,
-                                                           const std::vector<BlockSpace>& row_spaces,
-                                                           const std::vector<BlockSpace>& col_spaces )
-{
-  std::vector<ComputedElementData> contributions;
-  for ( auto rs : row_spaces ) {
-    for ( auto cs : col_spaces ) {
-      const auto& J_block = method_data.getBlockJ()( static_cast<int>( rs ), static_cast<int>( cs ) );
-      if ( J_block.size() == 0 ) {
-        continue;
-      }
-
-      ComputedElementData data;
-      data.row_space = rs;
-      data.col_space = cs;
-
-      const auto& row_ids = method_data.getBlockJElementIds()[static_cast<int>( rs )];
-      const auto& col_ids = method_data.getBlockJElementIds()[static_cast<int>( cs )];
-
-      data.row_elem_ids.append( axom::ArrayView<const int>( row_ids.data(), row_ids.size() ) );
-      data.col_elem_ids.append( axom::ArrayView<const int>( col_ids.data(), col_ids.size() ) );
-
-      int n_elems = J_block.size();
-      int n_rows = J_block[0].Height();
-      int n_cols = J_block[0].Width();
-      data.jacobian_data.resize( n_elems * n_rows * n_cols );
-      data.jacobian_offsets.resize( n_elems );
-      for ( int k = 0; k < n_elems; ++k ) {
-        data.jacobian_offsets[k] = k * n_rows * n_cols;
-        std::copy( J_block[k].GetData(), J_block[k].GetData() + n_rows * n_cols,
-                   data.jacobian_data.data() + data.jacobian_offsets[k] );
-      }
-      contributions.push_back( std::move( data ) );
-    }
-  }
-  return contributions;
-}
-
 NewMethodAdapter::NewMethodAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1,
                                     MeshData& mesh2, double k, double delta, int N )
     // NOTE: mesh1 maps to mesh2_ and mesh2 maps to mesh1_. This is to keep consistent with mesh1_ being non-mortar and
@@ -83,11 +45,32 @@ void NewMethodAdapter::updateNodalGaps()
   auto& redecomp_gap = submesh_data_.GetRedecompGap();
   mfem::GridFunction redecomp_area( redecomp_gap.FESpace() );
   redecomp_area = 0.0;
-  MethodData dg_tilde_dx;
-  dg_tilde_dx.reserveBlockJ( { BlockSpace::NONMORTAR, BlockSpace::MORTAR, BlockSpace::LAGRANGE_MULTIPLIER },
-                             pairs_.size() );
-  MethodData dA_dx;
-  dA_dx.reserveBlockJ( { BlockSpace::NONMORTAR, BlockSpace::MORTAR, BlockSpace::LAGRANGE_MULTIPLIER }, pairs_.size() );
+
+  std::vector<ComputedElementData> dg_tilde_dx_contribs( 2 );
+  dg_tilde_dx_contribs[0].row_space = BlockSpace::LAGRANGE_MULTIPLIER;
+  dg_tilde_dx_contribs[0].col_space = BlockSpace::NONMORTAR;
+  dg_tilde_dx_contribs[1].row_space = BlockSpace::LAGRANGE_MULTIPLIER;
+  dg_tilde_dx_contribs[1].col_space = BlockSpace::MORTAR;
+
+  std::vector<ComputedElementData> dA_dx_contribs( 2 );
+  dA_dx_contribs[0].row_space = BlockSpace::LAGRANGE_MULTIPLIER;
+  dA_dx_contribs[0].col_space = BlockSpace::NONMORTAR;
+  dA_dx_contribs[1].row_space = BlockSpace::LAGRANGE_MULTIPLIER;
+  dA_dx_contribs[1].col_space = BlockSpace::MORTAR;
+
+  for ( auto& contrib : dg_tilde_dx_contribs ) {
+    contrib.row_elem_ids.reserve( pairs_.size() );
+    contrib.col_elem_ids.reserve( pairs_.size() );
+    contrib.jacobian_data.reserve( pairs_.size() * 8 );
+    contrib.jacobian_offsets.reserve( pairs_.size() );
+  }
+  for ( auto& contrib : dA_dx_contribs ) {
+    contrib.row_elem_ids.reserve( pairs_.size() );
+    contrib.col_elem_ids.reserve( pairs_.size() );
+    contrib.jacobian_data.reserve( pairs_.size() * 8 );
+    contrib.jacobian_offsets.reserve( pairs_.size() );
+  }
+
   const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
 
   auto mesh1_view = mesh1_.getView();
@@ -122,48 +105,41 @@ void NewMethodAdapter::updateNodalGaps()
     // compute g_tilde first derivative
     double dg_dx_node1[8];
     double dg_dx_node2[8];
+    // TODO: make grad_gtilde return directly in dg_tilde_dx_blocks format
     evaluator_->grad_gtilde( flipped_pair, mesh1_view, mesh2_view, dg_dx_node1, dg_dx_node2 );
-    StackArray<DeviceArray2D<RealT>, 9> dg_tilde_dx_block( 3 );
-    dg_tilde_dx_block( 2, 0 ) = DeviceArray2D<RealT>( 2, 4 );
-    dg_tilde_dx_block( 2, 0 ).fill( 0.0 );
-    dg_tilde_dx_block( 2, 1 ) = DeviceArray2D<RealT>( 2, 4 );
-    dg_tilde_dx_block( 2, 1 ).fill( 0.0 );
+    double dg_tilde_dx_blocks[2][8];
     for ( int i{ 0 }; i < 4; ++i ) {
-      dg_tilde_dx_block( 2, 0 )( 0, i ) = dg_dx_node1[node_idx[i]];
+      dg_tilde_dx_blocks[0][i * 2] = dg_dx_node1[node_idx[i]];
+      dg_tilde_dx_blocks[0][i * 2 + 1] = dg_dx_node2[node_idx[i]];
+      dg_tilde_dx_blocks[1][i * 2] = dg_dx_node1[node_idx[i + 4]];
+      dg_tilde_dx_blocks[1][i * 2 + 1] = dg_dx_node2[node_idx[i + 4]];
     }
-    for ( int i{ 0 }; i < 4; ++i ) {
-      dg_tilde_dx_block( 2, 0 )( 1, i ) = dg_dx_node2[node_idx[i]];
-    }
-    for ( int i{ 0 }; i < 4; ++i ) {
-      dg_tilde_dx_block( 2, 1 )( 0, i ) = dg_dx_node1[node_idx[i + 4]];
+    for ( int i{ 0 }; i < 2; ++i ) {
+      auto& contrib = dg_tilde_dx_contribs[i];
+      contrib.row_elem_ids.push_back( elem1 );
+      contrib.col_elem_ids.push_back( i == 0 ? elem1 : elem2 );
+      contrib.jacobian_offsets.push_back( contrib.jacobian_data.size() );
+      contrib.jacobian_data.append( axom::ArrayView<const double>( dg_tilde_dx_blocks[i], 8 ) );
     }
-    for ( int i{ 0 }; i < 4; ++i ) {
-      dg_tilde_dx_block( 2, 1 )( 1, i ) = dg_dx_node2[node_idx[i + 4]];
-    }
-    dg_tilde_dx.storeElemBlockJ( { elem1, elem2, elem1 }, dg_tilde_dx_block );
 
-    // compute area first derivative
     double dA_dx_node1[8];
     double dA_dx_node2[8];
+    // TODO: make grad_trib_area return directly in dA_dx_blocks format
     evaluator_->grad_trib_area( flipped_pair, mesh1_view, mesh2_view, dA_dx_node1, dA_dx_node2 );
-    StackArray<DeviceArray2D<RealT>, 9> dA_dx_block( 3 );
-    dA_dx_block( 2, 0 ) = DeviceArray2D<RealT>( 2, 4 );
-    dA_dx_block( 2, 0 ).fill( 0.0 );
-    dA_dx_block( 2, 1 ) = DeviceArray2D<RealT>( 2, 4 );
-    dA_dx_block( 2, 1 ).fill( 0.0 );
+    double dA_dx_blocks[2][8];
     for ( int i{ 0 }; i < 4; ++i ) {
-      dA_dx_block( 2, 0 )( 0, i ) = dA_dx_node1[node_idx[i]];
+      dA_dx_blocks[0][i * 2] = dA_dx_node1[node_idx[i]];
+      dA_dx_blocks[0][i * 2 + 1] = dA_dx_node2[node_idx[i]];
+      dA_dx_blocks[1][i * 2] = dA_dx_node1[node_idx[i + 4]];
+      dA_dx_blocks[1][i * 2 + 1] = dA_dx_node2[node_idx[i + 4]];
     }
-    for ( int i{ 0 }; i < 4; ++i ) {
-      dA_dx_block( 2, 0 )( 1, i ) = dA_dx_node2[node_idx[i]];
+    for ( int i{ 0 }; i < 2; ++i ) {
+      auto& contrib = dA_dx_contribs[i];
+      contrib.row_elem_ids.push_back( elem1 );
+      contrib.col_elem_ids.push_back( i == 0 ? elem1 : elem2 );
+      contrib.jacobian_offsets.push_back( contrib.jacobian_data.size() );
+      contrib.jacobian_data.append( axom::ArrayView<const double>( dA_dx_blocks[i], 8 ) );
     }
-    for ( int i{ 0 }; i < 4; ++i ) {
-      dA_dx_block( 2, 1 )( 0, i ) = dA_dx_node1[node_idx[i + 4]];
-    }
-    for ( int i{ 0 }; i < 4; ++i ) {
-      dA_dx_block( 2, 1 )( 1, i ) = dA_dx_node2[node_idx[i + 4]];
-    }
-    dA_dx.storeElemBlockJ( { elem1, elem2, elem1 }, dA_dx_block );
   }
 
   // Move gap and area to submesh level vectors
@@ -195,16 +171,14 @@ void NewMethodAdapter::updateNodalGaps()
   gap_vec_ = g_tilde_vec_.divide( A_vec_, area_tol_ );
 
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
-  dg_tilde_dx_ = jac_data_.GetMfemJacobian(
-      convertMethodData( dg_tilde_dx, { BlockSpace::LAGRANGE_MULTIPLIER }, { BlockSpace::NONMORTAR, BlockSpace::MORTAR } ) );
+  dg_tilde_dx_ = jac_data_.GetMfemJacobian( dg_tilde_dx_contribs );
   if ( !tied_contact_ ) {
     // technically, we should do this on all the vectors/matrices below, but it looks like the mutliplication operators
     // below will zero them out anyway
     dg_tilde_dx_.EliminateRows( rows_to_elim );
   }
 
-  dA_dx_ = jac_data_.GetMfemJacobian(
-      convertMethodData( dA_dx, { BlockSpace::LAGRANGE_MULTIPLIER }, { BlockSpace::NONMORTAR, BlockSpace::MORTAR } ) );
+  dA_dx_ = jac_data_.GetMfemJacobian( dA_dx_contribs );
 }
 
 void NewMethodAdapter::updateNodalForces()
@@ -229,8 +203,23 @@ void NewMethodAdapter::updateNodalForces()
 
   force_vec_ = ( pressure_vec_ * dg_tilde_dx_ ) + ( g_tilde_vec_ * dp_dx );
 
-  MethodData df_dx_data;
-  df_dx_data.reserveBlockJ( { BlockSpace::NONMORTAR, BlockSpace::MORTAR }, pairs_.size() );
+  std::vector<ComputedElementData> df_dx_contribs( 4 );
+  df_dx_contribs[0].row_space = BlockSpace::NONMORTAR;
+  df_dx_contribs[0].col_space = BlockSpace::NONMORTAR;
+  df_dx_contribs[1].row_space = BlockSpace::NONMORTAR;
+  df_dx_contribs[1].col_space = BlockSpace::MORTAR;
+  df_dx_contribs[2].row_space = BlockSpace::MORTAR;
+  df_dx_contribs[2].col_space = BlockSpace::NONMORTAR;
+  df_dx_contribs[3].row_space = BlockSpace::MORTAR;
+  df_dx_contribs[3].col_space = BlockSpace::MORTAR;
+
+  for ( auto& contrib : df_dx_contribs ) {
+    contrib.row_elem_ids.reserve( pairs_.size() );
+    contrib.col_elem_ids.reserve( pairs_.size() );
+    contrib.jacobian_data.reserve( pairs_.size() * 16 );
+    contrib.jacobian_offsets.reserve( pairs_.size() );
+  }
+
   const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
 
   mfem::GridFunction redecomp_pressure( submesh_data_.GetRedecompGap() );
@@ -273,74 +262,42 @@ void NewMethodAdapter::updateNodalForces()
     const RealT g_p_ainv1 = -redecomp_g_tilde( node11 ) * redecomp_pressure( node11 ) / redecomp_A( node11 );
     const RealT g_p_ainv2 = -redecomp_g_tilde( node12 ) * redecomp_pressure( node12 ) / redecomp_A( node12 );
 
-    double df_dx_node1[64];
-    double df_dx_node2[64];
+    double d2g_dx2_node1[64];
+    double d2g_dx2_node2[64];
     // ordering: [dg/(dx0dx0) dg/(dy0dx0) dg/(dx1dx0) ...]
-    evaluator_->d2_g2tilde( flipped_pair, mesh1_view, mesh2_view, df_dx_node1, df_dx_node2 );
-    StackArray<DeviceArray2D<RealT>, 9> df_dx_block( 2 );
-    df_dx_block( 0, 0 ) = DeviceArray2D<RealT>( 4, 4 );
-    df_dx_block( 0, 0 ).fill( 0.0 );
-    df_dx_block( 0, 1 ) = DeviceArray2D<RealT>( 4, 4 );
-    df_dx_block( 0, 1 ).fill( 0.0 );
-    df_dx_block( 1, 0 ) = DeviceArray2D<RealT>( 4, 4 );
-    df_dx_block( 1, 0 ).fill( 0.0 );
-    df_dx_block( 1, 1 ) = DeviceArray2D<RealT>( 4, 4 );
-    df_dx_block( 1, 1 ).fill( 0.0 );
-    for ( int j{ 0 }; j < 4; ++j ) {
-      for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 0, 0 )( i, j ) = pressure1 * df_dx_node1[node_idx[i] + node_idx[j] * 8] +
-                                      pressure2 * df_dx_node2[node_idx[i] + node_idx[j] * 8];
-      }
-    }
-    for ( int j{ 0 }; j < 4; ++j ) {
-      for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 0, 1 )( i, j ) = pressure1 * df_dx_node1[node_idx[i] + node_idx[j + 4] * 8] +
-                                      pressure2 * df_dx_node2[node_idx[i] + node_idx[j + 4] * 8];
-      }
-    }
-    for ( int j{ 0 }; j < 4; ++j ) {
-      for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 1, 0 )( i, j ) = pressure1 * df_dx_node1[node_idx[i + 4] + node_idx[j] * 8] +
-                                      pressure2 * df_dx_node2[node_idx[i + 4] + node_idx[j] * 8];
+    evaluator_->d2_g2tilde( flipped_pair, mesh1_view, mesh2_view, d2g_dx2_node1, d2g_dx2_node2 );
+
+    double d2A_dx2_node1[64];
+    double d2A_dx2_node2[64];
+    evaluator_->compute_d2A_d2u( flipped_pair, mesh1_view, mesh2_view, d2A_dx2_node1, d2A_dx2_node2 );
+
+    double df_dx_blocks[2][2][16];
+    for ( int i{ 0 }; i < 2; ++i ) {
+      for ( int j{ 0 }; j < 2; ++j ) {
+        for ( int k{ 0 }; k < 4; ++k ) {
+          for ( int l{ 0 }; l < 4; ++l ) {
+            df_dx_blocks[i][j][l + k * 4] = pressure1 * d2g_dx2_node1[node_idx[l + i * 4] + node_idx[k + j * 4] * 8] +
+                                            pressure2 * d2g_dx2_node2[node_idx[l + i * 4] + node_idx[k + j * 4] * 8] +
+                                            g_p_ainv1 * d2A_dx2_node1[node_idx[l + i * 4] + node_idx[k + j * 4] * 8] +
+                                            g_p_ainv2 * d2A_dx2_node2[node_idx[l + i * 4] + node_idx[k + j * 4] * 8];
+          }
+        }
       }
     }
-    for ( int j{ 0 }; j < 4; ++j ) {
-      for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 1, 1 )( i, j ) = pressure1 * df_dx_node1[node_idx[i + 4] + node_idx[j + 4] * 8] +
-                                      pressure2 * df_dx_node2[node_idx[i + 4] + node_idx[j + 4] * 8];
-      }
-    }
-    evaluator_->compute_d2A_d2u( flipped_pair, mesh1_view, mesh2_view, df_dx_node1, df_dx_node2 );
-    for ( int j{ 0 }; j < 4; ++j ) {
-      for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 0, 0 )( i, j ) += g_p_ainv1 * df_dx_node1[node_idx[i] + node_idx[j] * 8] +
-                                       g_p_ainv2 * df_dx_node2[node_idx[i] + node_idx[j] * 8];
-      }
-    }
-    for ( int j{ 0 }; j < 4; ++j ) {
-      for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 0, 1 )( i, j ) += g_p_ainv1 * df_dx_node1[node_idx[i] + node_idx[j + 4] * 8] +
-                                       g_p_ainv2 * df_dx_node2[node_idx[i] + node_idx[j + 4] * 8];
-      }
-    }
-    for ( int j{ 0 }; j < 4; ++j ) {
-      for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 1, 0 )( i, j ) += g_p_ainv1 * df_dx_node1[node_idx[i + 4] + node_idx[j] * 8] +
-                                       g_p_ainv2 * df_dx_node2[node_idx[i + 4] + node_idx[j] * 8];
-      }
-    }
-    for ( int j{ 0 }; j < 4; ++j ) {
-      for ( int i{ 0 }; i < 4; ++i ) {
-        df_dx_block( 1, 1 )( i, j ) += g_p_ainv1 * df_dx_node1[node_idx[i + 4] + node_idx[j + 4] * 8] +
-                                       g_p_ainv2 * df_dx_node2[node_idx[i + 4] + node_idx[j + 4] * 8];
+
+    for ( int i{ 0 }; i < 2; ++i ) {
+      for ( int j{ 0 }; j < 2; ++j ) {
+        auto& contrib = df_dx_contribs[i * 2 + j];
+        contrib.row_elem_ids.push_back( i == 0 ? elem1 : elem2 );
+        contrib.col_elem_ids.push_back( j == 0 ? elem1 : elem2 );
+        contrib.jacobian_offsets.push_back( contrib.jacobian_data.size() );
+        contrib.jacobian_data.append( axom::ArrayView<const double>( df_dx_blocks[i][j], 16 ) );
       }
     }
-    df_dx_data.storeElemBlockJ( { elem1, elem2 }, df_dx_block );
   }
 
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
-  df_dx_ = jac_data_.GetMfemJacobian( convertMethodData(
-      df_dx_data, { BlockSpace::NONMORTAR, BlockSpace::MORTAR }, { BlockSpace::NONMORTAR, BlockSpace::MORTAR } ) );
+  df_dx_ = jac_data_.GetMfemJacobian( df_dx_contribs );
 
   auto pg2_over_asq = ( 2.0 * pressure_vec_ )
                           .multiplyInPlace( g_tilde_vec_ )

From 38895568a7afa1ec773c2951ca95ce73bf1045a9 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 13 Feb 2026 22:15:35 -0800
Subject: [PATCH 49/56] add a wrapper class for element contributions

---
 src/tribol/mesh/MfemData.cpp | 29 +++++++++++++++++++++++++
 src/tribol/mesh/MfemData.hpp | 42 ++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/src/tribol/mesh/MfemData.cpp b/src/tribol/mesh/MfemData.cpp
index fd8071ee..16f80a95 100644
--- a/src/tribol/mesh/MfemData.cpp
+++ b/src/tribol/mesh/MfemData.cpp
@@ -1186,6 +1186,35 @@ shared::ParSparseMat MfemJacobianData::GetMfemJacobian( const std::vector<Comput
   return std::move( *par_J );
 }
 
+JacobianContributions::JacobianContributions( std::initializer_list<std::pair<BlockSpace, BlockSpace>> blocks )
+{
+  // Create one empty contribution per requested block; storage order follows the order of `blocks`.
+  for ( const auto& spaces : blocks ) {
+    contributions_.emplace_back();
+    contributions_.back().row_space = spaces.first;
+    contributions_.back().col_space = spaces.second;
+  }
+}
+
+void JacobianContributions::reserve( int n_pairs, int n_entries_per_pair )
+{
+  for ( auto& block : contributions_ ) {
+    block.jacobian_offsets.reserve( n_pairs );                    // one start offset per pushed element block
+    block.jacobian_data.reserve( n_pairs * n_entries_per_pair );  // flattened element Jacobian values
+    block.col_elem_ids.reserve( n_pairs );                        // one column element id per pushed block
+    block.row_elem_ids.reserve( n_pairs );                        // one row element id per pushed block
+  }
+}
+
+void JacobianContributions::push_back( int block_idx, int row_elem_id, int col_elem_id, const double* data, int size )
+{
+  auto& block = contributions_.at( static_cast<std::size_t>( block_idx ) );  // throws std::out_of_range if invalid
+  block.row_elem_ids.push_back( row_elem_id );
+  block.col_elem_ids.push_back( col_elem_id );
+  block.jacobian_offsets.push_back( block.jacobian_data.size() );  // start of this element's values in jacobian_data
+  block.jacobian_data.append( axom::ArrayView<const double>( data, size ) );
+}
+
 }  // namespace tribol
 
 #endif /* BUILD_REDECOMP */
diff --git a/src/tribol/mesh/MfemData.hpp b/src/tribol/mesh/MfemData.hpp
index b331b7ff..6e93ebf9 100644
--- a/src/tribol/mesh/MfemData.hpp
+++ b/src/tribol/mesh/MfemData.hpp
@@ -1650,6 +1650,48 @@ struct ComputedElementData {
   axom::Array<int> jacobian_offsets;  ///< Offsets into data for each element
 };
 
+/**
+ * @brief Collects element Jacobian blocks, one ComputedElementData per {row_space, col_space} pair
+ */
+class JacobianContributions {
+ public:
+  /**
+   * @brief Create one empty contribution per {row_space, col_space} pair
+   *
+   * @param blocks List of {row_space, col_space} pairs; list position is the block_idx used by push_back()
+   */
+  JacobianContributions( std::initializer_list<std::pair<BlockSpace, BlockSpace>> blocks );
+
+  /**
+   * @brief Reserve storage in every block contribution (capacity only; no entries are added)
+   *
+   * @param n_pairs Expected number of push_back() calls per block (e.g. number of interface pairs)
+   * @param n_entries_per_pair Number of entries in one element Jacobian block
+   */
+  void reserve( int n_pairs, int n_entries_per_pair );
+
+  /**
+   * @brief Append one element Jacobian contribution to a specific block
+   *
+   * @param block_idx Index of the block space pair (constructor order); must be in [0, number of blocks)
+   * @param row_elem_id Tribol element ID for the row space
+   * @param col_elem_id Tribol element ID for the column space
+   * @param data Pointer to the flattened element Jacobian data (column-major)
+   * @param size Number of entries (doubles) read from data
+   */
+  void push_back( int block_idx, int row_elem_id, int col_elem_id, const double* data, int size );
+
+  /**
+   * @brief Access the accumulated contributions, e.g. to pass to MfemJacobianData::GetMfemJacobian()
+   *
+   * @return Read-only reference to the per-block contributions, in constructor order
+   */
+  const std::vector<ComputedElementData>& get() const { return contributions_; }
+
+ private:
+  std::vector<ComputedElementData> contributions_;  ///< One entry per block, in constructor order
+};
+
 /**
  * @brief Simplifies transfer of Jacobian matrix data between MFEM and Tribol
  */

From 95a30138ca3914fe128bc80187c5649dce9dcea8 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Fri, 13 Feb 2026 22:16:30 -0800
Subject: [PATCH 50/56] apply jacobian contribution wrapper

---
 src/tribol/physics/NewMethodAdapter.cpp | 92 +++++++------------------
 1 file changed, 26 insertions(+), 66 deletions(-)

diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index 0cdeec70..820d1e39 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -46,30 +46,13 @@ void NewMethodAdapter::updateNodalGaps()
   mfem::GridFunction redecomp_area( redecomp_gap.FESpace() );
   redecomp_area = 0.0;
 
-  std::vector<ComputedElementData> dg_tilde_dx_contribs( 2 );
-  dg_tilde_dx_contribs[0].row_space = BlockSpace::LAGRANGE_MULTIPLIER;
-  dg_tilde_dx_contribs[0].col_space = BlockSpace::NONMORTAR;
-  dg_tilde_dx_contribs[1].row_space = BlockSpace::LAGRANGE_MULTIPLIER;
-  dg_tilde_dx_contribs[1].col_space = BlockSpace::MORTAR;
-
-  std::vector<ComputedElementData> dA_dx_contribs( 2 );
-  dA_dx_contribs[0].row_space = BlockSpace::LAGRANGE_MULTIPLIER;
-  dA_dx_contribs[0].col_space = BlockSpace::NONMORTAR;
-  dA_dx_contribs[1].row_space = BlockSpace::LAGRANGE_MULTIPLIER;
-  dA_dx_contribs[1].col_space = BlockSpace::MORTAR;
-
-  for ( auto& contrib : dg_tilde_dx_contribs ) {
-    contrib.row_elem_ids.reserve( pairs_.size() );
-    contrib.col_elem_ids.reserve( pairs_.size() );
-    contrib.jacobian_data.reserve( pairs_.size() * 8 );
-    contrib.jacobian_offsets.reserve( pairs_.size() );
-  }
-  for ( auto& contrib : dA_dx_contribs ) {
-    contrib.row_elem_ids.reserve( pairs_.size() );
-    contrib.col_elem_ids.reserve( pairs_.size() );
-    contrib.jacobian_data.reserve( pairs_.size() * 8 );
-    contrib.jacobian_offsets.reserve( pairs_.size() );
-  }
+  JacobianContributions dg_tilde_dx_contribs( { { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::NONMORTAR },
+                                                { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::MORTAR } } );
+  JacobianContributions dA_dx_contribs( { { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::NONMORTAR },
+                                          { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::MORTAR } } );
+
+  dg_tilde_dx_contribs.reserve( pairs_.size(), 8 );
+  dA_dx_contribs.reserve( pairs_.size(), 8 );
 
   const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
 
@@ -114,13 +97,8 @@ void NewMethodAdapter::updateNodalGaps()
       dg_tilde_dx_blocks[1][i * 2] = dg_dx_node1[node_idx[i + 4]];
       dg_tilde_dx_blocks[1][i * 2 + 1] = dg_dx_node2[node_idx[i + 4]];
     }
-    for ( int i{ 0 }; i < 2; ++i ) {
-      auto& contrib = dg_tilde_dx_contribs[i];
-      contrib.row_elem_ids.push_back( elem1 );
-      contrib.col_elem_ids.push_back( i == 0 ? elem1 : elem2 );
-      contrib.jacobian_offsets.push_back( contrib.jacobian_data.size() );
-      contrib.jacobian_data.append( axom::ArrayView<const double>( dg_tilde_dx_blocks[i], 8 ) );
-    }
+    dg_tilde_dx_contribs.push_back( 0, elem1, elem1, dg_tilde_dx_blocks[0], 8 );
+    dg_tilde_dx_contribs.push_back( 1, elem1, elem2, dg_tilde_dx_blocks[1], 8 );
 
     double dA_dx_node1[8];
     double dA_dx_node2[8];
@@ -133,13 +111,8 @@ void NewMethodAdapter::updateNodalGaps()
       dA_dx_blocks[1][i * 2] = dA_dx_node1[node_idx[i + 4]];
       dA_dx_blocks[1][i * 2 + 1] = dA_dx_node2[node_idx[i + 4]];
     }
-    for ( int i{ 0 }; i < 2; ++i ) {
-      auto& contrib = dA_dx_contribs[i];
-      contrib.row_elem_ids.push_back( elem1 );
-      contrib.col_elem_ids.push_back( i == 0 ? elem1 : elem2 );
-      contrib.jacobian_offsets.push_back( contrib.jacobian_data.size() );
-      contrib.jacobian_data.append( axom::ArrayView<const double>( dA_dx_blocks[i], 8 ) );
-    }
+    dA_dx_contribs.push_back( 0, elem1, elem1, dA_dx_blocks[0], 8 );
+    dA_dx_contribs.push_back( 1, elem1, elem2, dA_dx_blocks[1], 8 );
   }
 
   // Move gap and area to submesh level vectors
@@ -171,16 +144,17 @@ void NewMethodAdapter::updateNodalGaps()
   gap_vec_ = g_tilde_vec_.divide( A_vec_, area_tol_ );
 
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
-  dg_tilde_dx_ = jac_data_.GetMfemJacobian( dg_tilde_dx_contribs );
+  dg_tilde_dx_ = jac_data_.GetMfemJacobian( dg_tilde_dx_contribs.get() );
   if ( !tied_contact_ ) {
     // technically, we should do this on all the vectors/matrices below, but it looks like the mutliplication operators
     // below will zero them out anyway
     dg_tilde_dx_.EliminateRows( rows_to_elim );
   }
 
-  dA_dx_ = jac_data_.GetMfemJacobian( dA_dx_contribs );
+  dA_dx_ = jac_data_.GetMfemJacobian( dA_dx_contribs.get() );
 }
 
+
 void NewMethodAdapter::updateNodalForces()
 {
   // NOTE: user should have called updateNodalGaps() with updated coords before calling this
@@ -203,22 +177,12 @@ void NewMethodAdapter::updateNodalForces()
 
   force_vec_ = ( pressure_vec_ * dg_tilde_dx_ ) + ( g_tilde_vec_ * dp_dx );
 
-  std::vector<ComputedElementData> df_dx_contribs( 4 );
-  df_dx_contribs[0].row_space = BlockSpace::NONMORTAR;
-  df_dx_contribs[0].col_space = BlockSpace::NONMORTAR;
-  df_dx_contribs[1].row_space = BlockSpace::NONMORTAR;
-  df_dx_contribs[1].col_space = BlockSpace::MORTAR;
-  df_dx_contribs[2].row_space = BlockSpace::MORTAR;
-  df_dx_contribs[2].col_space = BlockSpace::NONMORTAR;
-  df_dx_contribs[3].row_space = BlockSpace::MORTAR;
-  df_dx_contribs[3].col_space = BlockSpace::MORTAR;
-
-  for ( auto& contrib : df_dx_contribs ) {
-    contrib.row_elem_ids.reserve( pairs_.size() );
-    contrib.col_elem_ids.reserve( pairs_.size() );
-    contrib.jacobian_data.reserve( pairs_.size() * 16 );
-    contrib.jacobian_offsets.reserve( pairs_.size() );
-  }
+  JacobianContributions df_dx_contribs( { { BlockSpace::NONMORTAR, BlockSpace::NONMORTAR },
+                                          { BlockSpace::NONMORTAR, BlockSpace::MORTAR },
+                                          { BlockSpace::MORTAR, BlockSpace::NONMORTAR },
+                                          { BlockSpace::MORTAR, BlockSpace::MORTAR } } );
+
+  df_dx_contribs.reserve( pairs_.size(), 16 );
 
   const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
 
@@ -285,19 +249,15 @@ void NewMethodAdapter::updateNodalForces()
       }
     }
 
-    for ( int i{ 0 }; i < 2; ++i ) {
-      for ( int j{ 0 }; j < 2; ++j ) {
-        auto& contrib = df_dx_contribs[i * 2 + j];
-        contrib.row_elem_ids.push_back( i == 0 ? elem1 : elem2 );
-        contrib.col_elem_ids.push_back( j == 0 ? elem1 : elem2 );
-        contrib.jacobian_offsets.push_back( contrib.jacobian_data.size() );
-        contrib.jacobian_data.append( axom::ArrayView<const double>( df_dx_blocks[i][j], 16 ) );
-      }
-    }
+    df_dx_contribs.push_back( 0, elem1, elem1, df_dx_blocks[0][0], 16 );
+    df_dx_contribs.push_back( 1, elem1, elem2, df_dx_blocks[0][1], 16 );
+    df_dx_contribs.push_back( 2, elem2, elem1, df_dx_blocks[1][0], 16 );
+    df_dx_contribs.push_back( 3, elem2, elem2, df_dx_blocks[1][1], 16 );
   }
 
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
-  df_dx_ = jac_data_.GetMfemJacobian( df_dx_contribs );
+  df_dx_ = jac_data_.GetMfemJacobian( df_dx_contribs.get() );
+
 
   auto pg2_over_asq = ( 2.0 * pressure_vec_ )
                           .multiplyInPlace( g_tilde_vec_ )

From 4f84afdc07c9fe2586b19dd780076baf531953c7 Mon Sep 17 00:00:00 2001
From: Ryan Lutz <lutz23@llnl.gov>
Date: Mon, 23 Feb 2026 10:21:03 -0800
Subject: [PATCH 51/56]  fixed smoothing issue

---
 src/tests/tribol_new_energy_patch.cpp         | 758 +++++++++++++++---
 src/tribol/interface/mfem_tribol.cpp          |  47 ++
 src/tribol/interface/mfem_tribol.hpp          |  12 +
 src/tribol/physics/ContactFormulation.hpp     |   8 +
 .../physics/ContactFormulationFactory.cpp     |   5 +-
 src/tribol/physics/NewMethodAdapter.cpp       | 112 ++-
 src/tribol/physics/NewMethodAdapter.hpp       |  18 +-
 src/tribol/physics/new_method.cpp             | 123 ++-
 8 files changed, 951 insertions(+), 132 deletions(-)

diff --git a/src/tests/tribol_new_energy_patch.cpp b/src/tests/tribol_new_energy_patch.cpp
index bbe79327..0103ece7 100644
--- a/src/tests/tribol_new_energy_patch.cpp
+++ b/src/tests/tribol_new_energy_patch.cpp
@@ -1,3 +1,383 @@
+// // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// // other Tribol Project Developers. See the top-level LICENSE file for details.
+// //
+// // SPDX-License-Identifier: (MIT)
+
+// #include <cmath>
+// #include <set>
+
+// #include <gtest/gtest.h>
+
+// #ifdef TRIBOL_USE_UMPIRE
+// #include "umpire/ResourceManager.hpp"
+// #endif
+
+// #include "mfem.hpp"
+
+// #include "axom/CLI11.hpp"
+// #include "axom/slic.hpp"
+
+// #include "shared/mesh/MeshBuilder.hpp"
+// #include "redecomp/redecomp.hpp"
+
+// #include "tribol/config.hpp"
+// #include "tribol/common/Parameters.hpp"
+// #include "tribol/interface/tribol.hpp"
+// #include "tribol/interface/mfem_tribol.hpp"
+
+// /**
+//  * @brief Contact patch test using ENERGY_MORTAR with zero initial gap
+//  *        and prescribed displacement applied incrementally over timesteps.
+//  *
+//  * Two unit squares [0,1]x[0,1] and [0,1]x[1,2] with zero gap.
+//  * Linear elasticity with lambda = mu = 5.
+//  *
+//  * Analytical solution (plane strain, uniaxial stress with sigma_xx = 0):
+//  *   eps_yy = applied_disp / total_height
+//  *   eps_xx = -lambda / (lambda + 2*mu) * eps_yy
+//  *   u_y(x,y) = eps_yy * y
+//  *   u_x(x,y) = eps_xx * x
+//  */
+// class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>> {
+//  protected:
+//   tribol::RealT max_disp_;
+//   double l2_err_vec_;
+//   double l2_err_x_;
+//   double l2_err_y_;
+
+//   // --- User-configurable parameters ---
+//   static constexpr int    num_timesteps_ = 10;
+//   static constexpr double total_prescribed_disp_ = -0.01;
+//   static constexpr double lam_ = 5.0;
+//   static constexpr double mu_  = 5.0;
+//   // ------------------------------------
+
+//   void SetUp() override
+//   {
+//     int ref_levels = std::get<0>( GetParam() );
+//     int order = 1;
+
+//     auto mortar_attrs     = std::set<int>( { 5 } );
+//     auto nonmortar_attrs  = std::set<int>( { 3 } );
+//     auto xfixed_attrs     = std::set<int>( { 4 } );
+//     auto yfixed_bottom_attrs = std::set<int>( { 1 } );
+//     auto prescribed_attrs = std::set<int>( { 6 } );
+
+//     int nel_per_dir = std::pow( 2, ref_levels );
+
+//     // clang-format off
+//     mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
+//       shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir)
+//         .updateBdrAttrib(1, 1)   // bottom (Fixed Y)
+//         .updateBdrAttrib(2, 2)   // right 
+//         .updateBdrAttrib(3, 3)   // top  (NonMortar)
+//         .updateBdrAttrib(4, 4),  // left (X-fixed)
+//       shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir)
+//         .translate({0.0, 1.0})
+//         .updateBdrAttrib(1, 5)   // bottom (Mortar)
+//         .updateBdrAttrib(2, 2)   // right 
+//         .updateBdrAttrib(3, 6)   // top  (prescribed displacement)
+//         .updateBdrAttrib(4, 4)   // left  (Fixed x)
+//     }));
+//     // clang-format on
+
+//     // FE space and grid functions
+//     auto fe_coll = mfem::H1_FECollection( order, mesh.SpaceDimension() );
+//     auto par_fe_space = mfem::ParFiniteElementSpace( &mesh, &fe_coll, mesh.SpaceDimension() );
+//     auto coords = mfem::ParGridFunction( &par_fe_space );
+//     if ( order > 1 ) {
+//       mesh.SetNodalGridFunction( &coords, false );
+//     } else {
+//       mesh.GetNodes( coords );
+//     }
+
+
+//     // Grid function for displacement
+//     mfem::ParGridFunction displacement( &par_fe_space );
+//     displacement = 0.0;
+
+//     mfem::ParGridFunction ref_coords( &par_fe_space );
+//     mesh.GetNodes( ref_coords );
+
+//     // Recover the Dirichlet boundary true-dof list
+//     mfem::Array<int> ess_vdof_marker( par_fe_space.GetVSize() );
+//     ess_vdof_marker = 0;
+
+//     // x-fixed on left
+//     {
+//       mfem::Array<int> tmp;
+//       mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+//       bdr = 0;
+//       for ( auto a : xfixed_attrs ) bdr[a - 1] = 1;
+//       par_fe_space.GetEssentialVDofs( bdr, tmp, 0 );
+//       for ( int i = 0; i < tmp.Size(); ++i )
+//         ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+//     }
+
+//     // y-fixed on bottom
+//     {
+//       mfem::Array<int> tmp;
+//       mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+//       bdr = 0;
+//       for ( auto a : yfixed_bottom_attrs ) bdr[a - 1] = 1;
+//       par_fe_space.GetEssentialVDofs( bdr, tmp, 1 );
+//       for ( int i = 0; i < tmp.Size(); ++i )
+//         ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+//     }
+
+//     // y-prescribed on top
+//     mfem::Array<int> prescribed_vdof_marker( par_fe_space.GetVSize() );
+//     prescribed_vdof_marker = 0;
+//     {
+//       mfem::Array<int> tmp;
+//       mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+//       bdr = 0;
+//       for ( auto a : prescribed_attrs ) bdr[a - 1] = 1;
+//       par_fe_space.GetEssentialVDofs( bdr, tmp, 1 );
+//       prescribed_vdof_marker = tmp;
+//       for ( int i = 0; i < tmp.Size(); ++i )
+//         ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+//     }
+
+//     mfem::Array<int> ess_tdof_list;
+//     {
+//       mfem::Array<int> ess_tdof_marker;
+//       par_fe_space.GetRestrictionMatrix()->BooleanMult( ess_vdof_marker, ess_tdof_marker );
+//       mfem::FiniteElementSpace::MarkerToList( ess_tdof_marker, ess_tdof_list );
+//     }
+
+//     mfem::Array<int> prescribed_tdof_list;
+//     {
+//       mfem::Array<int> marker;
+//       par_fe_space.GetRestrictionMatrix()->BooleanMult( prescribed_vdof_marker, marker );
+//       mfem::FiniteElementSpace::MarkerToList( marker, prescribed_tdof_list );
+//     }
+
+//     // set up mfem elasticity bilinear form
+//     mfem::ParBilinearForm a( &par_fe_space );
+//     mfem::ConstantCoefficient lambda_coeff( lam_ );
+//     mfem::ConstantCoefficient mu_coeff( mu_ );
+//     a.AddDomainIntegrator( new mfem::ElasticityIntegrator( lambda_coeff, mu_coeff ) );
+//     a.Assemble();
+//     a.Finalize();
+//     auto A_elastic_raw = std::unique_ptr<mfem::HypreParMatrix>( a.ParallelAssemble() );
+
+//     //Visit Output
+//     mfem::VisItDataCollection visit_dc( "energy_patch_test", &mesh );
+//     visit_dc.SetPrecision( 8 );
+//     visit_dc.RegisterField( "displacement", &displacement );
+//     visit_dc.SetCycle( 0 );
+//     visit_dc.SetTime( 0.0 );
+//     visit_dc.Save();
+
+//     // timestepping loop for displacement
+//     double disp_increment = total_prescribed_disp_ / num_timesteps_;
+//     tribol::RealT dt = 1.0 / num_timesteps_;
+//     int cs_id = 0, mesh1_id = 0, mesh2_id = 1;
+
+//     mfem::Vector X( par_fe_space.GetTrueVSize() );
+//     X = 0.0;
+
+//     for ( int step = 1; step <= num_timesteps_; ++step )
+//     {
+//       double current_prescribed_disp = disp_increment * step;
+
+//       // Prescribed displacement vector
+//       mfem::Vector X_prescribed( par_fe_space.GetTrueVSize() );
+//       X_prescribed = 0.0;
+//       for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+//         X_prescribed( prescribed_tdof_list[i] ) = current_prescribed_disp;
+//       }
+
+//       // Update coordinates for contact detection
+//       {
+//         mfem::Vector X_temp( X );
+//         for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+//           X_temp( prescribed_tdof_list[i] ) = current_prescribed_disp;
+//         }
+//         auto& P = *par_fe_space.GetProlongationMatrix();
+//         P.Mult( X_temp, displacement );
+//       }
+//       coords = ref_coords;
+//       coords += displacement;
+
+//       // Re-register tribol each step (internal arrays need fresh allocation
+//       // when contact pairs change between steps)
+//       coords.ReadWrite();
+//       tribol::registerMfemCouplingScheme( cs_id, mesh1_id, mesh2_id, mesh, coords,
+//                                           mortar_attrs, nonmortar_attrs,
+//                                           tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
+//                                           tribol::ENERGY_MORTAR, tribol::FRICTIONLESS,
+//                                           tribol::LAGRANGE_MULTIPLIER, tribol::BINNING_GRID );
+//       tribol::setLagrangeMultiplierOptions( cs_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
+//       tribol::setMfemKinematicConstantPenalty( cs_id, 10000.0, 10000.0 );
+
+//       tribol::updateMfemParallelDecomposition();
+//       tribol::update( step, step * dt, dt );
+
+//       auto A_cont = tribol::getMfemDfDx( cs_id );
+
+//       mfem::Vector f_contact( par_fe_space.GetTrueVSize() );
+//       f_contact = 0.0;
+//       tribol::getMfemResponse( cs_id, f_contact );
+//       f_contact.Neg();
+
+//       // Inhomogeneous Dirichlet: rhs = f_contact - K * u_prescribed
+//       auto A_total = std::unique_ptr<mfem::HypreParMatrix>(
+//         mfem::Add( 1.0, *A_elastic_raw, 1.0, *A_cont ) );
+
+//       mfem::Vector rhs( par_fe_space.GetTrueVSize() );
+//       A_total->Mult( X_prescribed, rhs );
+//       rhs.Neg();
+//       rhs += f_contact;
+
+//       for ( int i = 0; i < ess_tdof_list.Size(); ++i ) {
+//         rhs( ess_tdof_list[i] ) = 0.0;
+//       }
+
+//       A_total->EliminateRowsCols( ess_tdof_list );
+
+//       mfem::Vector X_free( par_fe_space.GetTrueVSize() );
+//       X_free = 0.0;
+
+//       mfem::HypreBoomerAMG amg( *A_total );
+//       amg.SetElasticityOptions( &par_fe_space );
+//       amg.SetPrintLevel( 0 );
+
+//       mfem::MINRESSolver solver( MPI_COMM_WORLD );
+//       solver.SetRelTol( 1.0e-8 );
+//       solver.SetAbsTol( 1.0e-12 );
+//       solver.SetMaxIter( 5000 );
+//       solver.SetPrintLevel( step == num_timesteps_ ? 3 : 1 );
+//       solver.SetPreconditioner( amg );
+//       solver.SetOperator( *A_total );
+//       solver.Mult( rhs, X_free );
+
+//       X = X_free;
+//       X += X_prescribed;
+
+//       SLIC_INFO( "Timestep " << step << "/" << num_timesteps_
+//                  << " | prescribed disp = " << current_prescribed_disp );
+
+//       // Save VisIt output
+//       {
+//         auto& P = *par_fe_space.GetProlongationMatrix();
+//         P.Mult( X, displacement );
+//       }
+//       visit_dc.SetCycle( step );
+//       visit_dc.SetTime( step * dt );
+//       visit_dc.Save();
+//     }
+
+//     // Get final displacement
+//     {
+//       auto& P = *par_fe_space.GetProlongationMatrix();
+//       P.Mult( X, displacement );
+//     }
+
+//     auto local_max = displacement.Max();
+//     max_disp_ = 0.0;
+//     MPI_Allreduce( &local_max, &max_disp_, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
+//     SLIC_INFO( "Max displacement: " << max_disp_ );
+
+//     // -----------------------------------------------------------------
+//     // Analytical solution comparison
+//     //
+//     // Plane strain, uniaxial stress (sigma_xx = 0, free right side):
+//     //   eps_yy = applied_disp / total_height = -0.01 / 2.0 = -0.005
+//     //   eps_xx = -lambda/(lambda + 2*mu) * eps_yy
+//     //   u_y = eps_yy * y
+//     //   u_x = eps_xx * x
+//     // -----------------------------------------------------------------
+//     double total_height = 2.0;
+//     double eps_yy = total_prescribed_disp_ / total_height;
+//     double eps_xx = -lam_ / ( lam_ + 2.0 * mu_ ) * eps_yy;
+
+//     SLIC_INFO( "Analytical: eps_yy = " << eps_yy << ", eps_xx = " << eps_xx );
+
+//     mfem::VectorFunctionCoefficient exact_sol_coeff( 2,
+//       [eps_xx, eps_yy]( const mfem::Vector& x, mfem::Vector& u ) {
+//         u[0] = eps_xx * x[0];
+//         u[1] = eps_yy * x[1];
+//       } );
+
+//     mfem::ParGridFunction exact_disp( &par_fe_space );
+//     exact_disp.ProjectCoefficient( exact_sol_coeff );
+
+//     // Vector error
+//     mfem::ParGridFunction error_vec( exact_disp );
+//     error_vec -= displacement;
+//     l2_err_vec_ = mfem::ParNormlp( error_vec, 2, MPI_COMM_WORLD );
+
+//     // Component-wise errors
+//     const mfem::FiniteElementCollection* fec = par_fe_space.FEColl();
+//     mfem::ParFiniteElementSpace scalar_fes( &mesh, fec, 1, par_fe_space.GetOrdering() );
+//     const int n = scalar_fes.GetNDofs();
+
+//     mfem::ParGridFunction ux_exact( &scalar_fes ), ux_num( &scalar_fes );
+//     mfem::ParGridFunction uy_exact( &scalar_fes ), uy_num( &scalar_fes );
+
+//     for ( int i = 0; i < n; ++i ) {
+//       ux_exact( i ) = exact_disp( i );
+//       ux_num( i )   = displacement( i );
+//       uy_exact( i ) = exact_disp( n + i );
+//       uy_num( i )   = displacement( n + i );
+//     }
+
+//     mfem::ParGridFunction ux_err( ux_exact );
+//     ux_err -= ux_num;
+//     l2_err_x_ = mfem::ParNormlp( ux_err, 2, MPI_COMM_WORLD );
+
+//     mfem::ParGridFunction uy_err( uy_exact );
+//     uy_err -= uy_num;
+//     l2_err_y_ = mfem::ParNormlp( uy_err, 2, MPI_COMM_WORLD );
+
+//     SLIC_INFO( "L2 error (vector): " << l2_err_vec_ );
+//     SLIC_INFO( "L2 error (x):      " << l2_err_x_ );
+//     SLIC_INFO( "L2 error (y):      " << l2_err_y_ );
+//     SLIC_INFO( "Consistency check |err_vec^2 - (err_x^2 + err_y^2)| = "
+//                << std::abs( l2_err_vec_ * l2_err_vec_
+//                             - ( l2_err_x_ * l2_err_x_ + l2_err_y_ * l2_err_y_ ) ) );
+//   }
+// };
+
+// TEST_P( MfemMortarEnergyPatchTest, check_patch_test )
+// {
+//   EXPECT_GT( max_disp_, 0.0 );
+//   EXPECT_NEAR( 0.0, l2_err_vec_, 1.0e-2 );
+//   EXPECT_NEAR( 0.0, l2_err_x_,  1.0e-2 );
+//   EXPECT_NEAR( 0.0, l2_err_y_,  1.0e-2 );
+
+//   MPI_Barrier( MPI_COMM_WORLD );
+// }
+
+// INSTANTIATE_TEST_SUITE_P( tribol, MfemMortarEnergyPatchTest, testing::Values( std::make_tuple( 2 ) ) );
+
+// //------------------------------------------------------------------------------
+// #include "axom/slic/core/SimpleLogger.hpp"
+
+// int main( int argc, char* argv[] )
+// {
+//   int result = 0;
+
+//   MPI_Init( &argc, &argv );
+//   ::testing::InitGoogleTest( &argc, argv );
+
+// #ifdef TRIBOL_USE_UMPIRE
+//   umpire::ResourceManager::getInstance();
+// #endif
+
+//   axom::slic::SimpleLogger logger;
+//   result = RUN_ALL_TESTS();
+
+//   tribol::finalize();
+//   MPI_Finalize();
+
+//   return result;
+// }
+
+
+
 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
 // other Tribol Project Developers. See the top-level LICENSE file for details.
 //
@@ -26,11 +406,23 @@
 #include "tribol/interface/mfem_tribol.hpp"
 
 /**
- * @brief Contact patch test using ENERGY_MORTAR with zero initial gap
- *        and prescribed displacement applied incrementally over timesteps.
+ * @brief Contact patch test using ENERGY_MORTAR with Lagrange multiplier
+ *        enforcement and prescribed displacement applied incrementally.
+ *
+ * Two unit squares [0,1]x[0,1] and [0,1]x[1,2] with zero initial gap.
+ * Linear elasticity with lambda = mu = 50.
+ *
+ * Saddle point system solved each Newton iteration:
  *
- * Two unit squares [0,1]x[0,1] and [0,1]x[1,2] with zero gap.
- * Linear elasticity with lambda = mu = 50.
+ *   [ K + H    G^T ] [ δu ] = -[ R_u ]
+ *   [ G         0  ] [ δλ ]    [ R_λ ]
+ *
+ * where:
+ *   K   = elastic stiffness
+ *   H   = λ · d²g̃/du²  (contact Hessian contribution)
+ *   G   = dg̃/du         (constraint Jacobian)
+ *   R_u = K·u + G^T·λ - f_ext  (force residual)
+ *   R_λ = g̃(u)                  (gap constraint residual)
  *
  * Analytical solution (plane strain, uniaxial stress with sigma_xx = 0):
  *   eps_yy = applied_disp / total_height
@@ -38,7 +430,7 @@
  *   u_y(x,y) = eps_yy * y
  *   u_x(x,y) = eps_xx * x
  */
-class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>> {
+class MfemMortarEnergyLagrangePatchTest : public testing::TestWithParam<std::tuple<int>> {
  protected:
   tribol::RealT max_disp_;
   double l2_err_vec_;
@@ -46,10 +438,13 @@ class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>>
   double l2_err_y_;
 
   // --- User-configurable parameters ---
-  static constexpr int    num_timesteps_ = 10;
+  static constexpr int    num_timesteps_ = 1;
   static constexpr double total_prescribed_disp_ = -0.01;
-  static constexpr double lam_ = 5.0;
-  static constexpr double mu_  = 5.0;
+  static constexpr double lam_ = 50.0;
+  static constexpr double mu_  = 50.0;
+  static constexpr int    max_newton_iter_ = 10;
+  static constexpr double newton_rtol_ = 1.0e-10;
+  static constexpr double newton_atol_ = 1.0e-12;
   // ------------------------------------
 
   void SetUp() override
@@ -63,19 +458,22 @@ class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>>
     auto yfixed_bottom_attrs = std::set<int>( { 1 } );
     auto prescribed_attrs = std::set<int>( { 6 } );
 
-    int nel_per_dir = std::pow( 2, ref_levels );
+    // int nel_per_dir_top = std::pow( 2, ref_levels );
+    // int nel_per_dir_bottom = std::pow(3, ref_levels);
+    int nel_per_dir_top = 4;
+    int nel_per_dir_bottom = 4;
 
     // clang-format off
     mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
-      shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir)
+      shared::MeshBuilder::SquareMesh(nel_per_dir_top, nel_per_dir_top)
         .updateBdrAttrib(1, 1)   // bottom (Fixed Y)
-        .updateBdrAttrib(2, 2)   // right 
+        .updateBdrAttrib(2, 2)   // right
         .updateBdrAttrib(3, 3)   // top  (NonMortar)
         .updateBdrAttrib(4, 4),  // left (X-fixed)
-      shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir)
+      shared::MeshBuilder::SquareMesh(nel_per_dir_bottom, nel_per_dir_bottom)
         .translate({0.0, 1.0})
         .updateBdrAttrib(1, 5)   // bottom (Mortar)
-        .updateBdrAttrib(2, 2)   // right 
+        .updateBdrAttrib(2, 2)   // right
         .updateBdrAttrib(3, 6)   // top  (prescribed displacement)
         .updateBdrAttrib(4, 4)   // left  (Fixed x)
     }));
@@ -91,15 +489,15 @@ class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>>
       mesh.GetNodes( coords );
     }
 
-
-    //Grid fucntion for displacement
+    // Grid function for displacement
     mfem::ParGridFunction displacement( &par_fe_space );
     displacement = 0.0;
 
     mfem::ParGridFunction ref_coords( &par_fe_space );
     mesh.GetNodes( ref_coords );
 
-    //recover dirchlet bd tdof list
+    // ---- Essential boundary conditions ----
+
     mfem::Array<int> ess_vdof_marker( par_fe_space.GetVSize() );
     ess_vdof_marker = 0;
 
@@ -114,6 +512,27 @@ class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>>
         ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
     }
 
+// Pin corners: bottom-left of bottom block (0,0) and top-left of top block (0,2)
+// {
+//   const double tol = 1.0e-10;
+//   const std::vector<std::pair<double, double>> pin_pts = { {0.0, 0.0}, {0.0, 2.0} };
+
+//   for ( int v = 0; v < mesh.GetNV(); ++v ) {
+//     const double* vc = mesh.GetVertex(v);
+//     for ( auto& [px, py] : pin_pts ) {
+//       if ( std::abs(vc[0] - px) < tol && std::abs(vc[1] - py) < tol ) {
+//         mfem::Array<int> vdofs;
+//         par_fe_space.GetVertexVDofs( v, vdofs );
+//         for ( int i = 0; i < vdofs.Size(); ++i )
+//           ess_vdof_marker[ vdofs[i] ] = 1;
+//         break;
+//       }
+//     }
+//   }
+// }
+
+
+
     // y-fixed on bottom
     {
       mfem::Array<int> tmp;
@@ -153,108 +572,240 @@ class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>>
       mfem::FiniteElementSpace::MarkerToList( marker, prescribed_tdof_list );
     }
 
-    // set up mfem elasticity bilinear form
+    // ---- Elastic stiffness matrix ----
+
     mfem::ParBilinearForm a( &par_fe_space );
     mfem::ConstantCoefficient lambda_coeff( lam_ );
     mfem::ConstantCoefficient mu_coeff( mu_ );
     a.AddDomainIntegrator( new mfem::ElasticityIntegrator( lambda_coeff, mu_coeff ) );
     a.Assemble();
     a.Finalize();
-    auto A_elastic_raw = std::unique_ptr<mfem::HypreParMatrix>( a.ParallelAssemble() );
+    auto K_elastic = std::unique_ptr<mfem::HypreParMatrix>( a.ParallelAssemble() );
 
-    //Visit Output
-    mfem::VisItDataCollection visit_dc( "energy_patch_test", &mesh );
+    // ---- VisIt output ----
+
+    mfem::VisItDataCollection visit_dc( "energy_lagrange_patch_test", &mesh );
     visit_dc.SetPrecision( 8 );
     visit_dc.RegisterField( "displacement", &displacement );
+
+    mfem::ParGridFunction exact_disp( &par_fe_space );
+    exact_disp = 0.0;
+    visit_dc.RegisterField( "Exact Replacement", &exact_disp);
     visit_dc.SetCycle( 0 );
     visit_dc.SetTime( 0.0 );
     visit_dc.Save();
 
-    // timestepping loop for displacement
+    // ---- Time-stepping loop ----
+
     double disp_increment = total_prescribed_disp_ / num_timesteps_;
     tribol::RealT dt = 1.0 / num_timesteps_;
     int cs_id = 0, mesh1_id = 0, mesh2_id = 1;
 
-    mfem::Vector X( par_fe_space.GetTrueVSize() );
-    X = 0.0;
+    const int disp_size = par_fe_space.GetTrueVSize();
+
+    mfem::Vector U( disp_size );  // total displacement true-dof vector
+    U = 0.0;
+
+    // Lambda persists across timesteps (warm start)
+    // NOTE: sized after first tribol registration when contact FE space is known
+    mfem::HypreParVector* lambda = nullptr;
+    int contact_size = 0;
 
     for ( int step = 1; step <= num_timesteps_; ++step )
     {
       double current_prescribed_disp = disp_increment * step;
 
-      // Prescribed displacement vector
-      mfem::Vector X_prescribed( par_fe_space.GetTrueVSize() );
-      X_prescribed = 0.0;
+      // Build prescribed displacement vector
+      mfem::Vector U_prescribed( disp_size );
+      U_prescribed = 0.0;
+      for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+        U_prescribed( prescribed_tdof_list[i] ) = current_prescribed_disp;
+      }
+
+      // Set initial guess for this step: use previous converged displacement
+      // with updated prescribed DOFs
       for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
-        X_prescribed( prescribed_tdof_list[i] ) = current_prescribed_disp;
+        U( prescribed_tdof_list[i] ) = current_prescribed_disp;
       }
 
-      // Update coordinates for contact detection
+      // ---- Newton iteration ----
+      for ( int newton = 0; newton < max_newton_iter_; ++newton )
       {
-        mfem::Vector X_temp( X );
-        for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
-          X_temp( prescribed_tdof_list[i] ) = current_prescribed_disp;
+        // Update coordinates with current displacement
+        {
+          auto& P = *par_fe_space.GetProlongationMatrix();
+          P.Mult( U, displacement );
         }
-        auto& P = *par_fe_space.GetProlongationMatrix();
-        P.Mult( X_temp, displacement );
-      }
-      coords = ref_coords;
-      coords += displacement;
-
-      // Re-register tribol each step (internal arrays need fresh allocation
-      // when contact pairs change between steps)
-      coords.ReadWrite();
-      tribol::registerMfemCouplingScheme( cs_id, mesh1_id, mesh2_id, mesh, coords,
-                                          mortar_attrs, nonmortar_attrs,
-                                          tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
-                                          tribol::ENERGY_MORTAR, tribol::FRICTIONLESS,
-                                          tribol::LAGRANGE_MULTIPLIER, tribol::BINNING_GRID );
-      tribol::setLagrangeMultiplierOptions( cs_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
-      tribol::setMfemKinematicConstantPenalty( cs_id, 10000.0, 10000.0 );
-
-      tribol::updateMfemParallelDecomposition();
-      tribol::update( step, step * dt, dt );
-
-      auto A_cont = tribol::getMfemDfDx( cs_id );
-
-      mfem::Vector f_contact( par_fe_space.GetTrueVSize() );
-      f_contact = 0.0;
-      tribol::getMfemResponse( cs_id, f_contact );
-      f_contact.Neg();
-
-      // Inhomogeneous Dirichlet: rhs = f_contact - K * u_prescribed
-      auto A_total = std::unique_ptr<mfem::HypreParMatrix>(
-        mfem::Add( 1.0, *A_elastic_raw, 1.0, *A_cont ) );
-
-      mfem::Vector rhs( par_fe_space.GetTrueVSize() );
-      A_total->Mult( X_prescribed, rhs );
-      rhs.Neg();
-      rhs += f_contact;
-
-      for ( int i = 0; i < ess_tdof_list.Size(); ++i ) {
-        rhs( ess_tdof_list[i] ) = 0.0;
-      }
+        coords = ref_coords;
+        coords += displacement;
+
+        // Register tribol and update contact data
+        coords.ReadWrite();
+        tribol::registerMfemCouplingScheme( cs_id, mesh1_id, mesh2_id, mesh, coords,
+                                            mortar_attrs, nonmortar_attrs,
+                                            tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
+                                            tribol::ENERGY_MORTAR, tribol::FRICTIONLESS,
+                                            tribol::LAGRANGE_MULTIPLIER, tribol::BINNING_GRID );
+        tribol::setLagrangeMultiplierOptions( cs_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
+
+        tribol::updateMfemParallelDecomposition();
+        tribol::update( step, step * dt, dt );
+
+        // ---- Get contact surface FE space and initialize lambda on first pass ----
+        // TODO: adapt to actual tribol API for accessing contact FE space
+        auto& contact_fes = tribol::getMfemContactFESpace( cs_id );
+        contact_size = contact_fes.GetTrueVSize();
+
+        if ( lambda == nullptr ) {
+          lambda = new mfem::HypreParVector( &contact_fes );
+          *lambda = 0.0;
+        }
+
+        // ---- Evaluate contact residual ----
+        mfem::HypreParVector r_contact_force( &par_fe_space );  // G^T * lambda (disp-sized)
+        r_contact_force = 0.0;
+        mfem::HypreParVector r_gap( &contact_fes );             // g_tilde (contact-sized)
+        r_gap = 0.0;
+
+        // TODO: adapt to actual tribol API
+        // NOTE: verify that evaluateContactResidual computes r_force = G^T * lambda
+        //       (check Mult vs MultTranspose in the adapter -- see note below)
+        tribol::evaluateContactResidual( cs_id, *lambda, r_contact_force, r_gap );
+
+        // ---- Evaluate contact Jacobian blocks ----
+        std::unique_ptr<mfem::HypreParMatrix> H;   // lambda * d2g/du2 (disp x disp)
+        std::unique_ptr<mfem::HypreParMatrix> G;   // dg/du (contact x disp)
+        // TODO: adapt to actual tribol API
+        tribol::evaluateContactJacobian( cs_id, *lambda, H, G );
+
+        // ---- Assemble block residual ----
+        //
+        //   R_u = K*U + G^T*lambda    (elastic force + contact force)
+        //         (no external body forces in this test, only prescribed disp)
+        //   R_λ = g̃(U)
+        //
+        // Note: the prescribed displacement is handled by eliminating those DOFs
+        // from the Newton system and keeping them fixed at the prescribed values.
+
+        mfem::Vector R_u( disp_size );
+        K_elastic->Mult( U, R_u );       // R_u = K * U
+        R_u += r_contact_force;           // R_u += G^T * lambda
+
+        mfem::Vector R_lambda( contact_size );
+        R_lambda = r_gap;                 // R_lambda = g_tilde
+
+        // Compute residual norms for convergence check
+        double norm_R_u = mfem::InnerProduct( MPI_COMM_WORLD, R_u, R_u );
+        double norm_R_lambda = mfem::InnerProduct( MPI_COMM_WORLD, R_lambda, R_lambda );
+        // Subtract essential DOF contributions from the squared residual norm
+        for ( int i = 0; i < ess_tdof_list.Size(); ++i ) {
+          norm_R_u -= R_u( ess_tdof_list[i] ) * R_u( ess_tdof_list[i] );
+        }
+        double residual_norm = std::sqrt( std::abs( norm_R_u ) + norm_R_lambda );
+
+        SLIC_INFO( "  Step " << step << " Newton " << newton
+                   << " | residual = " << residual_norm );
 
-      A_total->EliminateRowsCols( ess_tdof_list );
+        if ( newton > 0 && residual_norm < newton_atol_ ) {
+          SLIC_INFO( "  Newton converged (abs tol) at iteration " << newton );
+          break;
+        }
+
+        // ---- Assemble block Jacobian ----
+        //
+        //   J = [ K + H    G^T ]
+        //       [ G         0  ]
 
-      mfem::Vector X_free( par_fe_space.GetTrueVSize() );
-      X_free = 0.0;
+        // SLIC_INFO( "    Building J_uu..." );
+
+        // (0,0) block: K + H
+        // NOTE: H may be null on the first Newton iteration when lambda = 0
+        std::unique_ptr<mfem::HypreParMatrix> J_uu;
+        if ( H && H->NumRows() > 0 ) {
+          J_uu.reset( mfem::Add( 1.0, *K_elastic, 1.0, *H ) );
+        } else {
+          J_uu.reset( new mfem::HypreParMatrix( *K_elastic ) );
+        }
 
-      mfem::HypreBoomerAMG amg( *A_total );
-      amg.SetElasticityOptions( &par_fe_space );
-      amg.SetPrintLevel( 0 );
+        // SLIC_INFO( "    J_uu: " << J_uu->NumRows() << " x " << J_uu->NumCols() );
+        // SLIC_INFO( "    G:    " << G->NumRows() << " x " << G->NumCols() );
 
-      mfem::MINRESSolver solver( MPI_COMM_WORLD );
-      solver.SetRelTol( 1.0e-8 );
-      solver.SetAbsTol( 1.0e-12 );
-      solver.SetMaxIter( 5000 );
-      solver.SetPrintLevel( step == num_timesteps_ ? 3 : 1 );
-      solver.SetPreconditioner( amg );
-      solver.SetOperator( *A_total );
-      solver.Mult( rhs, X_free );
+        // G^T for the (0,1) block
+        auto G_T = std::unique_ptr<mfem::HypreParMatrix>( G->Transpose() );
 
-      X = X_free;
-      X += X_prescribed;
+        // SLIC_INFO( "    G^T:  " << G_T->NumRows() << " x " << G_T->NumCols() );
+
+        // ---- Apply essential BCs ----
+        // Zero essential DOF entries of R_u, then eliminate the matching rows/cols of J_uu
+        for ( int i = 0; i < ess_tdof_list.Size(); ++i ) {
+          R_u( ess_tdof_list[i] ) = 0.0;
+        }
+        J_uu->EliminateRowsCols( ess_tdof_list );
+
+        // Zero out essential DOF rows in G^T (cols in G)
+        // Use EliminateRows on G^T which is simpler than EliminateCols on G
+        G_T->EliminateRows( ess_tdof_list );
+
+        // Rebuild G from the modified G^T to stay consistent
+        G = std::unique_ptr<mfem::HypreParMatrix>( G_T->Transpose() );
+
+        // SLIC_INFO( "    After BC elim - J_uu: " << J_uu->NumRows() << " x " << J_uu->NumCols() );
+        // SLIC_INFO( "    After BC elim - G:    " << G->NumRows() << " x " << G->NumCols() );
+
+        // ---- Set up block system ----
+
+        mfem::Array<int> block_offsets( 3 );
+        block_offsets[0] = 0;
+        block_offsets[1] = disp_size;
+        block_offsets[2] = disp_size + contact_size;
+
+        // SLIC_INFO( "    Block offsets: [0, " << disp_size << ", " << disp_size + contact_size << "]" );
+
+        mfem::BlockOperator J_block( block_offsets );
+        J_block.SetBlock( 0, 0, J_uu.get() );
+        J_block.SetBlock( 0, 1, G_T.get() );
+        J_block.SetBlock( 1, 0, G.get() );
+
+        // Block RHS = -[R_u; R_lambda]
+        mfem::BlockVector rhs( block_offsets );
+        rhs.GetBlock( 0 ) = R_u;
+        rhs.GetBlock( 0 ).Neg();
+        rhs.GetBlock( 1 ) = R_lambda;
+        rhs.GetBlock( 1 ).Neg();
+
+        // SLIC_INFO( "    Solving saddle point system..." );
+
+        // ---- Solve with unpreconditioned MINRES ----
+        // (keep it simple for debugging; add preconditioner once this works)
+
+        mfem::BlockVector delta( block_offsets );
+        delta = 0.0;
+
+        mfem::MINRESSolver solver( MPI_COMM_WORLD );
+        solver.SetRelTol( 1.0e-10 );
+        solver.SetAbsTol( 1.0e-14 );
+        solver.SetMaxIter( 5000 );
+        solver.SetPrintLevel( 3 );
+        solver.SetOperator( J_block );
+        solver.Mult( rhs, delta );
+
+        SLIC_INFO( "    Solver converged: " << solver.GetConverged()
+                   << " in " << solver.GetNumIterations() << " iterations" );
+
+        // ---- Update solution ----
+
+        mfem::Vector& delta_u = delta.GetBlock( 0 );
+        mfem::Vector& delta_lambda = delta.GetBlock( 1 );
+
+        U += delta_u;
+        *lambda += delta_lambda;
+
+        // Re-enforce prescribed DOFs exactly (guard against solver drift)
+        for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+          U( prescribed_tdof_list[i] ) = current_prescribed_disp;
+        }
+
+      }  // end Newton loop
 
       SLIC_INFO( "Timestep " << step << "/" << num_timesteps_
                  << " | prescribed disp = " << current_prescribed_disp );
@@ -262,17 +813,19 @@ class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>>
       // Save VisIt output
       {
         auto& P = *par_fe_space.GetProlongationMatrix();
-        P.Mult( X, displacement );
+        P.Mult( U, displacement );
       }
-      visit_dc.SetCycle( step );
-      visit_dc.SetTime( step * dt );
-      visit_dc.Save();
-    }
 
-    //Get final disaplacent
+
+    }  // end timestep loop
+
+    // Clean up
+    delete lambda;
+
+    // ---- Get final displacement ----
     {
       auto& P = *par_fe_space.GetProlongationMatrix();
-      P.Mult( X, displacement );
+      P.Mult( U, displacement );
     }
 
     auto local_max = displacement.Max();
@@ -282,12 +835,6 @@ class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>>
 
     // -----------------------------------------------------------------
     // Analytical solution comparison
-    //
-    // Plane strain, uniaxial stress (sigma_xx = 0, free right side):
-    //   eps_yy = applied_disp / total_height = -0.01 / 2.0 = -0.005
-    //   eps_xx = -lambda/(lambda + 2*mu) * eps_yy
-    //   u_y = eps_yy * y
-    //   u_x = eps_xx * x
     // -----------------------------------------------------------------
     double total_height = 2.0;
     double eps_yy = total_prescribed_disp_ / total_height;
@@ -301,9 +848,16 @@ class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>>
         u[1] = eps_yy * x[1];
       } );
 
-    mfem::ParGridFunction exact_disp( &par_fe_space );
+
+
     exact_disp.ProjectCoefficient( exact_sol_coeff );
 
+
+
+    visit_dc.SetCycle( 1 );
+    visit_dc.SetTime( 1.0 );
+    visit_dc.Save();
+
     // Vector error
     mfem::ParGridFunction error_vec( exact_disp );
     error_vec -= displacement;
@@ -341,7 +895,7 @@ class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>>
   }
 };
 
-TEST_P( MfemMortarEnergyPatchTest, check_patch_test )
+TEST_P( MfemMortarEnergyLagrangePatchTest, check_patch_test )
 {
   EXPECT_GT( max_disp_, 0.0 );
   EXPECT_NEAR( 0.0, l2_err_vec_, 1.0e-2 );
@@ -351,7 +905,7 @@ TEST_P( MfemMortarEnergyPatchTest, check_patch_test )
   MPI_Barrier( MPI_COMM_WORLD );
 }
 
-INSTANTIATE_TEST_SUITE_P( tribol, MfemMortarEnergyPatchTest, testing::Values( std::make_tuple( 2 ) ) );
+INSTANTIATE_TEST_SUITE_P( tribol, MfemMortarEnergyLagrangePatchTest, testing::Values( std::make_tuple( 2 ) ) );
 
 //------------------------------------------------------------------------------
 #include "axom/slic/core/SimpleLogger.hpp"
diff --git a/src/tribol/interface/mfem_tribol.cpp b/src/tribol/interface/mfem_tribol.cpp
index fe36016e..c86b3b83 100644
--- a/src/tribol/interface/mfem_tribol.cpp
+++ b/src/tribol/interface/mfem_tribol.cpp
@@ -444,6 +444,53 @@ std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx( IndexT cs_id )
   return nullptr;
 }
 
+
+//**************** */NEW LAGRANGE FUNTIONS:
+mfem::ParFiniteElementSpace& getMfemContactFESpace( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(),
+                      axom::fmt::format( "Coupling scheme cs_id={0} does not contain MFEM submesh data.",
+                                         cs_id ) );
+  return const_cast<mfem::ParFiniteElementSpace&>( cs->getMfemSubmeshData()->GetSubmeshFESpace() );
+}
+
+void evaluateContactResidual( IndexT cs_id,
+                              const mfem::HypreParVector& lambda,
+                              mfem::HypreParVector& r_force,
+                              mfem::HypreParVector& r_gap )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(),
+                      "Coupling scheme does not contain a contact formulation." );
+  cs->getContactFormulation()->evaluateContactResidual( lambda, r_force, r_gap );
+}
+
+void evaluateContactJacobian( IndexT cs_id,
+                              const mfem::HypreParVector& lambda,
+                              std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                              std::unique_ptr<mfem::HypreParMatrix>& df_dlambda )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(),
+                      "Coupling scheme does not contain a contact formulation." );
+  cs->getContactFormulation()->evaluateContactJacobian( lambda, df_du, df_dlambda );
+}
+
+/////***************** */END LAGRANGE FUNCTIONS
+
 void getMfemGap( IndexT cs_id, mfem::Vector& g )
 {
   auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
diff --git a/src/tribol/interface/mfem_tribol.hpp b/src/tribol/interface/mfem_tribol.hpp
index 13bf07cb..c358fb11 100644
--- a/src/tribol/interface/mfem_tribol.hpp
+++ b/src/tribol/interface/mfem_tribol.hpp
@@ -325,6 +325,18 @@ mfem::ParGridFunction& getMfemPressure( IndexT cs_id );
 
 mfem::HypreParVector getMfemTDofPressure( IndexT cs_id );
 
+mfem::ParFiniteElementSpace& getMfemContactFESpace( IndexT cs_id );  // returns the submesh FE space stored for cs_id
+/// Lagrange multiplier mode: forwards to the contact formulation of cs_id, which fills r_force and r_gap.
+void evaluateContactResidual( IndexT cs_id,
+                              const mfem::HypreParVector& lambda,
+                              mfem::HypreParVector& r_force,
+                              mfem::HypreParVector& r_gap );
+/// Lagrange multiplier mode: forwards to the contact formulation of cs_id, which sets df_du and df_dlambda.
+void evaluateContactJacobian( IndexT cs_id,
+                              const mfem::HypreParVector& lambda,
+                              std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                              std::unique_ptr<mfem::HypreParMatrix>& df_dlambda );
+
 /**
  * @brief Updates mesh parallel decomposition and related grid functions/Jacobian when coordinates are updated
  *
diff --git a/src/tribol/physics/ContactFormulation.hpp b/src/tribol/physics/ContactFormulation.hpp
index 3a190f86..cbb398e0 100644
--- a/src/tribol/physics/ContactFormulation.hpp
+++ b/src/tribol/physics/ContactFormulation.hpp
@@ -135,6 +135,14 @@ class ContactFormulation {
    * @note Requires updateNodalForces() to be called first.
    */
   virtual std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const = 0;
+  /// Fills the force residual r_force and the gap residual r_gap for the given multiplier vector lambda.
+  virtual void evaluateContactResidual( const mfem::HypreParVector& lambda,
+                                        mfem::HypreParVector& r_force,
+                                        mfem::HypreParVector& r_gap ) = 0;
+  /// Hands back the contact Jacobian blocks df/du and df/dlambda for lambda through the two output pointers.
+  virtual void evaluateContactJacobian( const mfem::HypreParVector& lambda,
+                                        std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                                        std::unique_ptr<mfem::HypreParMatrix>& df_dlambda ) = 0;
 #endif
 };
 
diff --git a/src/tribol/physics/ContactFormulationFactory.cpp b/src/tribol/physics/ContactFormulationFactory.cpp
index 46bf4470..325493e3 100644
--- a/src/tribol/physics/ContactFormulationFactory.cpp
+++ b/src/tribol/physics/ContactFormulationFactory.cpp
@@ -19,8 +19,9 @@ std::unique_ptr<ContactFormulation> createContactFormulation( CouplingScheme* cs
   if ( cs->getContactMethod() == ENERGY_MORTAR ) {
     // Default parameters for now, or extract from CouplingScheme if available
     double k = 1.0;
-    double delta = 0.01;
+    double delta = 0.0;
     int N = 3;
+    bool use_penalty_ = (cs->getEnforcementMethod() == PENALTY);
 
 #if defined( TRIBOL_USE_ENZYME ) && defined( BUILD_REDECOMP )
     if ( cs->hasMfemData() ) {
@@ -35,7 +36,7 @@ std::unique_ptr<ContactFormulation> createContactFormulation( CouplingScheme* cs
     SLIC_ERROR_ROOT_IF( !cs->hasMfemJacobianData(), "ENERGY_MORTAR requires MFEM Jacobian data." );
 
     return std::make_unique<NewMethodAdapter>( *cs->getMfemSubmeshData(), *cs->getMfemJacobianData(), cs->getMesh1(),
-                                               cs->getMesh2(), k, delta, N );
+                                               cs->getMesh2(), k, delta, N, use_penalty_ );
 #else
     SLIC_ERROR_ROOT( "ENERGY_MORTAR requires Enzyme and redecomp to be built." );
     return nullptr;
diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index 820d1e39..221eda8a 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -4,16 +4,18 @@
 // SPDX-License-Identifier: (MIT)
 
 #include "tribol/physics/NewMethodAdapter.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/mesh/MfemData.hpp"
 
 namespace tribol {
 
 #ifdef TRIBOL_USE_ENZYME
 
 NewMethodAdapter::NewMethodAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1,
-                                    MeshData& mesh2, double k, double delta, int N )
+                                    MeshData& mesh2, double k, double delta, int N, bool use_penalty_ )
     // NOTE: mesh1 maps to mesh2_ and mesh2 maps to mesh1_. This is to keep consistent with mesh1_ being non-mortar and
     // mesh2_ being mortar as is typical in the literature, but different from Tribol convention.
-    : submesh_data_( submesh_data ), jac_data_( jac_data ), mesh1_( mesh2 ), mesh2_( mesh1 )
+    : submesh_data_( submesh_data ), jac_data_( jac_data ), mesh1_( mesh2 ), mesh2_( mesh1 ), use_penalty_( use_penalty_ )
 {
   if ( mesh1.numberOfNodes() > 0 && mesh2.numberOfNodes() > 0 ) {
     SLIC_ERROR_ROOT_IF( mesh1.spatialDimension() != 2 || mesh2.spatialDimension() != 2,
@@ -125,7 +127,7 @@ void NewMethodAdapter::updateNodalGaps()
   P_submesh.MultTranspose( g_tilde_linear_form, g_tilde_vec_.get() );
 
   mfem::Array<int> rows_to_elim;
-  if ( !tied_contact_ ) {
+  if ( !tied_contact_ && use_penalty_) {
     rows_to_elim.Reserve( g_tilde_vec_.Size() );
     for ( int i{ 0 }; i < g_tilde_vec_.Size(); ++i ) {
       if ( g_tilde_vec_[i] > 0.0 ) {
@@ -145,7 +147,7 @@ void NewMethodAdapter::updateNodalGaps()
 
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
   dg_tilde_dx_ = jac_data_.GetMfemJacobian( dg_tilde_dx_contribs.get() );
-  if ( !tied_contact_ ) {
+  if ( !tied_contact_ && use_penalty_) {
     // technically, we should do this on all the vectors/matrices below, but it looks like the mutliplication operators
     // below will zero them out anyway
     dg_tilde_dx_.EliminateRows( rows_to_elim );
@@ -161,6 +163,7 @@ void NewMethodAdapter::updateNodalForces()
 
   // compute nodal pressures. these are used in the Hessian vector product below so we don't have to assemble a Hessian
   // NOTE: in general, pressure should likely be set by the host code
+
   pressure_vec_ = params_.k * gap_vec_;
 
   energy_ = pressure_vec_.dot( g_tilde_vec_ );
@@ -283,6 +286,107 @@ RealT NewMethodAdapter::computeTimeStep()
   return 1.0;
 }
 
+void NewMethodAdapter::compute_df_du_lagrange( const mfem::HypreParVector& lambda, 
+                                               std::unique_ptr<mfem::HypreParMatrix>& df_du)
+{
+  // Builds df_du = sum over pairs of ( lambda * d2(g_tilde)/dx2 ); lambda is first moved to the redecomp space.
+  mfem::GridFunction redecomp_lambda( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_lambda(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_lambda.SetFromTrueDofs(lambda );  // true-dof vector -> submesh grid function
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_lambda, redecomp_lambda);  // submesh -> redecomp
+  // One contribution list per (row space, column space) combination; push_back() below addresses them as 0..3.
+  JacobianContributions df_dx_contribs( { { BlockSpace::NONMORTAR, BlockSpace::NONMORTAR },
+                                                      { BlockSpace::NONMORTAR, BlockSpace::MORTAR},
+                                                      { BlockSpace::MORTAR, BlockSpace::NONMORTAR },
+                                                      { BlockSpace::MORTAR, BlockSpace::MORTAR}});
+
+  df_dx_contribs.reserve( pairs_.size(), 16);  // 16 = entries in one 4x4 block
+  // Index permutation applied to both axes of the 8x8 Hessians (swaps 1<->2 and 5<->6); presumably a dof reordering - TODO confirm.
+  const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7};
+
+  auto mesh1_view = mesh1_.getView();
+  auto mesh2_view = mesh2_.getView();
+  // Loop over element pairs and accumulate their Hessian contributions
+  for ( auto& pair : pairs_ ) {
+    InterfacePair flipped_pair( pair.m_element_id2, pair.m_element_id1 );  // element ids swapped w.r.t. the stored pair
+    const auto elem1 = static_cast<int>( flipped_pair.m_element_id1 );
+    const auto node11 = mesh1_view.getConnectivity()( elem1, 0 );
+    const auto node12 = mesh1_view.getConnectivity()( elem1, 1 );
+    const auto elem2 = static_cast<int>( flipped_pair.m_element_id2 );
+
+    // Multiplier values at the two nodes of elem1 (no factor of 2 in Lagrange mode)
+    const RealT lambda1 = redecomp_lambda( node11 );
+    const RealT lambda2 = redecomp_lambda( node12 );
+    // Skipping pairs whose multipliers are both zero is currently disabled:
+    // if ( lambda1 == 0.0 && lambda2 == 0.0 ) {
+    //   continue;
+    // }
+
+    // Hessians of g_tilde: two flat 8x8 (64-entry) arrays, weighted below by lambda1 and lambda2 respectively
+    double d2g_dx2_node1[64];
+    double d2g_dx2_node2[64];
+    evaluator_->d2_g2tilde( flipped_pair, mesh1_view, mesh2_view, d2g_dx2_node1, d2g_dx2_node2 );
+
+    // Split the lambda-weighted 8x8 Hessian into 2x2 blocks of 4x4 entries: (i, j) picks the block, (k, l) the entry
+    double df_dx_blocks[2][2][16];
+    for ( int i{ 0 }; i < 2; ++i ) {
+      for ( int j{ 0 }; j < 2; ++j ) {
+        for ( int k{ 0 }; k < 4; ++k ) {
+          for ( int l{ 0 }; l < 4; ++l ) {
+            df_dx_blocks[i][j][l + k * 4] = lambda1 * d2g_dx2_node1[node_idx[l + i * 4] + node_idx[k + j * 4] * 8] +
+                                            lambda2 * d2g_dx2_node2[node_idx[l + i * 4] + node_idx[k + j * 4] * 8];
+          }
+        }
+      }
+    }
+    // Block (i, j) goes to contribution list 2 * i + j; index 0 pairs with elem1 and index 1 with elem2
+    df_dx_contribs.push_back( 0, elem1, elem1, df_dx_blocks[0][0], 16 );
+    df_dx_contribs.push_back( 1, elem1, elem2, df_dx_blocks[0][1], 16 );
+    df_dx_contribs.push_back( 2, elem2, elem1, df_dx_blocks[1][0], 16 );
+    df_dx_contribs.push_back( 3, elem2, elem2, df_dx_blocks[1][1], 16 );
+  }
+
+    auto df_dx_temp = jac_data_.GetMfemJacobian( df_dx_contribs.get() );
+    // Hand the assembled matrix to the caller as a unique_ptr
+    df_du = std::unique_ptr<mfem::HypreParMatrix>( df_dx_temp.release() );
+}
+// Lagrange multiplier residuals: writes r_force = (dg_tilde/dx)^T * lambda and r_gap = g_tilde into the outputs.
+// Raises a SLIC error in penalty mode or while g_tilde_vec_ is still empty (updateNodalGaps() not run yet).
+void NewMethodAdapter::evaluateContactResidual( const mfem::HypreParVector& lambda,
+                                                mfem::HypreParVector& r_force,
+                                                mfem::HypreParVector& r_gap )
+{
+  SLIC_ERROR_ROOT_IF(use_penalty_, "evaluateContactResidual() should only be  called in lagrange multiplier mode");
+
+  SLIC_ERROR_ROOT_IF( g_tilde_vec_.Size() == 0, "updateNodalGaps() must be called before evaluateContactResidual()");
+  // NOTE(review): evaluateContactJacobian() calls dg_tilde_dx_.release(); confirm this never runs after it without a new updateNodalGaps().
+  // Force residual: r_force = (dg_tilde/dx)^T * lambda (transpose product)
+  dg_tilde_dx_->MultTranspose(lambda, r_force);
+
+  // Gap residual: r_gap is assigned from the current g_tilde vector
+  r_gap = g_tilde_vec_.get();
+}
+// Lagrange multiplier Jacobian blocks: df_dlambda takes over dg_tilde_dx_, df_du comes from compute_df_du_lagrange().
+void NewMethodAdapter::evaluateContactJacobian( const mfem::HypreParVector& lambda,
+                                                std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                                                std::unique_ptr<mfem::HypreParMatrix>& df_dlambda )
+{
+  SLIC_ERROR_ROOT_IF( use_penalty_, 
+                      "evaluateContactJacobian() should only be called in Lagrange multiplier mode" );
+  
+  SLIC_ERROR_ROOT_IF( g_tilde_vec_.Size() == 0,
+                      "updateNodalGaps() must be called before evaluateContactJacobian()" );
+  // NOTE(review): release() hands dg_tilde_dx_ to the caller; the member presumably stays empty until updateNodalGaps() reassigns it.
+  // df/dlambda = dg_tilde/du:
+  df_dlambda = std::unique_ptr<mfem::HypreParMatrix>( dg_tilde_dx_.release() );
+
+  // df/du = lambda * d2g_tilde/du2
+  compute_df_du_lagrange( lambda, df_du ); 
+
+}
+
+
 std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDx() const
 {
   return std::unique_ptr<mfem::HypreParMatrix>( df_dx_.release() );
diff --git a/src/tribol/physics/NewMethodAdapter.hpp b/src/tribol/physics/NewMethodAdapter.hpp
index 652ccaa2..caf986d7 100644
--- a/src/tribol/physics/NewMethodAdapter.hpp
+++ b/src/tribol/physics/NewMethodAdapter.hpp
@@ -32,7 +32,7 @@ class NewMethodAdapter : public ContactFormulation {
    * @param N Quadrature order
    */
   NewMethodAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1, MeshData& mesh2,
-                    double k, double delta, int N );
+                    double k, double delta, int N, bool use_penalty_ = true );
 
   virtual ~NewMethodAdapter() = default;
 
@@ -62,11 +62,27 @@ class NewMethodAdapter : public ContactFormulation {
   std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx() const override;
 
   std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const override;
+  /// Lagrange multiplier mode: fills r_force and r_gap for the multipliers lambda (overrides ContactFormulation).
+  void evaluateContactResidual( const mfem::HypreParVector& lambda,
+                                 mfem::HypreParVector& r_force,
+                                 mfem::HypreParVector& r_gap) override;
+  /// Lagrange multiplier mode: sets df_du and df_dlambda for the multipliers lambda (overrides ContactFormulation).
+  /// df_dlambda receives the stored gap Jacobian, so updateNodalGaps() must have been called beforehand.
+  void evaluateContactJacobian( const mfem::HypreParVector& lambda,
+                                std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                                std::unique_ptr<mfem::HypreParMatrix>& df_dlambda) override;
+  /// Helper for evaluateContactJacobian(): assembles the lambda-weighted Hessian of g_tilde and stores it in df_du.
+  void compute_df_du_lagrange( const mfem::HypreParVector& lambda, 
+                               std::unique_ptr<mfem::HypreParMatrix>& df_du);
+
+
 #endif
 
  private:
   // --- Member Variables ---
 
+  bool use_penalty_;
+
   double area_tol_{ 1.0e-14 };
   bool tied_contact_ = false;
 
diff --git a/src/tribol/physics/new_method.cpp b/src/tribol/physics/new_method.cpp
index fe82753d..1b12319e 100644
--- a/src/tribol/physics/new_method.cpp
+++ b/src/tribol/physics/new_method.cpp
@@ -61,6 +61,14 @@ void determine_legendre_nodes( int N, std::vector<double>& x )
     x[1] = -a;
     x[2] = a;
     x[3] = b;
+    } else if ( N == 5 ) {
+    const double a = std::sqrt( 5.0 - 2.0 * std::sqrt( 10.0 / 7.0 ) ) / 3.0;
+    const double b = std::sqrt( 5.0 + 2.0 * std::sqrt( 10.0 / 7.0 ) ) / 3.0;
+    x[0] = -b;
+    x[1] = -a;
+    x[2] = 0.0;
+    x[3] = a;
+    x[4] = b;
   } else {
     assert( false && "Unsupported quadrature order" );
   }
@@ -78,14 +86,23 @@ void determine_legendre_weights( int N, std::vector<double>& W )
     W[0] = 5.0 / 9.0;
     W[1] = 8.0 / 9.0;
     W[2] = 5.0 / 9.0;
-  } else {
+  } else if (N ==4) {
     W[0] = ( 18 - std::sqrt( 30 ) ) / 36.0;
     W[1] = ( 18 + std::sqrt( 30 ) ) / 36.0;
     W[2] = ( 18 + std::sqrt( 30 ) ) / 36.0;
     W[3] = ( 18 - std::sqrt( 30 ) ) / 36.0;
+  } else if ( N == 5 ) {
+    W[0] = ( 322.0 - 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+    W[1] = ( 322.0 + 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+    W[2] = 128.0 / 225.0;
+    W[3] = ( 322.0 + 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+    W[4] = ( 322.0 - 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+  } else {
+    assert( false && "Unsupported quadrature order" );
   }
 }
 
+
 void iso_map( const double* coord1, const double* coord2, double xi, double* mapped_coord )
 {
   double N1 = 0.5 - xi;
@@ -408,22 +425,72 @@ std::array<double, 2> ContactSmoothing::bounds_from_projections( const std::arra
 
   const double del = p_.del;
 
-  if ( xi_max < -0.5 - del ) {
-    xi_max = -0.5 - del;
+    if ( xi_max < -0.5) {
+    xi_max = -0.5;
   }
-  if ( xi_min > 0.5 + del ) {
-    xi_min = 0.5 + del;
+  if ( xi_min > 0.5 ) {
+    xi_min = 0.5;
   }
-  if ( xi_min < -0.5 - del ) {
-    xi_min = -0.5 - del;
+  if ( xi_min < -0.5 ) {
+    xi_min = -0.5;
   }
-  if ( xi_max > 0.5 + del ) {
-    xi_max = 0.5 + del;
+  if ( xi_max > 0.5 ) {
+    xi_max = 0.5;
   }
 
+  // if ( xi_max < -0.5 - del ) {
+  //   xi_max = -0.5 - del;
+  // }
+  // if ( xi_min > 0.5 + del ) {
+  //   xi_min = 0.5 + del;
+  // }
+  // if ( xi_min < -0.5 - del ) {
+  //   xi_min = -0.5 - del;
+  // }
+  // if ( xi_max > 0.5 + del ) {
+  //   xi_max = 0.5 + del;
+  // }
+
   return { xi_min, xi_max };
 }
 
+// std::array<double, 2> ContactSmoothing::smooth_bounds( const std::array<double, 2>& bounds ) const
+// {
+//   std::array<double, 2> smooth_bounds;
+//   const double del = p_.del;
+//   for ( int i = 0; i < 2; ++i ) {
+//     double xi = 0.0;
+//     double xi_hat = 0.0;
+//     xi = bounds[i] + 0.5;
+//     if (del == 0.0) {
+//       xi_hat = xi;
+//     }
+//     else{
+//     if ( 0.0 - del <= xi && xi <= del ) {
+//       xi_hat = ( 1.0 / ( 4 * del ) ) * ( xi * xi ) + 0.5 * xi + del / 4.0;
+//       std::cout << "zone1" << std::endl;
+//     } else if ( ( 1.0 - del ) <= xi && xi <= 1.0 + del ) {
+//       std::cout << "Zone 2: " << std::endl;
+//       double b = -1.0 / ( 4.0 * del );
+//       double c = 0.5 + 1.0 / ( 2.0 * del );
+//       double d = 1.0 - del + ( 1.0 / ( 4.0 * del ) ) * pow( 1.0 - del, 2 ) - 0.5 * ( 1.0 - del ) -
+//                  ( 1.0 - del ) / ( 2.0 * del );
+
+//       xi_hat = b * xi * xi + c * xi + d;
+//     } else if ( del <= xi && xi <= ( 1.0 - del ) ) {
+//       xi_hat = xi;
+//       std::cout << "zone3" << std::endl;
+//     }
+//     }
+//     smooth_bounds[i] = xi_hat - 0.5;
+//       std::cout << "Smooth Bounds: " << smooth_bounds[i] << std::endl;
+//   }
+
+//   return smooth_bounds;
+// }
+
+
+
 std::array<double, 2> ContactSmoothing::smooth_bounds( const std::array<double, 2>& bounds ) const
 {
   std::array<double, 2> smooth_bounds;
@@ -432,28 +499,38 @@ std::array<double, 2> ContactSmoothing::smooth_bounds( const std::array<double,
     double xi = 0.0;
     double xi_hat = 0.0;
     xi = bounds[i] + 0.5;
-    if ( 0.0 - del <= xi && xi <= del ) {
-      xi_hat = ( 1.0 / ( 4 * del ) ) * ( xi * xi ) + 0.5 * xi + del / 4.0;
-      // std::cout << "zone1" << std::endl;
-    } else if ( ( 1.0 - del ) <= xi && xi <= 1.0 + del ) {
-      // std::cout << "Zone 2: " << std::endl;
-      double b = -1.0 / ( 4.0 * del );
-      double c = 0.5 + 1.0 / ( 2.0 * del );
-      double d = 1.0 - del + ( 1.0 / ( 4.0 * del ) ) * pow( 1.0 - del, 2 ) - 0.5 * ( 1.0 - del ) -
-                 ( 1.0 - del ) / ( 2.0 * del );
-
-      xi_hat = b * xi * xi + c * xi + d;
+    if (del == 0.0) {
+      xi_hat = xi;
+    }
+    else{
+    if ( 0.0 <= xi && xi <= del ) {
+      xi_hat = (xi*xi*(2 * del -xi)) / (del * del);
+      std::cout << "zone1" << std::endl;
+    } else if ( ( 1.0 - del ) <= xi && xi <= 1.0 ) {
+      std::cout << "Zone 2: " << std::endl;
+      xi_hat = 1 - ((1 - xi)*(1 - xi) * (2 * del - ( 1 - xi )) / (del * del));
+    
     } else if ( del <= xi && xi <= ( 1.0 - del ) ) {
       xi_hat = xi;
-      // std::cout << "zone3" << std::endl;
+      std::cout << "zone3" << std::endl;
+    }
     }
     smooth_bounds[i] = xi_hat - 0.5;
-    //   std::cout << "Smooth Bounds: " << smooth_bounds[i] << std::endl;
+      std::cout << "Smooth Bounds: " << smooth_bounds[i] << std::endl;
   }
 
   return smooth_bounds;
 }
 
+
+
+
+
+
+
+
+
+
 QuadPoints ContactEvaluator::compute_quadrature( const std::array<double, 2>& xi_bounds ) const
 {
   const int N = p_.N;
@@ -633,7 +710,7 @@ std::array<double, 2> ContactEvaluator::compute_pressures( const NodalContactDat
       pressures[i] = 0.0;
     }
   }
-  // std::cout << "pressures: " << pressures[0] << ", " << pressures[1] << std::endl;
+  // std::cout << "*************************pressures: " << pressures[0] << ", " << pressures[1] << std::endl;
 
   return pressures;
 }

From 8a9b850cc0ee7e3d966ea8fcbca5641ab58adb1b Mon Sep 17 00:00:00 2001
From: Ryan Lutz <lutz23@llnl.gov>
Date: Mon, 23 Feb 2026 12:45:23 -0800
Subject: [PATCH 52/56] more changes

---
 src/tests/tribol_new_energy_patch.cpp            | 4 ++--
 src/tribol/physics/ContactFormulationFactory.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/tests/tribol_new_energy_patch.cpp b/src/tests/tribol_new_energy_patch.cpp
index 0103ece7..c28cebe5 100644
--- a/src/tests/tribol_new_energy_patch.cpp
+++ b/src/tests/tribol_new_energy_patch.cpp
@@ -460,8 +460,8 @@ class MfemMortarEnergyLagrangePatchTest : public testing::TestWithParam<std::tup
 
     // int nel_per_dir_top = std::pow( 2, ref_levels );
     // int nel_per_dir_bottom = std::pow(3, ref_levels);
-    int nel_per_dir_top = 4;
-    int nel_per_dir_bottom = 4;
+    int nel_per_dir_top = 10;
+    int nel_per_dir_bottom = 3;
 
     // clang-format off
     mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
diff --git a/src/tribol/physics/ContactFormulationFactory.cpp b/src/tribol/physics/ContactFormulationFactory.cpp
index 325493e3..6c37776a 100644
--- a/src/tribol/physics/ContactFormulationFactory.cpp
+++ b/src/tribol/physics/ContactFormulationFactory.cpp
@@ -19,7 +19,7 @@ std::unique_ptr<ContactFormulation> createContactFormulation( CouplingScheme* cs
   if ( cs->getContactMethod() == ENERGY_MORTAR ) {
     // Default parameters for now, or extract from CouplingScheme if available
     double k = 1.0;
-    double delta = 0.0;
+    double delta = 0.2;
     int N = 3;
     bool use_penalty_ = (cs->getEnforcementMethod() == PENALTY);
 

From 91035110ec89777953f25abe2eb355af75375839 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 12 Mar 2026 13:29:29 -0700
Subject: [PATCH 53/56] size empty blocks correctly

---
 src/shared/math/ParSparseMat.cpp | 17 +++++++++++++++++
 src/shared/math/ParSparseMat.hpp | 15 +++++++++++++++
 src/tribol/mesh/MfemData.cpp     | 32 +++++++++++++++++++++++++-------
 3 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/src/shared/math/ParSparseMat.cpp b/src/shared/math/ParSparseMat.cpp
index 7318af82..800d81c1 100644
--- a/src/shared/math/ParSparseMat.cpp
+++ b/src/shared/math/ParSparseMat.cpp
@@ -128,6 +128,23 @@ ParSparseMat::ParSparseMat( MPI_Comm comm, HYPRE_BigInt glob_size, HYPRE_BigInt*
   owned_mat_->SetOwnerFlags( mfem_owned_host_flag, owned_mat_->OwnsOffd(), owned_mat_->OwnsColMap() );
 }
 
+ParSparseMat::ParSparseMat( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols,
+                            HYPRE_BigInt* row_starts, HYPRE_BigInt* col_starts, mfem::SparseMatrix&& diag )
+    : ParSparseMatView( nullptr )  // view starts empty; mat_ is set below once the matrix exists
+{  // Wraps diag as the local diagonal block of a HypreParMatrix with separate row/column sizes and partitionings
+  owned_mat_.reset( createHypreParMatrix<MemorySpace::Host>( [&]() {
+    return new mfem::HypreParMatrix( comm, global_num_rows, global_num_cols, row_starts, col_starts, &diag );
+  } ) );
+  mat_ = owned_mat_.get();  // the view now refers to the owned matrix
+  diag.GetMemoryI().ClearOwnerFlags();  // diag gives up its I/J/data arrays so it will not free them
+  diag.GetMemoryJ().ClearOwnerFlags();
+  diag.GetMemoryData().ClearOwnerFlags();
+  // The mfem::Memory in mfem::SparseMatrix allocates using operator new [], so mark the diag memory as owned by MFEM so
+  // it can be deleted correctly
+  constexpr int mfem_owned_host_flag = 3;
+  owned_mat_->SetOwnerFlags( mfem_owned_host_flag, owned_mat_->OwnsOffd(), owned_mat_->OwnsColMap() );  // offd/col-map flags unchanged
+}
+
 ParSparseMat::ParSparseMat( ParSparseMat&& other ) noexcept
     : ParSparseMatView( other.owned_mat_.get() ), owned_mat_( std::move( other.owned_mat_ ) )
 {
diff --git a/src/shared/math/ParSparseMat.hpp b/src/shared/math/ParSparseMat.hpp
index 6536f31b..54fda42f 100644
--- a/src/shared/math/ParSparseMat.hpp
+++ b/src/shared/math/ParSparseMat.hpp
@@ -246,6 +246,21 @@ class ParSparseMat : public ParSparseMatView {
    */
   ParSparseMat( MPI_Comm comm, HYPRE_BigInt glob_size, HYPRE_BigInt* row_starts, mfem::SparseMatrix&& diag );
 
+  /**
+   * @brief Construct from MPI communicator, global size, row/column starts, and mfem::SparseMatrix rvalue
+   *
+   * @param comm MPI communicator
+   * @param global_num_rows Global number of rows
+   * @param global_num_cols Global number of columns
+   * @param row_starts Global row partitioning
+   * @param col_starts Global column partitioning
+   * @param diag Local diagonal block SparseMatrix (rvalue)
+   *
+   * @note The HypreParMatrix will take ownership of the I, J, and Data from diag.
+   */
+  ParSparseMat( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt* row_starts,
+                HYPRE_BigInt* col_starts, mfem::SparseMatrix&& diag );
+
   /// Template constructor forwarding arguments to mfem::HypreParMatrix constructor
   template <typename... Args>
   explicit ParSparseMat( Args&&... args )
diff --git a/src/tribol/mesh/MfemData.cpp b/src/tribol/mesh/MfemData.cpp
index 16f80a95..b522b19c 100644
--- a/src/tribol/mesh/MfemData.cpp
+++ b/src/tribol/mesh/MfemData.cpp
@@ -860,7 +860,7 @@ MfemJacobianData::MfemJacobianData( const MfemMeshData& parent_data, const MfemS
   submesh_parent_data = 1.0;
   // This constructor copies all of the data, so don't worry about ownership of the CSR data
   submesh_parent_vdof_xfer_ = std::make_unique<shared::ParSparseMat>(
-      TRIBOL_COMM_WORLD, submesh_fes.GetVSize(), submesh_fes.GlobalVSize(), parent_fes.GlobalVSize(),
+      parent_fes.GetComm(), submesh_fes.GetVSize(), submesh_fes.GlobalVSize(), parent_fes.GlobalVSize(),
       submesh_parent_I.data(), submesh2parent_vdof_list_.GetData(), submesh_parent_data.GetData(),
       submesh_fes.GetDofOffsets(), parent_fes.GetDofOffsets() );
 
@@ -1037,9 +1037,9 @@ std::unique_ptr<mfem::BlockOperator> MfemJacobianData::GetMfemBlockJacobian(
 
   if ( has_11 ) {
     auto& submesh_fes_full = submesh_data_.GetSubmeshFESpace();
-    shared::ParSparseMat inactive_hpm_full =
-        shared::ParSparseMat::diagonalMatrix( TRIBOL_COMM_WORLD, submesh_fes_full.GlobalTrueVSize(),
-                                              submesh_fes_full.GetTrueDofOffsets(), 1.0, mortar_tdof_list_, false );
+    auto comm = parent_data_.GetParentCoords().ParFESpace()->GetComm();
+    shared::ParSparseMat inactive_hpm_full = shared::ParSparseMat::diagonalMatrix(
+        comm, submesh_fes_full.GlobalTrueVSize(), submesh_fes_full.GetTrueDofOffsets(), 1.0, mortar_tdof_list_, false );
 
     if ( block_J->IsZeroBlock( 1, 1 ) ) {
       block_J->SetBlock( 1, 1, inactive_hpm_full.release() );
@@ -1178,9 +1178,27 @@ shared::ParSparseMat MfemJacobianData::GetMfemJacobian( const std::vector<Comput
   }
 
   if ( !par_J ) {
-    auto& fes = *parent_data_.GetParentCoords().ParFESpace();
-    return shared::ParSparseMat::diagonalMatrix( TRIBOL_COMM_WORLD, fes.GetTrueVSize(), fes.GetTrueDofOffsets(), 0.0,
-                                                 mfem::Array<int>(), true );
+    int target_r_blk = 0;
+    int target_c_blk = 0;
+    if ( !contributions.empty() ) {
+      target_r_blk = ( contributions[0].row_space == BlockSpace::LAGRANGE_MULTIPLIER ) ? 1 : 0;
+      target_c_blk = ( contributions[0].col_space == BlockSpace::LAGRANGE_MULTIPLIER ) ? 1 : 0;
+    }
+
+    auto& row_fes =
+        ( target_r_blk == 0 ) ? *parent_data_.GetParentCoords().ParFESpace() : submesh_data_.GetSubmeshFESpace();
+    auto& col_fes =
+        ( target_c_blk == 0 ) ? *parent_data_.GetParentCoords().ParFESpace() : submesh_data_.GetSubmeshFESpace();
+
+    if ( target_r_blk == target_c_blk ) {
+      return shared::ParSparseMat::diagonalMatrix( comm, row_fes.GlobalTrueVSize(), row_fes.GetTrueDofOffsets(), 0.0,
+                                                   mfem::Array<int>(), true );
+    } else {
+      mfem::SparseMatrix empty_diag( row_fes.GetTrueVSize(), col_fes.GetTrueVSize() );
+      empty_diag.Finalize();
+      return shared::ParSparseMat( comm, row_fes.GlobalTrueVSize(), col_fes.GlobalTrueVSize(),
+                                   row_fes.GetTrueDofOffsets(), col_fes.GetTrueDofOffsets(), std::move( empty_diag ) );
+    }
   }
 
   return std::move( *par_J );

From b37c16345e9bd8e3b95287596f2693017c29c36c Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 12 Mar 2026 13:34:40 -0700
Subject: [PATCH 54/56] fix some bugs

---
 src/tribol/interface/mfem_tribol.cpp | 21 +++++++--------------
 src/tribol/mesh/CouplingScheme.cpp   |  2 +-
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/src/tribol/interface/mfem_tribol.cpp b/src/tribol/interface/mfem_tribol.cpp
index c86b3b83..d2f9a7d4 100644
--- a/src/tribol/interface/mfem_tribol.cpp
+++ b/src/tribol/interface/mfem_tribol.cpp
@@ -344,8 +344,8 @@ std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id )
     // Determine sizes
     mfem::Array<int> offsets( 3 );
     offsets[0] = 0;
-    offsets[1] = DfDx->Height();                             // Force rows (displacement dofs)
-    offsets[2] = offsets[1] + ( DfDp ? DfDp->Width() : 0 );  // Pressure cols (pressure dofs)
+    offsets[1] = DfDx->Height();                                                     // Force rows (displacement dofs)
+    offsets[2] = offsets[1] + ( DfDp ? DfDp->Width() : DgDx ? DgDx->Height() : 0 );  // Pressure cols (pressure dofs)
 
     auto blockOp = std::make_unique<mfem::BlockOperator>( offsets );
     if ( DfDx ) blockOp->SetBlock( 0, 0, DfDx.release() );
@@ -444,7 +444,6 @@ std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx( IndexT cs_id )
   return nullptr;
 }
 
-
 //**************** */NEW LAGRANGE FUNTIONS:
 mfem::ParFiniteElementSpace& getMfemContactFESpace( IndexT cs_id )
 {
@@ -454,14 +453,11 @@ mfem::ParFiniteElementSpace& getMfemContactFESpace( IndexT cs_id )
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
   SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(),
-                      axom::fmt::format( "Coupling scheme cs_id={0} does not contain MFEM submesh data.",
-                                         cs_id ) );
+                      axom::fmt::format( "Coupling scheme cs_id={0} does not contain MFEM submesh data.", cs_id ) );
   return const_cast<mfem::ParFiniteElementSpace&>( cs->getMfemSubmeshData()->GetSubmeshFESpace() );
 }
 
-void evaluateContactResidual( IndexT cs_id,
-                              const mfem::HypreParVector& lambda,
-                              mfem::HypreParVector& r_force,
+void evaluateContactResidual( IndexT cs_id, const mfem::HypreParVector& lambda, mfem::HypreParVector& r_force,
                               mfem::HypreParVector& r_gap )
 {
   auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
@@ -469,13 +465,11 @@ void evaluateContactResidual( IndexT cs_id,
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
-  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(),
-                      "Coupling scheme does not contain a contact formulation." );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(), "Coupling scheme does not contain a contact formulation." );
   cs->getContactFormulation()->evaluateContactResidual( lambda, r_force, r_gap );
 }
 
-void evaluateContactJacobian( IndexT cs_id,
-                              const mfem::HypreParVector& lambda,
+void evaluateContactJacobian( IndexT cs_id, const mfem::HypreParVector& lambda,
                               std::unique_ptr<mfem::HypreParMatrix>& df_du,
                               std::unique_ptr<mfem::HypreParMatrix>& df_dlambda )
 {
@@ -484,8 +478,7 @@ void evaluateContactJacobian( IndexT cs_id,
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
-  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(),
-                      "Coupling scheme does not contain a contact formulation." );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(), "Coupling scheme does not contain a contact formulation." );
   cs->getContactFormulation()->evaluateContactJacobian( lambda, df_du, df_dlambda );
 }
 
diff --git a/src/tribol/mesh/CouplingScheme.cpp b/src/tribol/mesh/CouplingScheme.cpp
index c9be3cab..0901d7ce 100644
--- a/src/tribol/mesh/CouplingScheme.cpp
+++ b/src/tribol/mesh/CouplingScheme.cpp
@@ -1146,7 +1146,7 @@ int CouplingScheme::apply( int cycle, RealT t, RealT& dt )
 //------------------------------------------------------------------------------
 bool CouplingScheme::init()
 {
-  if ( !m_formulation_impl && m_contactMethod == ENERGY_MORTAR ) {
+  if ( m_contactMethod == ENERGY_MORTAR ) {
     // these calls still need to be made to set mesh pointers and allocator id
     if ( !setMeshPointers() || checkExecutionModeData() != 0 ) {
       return false;

From 16b2ca8e28a5cf17f29279e13de5da92288eee49 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 12 Mar 2026 13:34:54 -0700
Subject: [PATCH 55/56] formatting; comment out getMfemDfDp() error

---
 src/tribol/physics/NewMethodAdapter.cpp | 68 ++++++++++++-------------
 1 file changed, 32 insertions(+), 36 deletions(-)

diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
index 221eda8a..24f5a471 100644
--- a/src/tribol/physics/NewMethodAdapter.cpp
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -15,7 +15,11 @@ NewMethodAdapter::NewMethodAdapter( MfemSubmeshData& submesh_data, MfemJacobianD
                                     MeshData& mesh2, double k, double delta, int N, bool use_penalty_ )
     // NOTE: mesh1 maps to mesh2_ and mesh2 maps to mesh1_. This is to keep consistent with mesh1_ being non-mortar and
     // mesh2_ being mortar as is typical in the literature, but different from Tribol convention.
-    : submesh_data_( submesh_data ), jac_data_( jac_data ), mesh1_( mesh2 ), mesh2_( mesh1 ), use_penalty_( use_penalty_ )
+    : submesh_data_( submesh_data ),
+      jac_data_( jac_data ),
+      mesh1_( mesh2 ),
+      mesh2_( mesh1 ),
+      use_penalty_( use_penalty_ )
 {
   if ( mesh1.numberOfNodes() > 0 && mesh2.numberOfNodes() > 0 ) {
     SLIC_ERROR_ROOT_IF( mesh1.spatialDimension() != 2 || mesh2.spatialDimension() != 2,
@@ -127,7 +131,7 @@ void NewMethodAdapter::updateNodalGaps()
   P_submesh.MultTranspose( g_tilde_linear_form, g_tilde_vec_.get() );
 
   mfem::Array<int> rows_to_elim;
-  if ( !tied_contact_ && use_penalty_) {
+  if ( !tied_contact_ && use_penalty_ ) {
     rows_to_elim.Reserve( g_tilde_vec_.Size() );
     for ( int i{ 0 }; i < g_tilde_vec_.Size(); ++i ) {
       if ( g_tilde_vec_[i] > 0.0 ) {
@@ -147,7 +151,7 @@ void NewMethodAdapter::updateNodalGaps()
 
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
   dg_tilde_dx_ = jac_data_.GetMfemJacobian( dg_tilde_dx_contribs.get() );
-  if ( !tied_contact_ && use_penalty_) {
+  if ( !tied_contact_ && use_penalty_ ) {
     // technically, we should do this on all the vectors/matrices below, but it looks like the mutliplication operators
     // below will zero them out anyway
     dg_tilde_dx_.EliminateRows( rows_to_elim );
@@ -156,7 +160,6 @@ void NewMethodAdapter::updateNodalGaps()
   dA_dx_ = jac_data_.GetMfemJacobian( dA_dx_contribs.get() );
 }
 
-
 void NewMethodAdapter::updateNodalForces()
 {
   // NOTE: user should have called updateNodalGaps() with updated coords before calling this
@@ -261,7 +264,6 @@ void NewMethodAdapter::updateNodalForces()
   // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
   df_dx_ = jac_data_.GetMfemJacobian( df_dx_contribs.get() );
 
-
   auto pg2_over_asq = ( 2.0 * pressure_vec_ )
                           .multiplyInPlace( g_tilde_vec_ )
                           .divideInPlace( A_vec_, area_tol_ )
@@ -286,28 +288,28 @@ RealT NewMethodAdapter::computeTimeStep()
   return 1.0;
 }
 
-void NewMethodAdapter::compute_df_du_lagrange( const mfem::HypreParVector& lambda, 
-                                               std::unique_ptr<mfem::HypreParMatrix>& df_du)
+void NewMethodAdapter::compute_df_du_lagrange( const mfem::HypreParVector& lambda,
+                                               std::unique_ptr<mfem::HypreParMatrix>& df_du )
 {
-  //Convert Lambda to redecomp space for element wise access 
+  // Convert Lambda to redecomp space for element wise access
   mfem::GridFunction redecomp_lambda( submesh_data_.GetRedecompGap() );
   mfem::ParGridFunction submesh_lambda(
       const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
-  submesh_lambda.SetFromTrueDofs(lambda );
-  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_lambda, redecomp_lambda);
+  submesh_lambda.SetFromTrueDofs( lambda );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_lambda, redecomp_lambda );
 
   JacobianContributions df_dx_contribs( { { BlockSpace::NONMORTAR, BlockSpace::NONMORTAR },
-                                                      { BlockSpace::NONMORTAR, BlockSpace::MORTAR},
-                                                      { BlockSpace::MORTAR, BlockSpace::NONMORTAR },
-                                                      { BlockSpace::MORTAR, BlockSpace::MORTAR}});
+                                          { BlockSpace::NONMORTAR, BlockSpace::MORTAR },
+                                          { BlockSpace::MORTAR, BlockSpace::NONMORTAR },
+                                          { BlockSpace::MORTAR, BlockSpace::MORTAR } } );
 
-  df_dx_contribs.reserve( pairs_.size(), 16);
+  df_dx_contribs.reserve( pairs_.size(), 16 );
 
-  const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7};
+  const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
 
   auto mesh1_view = mesh1_.getView();
   auto mesh2_view = mesh2_.getView();
-    // Loop over element pairs and compute Hessian contributions
+  // Loop over element pairs and compute Hessian contributions
   for ( auto& pair : pairs_ ) {
     InterfacePair flipped_pair( pair.m_element_id2, pair.m_element_id1 );
     const auto elem1 = static_cast<int>( flipped_pair.m_element_id1 );
@@ -347,24 +349,22 @@ void NewMethodAdapter::compute_df_du_lagrange( const mfem::HypreParVector& lambd
     df_dx_contribs.push_back( 3, elem2, elem2, df_dx_blocks[1][1], 16 );
   }
 
-    auto df_dx_temp = jac_data_.GetMfemJacobian( df_dx_contribs.get() );
+  auto df_dx_temp = jac_data_.GetMfemJacobian( df_dx_contribs.get() );
 
-    df_du = std::unique_ptr<mfem::HypreParMatrix>( df_dx_temp.release() );
+  df_du = std::unique_ptr<mfem::HypreParMatrix>( df_dx_temp.release() );
 }
 
-
-void NewMethodAdapter::evaluateContactResidual( const mfem::HypreParVector& lambda,
-                                                mfem::HypreParVector& r_force,
+void NewMethodAdapter::evaluateContactResidual( const mfem::HypreParVector& lambda, mfem::HypreParVector& r_force,
                                                 mfem::HypreParVector& r_gap )
 {
-  SLIC_ERROR_ROOT_IF(use_penalty_, "evaluateContactResidual() should only be  called in lagrange multiplier mode");
+  SLIC_ERROR_ROOT_IF( use_penalty_, "evaluateContactResidual() should only be  called in lagrange multiplier mode" );
 
-  SLIC_ERROR_ROOT_IF( g_tilde_vec_.Size() == 0, "updateNodalGaps() must be called before evaluateContactResidual()");
+  SLIC_ERROR_ROOT_IF( g_tilde_vec_.Size() == 0, "updateNodalGaps() must be called before evaluateContactResidual()" );
 
-  //Force residual = r_f = lambda * dg_tilde/du
-  dg_tilde_dx_->MultTranspose(lambda, r_force);
+  // Force residual = r_f = lambda * dg_tilde/du
+  dg_tilde_dx_->MultTranspose( lambda, r_force );
 
-  //gap residual
+  // gap residual
   r_gap = g_tilde_vec_.get();
 }
 
@@ -372,21 +372,17 @@ void NewMethodAdapter::evaluateContactJacobian( const mfem::HypreParVector& lamb
                                                 std::unique_ptr<mfem::HypreParMatrix>& df_du,
                                                 std::unique_ptr<mfem::HypreParMatrix>& df_dlambda )
 {
-  SLIC_ERROR_ROOT_IF( use_penalty_, 
-                      "evaluateContactJacobian() should only be called in Lagrange multiplier mode" );
-  
-  SLIC_ERROR_ROOT_IF( g_tilde_vec_.Size() == 0,
-                      "updateNodalGaps() must be called before evaluateContactJacobian()" );
+  SLIC_ERROR_ROOT_IF( use_penalty_, "evaluateContactJacobian() should only be called in Lagrange multiplier mode" );
+
+  SLIC_ERROR_ROOT_IF( g_tilde_vec_.Size() == 0, "updateNodalGaps() must be called before evaluateContactJacobian()" );
 
   // df/dlambda = dg_tilde/du:
   df_dlambda = std::unique_ptr<mfem::HypreParMatrix>( dg_tilde_dx_.release() );
 
-  //df/du = lambda * d2g_tilde/du2
-  compute_df_du_lagrange( lambda, df_du ); 
-
+  // df/du = lambda * d2g_tilde/du2
+  compute_df_du_lagrange( lambda, df_du );
 }
 
-
 std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDx() const
 {
   return std::unique_ptr<mfem::HypreParMatrix>( df_dx_.release() );
@@ -399,7 +395,7 @@ std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDgDx() const
 
 std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDp() const
 {
-  SLIC_ERROR_ROOT( "NewMethod does not support getMfemDfDp()" );
+  // SLIC_ERROR_ROOT( "NewMethod does not support getMfemDfDp()" );
   return nullptr;
 }
 

From b394f71391da53daa1eef4b490422110dc299849 Mon Sep 17 00:00:00 2001
From: "Eric B. Chin" <chin23@llnl.gov>
Date: Thu, 12 Mar 2026 13:44:02 -0700
Subject: [PATCH 56/56] temp workaround for axom Array issue

---
 src/tribol/mesh/MfemData.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/tribol/mesh/MfemData.cpp b/src/tribol/mesh/MfemData.cpp
index b522b19c..f08f4af2 100644
--- a/src/tribol/mesh/MfemData.cpp
+++ b/src/tribol/mesh/MfemData.cpp
@@ -1126,7 +1126,10 @@ shared::ParSparseMat MfemJacobianData::GetMfemJacobian( const std::vector<Comput
             col_redecomp_ids.push_back(
                 ( *elem_map_by_space[static_cast<size_t>( contrib.col_space )] )[static_cast<size_t>( id )] );
           }
-          jacobian_data.append( axom::ArrayView<const double>( contrib.jacobian_data ) );
+          // NOTE (EBC): This can be removed when Axom PR 1819 goes in
+          if ( contrib.jacobian_data.size() > 0 ) {
+            jacobian_data.append( axom::ArrayView<const double>( contrib.jacobian_data ) );
+          }
           jacobian_offsets.reserve( jacobian_offsets.size() + contrib.jacobian_offsets.size() );
           for ( auto offset : contrib.jacobian_offsets ) {
             jacobian_offsets.push_back( current_offset + offset );