diff --git a/.gitignore b/.gitignore
index e760fcbf..cc53de42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,6 @@
 Manifest.toml
 .DS_Store
 *.csv
 *.nc
-/projects/BulkDSOC/eval/
 build/
 node_modules/
diff --git a/Project.toml b/Project.toml
index c474af24..f2df38d5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -27,6 +27,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
@@ -60,6 +61,7 @@ OptimizationOptimisers = "0.3.7"
 PrettyTables = "3.1.2"
 ProgressMeter = "1.10.4"
 Reexport = "1.2.2"
+Static = "1.3.1"
 StyledStrings = "1.0.3, 1.11.0"
 Zygote = "0.7.7"
 julia = "1.10"
diff --git a/projects/BulkDSOC/BulkDensitySOC.jl b/projects/BulkDSOC/BulkDensitySOC.jl
deleted file mode 100644
index 02f40135..00000000
--- a/projects/BulkDSOC/BulkDensitySOC.jl
+++ /dev/null
@@ -1,135 +0,0 @@
-# CC BY-SA 4.0
-using Pkg
-Pkg.activate("projects/BulkDSOC")
-Pkg.develop(path = pwd())
-Pkg.instantiate()
-
-using Revise
-using EasyHybrid
-using EasyHybrid.Lux
-# plotting
-using GLMakie
-using AlgebraOfGraphics
-import Plots as pl
-
-# ? move the `csv` file into the `BulkDSOC/data` folder (create folder)
-df = CSV.read(joinpath(@__DIR__, "./data/lucas_preprocessed.csv"), DataFrame, normalizenames = true)
-println(size(df))
-df_d = dropmissing(df)
-println(size(df_d))
-
-target_names = [:BD, :SOCconc, :CF, :SOCdensity]
-
-
-names_cov = Symbol.(names(df_d))[4:(end - 1)]
-#names_cov = Symbol.(names(df_d))[end-40:end-1]
-ds_all = to_keyedArray(df_d);
-
-
-ds_p = ds_all(names_cov);
-ds_t = ds_all(target_names)
-
-length(ds_t) * 10 # number of samples as guidance for number of parameters
-nfeatures = length(names_cov)
-p_dropout = 0.2
-NN = Lux.Chain(
-    Dense(nfeatures, 256, Lux.sigmoid),
-    Dropout(p_dropout),
-    Dense(256, 128, Lux.sigmoid),
-    Dropout(p_dropout),
-    Dense(128, 64, Lux.sigmoid),
-    Dropout(p_dropout),
-    Dense(64, 32, Lux.sigmoid),
-    Dropout(p_dropout),
-    Dense(32, 3, Lux.sigmoid) # Output layer
-)
-# ? we might need to set output bounds for the expected parameter values
-
-# ? do different initial oBDs
-BulkDSOC = BulkDensitySOC(NN, names_cov, target_names, 1.0f0)
-
-ps, st = LuxCore.setup(Random.default_rng(), BulkDSOC)
-# the Tuple `ds_p, ds_t` is later used for batching in the `dataloader`.
-ds_t_nan = .!isnan.(ds_t)
-
-ls = EasyHybrid.lossfn(BulkDSOC, ds_p, (ds_t, ds_t_nan), ps, LuxCore.testmode(st), LoggingLoss(train_mode = false)) # testmode to switch of dropout
-
-println(length(names_cov))
-out = train(BulkDSOC, (ds_p, ds_t), (:oBD,); nepochs = 100, batchsize = 32, opt = AdaMax(0.01));
-
-# plot train history
-series(WrappedTuples(out.train_history.mse); axis = (; xlabel = "epoch", ylabel = "loss", xscale = log10, yscale = log10))
-
-# physical parameter
-series(out.ps_history; axis = (; xlabel = "epoch", ylabel = ""))
-
-# plot trained bulk density function
-trained_oBD = out.ps_history.oBD[end] # taking the last one
-trained_mBD = out.train_diffs.mBD
-
-SOCrange = range(0.0, 0.5; step = 0.01)
-median_BDc = compute_bulk_density(SOCrange, trained_oBD, median(trained_mBD))
-q25_BDc = compute_bulk_density(SOCrange, trained_oBD, quantile(trained_mBD, 0.25))
-q75_BDc = compute_bulk_density(SOCrange, trained_oBD, quantile(trained_mBD, 0.75))
-
-pl.plot(ds_t(:SOCconc), ds_t(:BD), seriestype = :scatter, ylabel = :BD, xlabel = :SOCconc)
-pl.plot!(collect(SOCrange), median_BDc, width = 4.0, label = "q50")
-pl.plot!(collect(SOCrange), q25_BDc, width = 4.0, label = "q25")
-pl.plot!(collect(SOCrange), q75_BDc, width = 4.0, label = "q75")
-
-pl.scatter(out.train_obs_pred[!, :SOCconc], out.train_obs_pred[!, :SOCconc_pred])
-pl.scatter(out.train_obs_pred[!, :BD], out.train_obs_pred[!, :BD_pred])
-pl.scatter(out.train_obs_pred[!, :CF], out.train_obs_pred[!, :CF_pred])
-pl.scatter(out.train_obs_pred[!, :SOCdensity], out.train_obs_pred[!, :SOCdensity_pred])
-
-# AoG
-yvars = target_names # [:BD, :SOCconc, :CF, :SOCdensity]
-xvars = Symbol.(string.(yvars) .* "_pred")
-
-layers = visual(Scatter, alpha = 0.35)
-plt = data(out.train_obs_pred) * layers * mapping(xvars, yvars, col = dims(1) => renamer(string.(yvars)))
-plt *= mapping(color = dims(1) => renamer(string.(xvars)) => "Metrics")
-# linear
-l_linear = linear() * visual(color = :grey25)
-plt += data(out.train_obs_pred) * l_linear * mapping(xvars, yvars, col = dims(1) => renamer(string.(yvars)))
-
-with_theme(theme_light()) do
-    draw(
-        plt, scales(
-            X = (; label = rich("Prediction", font = :bold)),
-            Y = (; label = "Observation"),
-            Color = (; palette = [:tomato, :teal, :orange, :dodgerblue3])
-        ),
-        # legend = (; show = false),
-        legend = (; position = :bottom, titleposition = :left, merge = false),
-        facet = (; linkxaxes = :none, linkyaxes = :none),
-        figure = (; size = (1400, 400))
-    )
-end
-
-with_theme(theme_light()) do
-    fig = Figure(; size = (1200, 300))
-    ax = Makie.Axis(
-        fig[1, 1], title = "Loss",
-        # yscale=log10,
-        xscale = log10
-    )
-    lines!(ax, WrappedTuples(out.train_history.mse).sum, color = :orangered, label = "train")
-    lines!(ax, WrappedTuples(out.val_history.mse).sum, color = :dodgerblue, label = "validation")
-    # limits!(ax, 1, 1000, 0.04, 1)
-    axislegend()
-    fig
-end
-
-layers = visual(Scatter, alpha = 0.65)
-plt = data(out.val_obs_pred) * layers * mapping(:index, [:BD, :BD_pred]) *
-    mapping(color = dims(1) => renamer(string.([:BD, :BD_pred])))
-
-with_theme(theme_light()) do
-    colors = ["a" => :tomato, "c" => :lime]
-    draw(
-        plt, scales(Color = (; palette = [:grey25, :tomato])),
-        legend = (position = :top,),
-        figure = (; size = (1400, 400))
-    )
-end
diff --git a/projects/BulkDSOC/DataPreProcess.jl b/projects/BulkDSOC/DataPreProcess.jl
deleted file mode 100644
index 797f0fea..00000000
--- a/projects/BulkDSOC/DataPreProcess.jl
+++ /dev/null
@@ -1,114 +0,0 @@
-# CC BY-SA 4.0
-using Revise
-using EasyHybrid
-using EasyHybrid.MLUtils
-
-# ? move the `csv` file into the `BulkDSOC/data` folder (create folder)
-df_o = CSV.read(joinpath(@__DIR__, "./data/lucas_overlaid.csv"), DataFrame, normalizenames = true)
-println(size(df_o))
-
-coords = collect(zip(df_o.lat, df_o.lon))
-
-using Rasters
-
-zarr_path = "/Net/Groups/BGI/scratch/HYCO/npp.zarr/"
-data = Raster(zarr_path)
-
-# t clean covariates
-names_cov = Symbol.(names(df_o))[19:end]
-names_meta = Symbol.(names(df_o))[1:18]
-
-# Fix soilsuite and cropland extent columns
-for col in names_cov
-    if occursin("_soilsuite_", String(col))
-        df_o[!, col] = replace(df_o[!, col], missing => 0)
-    elseif occursin("cropland_extent_", String(col))
-        df_o[!, col] = replace(df_o[!, col], missing => 0)
-        df_o[!, col] .= ifelse.(df_o[!, col] .> 0, 1, 0)
-    end
-end
-
-# rm missing values: 1. >5%, drop col; 2. <=5%, drop row
-cols_to_drop_row = Symbol[]
-cols_to_drop_col = Symbol[]
-for col in names_cov
-    n_missing = count(ismissing, df_o[!, col])
-    frac_missing = n_missing / nrow(df_o)
-    if frac_missing > 0.05
-        println(n_missing, " ", col)
-        select!(df_o, Not(col)) # drop the column
-        push!(cols_to_drop_col, col)
-    elseif n_missing > 0
-        # println(n_missing, " ", col)
-        push!(cols_to_drop_row, col) # collect column name
-    end
-
-    if occursin("CHELSA_kg", String(col))
-        push!(cols_to_drop_col, col)
-        select!(df_o, Not(col)) # rm kg catagorical col
-    end
-end
-
-names_cov = filter(x -> !(x in cols_to_drop_col), names_cov) # remove cols-to-drop from names_cov
-if !isempty(cols_to_drop_row)
-    df_o = subset(df_o, cols_to_drop_row .=> ByRow(!ismissing)) # drop rows with missing values in cols_to_drop_row
-end
-println(size(df_o))
-
-cols_to_drop_col = Symbol[]
-for col in names_cov
-    if std(df_o[:, col]) == 0
-        push!(cols_to_drop_col, col) # rm constant col (std==0)
-        select!(df_o, Not(col))
-    end
-end
-names_cov = filter(x -> !(x in cols_to_drop_col), names_cov) # remove cols-to-drop from names_cov
-println(size(df_o))
-
-
-df = df_o[:, [:bulk_density_fe, :soc, :coarse_vol, names_cov...]]
-
-# ? match target_names
-rename!(df, :bulk_density_fe => :BD, :soc => :SOCconc, :coarse_vol => :CF) # rename as in hybrid model
-# BD g/cm3, SOCconc g/kg, CF [0,1]
-
-df[:, :SOCconc] .= df[:, :SOCconc] ./ 1000 # convert to fraction
-
-# ? calculate SOC density
-df[!, :SOCdensity] = df.BD .* df.SOCconc .* (1 .- df.CF) # SOCdensity ton/m3
-target_names = [:BD, :SOCconc, :CF, :SOCdensity]
-# df[:, target_names] = replace.(df[:, target_names], missing => NaN) # replace missing with NaN
-
-# for col in names_cov # to check covairate distribution
-#     println(string(col)[1:10], ' ', round(std(df[:, col]); digits=2), ' ', round(mean(df[:, col]); digits=2))
-# end
-
-# # Normalize covariates with std>1
-means = mean.(eachcol(df[:, names_cov]))
-stds = std.(eachcol(df[:, names_cov]))
-for col in names_cov
-    df[!, col] = Float64.(df[!, col])
-end
-df[:, names_cov] .= (df[:, names_cov] .- means') ./ stds'
-
-println(size(df))
-
-# CSV.write(joinpath(@__DIR__, "data/lucas_preprocessed.csv"), df)
-
-
-# plot BD vs SOCconc
-bd_lims = extrema(skipmissing(df[:, "BD"]))
-soc_lims = extrema(skipmissing(df[:, "SOCconc"]))
-plt = histogram2d(
-    df[:, "BD"], df[:, "SOCconc"];
-    nbins = (30, 30),
-    cbar = true,
-    xlab = "BD",
-    ylab = "SOCconc",
-    xlims = bd_lims, ylims = soc_lims,
-    #title = "SOCdensity-MTD\nR2=$(round(r2, digits=3)), MAE=$(round(mae, digits=3)), bias=$(round(bias, digits=3))",
-    color = cgrad(:bamako, rev = true),
-    normalize = false,
-    size = (460, 400)
-)
-savefig(plt, joinpath(@__DIR__, "./eval/00_truth_BD.vs.SOCconc.png"))
diff --git a/projects/BulkDSOC/Project.toml b/projects/BulkDSOC/Project.toml
deleted file mode 100644
index 6efec671..00000000
--- a/projects/BulkDSOC/Project.toml
+++ /dev/null
@@ -1,11 +0,0 @@
-[deps]
-AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67"
-EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3"
-Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
-Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
-StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
-StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
-Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/projects/BulkDSOC/UniNaiveNN_EasyHybridNN.jl b/projects/BulkDSOC/UniNaiveNN_EasyHybridNN.jl
deleted file mode 100644
index 35a3d9b8..00000000
--- a/projects/BulkDSOC/UniNaiveNN_EasyHybridNN.jl
+++ /dev/null
@@ -1,126 +0,0 @@
-# CC BY-SA 4.0
-project_path = "projects/BulkDSOC"
-Pkg.activate(project_path)
-
-# Only instantiate if Manifest.toml is missing
-manifest_path = joinpath(project_path, "Manifest.toml")
-if !isfile(manifest_path)
-    Pkg.develop(path = pwd())
-    Pkg.instantiate()
-end
-using EasyHybrid
-using WGLMakie
-using Random
-using EasyHybrid.MLUtils
-using Statistics
-using Plots
-# using StatsBase
-
-import Flux
-
-# 01 - univariate naive NN using EasyHybrid's SingleNNModel
-
-testid = "01_univariate_EasyHybridNN"
-
-# input
-df = CSV.read(joinpath(@__DIR__, "./data/lucas_preprocessed.csv"), DataFrame, normalizenames = true)
-df_d = dropmissing(df) # complete SOCD
-
-target_names = [:BD, :SOCconc, :CF, :SOCdensity]
-names_cov = Symbol.(names(df_d))[4:(end - 1)]
-ds_all = to_keyedArray(Float32.(df_d))
-ds_p = ds_all(names_cov)
-ds_t = ds_all(target_names)
-ds_t = Flux.normalise(ds_t)
-
-df_out = DataFrame()
-
-nfeatures = length(names_cov)
-p_dropout = 0.2
-
-for (i, tname) in enumerate(target_names)
-
-    y = ds_t([tname])
-    # Use EasyHybrid's constructNNModel
-    predictors = names_cov
-    targets = [tname]
-    neural_param_names = [tname]
-    model = EasyHybrid.constructNNModel(predictors, targets; hidden_layers = [32], scale_nn_outputs = false)
-    #model = EasyHybrid.constructNNModel(predictors, targets; hidden_layers=Chain(Dense(32, 32, tanh), Dense(32, 16, tanh)), scale_nn_outputs=false)
-
-    ps, st = LuxCore.setup(Random.default_rng(), model)
-    # Training using EasyHybrid's train function
-    result = train(model, (ds_p, y), (); nepochs = 100, batchsize = 512, opt = AdamW(0.0001, (0.9, 0.999), 0.01), training_loss = :nse, loss_types = [:mse, :nse], shuffleobs = true, yscale = identity)
-
-    y_val_true = vec(result.val_obs_pred[!, tname])
-    y_val_pred = vec(result.val_obs_pred[!, Symbol(string(tname, "_pred"))])
-    df_out[!, "true_$(tname)"] = y_val_true
-    df_out[!, "pred_$(tname)"] = y_val_pred
-    ss_res = sum((y_val_true .- y_val_pred) .^ 2)
-    ss_tot = sum((y_val_true .- mean(y_val_true)) .^ 2)
-    r2 = 1 - ss_res / ss_tot
-    mae = mean(abs.(y_val_pred .- y_val_true))
-    bias = mean(y_val_pred .- y_val_true)
-    plt = histogram2d(
-        y_val_true, y_val_pred;
-        nbins = (30, 30),
-        cbar = true,
-        xlab = "True",
-        ylab = "Predicted",
-        title = "$tname\nR2=$(round(r2, digits = 3)),MAE=$(round(mae, digits = 3)),bias=$(round(bias, digits = 3))",
-        color = cgrad(:bamako, rev = true),
-        normalize = false
-    )
-    lims = extrema(vcat(y_val_true, y_val_pred))
-    Plots.plot!(
-        plt,
-        [lims[1], lims[2]], [lims[1], lims[2]];
-        color = :black, linewidth = 2, label = "1:1 line",
-        aspect_ratio = :equal, xlims = lims, ylims = lims
-    )
-    savefig(plt, joinpath(@__DIR__, "./eval/$(testid)_accuracy_$(tname).png"))
-
-end
-
-# TODO undo the z-transformation
-df_out[:, "pred_calc_SOCdensity"] = df_out[:, "pred_SOCconc"] .* df_out[:, "pred_BD"] .* (1 .- df_out[:, "pred_CF"])
-true_SOCdensity = df_out[:, "true_SOCdensity"]
-pred_SOCdensity = df_out[:, "pred_calc_SOCdensity"]
-ss_res = sum((true_SOCdensity .- pred_SOCdensity) .^ 2)
-ss_tot = sum((true_SOCdensity .- mean(true_SOCdensity)) .^ 2)
-r2 = 1 - ss_res / ss_tot
-mae = mean(abs.(pred_SOCdensity .- true_SOCdensity))
-bias = mean(pred_SOCdensity .- true_SOCdensity)
-plt = histogram2d(
-    true_SOCdensity, pred_SOCdensity;
-    nbins = (30, 30),
-    cbar = true,
-    xlab = "True",
-    ylab = "Predicted",
-    title = "SOCdensity-MTD\nR2=$(round(r2, digits = 3)), MAE=$(round(mae, digits = 3)), bias=$(round(bias, digits = 3))",
-    color = cgrad(:bamako, rev = true),
-    normalize = false
-)
-lims = extrema(vcat(true_SOCdensity, pred_SOCdensity))
-Plots.plot!(
-    plt,
-    [lims[1], lims[2]], [lims[1], lims[2]];
-    color = :black, linewidth = 2, label = "1:1 line",
-    aspect_ratio = :equal, xlims = lims, ylims = lims
-)
-savefig(plt, joinpath(@__DIR__, "./eval/$(testid)_accuracy_SOCdensity_MTD.png"))
-
-bd_lims = extrema(skipmissing(df_out[:, "pred_BD"]))
-soc_lims = extrema(skipmissing(df_out[:, "pred_SOCconc"]))
-plt = histogram2d(
-    df_out[:, "pred_BD"], df_out[:, "pred_SOCconc"];
-    nbins = (30, 30),
-    cbar = true,
-    xlab = "BD",
-    ylab = "SOCconc",
-    xlims = bd_lims, ylims = soc_lims,
-    color = cgrad(:bamako, rev = true),
-    normalize = false,
-    size = (460, 400)
-)
-savefig(plt, joinpath(@__DIR__, "./eval/$(testid)_BD.vs.SOCconc.png"))
diff --git a/projects/RbQ10/Project.toml b/projects/RbQ10/Project.toml
index d1f9c50b..ee0f9637 100644
--- a/projects/RbQ10/Project.toml
+++ b/projects/RbQ10/Project.toml
@@ -2,5 +2,6 @@
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3"
+Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/projects/RbQ10/Q10.jl b/projects/RbQ10/Q10.jl
index 6b24e888..b802b238 100644
--- a/projects/RbQ10/Q10.jl
+++ b/projects/RbQ10/Q10.jl
@@ -7,9 +7,10 @@ using Pkg
 
 # start using the package
 using EasyHybrid
+using Enzyme
 
 # for Plotting
-using GLMakie
+# using GLMakie
 # using AlgebraOfGraphics
 
 # Local Path (MPI-BGC server):
@@ -28,12 +29,29 @@ forcing_names = [:cham_temp_filled]
 predictor_names = [:moisture_filled, :rgpot2]
 
 # Define neural network
-NN = Chain(Dense(2, 15, relu), Dense(15, 15, relu), Dense(15, 1));
+NN = Chain(Lux.BatchNorm(2, affine = false), Dense(2, 15, relu), Dense(15, 15, relu), Dense(15, 1));
 
 # instantiate Hybrid Model
 RbQ10 = RespirationRbQ10(NN, predictor_names, forcing_names, target_names, 2.5f0) # ? do different initial Q10s
 
 # train model
 out = train(RbQ10, ds_keyed, (:Q10,); nepochs = 100, batchsize = 512, opt = Adam(0.01), monitor_names = [:Rb]);
+out.st
+mean(ds_keyed(:moisture_filled))
+mean(ds_keyed(:rgpot2)) # BatchNorm works as expected
+
+# ? with AutoEnzyme()
+# ! KeyedArrays failed!
+using DimensionalData, ChainRulesCore
+# mat = Matrix(df)'
+mat = Array(Matrix(df)')
+da = DimArray(mat, (Dim{:col}(Symbol.(names(df))), Dim{:row}(1:size(df, 1))))
+
+out = train(
+    RbQ10, da, (:Q10,); nepochs = 100, batchsize = 512, opt = Adam(0.01),
+    autodiff_backend = AutoEnzyme(),
+    monitor_names = [:Rb]
+);
+
 # test custom loss function, keyword arguments
 function pinball(ŷ, y; τ = 0.9)
     r = ŷ .- y
diff --git a/projects/book_chapter/example_synthetic.jl b/projects/book_chapter/example_synthetic.jl
index a60c4cdf..50505483 100644
--- a/projects/book_chapter/example_synthetic.jl
+++ b/projects/book_chapter/example_synthetic.jl
@@ -14,6 +14,10 @@ using Pkg
 # Set project path and activate environment
 project_path = "projects/book_chapter"
 Pkg.activate(project_path)
+EasyHybrid_path = joinpath(pwd())
+Pkg.develop(path = EasyHybrid_path)
+#Pkg.resolve()
+#Pkg.instantiate()
 
 using EasyHybrid
 
@@ -55,7 +59,6 @@ parameters = (
 # =============================================================================
 # Define input variables
 forcing = [:ta] # Forcing variables (temperature)
-predictors = [:sw_pot, :dsw_pot] # Predictor variables (solar radiation, and its derivative)
 
 # Target variable
 target = [:reco] # Target variable (respiration)
@@ -65,11 +68,14 @@ global_param_names = [:Q10] # Global parameters (same for all samples)
 neural_param_names = [:rb] # Neural network predicted parameters
 
 # =============================================================================
-# Construct the Hybrid Model
+# Single NN Hybrid Model Training
 # =============================================================================
-# Create hybrid model using the unified constructor
-hybrid_model = constructHybridModel(
-    predictors, # Input features
+using WGLMakie
+# Create single NN hybrid model using the unified constructor
+predictors_single_nn = [:sw_pot, :dsw_pot] # Predictor variables (solar radiation, and its derivative)
+
+single_nn_hybrid_model = constructHybridModel(
+    predictors_single_nn, # Input features
     forcing, # Forcing variables
     target, # Target variables
    RbQ10, # Process-based model function
@@ -79,26 +85,81 @@ hybrid_model = constructHybridModel(
     hidden_layers = [16, 16], # Neural network architecture
     activation = sigmoid, # Activation function
     scale_nn_outputs = true, # Scale neural network outputs
-    input_batchnorm = false # Apply batch normalization to inputs
+    input_batchnorm = true # Apply batch normalization to inputs
+)
+# Train the hybrid model
+single_nn_out = train(
+    single_nn_hybrid_model,
+    ds,
+    ();
+    nepochs = 10, # Number of training epochs
+    batchsize = 512, # Batch size for training
+    opt = AdamW(0.1), # Optimizer and learning rate
+    monitor_names = [:rb, :Q10], # Parameters to monitor during training
+    yscale = identity, # Scaling for outputs
+    shuffleobs = true
 )
+LuxCore.testmode(single_nn_out.st)
+mean(ds.dsw_pot)
+mean(ds.sw_pot)
+
 # =============================================================================
-# Model Training
+# Multi NN Hybrid Model Training
 # =============================================================================
-using WGLMakie
+predictors_multi_nn = (rb = [:sw_pot, :dsw_pot],)
 
-# Train the hybrid model
-out = train(
-    hybrid_model,
+multi_nn_hybrid_model = constructHybridModel(
+    predictors_multi_nn, # Input features
+    forcing, # Forcing variables
+    target, # Target variables
+    RbQ10, # Process-based model function
+    parameters, # Parameter definitions
+    global_param_names, # Global parameters
+    hidden_layers = [16, 16], # Neural network architecture
+    activation = sigmoid, # Activation function
+    scale_nn_outputs = true, # Scale neural network outputs
+    input_batchnorm = true # Apply batch normalization to inputs
+)
+
+multi_nn_out = train(
+    multi_nn_hybrid_model,
     ds,
     ();
-    nepochs = 100, # Number of training epochs
+    nepochs = 10, # Number of training epochs
     batchsize = 512, # Batch size for training
     opt = AdamW(0.1), # Optimizer and learning rate
     monitor_names = [:rb, :Q10], # Parameters to monitor during training
-    yscale = identity # Scaling for outputs
+    yscale = identity, # Scaling for outputs
+    shuffleobs = true
 )
+LuxCore.testmode(multi_nn_out.st)
+mean(ds.dsw_pot)
+mean(ds.sw_pot)
+
+# =============================================================================
+# Pure ML Single NN Model Training
+# =============================================================================
+
+# TODO does not train well build on top of SingleNNHybridModel
+predictors_single_nn_ml = [:sw_pot, :dsw_pot, :ta]
+
+single_nn_model = constructNNModel(predictors_single_nn_ml, target; input_batchnorm = true, activation = tanh)
+single_nn_out = train(single_nn_model, ds, (); nepochs = 10, batchsize = 512, opt = AdamW(0.01), yscale = identity, shuffleobs = true)
+LuxCore.testmode(single_nn_out.st)
+mean(ds.dsw_pot)
+mean(ds.sw_pot)
+
+single_nn_model.targets
+
+# =============================================================================
+# Pure ML Multi NN Model Training
+# =============================================================================
+
+# TODO does not train well build on top of MultiNNHybridModel
+
+
 # =============================================================================
 # Results Analysis
 # =============================================================================
diff --git a/src/EasyHybrid.jl b/src/EasyHybrid.jl
index 11149668..9cccc784 100644
--- a/src/EasyHybrid.jl
+++ b/src/EasyHybrid.jl
@@ -30,6 +30,7 @@ using Reexport: @reexport
 using Statistics: Statistics, mean, cor, quantile, var
 using StyledStrings: StyledStrings, @styled_str
 using Zygote: Zygote
+using Static: False, True
 
 @reexport begin
     import LuxCore
diff --git a/src/models/BulkDensity_SOC.jl b/src/models/BulkDensity_SOC.jl
deleted file mode 100644
index 4aee2beb..00000000
--- a/src/models/BulkDensity_SOC.jl
+++ /dev/null
@@ -1,64 +0,0 @@
-export BulkDensitySOC
-export compute_bulk_density
-
-"""
-    BulkDensitySOC(NN, predictors, targets, oBD)
-
-A hybrid model with a neural network `NN`, `predictors` and one global parameter oBD.
-"""
-struct BulkDensitySOC{D, T1, T2, T3} <: LuxCore.AbstractLuxContainerLayer{(:NN, :predictors, :targets, :oBD)}
-    NN
-    predictors # names of predictors
-    targets # names of targets
-    oBD # organic matter bulk density
-    function BulkDensitySOC(NN::D, predictors::T1, targets::T2, oBD::T3) where {D, T1, T2, T3}
-        return new{D, T1, T2, T3}(NN, collect(predictors), collect(targets), [oBD])
-    end
-end
-
-# ? oBD is a parameter, so expand the initialparameters!
-function LuxCore.initialparameters(::AbstractRNG, layer::BulkDensitySOC)
-    ps, _ = LuxCore.setup(Random.default_rng(), layer.NN)
-    return (; ps, oBD = layer.oBD)
-end
-
-function LuxCore.initialstates(::AbstractRNG, layer::BulkDensitySOC)
-    _, st = LuxCore.setup(Random.default_rng(), layer.NN)
-    return (; st)
-end
-
-
-"""
-    compute_bulk_density(SOCconc, oBD, mBD)
-
-# model for bulk density based on the Federer (1993) paper http://dx.doi.org/10.1139/x93-131 plus SOC concentrations, density and coarse fraction
-"""
-function compute_bulk_density(SOCconc, oBD, mBD)
-    oF = SOCconc .* 1.724 # TODO: has to be a ratio
-    BD = @. oBD * mBD / (oF * mBD + (1.0f0 - oF) * oBD)
-    return BD
-end
-
-
-"""
-    BulkDensitySOC(NN, predictors, oBD)(ds_k)
-
-# Hybrid model for bulk density based on the Federer (1993) paper http://dx.doi.org/10.1139/x93-131 plus SOC concentrations, density and coarse fraction
-"""
-function (hm::BulkDensitySOC)(ds_p, ps, st::NamedTuple)
-    p = ds_p(hm.predictors)
-
-    out, stSOC = LuxCore.apply(hm.NN, p, ps.ps, st.st)
-
-    SOCconc = out[1, :] .* 0.6f0 #TODO has to be a ratio
-    CF = out[2, :]
-    mBD = out[3, :] .* (1.5f0 - 0.75f0) .+ 0.75f0 # mineral bulk density
-
-    oBD = sigmoid(ps.oBD) .* (0.4f0 - 0.05f0) .+ 0.05f0
-
-    BD = compute_bulk_density(SOCconc, oBD, mBD)
-
-    SOCdensity = @. SOCconc * BD * (1 - CF)
-
-    return (; SOCconc, CF, BD, SOCdensity, mBD), (; st = (; st = stSOC)) # removed oBD from here since its logged via ps.oBD
-end
diff --git a/src/models/GenericHybridModel.jl b/src/models/GenericHybridModel.jl
index efe9d856..b7fb01dd 100644
--- a/src/models/GenericHybridModel.jl
+++ b/src/models/GenericHybridModel.jl
@@ -34,7 +34,13 @@ end
 
 # ───────────────────────────────────────────────────────────────────────────
 # Single NN Hybrid Model Structure (optimized for performance)
-struct SingleNNHybridModel
+struct SingleNNHybridModel <: LuxCore.AbstractLuxContainerLayer{
+        (
+            :NN, #:predictors, :forcing, :targets,
+            #:mechanistic_model, :parameters, :neural_param_names, :global_param_names, :fixed_param_names,
+            #:scale_nn_outputs, :start_from_default,
+        ),
+    }
     NN::Chain
     predictors::Vector{Symbol}
     forcing::Vector{Symbol}
@@ -49,7 +55,14 @@ struct SingleNNHybridModel
 end
 
 # Multi-NN Hybrid Model Structure (optimized for performance)
-struct MultiNNHybridModel
+struct MultiNNHybridModel <: LuxCore.AbstractLuxContainerLayer{
+        (
+            :NNs, #:predictors, :forcing, :targets,
+            # :mechanistic_model, :parameters, :neural_param_names, :global_param_names, :fixed_param_names,
+            # :scale_nn_outputs, :start_from_default,
+        ),
+    }
+
     NNs::NamedTuple
     predictors::NamedTuple
     forcing::Vector{Symbol}
@@ -256,7 +269,7 @@ function LuxCore.initialstates(rng::AbstractRNG, m::SingleNNHybridModel)
         end
     end
 
-    nt = (; st = st_nn, fixed = nt)
+    nt = (; st_nn = st_nn, fixed = nt)
 
     return nt
 end
@@ -344,7 +357,7 @@ function (m::SingleNNHybridModel)(ds_k::KeyedArray, ps, st)
 
     # 3) scale NN parameters (handle empty case)
     if !isempty(m.neural_param_names)
-        nn_out, st_NN = LuxCore.apply(m.NN, predictors, ps.ps, st.st)
+        nn_out, st_nn = LuxCore.apply(m.NN, predictors, ps.ps, st.st_nn)
         nn_cols = eachrow(nn_out)
         nn_params = NamedTuple(zip(m.neural_param_names, nn_cols))
@@ -360,7 +373,7 @@ function (m::SingleNNHybridModel)(ds_k::KeyedArray, ps, st)
         scaled_nn_params = NamedTuple(zip(m.neural_param_names, scaled_nn_vals))
     else
         scaled_nn_params = NamedTuple()
-        st_NN = st.st
+        st_nn = st.st_nn
     end
 
     # 4) pick fixed parameters (handle empty case)
@@ -382,9 +395,9 @@ function (m::SingleNNHybridModel)(ds_k::KeyedArray, ps, st)
     y_pred = m.mechanistic_model(; all_kwargs...)
 
     out = (; y_pred..., parameters = all_params)
-    st_new = (; st = st_NN, fixed = st.fixed)
+    st_new = (; st_nn = st_nn, fixed = st.fixed)
 
-    return out, (; st = st_new)
+    return out, st_new
 end
 
 function (m::SingleNNHybridModel)(df::DataFrame, ps, st)
@@ -481,7 +494,7 @@ function (m::MultiNNHybridModel)(ds_k::KeyedArray, ps, st)
 
     st_new = (; nn_states..., fixed = st.fixed)
 
-    return out, (; st = st_new)
+    return out, st_new
 end
 
 function (m::MultiNNHybridModel)(df::DataFrame, ps, st)
diff --git a/src/models/NNModels.jl b/src/models/NNModels.jl
index 37ee18f2..d1ec716f 100644
--- a/src/models/NNModels.jl
+++ b/src/models/NNModels.jl
@@ -5,7 +5,11 @@ using ..EasyHybrid: hard_sigmoid
 
 # Pure Neural Network Models (no mechanistic component)
 
-struct SingleNNModel
+struct SingleNNModel <: LuxCore.AbstractLuxContainerLayer{
+        (
+            :NN, :predictors, :targets, :scale_nn_outputs,
+        ),
+    }
     NN::Chain
     predictors::Vector{Symbol}
     targets::Vector{Symbol}
@@ -98,7 +102,11 @@ function constructNNModel(
 end
 
 # MultiNNModel remains as before
-struct MultiNNModel
+struct MultiNNModel <: LuxCore.AbstractLuxContainerLayer{
+        (
+            :NNs, :predictors, :targets, :scale_nn_outputs,
+        ),
+    }
     NNs::NamedTuple
     predictors::NamedTuple
     targets::Vector{Symbol}
@@ -145,7 +153,7 @@ end
 
 # LuxCore initial states for SingleNNModel
 function LuxCore.initialstates(rng::AbstractRNG, m::SingleNNModel)
     _, st_nn = LuxCore.setup(rng, m.NN)
-    nt = (; st = st_nn)
+    nt = (; st_nn = st_nn)
     return nt
 end
@@ -163,7 +171,7 @@ end
 # Forward pass for SingleNNModel
 function (m::SingleNNModel)(ds_k, ps, st)
     predictors = ds_k(m.predictors)
-    nn_out, st_NN = LuxCore.apply(m.NN, predictors, ps.ps, st.st)
+    nn_out, st_nn = LuxCore.apply(m.NN, predictors, ps.ps, st.st_nn)
     nn_cols = eachrow(nn_out)
     nn_params = NamedTuple(zip(m.targets, nn_cols))
     if m.scale_nn_outputs
@@ -174,8 +182,8 @@ function (m::SingleNNModel)(ds_k, ps, st)
     scaled_nn_params = NamedTuple(zip(m.targets, scaled_nn_vals))
 
     out = (; scaled_nn_params...)
-    st_new = (; st = st_NN)
-    return out, (; st = st_new)
+    st_new = (; st_nn = st_nn)
+    return out, st_new
 end
 
 # Forward pass for MultiNNModel
@@ -206,17 +214,17 @@ function (m::MultiNNModel)(ds_k, ps, st)
     end
     out = (; scaled_nn_params..., nn_outputs = nn_outputs)
     st_new = (; nn_states...)
-    return out, (; st = st_new)
+    return out, st_new
 end
 
 # Display functions
-function Base.display(m::SingleNNModel)
+function Base.show(io::IO, ::MIME"text/plain", m::SingleNNModel)
     println("Neural Network: ", m.NN)
     println("Predictors: ", m.predictors)
     return println("scale NN outputs: ", m.scale_nn_outputs)
 end
 
-function Base.display(m::MultiNNModel)
+function Base.show(io::IO, ::MIME"text/plain", m::MultiNNModel)
     println("Neural Networks:")
     for (name, nn) in pairs(m.NNs)
         println("  $name: ", nn)
diff --git a/src/models/Respiration_Rb_Q10.jl b/src/models/Respiration_Rb_Q10.jl
index 3a8e834b..a0cf0d5a 100644
--- a/src/models/Respiration_Rb_Q10.jl
+++ b/src/models/Respiration_Rb_Q10.jl
@@ -51,13 +51,12 @@ ŷ (respiration rate) is computed as a function of the neural network output `R
 function (hm::RespirationRbQ10)(ds_k, ps, st::NamedTuple)
     p = ds_k(hm.predictors)
     x = Array(ds_k(hm.forcing)) # don't propagate names after this
-
     Rb, stQ10 = LuxCore.apply(hm.NN, p, ps.ps, st.st) #! NN(αᵢ(t)) ≡ Rb(T(t), M(t))
 
     #TODO output name flexible - could be R_soil, heterotrophic, autotrophic, etc.
     R_soil = mRbQ10(Rb, ps.Q10, x, 15.0f0) # ? should 15°C be the reference temperature also an input variable?
 
-    return (; R_soil, Rb), (; st = (; st = stQ10))
+    return (; R_soil, Rb), (; st = stQ10)
 end
 
 function (hm::RespirationRbQ10)(ds_k::AbstractDimArray, ps, st::NamedTuple)
@@ -69,5 +68,5 @@ function (hm::RespirationRbQ10)(ds_k::AbstractDimArray, ps, st::NamedTuple)
     #TODO output name flexible - could be R_soil, heterotrophic, autotrophic, etc.
     R_soil = mRbQ10(Rb, ps.Q10, x, 15.0f0) # ? should 15°C be the reference temperature also an input variable?
 
-    return (; R_soil, Rb), (; st = (; st = stQ10))
+    return (; R_soil, Rb), (; st = stQ10)
 end
diff --git a/src/models/models.jl b/src/models/models.jl
index 57bbcd41..82691b2d 100644
--- a/src/models/models.jl
+++ b/src/models/models.jl
@@ -9,7 +9,6 @@ abstract type EasyHybridModels end
 # include("SinusHybridModel.jl")
 include("LinearHM.jl")
 include("Respiration_Rb_Q10.jl")
-include("BulkDensity_SOC.jl")
 include("Rs_components.jl")
 include("simple_Rb_Q10_PBM.jl")
 include("GenericHybridModel.jl")
diff --git a/src/train.jl b/src/train.jl
index 61757f38..9eee122c 100644
--- a/src/train.jl
+++ b/src/train.jl
@@ -69,7 +69,8 @@ function train(
         batchsize = 64,
         opt = AdamW(0.01),
         patience = typemax(Int),
-
+        autodiff_backend = AutoZygote(),
+        return_gradients = True(),
         # Loss and evaluation
         training_loss = :mse,
         loss_types = [:mse, :r2],
@@ -120,8 +121,8 @@ function train(
    else
         ps, st = get_ps_st(train_from)
     end
-
-    opt_state = Optimisers.setup(opt, ps)
+    ps = ComponentArray(ps)
+    train_state = Lux.Training.TrainState(hybridModel, ps, st, opt)
 
     # ? initial losses
     is_no_nan_t = .!isnan.(y_train)
@@ -193,24 +194,28 @@ function train(
    @info "Check the saved output (.png, .mp4, .jld2) from training at: $(tmp_folder)"
 
     prog = Progress(nepochs, desc = "Training loss", enabled = show_progress)
+    loss(hybridModel, ps, st, (x, y)) = lossfn(
+        hybridModel, ps, st, (x, y);
+        logging = LoggingLoss(train_mode = true, loss_types = loss_types, training_loss = training_loss, agg = agg)
+    )
 
     maybe_record_history(!isnothing(ext), fig, joinpath(tmp_folder, "training_history_$(hybrid_name).mp4"); framerate = 24) do io
         for epoch in 1:nepochs
             for (x, y) in train_loader
                 # ? check NaN indices before going forward, and pass filtered `x, y`.
                 is_no_nan = .!isnan.(y)
                 if length(is_no_nan) > 0 # ! be careful here, multivariate needs fine tuning
-                    l, backtrace = Zygote.pullback(
-                        (ps) -> lossfn(
-                            hybridModel, x, (y, is_no_nan), ps, st,
-                            LoggingLoss(training_loss = training_loss, agg = agg)
-                        ), ps
+                    # ? let's keep grads, they might be useful for mixed gradient methods
+                    grads, loss_val, stats, train_state = Lux.Training.single_train_step!(
+                        autodiff_backend, loss, (x, (y, is_no_nan)), train_state;
+                        return_gradients
                     )
-                    grads = backtrace(l)[1]
-                    Optimisers.update!(opt_state, ps, grads)
-                    st = (; l[2].st...)
                 end
             end
+            # sync ps and st from train_state
+            ps = train_state.parameters
+            st = train_state.states
+
             ps_values = [copy(getproperty(ps, e)[1]) for e in save_ps]
             tmp_e = NamedTuple{save_ps}(ps_values)
@@ -355,10 +360,7 @@ function train(
 end
 
 function evaluate_acc(ghm, x, y, y_no_nan, ps, st, loss_types, training_loss, agg)
-    loss_val, sts, ŷ = lossfn(
-        ghm, x, (y, y_no_nan), ps, LuxCore.testmode(st),
-        LoggingLoss(train_mode = false, loss_types = loss_types, training_loss = training_loss, agg = agg)
-    )
+    loss_val, sts, ŷ = lossfn(ghm, ps, st, (x, (y, y_no_nan)), logging = LoggingLoss(train_mode = false, loss_types = loss_types, training_loss = training_loss, agg = agg))
     return loss_val, sts, ŷ
 end
 function maybe_record_history(block, should_record, fig, output_path; framerate = 24)
diff --git a/src/utils/logging_loss.jl b/src/utils/logging_loss.jl
index 27873d31..b3c85830 100644
--- a/src/utils/logging_loss.jl
+++ b/src/utils/logging_loss.jl
@@ -94,31 +94,20 @@ Main loss function for hybrid models that handles both training and evaluation m
 - In evaluation mode (`logging.train_mode = false`):
   - `(loss_values, st, ŷ)`: NamedTuple of losses, state and predictions
 """
-function lossfn(HM::LuxCore.AbstractLuxContainerLayer, x, (y_t, y_nan), ps, st, logging::LoggingLoss)
+function lossfn(HM::LuxCore.AbstractLuxContainerLayer, ps, st, (x, (y_t, y_nan)); logging::LoggingLoss)
     targets = HM.targets
-    ŷ, y, y_nan, st = get_predictions_targets(HM, x, (y_t, y_nan), ps, st, targets)
     if logging.train_mode
+        ŷ, y, y_nan, st = get_predictions_targets(HM, x, (y_t, y_nan), ps, st, targets)
         loss_value = compute_loss(ŷ, y, y_nan, targets, logging.training_loss, logging.agg)
-        return loss_value, st
+        stats = NamedTuple()
     else
+        ŷ, y, y_nan, _ = get_predictions_targets(HM, x, (y_t, y_nan), ps, LuxCore.testmode(st), targets)
         loss_value = compute_loss(ŷ, y, y_nan, targets, logging.loss_types, logging.agg)
-        return loss_value, st, ŷ
+        stats = (; ŷ...)
     end
+    return loss_value, st, stats
 end
 
-function lossfn(HM::Union{SingleNNHybridModel, MultiNNHybridModel, SingleNNModel, MultiNNModel}, x, (y_t, y_nan), ps, st, logging::LoggingLoss)
-    targets = HM.targets
-    ŷ, y, y_nan, st = get_predictions_targets(HM, x, (y_t, y_nan), ps, st, targets)
-    if logging.train_mode
-        loss_value = compute_loss(ŷ, y, y_nan, targets, logging.training_loss, logging.agg)
-        return loss_value, st
-    else
-        loss_value = compute_loss(ŷ, y, y_nan, targets, logging.loss_types, logging.agg)
-        return loss_value, st, ŷ
-    end
-end
-
-
 """
     get_predictions_targets(HM, x, (y_t, y_nan), ps, st, targets)
diff --git a/src/utils/show_generic.jl b/src/utils/show_generic.jl
index 9dda9cc2..d7c0d08f 100644
--- a/src/utils/show_generic.jl
+++ b/src/utils/show_generic.jl
@@ -5,7 +5,7 @@ function print_key_value(io, key, value; color = :light_red)
     return println(io)
 end
 
-function Base.show(io::IO, pc::ParameterContainer)
+function Base.show(io::IO, ::MIME"text/plain", pc::ParameterContainer)
     table = pc.table
     println(io)
     return PrettyTables.pretty_table(
@@ -16,7 +16,7 @@ function Base.show(io::IO, pc::ParameterContainer)
     )
 end
 
-function Base.show(io::IO, hm::SingleNNHybridModel)
+function Base.show(io::IO, ::MIME"text/plain", hm::SingleNNHybridModel)
     println(io, "Neural Network:")
     show(IndentedIO(io), MIME"text/plain"(), hm.NN)
     println(io)
@@ -32,7 +32,7 @@ function Base.show(io::IO, hm::SingleNNHybridModel)
     return show(IndentedIO(io), MIME"text/plain"(), hm.parameters)
 end
 
-function Base.show(io::IO, hm::MultiNNHybridModel)
+function Base.show(io::IO, ::MIME"text/plain", hm::MultiNNHybridModel)
     printstyled(io, "Neural Networks:"; color = :light_yellow)
     println(io)
     for (name, nn) in pairs(hm.NNs)
diff --git a/test/Project.toml b/test/Project.toml
index cdb79376..f2051aa7 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -5,7 +5,11 @@ ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[sources]
+EasyHybrid = {path = ".."}
diff --git a/test/runtests.jl b/test/runtests.jl
index f56852f5..540d16a3 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,7 +11,7 @@ dk_twos = gen_linear_data_2outputs()
 include("test_generic_hybrid_model.jl")
 # Include SplitData tests
 include("test_split_data_train.jl")
-
+include("test_autodiff_backend.jl")
 @testset "LinearHM" begin
 
     # test model instantiation
diff --git a/test/test_autodiff_backend.jl b/test/test_autodiff_backend.jl
new file mode 100644
index 00000000..4eca0bfa
--- /dev/null
+++ b/test/test_autodiff_backend.jl
@@ -0,0 +1,41 @@
+using ForwardDiff
+# using Mooncake
+# using Enzyme
+
+@testset "autodiff backends" verbose = true begin
+    df = make_synth_df(32) # keep it small/fast
+
+    forcing = [:ta]
+    predictors = [:sw_pot, :dsw_pot]
+    target = [:reco]
+    global_param_names = [:Q10]
+    neural_param_names = [:rb]
+
+    model = constructHybridModel(
+        predictors, forcing, target, RbQ10,
+        RbQ10_PARAMS, neural_param_names, global_param_names
+    )
+
+    ka = prepare_data(model, df)
+
+    _BACKENDS_SPEC = (
+        # ("EnzymeConst", AutoEnzyme(; function_annotation = Enzyme.Const)),
+        ("ForwardDiff", AutoForwardDiff()),
+        # ("Mooncake", AutoMooncake(; config = nothing)), # ? it needs special rrules
+        ("Zygote", AutoZygote()),
+    )
+    for (backend_name, backend_fn) in _BACKENDS_SPEC
+        @testset "backend: $backend_name" begin
+            out = train(
+                model, ka, ();
+                nepochs = 1,
+                batchsize = 12,
+                plotting = false,
+                show_progress = false,
+                hybrid_name = "test_$(backend_name)",
+                autodiff_backend = backend_fn,
+            )
+            @test !isnothing(out)
+        end
+    end
+end
diff --git a/test/test_generic_hybrid_model.jl b/test/test_generic_hybrid_model.jl
index efbe8fb0..e4e52c15 100644
--- a/test/test_generic_hybrid_model.jl
+++ b/test/test_generic_hybrid_model.jl
@@ -208,7 +208,7 @@ end
     rng = Random.default_rng()
     st = LuxCore.initialstates(rng, model)
 
-    @test haskey(st, :st) # Neural network states
+    @test haskey(st, :st_nn) # Neural network states
     @test haskey(st, :fixed) # Fixed parameters
     @test haskey(st.fixed, :c)
     @test length(st.fixed.c) == 1
@@ -247,8 +247,8 @@ end
     @test haskey(output.parameters, :a)
     @test haskey(output.parameters, :b)
     @test haskey(output.parameters, :c)
-    @test haskey(new_st, :st)
-    @test haskey(new_st.st, :fixed)
+    @test haskey(new_st, :st_nn)
+    @test haskey(new_st, :fixed)
 end
 
 @testset "SingleNNHybridModel with scale_nn_outputs=true" begin
@@ -413,9 +413,9 @@ end
     @test haskey(output.parameters, :b)
     @test haskey(output.parameters, :c)
     @test haskey(output.parameters, :d)
-    @test haskey(new_st.st, :a)
-    @test haskey(new_st.st, :d)
-    @test haskey(new_st.st, :fixed)
+    @test haskey(new_st, :a)
+    @test haskey(new_st, :d)
+    @test haskey(new_st, :fixed)
 end
 
 @testset "MultiNNHybridModel with scale_nn_outputs=true" begin