diff --git a/.gitignore b/.gitignore
index e760fcbf..cc53de42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,6 @@
 Manifest.toml
 .DS_Store
 *.csv
 *.nc
-/projects/BulkDSOC/eval/
 build/
 node_modules/
diff --git a/Project.toml b/Project.toml
index c474af24..f2df38d5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -27,6 +27,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
@@ -60,6 +61,7 @@ OptimizationOptimisers = "0.3.7"
 PrettyTables = "3.1.2"
 ProgressMeter = "1.10.4"
 Reexport = "1.2.2"
+Static = "1.3.1"
 StyledStrings = "1.0.3, 1.11.0"
 Zygote = "0.7.7"
 julia = "1.10"
diff --git a/projects/BulkDSOC/BulkDensitySOC.jl b/projects/BulkDSOC/BulkDensitySOC.jl
deleted file mode 100644
index 02f40135..00000000
--- a/projects/BulkDSOC/BulkDensitySOC.jl
+++ /dev/null
@@ -1,135 +0,0 @@
-# CC BY-SA 4.0
-using Pkg
-Pkg.activate("projects/BulkDSOC")
-Pkg.develop(path = pwd())
-Pkg.instantiate()
-
-using Revise
-using EasyHybrid
-using EasyHybrid.Lux
-# plotting
-using GLMakie
-using AlgebraOfGraphics
-import Plots as pl
-
-# ? move the `csv` file into the `BulkDSOC/data` folder (create folder)
-df = CSV.read(joinpath(@__DIR__, "./data/lucas_preprocessed.csv"), DataFrame, normalizenames = true)
-println(size(df))
-df_d = dropmissing(df)
-println(size(df_d))
-
-target_names = [:BD, :SOCconc, :CF, :SOCdensity]
-
-
-names_cov = Symbol.(names(df_d))[4:(end - 1)]
-#names_cov = Symbol.(names(df_d))[end-40:end-1]
-ds_all = to_keyedArray(df_d);
-
-
-ds_p = ds_all(names_cov);
-ds_t = ds_all(target_names)
-
-length(ds_t) * 10 # number of samples as guidance for number of parameters
-nfeatures = length(names_cov)
-p_dropout = 0.2
-NN = Lux.Chain(
-    Dense(nfeatures, 256, Lux.sigmoid),
-    Dropout(p_dropout),
-    Dense(256, 128, Lux.sigmoid),
-    Dropout(p_dropout),
-    Dense(128, 64, Lux.sigmoid),
-    Dropout(p_dropout),
-    Dense(64, 32, Lux.sigmoid),
-    Dropout(p_dropout),
-    Dense(32, 3, Lux.sigmoid) # Output layer
-)
-# ? we might need to set output bounds for the expected parameter values
-
-# ? do different initial oBDs
-BulkDSOC = BulkDensitySOC(NN, names_cov, target_names, 1.0f0)
-
-ps, st = LuxCore.setup(Random.default_rng(), BulkDSOC)
-# the Tuple `ds_p, ds_t` is later used for batching in the `dataloader`.
-ds_t_nan = .!isnan.(ds_t)
-
-ls = EasyHybrid.lossfn(BulkDSOC, ds_p, (ds_t, ds_t_nan), ps, LuxCore.testmode(st), LoggingLoss(train_mode = false)) # testmode to switch of dropout
-
-println(length(names_cov))
-out = train(BulkDSOC, (ds_p, ds_t), (:oBD,); nepochs = 100, batchsize = 32, opt = AdaMax(0.01));
-
-# plot train history
-series(WrappedTuples(out.train_history.mse); axis = (; xlabel = "epoch", ylabel = "loss", xscale = log10, yscale = log10))
-
-# physical parameter
-series(out.ps_history; axis = (; xlabel = "epoch", ylabel = ""))
-
-# plot trained bulk density function
-trained_oBD = out.ps_history.oBD[end] # taking the last one
-trained_mBD = out.train_diffs.mBD
-
-SOCrange = range(0.0, 0.5; step = 0.01)
-median_BDc = compute_bulk_density(SOCrange, trained_oBD, median(trained_mBD))
-q25_BDc = compute_bulk_density(SOCrange, trained_oBD, quantile(trained_mBD, 0.25))
-q75_BDc = compute_bulk_density(SOCrange, trained_oBD, quantile(trained_mBD, 0.75))
-
-pl.plot(ds_t(:SOCconc), ds_t(:BD), seriestype = :scatter, ylabel = :BD, xlabel = :SOCconc)
-pl.plot!(collect(SOCrange), median_BDc, width = 4.0, label = "q50")
-pl.plot!(collect(SOCrange), q25_BDc, width = 4.0, label = "q25")
-pl.plot!(collect(SOCrange), q75_BDc, width = 4.0, label = "q75")
-
-pl.scatter(out.train_obs_pred[!, :SOCconc], out.train_obs_pred[!, :SOCconc_pred])
-pl.scatter(out.train_obs_pred[!, :BD], out.train_obs_pred[!, :BD_pred])
-pl.scatter(out.train_obs_pred[!, :CF], out.train_obs_pred[!, :CF_pred])
-pl.scatter(out.train_obs_pred[!, :SOCdensity], out.train_obs_pred[!, :SOCdensity_pred])
-
-# AoG
-yvars = target_names # [:BD, :SOCconc, :CF, :SOCdensity]
-xvars = Symbol.(string.(yvars) .* "_pred")
-
-layers = visual(Scatter, alpha = 0.35)
-plt = data(out.train_obs_pred) * layers * mapping(xvars, yvars, col = dims(1) => renamer(string.(yvars)))
-plt *= mapping(color = dims(1) => renamer(string.(xvars)) => "Metrics")
-# linear
-l_linear = linear() * visual(color = :grey25)
-plt += data(out.train_obs_pred) * l_linear * mapping(xvars, yvars, col = dims(1) => renamer(string.(yvars)))
-
-with_theme(theme_light()) do
-    draw(
-        plt, scales(
-            X = (; label = rich("Prediction", font = :bold)),
-            Y = (; label = "Observation"),
-            Color = (; palette = [:tomato, :teal, :orange, :dodgerblue3])
-        ),
-        # legend = (; show = false),
-        legend = (; position = :bottom, titleposition = :left, merge = false),
-        facet = (; linkxaxes = :none, linkyaxes = :none),
-        figure = (; size = (1400, 400))
-    )
-end
-
-with_theme(theme_light()) do
-    fig = Figure(; size = (1200, 300))
-    ax = Makie.Axis(
-        fig[1, 1], title = "Loss",
-        # yscale=log10,
-        xscale = log10
-    )
-    lines!(ax, WrappedTuples(out.train_history.mse).sum, color = :orangered, label = "train")
-    lines!(ax, WrappedTuples(out.val_history.mse).sum, color = :dodgerblue, label = "validation")
-    # limits!(ax, 1, 1000, 0.04, 1)
-    axislegend()
-    fig
-end
-
-layers = visual(Scatter, alpha = 0.65)
-plt = data(out.val_obs_pred) * layers * mapping(:index, [:BD, :BD_pred]) *
-    mapping(color = dims(1) => renamer(string.([:BD, :BD_pred])))
-
-with_theme(theme_light()) do
-    colors = ["a" => :tomato, "c" => :lime]
-    draw(
-        plt, scales(Color = (; palette = [:grey25, :tomato])),
-        legend = (position = :top,),
-        figure = (; size = (1400, 400))
-    )
-end
diff --git a/projects/BulkDSOC/DataPreProcess.jl b/projects/BulkDSOC/DataPreProcess.jl
deleted file mode 100644
index 797f0fea..00000000
--- a/projects/BulkDSOC/DataPreProcess.jl
+++ /dev/null
@@ -1,114 +0,0 @@
-# CC BY-SA 4.0
-using Revise
-using EasyHybrid
-using EasyHybrid.MLUtils
-
-# ? move the `csv` file into the `BulkDSOC/data` folder (create folder)
-df_o = CSV.read(joinpath(@__DIR__, "./data/lucas_overlaid.csv"), DataFrame, normalizenames = true)
-println(size(df_o))
-
-coords = collect(zip(df_o.lat, df_o.lon))
-
-using Rasters
-
-zarr_path = "/Net/Groups/BGI/scratch/HYCO/npp.zarr/"
-data = Raster(zarr_path)
-
-# t clean covariates
-names_cov = Symbol.(names(df_o))[19:end]
-names_meta = Symbol.(names(df_o))[1:18]
-
-# Fix soilsuite and cropland extent columns
-for col in names_cov
-    if occursin("_soilsuite_", String(col))
-        df_o[!, col] = replace(df_o[!, col], missing => 0)
-    elseif occursin("cropland_extent_", String(col))
-        df_o[!, col] = replace(df_o[!, col], missing => 0)
-        df_o[!, col] .= ifelse.(df_o[!, col] .> 0, 1, 0)
-    end
-end
-
-# rm missing values: 1. >5%, drop col; 2. <=5%, drop row
-cols_to_drop_row = Symbol[]
-cols_to_drop_col = Symbol[]
-for col in names_cov
-    n_missing = count(ismissing, df_o[!, col])
-    frac_missing = n_missing / nrow(df_o)
-    if frac_missing > 0.05
-        println(n_missing, " ", col)
-        select!(df_o, Not(col)) # drop the column
-        push!(cols_to_drop_col, col)
-    elseif n_missing > 0
-        # println(n_missing, " ", col)
-        push!(cols_to_drop_row, col) # collect column name
-    end
-
-    if occursin("CHELSA_kg", String(col))
-        push!(cols_to_drop_col, col)
-        select!(df_o, Not(col)) # rm kg catagorical col
-    end
-end
-
-names_cov = filter(x -> !(x in cols_to_drop_col), names_cov) # remove cols-to-drop from names_cov
-if !isempty(cols_to_drop_row)
-    df_o = subset(df_o, cols_to_drop_row .=> ByRow(!ismissing)) # drop rows with missing values in cols_to_drop_row
-end
-println(size(df_o))
-
-cols_to_drop_col = Symbol[]
-for col in names_cov
-    if std(df_o[:, col]) == 0
-        push!(cols_to_drop_col, col) # rm constant col (std==0)
-        select!(df_o, Not(col))
-    end
-end
-names_cov = filter(x -> !(x in cols_to_drop_col), names_cov) # remove cols-to-drop from names_cov
-println(size(df_o))
-
-
-df = df_o[:, [:bulk_density_fe, :soc, :coarse_vol, names_cov...]]
-
-# ? match target_names
-rename!(df, :bulk_density_fe => :BD, :soc => :SOCconc, :coarse_vol => :CF) # rename as in hybrid model
-# BD g/cm3, SOCconc g/kg, CF [0,1]
-
-df[:, :SOCconc] .= df[:, :SOCconc] ./ 1000 # convert to fraction
-
-# ? calculate SOC density
-df[!, :SOCdensity] = df.BD .* df.SOCconc .* (1 .- df.CF) # SOCdensity ton/m3
-target_names = [:BD, :SOCconc, :CF, :SOCdensity]
-# df[:, target_names] = replace.(df[:, target_names], missing => NaN) # replace missing with NaN
-
-# for col in names_cov # to check covairate distribution
-#     println(string(col)[1:10], ' ', round(std(df[:, col]); digits=2), ' ', round(mean(df[:, col]); digits=2))
-# end
-
-# # Normalize covariates with std>1
-means = mean.(eachcol(df[:, names_cov]))
-stds = std.(eachcol(df[:, names_cov]))
-for col in names_cov
-    df[!, col] = Float64.(df[!, col])
-end
-df[:, names_cov] .= (df[:, names_cov] .- means') ./ stds'
-
-println(size(df))
-
-# CSV.write(joinpath(@__DIR__, "data/lucas_preprocessed.csv"), df)
-
-
-# plot BD vs SOCconc
-bd_lims = extrema(skipmissing(df[:, "BD"]))
-soc_lims = extrema(skipmissing(df[:, "SOCconc"]))
-plt = histogram2d(
-    df[:, "BD"], df[:, "SOCconc"];
-    nbins = (30, 30),
-    cbar = true,
-    xlab = "BD",
-    ylab = "SOCconc",
-    xlims = bd_lims, ylims = soc_lims,
-    #title = "SOCdensity-MTD\nR2=$(round(r2, digits=3)), MAE=$(round(mae, digits=3)), bias=$(round(bias, digits=3))",
-    color = cgrad(:bamako, rev = true),
-    normalize = false,
-    size = (460, 400)
-)
-savefig(plt, joinpath(@__DIR__, "./eval/00_truth_BD.vs.SOCconc.png"))
diff --git a/projects/BulkDSOC/Project.toml b/projects/BulkDSOC/Project.toml
deleted file mode 100644
index 6efec671..00000000
--- a/projects/BulkDSOC/Project.toml
+++ /dev/null
@@ -1,11 +0,0 @@
-[deps]
-AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67"
-EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3"
-Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
-Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
-StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
-StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
-Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/projects/BulkDSOC/UniNaiveNN_EasyHybridNN.jl b/projects/BulkDSOC/UniNaiveNN_EasyHybridNN.jl
deleted file mode 100644
index 35a3d9b8..00000000
--- a/projects/BulkDSOC/UniNaiveNN_EasyHybridNN.jl
+++ /dev/null
@@ -1,126 +0,0 @@
-# CC BY-SA 4.0
-project_path = "projects/BulkDSOC"
-Pkg.activate(project_path)
-
-# Only instantiate if Manifest.toml is missing
-manifest_path = joinpath(project_path, "Manifest.toml")
-if !isfile(manifest_path)
-    Pkg.develop(path = pwd())
-    Pkg.instantiate()
-end
-using EasyHybrid
-using WGLMakie
-using Random
-using EasyHybrid.MLUtils
-using Statistics
-using Plots
-# using StatsBase
-
-import Flux
-
-# 01 - univariate naive NN using EasyHybrid's SingleNNModel
-
-testid = "01_univariate_EasyHybridNN"
-
-# input
-df = CSV.read(joinpath(@__DIR__, "./data/lucas_preprocessed.csv"), DataFrame, normalizenames = true)
-df_d = dropmissing(df) # complete SOCD
-
-target_names = [:BD, :SOCconc, :CF, :SOCdensity]
-names_cov = Symbol.(names(df_d))[4:(end - 1)]
-ds_all = to_keyedArray(Float32.(df_d))
-ds_p = ds_all(names_cov)
-ds_t = ds_all(target_names)
-ds_t = Flux.normalise(ds_t)
-
-df_out = DataFrame()
-
-nfeatures = length(names_cov)
-p_dropout = 0.2
-
-for (i, tname) in enumerate(target_names)
-
-    y = ds_t([tname])
-    # Use EasyHybrid's constructNNModel
-    predictors = names_cov
-    targets = [tname]
-    neural_param_names = [tname]
-    model = EasyHybrid.constructNNModel(predictors, targets; hidden_layers = [32], scale_nn_outputs = false)
-    #model = EasyHybrid.constructNNModel(predictors, targets; hidden_layers=Chain(Dense(32, 32, tanh), Dense(32, 16, tanh)), scale_nn_outputs=false)
-
-    ps, st = LuxCore.setup(Random.default_rng(), model)
-    # Training using EasyHybrid's train function
-    result = train(model, (ds_p, y), (); nepochs = 100, batchsize = 512, opt = AdamW(0.0001, (0.9, 0.999), 0.01), training_loss = :nse, loss_types = [:mse, :nse], shuffleobs = true, yscale = identity)
-
-    y_val_true = vec(result.val_obs_pred[!, tname])
-    y_val_pred = vec(result.val_obs_pred[!, Symbol(string(tname, "_pred"))])
-    df_out[!, "true_$(tname)"] = y_val_true
-    df_out[!, "pred_$(tname)"] = y_val_pred
-    ss_res = sum((y_val_true .- y_val_pred) .^ 2)
-    ss_tot = sum((y_val_true .- mean(y_val_true)) .^ 2)
-    r2 = 1 - ss_res / ss_tot
-    mae = mean(abs.(y_val_pred .- y_val_true))
-    bias = mean(y_val_pred .- y_val_true)
-    plt = histogram2d(
-        y_val_true, y_val_pred;
-        nbins = (30, 30),
-        cbar = true,
-        xlab = "True",
-        ylab = "Predicted",
-        title = "$tname\nR2=$(round(r2, digits = 3)),MAE=$(round(mae, digits = 3)),bias=$(round(bias, digits = 3))",
-        color = cgrad(:bamako, rev = true),
-        normalize = false
-    )
-    lims = extrema(vcat(y_val_true, y_val_pred))
-    Plots.plot!(
-        plt,
-        [lims[1], lims[2]], [lims[1], lims[2]];
-        color = :black, linewidth = 2, label = "1:1 line",
-        aspect_ratio = :equal, xlims = lims, ylims = lims
-    )
-    savefig(plt, joinpath(@__DIR__, "./eval/$(testid)_accuracy_$(tname).png"))
-
-end
-
-# TODO undo the z-transformation
-df_out[:, "pred_calc_SOCdensity"] = df_out[:, "pred_SOCconc"] .* df_out[:, "pred_BD"] .* (1 .- df_out[:, "pred_CF"])
-true_SOCdensity = df_out[:, "true_SOCdensity"]
-pred_SOCdensity = df_out[:, "pred_calc_SOCdensity"]
-ss_res = sum((true_SOCdensity .- pred_SOCdensity) .^ 2)
-ss_tot = sum((true_SOCdensity .- mean(true_SOCdensity)) .^ 2)
-r2 = 1 - ss_res / ss_tot
-mae = mean(abs.(pred_SOCdensity .- true_SOCdensity))
-bias = mean(pred_SOCdensity .- true_SOCdensity)
-plt = histogram2d(
-    true_SOCdensity, pred_SOCdensity;
-    nbins = (30, 30),
-    cbar = true,
-    xlab = "True",
-    ylab = "Predicted",
-    title = "SOCdensity-MTD\nR2=$(round(r2, digits = 3)), MAE=$(round(mae, digits = 3)), bias=$(round(bias, digits = 3))",
-    color = cgrad(:bamako, rev = true),
-    normalize = false
-)
-lims = extrema(vcat(true_SOCdensity, pred_SOCdensity))
-Plots.plot!(
-    plt,
-    [lims[1], lims[2]], [lims[1], lims[2]];
-    color = :black, linewidth = 2, label = "1:1 line",
-    aspect_ratio = :equal, xlims = lims, ylims = lims
-)
-savefig(plt, joinpath(@__DIR__, "./eval/$(testid)_accuracy_SOCdensity_MTD.png"))
-
-bd_lims = extrema(skipmissing(df_out[:, "pred_BD"]))
-soc_lims = extrema(skipmissing(df_out[:, "pred_SOCconc"]))
-plt = histogram2d(
-    df_out[:, "pred_BD"], df_out[:, "pred_SOCconc"];
-    nbins = (30, 30),
-    cbar = true,
-    xlab = "BD",
-    ylab = "SOCconc",
-    xlims = bd_lims, ylims = soc_lims,
-    color = cgrad(:bamako, rev = true),
-    normalize = false,
-    size = (460, 400)
-)
-savefig(plt, joinpath(@__DIR__, "./eval/$(testid)_BD.vs.SOCconc.png"))
diff --git a/projects/RbQ10/Project.toml b/projects/RbQ10/Project.toml
index d1f9c50b..ee0f9637 100644
--- a/projects/RbQ10/Project.toml
+++ b/projects/RbQ10/Project.toml
@@ -2,5 +2,6 @@
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3"
+Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/projects/RbQ10/Q10.jl b/projects/RbQ10/Q10.jl
index 6b24e888..b802b238 100644
--- a/projects/RbQ10/Q10.jl
+++ b/projects/RbQ10/Q10.jl
@@ -7,9 +7,10 @@ using Pkg
 
 # start using the package
 using EasyHybrid
+using Enzyme
 
 # for Plotting
-using GLMakie
+# using GLMakie
 # using AlgebraOfGraphics
 
 # Local Path (MPI-BGC server):
@@ -28,12 +29,29 @@ forcing_names = [:cham_temp_filled]
 predictor_names = [:moisture_filled, :rgpot2]
 
 # Define neural network
-NN = Chain(Dense(2, 15, relu), Dense(15, 15, relu), Dense(15, 1));
+NN = Chain(Lux.BatchNorm(2, affine = false), Dense(2, 15, relu), Dense(15, 15, relu), Dense(15, 1));
 
 # instantiate Hybrid Model
 RbQ10 = RespirationRbQ10(NN, predictor_names, forcing_names, target_names, 2.5f0) # ? do different initial Q10s
 
 # train model
 out = train(RbQ10, ds_keyed, (:Q10,); nepochs = 100, batchsize = 512, opt = Adam(0.01), monitor_names = [:Rb]);
+out.st
+mean(ds_keyed(:moisture_filled))
+mean(ds_keyed(:rgpot2)) # BatchNorm works as expected
+
+# ? with AutoEnzyme()
+# ! KeyedArrays failed!
+using DimensionalData, ChainRulesCore
+# mat = Matrix(df)'
+mat = Array(Matrix(df)')
+da = DimArray(mat, (Dim{:col}(Symbol.(names(df))), Dim{:row}(1:size(df, 1))))
+
+out = train(
+    RbQ10, da, (:Q10,); nepochs = 100, batchsize = 512, opt = Adam(0.01),
+    autodiff_backend = AutoEnzyme(),
+    monitor_names = [:Rb]
+);
+
 # test custom loss function, keyword arguments
 function pinball(ŷ, y; τ = 0.9)
     r = ŷ .- y
diff --git a/projects/book_chapter/example_synthetic.jl b/projects/book_chapter/example_synthetic.jl
index a60c4cdf..50505483 100644
--- a/projects/book_chapter/example_synthetic.jl
+++ b/projects/book_chapter/example_synthetic.jl
@@ -14,6 +14,10 @@ using Pkg
 # Set project path and activate environment
 project_path = "projects/book_chapter"
 Pkg.activate(project_path)
+EasyHybrid_path = joinpath(pwd())
+Pkg.develop(path = EasyHybrid_path)
+#Pkg.resolve()
+#Pkg.instantiate()
 
 using EasyHybrid
 
@@ -55,7 +59,6 @@ parameters = (
 # =============================================================================
 # Define input variables
 forcing = [:ta] # Forcing variables (temperature)
-predictors = [:sw_pot, :dsw_pot] # Predictor variables (solar radiation, and its derivative)
 
 # Target variable
 target = [:reco] # Target variable (respiration)
@@ -65,11 +68,14 @@ global_param_names = [:Q10] # Global parameters (same for all samples)
 neural_param_names = [:rb] # Neural network predicted parameters
 
 # =============================================================================
-# Construct the Hybrid Model
+# Single NN Hybrid Model Training
 # =============================================================================
-# Create hybrid model using the unified constructor
-hybrid_model = constructHybridModel(
-    predictors, # Input features
+using WGLMakie
+# Create single NN hybrid model using the unified constructor
+predictors_single_nn = [:sw_pot, :dsw_pot] # Predictor variables (solar radiation, and its derivative)
+
+single_nn_hybrid_model = constructHybridModel(
+    predictors_single_nn, # Input features
     forcing, # Forcing variables
     target, # Target variables
    RbQ10, # Process-based model function
@@ -79,26 +85,81 @@ hybrid_model = constructHybridModel(
     hidden_layers = [16, 16], # Neural network architecture
     activation = sigmoid, # Activation function
     scale_nn_outputs = true, # Scale neural network outputs
-    input_batchnorm = false # Apply batch normalization to inputs
+    input_batchnorm = true # Apply batch normalization to inputs
+)
+# Train the hybrid model
+single_nn_out = train(
+    single_nn_hybrid_model,
+    ds,
+    ();
+    nepochs = 10, # Number of training epochs
+    batchsize = 512, # Batch size for training
+    opt = AdamW(0.1), # Optimizer and learning rate
+    monitor_names = [:rb, :Q10], # Parameters to monitor during training
+    yscale = identity, # Scaling for outputs
+    shuffleobs = true
 )
+LuxCore.testmode(single_nn_out.st)
+mean(ds.dsw_pot)
+mean(ds.sw_pot)
+
 # =============================================================================
-# Model Training
+# Multi NN Hybrid Model Training
 # =============================================================================
-using WGLMakie
+predictors_multi_nn = (rb = [:sw_pot, :dsw_pot],)
 
-# Train the hybrid model
-out = train(
-    hybrid_model,
+multi_nn_hybrid_model = constructHybridModel(
+    predictors_multi_nn, # Input features
+    forcing, # Forcing variables
+    target, # Target variables
+    RbQ10, # Process-based model function
+    parameters, # Parameter definitions
+    global_param_names, # Global parameters
+    hidden_layers = [16, 16], # Neural network architecture
+    activation = sigmoid, # Activation function
+    scale_nn_outputs = true, # Scale neural network outputs
+    input_batchnorm = true # Apply batch normalization to inputs
+)
+
+multi_nn_out = train(
+    multi_nn_hybrid_model,
     ds,
     ();
-    nepochs = 100, # Number of training epochs
+    nepochs = 10, # Number of training epochs
     batchsize = 512, # Batch size for training
     opt = AdamW(0.1), # Optimizer and learning rate
     monitor_names = [:rb, :Q10], # Parameters to monitor during training
-    yscale = identity # Scaling for outputs
+    yscale = identity, # Scaling for outputs
+    shuffleobs = true
 )
+LuxCore.testmode(multi_nn_out.st)
+mean(ds.dsw_pot)
+mean(ds.sw_pot)
+
+# =============================================================================
+# Pure ML Single NN Model Training
+# =============================================================================
+
+# TODO does not train well build on top of SingleNNHybridModel
+predictors_single_nn_ml = [:sw_pot, :dsw_pot, :ta]
+
+single_nn_model = constructNNModel(predictors_single_nn_ml, target; input_batchnorm = true, activation = tanh)
+single_nn_out = train(single_nn_model, ds, (); nepochs = 10, batchsize = 512, opt = AdamW(0.01), yscale = identity, shuffleobs = true)
+LuxCore.testmode(single_nn_out.st)
+mean(ds.dsw_pot)
+mean(ds.sw_pot)
+
+single_nn_model.targets
+
+# =============================================================================
+# Pure ML Multi NN Model Training
+# =============================================================================
+
+# TODO does not train well build on top of MultiNNHybridModel
+
+
 # =============================================================================
 # Results Analysis
 # =============================================================================
diff --git a/src/EasyHybrid.jl b/src/EasyHybrid.jl
index 11149668..9cccc784 100644
--- a/src/EasyHybrid.jl
+++ b/src/EasyHybrid.jl
@@ -30,6 +30,7 @@ using Reexport: @reexport
 using Statistics: Statistics, mean, cor, quantile, var
 using StyledStrings: StyledStrings, @styled_str
 using Zygote: Zygote
+using Static: False, True
 
 @reexport begin
     import LuxCore
diff --git a/src/models/BulkDensity_SOC.jl b/src/models/BulkDensity_SOC.jl
deleted file mode 100644
index 4aee2beb..00000000
--- a/src/models/BulkDensity_SOC.jl
+++ /dev/null
@@ -1,64 +0,0 @@
-export BulkDensitySOC
-export compute_bulk_density
-
-"""
-    BulkDensitySOC(NN, predictors, targets, oBD)
-
-A hybrid model with a neural network `NN`, `predictors` and one global parameter oBD.
-"""
-struct BulkDensitySOC{D, T1, T2, T3} <: LuxCore.AbstractLuxContainerLayer{(:NN, :predictors, :targets, :oBD)}
-    NN
-    predictors # names of predictors
-    targets # names of targets
-    oBD # organic matter bulk density
-    function BulkDensitySOC(NN::D, predictors::T1, targets::T2, oBD::T3) where {D, T1, T2, T3}
-        return new{D, T1, T2, T3}(NN, collect(predictors), collect(targets), [oBD])
-    end
-end
-
-# ? oBD is a parameter, so expand the initialparameters!
-function LuxCore.initialparameters(::AbstractRNG, layer::BulkDensitySOC)
-    ps, _ = LuxCore.setup(Random.default_rng(), layer.NN)
-    return (; ps, oBD = layer.oBD)
-end
-
-function LuxCore.initialstates(::AbstractRNG, layer::BulkDensitySOC)
-    _, st = LuxCore.setup(Random.default_rng(), layer.NN)
-    return (; st)
-end
-
-
-"""
-    compute_bulk_density(SOCconc, oBD, mBD)
-
-# model for bulk density based on the Federer (1993) paper http://dx.doi.org/10.1139/x93-131 plus SOC concentrations, density and coarse fraction
-"""
-function compute_bulk_density(SOCconc, oBD, mBD)
-    oF = SOCconc .* 1.724 # TODO: has to be a ratio
-    BD = @. oBD * mBD / (oF * mBD + (1.0f0 - oF) * oBD)
-    return BD
-end
-
-
-"""
-    BulkDensitySOC(NN, predictors, oBD)(ds_k)
-
-# Hybrid model for bulk density based on the Federer (1993) paper http://dx.doi.org/10.1139/x93-131 plus SOC concentrations, density and coarse fraction
-"""
-function (hm::BulkDensitySOC)(ds_p, ps, st::NamedTuple)
-    p = ds_p(hm.predictors)
-
-    out, stSOC = LuxCore.apply(hm.NN, p, ps.ps, st.st)
-
-    SOCconc = out[1, :] .* 0.6f0 #TODO has to be a ratio
-    CF = out[2, :]
-    mBD = out[3, :] .* (1.5f0 - 0.75f0) .+ 0.75f0 # mineral bulk density
-
-    oBD = sigmoid(ps.oBD) .* (0.4f0 - 0.05f0) .+ 0.05f0
-
-    BD = compute_bulk_density(SOCconc, oBD, mBD)
-
-    SOCdensity = @. SOCconc * BD * (1 - CF)
-
-    return (; SOCconc, CF, BD, SOCdensity, mBD), (; st = (; st = stSOC)) # removed oBD from here since its logged via ps.oBD
-end
diff --git a/src/models/GenericHybridModel.jl b/src/models/GenericHybridModel.jl
index efe9d856..b7fb01dd 100644
--- a/src/models/GenericHybridModel.jl
+++ b/src/models/GenericHybridModel.jl
@@ -34,7 +34,13 @@ end
 
 # ───────────────────────────────────────────────────────────────────────────
 # Single NN Hybrid Model Structure (optimized for performance)
-struct SingleNNHybridModel
+struct SingleNNHybridModel <: LuxCore.AbstractLuxContainerLayer{
+        (
+            :NN, #:predictors, :forcing, :targets,
+            #:mechanistic_model, :parameters, :neural_param_names, :global_param_names, :fixed_param_names,
+            #:scale_nn_outputs, :start_from_default,
+        ),
+    }
     NN::Chain
     predictors::Vector{Symbol}
     forcing::Vector{Symbol}
@@ -49,7 +55,14 @@ struct SingleNNHybridModel
 end
 
 # Multi-NN Hybrid Model Structure (optimized for performance)
-struct MultiNNHybridModel
+struct MultiNNHybridModel <: LuxCore.AbstractLuxContainerLayer{
+        (
+            :NNs, #:predictors, :forcing, :targets,
+            # :mechanistic_model, :parameters, :neural_param_names, :global_param_names, :fixed_param_names,
+            # :scale_nn_outputs, :start_from_default,
+        ),
+    }
+
     NNs::NamedTuple
     predictors::NamedTuple
     forcing::Vector{Symbol}
@@ -256,7 +269,7 @@ function LuxCore.initialstates(rng::AbstractRNG, m::SingleNNHybridModel)
         end
     end
 
-    nt = (; st = st_nn, fixed = nt)
+    nt = (; st_nn = st_nn, fixed = nt)
 
     return nt
 end
@@ -344,7 +357,7 @@ function (m::SingleNNHybridModel)(ds_k::KeyedArray, ps, st)
 
     # 3) scale NN parameters (handle empty case)
     if !isempty(m.neural_param_names)
-        nn_out, st_NN = LuxCore.apply(m.NN, predictors, ps.ps, st.st)
+        nn_out, st_nn = LuxCore.apply(m.NN, predictors, ps.ps, st.st_nn)
         nn_cols = eachrow(nn_out)
         nn_params = NamedTuple(zip(m.neural_param_names, nn_cols))
@@ -360,7 +373,7 @@ function (m::SingleNNHybridModel)(ds_k::KeyedArray, ps, st)
         scaled_nn_params = NamedTuple(zip(m.neural_param_names, scaled_nn_vals))
     else
         scaled_nn_params = NamedTuple()
-        st_NN = st.st
+        st_nn = st.st_nn
     end
 
     # 4) pick fixed parameters (handle empty case)
@@ -382,9 +395,9 @@ function (m::SingleNNHybridModel)(ds_k::KeyedArray, ps, st)
     y_pred = m.mechanistic_model(; all_kwargs...)
 
     out = (; y_pred..., parameters = all_params)
-    st_new = (; st = st_NN, fixed = st.fixed)
+    st_new = (; st_nn = st_nn, fixed = st.fixed)
 
-    return out, (; st = st_new)
+    return out, st_new
 end
 
 function (m::SingleNNHybridModel)(df::DataFrame, ps, st)
@@ -481,7 +494,7 @@ function (m::MultiNNHybridModel)(ds_k::KeyedArray, ps, st)
 
     st_new = (; nn_states..., fixed = st.fixed)
 
-    return out, (; st = st_new)
+    return out, st_new
 end
 
 function (m::MultiNNHybridModel)(df::DataFrame, ps, st)
diff --git a/src/models/NNModels.jl b/src/models/NNModels.jl
index 37ee18f2..d1ec716f 100644
--- a/src/models/NNModels.jl
+++ b/src/models/NNModels.jl
@@ -5,7 +5,11 @@ using ..EasyHybrid: hard_sigmoid
 
 # Pure Neural Network Models (no mechanistic component)
 
-struct SingleNNModel
+struct SingleNNModel <: LuxCore.AbstractLuxContainerLayer{
+        (
+            :NN, :predictors, :targets, :scale_nn_outputs,
+        ),
+    }
     NN::Chain
     predictors::Vector{Symbol}
     targets::Vector{Symbol}
@@ -98,7 +102,11 @@ function constructNNModel(
 end
 
 # MultiNNModel remains as before
-struct MultiNNModel
+struct MultiNNModel <: LuxCore.AbstractLuxContainerLayer{
+        (
+            :NNs, :predictors, :targets, :scale_nn_outputs,
+        ),
+    }
     NNs::NamedTuple
     predictors::NamedTuple
     targets::Vector{Symbol}
@@ -145,7 +153,7 @@ end
 
 # LuxCore initial states for SingleNNModel
 function LuxCore.initialstates(rng::AbstractRNG, m::SingleNNModel)
     _, st_nn = LuxCore.setup(rng, m.NN)
-    nt = (; st = st_nn)
+    nt = (; st_nn = st_nn)
     return nt
 end
@@ -163,7 +171,7 @@ end
 # Forward pass for SingleNNModel
 function (m::SingleNNModel)(ds_k, ps, st)
     predictors = ds_k(m.predictors)
-    nn_out, st_NN = LuxCore.apply(m.NN, predictors, ps.ps, st.st)
+    nn_out, st_nn = LuxCore.apply(m.NN, predictors, ps.ps, st.st_nn)
     nn_cols = eachrow(nn_out)
     nn_params = NamedTuple(zip(m.targets, nn_cols))
     if m.scale_nn_outputs
@@ -174,8 +182,8 @@ function (m::SingleNNModel)(ds_k, ps, st)
     scaled_nn_params = NamedTuple(zip(m.targets, scaled_nn_vals))
 
     out = (; scaled_nn_params...)
-    st_new = (; st = st_NN)
-    return out, (; st = st_new)
+    st_new = (; st_nn = st_nn)
+    return out, st_new
 end
 
 # Forward pass for MultiNNModel
@@ -206,17 +214,17 @@ function (m::MultiNNModel)(ds_k, ps, st)
     end
     out = (; scaled_nn_params..., nn_outputs = nn_outputs)
     st_new = (; nn_states...)
-    return out, (; st = st_new)
+    return out, st_new
 end
 
 # Display functions
-function Base.display(m::SingleNNModel)
+function Base.show(io::IO, ::MIME"text/plain", m::SingleNNModel)
     println("Neural Network: ", m.NN)
     println("Predictors: ", m.predictors)
     return println("scale NN outputs: ", m.scale_nn_outputs)
 end
 
-function Base.display(m::MultiNNModel)
+function Base.show(io::IO, ::MIME"text/plain", m::MultiNNModel)
     println("Neural Networks:")
     for (name, nn) in pairs(m.NNs)
         println("  $name: ", nn)
diff --git a/src/models/Respiration_Rb_Q10.jl b/src/models/Respiration_Rb_Q10.jl
index 3a8e834b..a0cf0d5a 100644
--- a/src/models/Respiration_Rb_Q10.jl
+++ b/src/models/Respiration_Rb_Q10.jl
@@ -51,13 +51,12 @@ ŷ (respiration rate) is computed as a function of the neural network output `R
 function (hm::RespirationRbQ10)(ds_k, ps, st::NamedTuple)
     p = ds_k(hm.predictors)
     x = Array(ds_k(hm.forcing)) # don't propagate names after this
-
     Rb, stQ10 = LuxCore.apply(hm.NN, p, ps.ps, st.st) #! NN(αᵢ(t)) ≡ Rb(T(t), M(t))
 
     #TODO output name flexible - could be R_soil, heterotrophic, autotrophic, etc.
     R_soil = mRbQ10(Rb, ps.Q10, x, 15.0f0) # ? should 15°C be the reference temperature also an input variable?
 
-    return (; R_soil, Rb), (; st = (; st = stQ10))
+    return (; R_soil, Rb), (; st = stQ10)
 end
 
 function (hm::RespirationRbQ10)(ds_k::AbstractDimArray, ps, st::NamedTuple)
@@ -69,5 +68,5 @@ function (hm::RespirationRbQ10)(ds_k::AbstractDimArray, ps, st::NamedTuple)
     #TODO output name flexible - could be R_soil, heterotrophic, autotrophic, etc.
     R_soil = mRbQ10(Rb, ps.Q10, x, 15.0f0) # ? should 15°C be the reference temperature also an input variable?
 
-    return (; R_soil, Rb), (; st = (; st = stQ10))
+    return (; R_soil, Rb), (; st = stQ10)
 end
diff --git a/src/models/models.jl b/src/models/models.jl
index 57bbcd41..82691b2d 100644
--- a/src/models/models.jl
+++ b/src/models/models.jl
@@ -9,7 +9,6 @@ abstract type EasyHybridModels end
 # include("SinusHybridModel.jl")
 include("LinearHM.jl")
 include("Respiration_Rb_Q10.jl")
-include("BulkDensity_SOC.jl")
 include("Rs_components.jl")
 include("simple_Rb_Q10_PBM.jl")
 include("GenericHybridModel.jl")
diff --git a/src/train.jl b/src/train.jl
index 61757f38..9eee122c 100644
--- a/src/train.jl
+++ b/src/train.jl
@@ -69,7 +69,8 @@ function train(
         batchsize = 64,
         opt = AdamW(0.01),
         patience = typemax(Int),
-
+        autodiff_backend = AutoZygote(),
+        return_gradients = True(),
         # Loss and evaluation
         training_loss = :mse,
         loss_types = [:mse, :r2],
@@ -120,8 +121,8 @@ function train(
    else
         ps, st = get_ps_st(train_from)
     end
-
-    opt_state = Optimisers.setup(opt, ps)
+    ps = ComponentArray(ps)
+    train_state = Lux.Training.TrainState(hybridModel, ps, st, opt)
 
     # ? initial losses
     is_no_nan_t = .!isnan.(y_train)
@@ -193,24 +194,28 @@ function train(
    @info "Check the saved output (.png, .mp4, .jld2) from training at: $(tmp_folder)"
 
     prog = Progress(nepochs, desc = "Training loss", enabled = show_progress)
+    loss(hybridModel, ps, st, (x, y)) = lossfn(
+        hybridModel, ps, st, (x, y);
+        logging = LoggingLoss(train_mode = true, loss_types = loss_types, training_loss = training_loss, agg = agg)
+    )
 
     maybe_record_history(!isnothing(ext), fig, joinpath(tmp_folder, "training_history_$(hybrid_name).mp4"); framerate = 24) do io
         for epoch in 1:nepochs
             for (x, y) in train_loader
                 # ? check NaN indices before going forward, and pass filtered `x, y`.
                 is_no_nan = .!isnan.(y)
                 if length(is_no_nan) > 0 # ! be careful here, multivariate needs fine tuning
-                    l, backtrace = Zygote.pullback(
-                        (ps) -> lossfn(
-                            hybridModel, x, (y, is_no_nan), ps, st,
-                            LoggingLoss(training_loss = training_loss, agg = agg)
-                        ), ps
+                    # ? let's keep grads, they might be useful for mixed gradient methods
+                    grads, loss_val, stats, train_state = Lux.Training.single_train_step!(
+                        autodiff_backend, loss, (x, (y, is_no_nan)), train_state;
+                        return_gradients
                     )
-                    grads = backtrace(l)[1]
-                    Optimisers.update!(opt_state, ps, grads)
-                    st = (; l[2].st...)
                 end
             end
+            # sync ps and st from train_state
+            ps = train_state.parameters
+            st = train_state.states
+
             ps_values = [copy(getproperty(ps, e)[1]) for e in save_ps]
             tmp_e = NamedTuple{save_ps}(ps_values)
@@ -355,10 +360,7 @@ function train(
 end
 
 function evaluate_acc(ghm, x, y, y_no_nan, ps, st, loss_types, training_loss, agg)
-    loss_val, sts, ŷ = lossfn(
-        ghm, x, (y, y_no_nan), ps, LuxCore.testmode(st),
-        LoggingLoss(train_mode = false, loss_types = loss_types, training_loss = training_loss, agg = agg)
-    )
+    loss_val, sts, ŷ = lossfn(ghm, ps, st, (x, (y, y_no_nan)), logging = LoggingLoss(train_mode = false, loss_types = loss_types, training_loss = training_loss, agg = agg))
     return loss_val, sts, ŷ
 end
 function maybe_record_history(block, should_record, fig, output_path; framerate = 24)
diff --git a/src/utils/logging_loss.jl b/src/utils/logging_loss.jl
index 27873d31..b3c85830 100644
--- a/src/utils/logging_loss.jl
+++ b/src/utils/logging_loss.jl
@@ -94,31 +94,20 @@ Main loss function for hybrid models that handles both training and evaluation m
 - In evaluation mode (`logging.train_mode = false`):
   - `(loss_values, st, ŷ)`: NamedTuple of losses, state and predictions
 """
-function lossfn(HM::LuxCore.AbstractLuxContainerLayer, x, (y_t, y_nan), ps, st, logging::LoggingLoss)
+function lossfn(HM::LuxCore.AbstractLuxContainerLayer, ps, st, (x, (y_t, y_nan)); logging::LoggingLoss)
     targets = HM.targets
-    ŷ, y, y_nan, st = get_predictions_targets(HM, x, (y_t, y_nan), ps, st, targets)
     if logging.train_mode
+        ŷ, y, y_nan, st = get_predictions_targets(HM, x, (y_t, y_nan), ps, st, targets)
         loss_value = compute_loss(ŷ, y, y_nan, targets, logging.training_loss, logging.agg)
-        return loss_value, st
+        stats = NamedTuple()
     else
+        ŷ, y, y_nan, _ = get_predictions_targets(HM, x, (y_t, y_nan), ps, LuxCore.testmode(st), targets)
         loss_value = compute_loss(ŷ, y, y_nan, targets, logging.loss_types, logging.agg)
-        return loss_value, st, ŷ
+        stats = (; ŷ...)
     end
+    return loss_value, st, stats
 end
 
-function lossfn(HM::Union{SingleNNHybridModel, MultiNNHybridModel, SingleNNModel, MultiNNModel}, x, (y_t, y_nan), ps, st, logging::LoggingLoss)
-    targets = HM.targets
-    ŷ, y, y_nan, st = get_predictions_targets(HM, x, (y_t, y_nan), ps, st, targets)
-    if logging.train_mode
-        loss_value = compute_loss(ŷ, y, y_nan, targets, logging.training_loss, logging.agg)
-        return loss_value, st
-    else
-        loss_value = compute_loss(ŷ, y, y_nan, targets, logging.loss_types, logging.agg)
-        return loss_value, st, ŷ
-    end
-end
-
-
 """
     get_predictions_targets(HM, x, (y_t, y_nan), ps, st, targets)
diff --git a/src/utils/show_generic.jl b/src/utils/show_generic.jl
index 9dda9cc2..d7c0d08f 100644
--- a/src/utils/show_generic.jl
+++ b/src/utils/show_generic.jl
@@ -5,7 +5,7 @@ function print_key_value(io, key, value; color = :light_red)
     return println(io)
 end
 
-function Base.show(io::IO, pc::ParameterContainer)
+function Base.show(io::IO, ::MIME"text/plain", pc::ParameterContainer)
     table = pc.table
     println(io)
     return PrettyTables.pretty_table(
@@ -16,7 +16,7 @@ function Base.show(io::IO, pc::ParameterContainer)
     )
 end
 
-function Base.show(io::IO, hm::SingleNNHybridModel)
+function Base.show(io::IO, ::MIME"text/plain", hm::SingleNNHybridModel)
     println(io, "Neural Network:")
     show(IndentedIO(io), MIME"text/plain"(), hm.NN)
     println(io)
@@ -32,7 +32,7 @@ function Base.show(io::IO, hm::SingleNNHybridModel)
     return show(IndentedIO(io), MIME"text/plain"(), hm.parameters)
 end
 
-function Base.show(io::IO, hm::MultiNNHybridModel)
+function Base.show(io::IO, ::MIME"text/plain", hm::MultiNNHybridModel)
     printstyled(io, "Neural Networks:"; color = :light_yellow)
     println(io)
     for (name, nn) in pairs(hm.NNs)
diff --git a/test/Project.toml b/test/Project.toml
index cdb79376..f2051aa7 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -5,7 +5,11 @@ ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[sources]
+EasyHybrid = {path = ".."}
diff --git a/test/runtests.jl b/test/runtests.jl
index f56852f5..540d16a3 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,7 +11,7 @@ dk_twos = gen_linear_data_2outputs()
 include("test_generic_hybrid_model.jl")
 # Include SplitData tests
 include("test_split_data_train.jl")
-
+include("test_autodiff_backend.jl")
 @testset "LinearHM" begin
 
     # test model instantiation
diff --git a/test/test_autodiff_backend.jl b/test/test_autodiff_backend.jl
new file mode 100644
index 00000000..4eca0bfa
--- /dev/null
+++ b/test/test_autodiff_backend.jl
@@ -0,0 +1,41 @@
+using ForwardDiff
+# using Mooncake
+# using Enzyme
+
+@testset "autodiff backends" verbose = true begin
+    df = make_synth_df(32) # keep it small/fast
+
+    forcing = [:ta]
+    predictors = [:sw_pot, :dsw_pot]
+    target = [:reco]
+    global_param_names = [:Q10]
+    neural_param_names = [:rb]
+
+    model = constructHybridModel(
+        predictors, forcing, target, RbQ10,
+        RbQ10_PARAMS, neural_param_names, global_param_names
+    )
+
+    ka = prepare_data(model, df)
+
+    _BACKENDS_SPEC = (
+        # ("EnzymeConst", AutoEnzyme(; function_annotation = Enzyme.Const)),
+        ("ForwardDiff", AutoForwardDiff()),
+        # ("Mooncake", AutoMooncake(; config = nothing)), # ? it needs special rrules
+        ("Zygote", AutoZygote()),
+    )
+    for (backend_name, backend_fn) in _BACKENDS_SPEC
+        @testset "backend: $backend_name" begin
+            out = train(
+                model, ka, ();
+                nepochs = 1,
+                batchsize = 12,
+                plotting = false,
+                show_progress = false,
+                hybrid_name = "test_$(backend_name)",
+                autodiff_backend = backend_fn,
+            )
+            @test !isnothing(out)
+        end
+    end
+end
diff --git a/test/test_generic_hybrid_model.jl b/test/test_generic_hybrid_model.jl
index efbe8fb0..e4e52c15 100644
--- a/test/test_generic_hybrid_model.jl
+++ b/test/test_generic_hybrid_model.jl
@@ -208,7 +208,7 @@ end
     rng = Random.default_rng()
     st = LuxCore.initialstates(rng, model)
 
-    @test haskey(st, :st) # Neural network states
+    @test haskey(st, :st_nn) # Neural network states
     @test haskey(st, :fixed) # Fixed parameters
     @test haskey(st.fixed, :c)
     @test length(st.fixed.c) == 1
@@ -247,8 +247,8 @@ end
     @test haskey(output.parameters, :a)
     @test haskey(output.parameters, :b)
     @test haskey(output.parameters, :c)
-    @test haskey(new_st, :st)
-    @test haskey(new_st.st, :fixed)
+    @test haskey(new_st, :st_nn)
+    @test haskey(new_st, :fixed)
 end
 
 @testset "SingleNNHybridModel with scale_nn_outputs=true" begin
@@ -413,9 +413,9 @@ end
     @test haskey(output.parameters, :b)
     @test haskey(output.parameters, :c)
     @test haskey(output.parameters, :d)
-    @test haskey(new_st.st, :a)
-    @test haskey(new_st.st, :d)
-    @test haskey(new_st.st, :fixed)
+    @test haskey(new_st, :a)
+    @test haskey(new_st, :d)
+    @test haskey(new_st, :fixed)
 end
 
 @testset "MultiNNHybridModel with scale_nn_outputs=true" begin