From 4e1cff7f25db395d7b274a6e2ed737e1c23e942c Mon Sep 17 00:00:00 2001 From: "djgagne@ou.edu" Date: Thu, 25 May 2023 19:11:39 -0600 Subject: [PATCH 1/2] Added new processing config for run 7 files --- config/cesm_tau_run7_process.yml | 16 ++++++++++++++++ environment.yml | 2 +- mlmicrophysics/compile.sh | 4 ++-- mlmicrophysics/test_quantile_emulator.f90 | 4 ++-- scripts/process_cesm_output.py | 20 ++++++++++---------- scripts/process_phys_data.sh | 4 ++-- 6 files changed, 33 insertions(+), 17 deletions(-) create mode 100644 config/cesm_tau_run7_process.yml diff --git a/config/cesm_tau_run7_process.yml b/config/cesm_tau_run7_process.yml new file mode 100644 index 0000000..eb11074 --- /dev/null +++ b/config/cesm_tau_run7_process.yml @@ -0,0 +1,16 @@ +model_path: "/glade/p/cisl/aiml/dgagne/TAU_ported_cam7_run_1/" +model_file_start: "cam_ml_ported5_tau.cam.h1" +model_file_end: "nc" +time_var: "time" +time_split_interval: 1 +staggered_variables: [] +out_variables: ["depth", "row", "col", "T", "RHO_CLUBB", + "CLOUD", "FREQR", + "QC_TAU_in", "NC_TAU_in", "QR_TAU_in", "NR_TAU_in", + "QC_TAU_out", "NC_TAU_out", "QR_TAU_out", "NR_TAU_out", + "qctend_TAU", "nctend_TAU", "qrtend_TAU", "nrtend_TAU",] +subset_variable: ["QC_TAU_in"] +subset_threshold: [1.0e-12] +out_path: "/glade/p/cisl/aiml/dgagne/cam_mp_files_run7_parquet/" +out_start: "cam_mp_data_run7" +out_format: "parquet" diff --git a/environment.yml b/environment.yml index b8c8055..5bb9983 100644 --- a/environment.yml +++ b/environment.yml @@ -26,5 +26,5 @@ dependencies: - pyarrow - tensorflow - tensorflow-probability - - git+https://github.com/NCAR/echo-opt + - echo-opt diff --git a/mlmicrophysics/compile.sh b/mlmicrophysics/compile.sh index 20e141c..2113215 100755 --- a/mlmicrophysics/compile.sh +++ b/mlmicrophysics/compile.sh @@ -1,10 +1,10 @@ #!/bin/bash #DEBUG_FLAGS="-fPIC -g -fimplicit-none -Wall -O3 -Wline-truncation -Wcharacter-truncation -Wsurprising -Waliasing -Wimplicit-interface -Wunused-parameter -fwhole-file -fcheck=all -std=f2008 -pedantic -fbacktrace -fbounds-check -ffpe-trap=zero,invalid,overflow,underflow" -DEBUG_FLAGS="-fPIC -O3 -pg" +DEBUG_FLAGS="-fPIC -O3 -g" F_INC="-I$NCAR_ROOT_INTEL/include -I$NCAR_INC_NETCDF -I$NCAR_INC_MKL" F_LIB="-L$NCAR_ROOT_INTEL/lib -L$NCAR_LDFLAGS_NETCDF -L$NCAR_LDFLAGS_MKL -L$NCAR_LDFLAGS_MKLAUX" all_paths="$F_INC $F_LIB $NCAR_LIBS_NETCDF -mkl" -rm *.mod *.o test_emulator +rm *.mod *.o test_quantile_emulator echo $FC $DEBUG_FLAGS -c module_neural_net.f90 tau_neural_net_quantile.f90 $all_paths $FC $DEBUG_FLAGS -c module_neural_net.f90 tau_neural_net_quantile.f90 $all_paths $FC $DEBUG_FLAGS test_quantile_emulator.f90 tau_neural_net_quantile.o module_neural_net.o -o test_quantile_emulator $all_paths diff --git a/mlmicrophysics/test_quantile_emulator.f90 b/mlmicrophysics/test_quantile_emulator.f90 index 7d8a37a..7bfd818 100644 --- a/mlmicrophysics/test_quantile_emulator.f90 +++ b/mlmicrophysics/test_quantile_emulator.f90 @@ -10,7 +10,7 @@ program test_quantile_emulator print *, "load emulators" call initialize_tau_emulators print *, "loaded emulators" -qc = (/ 5e-6_r8, 1e-5_r8, 1e-3_r8, 2e-3_r8, 5.2e-4_r8 /) +qc = (/ 5e-10_r8, 1e-5_r8, 1e-3_r8, 2e-3_r8, 5.2e-4_r8 /) qr = (/ 1e-10_r8, 1e-8_r8, 1e-2_r8, 1e-4_r8, 2e-3_r8 /) nc = (/ 10.0_r8, 100.0_r8, 500.0_r8, 50000.0_r8, 1.0_r8 /) nr = (/ 10.0_r8, 1.0_r8, 1000.0_r8, 1e6_r8, 10000.0_r8 /) @@ -21,7 +21,7 @@ program test_quantile_emulator n0r = (/ 0.5e5_r8, 1.0e6_r8, 1.1e7_r8, 1.12e3_r8, 2.0e4_r8 /) pgam = (/ 10.0_r8, 50.0_r8, 25.0_r8, 19.0_r8, 100.0_r8 /) precip_frac = (/ 0.3_r8, 0.4_r8, 0.5_r8, 0.6_r8, 0.7_r8 /) -qsmall = 1.0e-6_r8 +qsmall = 1.0e-18_r8 num_loops = 1000 print *, qc call cpu_time(t_start) diff --git a/scripts/process_cesm_output.py b/scripts/process_cesm_output.py index 419a009..ec60153 100644 --- a/scripts/process_cesm_output.py +++ b/scripts/process_cesm_output.py @@ -20,7 +20,7 @@ def main(): if not exists(args.config): raise FileNotFoundError(args.config + " not found.") with open(args.config) as config_file: - config = yaml.load(config_file) + config = yaml.load(config_file, Loader=yaml.FullLoader) #time_files = get_cam_output_times(config["model_path"], time_var=config["time_var"], # file_start=config["model_file_start"], # file_end=config["model_file_end"]) @@ -71,15 +71,15 @@ def process_cesm_file_subset(filename, staggered_variables=None, time_var="time" model_ds[staggered_variable + "_lev"] = unstagger_vertical(model_ds, staggered_variable) model_ds.update(split_staggered_variable(model_ds, staggered_variable)) model_ds.update(add_index_coords(model_ds)) - model_ds["pressure"] = calc_pressure_field(model_ds) - model_ds["temperature"] = calc_temperature(model_ds) - for var in ["QC", "QR", "NC", "NR"]: - if var + "_TAU_in" in model_ds.variables.keys(): - model_ds[var + "_TAU_out"] = (model_ds[var + "_TAU_in"] + model_ds[var.lower() + "tend_TAU"] * dt) - model_ds[var + "_MG2_out"] = (model_ds[var + "_TAU_in"] + model_ds[var.lower() + "tend_MG2"] * dt) - elif var + "_sd_in" in model_ds.variables.keys(): - model_ds[var + "_sd_out"] = (model_ds[var + "_sd_in"] + model_ds[var.lower() + "tend_sd"] * dt) - model_ds[var + "_MG2_out"] = (model_ds[var + "_sd_in"] + model_ds[var.lower() + "tend_MG2"] * dt) + #model_ds["pressure"] = calc_pressure_field(model_ds) + #model_ds["temperature"] = calc_temperature(model_ds) + #for var in ["QC", "QR", "NC", "NR"]: + # if var + "_TAU_in" in model_ds.variables.keys(): + # model_ds[var + "_TAU_out"] = (model_ds[var + "_TAU_in"] + model_ds[var.lower() + "tend_TAU"] * dt) + #model_ds[var + "_MG2_out"] = (model_ds[var + "_TAU_in"] + model_ds[var.lower() + "tend_MG2"] * dt) + # elif var + "_sd_in" in model_ds.variables.keys(): + # model_ds[var + "_sd_out"] = (model_ds[var + "_sd_in"] + model_ds[var.lower() + "tend_sd"] * dt) + #model_ds[var + "_MG2_out"] = (model_ds[var + "_sd_in"] + model_ds[var.lower() + "tend_MG2"] * dt) times = model_ds[time_var] for time in times: diff --git a/scripts/process_phys_data.sh b/scripts/process_phys_data.sh index 770c3fd..447055e 100644 --- a/scripts/process_phys_data.sh +++ b/scripts/process_phys_data.sh @@ -1,7 +1,7 @@ #!/bin/bash -l #PBS -N phys_proc #PBS -l walltime=02:00:00 -#PBS -l select=1:ncpus=30:ngpus=0:mem=300GB +#PBS -l select=1:ncpus=36:ngpus=0:mem=500GB #PBS -A NAML0001 #PBS -q casper @@ -12,5 +12,5 @@ conda activate mlmicro echo `which python` cd ~/mlmicrophysics/scripts #python -u process_cesm_output.py ../config/cesm_tau_run6_process.yml -p 30 >& tau_run6_process.log -python -u process_cesm_output.py ../config/cesm_tau_run6_lim_process.yml -p 30 >& tau_run6_lim_process.log +python -u process_cesm_output.py ../config/cesm_tau_run7_process.yml -p 36 >& tau_run7_lim_process.log #python -u process_cesm_output.py ../config/cesm_sd_phys_process.yml -p 30 >& tau_phys_process.log From 5f42cbbeaa5d45a4c2e73df6e1ed1a12fbc38cb8 Mon Sep 17 00:00:00 2001 From: "djgagne@ou.edu" Date: Tue, 30 May 2023 16:16:52 -0600 Subject: [PATCH 2/2] Updated settings --- config/cesm_tau_run6_train_quantile_nn.yml | 8 +++---- config/cesm_tau_run7_train.yml | 28 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 config/cesm_tau_run7_train.yml diff --git a/config/cesm_tau_run6_train_quantile_nn.yml b/config/cesm_tau_run6_train_quantile_nn.yml index f4d7255..1eb8d92 100644 --- a/config/cesm_tau_run6_train_quantile_nn.yml +++ b/config/cesm_tau_run6_train_quantile_nn.yml @@ -2,10 +2,10 @@ data: data_path: "/glade/p/cisl/aiml/dgagne/cam_mp_files_run6_lim10ppm_parquet/" scratch_path: "/glade/scratch/dgagne/cam_mp_run6_quantile_nn/" out_path: "/glade/work/dgagne/cam_mp_run6_quantile_nn/" - input_cols: ["QC_TAU_in_v2", "QR_TAU_in_v2", "NC_TAU_in_v2", "NR_TAU_in_v2", 'RHO_CLUBB_lev', "precip_frac", "lcldm"] - output_cols: ["QR_TAU_out_v2", "NC_TAU_out_v2", "NR_TAU_out_v2"] - qc_thresh: 1e-6 - n_quantiles: 100 + input_cols: ["QC_TAU_in", "QR_TAU_in", "NC_TAU_in", "NR_TAU_in"] + output_cols: ["qctend_TAU", "qrtend_TAU", "nctend_TAU", "nrtend_TAU"] + qc_thresh: 1e-12 + n_quantiles: 500 subsample: 1 random_seed: 215689 subset_data: diff --git a/config/cesm_tau_run7_train.yml b/config/cesm_tau_run7_train.yml new file mode 100644 index 0000000..88df6c8 --- /dev/null +++ b/config/cesm_tau_run7_train.yml @@ -0,0 +1,28 @@ +data: + data_path: "/glade/p/cisl/aiml/dgagne/cam_mp_files_run7_parquet/" + scratch_path: "/glade/scratch/dgagne/cam_mp_run7_quantile_nn/" + out_path: "/glade/work/dgagne/cam_mp_run7_quantile_nn/" + input_cols: ["QC_TAU_in", "QR_TAU_in", "NC_TAU_in", "NR_TAU_in"] + output_cols: ["qctend_TAU", "qrtend_TAU", "nctend_TAU", "nrtend_TAU"] + qc_thresh: 1e-12 + n_quantiles: 500 + subsample: 1 + random_seed: 215689 + subset_data: + train_date_start: 0 + train_date_end: 6000 + test_date_start: 6001 + test_date_end: 10000 + validation_frequency: 3 + +model: + hidden_layers: 2 + hidden_neurons: 200 + activation: "relu" + output_activation: "sigmoid" + loss: "mse" + lr: 0.001 + batch_size: 1024 + epochs: 100 + verbose: 2 +