Merge pull request #9 from CaioDaumann/2023_training
2023 training
CaioDaumann authored Nov 1, 2024
2 parents 4c3f094 + 538e140 commit dd82119
Showing 22 changed files with 3,228 additions and 690 deletions.
35 changes: 35 additions & 0 deletions .gitignore
@@ -0,0 +1,35 @@
# macOS system files
.DS_Store
._*

# Python cache
__pycache__/
*.pyc
*.pyo
*.pyd

# Project-specific directories to ignore
higgsdna_implementation/
old_enviroments_just_to_be_safe/
results/
run3_mvaID_models/

plots/
plotting_for_paper/
post_processing_script/
post_processing_script/paper_with_IC/
plot/validation_plots/
standalone_application/
standalone_application/*

.github/workflows/._*

# Logs
logs/
*.log

# Temporary files
*.tmp
*.swp
*~
*.err
4 changes: 2 additions & 2 deletions README.md
@@ -15,8 +15,8 @@ HoverE is an example of a distribution that is not well modeled and needs to be corrected


<div style="display: flex; justify-content: space-between;">
<img src="./plot/probe_hoe.png" width="48%">
<img src="./plot/mvaID_barrel.png" width="48%">
<img src="./plot/CMS-PAS-HIG-23-014_Figure_002-c.png" width="48%">
<img src="./plot/CMS-PAS-HIG-23-014_Figure_002-d.png" width="48%">
</div>

## Usage
Empty file modified condor_submission_files/flow.sub
100644 → 100755
Empty file.
343 changes: 156 additions & 187 deletions data_reading/read_data.py
100644 → 100755

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion environment.yml
100644 → 100755
@@ -1,4 +1,4 @@
name: flow_corrections
name: flow_corrections_5
channels:
- conda-forge
- defaults
26 changes: 18 additions & 8 deletions flow_configuration.yaml
@@ -1,11 +1,21 @@
# This file holds the configurations used to train a normalizing flow via the main.py file
# You can set multiple configurations and the main file will loop through them and perform different trainings

configuration_6:
n_transforms : 10
aux_nodes : 512
aux_layers : 3
n_splines_bins : 8
initial_lr : 1.0e-3
batch_size : 1024
max_epochs : -35
configuration_2023_test_v3:
n_transforms : 6
aux_nodes : 128
aux_layers : 2
n_splines_bins : 8
initial_lr : 1.0e-3
batch_size : 512
max_epochs : 75
DoKinematicsRW : True
IsAutoRegressive: False
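
For orientation, each top-level key in this file (e.g. configuration_2023_test_v3) is one training configuration whose hyperparameters main.py reads by name inside a loop. The snippet below is a minimal sketch of that pattern, mirroring the loop visible in the main.py diff further down; it is not an exact copy of the repository code.

```python
import yaml
from yaml import Loader

# Each top-level key in flow_configuration.yaml is one flow configuration;
# looping over all of them is a simple way to run a hyperparameter scan.
with open("flow_configuration.yaml", "r") as stream:
    dictionary = yaml.load(stream, Loader)

for key in dictionary:
    cfg = dictionary[key]
    n_transforms     = cfg["n_transforms"]
    n_splines_bins   = cfg["n_splines_bins"]
    aux_nodes        = cfg["aux_nodes"]
    aux_layers       = cfg["aux_layers"]
    max_epoch_number = cfg["max_epochs"]
    initial_lr       = cfg["initial_lr"]
    batch_size       = cfg["batch_size"]
    DoKinematicsRW   = cfg["DoKinematicsRW"]
    IsAutoRegressive = cfg["IsAutoRegressive"]
    print(f"{key}: {n_transforms} transforms, {max_epoch_number} epochs, lr {initial_lr}")
```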








41 changes: 29 additions & 12 deletions main.py
100644 → 100755
@@ -9,6 +9,7 @@

import yaml
from yaml import Loader
import json

# importing other scripts
import data_reading.read_data as data_reader
@@ -18,17 +19,32 @@
def test_big_boy():
assert 1 == 1


def main():
print("Welcome to the simulation corrections 2000!")

# Let's call the function responsible for reading and treating the data
# this function reads both MC and data, performs a basic selection and reweights the 4d-kinematics distributions
# in the end they are saved into a folder, so one doesn't need to go through this function every time
re_process_data = True
if( re_process_data ):
data_reader.read_zee_data()

# First we read the config file with the list of variables that should be corrected and used as conditions
# the paths to the TnP MC and data files are also given here
with open("var_training_list.json", "r") as file:
data = json.load(file)

var_list = data["var_list"]
var_list_barrel_only = data["var_list_barrel_only"] # This list is needed to calculate the correlation matrices, since the endcap-only variables are always zero in the barrel!
conditions_list = data["conditions_list"]
data_samples_path = data["data_files"]
mc_samples_path = data["MC_files"]
mc_samples_lumi_norm = data["MC_files_normalization"]

# These quantities are related to the transformations applied to the non-continuous variables
# You should add the indices of these quantities in the variable name list
Index_for_Iso_transform = data["Index_for_Iso_transform"]
Iso_transform_shift = data["Iso_transform_shift"]

assert len(Iso_transform_shift) == len(Index_for_Iso_transform)
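
For reference, below is a purely hypothetical sketch of how var_training_list.json could be laid out, built only from the keys read above; every variable name, file path, and numeric value in it is a placeholder, not taken from this repository.

```python
import json

# Hypothetical example of var_training_list.json - only the keys are taken from
# the code above; all names and values are placeholders.
example = {
    "var_list": ["probe_r9", "probe_sieie", "probe_iso"],
    "var_list_barrel_only": ["probe_r9", "probe_sieie"],
    "conditions_list": ["probe_pt", "probe_eta", "probe_phi", "rho"],
    "data_files": ["/path/to/tnp_data.parquet"],
    "MC_files": ["/path/to/tnp_mc.parquet"],
    "MC_files_normalization": [1.0],
    "Index_for_Iso_transform": [2],   # index of the isolation variable inside var_list
    "Iso_transform_shift": [0.05]     # one shift per entry in Index_for_Iso_transform
}

with open("var_training_list.json", "w") as fh:
    json.dump(example, fh, indent=4)
```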

# loop over the network configurations from the yaml file - one way to do hyperparameter optimization
stream = open("flow_configuration.yaml", 'r')
dictionary = yaml.load(stream,Loader)
@@ -46,17 +62,18 @@ def main():
initial_lr = dictionary[key]["initial_lr"]
batch_size = dictionary[key]["batch_size"]

DoKinematicsRW = dictionary[key]["DoKinematicsRW"]

IsAutoRegressive = dictionary[key]["IsAutoRegressive"]

re_process_data = True
if( re_process_data ):
data_reader.read_zee_data(var_list, conditions_list, data_samples_path, mc_samples_path, mc_samples_lumi_norm, DoKinematicsRW)
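
The kinematic reweighting itself lives in data_reading/read_data.py, whose diff is collapsed above ("Large diffs are not rendered by default"), so the following is only a generic illustration of one common way a DoKinematicsRW-style binned reweighting can be implemented, reduced to two variables for brevity; it is not the repository's actual implementation.

```python
import numpy as np

def kinematic_reweight(mc_pt, mc_eta, data_pt, data_eta, bins=(30, 20)):
    """Generic sketch: per-event MC weights that match the (pt, eta) shape of data."""
    data_hist, pt_edges, eta_edges = np.histogram2d(data_pt, data_eta, bins=bins)
    mc_hist, _, _ = np.histogram2d(mc_pt, mc_eta, bins=[pt_edges, eta_edges])

    # ratio of normalized histograms; MC bins with no entries get weight zero
    ratio = np.divide(data_hist / data_hist.sum(), mc_hist / mc_hist.sum(),
                      out=np.zeros_like(data_hist), where=mc_hist > 0)

    # look up the weight of the bin each MC event falls into
    pt_idx  = np.clip(np.digitize(mc_pt,  pt_edges)  - 1, 0, bins[0] - 1)
    eta_idx = np.clip(np.digitize(mc_eta, eta_edges) - 1, 0, bins[1] - 1)
    return ratio[pt_idx, eta_idx]
```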

# Now, we call the class that handles the transformations, training and validation of the corrections
corrections = training_utils.Simulation_correction( str(key) ,n_transforms, n_splines_bins, aux_nodes, aux_layers, max_epoch_number, initial_lr, batch_size )
corrections = training_utils.Simulation_correction( str(key), var_list, var_list_barrel_only, conditions_list, Index_for_Iso_transform, Iso_transform_shift, IsAutoRegressive, n_transforms, n_splines_bins, aux_nodes, aux_layers, max_epoch_number, initial_lr, batch_size )
corrections.setup_flow()
corrections.train_the_flow()

#exit()

# Now, we call the class that handles the transformations, training and validaiton of the corrections
#corrections = training_utils.Simulation_correction()
#corrections.setup_flow()
#corrections.train_the_flow()

if __name__ == "__main__":
main()