Merge pull request #9 from CaioDaumann/2023_training
2023 training
CaioDaumann authored Nov 1, 2024
2 parents 4c3f094 + 538e140 commit dd82119
Showing 22 changed files with 3,228 additions and 690 deletions.
35 changes: 35 additions & 0 deletions .gitignore
@@ -0,0 +1,35 @@
# macOS system files
.DS_Store
._*

# Python cache
__pycache__/
*.pyc
*.pyo
*.pyd

# Project-specific directories to ignore
higgsdna_implementation/
old_enviroments_just_to_be_safe/
results/
run3_mvaID_models/

plots/
plotting_for_paper/
post_processing_script/
post_processing_script/paper_with_IC/
plot/validation_plots/
standalone_application/
standalone_application/*

.github/workflows/._*

# Logs
logs/
*.log

# Temporary files
*.tmp
*.swp
*~
*.err
4 changes: 2 additions & 2 deletions README.md
@@ -15,8 +15,8 @@ HoverE is an example of a distribution that is not well modeled and needs to be corrected


<div style="display: flex; justify-content: space-between;">
<img src="./plot/probe_hoe.png" width="48%">
<img src="./plot/mvaID_barrel.png" width="48%">
<img src="./plot/CMS-PAS-HIG-23-014_Figure_002-c.png" width="48%">
<img src="./plot/CMS-PAS-HIG-23-014_Figure_002-d.png" width="48%">
</div>

## Usage
Empty file modified condor_submission_files/flow.sub
100644 → 100755
Empty file.
343 changes: 156 additions & 187 deletions data_reading/read_data.py
100644 → 100755

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion environment.yml
100644 → 100755
@@ -1,4 +1,4 @@
name: flow_corrections
name: flow_corrections_5
channels:
- conda-forge
- defaults
26 changes: 18 additions & 8 deletions flow_configuration.yaml
@@ -1,11 +1,21 @@
# This file holds the configurations used to train a normalizing flow via the main.py file
# You can set multiple configurations and the main file will loop through them and perform different trainings

configuration_6:
n_transforms : 10
aux_nodes : 512
aux_layers : 3
n_splines_bins : 8
initial_lr : 1.0e-3
batch_size : 1024
max_epochs : -35
configuration_2023_test_v3:
n_transforms : 6
aux_nodes : 128
aux_layers : 2
n_splines_bins : 8
initial_lr : 1.0e-3
batch_size : 512
max_epochs : 75
DoKinematicsRW : True
IsAutoRegressive: False
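
For orientation, each top-level key in this file (e.g. configuration_2023_test_v3) is one training configuration whose hyperparameters main.py reads by name inside a loop. The snippet below is a minimal sketch of that pattern, mirroring the loop visible in the main.py diff further down; it is not an exact copy of the repository code.

```python
import yaml
from yaml import Loader

# Each top-level key in flow_configuration.yaml is one flow configuration;
# looping over all of them is a simple way to run a hyperparameter scan.
with open("flow_configuration.yaml", "r") as stream:
    dictionary = yaml.load(stream, Loader)

for key in dictionary:
    cfg = dictionary[key]
    n_transforms     = cfg["n_transforms"]
    n_splines_bins   = cfg["n_splines_bins"]
    aux_nodes        = cfg["aux_nodes"]
    aux_layers       = cfg["aux_layers"]
    max_epoch_number = cfg["max_epochs"]
    initial_lr       = cfg["initial_lr"]
    batch_size       = cfg["batch_size"]
    DoKinematicsRW   = cfg["DoKinematicsRW"]
    IsAutoRegressive = cfg["IsAutoRegressive"]
    print(f"{key}: {n_transforms} transforms, {max_epoch_number} epochs, lr {initial_lr}")
```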








41 changes: 29 additions & 12 deletions main.py
100644 → 100755
@@ -9,6 +9,7 @@

import yaml
from yaml import Loader
import json

# importing other scripts
import data_reading.read_data as data_reader
@@ -18,17 +19,32 @@
def test_big_boy():
assert 1 == 1


def main():
print("Welcome to the simulation corrections 2000!")

# Let's call the function responsible for reading and treating the data
# this function reads both MC and data, performs a basic selection and reweights the 4d-kinematics distributions
# in the end they are saved into a folder, so one doesn't need to go through this function every time
re_process_data = True
if( re_process_data ):
data_reader.read_zee_data()

# First we read the config file with the list of variables that should be corrected and used as conditions
# the paths to the TnP MC and data files are also given here
with open("var_training_list.json", "r") as file:
data = json.load(file)

var_list = data["var_list"]
var_list_barrel_only = data["var_list_barrel_only"] # This list is needed to calculate the correlation matrices, since the endcap-only variables are always zero in the barrel!
conditions_list = data["conditions_list"]
data_samples_path = data["data_files"]
mc_samples_path = data["MC_files"]
mc_samples_lumi_norm = data["MC_files_normalization"]

# These quantities are related to the transformations applied to the non-continuous variables
# You should add the indices of these quantities in the variable name list
Index_for_Iso_transform = data["Index_for_Iso_transform"]
Iso_transform_shift = data["Iso_transform_shift"]

assert len(Iso_transform_shift) == len(Index_for_Iso_transform)
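
For reference, below is a purely hypothetical sketch of how var_training_list.json could be laid out, built only from the keys read above; every variable name, file path, and numeric value in it is a placeholder, not taken from this repository.

```python
import json

# Hypothetical example of var_training_list.json - only the keys are taken from
# the code above; all names and values are placeholders.
example = {
    "var_list": ["probe_r9", "probe_sieie", "probe_iso"],
    "var_list_barrel_only": ["probe_r9", "probe_sieie"],
    "conditions_list": ["probe_pt", "probe_eta", "probe_phi", "rho"],
    "data_files": ["/path/to/tnp_data.parquet"],
    "MC_files": ["/path/to/tnp_mc.parquet"],
    "MC_files_normalization": [1.0],
    "Index_for_Iso_transform": [2],   # index of the isolation variable inside var_list
    "Iso_transform_shift": [0.05]     # one shift per entry in Index_for_Iso_transform
}

with open("var_training_list.json", "w") as fh:
    json.dump(example, fh, indent=4)
```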

# loop over the network configurations from the yaml file - one way to do hyperparameter optimization
stream = open("flow_configuration.yaml", 'r')
dictionary = yaml.load(stream,Loader)
@@ -46,17 +62,18 @@ def main():
initial_lr = dictionary[key]["initial_lr"]
batch_size = dictionary[key]["batch_size"]

DoKinematicsRW = dictionary[key]["DoKinematicsRW"]

IsAutoRegressive = dictionary[key]["IsAutoRegressive"]

re_process_data = True
if( re_process_data ):
data_reader.read_zee_data(var_list, conditions_list, data_samples_path, mc_samples_path, mc_samples_lumi_norm, DoKinematicsRW)
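
The kinematic reweighting itself lives in data_reading/read_data.py, whose diff is collapsed above ("Large diffs are not rendered by default"), so the following is only a generic illustration of one common way a DoKinematicsRW-style binned reweighting can be implemented, reduced to two variables for brevity; it is not the repository's actual implementation.

```python
import numpy as np

def kinematic_reweight(mc_pt, mc_eta, data_pt, data_eta, bins=(30, 20)):
    """Generic sketch: per-event MC weights that match the (pt, eta) shape of data."""
    data_hist, pt_edges, eta_edges = np.histogram2d(data_pt, data_eta, bins=bins)
    mc_hist, _, _ = np.histogram2d(mc_pt, mc_eta, bins=[pt_edges, eta_edges])

    # ratio of normalized histograms; MC bins with no entries get weight zero
    ratio = np.divide(data_hist / data_hist.sum(), mc_hist / mc_hist.sum(),
                      out=np.zeros_like(data_hist), where=mc_hist > 0)

    # look up the weight of the bin each MC event falls into
    pt_idx  = np.clip(np.digitize(mc_pt,  pt_edges)  - 1, 0, bins[0] - 1)
    eta_idx = np.clip(np.digitize(mc_eta, eta_edges) - 1, 0, bins[1] - 1)
    return ratio[pt_idx, eta_idx]
```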

# Now, we call the class that handles the transformations, training and validation of the corrections
corrections = training_utils.Simulation_correction( str(key) ,n_transforms, n_splines_bins, aux_nodes, aux_layers, max_epoch_number, initial_lr, batch_size )
corrections = training_utils.Simulation_correction( str(key), var_list, var_list_barrel_only, conditions_list, Index_for_Iso_transform, Iso_transform_shift, IsAutoRegressive, n_transforms, n_splines_bins, aux_nodes, aux_layers, max_epoch_number, initial_lr, batch_size )
corrections.setup_flow()
corrections.train_the_flow()

#exit()

# Now, we call the class that handles the transformations, training and validaiton of the corrections
#corrections = training_utils.Simulation_correction()
#corrections.setup_flow()
#corrections.train_the_flow()

if __name__ == "__main__":
main()