From 3bdec2ceb98c0f5e9dac30ac2294958061338038 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 19 Aug 2024 23:01:41 +0200 Subject: [PATCH] [cmsdy] regenerate pp_dy3j.mad (with new timers/counters and with #969 improvements in dsample.f) on itscrd90 Code generation completed in 245 seconds Code generation and additional checks completed in 372 seconds --- .../pp_dy3j.mad/CODEGEN_mad_pp_dy3j_log.txt | 188 ++++++++-------- epochX/cudacpp/pp_dy3j.mad/Source/dsample.f | 23 +- .../SubProcesses/P0_dc_epemgdc/check_sa.cc | 66 +++--- .../SubProcesses/P0_dc_epemgdc/driver.f | 3 +- .../SubProcesses/P0_dc_taptamgdc/check_sa.cc | 66 +++--- .../SubProcesses/P0_dc_taptamgdc/driver.f | 3 +- .../SubProcesses/P0_dd_epemgdd/check_sa.cc | 66 +++--- .../SubProcesses/P0_dd_epemgdd/driver.f | 3 +- .../SubProcesses/P0_dd_taptamgdd/check_sa.cc | 66 +++--- .../SubProcesses/P0_dd_taptamgdd/driver.f | 3 +- .../SubProcesses/P0_ddx_epemgddx/check_sa.cc | 66 +++--- .../SubProcesses/P0_ddx_epemgddx/driver.f | 3 +- .../SubProcesses/P0_ddx_epemggg/check_sa.cc | 66 +++--- .../SubProcesses/P0_ddx_epemggg/driver.f | 3 +- .../SubProcesses/P0_ddx_epemgssx/check_sa.cc | 66 +++--- .../SubProcesses/P0_ddx_epemgssx/driver.f | 3 +- .../SubProcesses/P0_ddx_epemguux/check_sa.cc | 66 +++--- .../SubProcesses/P0_ddx_epemguux/driver.f | 3 +- .../P0_ddx_taptamgddx/check_sa.cc | 66 +++--- .../SubProcesses/P0_ddx_taptamgddx/driver.f | 3 +- .../SubProcesses/P0_ddx_taptamggg/check_sa.cc | 66 +++--- .../SubProcesses/P0_ddx_taptamggg/driver.f | 3 +- .../P0_ddx_taptamgssx/check_sa.cc | 66 +++--- .../SubProcesses/P0_ddx_taptamgssx/driver.f | 3 +- .../P0_ddx_taptamguux/check_sa.cc | 66 +++--- .../SubProcesses/P0_ddx_taptamguux/driver.f | 3 +- .../SubProcesses/P0_ds_epemgds/check_sa.cc | 66 +++--- .../SubProcesses/P0_ds_epemgds/driver.f | 3 +- .../SubProcesses/P0_ds_taptamgds/check_sa.cc | 66 +++--- .../SubProcesses/P0_ds_taptamgds/driver.f | 3 +- .../SubProcesses/P0_dsx_epemgdsx/check_sa.cc | 66 +++--- .../SubProcesses/P0_dsx_epemgdsx/driver.f | 3 +- .../SubProcesses/P0_dsx_epemgdsx/matrix1.pdf | Bin 171241 -> 171241 bytes .../P0_dsx_taptamgdsx/check_sa.cc | 66 +++--- .../SubProcesses/P0_dsx_taptamgdsx/driver.f | 3 +- .../SubProcesses/P0_dux_epemgdux/check_sa.cc | 66 +++--- .../SubProcesses/P0_dux_epemgdux/driver.f | 3 +- .../P0_dux_taptamgdux/check_sa.cc | 66 +++--- .../SubProcesses/P0_dux_taptamgdux/driver.f | 3 +- .../P0_dxcx_epemgdxcx/check_sa.cc | 66 +++--- .../SubProcesses/P0_dxcx_epemgdxcx/driver.f | 3 +- .../P0_dxcx_taptamgdxcx/check_sa.cc | 66 +++--- .../SubProcesses/P0_dxcx_taptamgdxcx/driver.f | 3 +- .../P0_dxdx_epemgdxdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_dxdx_epemgdxdx/driver.f | 3 +- .../P0_dxdx_taptamgdxdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_dxdx_taptamgdxdx/driver.f | 3 +- .../P0_dxsx_epemgdxsx/check_sa.cc | 66 +++--- .../SubProcesses/P0_dxsx_epemgdxsx/driver.f | 3 +- .../P0_dxsx_taptamgdxsx/check_sa.cc | 66 +++--- .../SubProcesses/P0_dxsx_taptamgdxsx/driver.f | 3 +- .../SubProcesses/P0_gd_epemdddx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gd_epemdddx/driver.f | 3 +- .../SubProcesses/P0_gd_epemdssx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gd_epemdssx/driver.f | 3 +- .../SubProcesses/P0_gd_epemggd/check_sa.cc | 66 +++--- .../SubProcesses/P0_gd_epemggd/driver.f | 3 +- .../SubProcesses/P0_gd_epemudux/check_sa.cc | 66 +++--- .../SubProcesses/P0_gd_epemudux/driver.f | 3 +- .../SubProcesses/P0_gd_taptamdddx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gd_taptamdddx/driver.f | 3 +- .../SubProcesses/P0_gd_taptamdssx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gd_taptamdssx/driver.f | 3 +- .../SubProcesses/P0_gd_taptamggd/check_sa.cc | 66 +++--- .../SubProcesses/P0_gd_taptamggd/driver.f | 3 +- .../SubProcesses/P0_gd_taptamudux/check_sa.cc | 66 +++--- .../SubProcesses/P0_gd_taptamudux/driver.f | 3 +- .../SubProcesses/P0_gdx_epemddxdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gdx_epemddxdx/driver.f | 3 +- .../SubProcesses/P0_gdx_epemggdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gdx_epemggdx/driver.f | 3 +- .../SubProcesses/P0_gdx_epemsdxsx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gdx_epemsdxsx/driver.f | 3 +- .../SubProcesses/P0_gdx_epemuuxdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gdx_epemuuxdx/driver.f | 3 +- .../P0_gdx_taptamddxdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gdx_taptamddxdx/driver.f | 3 +- .../P0_gdx_taptamggdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gdx_taptamggdx/driver.f | 3 +- .../P0_gdx_taptamsdxsx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gdx_taptamsdxsx/driver.f | 3 +- .../P0_gdx_taptamuuxdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gdx_taptamuuxdx/driver.f | 3 +- .../SubProcesses/P0_gg_epemgddx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gg_epemgddx/driver.f | 3 +- .../SubProcesses/P0_gg_epemguux/check_sa.cc | 66 +++--- .../SubProcesses/P0_gg_epemguux/driver.f | 3 +- .../SubProcesses/P0_gg_taptamgddx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gg_taptamgddx/driver.f | 3 +- .../SubProcesses/P0_gg_taptamguux/check_sa.cc | 66 +++--- .../SubProcesses/P0_gg_taptamguux/driver.f | 3 +- .../SubProcesses/P0_gu_epemggu/check_sa.cc | 66 +++--- .../SubProcesses/P0_gu_epemggu/driver.f | 3 +- .../SubProcesses/P0_gu_epemuccx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gu_epemuccx/driver.f | 3 +- .../SubProcesses/P0_gu_epemuddx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gu_epemuddx/driver.f | 3 +- .../SubProcesses/P0_gu_epemuuux/check_sa.cc | 66 +++--- .../SubProcesses/P0_gu_epemuuux/driver.f | 3 +- .../SubProcesses/P0_gu_taptamggu/check_sa.cc | 66 +++--- .../SubProcesses/P0_gu_taptamggu/driver.f | 3 +- .../SubProcesses/P0_gu_taptamuccx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gu_taptamuccx/driver.f | 3 +- .../SubProcesses/P0_gu_taptamuddx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gu_taptamuddx/driver.f | 3 +- .../SubProcesses/P0_gu_taptamuuux/check_sa.cc | 66 +++--- .../SubProcesses/P0_gu_taptamuuux/driver.f | 3 +- .../SubProcesses/P0_gux_epemcuxcx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gux_epemcuxcx/driver.f | 3 +- .../SubProcesses/P0_gux_epemduxdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gux_epemduxdx/driver.f | 3 +- .../SubProcesses/P0_gux_epemggux/check_sa.cc | 66 +++--- .../SubProcesses/P0_gux_epemggux/driver.f | 3 +- .../SubProcesses/P0_gux_epemuuxux/check_sa.cc | 66 +++--- .../SubProcesses/P0_gux_epemuuxux/driver.f | 3 +- .../P0_gux_taptamcuxcx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gux_taptamcuxcx/driver.f | 3 +- .../P0_gux_taptamduxdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_gux_taptamduxdx/driver.f | 3 +- .../P0_gux_taptamggux/check_sa.cc | 66 +++--- .../SubProcesses/P0_gux_taptamggux/driver.f | 3 +- .../P0_gux_taptamuuxux/check_sa.cc | 66 +++--- .../SubProcesses/P0_gux_taptamuuxux/driver.f | 3 +- .../SubProcesses/P0_uc_epemguc/check_sa.cc | 66 +++--- .../SubProcesses/P0_uc_epemguc/driver.f | 3 +- .../SubProcesses/P0_uc_taptamguc/check_sa.cc | 66 +++--- .../SubProcesses/P0_uc_taptamguc/driver.f | 3 +- .../SubProcesses/P0_ucx_epemgucx/check_sa.cc | 66 +++--- .../SubProcesses/P0_ucx_epemgucx/driver.f | 3 +- .../P0_ucx_taptamgucx/check_sa.cc | 66 +++--- .../SubProcesses/P0_ucx_taptamgucx/driver.f | 3 +- .../SubProcesses/P0_ud_epemgud/check_sa.cc | 66 +++--- .../SubProcesses/P0_ud_epemgud/driver.f | 3 +- .../SubProcesses/P0_ud_taptamgud/check_sa.cc | 66 +++--- .../SubProcesses/P0_ud_taptamgud/driver.f | 3 +- .../SubProcesses/P0_udx_epemgudx/check_sa.cc | 66 +++--- .../SubProcesses/P0_udx_epemgudx/driver.f | 3 +- .../P0_udx_taptamgudx/check_sa.cc | 66 +++--- .../SubProcesses/P0_udx_taptamgudx/driver.f | 3 +- .../SubProcesses/P0_uu_epemguu/check_sa.cc | 66 +++--- .../SubProcesses/P0_uu_epemguu/driver.f | 3 +- .../SubProcesses/P0_uu_taptamguu/check_sa.cc | 66 +++--- .../SubProcesses/P0_uu_taptamguu/driver.f | 3 +- .../SubProcesses/P0_uux_epemgccx/check_sa.cc | 66 +++--- .../SubProcesses/P0_uux_epemgccx/driver.f | 3 +- .../SubProcesses/P0_uux_epemgddx/check_sa.cc | 66 +++--- .../SubProcesses/P0_uux_epemgddx/driver.f | 3 +- .../SubProcesses/P0_uux_epemggg/check_sa.cc | 66 +++--- .../SubProcesses/P0_uux_epemggg/driver.f | 3 +- .../SubProcesses/P0_uux_epemguux/check_sa.cc | 66 +++--- .../SubProcesses/P0_uux_epemguux/driver.f | 3 +- .../P0_uux_taptamgccx/check_sa.cc | 66 +++--- .../SubProcesses/P0_uux_taptamgccx/driver.f | 3 +- .../P0_uux_taptamgddx/check_sa.cc | 66 +++--- .../SubProcesses/P0_uux_taptamgddx/driver.f | 3 +- .../SubProcesses/P0_uux_taptamggg/check_sa.cc | 66 +++--- .../SubProcesses/P0_uux_taptamggg/driver.f | 3 +- .../P0_uux_taptamguux/check_sa.cc | 66 +++--- .../SubProcesses/P0_uux_taptamguux/driver.f | 3 +- .../P0_uxcx_epemguxcx/check_sa.cc | 66 +++--- .../SubProcesses/P0_uxcx_epemguxcx/driver.f | 3 +- .../P0_uxcx_taptamguxcx/check_sa.cc | 66 +++--- .../SubProcesses/P0_uxcx_taptamguxcx/driver.f | 3 +- .../P0_uxdx_epemguxdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_uxdx_epemguxdx/driver.f | 3 +- .../P0_uxdx_taptamguxdx/check_sa.cc | 66 +++--- .../SubProcesses/P0_uxdx_taptamguxdx/driver.f | 3 +- .../P0_uxux_epemguxux/check_sa.cc | 66 +++--- .../SubProcesses/P0_uxux_epemguxux/driver.f | 3 +- .../P0_uxux_taptamguxux/check_sa.cc | 66 +++--- .../SubProcesses/P0_uxux_taptamguxux/driver.f | 3 +- .../pp_dy3j.mad/SubProcesses/counters.cc | 95 +++++--- .../cudacpp/pp_dy3j.mad/SubProcesses/timer.h | 209 ++++++++++++++---- .../pp_dy3j.mad/SubProcesses/timermap.h | 79 +++++-- 174 files changed, 3922 insertions(+), 2468 deletions(-) diff --git a/epochX/cudacpp/pp_dy3j.mad/CODEGEN_mad_pp_dy3j_log.txt b/epochX/cudacpp/pp_dy3j.mad/CODEGEN_mad_pp_dy3j_log.txt index aad9e4050d..954bda0925 100644 --- a/epochX/cudacpp/pp_dy3j.mad/CODEGEN_mad_pp_dy3j_log.txt +++ b/epochX/cudacpp/pp_dy3j.mad/CODEGEN_mad_pp_dy3j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005564451217651367  +DEBUG: model prefixing takes 0.005532503128051758  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -1851,7 +1851,7 @@ INFO: Process b~ s~ > ta+ ta- g s~ b~ added to mirror process s~ b~ > ta+ ta- g INFO: Crossed process found for b~ b~ > e+ e- g b~ b~, reuse diagrams. INFO: Crossed process found for b~ b~ > mu+ mu- g b~ b~, reuse diagrams. INFO: Crossed process found for b~ b~ > ta+ ta- g b~ b~, reuse diagrams. -435 processes with 27600 diagrams generated in 164.432 s +435 processes with 27600 diagrams generated in 171.219 s Total: 435 processes with 27600 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_dy3j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -2391,7 +2391,7 @@ INFO: Combined process c~ b~ > ta+ ta- g c~ b~ WEIGHTED<=7 with process u~ d~ > INFO: Combined process s~ b~ > ta+ ta- g s~ b~ WEIGHTED<=7 with process d~ s~ > ta+ ta- g d~ s~ WEIGHTED<=7 INFO: Creating files in directory P0_gg_epemguux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2412,7 +2412,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_epemguux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gg_epemgddx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2433,7 +2433,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_epemgddx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gg_taptamguux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2454,7 +2454,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_taptamguux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gg_taptamgddx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2475,7 +2475,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_taptamgddx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gu_epemggu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2496,7 +2496,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_epemggu DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gd_epemggd DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2517,7 +2517,7 @@ INFO: Finding symmetric diagrams for subprocess group gd_epemggd DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gux_epemggux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2538,7 +2538,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_epemggux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gdx_epemggdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2559,7 +2559,7 @@ INFO: Finding symmetric diagrams for subprocess group gdx_epemggdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gu_taptamggu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2580,7 +2580,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_taptamggu DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gd_taptamggd DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2601,7 +2601,7 @@ INFO: Finding symmetric diagrams for subprocess group gd_taptamggd DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gux_taptamggux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2622,7 +2622,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_taptamggux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gdx_taptamggdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2643,7 +2643,7 @@ INFO: Finding symmetric diagrams for subprocess group gdx_taptamggdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uux_epemggg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2664,7 +2664,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_epemggg DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ddx_epemggg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2685,7 +2685,7 @@ INFO: Finding symmetric diagrams for subprocess group ddx_epemggg DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uux_taptamggg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2706,7 +2706,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_taptamggg DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ddx_taptamggg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2727,7 +2727,7 @@ INFO: Finding symmetric diagrams for subprocess group ddx_taptamggg DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gu_epemuuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2748,7 +2748,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_epemuuux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gd_epemdddx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2769,7 +2769,7 @@ INFO: Finding symmetric diagrams for subprocess group gd_epemdddx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gux_epemuuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2790,7 +2790,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_epemuuxux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gdx_epemddxdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2811,7 +2811,7 @@ INFO: Finding symmetric diagrams for subprocess group gdx_epemddxdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gu_epemuddx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2832,7 +2832,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_epemuddx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gu_epemuccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2853,7 +2853,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_epemuccx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gd_epemudux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2874,7 +2874,7 @@ INFO: Finding symmetric diagrams for subprocess group gd_epemudux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gd_epemdssx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2895,7 +2895,7 @@ INFO: Finding symmetric diagrams for subprocess group gd_epemdssx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gux_epemduxdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2916,7 +2916,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_epemduxdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gux_epemcuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2937,7 +2937,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_epemcuxcx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gdx_epemuuxdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2958,7 +2958,7 @@ INFO: Finding symmetric diagrams for subprocess group gdx_epemuuxdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gdx_epemsdxsx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -2979,7 +2979,7 @@ INFO: Finding symmetric diagrams for subprocess group gdx_epemsdxsx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gu_taptamuuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3000,7 +3000,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_taptamuuux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gd_taptamdddx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3021,7 +3021,7 @@ INFO: Finding symmetric diagrams for subprocess group gd_taptamdddx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gux_taptamuuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3042,7 +3042,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_taptamuuxux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gdx_taptamddxdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3063,7 +3063,7 @@ INFO: Finding symmetric diagrams for subprocess group gdx_taptamddxdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gu_taptamuddx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3084,7 +3084,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_taptamuddx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gu_taptamuccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3105,7 +3105,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_taptamuccx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gd_taptamudux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3126,7 +3126,7 @@ INFO: Finding symmetric diagrams for subprocess group gd_taptamudux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gd_taptamdssx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3147,7 +3147,7 @@ INFO: Finding symmetric diagrams for subprocess group gd_taptamdssx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gux_taptamduxdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3168,7 +3168,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_taptamduxdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gux_taptamcuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3189,7 +3189,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_taptamcuxcx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gdx_taptamuuxdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3210,7 +3210,7 @@ INFO: Finding symmetric diagrams for subprocess group gdx_taptamuuxdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gdx_taptamsdxsx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3231,7 +3231,7 @@ INFO: Finding symmetric diagrams for subprocess group gdx_taptamsdxsx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uu_epemguu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3252,7 +3252,7 @@ INFO: Finding symmetric diagrams for subprocess group uu_epemguu DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uux_epemguux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3273,7 +3273,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_epemguux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dd_epemgdd DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3294,7 +3294,7 @@ INFO: Finding symmetric diagrams for subprocess group dd_epemgdd DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ddx_epemgddx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3315,7 +3315,7 @@ INFO: Finding symmetric diagrams for subprocess group ddx_epemgddx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uxux_epemguxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3336,7 +3336,7 @@ INFO: Finding symmetric diagrams for subprocess group uxux_epemguxux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dxdx_epemgdxdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3357,7 +3357,7 @@ INFO: Finding symmetric diagrams for subprocess group dxdx_epemgdxdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ud_epemgud DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3378,7 +3378,7 @@ INFO: Finding symmetric diagrams for subprocess group ud_epemgud DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uc_epemguc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3399,7 +3399,7 @@ INFO: Finding symmetric diagrams for subprocess group uc_epemguc DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uux_epemgddx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3420,7 +3420,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_epemgddx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uux_epemgccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3441,7 +3441,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_epemgccx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_udx_epemgudx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3462,7 +3462,7 @@ INFO: Finding symmetric diagrams for subprocess group udx_epemgudx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ucx_epemgucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3483,7 +3483,7 @@ INFO: Finding symmetric diagrams for subprocess group ucx_epemgucx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dc_epemgdc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3504,7 +3504,7 @@ INFO: Finding symmetric diagrams for subprocess group dc_epemgdc DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ds_epemgds DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3525,7 +3525,7 @@ INFO: Finding symmetric diagrams for subprocess group ds_epemgds DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dux_epemgdux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3546,7 +3546,7 @@ INFO: Finding symmetric diagrams for subprocess group dux_epemgdux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ddx_epemguux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3567,7 +3567,7 @@ INFO: Finding symmetric diagrams for subprocess group ddx_epemguux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ddx_epemgssx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3588,7 +3588,7 @@ INFO: Finding symmetric diagrams for subprocess group ddx_epemgssx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dsx_epemgdsx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3609,7 +3609,7 @@ INFO: Finding symmetric diagrams for subprocess group dsx_epemgdsx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uxdx_epemguxdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3630,7 +3630,7 @@ INFO: Finding symmetric diagrams for subprocess group uxdx_epemguxdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uxcx_epemguxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3651,7 +3651,7 @@ INFO: Finding symmetric diagrams for subprocess group uxcx_epemguxcx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dxcx_epemgdxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3672,7 +3672,7 @@ INFO: Finding symmetric diagrams for subprocess group dxcx_epemgdxcx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dxsx_epemgdxsx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3693,7 +3693,7 @@ INFO: Finding symmetric diagrams for subprocess group dxsx_epemgdxsx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uu_taptamguu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3714,7 +3714,7 @@ INFO: Finding symmetric diagrams for subprocess group uu_taptamguu DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uux_taptamguux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3735,7 +3735,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_taptamguux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dd_taptamgdd DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3756,7 +3756,7 @@ INFO: Finding symmetric diagrams for subprocess group dd_taptamgdd DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ddx_taptamgddx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3777,7 +3777,7 @@ INFO: Finding symmetric diagrams for subprocess group ddx_taptamgddx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uxux_taptamguxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3798,7 +3798,7 @@ INFO: Finding symmetric diagrams for subprocess group uxux_taptamguxux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dxdx_taptamgdxdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3819,7 +3819,7 @@ INFO: Finding symmetric diagrams for subprocess group dxdx_taptamgdxdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ud_taptamgud DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3840,7 +3840,7 @@ INFO: Finding symmetric diagrams for subprocess group ud_taptamgud DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uc_taptamguc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3861,7 +3861,7 @@ INFO: Finding symmetric diagrams for subprocess group uc_taptamguc DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uux_taptamgddx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3882,7 +3882,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_taptamgddx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uux_taptamgccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3903,7 +3903,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_taptamgccx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_udx_taptamgudx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3924,7 +3924,7 @@ INFO: Finding symmetric diagrams for subprocess group udx_taptamgudx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ucx_taptamgucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3945,7 +3945,7 @@ INFO: Finding symmetric diagrams for subprocess group ucx_taptamgucx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dc_taptamgdc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3966,7 +3966,7 @@ INFO: Finding symmetric diagrams for subprocess group dc_taptamgdc DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ds_taptamgds DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -3987,7 +3987,7 @@ INFO: Finding symmetric diagrams for subprocess group ds_taptamgds DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dux_taptamgdux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -4008,7 +4008,7 @@ INFO: Finding symmetric diagrams for subprocess group dux_taptamgdux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ddx_taptamguux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -4029,7 +4029,7 @@ INFO: Finding symmetric diagrams for subprocess group ddx_taptamguux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_ddx_taptamgssx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -4050,7 +4050,7 @@ INFO: Finding symmetric diagrams for subprocess group ddx_taptamgssx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dsx_taptamgdsx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -4071,7 +4071,7 @@ INFO: Finding symmetric diagrams for subprocess group dsx_taptamgdsx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uxdx_taptamguxdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -4092,7 +4092,7 @@ INFO: Finding symmetric diagrams for subprocess group uxdx_taptamguxdx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uxcx_taptamguxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -4113,7 +4113,7 @@ INFO: Finding symmetric diagrams for subprocess group uxcx_taptamguxcx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dxcx_taptamgdxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -4134,7 +4134,7 @@ INFO: Finding symmetric diagrams for subprocess group dxcx_taptamgdxcx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  INFO: Creating files in directory P0_dxsx_taptamgdxsx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -4153,8 +4153,8 @@ INFO: Finding symmetric diagrams for subprocess group dxsx_taptamgdxsx DEBUG: len(subproc_diagrams_for_config) =  48 [model_handling.py at line 1520]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48} [model_handling.py at line 1545]  -Generated helas calls for 84 subprocesses (5824 diagrams) in 22.647 s -Wrote files for 10324 helas calls in 35.674 s +Generated helas calls for 84 subprocesses (5824 diagrams) in 22.479 s +Wrote files for 10324 helas calls in 35.785 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -4165,7 +4165,7 @@ ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 ALOHA: aloha creates FFV3 routines -ALOHA: aloha creates 9 routines in 0.609 s +ALOHA: aloha creates 9 routines in 0.613 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -4179,7 +4179,7 @@ ALOHA: aloha creates FFV3 routines ALOHA: aloha creates FFV2_4 routines ALOHA: aloha creates FFV2_5 routines ALOHA: aloha creates FFV2_3 routines -ALOHA: aloha creates 21 routines in 1.001 s +ALOHA: aloha creates 21 routines in 1.022 s VVV1 FFV1 FFV1 @@ -5470,10 +5470,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 3m58.340s -user 3m53.414s -sys 0m3.818s -Code generation completed in 238 seconds +real 4m5.175s +user 4m0.103s +sys 0m3.937s +Code generation completed in 245 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_dy3j.mad/Source/dsample.f b/epochX/cudacpp/pp_dy3j.mad/Source/dsample.f index ea24d64c46..35e52cf44c 100644 --- a/epochX/cudacpp/pp_dy3j.mad/Source/dsample.f +++ b/epochX/cudacpp/pp_dy3j.mad/Source/dsample.f @@ -737,7 +737,7 @@ subroutine sample_init(p1, p2, p3, p4, p5, VECSIZE_USED) common/read_grid_file/read_grid_file data use_cut/2/ !Grid: 0=fixed , 1=standard, 2=non-zero - data ituple/1/ !1=htuple, 2=sobel + data ituple/1/ !1=ntuple(ranmar or htuple), 2=sobel data Minvar(1,1)/-1/ !No special variable mapping c----- @@ -1264,12 +1264,6 @@ subroutine sample_get_x(wgt, x, j, ipole, xmin, xmax) save xbin_min0_saved, xbin_max1_saved data xbin_min0_saved/xbinarraydim*.false./ data xbin_max1_saved/xbinarraydim*.false./ - - character*255 env_name, env_value - integer env_length, env_status - logical first, skipxbinchecks - data first, skipxbinchecks/.true., .false./ - save first, skipxbinchecks c c External c @@ -1421,19 +1415,7 @@ subroutine sample_get_x(wgt, x, j, ipole, xmin, xmax) c double precision is about 18 digits, we expect things to agree to c 3 digit accuracy. c - if (first) then - env_name = 'CUDACPP_RUNTIME_SKIPXBINCHECKS' - call get_environment_variable(env_name, env_value, env_length, env_status) - if( env_status.eq.0 ) then - skipxbinchecks = .true. - endif - endif - - if (skipxbinchecks) then - if (first) then - write(6,*) 'WARNING: skipping xbin checks (CUDACPP_RUNTIME_SKIPXBINCHECKS is set)' - endif - else if (abs(ddum(j)-xbin(x,ij))/(ddum(j)+1d-22) .gt. 1e-3) then + if (abs(ddum(j)-xbin(x,ij))/(ddum(j)+1d-22) .gt. 1e-3) then if (icount .lt. 5) then write(*,'(a,i4,2e14.6,1e12.4)') & 'Warning xbin not returning correct x', ij, @@ -1444,7 +1426,6 @@ subroutine sample_get_x(wgt, x, j, ipole, xmin, xmax) endif icount=icount+1 endif - first = .false. c if (x .lt. xmin .or. x .gt. xmax) then c write(*,'(a,4i4,2f24.16,1e10.2)') 'Bad x',ij,int(xbin_min),ip, c & int(xbin_max),xmin,x,xmax-xmin diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_epemgdc/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_epemgdc/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_epemgdc/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_epemgdc/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_epemgdc/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_epemgdc/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_epemgdc/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_epemgdc/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_taptamgdc/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_taptamgdc/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_taptamgdc/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_taptamgdc/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_taptamgdc/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_taptamgdc/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_taptamgdc/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dc_taptamgdc/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_epemgdd/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_epemgdd/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_epemgdd/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_epemgdd/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_epemgdd/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_epemgdd/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_epemgdd/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_epemgdd/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_taptamgdd/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_taptamgdd/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_taptamgdd/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_taptamgdd/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_taptamgdd/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_taptamgdd/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_taptamgdd/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dd_taptamgdd/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgddx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgddx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgddx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgddx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgddx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgddx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgddx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgddx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemggg/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemggg/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemggg/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemggg/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemggg/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemggg/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemggg/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemggg/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgssx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgssx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgssx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgssx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgssx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgssx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgssx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemgssx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemguux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemguux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemguux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemguux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemguux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemguux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemguux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_epemguux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgddx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgddx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgddx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgddx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgddx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgddx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgddx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgddx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamggg/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamggg/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamggg/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamggg/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamggg/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamggg/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamggg/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamggg/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgssx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgssx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgssx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgssx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgssx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgssx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgssx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamgssx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamguux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamguux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamguux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamguux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamguux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamguux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamguux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ddx_taptamguux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_epemgds/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_epemgds/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_epemgds/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_epemgds/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_epemgds/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_epemgds/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_epemgds/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_epemgds/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_taptamgds/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_taptamgds/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_taptamgds/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_taptamgds/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_taptamgds/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_taptamgds/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_taptamgds/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ds_taptamgds/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_epemgdsx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_epemgdsx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_epemgdsx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_epemgdsx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_epemgdsx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_epemgdsx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_epemgdsx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_epemgdsx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_epemgdsx/matrix1.pdf b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_epemgdsx/matrix1.pdf index e8d9268fd8e4114a91c04a34e615b8d600a1dc16..1ecd611ce7a37e85c99e7a573604ac155ff61e57 100644 GIT binary patch delta 35 ncmaF4lk4S9u7(!IElizr7&WG^nZu;OqzR!JHK(7Q!z2R$1tSdq delta 35 ncmaF4lk4S9u7(!IElizr7;UDnnZu;OWCx)c?WUid!z2R$32zNi diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_taptamgdsx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_taptamgdsx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_taptamgdsx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_taptamgdsx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_taptamgdsx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_taptamgdsx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_taptamgdsx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dsx_taptamgdsx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_epemgdux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_epemgdux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_epemgdux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_epemgdux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_epemgdux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_epemgdux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_epemgdux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_epemgdux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_taptamgdux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_taptamgdux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_taptamgdux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_taptamgdux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_taptamgdux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_taptamgdux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_taptamgdux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dux_taptamgdux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_epemgdxcx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_epemgdxcx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_epemgdxcx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_epemgdxcx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_epemgdxcx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_epemgdxcx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_epemgdxcx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_epemgdxcx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_taptamgdxcx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_taptamgdxcx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_taptamgdxcx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_taptamgdxcx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_taptamgdxcx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_taptamgdxcx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_taptamgdxcx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxcx_taptamgdxcx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_epemgdxdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_epemgdxdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_epemgdxdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_epemgdxdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_epemgdxdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_epemgdxdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_epemgdxdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_epemgdxdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_taptamgdxdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_taptamgdxdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_taptamgdxdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_taptamgdxdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_taptamgdxdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_taptamgdxdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_taptamgdxdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxdx_taptamgdxdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_epemgdxsx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_epemgdxsx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_epemgdxsx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_epemgdxsx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_epemgdxsx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_epemgdxsx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_epemgdxsx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_epemgdxsx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_taptamgdxsx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_taptamgdxsx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_taptamgdxsx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_taptamgdxsx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_taptamgdxsx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_taptamgdxsx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_taptamgdxsx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_dxsx_taptamgdxsx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdddx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdddx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdddx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdddx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdddx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdddx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdddx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdddx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdssx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdssx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdssx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdssx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdssx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdssx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdssx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemdssx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemggd/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemggd/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemggd/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemggd/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemggd/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemggd/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemggd/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemggd/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemudux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemudux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemudux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemudux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemudux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemudux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemudux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_epemudux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdddx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdddx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdddx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdddx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdddx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdddx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdddx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdddx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdssx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdssx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdssx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdssx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdssx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdssx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdssx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamdssx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamggd/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamggd/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamggd/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamggd/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamggd/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamggd/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamggd/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamggd/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamudux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamudux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamudux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamudux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamudux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamudux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamudux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gd_taptamudux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemddxdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemddxdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemddxdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemddxdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemddxdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemddxdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemddxdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemddxdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemggdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemggdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemggdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemggdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemggdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemggdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemggdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemggdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemsdxsx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemsdxsx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemsdxsx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemsdxsx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemsdxsx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemsdxsx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemsdxsx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemsdxsx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemuuxdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemuuxdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemuuxdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemuuxdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemuuxdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemuuxdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemuuxdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_epemuuxdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamddxdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamddxdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamddxdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamddxdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamddxdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamddxdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamddxdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamddxdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamggdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamggdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamggdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamggdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamggdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamggdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamggdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamggdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamsdxsx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamsdxsx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamsdxsx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamsdxsx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamsdxsx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamsdxsx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamsdxsx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamsdxsx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamuuxdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamuuxdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamuuxdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamuuxdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamuuxdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamuuxdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamuuxdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gdx_taptamuuxdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemgddx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemgddx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemgddx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemgddx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemgddx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemgddx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemgddx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemgddx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemguux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemguux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemguux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemguux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemguux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemguux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemguux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_epemguux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamgddx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamgddx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamgddx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamgddx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamgddx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamgddx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamgddx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamgddx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamguux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamguux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamguux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamguux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamguux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamguux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamguux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gg_taptamguux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemggu/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemggu/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemggu/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemggu/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemggu/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemggu/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemggu/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemggu/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuccx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuccx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuccx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuccx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuccx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuccx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuccx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuccx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuddx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuddx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuddx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuddx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuddx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuddx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuddx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuddx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuuux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuuux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuuux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuuux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuuux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuuux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuuux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_epemuuux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamggu/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamggu/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamggu/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamggu/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamggu/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamggu/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamggu/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamggu/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuccx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuccx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuccx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuccx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuccx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuccx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuccx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuccx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuddx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuddx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuddx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuddx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuddx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuddx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuddx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuddx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuuux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuuux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuuux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuuux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuuux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuuux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuuux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gu_taptamuuux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemcuxcx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemcuxcx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemcuxcx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemcuxcx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemcuxcx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemcuxcx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemcuxcx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemcuxcx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemduxdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemduxdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemduxdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemduxdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemduxdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemduxdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemduxdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemduxdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemggux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemggux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemggux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemggux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemggux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemggux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemggux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemggux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemuuxux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemuuxux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemuuxux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemuuxux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemuuxux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemuuxux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemuuxux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_epemuuxux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamcuxcx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamcuxcx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamcuxcx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamcuxcx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamcuxcx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamcuxcx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamcuxcx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamcuxcx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamduxdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamduxdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamduxdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamduxdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamduxdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamduxdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamduxdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamduxdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamggux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamggux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamggux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamggux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamggux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamggux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamggux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamggux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamuuxux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamuuxux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamuuxux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamuuxux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamuuxux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamuuxux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamuuxux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_gux_taptamuuxux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_epemguc/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_epemguc/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_epemguc/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_epemguc/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_epemguc/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_epemguc/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_epemguc/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_epemguc/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_taptamguc/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_taptamguc/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_taptamguc/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_taptamguc/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_taptamguc/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_taptamguc/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_taptamguc/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uc_taptamguc/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_epemgucx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_epemgucx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_epemgucx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_epemgucx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_epemgucx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_epemgucx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_epemgucx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_epemgucx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_taptamgucx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_taptamgucx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_taptamgucx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_taptamgucx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_taptamgucx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_taptamgucx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_taptamgucx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ucx_taptamgucx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_epemgud/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_epemgud/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_epemgud/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_epemgud/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_epemgud/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_epemgud/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_epemgud/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_epemgud/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_taptamgud/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_taptamgud/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_taptamgud/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_taptamgud/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_taptamgud/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_taptamgud/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_taptamgud/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_ud_taptamgud/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_epemgudx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_epemgudx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_epemgudx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_epemgudx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_epemgudx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_epemgudx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_epemgudx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_epemgudx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_taptamgudx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_taptamgudx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_taptamgudx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_taptamgudx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_taptamgudx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_taptamgudx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_taptamgudx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_udx_taptamgudx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_epemguu/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_epemguu/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_epemguu/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_epemguu/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_epemguu/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_epemguu/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_epemguu/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_epemguu/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_taptamguu/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_taptamguu/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_taptamguu/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_taptamguu/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_taptamguu/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_taptamguu/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_taptamguu/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uu_taptamguu/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgccx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgccx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgccx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgccx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgccx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgccx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgccx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgccx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgddx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgddx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgddx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgddx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgddx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgddx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgddx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemgddx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemggg/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemggg/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemggg/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemggg/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemggg/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemggg/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemggg/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemggg/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemguux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemguux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemguux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemguux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemguux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemguux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemguux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_epemguux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgccx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgccx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgccx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgccx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgccx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgccx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgccx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgccx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgddx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgddx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgddx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgddx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgddx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgddx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgddx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamgddx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamggg/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamggg/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamggg/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamggg/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamggg/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamggg/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamggg/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamggg/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamguux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamguux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamguux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamguux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamguux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamguux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamguux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uux_taptamguux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_epemguxcx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_epemguxcx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_epemguxcx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_epemguxcx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_epemguxcx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_epemguxcx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_epemguxcx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_epemguxcx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_taptamguxcx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_taptamguxcx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_taptamguxcx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_taptamguxcx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_taptamguxcx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_taptamguxcx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_taptamguxcx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxcx_taptamguxcx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_epemguxdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_epemguxdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_epemguxdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_epemguxdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_epemguxdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_epemguxdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_epemguxdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_epemguxdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_taptamguxdx/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_taptamguxdx/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_taptamguxdx/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_taptamguxdx/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_taptamguxdx/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_taptamguxdx/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_taptamguxdx/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxdx_taptamguxdx/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_epemguxux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_epemguxux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_epemguxux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_epemguxux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_epemguxux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_epemguxux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_epemguxux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_epemguxux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_taptamguxux/check_sa.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_taptamguxux/check_sa.cc index d6312eaeeb..fb1fff1598 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_taptamguxux/check_sa.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_taptamguxux/check_sa.cc @@ -420,10 +420,10 @@ main( int argc, char** argv ) DeviceBufferSelectedColor devSelCol( nevt ); #endif - std::unique_ptr genrtimes( new double[niter] ); - std::unique_ptr rambtimes( new double[niter] ); - std::unique_ptr wavetimes( new double[niter] ); - std::unique_ptr wv3atimes( new double[niter] ); + std::unique_ptr genrcounts( new uint64_t[niter] ); + std::unique_ptr rambcounts( new uint64_t[niter] ); + std::unique_ptr wavecounts( new uint64_t[niter] ); + std::unique_ptr wv3acounts( new uint64_t[niter] ); // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; @@ -527,7 +527,7 @@ main( int argc, char** argv ) // === STEP 1 OF 3 // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** - double genrtime = 0; + uint64_t genrcount = 0; // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions @@ -538,7 +538,7 @@ main( int argc, char** argv ) const std::string sgenKey = "1a GenSeed "; timermap.start( sgenKey ); prnk->seedGenerator( seed + iiter ); - genrtime += timermap.stop(); + genrcount += timermap.stop(); // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host const std::string rngnKey = "1b GenRnGen"; @@ -553,19 +553,19 @@ main( int argc, char** argv ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; - genrtime += timermap.start( htodKey ); + genrcount += timermap.start( htodKey ); copyDeviceFromHost( devRndmom, hstRndmom ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** - genrtime += timermap.stop(); + genrcount += timermap.stop(); // === STEP 2 OF 3 // Fill in particle momenta for each of nevt events on the device // *** START THE OLD-STYLE TIMER FOR RAMBO *** - double rambtime = 0; + uint64_t rambcount = 0; // --- 2a. Fill in momenta of initial state particles on the device const std::string riniKey = "2a RamboIni"; @@ -576,7 +576,7 @@ main( int argc, char** argv ) // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device // (i.e. map random numbers to final-state particle momenta for each of nevt events) const std::string rfinKey = "2b RamboFin"; - rambtime += timermap.start( rfinKey ); + rambcount += timermap.start( rfinKey ); prsk->getMomentaFinal(); //std::cout << "Got final momenta" << std::endl; @@ -585,30 +585,30 @@ main( int argc, char** argv ) { // --- 2c. CopyDToH Weights const std::string cwgtKey = "2c CpDTHwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyHostFromDevice( hstWeights, devWeights ); // --- 2d. CopyDToH Momenta const std::string cmomKey = "2d CpDTHmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyHostFromDevice( hstMomenta, devMomenta ); } else // only if ( ! bridge ) ??? { // --- 2c. CopyHToD Weights const std::string cwgtKey = "2c CpHTDwgt"; - rambtime += timermap.start( cwgtKey ); + rambcount += timermap.start( cwgtKey ); copyDeviceFromHost( devWeights, hstWeights ); // --- 2d. CopyHToD Momenta const std::string cmomKey = "2d CpHTDmom"; - rambtime += timermap.start( cmomKey ); + rambcount += timermap.start( cmomKey ); copyDeviceFromHost( devMomenta, hstMomenta ); } #endif // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** - rambtime += timermap.stop(); + rambcount += timermap.stop(); // === STEP 3 OF 3 // Evaluate matrix elements for all nevt events @@ -628,7 +628,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL // --- 2d. CopyHToD Momenta const std::string gKey = "0.. CpHTDg"; - rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + rambcount += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! copyDeviceFromHost( devGs, hstGs ); #endif @@ -641,8 +641,8 @@ main( int argc, char** argv ) } // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - double wavetime = 0; // calc plus copy - double wv3atime = 0; // calc only + uint64_t wavecount = 0; // calc plus copy + uint64_t wv3acount = 0; // calc only // --- 3a. SigmaKin const std::string skinKey = "3a SigmaKin"; @@ -651,8 +651,8 @@ main( int argc, char** argv ) pmek->computeMatrixElements( channelId ); // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wv3atime += timermap.stop(); // calc only - wavetime += wv3atime; // calc plus copy + wv3acount += timermap.stop(); // calc only + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -662,7 +662,7 @@ main( int argc, char** argv ) timermap.start( cmesKey ); copyHostFromDevice( hstMatrixElements, devMatrixElements ); // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** - wavetime += timermap.stop(); // calc plus copy + wavecount += timermap.stop(); // calc plus copy } #endif @@ -675,16 +675,16 @@ main( int argc, char** argv ) // --- 4a Dump within the loop const std::string loopKey = "4a DumpLoop"; timermap.start( loopKey ); - genrtimes[iiter] = genrtime; - rambtimes[iiter] = rambtime; - wavetimes[iiter] = wavetime; - wv3atimes[iiter] = wv3atime; + genrcounts[iiter] = genrcount; + rambcounts[iiter] = rambcount; + wavecounts[iiter] = wavecount; + wv3acounts[iiter] = wv3acount; if( verbose ) { std::cout << std::string( SEP79, '*' ) << std::endl << "Iteration #" << iiter + 1 << " of " << niter << std::endl; - if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + if( perf ) std::cout << "Wave function time: " << wavecount * timermap.secondsPerCount() << std::endl; } for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration @@ -723,6 +723,20 @@ main( int argc, char** argv ) // *** END MAIN LOOP ON #ITERATIONS *** // ************************************** + // Calibrate seconds per count + float secPerCount = timermap.secondsPerCount(); + std::unique_ptr genrtimes( new double[niter] ); + std::unique_ptr rambtimes( new double[niter] ); + std::unique_ptr wavetimes( new double[niter] ); + std::unique_ptr wv3atimes( new double[niter] ); + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + genrtimes[iiter] = genrcounts[iiter] * secPerCount; + rambtimes[iiter] = rambcounts[iiter] * secPerCount; + wavetimes[iiter] = wavecounts[iiter] * secPerCount; + wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; + } + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_taptamguxux/driver.f b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_taptamguxux/driver.f index a29f020760..0e59684c9b 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_taptamguxux/driver.f +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/P0_uxux_taptamguxux/driver.f @@ -96,7 +96,7 @@ Program DRIVER CALL COUNTERS_INITIALISE() c Use null-terminated C-string in COUNTERS_REGISTER_COUNTER calls (maybe it is not needed, but it does not harm) CALL COUNTERS_REGISTER_COUNTER( 1, 'Fortran Initialise(I/O)'//char(0) ) - CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran Random2Momenta'//char(0) ) + CALL COUNTERS_REGISTER_COUNTER( 3, 'Fortran PhaseSpaceSampling'//char(0) ) ! uniform [0,1] + vegas to [0,1] + map to momenta CALL COUNTERS_REGISTER_COUNTER( 4, 'Fortran PDFs'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 5, 'Fortran UpdateScaleCouplings'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 6, 'Fortran Reweight'//char(0) ) @@ -106,6 +106,7 @@ Program DRIVER CALL COUNTERS_REGISTER_COUNTER( 11, 'CudaCpp Initialise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 12, 'CudaCpp Finalise'//char(0) ) CALL COUNTERS_REGISTER_COUNTER( 19, 'CudaCpp MEs'//char(0) ) +c CALL COUNTERS_REGISTER_COUNTER( 21, 'TEST SampleGetX'//char(0) ) #ifdef MG5AMC_MEEXPORTER_CUDACPP fbridge_mode = 1 ! CppOnly=1, default for CUDACPP #else diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/counters.cc b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/counters.cc index 95fe72bb5d..ab508f2a5d 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/counters.cc @@ -10,6 +10,7 @@ #include #include #include // for strlen +#include #include #include @@ -25,25 +26,46 @@ extern "C" { namespace counters { - constexpr int NCOUNTERSMAX = 20; - static bool disablecounters = false; + constexpr int NCOUNTERSMAX = 30; + static bool disablecalltimers = false; + static bool disabletesttimers = false; + static bool usechronotimers = false; // Overall program timer - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; + static mgOnGpu::ChronoTimer program_chronotimer; + static mgOnGpu::RdtscTimer program_rdtsctimer; // Individual timers static std::string array_tags[NCOUNTERSMAX + 3]; - static mgOnGpu::Timer array_timers[NCOUNTERSMAX + 3]; - static float array_totaltimes[NCOUNTERSMAX + 3] = { 0 }; + static bool array_istesttimer[NCOUNTERSMAX + 3]; + static mgOnGpu::ChronoTimer array_chronotimers[NCOUNTERSMAX + 3]; + static mgOnGpu::RdtscTimer array_rdtsctimers[NCOUNTERSMAX + 3]; static int array_counters[NCOUNTERSMAX + 3] = { 0 }; } + inline bool starts_with( std::string_view str, std::string_view prefix ) // https://stackoverflow.com/a/42844629 + { + return str.size() >= prefix.size() && str.compare( 0, prefix.size(), prefix ) == 0; + } + + inline bool ends_with( std::string_view str, std::string_view suffix ) // https://stackoverflow.com/a/42844629 + { + return str.size() >= suffix.size() && str.compare( str.size() - suffix.size(), suffix.size(), suffix ) == 0; + } + void counters_initialise_() { using namespace counters; - if( getenv( "CUDACPP_RUNTIME_DISABLECOUNTERS" ) ) disablecounters = true; - for( int icounter = 1; icounter < NCOUNTERSMAX + 1; icounter++ ) - array_tags[icounter] = ""; // ensure that this is initialized to "" - program_timer.Start(); + if( getenv( "CUDACPP_RUNTIME_DISABLECALLTIMERS" ) ) disablecalltimers = true; + if( getenv( "CUDACPP_RUNTIME_DISABLETESTTIMERS" ) ) disabletesttimers = true; + if( getenv( "CUDACPP_RUNTIME_USECHRONOTIMERS" ) ) usechronotimers = true; + for( int icounter = 0; icounter < NCOUNTERSMAX + 3; icounter++ ) + { + array_tags[icounter] = ""; // ensure that this is initialized to "" + array_istesttimer[icounter] = false; // ensure that this is initialized to false + } + if( usechronotimers ) + program_chronotimer.start(); + else + program_rdtsctimer.start(); return; } @@ -68,6 +90,7 @@ extern "C" if( array_tags[icounter] == "" ) { array_tags[icounter] = tag; + if( starts_with( array_tags[icounter], "TEST" ) ) array_istesttimer[icounter] = true; } else { @@ -81,8 +104,9 @@ extern "C" void counters_start_counter_( const int* picounter, const int* pnevt ) { using namespace counters; - if( disablecounters ) return; + if( disablecalltimers ) return; int icounter = *picounter; + if( disabletesttimers && array_istesttimer[icounter] ) return; if( array_tags[icounter] == "" ) { std::ostringstream sstr; @@ -90,49 +114,64 @@ extern "C" throw std::runtime_error( sstr.str() ); } array_counters[icounter] += *pnevt; - array_timers[icounter].Start(); + if( usechronotimers ) + array_chronotimers[icounter].start(); + else + array_rdtsctimers[icounter].start(); return; } void counters_stop_counter_( const int* picounter ) { using namespace counters; - if( disablecounters ) return; + if( disablecalltimers ) return; int icounter = *picounter; + if( disabletesttimers && array_istesttimer[icounter] ) return; if( array_tags[icounter] == "" ) { std::ostringstream sstr; sstr << "ERROR! counter #" << icounter << " does not exist"; throw std::runtime_error( sstr.str() ); } - array_totaltimes[icounter] += array_timers[icounter].GetDuration(); + if( usechronotimers ) + array_chronotimers[icounter].stop(); + else + array_rdtsctimers[icounter].stop(); return; } - inline bool starts_with( std::string_view str, std::string_view prefix ) // https://stackoverflow.com/a/42844629 - { - return str.size() >= prefix.size() && str.compare( 0, prefix.size(), prefix ) == 0; - } - - inline bool ends_with( std::string_view str, std::string_view suffix ) // https://stackoverflow.com/a/42844629 - { - return str.size() >= suffix.size() && str.compare( str.size() - suffix.size(), suffix.size(), suffix ) == 0; - } - void counters_finalise_() { using namespace counters; // Dump program counters - program_totaltime += program_timer.GetDuration(); + if( usechronotimers ) + program_chronotimer.stop(); + else + program_rdtsctimer.stop(); + float program_totaltime = ( usechronotimers ? program_chronotimer.getTotalDurationSeconds() : program_rdtsctimer.getTotalDurationSeconds() ); + if( usechronotimers ) + printf( " [COUNTERS] *** USING STD::CHRONO TIMERS ***\n" ); + else + printf( " [COUNTERS] *** USING RDTSC-BASED TIMERS ***\n" ); printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - if( disablecounters ) return; + if( disablecalltimers ) return; + // Extract time duration from all timers + float array_totaltimes[NCOUNTERSMAX + 3] = { 0 }; + for( int icounter = 1; icounter < NCOUNTERSMAX + 1; icounter++ ) + { + if( usechronotimers ) + array_totaltimes[icounter] = array_chronotimers[icounter].getTotalDurationSeconds(); + else + array_totaltimes[icounter] = array_rdtsctimers[icounter].getTotalDurationSeconds(); + } // Create counter[0] "Fortran Other" array_tags[0] = "Fortran Other"; array_counters[0] = 1; array_totaltimes[0] = program_totaltime; for( int icounter = 1; icounter < NCOUNTERSMAX + 1; icounter++ ) { - if( !starts_with( array_tags[icounter], "PROGRAM" ) ) // skip counters whose tags start with "PROGRAM" + if( !starts_with( array_tags[icounter], "PROGRAM" ) && + !starts_with( array_tags[icounter], "TEST" ) ) // skip counters whose tags start with "PROGRAM" or "TEST" array_totaltimes[0] -= array_totaltimes[icounter]; } // Create counters[NCOUNTERSMAX+2] "OVERALL MEs" and counters[NCOUNTERSMAX+1] "OVERALL NON-MEs" @@ -162,7 +201,7 @@ extern "C" icounter, array_totaltimes[icounter], array_counters[icounter], - array_totaltimes[icounter] / array_counters[icounter] ); + array_counters[icounter] / array_totaltimes[icounter] ); } else if( array_counters[icounter] == 1 ) // one-off counters for initialisation tasks (e.g. helicity filtering) { diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/timer.h b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/timer.h index 0f2712facf..8132335701 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/timer.h +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/timer.h @@ -1,72 +1,203 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. +//========================================================================== +// Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin [old chrono timer, old API]. // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin [new chrono timer, new API, add rdtsc timer]. +// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin. +//========================================================================== #ifndef MGONGPUTIMER_H #define MGONGPUTIMER_H 1 +#include #include #include +#include +#include namespace mgOnGpu { - /* - high_resolution_clock - steady_clock - system_clock - - from https://www.modernescpp.com/index.php/the-three-clocks - and https://codereview.stackexchange.com/questions/196245/extremely-simple-timer-class-in-c - */ + // --------------------------------------------------------------------------- + // ChronoTimer: default ("old") timers based on std::chrono clocks + // With respect to the original Timer class, this uses a new implementation with nanosecond counts + // With respect to the original Timer class, this also uses a new API with explicit start/stop + // Template argument T can be any of high_resolution_clock, steady_clock, system_clock + // See https://www.modernescpp.com/index.php/the-three-clocks + // See https://codereview.stackexchange.com/questions/196245/extremely-simple-timer-class-in-c template - class Timer + class ChronoTimer { public: - Timer() - : m_StartTime( T::now() ) {} - virtual ~Timer() {} - void Start(); - float GetDuration(); - void Info(); + ChronoTimer(); + virtual ~ChronoTimer() {} + void start(); + void stop(); + uint64_t getCountsSinceStart() const; + float secondsPerCount() const; // constant throughout time + float getTotalDurationSeconds(); + typedef std::nano RATIO; + typedef std::chrono::duration DURATION; + typedef std::chrono::time_point TIMEPOINT; private: - typedef typename T::time_point TTP; - TTP m_StartTime; + DURATION getDurationSinceStart() const; + DURATION m_totalDuration; + bool m_started; + TIMEPOINT m_startTime; }; template - void - Timer::Start() + inline ChronoTimer::ChronoTimer() + : m_totalDuration() + , m_started( false ) + , m_startTime() + { + static_assert( std::is_same::value || + std::is_same::value || + std::is_same::value ); + } + + template + inline void + ChronoTimer::start() + { + assert( !m_started ); + m_started = true; + m_startTime = T::now(); + } + + template + inline void + ChronoTimer::stop() { - m_StartTime = T::now(); + assert( m_started ); + m_started = false; + m_totalDuration += getDurationSinceStart(); } template - float - Timer::GetDuration() + inline uint64_t + ChronoTimer::getCountsSinceStart() const { - std::chrono::duration duration = T::now() - m_StartTime; - return duration.count(); + return getDurationSinceStart().count(); } template - void - Timer::Info() - { - typedef typename T::period TPER; - typedef typename std::ratio_multiply MilliSec; - typedef typename std::ratio_multiply MicroSec; - std::cout << std::boolalpha << std::endl; - std::cout << "clock info: " << std::endl; - std::cout << " is steady: " << T::is_steady << std::endl; - std::cout << " precision: " << TPER::num << "/" << TPER::den << " second " << std::endl; - std::cout << std::fixed; - std::cout << " " << static_cast( MilliSec::num ) / MilliSec::den << " milliseconds " << std::endl; - std::cout << " " << static_cast( MicroSec::num ) / MicroSec::den << " microseconds " << std::endl; - std::cout << std::endl; + inline + typename ChronoTimer::DURATION + ChronoTimer::getDurationSinceStart() const + { + return T::now() - m_startTime; + } + + template + inline float + ChronoTimer::secondsPerCount() const + { + return (float)RATIO::num / RATIO::den; + } + + template + inline float + ChronoTimer::getTotalDurationSeconds() + { + assert( !m_started ); + auto count = m_totalDuration.count(); + return count * secondsPerCount(); + } + + // --------------------------------------------------------------------------- + + // RdtscTimer: faster ("new") *EXPERIMENTAL* timers based on rdtsc + // The rdtsc() call is derived from the TSCNS class (https://github.com/MengRao/tscns) + // The conversion of rdtsc counts to seconds is calibrated on the average frequency during the timer lifetime + // See https://stackoverflow.com/q/76063685 and the Intel 64 and IA-32 Architectures Software Developer’s Manual + // (https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html, June 2024): + // "To determine average processor clock frequency, Intel recommends the use of performance monitoring + // logic to count processor core clocks over the period of time for which the average is required." + class RdtscTimer + { + public: + RdtscTimer(); + virtual ~RdtscTimer() {} + void start(); + void stop(); + uint64_t getCountsSinceStart() const; + float secondsPerCount(); // calibrated at this point in time + float getTotalDurationSeconds(); + private: + static uint64_t rdtsc(); + uint64_t m_totalDuration; + bool m_started; + uint64_t m_startCount; + ChronoTimer m_ctorTimer; + uint64_t m_ctorCount; + }; + + inline uint64_t + RdtscTimer::rdtsc() + { +#if defined( __x86_64__ ) + return __builtin_ia32_rdtsc(); +#else +#error "rdtsc is not defined for this platform yet" +#endif + } + + inline RdtscTimer::RdtscTimer() + : m_totalDuration( 0 ) + , m_started( false ) + , m_startCount( 0 ) + , m_ctorTimer() + , m_ctorCount( 0 ) + { + m_ctorTimer.start(); + m_ctorCount = rdtsc(); + } + + inline void + RdtscTimer::start() + { + assert( !m_started ); + m_started = true; + m_startCount = rdtsc(); } + inline void + RdtscTimer::stop() + { + assert( m_started ); + m_started = false; + m_totalDuration += getCountsSinceStart(); + } + + inline uint64_t + RdtscTimer::getCountsSinceStart() const + { + return rdtsc() - m_startCount; + } + + inline float + RdtscTimer::secondsPerCount() + { + m_ctorTimer.stop(); + float secPerCount = m_ctorTimer.getTotalDurationSeconds() / ( rdtsc() - m_ctorCount ); + m_ctorTimer.start(); // allow secondsPerCount() to be called again... + return secPerCount; + } + + inline float + RdtscTimer::getTotalDurationSeconds() + { + assert( !m_started ); + auto count = m_totalDuration; + return count * secondsPerCount(); + } + + // --------------------------------------------------------------------------- + } #endif // MGONGPUTIMER_H diff --git a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/timermap.h b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/timermap.h index 90468bd768..61222e0ecc 100644 --- a/epochX/cudacpp/pp_dy3j.mad/SubProcesses/timermap.h +++ b/epochX/cudacpp/pp_dy3j.mad/SubProcesses/timermap.h @@ -7,6 +7,7 @@ #define MGONGPUTIMERMAP_H 1 #include +#include #include #include #include @@ -28,23 +29,40 @@ namespace mgOnGpu public: TimerMap() - : m_timer(), m_active( "" ), m_partitionTimers(), m_partitionIds() {} + : m_chronoTimer() + , m_rdtscTimer() + , m_active( "" ) + , m_partitionTotalCounts() + , m_partitionIds() + , m_useChronoTimers( false ) + , m_started( false ) + { + if( getenv( "CUDACPP_RUNTIME_USECHRONOTIMERS" ) ) m_useChronoTimers = true; + } + virtual ~TimerMap() {} // Start the timer for a specific partition (key must be a non-empty string) // Stop the timer for the current partition if there is one active - float start( const std::string& key ) + uint64_t start( const std::string& key ) { assert( key != "" ); // Close the previously active partition - float last = stop(); + uint64_t last = stop(); // Switch to a new partition - m_timer.Start(); + if( !m_started ) + { + if( m_useChronoTimers ) + m_chronoTimer.start(); + else + m_rdtscTimer.start(); + m_started = true; + } m_active = key; - if( m_partitionTimers.find( key ) == m_partitionTimers.end() ) + if( m_partitionTotalCounts.find( key ) == m_partitionTotalCounts.end() ) { - m_partitionIds[key] = m_partitionTimers.size(); - m_partitionTimers[key] = 0; + m_partitionIds[key] = m_partitionTotalCounts.size(); + m_partitionTotalCounts[key] = 0; } // Open a new Cuda NVTX range NVTX_PUSH( key.c_str(), m_partitionIds[key] ); @@ -53,14 +71,22 @@ namespace mgOnGpu } // Stop the timer for the current partition if there is one active - float stop() + uint64_t stop() { // Close the previously active partition - float last = 0; + uint64_t last = 0; if( m_active != "" ) { - last = m_timer.GetDuration(); - m_partitionTimers[m_active] += last; + if( m_useChronoTimers ) + last = m_chronoTimer.getCountsSinceStart(); + else + last = m_rdtscTimer.getCountsSinceStart(); + m_partitionTotalCounts[m_active] += last; + if( m_useChronoTimers ) + m_chronoTimer.stop(); + else + m_rdtscTimer.stop(); + m_started = false; } m_active = ""; // Close the current Cuda NVTX range @@ -69,6 +95,15 @@ namespace mgOnGpu return last; } + // Return timer calibration (at this point in time for rdtsc, constant in time for chrono) + float secondsPerCount() + { + if( m_useChronoTimers ) + return m_chronoTimer.secondsPerCount(); + else + return m_rdtscTimer.secondsPerCount(); + } + // Dump the overall results void dump( std::ostream& ostr = std::cout, bool json = false ) { @@ -82,9 +117,14 @@ namespace mgOnGpu const std::string total3Key = "TOTAL (3)"; const std::string total3aKey = "TOTAL (3a)"; size_t maxsize = 0; - for( auto ip: m_partitionTimers ) + for( auto ip: m_partitionTotalCounts ) maxsize = std::max( maxsize, ip.first.size() ); maxsize = std::max( maxsize, totalKey.size() ); + // Compute individual partition total times from partition total counts + std::map partitionTotalTimes; + float secPerCount = secondsPerCount(); + for( auto ip: m_partitionTotalCounts ) + partitionTotalTimes[ip.first] = m_partitionTotalCounts[ip.first] * secPerCount; // Compute the overall total //size_t ipart = 0; float total = 0; @@ -95,10 +135,10 @@ namespace mgOnGpu float total2 = 0; float total3 = 0; float total3a = 0; - for( auto ip: m_partitionTimers ) + for( auto ip: partitionTotalTimes ) { total += ip.second; - //if ( ipart != 0 && ipart+1 != m_partitionTimers.size() ) totalBut2 += ip.second; + //if ( ipart != 0 && ipart+1 != partitionTotalTimes.size() ) totalBut2 += ip.second; if( ip.first[0] == '1' || ip.first[0] == '2' || ip.first[0] == '3' ) total123 += ip.second; if( ip.first[0] == '2' || ip.first[0] == '3' ) total23 += ip.second; if( ip.first[0] == '1' ) total1 += ip.second; @@ -113,7 +153,7 @@ namespace mgOnGpu std::string s1 = "\"", s2 = "\" : \"", s3 = " sec\","; ostr << std::setprecision( 6 ); // set precision (default=6): affects all floats ostr << std::fixed; // fixed format: affects all floats - for( auto ip: m_partitionTimers ) + for( auto ip: partitionTotalTimes ) ostr << s1 << ip.first << s2 << ip.second << s3 << std::endl; ostr << s1 << totalKey << s2 << total << s3 << std::endl << s1 << total123Key << s2 << total123 << s3 << std::endl @@ -127,7 +167,7 @@ namespace mgOnGpu // NB: 'setw' affects only the next field (of any type) ostr << std::setprecision( 6 ); // set precision (default=6): affects all floats ostr << std::fixed; // fixed format: affects all floats - for( auto ip: m_partitionTimers ) + for( auto ip: partitionTotalTimes ) ostr << std::setw( maxsize ) << ip.first << " : " << std::setw( 12 ) << ip.second << " sec" << std::endl; ostr << std::setw( maxsize ) << totalKey << " : " @@ -150,10 +190,13 @@ namespace mgOnGpu private: - Timer m_timer; + ChronoTimer m_chronoTimer; + RdtscTimer m_rdtscTimer; std::string m_active; - std::map m_partitionTimers; + std::map m_partitionTotalCounts; std::map m_partitionIds; + bool m_useChronoTimers; + bool m_started; // when the timer is stopped, it must be explicitly restarted }; }